diff --git a/v0/ast.h b/v0/ast.h index 99f1ce6..62ea45d 100644 --- a/v0/ast.h +++ b/v0/ast.h @@ -4,124 +4,8 @@ #ifndef AST_H #define AST_H -#include "bool.h" -#include "token.h" - -#include - -typedef struct { - /** @brief The name of the module being imported. */ - char* module_name; - - /** @brief Whether the import is public or not. */ - bool is_public; -} ImportDeclaration; - -typedef enum { - EXPRESSION_INTEGER, - EXPRESSION_STRING, - EXPRESSION_BOOLEAN -} ExpressionTag; - -typedef struct { - ExpressionTag tag; - union { - int integer; - const char* string; - bool boolean; - }; -} Expression; - -typedef enum { - TYPE_EXPRESSION_BUILTIN, - TYPE_EXPRESSION_ARRAY -} TypeExpressionTag; - -/** - * An expression that evaluates to a type. - */ -typedef struct TypeExpression TypeExpression; -struct TypeExpression { - /** @brief defines which entry in the union is valid */ - TypeExpressionTag tag; - union { - /** @brief Evaluates to an array of the given type. */ - struct { - /** @brief A pointer to the type of the elements stored in the array. */ - TypeExpression* array; - } array; - /** @brief Evaluates to a builtin integer type.*/ - struct { - /** - * @brief The number of bits in the integer. - * Typical values are 8, 16, 32, and 64. - */ - int bitSize; - /** @brief `true` if the type is signed, `false` if it's unsigned. */ - bool isSigned; - } builtin; - }; -}; - -/** - * A declaration that aliases one type to another. - */ -typedef struct { - /** @brief The name of the alias. */ - const char* name; - - /** @brief The value of the alias. */ - TypeExpression value; -} AliasDeclaration; - -/** - * A declaration of a variable, which may be a constant or not, and may be static or not. - */ -typedef struct { - /** @brief The name of the variable. */ - char* name; - - /** @brief The type of the variable. */ - TypeExpression type; - - /** @brief The optional initializer expression. */ - Expression* initializer; - - /** @brief Whether the variable is public or not. */ - bool is_public; - - /** @brief Whether the variable is static or not. */ - bool is_static; - - /** @brief Whether the variable is a constant or not. */ - bool is_const; -} VariableDeclaration; - -/** - * The top-level model. - * Every file matches an entire Module. - */ -typedef struct { - /** @brief The name of the module. */ - char* name; - - /** @brief The list of imports in the module. */ - ImportDeclaration* imports; - - /** @brief The number of imports in the module. */ - size_t import_count; - - /** @brief The list of aliases in the module. */ - AliasDeclaration* aliases; - - /** @brief The number of aliases in the module. */ - size_t alias_count; - - /** @brief The list of variables in the module. */ - VariableDeclaration* variables; - - /** @brief The number of variables in the module. */ - size_t variable_count; -} Module; +#include "ast/expression.h" +#include "ast/declaration.h" +#include "ast/module.h" #endif diff --git a/v0/ast/declaration.h b/v0/ast/declaration.h new file mode 100644 index 0000000..b7324a5 --- /dev/null +++ b/v0/ast/declaration.h @@ -0,0 +1,49 @@ +#ifndef AST_DECLARATION_H +#define AST_DECLARATION_H + +#include "expression.h" +#include "../bool.h" + +typedef struct { + /** @brief The name of the module being imported. */ + char* module_name; + + /** @brief Whether the import is public or not. */ + bool is_public; +} ImportDeclaration; + +/** + * A declaration that aliases one type to another. + */ +typedef struct { + /** @brief The name of the alias. */ + const char* name; + + /** @brief The value of the alias. */ + TypeExpression value; +} AliasDeclaration; + +/** + * A declaration of a variable, which may be a constant or not, and may be static or not. + */ +typedef struct { + /** @brief The name of the variable. */ + char* name; + + /** @brief The type of the variable. */ + TypeExpression type; + + /** @brief The optional initializer expression. */ + Expression* initializer; + + /** @brief Whether the variable is public or not. */ + bool is_public; + + /** @brief Whether the variable is static or not. */ + bool is_static; + + /** @brief Whether the variable is a constant or not. */ + bool is_const; +} VariableDeclaration; + +#endif diff --git a/v0/ast/expression.h b/v0/ast/expression.h new file mode 100644 index 0000000..79d448c --- /dev/null +++ b/v0/ast/expression.h @@ -0,0 +1,52 @@ +#ifndef AST_EXPRESSION_H +#define AST_EXPRESSION_H + +#include "../bool.h" + +typedef enum { + EXPRESSION_INTEGER, + EXPRESSION_STRING, + EXPRESSION_BOOLEAN +} ExpressionTag; + +typedef struct { + ExpressionTag tag; + union { + int integer; + const char* string; + bool boolean; + }; +} Expression; + +typedef enum { + TYPE_EXPRESSION_BUILTIN, + TYPE_EXPRESSION_ARRAY +} TypeExpressionTag; + +/** + * An expression that evaluates to a type. + */ +typedef struct TypeExpression TypeExpression; +struct TypeExpression { + /** @brief defines which entry in the union is valid */ + TypeExpressionTag tag; + union { + /** @brief Evaluates to an array of the given type. */ + struct { + /** @brief A pointer to the type of the elements stored in the array. */ + TypeExpression* array; + } array; + /** @brief Evaluates to a builtin integer type.*/ + struct { + /** + * @brief The number of bits in the integer. + * Typical values are 8, 16, 32, and 64. + */ + int bitSize; + /** @brief `true` if the type is signed, `false` if it's unsigned. */ + bool isSigned; + } builtin; + }; +}; + +#endif diff --git a/v0/ast/include.mk b/v0/ast/include.mk new file mode 100644 index 0000000..a485f41 --- /dev/null +++ b/v0/ast/include.mk @@ -0,0 +1,3 @@ +# There are currently no .c files in the ast directory. +# This file is provided for future consistency. +AST_SRC := diff --git a/v0/ast/module.h b/v0/ast/module.h new file mode 100644 index 0000000..837de13 --- /dev/null +++ b/v0/ast/module.h @@ -0,0 +1,34 @@ +#ifndef AST_MODULE_H +#define AST_MODULE_H + +#include "declaration.h" +#include + +/** + * The top-level model. + * Every file matches an entire Module. + */ +typedef struct { + /** @brief The name of the module. */ + char* name; + + /** @brief The list of imports in the module. */ + ImportDeclaration* imports; + + /** @brief The number of imports in the module. */ + size_t import_count; + + /** @brief The list of aliases in the module. */ + AliasDeclaration* aliases; + + /** @brief The number of aliases in the module. */ + size_t alias_count; + + /** @brief The list of variables in the module. */ + VariableDeclaration* variables; + + /** @brief The number of variables in the module. */ + size_t variable_count; +} Module; + +#endif diff --git a/v0/include.mk b/v0/include.mk index daffb2c..c2015dc 100644 --- a/v0/include.mk +++ b/v0/include.mk @@ -1,4 +1,7 @@ -V0_SRC := v0/main.c v0/util.c v0/token.c v0/parser.c v0/log.c v0/str.c +include v0/ast/include.mk +include v0/parser/include.mk + +V0_SRC := v0/main.c v0/util.c v0/token.c $(AST_SRC) $(PARSER_SRC) v0/log.c v0/str.c # V0_TEST must only include `v0/test.c` itself, as all other test C–source files are # included directly into `v0/test.c` using `#include "test_xyz.c"`. diff --git a/v0/parser.c b/v0/parser.c deleted file mode 100644 index 2a46acf..0000000 --- a/v0/parser.c +++ /dev/null @@ -1,455 +0,0 @@ -#include "parser.h" -#include "log.h" -#include -#include -#include - -typedef struct { - TokenStream* ts; - Token token; -} Parser; - -/** - * Reads a new token into p->token. - */ -static void parser_next_token(Parser* p) { - p->token = tokenstream_next(p->ts); -} - -/** - * Reads a new token if the current token is equal to the expected token. - * - * If they are equal, it continues to the next token. - * - * @param p - * @param token The expected token. - * @returns `true` if the current token matches the expected, `false` if it does not. - */ -static bool parser_accept(Parser* p, TokenType token) { - if (p->token.token == token) { - parser_next_token(p); - return true; - } - return false; -} - -/** - * Consumes the expected token if present, otherwise logs an error. - * - * @param p The parser state. - * @param token The token type to expect. - * @param msg The error message if the token is not found. - * @return true if the token was consumed, false otherwise. - */ -static bool parser_expect(Parser* p, TokenType token, const char* msg) { - if (parser_accept(p, token)) { - return true; - } - log_on_line(&p->token.location, msg); - return false; -} - -/** - * Checks if the current token matches the expected token type without consuming it. - * - * @param p The parser state. - * @param token The token type to check. - * @return true if the current token matches, false otherwise. - */ -static bool parser_peek(Parser* p, TokenType token) { - if (p->token.token == token) { - return true; - } - return false; -} - -/** - * Checks if the current token matches the expected token type without consuming it. - * - * @param p The parser state. - * @param token The token type to check. - * @return true if the current token matches, false otherwise. - */ -static bool parser_require(Parser* p, TokenType token, const char* msg) { - if (parser_peek(p, token)) { - return true; - } - log_on_line(&p->token.location, msg); - return false; -} - -/** - * Returns the text of the current token and advances the parser to the next token. - * - * @param p The parser state. - * @return A newly allocated string containing the current token's text. - */ -static char* parser_to_text(Parser* p) { - char* str = string_copy(p->token.text); - parser_next_token(p); - return str; -} - -/** - * Parses the "module" keyword - */ -static bool parse_module_declaration(Parser* p, Module* module) { - if (!parser_expect(p, TOKEN_MODULE, "expected keyword 'module'")) { - return false; - } - - if (!parser_require(p, TOKEN_IDENTIFIER, "expected module identifier")) { - return false; - } - module->name = parser_to_text(p); - - return parser_expect(p, TOKEN_SEMICOLON, "expected ';' after module name"); -} - -/** - * Parses an import declaration. - * - * @param p The parser state. - * @param module The module to add the import to. - * @param is_public Whether the import is public. - * @return true if successful, false otherwise. - */ -static bool parse_import_declaration(Parser* p, Module* module, bool is_public) { - module->import_count++; - module->imports = realloc(module->imports, sizeof(ImportDeclaration) * module->import_count); - - ImportDeclaration* import = &module->imports[module->import_count - 1]; - memset(import, 0, sizeof(ImportDeclaration)); - import->is_public = is_public; - - if (!parser_require(p, TOKEN_IDENTIFIER, "expected module identifier")) { - return false; - } - - import->module_name = parser_to_text(p); - - if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after import")) { - return false; - } - - return true; -} - -/** - * Parses a primitive type expression. - * - * @param p The parser state. - * @param expr The type expression to populate. - * @return true if successful, false otherwise. - */ -static bool parse_primitive_type_expression(Parser* p, TypeExpression* expr) { - if (parser_accept(p, TOKEN_U8)) { - expr->tag = TYPE_EXPRESSION_BUILTIN; - expr->builtin.bitSize = 8; - expr->builtin.isSigned = false; - return true; - } else if (parser_accept(p, TOKEN_U16)) { - expr->tag = TYPE_EXPRESSION_BUILTIN; - expr->builtin.bitSize = 16; - expr->builtin.isSigned = false; - return true; - } else if (parser_accept(p, TOKEN_U32)) { - expr->tag = TYPE_EXPRESSION_BUILTIN; - expr->builtin.bitSize = 32; - expr->builtin.isSigned = false; - return true; - } else if (parser_accept(p, TOKEN_U64)) { - expr->tag = TYPE_EXPRESSION_BUILTIN; - expr->builtin.bitSize = 64; - expr->builtin.isSigned = false; - return true; - } else if (parser_accept(p, TOKEN_I8)) { - expr->tag = TYPE_EXPRESSION_BUILTIN; - expr->builtin.bitSize = 8; - expr->builtin.isSigned = true; - return true; - } else if (parser_accept(p, TOKEN_I16)) { - expr->tag = TYPE_EXPRESSION_BUILTIN; - expr->builtin.bitSize = 16; - expr->builtin.isSigned = true; - return true; - } else if (parser_accept(p, TOKEN_I32)) { - expr->tag = TYPE_EXPRESSION_BUILTIN; - expr->builtin.bitSize = 32; - expr->builtin.isSigned = true; - return true; - } else if (parser_accept(p, TOKEN_I64)) { - expr->tag = TYPE_EXPRESSION_BUILTIN; - expr->builtin.bitSize = 64; - expr->builtin.isSigned = true; - return true; - } else { - log_on_line(&p->token.location, "expected type expression"); - return false; - } -} - -/** - * Parses an array type expression. - * - * @param p The parser state. - * @param expr The type expression to populate. - * @return true if successful, false otherwise. - */ -static bool parse_array_type_expression(Parser* p, TypeExpression* expr) { - TypeExpression elementType; - if (!parse_primitive_type_expression(p, &elementType)) { - return false; - } - - if (parser_accept(p, TOKEN_BRACKET_OPEN)) { - expr->tag = TYPE_EXPRESSION_ARRAY; - expr->array.array = malloc(sizeof(TypeExpression)); - *expr->array.array = elementType; - - if (!parser_expect(p, TOKEN_BRACKET_CLOSE, "expected ']' to end array type")) { - return false; - } - } else { - *expr = elementType; - return true; - } - return true; -} - -/** - * Parses a type expression. - * - * @param p The parser state. - * @param expr The type expression to populate. - * @return true if successful, false otherwise. - */ -static bool parse_type_expression(Parser* p, TypeExpression* expr) { - return parse_array_type_expression(p, expr); -} - -/** - * Parses an alias declaration. - * - * @param p The parser state. - * @param module The module to add the alias to. - * @param is_public Whether the alias is public. - * @return true if successful, false otherwise. - */ -static bool parse_alias_declaration(Parser* p, Module* module, bool is_public) { - module->alias_count++; - module->aliases = realloc(module->aliases, sizeof(AliasDeclaration) * module->alias_count); - - AliasDeclaration* alias = &module->aliases[module->alias_count - 1]; - memset(alias, 0, sizeof(AliasDeclaration)); - - if (!parser_require(p, TOKEN_IDENTIFIER, "expected alias identifier")) { - return false; - } - alias->name = parser_to_text(p); - - if (!parser_expect(p, TOKEN_ASSIGN, "expected '=' after alias name")) { - return false; - } - - if (!parse_type_expression(p, &alias->value)) { - return false; - } - - if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after alias declaration")) { - return false; - } - - return true; -} - -/** - * Parses an expression. - */ -static bool parse_expression(Parser* p, Expression* expr) { - if (parser_accept(p, TOKEN_INTEGER)) { - expr->tag = EXPRESSION_INTEGER; - expr->integer = atoi(p->token.text.data); - return true; - } else if (parser_accept(p, TOKEN_STRING)) { - expr->tag = EXPRESSION_STRING; - expr->string = parser_to_text(p); - return true; - } else if (parser_accept(p, TOKEN_TRUE)) { - expr->tag = EXPRESSION_BOOLEAN; - expr->boolean = true; - return true; - } else if (parser_accept(p, TOKEN_FALSE)) { - expr->tag = EXPRESSION_BOOLEAN; - expr->boolean = false; - return true; - } - log_on_line(&p->token.location, "expected expression"); - return false; -} - -/** - * Parses a variable declaration. - */ -static bool parse_variable_declaration(Parser* p, Module* module, bool is_public, bool is_static, bool is_const) { - module->variable_count++; - module->variables = realloc(module->variables, sizeof(VariableDeclaration) * module->variable_count); - - VariableDeclaration* var = &module->variables[module->variable_count - 1]; - memset(var, 0, sizeof(VariableDeclaration)); - var->is_public = is_public; - var->is_static = is_static; - var->is_const = is_const; - - if (!parse_type_expression(p, &var->type)) { - return false; - } - - if (!parser_require(p, TOKEN_IDENTIFIER, "expected variable identifier")) { - return false; - } - var->name = parser_to_text(p); - - if (parser_accept(p, TOKEN_ASSIGN)) { - var->initializer = malloc(sizeof(Expression)); - if (!parse_expression(p, var->initializer)) { - return false; - } - } - - if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after variable declaration")) { - return false; - } - - return true; -} - -/** - * Checks if the current token is a primitive type. - * - * The token will not be consumed by this function, so the caller can decide how to handle it if it is a primitive type. - * - * @param p The parser state. - * @return true if the current token is a primitive type, false otherwise. - */ -static bool parser_accept_primitive(Parser* p) { - return parser_peek(p, TOKEN_I8) || parser_peek(p, TOKEN_I16) || - parser_peek(p, TOKEN_I32) || parser_peek(p, TOKEN_I64) || - parser_peek(p, TOKEN_U8) || parser_peek(p, TOKEN_U16) || - parser_peek(p, TOKEN_U32) || parser_peek(p, TOKEN_U64); -} - -Module* parser_parse(TokenStream* ts) { - Parser* p = malloc(sizeof(Parser)); - p->ts = ts; - parser_next_token(p); - - Module* module = malloc(sizeof(Module)); - memset(module, 0, sizeof(Module)); - if (!parse_module_declaration(p, module)) { - goto fail; - } - - while (!parser_peek(p, TOKEN_EOF)) { - bool is_public = false; - bool is_static = false; - bool is_const = false; - bool terminal = false; - - while (!terminal) { - if (parser_accept(p, TOKEN_IMPORT)) { - if (is_static) { - log_on_line(&p->token.location, "import declarations cannot be static or const"); - goto fail; - } - if (is_const) { - log_on_line(&p->token.location, "import declarations cannot be static or const"); - goto fail; - } - if (!parse_import_declaration(p, module, is_public)) { - goto fail; - } - terminal = true; - } else if (parser_accept(p, TOKEN_ALIAS)) { - if (is_static) { - log_on_line(&p->token.location, "alias declarations cannot be static or const"); - goto fail; - } - if (is_const) { - log_on_line(&p->token.location, "alias declarations cannot be static or const"); - goto fail; - } - if (!parse_alias_declaration(p, module, is_public)) { - goto fail; - } - terminal = true; - } else if (parser_accept(p, TOKEN_PUBLIC)) { - is_public = true; - } else if (parser_accept(p, TOKEN_STATIC)) { - is_static = true; - } else if (parser_accept(p, TOKEN_CONST)) { - is_const = true; - } else if (parser_accept_primitive(p)) { - if (!parse_variable_declaration(p, module, is_public, is_static, is_const)) { - goto fail; - } - terminal = true; - } else { - log_on_line(&p->token.location, "unexpected token"); - goto fail; - } - } - } - - free(p); - return module; -fail: - free(p); - parser_free(module); - return NULL; -} - -void free_type_expression(TypeExpression* expr) { - if (expr->tag == TYPE_EXPRESSION_ARRAY) { - free_type_expression(expr->array.array); - free(expr->array.array); - } -} - -void parser_free(Module* module) { - if (module == NULL) { - return; - } - - if (module->imports != NULL) { - for(size_t i = 0; i < module->import_count; i++) { - free(module->imports[i].module_name); - } - free(module->imports); - } - - if (module->aliases != NULL) { - for(size_t i = 0; i < module->alias_count; i++) { - free((void*)module->aliases[i].name); - free_type_expression(&module->aliases[i].value); - } - free(module->aliases); - } - - if (module->variables != NULL) { - for(size_t i = 0; i < module->variable_count; i++) { - free(module->variables[i].name); - free_type_expression(&module->variables[i].type); - if (module->variables[i].initializer) { - if (module->variables[i].initializer->tag == EXPRESSION_STRING) { - free((void*)module->variables[i].initializer->string); - } - free(module->variables[i].initializer); - } - } - free(module->variables); - } - - free(module->name); - free(module); -} diff --git a/v0/parser/core.c b/v0/parser/core.c new file mode 100644 index 0000000..567d33e --- /dev/null +++ b/v0/parser/core.c @@ -0,0 +1,52 @@ +#include "internal.h" +#include "../str.h" +#include "../log.h" +#include + +void parser_next_token(Parser* p) { + p->token = tokenstream_next(p->ts); +} + +bool parser_accept(Parser* p, TokenType token) { + if (p->token.token == token) { + parser_next_token(p); + return true; + } + return false; +} + +bool parser_expect(Parser* p, TokenType token, const char* msg) { + if (parser_accept(p, token)) { + return true; + } + log_on_line(&p->token.location, msg); + return false; +} + +bool parser_peek(Parser* p, TokenType token) { + if (p->token.token == token) { + return true; + } + return false; +} + +bool parser_require(Parser* p, TokenType token, const char* msg) { + if (parser_peek(p, token)) { + return true; + } + log_on_line(&p->token.location, msg); + return false; +} + +char* parser_to_text(Parser* p) { + char* str = string_copy(p->token.text); + parser_next_token(p); + return str; +} + +bool parser_accept_primitive(Parser* p) { + return parser_peek(p, TOKEN_I8) || parser_peek(p, TOKEN_I16) || + parser_peek(p, TOKEN_I32) || parser_peek(p, TOKEN_I64) || + parser_peek(p, TOKEN_U8) || parser_peek(p, TOKEN_U16) || + parser_peek(p, TOKEN_U32) || parser_peek(p, TOKEN_U64); +} diff --git a/v0/parser/declaration.c b/v0/parser/declaration.c new file mode 100644 index 0000000..c3286f5 --- /dev/null +++ b/v0/parser/declaration.c @@ -0,0 +1,87 @@ +#include "internal.h" +#include +#include + +bool parse_import_declaration(Parser* p, Module* module, bool is_public) { + module->import_count++; + module->imports = realloc(module->imports, sizeof(ImportDeclaration) * module->import_count); + + ImportDeclaration* import = &module->imports[module->import_count - 1]; + memset(import, 0, sizeof(ImportDeclaration)); + import->is_public = is_public; + + if (!parser_require(p, TOKEN_IDENTIFIER, "expected module identifier")) { + return false; + } + + import->module_name = parser_to_text(p); + + if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after import")) { + return false; + } + + return true; +} + +bool parse_alias_declaration(Parser* p, Module* module, bool is_public) { + (void)is_public; + module->alias_count++; + module->aliases = realloc(module->aliases, sizeof(AliasDeclaration) * module->alias_count); + + AliasDeclaration* alias = &module->aliases[module->alias_count - 1]; + memset(alias, 0, sizeof(AliasDeclaration)); + + if (!parser_require(p, TOKEN_IDENTIFIER, "expected alias identifier")) { + return false; + } + alias->name = parser_to_text(p); + + if (!parser_expect(p, TOKEN_ASSIGN, "expected '=' after alias name")) { + return false; + } + + if (!parse_type_expression(p, &alias->value)) { + return false; + } + + if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after alias declaration")) { + return false; + } + + return true; +} + +bool parse_variable_declaration(Parser* p, Module* module, bool is_public, bool is_static, bool is_const) { + module->variable_count++; + module->variables = realloc(module->variables, sizeof(VariableDeclaration) * module->variable_count); + + VariableDeclaration* var = &module->variables[module->variable_count - 1]; + memset(var, 0, sizeof(VariableDeclaration)); + var->is_public = is_public; + var->is_static = is_static; + var->is_const = is_const; + + if (parser_accept_primitive(p)) { + if (!parse_type_expression(p, &var->type)) { + return false; + } + } + + if (!parser_require(p, TOKEN_IDENTIFIER, "expected variable identifier")) { + return false; + } + var->name = parser_to_text(p); + + if (parser_accept(p, TOKEN_ASSIGN)) { + var->initializer = malloc(sizeof(Expression)); + if (!parse_expression(p, var->initializer)) { + return false; + } + } + + if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after variable declaration")) { + return false; + } + + return true; +} diff --git a/v0/parser/expression.c b/v0/parser/expression.c new file mode 100644 index 0000000..31af7eb --- /dev/null +++ b/v0/parser/expression.c @@ -0,0 +1,105 @@ +#include "internal.h" +#include "../log.h" +#include + +bool parse_primitive_type_expression(Parser* p, TypeExpression* expr) { + if (parser_accept(p, TOKEN_U8)) { + expr->tag = TYPE_EXPRESSION_BUILTIN; + expr->builtin.bitSize = 8; + expr->builtin.isSigned = false; + return true; + } else if (parser_accept(p, TOKEN_U16)) { + expr->tag = TYPE_EXPRESSION_BUILTIN; + expr->builtin.bitSize = 16; + expr->builtin.isSigned = false; + return true; + } else if (parser_accept(p, TOKEN_U32)) { + expr->tag = TYPE_EXPRESSION_BUILTIN; + expr->builtin.bitSize = 32; + expr->builtin.isSigned = false; + return true; + } else if (parser_accept(p, TOKEN_U64)) { + expr->tag = TYPE_EXPRESSION_BUILTIN; + expr->builtin.bitSize = 64; + expr->builtin.isSigned = false; + return true; + } else if (parser_accept(p, TOKEN_I8)) { + expr->tag = TYPE_EXPRESSION_BUILTIN; + expr->builtin.bitSize = 8; + expr->builtin.isSigned = true; + return true; + } else if (parser_accept(p, TOKEN_I16)) { + expr->tag = TYPE_EXPRESSION_BUILTIN; + expr->builtin.bitSize = 16; + expr->builtin.isSigned = true; + return true; + } else if (parser_accept(p, TOKEN_I32)) { + expr->tag = TYPE_EXPRESSION_BUILTIN; + expr->builtin.bitSize = 32; + expr->builtin.isSigned = true; + return true; + } else if (parser_accept(p, TOKEN_I64)) { + expr->tag = TYPE_EXPRESSION_BUILTIN; + expr->builtin.bitSize = 64; + expr->builtin.isSigned = true; + return true; + } else { + log_on_line(&p->token.location, "expected type expression"); + return false; + } +} + +bool parse_array_type_expression(Parser* p, TypeExpression* expr) { + TypeExpression elementType; + if (!parse_primitive_type_expression(p, &elementType)) { + return false; + } + + if (parser_accept(p, TOKEN_BRACKET_OPEN)) { + expr->tag = TYPE_EXPRESSION_ARRAY; + expr->array.array = malloc(sizeof(TypeExpression)); + *expr->array.array = elementType; + + if (!parser_expect(p, TOKEN_BRACKET_CLOSE, "expected ']' to end array type")) { + return false; + } + } else { + *expr = elementType; + return true; + } + return true; +} + +bool parse_type_expression(Parser* p, TypeExpression* expr) { + return parse_array_type_expression(p, expr); +} + +bool parse_expression(Parser* p, Expression* expr) { + if (parser_peek(p, TOKEN_INTEGER)) { + expr->tag = EXPRESSION_INTEGER; + expr->integer = atoi(p->token.text.data); + parser_next_token(p); + return true; + } else if (parser_peek(p, TOKEN_STRING)) { + expr->tag = EXPRESSION_STRING; + expr->string = parser_to_text(p); + return true; + } else if (parser_accept(p, TOKEN_TRUE)) { + expr->tag = EXPRESSION_BOOLEAN; + expr->boolean = true; + return true; + } else if (parser_accept(p, TOKEN_FALSE)) { + expr->tag = EXPRESSION_BOOLEAN; + expr->boolean = false; + return true; + } + log_on_line(&p->token.location, "expected expression"); + return false; +} + +void free_type_expression(TypeExpression* expr) { + if (expr->tag == TYPE_EXPRESSION_ARRAY) { + free_type_expression(expr->array.array); + free(expr->array.array); + } +} diff --git a/v0/parser/include.mk b/v0/parser/include.mk new file mode 100644 index 0000000..ee9ff30 --- /dev/null +++ b/v0/parser/include.mk @@ -0,0 +1 @@ +PARSER_SRC := v0/parser/core.c v0/parser/expression.c v0/parser/declaration.c v0/parser/module.c diff --git a/v0/parser/internal.h b/v0/parser/internal.h new file mode 100644 index 0000000..9f09452 --- /dev/null +++ b/v0/parser/internal.h @@ -0,0 +1,37 @@ +#ifndef PARSER_INTERNAL_H +#define PARSER_INTERNAL_H + +#include "../parser.h" +#include "../token.h" +#include "../ast.h" + +typedef struct { + TokenStream* ts; + Token token; +} Parser; + +// Core functions +void parser_next_token(Parser* p); +bool parser_accept(Parser* p, TokenType token); +bool parser_expect(Parser* p, TokenType token, const char* msg); +bool parser_peek(Parser* p, TokenType token); +bool parser_require(Parser* p, TokenType token, const char* msg); +char* parser_to_text(Parser* p); +bool parser_accept_primitive(Parser* p); + +// Base parsing (expressions, types) +bool parse_primitive_type_expression(Parser* p, TypeExpression* expr); +bool parse_array_type_expression(Parser* p, TypeExpression* expr); +bool parse_type_expression(Parser* p, TypeExpression* expr); +bool parse_expression(Parser* p, Expression* expr); +void free_type_expression(TypeExpression* expr); + +// Declaration parsing +bool parse_import_declaration(Parser* p, Module* module, bool is_public); +bool parse_alias_declaration(Parser* p, Module* module, bool is_public); +bool parse_variable_declaration(Parser* p, Module* module, bool is_public, bool is_static, bool is_const); + +// Module parsing +bool parse_module_declaration(Parser* p, Module* module); + +#endif diff --git a/v0/parser/module.c b/v0/parser/module.c new file mode 100644 index 0000000..1c83f99 --- /dev/null +++ b/v0/parser/module.c @@ -0,0 +1,125 @@ +#include "internal.h" +#include "../log.h" +#include +#include + +bool parse_module_declaration(Parser* p, Module* module) { + if (!parser_expect(p, TOKEN_MODULE, "expected keyword 'module'")) { + return false; + } + + if (!parser_require(p, TOKEN_IDENTIFIER, "expected module identifier")) { + return false; + } + module->name = parser_to_text(p); + + return parser_expect(p, TOKEN_SEMICOLON, "expected ';' after module name"); +} + +Module* parser_parse(TokenStream* ts) { + Parser* p = malloc(sizeof(Parser)); + p->ts = ts; + parser_next_token(p); + + Module* module = malloc(sizeof(Module)); + memset(module, 0, sizeof(Module)); + if (!parse_module_declaration(p, module)) { + goto fail; + } + + while (!parser_peek(p, TOKEN_EOF)) { + bool is_public = false; + bool is_static = false; + bool is_const = false; + bool terminal = false; + + while (!terminal) { + if (parser_accept(p, TOKEN_IMPORT)) { + if (is_static) { + log_on_line(&p->token.location, "import declarations cannot be static or const"); + goto fail; + } + if (is_const) { + log_on_line(&p->token.location, "import declarations cannot be static or const"); + goto fail; + } + if (!parse_import_declaration(p, module, is_public)) { + goto fail; + } + terminal = true; + } else if (parser_accept(p, TOKEN_ALIAS)) { + if (is_static) { + log_on_line(&p->token.location, "alias declarations cannot be static or const"); + goto fail; + } + if (is_const) { + log_on_line(&p->token.location, "alias declarations cannot be static or const"); + goto fail; + } + if (!parse_alias_declaration(p, module, is_public)) { + goto fail; + } + terminal = true; + } else if (parser_accept(p, TOKEN_PUBLIC)) { + is_public = true; + } else if (parser_accept(p, TOKEN_STATIC)) { + is_static = true; + } else if (parser_accept(p, TOKEN_CONST)) { + is_const = true; + } else if (parser_accept(p, TOKEN_VAR) || parser_accept_primitive(p)) { + if (!parse_variable_declaration(p, module, is_public, is_static, is_const)) { + goto fail; + } + terminal = true; + } else { + log_on_line(&p->token.location, "unexpected token"); + goto fail; + } + } + } + + free(p); + return module; +fail: + free(p); + parser_free(module); + return NULL; +} + +void parser_free(Module* module) { + if (module == NULL) { + return; + } + + if (module->imports != NULL) { + for(size_t i = 0; i < module->import_count; i++) { + free(module->imports[i].module_name); + } + free(module->imports); + } + + if (module->aliases != NULL) { + for(size_t i = 0; i < module->alias_count; i++) { + free((void*)module->aliases[i].name); + free_type_expression(&module->aliases[i].value); + } + free(module->aliases); + } + + if (module->variables != NULL) { + for(size_t i = 0; i < module->variable_count; i++) { + free(module->variables[i].name); + free_type_expression(&module->variables[i].type); + if (module->variables[i].initializer) { + if (module->variables[i].initializer->tag == EXPRESSION_STRING) { + free((void*)module->variables[i].initializer->string); + } + free(module->variables[i].initializer); + } + } + free(module->variables); + } + + free(module->name); + free(module); +} diff --git a/v0/parser/test_core.c b/v0/parser/test_core.c new file mode 100644 index 0000000..94f537f --- /dev/null +++ b/v0/parser/test_core.c @@ -0,0 +1,8 @@ +#include "../test.h" +#include "../parser.h" + +// Currently core utilities are tested indirectly through other parser tests. +// Placeholder for future explicit core utility tests. +static void test_parser_core_placeholder(void) { + // No-op +} diff --git a/v0/test_parser.c b/v0/parser/test_declaration.c similarity index 59% rename from v0/test_parser.c rename to v0/parser/test_declaration.c index 5a5c0cd..90d8c14 100644 --- a/v0/test_parser.c +++ b/v0/parser/test_declaration.c @@ -1,25 +1,8 @@ -#include "test.h" -#include "parser.h" +#include "../test.h" +#include "../parser.h" #include #include -static void test_parser_module_name(void) { - Module* m = test_get_ast(); - - assert_not_null(m, "expected module to be parsed"); - assert_str("my_module", m->name, "expected name 'my_module'"); -} - -static void test_parser_bad_module_name(void) { - test_get_ast(); - assert_log_file("expected error to be logged for bad module name"); -} - -static void test_parser_missing_semicolon_module(void) { - test_get_ast(); - assert_log_file("expected error for missing semicolon"); -} - static void test_parser_missing_semicolon_import(void) { test_get_ast(); assert_log_file("expected error for missing semicolon"); @@ -61,24 +44,6 @@ static void test_parser_alias_simple(void) { assert_int(1, (int)m->alias_count, "expected correct number of aliases"); AliasDeclaration alias = m->aliases[0]; assert_str("myalias", alias.name, "expected correct alias name"); - assert_int(TYPE_EXPRESSION_BUILTIN, alias.value.tag, "expected correct alias tag"); - assert_int(32, alias.value.builtin.bitSize, "expected bitSize 32"); - assert_true(alias.value.builtin.isSigned, "expected signed"); -} - -static void test_parser_alias_array(void) { - Module* m = test_get_ast(); - - assert_not_null(m, "expected module to be parsed"); - assert_int(1, (int)m->alias_count, "expected correct number of aliases"); - AliasDeclaration alias = m->aliases[0]; - assert_str("myalias", alias.name, "expected correct alias name"); - assert_int(TYPE_EXPRESSION_ARRAY, alias.value.tag, "expected correct alias tag"); - TypeExpression* valueType = alias.value.array.array; - assert_not_null(valueType, "expected pointer to array type"); - assert_int(TYPE_EXPRESSION_BUILTIN, valueType->tag, "expected correct type tag"); - assert_int(32, valueType->builtin.bitSize, "expected bitSize 32"); - assert_true(valueType->builtin.isSigned, "expected signed"); } static void test_parser_variable_simple(void) { @@ -88,9 +53,6 @@ static void test_parser_variable_simple(void) { assert_int(1, (int)m->variable_count, "expected correct number of variables"); VariableDeclaration var = m->variables[0]; assert_str("my_var", var.name, "expected correct variable name"); - assert_int(TYPE_EXPRESSION_BUILTIN, var.type.tag, "expected correct type tag"); - assert_int(32, var.type.builtin.bitSize, "expected bitSize 32"); - assert_true(var.type.builtin.isSigned, "expected signed"); assert_false(var.is_const, "expected not const"); assert_false(var.is_static, "expected not static"); } @@ -125,17 +87,3 @@ static void test_parser_multiple_vars(void) { assert_str("var1", m->variables[0].name, "expected first variable name 'var1'"); assert_str("var2", m->variables[1].name, "expected second variable name 'var2'"); } - - -static void test_parser_variable_init(void) { - Module* m = test_get_ast(); - - assert_not_null(m, "expected module to be parsed"); - assert_int(1, (int)m->variable_count, "expected 1 variable"); - VariableDeclaration* var = &m->variables[0]; - assert_str("x", var->name, "expected variable name 'x'"); - assert_not_null(var->initializer, "expected variable to have an initializer"); - assert_int(EXPRESSION_INTEGER, var->initializer->tag, "expected integer initializer"); - assert_int(123, var->initializer->integer, "expected value 123"); - parser_free(m); -} diff --git a/v0/parser/test_expression.c b/v0/parser/test_expression.c new file mode 100644 index 0000000..019c284 --- /dev/null +++ b/v0/parser/test_expression.c @@ -0,0 +1,52 @@ +#include "../test.h" +#include "../parser.h" +#include +#include + +static void test_parser_alias_simple_type(void) { + Module* m = test_get_ast(); + + assert_not_null(m, "expected module to be parsed"); + assert_int(1, (int)m->alias_count, "expected correct number of aliases"); + AliasDeclaration alias = m->aliases[0]; + assert_int(TYPE_EXPRESSION_BUILTIN, alias.value.tag, "expected correct alias tag"); + assert_int(32, alias.value.builtin.bitSize, "expected bitSize 32"); + assert_true(alias.value.builtin.isSigned, "expected signed"); +} + +static void test_parser_alias_array(void) { + Module* m = test_get_ast(); + + assert_not_null(m, "expected module to be parsed"); + assert_int(1, (int)m->alias_count, "expected correct number of aliases"); + AliasDeclaration alias = m->aliases[0]; + assert_int(TYPE_EXPRESSION_ARRAY, alias.value.tag, "expected correct alias tag"); + TypeExpression* valueType = alias.value.array.array; + assert_not_null(valueType, "expected pointer to array type"); + assert_int(TYPE_EXPRESSION_BUILTIN, valueType->tag, "expected correct type tag"); + assert_int(32, valueType->builtin.bitSize, "expected bitSize 32"); + assert_true(valueType->builtin.isSigned, "expected signed"); +} + +static void test_parser_variable_init(void) { + Module* m = test_get_ast(); + + assert_not_null(m, "expected module to be parsed"); + assert_int(1, (int)m->variable_count, "expected 1 variable"); + VariableDeclaration* var = &m->variables[0]; + assert_str("x", var->name, "expected variable name 'x'"); + assert_not_null(var->initializer, "expected variable to have an initializer"); + assert_int(EXPRESSION_INTEGER, var->initializer->tag, "expected integer initializer"); + assert_int(123, var->initializer->integer, "expected value 123"); +} + +static void test_parser_variable_simple_type(void) { + Module* m = test_get_ast(); + + assert_not_null(m, "expected module to be parsed"); + assert_int(1, (int)m->variable_count, "expected correct number of variables"); + VariableDeclaration var = m->variables[0]; + assert_int(TYPE_EXPRESSION_BUILTIN, var.type.tag, "expected correct type tag"); + assert_int(32, var.type.builtin.bitSize, "expected bitSize 32"); + assert_true(var.type.builtin.isSigned, "expected signed"); +} diff --git a/v0/parser/test_module.c b/v0/parser/test_module.c new file mode 100644 index 0000000..b49bb49 --- /dev/null +++ b/v0/parser/test_module.c @@ -0,0 +1,21 @@ +#include "../test.h" +#include "../parser.h" +#include +#include + +static void test_parser_module_name(void) { + Module* m = test_get_ast(); + + assert_not_null(m, "expected module to be parsed"); + assert_str("my_module", m->name, "expected name 'my_module'"); +} + +static void test_parser_bad_module_name(void) { + test_get_ast(); + assert_log_file("expected error to be logged for bad module name"); +} + +static void test_parser_missing_semicolon_module(void) { + test_get_ast(); + assert_log_file("expected error for missing semicolon"); +} diff --git a/v0/test.c b/v0/test.c index 7fdcfb1..d78939c 100644 --- a/v0/test.c +++ b/v0/test.c @@ -189,7 +189,10 @@ typedef struct { } TestCase; #include "test_token.c" -#include "test_parser.c" +#include "parser/test_module.c" +#include "parser/test_declaration.c" +#include "parser/test_expression.c" +#include "parser/test_core.c" #include "test_log.c" static int s_totalTests; @@ -201,20 +204,23 @@ static TestCase s_tests[] = { TEST(test_log_error) TEST(test_log_on_line_variadic) TEST(test_log_on_line) - TEST(test_parser_alias_array) - TEST(test_parser_alias_simple) - TEST(test_parser_bad_import_name) - TEST(test_parser_bad_module_name) - TEST(test_parser_imports) - TEST(test_parser_missing_semicolon_import) - TEST(test_parser_missing_semicolon_module) TEST(test_parser_module_name) + TEST(test_parser_bad_module_name) + TEST(test_parser_missing_semicolon_module) + TEST(test_parser_missing_semicolon_import) + TEST(test_parser_bad_import_name) + TEST(test_parser_imports) TEST(test_parser_public_imports) + TEST(test_parser_alias_simple) + TEST(test_parser_alias_simple_type) + TEST(test_parser_alias_array) TEST(test_parser_variable_simple) + TEST(test_parser_variable_simple_type) TEST(test_parser_variable_const) TEST(test_parser_variable_init) TEST(test_parser_variable_static) TEST(test_parser_multiple_vars) + TEST(test_parser_core_placeholder) TEST(test_tokenstream_comma) TEST(test_tokenstream_info) TEST(test_tokenstream_keywords_and_symbols) diff --git a/v0/tests/parser_alias_simple_type.c2 b/v0/tests/parser_alias_simple_type.c2 new file mode 100644 index 0000000..7a31568 --- /dev/null +++ b/v0/tests/parser_alias_simple_type.c2 @@ -0,0 +1,3 @@ +module mymodule; + +alias myalias = i32; diff --git a/v0/tests/parser_variable_simple_type.c2 b/v0/tests/parser_variable_simple_type.c2 new file mode 100644 index 0000000..3ef4b2b --- /dev/null +++ b/v0/tests/parser_variable_simple_type.c2 @@ -0,0 +1,4 @@ +module my_module; + +// Defines a global variable called my_var. +i32 my_var;