From f260e02efab73e5fbc5cc35e78022311882e7723 Mon Sep 17 00:00:00 2001 From: Sebastiaan de Schaetzen Date: Wed, 29 Apr 2026 20:15:05 +0200 Subject: [PATCH] Refactor parser --- v0/parser.c | 245 ++++++++++++------------ v0/test.c | 18 +- v0/test_token.c | 14 ++ v0/tests/parser_alias_and_import_mix.c2 | 4 +- v0/tests/parser_alias_array.c2 | 2 +- v0/tests/parser_alias_simple.c2 | 2 +- v0/tests/parser_bad_import_name.log | 2 +- v0/tests/parser_bad_module_name.log | 2 +- v0/tests/tokenstream_primitive_types.c2 | 1 + v0/token.c | 8 + v0/token.h | 9 +- 11 files changed, 179 insertions(+), 128 deletions(-) create mode 100644 v0/tests/tokenstream_primitive_types.c2 diff --git a/v0/parser.c b/v0/parser.c index 23cfdf4..63f4b80 100644 --- a/v0/parser.c +++ b/v0/parser.c @@ -4,114 +4,6 @@ #include #include -// /** -// * Parses an import declaration. -// * -// * @param ts The token stream to parse from. -// * @param module The module being parsed. -// * @returns true on success, false on failure. -// */ -// static bool parse_import(TokenStream* ts, Module* module) { -// ImportDeclaration* new_imports = realloc(module->imports, (module->import_count + 1) * sizeof(ImportDeclaration)); -// if (!new_imports) { -// fprintf(stderr, "Out of memory\n"); -// exit(1); -// } -// module->imports = new_imports; - -// Token t = tokenstream_next(ts); -// bool is_public = false; -// if (t.token == TOKEN_PUBLIC) { -// is_public = true; -// t = tokenstream_next(ts); -// } - -// if (t.token != TOKEN_IDENTIFIER) { -// log_on_line(&t.location, t.location.column_end, "expected module name to import"); -// return false; -// } - -// char* name = (char*)malloc(t.text.length + 1); -// memcpy(name, t.text.data, t.text.length); -// name[t.text.length] = '\0'; - -// module->imports[module->import_count] = (ImportDeclaration){ .module_name = name, .is_public = is_public }; -// module->import_count++; - -// t = tokenstream_next(ts); -// if (t.token != TOKEN_SEMICOLON) { -// log_on_line(&t.location, 
t.location.column_end, "expected ';' after import"); -// return false; -// } -// return true; -// } - -// /** -// * Parses an alias declaration. -// * -// * @param ts The token stream to parse from. -// * @param module The module being parsed. -// * @returns true on success, false on failure. -// */ -// static bool parse_alias(TokenStream* ts, Module* module) { -// AliasDeclaration* new_aliases = realloc(module->aliases, (module->alias_count + 1) * sizeof(AliasDeclaration)); -// if (!new_aliases) { -// fprintf(stderr, "Out of memory\n"); -// exit(1); -// } -// module->aliases = new_aliases; - -// Token t = tokenstream_next(ts); -// if (t.token != TOKEN_IDENTIFIER) { -// log_on_line(&t.location, t.location.column_end, "expected alias name"); -// return false; -// } -// char* name = (char*)malloc(t.text.length + 1); -// memcpy(name, t.text.data, t.text.length); -// name[t.text.length] = '\0'; -// AliasDeclaration alias; -// alias.name = name; - -// t = tokenstream_next(ts); -// if (t.token != TOKEN_ASSIGN) { -// log_on_line(&t.location, t.location.column_end, "expected '='"); -// return false; -// } - -// t = tokenstream_next(ts); - -// TypeExpression type; -// if (t.token == TOKEN_IDENTIFIER && strncmp(t.text.data, "int32", t.text.length) == 0) { -// type = (TypeExpression){ .tag = TYPE_EXPRESSION_BUILTIN, .builtin = { .bitSize = 32, .isSigned = true } }; -// t = tokenstream_next(ts); -// if (t.token == TOKEN_BRACKET_OPEN) { -// t = tokenstream_next(ts); -// if (t.token != TOKEN_BRACKET_CLOSE) { -// log_on_line(&t.location, t.location.column_end, "expected ']'"); -// return false; -// } -// TypeExpression* inner = malloc(sizeof(TypeExpression)); -// *inner = type; -// type = (TypeExpression){ .tag = TYPE_EXPRESSION_ARRAY, .array = { .array = inner } }; -// t = tokenstream_next(ts); -// } -// } else { -// log_on_line(&t.location, t.location.column_end, "expected type"); -// return false; -// } - -// alias.value = type; - -// module->aliases[module->alias_count] = 
alias; -// module->alias_count++; - -// if (t.token != TOKEN_SEMICOLON) { -// log_on_line(&t.location, t.location.column_end, "expected ';'"); -// return false; -// } -// return true; -// } - typedef struct { TokenStream* ts; Token token; @@ -186,7 +78,7 @@ static char* parser_to_text(Parser* p) { /** * Parses the "module" keyword */ -static bool parse_declaration_module(Parser* p, Module* module) { +static bool parse_module_declaration(Parser* p, Module* module) { if (!parser_expect(p, TOKEN_MODULE, "expected keyword 'module'")) { return false; } @@ -199,22 +91,139 @@ static bool parse_declaration_module(Parser* p, Module* module) { return parser_expect(p, TOKEN_SEMICOLON, "expected ';' after module name"); } -static bool parse_import(Parser* p, Module* module, bool is_public) { +/** + * Parses the module name and ';' that follow the 'import' keyword, appending a new ImportDeclaration (with the given is_public flag) to module->imports; returns false on a parse error. + */ +static bool parse_import_declaration(Parser* p, Module* module, bool is_public) { module->import_count++; module->imports = realloc(module->imports, sizeof(ImportDeclaration) * module->import_count); - ImportDeclaration* import = &module->imports[module->import_count - 1]; - if (!parser_expect(p, TOKEN_IDENTIFIER, "expected module identifier")) { + ImportDeclaration* import = &module->imports[module->import_count - 1]; + memset(import, 0, sizeof(ImportDeclaration)); + import->is_public = is_public; + + if (!parser_require(p, TOKEN_IDENTIFIER, "expected module identifier")) { return false; } import->module_name = parser_to_text(p); - import->is_public = is_public; + + if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after import")) { + return false; + } + return true; } -static bool parse_alias(Parser* p, Module* module, bool is_public) { - // @copilot implement +/** + * Parses one builtin integer type keyword (u8, u16, u32, u64, i8, i16, i32, i64) into expr as a TYPE_EXPRESSION_BUILTIN with its bit size and signedness; logs "expected type expression" and returns false for any other token. + */ +static bool parse_primitive_type_expression(Parser* p, TypeExpression* expr) { + if (parser_accept(p, TOKEN_U8)) { + expr->tag = TYPE_EXPRESSION_BUILTIN; + expr->builtin.bitSize = 8; + expr->builtin.isSigned = false; + return true; + } else if (parser_accept(p, 
TOKEN_U16)) { + expr->tag = TYPE_EXPRESSION_BUILTIN; + expr->builtin.bitSize = 16; + expr->builtin.isSigned = false; + return true; + } else if (parser_accept(p, TOKEN_U32)) { + expr->tag = TYPE_EXPRESSION_BUILTIN; + expr->builtin.bitSize = 32; + expr->builtin.isSigned = false; + return true; + } else if (parser_accept(p, TOKEN_U64)) { + expr->tag = TYPE_EXPRESSION_BUILTIN; + expr->builtin.bitSize = 64; + expr->builtin.isSigned = false; + return true; + } else if (parser_accept(p, TOKEN_I8)) { + expr->tag = TYPE_EXPRESSION_BUILTIN; + expr->builtin.bitSize = 8; + expr->builtin.isSigned = true; + return true; + } else if (parser_accept(p, TOKEN_I16)) { + expr->tag = TYPE_EXPRESSION_BUILTIN; + expr->builtin.bitSize = 16; + expr->builtin.isSigned = true; + return true; + } else if (parser_accept(p, TOKEN_I32)) { + expr->tag = TYPE_EXPRESSION_BUILTIN; + expr->builtin.bitSize = 32; + expr->builtin.isSigned = true; + return true; + } else if (parser_accept(p, TOKEN_I64)) { + expr->tag = TYPE_EXPRESSION_BUILTIN; + expr->builtin.bitSize = 64; + expr->builtin.isSigned = true; + return true; + } else { + log_on_line(&p->token.location, "expected type expression"); + return false; + } +} + +/** + * Parses a primitive type optionally followed by '[' ']'; with brackets, expr becomes a TYPE_EXPRESSION_ARRAY pointing at a malloc'd copy of the element type, otherwise expr receives the primitive type itself. Returns false on a parse error. + */ +static bool parse_array_type_expression(Parser* p, TypeExpression* expr) { + TypeExpression elementType; + if (!parse_primitive_type_expression(p, &elementType)) { + return false; + } + + if (parser_accept(p, TOKEN_BRACKET_OPEN)) { + expr->tag = TYPE_EXPRESSION_ARRAY; + expr->array.array = malloc(sizeof(TypeExpression)); + *expr->array.array = elementType; + + if (!parser_expect(p, TOKEN_BRACKET_CLOSE, "expected ']' to end array type")) { + return false; + } + } else { + *expr = elementType; + return true; + } + return true; +} + +/** + * Entry point for parsing a type expression into expr; currently forwards directly to parse_array_type_expression and returns its result. + */ +static bool parse_type_expression(Parser* p, TypeExpression* expr) { + return parse_array_type_expression(p, expr); +} + +/** + * Parses the name, '=', type expression and ';' that follow the 'alias' keyword, appending a new AliasDeclaration (with the given is_public flag) to module->aliases; returns false on a parse error. + */ +static bool parse_alias_declaration(Parser* p, 
Module* module, bool is_public) { + module->alias_count++; + module->aliases = realloc(module->aliases, sizeof(AliasDeclaration) * module->alias_count); + + AliasDeclaration* alias = &module->aliases[module->alias_count - 1]; + memset(alias, 0, sizeof(AliasDeclaration)); + alias->is_public = is_public; + + if (!parser_require(p, TOKEN_IDENTIFIER, "expected alias identifier")) { + return false; + } + alias->name = parser_to_text(p); + + if (!parser_expect(p, TOKEN_ASSIGN, "expected '=' after alias name")) { + return false; + } + + if (!parse_type_expression(p, &alias->value)) { + return false; + } + + if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after alias declaration")) { + return false; + } + return true; } @@ -225,7 +234,7 @@ Module* parser_parse(TokenStream* ts) { Module* module = malloc(sizeof(Module)); memset(module, 0, sizeof(Module)); - if (!parse_declaration_module(p, module)) { + if (!parse_module_declaration(p, module)) { goto fail; } @@ -234,12 +243,12 @@ Module* parser_parse(TokenStream* ts) { bool terminal = false; do { if (parser_accept(p, TOKEN_IMPORT)) { - if (!parse_import(p, module, is_public)) { + if (!parse_import_declaration(p, module, is_public)) { goto fail; } terminal = true; } else if (parser_accept(p, TOKEN_ALIAS)) { - if (!parse_alias(p, module, is_public)) { + if (!parse_alias_declaration(p, module, is_public)) { goto fail; } terminal = true; diff --git a/v0/test.c b/v0/test.c index dc5dd48..1fe1fa9 100644 --- a/v0/test.c +++ b/v0/test.c @@ -9,7 +9,7 @@ #include static jmp_buf s_testJmp; -static const char* s_failMsg = NULL; +static char s_failMsg[1024]; static char* s_logOutput = NULL; static const char* s_currentTestName = NULL; static char* s_testSource = NULL; @@ -18,7 +18,12 @@ static Module* s_currentModule = NULL; static TokenStream* s_currentTokenStream = NULL; void fail(const char* msg) { - s_failMsg = msg; + if (msg) { + strncpy(s_failMsg, msg, sizeof(s_failMsg) - 1); + s_failMsg[sizeof(s_failMsg) - 1] = '\0'; + } else 
{ + s_failMsg[0] = '\0'; + } longjmp(s_testJmp, 1); } @@ -211,6 +216,7 @@ static TestCase s_tests[] = { TEST(test_tokenstream_keywords_and_symbols) TEST(test_tokenstream_open_fail) TEST(test_tokenstream_parentheses_and_brackets) + TEST(test_tokenstream_primitive_types) TEST(test_tokenstream_simple_keyword) TEST(test_tokenstream_unknown_token) TEST(test_tokenstream_void_function_signature) @@ -238,7 +244,7 @@ int main(int argc, char** argv) { log_set_output(log_append); printf("%s...", s_tests[i].name); fflush(stdout); - s_failMsg = NULL; + s_failMsg[0] = '\0'; if (setjmp(s_testJmp) == 0) { log_clear(); @@ -252,8 +258,14 @@ int main(int argc, char** argv) { } else { printf(" [FAIL]: %s\n", s_failMsg[0] ? s_failMsg : ""); failedTests[failedCount++] = s_tests[i].name; + + // Log output on failure + if (s_logOutput && s_logOutput[0]) { + printf("%s\n", s_logOutput); + } } + // Free AST and TokenStream after each test if (s_currentModule) { parser_free(s_currentModule); s_currentModule = NULL; diff --git a/v0/test_token.c b/v0/test_token.c index f425e84..df51987 100644 --- a/v0/test_token.c +++ b/v0/test_token.c @@ -97,3 +97,17 @@ static void test_tokenstream_info(void) { if (t2.location.line != 1) fail("expected line 1"); if (t2.location.column_start != 8) fail("expected column 8"); } + +static void test_tokenstream_primitive_types(void) { + TokenStream* ts = test_get_tokenstream(); + + if (tokenstream_next(ts).token != TOKEN_I8) fail("expected TOKEN_I8"); + if (tokenstream_next(ts).token != TOKEN_I16) fail("expected TOKEN_I16"); + if (tokenstream_next(ts).token != TOKEN_I32) fail("expected TOKEN_I32"); + if (tokenstream_next(ts).token != TOKEN_I64) fail("expected TOKEN_I64"); + if (tokenstream_next(ts).token != TOKEN_U8) fail("expected TOKEN_U8"); + if (tokenstream_next(ts).token != TOKEN_U16) fail("expected TOKEN_U16"); + if (tokenstream_next(ts).token != TOKEN_U32) fail("expected TOKEN_U32"); + if (tokenstream_next(ts).token != TOKEN_U64) fail("expected TOKEN_U64"); 
+ if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF"); +} diff --git a/v0/tests/parser_alias_and_import_mix.c2 b/v0/tests/parser_alias_and_import_mix.c2 index 5628cc9..bec1547 100644 --- a/v0/tests/parser_alias_and_import_mix.c2 +++ b/v0/tests/parser_alias_and_import_mix.c2 @@ -2,8 +2,8 @@ module mymodule; import foo; -alias myalias = int32[]; +alias myalias = i32[]; import bar; -alias otheralias = int32; +alias otheralias = i32; diff --git a/v0/tests/parser_alias_array.c2 b/v0/tests/parser_alias_array.c2 index 65139ba..b95ea9a 100644 --- a/v0/tests/parser_alias_array.c2 +++ b/v0/tests/parser_alias_array.c2 @@ -1,3 +1,3 @@ module mymodule; -alias myalias = int32[]; +alias myalias = i32[]; diff --git a/v0/tests/parser_alias_simple.c2 b/v0/tests/parser_alias_simple.c2 index 8e40d71..7a31568 100644 --- a/v0/tests/parser_alias_simple.c2 +++ b/v0/tests/parser_alias_simple.c2 @@ -1,3 +1,3 @@ module mymodule; -alias myalias = int32; +alias myalias = i32; diff --git a/v0/tests/parser_bad_import_name.log b/v0/tests/parser_bad_import_name.log index 0d50293..da31571 100644 --- a/v0/tests/parser_bad_import_name.log +++ b/v0/tests/parser_bad_import_name.log @@ -1,4 +1,4 @@ --- v0/tests/parser_bad_import_name.c2 --- 1| import ; ^^^^^^ - expected 'module' keyword + expected keyword 'module' diff --git a/v0/tests/parser_bad_module_name.log b/v0/tests/parser_bad_module_name.log index 8c8f707..5cf3ef5 100644 --- a/v0/tests/parser_bad_module_name.log +++ b/v0/tests/parser_bad_module_name.log @@ -1,4 +1,4 @@ --- v0/tests/parser_bad_module_name.c2 --- 1| import other_module; ^^^^^^ - expected 'module' keyword + expected keyword 'module' diff --git a/v0/tests/tokenstream_primitive_types.c2 b/v0/tests/tokenstream_primitive_types.c2 new file mode 100644 index 0000000..43618e8 --- /dev/null +++ b/v0/tests/tokenstream_primitive_types.c2 @@ -0,0 +1 @@ +i8 i16 i32 i64 u8 u16 u32 u64 diff --git a/v0/token.c b/v0/token.c index 0648076..dadf73e 100644 --- a/v0/token.c +++ 
b/v0/token.c @@ -32,6 +32,14 @@ static const KeywordMap keywords[] = { {"alias", TOKEN_ALIAS}, {"public", TOKEN_PUBLIC}, {"void", TOKEN_VOID}, + {"i8", TOKEN_I8}, + {"i16", TOKEN_I16}, + {"i32", TOKEN_I32}, + {"i64", TOKEN_I64}, + {"u8", TOKEN_U8}, + {"u16", TOKEN_U16}, + {"u32", TOKEN_U32}, + {"u64", TOKEN_U64}, }; /** diff --git a/v0/token.h b/v0/token.h index da2011f..69a4141 100644 --- a/v0/token.h +++ b/v0/token.h @@ -27,7 +27,14 @@ typedef enum { /* Primitives */ TOKEN_VOID, - TOKEN_BUILTIN_TYPE, + TOKEN_I8, + TOKEN_I16, + TOKEN_I32, + TOKEN_I64, + TOKEN_U8, + TOKEN_U16, + TOKEN_U32, + TOKEN_U64, /* Variable */ TOKEN_IDENTIFIER,