Refactor parser

This commit is contained in:
2026-04-29 20:15:05 +02:00
parent 1c5d49d682
commit f260e02efa
11 changed files with 179 additions and 128 deletions
+127 -118
View File
@@ -4,114 +4,6 @@
#include <string.h>
#include <stdio.h>
// /**
// * Parses an import declaration.
// *
// * @param ts The token stream to parse from.
// * @param module The module being parsed.
// * @returns true on success, false on failure.
// */
// static bool parse_import(TokenStream* ts, Module* module) {
// ImportDeclaration* new_imports = realloc(module->imports, (module->import_count + 1) * sizeof(ImportDeclaration));
// if (!new_imports) {
// fprintf(stderr, "Out of memory\n");
// exit(1);
// }
// module->imports = new_imports;
// Token t = tokenstream_next(ts);
// bool is_public = false;
// if (t.token == TOKEN_PUBLIC) {
// is_public = true;
// t = tokenstream_next(ts);
// }
// if (t.token != TOKEN_IDENTIFIER) {
// log_on_line(&t.location, t.location.column_end, "expected module name to import");
// return false;
// }
// char* name = (char*)malloc(t.text.length + 1);
// memcpy(name, t.text.data, t.text.length);
// name[t.text.length] = '\0';
// module->imports[module->import_count] = (ImportDeclaration){ .module_name = name, .is_public = is_public };
// module->import_count++;
// t = tokenstream_next(ts);
// if (t.token != TOKEN_SEMICOLON) {
// log_on_line(&t.location, t.location.column_end, "expected ';' after import");
// return false;
// }
// return true;
// }
// /**
// * Parses an alias declaration.
// *
// * @param ts The token stream to parse from.
// * @param module The module being parsed.
// * @returns true on success, false on failure.
// */
// static bool parse_alias(TokenStream* ts, Module* module) {
// AliasDeclaration* new_aliases = realloc(module->aliases, (module->alias_count + 1) * sizeof(AliasDeclaration));
// if (!new_aliases) {
// fprintf(stderr, "Out of memory\n");
// exit(1);
// }
// module->aliases = new_aliases;
// Token t = tokenstream_next(ts);
// if (t.token != TOKEN_IDENTIFIER) {
// log_on_line(&t.location, t.location.column_end, "expected alias name");
// return false;
// }
// char* name = (char*)malloc(t.text.length + 1);
// memcpy(name, t.text.data, t.text.length);
// name[t.text.length] = '\0';
// AliasDeclaration alias;
// alias.name = name;
// t = tokenstream_next(ts);
// if (t.token != TOKEN_ASSIGN) {
// log_on_line(&t.location, t.location.column_end, "expected '='");
// return false;
// }
// t = tokenstream_next(ts);
// TypeExpression type;
// if (t.token == TOKEN_IDENTIFIER && strncmp(t.text.data, "int32", t.text.length) == 0) {
// type = (TypeExpression){ .tag = TYPE_EXPRESSION_BUILTIN, .builtin = { .bitSize = 32, .isSigned = true } };
// t = tokenstream_next(ts);
// if (t.token == TOKEN_BRACKET_OPEN) {
// t = tokenstream_next(ts);
// if (t.token != TOKEN_BRACKET_CLOSE) {
// log_on_line(&t.location, t.location.column_end, "expected ']'");
// return false;
// }
// TypeExpression* inner = malloc(sizeof(TypeExpression));
// *inner = type;
// type = (TypeExpression){ .tag = TYPE_EXPRESSION_ARRAY, .array = { .array = inner } };
// t = tokenstream_next(ts);
// }
// } else {
// log_on_line(&t.location, t.location.column_end, "expected type");
// return false;
// }
// alias.value = type;
// module->aliases[module->alias_count] = alias;
// module->alias_count++;
// if (t.token != TOKEN_SEMICOLON) {
// log_on_line(&t.location, t.location.column_end, "expected ';'");
// return false;
// }
// return true;
// }
typedef struct {
TokenStream* ts;
Token token;
@@ -186,7 +78,7 @@ static char* parser_to_text(Parser* p) {
/**
* Parses the "module" keyword
*/
static bool parse_declaration_module(Parser* p, Module* module) {
static bool parse_module_declaration(Parser* p, Module* module) {
if (!parser_expect(p, TOKEN_MODULE, "expected keyword 'module'")) {
return false;
}
@@ -199,22 +91,139 @@ static bool parse_declaration_module(Parser* p, Module* module) {
return parser_expect(p, TOKEN_SEMICOLON, "expected ';' after module name");
}
static bool parse_import(Parser* p, Module* module, bool is_public) {
/**
* @copilot add docs
*/
static bool parse_import_declaration(Parser* p, Module* module, bool is_public) {
module->import_count++;
module->imports = realloc(module->imports, sizeof(ImportDeclaration) * module->import_count);
ImportDeclaration* import = &module->imports[module->import_count - 1];
if (!parser_expect(p, TOKEN_IDENTIFIER, "expected module identifier")) {
ImportDeclaration* import = &module->imports[module->import_count - 1];
memset(import, 0, sizeof(ImportDeclaration));
import->is_public = is_public;
if (!parser_require(p, TOKEN_IDENTIFIER, "expected module identifier")) {
return false;
}
import->module_name = parser_to_text(p);
import->is_public = is_public;
if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after import")) {
return false;
}
return true;
}
static bool parse_alias(Parser* p, Module* module, bool is_public) {
// @copilot implement
/**
 * Parses a primitive (builtin integer) type keyword.
 *
 * The unsigned keywords (u8, u16, u32, u64) are tried first, followed by the
 * signed ones (i8, i16, i32, i64), each through parser_accept.
 *
 * @param p The parser to read from.
 * @param expr Receives a TYPE_EXPRESSION_BUILTIN with the matching bit size
 *             and signedness on success; it is not written on failure.
 * @returns true if one of the keywords was accepted; otherwise logs
 *          "expected type expression" and returns false.
 */
static bool parse_primitive_type_expression(Parser* p, TypeExpression* expr) {
    int bit_size;
    bool is_signed;

    /* Work out width and signedness first; expr is filled in once below. */
    if (parser_accept(p, TOKEN_U8)) {
        bit_size = 8;
        is_signed = false;
    } else if (parser_accept(p, TOKEN_U16)) {
        bit_size = 16;
        is_signed = false;
    } else if (parser_accept(p, TOKEN_U32)) {
        bit_size = 32;
        is_signed = false;
    } else if (parser_accept(p, TOKEN_U64)) {
        bit_size = 64;
        is_signed = false;
    } else if (parser_accept(p, TOKEN_I8)) {
        bit_size = 8;
        is_signed = true;
    } else if (parser_accept(p, TOKEN_I16)) {
        bit_size = 16;
        is_signed = true;
    } else if (parser_accept(p, TOKEN_I32)) {
        bit_size = 32;
        is_signed = true;
    } else if (parser_accept(p, TOKEN_I64)) {
        bit_size = 64;
        is_signed = true;
    } else {
        log_on_line(&p->token.location, "expected type expression");
        return false;
    }

    expr->tag = TYPE_EXPRESSION_BUILTIN;
    expr->builtin.bitSize = bit_size;
    expr->builtin.isSigned = is_signed;
    return true;
}
/**
 * Parses a primitive type optionally followed by an array suffix.
 *
 * Accepted forms are `T` and `T[]`, where `T` is whatever
 * parse_primitive_type_expression accepts.
 *
 * @param p The parser to read from.
 * @param expr Receives the parsed type. For `T[]` it becomes a
 *             TYPE_EXPRESSION_ARRAY whose array.array points to a
 *             heap-allocated copy of the element type.
 * @returns true on success; false if no primitive type was found, the
 *          closing ']' is missing, or the element allocation failed.
 */
static bool parse_array_type_expression(Parser* p, TypeExpression* expr) {
    TypeExpression elementType;
    if (!parse_primitive_type_expression(p, &elementType)) {
        return false;
    }

    if (!parser_accept(p, TOKEN_BRACKET_OPEN)) {
        /* No '[' follows, so the type is just the primitive itself. */
        *expr = elementType;
        return true;
    }

    /* Check the allocation before writing through it. */
    TypeExpression* element = malloc(sizeof *element);
    if (!element) {
        fprintf(stderr, "Out of memory\n");
        return false;
    }
    *element = elementType;

    /*
     * Attach the element before looking for ']' so it stays reachable
     * through expr even when the closing bracket is missing.
     * NOTE(review): confirm the failure path frees expr->array.array.
     */
    expr->tag = TYPE_EXPRESSION_ARRAY;
    expr->array.array = element;

    return parser_expect(p, TOKEN_BRACKET_CLOSE, "expected ']' to end array type");
}
/**
 * Parses a type expression.
 *
 * This is the entry point for type parsing; at present it forwards
 * directly to parse_array_type_expression.
 *
 * @param p The parser to read from.
 * @param expr Receives the parsed type expression.
 * @returns The result of parse_array_type_expression.
 */
static bool parse_type_expression(Parser* p, TypeExpression* expr) {
return parse_array_type_expression(p, expr);
}
/**
 * Parses the body of an alias declaration: `<identifier> = <type> ;`.
 *
 * Invoked by parser_parse once the `alias` keyword has been accepted.
 * A new zero-initialised entry is appended to module->aliases before
 * parsing starts, so a failed parse leaves a partially filled entry in
 * the module (already counted in module->alias_count).
 *
 * @param p The parser to read from.
 * @param module The module that receives the alias.
 * @param is_public Stored in the new alias entry's is_public field.
 * @returns true on success; false on a syntax error or if the alias
 *          array could not be grown (the module is then left unchanged).
 */
static bool parse_alias_declaration(Parser* p, Module* module, bool is_public) {
    /*
     * Grow through a temporary: assigning realloc's result straight to
     * module->aliases would lose the old array on failure and the code
     * below would then dereference NULL.
     */
    AliasDeclaration* grown = realloc(module->aliases, sizeof(AliasDeclaration) * (module->alias_count + 1));
    if (!grown) {
        fprintf(stderr, "Out of memory\n");
        return false;
    }
    module->aliases = grown;
    module->alias_count++;

    AliasDeclaration* alias = &module->aliases[module->alias_count - 1];
    memset(alias, 0, sizeof(AliasDeclaration));
    alias->is_public = is_public;

    if (!parser_require(p, TOKEN_IDENTIFIER, "expected alias identifier")) {
        return false;
    }
    alias->name = parser_to_text(p);

    if (!parser_expect(p, TOKEN_ASSIGN, "expected '=' after alias name")) {
        return false;
    }

    if (!parse_type_expression(p, &alias->value)) {
        return false;
    }

    return parser_expect(p, TOKEN_SEMICOLON, "expected ';' after alias declaration");
}
@@ -225,7 +234,7 @@ Module* parser_parse(TokenStream* ts) {
Module* module = malloc(sizeof(Module));
memset(module, 0, sizeof(Module));
if (!parse_declaration_module(p, module)) {
if (!parse_module_declaration(p, module)) {
goto fail;
}
@@ -234,12 +243,12 @@ Module* parser_parse(TokenStream* ts) {
bool terminal = false;
do {
if (parser_accept(p, TOKEN_IMPORT)) {
if (!parse_import(p, module, is_public)) {
if (!parse_import_declaration(p, module, is_public)) {
goto fail;
}
terminal = true;
} else if (parser_accept(p, TOKEN_ALIAS)) {
if (!parse_alias(p, module, is_public)) {
if (!parse_alias_declaration(p, module, is_public)) {
goto fail;
}
terminal = true;
+15 -3
View File
@@ -9,7 +9,7 @@
#include <stdlib.h>
static jmp_buf s_testJmp;
static const char* s_failMsg = NULL;
static char s_failMsg[1024];
static char* s_logOutput = NULL;
static const char* s_currentTestName = NULL;
static char* s_testSource = NULL;
@@ -18,7 +18,12 @@ static Module* s_currentModule = NULL;
static TokenStream* s_currentTokenStream = NULL;
void fail(const char* msg) {
s_failMsg = msg;
if (msg) {
strncpy(s_failMsg, msg, sizeof(s_failMsg) - 1);
s_failMsg[sizeof(s_failMsg) - 1] = '\0';
} else {
s_failMsg[0] = '\0';
}
longjmp(s_testJmp, 1);
}
@@ -211,6 +216,7 @@ static TestCase s_tests[] = {
TEST(test_tokenstream_keywords_and_symbols)
TEST(test_tokenstream_open_fail)
TEST(test_tokenstream_parentheses_and_brackets)
TEST(test_tokenstream_primitive_types)
TEST(test_tokenstream_simple_keyword)
TEST(test_tokenstream_unknown_token)
TEST(test_tokenstream_void_function_signature)
@@ -238,7 +244,7 @@ int main(int argc, char** argv) {
log_set_output(log_append);
printf("%s...", s_tests[i].name);
fflush(stdout);
s_failMsg = NULL;
s_failMsg[0] = '\0';
if (setjmp(s_testJmp) == 0) {
log_clear();
@@ -252,8 +258,14 @@ int main(int argc, char** argv) {
} else {
printf(" [FAIL]: %s\n", s_failMsg[0] ? s_failMsg : "");
failedTests[failedCount++] = s_tests[i].name;
// Log output on failure
if (s_logOutput && s_logOutput[0]) {
printf("%s\n", s_logOutput);
}
}
// Free AST and TokenStream after each test
if (s_currentModule) {
parser_free(s_currentModule);
s_currentModule = NULL;
+14
View File
@@ -97,3 +97,17 @@ static void test_tokenstream_info(void) {
if (t2.location.line != 1) fail("expected line 1");
if (t2.location.column_start != 8) fail("expected column 8");
}
/**
 * Checks that the test's token stream yields the primitive type tokens
 * i8, i16, i32, i64, u8, u16, u32, u64 in that order, followed by EOF.
 * The first mismatch is reported through fail().
 */
static void test_tokenstream_primitive_types(void) {
    static const struct {
        int token;
        const char* message;
    } expected[] = {
        { TOKEN_I8,  "expected TOKEN_I8" },
        { TOKEN_I16, "expected TOKEN_I16" },
        { TOKEN_I32, "expected TOKEN_I32" },
        { TOKEN_I64, "expected TOKEN_I64" },
        { TOKEN_U8,  "expected TOKEN_U8" },
        { TOKEN_U16, "expected TOKEN_U16" },
        { TOKEN_U32, "expected TOKEN_U32" },
        { TOKEN_U64, "expected TOKEN_U64" },
        { TOKEN_EOF, "expected EOF" },
    };
    const int expected_count = (int)(sizeof(expected) / sizeof(expected[0]));

    TokenStream* ts = test_get_tokenstream();
    for (int i = 0; i < expected_count; i++) {
        if ((int)tokenstream_next(ts).token != expected[i].token) {
            fail(expected[i].message);
        }
    }
}
+2 -2
View File
@@ -2,8 +2,8 @@ module mymodule;
import foo;
alias myalias = int32[];
alias myalias = i32[];
import bar;
alias otheralias = int32;
alias otheralias = i32;
+1 -1
View File
@@ -1,3 +1,3 @@
module mymodule;
alias myalias = int32[];
alias myalias = i32[];
+1 -1
View File
@@ -1,3 +1,3 @@
module mymodule;
alias myalias = int32;
alias myalias = i32;
+1 -1
View File
@@ -1,4 +1,4 @@
--- v0/tests/parser_bad_import_name.c2 ---
1| import ;
^^^^^^
expected 'module' keyword
expected keyword 'module'
+1 -1
View File
@@ -1,4 +1,4 @@
--- v0/tests/parser_bad_module_name.c2 ---
1| import other_module;
^^^^^^
expected 'module' keyword
expected keyword 'module'
+1
View File
@@ -0,0 +1 @@
i8 i16 i32 i64 u8 u16 u32 u64
+8
View File
@@ -32,6 +32,14 @@ static const KeywordMap keywords[] = {
{"alias", TOKEN_ALIAS},
{"public", TOKEN_PUBLIC},
{"void", TOKEN_VOID},
{"i8", TOKEN_I8},
{"i16", TOKEN_I16},
{"i32", TOKEN_I32},
{"i64", TOKEN_I64},
{"u8", TOKEN_U8},
{"u16", TOKEN_U16},
{"u32", TOKEN_U32},
{"u64", TOKEN_U64},
};
/**
+8 -1
View File
@@ -27,7 +27,14 @@ typedef enum {
/* Primitives */
TOKEN_VOID,
TOKEN_BUILTIN_TYPE,
TOKEN_I8,
TOKEN_I16,
TOKEN_I32,
TOKEN_I64,
TOKEN_U8,
TOKEN_U16,
TOKEN_U32,
TOKEN_U64,
/* Variable */
TOKEN_IDENTIFIER,