Working on parser refactor

This commit is contained in:
2026-04-29 14:36:42 +02:00
parent 1f40c8f5ee
commit eb4b0495f2
10 changed files with 253 additions and 138 deletions
+220 -129
View File
@@ -4,165 +4,256 @@
#include <string.h>
#include <stdio.h>
/**
* Parses an import declaration.
*
* @param ts The token stream to parse from.
* @param module The module being parsed.
* @returns true on success, false on failure.
*/
static bool parse_import(TokenStream* ts, Module* module) {
ImportDeclaration* new_imports = realloc(module->imports, (module->import_count + 1) * sizeof(ImportDeclaration));
if (!new_imports) {
fprintf(stderr, "Out of memory\n");
exit(1);
}
module->imports = new_imports;
// /**
// * Parses an import declaration.
// *
// * @param ts The token stream to parse from.
// * @param module The module being parsed.
// * @returns true on success, false on failure.
// */
// static bool parse_import(TokenStream* ts, Module* module) {
// ImportDeclaration* new_imports = realloc(module->imports, (module->import_count + 1) * sizeof(ImportDeclaration));
// if (!new_imports) {
// fprintf(stderr, "Out of memory\n");
// exit(1);
// }
// module->imports = new_imports;
Token t = tokenstream_next(ts);
bool is_public = false;
if (t.token == TOKEN_PUBLIC) {
is_public = true;
t = tokenstream_next(ts);
}
// Token t = tokenstream_next(ts);
// bool is_public = false;
// if (t.token == TOKEN_PUBLIC) {
// is_public = true;
// t = tokenstream_next(ts);
// }
if (t.token != TOKEN_IDENTIFIER) {
log_on_line(&t.location, t.location.column_end, "expected module name to import");
return false;
}
// if (t.token != TOKEN_IDENTIFIER) {
// log_on_line(&t.location, t.location.column_end, "expected module name to import");
// return false;
// }
char* name = (char*)malloc(t.text.length + 1);
memcpy(name, t.text.data, t.text.length);
name[t.text.length] = '\0';
// char* name = (char*)malloc(t.text.length + 1);
// memcpy(name, t.text.data, t.text.length);
// name[t.text.length] = '\0';
module->imports[module->import_count] = (ImportDeclaration){ .module_name = name, .is_public = is_public };
module->import_count++;
// module->imports[module->import_count] = (ImportDeclaration){ .module_name = name, .is_public = is_public };
// module->import_count++;
t = tokenstream_next(ts);
if (t.token != TOKEN_SEMICOLON) {
log_on_line(&t.location, t.location.column_end, "expected ';' after import");
return false;
}
return true;
// t = tokenstream_next(ts);
// if (t.token != TOKEN_SEMICOLON) {
// log_on_line(&t.location, t.location.column_end, "expected ';' after import");
// return false;
// }
// return true;
// }
// /**
// * Parses an alias declaration.
// *
// * @param ts The token stream to parse from.
// * @param module The module being parsed.
// * @returns true on success, false on failure.
// */
// static bool parse_alias(TokenStream* ts, Module* module) {
// AliasDeclaration* new_aliases = realloc(module->aliases, (module->alias_count + 1) * sizeof(AliasDeclaration));
// if (!new_aliases) {
// fprintf(stderr, "Out of memory\n");
// exit(1);
// }
// module->aliases = new_aliases;
// Token t = tokenstream_next(ts);
// if (t.token != TOKEN_IDENTIFIER) {
// log_on_line(&t.location, t.location.column_end, "expected alias name");
// return false;
// }
// char* name = (char*)malloc(t.text.length + 1);
// memcpy(name, t.text.data, t.text.length);
// name[t.text.length] = '\0';
// AliasDeclaration alias;
// alias.name = name;
// t = tokenstream_next(ts);
// if (t.token != TOKEN_ASSIGN) {
// log_on_line(&t.location, t.location.column_end, "expected '='");
// return false;
// }
// t = tokenstream_next(ts);
// TypeExpression type;
// if (t.token == TOKEN_IDENTIFIER && strncmp(t.text.data, "int32", t.text.length) == 0) {
// type = (TypeExpression){ .tag = TYPE_EXPRESSION_BUILTIN, .builtin = { .bitSize = 32, .isSigned = true } };
// t = tokenstream_next(ts);
// if (t.token == TOKEN_BRACKET_OPEN) {
// t = tokenstream_next(ts);
// if (t.token != TOKEN_BRACKET_CLOSE) {
// log_on_line(&t.location, t.location.column_end, "expected ']'");
// return false;
// }
// TypeExpression* inner = malloc(sizeof(TypeExpression));
// *inner = type;
// type = (TypeExpression){ .tag = TYPE_EXPRESSION_ARRAY, .array = { .array = inner } };
// t = tokenstream_next(ts);
// }
// } else {
// log_on_line(&t.location, t.location.column_end, "expected type");
// return false;
// }
// alias.value = type;
// module->aliases[module->alias_count] = alias;
// module->alias_count++;
// if (t.token != TOKEN_SEMICOLON) {
// log_on_line(&t.location, t.location.column_end, "expected ';'");
// return false;
// }
// return true;
// }
/**
 * Parser state shared by the parsing helpers.
 */
typedef struct {
// The token stream that tokens are read from.
TokenStream* ts;
// The current token, i.e. the one most recently read from `ts`.
Token token;
} Parser;
/**
 * Advances the parser by one token.
 *
 * Pulls the next token from the parser's token stream and makes it the
 * current token, replacing the previous one.
 *
 * @param p The parser to advance.
 */
static void parser_next_token(Parser* p) {
    Token next = tokenstream_next(p->ts);
    p->token = next;
}
/**
* Parses an alias declaration.
* Reads a new token if the current token is equal to the expected token.
*
* @param ts The token stream to parse from.
* @param module The module being parsed.
* @returns true on success, false on failure.
* If they are equal, it continues to the next token.
*
* @param p
* @param token The expected token.
* @returns `true` if the current token matches the expected, `false` if it does not.
*/
static bool parse_alias(TokenStream* ts, Module* module) {
AliasDeclaration* new_aliases = realloc(module->aliases, (module->alias_count + 1) * sizeof(AliasDeclaration));
if (!new_aliases) {
fprintf(stderr, "Out of memory\n");
exit(1);
static bool parser_accept(Parser* p, TokenType token) {
if (p->token.token == token) {
parser_next_token(p);
return true;
}
module->aliases = new_aliases;
return false;
}
Token t = tokenstream_next(ts);
if (t.token != TOKEN_IDENTIFIER) {
log_on_line(&t.location, t.location.column_end, "expected alias name");
return false;
/**
 * Consumes the current token if it is the expected one, and reports an error
 * otherwise.
 *
 * @param p The parser whose current token is examined.
 * @param token The expected token.
 * @param msg The message logged at the current token's location on a mismatch.
 * @returns `true` if the current token matched and was consumed, `false`
 *          (after logging `msg`) if it did not. On a mismatch the current
 *          token is not consumed.
 */
static bool parser_expect(Parser* p, TokenType token, const char* msg) {
    if (parser_accept(p, token)) {
        return true;
    }

    log_on_line(&p->token.location, msg);
    return false;
}
/**
 * Checks the current token without consuming it.
 *
 * @param p The parser whose current token is examined.
 * @param token The token to compare against.
 * @returns `true` if the current token equals `token`, `false` otherwise.
 */
static bool parser_peek(Parser* p, TokenType token) {
    return p->token.token == token;
}
/**
 * Checks that the current token is the expected one, without consuming it.
 *
 * @param p The parser whose current token is examined.
 * @param token The expected token.
 * @param msg The message logged at the current token's location on a mismatch.
 * @returns `true` if the current token matches, `false` (after logging `msg`)
 *          if it does not.
 */
static bool parser_require(Parser* p, TokenType token, const char* msg) {
    bool matches = parser_peek(p, token);
    if (!matches) {
        log_on_line(&p->token.location, msg);
    }
    return matches;
}
/**
 * Copies the text of the current token and advances to the next token.
 *
 * The copy is taken before advancing, so the returned string always belongs
 * to the token that was current when this function was called.
 *
 * @param p The parser whose current token is copied and then consumed.
 * @returns The string produced by `string_copy` for the token's text.
 *          NOTE(review): presumably a heap allocation owned by the caller;
 *          confirm against `string_copy`.
 */
static char* parser_to_text(Parser* p) {
    char* copy = string_copy(p->token.text);
    parser_next_token(p);
    return copy;
}
/**
 * Parses the module declaration: the `module` keyword, the module identifier
 * and the terminating ';'.
 *
 * @param p The parser, positioned on the `module` keyword.
 * @param module The module being parsed; its `name` receives a copy of the
 *               identifier's text.
 * @returns `true` on success, `false` (after logging an error) on failure.
 *          If only the ';' is missing, `module->name` has already been set.
 */
static bool parse_declaration_module(Parser* p, Module* module) {
    if (!parser_expect(p, TOKEN_MODULE, "expected keyword 'module'")) {
        return false;
    }

    // Only check the identifier here; parser_to_text() copies the current
    // token and then steps past it.
    if (!parser_require(p, TOKEN_IDENTIFIER, "expected module identifier")) {
        return false;
    }
    module->name = parser_to_text(p);

    return parser_expect(p, TOKEN_SEMICOLON, "expected ';' after module name");
}
/**
 * Parses the remainder of an import declaration (the module identifier and
 * the terminating ';') and appends it to `module->imports`.
 *
 * The `import` keyword must already have been consumed by the caller.
 *
 * @param p The parser, positioned on the imported module's identifier.
 * @param module The module being parsed; `imports` / `import_count` are
 *               updated only once the identifier has been validated.
 * @param is_public Whether the import was marked public.
 * @returns `true` on success, `false` (after logging an error) on failure.
 */
static bool parse_import(Parser* p, Module* module, bool is_public) {
    // Check without consuming: parser_to_text() copies the *current* token,
    // so the identifier must still be current when it is called.
    if (!parser_require(p, TOKEN_IDENTIFIER, "expected module identifier")) {
        return false;
    }

    // Grow through a temporary so the existing array is not lost when
    // realloc fails.
    ImportDeclaration* grown = realloc(module->imports, sizeof(ImportDeclaration) * (module->import_count + 1));
    if (!grown) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    module->imports = grown;

    ImportDeclaration* import = &module->imports[module->import_count];
    import->module_name = parser_to_text(p);
    import->is_public = is_public;
    // Count the entry only now that it is fully initialised.
    module->import_count++;

    return parser_expect(p, TOKEN_SEMICOLON, "expected ';' after import");
}
static bool parse_alias(Parser* p, Module* module, bool is_public) {
// @copilot implement
return true;
}
/**
 * Parses a whole module from a token stream.
 *
 * Expects a module declaration followed by any number of import and alias
 * declarations, each optionally preceded by `public`, up to the end of the
 * token stream.
 *
 * @param ts The token stream to parse from.
 * @returns The parsed module, or NULL (after logging an error) on failure.
 */
Module* parser_parse(TokenStream* ts) {
    // The parser state is only needed during this call, so it lives on the
    // stack instead of in a heap allocation that would have to be freed.
    Parser parser = { .ts = ts };
    Parser* p = &parser;
    parser_next_token(p);

    Module* module = malloc(sizeof *module);
    if (!module) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    // Start with NULL pointers and zero counts so that the declaration
    // parsers can realloc the arrays and a partially built module can be
    // released on failure.
    *module = (Module){0};

    if (!parse_declaration_module(p, module)) {
        goto fail;
    }

    while (!parser_peek(p, TOKEN_EOF)) {
        bool is_public = false;
        bool terminal = false;
        // Collect `public` modifiers until a declaration keyword ends the run.
        do {
            if (parser_accept(p, TOKEN_IMPORT)) {
                if (!parse_import(p, module, is_public)) {
                    goto fail;
                }
                terminal = true;
            } else if (parser_accept(p, TOKEN_ALIAS)) {
                if (!parse_alias(p, module, is_public)) {
                    goto fail;
                }
                terminal = true;
            } else if (parser_accept(p, TOKEN_PUBLIC)) {
                is_public = true;
            } else {
                // Nothing consumes this token, so give up here; retrying
                // would report the same token forever.
                log_on_line(&p->token.location, "unexpected token");
                goto fail;
            }
        } while (!terminal);
    }

    return module;

fail:
    // NOTE(review): parser_free() is assumed to release the name, imports and
    // aliases along with the module and to tolerate NULL / empty members;
    // confirm against its definition.
    parser_free(module);
    return NULL;
}
void free_type_expression(TypeExpression* expr) {