#include "token.h"
#include "log.h"

#include <ctype.h>
#include <stdlib.h>
#include <string.h>

/**
 * Lexer state for a single source buffer.
 *
 * The stream only stores the pointers it is given; it never copies or frees
 * the filename or the source text.
 */
struct TokenStream {
    const char* filename;   /* name copied into every token location */
    const char* code;       /* NUL-terminated source text being scanned */
    size_t pos;             /* index into code of the next unread character */
    int line;               /* line of the next unread character, starts at 1 */
    int column;             /* column of the next unread character, starts at 1 */
    const char* line_start; /* first character of the line containing pos */
};

/**
 * Easy-to-read and modify keyword-to-token mapping.
 * Add new keywords here.
 */
typedef struct {
    const char* keyword;
    TokenType token;
} KeywordMap;

static const KeywordMap keywords[] = {
    {"module", TOKEN_MODULE},
    {"import", TOKEN_IMPORT},
    {"void", TOKEN_VOID},
};

/**
 * Look up a keyword in the keyword map.
 *
 * `str` does not need to be NUL-terminated; exactly `length` characters are
 * compared, and a keyword only matches when its own length equals `length`.
 *
 * Returns the keyword's token type, or TOKEN_IDENTIFIER if not found.
 */
static TokenType lookup_keyword(const char* str, size_t length) {
    const size_t count = sizeof(keywords) / sizeof(keywords[0]);

    for (size_t i = 0; i < count; i++) {
        const char* keyword = keywords[i].keyword;
        if (strlen(keyword) == length && strncmp(keyword, str, length) == 0) {
            return keywords[i].token;
        }
    }
    return TOKEN_IDENTIFIER;
}

/**
 * Check if a character is the start of an identifier (letter or '_').
 */
static int is_identifier_start(char c) {
    /* <ctype.h> functions require a value representable as unsigned char;
     * passing a negative plain char is undefined behaviour. */
    return isalpha((unsigned char)c) || c == '_';
}

/**
 * Check if a character can be part of an identifier (letter, digit or '_').
 */
static int is_identifier_part(char c) {
    /* Same unsigned char conversion as in is_identifier_start. */
    return isalnum((unsigned char)c) || c == '_';
}

/**
 * Peek at the next character in the stream without consuming it.
 * Returns '\0' at the end of the source text.
 */
static char peek_char(TokenStream* ts) {
    return ts->code[ts->pos];
}

/**
 * Read a character from the stream and update position.
 *
 * At the end of the source text nothing is consumed and '\0' is returned.
 * Consuming '\n' advances to the next line: the line counter is incremented,
 * the column is reset to 1 and line_start moves to the character after the
 * newline. Any other character just advances the column.
 */
static char read_char(TokenStream* ts) {
    char c = ts->code[ts->pos];
    if (c == '\0') {
        return '\0';
    }

    ts->pos++;
    if (c == '\n') {
        ts->line++;
        ts->column = 1;
        ts->line_start = &ts->code[ts->pos];
    } else {
        ts->column++;
    }
    return c;
}

/**
 * Count the characters from `line_start` up to, but not including, the next
 * '\n' or the terminating '\0'.
 */
static size_t get_line_length(const char* line_start) {
    const char* p = line_start;
    while (*p != '\n' && *p != '\0') {
        p++;
    }
    return (size_t)(p - line_start);
}

/**
 * Build a Token value.
 *
 * `text` and `line_start` are stored as-is (no copy), so the token refers
 * directly to the stream's source buffer. `text` may be NULL with a length of
 * 0 (used for the end-of-file token). The location's line text length is
 * measured from `line_start` to the end of that line.
 */
static Token create_token(TokenStream* ts, TokenType type, const char* text,
                          size_t length, int line, int column,
                          const char* line_start) {
    Token t;
    t.token = type;
    t.text = (char*)text;
    t.text_length = length;
    t.location.filename = (char*)ts->filename;
    t.location.line = line;
    t.location.column = column;
    t.location.line_text = (char*)line_start;
    t.location.line_text_length = get_line_length(line_start);
    return t;
}

/**
 * Skip whitespace and `//` line comments.
 *
 * On return the stream is positioned at the end of the source text or at the
 * first character that is neither whitespace nor the start of a `//` comment.
 * A lone '/' is left in place for the caller to handle.
 */
static void skip_whitespace_and_comments(TokenStream* ts) {
    for (;;) {
        char c = peek_char(ts);
        if (c == '\0') {
            return;
        }

        if (isspace((unsigned char)c)) {
            read_char(ts);
            continue;
        }

        /* code[pos] is '/', not the terminator, so code[pos + 1] is still
         * inside the NUL-terminated buffer. */
        if (c == '/' && ts->code[ts->pos + 1] == '/') {
            /* Consume through the end of the line (or the end of input). */
            do {
                c = read_char(ts);
            } while (c != '\0' && c != '\n');
            continue;
        }

        return;
    }
}

/**
 * Create a token stream over `code`.
 *
 * `filename` may be NULL, in which case "unknown" is used. Both pointers are
 * stored without copying, so they must stay valid for as long as the stream
 * and the tokens it produces are in use.
 *
 * Returns NULL if `code` is NULL or the allocation fails. Release the stream
 * with tokenstream_close().
 */
TokenStream* tokenstream_open(const char* filename, const char* code) {
    if (code == NULL) {
        return NULL;
    }

    TokenStream* ts = malloc(sizeof *ts);
    if (ts == NULL) {
        return NULL;
    }

    ts->filename = filename ? filename : "unknown";
    ts->code = code;
    ts->pos = 0;
    ts->line = 1;
    ts->column = 1;
    ts->line_start = code;
    return ts;
}

/**
 * Free a token stream. Passing NULL is allowed and does nothing.
 * The filename and source text given to tokenstream_open() are not freed.
 */
void tokenstream_close(TokenStream* ts) {
    free(ts); /* free(NULL) is a no-op */
}

/**
 * Return the next token from the stream.
 *
 * Whitespace and `//` comments are skipped first. The result is one of:
 *  - TOKEN_EOF at the end of the source text (also returned, zero-initialised,
 *    when `ts` is NULL);
 *  - a single-character punctuation token for ( ) [ ] , ;
 *  - a keyword token or TOKEN_IDENTIFIER for a run of identifier characters;
 *  - TOKEN_UNKNOWN for any other character (including a lone '/'), which is
 *    also reported through log_on_line().
 *
 * Token text and line text point into the stream's source buffer.
 */
Token tokenstream_next(TokenStream* ts) {
    if (ts == NULL) {
        Token t = {0};
        t.token = TOKEN_EOF;
        return t;
    }

    skip_whitespace_and_comments(ts);

    if (peek_char(ts) == '\0') {
        return create_token(ts, TOKEN_EOF, NULL, 0, ts->line, ts->column,
                            ts->line_start);
    }

    /* Capture the token's start position before consuming anything. */
    int start_line = ts->line;
    int start_column = ts->column;
    const char* line_start = ts->line_start;
    const char* start_text = &ts->code[ts->pos];

    char c = read_char(ts);

    /* Single-character tokens */
    switch (c) {
    case '(':
        return create_token(ts, TOKEN_PARENT_OPEN, start_text, 1, start_line,
                            start_column, line_start);
    case ')':
        return create_token(ts, TOKEN_PARENT_CLOSE, start_text, 1, start_line,
                            start_column, line_start);
    case '[':
        return create_token(ts, TOKEN_BRACKET_OPEN, start_text, 1, start_line,
                            start_column, line_start);
    case ']':
        return create_token(ts, TOKEN_BRACKET_CLOSE, start_text, 1, start_line,
                            start_column, line_start);
    case ',':
        return create_token(ts, TOKEN_COMMA, start_text, 1, start_line,
                            start_column, line_start);
    case ';':
        return create_token(ts, TOKEN_SEMICOLON, start_text, 1, start_line,
                            start_column, line_start);
    default:
        break;
    }

    /* Keywords and identifiers */
    if (is_identifier_start(c)) {
        size_t length = 1;
        while (is_identifier_part(peek_char(ts))) {
            read_char(ts);
            length++;
        }
        TokenType type = lookup_keyword(start_text, length);
        return create_token(ts, type, start_text, length, start_line,
                            start_column, line_start);
    }

    /* Unknown character */
    Token t = create_token(ts, TOKEN_UNKNOWN, start_text, 1, start_line,
                           start_column, line_start);
    log_on_line(ts->filename, t.location.line_text, t.location.line,
                t.location.column, t.location.column,
                "unexpected token '%c'", c);
    return t;
}