diff --git a/v0/log.c b/v0/log.c index f5e4daa..e5e9cdc 100644 --- a/v0/log.c +++ b/v0/log.c @@ -1,5 +1,7 @@ #include "log.h" #include +#include +#include static LogError* s_logError = NULL; @@ -14,3 +16,38 @@ void log_error(const char* msg) { fprintf(stderr, "Error: %s\n", msg); } } + +void log_on_line(const char* filename, const char* line_text, int line, int from, int to, const char* msg) { + char line_prefix[32]; + int prefix_len = snprintf(line_prefix, sizeof(line_prefix), "%d| ", line); + + int caret_len = to - from + 1; + if (caret_len < 1) caret_len = 1; + + size_t total_size = strlen(filename) + 16 + // --- filename --- + prefix_len + strlen(line_text) + 2 + // line| text\n + prefix_len + from - 1 + caret_len + 2 + // indent + ^^\n + prefix_len + strlen(msg) + 2 + // indent + msg\n + 1; + + char* buffer = (char*)malloc(total_size); + if (!buffer) return; + + char* p = buffer; + p += sprintf(p, "--- %s ---\n", filename); + p += sprintf(p, "%s%s\n", line_prefix, line_text); + + // Caret line + for (int i = 0; i < prefix_len + from - 1; i++) *p++ = ' '; + for (int i = 0; i < caret_len; i++) *p++ = '^'; + *p++ = '\n'; + + // Message line + for (int i = 0; i < prefix_len; i++) *p++ = ' '; + p += sprintf(p, "%s\n", msg); + + *p = '\0'; + + log_error(buffer); + free(buffer); +} diff --git a/v0/log.h b/v0/log.h index cf268f7..a2ac5bd 100644 --- a/v0/log.h +++ b/v0/log.h @@ -19,4 +19,16 @@ void log_set_output(LogError* destination); */ void log_error(const char* msg); +/** + * Logs a pretty error with additional information about the line where the error occurred. + * + * @param filename The name of the file where the error occurred. + * @param line_text The entire line of text where the error occurred. + * @param line The line number where the error occurred. + * @param from The column number where the error starts. + * @param to The column number where the error ends. + * @param msg The error message to log. + */ +void log_on_line(const char* filename, const char* line_text, int line, int from, int to, const char* msg); + #endif diff --git a/v0/parser.c b/v0/parser.c index b1a992c..28b0081 100644 --- a/v0/parser.c +++ b/v0/parser.c @@ -4,32 +4,29 @@ Module* parser_parse(TokenStream* ts) { Token t = tokenstream_next(ts); - if (t != TOKEN_MODULE) { + if (t.token != TOKEN_MODULE) { return NULL; } t = tokenstream_next(ts); - if (t != TOKEN_IDENTIFIER) { + if (t.token != TOKEN_IDENTIFIER) { return NULL; } - TokenInfo info; - tokenstream_info(ts, &info); - Module* module = (Module*)malloc(sizeof(Module)); if (module == NULL) return NULL; - module->name = (char*)malloc(info.text_length + 1); + module->name = (char*)malloc(t.text_length + 1); if (module->name == NULL) { free(module); return NULL; } - memcpy(module->name, info.text, info.text_length); - module->name[info.text_length] = '\0'; + memcpy(module->name, t.text, t.text_length); + module->name[t.text_length] = '\0'; t = tokenstream_next(ts); - if (t != TOKEN_SEMICOLON) { + if (t.token != TOKEN_SEMICOLON) { free(module->name); free(module); return NULL; diff --git a/v0/test.c b/v0/test.c index ef60a6f..5ab65c8 100644 --- a/v0/test.c +++ b/v0/test.c @@ -25,6 +25,10 @@ void assert_str(const char* expected, const char* actual, const char* msg) { } } +void assert_log(const char* expected, const char* msg) { + assert_str(expected, s_logOutput, msg); +} + static void log_append(const char* msg) { size_t oldLen = s_logOutput ? strlen(s_logOutput) : 0; size_t newLen = oldLen + strlen(msg) + 1; @@ -69,6 +73,7 @@ static TestCase s_tests[] = { {"tokenstream_info", test_tokenstream_info}, {"parser_module_name", test_parser_module_name}, {"log_error", test_log_error}, + {"log_on_line", test_log_on_line}, }; @@ -82,9 +87,8 @@ int main(int argc, char** argv) { const char* failedTests[s_totalTests + 1]; int failedCount = 0; - log_set_output(log_append); - for (int i = 0; i < s_totalTests; i++) { + log_set_output(log_append); printf("%s...", s_tests[i].name); s_failMsg = NULL; diff --git a/v0/test.h b/v0/test.h index b3151d2..1044a0a 100644 --- a/v0/test.h +++ b/v0/test.h @@ -33,4 +33,9 @@ void assert_not_null(void* ptr, const char* msg); */ void assert_str(const char* expected, const char* actual, const char* msg); +/** + * Asserts that the logged output matches the expected value. + */ +void assert_log(const char* expected, const char* msg); + #endif diff --git a/v0/test_log.c b/v0/test_log.c index 84203a4..06b73b8 100644 --- a/v0/test_log.c +++ b/v0/test_log.c @@ -19,3 +19,15 @@ static void test_log_error(void) { log_set_output(NULL); // Reset to default } + +static void test_log_on_line(void) { + const char* expected = + "--- test.c ---\n" + "1| int main() []\n" + " ^^\n" + " unexpected token\n"; + + log_on_line("test.c", "int main() []", 1, 12, 13, "unexpected token"); + + assert_log(expected, "expected formatted error message"); +} \ No newline at end of file diff --git a/v0/test_token.c b/v0/test_token.c index 2e2c96e..b08b53d 100644 --- a/v0/test_token.c +++ b/v0/test_token.c @@ -11,10 +11,10 @@ static void test_tokenstream_simple_keyword(void) { TokenStream* ts = tokenstream_open("module"); Token t = tokenstream_next(ts); - if (t != TOKEN_MODULE) fail("expected TOKEN_MODULE"); + if (t.token != TOKEN_MODULE) fail("expected TOKEN_MODULE"); Token eof = tokenstream_next(ts); - if (eof != -1) fail("expected EOF"); + if (eof.token != TOKEN_EOF) fail("expected EOF"); tokenstream_close(ts); } @@ -22,13 +22,13 @@ static void test_tokenstream_simple_keyword(void) { static void test_tokenstream_keywords_and_symbols(void) { TokenStream* ts = tokenstream_open("module main; import stdio;"); - if (tokenstream_next(ts) != TOKEN_MODULE) fail("expected TOKEN_MODULE"); - if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (main)"); - if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON"); - if (tokenstream_next(ts) != TOKEN_IMPORT) fail("expected TOKEN_IMPORT"); - if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (stdio)"); - if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON"); - if (tokenstream_next(ts) != -1) fail("expected EOF"); + if (tokenstream_next(ts).token != TOKEN_MODULE) fail("expected TOKEN_MODULE"); + if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (main)"); + if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON"); + if (tokenstream_next(ts).token != TOKEN_IMPORT) fail("expected TOKEN_IMPORT"); + if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (stdio)"); + if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON"); + if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF"); tokenstream_close(ts); } @@ -36,11 +36,11 @@ static void test_tokenstream_keywords_and_symbols(void) { static void test_tokenstream_parentheses_and_brackets(void) { TokenStream* ts = tokenstream_open("()[]"); - if (tokenstream_next(ts) != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN"); - if (tokenstream_next(ts) != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE"); - if (tokenstream_next(ts) != TOKEN_BRACKET_OPEN) fail("expected TOKEN_BRACKET_OPEN"); - if (tokenstream_next(ts) != TOKEN_BRACKET_CLOSE) fail("expected TOKEN_BRACKET_CLOSE"); - if (tokenstream_next(ts) != -1) fail("expected EOF"); + if (tokenstream_next(ts).token != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN"); + if (tokenstream_next(ts).token != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE"); + if (tokenstream_next(ts).token != TOKEN_BRACKET_OPEN) fail("expected TOKEN_BRACKET_OPEN"); + if (tokenstream_next(ts).token != TOKEN_BRACKET_CLOSE) fail("expected TOKEN_BRACKET_CLOSE"); + if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF"); tokenstream_close(ts); } @@ -48,12 +48,12 @@ static void test_tokenstream_parentheses_and_brackets(void) { static void test_tokenstream_comma(void) { TokenStream* ts = tokenstream_open("a,b,c"); - if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected a"); - if (tokenstream_next(ts) != TOKEN_COMMA) fail("expected comma"); - if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected b"); - if (tokenstream_next(ts) != TOKEN_COMMA) fail("expected comma"); - if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected c"); - if (tokenstream_next(ts) != -1) fail("expected EOF"); + if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected a"); + if (tokenstream_next(ts).token != TOKEN_COMMA) fail("expected comma"); + if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected b"); + if (tokenstream_next(ts).token != TOKEN_COMMA) fail("expected comma"); + if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected c"); + if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF"); tokenstream_close(ts); } @@ -61,10 +61,10 @@ static void test_tokenstream_comma(void) { static void test_tokenstream_whitespace_ignored(void) { TokenStream* ts = tokenstream_open(" module \n\t import ; "); - if (tokenstream_next(ts) != TOKEN_MODULE) fail("expected TOKEN_MODULE"); - if (tokenstream_next(ts) != TOKEN_IMPORT) fail("expected TOKEN_IMPORT"); - if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON"); - if (tokenstream_next(ts) != -1) fail("expected EOF"); + if (tokenstream_next(ts).token != TOKEN_MODULE) fail("expected TOKEN_MODULE"); + if (tokenstream_next(ts).token != TOKEN_IMPORT) fail("expected TOKEN_IMPORT"); + if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON"); + if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF"); tokenstream_close(ts); } @@ -72,11 +72,11 @@ static void test_tokenstream_whitespace_ignored(void) { static void test_tokenstream_void_function_signature(void) { TokenStream* ts = tokenstream_open("void main()"); - if (tokenstream_next(ts) != TOKEN_VOID) fail("expected TOKEN_VOID"); - if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER"); - if (tokenstream_next(ts) != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN"); - if (tokenstream_next(ts) != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE"); - if (tokenstream_next(ts) != -1) fail("expected EOF"); + if (tokenstream_next(ts).token != TOKEN_VOID) fail("expected TOKEN_VOID"); + if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER"); + if (tokenstream_next(ts).token != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN"); + if (tokenstream_next(ts).token != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE"); + if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF"); tokenstream_close(ts); } @@ -85,26 +85,24 @@ static void test_tokenstream_info(void) { TokenStream* ts = tokenstream_open("module main;"); Token t1 = tokenstream_next(ts); - TokenInfo info1; - tokenstream_info(ts, &info1); - if (t1 != TOKEN_MODULE) fail("expected TOKEN_MODULE"); - if (info1.token != TOKEN_MODULE) fail("info: expected TOKEN_MODULE"); + if (t1.token != TOKEN_MODULE) fail("expected TOKEN_MODULE"); char buf1[32]; - memcpy(buf1, info1.text, info1.text_length); - buf1[info1.text_length] = '\0'; + memcpy(buf1, t1.text, t1.text_length); + buf1[t1.text_length] = '\0'; assert_str("module", buf1, "info: expected 'module'"); + if (t1.line != 1) fail("expected line 1"); + if (t1.column != 1) fail("expected column 1"); Token t2 = tokenstream_next(ts); - TokenInfo info2; - tokenstream_info(ts, &info2); - if (t2 != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER"); - if (info2.token != TOKEN_IDENTIFIER) fail("info: expected TOKEN_IDENTIFIER"); + if (t2.token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER"); char buf2[32]; - memcpy(buf2, info2.text, info2.text_length); - buf2[info2.text_length] = '\0'; + memcpy(buf2, t2.text, t2.text_length); + buf2[t2.text_length] = '\0'; assert_str("main", buf2, "info: expected 'main'"); + if (t2.line != 1) fail("expected line 1"); + if (t2.column != 8) fail("expected column 8"); tokenstream_close(ts); } diff --git a/v0/token.c b/v0/token.c index 07ae101..c3ffa8a 100644 --- a/v0/token.c +++ b/v0/token.c @@ -6,7 +6,9 @@ struct TokenStream { const char* code; size_t pos; - TokenInfo last_info; + int line; + int column; + const char* line_start; }; /** @@ -15,7 +17,7 @@ struct TokenStream { */ typedef struct { const char* keyword; - Token token; + TokenType token; } KeywordMap; static const KeywordMap keywords[] = { @@ -28,7 +30,7 @@ static const KeywordMap keywords[] = { * Look up a keyword in the keyword map. * Returns TOKEN_IDENTIFIER if not found. */ -static Token lookup_keyword(const char* str, size_t length) { +static TokenType lookup_keyword(const char* str, size_t length) { int count = sizeof(keywords) / sizeof(keywords[0]); for (int i = 0; i < count; i++) { if (strlen(keywords[i].keyword) == length && @@ -53,39 +55,49 @@ static int is_identifier_part(char c) { return isalnum(c) || c == '_'; } -/** - * Read a character from the stream. - */ -static char read_char(TokenStream* ts) { - char c = ts->code[ts->pos]; - if (c == '\0') return (char)-1; - ts->pos++; - return c; -} - /** * Peek at the next character in the stream. */ static char peek_char(TokenStream* ts) { + return ts->code[ts->pos]; +} + +/** + * Read a character from the stream and update position. + */ +static char read_char(TokenStream* ts) { char c = ts->code[ts->pos]; - if (c == '\0') return (char)-1; + if (c == '\0') return '\0'; + + ts->pos++; + if (c == '\n') { + ts->line++; + ts->column = 1; + ts->line_start = &ts->code[ts->pos]; + } else { + ts->column++; + } return c; } -static Token read_keyword_or_identifier(TokenStream* ts, char first) { - const char* start = &ts->code[ts->pos - 1]; - size_t length = 1; - - while (is_identifier_part(peek_char(ts))) { - read_char(ts); - length++; +static size_t get_line_length(const char* line_start) { + const char* p = line_start; + while (*p != '\n' && *p != '\0') { + p++; } + return (size_t)(p - line_start); +} - Token token = lookup_keyword(start, length); - ts->last_info.token = token; - ts->last_info.text = (char*)start; - ts->last_info.text_length = length; - return token; +static Token create_token(TokenStream* ts, TokenType type, const char* text, size_t length, int line, int column, const char* line_start) { + Token t; + t.token = type; + t.text = (char*)text; + t.text_length = length; + t.line = line; + t.column = column; + t.line_text = (char*)line_start; + t.line_text_length = get_line_length(line_start); + return t; } TokenStream* tokenstream_open(const char* code) { @@ -98,9 +110,9 @@ TokenStream* tokenstream_open(const char* code) { ts->code = code; ts->pos = 0; - ts->last_info.text = NULL; - ts->last_info.text_length = 0; - ts->last_info.token = (Token)-1; + ts->line = 1; + ts->column = 1; + ts->line_start = code; return ts; } @@ -110,66 +122,70 @@ void tokenstream_close(TokenStream* ts) { } Token tokenstream_next(TokenStream* ts) { - if (ts == NULL) return -1; + if (ts == NULL) { + Token t = {0}; + t.token = TOKEN_EOF; + return t; + } char c; /* Skip whitespace and comments */ - while ((c = read_char(ts)) != (char)-1) { + while ((c = peek_char(ts)) != '\0') { if (isspace(c)) { + read_char(ts); continue; } /* Handle comments */ if (c == '/') { - if (peek_char(ts) == '/') { + if (ts->code[ts->pos + 1] == '/') { /* Skip until end of line */ - while ((c = read_char(ts)) != (char)-1 && c != '\n') { + while ((c = read_char(ts)) != '\0' && c != '\n') { /* Skip */ } continue; } /* It's just a slash, which we don't handle yet */ - return -1; + break; } /* We found a non-whitespace, non-comment character */ break; } - if (c == (char)-1) { - ts->last_info.token = (Token)-1; - ts->last_info.text = NULL; - ts->last_info.text_length = 0; - return -1; /* EOF */ + if (peek_char(ts) == '\0') { + return create_token(ts, TOKEN_EOF, NULL, 0, ts->line, ts->column, ts->line_start); } - /* Single-character tokens */ - ts->last_info.text = (char*)&ts->code[ts->pos - 1]; - ts->last_info.text_length = 1; + int start_line = ts->line; + int start_column = ts->column; + const char* line_start = ts->line_start; + const char* start_text = &ts->code[ts->pos]; + c = read_char(ts); + + /* Single-character tokens */ switch (c) { - case '(': return ts->last_info.token = TOKEN_PARENT_OPEN; - case ')': return ts->last_info.token = TOKEN_PARENT_CLOSE; - case '[': return ts->last_info.token = TOKEN_BRACKET_OPEN; - case ']': return ts->last_info.token = TOKEN_BRACKET_CLOSE; - case ',': return ts->last_info.token = TOKEN_COMMA; - case ';': return ts->last_info.token = TOKEN_SEMICOLON; + case '(': return create_token(ts, TOKEN_PARENT_OPEN, start_text, 1, start_line, start_column, line_start); + case ')': return create_token(ts, TOKEN_PARENT_CLOSE, start_text, 1, start_line, start_column, line_start); + case '[': return create_token(ts, TOKEN_BRACKET_OPEN, start_text, 1, start_line, start_column, line_start); + case ']': return create_token(ts, TOKEN_BRACKET_CLOSE, start_text, 1, start_line, start_column, line_start); + case ',': return create_token(ts, TOKEN_COMMA, start_text, 1, start_line, start_column, line_start); + case ';': return create_token(ts, TOKEN_SEMICOLON, start_text, 1, start_line, start_column, line_start); } /* Keywords and identifiers */ if (is_identifier_start(c)) { - return read_keyword_or_identifier(ts, c); + size_t length = 1; + while (is_identifier_part(peek_char(ts))) { + read_char(ts); + length++; + } + TokenType type = lookup_keyword(start_text, length); + return create_token(ts, type, start_text, length, start_line, start_column, line_start); } /* Unknown character */ - ts->last_info.token = (Token)-1; - ts->last_info.text = NULL; - ts->last_info.text_length = 0; - return -1; -} - -void tokenstream_info(TokenStream* ts, TokenInfo* info) { - if (ts == NULL || info == NULL) return; - *info = ts->last_info; + return create_token(ts, TOKEN_UNKNOWN, start_text, 1, start_line, start_column, line_start); } diff --git a/v0/token.h b/v0/token.h index 5be1565..d6c5557 100644 --- a/v0/token.h +++ b/v0/token.h @@ -27,7 +27,11 @@ typedef enum { // Variable TOKEN_IDENTIFIER, -} Token; + + // Others + TOKEN_EOF, + TOKEN_UNKNOWN, +} TokenType; /** * Holds additional information about a token. @@ -37,12 +41,24 @@ typedef struct { /// Note that this is not necessarily null-terminated. char* text; + /// @brief The entire line of text where the token was found. + char* line_text; + /// @brief The length of the `text` string. size_t text_length; + /// @brief The length of the `line_text` string. + size_t line_text_length; + /// @brief The actual token. - Token token; -} TokenInfo; + TokenType token; + + /// @brief The line number where the token was found. + int line; + + /// @brief The column number where the token was found. + int column; +} Token; typedef struct TokenStream TokenStream; @@ -67,13 +83,4 @@ void tokenstream_close(TokenStream* ts); */ Token tokenstream_next(TokenStream* ts); -/** - * Gets additional information about the last token that was returned - * by `tokenstream_next`. - * - * @param ts The TokenStream to use. - * @param info The TokenInfo object to store the results in. - */ -void tokenstream_info(TokenStream* ts, TokenInfo* info); - #endif \ No newline at end of file