Compare commits

..

5 Commits

Author SHA1 Message Date
seeseemelk 9ca72ef5bf Split test 2026-04-24 20:41:57 +02:00
seeseemelk 0306530fe8 Better logging in tokenstream 2026-04-24 20:40:31 +02:00
seeseemelk 451a9a2a22 Token refactor and better logs 2026-04-24 20:28:08 +02:00
seeseemelk da3425ec10 All target run tests 2026-04-24 20:04:43 +02:00
seeseemelk e021a2d63e During test, log to in-memory log 2026-04-24 20:04:00 +02:00
11 changed files with 289 additions and 133 deletions
+1 -1
View File
@@ -1,6 +1,6 @@
.PHONY: all test clean .PHONY: all test clean
all: c2 all: c2 test
c2: v0/bin/c2 c2: v0/bin/c2
cp $< $@ cp $< $@
+45
View File
@@ -1,5 +1,8 @@
#include "log.h" #include "log.h"
#include <stdio.h> #include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
static LogError* s_logError = NULL; static LogError* s_logError = NULL;
@@ -14,3 +17,45 @@ void log_error(const char* msg) {
fprintf(stderr, "Error: %s\n", msg); fprintf(stderr, "Error: %s\n", msg);
} }
} }
/**
 * Builds a multi-line diagnostic and passes it to log_error() as one string:
 *
 *     --- <filename> ---
 *     <line>| <line_text>
 *            ^^^            (carets under columns from..to)
 *            <formatted message>
 *
 * @param filename  Name shown in the header; NULL is reported as "unknown".
 * @param line_text NUL-terminated text of the offending line; NULL is treated as "".
 * @param line      Line number printed in front of the line text.
 * @param from      1-based column where the carets start.
 * @param to        1-based column where the carets end (inclusive). At least one
 *                  caret is always drawn, even when `to < from`.
 * @param msg       printf-style format string for the message line; NULL is treated as "".
 * @param ...       Arguments consumed by `msg`.
 *
 * Nothing is logged if formatting or the buffer allocation fails.
 */
void log_on_line(const char* filename, const char* line_text, int line, int from, int to, const char* msg, ...) {
    /* Never hand NULL to strlen / "%s". */
    if (filename == NULL) filename = "unknown";
    if (line_text == NULL) line_text = "";
    if (msg == NULL) msg = "";

    char line_prefix[32];
    int prefix_len = snprintf(line_prefix, sizeof(line_prefix), "%d| ", line);
    if (prefix_len < 0 || (size_t)prefix_len >= sizeof(line_prefix)) return;

    /* Do the column arithmetic in a wider type so extreme values cannot
       overflow, and clamp so the sizes below match what is really written. */
    long long indent_wide = (long long)prefix_len + from - 1;
    size_t indent = indent_wide > 0 ? (size_t)indent_wide : 0;
    long long caret_wide = (long long)to - from + 1;
    size_t caret_len = caret_wide > 1 ? (size_t)caret_wide : 1;

    va_list args;
    va_start(args, msg);

    /* First pass: measure the message so it is never truncated. */
    va_list measure;
    va_copy(measure, args);
    int msg_len = vsnprintf(NULL, 0, msg, measure);
    va_end(measure);
    if (msg_len < 0) {
        va_end(args);
        return;
    }

    size_t total_size = strlen(filename) + 9 +               // "--- " + filename + " ---\n"
        (size_t)prefix_len + strlen(line_text) + 1 +         // line| text\n
        indent + caret_len + 1 +                             // indent + ^^\n
        (size_t)prefix_len + (size_t)msg_len + 1 +           // indent + msg\n
        1;                                                   // terminating NUL

    char* buffer = (char*)malloc(total_size);
    if (!buffer) {
        va_end(args);
        return;
    }

    char* p = buffer;

    // Header and source line
    int written = snprintf(p, total_size, "--- %s ---\n%s%s\n", filename, line_prefix, line_text);
    if (written < 0 || (size_t)written >= total_size) {
        va_end(args);
        free(buffer);
        return;
    }
    p += written;

    // Caret line
    memset(p, ' ', indent);
    p += indent;
    memset(p, '^', caret_len);
    p += caret_len;
    *p++ = '\n';

    // Message line
    memset(p, ' ', (size_t)prefix_len);
    p += prefix_len;
    vsnprintf(p, (size_t)msg_len + 1, msg, args);
    va_end(args);
    p += msg_len;
    *p++ = '\n';
    *p = '\0';

    log_error(buffer);
    free(buffer);
}
+13
View File
@@ -19,4 +19,17 @@ void log_set_output(LogError* destination);
*/ */
void log_error(const char* msg); void log_error(const char* msg);
/**
 * Logs a pretty error with additional information about the line where the error occurred.
 *
 * The report is passed to log_error() as a single string made of a
 * "--- filename ---" header, the numbered source line, a row of '^' carets
 * underneath the offending columns, and the formatted message.
 *
 * @param filename The name of the file where the error occurred.
 * @param line_text The entire line of text where the error occurred, as a NUL-terminated string.
 * @param line The line number where the error occurred.
 * @param from The column number where the error starts. Columns are 1-based: column 1 is the first character of `line_text`.
 * @param to The column number where the error ends (inclusive). If it is smaller than `from`, a single caret is drawn.
 * @param msg The error message to log. This can contain format specifiers like printf, and the additional arguments will be formatted into the message.
 * @param ... Additional arguments to format into the error message.
 */
void log_on_line(const char* filename, const char* line_text, int line, int from, int to, const char* msg, ...);
#endif #endif
+6 -9
View File
@@ -4,32 +4,29 @@
Module* parser_parse(TokenStream* ts) { Module* parser_parse(TokenStream* ts) {
Token t = tokenstream_next(ts); Token t = tokenstream_next(ts);
if (t != TOKEN_MODULE) { if (t.token != TOKEN_MODULE) {
return NULL; return NULL;
} }
t = tokenstream_next(ts); t = tokenstream_next(ts);
if (t != TOKEN_IDENTIFIER) { if (t.token != TOKEN_IDENTIFIER) {
return NULL; return NULL;
} }
TokenInfo info;
tokenstream_info(ts, &info);
Module* module = (Module*)malloc(sizeof(Module)); Module* module = (Module*)malloc(sizeof(Module));
if (module == NULL) return NULL; if (module == NULL) return NULL;
module->name = (char*)malloc(info.text_length + 1); module->name = (char*)malloc(t.text_length + 1);
if (module->name == NULL) { if (module->name == NULL) {
free(module); free(module);
return NULL; return NULL;
} }
memcpy(module->name, info.text, info.text_length); memcpy(module->name, t.text, t.text_length);
module->name[info.text_length] = '\0'; module->name[t.text_length] = '\0';
t = tokenstream_next(ts); t = tokenstream_next(ts);
if (t != TOKEN_SEMICOLON) { if (t.token != TOKEN_SEMICOLON) {
free(module->name); free(module->name);
free(module); free(module);
return NULL; return NULL;
+32
View File
@@ -2,9 +2,11 @@
#include <setjmp.h> #include <setjmp.h>
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <stdlib.h>
static jmp_buf s_testJmp; static jmp_buf s_testJmp;
static const char* s_failMsg; static const char* s_failMsg;
static char* s_logOutput = NULL;
void fail(const char* msg) { void fail(const char* msg) {
s_failMsg = msg; s_failMsg = msg;
@@ -23,6 +25,31 @@ void assert_str(const char* expected, const char* actual, const char* msg) {
} }
} }
/**
 * Asserts that the text captured in the in-memory log equals `expected`.
 *
 * @param expected The exact log text that should have been captured.
 * @param msg      Failure message forwarded to assert_str.
 */
void assert_log(const char* expected, const char* msg) {
    /* s_logOutput is NULL until something has been logged; compare against an
       empty string in that case so assert_str never receives a NULL pointer. */
    const char* actual = s_logOutput ? s_logOutput : "";
    assert_str(expected, actual, msg);
}
/**
 * Log sink used by the test runner: appends `msg` to the in-memory log.
 *
 * The combined text is built in a newly allocated block that then replaces
 * the previous buffer. If the allocation fails, the message is dropped and
 * the previous buffer is left as it was.
 */
static void log_append(const char* msg) {
    size_t prevLen = 0;
    if (s_logOutput != NULL) {
        prevLen = strlen(s_logOutput);
    }
    size_t msgLen = strlen(msg);

    char* combined = malloc(prevLen + msgLen + 1);
    if (combined == NULL) {
        return;
    }

    if (s_logOutput != NULL) {
        memcpy(combined, s_logOutput, prevLen);
        free(s_logOutput);
    }
    /* Copies the terminating NUL of msg as well. */
    memcpy(combined + prevLen, msg, msgLen + 1);
    s_logOutput = combined;
}
/**
 * Releases the in-memory log buffer and resets it to "nothing logged".
 */
static void log_clear(void) {
    free(s_logOutput);
    s_logOutput = NULL;
}
typedef struct { typedef struct {
const char* name; const char* name;
Test func; Test func;
@@ -43,9 +70,12 @@ static TestCase s_tests[] = {
{"tokenstream_comma", test_tokenstream_comma}, {"tokenstream_comma", test_tokenstream_comma},
{"tokenstream_whitespace_ignored", test_tokenstream_whitespace_ignored}, {"tokenstream_whitespace_ignored", test_tokenstream_whitespace_ignored},
{"tokenstream_void_function_signature", test_tokenstream_void_function_signature}, {"tokenstream_void_function_signature", test_tokenstream_void_function_signature},
{"tokenstream_unknown_token", test_tokenstream_unknown_token},
{"tokenstream_info", test_tokenstream_info}, {"tokenstream_info", test_tokenstream_info},
{"parser_module_name", test_parser_module_name}, {"parser_module_name", test_parser_module_name},
{"log_error", test_log_error}, {"log_error", test_log_error},
{"log_on_line", test_log_on_line},
{"log_on_line_format", test_log_on_line_format},
}; };
@@ -60,10 +90,12 @@ int main(int argc, char** argv) {
int failedCount = 0; int failedCount = 0;
for (int i = 0; i < s_totalTests; i++) { for (int i = 0; i < s_totalTests; i++) {
log_set_output(log_append);
printf("%s...", s_tests[i].name); printf("%s...", s_tests[i].name);
s_failMsg = NULL; s_failMsg = NULL;
if (setjmp(s_testJmp) == 0) { if (setjmp(s_testJmp) == 0) {
log_clear();
s_tests[i].func(); s_tests[i].func();
printf(" [OK]\n"); printf(" [OK]\n");
s_greenTests++; s_greenTests++;
+5
View File
@@ -33,4 +33,9 @@ void assert_not_null(void* ptr, const char* msg);
*/ */
void assert_str(const char* expected, const char* actual, const char* msg); void assert_str(const char* expected, const char* actual, const char* msg);
/**
 * Asserts that the logged output matches the expected value.
 *
 * @param expected The exact text the captured log output is compared against.
 * @param msg The failure message, passed on to assert_str.
 */
void assert_log(const char* expected, const char* msg);
#endif #endif
+22
View File
@@ -19,3 +19,25 @@ static void test_log_error(void) {
log_set_output(NULL); // Reset to default log_set_output(NULL); // Reset to default
} }
/**
 * log_on_line must print the header, the numbered source line, carets under
 * columns 12..13 and the message.
 */
static void test_log_on_line(void) {
    /* The "1| " prefix is 3 characters wide, so the carets for column 12 are
       preceded by 3 + 12 - 1 = 14 spaces and the message by 3 spaces. */
    const char* expected =
        "--- test.c ---\n"
        "1| int main() []\n"
        "              ^^\n"
        "   unexpected token\n";

    log_on_line("test.c", "int main() []", 1, 12, 13, "unexpected token");

    assert_log(expected, "expected formatted error message");
}
/**
 * log_on_line must expand printf-style arguments into the message line.
 */
static void test_log_on_line_format(void) {
    /* The "1| " prefix is 3 characters wide, so the carets for column 12 are
       preceded by 3 + 12 - 1 = 14 spaces and the message by 3 spaces. */
    const char* expected =
        "--- test.c ---\n"
        "1| int main() []\n"
        "              ^^\n"
        "   unexpected token 'x'\n";

    log_on_line("test.c", "int main() []", 1, 12, 13, "unexpected token '%c'", 'x');

    assert_log(expected, "expected formatted error message with variadic args");
}
+1 -1
View File
@@ -3,7 +3,7 @@
#include <string.h> #include <string.h>
static void test_parser_module_name(void) { static void test_parser_module_name(void) {
TokenStream* ts = tokenstream_open("module my_module;"); TokenStream* ts = tokenstream_open("test.c", "module my_module;");
Module* m = parser_parse(ts); Module* m = parser_parse(ts);
assert_not_null(m, "expected module to be parsed"); assert_not_null(m, "expected module to be parsed");
+62 -49
View File
@@ -3,108 +3,121 @@
#include <string.h> #include <string.h>
static void test_tokenstream_open_fail(void) { static void test_tokenstream_open_fail(void) {
TokenStream* ts = tokenstream_open(NULL); TokenStream* ts = tokenstream_open(NULL, NULL);
if (ts != NULL) fail("expected NULL for NULL buffer"); if (ts != NULL) fail("expected NULL for NULL buffer");
} }
static void test_tokenstream_simple_keyword(void) { static void test_tokenstream_simple_keyword(void) {
TokenStream* ts = tokenstream_open("module"); TokenStream* ts = tokenstream_open("test.c", "module");
Token t = tokenstream_next(ts); Token t = tokenstream_next(ts);
if (t != TOKEN_MODULE) fail("expected TOKEN_MODULE"); if (t.token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
Token eof = tokenstream_next(ts); Token eof = tokenstream_next(ts);
if (eof != -1) fail("expected EOF"); if (eof.token != TOKEN_EOF) fail("expected EOF");
tokenstream_close(ts); tokenstream_close(ts);
} }
static void test_tokenstream_keywords_and_symbols(void) { static void test_tokenstream_keywords_and_symbols(void) {
TokenStream* ts = tokenstream_open("module main; import stdio;"); TokenStream* ts = tokenstream_open("test.c", "module main; import stdio;");
if (tokenstream_next(ts) != TOKEN_MODULE) fail("expected TOKEN_MODULE"); if (tokenstream_next(ts).token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (main)"); if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (main)");
if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON"); if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
if (tokenstream_next(ts) != TOKEN_IMPORT) fail("expected TOKEN_IMPORT"); if (tokenstream_next(ts).token != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (stdio)"); if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (stdio)");
if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON"); if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
if (tokenstream_next(ts) != -1) fail("expected EOF"); if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
tokenstream_close(ts); tokenstream_close(ts);
} }
static void test_tokenstream_parentheses_and_brackets(void) { static void test_tokenstream_parentheses_and_brackets(void) {
TokenStream* ts = tokenstream_open("()[]"); TokenStream* ts = tokenstream_open("test.c", "()[]");
if (tokenstream_next(ts) != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN"); if (tokenstream_next(ts).token != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
if (tokenstream_next(ts) != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE"); if (tokenstream_next(ts).token != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
if (tokenstream_next(ts) != TOKEN_BRACKET_OPEN) fail("expected TOKEN_BRACKET_OPEN"); if (tokenstream_next(ts).token != TOKEN_BRACKET_OPEN) fail("expected TOKEN_BRACKET_OPEN");
if (tokenstream_next(ts) != TOKEN_BRACKET_CLOSE) fail("expected TOKEN_BRACKET_CLOSE"); if (tokenstream_next(ts).token != TOKEN_BRACKET_CLOSE) fail("expected TOKEN_BRACKET_CLOSE");
if (tokenstream_next(ts) != -1) fail("expected EOF"); if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
tokenstream_close(ts); tokenstream_close(ts);
} }
static void test_tokenstream_comma(void) { static void test_tokenstream_comma(void) {
TokenStream* ts = tokenstream_open("a,b,c"); TokenStream* ts = tokenstream_open("test.c", "a,b,c");
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected a"); if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected a");
if (tokenstream_next(ts) != TOKEN_COMMA) fail("expected comma"); if (tokenstream_next(ts).token != TOKEN_COMMA) fail("expected comma");
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected b"); if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected b");
if (tokenstream_next(ts) != TOKEN_COMMA) fail("expected comma"); if (tokenstream_next(ts).token != TOKEN_COMMA) fail("expected comma");
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected c"); if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected c");
if (tokenstream_next(ts) != -1) fail("expected EOF"); if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
tokenstream_close(ts); tokenstream_close(ts);
} }
static void test_tokenstream_whitespace_ignored(void) { static void test_tokenstream_whitespace_ignored(void) {
TokenStream* ts = tokenstream_open(" module \n\t import ; "); TokenStream* ts = tokenstream_open("test.c", " module \n\t import ; ");
if (tokenstream_next(ts) != TOKEN_MODULE) fail("expected TOKEN_MODULE"); if (tokenstream_next(ts).token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
if (tokenstream_next(ts) != TOKEN_IMPORT) fail("expected TOKEN_IMPORT"); if (tokenstream_next(ts).token != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON"); if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
if (tokenstream_next(ts) != -1) fail("expected EOF"); if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
tokenstream_close(ts); tokenstream_close(ts);
} }
static void test_tokenstream_void_function_signature(void) { static void test_tokenstream_void_function_signature(void) {
TokenStream* ts = tokenstream_open("void main()"); TokenStream* ts = tokenstream_open("test.c", "void main()");
if (tokenstream_next(ts) != TOKEN_VOID) fail("expected TOKEN_VOID"); if (tokenstream_next(ts).token != TOKEN_VOID) fail("expected TOKEN_VOID");
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER"); if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
if (tokenstream_next(ts) != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN"); if (tokenstream_next(ts).token != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
if (tokenstream_next(ts) != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE"); if (tokenstream_next(ts).token != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
if (tokenstream_next(ts) != -1) fail("expected EOF"); if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
tokenstream_close(ts);
}
/**
 * An unrecognised character must yield TOKEN_UNKNOWN and log a diagnostic
 * that points at the offending column.
 */
static void test_tokenstream_unknown_token(void) {
    TokenStream* ts = tokenstream_open("test.c", "%");

    if (tokenstream_next(ts).token != TOKEN_UNKNOWN) fail("expected TOKEN_UNKNOWN");

    /* '%' is at column 1, so both the caret and the message are indented by
       the width of the "1| " prefix only (3 spaces). */
    assert_log(
        "--- test.c ---\n"
        "1| %\n"
        "   ^\n"
        "   unexpected token '%'\n",
        "expected error message for unknown token");

    tokenstream_close(ts);
}
static void test_tokenstream_info(void) { static void test_tokenstream_info(void) {
TokenStream* ts = tokenstream_open("module main;"); TokenStream* ts = tokenstream_open("test.c", "module main;");
Token t1 = tokenstream_next(ts); Token t1 = tokenstream_next(ts);
TokenInfo info1; if (t1.token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
tokenstream_info(ts, &info1);
if (t1 != TOKEN_MODULE) fail("expected TOKEN_MODULE");
if (info1.token != TOKEN_MODULE) fail("info: expected TOKEN_MODULE");
char buf1[32]; char buf1[32];
memcpy(buf1, info1.text, info1.text_length); memcpy(buf1, t1.text, t1.text_length);
buf1[info1.text_length] = '\0'; buf1[t1.text_length] = '\0';
assert_str("module", buf1, "info: expected 'module'"); assert_str("module", buf1, "info: expected 'module'");
if (t1.line != 1) fail("expected line 1");
if (t1.column != 1) fail("expected column 1");
Token t2 = tokenstream_next(ts); Token t2 = tokenstream_next(ts);
TokenInfo info2; if (t2.token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
tokenstream_info(ts, &info2);
if (t2 != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
if (info2.token != TOKEN_IDENTIFIER) fail("info: expected TOKEN_IDENTIFIER");
char buf2[32]; char buf2[32];
memcpy(buf2, info2.text, info2.text_length); memcpy(buf2, t2.text, t2.text_length);
buf2[info2.text_length] = '\0'; buf2[t2.text_length] = '\0';
assert_str("main", buf2, "info: expected 'main'"); assert_str("main", buf2, "info: expected 'main'");
if (t2.line != 1) fail("expected line 1");
if (t2.column != 8) fail("expected column 8");
tokenstream_close(ts); tokenstream_close(ts);
} }
+80 -59
View File
@@ -1,12 +1,16 @@
#include "token.h" #include "token.h"
#include "log.h"
#include <stdlib.h> #include <stdlib.h>
#include <ctype.h> #include <ctype.h>
#include <string.h> #include <string.h>
struct TokenStream { struct TokenStream {
const char* filename;
const char* code; const char* code;
size_t pos; size_t pos;
TokenInfo last_info; int line;
int column;
const char* line_start;
}; };
/** /**
@@ -15,7 +19,7 @@ struct TokenStream {
*/ */
typedef struct { typedef struct {
const char* keyword; const char* keyword;
Token token; TokenType token;
} KeywordMap; } KeywordMap;
static const KeywordMap keywords[] = { static const KeywordMap keywords[] = {
@@ -28,7 +32,7 @@ static const KeywordMap keywords[] = {
* Look up a keyword in the keyword map. * Look up a keyword in the keyword map.
* Returns TOKEN_IDENTIFIER if not found. * Returns TOKEN_IDENTIFIER if not found.
*/ */
static Token lookup_keyword(const char* str, size_t length) { static TokenType lookup_keyword(const char* str, size_t length) {
int count = sizeof(keywords) / sizeof(keywords[0]); int count = sizeof(keywords) / sizeof(keywords[0]);
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {
if (strlen(keywords[i].keyword) == length && if (strlen(keywords[i].keyword) == length &&
@@ -53,42 +57,52 @@ static int is_identifier_part(char c) {
return isalnum(c) || c == '_'; return isalnum(c) || c == '_';
} }
/**
* Read a character from the stream.
*/
static char read_char(TokenStream* ts) {
char c = ts->code[ts->pos];
if (c == '\0') return (char)-1;
ts->pos++;
return c;
}
/** /**
* Peek at the next character in the stream. * Peek at the next character in the stream.
*/ */
static char peek_char(TokenStream* ts) { static char peek_char(TokenStream* ts) {
return ts->code[ts->pos];
}
/**
* Read a character from the stream and update position.
*/
static char read_char(TokenStream* ts) {
char c = ts->code[ts->pos]; char c = ts->code[ts->pos];
if (c == '\0') return (char)-1; if (c == '\0') return '\0';
ts->pos++;
if (c == '\n') {
ts->line++;
ts->column = 1;
ts->line_start = &ts->code[ts->pos];
} else {
ts->column++;
}
return c; return c;
} }
static Token read_keyword_or_identifier(TokenStream* ts, char first) { static size_t get_line_length(const char* line_start) {
const char* start = &ts->code[ts->pos - 1]; const char* p = line_start;
size_t length = 1; while (*p != '\n' && *p != '\0') {
p++;
while (is_identifier_part(peek_char(ts))) { }
read_char(ts); return (size_t)(p - line_start);
length++;
} }
Token token = lookup_keyword(start, length); static Token create_token(TokenStream* ts, TokenType type, const char* text, size_t length, int line, int column, const char* line_start) {
ts->last_info.token = token; Token t;
ts->last_info.text = (char*)start; t.token = type;
ts->last_info.text_length = length; t.text = (char*)text;
return token; t.text_length = length;
t.line = line;
t.column = column;
t.line_text = (char*)line_start;
t.line_text_length = get_line_length(line_start);
return t;
} }
TokenStream* tokenstream_open(const char* code) { TokenStream* tokenstream_open(const char* filename, const char* code) {
if (code == NULL) return NULL; if (code == NULL) return NULL;
TokenStream* ts = (TokenStream*)malloc(sizeof(struct TokenStream)); TokenStream* ts = (TokenStream*)malloc(sizeof(struct TokenStream));
@@ -96,11 +110,12 @@ TokenStream* tokenstream_open(const char* code) {
return NULL; return NULL;
} }
ts->filename = filename ? filename : "unknown";
ts->code = code; ts->code = code;
ts->pos = 0; ts->pos = 0;
ts->last_info.text = NULL; ts->line = 1;
ts->last_info.text_length = 0; ts->column = 1;
ts->last_info.token = (Token)-1; ts->line_start = code;
return ts; return ts;
} }
@@ -110,66 +125,72 @@ void tokenstream_close(TokenStream* ts) {
} }
Token tokenstream_next(TokenStream* ts) { Token tokenstream_next(TokenStream* ts) {
if (ts == NULL) return -1; if (ts == NULL) {
Token t = {0};
t.token = TOKEN_EOF;
return t;
}
char c; char c;
/* Skip whitespace and comments */ /* Skip whitespace and comments */
while ((c = read_char(ts)) != (char)-1) { while ((c = peek_char(ts)) != '\0') {
if (isspace(c)) { if (isspace(c)) {
read_char(ts);
continue; continue;
} }
/* Handle comments */ /* Handle comments */
if (c == '/') { if (c == '/') {
if (peek_char(ts) == '/') { if (ts->code[ts->pos + 1] == '/') {
/* Skip until end of line */ /* Skip until end of line */
while ((c = read_char(ts)) != (char)-1 && c != '\n') { while ((c = read_char(ts)) != '\0' && c != '\n') {
/* Skip */ /* Skip */
} }
continue; continue;
} }
/* It's just a slash, which we don't handle yet */ /* It's just a slash, which we don't handle yet */
return -1; break;
} }
/* We found a non-whitespace, non-comment character */ /* We found a non-whitespace, non-comment character */
break; break;
} }
if (c == (char)-1) { if (peek_char(ts) == '\0') {
ts->last_info.token = (Token)-1; return create_token(ts, TOKEN_EOF, NULL, 0, ts->line, ts->column, ts->line_start);
ts->last_info.text = NULL;
ts->last_info.text_length = 0;
return -1; /* EOF */
} }
/* Single-character tokens */ int start_line = ts->line;
ts->last_info.text = (char*)&ts->code[ts->pos - 1]; int start_column = ts->column;
ts->last_info.text_length = 1; const char* line_start = ts->line_start;
const char* start_text = &ts->code[ts->pos];
c = read_char(ts);
/* Single-character tokens */
switch (c) { switch (c) {
case '(': return ts->last_info.token = TOKEN_PARENT_OPEN; case '(': return create_token(ts, TOKEN_PARENT_OPEN, start_text, 1, start_line, start_column, line_start);
case ')': return ts->last_info.token = TOKEN_PARENT_CLOSE; case ')': return create_token(ts, TOKEN_PARENT_CLOSE, start_text, 1, start_line, start_column, line_start);
case '[': return ts->last_info.token = TOKEN_BRACKET_OPEN; case '[': return create_token(ts, TOKEN_BRACKET_OPEN, start_text, 1, start_line, start_column, line_start);
case ']': return ts->last_info.token = TOKEN_BRACKET_CLOSE; case ']': return create_token(ts, TOKEN_BRACKET_CLOSE, start_text, 1, start_line, start_column, line_start);
case ',': return ts->last_info.token = TOKEN_COMMA; case ',': return create_token(ts, TOKEN_COMMA, start_text, 1, start_line, start_column, line_start);
case ';': return ts->last_info.token = TOKEN_SEMICOLON; case ';': return create_token(ts, TOKEN_SEMICOLON, start_text, 1, start_line, start_column, line_start);
} }
/* Keywords and identifiers */ /* Keywords and identifiers */
if (is_identifier_start(c)) { if (is_identifier_start(c)) {
return read_keyword_or_identifier(ts, c); size_t length = 1;
while (is_identifier_part(peek_char(ts))) {
read_char(ts);
length++;
}
TokenType type = lookup_keyword(start_text, length);
return create_token(ts, type, start_text, length, start_line, start_column, line_start);
} }
/* Unknown character */ /* Unknown character */
ts->last_info.token = (Token)-1; Token t = create_token(ts, TOKEN_UNKNOWN, start_text, 1, start_line, start_column, line_start);
ts->last_info.text = NULL; log_on_line(ts->filename, t.line_text, t.line, t.column, t.column, "unexpected token '%c'", c);
ts->last_info.text_length = 0; return t;
return -1;
}
void tokenstream_info(TokenStream* ts, TokenInfo* info) {
if (ts == NULL || info == NULL) return;
*info = ts->last_info;
} }
+21 -13
View File
@@ -27,7 +27,11 @@ typedef enum {
// Variable // Variable
TOKEN_IDENTIFIER, TOKEN_IDENTIFIER,
} Token;
// Others
TOKEN_EOF,
TOKEN_UNKNOWN,
} TokenType;
/** /**
* Holds additional information about a token. * Holds additional information about a token.
@@ -37,22 +41,35 @@ typedef struct {
/// Note that this is not necessarily null-terminated. /// Note that this is not necessarily null-terminated.
char* text; char* text;
/// @brief The entire line of text where the token was found.
char* line_text;
/// @brief The length of the `text` string. /// @brief The length of the `text` string.
size_t text_length; size_t text_length;
/// @brief The length of the `line_text` string.
size_t line_text_length;
/// @brief The actual token. /// @brief The actual token.
Token token; TokenType token;
} TokenInfo;
/// @brief The line number where the token was found.
int line;
/// @brief The column number where the token was found.
int column;
} Token;
typedef struct TokenStream TokenStream; typedef struct TokenStream TokenStream;
/** /**
* Returns a TokenStream for a text. * Returns a TokenStream for a text.
* *
* @param filename The name of the file to read. This is only used for error reporting.
* @param code The text to read. * @param code The text to read.
* @returns A handle to the TokenStream. * @returns A handle to the TokenStream.
*/ */
TokenStream* tokenstream_open(const char* code); TokenStream* tokenstream_open(const char* filename, const char* code);
/** /**
* Closes a TokenStream. * Closes a TokenStream.
@@ -67,13 +84,4 @@ void tokenstream_close(TokenStream* ts);
*/ */
Token tokenstream_next(TokenStream* ts); Token tokenstream_next(TokenStream* ts);
/**
* Gets additional information about the last token that was returned
* by `tokenstream_next`.
*
* @param ts The TokenStream to use.
* @param info The TokenInfo object to store the results in.
*/
void tokenstream_info(TokenStream* ts, TokenInfo* info);
#endif #endif