Token refactor and better logs
This commit is contained in:
@@ -1,5 +1,7 @@
|
|||||||
#include "log.h"
|
#include "log.h"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
static LogError* s_logError = NULL;
|
static LogError* s_logError = NULL;
|
||||||
|
|
||||||
@@ -14,3 +16,38 @@ void log_error(const char* msg) {
|
|||||||
fprintf(stderr, "Error: %s\n", msg);
|
fprintf(stderr, "Error: %s\n", msg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Logs a multi-line error report that points at a span of one source line.
 *
 * The report is assembled into a single string and handed to log_error().
 * For ("test.c", "int main() []", 1, 12, 13, "unexpected token") it reads:
 *
 *     --- test.c ---
 *     1| int main() []
 *                   ^^
 *        unexpected token
 *
 * The caret row is indented so that `from == 1` lands under the first
 * character of `line_text`, and it is `to - from + 1` carets wide (never
 * fewer than one). The message row is indented by the width of the
 * "<line>| " prefix.
 *
 * @param filename  Name shown in the header row; NULL is treated as "".
 * @param line_text NUL-terminated text of the offending line; NULL is treated as "".
 * @param line      Line number printed in front of `line_text`.
 * @param from      Column where the span starts (column 1 = first character).
 * @param to        Column where the span ends (inclusive).
 * @param msg       Message printed under the carets; NULL is treated as "".
 */
void log_on_line(const char* filename, const char* line_text, int line, int from, int to, const char* msg) {
    // A missing string is rendered as empty instead of crashing in strlen().
    if (filename == NULL) filename = "";
    if (line_text == NULL) line_text = "";
    if (msg == NULL) msg = "";

    char line_prefix[32];
    int prefix_len = snprintf(line_prefix, sizeof(line_prefix), "%d| ", line);
    if (prefix_len < 0 || (size_t)prefix_len >= sizeof(line_prefix)) return;

    int caret_len = to - from + 1;
    if (caret_len < 1) caret_len = 1;

    // Clamp once and use the same value for sizing and for writing. Adding a
    // negative indent to the size while writing zero spaces would shrink the
    // allocation below what the report needs.
    int caret_indent = prefix_len + from - 1;
    if (caret_indent < 0) caret_indent = 0;

    size_t total_size = strlen("--- ") + strlen(filename) + strlen(" ---\n") +  // header row
                        (size_t)prefix_len + strlen(line_text) + 1 +            // "<line>| text\n"
                        (size_t)caret_indent + (size_t)caret_len + 1 +          // indent + carets + '\n'
                        (size_t)prefix_len + strlen(msg) + 1 +                  // indent + msg + '\n'
                        1;                                                      // terminating NUL

    char* buffer = (char*)malloc(total_size);
    if (!buffer) return;

    char* p = buffer;
    char* end = buffer + total_size;

    p += snprintf(p, (size_t)(end - p), "--- %s ---\n", filename);
    p += snprintf(p, (size_t)(end - p), "%s%s\n", line_prefix, line_text);

    // Caret line
    memset(p, ' ', (size_t)caret_indent);
    p += caret_indent;
    memset(p, '^', (size_t)caret_len);
    p += caret_len;
    *p++ = '\n';

    // Message line; snprintf also writes the terminating NUL
    memset(p, ' ', (size_t)prefix_len);
    p += prefix_len;
    snprintf(p, (size_t)(end - p), "%s\n", msg);

    log_error(buffer);
    free(buffer);
}
|
||||||
|
|||||||
@@ -19,4 +19,16 @@ void log_set_output(LogError* destination);
|
|||||||
*/
|
*/
|
||||||
void log_error(const char* msg);
|
void log_error(const char* msg);
|
||||||
|
|
||||||
|
/**
|
||||||
|
 * Logs a pretty error with additional information about the line where the error occurred.
 * The report is built as one string and passed to log_error(): a "--- filename ---" header,
 * the numbered source line, a row of '^' carets under the offending span, and the message.
|
||||||
|
 *
|
||||||
|
 * @param filename The name of the file where the error occurred.
|
||||||
|
 * @param line_text The entire line of text where the error occurred (NUL-terminated).
|
||||||
|
 * @param line The line number where the error occurred.
|
||||||
|
 * @param from The column number where the error starts; column 1 is the first character of line_text.
|
||||||
|
 * @param to The column number where the error ends (inclusive). At least one caret is drawn even if to < from.
|
||||||
|
 * @param msg The error message to log.
|
||||||
|
 */
|
||||||
|
void log_on_line(const char* filename, const char* line_text, int line, int from, int to, const char* msg);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
+6
-9
@@ -4,32 +4,29 @@
|
|||||||
|
|
||||||
Module* parser_parse(TokenStream* ts) {
|
Module* parser_parse(TokenStream* ts) {
|
||||||
Token t = tokenstream_next(ts);
|
Token t = tokenstream_next(ts);
|
||||||
if (t != TOKEN_MODULE) {
|
if (t.token != TOKEN_MODULE) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
t = tokenstream_next(ts);
|
t = tokenstream_next(ts);
|
||||||
if (t != TOKEN_IDENTIFIER) {
|
if (t.token != TOKEN_IDENTIFIER) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
TokenInfo info;
|
|
||||||
tokenstream_info(ts, &info);
|
|
||||||
|
|
||||||
Module* module = (Module*)malloc(sizeof(Module));
|
Module* module = (Module*)malloc(sizeof(Module));
|
||||||
if (module == NULL) return NULL;
|
if (module == NULL) return NULL;
|
||||||
|
|
||||||
module->name = (char*)malloc(info.text_length + 1);
|
module->name = (char*)malloc(t.text_length + 1);
|
||||||
if (module->name == NULL) {
|
if (module->name == NULL) {
|
||||||
free(module);
|
free(module);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(module->name, info.text, info.text_length);
|
memcpy(module->name, t.text, t.text_length);
|
||||||
module->name[info.text_length] = '\0';
|
module->name[t.text_length] = '\0';
|
||||||
|
|
||||||
t = tokenstream_next(ts);
|
t = tokenstream_next(ts);
|
||||||
if (t != TOKEN_SEMICOLON) {
|
if (t.token != TOKEN_SEMICOLON) {
|
||||||
free(module->name);
|
free(module->name);
|
||||||
free(module);
|
free(module);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|||||||
@@ -25,6 +25,10 @@ void assert_str(const char* expected, const char* actual, const char* msg) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Asserts that the log text held in s_logOutput equals `expected`.
 * Delegates to assert_str(), passing `msg` through unchanged.
 */
void assert_log(const char* expected, const char* msg) {
|
||||||
|
assert_str(expected, s_logOutput, msg);
|
||||||
|
}
|
||||||
|
|
||||||
static void log_append(const char* msg) {
|
static void log_append(const char* msg) {
|
||||||
size_t oldLen = s_logOutput ? strlen(s_logOutput) : 0;
|
size_t oldLen = s_logOutput ? strlen(s_logOutput) : 0;
|
||||||
size_t newLen = oldLen + strlen(msg) + 1;
|
size_t newLen = oldLen + strlen(msg) + 1;
|
||||||
@@ -69,6 +73,7 @@ static TestCase s_tests[] = {
|
|||||||
{"tokenstream_info", test_tokenstream_info},
|
{"tokenstream_info", test_tokenstream_info},
|
||||||
{"parser_module_name", test_parser_module_name},
|
{"parser_module_name", test_parser_module_name},
|
||||||
{"log_error", test_log_error},
|
{"log_error", test_log_error},
|
||||||
|
{"log_on_line", test_log_on_line},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@@ -82,9 +87,8 @@ int main(int argc, char** argv) {
|
|||||||
const char* failedTests[s_totalTests + 1];
|
const char* failedTests[s_totalTests + 1];
|
||||||
int failedCount = 0;
|
int failedCount = 0;
|
||||||
|
|
||||||
log_set_output(log_append);
|
|
||||||
|
|
||||||
for (int i = 0; i < s_totalTests; i++) {
|
for (int i = 0; i < s_totalTests; i++) {
|
||||||
|
log_set_output(log_append);
|
||||||
printf("%s...", s_tests[i].name);
|
printf("%s...", s_tests[i].name);
|
||||||
s_failMsg = NULL;
|
s_failMsg = NULL;
|
||||||
|
|
||||||
|
|||||||
@@ -33,4 +33,9 @@ void assert_not_null(void* ptr, const char* msg);
|
|||||||
*/
|
*/
|
||||||
void assert_str(const char* expected, const char* actual, const char* msg);
|
void assert_str(const char* expected, const char* actual, const char* msg);
|
||||||
|
|
||||||
|
/**
|
||||||
|
 * Asserts that the logged output matches the expected value.
 *
 * @param expected The exact text the logged output must equal.
 * @param msg Passed on to assert_str() as its message argument.
|
||||||
|
 */
|
||||||
|
void assert_log(const char* expected, const char* msg);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -19,3 +19,15 @@ static void test_log_error(void) {
|
|||||||
|
|
||||||
log_set_output(NULL); // Reset to default
|
log_set_output(NULL); // Reset to default
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Verifies the exact text log_on_line() produces for a two-column span.
 *
 * Line 1 yields the prefix "1| " (3 characters) and the span 12..13 covers
 * "[]", so the caret row is indented by 3 + 12 - 1 = 14 spaces and the
 * message row by the 3-character prefix width. The indentation inside the
 * expected literal is significant.
 */
static void test_log_on_line(void) {
    const char* expected =
        "--- test.c ---\n"
        "1| int main() []\n"
        "              ^^\n"
        "   unexpected token\n";

    log_on_line("test.c", "int main() []", 1, 12, 13, "unexpected token");

    assert_log(expected, "expected formatted error message");
}
|
||||||
+39
-41
@@ -11,10 +11,10 @@ static void test_tokenstream_simple_keyword(void) {
|
|||||||
TokenStream* ts = tokenstream_open("module");
|
TokenStream* ts = tokenstream_open("module");
|
||||||
|
|
||||||
Token t = tokenstream_next(ts);
|
Token t = tokenstream_next(ts);
|
||||||
if (t != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
if (t.token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||||
|
|
||||||
Token eof = tokenstream_next(ts);
|
Token eof = tokenstream_next(ts);
|
||||||
if (eof != -1) fail("expected EOF");
|
if (eof.token != TOKEN_EOF) fail("expected EOF");
|
||||||
|
|
||||||
tokenstream_close(ts);
|
tokenstream_close(ts);
|
||||||
}
|
}
|
||||||
@@ -22,13 +22,13 @@ static void test_tokenstream_simple_keyword(void) {
|
|||||||
/**
 * Tokenizes "module main; import stdio;" and checks the token kinds in order:
 * MODULE, IDENTIFIER, SEMICOLON, IMPORT, IDENTIFIER, SEMICOLON, then end of input.
 *
 * Each changed statement appears twice: first the removed form (comparing the
 * returned value directly, with -1 for end of input), then its replacement
 * (comparing the `.token` member, with TOKEN_EOF for end of input).
 */
static void test_tokenstream_keywords_and_symbols(void) {
|
static void test_tokenstream_keywords_and_symbols(void) {
|
||||||
TokenStream* ts = tokenstream_open("module main; import stdio;");
|
TokenStream* ts = tokenstream_open("module main; import stdio;");
|
||||||
|
|
||||||
if (tokenstream_next(ts) != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
if (tokenstream_next(ts).token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||||
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (main)");
|
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (main)");
|
||||||
if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
|
if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
|
||||||
if (tokenstream_next(ts) != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
|
if (tokenstream_next(ts).token != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
|
||||||
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (stdio)");
|
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (stdio)");
|
||||||
if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
|
if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
|
||||||
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
|
||||||
|
|
||||||
tokenstream_close(ts);
|
tokenstream_close(ts);
|
||||||
}
|
}
|
||||||
@@ -36,11 +36,11 @@ static void test_tokenstream_keywords_and_symbols(void) {
|
|||||||
/**
 * Tokenizes "()[]" and checks the token kinds in order:
 * PARENT_OPEN, PARENT_CLOSE, BRACKET_OPEN, BRACKET_CLOSE, then end of input.
 *
 * Each changed statement appears twice: first the removed form (comparing the
 * returned value directly, with -1 for end of input), then its replacement
 * (comparing the `.token` member, with TOKEN_EOF for end of input).
 */
static void test_tokenstream_parentheses_and_brackets(void) {
|
static void test_tokenstream_parentheses_and_brackets(void) {
|
||||||
TokenStream* ts = tokenstream_open("()[]");
|
TokenStream* ts = tokenstream_open("()[]");
|
||||||
|
|
||||||
if (tokenstream_next(ts) != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
|
if (tokenstream_next(ts).token != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
|
||||||
if (tokenstream_next(ts) != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
|
if (tokenstream_next(ts).token != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
|
||||||
if (tokenstream_next(ts) != TOKEN_BRACKET_OPEN) fail("expected TOKEN_BRACKET_OPEN");
|
if (tokenstream_next(ts).token != TOKEN_BRACKET_OPEN) fail("expected TOKEN_BRACKET_OPEN");
|
||||||
if (tokenstream_next(ts) != TOKEN_BRACKET_CLOSE) fail("expected TOKEN_BRACKET_CLOSE");
|
if (tokenstream_next(ts).token != TOKEN_BRACKET_CLOSE) fail("expected TOKEN_BRACKET_CLOSE");
|
||||||
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
|
||||||
|
|
||||||
tokenstream_close(ts);
|
tokenstream_close(ts);
|
||||||
}
|
}
|
||||||
@@ -48,12 +48,12 @@ static void test_tokenstream_parentheses_and_brackets(void) {
|
|||||||
/**
 * Tokenizes "a,b,c" and checks that identifiers and commas alternate
 * (IDENTIFIER, COMMA, IDENTIFIER, COMMA, IDENTIFIER) before end of input.
 *
 * Each changed statement appears twice: first the removed form (comparing the
 * returned value directly, with -1 for end of input), then its replacement
 * (comparing the `.token` member, with TOKEN_EOF for end of input).
 */
static void test_tokenstream_comma(void) {
|
static void test_tokenstream_comma(void) {
|
||||||
TokenStream* ts = tokenstream_open("a,b,c");
|
TokenStream* ts = tokenstream_open("a,b,c");
|
||||||
|
|
||||||
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected a");
|
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected a");
|
||||||
if (tokenstream_next(ts) != TOKEN_COMMA) fail("expected comma");
|
if (tokenstream_next(ts).token != TOKEN_COMMA) fail("expected comma");
|
||||||
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected b");
|
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected b");
|
||||||
if (tokenstream_next(ts) != TOKEN_COMMA) fail("expected comma");
|
if (tokenstream_next(ts).token != TOKEN_COMMA) fail("expected comma");
|
||||||
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected c");
|
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected c");
|
||||||
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
|
||||||
|
|
||||||
tokenstream_close(ts);
|
tokenstream_close(ts);
|
||||||
}
|
}
|
||||||
@@ -61,10 +61,10 @@ static void test_tokenstream_comma(void) {
|
|||||||
/**
 * Tokenizes input padded with spaces, a newline and a tab, and checks that only
 * MODULE, IMPORT and SEMICOLON are produced before end of input.
 *
 * Each changed statement appears twice: first the removed form (comparing the
 * returned value directly, with -1 for end of input), then its replacement
 * (comparing the `.token` member, with TOKEN_EOF for end of input).
 */
static void test_tokenstream_whitespace_ignored(void) {
|
static void test_tokenstream_whitespace_ignored(void) {
|
||||||
TokenStream* ts = tokenstream_open(" module \n\t import ; ");
|
TokenStream* ts = tokenstream_open(" module \n\t import ; ");
|
||||||
|
|
||||||
if (tokenstream_next(ts) != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
if (tokenstream_next(ts).token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||||
if (tokenstream_next(ts) != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
|
if (tokenstream_next(ts).token != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
|
||||||
if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
|
if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
|
||||||
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
|
||||||
|
|
||||||
tokenstream_close(ts);
|
tokenstream_close(ts);
|
||||||
}
|
}
|
||||||
@@ -72,11 +72,11 @@ static void test_tokenstream_whitespace_ignored(void) {
|
|||||||
/**
 * Tokenizes "void main()" and checks the token kinds in order:
 * VOID, IDENTIFIER, PARENT_OPEN, PARENT_CLOSE, then end of input.
 *
 * Each changed statement appears twice: first the removed form (comparing the
 * returned value directly, with -1 for end of input), then its replacement
 * (comparing the `.token` member, with TOKEN_EOF for end of input).
 */
static void test_tokenstream_void_function_signature(void) {
|
static void test_tokenstream_void_function_signature(void) {
|
||||||
TokenStream* ts = tokenstream_open("void main()");
|
TokenStream* ts = tokenstream_open("void main()");
|
||||||
|
|
||||||
if (tokenstream_next(ts) != TOKEN_VOID) fail("expected TOKEN_VOID");
|
if (tokenstream_next(ts).token != TOKEN_VOID) fail("expected TOKEN_VOID");
|
||||||
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
|
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
|
||||||
if (tokenstream_next(ts) != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
|
if (tokenstream_next(ts).token != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
|
||||||
if (tokenstream_next(ts) != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
|
if (tokenstream_next(ts).token != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
|
||||||
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
|
||||||
|
|
||||||
tokenstream_close(ts);
|
tokenstream_close(ts);
|
||||||
}
|
}
|
||||||
@@ -85,26 +85,24 @@ static void test_tokenstream_info(void) {
|
|||||||
TokenStream* ts = tokenstream_open("module main;");
|
TokenStream* ts = tokenstream_open("module main;");
|
||||||
|
|
||||||
Token t1 = tokenstream_next(ts);
|
Token t1 = tokenstream_next(ts);
|
||||||
TokenInfo info1;
|
if (t1.token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||||
tokenstream_info(ts, &info1);
|
|
||||||
if (t1 != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
|
||||||
if (info1.token != TOKEN_MODULE) fail("info: expected TOKEN_MODULE");
|
|
||||||
|
|
||||||
char buf1[32];
|
char buf1[32];
|
||||||
memcpy(buf1, info1.text, info1.text_length);
|
memcpy(buf1, t1.text, t1.text_length);
|
||||||
buf1[info1.text_length] = '\0';
|
buf1[t1.text_length] = '\0';
|
||||||
assert_str("module", buf1, "info: expected 'module'");
|
assert_str("module", buf1, "info: expected 'module'");
|
||||||
|
if (t1.line != 1) fail("expected line 1");
|
||||||
|
if (t1.column != 1) fail("expected column 1");
|
||||||
|
|
||||||
Token t2 = tokenstream_next(ts);
|
Token t2 = tokenstream_next(ts);
|
||||||
TokenInfo info2;
|
if (t2.token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
|
||||||
tokenstream_info(ts, &info2);
|
|
||||||
if (t2 != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
|
|
||||||
if (info2.token != TOKEN_IDENTIFIER) fail("info: expected TOKEN_IDENTIFIER");
|
|
||||||
|
|
||||||
char buf2[32];
|
char buf2[32];
|
||||||
memcpy(buf2, info2.text, info2.text_length);
|
memcpy(buf2, t2.text, t2.text_length);
|
||||||
buf2[info2.text_length] = '\0';
|
buf2[t2.text_length] = '\0';
|
||||||
assert_str("main", buf2, "info: expected 'main'");
|
assert_str("main", buf2, "info: expected 'main'");
|
||||||
|
if (t2.line != 1) fail("expected line 1");
|
||||||
|
if (t2.column != 8) fail("expected column 8");
|
||||||
|
|
||||||
tokenstream_close(ts);
|
tokenstream_close(ts);
|
||||||
}
|
}
|
||||||
|
|||||||
+74
-58
@@ -6,7 +6,9 @@
|
|||||||
// Tokenizer state for one source string. The removed `last_info` member is
// shown next to the position-tracking members that replace it.
struct TokenStream {
|
struct TokenStream {
|
||||||
const char* code;
|
// The text being tokenized; read_char() treats a '\0' byte as end of input.
const char* code;
|
||||||
size_t pos;
|
// Index into `code` of the next character to read.
size_t pos;
|
||||||
TokenInfo last_info;
|
// Line of the next character to read: tokenstream_open() sets it to 1 and
// read_char() increments it after consuming a '\n'.
int line;
|
||||||
|
// Column of the next character to read: starts at 1, is reset to 1 after a
// '\n' and incremented for every other character consumed.
int column;
|
||||||
|
// First character of the current line: `code` initially, then the character
// following each consumed '\n'.
const char* line_start;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -15,7 +17,7 @@ struct TokenStream {
|
|||||||
*/
|
*/
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const char* keyword;
|
// Keyword spelling; lookup_keyword() compares its strlen() against the candidate length.
const char* keyword;
|
||||||
Token token;
|
// Token kind associated with `keyword` (declared as Token before this change, TokenType after).
TokenType token;
|
||||||
} KeywordMap;
|
} KeywordMap;
|
||||||
|
|
||||||
static const KeywordMap keywords[] = {
|
static const KeywordMap keywords[] = {
|
||||||
@@ -28,7 +30,7 @@ static const KeywordMap keywords[] = {
|
|||||||
* Look up a keyword in the keyword map.
|
* Look up a keyword in the keyword map.
|
||||||
* Returns TOKEN_IDENTIFIER if not found.
|
* Returns TOKEN_IDENTIFIER if not found.
|
||||||
*/
|
*/
|
||||||
static Token lookup_keyword(const char* str, size_t length) {
|
static TokenType lookup_keyword(const char* str, size_t length) {
|
||||||
int count = sizeof(keywords) / sizeof(keywords[0]);
|
int count = sizeof(keywords) / sizeof(keywords[0]);
|
||||||
for (int i = 0; i < count; i++) {
|
for (int i = 0; i < count; i++) {
|
||||||
if (strlen(keywords[i].keyword) == length &&
|
if (strlen(keywords[i].keyword) == length &&
|
||||||
@@ -53,39 +55,49 @@ static int is_identifier_part(char c) {
|
|||||||
return isalnum(c) || c == '_';
|
return isalnum(c) || c == '_';
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Read a character from the stream.
|
|
||||||
*/
|
|
||||||
static char read_char(TokenStream* ts) {
|
|
||||||
char c = ts->code[ts->pos];
|
|
||||||
if (c == '\0') return (char)-1;
|
|
||||||
ts->pos++;
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Peek at the next character in the stream.
|
* Peek at the next character in the stream.
|
||||||
*/
|
*/
|
||||||
// Returns code[pos] without advancing pos or touching line/column tracking.
// The result is '\0' when pos is at the terminating NUL of the input.
static char peek_char(TokenStream* ts) {
|
static char peek_char(TokenStream* ts) {
|
||||||
|
return ts->code[ts->pos];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read a character from the stream and update position.
|
||||||
|
*/
|
||||||
|
// Consumes and returns code[pos], keeping line, column and line_start in sync.
// At the terminating NUL nothing is consumed and '\0' is returned (the removed
// line shown below returned (char)-1 for end of input instead).
static char read_char(TokenStream* ts) {
|
||||||
char c = ts->code[ts->pos];
|
char c = ts->code[ts->pos];
|
||||||
if (c == '\0') return (char)-1;
|
if (c == '\0') return '\0';
|
||||||
|
|
||||||
|
ts->pos++;
|
||||||
|
// A consumed newline starts a new line: bump the line counter, restart the
// column at 1 and remember where the new line begins (pos already points past '\n').
if (c == '\n') {
|
||||||
|
ts->line++;
|
||||||
|
ts->column = 1;
|
||||||
|
ts->line_start = &ts->code[ts->pos];
|
||||||
|
} else {
|
||||||
|
ts->column++;
|
||||||
|
}
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
static Token read_keyword_or_identifier(TokenStream* ts, char first) {
|
/**
 * Returns the length of the line that begins at `line_start`: the number of
 * characters before the first '\n' or, if there is none, before the
 * terminating NUL.
 *
 * @param line_start NUL-terminated text positioned at the start of a line.
 * @return The line length, not counting the line break.
 */
static size_t get_line_length(const char* line_start) {
    // strcspn() counts the leading characters that are not in the reject set
    // and always stops at the terminating NUL, so it covers both end conditions.
    return strcspn(line_start, "\n");
}
|
||||||
|
|
||||||
Token token = lookup_keyword(start, length);
|
// Builds a Token value from its parts and returns it by value.
//   type        token kind stored in `token`
//   text/length pointer and length of the token text (stored as-is; the caller passes NULL/0 for end of input)
//   line/column position stored in the token
//   line_start  start of the line containing the token; its length up to the
//               line break is computed with get_line_length()
// `ts` is not used by the statements of the new version. The interleaved
// `ts->last_info...` lines belong to the removed read_keyword_or_identifier().
static Token create_token(TokenStream* ts, TokenType type, const char* text, size_t length, int line, int column, const char* line_start) {
|
||||||
ts->last_info.token = token;
|
Token t;
|
||||||
ts->last_info.text = (char*)start;
|
t.token = type;
|
||||||
ts->last_info.text_length = length;
|
// The cast drops const only to fit the `char*` member; nothing here writes through it.
t.text = (char*)text;
|
||||||
return token;
|
t.text_length = length;
|
||||||
|
t.line = line;
|
||||||
|
t.column = column;
|
||||||
|
t.line_text = (char*)line_start;
|
||||||
|
t.line_text_length = get_line_length(line_start);
|
||||||
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
TokenStream* tokenstream_open(const char* code) {
|
TokenStream* tokenstream_open(const char* code) {
|
||||||
@@ -98,9 +110,9 @@ TokenStream* tokenstream_open(const char* code) {
|
|||||||
|
|
||||||
ts->code = code;
|
ts->code = code;
|
||||||
ts->pos = 0;
|
ts->pos = 0;
|
||||||
ts->last_info.text = NULL;
|
ts->line = 1;
|
||||||
ts->last_info.text_length = 0;
|
ts->column = 1;
|
||||||
ts->last_info.token = (Token)-1;
|
ts->line_start = code;
|
||||||
return ts;
|
return ts;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -110,66 +122,70 @@ void tokenstream_close(TokenStream* ts) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Reads the next token from the stream.
 *
 * Leading whitespace and `//` line comments are skipped. The returned Token
 * carries the token kind, a pointer/length pair into the source text, the
 * line and column at which the token starts, and the start of the source
 * line that contains it.
 *
 * @param ts The stream to read from. If NULL, a zero-initialized token of
 *           kind TOKEN_EOF is returned.
 * @return TOKEN_EOF at end of input (with NULL text and length 0),
 *         TOKEN_UNKNOWN for a character that starts no known token
 *         (this includes a lone '/'), otherwise the recognized token.
 */
Token tokenstream_next(TokenStream* ts) {
    if (ts == NULL) {
        Token t = {0};
        t.token = TOKEN_EOF;
        return t;
    }

    char c;

    /* Skip whitespace and comments */
    while ((c = peek_char(ts)) != '\0') {
        /* <ctype.h> functions need a value representable as unsigned char;
         * a plain char holding a byte >= 0x80 may be negative. */
        if (isspace((unsigned char)c)) {
            read_char(ts);
            continue;
        }

        /* `//` starts a comment running to the end of the line. code[pos] is
         * '/' here, so code[pos + 1] is at worst the terminating NUL. */
        if (c == '/' && ts->code[ts->pos + 1] == '/') {
            while ((c = read_char(ts)) != '\0' && c != '\n') {
                /* Skip */
            }
            continue;
        }

        /* Anything else, including a lone '/', starts a token */
        break;
    }

    if (peek_char(ts) == '\0') {
        return create_token(ts, TOKEN_EOF, NULL, 0, ts->line, ts->column, ts->line_start);
    }

    /* Capture where the token starts before consuming any of it */
    int start_line = ts->line;
    int start_column = ts->column;
    const char* line_start = ts->line_start;
    const char* start_text = &ts->code[ts->pos];

    c = read_char(ts);

    /* Single-character tokens */
    switch (c) {
        case '(': return create_token(ts, TOKEN_PARENT_OPEN, start_text, 1, start_line, start_column, line_start);
        case ')': return create_token(ts, TOKEN_PARENT_CLOSE, start_text, 1, start_line, start_column, line_start);
        case '[': return create_token(ts, TOKEN_BRACKET_OPEN, start_text, 1, start_line, start_column, line_start);
        case ']': return create_token(ts, TOKEN_BRACKET_CLOSE, start_text, 1, start_line, start_column, line_start);
        case ',': return create_token(ts, TOKEN_COMMA, start_text, 1, start_line, start_column, line_start);
        case ';': return create_token(ts, TOKEN_SEMICOLON, start_text, 1, start_line, start_column, line_start);
        default: break;
    }

    /* Keywords and identifiers */
    /* NOTE(review): is_identifier_part() passes a plain char to isalnum();
     * the same unsigned char cast is needed inside that helper. */
    if (is_identifier_start(c)) {
        size_t length = 1;
        while (is_identifier_part(peek_char(ts))) {
            read_char(ts);
            length++;
        }
        TokenType type = lookup_keyword(start_text, length);
        return create_token(ts, type, start_text, length, start_line, start_column, line_start);
    }

    /* Unknown character */
    return create_token(ts, TOKEN_UNKNOWN, start_text, 1, start_line, start_column, line_start);
}
|
||||||
|
|||||||
+19
-12
@@ -27,7 +27,11 @@ typedef enum {
|
|||||||
|
|
||||||
// Variable
|
// Variable
|
||||||
TOKEN_IDENTIFIER,
|
TOKEN_IDENTIFIER,
|
||||||
} Token;
|
|
||||||
|
// Others
|
||||||
|
TOKEN_EOF,
|
||||||
|
TOKEN_UNKNOWN,
|
||||||
|
} TokenType;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds additional information about a token.
|
* Holds additional information about a token.
|
||||||
@@ -37,12 +41,24 @@ typedef struct {
|
|||||||
/// Note that this is not necessarily null-terminated.
|
/// Note that this is not necessarily null-terminated.
|
||||||
char* text;
|
char* text;
|
||||||
|
|
||||||
|
/// @brief The entire line of text where the token was found.
|
||||||
|
char* line_text;
|
||||||
|
|
||||||
/// @brief The length of the `text` string.
|
/// @brief The length of the `text` string.
|
||||||
size_t text_length;
|
size_t text_length;
|
||||||
|
|
||||||
|
/// @brief The length of the `line_text` string.
|
||||||
|
size_t line_text_length;
|
||||||
|
|
||||||
/// @brief The actual token.
|
/// @brief The actual token.
|
||||||
Token token;
|
TokenType token;
|
||||||
} TokenInfo;
|
|
||||||
|
/// @brief The line number where the token was found.
|
||||||
|
int line;
|
||||||
|
|
||||||
|
/// @brief The column number where the token was found.
|
||||||
|
int column;
|
||||||
|
} Token;
|
||||||
|
|
||||||
typedef struct TokenStream TokenStream;
|
typedef struct TokenStream TokenStream;
|
||||||
|
|
||||||
@@ -67,13 +83,4 @@ void tokenstream_close(TokenStream* ts);
|
|||||||
*/
|
*/
|
||||||
Token tokenstream_next(TokenStream* ts);
|
Token tokenstream_next(TokenStream* ts);
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets additional information about the last token that was returned
|
|
||||||
* by `tokenstream_next`.
|
|
||||||
*
|
|
||||||
* @param ts The TokenStream to use.
|
|
||||||
* @param info The TokenInfo object to store the results in.
|
|
||||||
*/
|
|
||||||
void tokenstream_info(TokenStream* ts, TokenInfo* info);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
Reference in New Issue
Block a user