Token refactor and better logs
This commit is contained in:
+74
-58
@@ -6,7 +6,9 @@
|
||||
struct TokenStream {
|
||||
const char* code;
|
||||
size_t pos;
|
||||
TokenInfo last_info;
|
||||
int line;
|
||||
int column;
|
||||
const char* line_start;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -15,7 +17,7 @@ struct TokenStream {
|
||||
*/
|
||||
typedef struct {
|
||||
const char* keyword;
|
||||
Token token;
|
||||
TokenType token;
|
||||
} KeywordMap;
|
||||
|
||||
static const KeywordMap keywords[] = {
|
||||
@@ -28,7 +30,7 @@ static const KeywordMap keywords[] = {
|
||||
* Look up a keyword in the keyword map.
|
||||
* Returns TOKEN_IDENTIFIER if not found.
|
||||
*/
|
||||
static Token lookup_keyword(const char* str, size_t length) {
|
||||
static TokenType lookup_keyword(const char* str, size_t length) {
|
||||
int count = sizeof(keywords) / sizeof(keywords[0]);
|
||||
for (int i = 0; i < count; i++) {
|
||||
if (strlen(keywords[i].keyword) == length &&
|
||||
@@ -53,39 +55,49 @@ static int is_identifier_part(char c) {
|
||||
return isalnum(c) || c == '_';
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a character from the stream.
|
||||
*/
|
||||
static char read_char(TokenStream* ts) {
|
||||
char c = ts->code[ts->pos];
|
||||
if (c == '\0') return (char)-1;
|
||||
ts->pos++;
|
||||
return c;
|
||||
}
|
||||
|
||||
/**
|
||||
* Peek at the next character in the stream.
|
||||
*/
|
||||
static char peek_char(TokenStream* ts) {
|
||||
return ts->code[ts->pos];
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a character from the stream and update position.
|
||||
*/
|
||||
static char read_char(TokenStream* ts) {
|
||||
char c = ts->code[ts->pos];
|
||||
if (c == '\0') return (char)-1;
|
||||
if (c == '\0') return '\0';
|
||||
|
||||
ts->pos++;
|
||||
if (c == '\n') {
|
||||
ts->line++;
|
||||
ts->column = 1;
|
||||
ts->line_start = &ts->code[ts->pos];
|
||||
} else {
|
||||
ts->column++;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
static Token read_keyword_or_identifier(TokenStream* ts, char first) {
|
||||
const char* start = &ts->code[ts->pos - 1];
|
||||
size_t length = 1;
|
||||
|
||||
while (is_identifier_part(peek_char(ts))) {
|
||||
read_char(ts);
|
||||
length++;
|
||||
static size_t get_line_length(const char* line_start) {
|
||||
const char* p = line_start;
|
||||
while (*p != '\n' && *p != '\0') {
|
||||
p++;
|
||||
}
|
||||
return (size_t)(p - line_start);
|
||||
}
|
||||
|
||||
Token token = lookup_keyword(start, length);
|
||||
ts->last_info.token = token;
|
||||
ts->last_info.text = (char*)start;
|
||||
ts->last_info.text_length = length;
|
||||
return token;
|
||||
static Token create_token(TokenStream* ts, TokenType type, const char* text, size_t length, int line, int column, const char* line_start) {
|
||||
Token t;
|
||||
t.token = type;
|
||||
t.text = (char*)text;
|
||||
t.text_length = length;
|
||||
t.line = line;
|
||||
t.column = column;
|
||||
t.line_text = (char*)line_start;
|
||||
t.line_text_length = get_line_length(line_start);
|
||||
return t;
|
||||
}
|
||||
|
||||
TokenStream* tokenstream_open(const char* code) {
|
||||
@@ -98,9 +110,9 @@ TokenStream* tokenstream_open(const char* code) {
|
||||
|
||||
ts->code = code;
|
||||
ts->pos = 0;
|
||||
ts->last_info.text = NULL;
|
||||
ts->last_info.text_length = 0;
|
||||
ts->last_info.token = (Token)-1;
|
||||
ts->line = 1;
|
||||
ts->column = 1;
|
||||
ts->line_start = code;
|
||||
return ts;
|
||||
}
|
||||
|
||||
@@ -110,66 +122,70 @@ void tokenstream_close(TokenStream* ts) {
|
||||
}
|
||||
|
||||
Token tokenstream_next(TokenStream* ts) {
|
||||
if (ts == NULL) return -1;
|
||||
if (ts == NULL) {
|
||||
Token t = {0};
|
||||
t.token = TOKEN_EOF;
|
||||
return t;
|
||||
}
|
||||
|
||||
char c;
|
||||
|
||||
/* Skip whitespace and comments */
|
||||
while ((c = read_char(ts)) != (char)-1) {
|
||||
while ((c = peek_char(ts)) != '\0') {
|
||||
if (isspace(c)) {
|
||||
read_char(ts);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle comments */
|
||||
if (c == '/') {
|
||||
if (peek_char(ts) == '/') {
|
||||
if (ts->code[ts->pos + 1] == '/') {
|
||||
/* Skip until end of line */
|
||||
while ((c = read_char(ts)) != (char)-1 && c != '\n') {
|
||||
while ((c = read_char(ts)) != '\0' && c != '\n') {
|
||||
/* Skip */
|
||||
}
|
||||
continue;
|
||||
}
|
||||
/* It's just a slash, which we don't handle yet */
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
|
||||
/* We found a non-whitespace, non-comment character */
|
||||
break;
|
||||
}
|
||||
|
||||
if (c == (char)-1) {
|
||||
ts->last_info.token = (Token)-1;
|
||||
ts->last_info.text = NULL;
|
||||
ts->last_info.text_length = 0;
|
||||
return -1; /* EOF */
|
||||
if (peek_char(ts) == '\0') {
|
||||
return create_token(ts, TOKEN_EOF, NULL, 0, ts->line, ts->column, ts->line_start);
|
||||
}
|
||||
|
||||
/* Single-character tokens */
|
||||
ts->last_info.text = (char*)&ts->code[ts->pos - 1];
|
||||
ts->last_info.text_length = 1;
|
||||
int start_line = ts->line;
|
||||
int start_column = ts->column;
|
||||
const char* line_start = ts->line_start;
|
||||
const char* start_text = &ts->code[ts->pos];
|
||||
|
||||
c = read_char(ts);
|
||||
|
||||
/* Single-character tokens */
|
||||
switch (c) {
|
||||
case '(': return ts->last_info.token = TOKEN_PARENT_OPEN;
|
||||
case ')': return ts->last_info.token = TOKEN_PARENT_CLOSE;
|
||||
case '[': return ts->last_info.token = TOKEN_BRACKET_OPEN;
|
||||
case ']': return ts->last_info.token = TOKEN_BRACKET_CLOSE;
|
||||
case ',': return ts->last_info.token = TOKEN_COMMA;
|
||||
case ';': return ts->last_info.token = TOKEN_SEMICOLON;
|
||||
case '(': return create_token(ts, TOKEN_PARENT_OPEN, start_text, 1, start_line, start_column, line_start);
|
||||
case ')': return create_token(ts, TOKEN_PARENT_CLOSE, start_text, 1, start_line, start_column, line_start);
|
||||
case '[': return create_token(ts, TOKEN_BRACKET_OPEN, start_text, 1, start_line, start_column, line_start);
|
||||
case ']': return create_token(ts, TOKEN_BRACKET_CLOSE, start_text, 1, start_line, start_column, line_start);
|
||||
case ',': return create_token(ts, TOKEN_COMMA, start_text, 1, start_line, start_column, line_start);
|
||||
case ';': return create_token(ts, TOKEN_SEMICOLON, start_text, 1, start_line, start_column, line_start);
|
||||
}
|
||||
|
||||
/* Keywords and identifiers */
|
||||
if (is_identifier_start(c)) {
|
||||
return read_keyword_or_identifier(ts, c);
|
||||
size_t length = 1;
|
||||
while (is_identifier_part(peek_char(ts))) {
|
||||
read_char(ts);
|
||||
length++;
|
||||
}
|
||||
TokenType type = lookup_keyword(start_text, length);
|
||||
return create_token(ts, type, start_text, length, start_line, start_column, line_start);
|
||||
}
|
||||
|
||||
/* Unknown character */
|
||||
ts->last_info.token = (Token)-1;
|
||||
ts->last_info.text = NULL;
|
||||
ts->last_info.text_length = 0;
|
||||
return -1;
|
||||
}
|
||||
|
||||
void tokenstream_info(TokenStream* ts, TokenInfo* info) {
|
||||
if (ts == NULL || info == NULL) return;
|
||||
*info = ts->last_info;
|
||||
return create_token(ts, TOKEN_UNKNOWN, start_text, 1, start_line, start_column, line_start);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user