#include "token.h"

#include <ctype.h>
#include <stdlib.h>
#include <string.h>

/**
 * Value returned by read_char()/peek_char() when the terminating NUL of the
 * source text has been reached. Characters are returned as unsigned char
 * values (0..255), so this sentinel can never collide with a real byte.
 */
enum { END_OF_INPUT = -1 };

struct TokenStream {
    const char* code;    /* NUL-terminated source text; borrowed, never freed here */
    size_t pos;          /* index into code of the next unread character */
    TokenInfo last_info; /* details of the token most recently produced */
};

/**
 * Easy-to-read and modify keyword-to-token mapping.
 * Add new keywords here.
 */
typedef struct {
    const char* keyword;
    Token token;
} KeywordMap;

static const KeywordMap keywords[] = {
    {"module", TOKEN_MODULE},
    {"import", TOKEN_IMPORT},
    {"void", TOKEN_VOID},
};

/**
 * Look up a keyword in the keyword map.
 *
 * @param str    Start of the candidate text (need not be NUL-terminated).
 * @param length Number of characters of str to compare.
 * @return The matching keyword token, or TOKEN_IDENTIFIER if not found.
 */
static Token lookup_keyword(const char* str, size_t length)
{
    const size_t count = sizeof(keywords) / sizeof(keywords[0]);

    for (size_t i = 0; i < count; i++) {
        const char* keyword = keywords[i].keyword;

        /* Lengths are compared first, so memcmp never reads past either string. */
        if (strlen(keyword) == length && memcmp(keyword, str, length) == 0) {
            return keywords[i].token;
        }
    }
    return TOKEN_IDENTIFIER;
}

/**
 * Check if a character is the start of an identifier.
 *
 * @param c An unsigned char value or END_OF_INPUT, as returned by
 *          read_char()/peek_char().
 */
static int is_identifier_start(int c)
{
    return c != END_OF_INPUT && (isalpha(c) || c == '_');
}

/**
 * Check if a character can be part of an identifier.
 *
 * @param c An unsigned char value or END_OF_INPUT, as returned by
 *          read_char()/peek_char().
 */
static int is_identifier_part(int c)
{
    return c != END_OF_INPUT && (isalnum(c) || c == '_');
}

/**
 * Peek at the next character in the stream without consuming it.
 *
 * @return The character as an unsigned char value, or END_OF_INPUT when the
 *         terminating NUL has been reached.
 */
static int peek_char(TokenStream* ts)
{
    /* Convert through unsigned char so bytes >= 0x80 stay non-negative and
     * are safe to hand to the <ctype.h> classification functions. */
    unsigned char c = (unsigned char)ts->code[ts->pos];

    return c == '\0' ? END_OF_INPUT : (int)c;
}

/**
 * Read a character from the stream and advance past it.
 * The position is not advanced once the terminating NUL is reached, so
 * repeated calls at the end keep returning END_OF_INPUT.
 *
 * @return The character as an unsigned char value, or END_OF_INPUT.
 */
static int read_char(TokenStream* ts)
{
    int c = peek_char(ts);

    if (c != END_OF_INPUT) {
        ts->pos++;
    }
    return c;
}

/**
 * Reset the last-token information to "no token" and return the error/EOF
 * token value (Token)-1.
 */
static Token clear_last_token(TokenStream* ts)
{
    ts->last_info.token = (Token)-1;
    ts->last_info.text = NULL;
    ts->last_info.text_length = 0;
    return (Token)-1;
}

/**
 * Consume the remainder of an identifier whose first character has just been
 * read, classify it as a keyword or TOKEN_IDENTIFIER, and record it in
 * ts->last_info. The recorded text points into ts->code (no copy is made).
 */
static Token read_keyword_or_identifier(TokenStream* ts)
{
    /* The first character was already consumed, so it sits at pos - 1. */
    const char* start = &ts->code[ts->pos - 1];
    size_t length = 1;

    while (is_identifier_part(peek_char(ts))) {
        read_char(ts);
        length++;
    }

    Token token = lookup_keyword(start, length);
    ts->last_info.token = token;
    ts->last_info.text = (char*)start;
    ts->last_info.text_length = length;
    return token;
}

/**
 * Create a token stream over a NUL-terminated source string.
 *
 * The string is not copied: the stream keeps the pointer, and token text
 * reported later points into it, so `code` must outlive the stream.
 *
 * @param code Source text to tokenize.
 * @return A new stream to be released with tokenstream_close(), or NULL if
 *         `code` is NULL or allocation fails.
 */
TokenStream* tokenstream_open(const char* code)
{
    if (code == NULL) {
        return NULL;
    }

    /* calloc so that any TokenInfo fields not set below start out zeroed. */
    TokenStream* ts = calloc(1, sizeof *ts);
    if (ts == NULL) {
        return NULL;
    }

    ts->code = code;
    ts->pos = 0;
    clear_last_token(ts);
    return ts;
}

/**
 * Release a stream created by tokenstream_open(). The source text passed to
 * tokenstream_open() is not freed. Passing NULL is allowed.
 */
void tokenstream_close(TokenStream* ts)
{
    free(ts); /* free(NULL) is a no-op */
}

/**
 * Read the next token from the stream.
 *
 * Whitespace and `//` line comments are skipped. Recognised tokens are the
 * single characters ( ) [ ] , ; plus keywords and identifiers. Details of the
 * returned token are stored for retrieval with tokenstream_info().
 *
 * @return The token, or (Token)-1 when `ts` is NULL, at end of input, on a
 *         lone '/' (not supported yet), or on any other unknown character.
 *         For every (Token)-1 result on a non-NULL stream the stored token
 *         information is reset to token (Token)-1, NULL text, length 0.
 */
Token tokenstream_next(TokenStream* ts)
{
    if (ts == NULL) {
        return (Token)-1;
    }

    int c;

    /* Skip whitespace and comments */
    while ((c = read_char(ts)) != END_OF_INPUT) {
        if (isspace(c)) {
            continue;
        }
        if (c != '/') {
            /* We found a non-whitespace, non-comment character */
            break;
        }
        if (peek_char(ts) != '/') {
            /* It's just a slash, which we don't handle yet */
            return clear_last_token(ts);
        }
        /* Line comment: consume through the end of the line (or the input). */
        do {
            c = read_char(ts);
        } while (c != END_OF_INPUT && c != '\n');
    }

    if (c == END_OF_INPUT) {
        return clear_last_token(ts); /* EOF */
    }

    /* Single-character tokens */
    ts->last_info.text = (char*)&ts->code[ts->pos - 1];
    ts->last_info.text_length = 1;
    switch (c) {
    case '(': return ts->last_info.token = TOKEN_PARENT_OPEN;
    case ')': return ts->last_info.token = TOKEN_PARENT_CLOSE;
    case '[': return ts->last_info.token = TOKEN_BRACKET_OPEN;
    case ']': return ts->last_info.token = TOKEN_BRACKET_CLOSE;
    case ',': return ts->last_info.token = TOKEN_COMMA;
    case ';': return ts->last_info.token = TOKEN_SEMICOLON;
    default:
        break;
    }

    /* Keywords and identifiers */
    if (is_identifier_start(c)) {
        return read_keyword_or_identifier(ts);
    }

    /* Unknown character */
    return clear_last_token(ts);
}

/**
 * Copy the information about the most recently read token into `info`.
 * Does nothing if either argument is NULL.
 */
void tokenstream_info(TokenStream* ts, TokenInfo* info)
{
    if (ts == NULL || info == NULL) {
        return;
    }
    *info = ts->last_info;
}