Refactor token mapping: use a keyword map for tokenization instead of a hard-coded strcmp chain
- Created KeywordMap structure with keyword-to-token mapping at top of token.c
- Added lookup_keyword() function to check if an identifier is a keyword
- Replaced 3 strcmp calls (lines 99-101) with a single lookup_keyword() call
- Removed token_to_string() function and its tests (3 tests removed)
- A single easy-to-read and modify keyword map serves as both documentation and implementation
- New keywords can now be added by editing the keywords[] array at top of token.c

All 12 tests passing (removed token_to_string tests, which are now unnecessary).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -28,9 +28,6 @@ static TestCase s_tests[] = {
|
|||||||
{"buffer_string_eof_after_content", test_buffer_string_eof_after_content},
|
{"buffer_string_eof_after_content", test_buffer_string_eof_after_content},
|
||||||
{"buffer_file_reads_chars", test_buffer_file_reads_chars},
|
{"buffer_file_reads_chars", test_buffer_file_reads_chars},
|
||||||
{"buffer_file_open_fail", test_buffer_file_open_fail},
|
{"buffer_file_open_fail", test_buffer_file_open_fail},
|
||||||
{"token_to_string_keywords", test_token_to_string_keywords},
|
|
||||||
{"token_to_string_symbols", test_token_to_string_symbols},
|
|
||||||
{"token_to_string_identifier", test_token_to_string_identifier},
|
|
||||||
{"tokenstream_open_fail", test_tokenstream_open_fail},
|
{"tokenstream_open_fail", test_tokenstream_open_fail},
|
||||||
{"tokenstream_simple_keyword", test_tokenstream_simple_keyword},
|
{"tokenstream_simple_keyword", test_tokenstream_simple_keyword},
|
||||||
{"tokenstream_keywords_and_symbols", test_tokenstream_keywords_and_symbols},
|
{"tokenstream_keywords_and_symbols", test_tokenstream_keywords_and_symbols},
|
||||||
|
|||||||
@@ -13,25 +13,6 @@ static void write_test_file(const char* filename, const char* content) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_token_to_string_keywords(void) {
|
|
||||||
if (strcmp(token_to_string(TOKEN_MODULE), "module") != 0) fail("module");
|
|
||||||
if (strcmp(token_to_string(TOKEN_IMPORT), "import") != 0) fail("import");
|
|
||||||
if (strcmp(token_to_string(TOKEN_VOID), "void") != 0) fail("void");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_token_to_string_symbols(void) {
|
|
||||||
if (strcmp(token_to_string(TOKEN_SEMICOLON), "semicolon") != 0) fail("semicolon");
|
|
||||||
if (strcmp(token_to_string(TOKEN_PARENT_OPEN), "paren_open") != 0) fail("paren_open");
|
|
||||||
if (strcmp(token_to_string(TOKEN_PARENT_CLOSE), "paren_close") != 0) fail("paren_close");
|
|
||||||
if (strcmp(token_to_string(TOKEN_BRACKET_OPEN), "bracket_open") != 0) fail("bracket_open");
|
|
||||||
if (strcmp(token_to_string(TOKEN_BRACKET_CLOSE), "bracket_close") != 0) fail("bracket_close");
|
|
||||||
if (strcmp(token_to_string(TOKEN_COMMA), "comma") != 0) fail("comma");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_token_to_string_identifier(void) {
|
|
||||||
if (strcmp(token_to_string(TOKEN_IDENTIFIER), "identifier") != 0) fail("identifier");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_tokenstream_open_fail(void) {
|
static void test_tokenstream_open_fail(void) {
|
||||||
Buffer* buf = buffer_open_file("v0/does_not_exist.c2");
|
Buffer* buf = buffer_open_file("v0/does_not_exist.c2");
|
||||||
if (buf != NULL) fail("expected NULL for non-existent file");
|
if (buf != NULL) fail("expected NULL for non-existent file");
|
||||||
|
|||||||
+26
-31
@@ -5,41 +5,40 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Easy-to-read and modify token-to-string mapping.
|
* Easy-to-read and modify keyword-to-token mapping.
|
||||||
* Order must match the Token enum in token.h.
|
* Add new keywords here.
|
||||||
*/
|
*/
|
||||||
static const char* token_names[] = {
|
typedef struct {
|
||||||
"module",
|
const char* keyword;
|
||||||
"import",
|
Token token;
|
||||||
"semicolon",
|
} KeywordMap;
|
||||||
"paren_open",
|
|
||||||
"paren_close",
|
static const KeywordMap keywords[] = {
|
||||||
"bracket_open",
|
{"module", TOKEN_MODULE},
|
||||||
"bracket_close",
|
{"import", TOKEN_IMPORT},
|
||||||
"comma",
|
{"void", TOKEN_VOID},
|
||||||
"void",
|
|
||||||
"identifier",
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Look up a keyword in the keyword map.
|
||||||
|
* Returns TOKEN_IDENTIFIER if not found.
|
||||||
|
*/
|
||||||
|
static Token lookup_keyword(const char* str) {
|
||||||
|
int count = sizeof(keywords) / sizeof(keywords[0]);
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
if (strcmp(keywords[i].keyword, str) == 0) {
|
||||||
|
return keywords[i].token;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return TOKEN_IDENTIFIER;
|
||||||
|
}
|
||||||
|
|
||||||
struct TokenStream {
|
struct TokenStream {
|
||||||
Buffer* buffer;
|
Buffer* buffer;
|
||||||
char lookahead;
|
char lookahead;
|
||||||
int has_lookahead;
|
int has_lookahead;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert a Token enum to its string representation.
|
|
||||||
* @param token The token to convert.
|
|
||||||
* @returns The string name of the token.
|
|
||||||
*/
|
|
||||||
const char* token_to_string(Token token) {
|
|
||||||
int count = sizeof(token_names) / sizeof(token_names[0]);
|
|
||||||
if (token >= 0 && token < count) {
|
|
||||||
return token_names[token];
|
|
||||||
}
|
|
||||||
return "unknown";
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if a character is the start of an identifier.
|
* Check if a character is the start of an identifier.
|
||||||
*/
|
*/
|
||||||
@@ -96,11 +95,7 @@ static Token read_keyword_or_identifier(TokenStream* ts, char first) {
|
|||||||
buffer[index] = '\0';
|
buffer[index] = '\0';
|
||||||
|
|
||||||
/* Check for keywords */
|
/* Check for keywords */
|
||||||
if (strcmp(buffer, "module") == 0) return TOKEN_MODULE;
|
return lookup_keyword(buffer);
|
||||||
if (strcmp(buffer, "import") == 0) return TOKEN_IMPORT;
|
|
||||||
if (strcmp(buffer, "void") == 0) return TOKEN_VOID;
|
|
||||||
|
|
||||||
return TOKEN_IDENTIFIER;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TokenStream* tokenstream_open(Buffer* buffer) {
|
TokenStream* tokenstream_open(Buffer* buffer) {
|
||||||
|
|||||||
@@ -28,13 +28,6 @@ typedef enum {
|
|||||||
|
|
||||||
typedef struct TokenStream TokenStream;
|
typedef struct TokenStream TokenStream;
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert a Token enum to its string representation.
|
|
||||||
* @param token The token to convert.
|
|
||||||
* @returns The string name of the token.
|
|
||||||
*/
|
|
||||||
const char* token_to_string(Token token);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a TokenStream for a given buffer.
|
* Returns a TokenStream for a given buffer.
|
||||||
*
|
*
|
||||||
|
|||||||
Reference in New Issue
Block a user