Files
c2/v0/token.c
T

176 lines
3.7 KiB
C

#include "token.h"
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
/**
 * Tokenizer state for one source buffer.
 */
struct TokenStream {
/* Source text being scanned. Only the pointer is stored (no copy is made);
 * the helpers read it up to its terminating NUL. */
const char* code;
/* Index into code of the next character to be read. */
size_t pos;
/* Details of the most recently produced token; copied out to callers by
 * tokenstream_info(). */
TokenInfo last_info;
};
/**
 * One entry of the keyword-to-token mapping.
 * The entries themselves live in the `keywords` table below; add new
 * keywords there.
 */
typedef struct {
/* Keyword spelling as a NUL-terminated string. */
const char* keyword;
/* Token value reported when an identifier matches the spelling. */
Token token;
} KeywordMap;
static const KeywordMap keywords[] = {
{"module", TOKEN_MODULE},
{"import", TOKEN_IMPORT},
{"void", TOKEN_VOID},
};
/**
* Look up a keyword in the keyword map.
* Returns TOKEN_IDENTIFIER if not found.
*/
static Token lookup_keyword(const char* str, size_t length) {
int count = sizeof(keywords) / sizeof(keywords[0]);
for (int i = 0; i < count; i++) {
if (strlen(keywords[i].keyword) == length &&
strncmp(keywords[i].keyword, str, length) == 0) {
return keywords[i].token;
}
}
return TOKEN_IDENTIFIER;
}
/**
 * Check if a character is the start of an identifier.
 *
 * Returns 1 when c is a letter (as classified by isalpha) or '_',
 * and 0 otherwise.
 */
static int is_identifier_start(char c) {
    /* The <ctype.h> classifiers require a value representable as
     * unsigned char (or EOF). Plain char may be signed, so bytes >= 0x80
     * would otherwise be passed as negative values, which is undefined
     * behaviour. */
    return isalpha((unsigned char)c) || c == '_';
}
/**
 * Check if a character can be part of an identifier.
 *
 * Returns 1 when c is a letter or digit (as classified by isalnum) or '_',
 * and 0 otherwise.
 */
static int is_identifier_part(char c) {
    /* The <ctype.h> classifiers require a value representable as
     * unsigned char (or EOF). Plain char may be signed, so bytes >= 0x80
     * would otherwise be passed as negative values, which is undefined
     * behaviour. In the default "C" locale no value above 0x7F is
     * alphanumeric, so the (char)-1 end-of-input marker returned by
     * peek_char() is still rejected after the conversion. */
    return isalnum((unsigned char)c) || c == '_';
}
/**
 * Consume one character from the stream.
 *
 * Returns the character at the current position and advances past it.
 * When the current character is the terminating NUL, the position is left
 * unchanged and the end-of-input marker (char)-1 is returned instead.
 */
static char read_char(TokenStream* ts) {
    const char current = ts->code[ts->pos];

    if (current != '\0') {
        ts->pos += 1;
        return current;
    }
    return (char)-1;
}
/**
 * Look at the character at the current position without consuming it.
 *
 * Returns that character, or the end-of-input marker (char)-1 when the
 * current character is the terminating NUL. The position never changes.
 */
static char peek_char(TokenStream* ts) {
    const char upcoming = ts->code[ts->pos];

    return (upcoming == '\0') ? (char)-1 : upcoming;
}
/**
 * Scan the remainder of a keyword or identifier.
 *
 * The caller has already consumed the first character, so the lexeme starts
 * at ts->pos - 1. Characters are consumed while is_identifier_part() accepts
 * the next one. The resulting text is looked up in the keyword table, and the
 * token, a pointer to the lexeme inside the source buffer, and its length are
 * recorded in ts->last_info.
 *
 * Returns the keyword token, or TOKEN_IDENTIFIER (via lookup_keyword()).
 */
static Token read_keyword_or_identifier(TokenStream* ts, char first) {
    /* The first character's value is not needed; only its position is. */
    (void)first;

    const size_t begin = ts->pos - 1;

    while (is_identifier_part(peek_char(ts))) {
        read_char(ts);
    }

    /* Every accepted character advanced pos by one, so the position delta
     * is the lexeme length (including the already-consumed first char). */
    const char *lexeme = ts->code + begin;
    const size_t lexeme_length = ts->pos - begin;
    const Token kind = lookup_keyword(lexeme, lexeme_length);

    ts->last_info.text_length = lexeme_length;
    ts->last_info.text = (char*)lexeme;
    ts->last_info.token = kind;
    return kind;
}
/**
 * Create a token stream over a NUL-terminated source string.
 *
 * Only the pointer is stored, not a copy of the text, so `code` must stay
 * valid for as long as the stream is used. The stream starts at position 0
 * with no current token (token (Token)-1, NULL text, zero length).
 *
 * Returns a heap-allocated stream to be released with tokenstream_close(),
 * or NULL when `code` is NULL or the allocation fails.
 */
TokenStream* tokenstream_open(const char* code) {
    TokenStream *stream;

    if (code == NULL) {
        return NULL;
    }

    stream = malloc(sizeof *stream);
    if (stream != NULL) {
        stream->code = code;
        stream->pos = 0;
        stream->last_info.token = (Token)-1;
        stream->last_info.text = NULL;
        stream->last_info.text_length = 0;
    }
    return stream;
}
/**
 * Release a stream created by tokenstream_open().
 *
 * Passing NULL is allowed and does nothing. The source text the stream was
 * opened on is not freed.
 */
void tokenstream_close(TokenStream* ts) {
    /* free(NULL) is a no-op, so no separate NULL check is required. */
    free(ts);
}
/**
 * Advance the stream and return the next token.
 *
 * Whitespace and `//` line comments are skipped. Recognised tokens are the
 * single-character punctuators ( ) [ ] , ; and keywords/identifiers.
 * For a recognised token, its kind, a pointer to its text inside the source
 * buffer, and its length are stored so tokenstream_info() can report them.
 *
 * Returns the token, or (Token)-1 when:
 *   - ts is NULL (no state is touched),
 *   - the end of input is reached, or
 *   - an unsupported character is found, including a '/' that does not
 *     start a `//` comment.
 * In the last two cases the stored token info is reset to
 * (Token)-1 / NULL / 0, so a failed call never leaves the previous token's
 * details behind.
 */
Token tokenstream_next(TokenStream* ts) {
    if (ts == NULL) return (Token)-1;

    char c;

    /* Skip whitespace and comments */
    while ((c = read_char(ts)) != (char)-1) {
        /* Convert before classifying: <ctype.h> functions have undefined
         * behaviour for negative arguments other than EOF. */
        if (isspace((unsigned char)c)) {
            continue;
        }

        if (c == '/' && peek_char(ts) == '/') {
            /* Line comment: discard everything up to and including the
             * newline (or up to end of input). */
            while ((c = read_char(ts)) != (char)-1 && c != '\n') {
                /* Skip */
            }
            continue;
        }

        /* Found a significant character. A lone '/' also lands here; it is
         * not a supported token yet and is rejected by the unknown-character
         * path below, which resets the stored token info. */
        break;
    }

    if (c == (char)-1) {
        ts->last_info.token = (Token)-1;
        ts->last_info.text = NULL;
        ts->last_info.text_length = 0;
        return (Token)-1; /* EOF */
    }

    /* Single-character tokens: the text is the character just consumed. */
    ts->last_info.text = (char*)&ts->code[ts->pos - 1];
    ts->last_info.text_length = 1;
    switch (c) {
    case '(': return ts->last_info.token = TOKEN_PARENT_OPEN;
    case ')': return ts->last_info.token = TOKEN_PARENT_CLOSE;
    case '[': return ts->last_info.token = TOKEN_BRACKET_OPEN;
    case ']': return ts->last_info.token = TOKEN_BRACKET_CLOSE;
    case ',': return ts->last_info.token = TOKEN_COMMA;
    case ';': return ts->last_info.token = TOKEN_SEMICOLON;
    default:  break;
    }

    /* Keywords and identifiers */
    if (is_identifier_start(c)) {
        return read_keyword_or_identifier(ts, c);
    }

    /* Unknown character */
    ts->last_info.token = (Token)-1;
    ts->last_info.text = NULL;
    ts->last_info.text_length = 0;
    return (Token)-1;
}
/**
 * Copy the details of the most recently produced token into *info.
 *
 * Does nothing when either ts or info is NULL. The copied text pointer
 * refers into the stream's source buffer; no string is duplicated.
 */
void tokenstream_info(TokenStream* ts, TokenInfo* info) {
    if (ts != NULL && info != NULL) {
        *info = ts->last_info;
    }
}