Implement String structure and update Location/Token to use it

This commit is contained in:
2026-04-25 14:17:17 +02:00
parent 902e2f0325
commit 116bdecafe
9 changed files with 73 additions and 35 deletions
+8 -6
View File
@@ -4,6 +4,8 @@
#ifndef LOCATION_H
#define LOCATION_H
#include "string.h"
#include <stddef.h>
typedef struct {
@@ -11,16 +13,16 @@ typedef struct {
char* filename;
/// @brief The entire line of text where the token was found.
char* line_text;
/// @brief The length of the `line_text` string.
size_t line_text_length;
String line_text;
/// @brief The line number where the token was found.
int line;
/// @brief The column number where the token was found.
int column;
/// @brief The starting column number where the token was found.
int column_start;
/// @brief The ending column number where the token was found.
int column_end;
} Location;
#endif
+22 -6
View File
@@ -18,23 +18,39 @@ void log_error(const char* msg) {
}
}
/**
 * Formats a printf-style message into a caller-supplied buffer.
 *
 * This is currently a thin wrapper around vsnprintf: every conversion in
 * @p msg is interpreted by the C library. The project-specific `%S`
 * specifier (intended for a `String`) is NOT handled here yet, so callers
 * must not use it until dedicated parsing is added.
 *
 * Output that does not fit is truncated. If formatting fails, the buffer is
 * set to the empty string so callers can always treat it as a C string.
 *
 * @param buffer Destination buffer.
 * @param size   Capacity of @p buffer in bytes, including the terminator.
 * @param msg    printf-style format string.
 * @param args   Arguments for @p msg; the caller remains responsible for va_end.
 */
static void format_message(char* buffer, size_t size, const char* msg, va_list args) {
    // TODO: add `%S` support. Mixing custom specifiers with a va_list needs a
    // parser that consumes each argument itself, so it is deferred for now.
    int written = vsnprintf(buffer, size, msg, args);

    // On error the buffer contents are not guaranteed to be terminated,
    // and callers run strlen() on the result.
    if (written < 0 && size > 0) {
        buffer[0] = '\0';
    }
}
void log_on_line(Location* loc, int to_column, const char* msg, ...) {
char line_prefix[32];
int prefix_len = snprintf(line_prefix, sizeof(line_prefix), "%d| ", loc->line);
int caret_len = to_column - loc->column + 1;
int caret_len = to_column - loc->column_start + 1;
if (caret_len < 1) caret_len = 1;
// Format the message
va_list args;
va_start(args, msg);
char formatted_msg[256];
vsnprintf(formatted_msg, sizeof(formatted_msg), msg, args);
format_message(formatted_msg, sizeof(formatted_msg), msg, args);
va_end(args);
size_t total_size = strlen(loc->filename) + 16 + // --- filename ---
prefix_len + strlen(loc->line_text) + 2 + // line| text\n
prefix_len + loc->column - 1 + caret_len + 2 + // indent + ^^\n
prefix_len + loc->line_text.length + 2 + // line| text\n
prefix_len + loc->column_start - 1 + caret_len + 2 + // indent + ^^\n
prefix_len + strlen(formatted_msg) + 2 + // indent + msg\n
1;
@@ -43,10 +59,10 @@ void log_on_line(Location* loc, int to_column, const char* msg, ...) {
char* p = buffer;
p += sprintf(p, "--- %s ---\n", loc->filename);
p += sprintf(p, "%s%s\n", line_prefix, loc->line_text);
p += sprintf(p, "%s%.*s\n", line_prefix, (int)loc->line_text.length, loc->line_text.data);
// Caret line
for (int i = 0; i < prefix_len + loc->column - 1; i++) *p++ = ' ';
for (int i = 0; i < prefix_len + loc->column_start - 1; i++) *p++ = ' ';
for (int i = 0; i < caret_len; i++) *p++ = '^';
*p++ = '\n';
+3
View File
@@ -24,6 +24,9 @@ void log_error(const char* msg);
/**
* Logs a pretty error with additional information about the line where the error occurred.
*
* The @p msg parameter can contain format specifiers like printf, and the additional arguments will be formatted into the message.
* A `%S` format specifier for formatting a `String` structure from `string.h` is planned but not implemented yet:
* the message is currently formatted by vsnprintf as-is, so `%S` must not be used with a `String` argument.
*
* @param loc The location where the error occurred.
* @param to_column The column number where the error ends.
* @param msg The error message to log. This can contain format specifiers like printf, and the additional arguments will be formatted into the message.
+3 -3
View File
@@ -16,14 +16,14 @@ Module* parser_parse(TokenStream* ts) {
Module* module = (Module*)malloc(sizeof(Module));
if (module == NULL) return NULL;
module->name = (char*)malloc(t.text_length + 1);
module->name = (char*)malloc(t.text.length + 1);
if (module->name == NULL) {
free(module);
return NULL;
}
memcpy(module->name, t.text, t.text_length);
module->name[t.text_length] = '\0';
memcpy(module->name, t.text.data, t.text.length);
module->name[t.text.length] = '\0';
t = tokenstream_next(ts);
if (t.token != TOKEN_SEMICOLON) {
+20
View File
@@ -0,0 +1,20 @@
/**
* Contains the definition of the String structure, which is a simple representation of a string in C.
*/
#ifndef STRING_H
#define STRING_H
#include <stddef.h>
/**
* A length-delimited string: a pointer to character data plus an explicit length.
*
* The data is not necessarily null-terminated, so always bound reads by `length`
* (for example by printing with `%.*s`) instead of relying on a terminator.
*
* NOTE(review): this looks like a non-owning view (the tokenizer stores pointers
* into existing text rather than copies) — confirm before freeing `data`.
*/
typedef struct {
/// @brief A pointer to the first character of the string. May lack a trailing '\0'.
char* data;
/// @brief The number of characters in the string, not counting any terminator.
size_t length;
} String;
#endif
+3 -3
View File
@@ -23,10 +23,10 @@ static void test_log_error(void) {
static void test_log_on_line(void) {
Location loc = {
.filename = "test.c",
.line_text = "int main() []",
.line_text_length = 13,
.line_text = { "int main() []", 13 },
.line = 1,
.column = 12
.column_start = 12,
.column_end = 13
};
log_on_line(&loc, 13, "unexpected token");
+6 -6
View File
@@ -98,21 +98,21 @@ static void test_tokenstream_info(void) {
if (t1.token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
char buf1[32];
memcpy(buf1, t1.text, t1.text_length);
buf1[t1.text_length] = '\0';
memcpy(buf1, t1.text.data, t1.text.length);
buf1[t1.text.length] = '\0';
assert_str("module", buf1, "info: expected 'module'");
if (t1.location.line != 1) fail("expected line 1");
if (t1.location.column != 1) fail("expected column 1");
if (t1.location.column_start != 1) fail("expected column 1");
Token t2 = tokenstream_next(ts);
if (t2.token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
char buf2[32];
memcpy(buf2, t2.text, t2.text_length);
buf2[t2.text_length] = '\0';
memcpy(buf2, t2.text.data, t2.text.length);
buf2[t2.text.length] = '\0';
assert_str("main", buf2, "info: expected 'main'");
if (t2.location.line != 1) fail("expected line 1");
if (t2.location.column != 8) fail("expected column 8");
if (t2.location.column_start != 8) fail("expected column 8");
tokenstream_close(ts);
}
+7 -6
View File
@@ -93,13 +93,14 @@ static size_t get_line_length(const char* line_start) {
/// @brief Builds a Token of the given type together with its source Location.
///
/// The text, filename and line pointers are stored as-is (cast from const, not copied).
///
/// @param ts The token stream; only its filename is read here.
/// @param type The token type to store.
/// @param text Pointer to the first character of the token text.
/// @param length Number of characters in the token text.
/// @param line Line number where the token starts.
/// @param column Column number where the token starts.
/// @param line_start Pointer to the start of the line containing the token; its length is measured with get_line_length.
/// @return The populated Token, returned by value.
static Token create_token(TokenStream* ts, TokenType type, const char* text, size_t length, int line, int column, const char* line_start) {
Token t;
t.token = type;
t.text = (char*)text;
t.text_length = length;
t.text.data = (char*)text;
t.text.length = length;
t.location.filename = (char*)ts->filename;
t.location.line = line;
t.location.column = column;
t.location.line_text = (char*)line_start;
t.location.line_text_length = get_line_length(line_start);
t.location.column_start = column;
// Inclusive end column: a token of `length` characters starting at `column` ends at column + length - 1.
t.location.column_end = column + (int)length - 1;
t.location.line_text.data = (char*)line_start;
t.location.line_text.length = get_line_length(line_start);
return t;
}
@@ -192,6 +193,6 @@ Token tokenstream_next(TokenStream* ts) {
/* Unknown character */
Token t = create_token(ts, TOKEN_UNKNOWN, start_text, 1, start_line, start_column, line_start);
log_on_line(&t.location, t.location.column, "unexpected token '%c'", c);
log_on_line(&t.location, t.location.column_end, "unexpected token '%c'", c);
return t;
}
+1 -5
View File
@@ -41,11 +41,7 @@ typedef struct {
TokenType token;
/// @brief The textual representation of a token.
/// Note that this is not necessarily null-terminated.
char* text;
/// @brief The length of the `text` string.
size_t text_length;
String text;
/// @brief The location of the token.
Location location;