Implement String structure and update Location/Token to use it
This commit is contained in:
+8
-6
@@ -4,6 +4,8 @@
|
|||||||
#ifndef LOCATION_H
|
#ifndef LOCATION_H
|
||||||
#define LOCATION_H
|
#define LOCATION_H
|
||||||
|
|
||||||
|
#include "string.h"
|
||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@@ -11,16 +13,16 @@ typedef struct {
|
|||||||
char* filename;
|
char* filename;
|
||||||
|
|
||||||
/// @brief The entire line of text where the token was found.
|
/// @brief The entire line of text where the token was found.
|
||||||
char* line_text;
|
String line_text;
|
||||||
|
|
||||||
/// @brief The length of the `line_text` string.
|
|
||||||
size_t line_text_length;
|
|
||||||
|
|
||||||
/// @brief The line number where the token was found.
|
/// @brief The line number where the token was found.
|
||||||
int line;
|
int line;
|
||||||
|
|
||||||
/// @brief The column number where the token was found.
|
/// @brief The starting column number where the token was found.
|
||||||
int column;
|
int column_start;
|
||||||
|
|
||||||
|
/// @brief The ending column number where the token was found.
|
||||||
|
int column_end;
|
||||||
} Location;
|
} Location;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@@ -18,23 +18,39 @@ void log_error(const char* msg) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Expands a printf-style log message into @p buffer.
 *
 * All log formatting goes through this function so that custom conversion
 * specifiers (for example a `%S` specifier for `String`) can be added in one
 * place later. No custom specifier is implemented yet: @p msg and @p args are
 * forwarded to vsnprintf() unchanged, so only standard printf specifiers may
 * be used, and a `String` must be passed as `%.*s` with an explicit length.
 *
 * @param buffer Destination for the formatted text.
 * @param size   Capacity of @p buffer in bytes. Output that does not fit is
 *               truncated; the result is null-terminated whenever @p size > 0.
 * @param msg    printf-style format string.
 * @param args   Arguments consumed by @p msg. The caller started the list and
 *               remains responsible for calling va_end() on it.
 */
static void format_message(char* buffer, size_t size, const char* msg, va_list args) {
    vsnprintf(buffer, size, msg, args);
}
|
||||||
|
|
||||||
void log_on_line(Location* loc, int to_column, const char* msg, ...) {
|
void log_on_line(Location* loc, int to_column, const char* msg, ...) {
|
||||||
char line_prefix[32];
|
char line_prefix[32];
|
||||||
int prefix_len = snprintf(line_prefix, sizeof(line_prefix), "%d| ", loc->line);
|
int prefix_len = snprintf(line_prefix, sizeof(line_prefix), "%d| ", loc->line);
|
||||||
|
|
||||||
int caret_len = to_column - loc->column + 1;
|
int caret_len = to_column - loc->column_start + 1;
|
||||||
if (caret_len < 1) caret_len = 1;
|
if (caret_len < 1) caret_len = 1;
|
||||||
|
|
||||||
// Format the message
|
// Format the message
|
||||||
va_list args;
|
va_list args;
|
||||||
va_start(args, msg);
|
va_start(args, msg);
|
||||||
char formatted_msg[256];
|
char formatted_msg[256];
|
||||||
vsnprintf(formatted_msg, sizeof(formatted_msg), msg, args);
|
format_message(formatted_msg, sizeof(formatted_msg), msg, args);
|
||||||
va_end(args);
|
va_end(args);
|
||||||
|
|
||||||
size_t total_size = strlen(loc->filename) + 16 + // --- filename ---
|
size_t total_size = strlen(loc->filename) + 16 + // --- filename ---
|
||||||
prefix_len + strlen(loc->line_text) + 2 + // line| text\n
|
prefix_len + loc->line_text.length + 2 + // line| text\n
|
||||||
prefix_len + loc->column - 1 + caret_len + 2 + // indent + ^^\n
|
prefix_len + loc->column_start - 1 + caret_len + 2 + // indent + ^^\n
|
||||||
prefix_len + strlen(formatted_msg) + 2 + // indent + msg\n
|
prefix_len + strlen(formatted_msg) + 2 + // indent + msg\n
|
||||||
1;
|
1;
|
||||||
|
|
||||||
@@ -43,10 +59,10 @@ void log_on_line(Location* loc, int to_column, const char* msg, ...) {
|
|||||||
|
|
||||||
char* p = buffer;
|
char* p = buffer;
|
||||||
p += sprintf(p, "--- %s ---\n", loc->filename);
|
p += sprintf(p, "--- %s ---\n", loc->filename);
|
||||||
p += sprintf(p, "%s%s\n", line_prefix, loc->line_text);
|
p += sprintf(p, "%s%.*s\n", line_prefix, (int)loc->line_text.length, loc->line_text.data);
|
||||||
|
|
||||||
// Caret line
|
// Caret line
|
||||||
for (int i = 0; i < prefix_len + loc->column - 1; i++) *p++ = ' ';
|
for (int i = 0; i < prefix_len + loc->column_start - 1; i++) *p++ = ' ';
|
||||||
for (int i = 0; i < caret_len; i++) *p++ = '^';
|
for (int i = 0; i < caret_len; i++) *p++ = '^';
|
||||||
*p++ = '\n';
|
*p++ = '\n';
|
||||||
|
|
||||||
|
|||||||
@@ -24,6 +24,9 @@ void log_error(const char* msg);
|
|||||||
/**
|
/**
|
||||||
* Logs a pretty error with additional information about the line where the error occurred.
|
* Logs a pretty error with additional information about the line where the error occurred.
|
||||||
*
|
*
|
||||||
|
* The @p msg parameter can contain format specifiers like printf, and the additional arguments will be formatted into the message.
|
||||||
|
* A `%S` format specifier for formatting a `String` structure from `string.h` is planned but not implemented yet: @p msg is currently handed to vsnprintf unchanged, so only standard printf specifiers may be used.
|
||||||
|
*
|
||||||
* @param loc The location where the error occurred.
|
* @param loc The location where the error occurred.
|
||||||
* @param to_column The column number where the error ends.
|
* @param to_column The column number where the error ends.
|
||||||
* @param msg The error message to log. This can contain format specifiers like printf, and the additional arguments will be formatted into the message.
|
* @param msg The error message to log. This can contain format specifiers like printf, and the additional arguments will be formatted into the message.
|
||||||
|
|||||||
+3
-3
@@ -16,14 +16,14 @@ Module* parser_parse(TokenStream* ts) {
|
|||||||
Module* module = (Module*)malloc(sizeof(Module));
|
Module* module = (Module*)malloc(sizeof(Module));
|
||||||
if (module == NULL) return NULL;
|
if (module == NULL) return NULL;
|
||||||
|
|
||||||
module->name = (char*)malloc(t.text_length + 1);
|
module->name = (char*)malloc(t.text.length + 1);
|
||||||
if (module->name == NULL) {
|
if (module->name == NULL) {
|
||||||
free(module);
|
free(module);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(module->name, t.text, t.text_length);
|
memcpy(module->name, t.text.data, t.text.length);
|
||||||
module->name[t.text_length] = '\0';
|
module->name[t.text.length] = '\0';
|
||||||
|
|
||||||
t = tokenstream_next(ts);
|
t = tokenstream_next(ts);
|
||||||
if (t.token != TOKEN_SEMICOLON) {
|
if (t.token != TOKEN_SEMICOLON) {
|
||||||
|
|||||||
+20
@@ -0,0 +1,20 @@
|
|||||||
|
/**
|
||||||
|
* Contains the definition of the String structure, which is a simple representation of a string in C.
|
||||||
|
*/
|
||||||
|
#ifndef STRING_H
|
||||||
|
#define STRING_H
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A simple string structure that holds a pointer to the character data and its length.
|
||||||
|
*/
|
||||||
|
/**
 * A character sequence described by a start pointer and an explicit length.
 *
 * The characters are not necessarily null-terminated, so every read must be
 * bounded by `length` (for example `printf("%.*s", (int)s.length, s.data)`),
 * and a terminator has to be appended by hand when copying into a C string.
 *
 * NOTE(review): this header is named `string.h`, the same as the standard
 * library header. If its directory is ever put on the `-I` include path,
 * `#include <string.h>` may resolve to this file instead -- confirm the build
 * flags or consider renaming the header.
 */
typedef struct {
    /// @brief Pointer to the first character of the sequence (not necessarily null-terminated).
    char* data;

    /// @brief Number of characters available at `data`, not counting any terminator.
    size_t length;
} String;
|
||||||
|
|
||||||
|
#endif
|
||||||
+3
-3
@@ -23,10 +23,10 @@ static void test_log_error(void) {
|
|||||||
static void test_log_on_line(void) {
|
static void test_log_on_line(void) {
|
||||||
Location loc = {
|
Location loc = {
|
||||||
.filename = "test.c",
|
.filename = "test.c",
|
||||||
.line_text = "int main() []",
|
.line_text = { "int main() []", 13 },
|
||||||
.line_text_length = 13,
|
|
||||||
.line = 1,
|
.line = 1,
|
||||||
.column = 12
|
.column_start = 12,
|
||||||
|
.column_end = 13
|
||||||
};
|
};
|
||||||
|
|
||||||
log_on_line(&loc, 13, "unexpected token");
|
log_on_line(&loc, 13, "unexpected token");
|
||||||
|
|||||||
+6
-6
@@ -98,21 +98,21 @@ static void test_tokenstream_info(void) {
|
|||||||
if (t1.token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
if (t1.token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||||
|
|
||||||
char buf1[32];
|
char buf1[32];
|
||||||
memcpy(buf1, t1.text, t1.text_length);
|
memcpy(buf1, t1.text.data, t1.text.length);
|
||||||
buf1[t1.text_length] = '\0';
|
buf1[t1.text.length] = '\0';
|
||||||
assert_str("module", buf1, "info: expected 'module'");
|
assert_str("module", buf1, "info: expected 'module'");
|
||||||
if (t1.location.line != 1) fail("expected line 1");
|
if (t1.location.line != 1) fail("expected line 1");
|
||||||
if (t1.location.column != 1) fail("expected column 1");
|
if (t1.location.column_start != 1) fail("expected column 1");
|
||||||
|
|
||||||
Token t2 = tokenstream_next(ts);
|
Token t2 = tokenstream_next(ts);
|
||||||
if (t2.token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
|
if (t2.token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
|
||||||
|
|
||||||
char buf2[32];
|
char buf2[32];
|
||||||
memcpy(buf2, t2.text, t2.text_length);
|
memcpy(buf2, t2.text.data, t2.text.length);
|
||||||
buf2[t2.text_length] = '\0';
|
buf2[t2.text.length] = '\0';
|
||||||
assert_str("main", buf2, "info: expected 'main'");
|
assert_str("main", buf2, "info: expected 'main'");
|
||||||
if (t2.location.line != 1) fail("expected line 1");
|
if (t2.location.line != 1) fail("expected line 1");
|
||||||
if (t2.location.column != 8) fail("expected column 8");
|
if (t2.location.column_start != 8) fail("expected column 8");
|
||||||
|
|
||||||
tokenstream_close(ts);
|
tokenstream_close(ts);
|
||||||
}
|
}
|
||||||
|
|||||||
+7
-6
@@ -93,13 +93,14 @@ static size_t get_line_length(const char* line_start) {
|
|||||||
/*
 * Builds a Token value describing one lexeme.
 *
 * This span is a side-by-side diff rendering: in each pair of code lines the
 * first is the pre-change version and the second is the post-change version.
 * The notes below describe the post-change version.
 *
 * - `text` / `length` become `t.text.data` / `t.text.length`. Only the pointer
 *   is stored (with const cast away); the characters are not copied.
 * - `line_start` becomes `t.location.line_text.data`, again without copying;
 *   its length is obtained from get_line_length(line_start).
 * - `column` is stored as `column_start`, and `column_end` is computed as
 *   `column + length - 1`, i.e. the column of the token's last character.
 *   For a zero-length token this yields `column - 1`.
 * - `ts` is only read for `ts->filename`.
 *
 * Returns the Token by value.
 */
static Token create_token(TokenStream* ts, TokenType type, const char* text, size_t length, int line, int column, const char* line_start) {
|
static Token create_token(TokenStream* ts, TokenType type, const char* text, size_t length, int line, int column, const char* line_start) {
|
||||||
Token t;
|
Token t;
|
||||||
t.token = type;
|
t.token = type;
|
||||||
t.text = (char*)text;
|
t.text.data = (char*)text;
|
||||||
t.text_length = length;
|
t.text.length = length;
|
||||||
t.location.filename = (char*)ts->filename;
|
t.location.filename = (char*)ts->filename;
|
||||||
t.location.line = line;
|
t.location.line = line;
|
||||||
t.location.column = column;
|
t.location.column_start = column;
|
||||||
t.location.line_text = (char*)line_start;
|
t.location.column_end = column + (int)length - 1;
|
||||||
t.location.line_text_length = get_line_length(line_start);
|
t.location.line_text.data = (char*)line_start;
|
||||||
|
t.location.line_text.length = get_line_length(line_start);
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -192,6 +193,6 @@ Token tokenstream_next(TokenStream* ts) {
|
|||||||
|
|
||||||
/* Unknown character */
|
/* Unknown character */
|
||||||
Token t = create_token(ts, TOKEN_UNKNOWN, start_text, 1, start_line, start_column, line_start);
|
Token t = create_token(ts, TOKEN_UNKNOWN, start_text, 1, start_line, start_column, line_start);
|
||||||
log_on_line(&t.location, t.location.column, "unexpected token '%c'", c);
|
log_on_line(&t.location, t.location.column_end, "unexpected token '%c'", c);
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|||||||
+1
-5
@@ -41,11 +41,7 @@ typedef struct {
|
|||||||
TokenType token;
|
TokenType token;
|
||||||
|
|
||||||
/// @brief The textual representation of a token.
|
/// @brief The textual representation of a token.
|
||||||
/// Note that this is not necessarily null-terminated.
|
String text;
|
||||||
char* text;
|
|
||||||
|
|
||||||
/// @brief The length of the `text` string.
|
|
||||||
size_t text_length;
|
|
||||||
|
|
||||||
/// @brief The location of the token.
|
/// @brief The location of the token.
|
||||||
Location location;
|
Location location;
|
||||||
|
|||||||
Reference in New Issue
Block a user