Compare commits

...

55 Commits

Author SHA1 Message Date
seeseemelk b6d0a78d06 Add integration test 2026-05-01 09:44:46 +02:00
seeseemelk 3bdccf2000 Add integration test framework 2026-04-30 22:21:08 +02:00
seeseemelk 177fb971e4 Rename AST structures to Tree and relocate freeing logic 2026-04-30 21:46:15 +02:00
seeseemelk ea55dedd07 Refactor AST and Parser into modular subdirectories
- Split ast.h into granular headers in v0/ast/
- Split parser.c into modular implementation files in v0/parser/
- Move and rename parser tests to v0/parser/test_*.c
- Update build system (include.mk) with modular sub-makefiles
- Maintain v0/ast.h and v0/parser.h as umbrella headers
2026-04-30 21:23:07 +02:00
seeseemelk 4bd66ea216 More variable stuff 2026-04-30 20:25:53 +02:00
seeseemelk 0704284726 Can parse variables 2026-04-29 21:39:48 +02:00
seeseemelk 94ae665a0a Add initial variable work 2026-04-29 21:20:52 +02:00
seeseemelk e2d8e385f0 Add basic var tokens 2026-04-29 20:28:52 +02:00
seeseemelk 76f9168c5f Fix docs 2026-04-29 20:21:52 +02:00
seeseemelk 1ab021561e Fix bad test 2026-04-29 20:20:16 +02:00
seeseemelk f260e02efa Refactor parser 2026-04-29 20:15:05 +02:00
seeseemelk 1c5d49d682 Fix valgrind errors 2026-04-29 19:41:00 +02:00
seeseemelk cc25563cd2 Cleanup 2026-04-29 19:23:59 +02:00
seeseemelk 323a599399 Build with debug symbols 2026-04-29 18:53:02 +02:00
seeseemelk ec896495a3 Fix infinite loop bug 2026-04-29 14:40:06 +02:00
seeseemelk eb4b0495f2 Working on parser refactor 2026-04-29 14:36:42 +02:00
seeseemelk 1f40c8f5ee Refactor tests a bit more 2026-04-29 13:25:41 +02:00
seeseemelk 98d58a2169 Refactor tests 2026-04-29 13:09:14 +02:00
seeseemelk f0621a8076 Refactor parser 2026-04-29 11:53:26 +02:00
seeseemelk 84747028f5 Ensure alias and import can be mixed 2026-04-29 11:46:02 +02:00
seeseemelk f90cad2b96 Use proper public keyword 2026-04-29 11:43:14 +02:00
seeseemelk e09bd72441 Update ast interface 2026-04-29 11:24:42 +02:00
seeseemelk 9035cc639c Add alias to ast 2026-04-29 11:18:40 +02:00
seeseemelk 3288efdfd7 Refactor test interface 2026-04-29 10:59:06 +02:00
seeseemelk 34b7939f76 Refactor parser to C11 and update build configuration 2026-04-29 10:38:34 +02:00
seeseemelk 15714393c3 Refactor parser to use Token in AST and update tests 2026-04-29 10:35:12 +02:00
seeseemelk 146aa4d9d1 Convert codebase to C89 compatibility and update test scripts 2026-04-29 10:21:29 +02:00
seeseemelk 189c21667b Ignore intellij files 2026-04-28 16:07:46 +02:00
seeseemelk abdc6d67dc Re-order log lines 2026-04-28 16:06:21 +02:00
seeseemelk d89833b705 Add TYPES documentation 2026-04-28 16:06:12 +02:00
seeseemelk bfb3b69be1 fix: add util.c to source files 2026-04-26 22:48:31 +02:00
seeseemelk dc523c8d3c chore: remove legacy v0/string.h 2026-04-26 22:42:10 +02:00
seeseemelk 05dfb3725b fix: replace unsafe fixed-size buffers with dynamic formatting helpers; add util format helpers; centralize log_on_line cleanup 2026-04-26 22:42:10 +02:00
seeseemelk 70998643fb Add AGENTS.md 2026-04-26 22:30:51 +02:00
seeseemelk 129036b539 Fix all valgrind errors 2026-04-26 22:13:39 +02:00
seeseemelk dbc69eddc8 Update test target to use valgrind 2026-04-26 21:35:14 +02:00
seeseemelk 421338d995 Fix log header generation and EOF location reporting 2026-04-26 21:34:28 +02:00
seeseemelk f33e8d3e25 Update log headers 2026-04-26 21:19:59 +02:00
seeseemelk c219a303ec Fix error reporting position and match updated log headers 2026-04-26 21:16:50 +02:00
seeseemelk 9449f16e02 Implement tokenstream_get_test and simplified assert_log_file using test names 2026-04-26 20:31:17 +02:00
seeseemelk e910c01348 Refactor golden files to follow xyz_log_ and xyz_src_ naming convention 2026-04-25 20:05:16 +02:00
seeseemelk a6bdadac0c Add public imports
Co-authored-by: Copilot <copilot@github.com>
2026-04-25 15:28:33 +02:00
seeseemelk 63dd5fa5c9 Implement public import parsing and add test case 2026-04-25 15:06:20 +02:00
seeseemelk d8544d7743 Add new rule to agent instructions 2026-04-25 14:38:35 +02:00
seeseemelk 91593e12b7 Add error logging and corresponding tests for parser syntax errors 2026-04-25 14:37:08 +02:00
seeseemelk 7c7e0c3272 Add import parsing
Co-authored-by: Copilot <copilot@github.com>
2026-04-25 14:30:11 +02:00
seeseemelk 116bdecafe Implement String structure and update Location/Token to use it 2026-04-25 14:17:17 +02:00
seeseemelk 902e2f0325 Update log_on_line to take Location* instead of individual fields 2026-04-24 22:13:29 +02:00
seeseemelk 26a1d0285e Refactor Token to use Location struct 2026-04-24 22:07:00 +02:00
seeseemelk a89e61eedd Introduce golden file mechanism for tests 2026-04-24 21:09:47 +02:00
seeseemelk 9ca72ef5bf Split test 2026-04-24 20:41:57 +02:00
seeseemelk 0306530fe8 Better logging in tokenstream 2026-04-24 20:40:31 +02:00
seeseemelk 451a9a2a22 Token refactor and better logs 2026-04-24 20:28:08 +02:00
seeseemelk da3425ec10 All target run tests 2026-04-24 20:04:43 +02:00
seeseemelk e021a2d63e During test, log to in-memory log 2026-04-24 20:04:00 +02:00
86 changed files with 1918 additions and 286 deletions
+9 -3
View File
@@ -23,10 +23,11 @@ For instance, a test for `buffer.c` must be called `test_buffer.c`.
There will be no `test_buffer.h`. Instead, `test.c` will
`#include` the C source file directly.
Every syntax error path identified in the parser MUST have a corresponding test.
## Language Syntax
Since this is a compiler for a new language, do not assume anything
of its syntax.
Always check the `specs` directory.
Since this is a compiler for a new language, do not assume anything of its syntax.
Always check the `specs` directory to see examples and documentation about the language.
If there is anything unclear, ask the user for clarification.
It is certainly possible that there are contradictions in the
@@ -38,3 +39,8 @@ the agent to update the implementation.
When creating a commit, make sure that both the user's and the agent's modifications
are included in the commit.
Only create a commit when specifically asked for that. Never assume implicitly that the
user wants you to create a commit.
Even if they asked you to create a commit in an earlier task, it does not mean that
you should also create a commit in a later task.
+2
View File
@@ -1 +1,3 @@
/c2
/.idea/*
!/.idea/c_cpp_properties.json
+12
View File
@@ -0,0 +1,12 @@
{
"configurations": [
{
"name": "CLion",
"includePath": [
"${workspaceFolder}/v0/*"
],
"cStandard": "c89"
}
],
"version": 4
}
Symlink
+1
View File
@@ -0,0 +1 @@
.github/copilot-instructions.md
+9 -1
View File
@@ -1,13 +1,21 @@
.PHONY: all test clean
all: c2
all: c2 test integration-test
c2: v0/bin/c2
cp $< $@
test::
generate_golden::
clean::
rm -f c2
include v0/include.mk
integration-test: v0/bin/c2 v0/bin/test_integration
./v0/bin/test_integration
v0/bin/test_integration: v0/test_integration.c
$(CC) $(CFLAGS) -o $@ $<
+16
View File
@@ -13,5 +13,21 @@ In order to run the tests, run `make test`.
## Versioning
The current version is v0. Its source code lives in the `v0` directory.
## Testing
### Unit Tests
Run unit tests with:
```bash
make test
```
### Integration Tests
Integration tests compare the compiler output with expected C files.
To add a new integration test, create a new directory under `v0/integration_tests/` with `input.c2` and `expected.c` files.
Run integration tests with:
```bash
make integration-test
```
## Language Specification
See the specs directory for information on the actual language syntax.
+1
View File
@@ -0,0 +1 @@
Hello, world
+1
View File
@@ -12,3 +12,4 @@ void main() {
puts("Hello, world!");
}
```
.
+31
View File
@@ -0,0 +1,31 @@
# Imports
The import statement allows one module access to the public declarations of another module.
## Syntax
The import statement uses the following syntax:
```c2
import module_name;
```
They can optionally be prefixed by the `public` keyword, in which case the module will
export everything in the import transitively.
For instance,
```c2
--- a.c2
module a;
import b;
--- b.c2
module b;
public import c;
--- c.c2
module c;
// Some declarations
```
In this example, both modules `a` and `b` can access the declarations in module `c`: `b` imports `c` directly, and `a` sees them through the public import in `b`.
+25
View File
@@ -0,0 +1,25 @@
# Types
C2 has both built-in types and user-defined types.
## Builtin types
C2 has the following built-in types:
- `void`
- `i8`
- `i16`
- `i32`
- `i64`
- `u8`
- `u16`
- `u32`
- `u64`
## Type Aliases
Types can be aliased to different names using the alias keyword.
Here's a list of the default builtin aliases.
```c2
alias int = i32;
alias uint = u32;
alias char = u8;
alias string = char[];
```
+24
View File
@@ -0,0 +1,24 @@
# Variables
Variables can be defined in the global scope, in structs and classes, and in functions.
## Global variables
Global variables can be defined as such:
```c2
// Defines a global variable called my_var.
i32 my_var;
// Defines a const variable.
const i32 my_var;
// Defines a global variable whose type is determined automatically.
// The value will be determined at runtime.
var my_var = 123;
// Defines a const variable whose type is determined automatically.
const my_var = 123;
// Defines a global variable whose initial value is computed at compile-time.
// If it cannot be computed at compile-time, an error is thrown.
static my_var = 123;
```
+11 -6
View File
@@ -4,13 +4,18 @@
#ifndef AST_H
#define AST_H
#include "ast/expression.h"
#include "ast/declaration.h"
#include "ast/module.h"
/**
* The top-level model.
* Every file matches an entire Module.
* Frees a module and all its children.
*/
typedef struct {
/// @brief The name of the module.
char* name;
} Module;
void ast_free_module(ModuleTree* module);
/**
* Frees a type expression.
*/
void ast_free_type(TypeTree* type);
#endif
+49
View File
@@ -0,0 +1,49 @@
#ifndef AST_DECLARATION_H
#define AST_DECLARATION_H
#include "expression.h"
#include "../bool.h"
/**
 * A declaration that imports another module.
 */
typedef struct {
/**
 * @brief The name of the module being imported.
 * Heap-allocated; ast_free_module() releases it with free().
 */
char* module_name;
/** @brief Whether the import is public or not. */
bool is_public;
} ImportTree;
/**
 * A declaration that aliases one type to another.
 */
typedef struct {
/**
 * @brief The name of the alias.
 * Declared const for readers, but the string is heap-allocated and
 * ast_free_module() releases it with free().
 */
const char* name;
/**
 * @brief The value of the alias.
 * Stored inline; any nested array element type is released through ast_free_type().
 */
TypeTree value;
} AliasTree;
/**
 * A declaration of a variable, which may be a constant or not, and may be static or not.
 */
typedef struct {
/** @brief The name of the variable. Heap-allocated; freed by ast_free_module(). */
char* name;
/** @brief The type of the variable. */
TypeTree type;
/**
 * @brief The optional initializer expression.
 * NULL when the declaration has no initializer; otherwise a heap-allocated
 * node that ast_free_module() releases.
 */
ExpressionTree* initializer;
/** @brief Whether the variable is public or not. */
bool is_public;
/** @brief Whether the variable is static or not. */
bool is_static;
/** @brief Whether the variable is a constant or not. */
bool is_const;
} VariableTree;
#endif
+9
View File
@@ -0,0 +1,9 @@
#include "expression.h"
#include <stdlib.h>
/**
 * Releases the heap memory owned by a type expression.
 *
 * Only the children are released: for an array type the element type is
 * freed recursively. The node pointed to by @p expr itself is NOT freed,
 * because type nodes are usually embedded by value in their parent.
 *
 * @param expr The type expression to clean up. NULL is accepted and ignored,
 *             which also makes the recursion safe for a partially built
 *             array node whose element pointer was never set.
 */
void ast_free_type(TypeTree* expr) {
    if (expr == NULL) {
        return;
    }
    if (expr->tag == TYPE_TREE_ARRAY) {
        ast_free_type(expr->array.array);
        free(expr->array.array);
        /* Avoid a dangling pointer if the node is inspected or freed again. */
        expr->array.array = NULL;
    }
}
+52
View File
@@ -0,0 +1,52 @@
#ifndef AST_EXPRESSION_H
#define AST_EXPRESSION_H
#include "../bool.h"
/**
 * Discriminator for ExpressionTree: tells which union member is valid.
 */
typedef enum {
/* The `integer` member is valid. */
EXPRESSION_TREE_INTEGER,
/* The `string` member is valid. */
EXPRESSION_TREE_STRING,
/* The `boolean` member is valid. */
EXPRESSION_TREE_BOOLEAN
} ExpressionTreeTag;
/**
 * A literal value expression (integer, string or boolean).
 */
typedef struct {
/** @brief defines which entry in the union is valid */
ExpressionTreeTag tag;
union {
/** @brief Value of an integer literal. */
int integer;
/**
 * @brief Text of a string literal.
 * Heap-allocated; ast_free_module() frees it when tag is EXPRESSION_TREE_STRING.
 */
const char* string;
/** @brief Value of a boolean literal. */
bool boolean;
};
} ExpressionTree;
/**
 * Discriminator for TypeTree: tells which union member is valid.
 */
typedef enum {
/* The `builtin` member is valid. */
TYPE_TREE_BUILTIN,
/* The `array` member is valid. */
TYPE_TREE_ARRAY
} TypeTreeTag;
/**
 * An expression that evaluates to a type.
 *
 * The struct is forward-declared so that an array type can point to its
 * element type, which is itself a TypeTree.
 */
typedef struct TypeTree TypeTree;
struct TypeTree {
/** @brief defines which entry in the union is valid */
TypeTreeTag tag;
union {
/** @brief Evaluates to an array of the given type. */
struct {
/**
 * @brief A pointer to the type of the elements stored in the array.
 * Heap-allocated; released by ast_free_type().
 */
TypeTree* array;
} array;
/** @brief Evaluates to a builtin integer type.*/
struct {
/**
* @brief The number of bits in the integer.
* Typical values are 8, 16, 32, and 64.
*/
int bitSize;
/** @brief `true` if the type is signed, `false` if it's unsigned. */
bool isSigned;
} builtin;
};
};
#endif
+3
View File
@@ -0,0 +1,3 @@
# Source files of the AST module (helpers that free tree nodes).
AST_SRC := v0/ast/module.c v0/ast/expression.c
+43
View File
@@ -0,0 +1,43 @@
#include "module.h"
#include "expression.h"
#include <stdlib.h>
void ast_free_type(TypeTree* type);
/**
 * Destroys a module tree and everything it owns.
 *
 * Releases, in order: every import name and the import array, every alias
 * (name plus nested type) and the alias array, every variable (name, nested
 * type, optional initializer including a string payload) and the variable
 * array, and finally the module name and the module node itself.
 *
 * @param module The module to destroy; NULL is ignored.
 */
void ast_free_module(ModuleTree* module) {
    size_t idx;

    if (!module) {
        return;
    }

    if (module->imports) {
        for (idx = 0; idx != module->import_count; ++idx) {
            ImportTree* import = &module->imports[idx];
            free(import->module_name);
        }
        free(module->imports);
    }

    if (module->aliases) {
        for (idx = 0; idx != module->alias_count; ++idx) {
            AliasTree* alias = &module->aliases[idx];
            /* The name is declared const for readers but is heap-owned. */
            free((void*)alias->name);
            ast_free_type(&alias->value);
        }
        free(module->aliases);
    }

    if (module->variables) {
        for (idx = 0; idx != module->variable_count; ++idx) {
            VariableTree* variable = &module->variables[idx];
            ExpressionTree* init = variable->initializer;

            free(variable->name);
            ast_free_type(&variable->type);
            if (init) {
                /* Only string literals carry a separately allocated payload. */
                if (init->tag == EXPRESSION_TREE_STRING) {
                    free((void*)init->string);
                }
                free(init);
            }
        }
        free(module->variables);
    }

    free(module->name);
    free(module);
}
+34
View File
@@ -0,0 +1,34 @@
#ifndef AST_MODULE_H
#define AST_MODULE_H
#include "declaration.h"
#include <stddef.h>
/**
 * The top-level model.
 * Every file matches an entire Module.
 *
 * All pointers in this structure are heap-allocated and are released
 * together by ast_free_module().
 */
typedef struct {
/** @brief The name of the module. */
char* name;
/** @brief The list of imports in the module. NULL while no import has been added. */
ImportTree* imports;
/** @brief The number of imports in the module. */
size_t import_count;
/** @brief The list of aliases in the module. NULL while no alias has been added. */
AliasTree* aliases;
/** @brief The number of aliases in the module. */
size_t alias_count;
/** @brief The list of variables in the module. NULL while no variable has been added. */
VariableTree* variables;
/** @brief The number of variables in the module. */
size_t variable_count;
} ModuleTree;
#endif
+10
View File
@@ -0,0 +1,10 @@
/* Minimal boolean type for C89 compatibility */
#ifndef BOOL_H
#define BOOL_H
/*
 * Boolean type as a plain int, with the two usual constants.
 * NOTE(review): this header must not be combined with <stdbool.h> in the
 * same translation unit; there `bool` is a macro for _Bool and the typedef
 * below would no longer compile. Confirm no source includes both.
 */
typedef int bool;
#define true 1
#define false 0
#endif
+18 -2
View File
@@ -1,4 +1,7 @@
V0_SRC := v0/main.c v0/token.c v0/parser.c v0/log.c
include v0/ast/include.mk
include v0/parser/include.mk
V0_SRC := v0/main.c v0/util.c v0/token.c $(AST_SRC) $(PARSER_SRC) v0/log.c v0/str.c
# V0_TEST must only include `v0/test.c` itself, as all other test C source files are
# included directly into `v0/test.c` using `#include "test_xyz.c"`.
@@ -11,6 +14,8 @@ V0_TEST_OBJ := $(patsubst v0/%.c,v0/bin/%.o,$(V0_TEST))
V0_SRC_DEPS := $(V0_SRC_OBJ:.o=.d)
V0_TEST_DEPS := $(V0_TEST_OBJ:.o=.d)
CFLAGS += -Werror -Wall -pedantic -std=c11 -g
v0/bin/c2: $(V0_SRC_OBJ)
$(CC) $(CFLAGS) -o $@ $^
@@ -19,8 +24,19 @@ V0_SRC_OBJ_NO_MAIN := $(filter-out v0/bin/main.o,$(V0_SRC_OBJ))
v0/bin/test: $(V0_SRC_OBJ_NO_MAIN) $(V0_TEST_OBJ)
$(CC) $(CFLAGS) -o $@ $^
# Only run tests under valgrind on Linux. On macOS (Darwin) valgrind is
# typically unavailable or unsupported, so run the test binary directly.
ifeq ($(shell uname -s),Linux)
TEST_CMD := valgrind --quiet --leak-check=full --error-exitcode=1 v0/bin/test
else
TEST_CMD := v0/bin/test
endif
test:: v0/bin/test
v0/bin/test
$(TEST_CMD)
generate_golden:: v0/bin/test
GENERATE_GOLDEN=1 v0/bin/test
clean::
rm -f v0/bin/test v0/bin/c2 $(V0_SRC_OBJ) $(V0_TEST_OBJ) $(V0_SRC_DEPS) $(V0_TEST_DEPS)
+4
View File
@@ -0,0 +1,4 @@
#include <stdint.h>
// u32 simple:x
static uint32_t v_6simple_1x = 123;
+2
View File
@@ -0,0 +1,2 @@
module simple;
u32 x = 123;
+28
View File
@@ -0,0 +1,28 @@
/**
* Location handling for error reporting.
*/
#ifndef LOCATION_H
#define LOCATION_H
#include "str.h"
#include <stddef.h>
/**
 * Position of a token in a source file, together with the text of its line,
 * as consumed by log_on_line() to render an error message.
 */
typedef struct {
/** @brief The name of the file where the token was found. May be NULL or empty. */
char* filename;
/** @brief The entire line of text where the token was found. */
String line_text;
/** @brief The line number where the token was found. */
int line;
/**
 * @brief The starting column number where the token was found.
 * NOTE(review): log_on_line() indents by `column_start - 1`, which suggests
 * columns are 1-based; confirm against the tokenizer.
 */
int column_start;
/** @brief The ending column number where the token was found (inclusive). */
int column_end;
} Location;
#endif
+71
View File
@@ -1,5 +1,10 @@
#include "log.h"
#include "util.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
static LogError* s_logError = NULL;
@@ -14,3 +19,69 @@ void log_error(const char* msg) {
fprintf(stderr, "Error: %s\n", msg);
}
}
/*
 * Logs an error together with the offending source line.
 *
 * The report handed to log_error() has this shape:
 *
 *     --- <filename> ---
 *     <line>| <text of the line>
 *            ^^^^
 *            <formatted message>
 *
 * The caret run covers column_start..column_end (at least one caret) and is
 * indented by the width of the "<line>| " prefix plus column_start - 1.
 *
 * The output buffer is sized exactly from the measured pieces and filled
 * with bounded copies, so no write can run past it. If any allocation
 * fails nothing is logged.
 *
 * loc: location of the error; must not be NULL.
 * msg: format string for the message, followed by its arguments.
 */
void log_on_line(Location* loc, const char* msg, ...) {
    /* Declarations first for C89 */
    char* line_prefix = NULL;
    char* formatted_msg = NULL;
    char* header = NULL;
    char* buffer = NULL;
    const char* text;
    char* p;
    va_list args;
    int caret_len;
    long signed_indent;
    size_t indent;
    size_t prefix_len;
    size_t header_len;
    size_t msg_len;
    size_t text_len;
    size_t total_size;
    size_t i;

    line_prefix = format_string("%d| ", loc->line);
    if (!line_prefix) goto cleanup;
    prefix_len = strlen(line_prefix);

    caret_len = loc->column_end - loc->column_start + 1;
    if (caret_len < 1) caret_len = 1;

    /*
     * Indentation of the caret line. Computed in a signed type and clamped
     * so that a column below 1 can never wrap around to a huge count.
     */
    signed_indent = (long)prefix_len + (long)loc->column_start - 1;
    indent = signed_indent > 0 ? (size_t)signed_indent : 0;

    /* Format the message */
    va_start(args, msg);
    formatted_msg = format_string_va(msg, args);
    va_end(args);
    if (!formatted_msg) goto cleanup;
    msg_len = strlen(formatted_msg);

    /* Header logic */
    if (loc->filename && loc->filename[0] != '\0') {
        header = format_string("--- %s ---\n", loc->filename);
    } else {
        header = format_string("--- \n");
    }
    if (!header) goto cleanup;
    header_len = strlen(header);

    text = loc->line_text.data;
    text_len = loc->line_text.length;

    total_size = header_len
        + prefix_len + text_len + 1            /* line| text\n */
        + indent + (size_t)caret_len + 1       /* indent + ^^\n */
        + prefix_len + msg_len + 1             /* indent + msg\n */
        + 1;                                   /* terminating NUL */

    buffer = (char*)malloc(total_size);
    if (!buffer) goto cleanup;

    p = buffer;
    memcpy(p, header, header_len);
    p += header_len;

    /* Source line: at most text_len characters, stopping early at a NUL. */
    memcpy(p, line_prefix, prefix_len);
    p += prefix_len;
    for (i = 0; i < text_len && text[i] != '\0'; i++) *p++ = text[i];
    *p++ = '\n';

    /* Caret line */
    memset(p, ' ', indent);
    p += indent;
    memset(p, '^', (size_t)caret_len);
    p += caret_len;
    *p++ = '\n';

    /* Message line */
    memset(p, ' ', prefix_len);
    p += prefix_len;
    memcpy(p, formatted_msg, msg_len);
    p += msg_len;
    *p++ = '\n';
    *p = '\0';

    log_error(buffer);

cleanup:
    free(line_prefix);
    free(formatted_msg);
    free(header);
    free(buffer);
}
+14
View File
@@ -4,6 +4,8 @@
#ifndef LOG_H
#define LOG_H
#include "location.h"
/**
* A method that can log an error.
*/
@@ -19,4 +21,16 @@ void log_set_output(LogError* destination);
*/
void log_error(const char* msg);
/**
* Logs a pretty error with additional information about the line where the error occurred.
*
* The @p msg parameter can contain format specifiers like printf, and the additional arguments will be formatted into the message.
* It additionally supports the `%S` format specifier, which can be used to format a `String` structure from `str.h`.
*
* @param loc The location where the error occurred.
* @param msg The error message to log. This can contain format specifiers like printf, and the additional arguments will be formatted into the message.
* @param ... Additional arguments to format into the error message.
*/
void log_on_line(Location* loc, const char* msg, ...);
#endif
+2 -1
View File
@@ -1,5 +1,6 @@
#include <stdio.h>
/* Program entry point: prints a greeting and exits successfully. The command-line arguments are not used. */
int main(int argc, char** argv) {
puts("Hello, world");
puts("Hello, world");
return 0;
}
-45
View File
@@ -1,45 +0,0 @@
#include "parser.h"
#include <stdlib.h>
#include <string.h>
/*
 * Parses `module <identifier> ;` from the token stream.
 *
 * Returns a newly allocated Module whose name is a NUL-terminated copy of
 * the identifier text, or NULL on a syntax error or allocation failure.
 * The result must be released with parser_free().
 */
Module* parser_parse(TokenStream* ts) {
    TokenInfo info;
    Module* result;

    if (tokenstream_next(ts) != TOKEN_MODULE) {
        return NULL;
    }
    if (tokenstream_next(ts) != TOKEN_IDENTIFIER) {
        return NULL;
    }

    /* Capture the identifier text before moving on to the next token. */
    tokenstream_info(ts, &info);

    result = (Module*)malloc(sizeof(Module));
    if (result == NULL) {
        return NULL;
    }

    result->name = (char*)malloc(info.text_length + 1);
    if (result->name == NULL) {
        free(result);
        return NULL;
    }
    memcpy(result->name, info.text, info.text_length);
    result->name[info.text_length] = '\0';

    if (tokenstream_next(ts) != TOKEN_SEMICOLON) {
        free(result->name);
        free(result);
        return NULL;
    }

    return result;
}
/*
 * Releases a Module returned by parser_parse(), including its name.
 * Passing NULL is allowed and does nothing.
 */
void parser_free(Module* module) {
    if (module != NULL) {
        free(module->name);
        free(module);
    }
}
+1 -8
View File
@@ -10,13 +10,6 @@
* @param ts The TokenStream to read.
* @returns The parsed module.
*/
Module* parser_parse(TokenStream* ts);
/**
* Frees the parsed AST.
*
* @param module The AST return by parser_parse.
*/
void parser_free(Module* module);
ModuleTree* parser_parse(TokenStream* ts);
#endif
+52
View File
@@ -0,0 +1,52 @@
#include "internal.h"
#include "../str.h"
#include "../log.h"
#include <stdlib.h>
/* Advances the parser: the current token is replaced by the next one from the stream. */
void parser_next_token(Parser* p) {
    Token next = tokenstream_next(p->ts);
    p->token = next;
}
/*
 * Consumes the current token if it has the given type.
 * Returns true when a token was consumed, false (without advancing) otherwise.
 */
bool parser_accept(Parser* p, TokenType token) {
    if (p->token.token != token) {
        return false;
    }
    parser_next_token(p);
    return true;
}
/*
 * Like parser_accept(), but a mismatch is reported as an error.
 *
 * Consumes the current token and returns true if it has the given type.
 * Otherwise logs msg at the current token's location and returns false.
 *
 * msg is plain text, not a format string: it is passed through "%s" so that
 * a '%' inside the message can never be interpreted as a conversion.
 */
bool parser_expect(Parser* p, TokenType token, const char* msg) {
    if (parser_accept(p, token)) {
        return true;
    }
    log_on_line(&p->token.location, "%s", msg);
    return false;
}
/* Tests whether the current token has the given type; never consumes it. */
bool parser_peek(Parser* p, TokenType token) {
    return p->token.token == token ? true : false;
}
/*
 * Like parser_peek(), but a mismatch is reported as an error.
 *
 * Returns true, without consuming anything, if the current token has the
 * given type. Otherwise logs msg at the current token's location and
 * returns false.
 *
 * msg is plain text, not a format string: it is passed through "%s" so that
 * a '%' inside the message can never be interpreted as a conversion.
 */
bool parser_require(Parser* p, TokenType token, const char* msg) {
    if (parser_peek(p, token)) {
        return true;
    }
    log_on_line(&p->token.location, "%s", msg);
    return false;
}
/*
 * Returns a copy (made by string_copy) of the current token's text and then
 * advances to the next token. The copy is taken before advancing.
 */
char* parser_to_text(Parser* p) {
    char* copy;

    copy = string_copy(p->token.text);
    parser_next_token(p);
    return copy;
}
/*
 * Tests whether the current token is one of the sized integer keywords
 * (i8, i16, i32, i64, u8, u16, u32, u64).
 * Despite the "accept" in its name, this only looks at the token and does
 * not consume it.
 */
bool parser_accept_primitive(Parser* p) {
    switch (p->token.token) {
    case TOKEN_I8:
    case TOKEN_I16:
    case TOKEN_I32:
    case TOKEN_I64:
    case TOKEN_U8:
    case TOKEN_U16:
    case TOKEN_U32:
    case TOKEN_U64:
        return true;
    default:
        return false;
    }
}
+87
View File
@@ -0,0 +1,87 @@
#include "internal.h"
#include <stdlib.h>
#include <string.h>
/*
 * Parses the remainder of an import declaration (`<identifier> ;`) and
 * appends it to module->imports. The `import` keyword has already been
 * consumed by the caller.
 *
 * The new entry is appended before parsing so that, on a syntax error, any
 * partially filled data is still owned by the module and released with it.
 * The import array and its count are only updated once the reallocation has
 * succeeded, so the module stays consistent if memory runs out.
 *
 * Returns true on success, false on a syntax error (already logged) or on
 * allocation failure.
 */
bool parse_import_declaration(Parser* p, ModuleTree* module, bool is_public) {
    ImportTree* grown;
    ImportTree* import;

    grown = realloc(module->imports, sizeof(ImportTree) * (module->import_count + 1));
    if (grown == NULL) {
        return false;
    }
    module->imports = grown;
    import = &grown[module->import_count];
    memset(import, 0, sizeof(ImportTree));
    module->import_count++;

    import->is_public = is_public;

    if (!parser_require(p, TOKEN_IDENTIFIER, "expected module identifier")) {
        return false;
    }
    import->module_name = parser_to_text(p);

    if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after import")) {
        return false;
    }

    return true;
}
/*
 * Parses the remainder of an alias declaration (`<identifier> = <type> ;`)
 * and appends it to module->aliases. The `alias` keyword has already been
 * consumed by the caller.
 *
 * is_public is accepted for signature symmetry with the other declaration
 * parsers but is not recorded.
 *
 * The alias array and its count are only updated once the reallocation has
 * succeeded, so the module stays consistent if memory runs out.
 *
 * Returns true on success, false on a syntax error (already logged) or on
 * allocation failure.
 */
bool parse_alias_declaration(Parser* p, ModuleTree* module, bool is_public) {
    AliasTree* grown;
    AliasTree* alias;

    (void)is_public;

    grown = realloc(module->aliases, sizeof(AliasTree) * (module->alias_count + 1));
    if (grown == NULL) {
        return false;
    }
    module->aliases = grown;
    alias = &grown[module->alias_count];
    memset(alias, 0, sizeof(AliasTree));
    module->alias_count++;

    if (!parser_require(p, TOKEN_IDENTIFIER, "expected alias identifier")) {
        return false;
    }
    alias->name = parser_to_text(p);

    if (!parser_expect(p, TOKEN_ASSIGN, "expected '=' after alias name")) {
        return false;
    }

    if (!parse_type_expression(p, &alias->value)) {
        return false;
    }

    if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after alias declaration")) {
        return false;
    }

    return true;
}
/*
 * Parses a variable declaration (`[<type>] <identifier> [= <expression>] ;`)
 * and appends it to module->variables. Modifier keywords and an optional
 * `var` keyword have already been consumed by the caller.
 *
 * When no primitive type keyword is present the type is left zeroed.
 *
 * The variable array and its count are only updated once the reallocation
 * has succeeded. The initializer node is zero-initialized on allocation, so
 * that when parsing the expression fails the half-built node can still be
 * released safely together with the module.
 *
 * Returns true on success, false on a syntax error (already logged) or on
 * allocation failure.
 */
bool parse_variable_declaration(Parser* p, ModuleTree* module, bool is_public, bool is_static, bool is_const) {
    VariableTree* grown;
    VariableTree* var;

    grown = realloc(module->variables, sizeof(VariableTree) * (module->variable_count + 1));
    if (grown == NULL) {
        return false;
    }
    module->variables = grown;
    var = &grown[module->variable_count];
    memset(var, 0, sizeof(VariableTree));
    module->variable_count++;

    var->is_public = is_public;
    var->is_static = is_static;
    var->is_const = is_const;

    if (parser_accept_primitive(p)) {
        if (!parse_type_expression(p, &var->type)) {
            return false;
        }
    }

    if (!parser_require(p, TOKEN_IDENTIFIER, "expected variable identifier")) {
        return false;
    }
    var->name = parser_to_text(p);

    if (parser_accept(p, TOKEN_ASSIGN)) {
        var->initializer = calloc(1, sizeof(ExpressionTree));
        if (var->initializer == NULL) {
            return false;
        }
        if (!parse_expression(p, var->initializer)) {
            return false;
        }
    }

    if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after variable declaration")) {
        return false;
    }

    return true;
}
+98
View File
@@ -0,0 +1,98 @@
#include "internal.h"
#include "../log.h"
#include <stdlib.h>
/*
 * Parses one sized integer keyword (u8..u64, i8..i64) into a builtin type.
 *
 * On a match the keyword is consumed and expr is filled with the bit size
 * and signedness. Otherwise "expected type expression" is logged at the
 * current token, expr is left untouched and false is returned.
 */
bool parse_primitive_type_expression(Parser* p, TypeTree* expr) {
    /* Keyword table, tried in the listed order. */
    static const struct {
        TokenType keyword;
        int bits;
        bool is_signed;
    } primitives[] = {
        { TOKEN_U8,  8,  false },
        { TOKEN_U16, 16, false },
        { TOKEN_U32, 32, false },
        { TOKEN_U64, 64, false },
        { TOKEN_I8,  8,  true },
        { TOKEN_I16, 16, true },
        { TOKEN_I32, 32, true },
        { TOKEN_I64, 64, true }
    };
    size_t idx;

    for (idx = 0; idx < sizeof(primitives) / sizeof(primitives[0]); idx++) {
        if (parser_accept(p, primitives[idx].keyword)) {
            expr->tag = TYPE_TREE_BUILTIN;
            expr->builtin.bitSize = primitives[idx].bits;
            expr->builtin.isSigned = primitives[idx].is_signed;
            return true;
        }
    }

    log_on_line(&p->token.location, "expected type expression");
    return false;
}
/*
 * Parses a primitive type optionally followed by `[` `]`.
 *
 * Without brackets, expr receives the primitive type itself. With brackets,
 * expr becomes an array type whose element type is a heap-allocated copy of
 * the primitive type; it is released by ast_free_type().
 *
 * expr is only turned into an array node after the element allocation has
 * succeeded, so it never carries the array tag with a NULL element pointer.
 * If the closing `]` is missing, expr still owns the element type and the
 * caller is expected to release it.
 *
 * Returns true on success, false on a syntax error (already logged) or on
 * allocation failure.
 */
bool parse_array_type_expression(Parser* p, TypeTree* expr) {
    TypeTree elementType;
    TypeTree* element;

    if (!parse_primitive_type_expression(p, &elementType)) {
        return false;
    }

    if (!parser_accept(p, TOKEN_BRACKET_OPEN)) {
        *expr = elementType;
        return true;
    }

    element = malloc(sizeof(TypeTree));
    if (element == NULL) {
        return false;
    }
    *element = elementType;

    expr->tag = TYPE_TREE_ARRAY;
    expr->array.array = element;

    if (!parser_expect(p, TOKEN_BRACKET_CLOSE, "expected ']' to end array type")) {
        return false;
    }

    return true;
}
/*
 * Entry point for parsing any type expression.
 * Currently every type is handled by parse_array_type_expression().
 */
bool parse_type_expression(Parser* p, TypeTree* expr) {
    bool parsed = parse_array_type_expression(p, expr);
    return parsed;
}
/*
 * Parses a literal expression: an integer, a string, `true` or `false`.
 *
 * On success the literal is consumed, expr is filled and true is returned.
 * Otherwise "expected expression" is logged at the current token, expr is
 * left untouched and false is returned.
 */
bool parse_expression(Parser* p, ExpressionTree* expr) {
    if (parser_peek(p, TOKEN_INTEGER)) {
        /*
         * NOTE(review): atoi() reads until the first non-digit and has no
         * overflow reporting; presumably the token text is always followed
         * by a non-digit character in the source buffer. Confirm.
         */
        int value = atoi(p->token.text.data);

        expr->tag = EXPRESSION_TREE_INTEGER;
        expr->integer = value;
        parser_next_token(p);
        return true;
    }

    if (parser_peek(p, TOKEN_STRING)) {
        expr->tag = EXPRESSION_TREE_STRING;
        expr->string = parser_to_text(p);
        return true;
    }

    if (parser_accept(p, TOKEN_TRUE)) {
        expr->tag = EXPRESSION_TREE_BOOLEAN;
        expr->boolean = true;
        return true;
    }

    if (parser_accept(p, TOKEN_FALSE)) {
        expr->tag = EXPRESSION_TREE_BOOLEAN;
        expr->boolean = false;
        return true;
    }

    log_on_line(&p->token.location, "expected expression");
    return false;
}
+1
View File
@@ -0,0 +1 @@
PARSER_SRC := v0/parser/core.c v0/parser/expression.c v0/parser/declaration.c v0/parser/module.c
+36
View File
@@ -0,0 +1,36 @@
#ifndef PARSER_INTERNAL_H
#define PARSER_INTERNAL_H
#include "../parser.h"
#include "../token.h"
#include "../ast.h"
/**
 * State of a parse in progress: the token source plus one token of lookahead.
 */
typedef struct {
/** @brief The stream that tokens are read from. */
TokenStream* ts;
/** @brief The current (not yet consumed) token; refreshed by parser_next_token(). */
Token token;
} Parser;
// Core functions
/** Replaces the current token with the next one from the token stream. */
void parser_next_token(Parser* p);
/** Consumes the current token if it has the given type; returns whether it did. */
bool parser_accept(Parser* p, TokenType token);
/** Like parser_accept(), but logs msg at the current token when the type does not match. */
bool parser_expect(Parser* p, TokenType token, const char* msg);
/** Tests the type of the current token without consuming it. */
bool parser_peek(Parser* p, TokenType token);
/** Like parser_peek(), but logs msg at the current token when the type does not match. */
bool parser_require(Parser* p, TokenType token, const char* msg);
/** Returns a copy of the current token's text and advances to the next token. */
char* parser_to_text(Parser* p);
/** Tests whether the current token is a sized integer keyword (i8..i64, u8..u64); does not consume it. */
bool parser_accept_primitive(Parser* p);
// Base parsing (expressions, types)
/** Parses one sized integer keyword into a builtin type. */
bool parse_primitive_type_expression(Parser* p, TypeTree* expr);
/** Parses a primitive type optionally followed by `[` `]`. */
bool parse_array_type_expression(Parser* p, TypeTree* expr);
/** Parses any type expression; forwards to parse_array_type_expression(). */
bool parse_type_expression(Parser* p, TypeTree* expr);
/** Parses an integer, string, `true` or `false` literal. */
bool parse_expression(Parser* p, ExpressionTree* expr);
// Declaration parsing
/** Appends an import to the module and parses `<identifier> ;`. */
bool parse_import_declaration(Parser* p, ModuleTree* module, bool is_public);
/** Appends an alias to the module and parses `<identifier> = <type> ;`. */
bool parse_alias_declaration(Parser* p, ModuleTree* module, bool is_public);
/** Appends a variable to the module and parses `[<type>] <identifier> [= <expression>] ;`. */
bool parse_variable_declaration(Parser* p, ModuleTree* module, bool is_public, bool is_static, bool is_const);
// Module parsing
/** Parses `module <identifier> ;` and stores the name in the module. */
bool parse_module_declaration(Parser* p, ModuleTree* module);
#endif
+87
View File
@@ -0,0 +1,87 @@
#include "internal.h"
#include "../log.h"
#include <stdlib.h>
#include <string.h>
/*
 * Parses the module header: `module <identifier> ;`.
 *
 * On success module->name holds a copy of the identifier. Each missing
 * element is reported through the logger and makes the function return
 * false; parsing stops at the first problem.
 */
bool parse_module_declaration(Parser* p, ModuleTree* module) {
    bool has_keyword = parser_expect(p, TOKEN_MODULE, "expected keyword 'module'");

    if (has_keyword && parser_require(p, TOKEN_IDENTIFIER, "expected module identifier")) {
        module->name = parser_to_text(p);
        return parser_expect(p, TOKEN_SEMICOLON, "expected ';' after module name");
    }

    return false;
}
/*
 * Parses a whole module from the token stream.
 *
 * Grammar handled here: the module header followed by any number of
 * declarations up to end of file. Each declaration is a run of modifier
 * keywords (`public`, `static`, `const`) terminated by one of:
 *   - `import ...`  (static/const are rejected)
 *   - `alias ...`   (static/const are rejected)
 *   - `var ...` or a primitive type keyword, starting a variable declaration
 *
 * Returns the parsed module, owned by the caller and released with
 * ast_free_module(), or NULL on a syntax error (already logged) or when
 * memory cannot be allocated. On failure everything built so far is freed.
 */
ModuleTree* parser_parse(TokenStream* ts) {
    /* The parser state only lives for this call, so it stays on the stack. */
    Parser parser;
    Parser* p = &parser;
    ModuleTree* module;

    memset(&parser, 0, sizeof(parser));
    p->ts = ts;
    parser_next_token(p);

    module = calloc(1, sizeof(ModuleTree));
    if (module == NULL) {
        return NULL;
    }

    if (!parse_module_declaration(p, module)) {
        goto fail;
    }

    while (!parser_peek(p, TOKEN_EOF)) {
        bool is_public = false;
        bool is_static = false;
        bool is_const = false;
        bool terminal = false;

        /* Collect modifiers until the keyword that starts the declaration. */
        while (!terminal) {
            if (parser_accept(p, TOKEN_IMPORT)) {
                if (is_static || is_const) {
                    log_on_line(&p->token.location, "import declarations cannot be static or const");
                    goto fail;
                }
                if (!parse_import_declaration(p, module, is_public)) {
                    goto fail;
                }
                terminal = true;
            } else if (parser_accept(p, TOKEN_ALIAS)) {
                if (is_static || is_const) {
                    log_on_line(&p->token.location, "alias declarations cannot be static or const");
                    goto fail;
                }
                if (!parse_alias_declaration(p, module, is_public)) {
                    goto fail;
                }
                terminal = true;
            } else if (parser_accept(p, TOKEN_PUBLIC)) {
                is_public = true;
            } else if (parser_accept(p, TOKEN_STATIC)) {
                is_static = true;
            } else if (parser_accept(p, TOKEN_CONST)) {
                is_const = true;
            } else if (parser_accept(p, TOKEN_VAR) || parser_accept_primitive(p)) {
                /* `var` is consumed here; a type keyword is left for the declaration parser. */
                if (!parse_variable_declaration(p, module, is_public, is_static, is_const)) {
                    goto fail;
                }
                terminal = true;
            } else {
                log_on_line(&p->token.location, "unexpected token");
                goto fail;
            }
        }
    }

    return module;

fail:
    ast_free_module(module);
    return NULL;
}
+8
View File
@@ -0,0 +1,8 @@
#include "../test.h"
#include "../parser.h"
// The core parser utilities are currently only tested indirectly, through the
// other parser tests. This empty test is a placeholder for future tests that
// exercise them directly.
static void test_parser_core_placeholder(void) {
// No-op
}
+89
View File
@@ -0,0 +1,89 @@
#include "../test.h"
#include "../parser.h"
#include <string.h>
#include <stdlib.h>
/* An import without a terminating ';' must produce the expected error log. */
static void test_parser_missing_semicolon_import(void) {
    (void)test_get_ast();
    assert_log_file("expected error for missing semicolon");
}
/* An import with an invalid module name must produce the expected error log. */
static void test_parser_bad_import_name(void) {
    (void)test_get_ast();
    assert_log_file("expected error for bad import name");
}
static void test_parser_imports(void) {
ModuleTree* m = test_get_ast();
assert_not_null(m, "expected module to be parsed");
assert_str("my_module", m->name, "expected name 'my_module'");
assert_not_null(m->imports, "expected imports to be parsed");
assert_int(1, (int)m->import_count, "expected one import");
assert_str("other_module", m->imports[0].module_name, "expected import name 'other_module'");
assert_false(m->imports[0].is_public, "expected import to not be public");
}
static void test_parser_public_imports(void) {
ModuleTree* m = test_get_ast();
assert_not_null(m, "expected module to be parsed");
assert_str("my_module", m->name, "expected name 'my_module'");
assert_not_null(m->imports, "expected imports to be parsed");
assert_int(1, (int)m->import_count, "expected one import");
assert_str("other_module", m->imports[0].module_name, "expected import name 'other_module'");
assert_true(m->imports[0].is_public, "expected import to be public");
}
/* A single alias declaration yields one AliasTree with the declared name. */
static void test_parser_alias_simple(void) {
    ModuleTree* module = test_get_ast();

    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->alias_count, "expected correct number of aliases");

    const AliasTree* alias = &module->aliases[0];
    assert_str("myalias", alias->name, "expected correct alias name");
}
/* A plain variable declaration is neither const nor static. */
static void test_parser_variable_simple(void) {
    ModuleTree* module = test_get_ast();

    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->variable_count, "expected correct number of variables");

    const VariableTree* variable = &module->variables[0];
    assert_str("my_var", variable->name, "expected correct variable name");
    assert_false(variable->is_const, "expected not const");
    assert_false(variable->is_static, "expected not static");
}
/* A `const` variable declaration sets is_const and leaves is_static clear. */
static void test_parser_variable_const(void) {
    ModuleTree* module = test_get_ast();

    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->variable_count, "expected correct number of variables");

    const VariableTree* variable = &module->variables[0];
    assert_str("my_const", variable->name, "expected correct variable name");
    assert_true(variable->is_const, "expected const");
    assert_false(variable->is_static, "expected not static");
}
/* Expects one variable "my_static" flagged static but not const. */
static void test_parser_variable_static(void) {
    ModuleTree* module = test_get_ast();
    const VariableTree* first;

    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->variable_count, "expected correct number of variables");

    first = &module->variables[0];
    assert_str("my_static", first->name, "expected correct variable name");
    assert_false(first->is_const, "expected not const");
    assert_true(first->is_static, "expected static");
}
/* Expects two variables, "var1" followed by "var2". */
static void test_parser_multiple_vars(void) {
    ModuleTree* module = test_get_ast();

    assert_not_null(module, "expected module to be parsed");
    assert_int(2, (int)module->variable_count, "expected correct number of variables");

    /* Names are checked in declaration order. */
    assert_str("var1", module->variables[0].name, "expected first variable name 'var1'");
    assert_str("var2", module->variables[1].name, "expected second variable name 'var2'");
}
+52
View File
@@ -0,0 +1,52 @@
#include "../test.h"
#include "../parser.h"
#include <string.h>
#include <stdlib.h>
/* Expects the single alias to resolve to a signed 32-bit builtin type. */
static void test_parser_alias_simple_type(void) {
    ModuleTree* module = test_get_ast();
    const AliasTree* first;

    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->alias_count, "expected correct number of aliases");

    first = &module->aliases[0];
    assert_int(TYPE_TREE_BUILTIN, first->value.tag, "expected correct alias tag");
    assert_int(32, first->value.builtin.bitSize, "expected bitSize 32");
    assert_true(first->value.builtin.isSigned, "expected signed");
}
/* Expects the single alias to be an array whose element type is a signed 32-bit builtin. */
static void test_parser_alias_array(void) {
    ModuleTree* module = test_get_ast();
    const AliasTree* first;
    TypeTree* element;

    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->alias_count, "expected correct number of aliases");

    first = &module->aliases[0];
    assert_int(TYPE_TREE_ARRAY, first->value.tag, "expected correct alias tag");

    /* The array node points at the type of its elements. */
    element = first->value.array.array;
    assert_not_null(element, "expected pointer to array type");
    assert_int(TYPE_TREE_BUILTIN, element->tag, "expected correct type tag");
    assert_int(32, element->builtin.bitSize, "expected bitSize 32");
    assert_true(element->builtin.isSigned, "expected signed");
}
/* Expects one variable "x" carrying an integer initializer with value 123. */
static void test_parser_variable_init(void) {
    ModuleTree* module = test_get_ast();
    VariableTree* first;

    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->variable_count, "expected 1 variable");

    first = module->variables;
    assert_str("x", first->name, "expected variable name 'x'");

    /* The initializer must exist before its fields are inspected. */
    assert_not_null(first->initializer, "expected variable to have an initializer");
    assert_int(EXPRESSION_TREE_INTEGER, first->initializer->tag, "expected integer initializer");
    assert_int(123, first->initializer->integer, "expected value 123");
}
/* Expects the single variable to have a signed 32-bit builtin type. */
static void test_parser_variable_simple_type(void) {
    ModuleTree* module = test_get_ast();
    const VariableTree* first;

    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->variable_count, "expected correct number of variables");

    first = &module->variables[0];
    assert_int(TYPE_TREE_BUILTIN, first->type.tag, "expected correct type tag");
    assert_int(32, first->type.builtin.bitSize, "expected bitSize 32");
    assert_true(first->type.builtin.isSigned, "expected signed");
}
+21
View File
@@ -0,0 +1,21 @@
#include "../test.h"
#include "../parser.h"
#include <string.h>
#include <stdlib.h>
/* Expects the parsed module to be named "my_module". */
static void test_parser_module_name(void) {
    ModuleTree* module = test_get_ast();

    assert_not_null(module, "expected module to be parsed");
    assert_str("my_module", module->name, "expected name 'my_module'");
}
/* Parses the fixture and compares the logged output against the golden log file. */
static void test_parser_bad_module_name(void) {
    /* Only the logged output is checked; the parse result itself is not used. */
    (void)test_get_ast();

    assert_log_file("expected error to be logged for bad module name");
}
/* Parses the fixture and compares the logged output against the golden log file. */
static void test_parser_missing_semicolon_module(void) {
    /* Only the logged output is checked; the parse result itself is not used. */
    (void)test_get_ast();

    assert_log_file("expected error for missing semicolon");
}
+11
View File
@@ -0,0 +1,11 @@
#include "str.h"
#include <string.h>
#include <stdlib.h>
/*
 * Returns a newly allocated, null-terminated copy of `string`.
 *
 * The copy is `string.length` bytes long plus a terminator. It is allocated
 * with malloc, so the caller releases it with free(). Returns NULL when the
 * allocation fails.
 */
char* string_copy(String string) {
    char* str = malloc(string.length + 1);
    if (str == NULL) {
        return NULL;
    }
    /* Skip memcpy for empty strings: `data` may be NULL when length is 0. */
    if (string.length > 0) {
        memcpy(str, string.data, string.length);
    }
    str[string.length] = '\0';
    return str;
}
+27
View File
@@ -0,0 +1,27 @@
/**
* Contains the definition of the String structure, which is a simple representation of a string in C.
*/
#ifndef STR_H
#define STR_H
#include <stddef.h>
/**
* A simple string structure that holds a pointer to the character data and its length.
*
* The length is carried explicitly; string_copy copies exactly `length` bytes
* and appends its own terminator, so `data` need not be null-terminated.
*/
typedef struct {
/* Pointer to the first character of the string. */
char* data;
/* Number of characters in the string. */
size_t length;
} String;
/**
* Creates a copy of a string.
*
* The copy is a separate heap allocation, so the caller owns it and must
* release it with free() once it is no longer needed.
*
* @param string The string to copy.
* @returns A null-terminated copy of the string.
*/
char* string_copy(String string);
#endif
+237 -19
View File
@@ -1,85 +1,303 @@
#include "test.h"
#include "util.h"
#include "parser.h"
#include <setjmp.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Jump target that fail() returns to; set up by main before each test. */
static jmp_buf s_testJmp;
/* Copy of the most recent failure message; empty when no failure is recorded. */
static char s_failMsg[1024];
/* Log text accumulated by log_append for the current test. */
static char* s_logOutput = NULL;
/* Name of the running test with its "test_" prefix stripped; used to locate fixtures. */
static const char* s_currentTestName = NULL;
/* Contents of the current test's source file, owned by the harness. */
static char* s_testSource = NULL;
/* Lazily created parse result and token stream for the current test. */
static ModuleTree* s_currentModule = NULL;
static TokenStream* s_currentTokenStream = NULL;
/*
 * Aborts the current test with the given message.
 *
 * The message is copied into s_failMsg (truncated to fit) before jumping back
 * to the harness, so callers may pass a buffer that does not outlive the call.
 * A NULL message is recorded as an empty string. This function does not return.
 */
void fail(const char* msg) {
    /* snprintf always null-terminates and, unlike strncpy, does not zero-pad. */
    snprintf(s_failMsg, sizeof(s_failMsg), "%s", msg ? msg : "");
    longjmp(s_testJmp, 1);
}
/*
 * Reads a whole file into a newly allocated, null-terminated buffer.
 *
 * @param filepath Path of the file to read.
 * @returns The file contents, to be released with free(), or NULL if the file
 *          cannot be opened, sized, or read, or if memory runs out.
 */
char* read_file_content(const char* filepath) {
    FILE* f;
    long size;
    size_t bytes_read;
    char* content;

    if (!filepath) return NULL;

    f = fopen(filepath, "r");
    if (!f) return NULL;

    /* Determine the size; ftell reports -1 on failure, which must not reach malloc. */
    if (fseek(f, 0, SEEK_END) != 0) {
        fclose(f);
        return NULL;
    }
    size = ftell(f);
    if (size < 0 || fseek(f, 0, SEEK_SET) != 0) {
        fclose(f);
        return NULL;
    }

    content = malloc((size_t)size + 1);
    if (!content) {
        fclose(f);
        return NULL;
    }

    bytes_read = fread(content, 1, (size_t)size, f);
    if (ferror(f)) {
        free(content);
        fclose(f);
        return NULL;
    }

    /* Terminate after what was actually read, which may be less than `size`. */
    content[bytes_read] = '\0';
    fclose(f);
    return content;
}
/* Fails the current test with `msg` when `ptr` is NULL. */
void assert_not_null(void* ptr, const char* msg) {
    if (ptr != NULL) {
        return;
    }
    fail(msg);
}
/*
 * Fails the current test with `msg` unless the length-delimited `actual`
 * has the same length and characters as the C string `expected`.
 * A NULL on either side counts as a mismatch.
 */
void assert_string(const char* expected, String actual, const char* msg) {
    int matches = 0;

    if (expected != NULL && actual.data != NULL) {
        if (strlen(expected) == actual.length) {
            matches = (strncmp(expected, actual.data, actual.length) == 0);
        }
    }

    if (!matches) {
        fail(msg);
    }
}
/*
 * Fails the current test with `msg` unless both strings are non-NULL and equal.
 */
void assert_str(const char* expected, const char* actual, const char* msg) {
    int matches = 0;

    if (expected != NULL && actual != NULL) {
        matches = (strcmp(expected, actual) == 0);
    }

    if (!matches) {
        fail(msg);
    }
}
/*
 * Returns the token stream for the current test, opening it on first use.
 *
 * The source is read from v0/tests/<test name>.c2. The stream is cached in
 * s_currentTokenStream, so repeated calls within a test return the same stream.
 */
TokenStream* test_get_tokenstream(void) {
    char* sourcePath;

    /* Already opened for this test: reuse it. */
    if (s_currentTokenStream != NULL) {
        return s_currentTokenStream;
    }

    sourcePath = format_string("v0/tests/%s.c2", s_currentTestName);
    if (sourcePath == NULL) {
        fail("out of memory");
        return NULL;
    }

    /* Drop any source left over from an earlier call; free(NULL) is a no-op. */
    free(s_testSource);
    s_testSource = read_file_content(sourcePath);
    if (s_testSource == NULL) {
        /* Print the path that could not be read before failing. */
        puts(sourcePath);
        free(sourcePath);
        fail("could not read test source file");
        return NULL;
    }

    s_currentTokenStream = tokenstream_open(sourcePath, s_testSource);
    free(sourcePath);
    return s_currentTokenStream;
}
/*
 * Returns the parsed module for the current test, parsing on first use and
 * caching the result in s_currentModule.
 */
ModuleTree* test_get_ast(void) {
    if (s_currentModule != NULL) {
        return s_currentModule;
    }

    s_currentModule = parser_parse(test_get_tokenstream());
    return s_currentModule;
}
/*
 * Fails the current test with `msg` unless the log output captured so far
 * equals `expected`. The comparison is delegated to assert_str, so a NULL
 * (empty) log never matches.
 */
void assert_log(const char* expected, const char* msg) {
assert_str(expected, s_logOutput, msg);
}
/*
 * Compares the captured log output with the golden file
 * v0/tests/<test name>.log and fails with `msg` on a mismatch.
 *
 * When the environment variable GENERATE_GOLDEN is "1", the golden file is
 * overwritten with the captured output instead and nothing is compared.
 * A missing log is treated as the empty string.
 */
void assert_log_file(const char* msg) {
    char* goldenPath = format_string("v0/tests/%s.log", s_currentTestName);
    const char* regenerate;
    const char* actual;
    char* golden;
    bool same;

    if (goldenPath == NULL) {
        fail("out of memory");
        return;
    }

    regenerate = getenv("GENERATE_GOLDEN");
    actual = s_logOutput ? s_logOutput : "";

    /* Regeneration mode: write the captured output and stop. */
    if (regenerate != NULL && strcmp(regenerate, "1") == 0) {
        FILE* out = fopen(goldenPath, "w");
        if (out == NULL) {
            free(goldenPath);
            fail("could not open golden file for writing");
            return;
        }
        fputs(actual, out);
        fclose(out);
        free(goldenPath);
        return;
    }

    golden = read_file_content(goldenPath);
    free(goldenPath);
    if (golden == NULL) {
        fail("could not open golden file for reading");
        return;
    }

    /* Release the buffer before fail(), which does not return. */
    same = (strcmp(golden, actual) == 0);
    free(golden);
    if (!same) {
        fail(msg);
    }
}
/*
 * Fails the current test unless `expected` equals `actual`.
 *
 * The failure message is `msg` followed by the expected and actual values.
 * It is formatted into a local buffer rather than a heap allocation: fail()
 * copies the message and then longjmps, so code placed after the fail() call
 * (such as a free) would never run.
 */
void assert_int(int expected, int actual, const char* msg) {
    char detail[1024];

    if (expected == actual) {
        return;
    }

    snprintf(detail, sizeof(detail), "%s (expected %d, got %d)", msg ? msg : "", expected, actual);
    fail(detail);
}
/* Fails the current test with `msg` when `condition` is false. */
void assert_true(bool condition, const char* msg) {
    if (condition) {
        return;
    }
    fail(msg);
}
/* Fails the current test with `msg` when `condition` is true. */
void assert_false(bool condition, const char* msg) {
    if (!condition) {
        return;
    }
    fail(msg);
}
/*
 * Log sink used while tests run: appends `msg` to the captured log output.
 *
 * A NULL message is ignored. If memory cannot be obtained the message is
 * dropped and the previously captured output is left untouched.
 */
static void log_append(const char* msg) {
    size_t oldLen;
    size_t addLen;
    char* grown;

    if (msg == NULL) {
        return;
    }

    oldLen = s_logOutput ? strlen(s_logOutput) : 0;
    addLen = strlen(msg);

    /* realloc(NULL, n) behaves like malloc(n); on failure the old block stays valid. */
    grown = realloc(s_logOutput, oldLen + addLen + 1);
    if (grown == NULL) {
        return;
    }

    /* Copy the message including its terminator onto the end of the old text. */
    memcpy(grown + oldLen, msg, addLen + 1);
    s_logOutput = grown;
}
/* Discards the captured log output so the next test starts with an empty log. */
static void log_clear(void) {
free(s_logOutput);
/* Reset to NULL so log_append and the assertions treat the log as empty. */
s_logOutput = NULL;
}
/* One registered test: the function to run and the name it is reported under. */
typedef struct {
/* Display name; main skips its first five characters to derive the fixture name. */
const char* name;
/* The test function itself. */
Test func;
} TestCase;
#include "test_token.c"
#include "test_parser.c"
#include "parser/test_module.c"
#include "parser/test_declaration.c"
#include "parser/test_expression.c"
#include "parser/test_core.c"
#include "test_log.c"
static int s_totalTests;
static int s_greenTests;
#define TEST(name) {#name, name},
static TestCase s_tests[] = {
{"tokenstream_open_fail", test_tokenstream_open_fail},
{"tokenstream_simple_keyword", test_tokenstream_simple_keyword},
{"tokenstream_keywords_and_symbols", test_tokenstream_keywords_and_symbols},
{"tokenstream_parentheses_and_brackets", test_tokenstream_parentheses_and_brackets},
{"tokenstream_comma", test_tokenstream_comma},
{"tokenstream_whitespace_ignored", test_tokenstream_whitespace_ignored},
{"tokenstream_void_function_signature", test_tokenstream_void_function_signature},
{"tokenstream_info", test_tokenstream_info},
{"parser_module_name", test_parser_module_name},
{"log_error", test_log_error},
TEST(test_log_error)
TEST(test_log_on_line_variadic)
TEST(test_log_on_line)
TEST(test_parser_module_name)
TEST(test_parser_bad_module_name)
TEST(test_parser_missing_semicolon_module)
TEST(test_parser_missing_semicolon_import)
TEST(test_parser_bad_import_name)
TEST(test_parser_imports)
TEST(test_parser_public_imports)
TEST(test_parser_alias_simple)
TEST(test_parser_alias_simple_type)
TEST(test_parser_alias_array)
TEST(test_parser_variable_simple)
TEST(test_parser_variable_simple_type)
TEST(test_parser_variable_const)
TEST(test_parser_variable_init)
TEST(test_parser_variable_static)
TEST(test_parser_multiple_vars)
TEST(test_parser_core_placeholder)
TEST(test_tokenstream_comma)
TEST(test_tokenstream_info)
TEST(test_tokenstream_keywords_and_symbols)
TEST(test_tokenstream_open_fail)
TEST(test_tokenstream_parentheses_and_brackets)
TEST(test_tokenstream_primitive_types)
TEST(test_tokenstream_simple_keyword)
TEST(test_tokenstream_unknown_token)
TEST(test_tokenstream_void_function_signature)
TEST(test_tokenstream_whitespace_ignored)
};
/*
 * Test runner entry point.
 *
 * Runs every entry of s_tests in order. Each test starts with an empty log
 * and no cached source; a call to fail() jumps back here and marks the test
 * as failed. The per-test module and token stream are released after every
 * test whether it passed or not. Returns 0 when all tests pass, 1 otherwise.
 */
int main(int argc, char** argv) {
    const char** failedTests;
    int failedCount;

    (void)argc;
    (void)argv;

    s_totalTests = sizeof(s_tests) / sizeof(s_tests[0]);
    s_greenTests = 0;

    // Allocate failed tests array dynamically to avoid VLAs
    failedTests = (const char**)malloc((s_totalTests + 1) * sizeof(const char*));
    if (failedTests == NULL) {
        fprintf(stderr, "out of memory\n");
        return 1;
    }
    failedCount = 0;

    for (int i = 0; i < s_totalTests; i++) {
        // Add 5 to strip the 'test_' prefix.
        s_currentTestName = s_tests[i].name + 5;
        log_set_output(log_append);
        printf("%s...", s_tests[i].name);
        fflush(stdout);
        s_failMsg[0] = '\0';

        if (setjmp(s_testJmp) == 0) {
            log_clear();
            if (s_testSource) {
                free(s_testSource);
                s_testSource = NULL;
            }
            s_tests[i].func();
            printf(" [OK]\n");
            s_greenTests++;
        } else {
            /* Reached through longjmp from fail(). */
            printf(" [FAIL]: %s\n", s_failMsg[0] ? s_failMsg : "");
            failedTests[failedCount++] = s_tests[i].name;
            // Log output on failure
            if (s_logOutput && s_logOutput[0]) {
                printf("%s\n", s_logOutput);
            }
        }

        // Free AST and TokenStream after each test
        if (s_currentModule) {
            ast_free_module(s_currentModule);
            s_currentModule = NULL;
        }
        if (s_currentTokenStream) {
            tokenstream_close(s_currentTokenStream);
            s_currentTokenStream = NULL;
        }
        fflush(stdout);
    }

    free(s_testSource);
    s_testSource = NULL;
    log_clear();

    if (failedCount > 0) {
        printf("\nFailed tests:\n");
        for (int j = 0; j < failedCount; j++) {
            printf(" - %s\n", failedTests[j]);
        }
    }
    printf("\n%d/%d tests passed.\n", s_greenTests, s_totalTests);

    free(failedTests);
    return failedCount > 0 ? 1 : 0;
}
+58
View File
@@ -4,6 +4,9 @@
#ifndef TEST_H
#define TEST_H
#include "token.h"
#include "ast.h"
typedef void (*Test)(void);
/**
@@ -33,4 +36,59 @@ void assert_not_null(void* ptr, const char* msg);
*/
void assert_str(const char* expected, const char* actual, const char* msg);
/**
* Asserts that a string has the expected value.
*
* Calls `fail` if the assertion does not hold.
*
* @param expected The expected value. This is typically a string literal.
* @param actual The actual value. This is typically an expression.
* @param msg The message to print if these do not match.
*/
void assert_string(const char* expected, String actual, const char* msg);
/**
* Asserts that the logged output matches the expected value.
*/
void assert_log(const char* expected, const char* msg);
/**
* Asserts that the logged output matches the content of the file `v0/tests/xyz.log`, where xyz is the test name.
* If GENERATE_GOLDEN=1, the file is overwritten with the actual output.
*/
void assert_log_file(const char* msg);
/**
* Asserts that two integers are equal.
*/
void assert_int(int expected, int actual, const char* msg);
/**
* Asserts that a condition is true.
*/
#include "bool.h"
void assert_true(bool condition, const char* msg);
/**
* Asserts that a condition is false.
*/
void assert_false(bool condition, const char* msg);
/**
* Get the token stream used for this test.
* It reads from the `v0/tests/xyz.c2` file, where xyz is the test name.
*
* At the end of the test, the tokenstream will be freed automatically by the test harness.
*/
TokenStream* test_get_tokenstream(void);
/**
* Gets the parsed module for this test.
* It reads from the `v0/tests/xyz.c2` file, where xyz is the test name.
*
* At the end of the test, the AST will be freed automatically by the test harness.
*/
ModuleTree* test_get_ast(void);
#endif
+63
View File
@@ -0,0 +1,63 @@
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
/*
 * Runs one integration test.
 *
 * Compiles v0/integration_tests/<dir_name>/input.c2 with ./v0/bin/c2,
 * redirects the output to actual.c in the working directory, and diffs it
 * against v0/integration_tests/<dir_name>/expected.c.
 *
 * @param dir_name Name of the test directory.
 * @returns 0 when the output matches, 1 on any failure.
 */
int run_test(const char* dir_name) {
    char cmd[2048];
    char input_path[1024];
    char expected_path[1024];
    int written;

    /* Paths are single-quoted in the shell command, so a quote in the name cannot be supported. */
    if (dir_name == NULL || strchr(dir_name, '\'') != NULL) {
        printf("Unsupported test directory name\n");
        return 1;
    }

    written = snprintf(input_path, sizeof(input_path), "v0/integration_tests/%s/input.c2", dir_name);
    if (written < 0 || (size_t)written >= sizeof(input_path)) {
        printf("Path buffer too small for %s\n", dir_name);
        return 1;
    }
    written = snprintf(expected_path, sizeof(expected_path), "v0/integration_tests/%s/expected.c", dir_name);
    if (written < 0 || (size_t)written >= sizeof(expected_path)) {
        printf("Path buffer too small for %s\n", dir_name);
        return 1;
    }

    written = snprintf(cmd, sizeof(cmd), "./v0/bin/c2 '%s' > actual.c", input_path);
    if (written < 0 || (size_t)written >= sizeof(cmd)) {
        printf("Command buffer too small for %s\n", dir_name);
        return 1;
    }
    if (system(cmd) != 0) {
        printf("Failed to run compiler for %s\n", dir_name);
        return 1;
    }

    written = snprintf(cmd, sizeof(cmd), "diff -u '%s' actual.c", expected_path);
    if (written < 0 || (size_t)written >= sizeof(cmd)) {
        printf("Command buffer too small for %s\n", dir_name);
        return 1;
    }
    if (system(cmd) != 0) {
        printf("Test %s failed: Output mismatch\n", dir_name);
        return 1;
    }

    printf("Test %s passed\n", dir_name);
    return 0;
}
/*
 * Returns non-zero when `entry` is a test directory inside
 * v0/integration_tests. "." and ".." are never test directories.
 *
 * Some filesystems report DT_UNKNOWN instead of a real type; in that case the
 * entry is probed with opendir, which only succeeds for directories.
 */
static int is_test_directory(const struct dirent* entry) {
    char path[1024];
    int written;
    DIR* probe;

    if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
        return 0;
    }
    if (entry->d_type == DT_DIR) {
        return 1;
    }
    if (entry->d_type != DT_UNKNOWN) {
        return 0;
    }

    written = snprintf(path, sizeof(path), "v0/integration_tests/%s", entry->d_name);
    if (written < 0 || (size_t)written >= sizeof(path)) {
        return 0;
    }
    probe = opendir(path);
    if (probe == NULL) {
        return 0;
    }
    closedir(probe);
    return 1;
}

/*
 * Integration test runner: runs run_test for every directory under
 * v0/integration_tests and prints a summary. Returns 1 if the directory
 * cannot be opened or any test fails, 0 otherwise.
 */
int main(void) {
    DIR* d = opendir("v0/integration_tests");
    if (!d) {
        perror("opendir");
        return 1;
    }

    struct dirent* dir;
    int passed = 0;
    int failed = 0;

    while ((dir = readdir(d)) != NULL) {
        if (!is_test_directory(dir)) {
            continue;
        }
        if (run_test(dir->d_name) == 0) {
            passed++;
        } else {
            failed++;
        }
    }
    closedir(d);

    printf("\nTotal tests: %d, Passed: %d, Failed: %d\n", passed + failed, passed, failed);
    return failed > 0 ? 1 : 0;
}
+36 -5
View File
@@ -1,21 +1,52 @@
#include "test.h"
#include "log.h"
#include <string.h>
#include <stdlib.h>
#include "util.h"
/* Heap copy of the last message passed to mock_log; NULL when none is held. */
static char* s_lastLoggedError = NULL;

/* Log sink that remembers only the most recent message (NULL is stored as ""). */
static void mock_log(const char* msg) {
    free(s_lastLoggedError);
    s_lastLoggedError = format_string("%s", msg ? msg : "");
}

/* Checks that log_error forwards its message to the installed log sink. */
static void test_log_error(void) {
    log_set_output(mock_log);

    /* Start from a clean slate so a stale message cannot satisfy the assertion. */
    free(s_lastLoggedError);
    s_lastLoggedError = NULL;

    log_error("test error message");
    assert_str("test error message", s_lastLoggedError, "expected 'test error message'");

    /* Restore the default sink and release the captured message. */
    log_set_output(NULL);
    free(s_lastLoggedError);
    s_lastLoggedError = NULL;
}
/* Logs a message for a hand-built location and compares the output with the golden file. */
static void test_log_on_line(void) {
    Location where;

    /* Where the message points: line 1, columns 12-13. */
    where.line = 1;
    where.column_start = 12;
    where.column_end = 13;

    /* File name and full text of the offending line (13 characters). */
    where.filename = "v0/tests/log_on_line.c2";
    where.line_text.data = "int main() []";
    where.line_text.length = strlen(where.line_text.data);

    log_on_line(&where, "unexpected token");
    assert_log_file("expected formatted error message");
}
/* Same as the plain log_on_line test, but with a format argument in the message. */
static void test_log_on_line_variadic(void) {
    Location where;

    /* Where the message points: line 1, columns 12-13. */
    where.line = 1;
    where.column_start = 12;
    where.column_end = 13;

    /* File name and full text of the offending line (13 characters). */
    where.filename = "v0/tests/log_on_line_variadic.c2";
    where.line_text.data = "int main() []";
    where.line_text.length = strlen(where.line_text.data);

    log_on_line(&where, "unexpected token '%c'", 'x');
    assert_log_file("expected formatted error message with variadic args");
}
-14
View File
@@ -1,14 +0,0 @@
#include "test.h"
#include "parser.h"
#include <string.h>
/*
 * Parses an inline source string and checks that the module name is
 * "my_module". This version opens and releases the token stream and the
 * module itself rather than going through the test harness.
 */
static void test_parser_module_name(void) {
TokenStream* ts = tokenstream_open("module my_module;");
Module* m = parser_parse(ts);
assert_not_null(m, "expected module to be parsed");
assert_str("my_module", m->name, "expected name 'my_module'");
/* Release the module before the stream it was parsed from. */
parser_free(m);
tokenstream_close(ts);
}
+75 -72
View File
@@ -1,110 +1,113 @@
#include "test.h"
#include "token.h"
#include <string.h>
#include <stdlib.h>
/* Opening a stream without a source buffer must yield NULL. */
static void test_tokenstream_open_fail(void) {
    TokenStream* ts = tokenstream_open(NULL, NULL);
    if (ts != NULL) fail("expected NULL for NULL buffer");
}
/* Expects the fixture to yield TOKEN_MODULE followed by end of input. */
static void test_tokenstream_simple_keyword(void) {
    /* The harness owns the stream and closes it after the test. */
    TokenStream* ts = test_get_tokenstream();
    Token t;
    Token eof;

    t = tokenstream_next(ts);
    if (t.token != TOKEN_MODULE) fail("expected TOKEN_MODULE");

    eof = tokenstream_next(ts);
    if (eof.token != TOKEN_EOF) fail("expected EOF");
}
/* Expects: module, identifier, ';', import, identifier, ';', end of input. */
static void test_tokenstream_keywords_and_symbols(void) {
    /* The harness owns the stream and closes it after the test. */
    TokenStream* ts = test_get_tokenstream();

    if (tokenstream_next(ts).token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
    if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (main)");
    if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
    if (tokenstream_next(ts).token != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
    if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (stdio)");
    if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
    if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
}
/* Expects: '(', ')', '[', ']', end of input. */
static void test_tokenstream_parentheses_and_brackets(void) {
    /* The harness owns the stream and closes it after the test. */
    TokenStream* ts = test_get_tokenstream();

    if (tokenstream_next(ts).token != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
    if (tokenstream_next(ts).token != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
    if (tokenstream_next(ts).token != TOKEN_BRACKET_OPEN) fail("expected TOKEN_BRACKET_OPEN");
    if (tokenstream_next(ts).token != TOKEN_BRACKET_CLOSE) fail("expected TOKEN_BRACKET_CLOSE");
    if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
}
/* Expects three identifiers separated by commas, then end of input. */
static void test_tokenstream_comma(void) {
    /* The harness owns the stream and closes it after the test. */
    TokenStream* ts = test_get_tokenstream();

    if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected a");
    if (tokenstream_next(ts).token != TOKEN_COMMA) fail("expected comma");
    if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected b");
    if (tokenstream_next(ts).token != TOKEN_COMMA) fail("expected comma");
    if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected c");
    if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
}
/* Expects: module, import, ';', end of input, with nothing produced for whitespace. */
static void test_tokenstream_whitespace_ignored(void) {
    /* The harness owns the stream and closes it after the test. */
    TokenStream* ts = test_get_tokenstream();

    if (tokenstream_next(ts).token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
    if (tokenstream_next(ts).token != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
    if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
    if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
}
/* Expects: void, identifier, '(', ')', end of input. */
static void test_tokenstream_void_function_signature(void) {
    /* The harness owns the stream and closes it after the test. */
    TokenStream* ts = test_get_tokenstream();

    if (tokenstream_next(ts).token != TOKEN_VOID) fail("expected TOKEN_VOID");
    if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
    if (tokenstream_next(ts).token != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
    if (tokenstream_next(ts).token != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
    if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
}
static void test_tokenstream_unknown_token(void) {
TokenStream* ts = test_get_tokenstream();
if (tokenstream_next(ts).token != TOKEN_UNKNOWN) fail("expected TOKEN_UNKNOWN");
assert_log_file("expected error message for unknown token");
}
/*
 * Checks the text and location carried by each token: "module" at line 1,
 * column 1, followed by "main" at line 1, column 8.
 */
static void test_tokenstream_info(void) {
    /* The harness owns the stream and closes it after the test. */
    TokenStream* ts = test_get_tokenstream();
    Token t1;
    Token t2;

    t1 = tokenstream_next(ts);
    if (t1.token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
    assert_string("module", t1.text, "info: expected 'module'");
    if (t1.location.line != 1) fail("expected line 1");
    if (t1.location.column_start != 1) fail("expected column 1");

    t2 = tokenstream_next(ts);
    if (t2.token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
    assert_string("main", t2.text, "info: expected 'main'");
    if (t2.location.line != 1) fail("expected line 1");
    if (t2.location.column_start != 8) fail("expected column 8");
}
static void test_tokenstream_primitive_types(void) {
TokenStream* ts = test_get_tokenstream();
if (tokenstream_next(ts).token != TOKEN_I8) fail("expected TOKEN_I8");
if (tokenstream_next(ts).token != TOKEN_I16) fail("expected TOKEN_I16");
if (tokenstream_next(ts).token != TOKEN_I32) fail("expected TOKEN_I32");
if (tokenstream_next(ts).token != TOKEN_I64) fail("expected TOKEN_I64");
if (tokenstream_next(ts).token != TOKEN_U8) fail("expected TOKEN_U8");
if (tokenstream_next(ts).token != TOKEN_U16) fail("expected TOKEN_U16");
if (tokenstream_next(ts).token != TOKEN_U32) fail("expected TOKEN_U32");
if (tokenstream_next(ts).token != TOKEN_U64) fail("expected TOKEN_U64");
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
}
+1
View File
@@ -0,0 +1 @@
int main() []
+4
View File
@@ -0,0 +1,4 @@
--- v0/tests/log_on_line.c2 ---
1| int main() []
^^
unexpected token
+1
View File
@@ -0,0 +1 @@
int main() []
+4
View File
@@ -0,0 +1,4 @@
--- v0/tests/log_on_line_variadic.c2 ---
1| int main() []
^^
unexpected token 'x'
+9
View File
@@ -0,0 +1,9 @@
module mymodule;
import foo;
alias myalias = i32[];
import bar;
alias otheralias = i32;
+3
View File
@@ -0,0 +1,3 @@
module mymodule;
alias myalias = i32[];
+3
View File
@@ -0,0 +1,3 @@
module mymodule;
alias myalias = i32;
+3
View File
@@ -0,0 +1,3 @@
module mymodule;
alias myalias = i32;
+2
View File
@@ -0,0 +1,2 @@
module mymodule;
import ;
+4
View File
@@ -0,0 +1,4 @@
--- v0/tests/parser_bad_import_name.c2 ---
2| import ;
^
expected module identifier
+1
View File
@@ -0,0 +1 @@
import other_module;
+4
View File
@@ -0,0 +1,4 @@
--- v0/tests/parser_bad_module_name.c2 ---
1| import other_module;
^^^^^^
expected keyword 'module'
+2
View File
@@ -0,0 +1,2 @@
module my_module;
import other_module;
@@ -0,0 +1 @@
module my_module; import other_module
@@ -0,0 +1,4 @@
--- v0/tests/parser_missing_semicolon_import.c2 ---
1| module my_module; import other_module
^
expected ';' after import
@@ -0,0 +1 @@
module my_module
@@ -0,0 +1,4 @@
--- v0/tests/parser_missing_semicolon_module.c2 ---
1| module my_module
^
expected ';' after module name
+1
View File
@@ -0,0 +1 @@
module my_module;
+4
View File
@@ -0,0 +1,4 @@
module test_multiple_vars;
i32 var1;
i32 var2;
+3
View File
@@ -0,0 +1,3 @@
module my_module;
public import other_module;
+3
View File
@@ -0,0 +1,3 @@
module test_const_var;
const i32 my_const;
+2
View File
@@ -0,0 +1,2 @@
module mymodule;
var x = 123;
+4
View File
@@ -0,0 +1,4 @@
module my_module;
// Defines a global variable called my_var.
i32 my_var;
+4
View File
@@ -0,0 +1,4 @@
module my_module;
// Defines a global variable called my_var.
i32 my_var;
+3
View File
@@ -0,0 +1,3 @@
module test_static_var;
static i32 my_static;
+1
View File
@@ -0,0 +1 @@
a,b,c
+1
View File
@@ -0,0 +1 @@
module main;
@@ -0,0 +1 @@
module main; import stdio;
@@ -0,0 +1 @@
()[]
+1
View File
@@ -0,0 +1 @@
i8 i16 i32 i64 u8 u16 u32 u64
+1
View File
@@ -0,0 +1 @@
module
+1
View File
@@ -0,0 +1 @@
%
+4
View File
@@ -0,0 +1,4 @@
--- v0/tests/tokenstream_unknown_token.c2 ---
1| %
^
unexpected token '%'
@@ -0,0 +1 @@
void main()
@@ -0,0 +1,2 @@
module
import ;
+180 -76
View File
@@ -1,12 +1,21 @@
#include "token.h"
#include "log.h"
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
/* State of a tokenizer reading from an in-memory source buffer. */
struct TokenStream {
    /* Heap copy of the file name, released in tokenstream_close; may be NULL. */
    char* filename;
    /* Source text being tokenized; not owned by the stream. */
    const char* code;
    /* Index of the next unread character in `code`. */
    size_t pos;
    /* 1-based line and column of the next unread character. */
    int line;
    int column;
    /* Start of the line that contains the next unread character. */
    const char* line_start;
    /* End of last non-EOF token */
    int last_line;
    int last_column_end;
    const char* last_line_start;
};
/**
@@ -15,22 +24,38 @@ struct TokenStream {
*/
/* One entry of the keyword table: a reserved word and the token type it maps to. */
typedef struct {
    const char* keyword;
    TokenType token;
} KeywordMap;
/* Reserved words and their token types; searched by lookup_keyword. */
static const KeywordMap keywords[] = {
/* Module structure and declaration modifiers. */
{"module", TOKEN_MODULE},
{"import", TOKEN_IMPORT},
{"alias", TOKEN_ALIAS},
{"public", TOKEN_PUBLIC},
{"var", TOKEN_VAR},
{"const", TOKEN_CONST},
{"static", TOKEN_STATIC},
/* Built-in type names. */
{"void", TOKEN_VOID},
{"i8", TOKEN_I8},
{"i16", TOKEN_I16},
{"i32", TOKEN_I32},
{"i64", TOKEN_I64},
{"u8", TOKEN_U8},
{"u16", TOKEN_U16},
{"u32", TOKEN_U32},
{"u64", TOKEN_U64},
/* Boolean literals. */
{"true", TOKEN_TRUE},
{"false", TOKEN_FALSE},
};
/**
* Look up a keyword in the keyword map.
* Returns TOKEN_IDENTIFIER if not found.
*/
static Token lookup_keyword(const char* str, size_t length) {
int count = sizeof(keywords) / sizeof(keywords[0]);
for (int i = 0; i < count; i++) {
static TokenType lookup_keyword(const char* str, size_t length) {
int count = sizeof(keywords) / sizeof(keywords[0]);
int i;
for (i = 0; i < count; i++) {
if (strlen(keywords[i].keyword) == length &&
strncmp(keywords[i].keyword, str, length) == 0) {
return keywords[i].token;
@@ -53,123 +78,202 @@ static int is_identifier_part(char c) {
return isalnum(c) || c == '_';
}
/**
 * Peek at the next character in the stream.
 *
 * Returns the character without consuming it; '\0' marks the end of input.
 */
static char peek_char(TokenStream* ts) {
    return ts->code[ts->pos];
}

/**
 * Read a character from the stream and update position.
 *
 * Returns '\0' at the end of input without advancing. Otherwise consumes the
 * character and keeps the line/column bookkeeping in step: a newline starts
 * a new line at column 1, any other character advances the column by one.
 */
static char read_char(TokenStream* ts) {
    char c = ts->code[ts->pos];
    if (c == '\0') return '\0';

    ts->pos++;
    if (c == '\n') {
        ts->line++;
        ts->column = 1;
        /* The next line begins right after the newline just consumed. */
        ts->line_start = &ts->code[ts->pos];
    } else {
        ts->column++;
    }
    return c;
}
static Token read_keyword_or_identifier(TokenStream* ts, char first) {
const char* start = &ts->code[ts->pos - 1];
size_t length = 1;
while (is_identifier_part(peek_char(ts))) {
read_char(ts);
length++;
static size_t get_line_length(const char* line_start) {
const char* p = line_start;
while (*p != '\n' && *p != '\0') {
p++;
}
Token token = lookup_keyword(start, length);
ts->last_info.token = token;
ts->last_info.text = (char*)start;
ts->last_info.text_length = length;
return token;
return (size_t)(p - line_start);
}
TokenStream* tokenstream_open(const char* code) {
if (code == NULL) return NULL;
static Token create_token(TokenStream* ts, TokenType type, const char* text, size_t length, int line, int column, const char* line_start) {
Token t;
t.token = type;
t.text.data = (char*)text;
t.text.length = length;
t.location.filename = ts->filename;
t.location.line = line;
t.location.column_start = column;
t.location.column_end = column + (int)length - 1;
t.location.line_text.data = (char*)line_start;
t.location.line_text.length = get_line_length(line_start);
TokenStream* ts = (TokenStream*)malloc(sizeof(struct TokenStream));
if (ts == NULL) {
return NULL;
if (type != TOKEN_EOF) {
ts->last_line = t.location.line;
ts->last_column_end = t.location.column_end;
ts->last_line_start = t.location.line_text.data;
}
ts->code = code;
ts->pos = 0;
ts->last_info.text = NULL;
ts->last_info.text_length = 0;
ts->last_info.token = (Token)-1;
return ts;
return t;
}
TokenStream* tokenstream_open(const char* filename, const char* code) {
/* Declarations first for C89 */
TokenStream* ts;
const char* name_src;
if (code == NULL) return NULL;
ts = (TokenStream*)malloc(sizeof(struct TokenStream));
if (ts == NULL) {
return NULL;
}
name_src = filename ? filename : "unknown";
ts->filename = malloc(strlen(name_src) + 1);
if (ts->filename) {
memcpy(ts->filename, name_src, strlen(name_src) + 1);
}
ts->code = code;
ts->pos = 0;
ts->line = 1;
ts->column = 1;
ts->line_start = code;
ts->last_line = 1;
ts->last_column_end = 0;
ts->last_line_start = code;
return ts;
}
/* Releases a token stream and its copy of the file name; a NULL stream is ignored. */
void tokenstream_close(TokenStream* ts) {
    if (ts != NULL) {
        /* free(NULL) is a no-op, so a missing file name needs no special case. */
        free(ts->filename);
        free(ts);
    }
}
Token tokenstream_next(TokenStream* ts) {
if (ts == NULL) return -1;
/* Declarations first for C89 */
char c;
int start_line;
int start_column;
const char* line_start;
const char* start_text;
Token t;
char c;
if (ts == NULL) {
Token t = {0};
t.token = TOKEN_EOF;
return t;
}
/* Skip whitespace and comments */
while ((c = read_char(ts)) != (char)-1) {
/* Skip whitespace and comments */
while ((c = peek_char(ts)) != '\0') {
if (isspace(c)) {
read_char(ts);
continue;
}
/* Handle comments */
if (c == '/') {
if (peek_char(ts) == '/') {
if (ts->code[ts->pos + 1] == '/') {
/* Skip until end of line */
while ((c = read_char(ts)) != (char)-1 && c != '\n') {
while ((c = read_char(ts)) != '\0' && c != '\n') {
/* Skip */
}
continue;
}
/* It's just a slash, which we don't handle yet */
return -1;
break;
}
/* We found a non-whitespace, non-comment character */
break;
}
if (c == (char)-1) {
ts->last_info.token = (Token)-1;
ts->last_info.text = NULL;
ts->last_info.text_length = 0;
return -1; /* EOF */
if (peek_char(ts) == '\0') {
Token t;
t.token = TOKEN_EOF;
t.text.data = NULL;
t.text.length = 0;
t.location.filename = ts->filename;
t.location.line = ts->last_line;
t.location.column_start = ts->last_column_end + 1;
t.location.column_end = ts->last_column_end + 1;
t.location.line_text.data = (char*)ts->last_line_start;
t.location.line_text.length = get_line_length(ts->last_line_start);
return t;
}
start_line = ts->line;
start_column = ts->column;
line_start = ts->line_start;
start_text = &ts->code[ts->pos];
c = read_char(ts);
/* Single-character tokens */
ts->last_info.text = (char*)&ts->code[ts->pos - 1];
ts->last_info.text_length = 1;
switch (c) {
case '(': return ts->last_info.token = TOKEN_PARENT_OPEN;
case ')': return ts->last_info.token = TOKEN_PARENT_CLOSE;
case '[': return ts->last_info.token = TOKEN_BRACKET_OPEN;
case ']': return ts->last_info.token = TOKEN_BRACKET_CLOSE;
case ',': return ts->last_info.token = TOKEN_COMMA;
case ';': return ts->last_info.token = TOKEN_SEMICOLON;
case '(': return create_token(ts, TOKEN_PARENT_OPEN, start_text, 1, start_line, start_column, line_start);
case ')': return create_token(ts, TOKEN_PARENT_CLOSE, start_text, 1, start_line, start_column, line_start);
case '[': return create_token(ts, TOKEN_BRACKET_OPEN, start_text, 1, start_line, start_column, line_start);
case ']': return create_token(ts, TOKEN_BRACKET_CLOSE, start_text, 1, start_line, start_column, line_start);
case ',': return create_token(ts, TOKEN_COMMA, start_text, 1, start_line, start_column, line_start);
case ';': return create_token(ts, TOKEN_SEMICOLON, start_text, 1, start_line, start_column, line_start);
case '=': return create_token(ts, TOKEN_ASSIGN, start_text, 1, start_line, start_column, line_start);
case '"': {
size_t len = 0;
const char* start = &ts->code[ts->pos];
while (peek_char(ts) != '"' && peek_char(ts) != '\0') {
read_char(ts);
len++;
}
if (peek_char(ts) == '"') read_char(ts);
return create_token(ts, TOKEN_STRING, start, len, start_line, start_column + 1, line_start);
}
}
if (isdigit(c)) {
size_t len = 1;
while (isdigit(peek_char(ts))) {
read_char(ts);
len++;
}
return create_token(ts, TOKEN_INTEGER, start_text, len, start_line, start_column, line_start);
}
/* Keywords and identifiers */
if (is_identifier_start(c)) {
return read_keyword_or_identifier(ts, c);
}
if (is_identifier_start(c)) {
/* Declarations first for C89 */
size_t length;
TokenType type;
length = 1;
while (is_identifier_part(peek_char(ts))) {
read_char(ts);
length++;
}
type = lookup_keyword(start_text, length);
return create_token(ts, type, start_text, length, start_line, start_column, line_start);
}
/* Unknown character */
ts->last_info.token = (Token)-1;
ts->last_info.text = NULL;
ts->last_info.text_length = 0;
return -1;
}
void tokenstream_info(TokenStream* ts, TokenInfo* info) {
if (ts == NULL || info == NULL) return;
*info = ts->last_info;
t = create_token(ts, TOKEN_UNKNOWN, start_text, 1, start_line, start_column, line_start);
log_on_line(&t.location, "unexpected token '%c'", c);
return t;
}
+41 -28
View File
@@ -4,55 +4,77 @@
#ifndef TOKEN_H
#define TOKEN_H
#include <stddef.h>
#include "location.h"
/**
* A list of all possible tokens.
*/
typedef enum {
// Keywords
TOKEN_MODULE,
/* Keywords */
TOKEN_MODULE,
TOKEN_IMPORT,
TOKEN_SEMICOLON,
TOKEN_ALIAS,
TOKEN_PUBLIC,
TOKEN_VAR,
TOKEN_CONST,
TOKEN_STATIC,
// Symbols
TOKEN_PARENT_OPEN,
/* Symbols */
TOKEN_PARENT_OPEN,
TOKEN_PARENT_CLOSE,
TOKEN_BRACKET_OPEN,
TOKEN_BRACKET_CLOSE,
TOKEN_COMMA,
TOKEN_ASSIGN,
// Primitives
TOKEN_VOID,
/* Primitives */
TOKEN_VOID,
TOKEN_I8,
TOKEN_I16,
TOKEN_I32,
TOKEN_I64,
TOKEN_U8,
TOKEN_U16,
TOKEN_U32,
TOKEN_U64,
TOKEN_STRING,
TOKEN_INTEGER,
TOKEN_TRUE,
TOKEN_FALSE,
// Variable
TOKEN_IDENTIFIER,
} Token;
/* Variable */
TOKEN_IDENTIFIER,
/* Others */
TOKEN_EOF,
TOKEN_UNKNOWN
} TokenType;
/**
* Holds additional information about a token.
*/
typedef struct {
/// @brief The textual representation of a token.
/// Note that this is not necessarily null-terminated.
char* text;
/* @brief The actual token. */
TokenType token;
/// @brief The length of the `text` string.
size_t text_length;
/* @brief The textual representation of a token. */
String text;
/// @brief The actual token.
Token token;
} TokenInfo;
/* @brief The location of the token. */
Location location;
} Token;
typedef struct TokenStream TokenStream;
/**
* Returns a TokenStream for a text.
*
* @param filename The name of the file the code came from. The file is not opened; the name is only used for error reporting.
* @param code The text to read.
* @returns A handle to the TokenStream.
*/
TokenStream* tokenstream_open(const char* code);
TokenStream* tokenstream_open(const char* filename, const char* code);
/**
* Closes a TokenStream.
@@ -67,13 +89,4 @@ void tokenstream_close(TokenStream* ts);
*/
Token tokenstream_next(TokenStream* ts);
/**
* Gets additional information about the last token that was returned
* by `tokenstream_next`.
*
* @param ts The TokenStream to use.
* @param info The TokenInfo object to store the results in.
*/
void tokenstream_info(TokenStream* ts, TokenInfo* info);
#endif
+9
View File
@@ -0,0 +1,9 @@
/**
* Contains runtime information about types.
*/
#ifndef TYPES_H
#define TYPES_H
#endif
+46
View File
@@ -0,0 +1,46 @@
#include "util.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
/*
 * Portable va_copy fallback for pre-C99 or platforms without va_copy.
 *
 * Selection order:
 *   1. Keep va_copy if <stdarg.h> already defines it as a macro.
 *   2. Otherwise map it onto the __va_copy macro when that is defined.
 *   3. Otherwise fall back to plain assignment.
 *
 * NOTE(review): the plain-assignment fallback only compiles where va_list is
 * an assignable type; it cannot work on ABIs where va_list is an array type.
 * Confirm that every target reaching branch 3 has an assignable va_list.
 */
#ifndef va_copy
# if defined(__va_copy)
# define va_copy(dest, src) __va_copy(dest, src)
# else
# define va_copy(dest, src) ((dest) = (src))
# endif
#endif
/*
 * Formats `fmt` with the arguments in `args` into a freshly allocated,
 * null-terminated string.
 *
 * The output is measured first with a copy of `args`, then written into a
 * buffer of exactly the measured size. `args` itself is consumed by the
 * second pass; the caller remains responsible for calling va_end on it.
 *
 * Returns NULL if `fmt` is NULL, if either formatting pass fails, if the
 * second pass does not fit the measured size, or if allocation fails.
 * The caller owns the returned string and releases it with free().
 */
char* format_string_va(const char* fmt, va_list args) {
    /* Declarations first to satisfy -std=c89 */
    va_list args_copy;
    int needed;
    int written;
    size_t size;
    char* buf;

    if (!fmt) return NULL;

    /* First pass: measure only. Uses a copy so `args` stays usable. */
    va_copy(args_copy, args);
    needed = vsnprintf(NULL, 0, fmt, args_copy);
    va_end(args_copy);
    if (needed < 0) return NULL;

    size = (size_t)needed + 1; /* room for the terminating '\0' */
    buf = malloc(size);
    if (!buf) return NULL;

    /* Second pass: never hand back a buffer that was not fully written. */
    written = vsnprintf(buf, size, fmt, args);
    if (written < 0 || (size_t)written >= size) {
        free(buf);
        return NULL;
    }

    return buf;
}
/*
 * Variadic front end for format_string_va: gathers the trailing arguments
 * into a va_list and forwards them together with `fmt`.
 *
 * Returns whatever format_string_va returns, or NULL without touching the
 * arguments when `fmt` is NULL.
 */
char* format_string(const char* fmt, ...) {
    /* Declarations first to satisfy -std=c89 */
    char* formatted = NULL; /* stays NULL when fmt is missing */
    va_list ap;

    if (fmt != NULL) {
        va_start(ap, fmt);
        formatted = format_string_va(fmt, ap);
        va_end(ap);
    }

    return formatted;
}
+27
View File
@@ -0,0 +1,27 @@
#ifndef UTIL_H
#define UTIL_H
#include <stdarg.h>
#include <stddef.h>
/**
* Formats a string using printf-style formatting and returns a newly allocated string.
* The caller is responsible for freeing the returned string.
*
* @param fmt The format string.
* @param ... The values to format.
* @return A newly allocated string containing the formatted output.
*/
char* format_string(const char* fmt, ...);
/**
* Formats a string using printf-style formatting with a va_list and returns a newly allocated string.
* The caller is responsible for freeing the returned string.
*
* @param fmt The format string.
* @param args The va_list of values to format.
* @return A newly allocated string containing the formatted output.
*/
char* format_string_va(const char* fmt, va_list args);
#endif