Compare commits

...

58 Commits

Author SHA1 Message Date
seeseemelk b6d0a78d06 Add integration test 2026-05-01 09:44:46 +02:00
seeseemelk 3bdccf2000 Add integration test framework 2026-04-30 22:21:08 +02:00
seeseemelk 177fb971e4 Rename AST structures to Tree and relocate freeing logic 2026-04-30 21:46:15 +02:00
seeseemelk ea55dedd07 Refactor AST and Parser into modular subdirectories
- Split ast.h into granular headers in v0/ast/
- Split parser.c into modular implementation files in v0/parser/
- Move and rename parser tests to v0/parser/test_*.c
- Update build system (include.mk) with modular sub-makefiles
- Maintain v0/ast.h and v0/parser.h as umbrella headers
2026-04-30 21:23:07 +02:00
seeseemelk 4bd66ea216 More variable stuff 2026-04-30 20:25:53 +02:00
seeseemelk 0704284726 Can parse variables 2026-04-29 21:39:48 +02:00
seeseemelk 94ae665a0a Add initial variable work 2026-04-29 21:20:52 +02:00
seeseemelk e2d8e385f0 Add basic var tokens 2026-04-29 20:28:52 +02:00
seeseemelk 76f9168c5f Fix docs 2026-04-29 20:21:52 +02:00
seeseemelk 1ab021561e Fix bad test 2026-04-29 20:20:16 +02:00
seeseemelk f260e02efa Refactor parser 2026-04-29 20:15:05 +02:00
seeseemelk 1c5d49d682 Fix valgrind errors 2026-04-29 19:41:00 +02:00
seeseemelk cc25563cd2 Cleanup 2026-04-29 19:23:59 +02:00
seeseemelk 323a599399 Build with debug symbols 2026-04-29 18:53:02 +02:00
seeseemelk ec896495a3 Fix infinite loop bug 2026-04-29 14:40:06 +02:00
seeseemelk eb4b0495f2 Working on parser refactor 2026-04-29 14:36:42 +02:00
seeseemelk 1f40c8f5ee Refactor tests a bit more 2026-04-29 13:25:41 +02:00
seeseemelk 98d58a2169 Refactor tests 2026-04-29 13:09:14 +02:00
seeseemelk f0621a8076 Refactor parser 2026-04-29 11:53:26 +02:00
seeseemelk 84747028f5 Ensure alias and import can be mixed 2026-04-29 11:46:02 +02:00
seeseemelk f90cad2b96 Use proper public keyword 2026-04-29 11:43:14 +02:00
seeseemelk e09bd72441 Update ast interface 2026-04-29 11:24:42 +02:00
seeseemelk 9035cc639c Add alias to ast 2026-04-29 11:18:40 +02:00
seeseemelk 3288efdfd7 Refactor test interface 2026-04-29 10:59:06 +02:00
seeseemelk 34b7939f76 Refactor parser to C11 and update build configuration 2026-04-29 10:38:34 +02:00
seeseemelk 15714393c3 Refactor parser to use Token in AST and update tests 2026-04-29 10:35:12 +02:00
seeseemelk 146aa4d9d1 Convert codebase to C89 compatibility and update test scripts 2026-04-29 10:21:29 +02:00
seeseemelk 189c21667b Ignore intellij files 2026-04-28 16:07:46 +02:00
seeseemelk abdc6d67dc Re-order log lines 2026-04-28 16:06:21 +02:00
seeseemelk d89833b705 Add TYPES documentation 2026-04-28 16:06:12 +02:00
seeseemelk bfb3b69be1 fix: add util.c to source files 2026-04-26 22:48:31 +02:00
seeseemelk dc523c8d3c chore: remove legacy v0/string.h 2026-04-26 22:42:10 +02:00
seeseemelk 05dfb3725b fix: replace unsafe fixed-size buffers with dynamic formatting helpers; add util format helpers; centralize log_on_line cleanup 2026-04-26 22:42:10 +02:00
seeseemelk 70998643fb Add AGENTS.md 2026-04-26 22:30:51 +02:00
seeseemelk 129036b539 Fix all valgrind errors 2026-04-26 22:13:39 +02:00
seeseemelk dbc69eddc8 Update test target to use valgrind 2026-04-26 21:35:14 +02:00
seeseemelk 421338d995 Fix log header generation and EOF location reporting 2026-04-26 21:34:28 +02:00
seeseemelk f33e8d3e25 Update log headers 2026-04-26 21:19:59 +02:00
seeseemelk c219a303ec Fix error reporting position and match updated log headers 2026-04-26 21:16:50 +02:00
seeseemelk 9449f16e02 Implement tokenstream_get_test and simplified assert_log_file using test names 2026-04-26 20:31:17 +02:00
seeseemelk e910c01348 Refactor golden files to follow xyz_log_ and xyz_src_ naming convention 2026-04-25 20:05:16 +02:00
seeseemelk a6bdadac0c Add public imports
Co-authored-by: Copilot <copilot@github.com>
2026-04-25 15:28:33 +02:00
seeseemelk 63dd5fa5c9 Implement public import parsing and add test case 2026-04-25 15:06:20 +02:00
seeseemelk d8544d7743 Add new rule to agent instructions 2026-04-25 14:38:35 +02:00
seeseemelk 91593e12b7 Add error logging and corresponding tests for parser syntax errors 2026-04-25 14:37:08 +02:00
seeseemelk 7c7e0c3272 Add import parsing
Co-authored-by: Copilot <copilot@github.com>
2026-04-25 14:30:11 +02:00
seeseemelk 116bdecafe Implement String structure and update Location/Token to use it 2026-04-25 14:17:17 +02:00
seeseemelk 902e2f0325 Update log_on_line to take Location* instead of individual fields 2026-04-24 22:13:29 +02:00
seeseemelk 26a1d0285e Refactor Token to use Location struct 2026-04-24 22:07:00 +02:00
seeseemelk a89e61eedd Introduce golden file mechanism for tests 2026-04-24 21:09:47 +02:00
seeseemelk 9ca72ef5bf Split test 2026-04-24 20:41:57 +02:00
seeseemelk 0306530fe8 Better logging in tokenstream 2026-04-24 20:40:31 +02:00
seeseemelk 451a9a2a22 Token refactor and better logs 2026-04-24 20:28:08 +02:00
seeseemelk da3425ec10 All target run tests 2026-04-24 20:04:43 +02:00
seeseemelk e021a2d63e During test, log to in-memory log 2026-04-24 20:04:00 +02:00
seeseemelk 0e826e05e1 Add log framework 2026-04-24 15:14:15 +02:00
seeseemelk 78899f32a6 Update copilot instructions 2026-04-24 14:59:53 +02:00
seeseemelk 0fa7b599ed Implement assert_str and assert_not_null and update tests 2026-04-24 14:57:52 +02:00
86 changed files with 2012 additions and 272 deletions
+16 -3
View File
@@ -23,11 +23,24 @@ For instance, a test for `buffer.c` must be called `test_buffer.c`.
There will be no `test_buffer.h`. Instead, `test.c` will directly
`#include` the C source file.
Every syntax error path identified in the parser MUST have a corresponding test.
## Language Syntax
Since this is a compiler for a new language, do not assume anything
of its syntax.
Always check the `specs` directory.
Since this is a compiler for a new language, do not assume anything of its syntax.
Always check the `specs` directory to see examples and documentation about the language.
If there is anything unclear, ask the user for clarification.
It is certainly possible that there are contradictions in the
spec that have to be solved first.
## Committing
Often, the user modifies an interface (typically in a header file), and then asks
the agent to update the implementation.
When creating a commit, make sure that both the user's and the agent's modifications
are included in the commit.
Only create a commit when specifically asked for that. Never assume implicitly that the
user wants you to create a commit.
Even if they asked you to create a commit in an earlier task, it does not mean that
you should also create a commit in a later task.
+2
View File
@@ -1 +1,3 @@
/c2
/.idea/*
!/.idea/c_cpp_properties.json
+12
View File
@@ -0,0 +1,12 @@
{
"configurations": [
{
"name": "CLion",
"includePath": [
"${workspaceFolder}/v0/*"
],
"cStandard": "c89"
}
],
"version": 4
}
Symlink
+1
View File
@@ -0,0 +1 @@
.github/copilot-instructions.md
+9 -1
View File
@@ -1,13 +1,21 @@
.PHONY: all test clean
all: c2
all: c2 test integration-test
c2: v0/bin/c2
cp $< $@
test::
generate_golden::
clean::
rm -f c2
include v0/include.mk
integration-test: v0/bin/c2 v0/bin/test_integration
./v0/bin/test_integration
v0/bin/test_integration: v0/test_integration.c
$(CC) $(CFLAGS) -o $@ $<
+16
View File
@@ -13,5 +13,21 @@ In order to run the tests, run `make test`.
## Versioning
The current version is v0. Its source code lives in the `v0` directory.
## Testing
### Unit Tests
Run unit tests with:
```bash
make test
```
### Integration Tests
Integration tests compare the compiler output with expected C files.
To add a new integration test, create a new directory under `v0/integration_tests/` with `input.c2` and `expected.c` files.
Run integration tests with:
```bash
make integration-test
```
## Language Specifications
See the specs directory for information on the actual language syntax.
+1
View File
@@ -0,0 +1 @@
Hello, world
+1
View File
@@ -12,3 +12,4 @@ void main() {
puts("Hello, world!");
}
```
.
+31
View File
@@ -0,0 +1,31 @@
# Imports
The import statement allows one module access to the public declarations of another module.
## Syntax
The import statement uses the following syntax:
```c2
import module_name;
```
They can optionally be prefixed by the `public` keyword, in which case the module will
export everything in the import transitively.
For instance,
```c2
--- a.c2
module a;
import b;
--- b.c2
module b;
public import c;
--- c.c2
module c;
// Some declarations
```
In this example, both module a and b can access the declarations in module c.
+25
View File
@@ -0,0 +1,25 @@
# Types
C2 has both built-in types and user-defined types.
## Builtin types
C2 has the following built-in types:
- `void`
- `i8`
- `i16`
- `i32`
- `i64`
- `u8`
- `u16`
- `u32`
- `u64`
## Type Aliases
Types can be aliased to different names using the alias keyword.
Here's a list of the default builtin aliases.
```c2
alias int = i32;
alias uint = u32;
alias char = u8;
alias string = char[];
```
+24
View File
@@ -0,0 +1,24 @@
# Variables
Variables can be defined in the global scope, in structs and classes, and in functions.
## Global variables
Global variables can be defined as such:
```c2
// Defines a global variable called my_var.
i32 my_var;
// Defines a const variable.
const i32 my_var;
// Defines a global variable whose type is determined automatically.
// The value will be determined at runtime.
var my_var = 123;
// Defines a const variable whose type is determined automatically.
const my_var = 123;
// Defines a global variable whose initial value is computed at compile-time.
// If it cannot be computed at compile-time, an error is thrown.
static my_var = 123;
```
+11 -6
View File
@@ -4,13 +4,18 @@
#ifndef AST_H
#define AST_H
#include "ast/expression.h"
#include "ast/declaration.h"
#include "ast/module.h"
/**
* The top-level model.
* Every file matches an entire Module.
* Frees a module and all its children.
*/
typedef struct {
/// @brief The name of the module.
char* name;
} Module;
void ast_free_module(ModuleTree* module);
/**
* Frees a type expression.
*/
void ast_free_type(TypeTree* type);
#endif
+49
View File
@@ -0,0 +1,49 @@
#ifndef AST_DECLARATION_H
#define AST_DECLARATION_H
#include "expression.h"
#include "../bool.h"
/**
 * An import declaration found in a module.
 */
typedef struct {
/**
 * @brief The name of the module being imported.
 * Heap-allocated; released by ast_free_module().
 */
char* module_name;
/** @brief Whether the import is public or not. */
bool is_public;
} ImportTree;
/**
 * A declaration that aliases one type to another.
 */
typedef struct {
/**
 * @brief The name of the alias.
 * Declared const, but ast_free_module() frees it (through a cast), so it must
 * point at heap memory owned by this node.
 */
const char* name;
/**
 * @brief The value of the alias.
 * Stored by value; its nested allocations are released with ast_free_type().
 */
TypeTree value;
} AliasTree;
/**
 * A declaration of a variable, which may be a constant or not, and may be static or not.
 */
typedef struct {
/** @brief The name of the variable. Heap-allocated; released by ast_free_module(). */
char* name;
/**
 * @brief The type of the variable.
 * NOTE(review): when the source gives no explicit type the parser leaves this
 * zero-filled (a builtin with bitSize 0) - confirm how consumers detect that.
 */
TypeTree type;
/**
 * @brief The optional initializer expression.
 * NULL when the declaration has no initializer; otherwise heap-allocated and
 * released by ast_free_module().
 */
ExpressionTree* initializer;
/** @brief Whether the variable is public or not. */
bool is_public;
/** @brief Whether the variable is static or not. */
bool is_static;
/** @brief Whether the variable is a constant or not. */
bool is_const;
} VariableTree;
#endif
+9
View File
@@ -0,0 +1,9 @@
#include "expression.h"
#include <stdlib.h>
/**
 * Releases the heap memory owned by a type expression.
 *
 * Only the nested element type of an array is heap-allocated; the node passed
 * in is NOT freed, because callers embed TypeTree by value inside other nodes.
 *
 * @param expr The type expression to clean up. May be NULL, in which case the
 *             call does nothing.
 */
void ast_free_type(TypeTree* expr) {
    if (expr == NULL) {
        return;
    }

    if (expr->tag == TYPE_TREE_ARRAY) {
        /* Recurse first so nested array element types are released as well. */
        ast_free_type(expr->array.array);
        free(expr->array.array);
        /* Guard against a double free if the node is cleaned up twice. */
        expr->array.array = NULL;
    }
}
+52
View File
@@ -0,0 +1,52 @@
#ifndef AST_EXPRESSION_H
#define AST_EXPRESSION_H
#include "../bool.h"
/**
 * Selects which member of the ExpressionTree union is valid.
 */
typedef enum {
/** The expression is an integer literal (`integer` member). */
EXPRESSION_TREE_INTEGER,
/** The expression is a string literal (`string` member). */
EXPRESSION_TREE_STRING,
/** The expression is a boolean literal (`boolean` member). */
EXPRESSION_TREE_BOOLEAN
} ExpressionTreeTag;
/**
 * A literal expression. `tag` tells which union member holds the value.
 */
typedef struct {
/** @brief Defines which entry in the union is valid. */
ExpressionTreeTag tag;
/* Anonymous union: members are accessed directly on the ExpressionTree (C11 feature). */
union {
/** @brief Value when tag is EXPRESSION_TREE_INTEGER. */
int integer;
/**
 * @brief Value when tag is EXPRESSION_TREE_STRING.
 * ast_free_module() frees this pointer for variable initializers, so it must
 * be heap-allocated.
 */
const char* string;
/** @brief Value when tag is EXPRESSION_TREE_BOOLEAN. */
bool boolean;
};
} ExpressionTree;
/**
 * Selects which member of the TypeTree union is valid.
 */
typedef enum {
/** A builtin integer type (`builtin` member). */
TYPE_TREE_BUILTIN,
/** An array of another type (`array` member). */
TYPE_TREE_ARRAY
} TypeTreeTag;
/**
 * An expression that evaluates to a type.
 *
 * The forward typedef is needed because the array variant points back at
 * another TypeTree.
 */
typedef struct TypeTree TypeTree;
struct TypeTree {
/** @brief defines which entry in the union is valid */
TypeTreeTag tag;
/* Anonymous union: `array` and `builtin` are accessed directly on the TypeTree. */
union {
/** @brief Evaluates to an array of the given type. */
struct {
/**
 * @brief A pointer to the type of the elements stored in the array.
 * Heap-allocated; released by ast_free_type().
 */
TypeTree* array;
} array;
/** @brief Evaluates to a builtin integer type.*/
struct {
/**
 * @brief The number of bits in the integer.
 * Typical values are 8, 16, 32, and 64.
 */
int bitSize;
/** @brief `true` if the type is signed, `false` if it's unsigned. */
bool isSigned;
} builtin;
};
};
#endif
+3
View File
@@ -0,0 +1,3 @@
# Source files of the AST module.
# They contain the logic that frees the tree structures.
AST_SRC := v0/ast/module.c v0/ast/expression.c
+43
View File
@@ -0,0 +1,43 @@
#include "module.h"
#include "expression.h"
#include <stdlib.h>
void ast_free_type(TypeTree* type);
/**
 * Destroys a module tree: every import, alias and variable it holds, the
 * module name, and finally the ModuleTree allocation itself.
 *
 * @param module The tree to destroy. NULL is accepted and ignored.
 */
void ast_free_module(ModuleTree* module) {
    size_t idx;

    if (!module) {
        return;
    }

    /* Imports: each entry owns only its module name. */
    if (module->imports) {
        for (idx = 0; idx < module->import_count; ++idx) {
            ImportTree* import = &module->imports[idx];
            free(import->module_name);
        }
        free(module->imports);
    }

    /* Aliases: the name is declared const, hence the cast before freeing. */
    if (module->aliases) {
        for (idx = 0; idx < module->alias_count; ++idx) {
            AliasTree* alias = &module->aliases[idx];
            free((void*)alias->name);
            ast_free_type(&alias->value);
        }
        free(module->aliases);
    }

    /* Variables: name, embedded type, and the optional initializer. */
    if (module->variables) {
        for (idx = 0; idx < module->variable_count; ++idx) {
            VariableTree* variable = &module->variables[idx];
            ExpressionTree* init = variable->initializer;

            free(variable->name);
            ast_free_type(&variable->type);
            if (init) {
                /* Only string literals carry a nested allocation. */
                if (init->tag == EXPRESSION_TREE_STRING) {
                    free((void*)init->string);
                }
                free(init);
            }
        }
        free(module->variables);
    }

    free(module->name);
    free(module);
}
+34
View File
@@ -0,0 +1,34 @@
#ifndef AST_MODULE_H
#define AST_MODULE_H
#include "declaration.h"
#include <stddef.h>
/**
 * The top-level model.
 * Every file matches an entire Module.
 *
 * A ModuleTree and everything it points at is released with ast_free_module().
 */
typedef struct {
/** @brief The name of the module. Heap-allocated. */
char* name;
/** @brief The list of imports in the module. Heap-allocated array, or NULL when empty. */
ImportTree* imports;
/** @brief The number of imports in the module. */
size_t import_count;
/** @brief The list of aliases in the module. Heap-allocated array, or NULL when empty. */
AliasTree* aliases;
/** @brief The number of aliases in the module. */
size_t alias_count;
/** @brief The list of variables in the module. Heap-allocated array, or NULL when empty. */
VariableTree* variables;
/** @brief The number of variables in the module. */
size_t variable_count;
} ModuleTree;
#endif
+10
View File
@@ -0,0 +1,10 @@
/* Minimal boolean type for C89 compatibility */
/*
 * `bool` is a plain `int` here: any non-zero value is truthy, but only the
 * value 1 compares equal to `true`.
 *
 * NOTE(review): do not include <stdbool.h> in the same translation unit. In
 * C99-C17 it defines `bool` as a macro for `_Bool`, which would turn the
 * typedef below into `typedef int _Bool;`. In C23 `bool`, `true` and `false`
 * are keywords and this header would no longer compile.
 */
#ifndef BOOL_H
#define BOOL_H
typedef int bool;
#define true 1
#define false 0
#endif
+18 -2
View File
@@ -1,4 +1,7 @@
V0_SRC := v0/main.c v0/token.c v0/parser.c
include v0/ast/include.mk
include v0/parser/include.mk
V0_SRC := v0/main.c v0/util.c v0/token.c $(AST_SRC) $(PARSER_SRC) v0/log.c v0/str.c
# V0_TEST must only include `v0/test.c` itself, as all other test C source files are
# included directly into `v0/test.c` using `#include "test_xyz.c"`.
@@ -11,6 +14,8 @@ V0_TEST_OBJ := $(patsubst v0/%.c,v0/bin/%.o,$(V0_TEST))
V0_SRC_DEPS := $(V0_SRC_OBJ:.o=.d)
V0_TEST_DEPS := $(V0_TEST_OBJ:.o=.d)
CFLAGS += -Werror -Wall -pedantic -std=c11 -g
v0/bin/c2: $(V0_SRC_OBJ)
$(CC) $(CFLAGS) -o $@ $^
@@ -19,8 +24,19 @@ V0_SRC_OBJ_NO_MAIN := $(filter-out v0/bin/main.o,$(V0_SRC_OBJ))
v0/bin/test: $(V0_SRC_OBJ_NO_MAIN) $(V0_TEST_OBJ)
$(CC) $(CFLAGS) -o $@ $^
# Only run tests under valgrind on Linux. On macOS (Darwin) valgrind is
# typically unavailable or unsupported, so run the test binary directly.
ifeq ($(shell uname -s),Linux)
TEST_CMD := valgrind --quiet --leak-check=full --error-exitcode=1 v0/bin/test
else
TEST_CMD := v0/bin/test
endif
test:: v0/bin/test
v0/bin/test
$(TEST_CMD)
generate_golden:: v0/bin/test
GENERATE_GOLDEN=1 v0/bin/test
clean::
rm -f v0/bin/test v0/bin/c2 $(V0_SRC_OBJ) $(V0_TEST_OBJ) $(V0_SRC_DEPS) $(V0_TEST_DEPS)
+4
View File
@@ -0,0 +1,4 @@
#include <stdint.h>
// u32 simple:x
static uint32_t v_6simple_1x = 123;
+2
View File
@@ -0,0 +1,2 @@
module simple;
u32 x = 123;
+28
View File
@@ -0,0 +1,28 @@
/**
* Location handling for error reporting.
*/
#ifndef LOCATION_H
#define LOCATION_H
#include "str.h"
#include <stddef.h>
/**
 * Describes where a token was found, with enough context to print the
 * offending source line in an error message.
 */
typedef struct {
/** @brief The name of the file where the token was found. May be NULL or empty. */
char* filename;
/** @brief The entire line of text where the token was found. */
String line_text;
/** @brief The line number where the token was found. */
int line;
/**
 * @brief The starting column number where the token was found.
 * NOTE(review): appears to be 1-based, since log_on_line() subtracts 1 when
 * indenting the caret line - confirm against the tokenizer.
 */
int column_start;
/**
 * @brief The ending column number where the token was found.
 * Inclusive: log_on_line() draws column_end - column_start + 1 carets.
 */
int column_end;
} Location;
#endif
+87
View File
@@ -0,0 +1,87 @@
#include "log.h"
#include "util.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
static LogError* s_logError = NULL;
/*
 * Installs the callback that receives every message passed to log_error().
 * Passing NULL makes log_error() fall back to printing on stderr.
 */
void log_set_output(LogError* destination) {
s_logError = destination;
}
/*
 * Delivers an error message to the configured destination.
 * Without a destination installed via log_set_output(), the message is
 * written to stderr with an "Error: " prefix and a trailing newline.
 */
void log_error(const char* msg) {
    if (s_logError == NULL) {
        fprintf(stderr, "Error: %s\n", msg);
        return;
    }

    s_logError(msg);
}
/*
 * Logs an error together with the source line it refers to.
 *
 * The message handed to log_error() has this shape:
 *
 *     --- <filename> ---
 *     <line>| <text of the line>
 *            ^^^^
 *            <formatted message>
 *
 * loc  supplies the file name, line number, line text and column range.
 * msg  is a printf-style format string; the variadic arguments are formatted
 *      into it with format_string_va().
 *
 * If any allocation fails, nothing is logged.
 */
void log_on_line(Location* loc, const char* msg, ...) {
    /* Declarations first for C89 */
    char* line_prefix = NULL;
    char* formatted_msg = NULL;
    char* header = NULL;
    char* buffer = NULL;
    va_list args;
    char* p;
    int caret_span;
    int indent_span;
    size_t prefix_len;
    size_t caret_len;
    size_t indent_len;
    size_t total_size;

    line_prefix = format_string("%d| ", loc->line);
    if (!line_prefix) goto cleanup;
    /* Measured once; the prefix width is reused for both indented lines. */
    prefix_len = strlen(line_prefix);

    /* Always underline at least one column, even for an empty or inverted range. */
    caret_span = loc->column_end - loc->column_start + 1;
    caret_len = caret_span < 1 ? 1 : (size_t)caret_span;

    /*
     * Clamp the caret indentation at zero. The buffer size below is derived
     * from the clamped value, so a column below 1 can no longer make the
     * allocation smaller than what is written.
     */
    indent_span = (int)prefix_len + loc->column_start - 1;
    indent_len = indent_span < 0 ? 0 : (size_t)indent_span;

    /* Format the message */
    va_start(args, msg);
    formatted_msg = format_string_va(msg, args);
    va_end(args);
    if (!formatted_msg) goto cleanup;

    /* Header logic */
    if (loc->filename && loc->filename[0] != '\0') {
        header = format_string("--- %s ---\n", loc->filename);
    } else {
        header = format_string("--- \n");
    }
    if (!header) goto cleanup;

    total_size = strlen(header) +
                 prefix_len + loc->line_text.length + 1 +   /* line| text\n */
                 indent_len + caret_len + 1 +               /* indent + ^^\n */
                 prefix_len + strlen(formatted_msg) + 1 +   /* indent + msg\n */
                 1;                                         /* terminating NUL */

    buffer = (char*)malloc(total_size);
    if (!buffer) goto cleanup;

    p = buffer;
    p += sprintf(p, "%s", header);
    p += sprintf(p, "%s%.*s\n", line_prefix, (int)loc->line_text.length, loc->line_text.data);

    /* Caret line */
    memset(p, ' ', indent_len);
    p += indent_len;
    memset(p, '^', caret_len);
    p += caret_len;
    *p++ = '\n';

    /* Message line, aligned with the start of the source text */
    memset(p, ' ', prefix_len);
    p += prefix_len;
    p += sprintf(p, "%s\n", formatted_msg);
    *p = '\0';

    log_error(buffer);

cleanup:
    free(line_prefix);
    free(formatted_msg);
    free(header);
    free(buffer);
}
+36
View File
@@ -0,0 +1,36 @@
/**
* Contains the logging framework used for logging errors during compilation.
*/
#ifndef LOG_H
#define LOG_H
#include "location.h"
/**
* A method that can log an error.
*/
typedef void LogError(const char* msg);
/**
* Sets the destination for log errors.
*/
void log_set_output(LogError* destination);
/**
* Logs an error to the destination.
*/
void log_error(const char* msg);
/**
* Logs a pretty error with additional information about the line where the error occurred.
*
* The @p msg parameter can contain format specifiers like printf, and the additional arguments will be formatted into the message.
* It additionally supports the `%S` format specifier, which can be used to format a `String` structure from `str.h`.
*
* @param loc The location where the error occurred.
* @param msg The error message to log. This can contain format specifiers like printf, and the additional arguments will be formatted into the message.
* @param ... Additional arguments to format into the error message.
*/
void log_on_line(Location* loc, const char* msg, ...);
#endif
+1
View File
@@ -2,4 +2,5 @@
/* Program entry point: prints a greeting and exits successfully. argc and argv are not used. */
int main(int argc, char** argv) {
puts("Hello, world");
return 0;
}
-45
View File
@@ -1,45 +0,0 @@
#include "parser.h"
#include <stdlib.h>
#include <string.h>
/*
 * Parses a `module <identifier> ;` header from the token stream.
 *
 * Returns a heap-allocated Module holding a NUL-terminated copy of the
 * identifier, or NULL when a token does not match or an allocation fails.
 * The result is released with parser_free().
 */
Module* parser_parse(TokenStream* ts) {
Token t = tokenstream_next(ts);
if (t != TOKEN_MODULE) {
return NULL;
}
t = tokenstream_next(ts);
if (t != TOKEN_IDENTIFIER) {
return NULL;
}
/* Fetch the text of the identifier token that was just read. */
TokenInfo info;
tokenstream_info(ts, &info);
Module* module = (Module*)malloc(sizeof(Module));
if (module == NULL) return NULL;
/* +1 for the terminator; the token text is copied by length. */
module->name = (char*)malloc(info.text_length + 1);
if (module->name == NULL) {
free(module);
return NULL;
}
memcpy(module->name, info.text, info.text_length);
module->name[info.text_length] = '\0';
t = tokenstream_next(ts);
if (t != TOKEN_SEMICOLON) {
/* Undo both allocations before reporting failure. */
free(module->name);
free(module);
return NULL;
}
return module;
}
/* Releases a Module and its name. A NULL module is ignored. */
void parser_free(Module* module) {
if (module == NULL) return;
free(module->name);
free(module);
}
+1 -8
View File
@@ -10,13 +10,6 @@
* @param ts The TokenStream to read.
* @returns The parsed module.
*/
Module* parser_parse(TokenStream* ts);
/**
* Frees the parsed AST.
*
* @param module The AST return by parser_parse.
*/
void parser_free(Module* module);
ModuleTree* parser_parse(TokenStream* ts);
#endif
+52
View File
@@ -0,0 +1,52 @@
#include "internal.h"
#include "../str.h"
#include "../log.h"
#include <stdlib.h>
/* Replaces the parser's current token with the next one read from its token stream. */
void parser_next_token(Parser* p) {
p->token = tokenstream_next(p->ts);
}
/*
 * Consumes the current token if it has the given type.
 * Returns true when a token was consumed; otherwise the parser is left
 * untouched and false is returned.
 */
bool parser_accept(Parser* p, TokenType token) {
    if (p->token.token != token) {
        return false;
    }

    parser_next_token(p);
    return true;
}
/*
 * Like parser_accept(), but a mismatch is a syntax error: `msg` is logged at
 * the location of the current (unconsumed) token and false is returned.
 *
 * `msg` is logged verbatim. It is passed as an argument to a "%s" format
 * rather than as the format string itself, so a '%' in the message cannot be
 * misread as a conversion specifier.
 */
bool parser_expect(Parser* p, TokenType token, const char* msg) {
    if (parser_accept(p, token)) {
        return true;
    }

    log_on_line(&p->token.location, "%s", msg);
    return false;
}
/* Reports whether the current token has the given type, without consuming it. */
bool parser_peek(Parser* p, TokenType token) {
    return p->token.token == token;
}
/*
 * Like parser_peek(), but a mismatch is a syntax error: `msg` is logged at the
 * location of the current token and false is returned. The token is never
 * consumed, so the caller can still read its text on success.
 *
 * `msg` is logged verbatim. It is passed as an argument to a "%s" format
 * rather than as the format string itself, so a '%' in the message cannot be
 * misread as a conversion specifier.
 */
bool parser_require(Parser* p, TokenType token, const char* msg) {
    if (parser_peek(p, token)) {
        return true;
    }

    log_on_line(&p->token.location, "%s", msg);
    return false;
}
/*
 * Copies the text of the current token and then advances to the next token.
 * The copy comes from string_copy(); the AST stores the result and releases it
 * with free(), so the caller owns the returned string.
 */
char* parser_to_text(Parser* p) {
    char* text_copy;

    /* The copy must be taken before advancing, which overwrites p->token. */
    text_copy = string_copy(p->token.text);
    parser_next_token(p);

    return text_copy;
}
/*
 * Reports whether the current token is one of the builtin integer type
 * keywords (i8..i64, u8..u64).
 *
 * Despite the "accept" in its name this only peeks: the token is not consumed.
 */
bool parser_accept_primitive(Parser* p) {
    const TokenType primitives[] = {
        TOKEN_I8, TOKEN_I16, TOKEN_I32, TOKEN_I64,
        TOKEN_U8, TOKEN_U16, TOKEN_U32, TOKEN_U64
    };
    size_t idx;

    for (idx = 0; idx < sizeof primitives / sizeof primitives[0]; idx++) {
        if (parser_peek(p, primitives[idx])) {
            return true;
        }
    }
    return false;
}
+87
View File
@@ -0,0 +1,87 @@
#include "internal.h"
#include <stdlib.h>
#include <string.h>
/*
 * Parses the remainder of an import declaration (`<identifier> ;`) and appends
 * it to module->imports. The `import` keyword has already been consumed by the
 * caller.
 *
 * Returns false on a syntax error (which is logged) or when the import list
 * cannot be grown. A partially filled entry may remain in the module; it is
 * zero-initialized, so ast_free_module() can release it safely.
 */
bool parse_import_declaration(Parser* p, ModuleTree* module, bool is_public) {
    /*
     * Grow through a temporary and bump the count only afterwards: on failure
     * the existing list and its count stay consistent.
     */
    ImportTree* grown = realloc(module->imports, sizeof *grown * (module->import_count + 1));
    if (grown == NULL) {
        return false;
    }
    module->imports = grown;

    ImportTree* import = &grown[module->import_count++];
    memset(import, 0, sizeof *import);
    import->is_public = is_public;

    if (!parser_require(p, TOKEN_IDENTIFIER, "expected module identifier")) {
        return false;
    }
    import->module_name = parser_to_text(p);

    if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after import")) {
        return false;
    }
    return true;
}
/*
 * Parses the remainder of an alias declaration (`<identifier> = <type> ;`) and
 * appends it to module->aliases. The `alias` keyword has already been consumed
 * by the caller. `is_public` is accepted for symmetry with the other
 * declaration parsers but is not recorded.
 *
 * Returns false on a syntax error (which is logged) or when the alias list
 * cannot be grown. A partially filled entry may remain in the module; it is
 * zero-initialized, so ast_free_module() can release it safely.
 */
bool parse_alias_declaration(Parser* p, ModuleTree* module, bool is_public) {
    (void)is_public;

    /*
     * Grow through a temporary and bump the count only afterwards: on failure
     * the existing list and its count stay consistent.
     */
    AliasTree* grown = realloc(module->aliases, sizeof *grown * (module->alias_count + 1));
    if (grown == NULL) {
        return false;
    }
    module->aliases = grown;

    AliasTree* alias = &grown[module->alias_count++];
    memset(alias, 0, sizeof *alias);

    if (!parser_require(p, TOKEN_IDENTIFIER, "expected alias identifier")) {
        return false;
    }
    alias->name = parser_to_text(p);

    if (!parser_expect(p, TOKEN_ASSIGN, "expected '=' after alias name")) {
        return false;
    }
    if (!parse_type_expression(p, &alias->value)) {
        return false;
    }
    if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after alias declaration")) {
        return false;
    }
    return true;
}
/*
 * Parses a variable declaration (`[<type>] <identifier> [= <expression>] ;`)
 * and appends it to module->variables. Modifiers and the `var` keyword have
 * already been consumed by the caller and arrive as flags.
 *
 * Returns false on a syntax error (which is logged) or when memory cannot be
 * allocated. A partially filled entry may remain in the module; every field is
 * zero-initialized first, so ast_free_module() can release it safely.
 */
bool parse_variable_declaration(Parser* p, ModuleTree* module, bool is_public, bool is_static, bool is_const) {
    /*
     * Grow through a temporary and bump the count only afterwards: on failure
     * the existing list and its count stay consistent.
     */
    VariableTree* grown = realloc(module->variables, sizeof *grown * (module->variable_count + 1));
    if (grown == NULL) {
        return false;
    }
    module->variables = grown;

    VariableTree* var = &grown[module->variable_count++];
    memset(var, 0, sizeof *var);
    var->is_public = is_public;
    var->is_static = is_static;
    var->is_const = is_const;

    /* The type is optional; it is only parsed when a primitive keyword is next. */
    if (parser_accept_primitive(p)) {
        if (!parse_type_expression(p, &var->type)) {
            return false;
        }
    }

    if (!parser_require(p, TOKEN_IDENTIFIER, "expected variable identifier")) {
        return false;
    }
    var->name = parser_to_text(p);

    if (parser_accept(p, TOKEN_ASSIGN)) {
        /*
         * Zero-filled on purpose: if parse_expression() fails before setting
         * the tag, ast_free_module() sees tag 0 (EXPRESSION_TREE_INTEGER) and
         * does not try to free an uninitialized string pointer.
         */
        var->initializer = calloc(1, sizeof *var->initializer);
        if (var->initializer == NULL) {
            return false;
        }
        if (!parse_expression(p, var->initializer)) {
            return false;
        }
    }

    if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after variable declaration")) {
        return false;
    }
    return true;
}
+98
View File
@@ -0,0 +1,98 @@
#include "internal.h"
#include "../log.h"
#include <stdlib.h>
/*
 * Parses one builtin integer type keyword into `expr`.
 *
 * On success the keyword is consumed and `expr` becomes a TYPE_TREE_BUILTIN
 * with the matching bit size and signedness. Otherwise "expected type
 * expression" is logged at the current token, `expr` is left untouched and
 * false is returned.
 */
bool parse_primitive_type_expression(Parser* p, TypeTree* expr) {
    /* Keyword -> (bit size, signedness). At most one entry can match. */
    const struct {
        TokenType keyword;
        int bits;
        bool is_signed;
    } primitives[] = {
        { TOKEN_U8,   8, false },
        { TOKEN_U16, 16, false },
        { TOKEN_U32, 32, false },
        { TOKEN_U64, 64, false },
        { TOKEN_I8,   8, true },
        { TOKEN_I16, 16, true },
        { TOKEN_I32, 32, true },
        { TOKEN_I64, 64, true }
    };
    size_t idx;

    for (idx = 0; idx < sizeof primitives / sizeof primitives[0]; idx++) {
        if (parser_accept(p, primitives[idx].keyword)) {
            expr->tag = TYPE_TREE_BUILTIN;
            expr->builtin.bitSize = primitives[idx].bits;
            expr->builtin.isSigned = primitives[idx].is_signed;
            return true;
        }
    }

    log_on_line(&p->token.location, "expected type expression");
    return false;
}
/*
 * Parses a primitive type optionally followed by `[]`.
 *
 * Without brackets `expr` receives the primitive type itself. With brackets
 * `expr` becomes a TYPE_TREE_ARRAY whose element type is a heap-allocated copy
 * of the primitive; that copy is released by ast_free_type().
 *
 * Returns false on a syntax error (which is logged) or when the element type
 * cannot be allocated; in the latter case `expr` is left untouched.
 */
bool parse_array_type_expression(Parser* p, TypeTree* expr) {
    TypeTree elementType;

    if (!parse_primitive_type_expression(p, &elementType)) {
        return false;
    }

    if (!parser_accept(p, TOKEN_BRACKET_OPEN)) {
        *expr = elementType;
        return true;
    }

    /* Allocate before touching expr so a failure leaves it in its prior state. */
    TypeTree* element = malloc(sizeof *element);
    if (element == NULL) {
        return false;
    }
    *element = elementType;

    expr->tag = TYPE_TREE_ARRAY;
    expr->array.array = element;

    /* If ']' is missing, expr still owns `element`; the caller's cleanup frees it. */
    return parser_expect(p, TOKEN_BRACKET_CLOSE, "expected ']' to end array type");
}
/*
 * Entry point for parsing any type expression.
 * Currently forwards to parse_array_type_expression().
 */
bool parse_type_expression(Parser* p, TypeTree* expr) {
return parse_array_type_expression(p, expr);
}
/*
 * Parses a literal expression (integer, string, `true` or `false`) into `expr`.
 *
 * On success the literal is consumed and true is returned. Otherwise
 * "expected expression" is logged at the current token and false is returned.
 * For string literals `expr->string` is a heap copy of the token text.
 */
bool parse_expression(Parser* p, ExpressionTree* expr) {
    if (parser_peek(p, TOKEN_INTEGER)) {
        expr->tag = EXPRESSION_TREE_INTEGER;
        /*
         * strtol instead of atoi: atoi has undefined behavior when the value
         * is out of range, whereas strtol saturates at LONG_MIN/LONG_MAX.
         * NOTE(review): a literal that does not fit in int is still silently
         * narrowed here - consider reporting it as an error.
         * NOTE(review): this relies on the digits in token.text.data being
         * followed by a non-digit character - confirm against the tokenizer.
         */
        expr->integer = (int)strtol(p->token.text.data, NULL, 10);
        parser_next_token(p);
        return true;
    }

    if (parser_peek(p, TOKEN_STRING)) {
        expr->tag = EXPRESSION_TREE_STRING;
        expr->string = parser_to_text(p);
        return true;
    }

    if (parser_accept(p, TOKEN_TRUE)) {
        expr->tag = EXPRESSION_TREE_BOOLEAN;
        expr->boolean = true;
        return true;
    }

    if (parser_accept(p, TOKEN_FALSE)) {
        expr->tag = EXPRESSION_TREE_BOOLEAN;
        expr->boolean = false;
        return true;
    }

    log_on_line(&p->token.location, "expected expression");
    return false;
}
+1
View File
@@ -0,0 +1 @@
PARSER_SRC := v0/parser/core.c v0/parser/expression.c v0/parser/declaration.c v0/parser/module.c
+36
View File
@@ -0,0 +1,36 @@
#ifndef PARSER_INTERNAL_H
#define PARSER_INTERNAL_H
#include "../parser.h"
#include "../token.h"
#include "../ast.h"
/**
 * Parser state shared by all parse_* functions: the token source plus a
 * single token of lookahead.
 */
typedef struct {
/** @brief The stream tokens are read from. */
TokenStream* ts;
/** @brief The current (not yet consumed) token; replaced by parser_next_token(). */
Token token;
} Parser;
// Core functions
void parser_next_token(Parser* p);
bool parser_accept(Parser* p, TokenType token);
bool parser_expect(Parser* p, TokenType token, const char* msg);
bool parser_peek(Parser* p, TokenType token);
bool parser_require(Parser* p, TokenType token, const char* msg);
char* parser_to_text(Parser* p);
bool parser_accept_primitive(Parser* p);
// Base parsing (expressions, types)
bool parse_primitive_type_expression(Parser* p, TypeTree* expr);
bool parse_array_type_expression(Parser* p, TypeTree* expr);
bool parse_type_expression(Parser* p, TypeTree* expr);
bool parse_expression(Parser* p, ExpressionTree* expr);
// Declaration parsing
bool parse_import_declaration(Parser* p, ModuleTree* module, bool is_public);
bool parse_alias_declaration(Parser* p, ModuleTree* module, bool is_public);
bool parse_variable_declaration(Parser* p, ModuleTree* module, bool is_public, bool is_static, bool is_const);
// Module parsing
bool parse_module_declaration(Parser* p, ModuleTree* module);
#endif
+87
View File
@@ -0,0 +1,87 @@
#include "internal.h"
#include "../log.h"
#include <stdlib.h>
#include <string.h>
/*
 * Parses the `module <identifier> ;` header and stores the identifier in
 * module->name. Returns false, after logging, on the first token that does
 * not match.
 */
bool parse_module_declaration(Parser* p, ModuleTree* module) {
    /* Short-circuit keeps the checks in order: the keyword first, then the name. */
    bool has_header = parser_expect(p, TOKEN_MODULE, "expected keyword 'module'")
        && parser_require(p, TOKEN_IDENTIFIER, "expected module identifier");

    if (!has_header) {
        return false;
    }

    module->name = parser_to_text(p);
    return parser_expect(p, TOKEN_SEMICOLON, "expected ';' after module name");
}
/*
 * Parses a whole module from the token stream.
 *
 * A module is the `module` header followed by any number of declarations.
 * Each declaration may start with the modifiers `public`, `static` and
 * `const` in any order, followed by `import`, `alias`, `var` or a primitive
 * type keyword.
 *
 * Returns a heap-allocated tree that the caller releases with
 * ast_free_module(), or NULL on a syntax error (which is logged) or an
 * allocation failure.
 */
ModuleTree* parser_parse(TokenStream* ts) {
    /* The parser state only lives for this call, so it stays on the stack. */
    Parser parser;
    Parser* p = &parser;
    ModuleTree* module;

    p->ts = ts;
    parser_next_token(p);

    /* Zero-filled so ast_free_module() can release a partially built tree. */
    module = calloc(1, sizeof *module);
    if (module == NULL) {
        return NULL;
    }

    if (!parse_module_declaration(p, module)) {
        goto fail;
    }

    while (!parser_peek(p, TOKEN_EOF)) {
        bool is_public = false;
        bool is_static = false;
        bool is_const = false;
        bool terminal = false;

        /* Collect modifiers until the keyword that starts the declaration. */
        while (!terminal) {
            if (parser_accept(p, TOKEN_IMPORT)) {
                if (is_static || is_const) {
                    log_on_line(&p->token.location, "import declarations cannot be static or const");
                    goto fail;
                }
                if (!parse_import_declaration(p, module, is_public)) {
                    goto fail;
                }
                terminal = true;
            } else if (parser_accept(p, TOKEN_ALIAS)) {
                if (is_static || is_const) {
                    log_on_line(&p->token.location, "alias declarations cannot be static or const");
                    goto fail;
                }
                if (!parse_alias_declaration(p, module, is_public)) {
                    goto fail;
                }
                terminal = true;
            } else if (parser_accept(p, TOKEN_PUBLIC)) {
                is_public = true;
            } else if (parser_accept(p, TOKEN_STATIC)) {
                is_static = true;
            } else if (parser_accept(p, TOKEN_CONST)) {
                is_const = true;
            } else if (parser_accept(p, TOKEN_VAR) || parser_accept_primitive(p)) {
                /* `var` is consumed here; a primitive keyword is left for the declaration parser. */
                if (!parse_variable_declaration(p, module, is_public, is_static, is_const)) {
                    goto fail;
                }
                terminal = true;
            } else {
                log_on_line(&p->token.location, "unexpected token");
                goto fail;
            }
        }
    }

    return module;

fail:
    ast_free_module(module);
    return NULL;
}
+8
View File
@@ -0,0 +1,8 @@
#include "../test.h"
#include "../parser.h"
// Currently core utilities are tested indirectly through other parser tests.
// Placeholder for future explicit core utility tests.
/* Intentionally empty: reserves a slot for future tests of the parser core helpers. */
static void test_parser_core_placeholder(void) {
// No-op
}
+89
View File
@@ -0,0 +1,89 @@
#include "../test.h"
#include "../parser.h"
#include <string.h>
#include <stdlib.h>
/* An import without its terminating ';' must produce the expected error log. */
static void test_parser_missing_semicolon_import(void) {
    /* Only the logged output matters here; the parse result is not inspected. */
    (void)test_get_ast();
    assert_log_file("expected error for missing semicolon");
}
/* An import whose name is not an identifier must produce the expected error log. */
static void test_parser_bad_import_name(void) {
    /* Only the logged output matters here; the parse result is not inspected. */
    (void)test_get_ast();
    assert_log_file("expected error for bad import name");
}
static void test_parser_imports(void) {
ModuleTree* m = test_get_ast();
assert_not_null(m, "expected module to be parsed");
assert_str("my_module", m->name, "expected name 'my_module'");
assert_not_null(m->imports, "expected imports to be parsed");
assert_int(1, (int)m->import_count, "expected one import");
assert_str("other_module", m->imports[0].module_name, "expected import name 'other_module'");
assert_false(m->imports[0].is_public, "expected import to not be public");
}
static void test_parser_public_imports(void) {
ModuleTree* m = test_get_ast();
assert_not_null(m, "expected module to be parsed");
assert_str("my_module", m->name, "expected name 'my_module'");
assert_not_null(m->imports, "expected imports to be parsed");
assert_int(1, (int)m->import_count, "expected one import");
assert_str("other_module", m->imports[0].module_name, "expected import name 'other_module'");
assert_true(m->imports[0].is_public, "expected import to be public");
}
/* A single alias declaration yields exactly one alias carrying the declared name. */
static void test_parser_alias_simple(void) {
    ModuleTree* module = test_get_ast();

    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->alias_count, "expected correct number of aliases");

    const AliasTree* first = &module->aliases[0];
    assert_str("myalias", first->name, "expected correct alias name");
}
/*
 * Checks that a plain global variable is parsed with the expected name and
 * without the const or static qualifiers.
 */
static void test_parser_variable_simple(void) {
    ModuleTree* module = test_get_ast();
    VariableTree* variable;

    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->variable_count, "expected correct number of variables");

    /* Safe to index: the count was just asserted to be 1. */
    variable = &module->variables[0];
    assert_str("my_var", variable->name, "expected correct variable name");
    assert_false(variable->is_const, "expected not const");
    assert_false(variable->is_static, "expected not static");
}
/*
 * Checks that a `const` global variable is parsed with the const flag set and
 * the static flag clear.
 */
static void test_parser_variable_const(void) {
    ModuleTree* module = test_get_ast();
    VariableTree* variable;

    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->variable_count, "expected correct number of variables");

    /* Safe to index: the count was just asserted to be 1. */
    variable = &module->variables[0];
    assert_str("my_const", variable->name, "expected correct variable name");
    assert_true(variable->is_const, "expected const");
    assert_false(variable->is_static, "expected not static");
}
/*
 * Checks that a `static` global variable is parsed with the static flag set
 * and the const flag clear.
 */
static void test_parser_variable_static(void) {
    ModuleTree* module = test_get_ast();
    VariableTree* variable;

    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->variable_count, "expected correct number of variables");

    /* Safe to index: the count was just asserted to be 1. */
    variable = &module->variables[0];
    assert_str("my_static", variable->name, "expected correct variable name");
    assert_false(variable->is_const, "expected not const");
    assert_true(variable->is_static, "expected static");
}
static void test_parser_multiple_vars(void) {
ModuleTree* m = test_get_ast();
assert_not_null(m, "expected module to be parsed");
assert_int(2, (int)m->variable_count, "expected correct number of variables");
assert_str("var1", m->variables[0].name, "expected first variable name 'var1'");
assert_str("var2", m->variables[1].name, "expected second variable name 'var2'");
}
+52
View File
@@ -0,0 +1,52 @@
#include "../test.h"
#include "../parser.h"
#include <string.h>
#include <stdlib.h>
/*
 * Checks that an alias of a builtin integer type records a builtin type tree
 * that is 32 bits wide and signed.
 */
static void test_parser_alias_simple_type(void) {
    ModuleTree* module = test_get_ast();
    AliasTree* first;

    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->alias_count, "expected correct number of aliases");

    /* Safe to index: the count was just asserted to be 1. */
    first = &module->aliases[0];
    assert_int(TYPE_TREE_BUILTIN, first->value.tag, "expected correct alias tag");
    assert_int(32, first->value.builtin.bitSize, "expected bitSize 32");
    assert_true(first->value.builtin.isSigned, "expected signed");
}
/*
 * Checks that an alias of an array type records an array type tree whose
 * element type is a signed, 32-bit builtin.
 */
static void test_parser_alias_array(void) {
    ModuleTree* module = test_get_ast();
    AliasTree* first;
    TypeTree* elementType;

    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->alias_count, "expected correct number of aliases");

    /* Outer type: the array itself. */
    first = &module->aliases[0];
    assert_int(TYPE_TREE_ARRAY, first->value.tag, "expected correct alias tag");

    /* Inner type: what the array holds. */
    elementType = first->value.array.array;
    assert_not_null(elementType, "expected pointer to array type");
    assert_int(TYPE_TREE_BUILTIN, elementType->tag, "expected correct type tag");
    assert_int(32, elementType->builtin.bitSize, "expected bitSize 32");
    assert_true(elementType->builtin.isSigned, "expected signed");
}
/*
 * Checks that a variable declared with an initializer keeps it as an integer
 * expression with the value 123.
 */
static void test_parser_variable_init(void) {
    ModuleTree* module = test_get_ast();
    VariableTree* variable;

    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->variable_count, "expected 1 variable");

    variable = module->variables;
    assert_str("x", variable->name, "expected variable name 'x'");

    /* The initializer must exist before its contents are inspected. */
    assert_not_null(variable->initializer, "expected variable to have an initializer");
    assert_int(EXPRESSION_TREE_INTEGER, variable->initializer->tag, "expected integer initializer");
    assert_int(123, variable->initializer->integer, "expected value 123");
}
/*
 * Checks that a variable declared with a builtin integer type records a
 * builtin type tree that is 32 bits wide and signed.
 */
static void test_parser_variable_simple_type(void) {
    ModuleTree* module = test_get_ast();
    VariableTree* variable;

    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->variable_count, "expected correct number of variables");

    /* Safe to index: the count was just asserted to be 1. */
    variable = &module->variables[0];
    assert_int(TYPE_TREE_BUILTIN, variable->type.tag, "expected correct type tag");
    assert_int(32, variable->type.builtin.bitSize, "expected bitSize 32");
    assert_true(variable->type.builtin.isSigned, "expected signed");
}
+21
View File
@@ -0,0 +1,21 @@
#include "../test.h"
#include "../parser.h"
#include <string.h>
#include <stdlib.h>
/*
 * Checks that the module declaration is parsed and yields the declared name.
 */
static void test_parser_module_name(void) {
    ModuleTree* module = test_get_ast();

    assert_not_null(module, "expected module to be parsed");
    assert_str("my_module", module->name, "expected name 'my_module'");
}
/*
 * Parses the fixture for this test (a file that does not start with a module
 * declaration) and compares the logged diagnostics with the golden log file.
 */
static void test_parser_bad_module_name(void) {
    /* Only the diagnostics matter here; the parse result is not inspected. */
    test_get_ast();

    assert_log_file("expected error to be logged for bad module name");
}
/*
 * Parses the fixture for this test (a module declaration without its
 * terminating ';') and compares the logged diagnostics with the golden log file.
 */
static void test_parser_missing_semicolon_module(void) {
    /* Only the diagnostics matter here; the parse result is not inspected. */
    test_get_ast();

    assert_log_file("expected error for missing semicolon");
}
+11
View File
@@ -0,0 +1,11 @@
#include "str.h"
#include <string.h>
#include <stdlib.h>
/*
 * Creates a heap-allocated, NUL-terminated copy of `string`.
 *
 * Exactly `string.length` bytes are copied from `string.data`, so the source
 * does not have to be NUL-terminated. The caller owns the result and must
 * release it with free().
 *
 * Returns NULL when the allocation fails.
 */
char* string_copy(String string) {
    char* copy = malloc(string.length + 1);
    if (copy == NULL) {
        return NULL;
    }

    /* An empty String may carry a NULL data pointer; memcpy must not see it. */
    if (string.length > 0) {
        memcpy(copy, string.data, string.length);
    }
    copy[string.length] = '\0';
    return copy;
}
+27
View File
@@ -0,0 +1,27 @@
/**
* Contains the definition of the String structure, which is a simple representation of a string in C.
*/
#ifndef STR_H
#define STR_H
#include <stddef.h>
/**
* A simple string structure that holds a pointer to the character data and its length.
*/
typedef struct {
/* First character of the text. string_copy reads exactly `length` bytes from here, so the data presumably need not be NUL-terminated (TODO confirm for other users). */
char* data;
/* Number of characters that belong to the string, not counting any terminator. */
size_t length;
} String;
/**
* Creates a copy of a string.
*
* Note that this copy has to be freed afterwards.
*
* @param string The string to copy.
* @returns A null-terminated copy of the string.
*/
char* string_copy(String string);
#endif
+251 -18
View File
@@ -1,70 +1,303 @@
#include "test.h"
#include "util.h"
#include "parser.h"
#include <setjmp.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
static jmp_buf s_testJmp;
static const char* s_failMsg;
static char s_failMsg[1024];
static char* s_logOutput = NULL;
static const char* s_currentTestName = NULL;
static char* s_testSource = NULL;
static ModuleTree* s_currentModule = NULL;
static TokenStream* s_currentTokenStream = NULL;
void fail(const char* msg) {
s_failMsg = msg;
if (msg) {
strncpy(s_failMsg, msg, sizeof(s_failMsg) - 1);
s_failMsg[sizeof(s_failMsg) - 1] = '\0';
} else {
s_failMsg[0] = '\0';
}
longjmp(s_testJmp, 1);
}
/*
 * Reads a whole file into a freshly allocated, NUL-terminated buffer.
 *
 * The file is opened in text mode. The buffer is sized from the file length
 * reported by ftell() and terminated right after the bytes that were actually
 * read, which may be fewer than the reported length.
 *
 * @param filepath Path of the file to read.
 * @returns The file content, to be released with free(), or NULL when the
 *          path is NULL, the file cannot be opened, sized or read, or memory
 *          is exhausted.
 */
char* read_file_content(const char* filepath) {
    FILE* f;
    long size;
    size_t bytesRead;
    char* content = NULL;

    if (!filepath) return NULL;

    f = fopen(filepath, "r");
    if (!f) return NULL;

    /* Determine the file length; every step can fail (e.g. on a pipe). */
    if (fseek(f, 0, SEEK_END) != 0) goto done;
    size = ftell(f);
    if (size < 0) goto done;
    if (fseek(f, 0, SEEK_SET) != 0) goto done;

    content = malloc((size_t)size + 1);
    if (!content) goto done;

    bytesRead = fread(content, 1, (size_t)size, f);
    if (ferror(f)) {
        free(content);
        content = NULL;
        goto done;
    }

    /* Terminate after what was really read, not after the reported size. */
    content[bytesRead] = '\0';

done:
    fclose(f);
    return content;
}
/* Fails the current test with `msg` when `ptr` is NULL. */
void assert_not_null(void* ptr, const char* msg) {
    if (ptr != NULL) {
        return;
    }
    fail(msg);
}
/*
 * Fails the current test with `msg` unless the length-delimited `actual`
 * holds exactly the characters of the NUL-terminated `expected`.
 * A NULL on either side counts as a mismatch.
 */
void assert_string(const char* expected, String actual, const char* msg) {
    bool matches = false;

    if (expected != NULL && actual.data != NULL) {
        /* Lengths must agree before the characters are compared. */
        if (strlen(expected) == actual.length) {
            matches = strncmp(expected, actual.data, actual.length) == 0;
        }
    }

    if (!matches) {
        fail(msg);
    }
}
/*
 * Fails the current test with `msg` unless both C strings are non-NULL and
 * compare equal.
 */
void assert_str(const char* expected, const char* actual, const char* msg) {
    if (expected != NULL && actual != NULL && strcmp(expected, actual) == 0) {
        return;
    }
    fail(msg);
}
/*
 * Returns the token stream of the running test, opening it on first use.
 *
 * The source is read from `v0/tests/<test name>.c2` and kept in s_testSource,
 * which the token stream is opened on. The test fails when the path cannot be
 * built or the file cannot be read.
 */
TokenStream* test_get_tokenstream(void) {
    char* sourcePath;

    /* Reuse the stream when this test already opened it. */
    if (s_currentTokenStream != NULL) {
        return s_currentTokenStream;
    }

    sourcePath = format_string("v0/tests/%s.c2", s_currentTestName);
    if (sourcePath == NULL) {
        fail("out of memory");
        return NULL;
    }

    /* Drop any source left over from an earlier test before loading ours. */
    free(s_testSource);
    s_testSource = read_file_content(sourcePath);
    if (s_testSource == NULL) {
        /* Show which file was missing before reporting the failure. */
        puts(sourcePath);
        free(sourcePath);
        fail("could not read test source file");
        return NULL;
    }

    s_currentTokenStream = tokenstream_open(sourcePath, s_testSource);
    free(sourcePath);
    return s_currentTokenStream;
}
/*
 * Returns the parsed module of the running test, parsing the test's token
 * stream on first use. The result may be NULL when parsing failed.
 */
ModuleTree* test_get_ast(void) {
    if (s_currentModule != NULL) {
        return s_currentModule;
    }

    s_currentModule = parser_parse(test_get_tokenstream());
    return s_currentModule;
}
/*
 * Fails the current test with `msg` unless everything logged so far equals
 * `expected`.
 *
 * Nothing logged yet is treated as the empty string, the same way
 * assert_log_file() treats it, so `assert_log("", ...)` can verify that no
 * diagnostics were produced.
 */
void assert_log(const char* expected, const char* msg) {
    /* s_logOutput stays NULL until the first message is appended. */
    assert_str(expected, s_logOutput ? s_logOutput : "", msg);
}
/*
 * Compares everything logged so far with the golden file
 * `v0/tests/<test name>.log` and fails the current test with `msg` on a
 * mismatch. Nothing logged is treated as the empty string.
 *
 * When the environment variable GENERATE_GOLDEN is "1", the golden file is
 * (re)written from the current log instead of being compared; a failure to
 * open or fully write it fails the test so a truncated golden file cannot go
 * unnoticed.
 */
void assert_log_file(const char* msg) {
    const char* actual = s_logOutput ? s_logOutput : "";
    char* filepath = format_string("v0/tests/%s.log", s_currentTestName);
    const char* generate;
    char* content;
    bool match;

    if (!filepath) {
        fail("out of memory");
        return;
    }

    generate = getenv("GENERATE_GOLDEN");
    if (generate && strcmp(generate, "1") == 0) {
        FILE* f = fopen(filepath, "w");
        bool written;

        /* fail() does not return, so release the path before any fail(). */
        free(filepath);
        if (!f) {
            fail("could not open golden file for writing");
            return;
        }

        written = fputs(actual, f) != EOF;
        /* fclose flushes; a failure here also means the file is incomplete. */
        if (fclose(f) != 0) {
            written = false;
        }
        if (!written) {
            fail("could not write golden file");
        }
        return;
    }

    content = read_file_content(filepath);
    free(filepath);
    if (!content) {
        fail("could not open golden file for reading");
        return;
    }

    match = strcmp(content, actual) == 0;
    free(content);
    if (!match) {
        fail(msg);
    }
}
/*
 * Fails the current test unless `expected` equals `actual`.
 *
 * The failure message is `msg` followed by both values. It is formatted into
 * a static buffer rather than a heap string: fail() leaves through longjmp
 * and never returns, so a heap allocation made here could not be freed, and
 * a static buffer stays valid after the jump.
 */
void assert_int(int expected, int actual, const char* msg) {
    static char buf[1024];

    if (expected == actual) {
        return;
    }

    /* snprintf always terminates; an over-long message is merely truncated. */
    snprintf(buf, sizeof(buf), "%s (expected %d, got %d)", msg ? msg : "", expected, actual);
    fail(buf);
}
/* Fails the current test with `msg` when `condition` is false. */
void assert_true(bool condition, const char* msg) {
    if (condition) {
        return;
    }
    fail(msg);
}
/* Fails the current test with `msg` when `condition` is true. */
void assert_false(bool condition, const char* msg) {
    if (!condition) {
        return;
    }
    fail(msg);
}
/*
 * Log sink used while tests run: appends `msg` to the text accumulated in
 * s_logOutput. When memory cannot be allocated the message is dropped and the
 * existing log is left untouched.
 */
static void log_append(const char* msg) {
    size_t prevLen = (s_logOutput != NULL) ? strlen(s_logOutput) : 0;
    size_t msgLen = strlen(msg);
    char* combined = malloc(prevLen + msgLen + 1);

    if (combined == NULL) {
        return;
    }

    if (prevLen > 0) {
        memcpy(combined, s_logOutput, prevLen);
    }
    /* The +1 carries the terminator of msg across as well. */
    memcpy(combined + prevLen, msg, msgLen + 1);

    free(s_logOutput);
    s_logOutput = combined;
}
/* Discards the accumulated log text so the next test starts with an empty log. */
static void log_clear(void) {
    char* previous = s_logOutput;

    s_logOutput = NULL;
    free(previous);
}
/* Pairs a test function with the name it is reported under. */
typedef struct {
/* Name printed by the runner; the runner also skips its first five characters (the 'test_' prefix) to get the base name of the test's fixture files. */
const char* name;
/* Function executed for this test. */
Test func;
} TestCase;
#include "test_token.c"
#include "test_parser.c"
#include "parser/test_module.c"
#include "parser/test_declaration.c"
#include "parser/test_expression.c"
#include "parser/test_core.c"
#include "test_log.c"
static int s_totalTests;
static int s_greenTests;
#define TEST(name) {#name, name},
static TestCase s_tests[] = {
{"tokenstream_open_fail", test_tokenstream_open_fail},
{"tokenstream_simple_keyword", test_tokenstream_simple_keyword},
{"tokenstream_keywords_and_symbols", test_tokenstream_keywords_and_symbols},
{"tokenstream_parentheses_and_brackets", test_tokenstream_parentheses_and_brackets},
{"tokenstream_comma", test_tokenstream_comma},
{"tokenstream_whitespace_ignored", test_tokenstream_whitespace_ignored},
{"tokenstream_void_function_signature", test_tokenstream_void_function_signature},
{"tokenstream_info", test_tokenstream_info},
{"parser_module_name", test_parser_module_name},
TEST(test_log_error)
TEST(test_log_on_line_variadic)
TEST(test_log_on_line)
TEST(test_parser_module_name)
TEST(test_parser_bad_module_name)
TEST(test_parser_missing_semicolon_module)
TEST(test_parser_missing_semicolon_import)
TEST(test_parser_bad_import_name)
TEST(test_parser_imports)
TEST(test_parser_public_imports)
TEST(test_parser_alias_simple)
TEST(test_parser_alias_simple_type)
TEST(test_parser_alias_array)
TEST(test_parser_variable_simple)
TEST(test_parser_variable_simple_type)
TEST(test_parser_variable_const)
TEST(test_parser_variable_init)
TEST(test_parser_variable_static)
TEST(test_parser_multiple_vars)
TEST(test_parser_core_placeholder)
TEST(test_tokenstream_comma)
TEST(test_tokenstream_info)
TEST(test_tokenstream_keywords_and_symbols)
TEST(test_tokenstream_open_fail)
TEST(test_tokenstream_parentheses_and_brackets)
TEST(test_tokenstream_primitive_types)
TEST(test_tokenstream_simple_keyword)
TEST(test_tokenstream_unknown_token)
TEST(test_tokenstream_void_function_signature)
TEST(test_tokenstream_whitespace_ignored)
};
int main(int argc, char** argv) {
const char** failedTests;
int failedCount;
(void)argc;
(void)argv;
s_totalTests = sizeof(s_tests) / sizeof(s_tests[0]);
s_greenTests = 0;
const char* failedTests[s_totalTests + 1];
int failedCount = 0;
// Allocate failed tests array dynamically to avoid VLAs
failedTests = (const char**)malloc((s_totalTests + 1) * sizeof(const char*));
failedCount = 0;
for (int i = 0; i < s_totalTests; i++) {
// Add 5 to strip the 'test_' prefix.
s_currentTestName = s_tests[i].name + 5;
log_set_output(log_append);
printf("%s...", s_tests[i].name);
s_failMsg = NULL;
fflush(stdout);
s_failMsg[0] = '\0';
if (setjmp(s_testJmp) == 0) {
log_clear();
if (s_testSource) {
free(s_testSource);
s_testSource = NULL;
}
s_tests[i].func();
printf(" [OK]\n");
s_greenTests++;
} else {
printf(" [FAIL]: %s\n", s_failMsg ? s_failMsg : "");
printf(" [FAIL]: %s\n", s_failMsg[0] ? s_failMsg : "");
failedTests[failedCount++] = s_tests[i].name;
// Log output on failure
if (s_logOutput && s_logOutput[0]) {
printf("%s\n", s_logOutput);
}
}
// Free AST and TokenStream after each test
if (s_currentModule) {
ast_free_module(s_currentModule);
s_currentModule = NULL;
}
if (s_currentTokenStream) {
tokenstream_close(s_currentTokenStream);
s_currentTokenStream = NULL;
}
fflush(stdout);
}
if (s_testSource) free(s_testSource);
log_clear();
if (failedCount > 0) {
printf("\nFailed tests:\n");
for (int i = 0; i < failedCount; i++) {
printf(" - %s\n", failedTests[i]);
for (int j = 0; j < failedCount; j++) {
printf(" - %s\n", failedTests[j]);
}
}
printf("\n%d/%d tests passed.\n", s_greenTests, s_totalTests);
free(failedTests);
return failedCount > 0 ? 1 : 0;
}
+79
View File
@@ -4,6 +4,9 @@
#ifndef TEST_H
#define TEST_H
#include "token.h"
#include "ast.h"
typedef void (*Test)(void);
/**
@@ -12,4 +15,80 @@ typedef void (*Test)(void);
*/
void fail(const char* msg);
/**
* Asserts that a pointer is not null.
*
* Calls `fail` if the assertion does not hold.
*
* @param ptr The pointer to test.
* @param msg The message to print if the pointer is null.
*/
void assert_not_null(void* ptr, const char* msg);
/**
* Asserts that a string has the expected value.
*
* Calls `fail` if the assertion does not hold.
*
* @param expected The expected value. This is typically a string literal.
* @param actual The actual value. This is typically an expression.
* @param msg The message to print if these do not match.
*/
void assert_str(const char* expected, const char* actual, const char* msg);
/**
* Asserts that a length-delimited `String` has the expected value.
*
* Calls `fail` if the assertion does not hold.
*
* @param expected The expected value. This is typically a string literal.
* @param actual The actual value. This is typically an expression.
* @param msg The message to print if these do not match.
*/
void assert_string(const char* expected, String actual, const char* msg);
/**
* Asserts that the logged output matches the expected value.
*/
void assert_log(const char* expected, const char* msg);
/**
* Asserts that the logged output matches the content of the file `v0/tests/xyz.log`, where xyz is the test name.
* If GENERATE_GOLDEN=1, the file is overwritten with the actual output.
*/
void assert_log_file(const char* msg);
/**
* Asserts that two integers are equal.
*/
void assert_int(int expected, int actual, const char* msg);
/**
* Asserts that a condition is true.
*/
#include "bool.h"
void assert_true(bool condition, const char* msg);
/**
* Asserts that a condition is false.
*/
void assert_false(bool condition, const char* msg);
/**
* Get the token stream used for this test.
* It reads from the `v0/tests/xyz.c2` file, where xyz is the test name.
*
* At the end of the test, the tokenstream will be freed automatically by the test harness.
*/
TokenStream* test_get_tokenstream(void);
/**
* Gets the parsed module for this test.
* It reads from the `v0/tests/xyz.c2` file, where xyz is the test name.
*
* At the end of the test, the AST will be freed automatically by the test harness.
*/
ModuleTree* test_get_ast(void);
#endif
+63
View File
@@ -0,0 +1,63 @@
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
/* Returns non-zero when an snprintf result shows the output fit into `capacity` bytes. */
static int snprintf_fits(int written, size_t capacity) {
    return written >= 0 && (size_t)written < capacity;
}

/*
 * Runs one integration test.
 *
 * Compiles `v0/integration_tests/<dir_name>/input.c2` with `./v0/bin/c2`,
 * redirecting the output to `actual.c` in the current directory, and diffs it
 * against `v0/integration_tests/<dir_name>/expected.c`.
 *
 * Both paths are passed to the shell inside single quotes so that directory
 * names containing spaces or other shell metacharacters work; a name that
 * itself contains a single quote is rejected.
 *
 * @param dir_name Name of the test directory below v0/integration_tests.
 * @returns 0 when the test passed, 1 otherwise.
 */
int run_test(const char* dir_name) {
    char cmd[2048];
    char input_path[1024];
    char expected_path[1024];

    /* A quote in the name would end the quoting used in the commands below. */
    if (strchr(dir_name, '\'') != NULL) {
        printf("Unsupported test directory name: %s\n", dir_name);
        return 1;
    }

    /* A truncated path would silently point at a different file. */
    if (!snprintf_fits(snprintf(input_path, sizeof(input_path), "v0/integration_tests/%s/input.c2", dir_name), sizeof(input_path)) ||
        !snprintf_fits(snprintf(expected_path, sizeof(expected_path), "v0/integration_tests/%s/expected.c", dir_name), sizeof(expected_path))) {
        printf("Path buffer too small for %s\n", dir_name);
        return 1;
    }

    if (!snprintf_fits(snprintf(cmd, sizeof(cmd), "./v0/bin/c2 '%s' > actual.c", input_path), sizeof(cmd))) {
        printf("Command buffer too small for %s\n", dir_name);
        return 1;
    }
    if (system(cmd) != 0) {
        printf("Failed to run compiler for %s\n", dir_name);
        return 1;
    }

    if (!snprintf_fits(snprintf(cmd, sizeof(cmd), "diff -u '%s' actual.c", expected_path), sizeof(cmd))) {
        printf("Command buffer too small for %s\n", dir_name);
        return 1;
    }
    if (system(cmd) != 0) {
        printf("Test %s failed: Output mismatch\n", dir_name);
        return 1;
    }

    printf("Test %s passed\n", dir_name);
    return 0;
}
/*
 * Runs every integration test: each sub-directory of v0/integration_tests
 * (other than "." and "..") is handed to run_test(), and a summary is printed.
 *
 * Exits with 1 when the directory cannot be opened or any test failed,
 * 0 otherwise.
 */
int main() {
    struct dirent* entry;
    int passed = 0;
    int failed = 0;
    DIR* tests_dir = opendir("v0/integration_tests");

    if (tests_dir == NULL) {
        perror("opendir");
        return 1;
    }

    while ((entry = readdir(tests_dir)) != NULL) {
        /* Only directories are tests. */
        if (entry->d_type != DT_DIR) {
            continue;
        }
        /* Skip the self and parent entries. */
        if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
            continue;
        }

        if (run_test(entry->d_name) == 0) {
            passed++;
        } else {
            failed++;
        }
    }
    closedir(tests_dir);

    printf("\nTotal tests: %d, Passed: %d, Failed: %d\n", passed + failed, passed, failed);
    return failed > 0 ? 1 : 0;
}
+52
View File
@@ -0,0 +1,52 @@
#include "test.h"
#include "log.h"
#include <string.h>
#include <stdlib.h>
#include "util.h"
static char* s_lastLoggedError = NULL;
/*
 * Log sink for test_log_error: remembers a copy of the most recent message
 * (an empty string for a NULL message), replacing the previous one.
 */
static void mock_log(const char* msg) {
    const char* text = msg ? msg : "";

    free(s_lastLoggedError);
    s_lastLoggedError = format_string("%s", text);
}
/* Forgets the message captured by mock_log, releasing its memory. */
static void test_log_error_reset(void) {
    free(s_lastLoggedError);
    s_lastLoggedError = NULL;
}

/*
 * Checks that log_error() hands its message unchanged to the installed log
 * sink, using mock_log to capture it.
 */
static void test_log_error(void) {
    /* Arrange: route logging to the mock and start from a clean slate. */
    log_set_output(mock_log);
    test_log_error_reset();

    /* Act and assert. */
    log_error("test error message");
    assert_str("test error message", s_lastLoggedError, "expected 'test error message'");

    /* Clean up: uninstall the mock and drop the captured message. */
    log_set_output(NULL);
    test_log_error_reset();
}
/*
 * Checks the layout of a diagnostic produced by log_on_line() for a
 * hand-built location (line 1, columns 12-13) against the golden log file.
 */
static void test_log_on_line(void) {
    static char lineText[] = "int main() []";
    Location loc;

    /* Where the diagnostic points. */
    loc.filename = "v0/tests/log_on_line.c2";
    loc.line = 1;
    loc.column_start = 12;
    loc.column_end = 13;

    /* The source line shown with the diagnostic (13 characters). */
    loc.line_text.data = lineText;
    loc.line_text.length = sizeof(lineText) - 1;

    log_on_line(&loc, "unexpected token");
    assert_log_file("expected formatted error message");
}
/*
 * Checks that log_on_line() expands printf-style arguments in its message,
 * comparing the diagnostic for a hand-built location (line 1, columns 12-13)
 * against the golden log file.
 */
static void test_log_on_line_variadic(void) {
    static char lineText[] = "int main() []";
    Location loc;

    /* Where the diagnostic points. */
    loc.filename = "v0/tests/log_on_line_variadic.c2";
    loc.line = 1;
    loc.column_start = 12;
    loc.column_end = 13;

    /* The source line shown with the diagnostic (13 characters). */
    loc.line_text.data = lineText;
    loc.line_text.length = sizeof(lineText) - 1;

    log_on_line(&loc, "unexpected token '%c'", 'x');
    assert_log_file("expected formatted error message with variadic args");
}
-14
View File
@@ -1,14 +0,0 @@
#include "test.h"
#include "parser.h"
#include <string.h>
static void test_parser_module_name(void) {
TokenStream* ts = tokenstream_open("module my_module;");
Module* m = parser_parse(ts);
if (m == NULL) fail("expected module to be parsed");
if (strcmp(m->name, "my_module") != 0) fail("expected name 'my_module'");
parser_free(m);
tokenstream_close(ts);
}
+75 -66
View File
@@ -1,104 +1,113 @@
#include "test.h"
#include "token.h"
#include <string.h>
#include <stdlib.h>
static void test_tokenstream_open_fail(void) {
TokenStream* ts = tokenstream_open(NULL);
TokenStream* ts = tokenstream_open(NULL, NULL);
if (ts != NULL) fail("expected NULL for NULL buffer");
}
static void test_tokenstream_simple_keyword(void) {
TokenStream* ts = tokenstream_open("module");
TokenStream* ts = test_get_tokenstream();
Token t;
Token eof;
Token t = tokenstream_next(ts);
if (t != TOKEN_MODULE) fail("expected TOKEN_MODULE");
t = tokenstream_next(ts);
if (t.token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
Token eof = tokenstream_next(ts);
if (eof != -1) fail("expected EOF");
tokenstream_close(ts);
eof = tokenstream_next(ts);
if (eof.token != TOKEN_EOF) fail("expected EOF");
}
static void test_tokenstream_keywords_and_symbols(void) {
TokenStream* ts = tokenstream_open("module main; import stdio;");
TokenStream* ts = test_get_tokenstream();
if (tokenstream_next(ts) != TOKEN_MODULE) fail("expected TOKEN_MODULE");
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (main)");
if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
if (tokenstream_next(ts) != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (stdio)");
if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
if (tokenstream_next(ts) != -1) fail("expected EOF");
tokenstream_close(ts);
if (tokenstream_next(ts).token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (main)");
if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
if (tokenstream_next(ts).token != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (stdio)");
if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
}
static void test_tokenstream_parentheses_and_brackets(void) {
TokenStream* ts = tokenstream_open("()[]");
TokenStream* ts = test_get_tokenstream();
if (tokenstream_next(ts) != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
if (tokenstream_next(ts) != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
if (tokenstream_next(ts) != TOKEN_BRACKET_OPEN) fail("expected TOKEN_BRACKET_OPEN");
if (tokenstream_next(ts) != TOKEN_BRACKET_CLOSE) fail("expected TOKEN_BRACKET_CLOSE");
if (tokenstream_next(ts) != -1) fail("expected EOF");
tokenstream_close(ts);
if (tokenstream_next(ts).token != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
if (tokenstream_next(ts).token != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
if (tokenstream_next(ts).token != TOKEN_BRACKET_OPEN) fail("expected TOKEN_BRACKET_OPEN");
if (tokenstream_next(ts).token != TOKEN_BRACKET_CLOSE) fail("expected TOKEN_BRACKET_CLOSE");
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
}
static void test_tokenstream_comma(void) {
TokenStream* ts = tokenstream_open("a,b,c");
TokenStream* ts = test_get_tokenstream();
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected a");
if (tokenstream_next(ts) != TOKEN_COMMA) fail("expected comma");
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected b");
if (tokenstream_next(ts) != TOKEN_COMMA) fail("expected comma");
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected c");
if (tokenstream_next(ts) != -1) fail("expected EOF");
tokenstream_close(ts);
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected a");
if (tokenstream_next(ts).token != TOKEN_COMMA) fail("expected comma");
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected b");
if (tokenstream_next(ts).token != TOKEN_COMMA) fail("expected comma");
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected c");
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
}
static void test_tokenstream_whitespace_ignored(void) {
TokenStream* ts = tokenstream_open(" module \n\t import ; ");
TokenStream* ts = test_get_tokenstream();
if (tokenstream_next(ts) != TOKEN_MODULE) fail("expected TOKEN_MODULE");
if (tokenstream_next(ts) != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
if (tokenstream_next(ts) != -1) fail("expected EOF");
tokenstream_close(ts);
if (tokenstream_next(ts).token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
if (tokenstream_next(ts).token != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
}
static void test_tokenstream_void_function_signature(void) {
TokenStream* ts = tokenstream_open("void main()");
TokenStream* ts = test_get_tokenstream();
if (tokenstream_next(ts) != TOKEN_VOID) fail("expected TOKEN_VOID");
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
if (tokenstream_next(ts) != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
if (tokenstream_next(ts) != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
if (tokenstream_next(ts) != -1) fail("expected EOF");
if (tokenstream_next(ts).token != TOKEN_VOID) fail("expected TOKEN_VOID");
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
if (tokenstream_next(ts).token != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
if (tokenstream_next(ts).token != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
}
tokenstream_close(ts);
static void test_tokenstream_unknown_token(void) {
TokenStream* ts = test_get_tokenstream();
if (tokenstream_next(ts).token != TOKEN_UNKNOWN) fail("expected TOKEN_UNKNOWN");
assert_log_file("expected error message for unknown token");
}
static void test_tokenstream_info(void) {
TokenStream* ts = tokenstream_open("module main;");
TokenStream* ts = test_get_tokenstream();
Token t1;
Token t2;
Token t1 = tokenstream_next(ts);
TokenInfo info1;
tokenstream_info(ts, &info1);
if (t1 != TOKEN_MODULE) fail("expected TOKEN_MODULE");
if (info1.token != TOKEN_MODULE) fail("info: expected TOKEN_MODULE");
if (info1.text_length != 6) fail("info: expected length 6");
if (strncmp(info1.text, "module", 6) != 0) fail("info: expected 'module'");
t1 = tokenstream_next(ts);
if (t1.token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
assert_string("module", t1.text, "info: expected 'module'");
if (t1.location.line != 1) fail("expected line 1");
if (t1.location.column_start != 1) fail("expected column 1");
Token t2 = tokenstream_next(ts);
TokenInfo info2;
tokenstream_info(ts, &info2);
if (t2 != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
if (info2.token != TOKEN_IDENTIFIER) fail("info: expected TOKEN_IDENTIFIER");
if (info2.text_length != 4) fail("info: expected length 4");
if (strncmp(info2.text, "main", 4) != 0) fail("info: expected 'main'");
tokenstream_close(ts);
t2 = tokenstream_next(ts);
if (t2.token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
assert_string("main", t2.text, "info: expected 'main'");
if (t2.location.line != 1) fail("expected line 1");
if (t2.location.column_start != 8) fail("expected column 8");
}
static void test_tokenstream_primitive_types(void) {
TokenStream* ts = test_get_tokenstream();
if (tokenstream_next(ts).token != TOKEN_I8) fail("expected TOKEN_I8");
if (tokenstream_next(ts).token != TOKEN_I16) fail("expected TOKEN_I16");
if (tokenstream_next(ts).token != TOKEN_I32) fail("expected TOKEN_I32");
if (tokenstream_next(ts).token != TOKEN_I64) fail("expected TOKEN_I64");
if (tokenstream_next(ts).token != TOKEN_U8) fail("expected TOKEN_U8");
if (tokenstream_next(ts).token != TOKEN_U16) fail("expected TOKEN_U16");
if (tokenstream_next(ts).token != TOKEN_U32) fail("expected TOKEN_U32");
if (tokenstream_next(ts).token != TOKEN_U64) fail("expected TOKEN_U64");
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
}
+1
View File
@@ -0,0 +1 @@
int main() []
+4
View File
@@ -0,0 +1,4 @@
--- v0/tests/log_on_line.c2 ---
1| int main() []
^^
unexpected token
+1
View File
@@ -0,0 +1 @@
int main() []
+4
View File
@@ -0,0 +1,4 @@
--- v0/tests/log_on_line_variadic.c2 ---
1| int main() []
^^
unexpected token 'x'
+9
View File
@@ -0,0 +1,9 @@
module mymodule;
import foo;
alias myalias = i32[];
import bar;
alias otheralias = i32;
+3
View File
@@ -0,0 +1,3 @@
module mymodule;
alias myalias = i32[];
+3
View File
@@ -0,0 +1,3 @@
module mymodule;
alias myalias = i32;
+3
View File
@@ -0,0 +1,3 @@
module mymodule;
alias myalias = i32;
+2
View File
@@ -0,0 +1,2 @@
module mymodule;
import ;
+4
View File
@@ -0,0 +1,4 @@
--- v0/tests/parser_bad_import_name.c2 ---
2| import ;
^
expected module identifier
+1
View File
@@ -0,0 +1 @@
import other_module;
+4
View File
@@ -0,0 +1,4 @@
--- v0/tests/parser_bad_module_name.c2 ---
1| import other_module;
^^^^^^
expected keyword 'module'
+2
View File
@@ -0,0 +1,2 @@
module my_module;
import other_module;
@@ -0,0 +1 @@
module my_module; import other_module
@@ -0,0 +1,4 @@
--- v0/tests/parser_missing_semicolon_import.c2 ---
1| module my_module; import other_module
^
expected ';' after import
@@ -0,0 +1 @@
module my_module
@@ -0,0 +1,4 @@
--- v0/tests/parser_missing_semicolon_module.c2 ---
1| module my_module
^
expected ';' after module name
+1
View File
@@ -0,0 +1 @@
module my_module;
+4
View File
@@ -0,0 +1,4 @@
module test_multiple_vars;
i32 var1;
i32 var2;
+3
View File
@@ -0,0 +1,3 @@
module my_module;
public import other_module;
+3
View File
@@ -0,0 +1,3 @@
module test_const_var;
const i32 my_const;
+2
View File
@@ -0,0 +1,2 @@
module mymodule;
var x = 123;
+4
View File
@@ -0,0 +1,4 @@
module my_module;
// Defines a global variable called my_var.
i32 my_var;
+4
View File
@@ -0,0 +1,4 @@
module my_module;
// Defines a global variable called my_var.
i32 my_var;
+3
View File
@@ -0,0 +1,3 @@
module test_static_var;
static i32 my_static;
+1
View File
@@ -0,0 +1 @@
a,b,c
+1
View File
@@ -0,0 +1 @@
module main;
@@ -0,0 +1 @@
module main; import stdio;
@@ -0,0 +1 @@
()[]
+1
View File
@@ -0,0 +1 @@
i8 i16 i32 i64 u8 u16 u32 u64
+1
View File
@@ -0,0 +1 @@
module
+1
View File
@@ -0,0 +1 @@
%
+4
View File
@@ -0,0 +1,4 @@
--- v0/tests/tokenstream_unknown_token.c2 ---
1| %
^
unexpected token '%'
@@ -0,0 +1 @@
void main()
@@ -0,0 +1,2 @@
module
import ;
+168 -64
View File
@@ -1,12 +1,21 @@
#include "token.h"
#include "log.h"
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
struct TokenStream {
char* filename;
const char* code;
size_t pos;
TokenInfo last_info;
int line;
int column;
const char* line_start;
/* End of last non-EOF token */
int last_line;
int last_column_end;
const char* last_line_start;
};
/**
@@ -15,22 +24,38 @@ struct TokenStream {
*/
typedef struct {
const char* keyword;
Token token;
TokenType token;
} KeywordMap;
static const KeywordMap keywords[] = {
{"module", TOKEN_MODULE},
{"import", TOKEN_IMPORT},
{"alias", TOKEN_ALIAS},
{"public", TOKEN_PUBLIC},
{"var", TOKEN_VAR},
{"const", TOKEN_CONST},
{"static", TOKEN_STATIC},
{"void", TOKEN_VOID},
{"i8", TOKEN_I8},
{"i16", TOKEN_I16},
{"i32", TOKEN_I32},
{"i64", TOKEN_I64},
{"u8", TOKEN_U8},
{"u16", TOKEN_U16},
{"u32", TOKEN_U32},
{"u64", TOKEN_U64},
{"true", TOKEN_TRUE},
{"false", TOKEN_FALSE},
};
/**
* Look up a keyword in the keyword map.
* Returns TOKEN_IDENTIFIER if not found.
*/
static Token lookup_keyword(const char* str, size_t length) {
static TokenType lookup_keyword(const char* str, size_t length) {
int count = sizeof(keywords) / sizeof(keywords[0]);
for (int i = 0; i < count; i++) {
int i;
for (i = 0; i < count; i++) {
if (strlen(keywords[i].keyword) == length &&
strncmp(keywords[i].keyword, str, length) == 0) {
return keywords[i].token;
@@ -53,123 +78,202 @@ static int is_identifier_part(char c) {
return isalnum(c) || c == '_';
}
/**
* Read a character from the stream.
*/
static char read_char(TokenStream* ts) {
char c = ts->code[ts->pos];
if (c == '\0') return (char)-1;
ts->pos++;
return c;
}
/**
 * Returns the character at the current position without consuming it.
 * At the end of the input this is the terminating '\0'.
 */
static char peek_char(TokenStream* ts) {
    const char* cursor = ts->code + ts->pos;
    return *cursor;
}
/**
* Read a character from the stream and update position.
*/
static char read_char(TokenStream* ts) {
char c = ts->code[ts->pos];
if (c == '\0') return (char)-1;
if (c == '\0') return '\0';
ts->pos++;
if (c == '\n') {
ts->line++;
ts->column = 1;
ts->line_start = &ts->code[ts->pos];
} else {
ts->column++;
}
return c;
}
static Token read_keyword_or_identifier(TokenStream* ts, char first) {
const char* start = &ts->code[ts->pos - 1];
size_t length = 1;
while (is_identifier_part(peek_char(ts))) {
read_char(ts);
length++;
static size_t get_line_length(const char* line_start) {
const char* p = line_start;
while (*p != '\n' && *p != '\0') {
p++;
}
Token token = lookup_keyword(start, length);
ts->last_info.token = token;
ts->last_info.text = (char*)start;
ts->last_info.text_length = length;
return token;
return (size_t)(p - line_start);
}
TokenStream* tokenstream_open(const char* code) {
/**
 * Builds a token of `type` for the `length` characters starting at `text`.
 *
 * The location records the stream's filename, the given line and start
 * column, an inclusive end column, and the source line beginning at
 * `line_start`. For every token except TOKEN_EOF the stream also remembers
 * where the token ended.
 */
static Token create_token(TokenStream* ts, TokenType type, const char* text, size_t length, int line, int column, const char* line_start) {
    Token result;

    /* What was matched. */
    result.token = type;
    result.text.data = (char*)text;
    result.text.length = length;

    /* Where it was matched; the end column is inclusive. */
    result.location.filename = ts->filename;
    result.location.line = line;
    result.location.column_start = column;
    result.location.column_end = column + (int)length - 1;
    result.location.line_text.data = (char*)line_start;
    result.location.line_text.length = get_line_length(line_start);

    /* EOF must not move the "end of last token" marker. */
    if (type == TOKEN_EOF) {
        return result;
    }

    ts->last_line = result.location.line;
    ts->last_column_end = result.location.column_end;
    ts->last_line_start = result.location.line_text.data;
    return result;
}
TokenStream* tokenstream_open(const char* filename, const char* code) {
/* Declarations first for C89 */
TokenStream* ts;
const char* name_src;
if (code == NULL) return NULL;
TokenStream* ts = (TokenStream*)malloc(sizeof(struct TokenStream));
ts = (TokenStream*)malloc(sizeof(struct TokenStream));
if (ts == NULL) {
return NULL;
}
name_src = filename ? filename : "unknown";
ts->filename = malloc(strlen(name_src) + 1);
if (ts->filename) {
memcpy(ts->filename, name_src, strlen(name_src) + 1);
}
ts->code = code;
ts->pos = 0;
ts->last_info.text = NULL;
ts->last_info.text_length = 0;
ts->last_info.token = (Token)-1;
ts->line = 1;
ts->column = 1;
ts->line_start = code;
ts->last_line = 1;
ts->last_column_end = 0;
ts->last_line_start = code;
return ts;
}
/*
 * Release a TokenStream together with the filename copy it owns.
 * Passing NULL is allowed and does nothing.
 */
void tokenstream_close(TokenStream* ts) {
    if (ts != NULL) {
        /* free() accepts NULL, so a missing filename needs no special case. */
        free(ts->filename);
        free(ts);
    }
}
Token tokenstream_next(TokenStream* ts) {
if (ts == NULL) return -1;
/* Declarations first for C89 */
char c;
int start_line;
int start_column;
const char* line_start;
const char* start_text;
Token t;
if (ts == NULL) {
Token t = {0};
t.token = TOKEN_EOF;
return t;
}
/* Skip whitespace and comments */
while ((c = read_char(ts)) != (char)-1) {
while ((c = peek_char(ts)) != '\0') {
if (isspace(c)) {
read_char(ts);
continue;
}
/* Handle comments */
if (c == '/') {
if (peek_char(ts) == '/') {
if (ts->code[ts->pos + 1] == '/') {
/* Skip until end of line */
while ((c = read_char(ts)) != (char)-1 && c != '\n') {
while ((c = read_char(ts)) != '\0' && c != '\n') {
/* Skip */
}
continue;
}
/* It's just a slash, which we don't handle yet */
return -1;
break;
}
/* We found a non-whitespace, non-comment character */
break;
}
if (c == (char)-1) {
ts->last_info.token = (Token)-1;
ts->last_info.text = NULL;
ts->last_info.text_length = 0;
return -1; /* EOF */
if (peek_char(ts) == '\0') {
Token t;
t.token = TOKEN_EOF;
t.text.data = NULL;
t.text.length = 0;
t.location.filename = ts->filename;
t.location.line = ts->last_line;
t.location.column_start = ts->last_column_end + 1;
t.location.column_end = ts->last_column_end + 1;
t.location.line_text.data = (char*)ts->last_line_start;
t.location.line_text.length = get_line_length(ts->last_line_start);
return t;
}
/* Single-character tokens */
ts->last_info.text = (char*)&ts->code[ts->pos - 1];
ts->last_info.text_length = 1;
start_line = ts->line;
start_column = ts->column;
line_start = ts->line_start;
start_text = &ts->code[ts->pos];
c = read_char(ts);
/* Single-character tokens */
switch (c) {
case '(': return ts->last_info.token = TOKEN_PARENT_OPEN;
case ')': return ts->last_info.token = TOKEN_PARENT_CLOSE;
case '[': return ts->last_info.token = TOKEN_BRACKET_OPEN;
case ']': return ts->last_info.token = TOKEN_BRACKET_CLOSE;
case ',': return ts->last_info.token = TOKEN_COMMA;
case ';': return ts->last_info.token = TOKEN_SEMICOLON;
case '(': return create_token(ts, TOKEN_PARENT_OPEN, start_text, 1, start_line, start_column, line_start);
case ')': return create_token(ts, TOKEN_PARENT_CLOSE, start_text, 1, start_line, start_column, line_start);
case '[': return create_token(ts, TOKEN_BRACKET_OPEN, start_text, 1, start_line, start_column, line_start);
case ']': return create_token(ts, TOKEN_BRACKET_CLOSE, start_text, 1, start_line, start_column, line_start);
case ',': return create_token(ts, TOKEN_COMMA, start_text, 1, start_line, start_column, line_start);
case ';': return create_token(ts, TOKEN_SEMICOLON, start_text, 1, start_line, start_column, line_start);
case '=': return create_token(ts, TOKEN_ASSIGN, start_text, 1, start_line, start_column, line_start);
case '"': {
size_t len = 0;
const char* start = &ts->code[ts->pos];
while (peek_char(ts) != '"' && peek_char(ts) != '\0') {
read_char(ts);
len++;
}
if (peek_char(ts) == '"') read_char(ts);
return create_token(ts, TOKEN_STRING, start, len, start_line, start_column + 1, line_start);
}
}
if (isdigit(c)) {
size_t len = 1;
while (isdigit(peek_char(ts))) {
read_char(ts);
len++;
}
return create_token(ts, TOKEN_INTEGER, start_text, len, start_line, start_column, line_start);
}
/* Keywords and identifiers */
if (is_identifier_start(c)) {
return read_keyword_or_identifier(ts, c);
/* Declarations first for C89 */
size_t length;
TokenType type;
length = 1;
while (is_identifier_part(peek_char(ts))) {
read_char(ts);
length++;
}
type = lookup_keyword(start_text, length);
return create_token(ts, type, start_text, length, start_line, start_column, line_start);
}
/* Unknown character */
ts->last_info.token = (Token)-1;
ts->last_info.text = NULL;
ts->last_info.text_length = 0;
return -1;
}
void tokenstream_info(TokenStream* ts, TokenInfo* info) {
if (ts == NULL || info == NULL) return;
*info = ts->last_info;
t = create_token(ts, TOKEN_UNKNOWN, start_text, 1, start_line, start_column, line_start);
log_on_line(&t.location, "unexpected token '%c'", c);
return t;
}
+37 -24
View File
@@ -4,55 +4,77 @@
#ifndef TOKEN_H
#define TOKEN_H
#include <stddef.h>
#include "location.h"
/**
* A list of all possible tokens.
*/
typedef enum {
// Keywords
/* Keywords */
TOKEN_MODULE,
TOKEN_IMPORT,
TOKEN_SEMICOLON,
TOKEN_ALIAS,
TOKEN_PUBLIC,
TOKEN_VAR,
TOKEN_CONST,
TOKEN_STATIC,
// Symbols
/* Symbols */
TOKEN_PARENT_OPEN,
TOKEN_PARENT_CLOSE,
TOKEN_BRACKET_OPEN,
TOKEN_BRACKET_CLOSE,
TOKEN_COMMA,
TOKEN_ASSIGN,
// Primitives
/* Primitives */
TOKEN_VOID,
TOKEN_I8,
TOKEN_I16,
TOKEN_I32,
TOKEN_I64,
TOKEN_U8,
TOKEN_U16,
TOKEN_U32,
TOKEN_U64,
TOKEN_STRING,
TOKEN_INTEGER,
TOKEN_TRUE,
TOKEN_FALSE,
// Variable
/* Variable */
TOKEN_IDENTIFIER,
} Token;
/* Others */
TOKEN_EOF,
TOKEN_UNKNOWN
} TokenType;
/**
* Holds additional information about a token.
*/
typedef struct {
/// @brief The textual representation of a token.
/// Note that this is not necessarily null-terminated.
char* text;
/* @brief The actual token. */
TokenType token;
/// @brief The length of the `text` string.
size_t text_length;
/* @brief The textual representation of a token. */
String text;
/// @brief The actual token.
Token token;
} TokenInfo;
/* @brief The location of the token. */
Location location;
} Token;
typedef struct TokenStream TokenStream;
/**
* Returns a TokenStream for a text.
*
* @param filename The name of the file to read. This is only used for error reporting.
* @param code The text to read.
* @returns A handle to the TokenStream.
*/
TokenStream* tokenstream_open(const char* code);
TokenStream* tokenstream_open(const char* filename, const char* code);
/**
* Closes a TokenStream.
@@ -67,13 +89,4 @@ void tokenstream_close(TokenStream* ts);
*/
Token tokenstream_next(TokenStream* ts);
/**
* Gets additional information about the last token that was returned
* by `tokenstream_next`.
*
* @param ts The TokenStream to use.
* @param info The TokenInfo object to store the results in.
*/
void tokenstream_info(TokenStream* ts, TokenInfo* info);
#endif
+9
View File
@@ -0,0 +1,9 @@
/**
* Contains runtime information about types.
*/
#ifndef TYPES_H
#define TYPES_H
#endif
+46
View File
@@ -0,0 +1,46 @@
#include "util.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
/*
 * Portable va_copy fallback for pre-C99 or platforms without va_copy.
 *
 * Order of preference: an existing va_copy macro, then the __va_copy
 * spelling if the toolchain defines it, then plain assignment. The
 * assignment form is only valid where va_list is an assignable
 * (non-array) type.
 */
#ifndef va_copy
# if defined(__va_copy)
# define va_copy(dest, src) __va_copy(dest, src)
# else
# define va_copy(dest, src) ((dest) = (src))
# endif
#endif
/**
 * Formats `fmt` with the arguments in `args` into a newly allocated,
 * NUL-terminated string.
 *
 * The required size is measured with a first vsnprintf pass over a copy
 * of `args`; the text is then written by a second pass over `args`
 * itself. `args` is therefore consumed by this call, and the caller
 * still owns the matching va_end.
 *
 * @param fmt  The printf-style format string.
 * @param args The arguments for `fmt`.
 * @return The formatted string (release it with free()), or NULL when
 *         `fmt` is NULL, formatting fails, or the allocation fails.
 */
char* format_string_va(const char* fmt, va_list args) {
    /* Declarations first to satisfy -std=c89 */
    va_list args_copy;
    int needed;
    int written;
    char* buf;

    if (!fmt) return NULL;

    /* Measuring pass: works on a copy so `args` stays usable below. */
    va_copy(args_copy, args);
    needed = vsnprintf(NULL, 0, fmt, args_copy);
    va_end(args_copy);
    if (needed < 0) return NULL;

    buf = (char*)malloc((size_t)needed + 1);
    if (!buf) return NULL;

    /* Writing pass: if it fails or disagrees with the measured length the
     * buffer contents cannot be trusted, so report failure instead of
     * handing back a possibly unterminated string. */
    written = vsnprintf(buf, (size_t)needed + 1, fmt, args);
    if (written < 0 || written > needed) {
        free(buf);
        return NULL;
    }
    return buf;
}

/**
 * Variadic front end for format_string_va.
 *
 * @param fmt The printf-style format string.
 * @param ... The values to format.
 * @return The formatted string (release it with free()), or NULL when
 *         `fmt` is NULL, formatting fails, or the allocation fails.
 */
char* format_string(const char* fmt, ...) {
    /* Declarations first to satisfy -std=c89 */
    va_list args;
    char* s;

    /* Checked here so va_start is never invoked for a NULL format. */
    if (!fmt) return NULL;

    va_start(args, fmt);
    s = format_string_va(fmt, args);
    va_end(args);
    return s;
}
+27
View File
@@ -0,0 +1,27 @@
#ifndef UTIL_H
#define UTIL_H
#include <stdarg.h>
#include <stddef.h>
/**
 * Formats a string using printf-style formatting and returns a newly allocated string.
 * The caller is responsible for freeing the returned string with free().
 *
 * @param fmt The format string. If NULL, no formatting is attempted and NULL is returned.
 * @param ... The values to format.
 * @return A newly allocated, NUL-terminated string containing the formatted output,
 *         or NULL if fmt is NULL, formatting fails, or memory cannot be allocated.
 */
char* format_string(const char* fmt, ...);
/**
 * Formats a string using printf-style formatting with a va_list and returns a newly allocated string.
 * The caller is responsible for freeing the returned string with free().
 *
 * @param fmt The format string. If NULL, no formatting is attempted and NULL is returned.
 * @param args The va_list of values to format. The list is consumed by this call;
 *             the caller remains responsible for the matching va_end.
 * @return A newly allocated, NUL-terminated string containing the formatted output,
 *         or NULL if fmt is NULL, formatting fails, or memory cannot be allocated.
 */
char* format_string_va(const char* fmt, va_list args);
#endif