Compare commits

..

74 Commits

Author SHA1 Message Date
seeseemelk b6d0a78d06 Add integration test 2026-05-01 09:44:46 +02:00
seeseemelk 3bdccf2000 Add integration test framework 2026-04-30 22:21:08 +02:00
seeseemelk 177fb971e4 Rename AST structures to Tree and relocate freeing logic 2026-04-30 21:46:15 +02:00
seeseemelk ea55dedd07 Refactor AST and Parser into modular subdirectories
- Split ast.h into granular headers in v0/ast/
- Split parser.c into modular implementation files in v0/parser/
- Move and rename parser tests to v0/parser/test_*.c
- Update build system (include.mk) with modular sub-makefiles
- Maintain v0/ast.h and v0/parser.h as umbrella headers
2026-04-30 21:23:07 +02:00
seeseemelk 4bd66ea216 More variable stuff 2026-04-30 20:25:53 +02:00
seeseemelk 0704284726 Can parse variables 2026-04-29 21:39:48 +02:00
seeseemelk 94ae665a0a Add initial variable work 2026-04-29 21:20:52 +02:00
seeseemelk e2d8e385f0 Add basic var tokens 2026-04-29 20:28:52 +02:00
seeseemelk 76f9168c5f Fix docs 2026-04-29 20:21:52 +02:00
seeseemelk 1ab021561e Fix bad test 2026-04-29 20:20:16 +02:00
seeseemelk f260e02efa Refactor parser 2026-04-29 20:15:05 +02:00
seeseemelk 1c5d49d682 Fix valgrind errors 2026-04-29 19:41:00 +02:00
seeseemelk cc25563cd2 Cleanup 2026-04-29 19:23:59 +02:00
seeseemelk 323a599399 Build with debug symbols 2026-04-29 18:53:02 +02:00
seeseemelk ec896495a3 Fix infinite loop bug 2026-04-29 14:40:06 +02:00
seeseemelk eb4b0495f2 Working on parser refactor 2026-04-29 14:36:42 +02:00
seeseemelk 1f40c8f5ee Refactor tests a bit more 2026-04-29 13:25:41 +02:00
seeseemelk 98d58a2169 Refactor tests 2026-04-29 13:09:14 +02:00
seeseemelk f0621a8076 Refactor parser 2026-04-29 11:53:26 +02:00
seeseemelk 84747028f5 Ensure alias and import can be mixed 2026-04-29 11:46:02 +02:00
seeseemelk f90cad2b96 Use proper public keyword 2026-04-29 11:43:14 +02:00
seeseemelk e09bd72441 Update ast interface 2026-04-29 11:24:42 +02:00
seeseemelk 9035cc639c Add alias to ast 2026-04-29 11:18:40 +02:00
seeseemelk 3288efdfd7 Refactor test interface 2026-04-29 10:59:06 +02:00
seeseemelk 34b7939f76 Refactor parser to C11 and update build configuration 2026-04-29 10:38:34 +02:00
seeseemelk 15714393c3 Refactor parser to use Token in AST and update tests 2026-04-29 10:35:12 +02:00
seeseemelk 146aa4d9d1 Convert codebase to C89 compatibility and update test scripts 2026-04-29 10:21:29 +02:00
seeseemelk 189c21667b Ignore intellij files 2026-04-28 16:07:46 +02:00
seeseemelk abdc6d67dc Re-order log lines 2026-04-28 16:06:21 +02:00
seeseemelk d89833b705 Add TYPES documentation 2026-04-28 16:06:12 +02:00
seeseemelk bfb3b69be1 fix: add util.c to source files 2026-04-26 22:48:31 +02:00
seeseemelk dc523c8d3c chore: remove legacy v0/string.h 2026-04-26 22:42:10 +02:00
seeseemelk 05dfb3725b fix: replace unsafe fixed-size buffers with dynamic formatting helpers; add util format helpers; centralize log_on_line cleanup 2026-04-26 22:42:10 +02:00
seeseemelk 70998643fb Add AGENTS.md 2026-04-26 22:30:51 +02:00
seeseemelk 129036b539 Fix all valgrind errors 2026-04-26 22:13:39 +02:00
seeseemelk dbc69eddc8 Update test target to use valgrind 2026-04-26 21:35:14 +02:00
seeseemelk 421338d995 Fix log header generation and EOF location reporting 2026-04-26 21:34:28 +02:00
seeseemelk f33e8d3e25 Update log headers 2026-04-26 21:19:59 +02:00
seeseemelk c219a303ec Fix error reporting position and match updated log headers 2026-04-26 21:16:50 +02:00
seeseemelk 9449f16e02 Implement tokenstream_get_test and simplified assert_log_file using test names 2026-04-26 20:31:17 +02:00
seeseemelk e910c01348 Refactor golden files to follow xyz_log_ and xyz_src_ naming convention 2026-04-25 20:05:16 +02:00
seeseemelk a6bdadac0c Add public imports
Co-authored-by: Copilot <copilot@github.com>
2026-04-25 15:28:33 +02:00
seeseemelk 63dd5fa5c9 Implement public import parsing and add test case 2026-04-25 15:06:20 +02:00
seeseemelk d8544d7743 Add new rule to agent instructions 2026-04-25 14:38:35 +02:00
seeseemelk 91593e12b7 Add error logging and corresponding tests for parser syntax errors 2026-04-25 14:37:08 +02:00
seeseemelk 7c7e0c3272 Add import parsing
Co-authored-by: Copilot <copilot@github.com>
2026-04-25 14:30:11 +02:00
seeseemelk 116bdecafe Implement String structure and update Location/Token to use it 2026-04-25 14:17:17 +02:00
seeseemelk 902e2f0325 Update log_on_line to take Location* instead of individual fields 2026-04-24 22:13:29 +02:00
seeseemelk 26a1d0285e Refactor Token to use Location struct 2026-04-24 22:07:00 +02:00
seeseemelk a89e61eedd Introduce golden file mechanism for tests 2026-04-24 21:09:47 +02:00
seeseemelk 9ca72ef5bf Split test 2026-04-24 20:41:57 +02:00
seeseemelk 0306530fe8 Better logging in tokenstream 2026-04-24 20:40:31 +02:00
seeseemelk 451a9a2a22 Token refactor and better logs 2026-04-24 20:28:08 +02:00
seeseemelk da3425ec10 All target run tests 2026-04-24 20:04:43 +02:00
seeseemelk e021a2d63e During test, log to in-memory log 2026-04-24 20:04:00 +02:00
seeseemelk 0e826e05e1 Add log framework 2026-04-24 15:14:15 +02:00
seeseemelk 78899f32a6 Update copilot instructions 2026-04-24 14:59:53 +02:00
seeseemelk 0fa7b599ed Implement assert_str and assert_not_null and update tests 2026-04-24 14:57:52 +02:00
seeseemelk 594e33efd6 Add parser header 2026-04-24 14:52:53 +02:00
seeseemelk ec1a69f3dd Updated copilot instructions 2026-04-24 14:52:36 +02:00
seeseemelk a173e37adc Implement parser module and update AST 2026-04-24 14:50:54 +02:00
seeseemelk 2d1ccd52e6 Remove buffer module as it is no longer needed 2026-04-24 14:30:25 +02:00
seeseemelk b6aaa0c08f Implement tokenstream_info and refactor TokenStream interface 2026-04-24 14:28:57 +02:00
seeseemelk 1406cedd82 Add tokeninfo 2026-04-24 11:12:44 +02:00
seeseemelk 422203fdab Remove unnecessary NULL checks from token tests
Since buffer_open_string() always succeeds and tokenstream_open() always
succeeds when given a valid buffer, the NULL checks are unnecessary.
This simplifies the test code and makes it more readable.

All 12 tests still pass.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-24 11:04:05 +02:00
seeseemelk ed12c0a38e Refactor token tests to use in-memory strings instead of files
- Replaced all write_test_file() calls with buffer_open_string()
- Removed file I/O overhead - tests now run entirely in memory
- Removed unnecessary #include <stdio.h>, <stdlib.h>, <string.h> from test_token.c
- Simplified test_tokenstream_open_fail() to directly test NULL buffer handling
- Tests are faster and cleaner without temporary file creation
- No test output changes - all 12 tests still pass

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-24 11:03:14 +02:00
seeseemelk 49b9db5b75 Refactor token mapping: use keyword map for tokenization instead of strcmp
- Created KeywordMap structure with keyword-to-token mapping at top of token.c
- Added lookup_keyword() function to check if identifier is a keyword
- Replaced 3 strcmp calls (lines 99-101) with single lookup_keyword() call
- Removed token_to_string() function and its tests (3 tests removed)
- Single easy-to-read and modify keyword map serves both documentation and implementation
- Added new keywords by editing the keywords[] array at top of token.c

All 12 tests passing (removed token_to_string tests which are now unnecessary).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-24 09:44:19 +02:00
seeseemelk c1106d8e66 Update tokenstream_open to accept Buffer parameter instead of file path
The tokenstream_open function now takes a Buffer* parameter instead of a file
path string, making the API more flexible and allowing the caller to manage
buffer lifetime. The tokenstream_close function continues to close the underlying
buffer as documented.

- Changed tokenstream_open signature from (const char* path) to (Buffer* buffer)
- Updated implementation to accept and use the provided buffer directly
- Updated all tests to open buffers separately and pass them to tokenstream_open
- Added #include "buffer.h" to token.h for Buffer type definition
- All 15 tests pass

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-24 09:41:18 +02:00
seeseemelk dccdcb8ba5 Implement token.c with comprehensive tests and easy-to-modify token mapping
- Created token-to-string mapping array parallel to Token enum in token.c
- Implemented TokenStream with lookahead buffering for proper tokenization
- Implemented tokenstream_open/close/next functions with support for:
  - Keywords (module, import, void)
  - Symbols (parentheses, brackets, comma, semicolon)
  - Identifiers (alphanumeric starting with letter or underscore)
  - Comment skipping (// style)
  - Whitespace handling
- Added token_to_string function to token.h for token inspection
- Created comprehensive test suite (15 tests) covering all token types and edge cases
- All tests pass.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-24 09:35:18 +02:00
seeseemelk c73f99d9e6 Project setup 2026-04-24 09:29:53 +02:00
seeseemelk c90f3afd95 Implement all @copilot Makefile and buffer test annotations
- Defined V0_SRC_DEPS and V0_TEST_DEPS in v0/include.mk
- Updated clean rule to remove dependency files
- Referenced dependency variables for .d includes
- Added and deduplicated file-read test in v0/test_buffer.c

All tests pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-24 09:25:04 +02:00
seeseemelk 4939a74752 Update test paths after flattening v0 layout
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-24 08:20:54 +02:00
seeseemelk 7ff3f76de5 Implement buffer interface and tests
- Implement buffer_open_file/string, buffer_read, buffer_close in v0/src/buffer.c

- Add tests in v0/tests/test_buffer.c and test_buffer.txt; register tests in v0/tests/test.c

- Update v0/include.mk and Makefile

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-24 08:07:38 +02:00
seeseemelk f6a1b290fc Initial commit 2026-04-24 07:24:03 +02:00
88 changed files with 2409 additions and 14 deletions
+46
View File
@@ -0,0 +1,46 @@
# C2
C2 is a compiler for a new language.
See the README.md for information about this project.
## Code Changes
After every code change, ensure the binary builds correctly,
and run the unit tests (`make test`).
Ensure that every new function and code path has useful unit tests.
### Creating Source Files
Whenever a new source file is created, it must be added to the `include.mk` file.
A test file should also be created.
Test source files do not have to be added to the include.mk file.
These are added to the `test.c` file by means of directly `#include`ing the C file.
### Testing
Any test source code must be prefixed with test_xyz, where xyz matches
the source file it is trying to test.
For instance, a test for `buffer.c` must be called `test_buffer.c`.
There will be no `test_buffer.h`. Instead, `test.c` will
`#include` the C source file directly.
Every syntax error path identified in the parser MUST have a corresponding test.
## Language Syntax
Since this is a compiler for a new language, do not assume anything of its syntax.
Always check the `specs` directory to see examples and documentation about the language.
If there is anything unclear, ask the user for clarification.
It is certainly possible that there are contradictions in the
spec that have to be solved first.
## Committing
Often, the user modifies an interface (typically in a header file), and then asks
the agent to update the implementation.
When creating a commit, make sure that both the user's and the agent's modifications
are included in the commit.
Only create a commit when specifically asked for that. Never assume implicitly that the
user wants you to create a commit.
Even if they asked you to create a commit in an earlier task, it does not mean that
you should also create a commit in a later task.
+14
View File
@@ -0,0 +1,14 @@
---
name: implement
description: 'Implement all @copilot annotations'
---
# General
Find and implement all `@copilot` comments in the codebase. Modify only code related to those annotations, and always make sure that tests are added.
## Plan Mode
If you are currently in plan mode, look at all the annotations and always create a plan first.
Only start modifying code once the plan has been approved by the user.
## Implementation
Implement the changes requested at the location of the annotation.
After the implementation is finished, remove the comment containing the annotation.
+3 -1
View File
@@ -1 +1,3 @@
bin/ /c2
/.idea/*
!/.idea/c_cpp_properties.json
+12
View File
@@ -0,0 +1,12 @@
{
"configurations": [
{
"name": "CLion",
"includePath": [
"${workspaceFolder}/v0/*"
],
"cStandard": "c89"
}
],
"version": 4
}
Symlink
+1
View File
@@ -0,0 +1 @@
.github/copilot-instructions.md
Symlink
+1
View File
@@ -0,0 +1 @@
.github/copilot-instructions.md
+15 -13
View File
@@ -1,19 +1,21 @@
.PHONY: all clean .PHONY: all test clean
SRC := main.c all: c2 test integration-test
BINDIR := bin
OBJ := $(SRC:%.c=$(BINDIR)/%.o)
all: c2 c2: v0/bin/c2
cp $< $@
clean: test::
rm -rv bin
c2: $(OBJ) generate_golden::
gcc -o $@ $<
$(BINDIR)/%.o: %.c clean::
gcc -c -o $@ $< rm -f c2
$(BINDIR): include v0/include.mk
mkdir -p $(BINDIR)
integration-test: v0/bin/c2 v0/bin/test_integration
./v0/bin/test_integration
v0/bin/test_integration: v0/test_integration.c
$(CC) $(CFLAGS) -o $@ $<
+33
View File
@@ -0,0 +1,33 @@
# C2
C2 is a programming language based on C.
It adds modern high-level features such as metaprogramming, generics, etc,
but compiles down to C89.
## Building
To build the C2 compiler, simply run `make` or `make all`.
This will build the compiler and run the tests.
In order to only build the compiler, run `make c2`.
In order to run the tests, run `make test`.
## Versioning
The current version is v0. Its source code lives in the `v0` directory.
## Testing
### Unit Tests
Run unit tests with:
```bash
make test
```
### Integration Tests
Integration tests compare the compiler output with expected C files.
To add a new integration test, create a new directory under `v0/integration_tests/` with `input.c2` and `expected.c` files.
Run integration tests with:
```bash
make integration-test
```
## Language Specifications
See the specs directory for information on the actual language syntax.
+1
View File
@@ -0,0 +1 @@
Hello, world
+15
View File
@@ -0,0 +1,15 @@
# General
A C2 file starts with a module declaration followed by other declarations.
For instance:
```c2
module mymodule;
import libc.stdio;
void main() {
puts("Hello, world!");
}
```
.
+31
View File
@@ -0,0 +1,31 @@
# Imports
The import statement allows one module access to the public declarations of another module.
## Syntax
The import statement uses the following syntax:
```c2
import module_name;
```
They can optionally be prefixed by the `public` keyword, in which case the module will
export everything in the import transitively.
For instance,
```c2
--- a.c2
module a;
import b;
--- b.c2
module b;
public import c;
--- c.c2
module c;
// Some declarations
```
In this example, both module a and b can access the declarations in module c.
+25
View File
@@ -0,0 +1,25 @@
# Types
C2 has both built-in types and user-defined types.
## Builtin types
C2 has the following types builtin:
- `void`
- `i8`
- `i16`
- `i32`
- `i64`
- `u8`
- `u16`
- `u32`
- `u64`
## Type Aliases
Types can be aliased to different names using the alias keyword.
Here's a list of the default builtin aliases.
```c2
alias int = i32;
alias uint = u32;
alias char = u8;
alias string = char[];
```
+24
View File
@@ -0,0 +1,24 @@
# Variables
Variables can be defined in the global scope, in structs and classes, and in functions.
## Global variables
Global variables can be defined as such:
```c2
// Defines a global variable called my_var.
i32 my_var;
// Defines a const variable.
const i32 my_var;
// Defines a global variable whose type is determined automatically.
// The value will be determined at runtime.
var my_var = 123;
// Defines a const variable whose type is determined automatically.
const my_var = 123;
// Defines a global variable whose initial value is computed at compile-time.
// If it cannot be computed at compile-time, an error is thrown.
static my_var = 123;
```
+1
View File
@@ -0,0 +1 @@
/bin/
+21
View File
@@ -0,0 +1,21 @@
/**
* Holds the AST model
*/
#ifndef AST_H
#define AST_H
#include "ast/expression.h"
#include "ast/declaration.h"
#include "ast/module.h"
/**
* Frees a module and all its children.
*/
void ast_free_module(ModuleTree* module);
/**
* Frees a type expression.
*/
void ast_free_type(TypeTree* type);
#endif
+49
View File
@@ -0,0 +1,49 @@
#ifndef AST_DECLARATION_H
#define AST_DECLARATION_H
#include "expression.h"
#include "../bool.h"
/**
 * A declaration that imports another module.
 */
typedef struct {
/**
 * @brief The name of the module being imported.
 * Released with free() by ast_free_module().
 */
char* module_name;
/** @brief Whether the import is public or not. */
bool is_public;
} ImportTree;
/**
 * A declaration that aliases one type to another.
 */
typedef struct {
/**
 * @brief The name of the alias.
 * Although declared const, ast_free_module() releases it with free().
 */
const char* name;
/**
 * @brief The value of the alias.
 * Stored inline; its children are released through ast_free_type().
 */
TypeTree value;
} AliasTree;
/**
 * A declaration of a variable, which may be a constant or not, and may be static or not.
 */
typedef struct {
/** @brief The name of the variable. Released with free() by ast_free_module(). */
char* name;
/**
 * @brief The type of the variable.
 * Stored inline; its children are released through ast_free_type().
 */
TypeTree type;
/**
 * @brief The optional initializer expression.
 * NULL when the declaration has no initializer; otherwise released by
 * ast_free_module().
 */
ExpressionTree* initializer;
/** @brief Whether the variable is public or not. */
bool is_public;
/** @brief Whether the variable is static or not. */
bool is_static;
/** @brief Whether the variable is a constant or not. */
bool is_const;
} VariableTree;
#endif
+9
View File
@@ -0,0 +1,9 @@
#include "expression.h"
#include <stdlib.h>
/**
 * Releases the heap-allocated children of a type expression.
 *
 * For array types the element type is cleaned up recursively and then freed.
 * The node @p expr points at is NOT freed, because type nodes are commonly
 * embedded by value in their parent structure.
 *
 * @param expr The type expression to clean up. NULL is accepted and ignored.
 */
void ast_free_type(TypeTree* expr) {
    if (expr == NULL) {
        return;
    }
    if (expr->tag == TYPE_TREE_ARRAY) {
        ast_free_type(expr->array.array);
        free(expr->array.array);
        /* Avoid a dangling pointer if the node is inspected or freed again. */
        expr->array.array = NULL;
    }
}
+52
View File
@@ -0,0 +1,52 @@
#ifndef AST_EXPRESSION_H
#define AST_EXPRESSION_H
#include "../bool.h"
/**
 * Identifies which member of the ExpressionTree union holds the value.
 */
typedef enum {
/* The `integer` member is valid. */
EXPRESSION_TREE_INTEGER,
/* The `string` member is valid. */
EXPRESSION_TREE_STRING,
/* The `boolean` member is valid. */
EXPRESSION_TREE_BOOLEAN
} ExpressionTreeTag;
/**
 * A literal expression: an integer, a string, or a boolean.
 */
typedef struct {
/** @brief Defines which entry in the union is valid. */
ExpressionTreeTag tag;
union {
/** @brief The value of an integer literal. */
int integer;
/**
 * @brief The text of a string literal.
 * Released with free() by ast_free_module() when the tag is
 * EXPRESSION_TREE_STRING.
 */
const char* string;
/** @brief The value of a boolean literal. */
bool boolean;
};
} ExpressionTree;
/**
 * Identifies which member of the TypeTree union is valid.
 */
typedef enum {
/* The `builtin` member is valid. */
TYPE_TREE_BUILTIN,
/* The `array` member is valid. */
TYPE_TREE_ARRAY
} TypeTreeTag;
/**
 * An expression that evaluates to a type.
 *
 * Heap-allocated children are released with ast_free_type().
 */
typedef struct TypeTree TypeTree;
struct TypeTree {
/** @brief defines which entry in the union is valid */
TypeTreeTag tag;
union {
/** @brief Evaluates to an array of the given type. */
struct {
/**
 * @brief A pointer to the type of the elements stored in the array.
 * Owned by this node: ast_free_type() cleans it up and frees it.
 */
TypeTree* array;
} array;
/** @brief Evaluates to a builtin integer type.*/
struct {
/**
* @brief The number of bits in the integer.
* Typical values are 8, 16, 32, and 64.
*/
int bitSize;
/** @brief `true` if the type is signed, `false` if it's unsigned. */
bool isSigned;
} builtin;
};
};
#endif
+3
View File
@@ -0,0 +1,3 @@
# Source files of the ast directory.
# AST_SRC is added to V0_SRC by v0/include.mk.
AST_SRC := v0/ast/module.c v0/ast/expression.c
+43
View File
@@ -0,0 +1,43 @@
#include "module.h"
#include "expression.h"
#include <stdlib.h>
void ast_free_type(TypeTree* type);
/**
 * Frees a module together with everything it owns: the import names, the
 * alias names and types, the variable names, types and initializers, the
 * three declaration arrays, the module name, and the module itself.
 *
 * @param module The module to free. NULL is accepted and ignored.
 */
void ast_free_module(ModuleTree* module) {
    size_t idx;

    if (!module) {
        return;
    }

    /* Imports: each entry owns its module name. */
    if (module->imports) {
        for (idx = 0; idx != module->import_count; ++idx) {
            ImportTree* import = module->imports + idx;
            free(import->module_name);
        }
        free(module->imports);
    }

    /* Aliases: each entry owns its name and the children of its type. */
    if (module->aliases) {
        for (idx = 0; idx != module->alias_count; ++idx) {
            AliasTree* alias = module->aliases + idx;
            free((void*)alias->name);
            ast_free_type(&alias->value);
        }
        free(module->aliases);
    }

    /* Variables: name, type children, and the optional initializer. */
    if (module->variables) {
        for (idx = 0; idx != module->variable_count; ++idx) {
            VariableTree* variable = module->variables + idx;
            ExpressionTree* init = variable->initializer;

            free(variable->name);
            ast_free_type(&variable->type);
            if (init) {
                /* Only string literals carry a heap-allocated payload. */
                if (init->tag == EXPRESSION_TREE_STRING) {
                    free((void*)init->string);
                }
                free(init);
            }
        }
        free(module->variables);
    }

    free(module->name);
    free(module);
}
+34
View File
@@ -0,0 +1,34 @@
#ifndef AST_MODULE_H
#define AST_MODULE_H
#include "declaration.h"
#include <stddef.h>
/**
 * The top-level model.
 * Every file matches an entire Module.
 *
 * A module and everything it references is released with ast_free_module().
 */
typedef struct {
/** @brief The name of the module. Released with free() by ast_free_module(). */
char* name;
/** @brief The list of imports in the module. May be NULL when there are none. */
ImportTree* imports;
/** @brief The number of imports in the module. */
size_t import_count;
/** @brief The list of aliases in the module. May be NULL when there are none. */
AliasTree* aliases;
/** @brief The number of aliases in the module. */
size_t alias_count;
/** @brief The list of variables in the module. May be NULL when there are none. */
VariableTree* variables;
/** @brief The number of variables in the module. */
size_t variable_count;
} ModuleTree;
#endif
+10
View File
@@ -0,0 +1,10 @@
/* Minimal boolean type for C89 compatibility */
#ifndef BOOL_H
#define BOOL_H
/* bool is a plain int here: any non-zero value is truthy, but only 1 compares equal to `true`. */
typedef int bool;
/* Canonical truth values stored in a bool. */
#define true 1
#define false 0
#endif
+51
View File
@@ -0,0 +1,51 @@
include v0/ast/include.mk
include v0/parser/include.mk
V0_SRC := v0/main.c v0/util.c v0/token.c $(AST_SRC) $(PARSER_SRC) v0/log.c v0/str.c
# V0_TEST must only include `v0/test.c` itself, as all other test C source files are
# included directly into `v0/test.c` using `#include "test_xyz.c"`.
V0_TEST := v0/test.c
V0_SRC_OBJ := $(patsubst v0/%.c,v0/bin/%.o,$(V0_SRC))
V0_TEST_OBJ := $(patsubst v0/%.c,v0/bin/%.o,$(V0_TEST))
# Define dependency file lists for sources and tests
V0_SRC_DEPS := $(V0_SRC_OBJ:.o=.d)
V0_TEST_DEPS := $(V0_TEST_OBJ:.o=.d)
CFLAGS += -Werror -Wall -pedantic -std=c11 -g
v0/bin/c2: $(V0_SRC_OBJ)
$(CC) $(CFLAGS) -o $@ $^
V0_SRC_OBJ_NO_MAIN := $(filter-out v0/bin/main.o,$(V0_SRC_OBJ))
v0/bin/test: $(V0_SRC_OBJ_NO_MAIN) $(V0_TEST_OBJ)
$(CC) $(CFLAGS) -o $@ $^
# Only run tests under valgrind on Linux. On macOS (Darwin) valgrind is
# typically unavailable or unsupported, so run the test binary directly.
ifeq ($(shell uname -s),Linux)
TEST_CMD := valgrind --quiet --leak-check=full --error-exitcode=1 v0/bin/test
else
TEST_CMD := v0/bin/test
endif
test:: v0/bin/test
$(TEST_CMD)
generate_golden:: v0/bin/test
GENERATE_GOLDEN=1 v0/bin/test
clean::
rm -f v0/bin/test v0/bin/c2 $(V0_SRC_OBJ) $(V0_TEST_OBJ) $(V0_SRC_DEPS) $(V0_TEST_DEPS)
# Build each .c file into a .o file, tracking header dependencies.
v0/bin/%.o: v0/%.c v0/include.mk
@mkdir -p $(dir $@)
$(CC) $(CFLAGS) -MMD -MP -c $< -o $@
# Reference dependency files
-include $(V0_SRC_DEPS)
-include $(V0_TEST_DEPS)
+4
View File
@@ -0,0 +1,4 @@
#include <stdint.h>
// u32 simple:x
static uint32_t v_6simple_1x = 123;
+2
View File
@@ -0,0 +1,2 @@
module simple;
u32 x = 123;
+28
View File
@@ -0,0 +1,28 @@
/**
* Location handling for error reporting.
*/
#ifndef LOCATION_H
#define LOCATION_H
#include "str.h"
#include <stddef.h>
/**
 * Describes where a token was found, so errors can quote the offending line.
 */
typedef struct {
/** @brief The name of the file where the token was found. */
char* filename;
/** @brief The entire line of text where the token was found. */
String line_text;
/** @brief The line number where the token was found. */
int line;
/**
 * @brief The starting column number where the token was found.
 * NOTE(review): log_on_line() indents by `column_start - 1`, which suggests
 * columns are 1-based — confirm against the tokenizer.
 */
int column_start;
/**
 * @brief The ending column number where the token was found.
 * NOTE(review): log_on_line() draws `column_end - column_start + 1` carets,
 * which suggests the end column is inclusive — confirm against the tokenizer.
 */
int column_end;
} Location;
#endif
+87
View File
@@ -0,0 +1,87 @@
#include "log.h"
#include "util.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
static LogError* s_logError = NULL;
/*
 * Installs the callback that log_error() forwards messages to.
 * Passing NULL makes log_error() fall back to printing on stderr.
 */
void log_set_output(LogError* destination) {
s_logError = destination;
}
void log_error(const char* msg) {
if (s_logError != NULL) {
s_logError(msg);
} else {
fprintf(stderr, "Error: %s\n", msg);
}
}
/*
 * Logs an error that quotes the source line it refers to.
 *
 * The report handed to log_error() has this layout:
 *
 *     --- <filename> ---
 *     <line>| <source text>
 *              ^^^^            (carets under column_start..column_end)
 *            <formatted message>
 *
 * The buffer is sized exactly from the parts that are written, and every
 * formatted write is bounded by the space remaining. If a part cannot be
 * built, or the write would not fit, nothing is logged.
 *
 * loc  Location of the error; must not be NULL.
 * msg  printf-style format string for the message.
 * ...  Arguments consumed by msg.
 */
void log_on_line(Location* loc, const char* msg, ...) {
    char* line_prefix = NULL;
    char* formatted_msg = NULL;
    char* header = NULL;
    char* buffer = NULL;
    va_list args;
    size_t prefix_len;
    size_t caret_len;
    size_t indent_len;
    size_t total_size;
    size_t used;
    long indent;
    int written;

    line_prefix = format_string("%d| ", loc->line);
    if (!line_prefix) goto cleanup;
    prefix_len = strlen(line_prefix);

    /* Always draw at least one caret, even for empty or inverted ranges. */
    caret_len = 1;
    if (loc->column_end > loc->column_start) {
        caret_len = (size_t)(loc->column_end - loc->column_start) + 1;
    }

    /* Carets are shifted past the "<line>| " prefix; never indent negatively. */
    indent = (long)prefix_len + loc->column_start - 1;
    indent_len = indent > 0 ? (size_t)indent : 0;

    /* Format the message */
    va_start(args, msg);
    formatted_msg = format_string_va(msg, args);
    va_end(args);
    if (!formatted_msg) goto cleanup;

    /* Header logic */
    if (loc->filename && loc->filename[0] != '\0') {
        header = format_string("--- %s ---\n", loc->filename);
    } else {
        header = format_string("--- \n");
    }
    if (!header) goto cleanup;

    total_size = strlen(header)
        + prefix_len + loc->line_text.length + 1    /* "<line>| <text>\n" */
        + indent_len + caret_len + 1                /* indent + carets + '\n' */
        + prefix_len + strlen(formatted_msg) + 1    /* indent + msg + '\n' */
        + 1;                                        /* terminating NUL */

    buffer = malloc(total_size);
    if (!buffer) goto cleanup;

    /* Header followed by the quoted source line. */
    written = snprintf(buffer, total_size, "%s%s%.*s\n", header, line_prefix,
                       (int)loc->line_text.length, loc->line_text.data);
    if (written < 0 || (size_t)written >= total_size) goto cleanup;
    used = (size_t)written;

    /* Caret line and the indent of the message line. */
    if (total_size - used < indent_len + caret_len + 1 + prefix_len + 1) goto cleanup;
    memset(buffer + used, ' ', indent_len);
    used += indent_len;
    memset(buffer + used, '^', caret_len);
    used += caret_len;
    buffer[used++] = '\n';
    memset(buffer + used, ' ', prefix_len);
    used += prefix_len;

    /* Message line; snprintf also writes the terminating NUL. */
    written = snprintf(buffer + used, total_size - used, "%s\n", formatted_msg);
    if (written < 0 || (size_t)written >= total_size - used) goto cleanup;

    log_error(buffer);

cleanup:
    free(line_prefix);
    free(formatted_msg);
    free(header);
    free(buffer);
}
+36
View File
@@ -0,0 +1,36 @@
/**
* Contains the logging framework used for logging errors during compilation.
*/
#ifndef LOG_H
#define LOG_H
#include "location.h"
/**
* A method that can log an error.
*/
typedef void LogError(const char* msg);
/**
* Sets the destination for log errors.
*/
void log_set_output(LogError* destination);
/**
* Logs an error to the destination.
*/
void log_error(const char* msg);
/**
* Logs a pretty error with additional information about the line where the error occurred.
*
* The @p msg parameter can contain format specifiers like printf, and the additional arguments will be formatted into the message.
* It additionally supports the `%S` format specifier, which can be used to format a `String` structure from `str.h`.
*
* @param loc The location where the error occurred.
* @param msg The error message to log. This can contain format specifiers like printf, and the additional arguments will be formatted into the message.
* @param ... Additional arguments to format into the error message.
*/
void log_on_line(Location* loc, const char* msg, ...);
#endif
+6
View File
@@ -0,0 +1,6 @@
#include <stdio.h>
/*
 * Program entry point. Currently a placeholder: it ignores its arguments,
 * prints a greeting to stdout and exits successfully.
 */
int main(int argc, char** argv) {
puts("Hello, world");
return 0;
}
+15
View File
@@ -0,0 +1,15 @@
#ifndef PARSER_H
#define PARSER_H
#include "ast.h"
#include "token.h"
/**
* Parse a stream of tokens into a module.
*
* @param ts The TokenStream to read.
* @returns The parsed module.
*/
ModuleTree* parser_parse(TokenStream* ts);
#endif
+52
View File
@@ -0,0 +1,52 @@
#include "internal.h"
#include "../str.h"
#include "../log.h"
#include <stdlib.h>
/*
 * Advances the parser: reads the next token from the token stream and makes
 * it the current token (p->token).
 */
void parser_next_token(Parser* p) {
p->token = tokenstream_next(p->ts);
}
/*
 * Consumes the current token if it has the given type.
 *
 * Returns true and advances to the next token on a match; returns false and
 * leaves the parser untouched otherwise. Nothing is logged.
 */
bool parser_accept(Parser* p, TokenType token) {
    if (p->token.token != token) {
        return false;
    }
    parser_next_token(p);
    return true;
}
/*
 * Consumes the current token if it has the given type, otherwise reports an
 * error at the current token's location.
 *
 * msg is logged verbatim: it is passed as an argument to a "%s" format, not
 * used as the format string itself, so a '%' in the message is harmless.
 *
 * Returns true when the token was consumed, false when the error was logged
 * (the current token is left in place in that case).
 */
bool parser_expect(Parser* p, TokenType token, const char* msg) {
    if (parser_accept(p, token)) {
        return true;
    }
    log_on_line(&p->token.location, "%s", msg);
    return false;
}
/*
 * Tests whether the current token has the given type without consuming it.
 */
bool parser_peek(Parser* p, TokenType token) {
    return p->token.token == token;
}
/*
 * Checks that the current token has the given type without consuming it,
 * reporting an error at the current token's location when it does not.
 *
 * msg is logged verbatim: it is passed as an argument to a "%s" format, not
 * used as the format string itself, so a '%' in the message is harmless.
 *
 * Returns true on a match, false when the error was logged.
 */
bool parser_require(Parser* p, TokenType token, const char* msg) {
    if (parser_peek(p, token)) {
        return true;
    }
    log_on_line(&p->token.location, "%s", msg);
    return false;
}
/*
 * Returns a copy of the current token's text and advances to the next token.
 *
 * The copy is made with string_copy() before the token is replaced.
 * NOTE(review): the result is presumably heap-allocated and owned by the
 * caller — the AST stores these pointers and ast_free_module() frees them;
 * confirm against string_copy().
 */
char* parser_to_text(Parser* p) {
char* str = string_copy(p->token.text);
parser_next_token(p);
return str;
}
/*
 * Tests whether the current token is one of the builtin integer type
 * keywords (i8..i64, u8..u64).
 *
 * Despite the name, the token is only peeked at; it is never consumed.
 */
bool parser_accept_primitive(Parser* p) {
    static const TokenType primitives[] = {
        TOKEN_I8, TOKEN_I16, TOKEN_I32, TOKEN_I64,
        TOKEN_U8, TOKEN_U16, TOKEN_U32, TOKEN_U64
    };
    size_t i;

    for (i = 0; i < sizeof primitives / sizeof primitives[0]; i++) {
        if (parser_peek(p, primitives[i])) {
            return true;
        }
    }
    return false;
}
+87
View File
@@ -0,0 +1,87 @@
#include "internal.h"
#include <stdlib.h>
#include <string.h>
/*
 * Parses the remainder of an import declaration (`<module> ;`) and appends
 * it to module->imports.
 *
 * The new entry is appended before parsing starts, so a partially parsed
 * import stays owned by the module and is released by ast_free_module().
 * If the array cannot be grown, the module is left unchanged.
 *
 * Returns true on success, false on a syntax error (already logged) or when
 * memory could not be allocated.
 */
bool parse_import_declaration(Parser* p, ModuleTree* module, bool is_public) {
    /* Grow through a temporary so the existing array survives a failure. */
    ImportTree* grown = realloc(module->imports,
                                sizeof *grown * (module->import_count + 1));
    if (grown == NULL) {
        return false;
    }
    module->imports = grown;

    ImportTree* import = &module->imports[module->import_count];
    module->import_count++;
    memset(import, 0, sizeof *import);
    import->is_public = is_public;

    if (!parser_require(p, TOKEN_IDENTIFIER, "expected module identifier")) {
        return false;
    }
    import->module_name = parser_to_text(p);

    if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after import")) {
        return false;
    }

    return true;
}
/*
 * Parses the remainder of an alias declaration (`<name> = <type> ;`) and
 * appends it to module->aliases.
 *
 * The new entry is appended before parsing starts, so a partially parsed
 * alias stays owned by the module and is released by ast_free_module().
 * If the array cannot be grown, the module is left unchanged.
 *
 * is_public is accepted for symmetry with the other declaration parsers but
 * is not recorded yet.
 *
 * Returns true on success, false on a syntax error (already logged) or when
 * memory could not be allocated.
 */
bool parse_alias_declaration(Parser* p, ModuleTree* module, bool is_public) {
    (void)is_public;

    /* Grow through a temporary so the existing array survives a failure. */
    AliasTree* grown = realloc(module->aliases,
                               sizeof *grown * (module->alias_count + 1));
    if (grown == NULL) {
        return false;
    }
    module->aliases = grown;

    AliasTree* alias = &module->aliases[module->alias_count];
    module->alias_count++;
    memset(alias, 0, sizeof *alias);

    if (!parser_require(p, TOKEN_IDENTIFIER, "expected alias identifier")) {
        return false;
    }
    alias->name = parser_to_text(p);

    if (!parser_expect(p, TOKEN_ASSIGN, "expected '=' after alias name")) {
        return false;
    }

    if (!parse_type_expression(p, &alias->value)) {
        return false;
    }

    if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after alias declaration")) {
        return false;
    }

    return true;
}
/*
 * Parses a variable declaration (`[<type>] <name> [= <expression>] ;`) and
 * appends it to module->variables.
 *
 * The new entry is appended before parsing starts, so a partially parsed
 * variable stays owned by the module and is released by ast_free_module().
 * If the array cannot be grown, the module is left unchanged.
 *
 * When no builtin type keyword is present, the type is left zero-initialized.
 *
 * Returns true on success, false on a syntax error (already logged) or when
 * memory could not be allocated.
 */
bool parse_variable_declaration(Parser* p, ModuleTree* module, bool is_public, bool is_static, bool is_const) {
    /* Grow through a temporary so the existing array survives a failure. */
    VariableTree* grown = realloc(module->variables,
                                  sizeof *grown * (module->variable_count + 1));
    if (grown == NULL) {
        return false;
    }
    module->variables = grown;

    VariableTree* var = &module->variables[module->variable_count];
    module->variable_count++;
    memset(var, 0, sizeof *var);
    var->is_public = is_public;
    var->is_static = is_static;
    var->is_const = is_const;

    if (parser_accept_primitive(p)) {
        if (!parse_type_expression(p, &var->type)) {
            return false;
        }
    }

    if (!parser_require(p, TOKEN_IDENTIFIER, "expected variable identifier")) {
        return false;
    }
    var->name = parser_to_text(p);

    if (parser_accept(p, TOKEN_ASSIGN)) {
        /*
         * Zero-initialize the node: if parse_expression() fails, the module
         * still owns it, and ast_free_module() inspects its tag to decide
         * whether a string payload must be freed.
         */
        var->initializer = calloc(1, sizeof *var->initializer);
        if (var->initializer == NULL) {
            return false;
        }
        if (!parse_expression(p, var->initializer)) {
            return false;
        }
    }

    if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after variable declaration")) {
        return false;
    }

    return true;
}
+98
View File
@@ -0,0 +1,98 @@
#include "internal.h"
#include "../log.h"
#include <limits.h>
#include <stdlib.h>
/*
 * Parses a builtin integer type keyword (u8..u64, i8..i64) into expr.
 *
 * On a match the keyword is consumed and expr becomes a TYPE_TREE_BUILTIN
 * node with the matching bit size and signedness. Otherwise an
 * "expected type expression" error is logged at the current token and expr
 * is left untouched.
 *
 * Returns true on success, false on a syntax error.
 */
bool parse_primitive_type_expression(Parser* p, TypeTree* expr) {
    /* Keyword -> (bit size, signedness), tried in this order. */
    static const struct {
        TokenType token;
        int bits;
        bool is_signed;
    } primitives[] = {
        { TOKEN_U8,  8,  false },
        { TOKEN_U16, 16, false },
        { TOKEN_U32, 32, false },
        { TOKEN_U64, 64, false },
        { TOKEN_I8,  8,  true },
        { TOKEN_I16, 16, true },
        { TOKEN_I32, 32, true },
        { TOKEN_I64, 64, true }
    };
    size_t i;

    for (i = 0; i < sizeof primitives / sizeof primitives[0]; i++) {
        if (parser_accept(p, primitives[i].token)) {
            expr->tag = TYPE_TREE_BUILTIN;
            expr->builtin.bitSize = primitives[i].bits;
            expr->builtin.isSigned = primitives[i].is_signed;
            return true;
        }
    }

    log_on_line(&p->token.location, "expected type expression");
    return false;
}
/*
 * Parses a builtin type optionally followed by `[]` into expr.
 *
 * Without brackets expr receives the builtin type itself. With brackets expr
 * becomes a TYPE_TREE_ARRAY node that owns a heap-allocated copy of the
 * element type (released by ast_free_type()).
 *
 * expr is only turned into an array node once the element allocation has
 * succeeded, so it never carries the array tag together with a NULL element.
 *
 * Returns true on success, false on a syntax error (already logged) or when
 * memory could not be allocated.
 */
bool parse_array_type_expression(Parser* p, TypeTree* expr) {
    TypeTree element_type;
    TypeTree* element;

    if (!parse_primitive_type_expression(p, &element_type)) {
        return false;
    }

    if (!parser_accept(p, TOKEN_BRACKET_OPEN)) {
        *expr = element_type;
        return true;
    }

    element = malloc(sizeof *element);
    if (element == NULL) {
        return false;
    }
    *element = element_type;

    expr->tag = TYPE_TREE_ARRAY;
    expr->array.array = element;

    /* On a missing ']' the node stays valid, so the caller can still free it. */
    return parser_expect(p, TOKEN_BRACKET_CLOSE, "expected ']' to end array type");
}
// Entry point for parsing a type expression.
// Currently forwards directly to parse_array_type_expression and returns its result.
bool parse_type_expression(Parser* p, TypeTree* expr) {
    return parse_array_type_expression(p, expr);
}
// Parses a literal expression: an integer, a string, `true` or `false`.
//
// On success `expr` is tagged and filled with the literal's value and true is
// returned. If the current token is none of these, or an integer literal does
// not fit in an `int`, an error is logged at the current token and false is
// returned.
bool parse_expression(Parser* p, ExpressionTree* expr) {
    if (parser_peek(p, TOKEN_INTEGER)) {
        long value;

        // strtol reports overflow through errno, whereas atoi has undefined
        // behaviour for out-of-range input. Parsing stops at the first
        // non-digit character, exactly as atoi did.
        errno = 0;
        value = strtol(p->token.text.data, NULL, 10);
        if (errno == ERANGE || value < INT_MIN || value > INT_MAX) {
            log_on_line(&p->token.location, "integer literal out of range");
            return false;
        }
        expr->tag = EXPRESSION_TREE_INTEGER;
        expr->integer = (int)value;
        parser_next_token(p);
        return true;
    } else if (parser_peek(p, TOKEN_STRING)) {
        expr->tag = EXPRESSION_TREE_STRING;
        expr->string = parser_to_text(p);
        return true;
    } else if (parser_accept(p, TOKEN_TRUE)) {
        expr->tag = EXPRESSION_TREE_BOOLEAN;
        expr->boolean = true;
        return true;
    } else if (parser_accept(p, TOKEN_FALSE)) {
        expr->tag = EXPRESSION_TREE_BOOLEAN;
        expr->boolean = false;
        return true;
    }
    log_on_line(&p->token.location, "expected expression");
    return false;
}
+1
View File
@@ -0,0 +1 @@
PARSER_SRC := v0/parser/core.c v0/parser/expression.c v0/parser/declaration.c v0/parser/module.c
+36
View File
@@ -0,0 +1,36 @@
#ifndef PARSER_INTERNAL_H
#define PARSER_INTERNAL_H
#include "../parser.h"
#include "../token.h"
#include "../ast.h"
// Parser state passed to every parser_* and parse_* routine.
typedef struct {
    TokenStream* ts; // token source; parser_parse stores its argument here
    Token token;     // token currently being examined (its text and location are read by the parse_* routines)
} Parser;
// Core functions
// NOTE(review): these are defined outside this header; the notes below are
// inferred from call sites only and should be confirmed against the definitions.
// parser_next_token: callers invoke it after reading p->token to move on to the next token.
void parser_next_token(Parser* p);
// parser_accept: used as a condition; callers do not advance separately afterwards (presumably consumes on match).
bool parser_accept(Parser* p, TokenType token);
// parser_expect: like a mandatory accept; callers treat false as a parse error described by `msg`.
bool parser_expect(Parser* p, TokenType token, const char* msg);
// parser_peek: used as a condition; callers advance explicitly afterwards (presumably does not consume).
bool parser_peek(Parser* p, TokenType token);
// parser_require: callers read the current token's text right after it succeeds (presumably does not consume).
bool parser_require(Parser* p, TokenType token, const char* msg);
// parser_to_text: returns the current token's text as a char*; callers store the result in the tree.
char* parser_to_text(Parser* p);
bool parser_accept_primitive(Parser* p);
// Base parsing (expressions, types)
bool parse_primitive_type_expression(Parser* p, TypeTree* expr);
bool parse_array_type_expression(Parser* p, TypeTree* expr);
bool parse_type_expression(Parser* p, TypeTree* expr);
bool parse_expression(Parser* p, ExpressionTree* expr);
// Declaration parsing
// Each routine receives the modifiers collected by parser_parse before the declaration keyword.
bool parse_import_declaration(Parser* p, ModuleTree* module, bool is_public);
bool parse_alias_declaration(Parser* p, ModuleTree* module, bool is_public);
bool parse_variable_declaration(Parser* p, ModuleTree* module, bool is_public, bool is_static, bool is_const);
// Module parsing
bool parse_module_declaration(Parser* p, ModuleTree* module);
#endif
+87
View File
@@ -0,0 +1,87 @@
#include "internal.h"
#include "../log.h"
#include <stdlib.h>
#include <string.h>
// Parses the leading `module <identifier>;` declaration.
//
// Stores the identifier text in `module->name`. Returns false as soon as the
// keyword, the identifier or the trailing semicolon is missing.
bool parse_module_declaration(Parser* p, ModuleTree* module) {
    // Short-circuit keeps the original order: keyword first, then identifier.
    bool has_header = parser_expect(p, TOKEN_MODULE, "expected keyword 'module'")
        && parser_require(p, TOKEN_IDENTIFIER, "expected module identifier");
    if (!has_header) {
        return false;
    }

    module->name = parser_to_text(p);

    if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after module name")) {
        return false;
    }
    return true;
}
// Parses a whole token stream into a ModuleTree.
//
// The stream must start with a module declaration, followed by any number of
// import, alias and variable declarations. Each declaration may be preceded
// by the modifiers `public`, `static` and `const` in any order.
//
// Returns a heap-allocated module on success. The unit-test harness releases
// it with ast_free_module. Returns NULL after logging an error if parsing
// fails or memory cannot be allocated.
ModuleTree* parser_parse(TokenStream* ts) {
    Parser* p = calloc(1, sizeof *p);
    ModuleTree* module = calloc(1, sizeof *module);

    if (p == NULL || module == NULL) {
        log_error("out of memory");
        // The module is still all zeroes here, so a plain free is enough.
        free(p);
        free(module);
        return NULL;
    }

    p->ts = ts;
    parser_next_token(p);

    if (!parse_module_declaration(p, module)) {
        goto fail;
    }

    while (!parser_peek(p, TOKEN_EOF)) {
        bool is_public = false;
        bool is_static = false;
        bool is_const = false;
        bool terminal = false;

        // Collect modifiers until a declaration keyword ends the declaration.
        while (!terminal) {
            if (parser_accept(p, TOKEN_IMPORT)) {
                if (is_static || is_const) {
                    log_on_line(&p->token.location, "import declarations cannot be static or const");
                    goto fail;
                }
                if (!parse_import_declaration(p, module, is_public)) {
                    goto fail;
                }
                terminal = true;
            } else if (parser_accept(p, TOKEN_ALIAS)) {
                if (is_static || is_const) {
                    log_on_line(&p->token.location, "alias declarations cannot be static or const");
                    goto fail;
                }
                if (!parse_alias_declaration(p, module, is_public)) {
                    goto fail;
                }
                terminal = true;
            } else if (parser_accept(p, TOKEN_PUBLIC)) {
                is_public = true;
            } else if (parser_accept(p, TOKEN_STATIC)) {
                is_static = true;
            } else if (parser_accept(p, TOKEN_CONST)) {
                is_const = true;
            } else if (parser_accept(p, TOKEN_VAR) || parser_accept_primitive(p)) {
                if (!parse_variable_declaration(p, module, is_public, is_static, is_const)) {
                    goto fail;
                }
                terminal = true;
            } else {
                log_on_line(&p->token.location, "unexpected token");
                goto fail;
            }
        }
    }

    free(p);
    return module;

fail:
    free(p);
    ast_free_module(module);
    return NULL;
}
+8
View File
@@ -0,0 +1,8 @@
#include "../test.h"
#include "../parser.h"
// Currently core utilities are tested indirectly through other parser tests.
// Placeholder for future explicit core utility tests.
// The function is registered in the test table and always passes because it
// never calls fail().
static void test_parser_core_placeholder(void) {
    // No-op
}
+89
View File
@@ -0,0 +1,89 @@
#include "../test.h"
#include "../parser.h"
#include <string.h>
#include <stdlib.h>
// Parses an import declaration without a trailing ';' and compares the
// logged output against this test's golden log file.
static void test_parser_missing_semicolon_import(void) {
    test_get_ast(); // return value unused; only the logged output matters here
    assert_log_file("expected error for missing semicolon");
}
// Parses an import declaration with no module name and compares the logged
// output against this test's golden log file.
static void test_parser_bad_import_name(void) {
    test_get_ast(); // return value unused; only the logged output matters here
    assert_log_file("expected error for bad import name");
}
// Checks that a single non-public import is recorded with the right module
// name, alongside the enclosing module's own name.
static void test_parser_imports(void) {
    ModuleTree* m = test_get_ast();
    assert_not_null(m, "expected module to be parsed");
    assert_str("my_module", m->name, "expected name 'my_module'");
    assert_not_null(m->imports, "expected imports to be parsed");
    assert_int(1, (int)m->import_count, "expected one import");
    assert_str("other_module", m->imports[0].module_name, "expected import name 'other_module'");
    assert_false(m->imports[0].is_public, "expected import to not be public");
}
// Same as test_parser_imports, but the import is expected to be flagged public.
static void test_parser_public_imports(void) {
    ModuleTree* m = test_get_ast();
    assert_not_null(m, "expected module to be parsed");
    assert_str("my_module", m->name, "expected name 'my_module'");
    assert_not_null(m->imports, "expected imports to be parsed");
    assert_int(1, (int)m->import_count, "expected one import");
    assert_str("other_module", m->imports[0].module_name, "expected import name 'other_module'");
    assert_true(m->imports[0].is_public, "expected import to be public");
}
// Checks that a single alias declaration is recorded under its name.
static void test_parser_alias_simple(void) {
    ModuleTree* module = test_get_ast();
    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->alias_count, "expected correct number of aliases");

    // Only dereferenced once the count assertion has passed.
    const AliasTree* first = &module->aliases[0];
    assert_str("myalias", first->name, "expected correct alias name");
}
// Checks that a plain variable declaration yields one variable that is
// neither const nor static.
static void test_parser_variable_simple(void) {
    ModuleTree* module = test_get_ast();
    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->variable_count, "expected correct number of variables");

    const VariableTree* first = &module->variables[0];
    assert_str("my_var", first->name, "expected correct variable name");
    assert_false(first->is_const, "expected not const");
    assert_false(first->is_static, "expected not static");
}
// Checks that the `const` modifier is recorded on the variable and that
// `static` stays unset.
static void test_parser_variable_const(void) {
    ModuleTree* module = test_get_ast();
    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->variable_count, "expected correct number of variables");

    const VariableTree* first = &module->variables[0];
    assert_str("my_const", first->name, "expected correct variable name");
    assert_true(first->is_const, "expected const");
    assert_false(first->is_static, "expected not static");
}
// Checks that the `static` modifier is recorded on the variable and that
// `const` stays unset.
static void test_parser_variable_static(void) {
    ModuleTree* module = test_get_ast();
    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->variable_count, "expected correct number of variables");

    const VariableTree* first = &module->variables[0];
    assert_str("my_static", first->name, "expected correct variable name");
    assert_false(first->is_const, "expected not const");
    assert_true(first->is_static, "expected static");
}
static void test_parser_multiple_vars(void) {
ModuleTree* m = test_get_ast();
assert_not_null(m, "expected module to be parsed");
assert_int(2, (int)m->variable_count, "expected correct number of variables");
assert_str("var1", m->variables[0].name, "expected first variable name 'var1'");
assert_str("var2", m->variables[1].name, "expected second variable name 'var2'");
}
+52
View File
@@ -0,0 +1,52 @@
#include "../test.h"
#include "../parser.h"
#include <string.h>
#include <stdlib.h>
// Checks that an alias of a built-in type records a signed 32-bit builtin.
static void test_parser_alias_simple_type(void) {
    ModuleTree* module = test_get_ast();
    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->alias_count, "expected correct number of aliases");

    const AliasTree* first = &module->aliases[0];
    assert_int(TYPE_TREE_BUILTIN, first->value.tag, "expected correct alias tag");
    assert_int(32, first->value.builtin.bitSize, "expected bitSize 32");
    assert_true(first->value.builtin.isSigned, "expected signed");
}
// Checks that an alias of an array type records an array node whose element
// is a signed 32-bit builtin.
static void test_parser_alias_array(void) {
    ModuleTree* module = test_get_ast();
    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->alias_count, "expected correct number of aliases");

    const AliasTree* first = &module->aliases[0];
    assert_int(TYPE_TREE_ARRAY, first->value.tag, "expected correct alias tag");

    TypeTree* element = first->value.array.array;
    assert_not_null(element, "expected pointer to array type");
    assert_int(TYPE_TREE_BUILTIN, element->tag, "expected correct type tag");
    assert_int(32, element->builtin.bitSize, "expected bitSize 32");
    assert_true(element->builtin.isSigned, "expected signed");
}
// Checks that a variable with an integer initializer records name `x` and value 123.
static void test_parser_variable_init(void) {
    ModuleTree* module = test_get_ast();
    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->variable_count, "expected 1 variable");

    // Copied only after the count assertion has passed.
    VariableTree first = module->variables[0];
    assert_str("x", first.name, "expected variable name 'x'");
    assert_not_null(first.initializer, "expected variable to have an initializer");
    assert_int(EXPRESSION_TREE_INTEGER, first.initializer->tag, "expected integer initializer");
    assert_int(123, first.initializer->integer, "expected value 123");
}
// Checks that a variable declared with a built-in type records a signed 32-bit builtin.
static void test_parser_variable_simple_type(void) {
    ModuleTree* module = test_get_ast();
    assert_not_null(module, "expected module to be parsed");
    assert_int(1, (int)module->variable_count, "expected correct number of variables");

    const VariableTree* first = &module->variables[0];
    assert_int(TYPE_TREE_BUILTIN, first->type.tag, "expected correct type tag");
    assert_int(32, first->type.builtin.bitSize, "expected bitSize 32");
    assert_true(first->type.builtin.isSigned, "expected signed");
}
+21
View File
@@ -0,0 +1,21 @@
#include "../test.h"
#include "../parser.h"
#include <string.h>
#include <stdlib.h>
// Checks that the module declaration's identifier ends up in the module's name.
static void test_parser_module_name(void) {
    ModuleTree* m = test_get_ast();
    assert_not_null(m, "expected module to be parsed");
    assert_str("my_module", m->name, "expected name 'my_module'");
}
// Parses a source that does not start with a module declaration and compares
// the logged output against this test's golden log file.
static void test_parser_bad_module_name(void) {
    test_get_ast(); // return value unused; only the logged output matters here
    assert_log_file("expected error to be logged for bad module name");
}
// Parses a module declaration without a trailing ';' and compares the logged
// output against this test's golden log file.
static void test_parser_missing_semicolon_module(void) {
    test_get_ast(); // return value unused; only the logged output matters here
    assert_log_file("expected error for missing semicolon");
}
+11
View File
@@ -0,0 +1,11 @@
#include "str.h"
#include <string.h>
#include <stdlib.h>
/*
 * Creates a heap-allocated, null-terminated copy of `string`.
 *
 * The caller owns the result and must free() it. Returns NULL if the
 * allocation fails. An empty string (length 0, even with a NULL data
 * pointer) yields an empty C string.
 */
char* string_copy(String string) {
    char* str = malloc(string.length + 1);
    if (str == NULL) {
        return NULL;
    }
    // memcpy must not be handed a NULL source, even for zero bytes.
    if (string.length > 0 && string.data != NULL) {
        memcpy(str, string.data, string.length);
    }
    str[string.length] = '\0';
    return str;
}
+27
View File
@@ -0,0 +1,27 @@
/**
* Contains the definition of the String structure, which is a simple representation of a string in C.
*/
#ifndef STR_H
#define STR_H
#include <stddef.h>
/**
* A simple string structure that holds a pointer to the character data and its length.
*/
typedef struct {
    /* Pointer to the first character. NOTE(review): presumably not null-terminated, since string_copy appends the terminator itself — confirm. */
    char* data;
    /* Number of characters at `data`. */
    size_t length;
} String;
/**
* Creates a copy of a string.
*
* Note that this copy has to be freed afterwards.
*
* @param string The string to copy.
* @returns A null-terminated copy of the string.
*/
char* string_copy(String string);
#endif
+303
View File
@@ -0,0 +1,303 @@
#include "test.h"
#include "util.h"
#include "parser.h"
#include <setjmp.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Jump target set up by main() before each test; fail() longjmps to it. */
static jmp_buf s_testJmp;
/* Message of the most recent failure, filled in by fail(). */
static char s_failMsg[1024];
/* Everything logged during the current test, accumulated by log_append(). */
static char* s_logOutput = NULL;
/* Name of the running test without its "test_" prefix; used to build fixture paths. */
static const char* s_currentTestName = NULL;
/* Contents of the current test's .c2 source file. */
static char* s_testSource = NULL;
/* Lazily parsed module for the current test; freed by main() after the test. */
static ModuleTree* s_currentModule = NULL;
/* Lazily opened token stream for the current test; closed by main() after the test. */
static TokenStream* s_currentTokenStream = NULL;
/*
 * Records `msg` (truncated to fit the failure buffer) and aborts the current
 * test by jumping back to the harness. A NULL message is recorded as an
 * empty string. This function does not return.
 */
void fail(const char* msg) {
    if (msg == NULL) {
        s_failMsg[0] = '\0';
    } else {
        /* snprintf always terminates and truncates at the buffer size. */
        snprintf(s_failMsg, sizeof(s_failMsg), "%s", msg);
    }
    longjmp(s_testJmp, 1);
}
/*
 * Reads a whole file into a freshly allocated, null-terminated buffer.
 *
 * The file is opened in text mode. The terminator is placed after the bytes
 * actually read, which may be fewer than the size reported by ftell.
 *
 * Returns NULL if the path is NULL, the file cannot be opened, its size
 * cannot be determined, memory cannot be allocated, or reading fails. The
 * caller must free() the result.
 */
char* read_file_content(const char* filepath) {
    FILE* f;
    long size;
    char* content = NULL;

    if (!filepath) return NULL;
    f = fopen(filepath, "r");
    if (!f) return NULL;

    if (fseek(f, 0, SEEK_END) != 0) goto done;
    size = ftell(f);
    if (size < 0) goto done; /* ftell failed; do not allocate from -1 */
    if (fseek(f, 0, SEEK_SET) != 0) goto done;

    content = malloc((size_t)size + 1);
    if (content) {
        size_t got = fread(content, 1, (size_t)size, f);
        if (ferror(f)) {
            free(content);
            content = NULL;
        } else {
            content[got] = '\0';
        }
    }

done:
    fclose(f);
    return content;
}
/* Fails the current test with `msg` when `ptr` is NULL. */
void assert_not_null(void* ptr, const char* msg) {
    if (ptr != NULL) {
        return;
    }
    fail(msg);
}
/*
 * Fails the current test with `msg` unless the length-delimited `actual`
 * equals the C string `expected`. A NULL on either side counts as a mismatch.
 */
void assert_string(const char* expected, String actual, const char* msg) {
    int matches = 0;

    if (expected != NULL && actual.data != NULL) {
        matches = strlen(expected) == actual.length
            && strncmp(expected, actual.data, actual.length) == 0;
    }
    if (!matches) {
        fail(msg);
    }
}
/*
 * Fails the current test with `msg` unless both C strings are non-NULL and
 * compare equal.
 */
void assert_str(const char* expected, const char* actual, const char* msg) {
    int both_present = (expected != NULL) && (actual != NULL);

    if (both_present && strcmp(expected, actual) == 0) {
        return;
    }
    fail(msg);
}
/*
 * Returns the token stream for the current test, opening it on first use
 * from `v0/tests/<test name>.c2`. Fails the test if the path cannot be
 * built or the source file cannot be read.
 */
TokenStream* test_get_tokenstream(void) {
    char* path;

    if (s_currentTokenStream != NULL) {
        return s_currentTokenStream;
    }

    path = format_string("v0/tests/%s.c2", s_currentTestName);
    if (path == NULL) {
        fail("out of memory");
        return NULL;
    }

    /* Drop any source left over from an earlier read before loading anew. */
    free(s_testSource);
    s_testSource = read_file_content(path);
    if (s_testSource == NULL) {
        puts(path);
        free(path);
        fail("could not read test source file");
        return NULL;
    }

    s_currentTokenStream = tokenstream_open(path, s_testSource);
    free(path);
    return s_currentTokenStream;
}
/*
 * Returns the parsed module for the current test, parsing on first use.
 * A failed parse leaves the cache empty, so a later call parses again.
 */
ModuleTree* test_get_ast(void) {
    if (s_currentModule != NULL) {
        return s_currentModule;
    }
    s_currentModule = parser_parse(test_get_tokenstream());
    return s_currentModule;
}
/*
 * Compares the log output captured so far with `expected` via assert_str,
 * so an empty (NULL) log never matches.
 */
void assert_log(const char* expected, const char* msg) {
    assert_str(expected, s_logOutput, msg);
}
/*
 * Compares the captured log output with the golden file
 * `v0/tests/<test name>.log` and fails with `msg` on a mismatch.
 *
 * When the environment variable GENERATE_GOLDEN is "1", the golden file is
 * overwritten with the captured output instead and nothing is compared.
 */
void assert_log_file(const char* msg) {
    const char* actual = s_logOutput ? s_logOutput : "";
    char* filepath = format_string("v0/tests/%s.log", s_currentTestName);
    if (!filepath) {
        fail("out of memory");
        return;
    }

    const char* generate = getenv("GENERATE_GOLDEN");
    if (generate != NULL && strcmp(generate, "1") == 0) {
        FILE* golden = fopen(filepath, "w");
        /* The path is no longer needed; release it before fail() can jump away. */
        free(filepath);
        if (golden == NULL) {
            fail("could not open golden file for writing");
            return;
        }
        fputs(actual, golden);
        fclose(golden);
        return;
    }

    char* expected = read_file_content(filepath);
    free(filepath);
    if (expected == NULL) {
        fail("could not open golden file for reading");
        return;
    }

    int differs = strcmp(expected, actual);
    free(expected);
    if (differs != 0) {
        fail(msg);
    }
}
void assert_int(int expected, int actual, const char* msg) {
if (expected != actual) {
char* buf = format_string("%s (expected %d, got %d)", msg, expected, actual);
if (buf) {
fail(buf);
free(buf);
} else {
fail("out of memory");
}
}
}
/* Fails the current test with `msg` when `condition` is false. */
void assert_true(bool condition, const char* msg) {
    if (condition) {
        return;
    }
    fail(msg);
}
/* Fails the current test with `msg` when `condition` is true. */
void assert_false(bool condition, const char* msg) {
    if (!condition) {
        return;
    }
    fail(msg);
}
/*
 * Log sink used while tests run: appends `msg` to the captured log output.
 * If the larger buffer cannot be allocated, the message is dropped and the
 * previous output is kept.
 */
static void log_append(const char* msg) {
    size_t prefixLen = (s_logOutput != NULL) ? strlen(s_logOutput) : 0;
    size_t msgLen = strlen(msg);
    char* combined = malloc(prefixLen + msgLen + 1);

    if (combined == NULL) {
        return;
    }
    if (prefixLen > 0) {
        memcpy(combined, s_logOutput, prefixLen);
    }
    /* Copies the terminator along with the message. */
    memcpy(combined + prefixLen, msg, msgLen + 1);

    free(s_logOutput);
    s_logOutput = combined;
}
/* Discards the captured log output so the next test starts with an empty log. */
static void log_clear(void) {
    free(s_logOutput);
    s_logOutput = NULL;
}
/* One entry of the test table: a test function and its name. */
typedef struct {
    const char* name; /* function name as written in source, including the "test_" prefix */
    Test func;        /* the test body */
} TestCase;
#include "test_token.c"
#include "parser/test_module.c"
#include "parser/test_declaration.c"
#include "parser/test_expression.c"
#include "parser/test_core.c"
#include "test_log.c"
/* Number of registered tests and number of tests that passed; both set by main(). */
static int s_totalTests;
static int s_greenTests;
/* Registers a test under its own function name. */
#define TEST(name) {#name, name},
/*
 * All tests run by main(), in this order. main() strips the "test_" prefix
 * from each name to locate the test's fixture files under v0/tests/.
 */
static TestCase s_tests[] = {
    TEST(test_log_error)
    TEST(test_log_on_line_variadic)
    TEST(test_log_on_line)
    TEST(test_parser_module_name)
    TEST(test_parser_bad_module_name)
    TEST(test_parser_missing_semicolon_module)
    TEST(test_parser_missing_semicolon_import)
    TEST(test_parser_bad_import_name)
    TEST(test_parser_imports)
    TEST(test_parser_public_imports)
    TEST(test_parser_alias_simple)
    TEST(test_parser_alias_simple_type)
    TEST(test_parser_alias_array)
    TEST(test_parser_variable_simple)
    TEST(test_parser_variable_simple_type)
    TEST(test_parser_variable_const)
    TEST(test_parser_variable_init)
    TEST(test_parser_variable_static)
    TEST(test_parser_multiple_vars)
    TEST(test_parser_core_placeholder)
    TEST(test_tokenstream_comma)
    TEST(test_tokenstream_info)
    TEST(test_tokenstream_keywords_and_symbols)
    TEST(test_tokenstream_open_fail)
    TEST(test_tokenstream_parentheses_and_brackets)
    TEST(test_tokenstream_primitive_types)
    TEST(test_tokenstream_simple_keyword)
    TEST(test_tokenstream_unknown_token)
    TEST(test_tokenstream_void_function_signature)
    TEST(test_tokenstream_whitespace_ignored)
};
/*
 * Test runner entry point.
 *
 * Runs every entry of s_tests in order. Each test runs under setjmp so that
 * fail() can abort it. The log output, test source, parsed module and token
 * stream are reset around each test. Prints a per-test result, a list of
 * failed tests and a summary.
 *
 * Returns 0 when all tests pass, 1 when any test fails or the bookkeeping
 * array cannot be allocated.
 */
int main(int argc, char** argv) {
    const char** failedTests;
    int failedCount;
    (void)argc;
    (void)argv;

    s_totalTests = (int)(sizeof(s_tests) / sizeof(s_tests[0]));
    s_greenTests = 0;

    // Allocate failed tests array dynamically to avoid VLAs
    failedTests = malloc(((size_t)s_totalTests + 1) * sizeof *failedTests);
    if (failedTests == NULL) {
        fprintf(stderr, "out of memory\n");
        return 1;
    }
    failedCount = 0;

    for (int i = 0; i < s_totalTests; i++) {
        // Add 5 to strip the 'test_' prefix.
        s_currentTestName = s_tests[i].name + 5;
        // Re-installed every iteration because a test may replace the log sink.
        log_set_output(log_append);
        printf("%s...", s_tests[i].name);
        fflush(stdout);
        s_failMsg[0] = '\0';

        if (setjmp(s_testJmp) == 0) {
            log_clear();
            free(s_testSource);
            s_testSource = NULL;
            s_tests[i].func();
            printf(" [OK]\n");
            s_greenTests++;
        } else {
            printf(" [FAIL]: %s\n", s_failMsg[0] ? s_failMsg : "");
            failedTests[failedCount++] = s_tests[i].name;
            // Log output on failure
            if (s_logOutput && s_logOutput[0]) {
                printf("%s\n", s_logOutput);
            }
        }

        // Free AST and TokenStream after each test
        if (s_currentModule) {
            ast_free_module(s_currentModule);
            s_currentModule = NULL;
        }
        if (s_currentTokenStream) {
            tokenstream_close(s_currentTokenStream);
            s_currentTokenStream = NULL;
        }
        fflush(stdout);
    }

    free(s_testSource);
    s_testSource = NULL;
    log_clear();

    if (failedCount > 0) {
        printf("\nFailed tests:\n");
        for (int j = 0; j < failedCount; j++) {
            printf(" - %s\n", failedTests[j]);
        }
    }
    printf("\n%d/%d tests passed.\n", s_greenTests, s_totalTests);
    free(failedTests);
    return failedCount > 0 ? 1 : 0;
}
+94
View File
@@ -0,0 +1,94 @@
/**
* Contains test assertion routines.
*/
#ifndef TEST_H
#define TEST_H
#include "token.h"
#include "ast.h"
typedef void (*Test)(void);
/**
* Fails a test.
* @param msg The message to print to the console.
*/
void fail(const char* msg);
/**
* Asserts that a pointer is not null.
*
* Calls `fail` if the assertion does not hold.
*
* @param ptr The pointer to test.
* @param msg The message to print if the pointer is null.
*/
void assert_not_null(void* ptr, const char* msg);
/**
* Asserts that a string has the expected value.
*
* Calls `fail` if the assertion does not hold.
*
* @param expected The expected value. This is typically a string literal.
* @param actual The actual value. This is typically an expression.
* @param msg The message to print if these do not match.
*/
void assert_str(const char* expected, const char* actual, const char* msg);
/**
* Asserts that a string has the expected value.
*
* Calls `fail` if the assertion does not hold.
*
* @param expected The expected value. This is typically a string literal.
* @param actual The actual value. This is typically an expression.
* @param msg The message to print if these do not match.
*/
void assert_string(const char* expected, String actual, const char* msg);
/**
* Asserts that the logged output matches the expected value.
*/
void assert_log(const char* expected, const char* msg);
/**
* Asserts that the logged output matches the content of the file `v0/tests/xyz.log`, where xyz is the test name.
* If GENERATE_GOLDEN=1, the file is overwritten with the actual output.
*/
void assert_log_file(const char* msg);
/**
* Asserts that two integers are equal.
*/
void assert_int(int expected, int actual, const char* msg);
/**
* Asserts that a condition is true.
*/
#include "bool.h"
void assert_true(bool condition, const char* msg);
/**
* Asserts that a condition is false.
*/
void assert_false(bool condition, const char* msg);
/**
* Get the token stream used for this test.
* It reads from the `v0/tests/xyz.c2` file, where xyz is the test name.
*
* At the end of the test, the tokenstream will be freed automatically by the test harness.
*/
TokenStream* test_get_tokenstream(void);
/**
* Gets the parsed module for this test.
* It reads from the `v0/tests/xyz.c2` file, where xyz is the test name.
*
* At the end of the test, the AST will be freed automatically by the test harness.
*/
ModuleTree* test_get_ast(void);
#endif
+1
View File
@@ -0,0 +1 @@
abc
+63
View File
@@ -0,0 +1,63 @@
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
/*
 * Runs one integration test.
 *
 * Compiles `v0/integration_tests/<dir_name>/input.c2` with `./v0/bin/c2`,
 * writes the output to `actual.c` in the current directory, and diffs it
 * against `v0/integration_tests/<dir_name>/expected.c`.
 *
 * Returns 0 when the compiler succeeds and the output matches, 1 otherwise,
 * including when a path or command does not fit its buffer.
 *
 * NOTE(review): `dir_name` is inserted into a shell command unquoted, so
 * directory names containing spaces or shell metacharacters are not supported.
 */
int run_test(const char* dir_name) {
    char cmd[2048];
    char input_path[1024];
    char expected_path[1024];
    int len;

    /* snprintf returns int; check for errors and truncation without mixing signedness. */
    len = snprintf(input_path, sizeof(input_path), "v0/integration_tests/%s/input.c2", dir_name);
    if (len < 0 || (size_t)len >= sizeof(input_path)) {
        printf("Path buffer too small for %s\n", dir_name);
        return 1;
    }
    len = snprintf(expected_path, sizeof(expected_path), "v0/integration_tests/%s/expected.c", dir_name);
    if (len < 0 || (size_t)len >= sizeof(expected_path)) {
        printf("Path buffer too small for %s\n", dir_name);
        return 1;
    }

    len = snprintf(cmd, sizeof(cmd), "./v0/bin/c2 %s > actual.c", input_path);
    if (len < 0 || (size_t)len >= sizeof(cmd)) {
        printf("Command buffer too small for %s\n", dir_name);
        return 1;
    }
    if (system(cmd) != 0) {
        printf("Failed to run compiler for %s\n", dir_name);
        return 1;
    }

    len = snprintf(cmd, sizeof(cmd), "diff -u %s actual.c", expected_path);
    if (len < 0 || (size_t)len >= sizeof(cmd)) {
        printf("Command buffer too small for %s\n", dir_name);
        return 1;
    }
    if (system(cmd) != 0) {
        printf("Test %s failed: Output mismatch\n", dir_name);
        return 1;
    }

    printf("Test %s passed\n", dir_name);
    return 0;
}
int main() {
DIR* d = opendir("v0/integration_tests");
if (!d) {
perror("opendir");
return 1;
}
struct dirent* dir;
int passed = 0;
int failed = 0;
while ((dir = readdir(d)) != NULL) {
if (dir->d_type == DT_DIR && strcmp(dir->d_name, ".") != 0 && strcmp(dir->d_name, "..") != 0) {
if (run_test(dir->d_name) == 0) {
passed++;
} else {
failed++;
}
}
}
closedir(d);
printf("\nTotal tests: %d, Passed: %d, Failed: %d\n", passed + failed, passed, failed);
return failed > 0 ? 1 : 0;
}
+52
View File
@@ -0,0 +1,52 @@
#include "test.h"
#include "log.h"
#include <string.h>
#include <stdlib.h>
#include "util.h"
/* Most recent message received by mock_log; owned by this file. */
static char* s_lastLoggedError = NULL;
/* Log sink that keeps only the latest message (a NULL message is stored as ""). */
static void mock_log(const char* msg) {
    free(s_lastLoggedError);
    s_lastLoggedError = format_string("%s", msg ? msg : "");
}
/*
 * Checks that log_error passes its message to the installed log sink.
 * The sink is reset with log_set_output(NULL) afterwards; if the assertion
 * fails, that reset and the final free are skipped.
 */
static void test_log_error(void) {
    log_set_output(mock_log);
    free(s_lastLoggedError);
    s_lastLoggedError = NULL;
    log_error("test error message");
    assert_str("test error message", s_lastLoggedError, "expected 'test error message'");
    log_set_output(NULL);
    free(s_lastLoggedError);
    s_lastLoggedError = NULL;
}
/*
 * Logs a fixed message at a hand-built location (columns 12-13 of line 1)
 * and compares the formatted output with this test's golden log file.
 */
static void test_log_on_line(void) {
    Location where;

    where.filename = "v0/tests/log_on_line.c2";
    where.line = 1;
    where.column_start = 12;
    where.column_end = 13;
    where.line_text.data = "int main() []";
    where.line_text.length = 13;

    log_on_line(&where, "unexpected token");
    assert_log_file("expected formatted error message");
}
/*
 * Same as test_log_on_line, but with a format argument, to check that
 * log_on_line formats variadic arguments into the message.
 */
static void test_log_on_line_variadic(void) {
    Location where;

    where.filename = "v0/tests/log_on_line_variadic.c2";
    where.line = 1;
    where.column_start = 12;
    where.column_end = 13;
    where.line_text.data = "int main() []";
    where.line_text.length = 13;

    log_on_line(&where, "unexpected token '%c'", 'x');
    assert_log_file("expected formatted error message with variadic args");
}
+113
View File
@@ -0,0 +1,113 @@
#include "test.h"
#include "token.h"
#include <string.h>
#include <stdlib.h>
/* Opening a stream without a source buffer must yield NULL. */
static void test_tokenstream_open_fail(void) {
    if (tokenstream_open(NULL, NULL) != NULL) {
        fail("expected NULL for NULL buffer");
    }
}
static void test_tokenstream_simple_keyword(void) {
TokenStream* ts = test_get_tokenstream();
Token t;
Token eof;
t = tokenstream_next(ts);
if (t.token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
eof = tokenstream_next(ts);
if (eof.token != TOKEN_EOF) fail("expected EOF");
}
static void test_tokenstream_keywords_and_symbols(void) {
TokenStream* ts = test_get_tokenstream();
if (tokenstream_next(ts).token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (main)");
if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
if (tokenstream_next(ts).token != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (stdio)");
if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
}
static void test_tokenstream_parentheses_and_brackets(void) {
TokenStream* ts = test_get_tokenstream();
if (tokenstream_next(ts).token != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
if (tokenstream_next(ts).token != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
if (tokenstream_next(ts).token != TOKEN_BRACKET_OPEN) fail("expected TOKEN_BRACKET_OPEN");
if (tokenstream_next(ts).token != TOKEN_BRACKET_CLOSE) fail("expected TOKEN_BRACKET_CLOSE");
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
}
static void test_tokenstream_comma(void) {
TokenStream* ts = test_get_tokenstream();
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected a");
if (tokenstream_next(ts).token != TOKEN_COMMA) fail("expected comma");
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected b");
if (tokenstream_next(ts).token != TOKEN_COMMA) fail("expected comma");
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected c");
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
}
static void test_tokenstream_whitespace_ignored(void) {
TokenStream* ts = test_get_tokenstream();
if (tokenstream_next(ts).token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
if (tokenstream_next(ts).token != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
}
static void test_tokenstream_void_function_signature(void) {
TokenStream* ts = test_get_tokenstream();
if (tokenstream_next(ts).token != TOKEN_VOID) fail("expected TOKEN_VOID");
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
if (tokenstream_next(ts).token != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
if (tokenstream_next(ts).token != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
}
static void test_tokenstream_unknown_token(void) {
TokenStream* ts = test_get_tokenstream();
if (tokenstream_next(ts).token != TOKEN_UNKNOWN) fail("expected TOKEN_UNKNOWN");
assert_log_file("expected error message for unknown token");
}
/* Checks the text, line and start column reported for the first two tokens. */
static void test_tokenstream_info(void) {
    TokenStream* ts = test_get_tokenstream();

    Token first = tokenstream_next(ts);
    if (first.token != TOKEN_MODULE) {
        fail("expected TOKEN_MODULE");
    }
    assert_string("module", first.text, "info: expected 'module'");
    if (first.location.line != 1) {
        fail("expected line 1");
    }
    if (first.location.column_start != 1) {
        fail("expected column 1");
    }

    Token second = tokenstream_next(ts);
    if (second.token != TOKEN_IDENTIFIER) {
        fail("expected TOKEN_IDENTIFIER");
    }
    assert_string("main", second.text, "info: expected 'main'");
    if (second.location.line != 1) {
        fail("expected line 1");
    }
    if (second.location.column_start != 8) {
        fail("expected column 8");
    }
}
static void test_tokenstream_primitive_types(void) {
TokenStream* ts = test_get_tokenstream();
if (tokenstream_next(ts).token != TOKEN_I8) fail("expected TOKEN_I8");
if (tokenstream_next(ts).token != TOKEN_I16) fail("expected TOKEN_I16");
if (tokenstream_next(ts).token != TOKEN_I32) fail("expected TOKEN_I32");
if (tokenstream_next(ts).token != TOKEN_I64) fail("expected TOKEN_I64");
if (tokenstream_next(ts).token != TOKEN_U8) fail("expected TOKEN_U8");
if (tokenstream_next(ts).token != TOKEN_U16) fail("expected TOKEN_U16");
if (tokenstream_next(ts).token != TOKEN_U32) fail("expected TOKEN_U32");
if (tokenstream_next(ts).token != TOKEN_U64) fail("expected TOKEN_U64");
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
}
+1
View File
@@ -0,0 +1 @@
int main() []
+4
View File
@@ -0,0 +1,4 @@
--- v0/tests/log_on_line.c2 ---
1| int main() []
^^
unexpected token
+1
View File
@@ -0,0 +1 @@
int main() []
+4
View File
@@ -0,0 +1,4 @@
--- v0/tests/log_on_line_variadic.c2 ---
1| int main() []
^^
unexpected token 'x'
+9
View File
@@ -0,0 +1,9 @@
module mymodule;
import foo;
alias myalias = i32[];
import bar;
alias otheralias = i32;
+3
View File
@@ -0,0 +1,3 @@
module mymodule;
alias myalias = i32[];
+3
View File
@@ -0,0 +1,3 @@
module mymodule;
alias myalias = i32;
+3
View File
@@ -0,0 +1,3 @@
module mymodule;
alias myalias = i32;
+2
View File
@@ -0,0 +1,2 @@
module mymodule;
import ;
+4
View File
@@ -0,0 +1,4 @@
--- v0/tests/parser_bad_import_name.c2 ---
2| import ;
^
expected module identifier
+1
View File
@@ -0,0 +1 @@
import other_module;
+4
View File
@@ -0,0 +1,4 @@
--- v0/tests/parser_bad_module_name.c2 ---
1| import other_module;
^^^^^^
expected keyword 'module'
+2
View File
@@ -0,0 +1,2 @@
module my_module;
import other_module;
@@ -0,0 +1 @@
module my_module; import other_module
@@ -0,0 +1,4 @@
--- v0/tests/parser_missing_semicolon_import.c2 ---
1| module my_module; import other_module
^
expected ';' after import
@@ -0,0 +1 @@
module my_module
@@ -0,0 +1,4 @@
--- v0/tests/parser_missing_semicolon_module.c2 ---
1| module my_module
^
expected ';' after module name
+1
View File
@@ -0,0 +1 @@
module my_module;
+4
View File
@@ -0,0 +1,4 @@
module test_multiple_vars;
i32 var1;
i32 var2;
+3
View File
@@ -0,0 +1,3 @@
module my_module;
public import other_module;
+3
View File
@@ -0,0 +1,3 @@
module test_const_var;
const i32 my_const;
+2
View File
@@ -0,0 +1,2 @@
module mymodule;
var x = 123;
+4
View File
@@ -0,0 +1,4 @@
module my_module;
// Defines a global variable called my_var.
i32 my_var;
+4
View File
@@ -0,0 +1,4 @@
module my_module;
// Defines a global variable called my_var.
i32 my_var;
+3
View File
@@ -0,0 +1,3 @@
module test_static_var;
static i32 my_static;
+1
View File
@@ -0,0 +1 @@
a,b,c
+1
View File
@@ -0,0 +1 @@
module main;
@@ -0,0 +1 @@
module main; import stdio;
@@ -0,0 +1 @@
()[]
+1
View File
@@ -0,0 +1 @@
i8 i16 i32 i64 u8 u16 u32 u64
+1
View File
@@ -0,0 +1 @@
module
+1
View File
@@ -0,0 +1 @@
%
+4
View File
@@ -0,0 +1,4 @@
--- v0/tests/tokenstream_unknown_token.c2 ---
1| %
^
unexpected token '%'
@@ -0,0 +1 @@
void main()
@@ -0,0 +1,2 @@
module
import ;
+279
View File
@@ -0,0 +1,279 @@
#include "token.h"
#include "log.h"
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
/* Tokenizer state over one in-memory source buffer. */
struct TokenStream {
    char* filename;         /* heap copy of the file name, made by tokenstream_open; may be NULL if that copy failed */
    const char* code;       /* source text supplied by the caller; not copied */
    size_t pos;             /* index of the next unread character in `code` */
    int line;               /* 1-based line of the next unread character */
    int column;             /* 1-based column of the next unread character */
    const char* line_start; /* first character of the current line */
    /* End of last non-EOF token */
    int last_line;
    int last_column_end;
    const char* last_line_start;
};
/**
 * Easy-to-read and modify keyword-to-token mapping.
 * Add new keywords here.
 *
 * One entry pairs a keyword's exact spelling with the token it produces.
 */
typedef struct {
    const char* keyword;
    TokenType token;
} KeywordMap;
/* Reserved words recognised by lookup_keyword; anything else is an identifier. */
static const KeywordMap keywords[] = {
    {"module", TOKEN_MODULE},
    {"import", TOKEN_IMPORT},
    {"alias", TOKEN_ALIAS},
    {"public", TOKEN_PUBLIC},
    {"var", TOKEN_VAR},
    {"const", TOKEN_CONST},
    {"static", TOKEN_STATIC},
    {"void", TOKEN_VOID},
    {"i8", TOKEN_I8},
    {"i16", TOKEN_I16},
    {"i32", TOKEN_I32},
    {"i64", TOKEN_I64},
    {"u8", TOKEN_U8},
    {"u16", TOKEN_U16},
    {"u32", TOKEN_U32},
    {"u64", TOKEN_U64},
    {"true", TOKEN_TRUE},
    {"false", TOKEN_FALSE},
};
/**
* Look up a keyword in the keyword map.
* Returns TOKEN_IDENTIFIER if not found.
*/
static TokenType lookup_keyword(const char* str, size_t length) {
int count = sizeof(keywords) / sizeof(keywords[0]);
int i;
for (i = 0; i < count; i++) {
if (strlen(keywords[i].keyword) == length &&
strncmp(keywords[i].keyword, str, length) == 0) {
return keywords[i].token;
}
}
return TOKEN_IDENTIFIER;
}
/**
 * Check if a character is the start of an identifier.
 * Returns non-zero for ASCII letters and '_', zero otherwise.
 */
static int is_identifier_start(char c) {
    /* <ctype.h> functions require a value representable as unsigned char;
     * a plain char may be negative for bytes above 0x7F. */
    return isalpha((unsigned char)c) || c == '_';
}
/**
 * Check if a character can be part of an identifier.
 * Returns non-zero for ASCII letters, digits and '_', zero otherwise.
 */
static int is_identifier_part(char c) {
    /* <ctype.h> functions require a value representable as unsigned char;
     * a plain char may be negative for bytes above 0x7F. */
    return isalnum((unsigned char)c) || c == '_';
}
/**
 * Peek at the next character in the stream.
 * Does not advance the position; returns '\0' at the end of the source.
 */
static char peek_char(TokenStream* ts) {
    return ts->code[ts->pos];
}
/**
 * Consume one character and keep line/column tracking in sync.
 * At the end of the source nothing is consumed and '\0' is returned.
 * After a newline the line counter advances, the column resets to 1 and the
 * line start moves to the character following the newline.
 */
static char read_char(TokenStream* ts) {
    const char current = ts->code[ts->pos];

    if (current == '\0') {
        return '\0';
    }

    ts->pos += 1;
    if (current != '\n') {
        ts->column += 1;
        return current;
    }

    ts->line += 1;
    ts->column = 1;
    ts->line_start = ts->code + ts->pos;
    return current;
}
/**
 * Measure a line of text.
 *
 * @param line_start First character of the line (NUL-terminated text).
 * @return Number of characters before the first '\n' or the terminating
 *         '\0', whichever comes first.
 */
static size_t get_line_length(const char* line_start) {
    /* strcspn stops at the first '\n' or at the terminating NUL. */
    return strcspn(line_start, "\n");
}
/**
 * Build a Token and remember where it ended.
 *
 * @param ts         Stream the token comes from; supplies the file name and
 *                   receives the "end of last token" bookkeeping.
 * @param type       Token type to store.
 * @param text       First character of the token text (not copied).
 * @param length     Number of characters in the token text.
 * @param line       Line the token is on.
 * @param column     Column of the first character of the token text.
 * @param line_start First character of the line the token is on.
 * @return The populated token.
 *
 * For every type except TOKEN_EOF the stream's last_line, last_column_end
 * and last_line_start are updated from the new token's location.
 */
static Token create_token(TokenStream* ts, TokenType type, const char* text, size_t length, int line, int column, const char* line_start) {
    Token result;

    result.token = type;

    result.text.length = length;
    result.text.data = (char*)text;

    result.location.filename = ts->filename;
    result.location.line = line;
    result.location.column_start = column;
    result.location.column_end = column + (int)length - 1;
    result.location.line_text.length = get_line_length(line_start);
    result.location.line_text.data = (char*)line_start;

    if (type == TOKEN_EOF) {
        return result;
    }

    ts->last_line_start = result.location.line_text.data;
    ts->last_column_end = result.location.column_end;
    ts->last_line = result.location.line;
    return result;
}
/**
 * Create a token stream over a piece of source text.
 *
 * @param filename Name used in token locations; it is copied. NULL is
 *                 replaced by "unknown".
 * @param code     NUL-terminated source text. The pointer is stored, not
 *                 copied.
 * @return A new stream positioned at line 1, column 1, or NULL if code is
 *         NULL or an allocation fails. Release it with tokenstream_close().
 */
TokenStream* tokenstream_open(const char* filename, const char* code) {
    /* Declarations first for C89 */
    TokenStream* ts;
    const char* name_src;
    size_t name_size;

    if (code == NULL) return NULL;

    ts = (TokenStream*)malloc(sizeof(*ts));
    if (ts == NULL) {
        return NULL;
    }

    name_src = filename ? filename : "unknown";
    name_size = strlen(name_src) + 1;
    ts->filename = (char*)malloc(name_size);
    if (ts->filename == NULL) {
        /*
         * Fail cleanly instead of handing out a stream whose tokens would
         * all carry a NULL filename.
         */
        free(ts);
        return NULL;
    }
    memcpy(ts->filename, name_src, name_size);

    ts->code = code;
    ts->pos = 0;
    ts->line = 1;
    ts->column = 1;
    ts->line_start = code;

    /* No token read yet: the "last token" ends just before column 1. */
    ts->last_line = 1;
    ts->last_column_end = 0;
    ts->last_line_start = code;
    return ts;
}
/**
 * Destroy a token stream and the file-name copy it owns.
 *
 * @param ts Stream to release; NULL is accepted and ignored.
 */
void tokenstream_close(TokenStream* ts) {
    if (ts != NULL) {
        /* free(NULL) is a no-op, so the name needs no separate check. */
        free(ts->filename);
        free(ts);
    }
}
/**
 * Read the next token from the stream.
 *
 * Whitespace and "//" line comments are skipped first. Then one of the
 * following is produced:
 *  - TOKEN_EOF at end of input (also when ts is NULL); its location is the
 *    column right after the last non-EOF token;
 *  - a single-character symbol token;
 *  - TOKEN_STRING for a double-quoted string; the token text and its
 *    column exclude the opening quote. A string that is not closed before
 *    the end of input is still returned as TOKEN_STRING;
 *  - TOKEN_INTEGER for a run of decimal digits;
 *  - a keyword token or TOKEN_IDENTIFIER for a word;
 *  - TOKEN_UNKNOWN for any other character, after logging it through
 *    log_on_line().
 *
 * @param ts Stream to read from.
 * @return The token that was read.
 */
Token tokenstream_next(TokenStream* ts) {
    /* Declarations first for C89 */
    char c;
    int start_line;
    int start_column;
    const char* line_start;
    const char* start_text;
    Token t;

    if (ts == NULL) {
        Token eof = {0};
        eof.token = TOKEN_EOF;
        return eof;
    }

    /* Skip whitespace and comments */
    while ((c = peek_char(ts)) != '\0') {
        /*
         * <ctype.h> functions require a value representable as unsigned
         * char; a negative plain char would be undefined behavior.
         */
        if (isspace((unsigned char)c)) {
            read_char(ts);
            continue;
        }

        if (c == '/' && ts->code[ts->pos + 1] == '/') {
            /* Line comment: skip up to and including the newline */
            while ((c = read_char(ts)) != '\0' && c != '\n') {
                /* Skip */
            }
            continue;
        }

        /*
         * Start of a token. A lone '/' is not handled yet and ends up as
         * TOKEN_UNKNOWN below.
         */
        break;
    }

    if (peek_char(ts) == '\0') {
        /* EOF is reported right after the last real token. */
        t.token = TOKEN_EOF;
        t.text.data = NULL;
        t.text.length = 0;
        t.location.filename = ts->filename;
        t.location.line = ts->last_line;
        t.location.column_start = ts->last_column_end + 1;
        t.location.column_end = ts->last_column_end + 1;
        t.location.line_text.data = (char*)ts->last_line_start;
        t.location.line_text.length = get_line_length(ts->last_line_start);
        return t;
    }

    /* Remember where the token starts before consuming anything. */
    start_line = ts->line;
    start_column = ts->column;
    line_start = ts->line_start;
    start_text = &ts->code[ts->pos];
    c = read_char(ts);

    /* Single-character tokens */
    switch (c) {
        case '(': return create_token(ts, TOKEN_PARENT_OPEN, start_text, 1, start_line, start_column, line_start);
        case ')': return create_token(ts, TOKEN_PARENT_CLOSE, start_text, 1, start_line, start_column, line_start);
        case '[': return create_token(ts, TOKEN_BRACKET_OPEN, start_text, 1, start_line, start_column, line_start);
        case ']': return create_token(ts, TOKEN_BRACKET_CLOSE, start_text, 1, start_line, start_column, line_start);
        case ',': return create_token(ts, TOKEN_COMMA, start_text, 1, start_line, start_column, line_start);
        case ';': return create_token(ts, TOKEN_SEMICOLON, start_text, 1, start_line, start_column, line_start);
        case '=': return create_token(ts, TOKEN_ASSIGN, start_text, 1, start_line, start_column, line_start);
        case '"': {
            /* The token text is the string contents, without the quotes. */
            size_t len = 0;
            const char* start = &ts->code[ts->pos];
            while (peek_char(ts) != '"' && peek_char(ts) != '\0') {
                read_char(ts);
                len++;
            }
            /* Consume the closing quote if the string was terminated. */
            if (peek_char(ts) == '"') read_char(ts);
            return create_token(ts, TOKEN_STRING, start, len, start_line, start_column + 1, line_start);
        }
        default:
            break;
    }

    /* Integer literals: a run of decimal digits */
    if (isdigit((unsigned char)c)) {
        size_t len = 1;
        while (isdigit((unsigned char)peek_char(ts))) {
            read_char(ts);
            len++;
        }
        return create_token(ts, TOKEN_INTEGER, start_text, len, start_line, start_column, line_start);
    }

    /* Keywords and identifiers */
    if (is_identifier_start(c)) {
        /* Declarations first for C89 */
        size_t length;
        TokenType type;

        length = 1;
        while (is_identifier_part(peek_char(ts))) {
            read_char(ts);
            length++;
        }
        type = lookup_keyword(start_text, length);
        return create_token(ts, type, start_text, length, start_line, start_column, line_start);
    }

    /* Unknown character */
    t = create_token(ts, TOKEN_UNKNOWN, start_text, 1, start_line, start_column, line_start);
    log_on_line(&t.location, "unexpected token '%c'", c);
    return t;
}
+92
View File
@@ -0,0 +1,92 @@
/**
* Contains the interface for reading tokens from a file.
*/
#ifndef TOKEN_H
#define TOKEN_H
#include "location.h"
/**
* A list of all possible tokens.
*/
typedef enum {
/* Keywords */
TOKEN_MODULE,
TOKEN_IMPORT,
TOKEN_SEMICOLON,
TOKEN_ALIAS,
TOKEN_PUBLIC,
TOKEN_VAR,
TOKEN_CONST,
TOKEN_STATIC,
/* Symbols */
TOKEN_PARENT_OPEN,
TOKEN_PARENT_CLOSE,
TOKEN_BRACKET_OPEN,
TOKEN_BRACKET_CLOSE,
TOKEN_COMMA,
TOKEN_ASSIGN,
/* Primitives */
TOKEN_VOID,
TOKEN_I8,
TOKEN_I16,
TOKEN_I32,
TOKEN_I64,
TOKEN_U8,
TOKEN_U16,
TOKEN_U32,
TOKEN_U64,
TOKEN_STRING,
TOKEN_INTEGER,
TOKEN_TRUE,
TOKEN_FALSE,
/* Variable */
TOKEN_IDENTIFIER,
/* Others */
TOKEN_EOF,
TOKEN_UNKNOWN
} TokenType;
/**
 * Holds additional information about a token.
 *
 * Tokens are returned by value from tokenstream_next().
 */
typedef struct {
/* @brief The actual token. */
TokenType token;
/*
 * @brief The textual representation of a token.
 *
 * For tokens read from the input, data points into the source text given
 * to tokenstream_open() and length gives the number of characters, so the
 * text is not NUL-terminated on its own. For TOKEN_STRING the surrounding
 * quotes are not included. For TOKEN_EOF data is NULL and length is 0.
 */
String text;
/* @brief The location of the token. */
Location location;
} Token;
/* Opaque handle to a token stream; the struct is only declared here, not defined. */
typedef struct TokenStream TokenStream;
/**
 * Returns a TokenStream for a text.
 *
 * The text is not copied: the stream and the tokens it returns point into
 * it, so it must stay valid and unchanged while they are in use.
 *
 * @param filename The name of the file to read. This is only used for error reporting.
 *                 The name is copied; NULL is replaced by "unknown".
 * @param code The text to read. Must be NUL-terminated.
 * @returns A handle to the TokenStream, or NULL if code is NULL or if
 *          allocating the stream fails. Release it with tokenstream_close().
 */
TokenStream* tokenstream_open(const char* filename, const char* code);
/**
 * Closes a TokenStream and frees the memory it owns.
 *
 * The text passed to tokenstream_open() is not freed.
 *
 * @param ts The TokenStream to close. NULL is accepted and ignored.
 */
void tokenstream_close(TokenStream* ts);
/**
 * Gets the next token from the TokenStream.
 *
 * Whitespace and "//" line comments are skipped. At the end of the text a
 * TOKEN_EOF token is returned. A character the tokenizer does not
 * recognize is logged and returned as TOKEN_UNKNOWN.
 *
 * @param ts The TokenStream to read from. If NULL, a TOKEN_EOF token is returned.
 * @returns The next token read.
 */
Token tokenstream_next(TokenStream* ts);
#endif
+9
View File
@@ -0,0 +1,9 @@
/**
* Contains runtime information about types.
*/
#ifndef TYPES_H
#define TYPES_H
#endif
+46
View File
@@ -0,0 +1,46 @@
#include "util.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
/*
 * Portable va_copy fallback for pre-C99 or platforms without va_copy.
 * Uses __va_copy when that macro is defined, otherwise plain assignment.
 * NOTE(review): plain assignment only compiles where va_list is an
 * assignable (non-array) type -- confirm for the platforms being targeted.
 */
#ifndef va_copy
# if defined(__va_copy)
# define va_copy(dest, src) __va_copy(dest, src)
# else
# define va_copy(dest, src) ((dest) = (src))
# endif
#endif
/**
 * Format a string printf-style from a va_list into newly allocated memory.
 *
 * The arguments are formatted twice: once on a copy of the list to measure
 * the output, and once on the list itself to produce it. The caller still
 * has to call va_end() on args, and must not reuse args without
 * restarting it.
 *
 * @param fmt  The format string.
 * @param args The values to format.
 * @return A NUL-terminated string that the caller must free(), or NULL if
 *         fmt is NULL, formatting fails or memory cannot be allocated.
 */
char* format_string_va(const char* fmt, va_list args) {
    /* Declarations first to satisfy -std=c89 */
    va_list args_copy;
    int needed;
    int written;
    char* buf;

    if (!fmt) return NULL;

    /* Measuring pass: with a NULL buffer vsnprintf only reports the length. */
    va_copy(args_copy, args);
    needed = vsnprintf(NULL, 0, fmt, args_copy);
    va_end(args_copy);
    if (needed < 0) return NULL;

    buf = (char*)malloc((size_t)needed + 1);
    if (!buf) return NULL;

    written = vsnprintf(buf, (size_t)needed + 1, fmt, args);
    if (written < 0) {
        /* Do not hand out a buffer whose contents are indeterminate. */
        free(buf);
        return NULL;
    }
    return buf;
}

/**
 * Format a string printf-style into newly allocated memory.
 *
 * @param fmt The format string.
 * @param ... The values to format.
 * @return A NUL-terminated string that the caller must free(), or NULL if
 *         fmt is NULL, formatting fails or memory cannot be allocated.
 */
char* format_string(const char* fmt, ...) {
    /* Declarations first to satisfy -std=c89 */
    va_list args;
    char* s;

    if (!fmt) return NULL;

    va_start(args, fmt);
    s = format_string_va(fmt, args);
    va_end(args);
    return s;
}
+27
View File
@@ -0,0 +1,27 @@
#ifndef UTIL_H
#define UTIL_H
#include <stdarg.h>
#include <stddef.h>
/**
 * Formats a string using printf-style formatting and returns a newly allocated string.
 * The caller is responsible for freeing the returned string with free().
 *
 * @param fmt The format string.
 * @param ... The values to format.
 * @return A newly allocated string containing the formatted output, or NULL
 *         if fmt is NULL, formatting fails or memory cannot be allocated.
 */
char* format_string(const char* fmt, ...);
/**
 * Formats a string using printf-style formatting with a va_list and returns a newly allocated string.
 * The caller is responsible for freeing the returned string with free().
 * The caller also remains responsible for calling va_end() on args.
 *
 * @param fmt The format string.
 * @param args The va_list of values to format.
 * @return A newly allocated string containing the formatted output, or NULL
 *         if fmt is NULL, formatting fails or memory cannot be allocated.
 */
char* format_string_va(const char* fmt, va_list args);
#endif