Compare commits
1 Commits
master
..
e46c6ff52d
| Author | SHA1 | Date | |
|---|---|---|---|
| e46c6ff52d |
@@ -23,24 +23,11 @@ For instance, a test for `buffer.c` must be called `test_buffer.c`.
|
||||
There will be no `test_buffer.h`. Instead, `test.c` will directly
|
||||
`#include` the C source file.
|
||||
|
||||
Every syntax error path identified in the parser MUST have a corresponding test.
|
||||
|
||||
## Language Syntax
|
||||
Since this is a compiler for a new language, do not assume anything of its syntax.
|
||||
Always check the `specs` directory to see examples and documentation about the language.
|
||||
Since this is a compiler for a new language, do not assume anything
|
||||
of its syntax.
|
||||
Always check the `specs` directory.
|
||||
|
||||
If there is anything unclear, ask the user for clarification.
|
||||
It is certainly possible that there are contradictions in the
|
||||
spec that have to be solved first.
|
||||
|
||||
## Committing
|
||||
Often, the user modifies an interface (typically in a header file), and then asks
|
||||
the agent to update the implementation.
|
||||
|
||||
When creating a commit, make sure that both the user's and the agent's modifications
|
||||
are included in the commit.
|
||||
|
||||
Only create a commit when specifically asked for that. Never assume implicitly that the
|
||||
user wants you to create a commit.
|
||||
Even if they asked you to create a commit in an earlier task, it does not mean that
|
||||
you should also create a commit in a later task.
|
||||
|
||||
@@ -1,3 +1 @@
|
||||
/c2
|
||||
/.idea/*
|
||||
!/.idea/c_cpp_properties.json
|
||||
|
||||
Generated
-12
@@ -1,12 +0,0 @@
|
||||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "CLion",
|
||||
"includePath": [
|
||||
"${workspaceFolder}/v0/*"
|
||||
],
|
||||
"cStandard": "c89",
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
}
|
||||
@@ -1,21 +1,13 @@
|
||||
.PHONY: all test clean
|
||||
|
||||
all: c2 test integration-test
|
||||
all: c2
|
||||
|
||||
c2: v0/bin/c2
|
||||
cp $< $@
|
||||
|
||||
test::
|
||||
|
||||
generate_golden::
|
||||
|
||||
clean::
|
||||
rm -f c2
|
||||
|
||||
include v0/include.mk
|
||||
|
||||
integration-test: v0/bin/c2 v0/bin/test_integration
|
||||
./v0/bin/test_integration
|
||||
|
||||
v0/bin/test_integration: v0/test_integration.c
|
||||
$(CC) $(CFLAGS) -o $@ $<
|
||||
@@ -13,21 +13,5 @@ In order to run the tests, run `make test`.
|
||||
## Versioning
|
||||
The current version is v0. Its source code lives in the `v0` directory.
|
||||
|
||||
## Testing
|
||||
|
||||
### Unit Tests
|
||||
Run unit tests with:
|
||||
```bash
|
||||
make test
|
||||
```
|
||||
|
||||
### Integration Tests
|
||||
Integration tests compare the compiler output with expected C files.
|
||||
To add a new integration test, create a new directory under `v0/integration_tests/` with `input.c2` and `expected.c` files.
|
||||
Run integration tests with:
|
||||
```bash
|
||||
make integration-test
|
||||
```
|
||||
|
||||
## Language Specifications
|
||||
See the specs directory for information on the actual language syntax.
|
||||
|
||||
@@ -12,4 +12,3 @@ void main() {
|
||||
puts("Hello, world!");
|
||||
}
|
||||
```
|
||||
.
|
||||
@@ -1,31 +0,0 @@
|
||||
# Imports
|
||||
The import statement allows one module access to the public declarations of another module.
|
||||
|
||||
## Syntax
|
||||
The import statement uses the following syntax:
|
||||
|
||||
```c2
|
||||
import module_name;
|
||||
```
|
||||
|
||||
They can optionally be prefixed by the `public` keyword, in which case the module will
|
||||
export everything in the import transitively.
|
||||
|
||||
For instance,
|
||||
|
||||
```c2
|
||||
--- a.c2
|
||||
module a;
|
||||
import b;
|
||||
|
||||
--- b.c2
|
||||
module b;
|
||||
public import c;
|
||||
|
||||
--- c.c2
|
||||
module c;
|
||||
|
||||
// Some declarations
|
||||
```
|
||||
|
||||
In this example, both module a and b can access the declarations in module c.
|
||||
@@ -1,25 +0,0 @@
|
||||
# Types
|
||||
C2 has both built-in types and user-defined types.
|
||||
|
||||
## Builtin types
|
||||
C2 has the following types builtin:
|
||||
- `void`
|
||||
- `i8`
|
||||
- `i16`
|
||||
- `i32`
|
||||
- `i64`
|
||||
- `u8`
|
||||
- `u16`
|
||||
- `u32`
|
||||
- `u64`
|
||||
|
||||
## Type Aliases
|
||||
Types can be aliased to different names using the alias keyword.
|
||||
Here's a list of the default builtin aliases.
|
||||
|
||||
```c2
|
||||
alias int = i32;
|
||||
alias uint = u32;
|
||||
alias char = u8;
|
||||
alias string = char[];
|
||||
```
|
||||
@@ -1,24 +0,0 @@
|
||||
# Variables
|
||||
Variables can be defined in the global scope, in structs and classes, and in functions.
|
||||
|
||||
## Global variables
|
||||
Global variables can be defined as such:
|
||||
|
||||
```c2
|
||||
// Defines a global variable called my_var.
|
||||
i32 my_var;
|
||||
|
||||
// Defines a const variable.
|
||||
const i32 my_var;
|
||||
|
||||
// Defines a global variable whose type is determined automatically.
|
||||
// The value will be determined at runtime.
|
||||
var my_var = 123;
|
||||
|
||||
// Defines a const variable whose type is determined automatically.
|
||||
const my_var = 123;
|
||||
|
||||
// Defines a global variable whose initial value is computed at compile-time.
|
||||
// If it cannot be computed at compile-time, an error is thrown.
|
||||
static my_var = 123;
|
||||
```
|
||||
@@ -4,18 +4,13 @@
|
||||
#ifndef AST_H
|
||||
#define AST_H
|
||||
|
||||
#include "ast/expression.h"
|
||||
#include "ast/declaration.h"
|
||||
#include "ast/module.h"
|
||||
|
||||
/**
|
||||
* Frees a module and all its children.
|
||||
* The top-level model.
|
||||
* Every file matches an entire Module.
|
||||
*/
|
||||
void ast_free_module(ModuleTree* module);
|
||||
|
||||
/**
|
||||
* Frees a type expression.
|
||||
*/
|
||||
void ast_free_type(TypeTree* type);
|
||||
typedef struct {
|
||||
/// @brief The name of the module.
|
||||
char* name;
|
||||
} Module;
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,49 +0,0 @@
|
||||
#ifndef AST_DECLARATION_H
|
||||
#define AST_DECLARATION_H
|
||||
|
||||
#include "expression.h"
|
||||
#include "../bool.h"
|
||||
|
||||
typedef struct {
|
||||
/** @brief The name of the module being imported. */
|
||||
char* module_name;
|
||||
|
||||
/** @brief Whether the import is public or not. */
|
||||
bool is_public;
|
||||
} ImportTree;
|
||||
|
||||
/**
|
||||
* A declaration that aliases one type to another.
|
||||
*/
|
||||
typedef struct {
|
||||
/** @brief The name of the alias. */
|
||||
const char* name;
|
||||
|
||||
/** @brief The value of the alias. */
|
||||
TypeTree value;
|
||||
} AliasTree;
|
||||
|
||||
/**
|
||||
* A declaration of a variable, which may be a constant or not, and may be static or not.
|
||||
*/
|
||||
typedef struct {
|
||||
/** @brief The name of the variable. */
|
||||
char* name;
|
||||
|
||||
/** @brief The type of the variable. */
|
||||
TypeTree type;
|
||||
|
||||
/** @brief The optional initializer expression. */
|
||||
ExpressionTree* initializer;
|
||||
|
||||
/** @brief Whether the variable is public or not. */
|
||||
bool is_public;
|
||||
|
||||
/** @brief Whether the variable is static or not. */
|
||||
bool is_static;
|
||||
|
||||
/** @brief Whether the variable is a constant or not. */
|
||||
bool is_const;
|
||||
} VariableTree;
|
||||
|
||||
#endif
|
||||
@@ -1,9 +0,0 @@
|
||||
#include "expression.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
 * Releases the heap memory owned by a type expression.
 *
 * Only array types own memory: the element type they point to is released
 * recursively and then freed. The TypeTree passed in is never freed by this
 * function; the caller stays responsible for its storage.
 *
 * @param expr The type expression to clean up. May be NULL, in which case
 *             nothing happens.
 */
void ast_free_type(TypeTree* expr) {
    if (expr == NULL || expr->tag != TYPE_TREE_ARRAY) {
        return;
    }

    if (expr->array.array != NULL) {
        ast_free_type(expr->array.array);
        free(expr->array.array);
        /* Reset the link so cleaning the same tree twice cannot double-free. */
        expr->array.array = NULL;
    }
}
|
||||
@@ -1,52 +0,0 @@
|
||||
#ifndef AST_EXPRESSION_H
|
||||
#define AST_EXPRESSION_H
|
||||
|
||||
#include "../bool.h"
|
||||
|
||||
typedef enum {
|
||||
EXPRESSION_TREE_INTEGER,
|
||||
EXPRESSION_TREE_STRING,
|
||||
EXPRESSION_TREE_BOOLEAN
|
||||
} ExpressionTreeTag;
|
||||
|
||||
typedef struct {
|
||||
ExpressionTreeTag tag;
|
||||
union {
|
||||
int integer;
|
||||
const char* string;
|
||||
bool boolean;
|
||||
};
|
||||
} ExpressionTree;
|
||||
|
||||
typedef enum {
|
||||
TYPE_TREE_BUILTIN,
|
||||
TYPE_TREE_ARRAY
|
||||
} TypeTreeTag;
|
||||
|
||||
/**
|
||||
* An expression that evaluates to a type.
|
||||
*/
|
||||
typedef struct TypeTree TypeTree;
|
||||
struct TypeTree {
|
||||
/** @brief defines which entry in the union is valid */
|
||||
TypeTreeTag tag;
|
||||
union {
|
||||
/** @brief Evaluates to an array of the given type. */
|
||||
struct {
|
||||
/** @brief A pointer to the type of the elements stored in the array. */
|
||||
TypeTree* array;
|
||||
} array;
|
||||
/** @brief Evaluates to a builtin integer type.*/
|
||||
struct {
|
||||
/**
|
||||
* @brief The number of bits in the integer.
|
||||
* Typical values are 8, 16, 32, and 64.
|
||||
*/
|
||||
int bitSize;
|
||||
/** @brief `true` if the type is signed, `false` if it's unsigned. */
|
||||
bool isSigned;
|
||||
} builtin;
|
||||
};
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,3 +0,0 @@
|
||||
# Lists the C source files that live in the ast directory.
|
||||
# This file is provided for future consistency.
|
||||
AST_SRC := v0/ast/module.c v0/ast/expression.c
|
||||
@@ -1,43 +0,0 @@
|
||||
#include "module.h"
|
||||
#include "expression.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
void ast_free_type(TypeTree* type);
|
||||
|
||||
/**
 * Destroys a module tree and everything it owns.
 *
 * Frees every import name, every alias (name and type), every variable
 * (name, type and optional initializer, including the text of a string
 * initializer), the three lists themselves, the module name and finally the
 * module structure.
 *
 * @param module The module to destroy. NULL is accepted and ignored.
 */
void ast_free_module(ModuleTree* module) {
    size_t idx;

    if (!module) {
        return;
    }

    if (module->imports) {
        for (idx = 0; idx < module->import_count; ++idx) {
            free(module->imports[idx].module_name);
        }
        free(module->imports);
    }

    if (module->aliases) {
        for (idx = 0; idx < module->alias_count; ++idx) {
            AliasTree* alias = &module->aliases[idx];

            /* The name is declared const in the tree but owned by it. */
            free((void*)alias->name);
            ast_free_type(&alias->value);
        }
        free(module->aliases);
    }

    if (module->variables) {
        for (idx = 0; idx < module->variable_count; ++idx) {
            VariableTree* variable = &module->variables[idx];
            ExpressionTree* init = variable->initializer;

            free(variable->name);
            ast_free_type(&variable->type);

            if (!init) {
                continue;
            }
            /* Only string initializers carry heap-allocated payload. */
            if (init->tag == EXPRESSION_TREE_STRING) {
                free((void*)init->string);
            }
            free(init);
        }
        free(module->variables);
    }

    free(module->name);
    free(module);
}
|
||||
@@ -1,34 +0,0 @@
|
||||
#ifndef AST_MODULE_H
|
||||
#define AST_MODULE_H
|
||||
|
||||
#include "declaration.h"
|
||||
#include <stddef.h>
|
||||
|
||||
/**
|
||||
* The top-level model.
|
||||
* Every file matches an entire Module.
|
||||
*/
|
||||
typedef struct {
|
||||
/** @brief The name of the module. */
|
||||
char* name;
|
||||
|
||||
/** @brief The list of imports in the module. */
|
||||
ImportTree* imports;
|
||||
|
||||
/** @brief The number of imports in the module. */
|
||||
size_t import_count;
|
||||
|
||||
/** @brief The list of aliases in the module. */
|
||||
AliasTree* aliases;
|
||||
|
||||
/** @brief The number of aliases in the module. */
|
||||
size_t alias_count;
|
||||
|
||||
/** @brief The list of variables in the module. */
|
||||
VariableTree* variables;
|
||||
|
||||
/** @brief The number of variables in the module. */
|
||||
size_t variable_count;
|
||||
} ModuleTree;
|
||||
|
||||
#endif
|
||||
@@ -1,10 +0,0 @@
|
||||
/* Minimal boolean type for C89 compatibility */
|
||||
#ifndef BOOL_H
|
||||
#define BOOL_H
|
||||
|
||||
typedef int bool;
|
||||
|
||||
#define true 1
|
||||
#define false 0
|
||||
|
||||
#endif
|
||||
+2
-18
@@ -1,7 +1,4 @@
|
||||
include v0/ast/include.mk
|
||||
include v0/parser/include.mk
|
||||
|
||||
V0_SRC := v0/main.c v0/util.c v0/token.c $(AST_SRC) $(PARSER_SRC) v0/log.c v0/str.c
|
||||
V0_SRC := v0/main.c v0/token.c v0/parser.c
|
||||
|
||||
# V0_TEST must only include `v0/test.c` itself, as all other test C–source files are
|
||||
# included directly into `v0/test.c` using `#include "test_xyz.c"`.
|
||||
@@ -14,8 +11,6 @@ V0_TEST_OBJ := $(patsubst v0/%.c,v0/bin/%.o,$(V0_TEST))
|
||||
V0_SRC_DEPS := $(V0_SRC_OBJ:.o=.d)
|
||||
V0_TEST_DEPS := $(V0_TEST_OBJ:.o=.d)
|
||||
|
||||
CFLAGS += -Werror -Wall -pedantic -std=c11 -g
|
||||
|
||||
v0/bin/c2: $(V0_SRC_OBJ)
|
||||
$(CC) $(CFLAGS) -o $@ $^
|
||||
|
||||
@@ -24,19 +19,8 @@ V0_SRC_OBJ_NO_MAIN := $(filter-out v0/bin/main.o,$(V0_SRC_OBJ))
|
||||
v0/bin/test: $(V0_SRC_OBJ_NO_MAIN) $(V0_TEST_OBJ)
|
||||
$(CC) $(CFLAGS) -o $@ $^
|
||||
|
||||
# Only run tests under valgrind on Linux. On macOS (Darwin) valgrind is
|
||||
# typically unavailable or unsupported, so run the test binary directly.
|
||||
ifeq ($(shell uname -s),Linux)
|
||||
TEST_CMD := valgrind --quiet --leak-check=full --error-exitcode=1 v0/bin/test
|
||||
else
|
||||
TEST_CMD := v0/bin/test
|
||||
endif
|
||||
|
||||
test:: v0/bin/test
|
||||
$(TEST_CMD)
|
||||
|
||||
generate_golden:: v0/bin/test
|
||||
GENERATE_GOLDEN=1 v0/bin/test
|
||||
v0/bin/test
|
||||
|
||||
clean::
|
||||
rm -f v0/bin/test v0/bin/c2 $(V0_SRC_OBJ) $(V0_TEST_OBJ) $(V0_SRC_DEPS) $(V0_TEST_DEPS)
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
#include <stdint.h>
|
||||
|
||||
// u32 simple:x
|
||||
static uint32_t v_6simple_1x = 123;
|
||||
@@ -1,2 +0,0 @@
|
||||
module simple;
|
||||
u32 x = 123;
|
||||
@@ -1,28 +0,0 @@
|
||||
/**
|
||||
* Location handling for error reporting.
|
||||
*/
|
||||
#ifndef LOCATION_H
|
||||
#define LOCATION_H
|
||||
|
||||
#include "str.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
typedef struct {
|
||||
/* @brief The name of the file where the token was found. */
|
||||
char* filename;
|
||||
|
||||
/* @brief The entire line of text where the token was found. */
|
||||
String line_text;
|
||||
|
||||
/* @brief The line number where the token was found. */
|
||||
int line;
|
||||
|
||||
/* @brief The starting column number where the token was found. */
|
||||
int column_start;
|
||||
|
||||
/* @brief The ending column number where the token was found. */
|
||||
int column_end;
|
||||
} Location;
|
||||
|
||||
#endif
|
||||
@@ -1,87 +0,0 @@
|
||||
#include "log.h"
|
||||
#include "util.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
static LogError* s_logError = NULL;
|
||||
|
||||
/*
 * Stores `destination` in the file-level s_logError pointer, replacing
 * whatever callback was stored there before.
 */
void log_set_output(LogError* destination) {
|
||||
s_logError = destination;
|
||||
}
|
||||
|
||||
void log_error(const char* msg) {
|
||||
if (s_logError != NULL) {
|
||||
s_logError(msg);
|
||||
} else {
|
||||
fprintf(stderr, "Error: %s\n", msg);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Builds a multi-line diagnostic for `loc` and passes it to log_error().
 *
 * The report consists of:
 *   - a header line ("--- <filename> ---", or "--- " when there is no name),
 *   - the offending source line prefixed with "<line>| ",
 *   - a line of '^' characters underneath columns column_start..column_end,
 *   - the formatted message, indented by the width of the line prefix.
 *
 * `msg` and the variadic arguments are formatted by format_string_va().
 * If any allocation fails the function returns without logging anything.
 */
void log_on_line(Location* loc, const char* msg, ...) {
    /* Declarations first for C89 */
    char* line_prefix = NULL;
    char* formatted_msg = NULL;
    char* header = NULL;
    char* buffer = NULL;
    va_list args;
    size_t prefix_len;
    size_t total_size;
    int caret_len;
    int indent;
    char* p;

    line_prefix = format_string("%d| ", loc->line);
    if (!line_prefix) goto cleanup;
    prefix_len = strlen(line_prefix);

    caret_len = loc->column_end - loc->column_start + 1;
    if (caret_len < 1) caret_len = 1;

    /*
     * The caret indent is computed once and clamped so that the allocation
     * below and the write further down always agree, even for a bogus
     * column_start below 1.
     */
    indent = (int)prefix_len + loc->column_start - 1;
    if (indent < 0) indent = 0;

    /* Format the message */
    va_start(args, msg);
    formatted_msg = format_string_va(msg, args);
    va_end(args);
    if (!formatted_msg) goto cleanup;

    /* Header logic */
    if (loc->filename && loc->filename[0] != '\0') {
        header = format_string("--- %s ---\n", loc->filename);
    } else {
        header = format_string("--- \n");
    }
    if (!header) goto cleanup;

    total_size = strlen(header)
        + prefix_len + loc->line_text.length + 1      /* "<line>| <text>\n" */
        + (size_t)indent + (size_t)caret_len + 1      /* indent + carets + '\n' */
        + prefix_len + strlen(formatted_msg) + 1      /* indent + message + '\n' */
        + 1;                                          /* terminating NUL */

    buffer = (char*)malloc(total_size);
    if (!buffer) goto cleanup;

    p = buffer;
    p += sprintf(p, "%s", header);
    p += sprintf(p, "%s%.*s\n", line_prefix, (int)loc->line_text.length, loc->line_text.data);

    /* Caret line */
    memset(p, ' ', (size_t)indent);
    p += indent;
    memset(p, '^', (size_t)caret_len);
    p += caret_len;
    *p++ = '\n';

    /* Message line */
    memset(p, ' ', prefix_len);
    p += prefix_len;
    p += sprintf(p, "%s\n", formatted_msg);

    *p = '\0';

    log_error(buffer);

cleanup:
    free(line_prefix);
    free(formatted_msg);
    free(header);
    free(buffer);
}
|
||||
@@ -1,36 +0,0 @@
|
||||
/**
|
||||
* Contains the logging framework used for logging errors during compilation.
|
||||
*/
|
||||
#ifndef LOG_H
|
||||
#define LOG_H
|
||||
|
||||
#include "location.h"
|
||||
|
||||
/**
|
||||
* A method that can log an error.
|
||||
*/
|
||||
typedef void LogError(const char* msg);
|
||||
|
||||
/**
|
||||
* Sets the destination for log errors.
|
||||
*/
|
||||
void log_set_output(LogError* destination);
|
||||
|
||||
/**
|
||||
* Logs an error to the destination.
|
||||
*/
|
||||
void log_error(const char* msg);
|
||||
|
||||
/**
|
||||
* Logs a pretty error with additional information about the line where the error occurred.
|
||||
*
|
||||
* The @p msg parameter can contain format specifiers like printf, and the additional arguments will be formatted into the message.
|
||||
* It additionally supports the `%S` format specifier, which can be used to format a `String` structure from `str.h`.
|
||||
*
|
||||
* @param loc The location where the error occurred.
|
||||
* @param msg The error message to log. This can contain format specifiers like printf, and the additional arguments will be formatted into the message.
|
||||
* @param ... Additional arguments to format into the error message.
|
||||
*/
|
||||
void log_on_line(Location* loc, const char* msg, ...);
|
||||
|
||||
#endif
|
||||
@@ -2,5 +2,4 @@
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
puts("Hello, world");
|
||||
return 0;
|
||||
}
|
||||
+45
@@ -0,0 +1,45 @@
|
||||
#include "parser.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Parses a module header of the form `module <identifier> ;`.
 *
 * Reads three tokens from `ts`. On success a freshly allocated Module is
 * returned whose `name` is a NUL-terminated copy of the identifier text;
 * release it with parser_free(). NULL is returned when a token does not
 * match or when an allocation fails.
 */
Module* parser_parse(TokenStream* ts) {
    TokenInfo ident;
    Module* result;
    char* name_copy;

    if (tokenstream_next(ts) != TOKEN_MODULE) {
        return NULL;
    }
    if (tokenstream_next(ts) != TOKEN_IDENTIFIER) {
        return NULL;
    }

    /* Fetch the identifier's text before advancing to the next token. */
    tokenstream_info(ts, &ident);

    result = (Module*)malloc(sizeof(Module));
    if (!result) {
        return NULL;
    }

    name_copy = (char*)malloc(ident.text_length + 1);
    if (!name_copy) {
        free(result);
        return NULL;
    }
    memcpy(name_copy, ident.text, ident.text_length);
    name_copy[ident.text_length] = '\0';
    result->name = name_copy;

    if (tokenstream_next(ts) == TOKEN_SEMICOLON) {
        return result;
    }

    free(name_copy);
    free(result);
    return NULL;
}
|
||||
|
||||
/*
 * Releases a Module together with its name string.
 * Passing NULL is allowed and does nothing.
 */
void parser_free(Module* module) {
    if (module != NULL) {
        free(module->name);
        free(module);
    }
}
|
||||
+8
-1
@@ -10,6 +10,13 @@
|
||||
* @param ts The TokenStream to read.
|
||||
* @returns The parsed module.
|
||||
*/
|
||||
ModuleTree* parser_parse(TokenStream* ts);
|
||||
Module* parser_parse(TokenStream* ts);
|
||||
|
||||
/**
|
||||
* Frees the parsed AST.
|
||||
*
|
||||
* @param module The AST returned by parser_parse.
|
||||
*/
|
||||
void parser_free(Module* module);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,52 +0,0 @@
|
||||
#include "internal.h"
|
||||
#include "../str.h"
|
||||
#include "../log.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Advances the parser: replaces the current token with the next one read from the token stream. */
void parser_next_token(Parser* p) {
|
||||
p->token = tokenstream_next(p->ts);
|
||||
}
|
||||
|
||||
/*
 * Consumes the current token if it has the given type.
 *
 * Returns true (and advances to the next token) on a match; returns false
 * and leaves the parser untouched otherwise.
 */
bool parser_accept(Parser* p, TokenType token) {
    if (p->token.token != token) {
        return false;
    }

    parser_next_token(p);
    return true;
}
|
||||
|
||||
/*
 * Like parser_accept(), but reports an error when the token does not match.
 *
 * On a match the token is consumed and true is returned. Otherwise `msg` is
 * logged at the location of the current token and false is returned; the
 * token is not consumed.
 */
bool parser_expect(Parser* p, TokenType token, const char* msg) {
    if (parser_accept(p, token)) {
        return true;
    }

    /*
     * `msg` is plain text, not a format string: route it through "%s" so a
     * '%' inside the message can never be interpreted as a conversion.
     */
    log_on_line(&p->token.location, "%s", msg);
    return false;
}
|
||||
|
||||
/*
 * Tests whether the current token has the given type.
 * The token is never consumed.
 */
bool parser_peek(Parser* p, TokenType token) {
    return (p->token.token == token) ? true : false;
}
|
||||
|
||||
/*
 * Like parser_peek(), but reports an error when the token does not match.
 *
 * Returns true when the current token has the given type. Otherwise `msg`
 * is logged at the location of the current token and false is returned.
 * The token is never consumed.
 */
bool parser_require(Parser* p, TokenType token, const char* msg) {
    if (parser_peek(p, token)) {
        return true;
    }

    /*
     * `msg` is plain text, not a format string: route it through "%s" so a
     * '%' inside the message can never be interpreted as a conversion.
     */
    log_on_line(&p->token.location, "%s", msg);
    return false;
}
|
||||
|
||||
/*
 * Returns the result of string_copy() on the current token's text and then
 * advances to the next token.
 *
 * The copy is taken before advancing, because advancing overwrites the
 * current token. Callers in this parser store the result and release it
 * with free().
 */
char* parser_to_text(Parser* p) {
    char* token_text;

    token_text = string_copy(p->token.text);
    parser_next_token(p);

    return token_text;
}
|
||||
|
||||
/*
 * Tests whether the current token is one of the builtin integer type
 * keywords (i8, i16, i32, i64, u8, u16, u32, u64).
 *
 * Despite its name this function only peeks: the token is not consumed.
 */
bool parser_accept_primitive(Parser* p) {
    static const TokenType primitive_tokens[] = {
        TOKEN_I8, TOKEN_I16, TOKEN_I32, TOKEN_I64,
        TOKEN_U8, TOKEN_U16, TOKEN_U32, TOKEN_U64
    };
    size_t i;

    for (i = 0; i < sizeof primitive_tokens / sizeof primitive_tokens[0]; i++) {
        if (parser_peek(p, primitive_tokens[i])) {
            return true;
        }
    }
    return false;
}
|
||||
@@ -1,87 +0,0 @@
|
||||
#include "internal.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Parses the remainder of an import declaration: `<identifier> ;`.
 *
 * A new zero-initialised ImportTree is appended to module->imports before
 * parsing starts, so a partially parsed entry stays owned by the module
 * when an error occurs.
 *
 * @param p         The parser, positioned on the token after `import`.
 * @param module    The module that receives the import.
 * @param is_public Whether the declaration was prefixed with `public`.
 * @return true on success; false on a syntax error (which is logged) or
 *         when the import list could not be grown (the module is left
 *         unchanged in that case).
 */
bool parse_import_declaration(Parser* p, ModuleTree* module, bool is_public) {
    ImportTree* grown;
    ImportTree* import;

    /*
     * Grow through a temporary: on failure realloc leaves the old list
     * valid, and the count must keep matching it.
     */
    grown = realloc(module->imports, sizeof(ImportTree) * (module->import_count + 1));
    if (grown == NULL) {
        return false;
    }
    module->imports = grown;
    import = &module->imports[module->import_count];
    module->import_count++;

    memset(import, 0, sizeof(ImportTree));
    import->is_public = is_public;

    if (!parser_require(p, TOKEN_IDENTIFIER, "expected module identifier")) {
        return false;
    }

    import->module_name = parser_to_text(p);

    if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after import")) {
        return false;
    }

    return true;
}
|
||||
|
||||
/*
 * Parses the remainder of an alias declaration: `<identifier> = <type> ;`.
 *
 * A new zero-initialised AliasTree is appended to module->aliases before
 * parsing starts, so a partially parsed entry stays owned by the module
 * when an error occurs.
 *
 * @param p         The parser, positioned on the token after `alias`.
 * @param module    The module that receives the alias.
 * @param is_public Currently ignored; AliasTree has no visibility flag.
 * @return true on success; false on a syntax error (which is logged) or
 *         when the alias list could not be grown (the module is left
 *         unchanged in that case).
 */
bool parse_alias_declaration(Parser* p, ModuleTree* module, bool is_public) {
    AliasTree* grown;
    AliasTree* alias;

    (void)is_public;

    /*
     * Grow through a temporary: on failure realloc leaves the old list
     * valid, and the count must keep matching it.
     */
    grown = realloc(module->aliases, sizeof(AliasTree) * (module->alias_count + 1));
    if (grown == NULL) {
        return false;
    }
    module->aliases = grown;
    alias = &module->aliases[module->alias_count];
    module->alias_count++;

    memset(alias, 0, sizeof(AliasTree));

    if (!parser_require(p, TOKEN_IDENTIFIER, "expected alias identifier")) {
        return false;
    }
    alias->name = parser_to_text(p);

    if (!parser_expect(p, TOKEN_ASSIGN, "expected '=' after alias name")) {
        return false;
    }

    if (!parse_type_expression(p, &alias->value)) {
        return false;
    }

    if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after alias declaration")) {
        return false;
    }

    return true;
}
|
||||
|
||||
/*
 * Parses a variable declaration: `[<type>] <identifier> [= <expr>] ;`.
 *
 * A new zero-initialised VariableTree is appended to module->variables
 * before parsing starts, so a partially parsed entry stays owned by the
 * module when an error occurs. When the current token is not a builtin
 * type keyword, no type is parsed and `type` stays zero-initialised.
 *
 * @param p         The parser, positioned on the type keyword or on the
 *                  identifier.
 * @param module    The module that receives the variable.
 * @param is_public Whether the declaration carried the `public` modifier.
 * @param is_static Whether the declaration carried the `static` modifier.
 * @param is_const  Whether the declaration carried the `const` modifier.
 * @return true on success; false on a syntax error (which is logged) or
 *         when memory could not be allocated.
 */
bool parse_variable_declaration(Parser* p, ModuleTree* module, bool is_public, bool is_static, bool is_const) {
    VariableTree* grown;
    VariableTree* var;

    /*
     * Grow through a temporary: on failure realloc leaves the old list
     * valid, and the count must keep matching it.
     */
    grown = realloc(module->variables, sizeof(VariableTree) * (module->variable_count + 1));
    if (grown == NULL) {
        return false;
    }
    module->variables = grown;
    var = &module->variables[module->variable_count];
    module->variable_count++;

    memset(var, 0, sizeof(VariableTree));
    var->is_public = is_public;
    var->is_static = is_static;
    var->is_const = is_const;

    if (parser_accept_primitive(p)) {
        if (!parse_type_expression(p, &var->type)) {
            return false;
        }
    }

    if (!parser_require(p, TOKEN_IDENTIFIER, "expected variable identifier")) {
        return false;
    }
    var->name = parser_to_text(p);

    if (parser_accept(p, TOKEN_ASSIGN)) {
        ExpressionTree* init = calloc(1, sizeof(ExpressionTree));
        if (init == NULL) {
            return false;
        }
        if (!parse_expression(p, init)) {
            /*
             * Never attach a half-built initializer: the module cleanup
             * inspects its tag and would act on indeterminate data.
             */
            free(init);
            return false;
        }
        var->initializer = init;
    }

    if (!parser_expect(p, TOKEN_SEMICOLON, "expected ';' after variable declaration")) {
        return false;
    }

    return true;
}
|
||||
@@ -1,98 +0,0 @@
|
||||
#include "internal.h"
|
||||
#include "../log.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
/*
 * Parses a builtin integer type keyword into `expr`.
 *
 * The keywords are tried in the order u8, u16, u32, u64, i8, i16, i32, i64.
 * On a match the keyword is consumed and `expr` is filled in with the
 * builtin tag, the bit size and the signedness. When no keyword matches,
 * "expected type expression" is logged, `expr` is left untouched and false
 * is returned.
 */
bool parse_primitive_type_expression(Parser* p, TypeTree* expr) {
    static const struct {
        TokenType token;
        int bits;
        bool is_signed;
    } builtins[] = {
        { TOKEN_U8,  8,  false },
        { TOKEN_U16, 16, false },
        { TOKEN_U32, 32, false },
        { TOKEN_U64, 64, false },
        { TOKEN_I8,  8,  true },
        { TOKEN_I16, 16, true },
        { TOKEN_I32, 32, true },
        { TOKEN_I64, 64, true }
    };
    size_t i;

    for (i = 0; i < sizeof builtins / sizeof builtins[0]; i++) {
        if (!parser_accept(p, builtins[i].token)) {
            continue;
        }
        expr->tag = TYPE_TREE_BUILTIN;
        expr->builtin.bitSize = builtins[i].bits;
        expr->builtin.isSigned = builtins[i].is_signed;
        return true;
    }

    log_on_line(&p->token.location, "expected type expression");
    return false;
}
|
||||
|
||||
/*
 * Parses `<primitive>` or `<primitive> [ ]` into `expr`.
 *
 * Without a following '[' the primitive type is stored in `expr` directly.
 * With '[' the element type is moved to the heap and `expr` becomes an
 * array type pointing at it; a closing ']' is then required.
 *
 * @return true on success; false on a syntax error (which is logged) or
 *         when the element type could not be allocated. If the ']' is
 *         missing, `expr` already owns the element type and must still be
 *         cleaned up with ast_free_type().
 */
bool parse_array_type_expression(Parser* p, TypeTree* expr) {
    TypeTree elementType;
    TypeTree* element;

    if (!parse_primitive_type_expression(p, &elementType)) {
        return false;
    }

    if (!parser_accept(p, TOKEN_BRACKET_OPEN)) {
        *expr = elementType;
        return true;
    }

    /* Allocate first so `expr` is only switched to an array once it is valid. */
    element = malloc(sizeof(TypeTree));
    if (element == NULL) {
        log_error("out of memory");
        return false;
    }
    *element = elementType;

    expr->tag = TYPE_TREE_ARRAY;
    expr->array.array = element;

    if (!parser_expect(p, TOKEN_BRACKET_CLOSE, "expected ']' to end array type")) {
        return false;
    }
    return true;
}
|
||||
|
||||
/* Entry point for parsing a type expression; currently forwards directly to parse_array_type_expression(). */
bool parse_type_expression(Parser* p, TypeTree* expr) {
|
||||
return parse_array_type_expression(p, expr);
|
||||
}
|
||||
|
||||
/*
 * Parses a literal expression into `expr`.
 *
 * Supported literals are integers, strings and the keywords true/false.
 * On success the literal is consumed and true is returned. Otherwise
 * "expected expression" is logged, `expr` is left untouched and false is
 * returned.
 */
bool parse_expression(Parser* p, ExpressionTree* expr) {
    if (parser_peek(p, TOKEN_INTEGER)) {
        expr->tag = EXPRESSION_TREE_INTEGER;
        /*
         * NOTE(review): atoi() has no error reporting and overflows are
         * undefined; it also assumes the token text stops at a non-digit or
         * NUL - confirm against the tokenizer.
         */
        expr->integer = atoi(p->token.text.data);
        parser_next_token(p);
        return true;
    }

    if (parser_peek(p, TOKEN_STRING)) {
        expr->tag = EXPRESSION_TREE_STRING;
        /* parser_to_text() copies the text and consumes the token. */
        expr->string = parser_to_text(p);
        return true;
    }

    if (parser_accept(p, TOKEN_TRUE)) {
        expr->tag = EXPRESSION_TREE_BOOLEAN;
        expr->boolean = true;
        return true;
    }

    if (parser_accept(p, TOKEN_FALSE)) {
        expr->tag = EXPRESSION_TREE_BOOLEAN;
        expr->boolean = false;
        return true;
    }

    log_on_line(&p->token.location, "expected expression");
    return false;
}
|
||||
@@ -1 +0,0 @@
|
||||
PARSER_SRC := v0/parser/core.c v0/parser/expression.c v0/parser/declaration.c v0/parser/module.c
|
||||
@@ -1,36 +0,0 @@
|
||||
#ifndef PARSER_INTERNAL_H
|
||||
#define PARSER_INTERNAL_H
|
||||
|
||||
#include "../parser.h"
|
||||
#include "../token.h"
|
||||
#include "../ast.h"
|
||||
|
||||
typedef struct {
|
||||
TokenStream* ts;
|
||||
Token token;
|
||||
} Parser;
|
||||
|
||||
// Core functions
|
||||
void parser_next_token(Parser* p);
|
||||
bool parser_accept(Parser* p, TokenType token);
|
||||
bool parser_expect(Parser* p, TokenType token, const char* msg);
|
||||
bool parser_peek(Parser* p, TokenType token);
|
||||
bool parser_require(Parser* p, TokenType token, const char* msg);
|
||||
char* parser_to_text(Parser* p);
|
||||
bool parser_accept_primitive(Parser* p);
|
||||
|
||||
// Base parsing (expressions, types)
|
||||
bool parse_primitive_type_expression(Parser* p, TypeTree* expr);
|
||||
bool parse_array_type_expression(Parser* p, TypeTree* expr);
|
||||
bool parse_type_expression(Parser* p, TypeTree* expr);
|
||||
bool parse_expression(Parser* p, ExpressionTree* expr);
|
||||
|
||||
// Declaration parsing
|
||||
bool parse_import_declaration(Parser* p, ModuleTree* module, bool is_public);
|
||||
bool parse_alias_declaration(Parser* p, ModuleTree* module, bool is_public);
|
||||
bool parse_variable_declaration(Parser* p, ModuleTree* module, bool is_public, bool is_static, bool is_const);
|
||||
|
||||
// Module parsing
|
||||
bool parse_module_declaration(Parser* p, ModuleTree* module);
|
||||
|
||||
#endif
|
||||
@@ -1,87 +0,0 @@
|
||||
#include "internal.h"
|
||||
#include "../log.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Parses the module header: `module <identifier> ;`.
 *
 * On success the identifier text is stored in module->name. Every mismatch
 * is logged by the parser helper that detected it. If the final ';' is
 * missing, module->name has already been set and stays owned by the module.
 */
bool parse_module_declaration(Parser* p, ModuleTree* module) {
    bool has_keyword = parser_expect(p, TOKEN_MODULE, "expected keyword 'module'");

    if (has_keyword && parser_require(p, TOKEN_IDENTIFIER, "expected module identifier")) {
        module->name = parser_to_text(p);
        return parser_expect(p, TOKEN_SEMICOLON, "expected ';' after module name");
    }

    return false;
}
|
||||
|
||||
/*
 * Parses a whole module from the token stream.
 *
 * A module starts with a module header, followed by any number of
 * declarations until end of file. Each declaration may be preceded by the
 * modifiers `public`, `static` and `const`, in any order; the declaration
 * keyword (`import`, `alias`, `var` or a builtin type) ends the modifier
 * list. `static` and `const` are rejected on imports and aliases.
 *
 * @param ts The TokenStream to read.
 * @return The parsed module, to be released with ast_free_module(), or NULL
 *         when a syntax error was logged or memory could not be allocated.
 */
ModuleTree* parser_parse(TokenStream* ts) {
    /* The parser state only lives for this call, so it needs no heap allocation. */
    Parser parser;
    Parser* p = &parser;
    ModuleTree* module;

    p->ts = ts;
    parser_next_token(p);

    /* Zeroed so the lists start empty and cleanup is safe at any point. */
    module = calloc(1, sizeof(ModuleTree));
    if (module == NULL) {
        return NULL;
    }

    if (!parse_module_declaration(p, module)) {
        goto fail;
    }

    while (!parser_peek(p, TOKEN_EOF)) {
        bool is_public = false;
        bool is_static = false;
        bool is_const = false;
        bool terminal = false;

        /* Collect modifiers until a declaration keyword ends the list. */
        while (!terminal) {
            if (parser_accept(p, TOKEN_IMPORT)) {
                if (is_static || is_const) {
                    log_on_line(&p->token.location, "import declarations cannot be static or const");
                    goto fail;
                }
                if (!parse_import_declaration(p, module, is_public)) {
                    goto fail;
                }
                terminal = true;
            } else if (parser_accept(p, TOKEN_ALIAS)) {
                if (is_static || is_const) {
                    log_on_line(&p->token.location, "alias declarations cannot be static or const");
                    goto fail;
                }
                if (!parse_alias_declaration(p, module, is_public)) {
                    goto fail;
                }
                terminal = true;
            } else if (parser_accept(p, TOKEN_PUBLIC)) {
                is_public = true;
            } else if (parser_accept(p, TOKEN_STATIC)) {
                is_static = true;
            } else if (parser_accept(p, TOKEN_CONST)) {
                is_const = true;
            } else if (parser_accept(p, TOKEN_VAR) || parser_accept_primitive(p)) {
                /* `var` is consumed here; a type keyword is left for the callee. */
                if (!parse_variable_declaration(p, module, is_public, is_static, is_const)) {
                    goto fail;
                }
                terminal = true;
            } else {
                log_on_line(&p->token.location, "unexpected token");
                goto fail;
            }
        }
    }

    return module;

fail:
    ast_free_module(module);
    return NULL;
}
|
||||
@@ -1,8 +0,0 @@
|
||||
#include "../test.h"
|
||||
#include "../parser.h"
|
||||
|
||||
// Currently core utilities are tested indirectly through other parser tests.
|
||||
// Placeholder for future explicit core utility tests.
|
||||
/**
 * Placeholder test for the parser core utilities.
 *
 * Intentionally does nothing.
 */
static void test_parser_core_placeholder(void) {
    /* Nothing to verify yet. */
    return;
}
|
||||
@@ -1,89 +0,0 @@
|
||||
#include "../test.h"
|
||||
#include "../parser.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
 * Parses this test's source through the harness and compares everything that
 * was logged while parsing against the golden log file of this test.
 */
static void test_parser_missing_semicolon_import(void) {
    /* Only the logged diagnostics are checked; the returned tree is unused. */
    (void)test_get_ast();

    assert_log_file("expected error for missing semicolon");
}
|
||||
|
||||
/**
 * Parses this test's source through the harness and compares everything that
 * was logged while parsing against the golden log file of this test.
 */
static void test_parser_bad_import_name(void) {
    /* Only the logged diagnostics are checked; the returned tree is unused. */
    (void)test_get_ast();

    assert_log_file("expected error for bad import name");
}
|
||||
|
||||
static void test_parser_imports(void) {
|
||||
ModuleTree* m = test_get_ast();
|
||||
|
||||
assert_not_null(m, "expected module to be parsed");
|
||||
assert_str("my_module", m->name, "expected name 'my_module'");
|
||||
|
||||
assert_not_null(m->imports, "expected imports to be parsed");
|
||||
assert_int(1, (int)m->import_count, "expected one import");
|
||||
assert_str("other_module", m->imports[0].module_name, "expected import name 'other_module'");
|
||||
assert_false(m->imports[0].is_public, "expected import to not be public");
|
||||
}
|
||||
|
||||
static void test_parser_public_imports(void) {
|
||||
ModuleTree* m = test_get_ast();
|
||||
|
||||
assert_not_null(m, "expected module to be parsed");
|
||||
assert_str("my_module", m->name, "expected name 'my_module'");
|
||||
|
||||
assert_not_null(m->imports, "expected imports to be parsed");
|
||||
assert_int(1, (int)m->import_count, "expected one import");
|
||||
assert_str("other_module", m->imports[0].module_name, "expected import name 'other_module'");
|
||||
assert_true(m->imports[0].is_public, "expected import to be public");
|
||||
}
|
||||
|
||||
static void test_parser_alias_simple(void) {
|
||||
ModuleTree* m = test_get_ast();
|
||||
|
||||
assert_not_null(m, "expected module to be parsed");
|
||||
assert_int(1, (int)m->alias_count, "expected correct number of aliases");
|
||||
AliasTree alias = m->aliases[0];
|
||||
assert_str("myalias", alias.name, "expected correct alias name");
|
||||
}
|
||||
|
||||
static void test_parser_variable_simple(void) {
|
||||
ModuleTree* m = test_get_ast();
|
||||
|
||||
assert_not_null(m, "expected module to be parsed");
|
||||
assert_int(1, (int)m->variable_count, "expected correct number of variables");
|
||||
VariableTree var = m->variables[0];
|
||||
assert_str("my_var", var.name, "expected correct variable name");
|
||||
assert_false(var.is_const, "expected not const");
|
||||
assert_false(var.is_static, "expected not static");
|
||||
}
|
||||
|
||||
static void test_parser_variable_const(void) {
|
||||
ModuleTree* m = test_get_ast();
|
||||
|
||||
assert_not_null(m, "expected module to be parsed");
|
||||
assert_int(1, (int)m->variable_count, "expected correct number of variables");
|
||||
VariableTree var = m->variables[0];
|
||||
assert_str("my_const", var.name, "expected correct variable name");
|
||||
assert_true(var.is_const, "expected const");
|
||||
assert_false(var.is_static, "expected not static");
|
||||
}
|
||||
|
||||
static void test_parser_variable_static(void) {
|
||||
ModuleTree* m = test_get_ast();
|
||||
|
||||
assert_not_null(m, "expected module to be parsed");
|
||||
assert_int(1, (int)m->variable_count, "expected correct number of variables");
|
||||
VariableTree var = m->variables[0];
|
||||
assert_str("my_static", var.name, "expected correct variable name");
|
||||
assert_false(var.is_const, "expected not const");
|
||||
assert_true(var.is_static, "expected static");
|
||||
}
|
||||
|
||||
static void test_parser_multiple_vars(void) {
|
||||
ModuleTree* m = test_get_ast();
|
||||
|
||||
assert_not_null(m, "expected module to be parsed");
|
||||
assert_int(2, (int)m->variable_count, "expected correct number of variables");
|
||||
assert_str("var1", m->variables[0].name, "expected first variable name 'var1'");
|
||||
assert_str("var2", m->variables[1].name, "expected second variable name 'var2'");
|
||||
}
|
||||
@@ -1,52 +0,0 @@
|
||||
#include "../test.h"
|
||||
#include "../parser.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
static void test_parser_alias_simple_type(void) {
|
||||
ModuleTree* m = test_get_ast();
|
||||
|
||||
assert_not_null(m, "expected module to be parsed");
|
||||
assert_int(1, (int)m->alias_count, "expected correct number of aliases");
|
||||
AliasTree alias = m->aliases[0];
|
||||
assert_int(TYPE_TREE_BUILTIN, alias.value.tag, "expected correct alias tag");
|
||||
assert_int(32, alias.value.builtin.bitSize, "expected bitSize 32");
|
||||
assert_true(alias.value.builtin.isSigned, "expected signed");
|
||||
}
|
||||
|
||||
static void test_parser_alias_array(void) {
|
||||
ModuleTree* m = test_get_ast();
|
||||
|
||||
assert_not_null(m, "expected module to be parsed");
|
||||
assert_int(1, (int)m->alias_count, "expected correct number of aliases");
|
||||
AliasTree alias = m->aliases[0];
|
||||
assert_int(TYPE_TREE_ARRAY, alias.value.tag, "expected correct alias tag");
|
||||
TypeTree* valueType = alias.value.array.array;
|
||||
assert_not_null(valueType, "expected pointer to array type");
|
||||
assert_int(TYPE_TREE_BUILTIN, valueType->tag, "expected correct type tag");
|
||||
assert_int(32, valueType->builtin.bitSize, "expected bitSize 32");
|
||||
assert_true(valueType->builtin.isSigned, "expected signed");
|
||||
}
|
||||
|
||||
static void test_parser_variable_init(void) {
|
||||
ModuleTree* m = test_get_ast();
|
||||
|
||||
assert_not_null(m, "expected module to be parsed");
|
||||
assert_int(1, (int)m->variable_count, "expected 1 variable");
|
||||
VariableTree* var = &m->variables[0];
|
||||
assert_str("x", var->name, "expected variable name 'x'");
|
||||
assert_not_null(var->initializer, "expected variable to have an initializer");
|
||||
assert_int(EXPRESSION_TREE_INTEGER, var->initializer->tag, "expected integer initializer");
|
||||
assert_int(123, var->initializer->integer, "expected value 123");
|
||||
}
|
||||
|
||||
static void test_parser_variable_simple_type(void) {
|
||||
ModuleTree* m = test_get_ast();
|
||||
|
||||
assert_not_null(m, "expected module to be parsed");
|
||||
assert_int(1, (int)m->variable_count, "expected correct number of variables");
|
||||
VariableTree var = m->variables[0];
|
||||
assert_int(TYPE_TREE_BUILTIN, var.type.tag, "expected correct type tag");
|
||||
assert_int(32, var.type.builtin.bitSize, "expected bitSize 32");
|
||||
assert_true(var.type.builtin.isSigned, "expected signed");
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
#include "../test.h"
|
||||
#include "../parser.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
static void test_parser_module_name(void) {
|
||||
ModuleTree* m = test_get_ast();
|
||||
|
||||
assert_not_null(m, "expected module to be parsed");
|
||||
assert_str("my_module", m->name, "expected name 'my_module'");
|
||||
}
|
||||
|
||||
/**
 * Parses this test's source through the harness and compares everything that
 * was logged while parsing against the golden log file of this test.
 */
static void test_parser_bad_module_name(void) {
    /* Only the logged diagnostics are checked; the returned tree is unused. */
    (void)test_get_ast();

    assert_log_file("expected error to be logged for bad module name");
}
|
||||
|
||||
/**
 * Parses this test's source through the harness and compares everything that
 * was logged while parsing against the golden log file of this test.
 */
static void test_parser_missing_semicolon_module(void) {
    /* Only the logged diagnostics are checked; the returned tree is unused. */
    (void)test_get_ast();

    assert_log_file("expected error for missing semicolon");
}
|
||||
@@ -1,11 +0,0 @@
|
||||
#include "str.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
 * Creates a heap-allocated, null-terminated copy of a String.
 *
 * @param string The string to copy; `string.length` bytes are read from
 *               `string.data`.
 * @returns A newly allocated null-terminated copy that the caller must
 *          free(), or NULL if the allocation fails.
 */
char* string_copy(String string) {
    char* str = malloc(string.length + 1);
    if (str == NULL) {
        /* Report allocation failure instead of writing through NULL. */
        return NULL;
    }

    /* memcpy must not be handed a NULL source, even for a zero length. */
    if (string.length > 0 && string.data != NULL) {
        memcpy(str, string.data, string.length);
        str[string.length] = '\0';
    } else {
        str[0] = '\0';
    }
    return str;
}
|
||||
@@ -1,27 +0,0 @@
|
||||
/**
|
||||
* Contains the definition of the String structure, which is a simple representation of a string in C.
|
||||
*/
|
||||
#ifndef STR_H
|
||||
#define STR_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
/**
|
||||
* A simple string structure that holds a pointer to the character data and its length.
|
||||
*/
|
||||
typedef struct {
|
||||
/* Pointer to the first character. NOTE(review): presumably not guaranteed
   to be null-terminated, since string_copy appends its own terminator;
   confirm against the code that fills this in. */
char* data;
|
||||
/* Number of characters available at `data`. */
size_t length;
|
||||
} String;
|
||||
|
||||
/**
|
||||
* Creates a copy of a string.
|
||||
*
|
||||
* Note that this copy has to be freed afterwards.
|
||||
*
|
||||
* @param string The string to copy.
|
||||
* @returns A null-terminated copy of the string.
|
||||
*/
|
||||
char* string_copy(String string);
|
||||
|
||||
#endif
|
||||
@@ -1,303 +1,83 @@
|
||||
#include "test.h"
|
||||
|
||||
#include "util.h"
|
||||
#include "parser.h"
|
||||
|
||||
#include <setjmp.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
static jmp_buf s_testJmp;
|
||||
static char s_failMsg[1024];
|
||||
static char* s_logOutput = NULL;
|
||||
static const char* s_currentTestName = NULL;
|
||||
static char* s_testSource = NULL;
|
||||
|
||||
static ModuleTree* s_currentModule = NULL;
|
||||
static TokenStream* s_currentTokenStream = NULL;
|
||||
static const char* s_failMsg;
|
||||
|
||||
void fail(const char* msg) {
|
||||
if (msg) {
|
||||
strncpy(s_failMsg, msg, sizeof(s_failMsg) - 1);
|
||||
s_failMsg[sizeof(s_failMsg) - 1] = '\0';
|
||||
} else {
|
||||
s_failMsg[0] = '\0';
|
||||
}
|
||||
s_failMsg = msg;
|
||||
longjmp(s_testJmp, 1);
|
||||
}
|
||||
|
||||
/**
 * Reads a whole file into a freshly allocated, null-terminated buffer.
 *
 * The file is opened in text mode. The terminator is written directly after
 * the bytes that were actually read, which may be fewer than the size
 * reported by ftell() when the platform translates line endings.
 *
 * @param filepath Path of the file to read.
 * @returns A heap-allocated string the caller must free(), or NULL if the
 *          path is NULL, the file cannot be opened, its size cannot be
 *          determined, memory cannot be allocated, or reading fails.
 */
char* read_file_content(const char* filepath) {
    FILE* f;
    long size;
    size_t bytes_read;
    char* content = NULL;

    if (!filepath) return NULL;

    f = fopen(filepath, "r");
    if (!f) return NULL;

    /* Determine the file size; every step can fail (e.g. on a pipe). */
    if (fseek(f, 0, SEEK_END) != 0) goto error;
    size = ftell(f);
    if (size < 0) goto error;
    if (fseek(f, 0, SEEK_SET) != 0) goto error;

    content = malloc((size_t)size + 1);
    if (!content) goto error;

    bytes_read = fread(content, 1, (size_t)size, f);
    if (ferror(f)) goto error;

    /* Terminate after what was really read, never after stale bytes. */
    content[bytes_read] = '\0';
    fclose(f);
    return content;

error:
    free(content);
    fclose(f);
    return NULL;
}
|
||||
|
||||
/**
 * Fails the current test with `msg` when `ptr` is NULL.
 *
 * @param ptr The pointer to check.
 * @param msg The failure message handed to fail().
 */
void assert_not_null(void* ptr, const char* msg) {
    if (ptr != NULL) {
        return;
    }
    fail(msg);
}
|
||||
|
||||
/**
 * Fails the current test with `msg` unless the length-delimited `actual`
 * holds exactly the characters of the null-terminated `expected`.
 *
 * A NULL `expected` or a NULL `actual.data` always counts as a mismatch.
 *
 * @param expected The expected text, null-terminated.
 * @param actual   The text under test.
 * @param msg      The failure message handed to fail().
 */
void assert_string(const char* expected, String actual, const char* msg) {
    /* Evaluated left to right, so later checks never see a NULL pointer. */
    int matches = expected != NULL
        && actual.data != NULL
        && strlen(expected) == actual.length
        && strncmp(expected, actual.data, actual.length) == 0;

    if (!matches) {
        fail(msg);
    }
}
|
||||
|
||||
/**
 * Fails the current test with `msg` unless both strings are non-NULL and
 * compare equal.
 *
 * @param expected The expected text.
 * @param actual   The text under test.
 * @param msg      The failure message handed to fail().
 */
void assert_str(const char* expected, const char* actual, const char* msg) {
    /* NULL on either side is a mismatch and must not reach strcmp. */
    int matches = expected != NULL
        && actual != NULL
        && strcmp(expected, actual) == 0;

    if (!matches) {
        fail(msg);
    }
}
|
||||
|
||||
TokenStream* test_get_tokenstream(void) {
|
||||
if (s_currentTokenStream == NULL) {
|
||||
char* filepath = NULL;
|
||||
|
||||
filepath = format_string("v0/tests/%s.c2", s_currentTestName);
|
||||
if (!filepath) {
|
||||
fail("out of memory");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (s_testSource) free(s_testSource);
|
||||
s_testSource = read_file_content(filepath);
|
||||
if (!s_testSource) {
|
||||
puts(filepath);
|
||||
free(filepath);
|
||||
fail("could not read test source file");
|
||||
return NULL;
|
||||
}
|
||||
s_currentTokenStream = tokenstream_open(filepath, s_testSource);
|
||||
free(filepath);
|
||||
}
|
||||
return s_currentTokenStream;
|
||||
}
|
||||
|
||||
ModuleTree* test_get_ast(void) {
|
||||
if (s_currentModule == NULL) {
|
||||
s_currentModule = parser_parse(test_get_tokenstream());
|
||||
}
|
||||
return s_currentModule;
|
||||
}
|
||||
|
||||
void assert_log(const char* expected, const char* msg) {
|
||||
assert_str(expected, s_logOutput, msg);
|
||||
}
|
||||
|
||||
void assert_log_file(const char* msg) {
|
||||
char* filepath = format_string("v0/tests/%s.log", s_currentTestName);
|
||||
const char* generate;
|
||||
char* content;
|
||||
if (!filepath) {
|
||||
fail("out of memory");
|
||||
return;
|
||||
}
|
||||
generate = getenv("GENERATE_GOLDEN");
|
||||
if (generate && strcmp(generate, "1") == 0) {
|
||||
FILE* f = fopen(filepath, "w");
|
||||
if (!f) {
|
||||
free(filepath);
|
||||
fail("could not open golden file for writing");
|
||||
return;
|
||||
}
|
||||
fputs(s_logOutput ? s_logOutput : "", f);
|
||||
fclose(f);
|
||||
free(filepath);
|
||||
return;
|
||||
}
|
||||
|
||||
content = read_file_content(filepath);
|
||||
if (!content) {
|
||||
free(filepath);
|
||||
fail("could not open golden file for reading");
|
||||
return;
|
||||
}
|
||||
|
||||
bool match = strcmp(content, s_logOutput ? s_logOutput : "") == 0;
|
||||
free(content);
|
||||
free(filepath);
|
||||
if (!match) {
|
||||
fail(msg);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Fails the current test unless `expected` equals `actual`.
 *
 * The failure message has the form "<msg> (expected E, got A)".
 *
 * The message is formatted into a static buffer rather than a heap
 * allocation: fail() leaves this function through longjmp, so a free()
 * placed after the call would never run and the allocation would leak.
 * Static storage also remains valid after the jump.
 *
 * @param expected The expected value.
 * @param actual   The value under test.
 * @param msg      Text describing the assertion; NULL is treated as "".
 */
void assert_int(int expected, int actual, const char* msg) {
    static char buf[1024];

    if (expected == actual) {
        return;
    }

    /* snprintf always terminates; an overlong message is merely cut off. */
    snprintf(buf, sizeof(buf), "%s (expected %d, got %d)", msg ? msg : "", expected, actual);
    fail(buf);
}
|
||||
|
||||
/**
 * Fails the current test with `msg` when `condition` is false.
 *
 * @param condition The condition that must hold.
 * @param msg       The failure message handed to fail().
 */
void assert_true(bool condition, const char* msg) {
    if (condition) {
        return;
    }
    fail(msg);
}
|
||||
|
||||
/**
 * Fails the current test with `msg` when `condition` is true.
 *
 * @param condition The condition that must not hold.
 * @param msg       The failure message handed to fail().
 */
void assert_false(bool condition, const char* msg) {
    if (!condition) {
        return;
    }
    fail(msg);
}
|
||||
|
||||
static void log_append(const char* msg) {
|
||||
size_t oldLen = s_logOutput ? strlen(s_logOutput) : 0;
|
||||
size_t newLen = oldLen + strlen(msg) + 1;
|
||||
char* newOutput = malloc(newLen);
|
||||
if (newOutput) {
|
||||
if (s_logOutput) {
|
||||
strcpy(newOutput, s_logOutput);
|
||||
free(s_logOutput);
|
||||
} else {
|
||||
newOutput[0] = '\0';
|
||||
}
|
||||
strcat(newOutput, msg);
|
||||
s_logOutput = newOutput;
|
||||
}
|
||||
}
|
||||
|
||||
static void log_clear(void) {
|
||||
free(s_logOutput);
|
||||
s_logOutput = NULL;
|
||||
}
|
||||
|
||||
/* One entry of the test table: a test name paired with its function. */
typedef struct {
|
||||
/* Name of the test. */
const char* name;
|
||||
/* The function that runs the test. */
Test func;
|
||||
} TestCase;
|
||||
|
||||
#include "test_token.c"
|
||||
#include "parser/test_module.c"
|
||||
#include "parser/test_declaration.c"
|
||||
#include "parser/test_expression.c"
|
||||
#include "parser/test_core.c"
|
||||
#include "test_log.c"
|
||||
#include "test_parser.c"
|
||||
|
||||
static int s_totalTests;
|
||||
static int s_greenTests;
|
||||
|
||||
#define TEST(name) {#name, name},
|
||||
|
||||
static TestCase s_tests[] = {
|
||||
TEST(test_log_error)
|
||||
TEST(test_log_on_line_variadic)
|
||||
TEST(test_log_on_line)
|
||||
TEST(test_parser_module_name)
|
||||
TEST(test_parser_bad_module_name)
|
||||
TEST(test_parser_missing_semicolon_module)
|
||||
TEST(test_parser_missing_semicolon_import)
|
||||
TEST(test_parser_bad_import_name)
|
||||
TEST(test_parser_imports)
|
||||
TEST(test_parser_public_imports)
|
||||
TEST(test_parser_alias_simple)
|
||||
TEST(test_parser_alias_simple_type)
|
||||
TEST(test_parser_alias_array)
|
||||
TEST(test_parser_variable_simple)
|
||||
TEST(test_parser_variable_simple_type)
|
||||
TEST(test_parser_variable_const)
|
||||
TEST(test_parser_variable_init)
|
||||
TEST(test_parser_variable_static)
|
||||
TEST(test_parser_multiple_vars)
|
||||
TEST(test_parser_core_placeholder)
|
||||
TEST(test_tokenstream_comma)
|
||||
TEST(test_tokenstream_info)
|
||||
TEST(test_tokenstream_keywords_and_symbols)
|
||||
TEST(test_tokenstream_open_fail)
|
||||
TEST(test_tokenstream_parentheses_and_brackets)
|
||||
TEST(test_tokenstream_primitive_types)
|
||||
TEST(test_tokenstream_simple_keyword)
|
||||
TEST(test_tokenstream_unknown_token)
|
||||
TEST(test_tokenstream_void_function_signature)
|
||||
TEST(test_tokenstream_whitespace_ignored)
|
||||
{"tokenstream_open_fail", test_tokenstream_open_fail},
|
||||
{"tokenstream_simple_keyword", test_tokenstream_simple_keyword},
|
||||
{"tokenstream_keywords_and_symbols", test_tokenstream_keywords_and_symbols},
|
||||
{"tokenstream_parentheses_and_brackets", test_tokenstream_parentheses_and_brackets},
|
||||
{"tokenstream_comma", test_tokenstream_comma},
|
||||
{"tokenstream_whitespace_ignored", test_tokenstream_whitespace_ignored},
|
||||
{"tokenstream_void_function_signature", test_tokenstream_void_function_signature},
|
||||
{"tokenstream_info", test_tokenstream_info},
|
||||
{"parser_module_name", test_parser_module_name},
|
||||
};
|
||||
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
const char** failedTests;
|
||||
int failedCount;
|
||||
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
|
||||
s_totalTests = sizeof(s_tests) / sizeof(s_tests[0]);
|
||||
s_greenTests = 0;
|
||||
|
||||
// Allocate failed tests array dynamically to avoid VLAs
|
||||
failedTests = (const char**)malloc((s_totalTests + 1) * sizeof(const char*));
|
||||
failedCount = 0;
|
||||
const char* failedTests[s_totalTests + 1];
|
||||
int failedCount = 0;
|
||||
|
||||
for (int i = 0; i < s_totalTests; i++) {
|
||||
// Add 5 to strip the 'test_' prefix.
|
||||
s_currentTestName = s_tests[i].name + 5;
|
||||
log_set_output(log_append);
|
||||
printf("%s...", s_tests[i].name);
|
||||
fflush(stdout);
|
||||
s_failMsg[0] = '\0';
|
||||
s_failMsg = NULL;
|
||||
|
||||
if (setjmp(s_testJmp) == 0) {
|
||||
log_clear();
|
||||
if (s_testSource) {
|
||||
free(s_testSource);
|
||||
s_testSource = NULL;
|
||||
}
|
||||
s_tests[i].func();
|
||||
printf(" [OK]\n");
|
||||
s_greenTests++;
|
||||
} else {
|
||||
printf(" [FAIL]: %s\n", s_failMsg[0] ? s_failMsg : "");
|
||||
printf(" [FAIL]: %s\n", s_failMsg ? s_failMsg : "");
|
||||
failedTests[failedCount++] = s_tests[i].name;
|
||||
|
||||
// Log output on failure
|
||||
if (s_logOutput && s_logOutput[0]) {
|
||||
printf("%s\n", s_logOutput);
|
||||
}
|
||||
}
|
||||
|
||||
// Free AST and TokenStream after each test
|
||||
if (s_currentModule) {
|
||||
ast_free_module(s_currentModule);
|
||||
s_currentModule = NULL;
|
||||
}
|
||||
if (s_currentTokenStream) {
|
||||
tokenstream_close(s_currentTokenStream);
|
||||
s_currentTokenStream = NULL;
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
if (s_testSource) free(s_testSource);
|
||||
log_clear();
|
||||
|
||||
if (failedCount > 0) {
|
||||
printf("\nFailed tests:\n");
|
||||
for (int j = 0; j < failedCount; j++) {
|
||||
printf(" - %s\n", failedTests[j]);
|
||||
for (int i = 0; i < failedCount; i++) {
|
||||
printf(" - %s\n", failedTests[i]);
|
||||
}
|
||||
}
|
||||
|
||||
printf("\n%d/%d tests passed.\n", s_greenTests, s_totalTests);
|
||||
free(failedTests);
|
||||
return failedCount > 0 ? 1 : 0;
|
||||
}
|
||||
@@ -4,9 +4,6 @@
|
||||
#ifndef TEST_H
|
||||
#define TEST_H
|
||||
|
||||
#include "token.h"
|
||||
#include "ast.h"
|
||||
|
||||
typedef void (*Test)(void);
|
||||
|
||||
/**
|
||||
@@ -15,80 +12,4 @@ typedef void (*Test)(void);
|
||||
*/
|
||||
void fail(const char* msg);
|
||||
|
||||
/**
|
||||
* Asserts that a pointer is not null.
|
||||
*
|
||||
* Calls `fail` if the assertion does not hold.
|
||||
*
|
||||
* @param ptr The pointer to test.
|
||||
* @param msg The message to print if the pointer is null.
|
||||
*/
|
||||
void assert_not_null(void* ptr, const char* msg);
|
||||
|
||||
/**
|
||||
* Asserts that a string has the expected value.
|
||||
*
|
||||
* Calls `fail` if the assertion does not hold.
|
||||
*
|
||||
* @param expected The expected value. This is typically a string literal.
|
||||
* @param actual The actual value. This is typically an expression.
|
||||
* @param msg The message to print if these do not match.
|
||||
*/
|
||||
void assert_str(const char* expected, const char* actual, const char* msg);
|
||||
|
||||
/**
|
||||
* Asserts that a length-delimited `String` has the expected value.
|
||||
*
|
||||
* Calls `fail` if the assertion does not hold.
|
||||
*
|
||||
* @param expected The expected value. This is typically a string literal.
|
||||
* @param actual The actual value. This is typically an expression.
|
||||
* @param msg The message to print if these do not match.
|
||||
*/
|
||||
void assert_string(const char* expected, String actual, const char* msg);
|
||||
|
||||
/**
|
||||
* Asserts that the logged output matches the expected value.
|
||||
*/
|
||||
void assert_log(const char* expected, const char* msg);
|
||||
|
||||
/**
|
||||
* Asserts that the logged output matches the content of the file `v0/tests/xyz.log`, where xyz is the test name.
|
||||
* If GENERATE_GOLDEN=1, the file is overwritten with the actual output.
|
||||
*/
|
||||
void assert_log_file(const char* msg);
|
||||
|
||||
/**
|
||||
* Asserts that two integers are equal.
|
||||
*/
|
||||
void assert_int(int expected, int actual, const char* msg);
|
||||
|
||||
/**
|
||||
* Asserts that a condition is true.
|
||||
*/
|
||||
#include "bool.h"
|
||||
|
||||
void assert_true(bool condition, const char* msg);
|
||||
|
||||
/**
|
||||
* Asserts that a condition is false.
|
||||
*/
|
||||
void assert_false(bool condition, const char* msg);
|
||||
|
||||
/**
|
||||
* Get the token stream used for this test.
|
||||
* It reads from the `v0/tests/xyz.c2` file, where xyz is the test name.
|
||||
*
|
||||
* At the end of the test, the tokenstream will be freed automatically by the test harness.
|
||||
*/
|
||||
TokenStream* test_get_tokenstream(void);
|
||||
|
||||
/**
|
||||
* Gets the parsed module for this test.
|
||||
* It reads from the `v0/tests/xyz.c2` file, where xyz is the test name.
|
||||
*
|
||||
* At the end of the test, the AST will be freed automatically by the test harness.
|
||||
*/
|
||||
ModuleTree* test_get_ast(void);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,63 +0,0 @@
|
||||
#define _DEFAULT_SOURCE
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <dirent.h>
|
||||
|
||||
/**
 * Runs one integration test.
 *
 * Compiles `v0/integration_tests/<dir_name>/input.c2` with `./v0/bin/c2`,
 * redirecting the output to `actual.c` in the current directory, and then
 * compares that file with `v0/integration_tests/<dir_name>/expected.c`
 * using `diff -u`.
 *
 * Both paths are passed to the shell inside single quotes, so directory
 * names containing spaces or shell metacharacters are handled literally. A
 * name that itself contains a single quote cannot be quoted this way and is
 * rejected.
 *
 * @param dir_name Name of the test directory below `v0/integration_tests`.
 * @returns 0 if the test passed, 1 otherwise (invalid name, path or command
 *          too long, compiler failure, or output mismatch).
 */
int run_test(const char* dir_name) {
    char cmd[2048];
    char input_path[1024];
    char expected_path[1024];
    int n;

    if (dir_name == NULL || strchr(dir_name, '\'') != NULL) {
        printf("Invalid test directory name\n");
        return 1;
    }

    /* snprintf returns a negative value on error and the untruncated length
       otherwise; check both before comparing against the buffer size. */
    n = snprintf(input_path, sizeof(input_path), "v0/integration_tests/%s/input.c2", dir_name);
    if (n < 0 || (size_t)n >= sizeof(input_path)) {
        printf("Path buffer too small for %s\n", dir_name);
        return 1;
    }

    n = snprintf(expected_path, sizeof(expected_path), "v0/integration_tests/%s/expected.c", dir_name);
    if (n < 0 || (size_t)n >= sizeof(expected_path)) {
        printf("Path buffer too small for %s\n", dir_name);
        return 1;
    }

    n = snprintf(cmd, sizeof(cmd), "./v0/bin/c2 '%s' > actual.c", input_path);
    if (n < 0 || (size_t)n >= sizeof(cmd)) {
        printf("Command buffer too small for %s\n", dir_name);
        return 1;
    }

    if (system(cmd) != 0) {
        printf("Failed to run compiler for %s\n", dir_name);
        return 1;
    }

    n = snprintf(cmd, sizeof(cmd), "diff -u '%s' actual.c", expected_path);
    if (n < 0 || (size_t)n >= sizeof(cmd)) {
        printf("Command buffer too small for %s\n", dir_name);
        return 1;
    }

    if (system(cmd) != 0) {
        printf("Test %s failed: Output mismatch\n", dir_name);
        return 1;
    }

    printf("Test %s passed\n", dir_name);
    return 0;
}
|
||||
|
||||
int main() {
|
||||
DIR* d = opendir("v0/integration_tests");
|
||||
if (!d) {
|
||||
perror("opendir");
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct dirent* dir;
|
||||
int passed = 0;
|
||||
int failed = 0;
|
||||
|
||||
while ((dir = readdir(d)) != NULL) {
|
||||
if (dir->d_type == DT_DIR && strcmp(dir->d_name, ".") != 0 && strcmp(dir->d_name, "..") != 0) {
|
||||
if (run_test(dir->d_name) == 0) {
|
||||
passed++;
|
||||
} else {
|
||||
failed++;
|
||||
}
|
||||
}
|
||||
}
|
||||
closedir(d);
|
||||
|
||||
printf("\nTotal tests: %d, Passed: %d, Failed: %d\n", passed + failed, passed, failed);
|
||||
return failed > 0 ? 1 : 0;
|
||||
}
|
||||
@@ -1,52 +0,0 @@
|
||||
#include "test.h"
|
||||
#include "log.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "util.h"
|
||||
|
||||
static char* s_lastLoggedError = NULL;
|
||||
|
||||
static void mock_log(const char* msg) {
|
||||
free(s_lastLoggedError);
|
||||
s_lastLoggedError = format_string("%s", msg ? msg : "");
|
||||
}
|
||||
|
||||
static void test_log_error(void) {
|
||||
log_set_output(mock_log);
|
||||
|
||||
free(s_lastLoggedError);
|
||||
s_lastLoggedError = NULL;
|
||||
log_error("test error message");
|
||||
|
||||
assert_str("test error message", s_lastLoggedError, "expected 'test error message'");
|
||||
|
||||
log_set_output(NULL);
|
||||
free(s_lastLoggedError);
|
||||
s_lastLoggedError = NULL;
|
||||
}
|
||||
|
||||
static void test_log_on_line(void) {
|
||||
Location loc;
|
||||
loc.filename = "v0/tests/log_on_line.c2";
|
||||
loc.line_text.data = "int main() []";
|
||||
loc.line_text.length = 13;
|
||||
loc.line = 1;
|
||||
loc.column_start = 12;
|
||||
loc.column_end = 13;
|
||||
|
||||
log_on_line(&loc, "unexpected token");
|
||||
assert_log_file("expected formatted error message");
|
||||
}
|
||||
|
||||
static void test_log_on_line_variadic(void) {
|
||||
Location loc;
|
||||
loc.filename = "v0/tests/log_on_line_variadic.c2";
|
||||
loc.line_text.data = "int main() []";
|
||||
loc.line_text.length = 13;
|
||||
loc.line = 1;
|
||||
loc.column_start = 12;
|
||||
loc.column_end = 13;
|
||||
|
||||
log_on_line(&loc, "unexpected token '%c'", 'x');
|
||||
assert_log_file("expected formatted error message with variadic args");
|
||||
}
|
||||
@@ -0,0 +1,14 @@
|
||||
#include "test.h"
|
||||
#include "parser.h"
|
||||
#include <string.h>
|
||||
|
||||
static void test_parser_module_name(void) {
|
||||
TokenStream* ts = tokenstream_open("module my_module;");
|
||||
Module* m = parser_parse(ts);
|
||||
|
||||
if (m == NULL) fail("expected module to be parsed");
|
||||
if (strcmp(m->name, "my_module") != 0) fail("expected name 'my_module'");
|
||||
|
||||
parser_free(m);
|
||||
tokenstream_close(ts);
|
||||
}
|
||||
+72
-75
@@ -1,113 +1,110 @@
|
||||
#include "test.h"
|
||||
#include "token.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
static void test_tokenstream_open_fail(void) {
|
||||
TokenStream* ts = tokenstream_open(NULL, NULL);
|
||||
TokenStream* ts = tokenstream_open(NULL);
|
||||
if (ts != NULL) fail("expected NULL for NULL buffer");
|
||||
}
|
||||
|
||||
static void test_tokenstream_simple_keyword(void) {
|
||||
TokenStream* ts = test_get_tokenstream();
|
||||
Token t;
|
||||
Token eof;
|
||||
TokenStream* ts = tokenstream_open("module");
|
||||
|
||||
t = tokenstream_next(ts);
|
||||
if (t.token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||
Token t = tokenstream_next(ts);
|
||||
if (t != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||
|
||||
eof = tokenstream_next(ts);
|
||||
if (eof.token != TOKEN_EOF) fail("expected EOF");
|
||||
Token eof = tokenstream_next(ts);
|
||||
if (eof != -1) fail("expected EOF");
|
||||
|
||||
tokenstream_close(ts);
|
||||
}
|
||||
|
||||
static void test_tokenstream_keywords_and_symbols(void) {
|
||||
TokenStream* ts = test_get_tokenstream();
|
||||
TokenStream* ts = tokenstream_open("module main; import stdio;");
|
||||
|
||||
if (tokenstream_next(ts).token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (main)");
|
||||
if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
|
||||
if (tokenstream_next(ts).token != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
|
||||
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (stdio)");
|
||||
if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
|
||||
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
|
||||
if (tokenstream_next(ts) != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (main)");
|
||||
if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
|
||||
if (tokenstream_next(ts) != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
|
||||
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (stdio)");
|
||||
if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
|
||||
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
||||
|
||||
tokenstream_close(ts);
|
||||
}
|
||||
|
||||
static void test_tokenstream_parentheses_and_brackets(void) {
|
||||
TokenStream* ts = test_get_tokenstream();
|
||||
TokenStream* ts = tokenstream_open("()[]");
|
||||
|
||||
if (tokenstream_next(ts).token != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
|
||||
if (tokenstream_next(ts).token != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
|
||||
if (tokenstream_next(ts).token != TOKEN_BRACKET_OPEN) fail("expected TOKEN_BRACKET_OPEN");
|
||||
if (tokenstream_next(ts).token != TOKEN_BRACKET_CLOSE) fail("expected TOKEN_BRACKET_CLOSE");
|
||||
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
|
||||
if (tokenstream_next(ts) != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
|
||||
if (tokenstream_next(ts) != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
|
||||
if (tokenstream_next(ts) != TOKEN_BRACKET_OPEN) fail("expected TOKEN_BRACKET_OPEN");
|
||||
if (tokenstream_next(ts) != TOKEN_BRACKET_CLOSE) fail("expected TOKEN_BRACKET_CLOSE");
|
||||
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
||||
|
||||
tokenstream_close(ts);
|
||||
}
|
||||
|
||||
static void test_tokenstream_comma(void) {
|
||||
TokenStream* ts = test_get_tokenstream();
|
||||
TokenStream* ts = tokenstream_open("a,b,c");
|
||||
|
||||
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected a");
|
||||
if (tokenstream_next(ts).token != TOKEN_COMMA) fail("expected comma");
|
||||
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected b");
|
||||
if (tokenstream_next(ts).token != TOKEN_COMMA) fail("expected comma");
|
||||
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected c");
|
||||
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
|
||||
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected a");
|
||||
if (tokenstream_next(ts) != TOKEN_COMMA) fail("expected comma");
|
||||
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected b");
|
||||
if (tokenstream_next(ts) != TOKEN_COMMA) fail("expected comma");
|
||||
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected c");
|
||||
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
||||
|
||||
tokenstream_close(ts);
|
||||
}
|
||||
|
||||
static void test_tokenstream_whitespace_ignored(void) {
|
||||
TokenStream* ts = test_get_tokenstream();
|
||||
TokenStream* ts = tokenstream_open(" module \n\t import ; ");
|
||||
|
||||
if (tokenstream_next(ts).token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||
if (tokenstream_next(ts).token != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
|
||||
if (tokenstream_next(ts).token != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
|
||||
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
|
||||
if (tokenstream_next(ts) != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||
if (tokenstream_next(ts) != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
|
||||
if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
|
||||
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
||||
|
||||
tokenstream_close(ts);
|
||||
}
|
||||
|
||||
static void test_tokenstream_void_function_signature(void) {
|
||||
TokenStream* ts = test_get_tokenstream();
|
||||
TokenStream* ts = tokenstream_open("void main()");
|
||||
|
||||
if (tokenstream_next(ts).token != TOKEN_VOID) fail("expected TOKEN_VOID");
|
||||
if (tokenstream_next(ts).token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
|
||||
if (tokenstream_next(ts).token != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
|
||||
if (tokenstream_next(ts).token != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
|
||||
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
|
||||
}
|
||||
if (tokenstream_next(ts) != TOKEN_VOID) fail("expected TOKEN_VOID");
|
||||
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
|
||||
if (tokenstream_next(ts) != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
|
||||
if (tokenstream_next(ts) != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
|
||||
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
||||
|
||||
static void test_tokenstream_unknown_token(void) {
|
||||
TokenStream* ts = test_get_tokenstream();
|
||||
|
||||
if (tokenstream_next(ts).token != TOKEN_UNKNOWN) fail("expected TOKEN_UNKNOWN");
|
||||
|
||||
assert_log_file("expected error message for unknown token");
|
||||
tokenstream_close(ts);
|
||||
}
|
||||
|
||||
static void test_tokenstream_info(void) {
|
||||
TokenStream* ts = test_get_tokenstream();
|
||||
Token t1;
|
||||
Token t2;
|
||||
TokenStream* ts = tokenstream_open("module main;");
|
||||
|
||||
t1 = tokenstream_next(ts);
|
||||
if (t1.token != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||
assert_string("module", t1.text, "info: expected 'module'");
|
||||
if (t1.location.line != 1) fail("expected line 1");
|
||||
if (t1.location.column_start != 1) fail("expected column 1");
|
||||
Token t1 = tokenstream_next(ts);
|
||||
TokenInfo info1;
|
||||
tokenstream_info(ts, &info1);
|
||||
if (t1 != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||
if (info1.token != TOKEN_MODULE) fail("info: expected TOKEN_MODULE");
|
||||
|
||||
t2 = tokenstream_next(ts);
|
||||
if (t2.token != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
|
||||
assert_string("main", t2.text, "info: expected 'main'");
|
||||
if (t2.location.line != 1) fail("expected line 1");
|
||||
if (t2.location.column_start != 8) fail("expected column 8");
|
||||
}
|
||||
|
||||
static void test_tokenstream_primitive_types(void) {
|
||||
TokenStream* ts = test_get_tokenstream();
|
||||
|
||||
if (tokenstream_next(ts).token != TOKEN_I8) fail("expected TOKEN_I8");
|
||||
if (tokenstream_next(ts).token != TOKEN_I16) fail("expected TOKEN_I16");
|
||||
if (tokenstream_next(ts).token != TOKEN_I32) fail("expected TOKEN_I32");
|
||||
if (tokenstream_next(ts).token != TOKEN_I64) fail("expected TOKEN_I64");
|
||||
if (tokenstream_next(ts).token != TOKEN_U8) fail("expected TOKEN_U8");
|
||||
if (tokenstream_next(ts).token != TOKEN_U16) fail("expected TOKEN_U16");
|
||||
if (tokenstream_next(ts).token != TOKEN_U32) fail("expected TOKEN_U32");
|
||||
if (tokenstream_next(ts).token != TOKEN_U64) fail("expected TOKEN_U64");
|
||||
if (tokenstream_next(ts).token != TOKEN_EOF) fail("expected EOF");
|
||||
char buf1[32];
|
||||
memcpy(buf1, info1.text, info1.text_length);
|
||||
buf1[info1.text_length] = '\0';
|
||||
assert_str("module", buf1, "info: expected 'module'");
|
||||
|
||||
Token t2 = tokenstream_next(ts);
|
||||
TokenInfo info2;
|
||||
tokenstream_info(ts, &info2);
|
||||
if (t2 != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
|
||||
if (info2.token != TOKEN_IDENTIFIER) fail("info: expected TOKEN_IDENTIFIER");
|
||||
|
||||
char buf2[32];
|
||||
memcpy(buf2, info2.text, info2.text_length);
|
||||
buf2[info2.text_length] = '\0';
|
||||
assert_str("main", buf2, "info: expected 'main'");
|
||||
|
||||
tokenstream_close(ts);
|
||||
}
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
int main() []
|
||||
@@ -1,4 +0,0 @@
|
||||
--- v0/tests/log_on_line.c2 ---
|
||||
1| int main() []
|
||||
^^
|
||||
unexpected token
|
||||
@@ -1 +0,0 @@
|
||||
int main() []
|
||||
@@ -1,4 +0,0 @@
|
||||
--- v0/tests/log_on_line_variadic.c2 ---
|
||||
1| int main() []
|
||||
^^
|
||||
unexpected token 'x'
|
||||
@@ -1,9 +0,0 @@
|
||||
module mymodule;
|
||||
|
||||
import foo;
|
||||
|
||||
alias myalias = i32[];
|
||||
|
||||
import bar;
|
||||
|
||||
alias otheralias = i32;
|
||||
@@ -1,3 +0,0 @@
|
||||
module mymodule;
|
||||
|
||||
alias myalias = i32[];
|
||||
@@ -1,3 +0,0 @@
|
||||
module mymodule;
|
||||
|
||||
alias myalias = i32;
|
||||
@@ -1,3 +0,0 @@
|
||||
module mymodule;
|
||||
|
||||
alias myalias = i32;
|
||||
@@ -1,2 +0,0 @@
|
||||
module mymodule;
|
||||
import ;
|
||||
@@ -1,4 +0,0 @@
|
||||
--- v0/tests/parser_bad_import_name.c2 ---
|
||||
2| import ;
|
||||
^
|
||||
expected module identifier
|
||||
@@ -1 +0,0 @@
|
||||
import other_module;
|
||||
@@ -1,4 +0,0 @@
|
||||
--- v0/tests/parser_bad_module_name.c2 ---
|
||||
1| import other_module;
|
||||
^^^^^^
|
||||
expected keyword 'module'
|
||||
@@ -1,2 +0,0 @@
|
||||
module my_module;
|
||||
import other_module;
|
||||
@@ -1 +0,0 @@
|
||||
module my_module; import other_module
|
||||
@@ -1,4 +0,0 @@
|
||||
--- v0/tests/parser_missing_semicolon_import.c2 ---
|
||||
1| module my_module; import other_module
|
||||
^
|
||||
expected ';' after import
|
||||
@@ -1 +0,0 @@
|
||||
module my_module
|
||||
@@ -1,4 +0,0 @@
|
||||
--- v0/tests/parser_missing_semicolon_module.c2 ---
|
||||
1| module my_module
|
||||
^
|
||||
expected ';' after module name
|
||||
@@ -1 +0,0 @@
|
||||
module my_module;
|
||||
@@ -1,4 +0,0 @@
|
||||
module test_multiple_vars;
|
||||
|
||||
i32 var1;
|
||||
i32 var2;
|
||||
@@ -1,3 +0,0 @@
|
||||
module my_module;
|
||||
|
||||
public import other_module;
|
||||
@@ -1,3 +0,0 @@
|
||||
module test_const_var;
|
||||
|
||||
const i32 my_const;
|
||||
@@ -1,2 +0,0 @@
|
||||
module mymodule;
|
||||
var x = 123;
|
||||
@@ -1,4 +0,0 @@
|
||||
module my_module;
|
||||
|
||||
// Defines a global variable called my_var.
|
||||
i32 my_var;
|
||||
@@ -1,4 +0,0 @@
|
||||
module my_module;
|
||||
|
||||
// Defines a global variable called my_var.
|
||||
i32 my_var;
|
||||
@@ -1,3 +0,0 @@
|
||||
module test_static_var;
|
||||
|
||||
static i32 my_static;
|
||||
@@ -1 +0,0 @@
|
||||
a,b,c
|
||||
@@ -1 +0,0 @@
|
||||
module main;
|
||||
@@ -1 +0,0 @@
|
||||
module main; import stdio;
|
||||
@@ -1 +0,0 @@
|
||||
()[]
|
||||
@@ -1 +0,0 @@
|
||||
i8 i16 i32 i64 u8 u16 u32 u64
|
||||
@@ -1 +0,0 @@
|
||||
module
|
||||
@@ -1 +0,0 @@
|
||||
%
|
||||
@@ -1,4 +0,0 @@
|
||||
--- v0/tests/tokenstream_unknown_token.c2 ---
|
||||
1| %
|
||||
^
|
||||
unexpected token '%'
|
||||
@@ -1 +0,0 @@
|
||||
void main()
|
||||
@@ -1,2 +0,0 @@
|
||||
module
|
||||
import ;
|
||||
+60
-164
@@ -1,21 +1,12 @@
|
||||
#include "token.h"
|
||||
#include "log.h"
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
|
||||
struct TokenStream {
|
||||
char* filename;
|
||||
const char* code;
|
||||
size_t pos;
|
||||
int line;
|
||||
int column;
|
||||
const char* line_start;
|
||||
|
||||
/* End of last non-EOF token */
|
||||
int last_line;
|
||||
int last_column_end;
|
||||
const char* last_line_start;
|
||||
TokenInfo last_info;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -24,38 +15,22 @@ struct TokenStream {
|
||||
*/
|
||||
typedef struct {
|
||||
const char* keyword;
|
||||
TokenType token;
|
||||
Token token;
|
||||
} KeywordMap;
|
||||
|
||||
static const KeywordMap keywords[] = {
|
||||
{"module", TOKEN_MODULE},
|
||||
{"import", TOKEN_IMPORT},
|
||||
{"alias", TOKEN_ALIAS},
|
||||
{"public", TOKEN_PUBLIC},
|
||||
{"var", TOKEN_VAR},
|
||||
{"const", TOKEN_CONST},
|
||||
{"static", TOKEN_STATIC},
|
||||
|
||||
{"void", TOKEN_VOID},
|
||||
{"i8", TOKEN_I8},
|
||||
{"i16", TOKEN_I16},
|
||||
{"i32", TOKEN_I32},
|
||||
{"i64", TOKEN_I64},
|
||||
{"u8", TOKEN_U8},
|
||||
{"u16", TOKEN_U16},
|
||||
{"u32", TOKEN_U32},
|
||||
{"u64", TOKEN_U64},
|
||||
{"true", TOKEN_TRUE},
|
||||
{"false", TOKEN_FALSE},
|
||||
};
|
||||
|
||||
/**
|
||||
* Look up a keyword in the keyword map.
|
||||
* Returns TOKEN_IDENTIFIER if not found.
|
||||
*/
|
||||
static TokenType lookup_keyword(const char* str, size_t length) {
|
||||
static Token lookup_keyword(const char* str, size_t length) {
|
||||
int count = sizeof(keywords) / sizeof(keywords[0]);
|
||||
int i;
|
||||
for (i = 0; i < count; i++) {
|
||||
for (int i = 0; i < count; i++) {
|
||||
if (strlen(keywords[i].keyword) == length &&
|
||||
strncmp(keywords[i].keyword, str, length) == 0) {
|
||||
return keywords[i].token;
|
||||
@@ -79,201 +54,122 @@ static int is_identifier_part(char c) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Peek at the next character in the stream.
|
||||
*/
|
||||
static char peek_char(TokenStream* ts) {
|
||||
return ts->code[ts->pos];
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a character from the stream and update position.
|
||||
* Read a character from the stream.
|
||||
*/
|
||||
static char read_char(TokenStream* ts) {
|
||||
char c = ts->code[ts->pos];
|
||||
if (c == '\0') return '\0';
|
||||
|
||||
if (c == '\0') return (char)-1;
|
||||
ts->pos++;
|
||||
if (c == '\n') {
|
||||
ts->line++;
|
||||
ts->column = 1;
|
||||
ts->line_start = &ts->code[ts->pos];
|
||||
} else {
|
||||
ts->column++;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
static size_t get_line_length(const char* line_start) {
|
||||
const char* p = line_start;
|
||||
while (*p != '\n' && *p != '\0') {
|
||||
p++;
|
||||
}
|
||||
return (size_t)(p - line_start);
|
||||
/**
|
||||
* Peek at the next character in the stream.
|
||||
*/
|
||||
static char peek_char(TokenStream* ts) {
|
||||
char c = ts->code[ts->pos];
|
||||
if (c == '\0') return (char)-1;
|
||||
return c;
|
||||
}
|
||||
|
||||
static Token create_token(TokenStream* ts, TokenType type, const char* text, size_t length, int line, int column, const char* line_start) {
|
||||
Token t;
|
||||
t.token = type;
|
||||
t.text.data = (char*)text;
|
||||
t.text.length = length;
|
||||
t.location.filename = ts->filename;
|
||||
t.location.line = line;
|
||||
t.location.column_start = column;
|
||||
t.location.column_end = column + (int)length - 1;
|
||||
t.location.line_text.data = (char*)line_start;
|
||||
t.location.line_text.length = get_line_length(line_start);
|
||||
static Token read_keyword_or_identifier(TokenStream* ts, char first) {
|
||||
const char* start = &ts->code[ts->pos - 1];
|
||||
size_t length = 1;
|
||||
|
||||
if (type != TOKEN_EOF) {
|
||||
ts->last_line = t.location.line;
|
||||
ts->last_column_end = t.location.column_end;
|
||||
ts->last_line_start = t.location.line_text.data;
|
||||
while (is_identifier_part(peek_char(ts))) {
|
||||
read_char(ts);
|
||||
length++;
|
||||
}
|
||||
|
||||
return t;
|
||||
Token token = lookup_keyword(start, length);
|
||||
ts->last_info.token = token;
|
||||
ts->last_info.text = (char*)start;
|
||||
ts->last_info.text_length = length;
|
||||
return token;
|
||||
}
|
||||
|
||||
TokenStream* tokenstream_open(const char* filename, const char* code) {
|
||||
/* Declarations first for C89 */
|
||||
TokenStream* ts;
|
||||
const char* name_src;
|
||||
|
||||
TokenStream* tokenstream_open(const char* code) {
|
||||
if (code == NULL) return NULL;
|
||||
|
||||
ts = (TokenStream*)malloc(sizeof(struct TokenStream));
|
||||
TokenStream* ts = (TokenStream*)malloc(sizeof(struct TokenStream));
|
||||
if (ts == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
name_src = filename ? filename : "unknown";
|
||||
ts->filename = malloc(strlen(name_src) + 1);
|
||||
if (ts->filename) {
|
||||
memcpy(ts->filename, name_src, strlen(name_src) + 1);
|
||||
}
|
||||
ts->code = code;
|
||||
ts->pos = 0;
|
||||
ts->line = 1;
|
||||
ts->column = 1;
|
||||
ts->line_start = code;
|
||||
ts->last_line = 1;
|
||||
ts->last_column_end = 0;
|
||||
ts->last_line_start = code;
|
||||
ts->last_info.text = NULL;
|
||||
ts->last_info.text_length = 0;
|
||||
ts->last_info.token = (Token)-1;
|
||||
return ts;
|
||||
}
|
||||
|
||||
void tokenstream_close(TokenStream* ts) {
|
||||
if (ts == NULL) return;
|
||||
if (ts->filename) free(ts->filename);
|
||||
free(ts);
|
||||
}
|
||||
|
||||
Token tokenstream_next(TokenStream* ts) {
|
||||
/* Declarations first for C89 */
|
||||
char c;
|
||||
int start_line;
|
||||
int start_column;
|
||||
const char* line_start;
|
||||
const char* start_text;
|
||||
Token t;
|
||||
if (ts == NULL) return -1;
|
||||
|
||||
if (ts == NULL) {
|
||||
Token t = {0};
|
||||
t.token = TOKEN_EOF;
|
||||
return t;
|
||||
}
|
||||
char c;
|
||||
|
||||
/* Skip whitespace and comments */
|
||||
while ((c = peek_char(ts)) != '\0') {
|
||||
while ((c = read_char(ts)) != (char)-1) {
|
||||
if (isspace(c)) {
|
||||
read_char(ts);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle comments */
|
||||
if (c == '/') {
|
||||
if (ts->code[ts->pos + 1] == '/') {
|
||||
if (peek_char(ts) == '/') {
|
||||
/* Skip until end of line */
|
||||
while ((c = read_char(ts)) != '\0' && c != '\n') {
|
||||
while ((c = read_char(ts)) != (char)-1 && c != '\n') {
|
||||
/* Skip */
|
||||
}
|
||||
continue;
|
||||
}
|
||||
/* It's just a slash, which we don't handle yet */
|
||||
break;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* We found a non-whitespace, non-comment character */
|
||||
break;
|
||||
}
|
||||
|
||||
if (peek_char(ts) == '\0') {
|
||||
Token t;
|
||||
t.token = TOKEN_EOF;
|
||||
t.text.data = NULL;
|
||||
t.text.length = 0;
|
||||
t.location.filename = ts->filename;
|
||||
|
||||
t.location.line = ts->last_line;
|
||||
t.location.column_start = ts->last_column_end + 1;
|
||||
t.location.column_end = ts->last_column_end + 1;
|
||||
t.location.line_text.data = (char*)ts->last_line_start;
|
||||
t.location.line_text.length = get_line_length(ts->last_line_start);
|
||||
return t;
|
||||
if (c == (char)-1) {
|
||||
ts->last_info.token = (Token)-1;
|
||||
ts->last_info.text = NULL;
|
||||
ts->last_info.text_length = 0;
|
||||
return -1; /* EOF */
|
||||
}
|
||||
|
||||
start_line = ts->line;
|
||||
start_column = ts->column;
|
||||
line_start = ts->line_start;
|
||||
start_text = &ts->code[ts->pos];
|
||||
|
||||
c = read_char(ts);
|
||||
|
||||
/* Single-character tokens */
|
||||
switch (c) {
|
||||
case '(': return create_token(ts, TOKEN_PARENT_OPEN, start_text, 1, start_line, start_column, line_start);
|
||||
case ')': return create_token(ts, TOKEN_PARENT_CLOSE, start_text, 1, start_line, start_column, line_start);
|
||||
case '[': return create_token(ts, TOKEN_BRACKET_OPEN, start_text, 1, start_line, start_column, line_start);
|
||||
case ']': return create_token(ts, TOKEN_BRACKET_CLOSE, start_text, 1, start_line, start_column, line_start);
|
||||
case ',': return create_token(ts, TOKEN_COMMA, start_text, 1, start_line, start_column, line_start);
|
||||
case ';': return create_token(ts, TOKEN_SEMICOLON, start_text, 1, start_line, start_column, line_start);
|
||||
case '=': return create_token(ts, TOKEN_ASSIGN, start_text, 1, start_line, start_column, line_start);
|
||||
case '"': {
|
||||
size_t len = 0;
|
||||
const char* start = &ts->code[ts->pos];
|
||||
while (peek_char(ts) != '"' && peek_char(ts) != '\0') {
|
||||
read_char(ts);
|
||||
len++;
|
||||
}
|
||||
if (peek_char(ts) == '"') read_char(ts);
|
||||
return create_token(ts, TOKEN_STRING, start, len, start_line, start_column + 1, line_start);
|
||||
}
|
||||
}
|
||||
ts->last_info.text = (char*)&ts->code[ts->pos - 1];
|
||||
ts->last_info.text_length = 1;
|
||||
|
||||
if (isdigit(c)) {
|
||||
size_t len = 1;
|
||||
while (isdigit(peek_char(ts))) {
|
||||
read_char(ts);
|
||||
len++;
|
||||
}
|
||||
return create_token(ts, TOKEN_INTEGER, start_text, len, start_line, start_column, line_start);
|
||||
switch (c) {
|
||||
case '(': return ts->last_info.token = TOKEN_PARENT_OPEN;
|
||||
case ')': return ts->last_info.token = TOKEN_PARENT_CLOSE;
|
||||
case '[': return ts->last_info.token = TOKEN_BRACKET_OPEN;
|
||||
case ']': return ts->last_info.token = TOKEN_BRACKET_CLOSE;
|
||||
case ',': return ts->last_info.token = TOKEN_COMMA;
|
||||
case ';': return ts->last_info.token = TOKEN_SEMICOLON;
|
||||
}
|
||||
|
||||
/* Keywords and identifiers */
|
||||
if (is_identifier_start(c)) {
|
||||
/* Declarations first for C89 */
|
||||
size_t length;
|
||||
TokenType type;
|
||||
|
||||
length = 1;
|
||||
while (is_identifier_part(peek_char(ts))) {
|
||||
read_char(ts);
|
||||
length++;
|
||||
}
|
||||
type = lookup_keyword(start_text, length);
|
||||
return create_token(ts, type, start_text, length, start_line, start_column, line_start);
|
||||
return read_keyword_or_identifier(ts, c);
|
||||
}
|
||||
|
||||
/* Unknown character */
|
||||
t = create_token(ts, TOKEN_UNKNOWN, start_text, 1, start_line, start_column, line_start);
|
||||
log_on_line(&t.location, "unexpected token '%c'", c);
|
||||
return t;
|
||||
ts->last_info.token = (Token)-1;
|
||||
ts->last_info.text = NULL;
|
||||
ts->last_info.text_length = 0;
|
||||
return -1;
|
||||
}
|
||||
|
||||
void tokenstream_info(TokenStream* ts, TokenInfo* info) {
|
||||
if (ts == NULL || info == NULL) return;
|
||||
*info = ts->last_info;
|
||||
}
|
||||
|
||||
+24
-37
@@ -4,77 +4,55 @@
|
||||
#ifndef TOKEN_H
|
||||
#define TOKEN_H
|
||||
|
||||
#include "location.h"
|
||||
#include <stddef.h>
|
||||
|
||||
/**
|
||||
* A list of all possible tokens.
|
||||
*/
|
||||
typedef enum {
|
||||
/* Keywords */
|
||||
// Keywords
|
||||
TOKEN_MODULE,
|
||||
TOKEN_IMPORT,
|
||||
TOKEN_SEMICOLON,
|
||||
TOKEN_ALIAS,
|
||||
TOKEN_PUBLIC,
|
||||
TOKEN_VAR,
|
||||
TOKEN_CONST,
|
||||
TOKEN_STATIC,
|
||||
|
||||
/* Symbols */
|
||||
// Symbols
|
||||
TOKEN_PARENT_OPEN,
|
||||
TOKEN_PARENT_CLOSE,
|
||||
TOKEN_BRACKET_OPEN,
|
||||
TOKEN_BRACKET_CLOSE,
|
||||
TOKEN_COMMA,
|
||||
TOKEN_ASSIGN,
|
||||
|
||||
/* Primitives */
|
||||
// Primitives
|
||||
TOKEN_VOID,
|
||||
TOKEN_I8,
|
||||
TOKEN_I16,
|
||||
TOKEN_I32,
|
||||
TOKEN_I64,
|
||||
TOKEN_U8,
|
||||
TOKEN_U16,
|
||||
TOKEN_U32,
|
||||
TOKEN_U64,
|
||||
TOKEN_STRING,
|
||||
TOKEN_INTEGER,
|
||||
TOKEN_TRUE,
|
||||
TOKEN_FALSE,
|
||||
|
||||
/* Variable */
|
||||
// Variable
|
||||
TOKEN_IDENTIFIER,
|
||||
|
||||
/* Others */
|
||||
TOKEN_EOF,
|
||||
TOKEN_UNKNOWN
|
||||
} TokenType;
|
||||
} Token;
|
||||
|
||||
/**
|
||||
* Holds additional information about a token.
|
||||
*/
|
||||
typedef struct {
|
||||
/* @brief The actual token. */
|
||||
TokenType token;
|
||||
/// @brief The textual representation of a token.
|
||||
/// Note that this is not necessarily null-terminated.
|
||||
char* text;
|
||||
|
||||
/* @brief The textual representation of a token. */
|
||||
String text;
|
||||
/// @brief The length of the `text` string.
|
||||
size_t text_length;
|
||||
|
||||
/* @brief The location of the token. */
|
||||
Location location;
|
||||
} Token;
|
||||
/// @brief The actual token.
|
||||
Token token;
|
||||
} TokenInfo;
|
||||
|
||||
typedef struct TokenStream TokenStream;
|
||||
|
||||
/**
|
||||
* Returns a TokenStream for a text.
|
||||
*
|
||||
* @param filename The name of the file to read. This is only used for error reporting.
|
||||
* @param code The text to read.
|
||||
* @returns A handle to the TokenStream.
|
||||
*/
|
||||
TokenStream* tokenstream_open(const char* filename, const char* code);
|
||||
TokenStream* tokenstream_open(const char* code);
|
||||
|
||||
/**
|
||||
* Closes a TokenStream.
|
||||
@@ -89,4 +67,13 @@ void tokenstream_close(TokenStream* ts);
|
||||
*/
|
||||
Token tokenstream_next(TokenStream* ts);
|
||||
|
||||
/**
|
||||
* Gets additional information about the last token that was returned
|
||||
* by `tokenstream_next`.
|
||||
*
|
||||
* @param ts The TokenStream to use.
|
||||
* @param info The TokenInfo object to store the results in.
|
||||
*/
|
||||
void tokenstream_info(TokenStream* ts, TokenInfo* info);
|
||||
|
||||
#endif
|
||||
@@ -1,9 +0,0 @@
|
||||
/**
|
||||
* Contains runtime information about types.
|
||||
*/
|
||||
#ifndef TYPES_H
|
||||
#define TYPES_H
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
@@ -1,46 +0,0 @@
|
||||
#include "util.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
/* Portable va_copy fallback for pre-C99 or platforms without va_copy. */
|
||||
#ifndef va_copy
|
||||
# if defined(__va_copy)
|
||||
# define va_copy(dest, src) __va_copy(dest, src)
|
||||
# else
|
||||
# define va_copy(dest, src) ((dest) = (src))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
char* format_string_va(const char* fmt, va_list args) {
|
||||
/* Declarations first to satisfy -std=c89 */
|
||||
va_list args_copy;
|
||||
int needed;
|
||||
char* buf;
|
||||
|
||||
if (!fmt) return NULL;
|
||||
|
||||
va_copy(args_copy, args);
|
||||
needed = vsnprintf(NULL, 0, fmt, args_copy);
|
||||
va_end(args_copy);
|
||||
if (needed < 0) return NULL;
|
||||
|
||||
buf = (char*)malloc((size_t)needed + 1);
|
||||
if (!buf) return NULL;
|
||||
vsnprintf(buf, (size_t)needed + 1, fmt, args);
|
||||
return buf;
|
||||
}
|
||||
|
||||
char* format_string(const char* fmt, ...) {
|
||||
/* Declarations first to satisfy -std=c89 */
|
||||
va_list args;
|
||||
char* s;
|
||||
|
||||
if (!fmt) return NULL;
|
||||
|
||||
va_start(args, fmt);
|
||||
s = format_string_va(fmt, args);
|
||||
va_end(args);
|
||||
return s;
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
#ifndef UTIL_H
|
||||
#define UTIL_H
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/**
|
||||
* Formats a string using printf-style formatting and returns a newly allocated string.
|
||||
* The caller is responsible for freeing the returned string.
|
||||
*
|
||||
* @param fmt The format string.
|
||||
* @param ... The values to format.
|
||||
* @return A newly allocated string containing the formatted output.
|
||||
*/
|
||||
char* format_string(const char* fmt, ...);
|
||||
|
||||
/**
|
||||
* Formats a string using printf-style formatting with a va_list and returns a newly allocated string.
|
||||
* The caller is responsible for freeing the returned string.
|
||||
*
|
||||
* @param fmt The format string.
|
||||
* @param args The va_list of values to format.
|
||||
* @return A newly allocated string containing the formatted output.
|
||||
*/
|
||||
char* format_string_va(const char* fmt, va_list args);
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user