Compare commits
11 Commits
14fced76f0
...
1406cedd82
| Author | SHA1 | Date | |
|---|---|---|---|
| 1406cedd82 | |||
| 422203fdab | |||
| ed12c0a38e | |||
| 49b9db5b75 | |||
| c1106d8e66 | |||
| dccdcb8ba5 | |||
| c73f99d9e6 | |||
| c90f3afd95 | |||
| 4939a74752 | |||
| 7ff3f76de5 | |||
| f6a1b290fc |
@@ -0,0 +1,33 @@
|
|||||||
|
# C2
|
||||||
|
C2 is a compiler for a new language.
|
||||||
|
See the README.md for information about this project.
|
||||||
|
|
||||||
|
## Code Changes
|
||||||
|
After every code change, ensure the binary builds correctly,
|
||||||
|
and run the unit tests (`make test`).
|
||||||
|
|
||||||
|
Ensure that every new function and code path has useful unit tests.
|
||||||
|
|
||||||
|
### Creating Source Files
|
||||||
|
Whenever a new source file is created, it must be added to the `include.mk` file.
|
||||||
|
A test file should also be created.
|
||||||
|
|
||||||
|
Test source files do not have to be added to the include.mk file.
|
||||||
|
These are added to the `test.c` file by means of directly `#include`ing the C file.
|
||||||
|
|
||||||
|
### Testing
|
||||||
|
Any test source code must be prefixed with test_xyz, where xyz matches
|
||||||
|
the source file it is trying to test.
|
||||||
|
For instance, a test for `buffer.c` must be called `test_buffer.c`.
|
||||||
|
|
||||||
|
There will be no `test_buffer.h`. Instead, `test.c` will directly
|
||||||
|
`#include` the C source file.
|
||||||
|
|
||||||
|
## Language Syntax
|
||||||
|
Since this is a compiler for a new language, do not assume anything
|
||||||
|
of its syntax.
|
||||||
|
Always check the `specs` directory.
|
||||||
|
|
||||||
|
If there is anything unclear, ask the user for clarification.
|
||||||
|
It is certainly possible that there are contradictions in the
|
||||||
|
spec that have to be solved first.
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
---
|
||||||
|
name: implement
|
||||||
|
description: 'Implement all @copilot annotations'
|
||||||
|
---
|
||||||
|
# General
|
||||||
|
Find and implement all `@copilot` comments in the codebase. Modify only code related to those annotations, and always make sure that tests are added.
|
||||||
|
|
||||||
|
## Plan Mode
|
||||||
|
If you are currently in plan mode, look at all the annotations and always create a plan first.
|
||||||
|
Only start modifying code once the plan has been approved by the user.
|
||||||
|
|
||||||
|
## Implementation
|
||||||
|
Implement the changes requested at the location of the annotation.
|
||||||
|
After the implementation is finished, remove the comment containing the annotation.
|
||||||
+1
-1
@@ -1 +1 @@
|
|||||||
bin/
|
/c2
|
||||||
|
|||||||
@@ -1,19 +1,13 @@
|
|||||||
.PHONY: all clean
|
.PHONY: all test clean
|
||||||
|
|
||||||
SRC := main.c
|
|
||||||
BINDIR := bin
|
|
||||||
OBJ := $(SRC:%.c=$(BINDIR)/%.o)
|
|
||||||
|
|
||||||
all: c2
|
all: c2
|
||||||
|
|
||||||
clean:
|
c2: v0/bin/c2
|
||||||
rm -rv bin
|
cp $< $@
|
||||||
|
|
||||||
c2: $(OBJ)
|
test::
|
||||||
gcc -o $@ $<
|
|
||||||
|
|
||||||
$(BINDIR)/%.o: %.c
|
clean::
|
||||||
gcc -c -o $@ $<
|
rm -f c2
|
||||||
|
|
||||||
$(BINDIR):
|
include v0/include.mk
|
||||||
mkdir -p $(BINDIR)
|
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
# C2
|
||||||
|
C2 is a programming language based on C.
|
||||||
|
It adds modern high-level features such as metaprogramming, generics, etc,
|
||||||
|
but compiles down to C89.
|
||||||
|
|
||||||
|
## Building
|
||||||
|
To build the c2–compiler, simply run `make` or `make all`.
|
||||||
|
This will build the compiler and run the tests.
|
||||||
|
|
||||||
|
In order to only build the compiler, run `make c2`.
|
||||||
|
In order to run the tests, run `make test`.
|
||||||
|
|
||||||
|
## Versioning
|
||||||
|
The current version is v0. Its source code lives in the `v0` directory.
|
||||||
|
|
||||||
|
## Language Specifications
|
||||||
|
See the specs directory for information on the actual language syntax.
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
Copilot / contributor instructions for v0
|
||||||
|
|
||||||
|
- When adding new source files for v0, do NOT rely on wildcards. Add the file path explicitly to V0_SRC in v0/include.mk. Test files beginning with `test_` are not listed in v0/include.mk; they are `#include`d directly from v0/test.c, which is the only entry in V0_TEST.
|
||||||
|
- v0/include.mk is included by the top-level Makefile and is a dependency for object builds. Modifying v0/include.mk will force appropriate recompilation.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
V0_SRC := v0/buffer.c v0/main.c
|
||||||
|
V0_TEST := v0/test.c
|
||||||
|
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
# General
|
||||||
|
A C2–file starts with a module declaration followed by other declarations.
|
||||||
|
|
||||||
|
For instance:
|
||||||
|
|
||||||
|
```c2
|
||||||
|
module mymodule;
|
||||||
|
|
||||||
|
import libc.stdio;
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
puts("Hello, world!");
|
||||||
|
}
|
||||||
|
```
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
/bin/
|
||||||
+63
@@ -0,0 +1,63 @@
|
|||||||
|
#include "buffer.h"
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
/** Discriminates which kind of source a Buffer reads from. */
typedef enum BufferType {
    BUFFER_FILE = 0,  /* backed by a stdio stream */
    BUFFER_STRING = 1 /* backed by a NUL-terminated string the buffer does not own */
} BufferType;

/**
 * Concrete layout behind the opaque Buffer handle.
 * `type` selects which member of `source` is in use.
 */
struct Buffer {
    BufferType type;
    union {
        /* In use when type == BUFFER_FILE. */
        FILE* file;
        /* In use when type == BUFFER_STRING. */
        struct {
            const char* data; /* text being read */
            size_t pos;       /* index of the next character to hand out */
        } string;
    } source;
};
|
||||||
|
|
||||||
|
Buffer* buffer_open_file(const char* path) {
|
||||||
|
FILE* f = fopen(path, "r");
|
||||||
|
if (f == NULL)
|
||||||
|
return NULL;
|
||||||
|
Buffer* buf = malloc(sizeof(Buffer));
|
||||||
|
if (buf == NULL) {
|
||||||
|
fclose(f);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
buf->type = BUFFER_FILE;
|
||||||
|
buf->source.file = f;
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
Buffer* buffer_open_string(const char* string) {
|
||||||
|
Buffer* buf = malloc(sizeof(Buffer));
|
||||||
|
if (buf == NULL)
|
||||||
|
return NULL;
|
||||||
|
buf->type = BUFFER_STRING;
|
||||||
|
buf->source.string.data = string;
|
||||||
|
buf->source.string.pos = 0;
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Releases a Buffer. For file-backed buffers the underlying stream is
 * closed as well; a string-backed buffer leaves the string untouched.
 * Passing NULL is a no-op.
 */
void buffer_close(Buffer* buffer) {
    if (buffer == NULL)
        return;

    if (buffer->type == BUFFER_FILE)
        fclose(buffer->source.file);
    free(buffer);
}
|
||||||
|
|
||||||
|
/**
 * Returns the next character of the buffer and advances past it.
 *
 * End of input is reported as (char)-1: EOF from the stream for a file
 * buffer, the terminating '\0' for a string buffer. A string buffer
 * does not advance once it has reached its terminator. Because the
 * sentinel is an ordinary char value, a data byte equal to (char)-1
 * cannot be told apart from end of input.
 */
char buffer_read(Buffer* buffer) {
    if (buffer->type != BUFFER_FILE) {
        size_t at = buffer->source.string.pos;
        char ch = buffer->source.string.data[at];

        if (ch == '\0')
            return (char)-1;

        buffer->source.string.pos = at + 1;
        return ch;
    }

    int ch = fgetc(buffer->source.file);
    if (ch == EOF)
        return (char)-1;
    return (char)ch;
}
|
||||||
+50
@@ -0,0 +1,50 @@
|
|||||||
|
/**
|
||||||
|
* An interface that wraps files and strings.
|
||||||
|
* Allows stream–like reading from it.
|
||||||
|
*/
|
||||||
|
#ifndef BUFFER_H
|
||||||
|
#define BUFFER_H
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An interface to a source of textual data.
|
||||||
|
*/
|
||||||
|
typedef struct Buffer Buffer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Opens a file.
|
||||||
|
*
|
||||||
|
* @param path The path to the file.
|
||||||
|
 * @returns The newly-opened buffer, or NULL if the file could not be opened or memory could not be allocated.
|
||||||
|
*/
|
||||||
|
Buffer* buffer_open_file(const char* path);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Opens a string.
|
||||||
|
*
|
||||||
|
* The string is not copied, and must not be free'd until the
|
||||||
|
* buffer itself has been closed.
|
||||||
|
*
|
||||||
|
* @param string The contents stored in the buffer.
|
||||||
|
* @returns A newly–opened buffer that reads from the string.
|
||||||
|
*/
|
||||||
|
Buffer* buffer_open_string(const char* string);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Closes the buffer.
|
||||||
|
*
|
||||||
|
* @param buffer The buffer to close.
|
||||||
|
*/
|
||||||
|
void buffer_close(Buffer* buffer);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a single character from the buffer.
|
||||||
|
*
|
||||||
|
* If there are no more characters in the buffer,
|
||||||
|
* this returns `-1`.
|
||||||
|
*
|
||||||
|
* @param buffer The buffer to read from.
|
||||||
|
* @returns the next character in the buffer.
|
||||||
|
*/
|
||||||
|
char buffer_read(Buffer* buffer);
|
||||||
|
|
||||||
|
#endif
|
||||||
@@ -0,0 +1,35 @@
|
|||||||
|
V0_SRC := v0/buffer.c v0/main.c v0/token.c
|
||||||
|
|
||||||
|
# V0_TEST must only include `v0/test.c` itself, as all other test C–source files are
|
||||||
|
# included directly into `v0/test.c` using `#include "test_xyz.c"`.
|
||||||
|
V0_TEST := v0/test.c
|
||||||
|
|
||||||
|
V0_SRC_OBJ := $(patsubst v0/%.c,v0/bin/%.o,$(V0_SRC))
|
||||||
|
V0_TEST_OBJ := $(patsubst v0/%.c,v0/bin/%.o,$(V0_TEST))
|
||||||
|
|
||||||
|
# Define dependency file lists for sources and tests
|
||||||
|
V0_SRC_DEPS := $(V0_SRC_OBJ:.o=.d)
|
||||||
|
V0_TEST_DEPS := $(V0_TEST_OBJ:.o=.d)
|
||||||
|
|
||||||
|
v0/bin/c2: $(V0_SRC_OBJ)
|
||||||
|
$(CC) $(CFLAGS) -o $@ $^
|
||||||
|
|
||||||
|
V0_SRC_OBJ_NO_MAIN := $(filter-out v0/bin/main.o,$(V0_SRC_OBJ))
|
||||||
|
|
||||||
|
v0/bin/test: $(V0_SRC_OBJ_NO_MAIN) $(V0_TEST_OBJ)
|
||||||
|
$(CC) $(CFLAGS) -o $@ $^
|
||||||
|
|
||||||
|
test:: v0/bin/test
|
||||||
|
v0/bin/test
|
||||||
|
|
||||||
|
clean::
|
||||||
|
rm -f v0/bin/test v0/bin/c2 $(V0_SRC_OBJ) $(V0_TEST_OBJ) $(V0_SRC_DEPS) $(V0_TEST_DEPS)
|
||||||
|
|
||||||
|
# Build each .c file into a .o file, tracking header dependencies.
|
||||||
|
v0/bin/%.o: v0/%.c v0/include.mk
|
||||||
|
@mkdir -p $(dir $@)
|
||||||
|
$(CC) $(CFLAGS) -MMD -MP -c $< -o $@
|
||||||
|
|
||||||
|
# Reference dependency files
|
||||||
|
-include $(V0_SRC_DEPS)
|
||||||
|
-include $(V0_TEST_DEPS)
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
/**
 * Entry point of the c2 binary. It currently only prints a greeting.
 */
int main(int argc, char** argv) {
    /* The command line is not used yet; silence unused-parameter warnings. */
    (void)argc;
    (void)argv;

    puts("Hello, world");
    return 0;
}
|
||||||
@@ -0,0 +1,74 @@
|
|||||||
|
#include "test.h"
|
||||||
|
#include "buffer.h"
|
||||||
|
#include <setjmp.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
/* Jump target that main() arms with setjmp() before each test runs. */
static jmp_buf s_testJmp;
/* Message handed to the most recent fail() call; main() resets it to NULL per test. */
static const char* s_failMsg;

/**
 * Aborts the running test: records `msg` and jumps back to the setjmp()
 * in main(), so this function does not return to its caller.
 */
void fail(const char* msg) {
    s_failMsg = msg;
    longjmp(s_testJmp, 1);
}
|
||||||
|
|
||||||
|
/** One entry of the test table: a printable name and the function to run. */
typedef struct {
    const char* name; /* printed before the test runs and in the failure summary */
    Test func;        /* the test body; signals failure by calling fail() */
} TestCase;
|
||||||
|
|
||||||
|
#include "test_buffer.c"
|
||||||
|
#include "test_token.c"
|
||||||
|
|
||||||
|
static int s_totalTests;
|
||||||
|
static int s_greenTests;
|
||||||
|
|
||||||
|
static TestCase s_tests[] = {
|
||||||
|
{"buffer_string_reads_chars", test_buffer_string_reads_chars},
|
||||||
|
{"buffer_string_eof", test_buffer_string_eof},
|
||||||
|
{"buffer_string_eof_after_content", test_buffer_string_eof_after_content},
|
||||||
|
{"buffer_file_reads_chars", test_buffer_file_reads_chars},
|
||||||
|
{"buffer_file_open_fail", test_buffer_file_open_fail},
|
||||||
|
{"tokenstream_open_fail", test_tokenstream_open_fail},
|
||||||
|
{"tokenstream_simple_keyword", test_tokenstream_simple_keyword},
|
||||||
|
{"tokenstream_keywords_and_symbols", test_tokenstream_keywords_and_symbols},
|
||||||
|
{"tokenstream_parentheses_and_brackets", test_tokenstream_parentheses_and_brackets},
|
||||||
|
{"tokenstream_comma", test_tokenstream_comma},
|
||||||
|
{"tokenstream_whitespace_ignored", test_tokenstream_whitespace_ignored},
|
||||||
|
{"tokenstream_void_function_signature", test_tokenstream_void_function_signature},
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
(void)argc;
|
||||||
|
(void)argv;
|
||||||
|
|
||||||
|
s_totalTests = sizeof(s_tests) / sizeof(s_tests[0]);
|
||||||
|
s_greenTests = 0;
|
||||||
|
|
||||||
|
const char* failedTests[s_totalTests + 1];
|
||||||
|
int failedCount = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < s_totalTests; i++) {
|
||||||
|
printf("%s...", s_tests[i].name);
|
||||||
|
s_failMsg = NULL;
|
||||||
|
|
||||||
|
if (setjmp(s_testJmp) == 0) {
|
||||||
|
s_tests[i].func();
|
||||||
|
printf(" [OK]\n");
|
||||||
|
s_greenTests++;
|
||||||
|
} else {
|
||||||
|
printf(" [FAIL]: %s\n", s_failMsg ? s_failMsg : "");
|
||||||
|
failedTests[failedCount++] = s_tests[i].name;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (failedCount > 0) {
|
||||||
|
printf("\nFailed tests:\n");
|
||||||
|
for (int i = 0; i < failedCount; i++) {
|
||||||
|
printf(" - %s\n", failedTests[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("\n%d/%d tests passed.\n", s_greenTests, s_totalTests);
|
||||||
|
return failedCount > 0 ? 1 : 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
/**
|
||||||
|
 * Contains test assertion routines.
|
||||||
|
*/
|
||||||
|
#ifndef TEST_H
|
||||||
|
#define TEST_H
|
||||||
|
|
||||||
|
typedef void (*Test)(void);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fails a test.
|
||||||
|
* @param msg The message to print to the console.
|
||||||
|
*/
|
||||||
|
void fail(const char* msg);
|
||||||
|
|
||||||
|
#endif
|
||||||
@@ -0,0 +1,40 @@
|
|||||||
|
#include "test.h"
|
||||||
|
#include "buffer.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
|
||||||
|
static void test_buffer_string_reads_chars(void) {
|
||||||
|
Buffer* buf = buffer_open_string("hi");
|
||||||
|
if (buffer_read(buf) != 'h') fail("expected 'h'");
|
||||||
|
if (buffer_read(buf) != 'i') fail("expected 'i'");
|
||||||
|
buffer_close(buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_buffer_string_eof(void) {
|
||||||
|
Buffer* buf = buffer_open_string("");
|
||||||
|
if (buffer_read(buf) != (char)-1) fail("expected -1 on empty string");
|
||||||
|
buffer_close(buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_buffer_string_eof_after_content(void) {
|
||||||
|
Buffer* buf = buffer_open_string("a");
|
||||||
|
buffer_read(buf);
|
||||||
|
if (buffer_read(buf) != (char)-1) fail("expected -1 after end of string");
|
||||||
|
buffer_close(buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_buffer_file_reads_chars(void) {
|
||||||
|
Buffer* buf = buffer_open_file("v0/test_buffer.txt");
|
||||||
|
if (buf == NULL) fail("could not open file");
|
||||||
|
if (buffer_read(buf) != 'a') fail("expected 'a'");
|
||||||
|
if (buffer_read(buf) != 'b') fail("expected 'b'");
|
||||||
|
if (buffer_read(buf) != 'c') fail("expected 'c'");
|
||||||
|
if (buffer_read(buf) != '\n') fail("expected newline after content");
|
||||||
|
if (buffer_read(buf) != (char)-1) fail("expected -1 after file");
|
||||||
|
buffer_close(buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_buffer_file_open_fail(void) {
|
||||||
|
Buffer* buf = buffer_open_file("v0/does_not_exist.txt");
|
||||||
|
if (buf != NULL) fail("expected NULL for non-existent file");
|
||||||
|
}
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
abc
|
||||||
@@ -0,0 +1,87 @@
|
|||||||
|
#include "test.h"
|
||||||
|
#include "token.h"
|
||||||
|
|
||||||
|
static void test_tokenstream_open_fail(void) {
|
||||||
|
TokenStream* ts = tokenstream_open(NULL);
|
||||||
|
if (ts != NULL) fail("expected NULL for NULL buffer");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_tokenstream_simple_keyword(void) {
|
||||||
|
Buffer* buf = buffer_open_string("module");
|
||||||
|
TokenStream* ts = tokenstream_open(buf);
|
||||||
|
|
||||||
|
Token t = tokenstream_next(ts);
|
||||||
|
if (t != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||||
|
|
||||||
|
Token eof = tokenstream_next(ts);
|
||||||
|
if (eof != -1) fail("expected EOF");
|
||||||
|
|
||||||
|
tokenstream_close(ts);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_tokenstream_keywords_and_symbols(void) {
|
||||||
|
Buffer* buf = buffer_open_string("module main; import stdio;");
|
||||||
|
TokenStream* ts = tokenstream_open(buf);
|
||||||
|
|
||||||
|
if (tokenstream_next(ts) != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||||
|
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (main)");
|
||||||
|
if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
|
||||||
|
if (tokenstream_next(ts) != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
|
||||||
|
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (stdio)");
|
||||||
|
if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
|
||||||
|
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
||||||
|
|
||||||
|
tokenstream_close(ts);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_tokenstream_parentheses_and_brackets(void) {
|
||||||
|
Buffer* buf = buffer_open_string("()[]");
|
||||||
|
TokenStream* ts = tokenstream_open(buf);
|
||||||
|
|
||||||
|
if (tokenstream_next(ts) != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
|
||||||
|
if (tokenstream_next(ts) != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
|
||||||
|
if (tokenstream_next(ts) != TOKEN_BRACKET_OPEN) fail("expected TOKEN_BRACKET_OPEN");
|
||||||
|
if (tokenstream_next(ts) != TOKEN_BRACKET_CLOSE) fail("expected TOKEN_BRACKET_CLOSE");
|
||||||
|
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
||||||
|
|
||||||
|
tokenstream_close(ts);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_tokenstream_comma(void) {
|
||||||
|
Buffer* buf = buffer_open_string("a,b,c");
|
||||||
|
TokenStream* ts = tokenstream_open(buf);
|
||||||
|
|
||||||
|
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected a");
|
||||||
|
if (tokenstream_next(ts) != TOKEN_COMMA) fail("expected comma");
|
||||||
|
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected b");
|
||||||
|
if (tokenstream_next(ts) != TOKEN_COMMA) fail("expected comma");
|
||||||
|
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected c");
|
||||||
|
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
||||||
|
|
||||||
|
tokenstream_close(ts);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_tokenstream_whitespace_ignored(void) {
|
||||||
|
Buffer* buf = buffer_open_string(" module \n\t import ; ");
|
||||||
|
TokenStream* ts = tokenstream_open(buf);
|
||||||
|
|
||||||
|
if (tokenstream_next(ts) != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||||
|
if (tokenstream_next(ts) != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
|
||||||
|
if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
|
||||||
|
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
||||||
|
|
||||||
|
tokenstream_close(ts);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_tokenstream_void_function_signature(void) {
|
||||||
|
Buffer* buf = buffer_open_string("void main()");
|
||||||
|
TokenStream* ts = tokenstream_open(buf);
|
||||||
|
|
||||||
|
if (tokenstream_next(ts) != TOKEN_VOID) fail("expected TOKEN_VOID");
|
||||||
|
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
|
||||||
|
if (tokenstream_next(ts) != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
|
||||||
|
if (tokenstream_next(ts) != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
|
||||||
|
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
||||||
|
|
||||||
|
tokenstream_close(ts);
|
||||||
|
}
|
||||||
+170
@@ -0,0 +1,170 @@
|
|||||||
|
#include "token.h"
|
||||||
|
#include "buffer.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Easy-to-read and modify keyword-to-token mapping.
|
||||||
|
* Add new keywords here.
|
||||||
|
*/
|
||||||
|
typedef struct {
|
||||||
|
const char* keyword;
|
||||||
|
Token token;
|
||||||
|
} KeywordMap;
|
||||||
|
|
||||||
|
static const KeywordMap keywords[] = {
|
||||||
|
{"module", TOKEN_MODULE},
|
||||||
|
{"import", TOKEN_IMPORT},
|
||||||
|
{"void", TOKEN_VOID},
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Look up a keyword in the keyword map.
|
||||||
|
* Returns TOKEN_IDENTIFIER if not found.
|
||||||
|
*/
|
||||||
|
static Token lookup_keyword(const char* str) {
|
||||||
|
int count = sizeof(keywords) / sizeof(keywords[0]);
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
if (strcmp(keywords[i].keyword, str) == 0) {
|
||||||
|
return keywords[i].token;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return TOKEN_IDENTIFIER;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * State of one tokenizer: the character source plus a one-character
 * push-back slot.
 */
struct TokenStream {
    Buffer* buffer;    /* source of characters; closed by tokenstream_close() */
    char lookahead;    /* character stored by unread_char() */
    int has_lookahead; /* non-zero while `lookahead` has not been re-read yet */
};
|
||||||
|
|
||||||
|
/**
 * Reports whether `c` may begin an identifier: a letter (per isalpha)
 * or an underscore.
 *
 * @returns Non-zero if `c` can start an identifier, zero otherwise.
 */
static int is_identifier_start(char c) {
    /* <ctype.h> functions are only defined for EOF and values representable
       as unsigned char; a negative plain char must be converted first. */
    return isalpha((unsigned char)c) || c == '_';
}
|
||||||
|
|
||||||
|
/**
 * Reports whether `c` may appear inside an identifier: a letter or
 * digit (per isalnum) or an underscore.
 *
 * @returns Non-zero if `c` can continue an identifier, zero otherwise.
 */
static int is_identifier_part(char c) {
    /* <ctype.h> functions are only defined for EOF and values representable
       as unsigned char; a negative plain char must be converted first. */
    return isalnum((unsigned char)c) || c == '_';
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read a character, using lookahead if available.
|
||||||
|
*/
|
||||||
|
static char read_char(TokenStream* ts) {
|
||||||
|
if (ts->has_lookahead) {
|
||||||
|
ts->has_lookahead = 0;
|
||||||
|
return ts->lookahead;
|
||||||
|
}
|
||||||
|
return buffer_read(ts->buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Put a character back into the lookahead buffer.
|
||||||
|
*/
|
||||||
|
static void unread_char(TokenStream* ts, char c) {
|
||||||
|
if (c != (char)-1) {
|
||||||
|
ts->lookahead = c;
|
||||||
|
ts->has_lookahead = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Try to read a keyword or identifier starting with the given character.
|
||||||
|
* Returns the token type, or TOKEN_IDENTIFIER if it doesn't match a keyword.
|
||||||
|
*/
|
||||||
|
static Token read_keyword_or_identifier(TokenStream* ts, char first) {
|
||||||
|
char buffer[256];
|
||||||
|
int index = 0;
|
||||||
|
buffer[index++] = first;
|
||||||
|
|
||||||
|
char c;
|
||||||
|
while ((c = read_char(ts)) != (char)-1 && is_identifier_part(c)) {
|
||||||
|
if (index < 255) {
|
||||||
|
buffer[index++] = c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Put back the character that ended the identifier */
|
||||||
|
unread_char(ts, c);
|
||||||
|
buffer[index] = '\0';
|
||||||
|
|
||||||
|
/* Check for keywords */
|
||||||
|
return lookup_keyword(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Allocates a TokenStream that reads from `buffer`, with an empty
 * look-ahead slot. Returns NULL if `buffer` is NULL or if allocation
 * fails.
 */
TokenStream* tokenstream_open(Buffer* buffer) {
    TokenStream* stream;

    if (buffer == NULL)
        return NULL;

    stream = malloc(sizeof *stream);
    if (stream != NULL) {
        stream->buffer = buffer;
        stream->lookahead = 0;
        stream->has_lookahead = 0;
    }
    return stream;
}
|
||||||
|
|
||||||
|
/**
 * Closes the stream's buffer and frees the stream. A NULL stream is
 * ignored.
 */
void tokenstream_close(TokenStream* ts) {
    if (ts != NULL) {
        buffer_close(ts->buffer);
        free(ts);
    }
}
|
||||||
|
|
||||||
|
/**
 * Produces the next token of the stream.
 *
 * Whitespace and `//` line comments are skipped. Returns -1 when `ts`
 * (or its buffer) is NULL, at end of input, for a '/' that does not
 * start a `//` comment, and for any character that starts no known
 * token.
 */
Token tokenstream_next(TokenStream* ts) {
    if (ts == NULL || ts->buffer == NULL) return -1;

    char c;

    /* Skip whitespace and comments */
    while ((c = read_char(ts)) != (char)-1) {
        /* <ctype.h> functions are only defined for EOF and values
           representable as unsigned char, so convert the plain char. */
        if (isspace((unsigned char)c)) {
            continue;
        }

        /* Handle comments */
        if (c == '/') {
            char next = read_char(ts);
            if (next == '/') {
                /* Skip until end of line (or end of input) */
                while ((c = read_char(ts)) != (char)-1 && c != '\n') {
                    /* Skip */
                }
                continue;
            }
            /* A lone '/' is not a token: put back what followed it and give up */
            unread_char(ts, next);
            return -1;
        }

        /* We found a non-whitespace, non-comment character */
        break;
    }

    if (c == (char)-1) return -1; /* EOF */

    /* Single-character tokens */
    switch (c) {
        case '(': return TOKEN_PARENT_OPEN;
        case ')': return TOKEN_PARENT_CLOSE;
        case '[': return TOKEN_BRACKET_OPEN;
        case ']': return TOKEN_BRACKET_CLOSE;
        case ',': return TOKEN_COMMA;
        case ';': return TOKEN_SEMICOLON;
        default: break; /* not a symbol; try identifiers below */
    }

    /* Keywords and identifiers */
    if (is_identifier_start(c)) {
        return read_keyword_or_identifier(ts, c);
    }

    /* Unknown character */
    return -1;
}
|
||||||
+71
@@ -0,0 +1,71 @@
|
|||||||
|
/**
|
||||||
|
* Contains the interface for reading tokens from a file.
|
||||||
|
*/
|
||||||
|
#ifndef TOKEN_H
|
||||||
|
#define TOKEN_H
|
||||||
|
|
||||||
|
#include "buffer.h"

#include <stddef.h>
|
||||||
|
|
||||||
|
/**
 * All token kinds the tokenizer can report.
 *
 * The values of the pre-existing enumerators are unchanged
 * (TOKEN_MODULE == 0 through TOKEN_IDENTIFIER == 9).
 */
typedef enum {
    // Sentinel: names the -1 that tokenstream_next() returns at end of
    // input and for input it cannot tokenize.
    TOKEN_NONE = -1,

    // Keywords
    TOKEN_MODULE = 0,
    TOKEN_IMPORT,

    // Symbols
    TOKEN_SEMICOLON,
    TOKEN_PARENT_OPEN,
    TOKEN_PARENT_CLOSE,
    TOKEN_BRACKET_OPEN,
    TOKEN_BRACKET_CLOSE,
    TOKEN_COMMA,

    // Primitives
    TOKEN_VOID,

    // Variable
    TOKEN_IDENTIFIER,
} Token;
|
||||||
|
|
||||||
|
/**
 * Holds additional information about a token: its kind together with
 * the text it was read from.
 *
 * NOTE(review): no code in view creates or consumes this struct, so the
 * ownership of `text`, and whether it is NUL-terminated, still need to
 * be defined — TODO confirm.
 */
typedef struct {
    /// @brief The textual representation of a token.
    char* text;

    /// @brief The length of the `text` string.
    size_t text_length;

    /// @brief The actual token.
    Token token;
} TokenInfo;
|
||||||
|
|
||||||
|
typedef struct TokenStream TokenStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a TokenStream for a given buffer.
|
||||||
|
*
|
||||||
|
* When the tokenstream is closed, the underlying buffer is also closed.
|
||||||
|
*
|
||||||
|
* @param buffer The buffer to read from.
|
||||||
|
 * @returns A handle to the TokenStream, or NULL if `buffer` is NULL or memory could not be allocated.
|
||||||
|
*/
|
||||||
|
TokenStream* tokenstream_open(Buffer* buffer);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Closes a TokenStream.
|
||||||
|
* @param ts The TokenStream to close.
|
||||||
|
*/
|
||||||
|
void tokenstream_close(TokenStream* ts);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the next token from the TokenStream.
|
||||||
|
* @param ts The TokenStream to read from.
|
||||||
|
 * @returns The next token read, or -1 at end of input or when the input cannot be tokenized.
|
||||||
|
*/
|
||||||
|
Token tokenstream_next(TokenStream* ts);
|
||||||
|
|
||||||
|
#endif
|
||||||
Reference in New Issue
Block a user