Compare commits
11 Commits
14fced76f0
...
1406cedd82
| Author | SHA1 | Date | |
|---|---|---|---|
| 1406cedd82 | |||
| 422203fdab | |||
| ed12c0a38e | |||
| 49b9db5b75 | |||
| c1106d8e66 | |||
| dccdcb8ba5 | |||
| c73f99d9e6 | |||
| c90f3afd95 | |||
| 4939a74752 | |||
| 7ff3f76de5 | |||
| f6a1b290fc |
@@ -0,0 +1,33 @@
|
||||
# C2
|
||||
C2 is a compiler for a new language.
|
||||
See the README.md for information about this project.
|
||||
|
||||
## Code Changes
|
||||
After every code change, ensure the binary builds correctly,
|
||||
and run the unit tests (`make test`).
|
||||
|
||||
Ensure that every new function and code path has useful unit tests.
|
||||
|
||||
### Creating Source Files
|
||||
Whenever a new source file is created, it must be added to the `include.mk` file.
|
||||
A test file should also be created.
|
||||
|
||||
Test source files do not have to be added to the include.mk file.
|
||||
These are added to the `test.c` file by means of directly `#include`ing the C file.
|
||||
|
||||
### Testing
|
||||
Any test source file must be named `test_xyz.c`, where `xyz` matches the name of
|
||||
the source file it is trying to test.
|
||||
For instance, a test for `buffer.c` must be called `test_buffer.c`.
|
||||
|
||||
There will be no `test_buffer.h`. Instead, `test.c` will directly
|
||||
`#include` the C source file.
|
||||
|
||||
## Language Syntax
|
||||
Since this is a compiler for a new language, do not assume anything
|
||||
of its syntax.
|
||||
Always check the `specs` directory.
|
||||
|
||||
If there is anything unclear, ask the user for clarification.
|
||||
It is certainly possible that there are contradictions in the
|
||||
spec that have to be solved first.
|
||||
@@ -0,0 +1,14 @@
|
||||
---
|
||||
name: implement
|
||||
description: 'Implement all @copilot annotations'
|
||||
---
|
||||
# General
|
||||
Find and implement all `@copilot` comments in the codebase. Modify only code related to those annotations, and always make sure that tests are added.
|
||||
|
||||
## Plan Mode
|
||||
If you are currently in plan mode, look at all the annotations and always create a plan first.
|
||||
Only start modifying code once the plan has been approved by the user.
|
||||
|
||||
## Implementation
|
||||
Implement the changes requested at the location of the annotation.
|
||||
After the implementation is finished, remove the comment containing the annotation.
|
||||
+1
-1
@@ -1 +1 @@
|
||||
bin/
|
||||
/c2
|
||||
|
||||
@@ -1,19 +1,13 @@
|
||||
.PHONY: all clean
|
||||
|
||||
SRC := main.c
|
||||
BINDIR := bin
|
||||
OBJ := $(SRC:%.c=$(BINDIR)/%.o)
|
||||
.PHONY: all test clean
|
||||
|
||||
all: c2
|
||||
|
||||
clean:
|
||||
rm -rv bin
|
||||
c2: v0/bin/c2
|
||||
cp $< $@
|
||||
|
||||
c2: $(OBJ)
|
||||
gcc -o $@ $<
|
||||
test::
|
||||
|
||||
$(BINDIR)/%.o: %.c
|
||||
gcc -c -o $@ $<
|
||||
clean::
|
||||
rm -f c2
|
||||
|
||||
$(BINDIR):
|
||||
mkdir -p $(BINDIR)
|
||||
include v0/include.mk
|
||||
@@ -0,0 +1,17 @@
|
||||
# C2
|
||||
C2 is a programming language based on C.
|
||||
It adds modern high-level features such as metaprogramming, generics, etc.,
|
||||
but compiles down to C89.
|
||||
|
||||
## Building
|
||||
To build the C2 compiler, simply run `make` or `make all`.
|
||||
This will build the compiler and run the tests.
|
||||
|
||||
In order to only build the compiler, run `make c2`.
|
||||
In order to run the tests, run `make test`.
|
||||
|
||||
## Versioning
|
||||
The current version is v0. Its source code lives in the `v0` directory.
|
||||
|
||||
## Language Specification
|
||||
See the specs directory for information on the actual language syntax.
|
||||
@@ -0,0 +1,9 @@
|
||||
Copilot / contributor instructions for v0
|
||||
|
||||
- When adding new source or test files for v0, do NOT rely on wildcards. Add the file path explicitly to v0/include.mk in either V0_SRC (for library/source files) or V0_TEST (for test files beginning with `test_`).
|
||||
- v0/include.mk is included by the top-level Makefile and is a dependency for object builds. Modifying v0/include.mk will force appropriate recompilation.
|
||||
|
||||
Example:
|
||||
V0_SRC := v0/buffer.c v0/main.c
|
||||
V0_TEST := v0/test.c v0/test_buffer.c
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
# General
|
||||
A C2 file starts with a module declaration followed by other declarations.
|
||||
|
||||
For instance:
|
||||
|
||||
```c2
|
||||
module mymodule;
|
||||
|
||||
import libc.stdio;
|
||||
|
||||
void main() {
|
||||
puts("Hello, world!");
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1 @@
|
||||
/bin/
|
||||
+63
@@ -0,0 +1,63 @@
|
||||
#include "buffer.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/** Discriminates which kind of source a Buffer reads from. */
typedef enum {
    BUFFER_FILE = 0,   /* backed by a stdio FILE stream */
    BUFFER_STRING = 1  /* backed by a NUL-terminated string that is not copied */
} BufferType;
|
||||
|
||||
struct Buffer {
|
||||
BufferType type;
|
||||
union {
|
||||
FILE* file;
|
||||
struct {
|
||||
const char* data;
|
||||
size_t pos;
|
||||
} string;
|
||||
} source;
|
||||
};
|
||||
|
||||
Buffer* buffer_open_file(const char* path) {
|
||||
FILE* f = fopen(path, "r");
|
||||
if (f == NULL)
|
||||
return NULL;
|
||||
Buffer* buf = malloc(sizeof(Buffer));
|
||||
if (buf == NULL) {
|
||||
fclose(f);
|
||||
return NULL;
|
||||
}
|
||||
buf->type = BUFFER_FILE;
|
||||
buf->source.file = f;
|
||||
return buf;
|
||||
}
|
||||
|
||||
Buffer* buffer_open_string(const char* string) {
|
||||
Buffer* buf = malloc(sizeof(Buffer));
|
||||
if (buf == NULL)
|
||||
return NULL;
|
||||
buf->type = BUFFER_STRING;
|
||||
buf->source.string.data = string;
|
||||
buf->source.string.pos = 0;
|
||||
return buf;
|
||||
}
|
||||
|
||||
/*
 * Releases a Buffer.
 *
 * A file-backed buffer closes its stream; a string-backed buffer leaves
 * the string untouched because it was never copied. Passing NULL is a
 * no-op, matching free() and tokenstream_close().
 */
void buffer_close(Buffer* buffer) {
    if (buffer == NULL) {
        return;
    }
    if (buffer->type == BUFFER_FILE) {
        fclose(buffer->source.file);
    }
    free(buffer);
}
|
||||
|
||||
/*
 * Returns the next character of the buffer and advances past it.
 *
 * End of input is reported as (char)-1: for a file when fgetc() yields
 * EOF, for a string when the terminating '\0' is reached. The string
 * position is not advanced past the terminator, so repeated reads keep
 * returning (char)-1.
 */
char buffer_read(Buffer* buffer) {
    if (buffer->type != BUFFER_FILE) {
        size_t at = buffer->source.string.pos;
        char ch = buffer->source.string.data[at];
        if (ch == '\0') {
            return (char)-1;
        }
        buffer->source.string.pos = at + 1;
        return ch;
    }

    int ch = fgetc(buffer->source.file);
    if (ch == EOF) {
        return (char)-1;
    }
    return (char)ch;
}
|
||||
+50
@@ -0,0 +1,50 @@
|
||||
/**
|
||||
* An interface that wraps files and strings.
|
||||
* Allows stream-like reading from it.
|
||||
*/
|
||||
#ifndef BUFFER_H
|
||||
#define BUFFER_H
|
||||
|
||||
/**
|
||||
* An interface to a source of textual data.
|
||||
*/
|
||||
typedef struct Buffer Buffer;
|
||||
|
||||
/**
|
||||
* Opens a file.
|
||||
*
|
||||
* @param path The path to the file.
|
||||
* @returns The newly opened buffer, or NULL on failure.
|
||||
*/
|
||||
Buffer* buffer_open_file(const char* path);
|
||||
|
||||
/**
|
||||
* Opens a string.
|
||||
*
|
||||
* The string is not copied, and must not be free'd until the
|
||||
* buffer itself has been closed.
|
||||
*
|
||||
* @param string The contents stored in the buffer.
|
||||
* @returns A newly opened buffer that reads from the string, or NULL on failure.
|
||||
*/
|
||||
Buffer* buffer_open_string(const char* string);
|
||||
|
||||
/**
|
||||
* Closes the buffer.
|
||||
*
|
||||
* @param buffer The buffer to close.
|
||||
*/
|
||||
void buffer_close(Buffer* buffer);
|
||||
|
||||
/**
|
||||
* Reads a single character from the buffer.
|
||||
*
|
||||
* If there are no more characters in the buffer,
|
||||
* this returns `-1`.
|
||||
*
|
||||
* @param buffer The buffer to read from.
|
||||
* @returns the next character in the buffer.
|
||||
*/
|
||||
char buffer_read(Buffer* buffer);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,35 @@
|
||||
V0_SRC := v0/buffer.c v0/main.c v0/token.c
|
||||
|
||||
# V0_TEST must only include `v0/test.c` itself, as all other test C source files are
|
||||
# included directly into `v0/test.c` using `#include "test_xyz.c"`.
|
||||
V0_TEST := v0/test.c
|
||||
|
||||
V0_SRC_OBJ := $(patsubst v0/%.c,v0/bin/%.o,$(V0_SRC))
|
||||
V0_TEST_OBJ := $(patsubst v0/%.c,v0/bin/%.o,$(V0_TEST))
|
||||
|
||||
# Define dependency file lists for sources and tests
|
||||
V0_SRC_DEPS := $(V0_SRC_OBJ:.o=.d)
|
||||
V0_TEST_DEPS := $(V0_TEST_OBJ:.o=.d)
|
||||
|
||||
v0/bin/c2: $(V0_SRC_OBJ)
|
||||
$(CC) $(CFLAGS) -o $@ $^
|
||||
|
||||
V0_SRC_OBJ_NO_MAIN := $(filter-out v0/bin/main.o,$(V0_SRC_OBJ))
|
||||
|
||||
v0/bin/test: $(V0_SRC_OBJ_NO_MAIN) $(V0_TEST_OBJ)
|
||||
$(CC) $(CFLAGS) -o $@ $^
|
||||
|
||||
test:: v0/bin/test
|
||||
v0/bin/test
|
||||
|
||||
clean::
|
||||
rm -f v0/bin/test v0/bin/c2 $(V0_SRC_OBJ) $(V0_TEST_OBJ) $(V0_SRC_DEPS) $(V0_TEST_DEPS)
|
||||
|
||||
# Build each .c file into a .o file, tracking header dependencies.
|
||||
v0/bin/%.o: v0/%.c v0/include.mk
|
||||
@mkdir -p $(dir $@)
|
||||
$(CC) $(CFLAGS) -MMD -MP -c $< -o $@
|
||||
|
||||
# Reference dependency files
|
||||
-include $(V0_SRC_DEPS)
|
||||
-include $(V0_TEST_DEPS)
|
||||
@@ -0,0 +1,5 @@
|
||||
#include <stdio.h>
|
||||
|
||||
/* Entry point of the compiler binary; currently only prints a greeting. */
int main(int argc, char** argv) {
    /* The command line is not used. */
    (void)argc;
    (void)argv;

    puts("Hello, world");
    return 0;
}
|
||||
@@ -0,0 +1,74 @@
|
||||
#include "test.h"
|
||||
#include "buffer.h"
|
||||
#include <setjmp.h>
|
||||
#include <stdio.h>
|
||||
|
||||
static jmp_buf s_testJmp;
|
||||
static const char* s_failMsg;
|
||||
|
||||
void fail(const char* msg) {
|
||||
s_failMsg = msg;
|
||||
longjmp(s_testJmp, 1);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
const char* name;
|
||||
Test func;
|
||||
} TestCase;
|
||||
|
||||
#include "test_buffer.c"
|
||||
#include "test_token.c"
|
||||
|
||||
static int s_totalTests;
|
||||
static int s_greenTests;
|
||||
|
||||
static TestCase s_tests[] = {
|
||||
{"buffer_string_reads_chars", test_buffer_string_reads_chars},
|
||||
{"buffer_string_eof", test_buffer_string_eof},
|
||||
{"buffer_string_eof_after_content", test_buffer_string_eof_after_content},
|
||||
{"buffer_file_reads_chars", test_buffer_file_reads_chars},
|
||||
{"buffer_file_open_fail", test_buffer_file_open_fail},
|
||||
{"tokenstream_open_fail", test_tokenstream_open_fail},
|
||||
{"tokenstream_simple_keyword", test_tokenstream_simple_keyword},
|
||||
{"tokenstream_keywords_and_symbols", test_tokenstream_keywords_and_symbols},
|
||||
{"tokenstream_parentheses_and_brackets", test_tokenstream_parentheses_and_brackets},
|
||||
{"tokenstream_comma", test_tokenstream_comma},
|
||||
{"tokenstream_whitespace_ignored", test_tokenstream_whitespace_ignored},
|
||||
{"tokenstream_void_function_signature", test_tokenstream_void_function_signature},
|
||||
};
|
||||
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
|
||||
s_totalTests = sizeof(s_tests) / sizeof(s_tests[0]);
|
||||
s_greenTests = 0;
|
||||
|
||||
const char* failedTests[s_totalTests + 1];
|
||||
int failedCount = 0;
|
||||
|
||||
for (int i = 0; i < s_totalTests; i++) {
|
||||
printf("%s...", s_tests[i].name);
|
||||
s_failMsg = NULL;
|
||||
|
||||
if (setjmp(s_testJmp) == 0) {
|
||||
s_tests[i].func();
|
||||
printf(" [OK]\n");
|
||||
s_greenTests++;
|
||||
} else {
|
||||
printf(" [FAIL]: %s\n", s_failMsg ? s_failMsg : "");
|
||||
failedTests[failedCount++] = s_tests[i].name;
|
||||
}
|
||||
}
|
||||
|
||||
if (failedCount > 0) {
|
||||
printf("\nFailed tests:\n");
|
||||
for (int i = 0; i < failedCount; i++) {
|
||||
printf(" - %s\n", failedTests[i]);
|
||||
}
|
||||
}
|
||||
|
||||
printf("\n%d/%d tests passed.\n", s_greenTests, s_totalTests);
|
||||
return failedCount > 0 ? 1 : 0;
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
/**
|
||||
* Contains test assertion routines.
|
||||
*/
|
||||
#ifndef TEST_H
|
||||
#define TEST_H
|
||||
|
||||
typedef void (*Test)(void);
|
||||
|
||||
/**
|
||||
* Fails a test.
|
||||
* @param msg The message to print to the console.
|
||||
*/
|
||||
void fail(const char* msg);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,40 @@
|
||||
#include "test.h"
|
||||
#include "buffer.h"
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
static void test_buffer_string_reads_chars(void) {
|
||||
Buffer* buf = buffer_open_string("hi");
|
||||
if (buffer_read(buf) != 'h') fail("expected 'h'");
|
||||
if (buffer_read(buf) != 'i') fail("expected 'i'");
|
||||
buffer_close(buf);
|
||||
}
|
||||
|
||||
static void test_buffer_string_eof(void) {
|
||||
Buffer* buf = buffer_open_string("");
|
||||
if (buffer_read(buf) != (char)-1) fail("expected -1 on empty string");
|
||||
buffer_close(buf);
|
||||
}
|
||||
|
||||
static void test_buffer_string_eof_after_content(void) {
|
||||
Buffer* buf = buffer_open_string("a");
|
||||
buffer_read(buf);
|
||||
if (buffer_read(buf) != (char)-1) fail("expected -1 after end of string");
|
||||
buffer_close(buf);
|
||||
}
|
||||
|
||||
static void test_buffer_file_reads_chars(void) {
|
||||
Buffer* buf = buffer_open_file("v0/test_buffer.txt");
|
||||
if (buf == NULL) fail("could not open file");
|
||||
if (buffer_read(buf) != 'a') fail("expected 'a'");
|
||||
if (buffer_read(buf) != 'b') fail("expected 'b'");
|
||||
if (buffer_read(buf) != 'c') fail("expected 'c'");
|
||||
if (buffer_read(buf) != '\n') fail("expected newline after content");
|
||||
if (buffer_read(buf) != (char)-1) fail("expected -1 after file");
|
||||
buffer_close(buf);
|
||||
}
|
||||
|
||||
static void test_buffer_file_open_fail(void) {
|
||||
Buffer* buf = buffer_open_file("v0/does_not_exist.txt");
|
||||
if (buf != NULL) fail("expected NULL for non-existent file");
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
abc
|
||||
@@ -0,0 +1,87 @@
|
||||
#include "test.h"
|
||||
#include "token.h"
|
||||
|
||||
static void test_tokenstream_open_fail(void) {
|
||||
TokenStream* ts = tokenstream_open(NULL);
|
||||
if (ts != NULL) fail("expected NULL for NULL buffer");
|
||||
}
|
||||
|
||||
static void test_tokenstream_simple_keyword(void) {
|
||||
Buffer* buf = buffer_open_string("module");
|
||||
TokenStream* ts = tokenstream_open(buf);
|
||||
|
||||
Token t = tokenstream_next(ts);
|
||||
if (t != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||
|
||||
Token eof = tokenstream_next(ts);
|
||||
if (eof != -1) fail("expected EOF");
|
||||
|
||||
tokenstream_close(ts);
|
||||
}
|
||||
|
||||
static void test_tokenstream_keywords_and_symbols(void) {
|
||||
Buffer* buf = buffer_open_string("module main; import stdio;");
|
||||
TokenStream* ts = tokenstream_open(buf);
|
||||
|
||||
if (tokenstream_next(ts) != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (main)");
|
||||
if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
|
||||
if (tokenstream_next(ts) != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
|
||||
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER (stdio)");
|
||||
if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
|
||||
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
||||
|
||||
tokenstream_close(ts);
|
||||
}
|
||||
|
||||
static void test_tokenstream_parentheses_and_brackets(void) {
|
||||
Buffer* buf = buffer_open_string("()[]");
|
||||
TokenStream* ts = tokenstream_open(buf);
|
||||
|
||||
if (tokenstream_next(ts) != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
|
||||
if (tokenstream_next(ts) != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
|
||||
if (tokenstream_next(ts) != TOKEN_BRACKET_OPEN) fail("expected TOKEN_BRACKET_OPEN");
|
||||
if (tokenstream_next(ts) != TOKEN_BRACKET_CLOSE) fail("expected TOKEN_BRACKET_CLOSE");
|
||||
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
||||
|
||||
tokenstream_close(ts);
|
||||
}
|
||||
|
||||
static void test_tokenstream_comma(void) {
|
||||
Buffer* buf = buffer_open_string("a,b,c");
|
||||
TokenStream* ts = tokenstream_open(buf);
|
||||
|
||||
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected a");
|
||||
if (tokenstream_next(ts) != TOKEN_COMMA) fail("expected comma");
|
||||
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected b");
|
||||
if (tokenstream_next(ts) != TOKEN_COMMA) fail("expected comma");
|
||||
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected c");
|
||||
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
||||
|
||||
tokenstream_close(ts);
|
||||
}
|
||||
|
||||
static void test_tokenstream_whitespace_ignored(void) {
|
||||
Buffer* buf = buffer_open_string(" module \n\t import ; ");
|
||||
TokenStream* ts = tokenstream_open(buf);
|
||||
|
||||
if (tokenstream_next(ts) != TOKEN_MODULE) fail("expected TOKEN_MODULE");
|
||||
if (tokenstream_next(ts) != TOKEN_IMPORT) fail("expected TOKEN_IMPORT");
|
||||
if (tokenstream_next(ts) != TOKEN_SEMICOLON) fail("expected TOKEN_SEMICOLON");
|
||||
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
||||
|
||||
tokenstream_close(ts);
|
||||
}
|
||||
|
||||
static void test_tokenstream_void_function_signature(void) {
|
||||
Buffer* buf = buffer_open_string("void main()");
|
||||
TokenStream* ts = tokenstream_open(buf);
|
||||
|
||||
if (tokenstream_next(ts) != TOKEN_VOID) fail("expected TOKEN_VOID");
|
||||
if (tokenstream_next(ts) != TOKEN_IDENTIFIER) fail("expected TOKEN_IDENTIFIER");
|
||||
if (tokenstream_next(ts) != TOKEN_PARENT_OPEN) fail("expected TOKEN_PARENT_OPEN");
|
||||
if (tokenstream_next(ts) != TOKEN_PARENT_CLOSE) fail("expected TOKEN_PARENT_CLOSE");
|
||||
if (tokenstream_next(ts) != -1) fail("expected EOF");
|
||||
|
||||
tokenstream_close(ts);
|
||||
}
|
||||
+170
@@ -0,0 +1,170 @@
|
||||
#include "token.h"
|
||||
#include "buffer.h"
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
|
||||
/**
|
||||
* Easy-to-read and modify keyword-to-token mapping.
|
||||
* Add new keywords here.
|
||||
*/
|
||||
typedef struct {
|
||||
const char* keyword;
|
||||
Token token;
|
||||
} KeywordMap;
|
||||
|
||||
static const KeywordMap keywords[] = {
|
||||
{"module", TOKEN_MODULE},
|
||||
{"import", TOKEN_IMPORT},
|
||||
{"void", TOKEN_VOID},
|
||||
};
|
||||
|
||||
/**
|
||||
* Look up a keyword in the keyword map.
|
||||
* Returns TOKEN_IDENTIFIER if not found.
|
||||
*/
|
||||
static Token lookup_keyword(const char* str) {
|
||||
int count = sizeof(keywords) / sizeof(keywords[0]);
|
||||
for (int i = 0; i < count; i++) {
|
||||
if (strcmp(keywords[i].keyword, str) == 0) {
|
||||
return keywords[i].token;
|
||||
}
|
||||
}
|
||||
return TOKEN_IDENTIFIER;
|
||||
}
|
||||
|
||||
struct TokenStream {
|
||||
Buffer* buffer;
|
||||
char lookahead;
|
||||
int has_lookahead;
|
||||
};
|
||||
|
||||
/**
 * Check if a character is the start of an identifier
 * (a letter or an underscore).
 *
 * The argument is converted to unsigned char before it reaches
 * isalpha(): passing a negative value other than EOF to a <ctype.h>
 * function is undefined behaviour, and plain char may be signed.
 *
 * @returns non-zero if `c` may start an identifier, 0 otherwise.
 */
static int is_identifier_start(char c) {
    return isalpha((unsigned char)c) || c == '_';
}
|
||||
|
||||
/**
 * Check if a character can be part of an identifier
 * (a letter, a digit or an underscore).
 *
 * The argument is converted to unsigned char before it reaches
 * isalnum(): passing a negative value other than EOF to a <ctype.h>
 * function is undefined behaviour, and plain char may be signed.
 *
 * @returns non-zero if `c` may appear inside an identifier, 0 otherwise.
 */
static int is_identifier_part(char c) {
    return isalnum((unsigned char)c) || c == '_';
}
|
||||
|
||||
/**
|
||||
* Read a character, using lookahead if available.
|
||||
*/
|
||||
static char read_char(TokenStream* ts) {
|
||||
if (ts->has_lookahead) {
|
||||
ts->has_lookahead = 0;
|
||||
return ts->lookahead;
|
||||
}
|
||||
return buffer_read(ts->buffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Put a character back into the lookahead buffer.
|
||||
*/
|
||||
static void unread_char(TokenStream* ts, char c) {
|
||||
if (c != (char)-1) {
|
||||
ts->lookahead = c;
|
||||
ts->has_lookahead = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to read a keyword or identifier starting with the given character.
|
||||
* Returns the token type, or TOKEN_IDENTIFIER if it doesn't match a keyword.
|
||||
*/
|
||||
static Token read_keyword_or_identifier(TokenStream* ts, char first) {
|
||||
char buffer[256];
|
||||
int index = 0;
|
||||
buffer[index++] = first;
|
||||
|
||||
char c;
|
||||
while ((c = read_char(ts)) != (char)-1 && is_identifier_part(c)) {
|
||||
if (index < 255) {
|
||||
buffer[index++] = c;
|
||||
}
|
||||
}
|
||||
|
||||
/* Put back the character that ended the identifier */
|
||||
unread_char(ts, c);
|
||||
buffer[index] = '\0';
|
||||
|
||||
/* Check for keywords */
|
||||
return lookup_keyword(buffer);
|
||||
}
|
||||
|
||||
/*
 * Creates a token stream that reads from `buffer`, with an empty
 * push-back slot.
 *
 * Returns NULL when `buffer` is NULL or the stream cannot be allocated.
 */
TokenStream* tokenstream_open(Buffer* buffer) {
    if (!buffer) {
        return NULL;
    }

    TokenStream* stream = malloc(sizeof *stream);
    if (!stream) {
        return NULL;
    }

    stream->has_lookahead = 0;
    stream->lookahead = 0;
    stream->buffer = buffer;
    return stream;
}
|
||||
|
||||
/*
 * Closes the stream together with the buffer it reads from.
 * Passing NULL is a no-op.
 */
void tokenstream_close(TokenStream* ts) {
    if (ts != NULL) {
        buffer_close(ts->buffer);
        free(ts);
    }
}
|
||||
|
||||
/*
 * Returns the next token of the stream.
 *
 * Whitespace and `//` line comments are skipped. The function returns -1
 * - at end of input,
 * - for a '/' that does not start a `//` comment,
 * - for any character that does not start a known token,
 * - when `ts` or its buffer is NULL.
 */
Token tokenstream_next(TokenStream* ts) {
    if (ts == NULL || ts->buffer == NULL) return -1;

    char c;

    /* Skip whitespace and comments */
    while ((c = read_char(ts)) != (char)-1) {
        /* <ctype.h> functions need a value representable as unsigned char;
         * a plain char may be negative, which would be undefined behaviour. */
        if (isspace((unsigned char)c)) {
            continue;
        }

        /* Handle comments */
        if (c == '/') {
            char next = read_char(ts);
            if (next == '/') {
                /* Skip until end of line (or end of input) */
                while ((c = read_char(ts)) != (char)-1 && c != '\n') {
                    /* Skip */
                }
                continue;
            }
            /* A lone '/' is not a token: put back what followed it. */
            unread_char(ts, next);
            return -1;
        }

        /* We found a non-whitespace, non-comment character */
        break;
    }

    if (c == (char)-1) return -1; /* EOF */

    /* Single-character tokens */
    switch (c) {
        case '(': return TOKEN_PARENT_OPEN;
        case ')': return TOKEN_PARENT_CLOSE;
        case '[': return TOKEN_BRACKET_OPEN;
        case ']': return TOKEN_BRACKET_CLOSE;
        case ',': return TOKEN_COMMA;
        case ';': return TOKEN_SEMICOLON;
        default: break; /* not a symbol; try identifiers below */
    }

    /* Keywords and identifiers */
    if (is_identifier_start(c)) {
        return read_keyword_or_identifier(ts, c);
    }

    /* Unknown character */
    return -1;
}
|
||||
+71
@@ -0,0 +1,71 @@
|
||||
/**
|
||||
* Contains the interface for reading tokens from a file.
|
||||
*/
|
||||
#ifndef TOKEN_H
|
||||
#define TOKEN_H
|
||||
|
||||
#include <stddef.h>

#include "buffer.h"
|
||||
|
||||
/**
 * A list of all possible tokens.
 *
 * TOKEN_EOF names the -1 that tokenstream_next() returns at end of input
 * (the same value is also returned for input it cannot tokenize). Having
 * it as an enumerator guarantees that a Token can represent -1 whatever
 * integer type the compiler picks for the enum. All other enumerators
 * keep the values they had before TOKEN_EOF was added.
 */
typedef enum {
    // End of input / no token
    TOKEN_EOF = -1,

    // Keywords
    TOKEN_MODULE = 0,
    TOKEN_IMPORT = 1,
    // (a symbol, kept at this position so its value stays 2)
    TOKEN_SEMICOLON = 2,

    // Symbols
    TOKEN_PARENT_OPEN = 3,
    TOKEN_PARENT_CLOSE = 4,
    TOKEN_BRACKET_OPEN = 5,
    TOKEN_BRACKET_CLOSE = 6,
    TOKEN_COMMA = 7,

    // Primitives
    TOKEN_VOID = 8,

    // Variable
    TOKEN_IDENTIFIER = 9,
} Token;
|
||||
|
||||
/**
|
||||
* Holds additional information about a token.
|
||||
*/
|
||||
typedef struct {
|
||||
/// @brief The textual representation of a token.
|
||||
char* text;
|
||||
|
||||
/// @brief The length of the `text` string.
|
||||
size_t text_length;
|
||||
|
||||
/// @brief The actual token.
|
||||
Token token;
|
||||
} TokenInfo;
|
||||
|
||||
typedef struct TokenStream TokenStream;
|
||||
|
||||
/**
|
||||
* Returns a TokenStream for a given buffer.
|
||||
*
|
||||
* When the tokenstream is closed, the underlying buffer is also closed.
|
||||
*
|
||||
* @param buffer The buffer to read from.
|
||||
* @returns A handle to the TokenStream.
|
||||
*/
|
||||
TokenStream* tokenstream_open(Buffer* buffer);
|
||||
|
||||
/**
|
||||
* Closes a TokenStream.
|
||||
* @param ts The TokenStream to close.
|
||||
*/
|
||||
void tokenstream_close(TokenStream* ts);
|
||||
|
||||
/**
|
||||
* Gets the next token from the TokenStream.
|
||||
* @param ts The TokenStream to read from.
|
||||
* @returns The next token read.
|
||||
*/
|
||||
Token tokenstream_next(TokenStream* ts);
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user