summaryrefslogtreecommitdiff
path: root/uscript
diff options
context:
space:
mode:
Diffstat (limited to 'uscript')
-rw-r--r--uscript/lex.c382
-rw-r--r--uscript/lex.h74
-rw-r--r--uscript/parser.c841
-rw-r--r--uscript/parser.h8
-rw-r--r--uscript/us_debug.c117
-rw-r--r--uscript/us_debug.h10
-rw-r--r--uscript/uscript.c34
-rw-r--r--uscript/uscript.h14
-rw-r--r--uscript/val.c204
-rw-r--r--uscript/val.h102
-rw-r--r--uscript/vm.c306
-rw-r--r--uscript/vm.h44
-rw-r--r--uscript/xbytecode.h32
13 files changed, 2168 insertions, 0 deletions
diff --git a/uscript/lex.c b/uscript/lex.c
new file mode 100644
index 0000000..cc41b2a
--- /dev/null
+++ b/uscript/lex.c
@@ -0,0 +1,382 @@
+#include "lex.h"
+
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+
+#include "dyn_arr.h"
+
+// Step the lexer one character forward, keeping the column counter in sync.
+// The argument is parenthesized so any pointer expression may be passed.
+#define advance(lex) (++(lex)->head, ++(lex)->col)
+// The character currently under the lexer's head pointer.
+#define current_char(lex) (*(lex)->head)
+
+// For printing purposes: display names for the non-character token kinds.
+// Entry 0 is "TOKEN_EOF" and the remaining entries are generated from
+// XTOKENS, so token_kind_name() indexes this table with (kind - TOKEN_EOF).
+static
+char *token_names[] = {
+ "TOKEN_EOF",
+#define TOKEN_NAME(name) "TOKEN_" #name,
+ XTOKENS(TOKEN_NAME)
+#undef TOKEN_NAME
+};
+
+// True when c is one of the ASCII decimal digits '0' through '9'.
+static
+bool is_digit(char c)
+{
+    if (c < '0')
+        return false;
+    return c <= '9';
+}
+
+// True when c is an ASCII uppercase letter 'A' through 'Z'.
+static
+bool is_upper(char c)
+{
+    if (c < 'A')
+        return false;
+    return c <= 'Z';
+}
+
+// True when c is an ASCII lowercase letter 'a' through 'z'.
+static
+bool is_lower(char c)
+{
+    if (c < 'a')
+        return false;
+    return c <= 'z';
+}
+
+// True when c may start an identifier: an ASCII letter or an underscore.
+static
+bool is_ident(char c)
+{
+    if (c == '_')
+        return true;
+    return is_upper(c) || is_lower(c);
+}
+
+// Reports whether the text scanned so far (lex->base up to lex->head) is
+// exactly the keyword b.
+static
+bool match_kw(const struct lexer *lex, const char *b)
+{
+    size_t scanned = (size_t)(lex->head - lex->base);
+    if (scanned != strlen(b))
+        return false;
+    return memcmp(lex->base, b, scanned) == 0;
+}
+
+// Consumes the current character only when it equals c, and reports whether
+// it did so.
+static
+bool match_char(struct lexer *lex, char c)
+{
+    if (current_char(lex) != c)
+        return false;
+    advance(lex);
+    return true;
+}
+
+// Skips everything that cannot begin a token and then marks the token start
+// (lex->base) at the first character that can. Spaces, tabs and carriage
+// returns are skipped; a newline additionally bumps the line counter and
+// resets the column; a '#' starts a comment that runs to the end of the line.
+static
+void goto_token_start(struct lexer *lex)
+{
+    for (;;) {
+        char c = current_char(lex);
+
+        if (c == '#') {
+            // Stop on the newline itself so the next pass counts it.
+            while (current_char(lex) != '\n' && current_char(lex) != '\0')
+                advance(lex);
+        } else if (c == '\n') {
+            advance(lex);
+            lex->line++;
+            lex->col = 0;
+        } else if (c == ' ' || c == '\t' || c == '\r') {
+            advance(lex);
+        } else {
+            lex->base = lex->head;
+            return;
+        }
+    }
+}
+
+// Builds a token of the given kind covering lex->base up to lex->head. The
+// column is that of the token's first character and the value is zilch.
+static
+struct token create_token(struct lexer *lex, u16 token_kind)
+{
+    int width = (int)(lex->head - lex->base);
+
+    struct token made;
+    made.start = lex->base;
+    made.len = width;
+    made.kind = token_kind;
+    made.line = lex->line;
+    made.col = lex->col - width;
+    made.val = create_zilch();
+    return made;
+}
+
+// Builds a TOKEN_ERR token. Instead of pointing into the source, its
+// start/len describe the message text itself.
+static
+struct token err_token(struct lexer *lex, const char *msg)
+{
+    struct token failed;
+    failed.start = msg;
+    failed.len = (int)strlen(msg);
+    failed.kind = TOKEN_ERR;
+    failed.col = lex->col;
+    failed.line = lex->line;
+    failed.val = create_zilch();
+    return failed;
+}
+
+// Lexes a number literal: a run of digits, optionally followed by '.' and
+// more digits. The caller has already consumed the first digit.
+static
+struct token num_token(struct lexer *lex)
+{
+    while (is_digit(current_char(lex)))
+        advance(lex);
+
+    // A '.' directly followed by another '.' begins the ".." operator
+    // (TOKEN_DOT_DOT), not a fractional part, so it must not be swallowed
+    // here; otherwise "1..x" would lex as "1." followed by ".x".
+    if (current_char(lex) == '.' && lex->head[1] != '.') {
+        advance(lex);
+        while (is_digit(current_char(lex)))
+            advance(lex);
+    }
+
+    return create_token(lex, TOKEN_NUM);
+}
+
+// An ident(ifier) token is either a keyword or an actual identifier. The
+// caller has already consumed the first character.
+static
+struct token ident_token(struct lexer *lex)
+{
+    static const struct {
+        const char *word;
+        u16 kind;
+    } keywords[] = {
+        {"if", TOKEN_IF},
+        {"elseif", TOKEN_ELSEIF},
+        {"else", TOKEN_ELSE},
+        {"loop", TOKEN_LOOP},
+        {"mod", TOKEN_MOD},
+        {"true", TOKEN_TRUE},
+        {"false", TOKEN_FALSE},
+        {"zilch", TOKEN_ZILCH},
+        {"nada", TOKEN_ZILCH},
+        {"do", TOKEN_DO},
+        {"break", TOKEN_BREAK},
+        {"next", TOKEN_NEXT},
+        {"in", TOKEN_IN},
+        {"fun", TOKEN_FUN},
+        {"ret", TOKEN_RET},
+        {"let", TOKEN_LET},
+        {"end", TOKEN_END},
+        {"global", TOKEN_GLOBAL},
+        {"print", TOKEN_PRINT},
+    };
+
+    for (;;) {
+        char c = current_char(lex);
+        bool keep;
+
+        if (c == ':') {
+            // A ':' is only part of the identifier when an identifier
+            // character follows it (namespacing, as in "a:b").
+            keep = is_ident(lex->head[1]);
+        } else {
+            keep = is_ident(c) || is_digit(c);
+        }
+
+        if (!keep)
+            break;
+        advance(lex);
+    }
+
+    // ':' is just a namespacer, so it is disallowed at the end of an
+    // identifier.
+    if (lex->head[-1] == ':')
+        return err_token(lex, "cannot end an identifier in ':'");
+
+    u16 kind = TOKEN_IDENT;
+    for (size_t i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) {
+        if (match_kw(lex, keywords[i].word)) {
+            kind = keywords[i].kind;
+            break;
+        }
+    }
+
+    return create_token(lex, kind);
+}
+
+// Lexes a string literal whose opening quote has already been consumed;
+// `term` is the quote character that must close it. The escapes \\ \' \" \n
+// \t \r are decoded; any other escape yields an error token, as does
+// reaching the end of the source before the closing quote. On success the
+// decoded text is stored in the token's val.
+static
+struct token str_token(struct lexer *lex, char term)
+{
+    // Scratch buffer for the decoded characters; every exit path frees it.
+    char *chars = da_create(char, 0);
+
+    while (current_char(lex) != term && current_char(lex) != '\0') {
+        if (current_char(lex) != '\\') {
+            da_append(char, &chars, current_char(lex));
+            advance(lex);
+            continue;
+        }
+        advance(lex); // eat backslash
+
+        switch (current_char(lex)) {
+        case '\\': da_append(char, &chars, '\\'); break;
+        case '\'': da_append(char, &chars, '\''); break;
+        case '"': da_append(char, &chars, '"'); break;
+        case 'n': da_append(char, &chars, '\n'); break;
+        case 't': da_append(char, &chars, '\t'); break;
+        case 'r': da_append(char, &chars, '\r'); break;
+        default:
+            da_free(chars);
+            return err_token(lex, "invalid escape sequence");
+        }
+
+        advance(lex); // eat escape
+    }
+
+    if (current_char(lex) == '\0') {
+        // Unterminated literal: release the scratch buffer before bailing.
+        da_free(chars);
+        return err_token(lex, "string never terminates");
+    }
+
+    da_append(char, &chars, 0);
+
+    advance(lex); // eat terminator
+
+    struct token str = create_token(lex, TOKEN_STR);
+    // da_len counts the NUL appended above, hence the - 1.
+    str.val = wrap_str(copy_str(chars, da_len(chars) - 1));
+    da_free(chars);
+    return str;
+}
+
+// A symbol token is any token that contains no alphanumeric characters.
+// `c` is the symbol's first character, already consumed by the caller.
+// Single-character symbols use their own character value as the token kind.
+static
+struct token symbol_token(struct lexer *lex, char c)
+{
+    // Kind to produce when the symbol is immediately followed by '='.
+    u16 with_eql;
+
+    switch (c) {
+    case '(': case ')':
+    case '{': case '}':
+    case '[': case ']':
+    case ',': case ';': case ':':
+        return create_token(lex, c);
+    case '.':
+        if (match_char(lex, '.'))
+            return create_token(lex, TOKEN_DOT_DOT);
+        return create_token(lex, c);
+    case '+': with_eql = TOKEN_PLUS_EQL; break;
+    case '-': with_eql = TOKEN_MINUS_EQL; break;
+    case '*': with_eql = TOKEN_MULT_EQL; break;
+    case '/': with_eql = TOKEN_DIV_EQL; break;
+    case '%': with_eql = TOKEN_MOD_EQL; break;
+    case '<': with_eql = TOKEN_LTEQL; break;
+    case '>': with_eql = TOKEN_GTEQL; break;
+    case '=': with_eql = TOKEN_EQL; break;
+    case '!': with_eql = TOKEN_NEQL; break;
+    default:
+        // TODO: log unknown character
+        return err_token(lex, "unknown character");
+    }
+
+    if (match_char(lex, '='))
+        return create_token(lex, with_eql);
+    return create_token(lex, c);
+}
+
+// Points the lexer at the start of `src` and resets its position to line 1,
+// column 0.
+void lex_init(struct lexer *lex, const char *src)
+{
+    lex->line = 1;
+    lex->col = 0;
+
+    lex->head = lex->base = lex->src = src;
+}
+
+// Produces the next token from the source. Whitespace and comments are
+// skipped first; the token's first character is then consumed and decides
+// which specialised lexing routine finishes the job.
+struct token lex_next_token(struct lexer *lex)
+{
+    goto_token_start(lex);
+
+    char first = current_char(lex);
+    if (first == '\0')
+        return create_token(lex, TOKEN_EOF);
+
+    advance(lex);
+
+    switch (first) {
+    case '"':
+    case '\'':
+        return str_token(lex, first);
+    default:
+        break;
+    }
+
+    if (is_digit(first))
+        return num_token(lex);
+
+    return is_ident(first) ? ident_token(lex) : symbol_token(lex, first);
+}
+
+// Writes a printable, NUL-terminated name for `kind` into `dst`.
+//
+// dst:  destination buffer
+// len:  total size of `dst` in bytes, including room for the terminator;
+//       nothing is written when it is 0
+// kind: a token kind; values below TOKEN_EOF are single-character tokens
+//       and are rendered as that character
+//
+// Names that do not fit are truncated to len - 1 characters.
+void token_kind_name(char *dst, size_t len, u16 kind)
+{
+    char char_name[2] = {0, 0};
+    const char *name = char_name;
+
+    if (len == 0)
+        return;
+
+    if (kind < TOKEN_EOF) {
+        char_name[0] = (char)kind;
+    } else {
+        // Guard the table lookup against kinds past the last named token.
+        size_t idx = (size_t)(kind - TOKEN_EOF);
+        size_t count = sizeof(token_names) / sizeof(token_names[0]);
+        name = idx < count ? token_names[idx] : "TOKEN_UNKNOWN";
+    }
+
+    // Leave one byte for the terminator so dst[n] stays inside the buffer.
+    size_t n = strlen(name);
+    if (n > len - 1)
+        n = len - 1;
+    memcpy(dst, name, n);
+    dst[n] = '\0';
+}
+
+// Prints one line describing `tok` to stdout: kind name, line:column, and
+// the token's text.
+void print_token(struct token tok)
+{
+    char label[128];
+    token_kind_name(label, sizeof(label), tok.kind);
+
+    printf("%-15s | %3d:%-2d | %-24.*s\n",
+           label, tok.line, tok.col, tok.len, tok.start);
+}
diff --git a/uscript/lex.h b/uscript/lex.h
new file mode 100644
index 0000000..119b867
--- /dev/null
+++ b/uscript/lex.h
@@ -0,0 +1,74 @@
+#ifndef __USCRIPT_LEX_H__
+#define __USCRIPT_LEX_H__
+
+#include "common.h"
+#include "val.h"
+
+// X-macro listing every multi-character / keyword token. It is expanded with
+// a one-argument macro to generate parallel definitions (the token_kind enum
+// below, and the name table in lex.c), so the order here is the enum order.
+#define XTOKENS(_) \
+ _(PRINT) \
+ _(BREAK) \
+ _(DIV_EQL) \
+ _(DO) \
+ _(DOT_DOT) \
+ _(ELSE) \
+ _(ELSEIF) \
+ _(END) \
+ _(EQL) \
+ _(ERR) \
+ _(FALSE) \
+ _(FUN) \
+ _(GLOBAL) \
+ _(GTEQL) \
+ _(IDENT) \
+ _(IF) \
+ _(IN) \
+ _(LET) \
+ _(LOOP) \
+ _(LTEQL) \
+ _(MINUS_EQL) \
+ _(MOD) \
+ _(MOD_EQL) \
+ _(MULT_EQL) \
+ _(NEQL) \
+ _(NEXT) \
+ _(NUM) \
+ _(PLUS_EQL) \
+ _(RET) \
+ _(STR) \
+ _(TRUE) \
+ _(ZILCH)
+
+// Single-character tokens are represented by their ASCII value, but other
+// types of tokens are represented by a token_kind enum value. TOKEN_EOF is
+// 256 so that the enum values never collide with a character value; the
+// remaining enumerators follow it in XTOKENS order.
+enum token_kind {
+ TOKEN_EOF = 256,
+#define DEF_TOKEN_ENUM(name) TOKEN_##name,
+ XTOKENS(DEF_TOKEN_ENUM)
+#undef DEF_TOKEN_ENUM
+};
+
+// Lexer state over a NUL-terminated source string.
+struct lexer {
+ // The source text as passed to lex_init().
+ const char *src;
+ // First character of the token currently being scanned.
+ const char *base;
+ // Next character to be examined.
+ const char *head;
+
+ // Current line; lex_init() starts it at 1.
+ int line;
+ // Characters consumed on the current line; reset to 0 at each newline.
+ int col;
+};
+
+// One lexed token. For TOKEN_ERR tokens, start/len describe the error
+// message rather than a slice of the source.
+struct token {
+ // First character of the token's text (not NUL-terminated at len).
+ const char *start;
+ // Number of characters in the token's text.
+ int len;
+ // Source position of the token.
+ int line;
+ int col;
+ // Either an ASCII character value or an enum token_kind value.
+ u16 kind;
+ // Decoded string for TOKEN_STR tokens; zilch for every other kind.
+ struct us_val val;
+};
+
+// Prepare `lex` to tokenize the NUL-terminated string `src`.
+void lex_init(struct lexer *lex, const char *src);
+// Return the next token; yields TOKEN_EOF at the end of the source.
+struct token lex_next_token(struct lexer *lex);
+
+// Write a printable name for `kind` into `dst`; `len` is passed by
+// print_token() as the size of the destination buffer.
+void token_kind_name(char *dst, size_t len, u16 kind);
+// Print a one-line description of `tok` to stdout.
+void print_token(struct token tok);
+
+#endif // __USCRIPT_LEX_H__
diff --git a/uscript/parser.c b/uscript/parser.c
new file mode 100644
index 0000000..c51bc85
--- /dev/null
+++ b/uscript/parser.c
@@ -0,0 +1,841 @@
+#include "parser.h"
+
+#include <stdarg.h>
+#include <string.h>
+
+#include "dyn_arr.h"
+#include "lex.h"
+#include "val.h"
+#include "vm.h"
+
+// Shorthands that operate on the prototype of the function currently being
+// compiled (p->fp->proto): append a bytecode byte, add a constant, and query
+// the current bytecode length.
+#define parser_add_byte(p, byte) (proto_add_byte((p)->fp->proto, byte))
+#define parser_add_const(p, c) (proto_add_const((p)->fp->proto, c))
+#define parser_bytecode_len(p) (da_len((p)->fp->proto->bytecode))
+
+// Operator binding strength, from loosest to tightest. parse_expr() keeps
+// consuming infix operators while their precedence is at least the level it
+// was asked to parse.
+enum precedence {
+ PREC_NONE,
+ PREC_ASSIGN, // =
+ PREC_EQL, // == !=
+ PREC_COMP, // < <= > >=
+ PREC_CONCAT, // ..
+ PREC_TERM, // + -
+ PREC_FACTOR, // * / %
+ PREC_UNARY, // - !
+ PREC_CALL, // ()
+};
+
+// Book-keeping for one loop that is currently being compiled. Loops nest
+// through `outer`.
+struct loop {
+ // Enclosing loop, or NULL.
+ struct loop *outer;
+ // Bytecode position recorded on loop entry; target passed to add_loop().
+ int start;
+ // Scope depth of the loop body; break/next pop locals down to this depth.
+ int scope;
+ // Whether `label` holds a label; `label` is only assigned when true.
+ bool labeled;
+ struct token label;
+ // Jump locations emitted by 'break', patched when the loop ends.
+ int *breaks; // dyn_arr
+};
+
+// A local variable slot known to the compiler.
+struct variable {
+ // Identifier token naming the variable.
+ struct token name;
+ // Scope depth at which it was declared.
+ int scope;
+ // Set once an inner function captures it as an upvalue; pop_scope() then
+ // emits BC_POP_UPVAL instead of BC_POP for it.
+ bool captured;
+};
+
+// A variable captured by a function from an enclosing function.
+struct upval {
+ // Identifier token naming the captured variable.
+ struct token name;
+ // true: `index` is a local slot of the directly enclosing function.
+ // false: `index` is an upvalue slot of the directly enclosing function.
+ bool is_local;
+ u8 index;
+};
+
+// Per-function compilation state. A new one is pushed by begin_function()
+// and popped by end_function().
+struct func_parser {
+ // State of the enclosing function, or NULL for the outermost one.
+ struct func_parser *outer;
+ // Prototype receiving the bytecode and constants.
+ struct us_proto *proto;
+ struct upval *upvals; // dyn_arr
+ struct variable *locals; // dyn_arr
+ // Innermost loop being compiled, or NULL.
+ struct loop *loop;
+ // Current block nesting depth.
+ int scope;
+ // True for the top-level script; 'ret' is rejected there.
+ bool is_script;
+};
+
+// Whole-compilation state.
+struct parser {
+ struct lexer lex;
+
+ // Most recently consumed token.
+ struct token prev;
+ // Lookahead token, not yet consumed.
+ struct token cur;
+
+ // Function currently being compiled.
+ struct func_parser *fp;
+
+ // Whether the expression being parsed may be the target of '='.
+ bool can_assign;
+ // Set by show_error(); compile() returns NULL when it is set.
+ bool had_err;
+};
+
+// Signature shared by all expression parsing routines.
+typedef void (*fn_parse)(struct parser *p);
+
+// Parse rule for one token kind (see the `expressions` table).
+struct expr {
+ // Handler when the token begins an expression, or NULL.
+ fn_parse prefix;
+ // Handler when the token follows a left operand, or NULL.
+ fn_parse infix;
+ // Precedence of the token when used as an infix operator.
+ enum precedence prec;
+};
+
+// Reports a compile error on stderr, located at `tok`, and marks the parse
+// as failed. `msg` is a printf-style format consumed with the trailing
+// arguments.
+static
+void show_error(struct parser *p, struct token tok, const char *msg, ...)
+{
+    va_list ap;
+
+    if (tok.kind != TOKEN_EOF)
+        fprintf(stderr, "on line %d at '%.*s':\n\t", tok.line, tok.len, tok.start);
+    else
+        fprintf(stderr, "on line %d at EOF:\n\t", tok.line);
+
+    va_start(ap, msg);
+    vfprintf(stderr, msg, ap);
+    va_end(ap);
+    putc('\n', stderr);
+
+    p->had_err = true;
+}
+
+// Moves the lookahead into p->prev and fetches the next token into p->cur.
+// Error tokens from the lexer are reported and skipped, so p->cur is never
+// a TOKEN_ERR when this returns.
+static
+void advance(struct parser *p)
+{
+    p->prev = p->cur;
+    for (;;) {
+        p->cur = lex_next_token(&p->lex);
+        // print_token(p->cur);
+        if (p->cur.kind != TOKEN_ERR)
+            break;
+        // An error token's start/len hold the message text. Print it as
+        // data, never as a format string.
+        show_error(p, p->prev, "%.*s", p->cur.len, p->cur.start);
+    }
+}
+
+// Emits a jump instruction with a two-byte placeholder operand and returns
+// the bytecode position of that operand, to be handed to end_jump() later.
+static
+int begin_jump(struct parser *p, u8 instruction)
+{
+    parser_add_byte(p, instruction);
+
+    // The operand begins right after the opcode byte just written.
+    int operand = parser_bytecode_len(p);
+    parser_add_byte(p, 0xFF);
+    parser_add_byte(p, 0xFF);
+    return operand;
+}
+
+// Patches the placeholder written by begin_jump() at `loc` so the jump
+// lands at the current end of the bytecode. The distance is stored big-endian
+// in two bytes and measured from just past the operand.
+static
+void end_jump(struct parser *p, int loc)
+{
+    int dist = parser_bytecode_len(p) - (loc + 2);
+    if (dist > UINT16_MAX)
+        show_error(p, p->prev, "jump too large");
+
+    u8 *code = p->fp->proto->bytecode;
+    code[loc] = (dist >> 8) & 0xFF;
+    code[loc + 1] = dist & 0xFF;
+}
+
+// Emits a BC_LOOP instruction that jumps backwards relative to the recorded
+// position `loc`. The distance is stored big-endian in two bytes.
+static
+void add_loop(struct parser *p, int loc)
+{
+    int dist = parser_bytecode_len(p) - loc + 2;
+    if (dist > UINT16_MAX)
+        show_error(p, p->prev, "jump too large");
+
+    u8 hi = (dist >> 8) & 0xFF;
+    u8 lo = dist & 0xFF;
+
+    parser_add_byte(p, BC_LOOP);
+    parser_add_byte(p, hi);
+    parser_add_byte(p, lo);
+}
+
+// If the lookahead token has kind `tok`, consumes it and returns true;
+// otherwise leaves the parser untouched and returns false.
+static
+bool consume(struct parser *p, u16 tok)
+{
+    if (p->cur.kind == tok) {
+        advance(p);
+        return true;
+    }
+    return false;
+}
+
+// Like consume(), but a mismatch is reported as an error using `err` and
+// the offending token is not consumed.
+static
+void expect(struct parser *p, u16 tok, const char *err)
+{
+    if (p->cur.kind == tok)
+        advance(p);
+    else
+        show_error(p, p->cur, err);
+}
+
+// Registers `name` as a new local of the current function at the current
+// scope depth. Reports an error once the local count exceeds UINT8_MAX, but
+// still records the variable.
+static
+void declare_variable(struct parser *p, struct token name)
+{
+    struct func_parser *fn = p->fp;
+
+    if (da_len(fn->locals) > UINT8_MAX)
+        show_error(p, name, "too many locals");
+
+    struct variable fresh;
+    fresh.name = name;
+    fresh.captured = false;
+    fresh.scope = fn->scope;
+    da_append(struct variable, &fn->locals, fresh);
+}
+
+// Looks up `name` among the locals of `fp`, newest first so that inner
+// declarations shadow outer ones. Returns the slot index, or -1.
+static
+int find_local(struct func_parser *fp, struct token name)
+{
+    int i = da_len(fp->locals);
+
+    while (i-- > 0) {
+        const struct variable *cand = &fp->locals[i];
+        if (cand->name.len != name.len)
+            continue;
+        if (memcmp(name.start, cand->name.start, name.len) == 0)
+            return i;
+    }
+    return -1;
+}
+
+// Resolves `name` to an upvalue slot of the function described by `fp`,
+// capturing it from the enclosing functions if necessary.
+//
+// Returns the upvalue index within `fp`, or -1 when no enclosing function
+// defines `name`.
+static
+int find_upval(struct parser *p, struct func_parser *fp, struct token name)
+{
+    // The outermost function has no enclosing function to capture from.
+    if (fp == NULL || fp->outer == NULL)
+        return -1;
+
+    // Reuse the slot if this function already captured the name. The last
+    // valid index is len - 1.
+    for (int i = da_len(fp->upvals) - 1; i >= 0; i--) {
+        const struct upval *known = &fp->upvals[i];
+        if (
+            name.len == known->name.len &&
+            memcmp(name.start, known->name.start, name.len) == 0
+        )
+            return i;
+    }
+
+    struct upval upval;
+    upval.name = name;
+
+    // Prefer a local of the directly enclosing function ...
+    int local = find_local(fp->outer, name);
+    if (local != -1) {
+        fp->outer->locals[local].captured = true;
+        upval.is_local = true;
+        upval.index = (u8)local;
+    } else {
+        // ... otherwise have the enclosing function capture it first and
+        // refer to that function's upvalue slot.
+        int outer = find_upval(p, fp->outer, name);
+        if (outer == -1)
+            return -1;
+        upval.is_local = false;
+        upval.index = (u8)outer;
+    }
+
+    da_append(struct upval, &fp->upvals, upval);
+    fp->proto->upvalc++;
+    return da_len(fp->upvals) - 1;
+}
+
+// Emits a pop instruction for every local declared at depth `scope` or
+// deeper (BC_POP_UPVAL for captured ones, BC_POP otherwise), newest first.
+// The locals array itself is not modified; the return value is the number
+// of locals that remain below that depth.
+static
+int pop_scope(struct parser *p, int scope)
+{
+    int remaining = da_len(p->fp->locals);
+
+    while (remaining > 0) {
+        struct variable top = p->fp->locals[remaining - 1];
+        if (top.scope < scope)
+            break;
+        parser_add_byte(p, top.captured ? BC_POP_UPVAL : BC_POP);
+        remaining--;
+    }
+    return remaining;
+}
+
+// Enters a nested block: locals declared from now on belong to the deeper
+// scope.
+static
+void begin_scope(struct parser *p)
+{
+ p->fp->scope++;
+}
+
+// Leaves the current block: emits pops for the block's locals, truncates the
+// locals array to the count pop_scope() reports, and steps the depth back.
+static
+void end_scope(struct parser *p)
+{
+ *da_len_ptr(p->fp->locals) = pop_scope(p, p->fp->scope);
+ p->fp->scope--;
+}
+
+// Pushes a fresh func_parser for `proto` on top of the current one, so that
+// subsequent code generation targets the new function.
+static
+void begin_function(struct parser *p, struct us_proto *proto)
+{
+    struct func_parser *inner = mem_alloc(sizeof(*inner));
+
+    inner->proto = proto;
+    inner->outer = p->fp;
+    inner->is_script = false;
+    inner->scope = 0;
+    inner->loop = NULL;
+    inner->upvals = da_create(struct upval, 0);
+    inner->locals = da_create(struct variable, 0);
+
+    p->fp = inner;
+}
+
+// Finishes the function currently being compiled and pops its func_parser.
+// When there is an enclosing function, the finished prototype is stored in
+// that function's constants and a BC_LOAD_FUNC is emitted there, followed by
+// one (is_local, index) byte pair per captured upvalue.
+static
+void end_function(struct parser *p)
+{
+ // Implicit "return zilch" at the end of every function body.
+ parser_add_byte(p, BC_ZILCH);
+ parser_add_byte(p, BC_RET);
+
+ struct func_parser *fp = p->fp;
+ p->fp = fp->outer;
+
+ // From here on the parser_* macros target the enclosing function.
+ if (p->fp) {
+ // NOTE(review): the constants slot is written without a visible
+ // capacity check — confirm against the definition of us_proto.
+ p->fp->proto->constants[p->fp->proto->nconstants] =
+ wrap_proto(fp->proto);
+ parser_add_byte(p, BC_LOAD_FUNC);
+ parser_add_byte(p, p->fp->proto->nconstants++);
+
+ for (int i = 0; i < fp->proto->upvalc; i++) {
+ parser_add_byte(p, fp->upvals[i].is_local ? 1 : 0);
+ parser_add_byte(p, (u8)fp->upvals[i].index);
+ }
+ }
+
+#ifdef UE_DEBUG
+ print_func(fp->proto);
+#endif
+
+ da_free(fp->locals);
+ da_free(fp->upvals);
+ mem_free(fp);
+}
+
+// Forward declarations: the expression handlers below and the rule table
+// refer to each other.
+static void parse_expr(struct parser *p, enum precedence prec);
+static struct expr get_expr(struct token tok);
+static void expr(struct parser *p);
+
+// Compiles the number literal in p->prev. Whole values up to UINT8_MAX are
+// encoded inline as BC_SMALL_INT; everything else goes through the
+// constants list.
+static
+void parse_number(struct parser *p)
+{
+    struct token num_tok = p->prev;
+
+    // The token is a slice of the source, not a NUL-terminated string.
+    // strtod() on the raw pointer would keep reading whatever follows the
+    // token if it still looks numeric to it (e.g. the "e5" in "1e5" or the
+    // "x10" in "0x10"), so convert a copy of exactly the token's text.
+    char *text = mem_alloc((size_t)num_tok.len + 1);
+    memcpy(text, num_tok.start, (size_t)num_tok.len);
+    text[num_tok.len] = '\0';
+    double n = strtod(text, NULL);
+    mem_free(text);
+
+    // Avoid filling up the constants list with numbers that could easily
+    // just be part of the bytecode.
+    if (n <= UINT8_MAX && (u8)n == n) {
+        parser_add_byte(p, BC_SMALL_INT);
+        parser_add_byte(p, (u8)n);
+        return;
+    }
+
+    parser_add_const(p, create_num(n));
+}
+
+// Compiles the string literal in p->prev by adding the value the lexer
+// attached to the token as a constant.
+static
+void parse_string(struct parser *p)
+{
+ parser_add_const(p, p->prev.val);
+}
+
+// Compiles the keyword literal in p->prev (false / true / zilch) to its
+// dedicated opcode. Any other token kind emits nothing.
+static
+void parse_literal(struct parser *p)
+{
+    u16 kind = p->prev.kind;
+
+    if (kind == TOKEN_FALSE)
+        parser_add_byte(p, BC_FALSE);
+    else if (kind == TOKEN_TRUE)
+        parser_add_byte(p, BC_TRUE);
+    else if (kind == TOKEN_ZILCH)
+        parser_add_byte(p, BC_ZILCH);
+}
+
+// Compiles a reference to the identifier in p->prev. The name is resolved
+// first as a local and then as an upvalue; an unresolved name is an error.
+// When assignment is permitted and '=' follows, the right-hand side is
+// compiled and a set instruction is emitted; otherwise a get instruction.
+static
+void parse_ident(struct parser *p)
+{
+    struct token ident = p->prev;
+    u8 setter;
+    u8 getter;
+
+    int var = find_local(p->fp, ident);
+    if (var != -1) {
+        setter = BC_SET_LOCAL;
+        getter = BC_GET_LOCAL;
+    } else {
+        var = find_upval(p, p->fp, ident);
+        if (var == -1) {
+            show_error(p, ident, "undefined variable");
+            return;
+        }
+        setter = BC_SET_UPVAL;
+        getter = BC_GET_UPVAL;
+    }
+
+    bool assigning = p->can_assign && consume(p, '=');
+    if (assigning)
+        expr(p);
+
+    parser_add_byte(p, assigning ? setter : getter);
+    parser_add_byte(p, (u8)var);
+}
+
+// Compiles the right operand of the binary operator in p->prev (the left
+// operand has been compiled already) and then emits the operator's opcode.
+// The right operand is parsed one precedence level above the operator's own.
+static
+void parse_binary(struct parser *p)
+{
+    struct token op = p->prev;
+
+    parse_expr(p, get_expr(op).prec + 1);
+
+    // -1 means "no opcode": unknown operator kinds emit nothing.
+    int opcode = -1;
+    switch (op.kind) {
+    case '+': opcode = BC_ADD; break;
+    case '-': opcode = BC_SUB; break;
+    case '*': opcode = BC_MULT; break;
+    case '/': opcode = BC_DIV; break;
+    case '%': opcode = BC_MOD; break;
+    case '>': opcode = BC_GT; break;
+    case '<': opcode = BC_LT; break;
+    case TOKEN_DOT_DOT: opcode = BC_CONCAT; break;
+    case TOKEN_EQL: opcode = BC_EQL; break;
+    case TOKEN_NEQL: opcode = BC_NEQL; break;
+    case TOKEN_GTEQL: opcode = BC_GTE; break;
+    case TOKEN_LTEQL: opcode = BC_LTE; break;
+    }
+
+    if (opcode != -1)
+        parser_add_byte(p, opcode);
+}
+
+// Compiles the operand of the prefix operator in p->prev and then emits the
+// operator's opcode ('-' negates, '!' inverts).
+static
+void parse_unary(struct parser *p)
+{
+    u16 op_kind = p->prev.kind;
+
+    parse_expr(p, PREC_UNARY);
+
+    if (op_kind == '-')
+        parser_add_byte(p, BC_NEG);
+    else if (op_kind == '!')
+        parser_add_byte(p, BC_NOT);
+}
+
+// Compiles a parenthesized expression; the '(' has already been consumed.
+static
+void parse_grouping(struct parser *p)
+{
+ expr(p);
+ expect(p, ')', "expected ')'");
+}
+
+// Compiles a call's argument list (the '(' has already been consumed) and
+// emits BC_CALL followed by the argument count.
+static
+void parse_call(struct parser *p)
+{
+    int argc = 0;
+
+    if (p->cur.kind != ')') {
+        for (;;) {
+            expr(p);
+            argc++;
+            if (!consume(p, ','))
+                break;
+        }
+    }
+    expect(p, ')', "expected ')'");
+
+    if (argc > UINT8_MAX)
+        show_error(p, p->prev, "max argument count is 255");
+
+    parser_add_byte(p, BC_CALL);
+    parser_add_byte(p, argc);
+}
+
+// Parse rules indexed by token kind: {prefix handler, infix handler, infix
+// precedence}. Token kinds without an explicit entry are zero-initialized,
+// i.e. {NULL, NULL, PREC_NONE}.
+static
+struct expr expressions[] = {
+ ['('] = {parse_grouping, parse_call, PREC_CALL},
+ [')'] = {NULL, NULL, PREC_NONE},
+ ['{'] = {NULL, NULL, PREC_NONE},
+ ['}'] = {NULL, NULL, PREC_NONE},
+ ['['] = {NULL, NULL, PREC_NONE},
+ [']'] = {NULL, NULL, PREC_NONE},
+ [','] = {NULL, NULL, PREC_NONE},
+ [';'] = {NULL, NULL, PREC_NONE},
+ [':'] = {NULL, NULL, PREC_NONE},
+ ['.'] = {NULL, NULL, PREC_NONE},
+ ['+'] = {NULL, parse_binary, PREC_TERM},
+ ['-'] = {parse_unary, parse_binary, PREC_TERM},
+ ['*'] = {NULL, parse_binary, PREC_FACTOR},
+ ['/'] = {NULL, parse_binary, PREC_FACTOR},
+ ['%'] = {NULL, parse_binary, PREC_FACTOR},
+ ['<'] = {NULL, parse_binary, PREC_COMP},
+ ['>'] = {NULL, parse_binary, PREC_COMP},
+ ['='] = {NULL, NULL, PREC_NONE},
+ ['!'] = {parse_unary, NULL, PREC_NONE},
+ [TOKEN_EOF] = {NULL, NULL, PREC_NONE},
+ [TOKEN_BREAK] = {NULL, NULL, PREC_NONE},
+ [TOKEN_DIV_EQL] = {NULL, NULL, PREC_NONE},
+ [TOKEN_DOT_DOT] = {NULL, parse_binary, PREC_CONCAT},
+ [TOKEN_DO] = {NULL, NULL, PREC_NONE},
+ [TOKEN_ELSE] = {NULL, NULL, PREC_NONE},
+ [TOKEN_END] = {NULL, NULL, PREC_NONE},
+ [TOKEN_EQL] = {NULL, parse_binary, PREC_EQL},
+ [TOKEN_ERR] = {NULL, NULL, PREC_NONE},
+ [TOKEN_FALSE] = {parse_literal, NULL, PREC_NONE},
+ [TOKEN_FUN] = {NULL, NULL, PREC_NONE},
+ [TOKEN_GLOBAL] = {NULL, NULL, PREC_NONE},
+ [TOKEN_GTEQL] = {NULL, parse_binary, PREC_COMP},
+ [TOKEN_IDENT] = {parse_ident, NULL, PREC_NONE},
+ [TOKEN_IF] = {NULL, NULL, PREC_NONE},
+ [TOKEN_IN] = {NULL, NULL, PREC_NONE},
+ [TOKEN_LET] = {NULL, NULL, PREC_NONE},
+ [TOKEN_LOOP] = {NULL, NULL, PREC_NONE},
+ [TOKEN_LTEQL] = {NULL, parse_binary, PREC_COMP},
+ [TOKEN_MINUS_EQL] = {NULL, NULL, PREC_NONE},
+ [TOKEN_MOD] = {NULL, NULL, PREC_NONE},
+ [TOKEN_MOD_EQL] = {NULL, NULL, PREC_NONE},
+ [TOKEN_MULT_EQL] = {NULL, NULL, PREC_NONE},
+ [TOKEN_NEQL] = {NULL, parse_binary, PREC_EQL},
+ [TOKEN_NEXT] = {NULL, NULL, PREC_NONE},
+ [TOKEN_NUM] = {parse_number, NULL, PREC_NONE},
+ [TOKEN_PLUS_EQL] = {NULL, NULL, PREC_NONE},
+ [TOKEN_RET] = {NULL, NULL, PREC_NONE},
+ [TOKEN_STR] = {parse_string, NULL, PREC_NONE},
+ [TOKEN_TRUE] = {parse_literal, NULL, PREC_NONE},
+ [TOKEN_ZILCH] = {parse_literal, NULL, PREC_NONE},
+};
+
+// Forward declaration: the block statements below parse nested statements.
+static void stat(struct parser *p);
+
+// Returns the parse rule for `tok`, looked up by its kind in `expressions`.
+static
+struct expr get_expr(struct token tok)
+{
+ return expressions[tok.kind];
+}
+
+// Core of the precedence-driven expression parser: parses one expression
+// whose infix operators all have precedence `prec` or higher.
+static
+void parse_expr(struct parser *p, enum precedence prec)
+{
+ advance(p);
+
+ // The token just consumed must be able to start an expression.
+ fn_parse prefix = get_expr(p->prev).prefix;
+ if (!prefix) {
+ show_error(p, p->prev, "expected expression");
+ return;
+ }
+
+ // Assignment is only allowed when parsing at the loosest level.
+ p->can_assign = prec <= PREC_ASSIGN;
+ prefix(p);
+
+ // Fold in infix operators for as long as they bind tightly enough.
+ while (prec <= get_expr(p->cur).prec) {
+ advance(p);
+ get_expr(p->prev).infix(p);
+ }
+
+ // A '=' still pending here was not consumed by any handler, so the
+ // left-hand side is not something that can be assigned to.
+ if (p->can_assign && consume(p, '=')) {
+ show_error(p, p->prev, "bad assignment");
+ }
+ p->can_assign = false;
+}
+
+// Parses a full expression, starting at the loosest precedence level.
+static
+void expr(struct parser *p)
+{
+ parse_expr(p, PREC_ASSIGN);
+}
+
+// Compiles an expression used as a statement; a BC_POP is emitted after it
+// to discard the result.
+static
+void expr_stat(struct parser *p)
+{
+ expr(p);
+ parser_add_byte(p, BC_POP);
+}
+
+// Compiles "let name [= expr] {, name [= expr]}". A variable without an
+// initializer starts out as zilch. Each name is declared only after its
+// initializer has been compiled.
+static
+void let_stat(struct parser *p)
+{
+    for (;;) {
+        expect(p, TOKEN_IDENT, "expected variable name");
+        struct token name = p->prev;
+
+        if (!consume(p, '='))
+            parser_add_byte(p, BC_ZILCH);
+        else
+            expr(p);
+
+        declare_variable(p, name);
+
+        if (!consume(p, ','))
+            break;
+    }
+}
+
+// Compiles "fun name(args...) <statements> end". The function's name is
+// declared as a local of the enclosing function before the body is
+// compiled; the parameters become the first locals of the new function.
+static
+void fun_stat(struct parser *p)
+{
+    expect(p, TOKEN_IDENT, "expected function name");
+    struct token name_token = p->prev;
+
+    struct us_str *name = copy_str(name_token.start, name_token.len);
+    struct us_proto *proto = create_proto(name);
+
+    declare_variable(p, name_token);
+
+    begin_function(p, proto);
+
+    expect(p, '(', "expected '(' after function name");
+    if (p->cur.kind != ')') {
+        do {
+            expect(p, TOKEN_IDENT, "expected argument name");
+            declare_variable(p, p->prev);
+            proto->argc++;
+        } while (consume(p, ','));
+    }
+    expect(p, ')', "expected ')' after arguments");
+
+    while (p->cur.kind != TOKEN_END && p->cur.kind != TOKEN_EOF)
+        stat(p);
+
+    // Close the function before consuming 'end' so that the code loading it
+    // is emitted into the enclosing function.
+    end_function(p);
+
+    expect(p, TOKEN_END, "unterminated function");
+}
+
+// Compiles an 'if' statement, including any 'elseif' / 'else' branches.
+// `is_elseif` is true when called recursively for an 'elseif' branch; only
+// the outermost call consumes the closing 'end'.
+static
+void if_stat(struct parser *p, bool is_elseif)
+{
+ struct token begin = p->prev;
+
+ expr(p);
+
+ // Skip the block when the condition is falsey; patched further down.
+ int jump = begin_jump(p, BC_FALSEY_JMP);
+
+ if (!is_elseif)
+ expect(p, ':', "expected ':' to begin 'if' block");
+ else
+ expect(p, ':', "expected ':' to begin 'elseif' block");
+
+ begin_scope(p);
+
+ while (
+ p->cur.kind != TOKEN_END &&
+ p->cur.kind != TOKEN_ELSE &&
+ p->cur.kind != TOKEN_ELSEIF &&
+ p->cur.kind != TOKEN_EOF
+ )
+ stat(p);
+
+ end_scope(p);
+
+ // After the taken block, jump over the remaining branches.
+ int else_jump = begin_jump(p, BC_JMP);
+ end_jump(p, jump);
+
+ // The only reason "elseif" was chosen over "else if" is because it
+ // reduces indentation in this one single spot.
+ if (consume(p, TOKEN_ELSEIF)) {
+ if_stat(p, true);
+ } else if (consume(p, TOKEN_ELSE)) {
+ expect(p, ':', "expected ':' to begin 'else' block");
+ begin_scope(p);
+ while (p->cur.kind != TOKEN_END && p->cur.kind != TOKEN_EOF)
+ stat(p);
+ end_scope(p);
+ }
+
+ end_jump(p, else_jump);
+
+ if (!is_elseif && !consume(p, TOKEN_END))
+ show_error(p, begin, "unterminated 'if' block");
+}
+
+// Compiles "loop [cond]: [<label>] <statements> end". Without a condition
+// the loop has no exit jump of its own and can only be left through 'break'.
+static
+void loop_stat(struct parser *p)
+{
+ // For now, we will just support loop <cond>. loop <var> in <expr>
+ // needs a lot to happen before it can be implemented.
+
+ struct token begin = p->prev;
+
+ // The loop record lives on this stack frame and is linked in as the
+ // innermost loop for the duration of the body.
+ // NOTE(review): loop.label stays unassigned unless a label is parsed
+ // below; readers of it should check loop.labeled first.
+ struct loop loop;
+ loop.outer = p->fp->loop;
+ loop.start = da_len(p->fp->proto->bytecode) - 1;
+ loop.labeled = false;
+ loop.scope = p->fp->scope + 1;
+ loop.breaks = da_create(int, 0);
+ p->fp->loop = &loop;
+
+ int exit_jump = -1;
+
+ if (!consume(p, ':')) {
+ expr(p);
+ exit_jump = begin_jump(p, BC_FALSEY_JMP);
+
+ expect(p, ':', "expected ':' to begin 'loop' block");
+ }
+
+ if (consume(p, '<')) {
+ expect(p, TOKEN_IDENT, "expected loop label");
+ loop.label = p->prev;
+ loop.labeled = true;
+ expect(p, '>', "expected '>' after loop label");
+ }
+
+ begin_scope(p);
+ while (p->cur.kind != TOKEN_END && p->cur.kind != TOKEN_EOF)
+ stat(p);
+ end_scope(p);
+ // Jump back to the recorded start for the next iteration.
+ add_loop(p, loop.start);
+
+ // Everything that leaves the loop lands here, just past the back jump.
+ if (exit_jump != -1)
+ end_jump(p, exit_jump);
+
+ for (int i = 0; i < da_len(loop.breaks); i++)
+ end_jump(p, loop.breaks[i]);
+
+ if (!consume(p, TOKEN_END))
+ show_error(p, begin, "unterminated 'loop' block");
+
+ p->fp->loop = loop.outer;
+ da_free(loop.breaks);
+}
+
+// Finds the enclosing loop carrying `label`, searching from the innermost
+// loop outwards. When no loop has that label an error is reported and the
+// innermost loop is returned, so the caller can carry on.
+static
+struct loop *find_loop(struct parser *p, struct token label)
+{
+    struct loop *loop = p->fp->loop;
+
+    while (loop != NULL) {
+        // Only labeled loops have a meaningful `label`; it is left
+        // unassigned for unlabeled ones and must not be read.
+        if (
+            loop->labeled &&
+            loop->label.len == label.len &&
+            memcmp(label.start, loop->label.start, label.len) == 0
+        ) {
+            break;
+        }
+        loop = loop->outer;
+    }
+
+    if (!loop) {
+        show_error(
+            p,
+            label,
+            "unknown loop label '<%.*s>'",
+            label.len, label.start
+        );
+        return p->fp->loop;
+    }
+
+    return loop;
+}
+
+// Parses an optional "<label>" after 'break' or 'next' and returns the loop
+// it designates; without a label, the innermost loop.
+static
+struct loop *loop_label(struct parser *p)
+{
+    if (!consume(p, '<'))
+        return p->fp->loop;
+
+    expect(p, TOKEN_IDENT, "expected loop label");
+    struct token wanted = p->prev;
+    expect(p, '>', "expected '>' after loop label");
+
+    return find_loop(p, wanted);
+}
+
+// Compiles 'break [<label>]': pops the locals of the target loop's body and
+// emits a forward jump that loop_stat() patches once the loop ends.
+static
+void break_stat(struct parser *p)
+{
+    if (p->fp->loop) {
+        struct loop *target = loop_label(p);
+        pop_scope(p, target->scope);
+        da_append(int, &target->breaks, begin_jump(p, BC_JMP));
+    } else {
+        show_error(p, p->prev, "'break' is only allowed in loops");
+    }
+}
+
+// Compiles 'next [<label>]': pops the locals of the target loop's body and
+// jumps back to that loop's start.
+static
+void next_stat(struct parser *p)
+{
+    if (p->fp->loop) {
+        struct loop *target = loop_label(p);
+        pop_scope(p, target->scope);
+        add_loop(p, target->start);
+    } else {
+        show_error(p, p->prev, "'next' is only allowed in loops");
+    }
+}
+
+// Compiles "do: <statements> end", a plain block with its own scope.
+static
+void do_stat(struct parser *p)
+{
+    struct token opener = p->prev;
+
+    begin_scope(p);
+
+    expect(p, ':', "expected ':' to begin 'do' block");
+
+    for (;;) {
+        u16 next = p->cur.kind;
+        if (next == TOKEN_END || next == TOKEN_EOF)
+            break;
+        stat(p);
+    }
+
+    if (!consume(p, TOKEN_END))
+        show_error(p, opener, "unterminated 'do' block");
+
+    end_scope(p);
+}
+
+// Compiles 'ret [expr]'. A bare 'ret' (directly followed by 'end' or ';')
+// returns zilch. Using it in the top-level script is reported as an error,
+// but the statement is still compiled.
+static
+void ret_stat(struct parser *p)
+{
+    if (p->fp->is_script)
+        show_error(p, p->prev, "ret is only allowed within functions");
+
+    bool bare = p->cur.kind == TOKEN_END || p->cur.kind == ';';
+    if (bare)
+        parser_add_byte(p, BC_ZILCH);
+    else
+        expr(p);
+
+    parser_add_byte(p, BC_RET);
+}
+
+// Compiles one statement, dispatching on its leading keyword. Anything that
+// does not start with a statement keyword is an expression statement. The
+// simple (non-block) statements accept an optional trailing ';'.
+static
+void stat(struct parser *p)
+{
+ if (consume(p, TOKEN_LET)) {
+ let_stat(p);
+ consume(p, ';');
+ } else if (consume(p, TOKEN_FUN)) {
+ fun_stat(p);
+ } else if (consume(p, TOKEN_IF)) {
+ if_stat(p, false);
+ } else if (consume(p, TOKEN_LOOP)) {
+ loop_stat(p);
+ } else if (consume(p, TOKEN_BREAK)) {
+ break_stat(p);
+ consume(p, ';');
+ } else if (consume(p, TOKEN_NEXT)) {
+ next_stat(p);
+ consume(p, ';');
+ } else if (consume(p, TOKEN_DO)) {
+ do_stat(p);
+ } else if (consume(p, TOKEN_RET)) {
+ ret_stat(p);
+ consume(p, ';');
+ } else if (consume(p, TOKEN_PRINT)) {
+ // temp. only til functions get functioning
+ expect(p, '(', "expected '('");
+ expr(p);
+ expect(p, ')', "expected ')'");
+ parser_add_byte(p, BC_PRINT);
+ consume(p, ';');
+ } else {
+ expr_stat(p);
+ }
+}
+
+// Compiles the NUL-terminated source `src` into a function prototype named
+// `name`.
+//
+// Returns the prototype, or NULL when any compile error was reported (the
+// errors themselves are printed to stderr).
+struct us_proto *compile(const char *name, const char *src)
+{
+    struct us_proto *proto = create_proto(copy_str(name, -1));
+
+    struct parser p;
+    memset(&p, 0, sizeof(p));
+    p.had_err = false;
+    p.can_assign = false;
+    p.fp = NULL;
+
+    // The first advance() copies p.cur into p.prev and may report an error
+    // against it, so give it a well-defined (empty) token to start from.
+    p.cur.start = src;
+    p.cur.len = 0;
+    p.cur.line = 1;
+    p.cur.col = 0;
+    p.cur.val = create_zilch();
+    p.prev = p.cur;
+
+    begin_function(&p, proto);
+    p.fp->is_script = true;
+
+    lex_init(&p.lex, src);
+
+    advance(&p);
+    while (!consume(&p, TOKEN_EOF)) {
+        stat(&p);
+    }
+    expect(&p, TOKEN_EOF, "expected EOF");
+
+    end_function(&p);
+
+    return p.had_err ? NULL : proto;
+}
diff --git a/uscript/parser.h b/uscript/parser.h
new file mode 100644
index 0000000..023bbfe
--- /dev/null
+++ b/uscript/parser.h
@@ -0,0 +1,8 @@
+#ifndef __USCRIPT_PARSER_H__
+#define __USCRIPT_PARSER_H__
+
+#include "val.h"
+
+// Compile the source text `src` into a prototype named `name`. Returns NULL
+// when a compile error was reported.
+struct us_proto *compile(const char *name, const char *src);
+
+#endif // __USCRIPT_PARSER_H__
diff --git a/uscript/us_debug.c b/uscript/us_debug.c
new file mode 100644
index 0000000..8d5dfe3
--- /dev/null
+++ b/uscript/us_debug.c
@@ -0,0 +1,117 @@
+#include "us_debug.h"
+
+#include <assert.h>
+
+#include "dyn_arr.h"
+#include "vm.h"
+
+// Printable opcode names, generated from xbytecode.h by defining BC() to
+// stringify each entry; print_instruction() indexes it by opcode value.
+static
+char *bc_names[] = {
+#define BC(name) "BC_" #name,
+#include "xbytecode.h"
+#undef BC
+};
+
+// Prints the instruction at bytecode offset `idx` of `proto` to stderr,
+// including its operands, and returns the offset of the next instruction.
+int print_instruction(struct us_proto *proto, int idx)
+{
+ enum bytecode instruction = proto->bytecode[idx];
+ fprintf(stderr, "%04d %-15s ", idx, bc_names[instruction]);
+ switch (instruction) {
+ // One operand byte: index into the constants list.
+ case BC_LOAD: {
+ int const_idx = proto->bytecode[idx + 1];
+ char *const_str = val_to_str(proto->constants[const_idx], NULL);
+ fprintf(
+ stderr,
+ "%d (%s)\n",
+ const_idx,
+ const_str
+ );
+ mem_free(const_str);
+ return idx + 2;
+ }
+ // Constant index, then two bytes for every upvalue of the loaded
+ // prototype.
+ case BC_LOAD_FUNC: {
+ int const_idx = proto->bytecode[idx + 1];
+ assert(proto->constants[const_idx].type == VAL_PROTO);
+ struct us_proto *p = get_proto(proto->constants[const_idx]);
+ int upvalc = p->upvalc;
+ fprintf(
+ stderr,
+ "%.*s() %d\n",
+ (int)p->name->len, p->name->chars,
+ upvalc
+ );
+ return idx + 2 + upvalc * 2;
+ }
+ // One operand byte: slot index.
+ case BC_SET_UPVAL:
+ case BC_GET_UPVAL:
+ case BC_SET_LOCAL:
+ case BC_GET_LOCAL: {
+ int local_idx = proto->bytecode[idx + 1];
+ fprintf(
+ stderr,
+ "%d\n",
+ local_idx
+ );
+ return idx + 2;
+ }
+ // One operand byte, printed as a plain number.
+ case BC_CALL:
+ case BC_SMALL_INT: {
+ fprintf(
+ stderr,
+ "%d\n",
+ proto->bytecode[idx + 1]
+ );
+ return idx + 2;
+ }
+ // Two operand bytes: big-endian distance, relative to the end of the
+ // instruction; BC_LOOP jumps backwards, the others forwards.
+ case BC_JMP:
+ case BC_LOOP:
+ case BC_FALSEY_JMP: {
+ u16 jmp = (u16)(proto->bytecode[idx + 1] << 8) | proto->bytecode[idx + 2];
+ int dst = idx + jmp + 3;
+ if (instruction == BC_LOOP) {
+ dst = idx + 3 - jmp;
+ }
+ fprintf(
+ stderr,
+ "-> %04d\n",
+ dst
+ );
+ return idx + 3;
+ }
+ // No operands.
+ case BC_PRINT:
+ case BC_TRUE:
+ case BC_FALSE:
+ case BC_ZILCH:
+ case BC_POP:
+ case BC_POP_UPVAL:
+ case BC_ADD:
+ case BC_SUB:
+ case BC_MULT:
+ case BC_DIV:
+ case BC_MOD:
+ case BC_NEG:
+ case BC_NOT:
+ case BC_CONCAT:
+ case BC_EQL:
+ case BC_NEQL:
+ case BC_GT:
+ case BC_LT:
+ case BC_GTE:
+ case BC_LTE:
+ case BC_RET:
+ putc('\n', stderr);
+ return idx + 1;
+ }
+
+ // unreachable
+ return idx + 1;
+}
+
+// Prints a disassembly of `func` to stderr: a header line with the
+// function's name, then one tab-indented line per instruction.
+void print_func(struct us_proto *func)
+{
+    fprintf(stderr, "%s():\n", func->name->chars);
+
+    int at = 0;
+    while (at < da_len(func->bytecode)) {
+        putc('\t', stderr);
+        at = print_instruction(func, at);
+    }
+}
diff --git a/uscript/us_debug.h b/uscript/us_debug.h
new file mode 100644
index 0000000..fd7bd2b
--- /dev/null
+++ b/uscript/us_debug.h
@@ -0,0 +1,10 @@
+#ifndef __USCRIPT_US_DEBUG_H__
+#define __USCRIPT_US_DEBUG_H__
+
+#include "common.h"
+#include "val.h"
+
+// Print the instruction at bytecode offset `idx` to stderr and return the
+// offset of the instruction that follows it.
+int print_instruction(struct us_proto *func, int idx);
+// Print a disassembly of every instruction in `func` to stderr.
+void print_func(struct us_proto *func);
+
+#endif // __USCRIPT_US_DEBUG_H__
diff --git a/uscript/uscript.c b/uscript/uscript.c
new file mode 100644
index 0000000..5ec43a1
--- /dev/null
+++ b/uscript/uscript.c
@@ -0,0 +1,34 @@
+#include "uscript.h"
+
+#include <stdio.h>
+
+#include "dyn_arr.h"
+#include "lex.h"
+#include "val.h"
+#include "vm.h"
+#include "parser.h"
+
+// Initializes the scripting runtime by setting up the VM.
+void us_init(void)
+{
+ init_vm();
+}
+
+// Shuts the scripting runtime down by tearing down the VM.
+void us_deinit(void)
+{
+ deinit_vm();
+}
+
+// Reads the script at `file_path`, then compiles and runs it.
+void us_load_file(const char *file_path)
+{
+    char *file = read_file(file_path, NULL);
+    // NOTE(review): assumes read_file() signals failure by returning NULL —
+    // confirm against its definition. Without this check a missing file
+    // would hand a NULL source pointer to the lexer.
+    if (!file) {
+        fprintf(stderr, "uscript: could not read '%s'\n", file_path);
+        return;
+    }
+    us_load_src(file);
+    mem_free(file);
+}
+
+// Compiles `src` as a script named "main" and executes it. Nothing is run
+// when compilation fails.
+void us_load_src(const char *src)
+{
+    struct us_proto *script = compile("main", src);
+
+    if (script)
+        us_exec(create_func(script));
+}
diff --git a/uscript/uscript.h b/uscript/uscript.h
new file mode 100644
index 0000000..74958e5
--- /dev/null
+++ b/uscript/uscript.h
@@ -0,0 +1,14 @@
+#ifndef __USCRIPT_LANG_H__
+#define __USCRIPT_LANG_H__
+
+struct us_func;
+
+void us_init(void); // initialises the global VM (calls init_vm())
+void us_deinit(void); // frees the global VM (calls deinit_vm())
+
+void us_load_file(const char *file); // reads the file at this path and passes its contents to us_load_src()
+void us_load_src(const char *src); // compiles src as "main" and executes it; returns silently if compile() yields NULL
+
+void us_exec(struct us_func *func); // runs func on the VM stack; its arguments must already be pushed
+
+#endif // __USCRIPT_LANG_H__
diff --git a/uscript/val.c b/uscript/val.c
new file mode 100644
index 0000000..d713645
--- /dev/null
+++ b/uscript/val.c
@@ -0,0 +1,204 @@
+#include "val.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "dyn_arr.h"
+#include "vm.h"
+
+#define STR_NUM_FMT "%g"
+#define STR_FUNC_FMT "<func %s: %p>"
+
+// Registers a freshly allocated object with the VM by appending its value
+// wrapper to vm.objs, the list deinit_vm() walks when freeing everything.
+static
+void init_obj(struct us_val val, struct us_obj *obj)
+{
+    da_append(struct us_val, &vm.objs, val);
+    (void)obj; // the header carries no state to initialise yet
+}
+
+// Wraps the existing buffer `chars` in a new string object without copying
+// it; free_val() later releases that buffer with mem_free(). A negative
+// `len` means "measure `chars` with strlen()".
+struct us_str *take_str(char *chars, int len)
+{
+    int n = (len < 0) ? (int)strlen(chars) : len;
+
+    struct us_str *wrapped = mem_alloc(sizeof(struct us_str));
+    wrapped->len = n;
+    wrapped->chars = chars;
+
+    init_obj(wrap_str(wrapped), &wrapped->header);
+    return wrapped;
+}
+
+// Creates a string object holding a private, NUL-terminated copy of the
+// first `len` bytes of `chars`. A negative `len` means "use strlen(chars)".
+struct us_str *copy_str(const char *chars, int len)
+{
+    int n = (len < 0) ? (int)strlen(chars) : len;
+
+    char *dup = mem_alloc(n + 1);
+    memcpy(dup, chars, n);
+    dup[n] = '\0';
+
+    // take_str() adopts `dup`, so it must not be freed here.
+    return take_str(dup, n);
+}
+
+// Allocates an empty function prototype called `name`: no bytecode, no
+// constants, no upvalues, zero arguments, not variadic. The prototype is
+// registered with the VM through init_obj().
+struct us_proto *create_proto(struct us_str *name)
+{
+    struct us_proto *proto = mem_alloc(sizeof(struct us_proto));
+    proto->name = name;
+    proto->bytecode = da_create(u8, 0);
+    // free_val() unconditionally passes upval_locs to mem_free(), so it must
+    // never be left indeterminate for prototypes that capture nothing.
+    proto->upval_locs = NULL;
+    proto->upvalc = 0;
+    proto->argc = 0;
+    proto->is_variadic = false;
+    proto->nconstants = 0;
+    init_obj(wrap_proto(proto), &proto->header);
+    return proto;
+}
+
+// Instantiates a function object for `proto` with room for proto->upvalc
+// upvalue pointers. The slots are left unset; the caller fills them in.
+struct us_func *create_func(struct us_proto *proto)
+{
+    struct us_func *closure = mem_alloc(sizeof(struct us_func));
+    struct us_upval **slots = mem_alloc(sizeof(struct us_upval*) * proto->upvalc);
+
+    closure->upvals = slots;
+    closure->proto = proto;
+
+    init_obj(wrap_func(closure), &closure->header);
+    return closure;
+}
+
+// Creates an upvalue that aliases the slot `val`. Its private `closed`
+// storage starts out as zilch and it is not linked into any list yet.
+struct us_upval *create_upval(struct us_val *val)
+{
+    struct us_upval *cell = mem_alloc(sizeof(struct us_upval));
+
+    cell->next = NULL;
+    cell->closed = create_zilch();
+    cell->loc = val;
+
+    init_obj(wrap_upval(cell), &cell->header);
+    return cell;
+}
+
+// Releases the heap memory owned by `v`. Numbers, booleans and zilch own
+// nothing; each object type frees its own buffers and then the object.
+void free_val(struct us_val v)
+{
+    switch (v.type) {
+    case VAL_NUM:
+    case VAL_BOOL:
+    case VAL_ZILCH:
+        // Plain values live inside the us_val itself.
+        break;
+    case VAL_UPVAL:
+        mem_free(get_obj(v));
+        break;
+    case VAL_FUNC: {
+        struct us_func *closure = get_func(v);
+        mem_free(closure->upvals);
+        mem_free(closure);
+        break;
+    }
+    case VAL_PROTO: {
+        struct us_proto *p = get_proto(v);
+        da_free(p->bytecode);
+        mem_free(p->upval_locs);
+        mem_free(p);
+        break;
+    }
+    case VAL_STR: {
+        struct us_str *s = get_str(v);
+        mem_free(s->chars);
+        mem_free(s);
+        break;
+    }
+    }
+}
+
+// Stores `v` in the next free slot of proto->constants and emits
+// "BC_LOAD <slot>" so the VM pushes it at run time.
+//
+// The table has UINT8_MAX slots and the operand is a single byte, so running
+// out of room is reported with log_fatal() instead of writing past the array.
+void proto_add_const(struct us_proto *proto, struct us_val v)
+{
+    if (proto->nconstants >= UINT8_MAX) {
+        log_fatal(
+            1,
+            "too many constants in '%s()' (max %d)",
+            proto->name->chars,
+            UINT8_MAX
+        );
+        return;
+    }
+
+    // Take the index first: proto_add_byte() is a macro, so its argument
+    // should not carry a side effect.
+    u8 slot = proto->nconstants++;
+    proto->constants[slot] = v;
+    proto_add_byte(proto, BC_LOAD);
+    proto_add_byte(proto, slot);
+}
+
+// Equality as used by BC_EQL / BC_NEQL. Values of different types are never
+// equal; numbers and booleans compare by value, strings by length and
+// content, and every other object by identity.
+bool vals_eql(struct us_val a, struct us_val b)
+{
+    if (a.type != b.type)
+        return false;
+
+    switch (a.type) {
+    case VAL_ZILCH:
+        return true;
+    case VAL_BOOL:
+        return get_bool(a) == get_bool(b);
+    case VAL_NUM:
+        return get_num(a) == get_num(b);
+    case VAL_STR: {
+        struct us_str *lhs = get_str(a);
+        struct us_str *rhs = get_str(b);
+        if (lhs->len != rhs->len)
+            return false;
+        return memcmp(lhs->chars, rhs->chars, lhs->len) == 0;
+    }
+    case VAL_PROTO:
+    case VAL_FUNC:
+    case VAL_UPVAL:
+        return get_obj(a) == get_obj(b);
+    }
+
+    // unreachable
+    return false;
+}
+
+// Returns a fresh mem_alloc()ed, NUL-terminated copy of the first `len` bytes
+// of `src`, and reports `len` through `len_out` when that is non-NULL.
+static
+char *dup_chars(const char *src, size_t len, int *len_out)
+{
+    char *str = mem_alloc(len + 1);
+    // memcpy, not strncpy: the copy must cover exactly `len` bytes even when
+    // the data contains a NUL, so it agrees with the length that is reported.
+    memcpy(str, src, len);
+    str[len] = '\0';
+    if (len_out)
+        *len_out = (int)len;
+    return str;
+}
+
+// Renders `v` as text.
+//
+// Returns a NUL-terminated buffer obtained from mem_alloc(); the caller must
+// release it with mem_free(). When `len_out` is non-NULL it receives the
+// length of the text, excluding the terminator.
+//
+// Functions render as their prototype ("<func NAME: ADDRESS>") and upvalues
+// render as the value they currently refer to.
+char *val_to_str(struct us_val v, int *len_out)
+{
+    // TODO: have this function return a us_str so that we own the memory,
+    // and so concatenation of strings is faster.
+
+    switch (v.type) {
+    case VAL_NUM: {
+        double num = get_num(v);
+        // First pass measures, second pass formats.
+        int len = snprintf(NULL, 0, STR_NUM_FMT, num);
+        char *str = mem_alloc(sizeof(char) * (len + 1));
+        snprintf(str, len + 1, STR_NUM_FMT, num);
+        if (len_out)
+            *len_out = len;
+        return str;
+    }
+    case VAL_BOOL: {
+        const char *bool_str = get_bool(v) ? "true" : "false";
+        return dup_chars(bool_str, strlen(bool_str), len_out);
+    }
+    case VAL_ZILCH: {
+        const char *zilch_str = "zilch";
+        return dup_chars(zilch_str, strlen(zilch_str), len_out);
+    }
+    case VAL_STR: {
+        const struct us_str *us_str = get_str(v);
+        return dup_chars(us_str->chars, us_str->len, len_out);
+    }
+    case VAL_PROTO: {
+        const struct us_proto *proto = get_proto(v);
+        int len = snprintf(
+            NULL,
+            0,
+            STR_FUNC_FMT,
+            proto->name->chars,
+            (void*)proto
+        );
+        char *str = mem_alloc(sizeof(char) * (len + 1));
+        snprintf(
+            str,
+            len + 1,
+            STR_FUNC_FMT,
+            proto->name->chars,
+            (void*)proto
+        );
+        if (len_out)
+            *len_out = len;
+        return str;
+    }
+    case VAL_FUNC:
+        return val_to_str(wrap_proto(get_func(v)->proto), len_out);
+    case VAL_UPVAL:
+        return val_to_str(*get_upval(v)->loc, len_out);
+    }
+
+    // unreachable
+    return NULL;
+}
+
+
diff --git a/uscript/val.h b/uscript/val.h
new file mode 100644
index 0000000..1c314e4
--- /dev/null
+++ b/uscript/val.h
@@ -0,0 +1,102 @@
+#ifndef __USCRIPT_VAL_H__
+#define __USCRIPT_VAL_H__
+
+#include "common.h"
+
+// Field accessors for struct us_val. The argument is parenthesised so that
+// expressions such as get_num(*ptr) or get_str(cond ? a : b) expand correctly.
+#define get_num(v) ((v).dat.number)
+#define get_bool(v) ((v).dat.boolean)
+#define get_obj(v) ((v).dat.obj)
+#define get_str(v) ((v).dat.str)
+#define get_proto(v) ((v).dat.proto)
+#define get_func(v) ((v).dat.func)
+#define get_upval(v) ((v).dat.upval)
+
+// Constructors: each builds a struct us_val of the matching type by value.
+#define create_num(n) ((struct us_val){.type=VAL_NUM, .dat={.number=(n)}})
+#define create_bool(b) ((struct us_val){.type=VAL_BOOL, .dat={.boolean=(b)}})
+#define create_zilch() ((struct us_val){.type=VAL_ZILCH, .dat={.number=0}})
+#define wrap_str(o) ((struct us_val){.type=VAL_STR, .dat={.str=(o)}})
+#define wrap_proto(o) ((struct us_val){.type=VAL_PROTO, .dat={.proto=(o)}})
+#define wrap_func(o) ((struct us_val){.type=VAL_FUNC, .dat={.func=(o)}})
+#define wrap_upval(o) ((struct us_val){.type=VAL_UPVAL, .dat={.upval=(o)}})
+
+// True for heap object types; relies on the ordering of enum val_type.
+#define val_is_obj(v) ((v).type >= VAL_STR)
+
+// Appends one byte to a prototype's bytecode array.
+#define proto_add_byte(func, op) da_append(u8, &(func)->bytecode, op)
+
+enum val_type { // discriminator for the union in struct us_val
+ VAL_NUM, // double held in dat.number
+ VAL_BOOL, // held in dat.boolean
+ VAL_ZILCH, // the "zilch" value; carries no payload
+ // Do not place any new object types before VAL_STR. Object types are
+ // detected by doing a comparison with VAL_STR. See val_is_obj().
+ VAL_STR, // struct us_str in dat.str
+ VAL_PROTO, // struct us_proto in dat.proto
+ VAL_FUNC, // struct us_func in dat.func
+ VAL_UPVAL, // struct us_upval in dat.upval
+};
+
+struct us_val { // tagged value; passed around by value
+ enum val_type type; // selects which member of dat is meaningful
+ union {
+ double number; // VAL_NUM
+ bool boolean; // VAL_BOOL
+ struct us_obj *obj; // any object type, viewed through its header (used for identity compares and frees)
+ struct us_str *str; // VAL_STR
+ struct us_proto *proto; // VAL_PROTO
+ struct us_func *func; // VAL_FUNC
+ struct us_upval *upval; // VAL_UPVAL
+ } dat;
+};
+
+struct us_obj { // header embedded as the first member of us_str, us_proto, us_func and us_upval
+ // We don't need the object header for now; it will be useful in the
+ // future, though.
+ u8 _placeholder;
+};
+
+struct us_str { // string object
+ struct us_obj header;
+ char *chars; // character buffer; released with mem_free() in free_val()
+ size_t len; // length recorded by take_str(); copy_str() stores a NUL terminator beyond it
+};
+
+struct us_proto { // compiled function prototype: code plus constants, without captured state
+ struct us_obj header;
+ const struct us_str *name; // printed by print_func() and val_to_str()
+ struct us_val constants[UINT8_MAX]; // constant table, indexed by a one-byte bytecode operand
+ u8 *bytecode; // dyn_arr
+ int* upval_locs; // released in free_val(); create_proto() does not assign it
+ int upvalc; // number of upvalues; create_func() allocates this many slots
+ int argc; // expected argument count, checked by BC_CALL
+ bool is_variadic; // initialised to false by create_proto()
+ u8 nconstants; // number of entries of constants[] in use
+};
+
+struct us_func { // callable instance of a prototype together with its captured upvalues
+ struct us_obj header;
+ struct us_proto *proto; // code and constants this function executes
+ struct us_upval **upvals; // proto->upvalc pointers, filled in by BC_LOAD_FUNC
+};
+
+struct us_upval { // captured variable
+ struct us_obj header;
+ struct us_val *loc; // where the variable lives now: a VM stack slot, or &closed after close_upvals()
+ struct us_val closed; // private storage the value is copied into when the upvalue is closed
+ struct us_upval *next; // link in the vm.open_upvals list
+};
+
+struct us_str *take_str(char *chars, int len); // wraps chars without copying; len < 0 means strlen(chars)
+struct us_str *copy_str(const char *chars, int len); // wraps a NUL-terminated copy of chars; len < 0 means strlen(chars)
+struct us_proto *create_proto(struct us_str *name); // new empty prototype with the given name
+struct us_func *create_func(struct us_proto *proto); // new function object with proto->upvalc unset upvalue slots
+struct us_upval *create_upval(struct us_val *val); // new upvalue pointing at val
+
+void free_val(struct us_val v); // frees the object v refers to, if any; no-op for numbers, booleans and zilch
+
+void proto_add_const(struct us_proto *func, struct us_val v); // stores v in the constant table and emits BC_LOAD for it
+
+bool vals_eql(struct us_val a, struct us_val b); // false when types differ; strings compare by content, other objects by pointer
+
+char *val_to_str(struct us_val v, int *len_out); // returns a mem_alloc()ed string the caller frees; len_out may be NULL
+
+#endif // __USCRIPT_VAL_H__
diff --git a/uscript/vm.c b/uscript/vm.c
new file mode 100644
index 0000000..f9f1fe6
--- /dev/null
+++ b/uscript/vm.c
@@ -0,0 +1,306 @@
+#include "vm.h"
+
+#include <math.h>
+#include <string.h>
+
+#include "dyn_arr.h"
+#include "us_debug.h"
+#include "uscript.h"
+
+struct vm vm;
+
+// Puts the global VM into its initial state: an empty object list, an empty
+// value stack, no open upvalues, and the call-frame cursor at the base of
+// cf_stack (us_exec() advances the cursor before using a frame).
+void init_vm(void)
+{
+    vm.objs = da_create(struct us_val, 128);
+    vm.cf = vm.cf_stack;
+    vm.stacktop = vm.stack;
+    // deinit_vm() frees every upvalue, so a re-initialised VM must not keep
+    // a list head left over from an earlier run.
+    vm.open_upvals = NULL;
+}
+
+// Frees every object the VM has registered in vm.objs, then the list itself.
+void deinit_vm(void)
+{
+    int idx = 0;
+    while (idx < da_len(vm.objs)) {
+        free_val(vm.objs[idx]);
+        idx++;
+    }
+
+    da_clear(vm.objs); // not needed, but makes me feel better :)
+    da_free(vm.objs);
+}
+
+// Truthiness rule: zilch is false, a boolean is itself, and every other
+// value (numbers, strings, functions, ...) is true.
+static
+bool as_bool(struct us_val v)
+{
+    switch (v.type) {
+    case VAL_ZILCH:
+        return false;
+    case VAL_BOOL:
+        return get_bool(v);
+    default:
+        return true;
+    }
+}
+
+// Builds a new string object containing the text of `a` followed by the text
+// of `b`. Both operands are stringified with val_to_str(), so they may be of
+// any type; the temporary buffers are freed before returning.
+static
+struct us_str *concat(struct us_val a, struct us_val b)
+{
+    int lhs_len;
+    int rhs_len;
+    char *lhs = val_to_str(a, &lhs_len);
+    char *rhs = val_to_str(b, &rhs_len);
+
+    int total = lhs_len + rhs_len;
+    char *joined = mem_alloc(sizeof(char) * (total + 1));
+    memcpy(joined, lhs, lhs_len);
+    memcpy(&joined[lhs_len], rhs, rhs_len);
+    joined[total] = '\0';
+
+    mem_free(rhs);
+    mem_free(lhs);
+
+    // take_str() adopts `joined`.
+    return take_str(joined, total);
+}
+
+// Reads the big-endian 16-bit operand stored in the two bytes that follow
+// bytecode offset *i, and leaves *i on the second of those bytes.
+//
+// The two reads are separate statements on purpose: incrementing *i twice
+// inside a single expression is unsequenced and therefore undefined.
+static
+u16 read_short(struct us_proto *proto, int *i)
+{
+    u8 hi = proto->bytecode[++*i];
+    u8 lo = proto->bytecode[++*i];
+    return (u16)((hi << 8) | lo);
+}
+
+// Closes every open upvalue that refers to stack slot `to` or to any slot
+// above it: the current value is copied into the upvalue's own `closed`
+// storage, `loc` is redirected there, and the upvalue is unlinked from
+// vm.open_upvals.
+//
+// The bound is inclusive so that BC_POP_UPVAL, which passes the slot it is
+// about to pop, actually closes that slot. The whole list is scanned because
+// upvalues are prepended in capture order, which need not match stack order.
+static
+void close_upvals(struct us_val *to)
+{
+    struct us_upval **link = &vm.open_upvals;
+    while (*link) {
+        struct us_upval *upval = *link;
+        if (upval->loc >= to) {
+            upval->closed = *upval->loc;
+            upval->loc = &upval->closed;
+            // Unlink; *link now refers to the following entry.
+            *link = upval->next;
+            upval->next = NULL;
+        } else {
+            link = &upval->next;
+        }
+    }
+}
+
+// Executes `func` on the global VM.
+//
+// On entry the top func->proto->argc stack values are taken as the arguments;
+// they become locals 0..argc-1 of a new call frame. BC_RET pops the return
+// value, closes the frame's upvalues, drops the frame's slots together with
+// the slot just below them (where BC_CALL expects the callee), pops the call
+// frame and pushes the return value.
+//
+// NOTE: if the bytecode runs off its end without a BC_RET, the loop simply
+// exits and the call frame pushed here is left in place.
+void us_exec(struct us_func *func)
+{
+#define read_byte() (func->proto->bytecode[++i])
+#define read_const() (func->proto->constants[read_byte()])
+// Pops b, then a, requires both to be numbers and pushes wrap(a op b).
+#define binary_num_op(wrap, op) \
+    do { \
+        struct us_val b = vm_pop(); \
+        struct us_val a = vm_pop(); \
+        if (b.type != VAL_NUM || a.type != VAL_NUM) \
+            log_fatal(1, "Invalid operands"); \
+        vm_push(wrap(get_num(a) op get_num(b))); \
+    } while (0)
+
+    // vm.cf is advanced before use, so the last usable frame is
+    // cf_stack[MAX_CALL_FRAMES - 1]; refuse to step past it.
+    if (vm.cf >= vm.cf_stack + MAX_CALL_FRAMES - 1)
+        log_fatal(1, "call stack overflow");
+
+    vm.cf++;
+    vm.cf->func = func;
+    vm.cf->stackbot = vm.stacktop - func->proto->argc;
+
+    for (int i = 0; i < da_len(func->proto->bytecode); i++) {
+        enum bytecode instruction = func->proto->bytecode[i];
+        // putc('>', stderr);
+        // for (struct us_val *val = vm.stack; val < vm.stacktop; val++) {
+        //     char *val_str = val_to_str(*val, NULL);
+        //     if (val == vm.cf->stackbot - 1)
+        //         fprintf(stderr, " %s >", val_str);
+        //     else
+        //         fprintf(stderr, " %s |", val_str);
+        //     mem_free(val_str);
+        // }
+        // putc('\n', stderr);
+        // putc('>', stderr);
+        // print_instruction(func->proto, i);
+
+        switch (instruction) {
+        case BC_LOAD:
+            vm_push(read_const());
+            break;
+        case BC_LOAD_FUNC: {
+            struct us_proto *proto = get_proto(read_const());
+
+            struct us_func *new_func = create_func(proto);
+
+            // One (is_local, index) operand pair follows per upvalue.
+            for (int j = 0; j < proto->upvalc; j++) {
+                u8 is_local = read_byte();
+                u8 index = read_byte();
+
+                if (is_local) {
+                    // Capture a slot of the current frame.
+                    struct us_upval *upval = create_upval(
+                        vm.cf->stackbot + index
+                    );
+                    upval->next = vm.open_upvals;
+                    vm.open_upvals = upval;
+
+                    new_func->upvals[j] = upval;
+                } else {
+                    // Share an upvalue of the enclosing function.
+                    new_func->upvals[j] =
+                        func->upvals[index];
+                }
+            }
+
+            vm_push(wrap_func(new_func));
+            break;
+        }
+        case BC_SMALL_INT:
+            vm_push(create_num(read_byte()));
+            break;
+        case BC_FALSE: vm_push(create_bool(false)); break;
+        case BC_TRUE: vm_push(create_bool(true)); break;
+        case BC_ZILCH: vm_push(create_zilch()); break;
+        case BC_SET_LOCAL:
+            vm.cf->stackbot[read_byte()] = vm_peek();
+            break;
+        case BC_GET_LOCAL:
+            vm_push(vm.cf->stackbot[read_byte()]);
+            break;
+        case BC_GET_UPVAL:
+            vm_push(*func->upvals[read_byte()]->loc);
+            break;
+        case BC_SET_UPVAL:
+            *func->upvals[read_byte()]->loc = vm_peek();
+            break;
+        case BC_POP_UPVAL:
+            close_upvals(vm.stacktop - 1);
+            vm_pop();
+            break;
+        case BC_POP: vm_pop(); break;
+        case BC_ADD: binary_num_op(create_num, +); break;
+        case BC_SUB: binary_num_op(create_num, -); break;
+        case BC_MULT: binary_num_op(create_num, *); break;
+        case BC_DIV: binary_num_op(create_num, /); break;
+        case BC_MOD: {
+            struct us_val b = vm_pop();
+            struct us_val a = vm_pop();
+            if (b.type != VAL_NUM || a.type != VAL_NUM)
+                log_fatal(1, "Invalid operands");
+            vm_push(create_num(fmod(get_num(a), get_num(b))));
+            break;
+        }
+        case BC_GT: binary_num_op(create_bool, >); break;
+        case BC_LT: binary_num_op(create_bool, <); break;
+        case BC_GTE: binary_num_op(create_bool, >=); break;
+        case BC_LTE: binary_num_op(create_bool, <=); break;
+        case BC_NEG: {
+            struct us_val a = vm_pop();
+            if (a.type != VAL_NUM)
+                log_fatal(1, "Invalid operand");
+            vm_push(create_num(-get_num(a)));
+            break;
+        }
+        case BC_NOT: {
+            bool negated = !as_bool(vm_pop());
+            vm_push(create_bool(negated));
+            break;
+        }
+        case BC_CONCAT: {
+            struct us_val b = vm_pop();
+            struct us_val a = vm_pop();
+            vm_push(wrap_str(concat(a, b)));
+            break;
+        }
+        case BC_EQL: {
+            struct us_val b = vm_pop();
+            struct us_val a = vm_pop();
+            vm_push(create_bool(vals_eql(a, b)));
+            break;
+        }
+        case BC_NEQL: {
+            struct us_val b = vm_pop();
+            struct us_val a = vm_pop();
+            vm_push(create_bool(!vals_eql(a, b)));
+            break;
+        }
+        case BC_FALSEY_JMP: {
+            u16 jmp = read_short(func->proto, &i);
+            if (as_bool(vm_pop()))
+                break;
+            i += jmp;
+            break;
+        }
+        // For both jumps the operand is read first, so `i` already sits on
+        // its last byte when the offset is applied. Writing
+        // "i += read_short(.., &i)" leaves it unspecified whether the old or
+        // the advanced value of `i` is used.
+        case BC_JMP: {
+            u16 jmp = read_short(func->proto, &i);
+            i += jmp;
+            break;
+        }
+        case BC_LOOP: {
+            u16 jmp = read_short(func->proto, &i);
+            i -= jmp;
+            break;
+        }
+        case BC_PRINT: {
+            char *str = val_to_str(vm_pop(), NULL);
+            olog(str);
+            mem_free(str);
+            break;
+        }
+        case BC_CALL: {
+            int argc = read_byte();
+            // The callee sits just below its arguments.
+            struct us_val callee = vm.stacktop[-argc - 1];
+            if (callee.type != VAL_FUNC)
+                log_fatal(1, "can only call functions");
+            struct us_func *callee_func = get_func(callee);
+            if (argc != callee_func->proto->argc) {
+                log_fatal(
+                    1,
+                    "wrong number of arguments to '%s()' (%d/%d)",
+                    callee_func->proto->name->chars,
+                    argc,
+                    callee_func->proto->argc
+                );
+            }
+            us_exec(callee_func);
+            break;
+        }
+        case BC_RET: {
+            struct us_val ret_val = vm_pop();
+
+            close_upvals(vm.cf->stackbot - 1);
+
+            vm.stacktop = vm.cf->stackbot - 1;
+            vm.cf--;
+            vm_push(ret_val);
+            return;
+        }
+        default:
+            log_fatal(1, "unhandled instruction %d", instruction);
+            break;
+        }
+    }
+}
+#undef binary_num_op
+#undef read_const
+#undef read_byte
diff --git a/uscript/vm.h b/uscript/vm.h
new file mode 100644
index 0000000..15c21a0
--- /dev/null
+++ b/uscript/vm.h
@@ -0,0 +1,44 @@
+#ifndef __USCRIPT_VM_H__
+#define __USCRIPT_VM_H__
+
+#include "common.h"
+#include "val.h"
+
+#define MAX_CALL_FRAMES (64) // number of entries in vm.cf_stack
+#define STACK_SIZE (MAX_CALL_FRAMES * 256) // number of values in vm.stack
+
+#define vm_pop() (*(--vm.stacktop)) // removes and yields the top value; no underflow check
+#define vm_peek() (vm.stacktop[-1]) // yields the top value and leaves it on the stack
+#define vm_push(v) (*vm.stacktop++ = (v)) // stores v on top of the stack; no overflow check
+
+enum bytecode { // one BC_<NAME> enumerator per BC(NAME) line of xbytecode.h, in file order
+#define BC(name) BC_##name,
+#include "xbytecode.h"
+#undef BC
+};
+
+struct call_frame { // one active us_exec() invocation
+ struct us_func *func; // function being executed
+ struct us_val *stackbot; // slot of the frame's first argument; locals are indexed from here
+};
+
+struct vm { // state of the single global interpreter instance
+ struct us_val *objs; // dyn_arr of every heap object created so far; freed in deinit_vm()
+
+ struct call_frame cf_stack[MAX_CALL_FRAMES];
+ struct call_frame *cf; // current frame; us_exec() increments it before use, so cf_stack[0] stays unused
+
+ struct us_val *global_stack; // dyn_arr
+ struct us_val stack[STACK_SIZE]; // value stack shared by all frames
+ struct us_val *stacktop; // one past the top value
+
+ struct us_upval *open_upvals; // head of the list of upvalues that still point into stack[]
+};
+
+extern struct vm vm;
+
+void init_vm(void); // resets the global vm to an empty state
+void deinit_vm(void); // frees every object recorded in vm.objs and the list itself
+void print_func(struct us_proto *proto); // also declared in us_debug.h
+
+#endif // __USCRIPT_VM_H__
diff --git a/uscript/xbytecode.h b/uscript/xbytecode.h
new file mode 100644
index 0000000..4372ea5
--- /dev/null
+++ b/uscript/xbytecode.h
@@ -0,0 +1,32 @@
+BC(LOAD) // operand: constant index (1 byte); pushes that constant
+BC(LOAD_FUNC) // operands: constant index of a prototype, then an (is_local, index) byte pair per upvalue; pushes a new function
+BC(SMALL_INT) // operand: 1 byte; pushes it as a number
+BC(FALSE) // pushes false
+BC(TRUE) // pushes true
+BC(ZILCH) // pushes zilch
+BC(GET_LOCAL) // operand: slot (1 byte, relative to the frame's stackbot); pushes the local
+BC(SET_LOCAL) // operand: slot (1 byte); stores the top of the stack without popping it
+BC(GET_UPVAL) // operand: upvalue index (1 byte); pushes the captured value
+BC(SET_UPVAL) // operand: upvalue index (1 byte); stores the top of the stack without popping it
+BC(POP) // pops one value
+BC(POP_UPVAL) // calls close_upvals(stacktop - 1), then pops one value
+BC(ADD) // pops two numbers, pushes a + b; non-numbers are fatal
+BC(SUB) // pops two numbers, pushes a - b
+BC(MULT) // pops two numbers, pushes a * b
+BC(DIV) // pops two numbers, pushes a / b
+BC(MOD) // pops two numbers, pushes fmod(a, b)
+BC(GT) // pops two numbers, pushes the boolean a > b
+BC(GTE) // pops two numbers, pushes the boolean a >= b
+BC(LT) // pops two numbers, pushes the boolean a < b
+BC(LTE) // pops two numbers, pushes the boolean a <= b
+BC(NEG) // pops a number, pushes its negation
+BC(NOT) // pops any value, pushes the negation of its truthiness
+BC(EQL) // pops two values, pushes vals_eql(a, b)
+BC(NEQL) // pops two values, pushes !vals_eql(a, b)
+BC(CONCAT) // pops two values, pushes a new string holding both rendered as text
+BC(JMP) // operand: 16-bit big-endian offset; jumps forward
+BC(FALSEY_JMP) // operand: 16-bit big-endian offset; pops a value and jumps forward if it is falsey
+BC(LOOP) // operand: 16-bit big-endian offset; jumps backward
+BC(PRINT) // pops a value and logs its text with olog()
+BC(CALL) // operand: argument count (1 byte); calls the function sitting below the arguments
+BC(RET) // pops the return value, unwinds the current frame and pushes the value for the caller