From 957c64c7b8b5e98d8a03dd84c7e27e7991fb9dbc Mon Sep 17 00:00:00 2001 From: iamcheeseman Date: Mon, 6 Apr 2026 17:04:05 -0400 Subject: Initial commit --- uscript/lex.c | 382 ++++++++++++++++++++++++ uscript/lex.h | 74 +++++ uscript/parser.c | 841 ++++++++++++++++++++++++++++++++++++++++++++++++++++ uscript/parser.h | 8 + uscript/us_debug.c | 117 ++++++++ uscript/us_debug.h | 10 + uscript/uscript.c | 34 +++ uscript/uscript.h | 14 + uscript/val.c | 204 +++++++++++++ uscript/val.h | 102 +++++++ uscript/vm.c | 306 +++++++++++++++++++ uscript/vm.h | 44 +++ uscript/xbytecode.h | 32 ++ 13 files changed, 2168 insertions(+) create mode 100644 uscript/lex.c create mode 100644 uscript/lex.h create mode 100644 uscript/parser.c create mode 100644 uscript/parser.h create mode 100644 uscript/us_debug.c create mode 100644 uscript/us_debug.h create mode 100644 uscript/uscript.c create mode 100644 uscript/uscript.h create mode 100644 uscript/val.c create mode 100644 uscript/val.h create mode 100644 uscript/vm.c create mode 100644 uscript/vm.h create mode 100644 uscript/xbytecode.h (limited to 'uscript') diff --git a/uscript/lex.c b/uscript/lex.c new file mode 100644 index 0000000..cc41b2a --- /dev/null +++ b/uscript/lex.c @@ -0,0 +1,382 @@ +#include "lex.h" + +#include +#include +#include + +#include "dyn_arr.h" + +#define advance(lex) (++lex->head, ++lex->col) +#define current_char(lex) (*lex->head) + +// For printing purposes +static +char *token_names[] = { + "TOKEN_EOF", +#define TOKEN_NAME(name) "TOKEN_" #name, + XTOKENS(TOKEN_NAME) +#undef TOKEN_NAME +}; + +static +bool is_digit(char c) +{ + return c >= '0' && c <= '9'; +} + +static +bool is_upper(char c) +{ + return c >= 'A' && c <= 'Z'; +} + +static +bool is_lower(char c) +{ + return c >= 'a' && c <= 'z'; +} + +static +bool is_ident(char c) +{ + return is_lower(c) || + is_upper(c) || + c == '_'; +} + +static +bool match_kw(const struct lexer *lex, const char *b) +{ + size_t alen = (size_t)(lex->head - lex->base); + size_t blen = strlen(b); + return alen == blen && memcmp(lex->base, b, alen) == 0; +} + +// Advances if the given character is a match. +static +bool match_char(struct lexer *lex, char c) +{ + if (current_char(lex) == c) { + advance(lex); + return true; + } + return false; +} + +// Looks for a non-whitespace character and sets the start of the token to +// that. If it sees a newline, it will also update all relevant data. And +// finally, if it sees a #, it will interpret it as a comment. +static +void goto_token_start(struct lexer *lex) +{ + while (true) { + switch (current_char(lex)) { + case '#': + while ( + current_char(lex) != '\n' && + current_char(lex) != '\0' + ) + advance(lex); + break; + case '\n': + advance(lex); + lex->line++; + lex->col = 0; + break; + case ' ': + case '\t': + case '\r': + advance(lex); + break; + default: + lex->base = lex->head; + return; + } + } +} + +static +struct token create_token(struct lexer *lex, u16 token_kind) +{ + struct token tok; + tok.kind = token_kind; + tok.start = lex->base; + tok.len = (int)(lex->head - lex->base); + tok.line = lex->line; + tok.col = lex->col - tok.len; + tok.val = create_zilch(); + return tok; +} + +static +struct token err_token(struct lexer *lex, const char *msg) +{ + struct token tok; + tok.kind = TOKEN_ERR; + tok.start = msg; + tok.len = (int)strlen(msg); + tok.line = lex->line; + tok.col = lex->col; + tok.val = create_zilch(); + return tok; +} + +static +struct token num_token(struct lexer *lex) +{ + while (is_digit(current_char(lex))) { + advance(lex); + } + + if (current_char(lex) == '.') { + advance(lex); + while (is_digit(current_char(lex))) { + advance(lex); + } + } + + return create_token(lex, TOKEN_NUM); +} + +// And ident(ifier) token is either a keyword, or an actual identifier +static +struct token ident_token(struct lexer *lex) +{ + while (true) { + char c = current_char(lex); + if (c == ':') { + // If a : appears in the middle of an identifier, allow + // it + if (!is_ident(lex->head[1])) + break; + + } else if (!is_ident(c) && !is_digit(c)) { + break; + } + + + advance(lex); + } + + // : is just to be used as a namespacer. So obviously it should be + // disallowed at the beginning and end of identifiers. + if (lex->head[-1] == ':') + return err_token(lex, "cannot end an identifier in ':'"); + + u16 kind = TOKEN_IDENT; + + if (match_kw(lex, "if")) + kind = TOKEN_IF; + else if (match_kw(lex, "elseif")) + kind = TOKEN_ELSEIF; + else if (match_kw(lex, "else")) + kind = TOKEN_ELSE; + else if (match_kw(lex, "loop")) + kind = TOKEN_LOOP; + else if (match_kw(lex, "mod")) + kind = TOKEN_MOD; + else if (match_kw(lex, "true")) + kind = TOKEN_TRUE; + else if (match_kw(lex, "false")) + kind = TOKEN_FALSE; + else if (match_kw(lex, "zilch")) + kind = TOKEN_ZILCH; + else if (match_kw(lex, "nada")) + kind = TOKEN_ZILCH; + else if (match_kw(lex, "do")) + kind = TOKEN_DO; + else if (match_kw(lex, "break")) + kind = TOKEN_BREAK; + else if (match_kw(lex, "next")) + kind = TOKEN_NEXT; + else if (match_kw(lex, "in")) + kind = TOKEN_IN; + else if (match_kw(lex, "fun")) + kind = TOKEN_FUN; + else if (match_kw(lex, "ret")) + kind = TOKEN_RET; + else if (match_kw(lex, "let")) + kind = TOKEN_LET; + else if (match_kw(lex, "end")) + kind = TOKEN_END; + else if (match_kw(lex, "global")) + kind = TOKEN_GLOBAL; + else if (match_kw(lex, "print")) + kind = TOKEN_PRINT; + + return create_token(lex, kind); +} + +static +struct token str_token(struct lexer *lex, char term) +{ + // TODO: escape sequences + char *chars = da_create(char, 0); + + while (current_char(lex) != term && current_char(lex) != '\0') { + if (current_char(lex) != '\\') { + da_append(char, &chars, current_char(lex)); + advance(lex); + continue; + } + advance(lex); + + switch (current_char(lex)) { + case '\\': da_append(char, &chars, '\\'); break; + case '\'': da_append(char, &chars, '\''); break; + case '"': da_append(char, &chars, '"'); break; + case 'n': da_append(char, &chars, '\n'); break; + case 't': da_append(char, &chars, '\t'); break; + case 'r': da_append(char, &chars, '\r'); break; + default: + da_free(chars); + return err_token(lex, "invalid escape sequence"); + } + + advance(lex); // eat escape + } + + da_append(char, &chars, 0); + + if (current_char(lex) == '\0') + return err_token(lex, "string never terminates"); + + advance(lex); // eat terminator + + struct token str = create_token(lex, TOKEN_STR); + str.val = wrap_str(copy_str(chars, da_len(chars) - 1)); + da_free(chars); + return str; +} + +// A symbol token is any token that contains no alphanumeric characters. +static +struct token symbol_token(struct lexer *lex, char c) +{ + switch (c) { + case '(': + case ')': + case '{': + case '}': + case '[': + case ']': + case ',': + case ';': + case ':': + return create_token(lex, c); + case '.': + return create_token( + lex, + match_char(lex, '.') ? TOKEN_DOT_DOT : c + ); + case '+': + return create_token( + lex, + match_char(lex, '=') ? TOKEN_PLUS_EQL : c + ); + case '-': + return create_token( + lex, + match_char(lex, '=') ? TOKEN_MINUS_EQL : c + ); + case '*': + return create_token( + lex, + match_char(lex, '=') ? TOKEN_MULT_EQL : c + ); + case '/': + return create_token( + lex, + match_char(lex, '=') ? TOKEN_DIV_EQL : c + ); + case '%': + return create_token( + lex, + match_char(lex, '=') ? TOKEN_MOD_EQL : c + ); + case '<': + return create_token( + lex, + match_char(lex, '=') ? TOKEN_LTEQL : c + ); + case '>': + return create_token( + lex, + match_char(lex, '=') ? TOKEN_GTEQL : c + ); + case '=': + return create_token( + lex, + match_char(lex, '=') ? TOKEN_EQL : c + ); + case '!': + return create_token( + lex, + match_char(lex, '=') ? TOKEN_NEQL : c + ); + } + + // TODO: log unknown character + return err_token(lex, "unknown character"); +} + +void lex_init(struct lexer *lex, const char *src) +{ + lex->src = src; + lex->base = src; + lex->head = src; + + lex->line = 1; + lex->col = 0; +} + +struct token lex_next_token(struct lexer *lex) +{ + goto_token_start(lex); + + char c = current_char(lex); + + if (c == '\0') + return create_token(lex, TOKEN_EOF); + + advance(lex); + + if (c == '"' || c == '\'') + return str_token(lex, c); + if (is_digit(c)) + return num_token(lex); + if (is_ident(c)) + return ident_token(lex); + + return symbol_token(lex, c); +} + +void token_kind_name(char *dst, size_t len, u16 kind) +{ + char char_name[2] = {0, 0}; + const char *name = char_name; + + if (kind < TOKEN_EOF) + char_name[0] = kind; + else + name = token_names[kind - TOKEN_EOF]; + + len = fmin(len, strlen(name)); + memcpy(dst, name, len); + dst[len] = '\0'; +} + +void print_token(struct token tok) +{ + char kind_name[128]; + token_kind_name(kind_name, 128, tok.kind); + + printf( + "%-15s | %3d:%-2d | %-24.*s\n", + kind_name, + tok.line, + tok.col, + tok.len, + tok.start + ); +} diff --git a/uscript/lex.h b/uscript/lex.h new file mode 100644 index 0000000..119b867 --- /dev/null +++ b/uscript/lex.h @@ -0,0 +1,74 @@ +#ifndef __USCRIPT_LEX_H__ +#define __USCRIPT_LEX_H__ + +#include "common.h" +#include "val.h" + +#define XTOKENS(_) \ + _(PRINT) \ + _(BREAK) \ + _(DIV_EQL) \ + _(DO) \ + _(DOT_DOT) \ + _(ELSE) \ + _(ELSEIF) \ + _(END) \ + _(EQL) \ + _(ERR) \ + _(FALSE) \ + _(FUN) \ + _(GLOBAL) \ + _(GTEQL) \ + _(IDENT) \ + _(IF) \ + _(IN) \ + _(LET) \ + _(LOOP) \ + _(LTEQL) \ + _(MINUS_EQL) \ + _(MOD) \ + _(MOD_EQL) \ + _(MULT_EQL) \ + _(NEQL) \ + _(NEXT) \ + _(NUM) \ + _(PLUS_EQL) \ + _(RET) \ + _(STR) \ + _(TRUE) \ + _(ZILCH) + +// single-character tokens are represented by their ASCII value, but other types +// of tokens are represented by a token_kind enum value. +enum token_kind { + TOKEN_EOF = 256, +#define DEF_TOKEN_ENUM(name) TOKEN_##name, + XTOKENS(DEF_TOKEN_ENUM) +#undef DEF_TOKEN_ENUM +}; + +struct lexer { + const char *src; + const char *base; + const char *head; + + int line; + int col; +}; + +struct token { + const char *start; + int len; + int line; + int col; + u16 kind; + struct us_val val; +}; + +void lex_init(struct lexer *lex, const char *src); +struct token lex_next_token(struct lexer *lex); + +void token_kind_name(char *dst, size_t len, u16 kind); +void print_token(struct token tok); + +#endif // __USCRIPT_LEX_H__ diff --git a/uscript/parser.c b/uscript/parser.c new file mode 100644 index 0000000..c51bc85 --- /dev/null +++ b/uscript/parser.c @@ -0,0 +1,841 @@ +#include "parser.h" + +#include +#include + +#include "dyn_arr.h" +#include "lex.h" +#include "val.h" +#include "vm.h" + +#define parser_add_byte(p, byte) (proto_add_byte((p)->fp->proto, byte)) +#define parser_add_const(p, c) (proto_add_const((p)->fp->proto, c)) +#define parser_bytecode_len(p) (da_len((p)->fp->proto->bytecode)) + +enum precedence { + PREC_NONE, + PREC_ASSIGN, // = + PREC_EQL, // == != + PREC_COMP, // < <= > >= + PREC_CONCAT, + PREC_TERM, // + - + PREC_FACTOR, // * / % + PREC_UNARY, + PREC_CALL, // () +}; + +struct loop { + struct loop *outer; + int start; + int scope; + bool labeled; + struct token label; + int *breaks; // dyn_arr +}; + +struct variable { + struct token name; + int scope; + bool captured; +}; + +struct upval { + struct token name; + bool is_local; + u8 index; +}; + +struct func_parser { + struct func_parser *outer; + struct us_proto *proto; + struct upval *upvals; // dyn_arr + struct variable *locals; // dyn_arr + struct loop *loop; + int scope; + bool is_script; +}; + +struct parser { + struct lexer lex; + + struct token prev; + struct token cur; + + struct func_parser *fp; + + bool can_assign; + bool had_err; +}; + +typedef void (*fn_parse)(struct parser *p); + +struct expr { + fn_parse prefix; + fn_parse infix; + enum precedence prec; +}; + +static +void show_error(struct parser *p, struct token tok, const char *msg, ...) +{ + if (tok.kind == TOKEN_EOF) + fprintf(stderr, "on line %d at EOF:\n\t", tok.line); + else + fprintf(stderr, "on line %d at '%.*s':\n\t", tok.line, tok.len, tok.start); + + va_list args; + va_start(args, msg); + vfprintf(stderr, msg, args); + va_end(args); + + putc('\n', stderr); + + p->had_err = true; +} + +static +void advance(struct parser *p) +{ + p->prev = p->cur; + while (true) { + p->cur = lex_next_token(&p->lex); + // print_token(p->cur); + if (p->cur.kind != TOKEN_ERR) + break; + show_error(p, p->prev, p->cur.start); + }; +} + +static +int begin_jump(struct parser *p, u8 instruction) +{ + parser_add_byte(p, instruction); + parser_add_byte(p, 0xFF); + parser_add_byte(p, 0xFF); + return parser_bytecode_len(p) - 2; +} + +static +void end_jump(struct parser *p, int loc) +{ + int jump = parser_bytecode_len(p) - loc - 2; + if (jump > UINT16_MAX) + show_error(p, p->prev, "jump too large"); + p->fp->proto->bytecode[loc] = (jump >> 8) & 0xFF; + p->fp->proto->bytecode[loc+1] = jump & 0xFF; +} + +static +void add_loop(struct parser *p, int loc) +{ + int jump = parser_bytecode_len(p) - loc + 2; + if (jump > UINT16_MAX) + show_error(p, p->prev, "jump too large"); + parser_add_byte(p, BC_LOOP); + parser_add_byte(p, (jump >> 8) & 0xFF); + parser_add_byte(p, jump & 0xFF); +} + +static +bool consume(struct parser *p, u16 tok) +{ + if (p->cur.kind != tok) + return false; + advance(p); + return true; +} + +static +void expect(struct parser *p, u16 tok, const char *err) +{ + if (p->cur.kind != tok) { + show_error(p, p->cur, err); + return; + } + advance(p); +} + +static +void declare_variable(struct parser *p, struct token name) +{ + if (da_len(p->fp->locals) > UINT8_MAX) + show_error(p, name, "too many locals"); + struct variable slot; + slot.scope = p->fp->scope; + slot.name = name; + slot.captured = false; + da_append(struct variable, &p->fp->locals, slot); +} + +static +int find_local(struct func_parser *fp, struct token name) +{ + for (int i = da_len(fp->locals) - 1; i >= 0; i--) { + struct variable local = fp->locals[i]; + if ( + name.len == local.name.len && + memcmp(name.start, local.name.start, name.len) == 0 + ) + return i; + } + return -1; +} + +static +int find_upval(struct parser *p, struct func_parser *fp, struct token name) +{ + if (fp == NULL) + return -1; + + for (int i = da_len(fp->upvals); i >= 0; i--) { + struct upval upval = fp->upvals[i]; + if ( + name.len == upval.name.len && + memcmp(name.start, upval.name.start, name.len) == 0 + ) + return i; + } + + // Didn't find one already captured in an outer scope. Try to find a + // local variable. + int local = find_local(fp->outer, name); + if (local != -1) { + fp->outer->locals[local].captured = true; + struct upval upval; + upval.name = name; + upval.is_local = true; + upval.index = local; + da_append( + struct upval, + &fp->upvals, + upval + ); + fp->proto->upvalc++; + return da_len(fp->upvals) - 1; + } + + // Didn't find an already captured upval. Try to capture one from an + // outer scope. + int outer = find_upval(p, fp->outer, name); + if (outer != -1) { + struct upval upval; + upval.name = name; + upval.is_local = false; + upval.index = outer; + da_append( + struct upval, + &fp->upvals, + upval + ); + fp->proto->upvalc++; + return da_len(fp->upvals) - 1; + } + + return -1; +} + +static +int pop_scope(struct parser *p, int scope) +{ + int locals_len = da_len(p->fp->locals); + for (int i = da_len(p->fp->locals) - 1; i >= 0; i--) { + struct variable local = p->fp->locals[i]; + if (local.scope < scope) + break; + parser_add_byte(p, local.captured ? BC_POP_UPVAL : BC_POP); + locals_len = i; + } + return locals_len; +} + +static +void begin_scope(struct parser *p) +{ + p->fp->scope++; +} + +static +void end_scope(struct parser *p) +{ + *da_len_ptr(p->fp->locals) = pop_scope(p, p->fp->scope); + p->fp->scope--; +} + +static +void begin_function(struct parser *p, struct us_proto *proto) +{ + struct func_parser *fp = mem_alloc(sizeof(struct func_parser)); + fp->outer = p->fp; + fp->proto = proto; + fp->loop = NULL; + fp->locals = da_create(struct variable, 0); + fp->upvals = da_create(struct upval, 0); + fp->scope = 0; + fp->is_script = false; + p->fp = fp; +} + +static +void end_function(struct parser *p) +{ + parser_add_byte(p, BC_ZILCH); + parser_add_byte(p, BC_RET); + + struct func_parser *fp = p->fp; + p->fp = fp->outer; + + if (p->fp) { + p->fp->proto->constants[p->fp->proto->nconstants] = + wrap_proto(fp->proto); + parser_add_byte(p, BC_LOAD_FUNC); + parser_add_byte(p, p->fp->proto->nconstants++); + + for (int i = 0; i < fp->proto->upvalc; i++) { + parser_add_byte(p, fp->upvals[i].is_local ? 1 : 0); + parser_add_byte(p, (u8)fp->upvals[i].index); + } + } + +#ifdef UE_DEBUG + print_func(fp->proto); +#endif + + da_free(fp->locals); + da_free(fp->upvals); + mem_free(fp); +} + +static void parse_expr(struct parser *p, enum precedence prec); +static struct expr get_expr(struct token tok); +static void expr(struct parser *p); + +static +void parse_number(struct parser *p) +{ + struct token num_tok = p->prev; + double n = strtod(num_tok.start, NULL); + + // Avoid filling up the constants list with numbers that could easily + // just be part of the bytecode. + if (n <= UINT8_MAX && (u8)n == n) { + parser_add_byte(p, BC_SMALL_INT); + parser_add_byte(p, (u8)n); + return; + } + + parser_add_const(p, create_num(n)); +} + +static +void parse_string(struct parser *p) +{ + parser_add_const(p, p->prev.val); +} + +static +void parse_literal(struct parser *p) +{ + switch (p->prev.kind) { + case TOKEN_FALSE: parser_add_byte(p, BC_FALSE); break; + case TOKEN_TRUE: parser_add_byte(p, BC_TRUE); break; + case TOKEN_ZILCH: parser_add_byte(p, BC_ZILCH); break; + } +} + +static +void parse_ident(struct parser *p) +{ + struct token ident = p->prev; + + u8 setter; + u8 getter; + + int var = find_local(p->fp, ident); + if (var != -1) { + setter = BC_SET_LOCAL; + getter = BC_GET_LOCAL; + } else if ((var = find_upval(p, p->fp, ident)) != -1) { + setter = BC_SET_UPVAL; + getter = BC_GET_UPVAL; + } + + if (var == -1) { + show_error(p, ident, "undefined variable"); + return; + } + + if (p->can_assign && consume(p, '=')) { + expr(p); + parser_add_byte(p, setter); + } else { + parser_add_byte(p, getter); + } + parser_add_byte(p, (u8)var); +} + +static +void parse_binary(struct parser *p) +{ + struct token op = p->prev; + + parse_expr(p, get_expr(op).prec + 1); + + switch (op.kind) { + case '+': parser_add_byte(p, BC_ADD); break; + case '-': parser_add_byte(p, BC_SUB); break; + case '*': parser_add_byte(p, BC_MULT); break; + case '/': parser_add_byte(p, BC_DIV); break; + case '%': parser_add_byte(p, BC_MOD); break; + case '>': parser_add_byte(p, BC_GT); break; + case '<': parser_add_byte(p, BC_LT); break; + case TOKEN_DOT_DOT: parser_add_byte(p, BC_CONCAT); break; + case TOKEN_EQL: parser_add_byte(p, BC_EQL); break; + case TOKEN_NEQL: parser_add_byte(p, BC_NEQL); break; + case TOKEN_GTEQL: parser_add_byte(p, BC_GTE); break; + case TOKEN_LTEQL: parser_add_byte(p, BC_LTE); break; + } +} + +static +void parse_unary(struct parser *p) +{ + struct token op = p->prev; + + parse_expr(p, PREC_UNARY); + + switch (op.kind) { + case '-': parser_add_byte(p, BC_NEG); break; + case '!': parser_add_byte(p, BC_NOT); break; + } +} + +static +void parse_grouping(struct parser *p) +{ + expr(p); + expect(p, ')', "expected ')'"); +} + +static +void parse_call(struct parser *p) +{ + int argc = 0; + if (p->cur.kind != ')') { + do { + argc++; + expr(p); + } while (consume(p, ',')); + } + expect(p, ')', "expected ')'"); + + if (argc > UINT8_MAX) + show_error(p, p->prev, "max argument count is 255"); + + parser_add_byte(p, BC_CALL); + parser_add_byte(p, argc); +} + +static +struct expr expressions[] = { + ['('] = {parse_grouping, parse_call, PREC_CALL}, + [')'] = {NULL, NULL, PREC_NONE}, + ['{'] = {NULL, NULL, PREC_NONE}, + ['}'] = {NULL, NULL, PREC_NONE}, + ['['] = {NULL, NULL, PREC_NONE}, + [']'] = {NULL, NULL, PREC_NONE}, + [','] = {NULL, NULL, PREC_NONE}, + [';'] = {NULL, NULL, PREC_NONE}, + [':'] = {NULL, NULL, PREC_NONE}, + ['.'] = {NULL, NULL, PREC_NONE}, + ['+'] = {NULL, parse_binary, PREC_TERM}, + ['-'] = {parse_unary, parse_binary, PREC_TERM}, + ['*'] = {NULL, parse_binary, PREC_FACTOR}, + ['/'] = {NULL, parse_binary, PREC_FACTOR}, + ['%'] = {NULL, parse_binary, PREC_FACTOR}, + ['<'] = {NULL, parse_binary, PREC_COMP}, + ['>'] = {NULL, parse_binary, PREC_COMP}, + ['='] = {NULL, NULL, PREC_NONE}, + ['!'] = {parse_unary, NULL, PREC_NONE}, + [TOKEN_EOF] = {NULL, NULL, PREC_NONE}, + [TOKEN_BREAK] = {NULL, NULL, PREC_NONE}, + [TOKEN_DIV_EQL] = {NULL, NULL, PREC_NONE}, + [TOKEN_DOT_DOT] = {NULL, parse_binary, PREC_CONCAT}, + [TOKEN_DO] = {NULL, NULL, PREC_NONE}, + [TOKEN_ELSE] = {NULL, NULL, PREC_NONE}, + [TOKEN_END] = {NULL, NULL, PREC_NONE}, + [TOKEN_EQL] = {NULL, parse_binary, PREC_EQL}, + [TOKEN_ERR] = {NULL, NULL, PREC_NONE}, + [TOKEN_FALSE] = {parse_literal, NULL, PREC_NONE}, + [TOKEN_FUN] = {NULL, NULL, PREC_NONE}, + [TOKEN_GLOBAL] = {NULL, NULL, PREC_NONE}, + [TOKEN_GTEQL] = {NULL, parse_binary, PREC_COMP}, + [TOKEN_IDENT] = {parse_ident, NULL, PREC_NONE}, + [TOKEN_IF] = {NULL, NULL, PREC_NONE}, + [TOKEN_IN] = {NULL, NULL, PREC_NONE}, + [TOKEN_LET] = {NULL, NULL, PREC_NONE}, + [TOKEN_LOOP] = {NULL, NULL, PREC_NONE}, + [TOKEN_LTEQL] = {NULL, parse_binary, PREC_COMP}, + [TOKEN_MINUS_EQL] = {NULL, NULL, PREC_NONE}, + [TOKEN_MOD] = {NULL, NULL, PREC_NONE}, + [TOKEN_MOD_EQL] = {NULL, NULL, PREC_NONE}, + [TOKEN_MULT_EQL] = {NULL, NULL, PREC_NONE}, + [TOKEN_NEQL] = {NULL, parse_binary, PREC_EQL}, + [TOKEN_NEXT] = {NULL, NULL, PREC_NONE}, + [TOKEN_NUM] = {parse_number, NULL, PREC_NONE}, + [TOKEN_PLUS_EQL] = {NULL, NULL, PREC_NONE}, + [TOKEN_RET] = {NULL, NULL, PREC_NONE}, + [TOKEN_STR] = {parse_string, NULL, PREC_NONE}, + [TOKEN_TRUE] = {parse_literal, NULL, PREC_NONE}, + [TOKEN_ZILCH] = {parse_literal, NULL, PREC_NONE}, +}; + +static void stat(struct parser *p); + +static +struct expr get_expr(struct token tok) +{ + return expressions[tok.kind]; +} + +static +void parse_expr(struct parser *p, enum precedence prec) +{ + advance(p); + + fn_parse prefix = get_expr(p->prev).prefix; + if (!prefix) { + show_error(p, p->prev, "expected expression"); + return; + } + + p->can_assign = prec <= PREC_ASSIGN; + prefix(p); + + while (prec <= get_expr(p->cur).prec) { + advance(p); + get_expr(p->prev).infix(p); + } + + if (p->can_assign && consume(p, '=')) { + show_error(p, p->prev, "bad assignment"); + } + p->can_assign = false; +} + +static +void expr(struct parser *p) +{ + parse_expr(p, PREC_ASSIGN); +} + +static +void expr_stat(struct parser *p) +{ + expr(p); + parser_add_byte(p, BC_POP); +} + +static +void let_stat(struct parser *p) +{ + do { + expect(p, TOKEN_IDENT, "expected variable name"); + struct token name = p->prev; + + if (consume(p, '=')) + expr(p); + else + parser_add_byte(p, BC_ZILCH); + + declare_variable(p, name); + } while (consume(p, ',')); +} + +static +void fun_stat(struct parser *p) +{ + expect(p, TOKEN_IDENT, "expected function name"); + struct token name_token = p->prev; + + struct us_str *name = copy_str(name_token.start, name_token.len); + struct us_proto *proto = create_proto(name); + + // parser_add_const(p, wrap_func(func)); + declare_variable(p, name_token); + + begin_function(p, proto); + + expect(p, '(', "expected '(' after function name"); + if (p->cur.kind != ')') { + do { + expect(p, TOKEN_IDENT, "expected arguement name"); + declare_variable(p, p->prev); + proto->argc++; + } while (consume(p, ',')); + } + expect(p, ')', "expected ')' after arguments"); + + while (p->cur.kind != TOKEN_END && p->cur.kind != TOKEN_EOF) + stat(p); + + end_function(p); + + expect(p, TOKEN_END, "unterminated function"); +} + +static +void if_stat(struct parser *p, bool is_elseif) +{ + struct token begin = p->prev; + + expr(p); + + int jump = begin_jump(p, BC_FALSEY_JMP); + + if (!is_elseif) + expect(p, ':', "expected ':' to begin 'if' block"); + else + expect(p, ':', "expected ':' to begin 'elseif' block"); + + begin_scope(p); + + while ( + p->cur.kind != TOKEN_END && + p->cur.kind != TOKEN_ELSE && + p->cur.kind != TOKEN_ELSEIF && + p->cur.kind != TOKEN_EOF + ) + stat(p); + + end_scope(p); + + int else_jump = begin_jump(p, BC_JMP); + end_jump(p, jump); + + // The only reason "elseif" was chosen over "else if" is because it + // reduces indentation in this one single spot. + if (consume(p, TOKEN_ELSEIF)) { + if_stat(p, true); + } else if (consume(p, TOKEN_ELSE)) { + expect(p, ':', "expected ':' to begin 'else' block"); + begin_scope(p); + while (p->cur.kind != TOKEN_END && p->cur.kind != TOKEN_EOF) + stat(p); + end_scope(p); + } + + end_jump(p, else_jump); + + if (!is_elseif && !consume(p, TOKEN_END)) + show_error(p, begin, "unterminated 'if' block"); +} + +static +void loop_stat(struct parser *p) +{ + // For now, we will just support loop . loop in + // needs a lot to happen before it can be implemented. + + struct token begin = p->prev; + + struct loop loop; + loop.outer = p->fp->loop; + loop.start = da_len(p->fp->proto->bytecode) - 1; + loop.labeled = false; + loop.scope = p->fp->scope + 1; + loop.breaks = da_create(int, 0); + p->fp->loop = &loop; + + int exit_jump = -1; + + if (!consume(p, ':')) { + expr(p); + exit_jump = begin_jump(p, BC_FALSEY_JMP); + + expect(p, ':', "expected ':' to begin 'loop' block"); + } + + if (consume(p, '<')) { + expect(p, TOKEN_IDENT, "expected loop label"); + loop.label = p->prev; + loop.labeled = true; + expect(p, '>', "expected '>' after loop label"); + } + + begin_scope(p); + while (p->cur.kind != TOKEN_END && p->cur.kind != TOKEN_EOF) + stat(p); + end_scope(p); + add_loop(p, loop.start); + + if (exit_jump != -1) + end_jump(p, exit_jump); + + for (int i = 0; i < da_len(loop.breaks); i++) + end_jump(p, loop.breaks[i]); + + if (!consume(p, TOKEN_END)) + show_error(p, begin, "unterminated 'loop' block"); + + p->fp->loop = loop.outer; + da_free(loop.breaks); +} + +static +struct loop *find_loop(struct parser *p, struct token label) +{ + struct loop *loop = p->fp->loop; + while (loop != NULL) { + if ( + loop->label.len == label.len && + memcmp(label.start, loop->label.start, label.len) == 0 + ) { + break; + } + loop = loop->outer; + + } + + if (!loop) { + show_error( + p, + label, + "unknown loop label '<%.*s>'", + label.len, label.start + ); + return p->fp->loop; + } + + return loop; +} + +static +struct loop *loop_label(struct parser *p) +{ + if (consume(p, '<')) { + expect(p, TOKEN_IDENT, "expected loop label"); + struct token label = p->prev; + expect(p, '>', "expected '>' after loop label"); + + return find_loop(p, label); + } + return p->fp->loop; +} + +static +void break_stat(struct parser *p) +{ + if (!p->fp->loop) { + show_error(p, p->prev, "'break' is only allowed in loops"); + return; + } + + struct loop *loop = loop_label(p); + pop_scope(p, loop->scope); + da_append(int, &loop->breaks, begin_jump(p, BC_JMP)); +} + +static +void next_stat(struct parser *p) +{ + if (!p->fp->loop) { + show_error(p, p->prev, "'next' is only allowed in loops"); + return; + } + + struct loop *loop = loop_label(p); + pop_scope(p, loop->scope); + add_loop(p, loop->start); +} + +static +void do_stat(struct parser *p) +{ + struct token begin = p->prev; + + begin_scope(p); + + expect(p, ':', "expected ':' to begin 'do' block"); + + while (p->cur.kind != TOKEN_END && p->cur.kind != TOKEN_EOF) + stat(p); + + if (!consume(p, TOKEN_END)) + show_error(p, begin, "unterminated 'do' block"); + + end_scope(p); +} + +static +void ret_stat(struct parser *p) +{ + if (p->fp->is_script) + show_error(p, p->prev, "ret is only allowed within functions"); + + if (p->cur.kind != TOKEN_END && p->cur.kind != ';') { + expr(p); + } else { + parser_add_byte(p, BC_ZILCH); + } + parser_add_byte(p, BC_RET); +} + +static +void stat(struct parser *p) +{ + if (consume(p, TOKEN_LET)) { + let_stat(p); + consume(p, ';'); + } else if (consume(p, TOKEN_FUN)) { + fun_stat(p); + } else if (consume(p, TOKEN_IF)) { + if_stat(p, false); + } else if (consume(p, TOKEN_LOOP)) { + loop_stat(p); + } else if (consume(p, TOKEN_BREAK)) { + break_stat(p); + consume(p, ';'); + } else if (consume(p, TOKEN_NEXT)) { + next_stat(p); + consume(p, ';'); + } else if (consume(p, TOKEN_DO)) { + do_stat(p); + } else if (consume(p, TOKEN_RET)) { + ret_stat(p); + consume(p, ';'); + } else if (consume(p, TOKEN_PRINT)) { + // temp. only til functions get functioning + expect(p, '(', "expected '('"); + expr(p); + expect(p, ')', "expected ')'"); + parser_add_byte(p, BC_PRINT); + consume(p, ';'); + } else { + expr_stat(p); + } +} + +struct us_proto *compile(const char *name, const char *src) +{ + struct us_proto *proto = create_proto(copy_str(name, -1)); + + struct parser p; + p.had_err = false; + p.can_assign = false; + p.fp = NULL; + + begin_function(&p, proto); + p.fp->is_script = true; + + lex_init(&p.lex, src); + + advance(&p); + while (!consume(&p, TOKEN_EOF)) { + stat(&p); + } + expect(&p, TOKEN_EOF, "expected EOF"); + + end_function(&p); + + return p.had_err ? NULL : proto; +} diff --git a/uscript/parser.h b/uscript/parser.h new file mode 100644 index 0000000..023bbfe --- /dev/null +++ b/uscript/parser.h @@ -0,0 +1,8 @@ +#ifndef __USCRIPT_PARSER_H__ +#define __USCRIPT_PARSER_H__ + +#include "val.h" + +struct us_proto *compile(const char *name, const char *src); + +#endif // __USCRIPT_PARSER_H__ diff --git a/uscript/us_debug.c b/uscript/us_debug.c new file mode 100644 index 0000000..8d5dfe3 --- /dev/null +++ b/uscript/us_debug.c @@ -0,0 +1,117 @@ +#include "us_debug.h" + +#include + +#include "dyn_arr.h" +#include "vm.h" + +static +char *bc_names[] = { +#define BC(name) "BC_" #name, +#include "xbytecode.h" +#undef BC +}; + +int print_instruction(struct us_proto *proto, int idx) +{ + enum bytecode instruction = proto->bytecode[idx]; + fprintf(stderr, "%04d %-15s ", idx, bc_names[instruction]); + switch (instruction) { + case BC_LOAD: { + int const_idx = proto->bytecode[idx + 1]; + char *const_str = val_to_str(proto->constants[const_idx], NULL); + fprintf( + stderr, + "%d (%s)\n", + const_idx, + const_str + ); + mem_free(const_str); + return idx + 2; + } + case BC_LOAD_FUNC: { + int const_idx = proto->bytecode[idx + 1]; + assert(proto->constants[const_idx].type == VAL_PROTO); + struct us_proto *p = get_proto(proto->constants[const_idx]); + int upvalc = p->upvalc; + fprintf( + stderr, + "%.*s() %d\n", + (int)p->name->len, p->name->chars, + upvalc + ); + return idx + 2 + upvalc * 2; + } + case BC_SET_UPVAL: + case BC_GET_UPVAL: + case BC_SET_LOCAL: + case BC_GET_LOCAL: { + int local_idx = proto->bytecode[idx + 1]; + fprintf( + stderr, + "%d\n", + local_idx + ); + return idx + 2; + } + case BC_CALL: + case BC_SMALL_INT: { + fprintf( + stderr, + "%d\n", + proto->bytecode[idx + 1] + ); + return idx + 2; + } + case BC_JMP: + case BC_LOOP: + case BC_FALSEY_JMP: { + u16 jmp = (u16)(proto->bytecode[idx + 1] << 8) | proto->bytecode[idx + 2]; + int dst = idx + jmp + 3; + if (instruction == BC_LOOP) { + dst = idx + 3 - jmp; + } + fprintf( + stderr, + "-> %04d\n", + dst + ); + return idx + 3; + } + case BC_PRINT: + case BC_TRUE: + case BC_FALSE: + case BC_ZILCH: + case BC_POP: + case BC_POP_UPVAL: + case BC_ADD: + case BC_SUB: + case BC_MULT: + case BC_DIV: + case BC_MOD: + case BC_NEG: + case BC_NOT: + case BC_CONCAT: + case BC_EQL: + case BC_NEQL: + case BC_GT: + case BC_LT: + case BC_GTE: + case BC_LTE: + case BC_RET: + putc('\n', stderr); + return idx + 1; + } + + // unreachable + return idx + 1; +} + +void print_func(struct us_proto *func) +{ + fprintf(stderr, "%s():\n", func->name->chars); + for (int i = 0; i < da_len(func->bytecode);) { + putc('\t', stderr); + i = print_instruction(func, i); + } +} diff --git a/uscript/us_debug.h b/uscript/us_debug.h new file mode 100644 index 0000000..fd7bd2b --- /dev/null +++ b/uscript/us_debug.h @@ -0,0 +1,10 @@ +#ifndef __USCRIPT_US_DEBUG_H__ +#define __USCRIPT_US_DEBUG_H__ + +#include "common.h" +#include "val.h" + +int print_instruction(struct us_proto *func, int idx); +void print_func(struct us_proto *func); + +#endif // __USCRIPT_US_DEBUG_H__ diff --git a/uscript/uscript.c b/uscript/uscript.c new file mode 100644 index 0000000..5ec43a1 --- /dev/null +++ b/uscript/uscript.c @@ -0,0 +1,34 @@ +#include "uscript.h" + +#include + +#include "dyn_arr.h" +#include "lex.h" +#include "val.h" +#include "vm.h" +#include "parser.h" + +void us_init(void) +{ + init_vm(); +} + +void us_deinit(void) +{ + deinit_vm(); +} + +void us_load_file(const char *file_path) +{ + char *file = read_file(file_path, NULL); + us_load_src(file); + mem_free(file); +} + +void us_load_src(const char *src) +{ + struct us_proto *proto = compile("main", src); + if (!proto) + return; + us_exec(create_func(proto)); +} diff --git a/uscript/uscript.h b/uscript/uscript.h new file mode 100644 index 0000000..74958e5 --- /dev/null +++ b/uscript/uscript.h @@ -0,0 +1,14 @@ +#ifndef __USCRIPT_LANG_H__ +#define __USCRIPT_LANG_H__ + +struct us_func; + +void us_init(void); +void us_deinit(void); + +void us_load_file(const char *file); +void us_load_src(const char *src); + +void us_exec(struct us_func *func); + +#endif // __USCRIPT_LANG_H__ diff --git a/uscript/val.c b/uscript/val.c new file mode 100644 index 0000000..d713645 --- /dev/null +++ b/uscript/val.c @@ -0,0 +1,204 @@ +#include "val.h" + +#include +#include + +#include "dyn_arr.h" +#include "vm.h" + +#define STR_NUM_FMT "%g" +#define STR_FUNC_FMT "" + +static +void init_obj(struct us_val val, struct us_obj *obj) +{ + (void)obj; // nothing to init yet + da_append(struct us_val, &vm.objs, val); +} + +struct us_str *take_str(char *chars, int len) +{ + if (len < 0) + len = strlen(chars); + struct us_str *str = mem_alloc(sizeof(struct us_str)); + str->chars = chars; + str->len = len; + init_obj(wrap_str(str), &str->header); + return str; +} + +struct us_str *copy_str(const char *chars, int len) +{ + if (len < 0) + len = strlen(chars); + char *copy = mem_alloc(len + 1); + memcpy(copy, chars, len); + copy[len] = '\0'; + return take_str(copy, len); +} + +struct us_proto *create_proto(struct us_str *name) +{ + struct us_proto *proto = mem_alloc(sizeof(struct us_proto)); + proto->name = name; + proto->bytecode = da_create(u8, 0); + proto->upvalc = 0; + proto->argc = 0; + proto->is_variadic = false; + proto->nconstants = 0; + init_obj(wrap_proto(proto), &proto->header); + return proto; +} + +struct us_func *create_func(struct us_proto *proto) +{ + struct us_func *func = mem_alloc(sizeof(struct us_func)); + func->proto = proto; + func->upvals = mem_alloc(sizeof(struct us_upval*) * proto->upvalc); + init_obj(wrap_func(func), &func->header); + return func; +} + +struct us_upval *create_upval(struct us_val *val) +{ + struct us_upval *upval = mem_alloc(sizeof(struct us_upval)); + upval->loc = val; + upval->closed = create_zilch(); + upval->next = NULL; + init_obj(wrap_upval(upval), &upval->header); + return upval; +} + +void free_val(struct us_val v) +{ + switch (v.type) { + case VAL_STR: { + struct us_str *str = get_str(v); + mem_free(str->chars); + mem_free(str); + break; + } + case VAL_PROTO: { + struct us_proto *proto = get_proto(v); + da_free(proto->bytecode); + mem_free(proto->upval_locs); + mem_free(proto); + break; + } + case VAL_FUNC: { + struct us_func *func = get_func(v); + mem_free(func->upvals); + mem_free(func); + break; + } + case VAL_UPVAL: + mem_free(get_obj(v)); + break; + case VAL_NUM: + case VAL_BOOL: + case VAL_ZILCH: + break; + } +} + +void proto_add_const(struct us_proto *proto, struct us_val v) +{ + proto->constants[proto->nconstants] = v; + proto_add_byte(proto, BC_LOAD); + proto_add_byte(proto, proto->nconstants++); +} + +bool vals_eql(struct us_val a, struct us_val b) +{ + if (a.type != b.type) + return false; + + switch (a.type) { + case VAL_NUM: return get_num(a) == get_num(b); + case VAL_BOOL: return get_bool(a) == get_bool(b); + case VAL_ZILCH: return true; + case VAL_STR: { + struct us_str *a_str = get_str(a); + struct us_str *b_str = get_str(b); + return a_str->len == b_str->len && + memcmp(a_str->chars, b_str->chars, a_str->len) == 0; + } + case VAL_FUNC: + case VAL_UPVAL: + case VAL_PROTO: return get_obj(a) == get_obj(b); + } + + // unreachable + return false; +} + +char *val_to_str(struct us_val v, int *len_out) +{ + // TODO: have this function return a us_str so that we own the memory, + // and so concatenation of strings is faster. + + switch (v.type) { + case VAL_NUM: { + int len = snprintf(NULL, 0, STR_NUM_FMT, get_num(v)); + char *str = mem_alloc(sizeof(char) * (len + 1)); + snprintf(str, len + 1, STR_NUM_FMT, get_num(v)); + if (len_out) + *len_out = len; + return str; + } + case VAL_BOOL: { + const char *bool_str = get_bool(v) ? "true" : "false"; + char *str = mem_alloc(strlen(bool_str) + 1); + strcpy(str, bool_str); + if (len_out) + *len_out = strlen(bool_str); + return str; + } + case VAL_ZILCH: { + const char *zilch_str = "zilch"; + char *str = mem_alloc(strlen(zilch_str) + 1); + strcpy(str, zilch_str); + if (len_out) + *len_out = strlen(zilch_str); + return str; + } + case VAL_STR: { + const struct us_str *us_str = get_str(v); + char *str = mem_alloc(sizeof(char) * us_str->len + 1); + strncpy(str, us_str->chars, us_str->len); + str[us_str->len] = 0; + if (len_out) + *len_out = us_str->len; + return str; + } + case VAL_PROTO: { + const struct us_proto *proto = get_proto(v); + int len = snprintf( + NULL, + 0, + STR_FUNC_FMT, + proto->name->chars, + (void*)proto + ); + char *str = mem_alloc(sizeof(char) * (len + 1)); + snprintf( + str, + len + 1, + STR_FUNC_FMT, + proto->name->chars, + (void*)proto + ); + if (len_out) + *len_out = len; + return str; + } + case VAL_FUNC: + return val_to_str(wrap_proto(get_func(v)->proto), len_out); + case VAL_UPVAL: + return val_to_str(*get_upval(v)->loc, len_out); + } + + // unreachable + return NULL; +} + diff --git a/uscript/val.h b/uscript/val.h new file mode 100644 index 0000000..1c314e4 --- /dev/null +++ b/uscript/val.h @@ -0,0 +1,102 @@ +#ifndef __USCRIPT_VAL_H__ +#define __USCRIPT_VAL_H__ + +#include "common.h" + +#define get_num(v) (v.dat.number) +#define get_bool(v) (v.dat.boolean) +#define get_obj(v) (v.dat.obj) +#define get_str(v) (v.dat.str) +#define get_proto(v) (v.dat.proto) +#define get_func(v) (v.dat.func) +#define get_upval(v) (v.dat.upval) + +#define create_num(n) ((struct us_val){.type=VAL_NUM, .dat={.number=(n)}}) +#define create_bool(b) ((struct us_val){.type=VAL_BOOL, .dat={.boolean=(b)}}) +#define create_zilch() ((struct us_val){.type=VAL_ZILCH, .dat={.number=0}}) +#define wrap_str(o) ((struct us_val){.type=VAL_STR, .dat={.str=(o)}}) +#define wrap_proto(o) ((struct us_val){.type=VAL_PROTO, .dat={.proto=(o)}}) +#define wrap_func(o) ((struct us_val){.type=VAL_FUNC, .dat={.func=(o)}}) +#define wrap_upval(o) ((struct us_val){.type=VAL_UPVAL, .dat={.upval=(o)}}) + +#define val_is_obj(v) (v.type >= VAL_STR) + +#define proto_add_byte(func, op) da_append(u8, &(func)->bytecode, op) + +enum val_type { + VAL_NUM, + VAL_BOOL, + VAL_ZILCH, + // Do not place any new object types before VAL_STR. Object types are + // detected by doing a comparison with VAL_STR. See val_is_obj(). + VAL_STR, + VAL_PROTO, + VAL_FUNC, + VAL_UPVAL, +}; + +struct us_val { + enum val_type type; + union { + double number; + bool boolean; + struct us_obj *obj; + struct us_str *str; + struct us_proto *proto; + struct us_func *func; + struct us_upval *upval; + } dat; +}; + +struct us_obj { + // We don't need the object header for now; it will be useful in the + // future, though. + u8 _placeholder; +}; + +struct us_str { + struct us_obj header; + char *chars; + size_t len; +}; + +struct us_proto { + struct us_obj header; + const struct us_str *name; + struct us_val constants[UINT8_MAX]; + u8 *bytecode; // dyn_arr + int* upval_locs; + int upvalc; + int argc; + bool is_variadic; + u8 nconstants; +}; + +struct us_func { + struct us_obj header; + struct us_proto *proto; + struct us_upval **upvals; +}; + +struct us_upval { + struct us_obj header; + struct us_val *loc; + struct us_val closed; + struct us_upval *next; +}; + +struct us_str *take_str(char *chars, int len); +struct us_str *copy_str(const char *chars, int len); +struct us_proto *create_proto(struct us_str *name); +struct us_func *create_func(struct us_proto *proto); +struct us_upval *create_upval(struct us_val *val); + +void free_val(struct us_val v); + +void proto_add_const(struct us_proto *func, struct us_val v); + +bool vals_eql(struct us_val a, struct us_val b); + +char *val_to_str(struct us_val v, int *len_out); + +#endif // __USCRIPT_VAL_H__ diff --git a/uscript/vm.c b/uscript/vm.c new file mode 100644 index 0000000..f9f1fe6 --- /dev/null +++ b/uscript/vm.c @@ -0,0 +1,306 @@ +#include "vm.h" + +#include +#include + +#include "dyn_arr.h" +#include "us_debug.h" +#include "uscript.h" + +struct vm vm; + +void init_vm(void) +{ + vm.objs = da_create(struct us_val, 128); + vm.cf = vm.cf_stack; + vm.stacktop = vm.stack; +} + +void deinit_vm(void) +{ + for (int i = 0; i < da_len(vm.objs); i++) { + free_val(vm.objs[i]); + } + da_clear(vm.objs); // not needed, but makes me feel better :) + da_free(vm.objs); +} + +static +bool as_bool(struct us_val v) +{ + if (v.type == VAL_ZILCH) + return false; + if (v.type == VAL_BOOL) + return get_bool(v); + return true; +} + +static +struct us_str *concat(struct us_val a, struct us_val b) +{ + int a_len; + char *a_str = val_to_str(a, &a_len); + int b_len; + char *b_str = val_to_str(b, &b_len); + + int len = a_len + b_len; + char *chars = mem_alloc(sizeof(char) * (len + 1)); + memcpy(chars, a_str, a_len); + memcpy(chars + a_len, b_str, b_len); + chars[len] = '\0'; + + mem_free(a_str); + mem_free(b_str); + + return take_str(chars, len); +} + +static +u16 read_short(struct us_proto *proto, int *i) +{ + return (u16)(proto->bytecode[++*i] << 8) | proto->bytecode[++*i]; +} + +static +void close_upvals(struct us_val *to) +{ + struct us_upval *upval = vm.open_upvals; + while (upval && upval->loc > to) { + upval->closed = *upval->loc; + upval->loc = &upval->closed; + upval = upval->next; + } + vm.open_upvals = upval; +} + +void us_exec(struct us_func *func) +{ +#define read_byte() (func->proto->bytecode[++i]) +#define read_const() (func->proto->constants[read_byte()]) + vm.cf++; + vm.cf->func = func; + vm.cf->stackbot = vm.stacktop - func->proto->argc; + + for (int i = 0; i < da_len(func->proto->bytecode); i++) { + enum bytecode instruction = func->proto->bytecode[i]; + // putc('>', stderr); + // for (struct us_val *val = vm.stack; val < vm.stacktop; val++) { + // char *val_str = val_to_str(*val, NULL); + // if (val == vm.cf->stackbot - 1) + // fprintf(stderr, " %s >", val_str); + // else + // fprintf(stderr, " %s |", val_str); + // mem_free(val_str); + // } + // putc('\n', stderr); + // putc('>', stderr); + // print_instruction(func->proto, i); + + switch (instruction) { + case BC_LOAD: + vm_push(read_const()); + break; + case BC_LOAD_FUNC: { + struct us_proto *proto = get_proto(read_const()); + + struct us_func *new_func = create_func(proto); + + for (int j = 0; j < proto->upvalc; j++) { + u8 is_local = read_byte(); + u8 index = read_byte(); + + if (is_local) { + struct us_upval *upval = create_upval( + vm.cf->stackbot + index + ); + upval->next = vm.open_upvals; + vm.open_upvals = upval; + + new_func->upvals[j] = upval; + } else { + new_func->upvals[j] = + func->upvals[index]; + } + } + + vm_push(wrap_func(new_func)); + break; + } + case BC_SMALL_INT: + vm_push(create_num(read_byte())); + break; + case BC_FALSE: vm_push(create_bool(false)); break; + case BC_TRUE: vm_push(create_bool(true)); break; + case BC_ZILCH: vm_push(create_zilch()); break; + case BC_SET_LOCAL: + vm.cf->stackbot[read_byte()] = vm_peek(); + break; + case BC_GET_LOCAL: + vm_push(vm.cf->stackbot[read_byte()]); + break; + case BC_GET_UPVAL: + vm_push(*func->upvals[read_byte()]->loc); + break; + case BC_SET_UPVAL: + *func->upvals[read_byte()]->loc = vm_peek(); + break; + case BC_POP_UPVAL: + close_upvals(vm.stacktop - 1); + vm_pop(); + break; + case BC_POP: vm_pop(); break; + case BC_ADD: { + struct us_val b = vm_pop(); + struct us_val a = vm_pop(); + if (b.type != VAL_NUM || a.type != VAL_NUM) + log_fatal(1, "Invalid operands"); + vm_push(create_num(get_num(a) + get_num(b))); + break; + } + case BC_SUB: { + struct us_val b = vm_pop(); + struct us_val a = vm_pop(); + if (b.type != VAL_NUM || a.type != VAL_NUM) + log_fatal(1, "Invalid operands"); + vm_push(create_num(get_num(a) - get_num(b))); + break; + } + case BC_MULT: { + struct us_val b = vm_pop(); + struct us_val a = vm_pop(); + if (b.type != VAL_NUM || a.type != VAL_NUM) + log_fatal(1, "Invalid operands"); + vm_push(create_num(get_num(a) * get_num(b))); + break; + } + case BC_DIV: { + struct us_val b = vm_pop(); + struct us_val a = vm_pop(); + if (b.type != VAL_NUM || a.type != VAL_NUM) + log_fatal(1, "Invalid operands"); + vm_push(create_num(get_num(a) / get_num(b))); + break; + } + case BC_MOD: { + struct us_val b = vm_pop(); + struct us_val a = vm_pop(); + if (b.type != VAL_NUM || a.type != VAL_NUM) + log_fatal(1, "Invalid operands"); + vm_push(create_num(fmod(get_num(a), get_num(b)))); + break; + } + case BC_GT: { + struct us_val b = vm_pop(); + struct us_val a = vm_pop(); + if (b.type != VAL_NUM || a.type != VAL_NUM) + log_fatal(1, "Invalid operands"); + vm_push(create_bool(get_num(a) > get_num(b))); + break; + } + case BC_LT: { + struct us_val b = vm_pop(); + struct us_val a = vm_pop(); + if (b.type != VAL_NUM || a.type != VAL_NUM) + log_fatal(1, "Invalid operands"); + vm_push(create_bool(get_num(a) < get_num(b))); + break; + } + case BC_GTE: { + struct us_val b = vm_pop(); + struct us_val a = vm_pop(); + if (b.type != VAL_NUM || a.type != VAL_NUM) + log_fatal(1, "Invalid operands"); + vm_push(create_bool(get_num(a) >= get_num(b))); + break; + } + case BC_LTE: { + struct us_val b = vm_pop(); + struct us_val a = vm_pop(); + if (b.type != VAL_NUM || a.type != VAL_NUM) + log_fatal(1, "Invalid operands"); + vm_push(create_bool(get_num(a) <= get_num(b))); + break; + } + case BC_NEG: { + struct us_val a = vm_pop(); + if (a.type != VAL_NUM) + log_fatal(1, "Invalid operand"); + vm_push(create_num(-get_num(a))); + break; + } + case BC_NOT: { + bool negated = !as_bool(vm_pop()); + vm_push(create_bool(negated)); + break; + } + case BC_CONCAT: { + struct us_val b = vm_pop(); + struct us_val a = vm_pop(); + vm_push(wrap_str(concat(a, b))); + break; + } + case BC_EQL: { + struct us_val b = vm_pop(); + struct us_val a = vm_pop(); + vm_push(create_bool(vals_eql(a, b))); + break; + } + case BC_NEQL: { + struct us_val b = vm_pop(); + struct us_val a = vm_pop(); + vm_push(create_bool(!vals_eql(a, b))); + break; + } + case BC_FALSEY_JMP: { + u16 jmp = read_short(func->proto, &i); + if (as_bool(vm_pop())) + break; + i += jmp; + break; + } + case BC_JMP: + i += read_short(func->proto, &i); + break; + case BC_LOOP: + i -= read_short(func->proto, &i); + break; + case BC_PRINT: { + char *str = val_to_str(vm_pop(), NULL); + olog(str); + mem_free(str); + break; + } + case BC_CALL: { + int argc = read_byte(); + struct us_val callee = vm.stacktop[-argc - 1]; + if (callee.type != VAL_FUNC) + log_fatal(1, "can only call functions"); + struct us_func *func = get_func(callee); + if (argc != func->proto->argc) { + log_fatal( + 1, + "wrong number of arguments to '%s()' (%d/%d)", + func->proto->name->chars, + argc, + func->proto->argc + ); + } + us_exec(func); + break; + } + case BC_RET: { + struct us_val ret_val = vm_pop(); + + close_upvals(vm.cf->stackbot - 1); + + vm.stacktop = vm.cf->stackbot - 1; + vm.cf--; + vm_push(ret_val); + return; + } + default: + log_fatal(1, "unhandled instruction %d", instruction); + break; + } + } +} diff --git a/uscript/vm.h b/uscript/vm.h new file mode 100644 index 0000000..15c21a0 --- /dev/null +++ b/uscript/vm.h @@ -0,0 +1,44 @@ +#ifndef __USCRIPT_VM_H__ +#define __USCRIPT_VM_H__ + +#include "common.h" +#include "val.h" + +#define MAX_CALL_FRAMES (64) +#define STACK_SIZE (MAX_CALL_FRAMES * 256) + +#define vm_pop() (*(--vm.stacktop)) +#define vm_peek() (vm.stacktop[-1]) +#define vm_push(v) (*vm.stacktop++ = (v)) + +enum bytecode { +#define BC(name) BC_##name, +#include "xbytecode.h" +#undef BC +}; + +struct call_frame { + struct us_func *func; + struct us_val *stackbot; +}; + +struct vm { + struct us_val *objs; + + struct call_frame cf_stack[MAX_CALL_FRAMES]; + struct call_frame *cf; + + struct us_val *global_stack; // dyn_arr + struct us_val stack[STACK_SIZE]; + struct us_val *stacktop; + + struct us_upval *open_upvals; +}; + +extern struct vm vm; + +void init_vm(void); +void deinit_vm(void); +void print_func(struct us_proto *proto); + +#endif // __USCRIPT_VM_H__ diff --git a/uscript/xbytecode.h b/uscript/xbytecode.h new file mode 100644 index 0000000..4372ea5 --- /dev/null +++ b/uscript/xbytecode.h @@ -0,0 +1,32 @@ +BC(LOAD) +BC(LOAD_FUNC) +BC(SMALL_INT) +BC(FALSE) +BC(TRUE) +BC(ZILCH) +BC(GET_LOCAL) +BC(SET_LOCAL) +BC(GET_UPVAL) +BC(SET_UPVAL) +BC(POP) +BC(POP_UPVAL) +BC(ADD) +BC(SUB) +BC(MULT) +BC(DIV) +BC(MOD) +BC(GT) +BC(GTE) +BC(LT) +BC(LTE) +BC(NEG) +BC(NOT) +BC(EQL) +BC(NEQL) +BC(CONCAT) +BC(JMP) +BC(FALSEY_JMP) +BC(LOOP) +BC(PRINT) +BC(CALL) +BC(RET) -- cgit v1.3-2-g0d8e