diff options
Diffstat (limited to 'uscript/parser.c')
| -rw-r--r-- | uscript/parser.c | 841 |
1 files changed, 841 insertions, 0 deletions
diff --git a/uscript/parser.c b/uscript/parser.c new file mode 100644 index 0000000..c51bc85 --- /dev/null +++ b/uscript/parser.c @@ -0,0 +1,841 @@ +#include "parser.h" + +#include <stdarg.h> +#include <string.h> + +#include "dyn_arr.h" +#include "lex.h" +#include "val.h" +#include "vm.h" + +#define parser_add_byte(p, byte) (proto_add_byte((p)->fp->proto, byte)) +#define parser_add_const(p, c) (proto_add_const((p)->fp->proto, c)) +#define parser_bytecode_len(p) (da_len((p)->fp->proto->bytecode)) + +enum precedence { + PREC_NONE, + PREC_ASSIGN, // = + PREC_EQL, // == != + PREC_COMP, // < <= > >= + PREC_CONCAT, + PREC_TERM, // + - + PREC_FACTOR, // * / % + PREC_UNARY, + PREC_CALL, // () +}; + +struct loop { + struct loop *outer; + int start; + int scope; + bool labeled; + struct token label; + int *breaks; // dyn_arr +}; + +struct variable { + struct token name; + int scope; + bool captured; +}; + +struct upval { + struct token name; + bool is_local; + u8 index; +}; + +struct func_parser { + struct func_parser *outer; + struct us_proto *proto; + struct upval *upvals; // dyn_arr + struct variable *locals; // dyn_arr + struct loop *loop; + int scope; + bool is_script; +}; + +struct parser { + struct lexer lex; + + struct token prev; + struct token cur; + + struct func_parser *fp; + + bool can_assign; + bool had_err; +}; + +typedef void (*fn_parse)(struct parser *p); + +struct expr { + fn_parse prefix; + fn_parse infix; + enum precedence prec; +}; + +static +void show_error(struct parser *p, struct token tok, const char *msg, ...) +{ + if (tok.kind == TOKEN_EOF) + fprintf(stderr, "on line %d at EOF:\n\t", tok.line); + else + fprintf(stderr, "on line %d at '%.*s':\n\t", tok.line, tok.len, tok.start); + + va_list args; + va_start(args, msg); + vfprintf(stderr, msg, args); + va_end(args); + + putc('\n', stderr); + + p->had_err = true; +} + +static +void advance(struct parser *p) +{ + p->prev = p->cur; + while (true) { + p->cur = lex_next_token(&p->lex); + // print_token(p->cur); + if (p->cur.kind != TOKEN_ERR) + break; + show_error(p, p->prev, p->cur.start); + }; +} + +static +int begin_jump(struct parser *p, u8 instruction) +{ + parser_add_byte(p, instruction); + parser_add_byte(p, 0xFF); + parser_add_byte(p, 0xFF); + return parser_bytecode_len(p) - 2; +} + +static +void end_jump(struct parser *p, int loc) +{ + int jump = parser_bytecode_len(p) - loc - 2; + if (jump > UINT16_MAX) + show_error(p, p->prev, "jump too large"); + p->fp->proto->bytecode[loc] = (jump >> 8) & 0xFF; + p->fp->proto->bytecode[loc+1] = jump & 0xFF; +} + +static +void add_loop(struct parser *p, int loc) +{ + int jump = parser_bytecode_len(p) - loc + 2; + if (jump > UINT16_MAX) + show_error(p, p->prev, "jump too large"); + parser_add_byte(p, BC_LOOP); + parser_add_byte(p, (jump >> 8) & 0xFF); + parser_add_byte(p, jump & 0xFF); +} + +static +bool consume(struct parser *p, u16 tok) +{ + if (p->cur.kind != tok) + return false; + advance(p); + return true; +} + +static +void expect(struct parser *p, u16 tok, const char *err) +{ + if (p->cur.kind != tok) { + show_error(p, p->cur, err); + return; + } + advance(p); +} + +static +void declare_variable(struct parser *p, struct token name) +{ + if (da_len(p->fp->locals) > UINT8_MAX) + show_error(p, name, "too many locals"); + struct variable slot; + slot.scope = p->fp->scope; + slot.name = name; + slot.captured = false; + da_append(struct variable, &p->fp->locals, slot); +} + +static +int find_local(struct func_parser *fp, struct token name) +{ + for (int i = da_len(fp->locals) - 1; i >= 0; i--) { + struct variable local = fp->locals[i]; + if ( + name.len == local.name.len && + memcmp(name.start, local.name.start, name.len) == 0 + ) + return i; + } + return -1; +} + +static +int find_upval(struct parser *p, struct func_parser *fp, struct token name) +{ + if (fp == NULL) + return -1; + + for (int i = da_len(fp->upvals); i >= 0; i--) { + struct upval upval = fp->upvals[i]; + if ( + name.len == upval.name.len && + memcmp(name.start, upval.name.start, name.len) == 0 + ) + return i; + } + + // Didn't find one already captured in an outer scope. Try to find a + // local variable. + int local = find_local(fp->outer, name); + if (local != -1) { + fp->outer->locals[local].captured = true; + struct upval upval; + upval.name = name; + upval.is_local = true; + upval.index = local; + da_append( + struct upval, + &fp->upvals, + upval + ); + fp->proto->upvalc++; + return da_len(fp->upvals) - 1; + } + + // Didn't find an already captured upval. Try to capture one from an + // outer scope. + int outer = find_upval(p, fp->outer, name); + if (outer != -1) { + struct upval upval; + upval.name = name; + upval.is_local = false; + upval.index = outer; + da_append( + struct upval, + &fp->upvals, + upval + ); + fp->proto->upvalc++; + return da_len(fp->upvals) - 1; + } + + return -1; +} + +static +int pop_scope(struct parser *p, int scope) +{ + int locals_len = da_len(p->fp->locals); + for (int i = da_len(p->fp->locals) - 1; i >= 0; i--) { + struct variable local = p->fp->locals[i]; + if (local.scope < scope) + break; + parser_add_byte(p, local.captured ? BC_POP_UPVAL : BC_POP); + locals_len = i; + } + return locals_len; +} + +static +void begin_scope(struct parser *p) +{ + p->fp->scope++; +} + +static +void end_scope(struct parser *p) +{ + *da_len_ptr(p->fp->locals) = pop_scope(p, p->fp->scope); + p->fp->scope--; +} + +static +void begin_function(struct parser *p, struct us_proto *proto) +{ + struct func_parser *fp = mem_alloc(sizeof(struct func_parser)); + fp->outer = p->fp; + fp->proto = proto; + fp->loop = NULL; + fp->locals = da_create(struct variable, 0); + fp->upvals = da_create(struct upval, 0); + fp->scope = 0; + fp->is_script = false; + p->fp = fp; +} + +static +void end_function(struct parser *p) +{ + parser_add_byte(p, BC_ZILCH); + parser_add_byte(p, BC_RET); + + struct func_parser *fp = p->fp; + p->fp = fp->outer; + + if (p->fp) { + p->fp->proto->constants[p->fp->proto->nconstants] = + wrap_proto(fp->proto); + parser_add_byte(p, BC_LOAD_FUNC); + parser_add_byte(p, p->fp->proto->nconstants++); + + for (int i = 0; i < fp->proto->upvalc; i++) { + parser_add_byte(p, fp->upvals[i].is_local ? 1 : 0); + parser_add_byte(p, (u8)fp->upvals[i].index); + } + } + +#ifdef UE_DEBUG + print_func(fp->proto); +#endif + + da_free(fp->locals); + da_free(fp->upvals); + mem_free(fp); +} + +static void parse_expr(struct parser *p, enum precedence prec); +static struct expr get_expr(struct token tok); +static void expr(struct parser *p); + +static +void parse_number(struct parser *p) +{ + struct token num_tok = p->prev; + double n = strtod(num_tok.start, NULL); + + // Avoid filling up the constants list with numbers that could easily + // just be part of the bytecode. + if (n <= UINT8_MAX && (u8)n == n) { + parser_add_byte(p, BC_SMALL_INT); + parser_add_byte(p, (u8)n); + return; + } + + parser_add_const(p, create_num(n)); +} + +static +void parse_string(struct parser *p) +{ + parser_add_const(p, p->prev.val); +} + +static +void parse_literal(struct parser *p) +{ + switch (p->prev.kind) { + case TOKEN_FALSE: parser_add_byte(p, BC_FALSE); break; + case TOKEN_TRUE: parser_add_byte(p, BC_TRUE); break; + case TOKEN_ZILCH: parser_add_byte(p, BC_ZILCH); break; + } +} + +static +void parse_ident(struct parser *p) +{ + struct token ident = p->prev; + + u8 setter; + u8 getter; + + int var = find_local(p->fp, ident); + if (var != -1) { + setter = BC_SET_LOCAL; + getter = BC_GET_LOCAL; + } else if ((var = find_upval(p, p->fp, ident)) != -1) { + setter = BC_SET_UPVAL; + getter = BC_GET_UPVAL; + } + + if (var == -1) { + show_error(p, ident, "undefined variable"); + return; + } + + if (p->can_assign && consume(p, '=')) { + expr(p); + parser_add_byte(p, setter); + } else { + parser_add_byte(p, getter); + } + parser_add_byte(p, (u8)var); +} + +static +void parse_binary(struct parser *p) +{ + struct token op = p->prev; + + parse_expr(p, get_expr(op).prec + 1); + + switch (op.kind) { + case '+': parser_add_byte(p, BC_ADD); break; + case '-': parser_add_byte(p, BC_SUB); break; + case '*': parser_add_byte(p, BC_MULT); break; + case '/': parser_add_byte(p, BC_DIV); break; + case '%': parser_add_byte(p, BC_MOD); break; + case '>': parser_add_byte(p, BC_GT); break; + case '<': parser_add_byte(p, BC_LT); break; + case TOKEN_DOT_DOT: parser_add_byte(p, BC_CONCAT); break; + case TOKEN_EQL: parser_add_byte(p, BC_EQL); break; + case TOKEN_NEQL: parser_add_byte(p, BC_NEQL); break; + case TOKEN_GTEQL: parser_add_byte(p, BC_GTE); break; + case TOKEN_LTEQL: parser_add_byte(p, BC_LTE); break; + } +} + +static +void parse_unary(struct parser *p) +{ + struct token op = p->prev; + + parse_expr(p, PREC_UNARY); + + switch (op.kind) { + case '-': parser_add_byte(p, BC_NEG); break; + case '!': parser_add_byte(p, BC_NOT); break; + } +} + +static +void parse_grouping(struct parser *p) +{ + expr(p); + expect(p, ')', "expected ')'"); +} + +static +void parse_call(struct parser *p) +{ + int argc = 0; + if (p->cur.kind != ')') { + do { + argc++; + expr(p); + } while (consume(p, ',')); + } + expect(p, ')', "expected ')'"); + + if (argc > UINT8_MAX) + show_error(p, p->prev, "max argument count is 255"); + + parser_add_byte(p, BC_CALL); + parser_add_byte(p, argc); +} + +static +struct expr expressions[] = { + ['('] = {parse_grouping, parse_call, PREC_CALL}, + [')'] = {NULL, NULL, PREC_NONE}, + ['{'] = {NULL, NULL, PREC_NONE}, + ['}'] = {NULL, NULL, PREC_NONE}, + ['['] = {NULL, NULL, PREC_NONE}, + [']'] = {NULL, NULL, PREC_NONE}, + [','] = {NULL, NULL, PREC_NONE}, + [';'] = {NULL, NULL, PREC_NONE}, + [':'] = {NULL, NULL, PREC_NONE}, + ['.'] = {NULL, NULL, PREC_NONE}, + ['+'] = {NULL, parse_binary, PREC_TERM}, + ['-'] = {parse_unary, parse_binary, PREC_TERM}, + ['*'] = {NULL, parse_binary, PREC_FACTOR}, + ['/'] = {NULL, parse_binary, PREC_FACTOR}, + ['%'] = {NULL, parse_binary, PREC_FACTOR}, + ['<'] = {NULL, parse_binary, PREC_COMP}, + ['>'] = {NULL, parse_binary, PREC_COMP}, + ['='] = {NULL, NULL, PREC_NONE}, + ['!'] = {parse_unary, NULL, PREC_NONE}, + [TOKEN_EOF] = {NULL, NULL, PREC_NONE}, + [TOKEN_BREAK] = {NULL, NULL, PREC_NONE}, + [TOKEN_DIV_EQL] = {NULL, NULL, PREC_NONE}, + [TOKEN_DOT_DOT] = {NULL, parse_binary, PREC_CONCAT}, + [TOKEN_DO] = {NULL, NULL, PREC_NONE}, + [TOKEN_ELSE] = {NULL, NULL, PREC_NONE}, + [TOKEN_END] = {NULL, NULL, PREC_NONE}, + [TOKEN_EQL] = {NULL, parse_binary, PREC_EQL}, + [TOKEN_ERR] = {NULL, NULL, PREC_NONE}, + [TOKEN_FALSE] = {parse_literal, NULL, PREC_NONE}, + [TOKEN_FUN] = {NULL, NULL, PREC_NONE}, + [TOKEN_GLOBAL] = {NULL, NULL, PREC_NONE}, + [TOKEN_GTEQL] = {NULL, parse_binary, PREC_COMP}, + [TOKEN_IDENT] = {parse_ident, NULL, PREC_NONE}, + [TOKEN_IF] = {NULL, NULL, PREC_NONE}, + [TOKEN_IN] = {NULL, NULL, PREC_NONE}, + [TOKEN_LET] = {NULL, NULL, PREC_NONE}, + [TOKEN_LOOP] = {NULL, NULL, PREC_NONE}, + [TOKEN_LTEQL] = {NULL, parse_binary, PREC_COMP}, + [TOKEN_MINUS_EQL] = {NULL, NULL, PREC_NONE}, + [TOKEN_MOD] = {NULL, NULL, PREC_NONE}, + [TOKEN_MOD_EQL] = {NULL, NULL, PREC_NONE}, + [TOKEN_MULT_EQL] = {NULL, NULL, PREC_NONE}, + [TOKEN_NEQL] = {NULL, parse_binary, PREC_EQL}, + [TOKEN_NEXT] = {NULL, NULL, PREC_NONE}, + [TOKEN_NUM] = {parse_number, NULL, PREC_NONE}, + [TOKEN_PLUS_EQL] = {NULL, NULL, PREC_NONE}, + [TOKEN_RET] = {NULL, NULL, PREC_NONE}, + [TOKEN_STR] = {parse_string, NULL, PREC_NONE}, + [TOKEN_TRUE] = {parse_literal, NULL, PREC_NONE}, + [TOKEN_ZILCH] = {parse_literal, NULL, PREC_NONE}, +}; + +static void stat(struct parser *p); + +static +struct expr get_expr(struct token tok) +{ + return expressions[tok.kind]; +} + +static +void parse_expr(struct parser *p, enum precedence prec) +{ + advance(p); + + fn_parse prefix = get_expr(p->prev).prefix; + if (!prefix) { + show_error(p, p->prev, "expected expression"); + return; + } + + p->can_assign = prec <= PREC_ASSIGN; + prefix(p); + + while (prec <= get_expr(p->cur).prec) { + advance(p); + get_expr(p->prev).infix(p); + } + + if (p->can_assign && consume(p, '=')) { + show_error(p, p->prev, "bad assignment"); + } + p->can_assign = false; +} + +static +void expr(struct parser *p) +{ + parse_expr(p, PREC_ASSIGN); +} + +static +void expr_stat(struct parser *p) +{ + expr(p); + parser_add_byte(p, BC_POP); +} + +static +void let_stat(struct parser *p) +{ + do { + expect(p, TOKEN_IDENT, "expected variable name"); + struct token name = p->prev; + + if (consume(p, '=')) + expr(p); + else + parser_add_byte(p, BC_ZILCH); + + declare_variable(p, name); + } while (consume(p, ',')); +} + +static +void fun_stat(struct parser *p) +{ + expect(p, TOKEN_IDENT, "expected function name"); + struct token name_token = p->prev; + + struct us_str *name = copy_str(name_token.start, name_token.len); + struct us_proto *proto = create_proto(name); + + // parser_add_const(p, wrap_func(func)); + declare_variable(p, name_token); + + begin_function(p, proto); + + expect(p, '(', "expected '(' after function name"); + if (p->cur.kind != ')') { + do { + expect(p, TOKEN_IDENT, "expected arguement name"); + declare_variable(p, p->prev); + proto->argc++; + } while (consume(p, ',')); + } + expect(p, ')', "expected ')' after arguments"); + + while (p->cur.kind != TOKEN_END && p->cur.kind != TOKEN_EOF) + stat(p); + + end_function(p); + + expect(p, TOKEN_END, "unterminated function"); +} + +static +void if_stat(struct parser *p, bool is_elseif) +{ + struct token begin = p->prev; + + expr(p); + + int jump = begin_jump(p, BC_FALSEY_JMP); + + if (!is_elseif) + expect(p, ':', "expected ':' to begin 'if' block"); + else + expect(p, ':', "expected ':' to begin 'elseif' block"); + + begin_scope(p); + + while ( + p->cur.kind != TOKEN_END && + p->cur.kind != TOKEN_ELSE && + p->cur.kind != TOKEN_ELSEIF && + p->cur.kind != TOKEN_EOF + ) + stat(p); + + end_scope(p); + + int else_jump = begin_jump(p, BC_JMP); + end_jump(p, jump); + + // The only reason "elseif" was chosen over "else if" is because it + // reduces indentation in this one single spot. + if (consume(p, TOKEN_ELSEIF)) { + if_stat(p, true); + } else if (consume(p, TOKEN_ELSE)) { + expect(p, ':', "expected ':' to begin 'else' block"); + begin_scope(p); + while (p->cur.kind != TOKEN_END && p->cur.kind != TOKEN_EOF) + stat(p); + end_scope(p); + } + + end_jump(p, else_jump); + + if (!is_elseif && !consume(p, TOKEN_END)) + show_error(p, begin, "unterminated 'if' block"); +} + +static +void loop_stat(struct parser *p) +{ + // For now, we will just support loop <cond>. loop <var> in <expr> + // needs a lot to happen before it can be implemented. + + struct token begin = p->prev; + + struct loop loop; + loop.outer = p->fp->loop; + loop.start = da_len(p->fp->proto->bytecode) - 1; + loop.labeled = false; + loop.scope = p->fp->scope + 1; + loop.breaks = da_create(int, 0); + p->fp->loop = &loop; + + int exit_jump = -1; + + if (!consume(p, ':')) { + expr(p); + exit_jump = begin_jump(p, BC_FALSEY_JMP); + + expect(p, ':', "expected ':' to begin 'loop' block"); + } + + if (consume(p, '<')) { + expect(p, TOKEN_IDENT, "expected loop label"); + loop.label = p->prev; + loop.labeled = true; + expect(p, '>', "expected '>' after loop label"); + } + + begin_scope(p); + while (p->cur.kind != TOKEN_END && p->cur.kind != TOKEN_EOF) + stat(p); + end_scope(p); + add_loop(p, loop.start); + + if (exit_jump != -1) + end_jump(p, exit_jump); + + for (int i = 0; i < da_len(loop.breaks); i++) + end_jump(p, loop.breaks[i]); + + if (!consume(p, TOKEN_END)) + show_error(p, begin, "unterminated 'loop' block"); + + p->fp->loop = loop.outer; + da_free(loop.breaks); +} + +static +struct loop *find_loop(struct parser *p, struct token label) +{ + struct loop *loop = p->fp->loop; + while (loop != NULL) { + if ( + loop->label.len == label.len && + memcmp(label.start, loop->label.start, label.len) == 0 + ) { + break; + } + loop = loop->outer; + + } + + if (!loop) { + show_error( + p, + label, + "unknown loop label '<%.*s>'", + label.len, label.start + ); + return p->fp->loop; + } + + return loop; +} + +static +struct loop *loop_label(struct parser *p) +{ + if (consume(p, '<')) { + expect(p, TOKEN_IDENT, "expected loop label"); + struct token label = p->prev; + expect(p, '>', "expected '>' after loop label"); + + return find_loop(p, label); + } + return p->fp->loop; +} + +static +void break_stat(struct parser *p) +{ + if (!p->fp->loop) { + show_error(p, p->prev, "'break' is only allowed in loops"); + return; + } + + struct loop *loop = loop_label(p); + pop_scope(p, loop->scope); + da_append(int, &loop->breaks, begin_jump(p, BC_JMP)); +} + +static +void next_stat(struct parser *p) +{ + if (!p->fp->loop) { + show_error(p, p->prev, "'next' is only allowed in loops"); + return; + } + + struct loop *loop = loop_label(p); + pop_scope(p, loop->scope); + add_loop(p, loop->start); +} + +static +void do_stat(struct parser *p) +{ + struct token begin = p->prev; + + begin_scope(p); + + expect(p, ':', "expected ':' to begin 'do' block"); + + while (p->cur.kind != TOKEN_END && p->cur.kind != TOKEN_EOF) + stat(p); + + if (!consume(p, TOKEN_END)) + show_error(p, begin, "unterminated 'do' block"); + + end_scope(p); +} + +static +void ret_stat(struct parser *p) +{ + if (p->fp->is_script) + show_error(p, p->prev, "ret is only allowed within functions"); + + if (p->cur.kind != TOKEN_END && p->cur.kind != ';') { + expr(p); + } else { + parser_add_byte(p, BC_ZILCH); + } + parser_add_byte(p, BC_RET); +} + +static +void stat(struct parser *p) +{ + if (consume(p, TOKEN_LET)) { + let_stat(p); + consume(p, ';'); + } else if (consume(p, TOKEN_FUN)) { + fun_stat(p); + } else if (consume(p, TOKEN_IF)) { + if_stat(p, false); + } else if (consume(p, TOKEN_LOOP)) { + loop_stat(p); + } else if (consume(p, TOKEN_BREAK)) { + break_stat(p); + consume(p, ';'); + } else if (consume(p, TOKEN_NEXT)) { + next_stat(p); + consume(p, ';'); + } else if (consume(p, TOKEN_DO)) { + do_stat(p); + } else if (consume(p, TOKEN_RET)) { + ret_stat(p); + consume(p, ';'); + } else if (consume(p, TOKEN_PRINT)) { + // temp. only til functions get functioning + expect(p, '(', "expected '('"); + expr(p); + expect(p, ')', "expected ')'"); + parser_add_byte(p, BC_PRINT); + consume(p, ';'); + } else { + expr_stat(p); + } +} + +struct us_proto *compile(const char *name, const char *src) +{ + struct us_proto *proto = create_proto(copy_str(name, -1)); + + struct parser p; + p.had_err = false; + p.can_assign = false; + p.fp = NULL; + + begin_function(&p, proto); + p.fp->is_script = true; + + lex_init(&p.lex, src); + + advance(&p); + while (!consume(&p, TOKEN_EOF)) { + stat(&p); + } + expect(&p, TOKEN_EOF, "expected EOF"); + + end_function(&p); + + return p.had_err ? NULL : proto; +} |
