#include "parser.h" #include #include #include "dyn_arr.h" #include "lex.h" #include "val.h" #include "vm.h" #define parser_add_byte(p, byte) (proto_add_byte((p)->fp->proto, byte)) #define parser_add_const(p, c) (proto_add_const((p)->fp->proto, c)) #define parser_bytecode_len(p) (da_len((p)->fp->proto->bytecode)) enum precedence { PREC_NONE, PREC_ASSIGN, // = PREC_EQL, // == != PREC_COMP, // < <= > >= PREC_CONCAT, PREC_TERM, // + - PREC_FACTOR, // * / % PREC_UNARY, PREC_CALL, // () }; struct loop { struct loop *outer; int start; int scope; bool labeled; struct token label; int *breaks; // dyn_arr }; struct variable { struct token name; int scope; bool captured; }; struct upval { struct token name; bool is_local; u8 index; }; struct func_parser { struct func_parser *outer; struct us_proto *proto; struct upval *upvals; // dyn_arr struct variable *locals; // dyn_arr struct loop *loop; int scope; bool is_script; }; struct parser { struct lexer lex; struct token prev; struct token cur; struct func_parser *fp; bool can_assign; bool had_err; }; typedef void (*fn_parse)(struct parser *p); struct expr { fn_parse prefix; fn_parse infix; enum precedence prec; }; static void show_error(struct parser *p, struct token tok, const char *msg, ...) { if (tok.kind == TOKEN_EOF) fprintf(stderr, "on line %d at EOF:\n\t", tok.line); else fprintf(stderr, "on line %d at '%.*s':\n\t", tok.line, tok.len, tok.start); va_list args; va_start(args, msg); vfprintf(stderr, msg, args); va_end(args); putc('\n', stderr); p->had_err = true; } static void advance(struct parser *p) { p->prev = p->cur; while (true) { p->cur = lex_next_token(&p->lex); // print_token(p->cur); if (p->cur.kind != TOKEN_ERR) break; show_error(p, p->prev, p->cur.start); }; } static int begin_jump(struct parser *p, u8 instruction) { parser_add_byte(p, instruction); parser_add_byte(p, 0xFF); parser_add_byte(p, 0xFF); return parser_bytecode_len(p) - 2; } static void end_jump(struct parser *p, int loc) { int jump = parser_bytecode_len(p) - loc - 2; if (jump > UINT16_MAX) show_error(p, p->prev, "jump too large"); p->fp->proto->bytecode[loc] = (jump >> 8) & 0xFF; p->fp->proto->bytecode[loc+1] = jump & 0xFF; } static void add_loop(struct parser *p, int loc) { int jump = parser_bytecode_len(p) - loc + 2; if (jump > UINT16_MAX) show_error(p, p->prev, "jump too large"); parser_add_byte(p, BC_LOOP); parser_add_byte(p, (jump >> 8) & 0xFF); parser_add_byte(p, jump & 0xFF); } static bool consume(struct parser *p, u16 tok) { if (p->cur.kind != tok) return false; advance(p); return true; } static void expect(struct parser *p, u16 tok, const char *err) { if (p->cur.kind != tok) { show_error(p, p->cur, err); return; } advance(p); } static int declare_variable(struct parser *p, struct token name) { if (da_len(p->fp->locals) > UINT8_MAX) show_error(p, name, "too many locals"); struct variable slot; slot.scope = p->fp->scope; slot.name = name; slot.captured = false; da_append(struct variable, &p->fp->locals, slot); return da_len(p->fp->locals) - 1; } static int declare_named_variable(struct parser *p, const char* name) { struct token token; token.start = name; token.len = (int)strlen(name); token.line = p->cur.line; token.col = p->cur.col; token.kind = TOKEN_IDENT; token.val = create_zilch(); return declare_variable(p, token); } static int find_local(struct func_parser *fp, struct token name) { if (fp == NULL) return -1; for (int i = da_len(fp->locals) - 1; i >= 0; i--) { struct variable local = fp->locals[i]; if ( name.len == local.name.len && memcmp(name.start, local.name.start, name.len) == 0 ) return i; } return -1; } static int find_global(struct token name) { for (int i = da_len(vm.gstack) - 1; i >= 0; i--) { struct global global = vm.gstack[i]; if ( (size_t)name.len == global.name->len && memcmp(name.start, global.name->chars, name.len) == 0 ) return i; } return -1; } static int find_upval(struct parser *p, struct func_parser *fp, struct token name) { if (fp == NULL) return -1; for (int i = da_len(fp->upvals); i >= 0; i--) { struct upval upval = fp->upvals[i]; if ( name.len == upval.name.len && memcmp(name.start, upval.name.start, name.len) == 0 ) return i; } // Didn't find one already captured in an outer scope. Try to find a // local variable. int local = find_local(fp->outer, name); if (local != -1) { fp->outer->locals[local].captured = true; struct upval upval; upval.name = name; upval.is_local = true; upval.index = local; da_append( struct upval, &fp->upvals, upval ); fp->proto->upvalc++; return da_len(fp->upvals) - 1; } // Didn't find an already captured upval. Try to capture one from an // outer scope. int outer = find_upval(p, fp->outer, name); if (outer != -1) { struct upval upval; upval.name = name; upval.is_local = false; upval.index = outer; da_append( struct upval, &fp->upvals, upval ); fp->proto->upvalc++; return da_len(fp->upvals) - 1; } return -1; } static int pop_scope(struct parser *p, int scope) { int locals_len = da_len(p->fp->locals); for (int i = da_len(p->fp->locals) - 1; i >= 0; i--) { struct variable local = p->fp->locals[i]; if (local.scope < scope) break; parser_add_byte(p, local.captured ? BC_POP_UPVAL : BC_POP); locals_len = i; } return locals_len; } static void begin_scope(struct parser *p) { p->fp->scope++; } static void end_scope(struct parser *p) { *da_len_ptr(p->fp->locals) = pop_scope(p, p->fp->scope); p->fp->scope--; } static void begin_function(struct parser *p, struct us_proto *proto) { struct func_parser *fp = mem_alloc(sizeof(struct func_parser)); fp->outer = p->fp; fp->proto = proto; fp->loop = NULL; fp->locals = da_create(struct variable, 0); fp->upvals = da_create(struct upval, 0); fp->scope = 0; fp->is_script = false; p->fp = fp; } static void end_function(struct parser *p) { parser_add_byte(p, BC_ZILCH); parser_add_byte(p, BC_RET); struct func_parser *fp = p->fp; p->fp = fp->outer; if (p->fp) { p->fp->proto->constants[p->fp->proto->nconstants] = wrap_proto(fp->proto); parser_add_byte(p, BC_LOAD_FUNC); parser_add_byte(p, p->fp->proto->nconstants++); for (int i = 0; i < fp->proto->upvalc; i++) { parser_add_byte(p, fp->upvals[i].is_local ? 1 : 0); parser_add_byte(p, (u8)fp->upvals[i].index); } } #ifdef UE_DEBUG print_func(fp->proto); #endif da_free(fp->locals); da_free(fp->upvals); mem_free(fp); } static void parse_expr(struct parser *p, enum precedence prec); static struct expr get_expr(struct token tok); static void expr(struct parser *p); static void parse_number(struct parser *p) { struct token num_tok = p->prev; double n = strtod(num_tok.start, NULL); // Avoid filling up the constants list with numbers that could easily // just be part of the bytecode. if (n <= UINT8_MAX && (u8)n == n) { parser_add_byte(p, BC_SMALL_INT); parser_add_byte(p, (u8)n); return; } parser_add_const(p, create_num(n)); } static void parse_string(struct parser *p) { parser_add_const(p, p->prev.val); } static void parse_literal(struct parser *p) { switch (p->prev.kind) { case TOKEN_FALSE: parser_add_byte(p, BC_FALSE); break; case TOKEN_TRUE: parser_add_byte(p, BC_TRUE); break; case TOKEN_ZILCH: parser_add_byte(p, BC_ZILCH); break; } } static void parse_arr(struct parser *p) { int arr_len = 0; if (p->cur.kind != ']') { do { if (p->cur.kind == ']') break; expr(p); arr_len++; } while (consume(p, ',')); } expect(p, ']', "unterminated array literal"); if (arr_len > UINT8_MAX) { show_error(p, p->prev, "too many literals the array (max 255)"); } parser_add_byte(p, BC_ARR); parser_add_byte(p, (u8)arr_len); } static void parser_add_var_access(struct parser *p, u8 acc, int var) { parser_add_byte(p, acc); if (acc == BC_SET_GLOBAL || acc == BC_GET_GLOBAL) { parser_add_byte(p, (var >> 8) & 0xFF); parser_add_byte(p, var & 0xFF); } else { parser_add_byte(p, (u8)var); } } static void parse_ident(struct parser *p) { struct token ident = p->prev; u8 setter; u8 getter; int var = find_local(p->fp, ident); if (var != -1) { setter = BC_SET_LOCAL; getter = BC_GET_LOCAL; } else if ((var = find_upval(p, p->fp, ident)) != -1) { setter = BC_SET_UPVAL; getter = BC_GET_UPVAL; } else if ((var = find_global(ident)) != -1) { setter = BC_SET_GLOBAL; getter = BC_GET_GLOBAL; } if (var == -1) { show_error(p, ident, "undefined variable"); return; } #define compound_op(op) \ do { \ parser_add_var_access(p, getter, var); \ expr(p); \ parser_add_byte(p, op); \ parser_add_var_access(p, setter, var); \ } while (false) if (p->can_assign && consume(p, '=')) { expr(p); parser_add_var_access(p, setter, var); } else if (p->can_assign && consume(p, TOKEN_PLUS_EQL)) { compound_op(BC_ADD); } else if (p->can_assign && consume(p, TOKEN_MINUS_EQL)) { compound_op(BC_SUB); } else if (p->can_assign && consume(p, TOKEN_MULT_EQL)) { compound_op(BC_MULT); } else if (p->can_assign && consume(p, TOKEN_DIV_EQL)) { compound_op(BC_DIV); } else if (p->can_assign && consume(p, TOKEN_MOD_EQL)) { compound_op(BC_MOD); } else { parser_add_var_access(p, getter, var); } #undef compound_op } static void parse_binary(struct parser *p) { struct token op = p->prev; parse_expr(p, get_expr(op).prec + 1); switch (op.kind) { case '+': parser_add_byte(p, BC_ADD); break; case '-': parser_add_byte(p, BC_SUB); break; case '*': parser_add_byte(p, BC_MULT); break; case '/': parser_add_byte(p, BC_DIV); break; case '%': parser_add_byte(p, BC_MOD); break; case '>': parser_add_byte(p, BC_GT); break; case '<': parser_add_byte(p, BC_LT); break; case TOKEN_DOT_DOT: parser_add_byte(p, BC_CONCAT); break; case TOKEN_EQL: parser_add_byte(p, BC_EQL); break; case TOKEN_NEQL: parser_add_byte(p, BC_NEQL); break; case TOKEN_GTEQL: parser_add_byte(p, BC_GTE); break; case TOKEN_LTEQL: parser_add_byte(p, BC_LTE); break; } } static void parse_unary(struct parser *p) { struct token op = p->prev; parse_expr(p, PREC_UNARY); switch (op.kind) { case '-': parser_add_byte(p, BC_NEG); break; case '!': parser_add_byte(p, BC_NOT); break; } } static void parse_grouping(struct parser *p) { expr(p); expect(p, ')', "expected ')'"); } static void parse_call(struct parser *p) { int argc = 0; if (p->cur.kind != ')') { do { argc++; expr(p); } while (consume(p, ',')); } expect(p, ')', "expected ')'"); if (argc > UINT8_MAX) show_error(p, p->prev, "max argument count is 255"); parser_add_byte(p, BC_CALL); parser_add_byte(p, argc); } static void parse_index(struct parser *p) { bool can_assign = p->can_assign; expr(p); expect(p, ']', "expected ']'"); #define compound_op(op) \ do { \ parser_add_byte(p, BC_PUSH_INDEX); \ expr(p); \ parser_add_byte(p, op); \ parser_add_byte(p, BC_SET_INDEX); \ } while (false) if (can_assign && consume(p, '=')) { expr(p); parser_add_byte(p, BC_SET_INDEX); } else if (can_assign && consume(p, TOKEN_PLUS_EQL)) { compound_op(BC_ADD); } else if (can_assign && consume(p, TOKEN_MINUS_EQL)) { compound_op(BC_SUB); } else if (can_assign && consume(p, TOKEN_MULT_EQL)) { compound_op(BC_MULT); } else if (can_assign && consume(p, TOKEN_DIV_EQL)) { compound_op(BC_DIV); } else if (can_assign && consume(p, TOKEN_MOD_EQL)) { compound_op(BC_MOD); } else { parser_add_byte(p, BC_GET_INDEX); } #undef compound_op } static struct expr expressions[] = { ['('] = {parse_grouping, parse_call, PREC_CALL}, [')'] = {NULL, NULL, PREC_NONE}, ['{'] = {NULL, NULL, PREC_NONE}, ['}'] = {NULL, NULL, PREC_NONE}, ['['] = {parse_arr, parse_index, PREC_CALL}, [']'] = {NULL, NULL, PREC_NONE}, [','] = {NULL, NULL, PREC_NONE}, [';'] = {NULL, NULL, PREC_NONE}, [':'] = {NULL, NULL, PREC_NONE}, ['.'] = {NULL, NULL, PREC_NONE}, ['+'] = {NULL, parse_binary, PREC_TERM}, ['-'] = {parse_unary, parse_binary, PREC_TERM}, ['*'] = {NULL, parse_binary, PREC_FACTOR}, ['/'] = {NULL, parse_binary, PREC_FACTOR}, ['%'] = {NULL, parse_binary, PREC_FACTOR}, ['<'] = {NULL, parse_binary, PREC_COMP}, ['>'] = {NULL, parse_binary, PREC_COMP}, ['='] = {NULL, NULL, PREC_NONE}, ['!'] = {parse_unary, NULL, PREC_NONE}, [TOKEN_EOF] = {NULL, NULL, PREC_NONE}, [TOKEN_BREAK] = {NULL, NULL, PREC_NONE}, [TOKEN_DIV_EQL] = {NULL, NULL, PREC_NONE}, [TOKEN_DOT_DOT] = {NULL, parse_binary, PREC_CONCAT}, [TOKEN_DO] = {NULL, NULL, PREC_NONE}, [TOKEN_ELSE] = {NULL, NULL, PREC_NONE}, [TOKEN_END] = {NULL, NULL, PREC_NONE}, [TOKEN_EQL] = {NULL, parse_binary, PREC_EQL}, [TOKEN_ERR] = {NULL, NULL, PREC_NONE}, [TOKEN_FALSE] = {parse_literal, NULL, PREC_NONE}, [TOKEN_FUN] = {NULL, NULL, PREC_NONE}, [TOKEN_GLOBAL] = {NULL, NULL, PREC_NONE}, [TOKEN_GTEQL] = {NULL, parse_binary, PREC_COMP}, [TOKEN_IDENT] = {parse_ident, NULL, PREC_NONE}, [TOKEN_IF] = {NULL, NULL, PREC_NONE}, [TOKEN_IN] = {NULL, NULL, PREC_NONE}, [TOKEN_LET] = {NULL, NULL, PREC_NONE}, [TOKEN_LOOP] = {NULL, NULL, PREC_NONE}, [TOKEN_LTEQL] = {NULL, parse_binary, PREC_COMP}, [TOKEN_MINUS_EQL] = {NULL, NULL, PREC_NONE}, [TOKEN_MOD_EQL] = {NULL, NULL, PREC_NONE}, [TOKEN_MULT_EQL] = {NULL, NULL, PREC_NONE}, [TOKEN_NEQL] = {NULL, parse_binary, PREC_EQL}, [TOKEN_NEXT] = {NULL, NULL, PREC_NONE}, [TOKEN_NUM] = {parse_number, NULL, PREC_NONE}, [TOKEN_PLUS_EQL] = {NULL, NULL, PREC_NONE}, [TOKEN_RET] = {NULL, NULL, PREC_NONE}, [TOKEN_STR] = {parse_string, NULL, PREC_NONE}, [TOKEN_TRUE] = {parse_literal, NULL, PREC_NONE}, [TOKEN_ZILCH] = {parse_literal, NULL, PREC_NONE}, }; static void stat(struct parser *p); static struct expr get_expr(struct token tok) { return expressions[tok.kind]; } static void parse_expr(struct parser *p, enum precedence prec) { advance(p); fn_parse prefix = get_expr(p->prev).prefix; if (!prefix) { show_error(p, p->prev, "expected expression"); return; } p->can_assign = prec <= PREC_ASSIGN; prefix(p); while (prec <= get_expr(p->cur).prec) { advance(p); get_expr(p->prev).infix(p); } if (p->can_assign && consume(p, '=')) { show_error(p, p->prev, "bad assignment"); } p->can_assign = false; } static void expr(struct parser *p) { parse_expr(p, PREC_ASSIGN); } static void expr_stat(struct parser *p) { expr(p); parser_add_byte(p, BC_POP); } static void var_def_stat(struct parser *p, bool is_global) { do { expect(p, TOKEN_IDENT, "expected variable name"); struct token name = p->prev; if (consume(p, '=')) expr(p); else parser_add_byte(p, BC_ZILCH); if (is_global) { struct us_str *ident = copy_str(name.start, name.len); int idx = declare_global(ident); parser_add_byte(p, BC_SET_GLOBAL); parser_add_byte(p, (idx >> 8) & 0xFF); parser_add_byte(p, idx & 0xFF); parser_add_byte(p, BC_POP); // set global does not pop } else { declare_variable(p, name); } } while (consume(p, ',')); } static void fun_stat(struct parser *p) { expect(p, TOKEN_IDENT, "expected function name"); struct token name_token = p->prev; struct us_str *name = copy_str(name_token.start, name_token.len); struct us_proto *proto = create_proto(name); // parser_add_const(p, wrap_func(func)); declare_variable(p, name_token); begin_function(p, proto); expect(p, '(', "expected '(' after function name"); if (p->cur.kind != ')') { do { expect(p, TOKEN_IDENT, "expected arguement name"); declare_variable(p, p->prev); proto->argc++; } while (consume(p, ',')); } expect(p, ')', "expected ')' after arguments"); while (p->cur.kind != TOKEN_END && p->cur.kind != TOKEN_EOF) stat(p); end_function(p); expect(p, TOKEN_END, "unterminated function"); } static void if_stat(struct parser *p, bool is_elseif) { struct token begin = p->prev; expr(p); int jump = begin_jump(p, BC_FALSEY_JMP); parser_add_byte(p, BC_POP); if (!is_elseif) expect(p, ':', "expected ':' to begin 'if' block"); else expect(p, ':', "expected ':' to begin 'elseif' block"); begin_scope(p); while ( p->cur.kind != TOKEN_END && p->cur.kind != TOKEN_ELSE && p->cur.kind != TOKEN_ELSEIF && p->cur.kind != TOKEN_EOF ) stat(p); end_scope(p); int else_jump = begin_jump(p, BC_JMP); end_jump(p, jump); parser_add_byte(p, BC_POP); // The only reason "elseif" was chosen over "else if" is because it // reduces indentation in this one single spot. if (consume(p, TOKEN_ELSEIF)) { if_stat(p, true); } else if (consume(p, TOKEN_ELSE)) { expect(p, ':', "expected ':' to begin 'else' block"); begin_scope(p); while (p->cur.kind != TOKEN_END && p->cur.kind != TOKEN_EOF) stat(p); end_scope(p); } end_jump(p, else_jump); if (!is_elseif && !consume(p, TOKEN_END)) show_error(p, begin, "unterminated 'if' block"); } static void loop_stat(struct parser *p) { struct token begin = p->prev; struct loop loop; loop.outer = p->fp->loop; loop.start = da_len(p->fp->proto->bytecode) - 1; loop.labeled = false; loop.scope = p->fp->scope + 1; loop.breaks = da_create(int, 0); p->fp->loop = &loop; int exit_jump = -1; if (!consume(p, ':')) { if (consume(p, TOKEN_LET)) { expect(p, TOKEN_IDENT, "expected identifier"); struct token ident = p->prev; expect(p, TOKEN_IN, "expected 'in'"); expr(p); int iter = declare_named_variable(p, ""); begin_scope(p); loop.start = da_len(p->fp->proto->bytecode) - 1; parser_add_byte(p, BC_GET_LOCAL); parser_add_byte(p, (u8)iter); parser_add_byte(p, BC_CALL); parser_add_byte(p, 0); exit_jump = begin_jump(p, BC_FALSEY_JMP); declare_variable(p, ident); } else { expr(p); exit_jump = begin_jump(p, BC_FALSEY_JMP); parser_add_byte(p, BC_POP); } expect(p, ':', "expected ':' to begin 'loop' block"); } else { begin_scope(p); } if (consume(p, '<')) { expect(p, TOKEN_IDENT, "expected loop label"); loop.label = p->prev; loop.labeled = true; expect(p, '>', "expected '>' after loop label"); } while (p->cur.kind != TOKEN_END && p->cur.kind != TOKEN_EOF) stat(p); end_scope(p); add_loop(p, loop.start); if (exit_jump != -1) { end_jump(p, exit_jump); parser_add_byte(p, BC_POP); } for (int i = 0; i < da_len(loop.breaks); i++) end_jump(p, loop.breaks[i]); if (!consume(p, TOKEN_END)) show_error(p, begin, "unterminated 'loop' block"); p->fp->loop = loop.outer; da_free(loop.breaks); } static struct loop *find_loop(struct parser *p, struct token label) { struct loop *loop = p->fp->loop; while (loop != NULL) { if ( loop->label.len == label.len && memcmp(label.start, loop->label.start, label.len) == 0 ) { break; } loop = loop->outer; } if (!loop) { show_error( p, label, "unknown loop label '<%.*s>'", label.len, label.start ); return p->fp->loop; } return loop; } static struct loop *loop_label(struct parser *p) { if (consume(p, '<')) { expect(p, TOKEN_IDENT, "expected loop label"); struct token label = p->prev; expect(p, '>', "expected '>' after loop label"); return find_loop(p, label); } return p->fp->loop; } static void break_stat(struct parser *p) { if (!p->fp->loop) { show_error(p, p->prev, "'break' is only allowed in loops"); return; } struct loop *loop = loop_label(p); pop_scope(p, loop->scope); da_append(int, &loop->breaks, begin_jump(p, BC_JMP)); } static void next_stat(struct parser *p) { if (!p->fp->loop) { show_error(p, p->prev, "'next' is only allowed in loops"); return; } struct loop *loop = loop_label(p); pop_scope(p, loop->scope); add_loop(p, loop->start); } static void do_stat(struct parser *p) { struct token begin = p->prev; begin_scope(p); expect(p, ':', "expected ':' to begin 'do' block"); while (p->cur.kind != TOKEN_END && p->cur.kind != TOKEN_EOF) stat(p); if (!consume(p, TOKEN_END)) show_error(p, begin, "unterminated 'do' block"); end_scope(p); } static void ret_stat(struct parser *p) { if (p->fp->is_script) show_error(p, p->prev, "ret is only allowed within functions"); if (p->cur.kind != TOKEN_END && p->cur.kind != ';') { expr(p); } else { parser_add_byte(p, BC_ZILCH); } parser_add_byte(p, BC_RET); } static void stat(struct parser *p) { if (consume(p, TOKEN_LET)) { var_def_stat(p, false); consume(p, ';'); } else if (consume(p, TOKEN_GLOBAL)) { if (!p->fp->is_script) { show_error( p, p->cur, "can only define globals in the outermost scope" ); } var_def_stat(p, true); consume(p, ';'); } else if (consume(p, TOKEN_FUN)) { fun_stat(p); } else if (consume(p, TOKEN_IF)) { if_stat(p, false); } else if (consume(p, TOKEN_LOOP)) { loop_stat(p); } else if (consume(p, TOKEN_BREAK)) { break_stat(p); consume(p, ';'); } else if (consume(p, TOKEN_NEXT)) { next_stat(p); consume(p, ';'); } else if (consume(p, TOKEN_DO)) { do_stat(p); } else if (consume(p, TOKEN_RET)) { ret_stat(p); consume(p, ';'); } else { expr_stat(p); } } struct us_proto *compile(const char *name, const char *src) { struct us_proto *proto = create_proto(copy_str(name, -1)); struct parser p; p.had_err = false; p.can_assign = false; p.fp = NULL; begin_function(&p, proto); p.fp->is_script = true; lex_init(&p.lex, src); advance(&p); while (!consume(&p, TOKEN_EOF)) { stat(&p); } expect(&p, TOKEN_EOF, "expected EOF"); end_function(&p); return p.had_err ? NULL : proto; }