summary refs log tree commit diff
path: root/uscript/lex.h
diff options
context:
space:
mode:
Diffstat (limited to 'uscript/lex.h')
-rw-r--r-- uscript/lex.h 74
1 files changed, 74 insertions, 0 deletions
diff --git a/uscript/lex.h b/uscript/lex.h
new file mode 100644
index 0000000..119b867
--- /dev/null
+++ b/uscript/lex.h
@@ -0,0 +1,74 @@
+#ifndef __USCRIPT_LEX_H__
+#define __USCRIPT_LEX_H__
+
+#include "common.h"
+#include "val.h"
+
+#define XTOKENS(_) /* X-macro: expands to _(NAME) for every named token kind, in this order */ \
+ _(PRINT) \
+ _(BREAK) \
+ _(DIV_EQL) \
+ _(DO) \
+ _(DOT_DOT) \
+ _(ELSE) \
+ _(ELSEIF) \
+ _(END) \
+ _(EQL) \
+ _(ERR) \
+ _(FALSE) \
+ _(FUN) \
+ _(GLOBAL) \
+ _(GTEQL) \
+ _(IDENT) \
+ _(IF) \
+ _(IN) \
+ _(LET) \
+ _(LOOP) \
+ _(LTEQL) \
+ _(MINUS_EQL) \
+ _(MOD) \
+ _(MOD_EQL) \
+ _(MULT_EQL) \
+ _(NEQL) \
+ _(NEXT) \
+ _(NUM) \
+ _(PLUS_EQL) \
+ _(RET) \
+ _(STR) \
+ _(TRUE) \
+ _(ZILCH)
+
+// Token kinds. A single-character token uses its own ASCII value as its kind;
+// every other token uses one of the token_kind values below (all >= 256).
+enum token_kind {
+ TOKEN_EOF = 256, // 256 is above every ASCII value, so enum kinds cannot collide with single-character kinds
+#define DEF_TOKEN_ENUM(name) TOKEN_##name, // pastes TOKEN_ onto a name from XTOKENS
+ XTOKENS(DEF_TOKEN_ENUM) // yields TOKEN_PRINT (257), TOKEN_BREAK, ... TOKEN_ZILCH in list order
+#undef DEF_TOKEN_ENUM // the helper macro is only needed for this enum
+};
+
+struct lexer { // lexer state; passed by pointer to lex_init() and lex_next_token()
+ const char *src; // presumably the source text given to lex_init() -- confirm in the implementation
+ const char *base; // NOTE(review): likely the start of the token being scanned -- confirm
+ const char *head; // NOTE(review): likely the current scan position -- confirm
+
+ int line; // presumably the current line in src; numbering base not visible here
+ int col; // presumably the current column in src; numbering base not visible here
+};
+
+struct token { // one token; lex_next_token() returns this struct by value
+ const char *start; // presumably points at the token's first character in the source -- confirm
+ int len; // presumably the token's length in chars from start -- confirm
+ int line; // presumably the line the token was found on -- confirm
+ int col; // presumably the column the token was found at -- confirm
+ u16 kind; // an ASCII value (single-character token) or an enum token_kind value (>= 256)
+ struct us_val val; // type declared outside this header; presumably the literal's value -- confirm
+};
+
+void lex_init(struct lexer *lex, const char *src); // NOTE(review): presumably points *lex at the start of src -- confirm
+struct token lex_next_token(struct lexer *lex); // returns a token by value; presumably kind TOKEN_EOF at end of input -- confirm
+
+void token_kind_name(char *dst, size_t len, u16 kind); // presumably writes kind's name into dst, len being dst's capacity -- confirm
+void print_token(struct token tok); // takes the token by value; output format and destination not visible here
+
+#endif // __USCRIPT_LEX_H__