1 files changed, 74 insertions, 0 deletions
diff --git a/uscript/lex.h b/uscript/lex.h
new file mode 100644
index 0000000..119b867
--- /dev/null
+++ b/uscript/lex.h
@@ -0,0 +1,74 @@
+#ifndef __USCRIPT_LEX_H__
+#define __USCRIPT_LEX_H__
+
+#include "common.h"
+#include "val.h"
+
+#define XTOKENS(_) \
+        _(PRINT) \
+        _(BREAK) \
+        _(DIV_EQL) \
+        _(DO) \
+        _(DOT_DOT) \
+        _(ELSE) \
+        _(ELSEIF) \
+        _(END) \
+        _(EQL) \
+        _(ERR) \
+        _(FALSE) \
+        _(FUN) \
+        _(GLOBAL) \
+        _(GTEQL) \
+        _(IDENT) \
+        _(IF) \
+        _(IN) \
+        _(LET) \
+        _(LOOP) \
+        _(LTEQL) \
+        _(MINUS_EQL) \
+        _(MOD) \
+        _(MOD_EQL) \
+        _(MULT_EQL) \
+        _(NEQL) \
+        _(NEXT) \
+        _(NUM) \
+        _(PLUS_EQL) \
+        _(RET) \
+        _(STR) \
+        _(TRUE) \
+        _(ZILCH)
+
+// single-character tokens are represented by their ASCII value, but other types
+// of tokens are represented by a token_kind enum value.
+enum token_kind {
+        TOKEN_EOF = 256,
+#define DEF_TOKEN_ENUM(name) TOKEN_##name,
+        XTOKENS(DEF_TOKEN_ENUM)
+#undef DEF_TOKEN_ENUM
+};
+
+struct lexer {
+        const char *src;
+        const char *base;
+        const char *head;
+
+        int line;
+        int col;
+};
+
+struct token {
+        const char *start;
+        int len;
+        int line;
+        int col;
+        u16 kind;
+        struct us_val val;
+};
+
+void lex_init(struct lexer *lex, const char *src);
+struct token lex_next_token(struct lexer *lex);
+
+void token_kind_name(char *dst, size_t len, u16 kind);
+void print_token(struct token tok);
+
+#endif // __USCRIPT_LEX_H__