#include "lex.h"

#include <string.h>
#include <stdio.h>
#include <math.h>

#include "dyn_arr.h"

// Steps the lexer one character forward and bumps the column counter with it.
// The argument is parenthesized so that any pointer-valued expression (e.g.
// `&lexer`) expands correctly. Note that it is still evaluated twice, so do
// not pass an expression with side effects.
#define advance(lex) (++(lex)->head, ++(lex)->col)
// Yields the character the lexer head currently points at, without consuming
// it.
#define current_char(lex) (*(lex)->head)

// Printable names of the named token kinds, used for printing purposes.
// The table is indexed by (kind - TOKEN_EOF): entry 0 is TOKEN_EOF and the
// remaining entries are generated by applying TOKEN_NAME through XTOKENS
// (presumably once per token declared there; see lex.h).
// Both the pointers and the strings are read-only.
static
const char *const token_names[] = {
        "TOKEN_EOF",
#define TOKEN_NAME(name) "TOKEN_" #name,
        XTOKENS(TOKEN_NAME)
#undef TOKEN_NAME
};

// Reports whether c is a decimal digit, i.e. lies in '0' through '9'.
static
bool is_digit(char c)
{
        if (c < '0')
                return false;
        return c <= '9';
}

// Reports whether c lies in the range 'A' through 'Z'.
static
bool is_upper(char c)
{
        return !(c < 'A' || c > 'Z');
}

// Reports whether c lies in the range 'a' through 'z'.
static
bool is_lower(char c)
{
        if (c > 'z')
                return false;
        return c >= 'a';
}

// Reports whether c is a letter (either case) or an underscore. Digits are
// not accepted by this predicate.
static
bool is_ident(char c)
{
        if (c == '_')
                return true;

        return is_upper(c) || is_lower(c);
}

// Reports whether the text between lex->base (inclusive) and lex->head
// (exclusive) is exactly the NUL-terminated string b: same length, same
// bytes.
static
bool match_kw(const struct lexer *lex, const char *b)
{
        size_t kw_len = strlen(b);
        size_t span = (size_t)(lex->head - lex->base);

        if (span != kw_len)
                return false;

        return memcmp(lex->base, b, span) == 0;
}

// Consumes the current character when it equals c and returns true. On a
// mismatch the lexer is left untouched and false is returned.
static
bool match_char(struct lexer *lex, char c)
{
        if (current_char(lex) != c)
                return false;

        advance(lex);
        return true;
}

// Skips everything that cannot start a token and then marks the start of the
// next token by pointing lex->base at the current head.
//
// Skipped input:
//   - spaces, tabs and carriage returns;
//   - newlines, which also bump lex->line and reset lex->col to 0;
//   - comments, which run from a '#' up to (but not including) the next
//     newline or the terminating NUL.
static
void goto_token_start(struct lexer *lex)
{
        for (;;) {
                char c = current_char(lex);

                if (c == '#') {
                        // Stop *on* the newline / NUL so the next pass of the
                        // outer loop handles it like any other character.
                        do {
                                advance(lex);
                        } while (
                                current_char(lex) != '\n' &&
                                current_char(lex) != '\0'
                        );
                        continue;
                }

                if (c == '\n') {
                        advance(lex);
                        lex->line++;
                        lex->col = 0;
                        continue;
                }

                if (c == ' ' || c == '\t' || c == '\r') {
                        advance(lex);
                        continue;
                }

                // Anything else (including the terminating NUL) starts a
                // token.
                lex->base = lex->head;
                return;
        }
}

// Builds a token of the given kind covering the text from lex->base up to
// (excluding) lex->head. The token's column is the lexer's current column
// minus the token length, i.e. the column where the token began. The value
// slot is filled with create_zilch().
static
struct token create_token(struct lexer *lex, u16 token_kind)
{
        struct token tok;

        tok.val = create_zilch();
        tok.kind = token_kind;
        tok.line = lex->line;

        tok.start = lex->base;
        tok.len = (int)(lex->head - lex->base);
        tok.col = lex->col - tok.len;

        return tok;
}

// Builds a TOKEN_ERR token. Unlike ordinary tokens, its start/len describe the
// error message `msg` rather than a slice of the source text; line and column
// are the lexer's current position. The token only stores the pointer, so
// `msg` must outlive it.
static
struct token err_token(struct lexer *lex, const char *msg)
{
        struct token tok;

        tok.val = create_zilch();
        tok.kind = TOKEN_ERR;

        tok.line = lex->line;
        tok.col = lex->col;

        tok.start = msg;
        tok.len = (int)strlen(msg);

        return tok;
}

static
struct token num_token(struct lexer *lex)
{
        while (is_digit(current_char(lex))) {
                advance(lex);
        }

        if (current_char(lex) == '.') {
                advance(lex);
                while (is_digit(current_char(lex))) {
                        advance(lex);
                }
        }

        return create_token(lex, TOKEN_NUM);
}

// An ident(ifier) token is either a keyword or an actual identifier.
//
// The caller has already consumed the first character. The rest of the word
// is made of letters, underscores and digits; a ':' is also accepted, but only
// in the middle of the word (it acts as a namespacer), i.e. when it is
// directly followed by a letter or underscore.
static
struct token ident_token(struct lexer *lex)
{
        // Reserved words and the token kind each one maps to. Both "nada" and
        // "zilch" produce TOKEN_ZILCH.
        static const struct {
                const char *word;
                u16 kind;
        } keywords[] = {
                { "and",    TOKEN_AND },
                { "break",  TOKEN_BREAK },
                { "do",     TOKEN_DO },
                { "else",   TOKEN_ELSE },
                { "elseif", TOKEN_ELSEIF },
                { "end",    TOKEN_END },
                { "false",  TOKEN_FALSE },
                { "fun",    TOKEN_FUN },
                { "global", TOKEN_GLOBAL },
                { "if",     TOKEN_IF },
                { "in",     TOKEN_IN },
                { "let",    TOKEN_LET },
                { "loop",   TOKEN_LOOP },
                { "nada",   TOKEN_ZILCH },
                { "next",   TOKEN_NEXT },
                { "not",    TOKEN_NOT },
                { "or",     TOKEN_OR },
                { "ret",    TOKEN_RET },
                { "true",   TOKEN_TRUE },
                { "zilch",  TOKEN_ZILCH },
        };

        for (;;) {
                char c = current_char(lex);

                // A ':' belongs to the word only when an identifier character
                // follows it; every other character must itself be a letter,
                // underscore or digit.
                bool part_of_word = (c == ':')
                        ? is_ident(lex->head[1])
                        : (is_ident(c) || is_digit(c));

                if (!part_of_word)
                        break;

                advance(lex);
        }

        // : is just to be used as a namespacer, so it is disallowed at the end
        // of identifiers.
        // NOTE(review): the loop above never consumes a ':' unless an
        // identifier character follows, so this check looks unreachable --
        // confirm before relying on it.
        if (lex->head[-1] == ':')
                return err_token(lex, "cannot end an identifier in ':'");

        u16 kind = TOKEN_IDENT;

        for (size_t i = 0; i < sizeof keywords / sizeof keywords[0]; i++) {
                if (match_kw(lex, keywords[i].word)) {
                        kind = keywords[i].kind;
                        break;
                }
        }

        return create_token(lex, kind);
}

// Scans a string literal whose opening quote has already been consumed, up to
// and including the closing `term` character.
//
// The decoded contents are collected in a temporary dynamic array; the
// recognised escapes are \\ \' \" \n \t and \r. On success a TOKEN_STR token
// is returned whose val is built from the decoded characters (without the
// trailing NUL). An error token is returned for an unknown escape or when the
// input ends before the terminator is seen. The temporary array is released
// on every path.
//
// TODO: further escape sequences (only the ones listed above are handled)
static
struct token str_token(struct lexer *lex, char term)
{
        char *chars = da_create(char, 0);

        while (current_char(lex) != term && current_char(lex) != '\0') {
                if (current_char(lex) != '\\') {
                        da_append(char, &chars, current_char(lex));
                        advance(lex);
                        continue;
                }
                advance(lex); // eat backslash

                switch (current_char(lex)) {
                case '\\': da_append(char, &chars, '\\'); break;
                case '\'': da_append(char, &chars, '\''); break;
                case '"': da_append(char, &chars, '"'); break;
                case 'n': da_append(char, &chars, '\n'); break;
                case 't': da_append(char, &chars, '\t'); break;
                case 'r': da_append(char, &chars, '\r'); break;
                default:
                        da_free(chars);
                        return err_token(lex, "invalid escape sequence");
                }

                advance(lex); // eat escape
        }

        if (current_char(lex) == '\0') {
                // Hit end of input before the closing quote: release the
                // scratch buffer before bailing out so it is not leaked.
                da_free(chars);
                return err_token(lex, "string never terminates");
        }

        advance(lex); // eat terminator

        // NUL-terminate the scratch buffer; the terminator is excluded from
        // the length handed to copy_str below.
        da_append(char, &chars, 0);

        struct token str = create_token(lex, TOKEN_STR);
        str.val = wrap_str(copy_str(chars, da_len(chars) - 1));
        da_free(chars);
        return str;
}

// A symbol token is any token that contains no alphanumeric characters.
static
struct token symbol_token(struct lexer *lex, char c)
{
        switch (c) {
        case '(':
        case ')':
        case '{':
        case '}':
        case '[':
        case ']':
        case ',':
        case ';':
        case ':':
                return create_token(lex, c);
        case '.':
                return create_token(
                        lex,
                        match_char(lex, '.') ? TOKEN_DOT_DOT : c
                );
        case '+':
                return create_token(
                        lex,
                        match_char(lex, '=') ? TOKEN_PLUS_EQL : c
                );
        case '-':
                return create_token(
                        lex,
                        match_char(lex, '=') ? TOKEN_MINUS_EQL : c
                );
        case '*':
                return create_token(
                        lex,
                        match_char(lex, '=') ? TOKEN_MULT_EQL : c
                );
        case '/':
                return create_token(
                        lex,
                        match_char(lex, '=') ? TOKEN_DIV_EQL : c
                );
        case '%':
                return create_token(
                        lex,
                        match_char(lex, '=') ? TOKEN_MOD_EQL : c
                );
        case '<':
                return create_token(
                        lex,
                        match_char(lex, '=') ? TOKEN_LTEQL : c
                );
        case '>':
                return create_token(
                        lex,
                        match_char(lex, '=') ? TOKEN_GTEQL : c
                );
        case '=':
                return create_token(
                        lex,
                        match_char(lex, '=') ? TOKEN_EQL : c
                );
        case '!':
                if (match_char(lex, '=')) 
                        return create_token(lex, TOKEN_NEQL);
                break;
        }

        // TODO: log unknown character
        return err_token(lex, "unknown character");
}

// Prepares `lex` to scan the NUL-terminated text `src` from its beginning:
// the token start and the read head both point at the first character, and
// the position is reset to line 1, column 0. Only the pointer is stored, so
// `src` must stay valid while the lexer is in use.
void lex_init(struct lexer *lex, const char *src)
{
        // Position bookkeeping.
        lex->col = 0;
        lex->line = 1;

        // All three cursors start at the beginning of the input.
        lex->head = src;
        lex->base = src;
        lex->src = src;
}

// Scans and returns the next token from the input.
//
// Whitespace and comments are skipped first. At the end of input a TOKEN_EOF
// token is returned (and the head does not move, so further calls keep
// returning it). Otherwise the first character of the token is consumed here
// and decides which scanner finishes the job: string, number, identifier /
// keyword, or symbol.
struct token lex_next_token(struct lexer *lex)
{
        goto_token_start(lex);

        char first = current_char(lex);
        if (first == '\0')
                return create_token(lex, TOKEN_EOF);

        advance(lex);

        switch (first) {
        case '"':
        case '\'':
                // The opening quote doubles as the expected terminator.
                return str_token(lex, first);
        default:
                break;
        }

        if (is_digit(first))
                return num_token(lex);

        return is_ident(first)
                ? ident_token(lex)
                : symbol_token(lex, first);
}

// Writes a printable, NUL-terminated name for token kind `kind` into `dst`.
//
//   dst  - destination buffer.
//   len  - capacity of `dst` in bytes, including room for the terminating
//          NUL. At most len - 1 characters of the name are copied; when len
//          is 0 nothing is written.
//   kind - token kind. Kinds below TOKEN_EOF are single-character tokens and
//          are named by that character; other kinds are looked up in
//          token_names. A kind past the end of that table yields
//          "TOKEN_UNKNOWN" instead of reading out of bounds.
void token_kind_name(char *dst, size_t len, u16 kind)
{
        char char_name[2] = {0, 0};
        const char *name = char_name;

        if (len == 0)
                return;

        if (kind < TOKEN_EOF) {
                char_name[0] = (char)kind;
        } else {
                size_t idx = (size_t)(kind - TOKEN_EOF);
                size_t count = sizeof token_names / sizeof token_names[0];

                name = idx < count ? token_names[idx] : "TOKEN_UNKNOWN";
        }

        // Truncate to what fits while leaving one byte for the terminator.
        size_t n = strlen(name);
        if (n > len - 1)
                n = len - 1;

        memcpy(dst, name, n);
        dst[n] = '\0';
}

// Prints one line describing `tok` to stdout in three '|'-separated columns:
// the kind name, the position as line:col, and the token text (tok.len
// characters starting at tok.start).
void print_token(struct token tok)
{
        char kind_name[128];

        token_kind_name(kind_name, sizeof kind_name, tok.kind);

        printf(
                "%-15s | %3d:%-2d | %-24.*s\n",
                kind_name,
                tok.line, tok.col,
                tok.len, tok.start
        );
}