#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <ctype.h>
#include "main.h"


#define MAX_LEN 1024		/* the longest single token */


/*
 * Result of the most recent next_token() call.  curr_token holds the
 * token kind; each of the other variables is filled in by next_token()
 * only for the token kind named in its comment.
 */
enum token curr_token;
int curr_token_num;		/* tok_num: numeric value */
char *curr_token_str;		/* tok_str: text, in next_token()'s static buffer */
int curr_token_strlen;		/* tok_str: number of bytes in curr_token_str */
struct symbol *curr_token_sym;	/* tok_sym: result of sym_lookup() */
enum op_name curr_token_op;	/* tok_op: result of op_lookup() */


/* One-character lookahead: the next input character not yet processed. */
static int curr_ch = 0;
/* Line counter; do_warning() prints it in front of every message. */
static int line_no = 0;

static int hexval(char c);

/*
 * Finish a single-character token: set curr_token to tok_<x> and move
 * the state machine to state_done.  It assigns the local variable
 * `state`, so it is only usable inside next_token().  It expands to a
 * braced block, which is why call sites do not follow it with ';'.
 */
#define DO_TOK(x) { curr_token = tok_##x; state = state_done; }

/*
 * Prepare the lexer for use: read the first input character into the
 * lookahead (curr_ch) and reset the line counter to 0.  next_token()
 * starts from whatever character is already in curr_ch.
 */
void
lexer_startup(void)
{
	curr_ch = lex_getchar();
	line_no = 0;
}

enum token
next_token(void)
{
	enum {
		state_start,
		state_sym,
		state_num_start,
		state_num_dec,
		state_num_bin,
		state_num_hex,
		state_num_bit,
		state_str,
		state_str_escape1,
		state_str_escape2,
		state_comment,
		state_done
	} state = state_start;
	static char buf[MAX_LEN+1];
	unsigned int buf_pos = 0;

	if (curr_ch == -1) { curr_ch = lex_getchar(); }

	while (1) {
		if (curr_ch == '\n') {
			line_no++;
		}
retry:
		switch(state) {
		case state_start:
			switch(curr_ch) {
			case 0: case ' ': case '\t': case '\r':
				break; /* ignore whitespace */
			case '\n':
				DO_TOK(newline)
				break;
			case '#': DO_TOK(pound) break;
			case '0':
				curr_token_num = 0;
				state = state_num_start;
				break;
			case '(': DO_TOK(lparen) break;
			case ')': DO_TOK(rparen) break;
			case ',': DO_TOK(comma) break;
			case '=': DO_TOK(equals) break;
			case '+': DO_TOK(plus) break;
			case '-': DO_TOK(minus) break;
			case '*': DO_TOK(star) break;
			case '/': DO_TOK(slash) break;
			case '%': DO_TOK(percent) break;
			case '&': DO_TOK(and) break;
			case '|': DO_TOK(or) break;
			case '^': DO_TOK(xor) break;
			case '.': DO_TOK(dot) break;
			case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
				curr_token_num = 0;
				state = state_num_dec;
				goto retry;
			case '"': state = state_str; break;
			case ';': state = state_comment; break;
			case EOF: curr_token = tok_eof; return curr_token;
			default:
				if (isalpha(curr_ch) || curr_ch == '_') {
					state = state_sym; goto retry;
				} else {
					char c = curr_token;
					if (c < ' ') c = '.';
					do_warning("unknown token ('%c', 0x%02X)",c,curr_ch);
					DO_TOK(error) break;
				}
			}
			break;
		case state_sym:
			if ((isalnum(curr_ch) || curr_ch == '_') &&
			    buf_pos < MAX_LEN) {
				buf[buf_pos++] = curr_ch;
				break;
			} else {
				buf[buf_pos] = 0;
				if (buf_pos >= MAX_LEN) {
					do_warning("symbol name too long: %s",buf);
				}
				if (!strcasecmp(buf,"w")) {
					curr_token = tok_w;
					return curr_token;
				}
				if (!strcasecmp(buf,"include")) {
					curr_token = tok_include;
					return curr_token;
				}
				curr_token_op = op_lookup(buf);
				if (curr_token_op == OP_NUMOPS) {
					curr_token = tok_sym;
					curr_token_sym = sym_lookup(buf);
				} else {
					curr_token = tok_op;
				}
				return curr_token;
			}
		case state_num_start:
			if (curr_ch == 'x' || curr_ch == 'X' ||
			    curr_ch == 'h' || curr_ch == 'H') {
				state = state_num_hex;
			} else if (curr_ch == 'd' || curr_ch == 'D') {
				state = state_num_dec;
			} else if (curr_ch == 'b' || curr_ch == 'B') {
				state = state_num_bin;
			} else if (isdigit(curr_ch)) {
				state = state_num_dec;
				goto retry;
			} else {
				curr_token = tok_num;
				return curr_token;
			}
			break;
		case state_num_hex:
			if (isxdigit(curr_ch)) {
				curr_token_num = (curr_token_num<<4) +
					hexval(curr_ch);
			} else if (curr_ch == '.') {
				state = state_num_bit;
			} else {
				curr_token = tok_num;
				return curr_token;
			}
			break;
		case state_num_dec:
			if (isdigit(curr_ch)) {
				curr_token_num = (curr_token_num*10) +
					(curr_ch-'0');
			} else if (curr_ch == '.') {
				state = state_num_bit;
			} else {
				curr_token = tok_num;
				return curr_token;
			}
			break;
		case state_num_bin:
			if (curr_ch == '0' || curr_ch == '1') {
				curr_token_num = (curr_token_num<<1) +
					(curr_ch-'0');
			} else if (curr_ch == '.') {
				state = state_num_bit;
			} else {
				curr_token = tok_num;
				return curr_token;
			}
			break;
		case state_num_bit:	/* the .n  (n is 0-7) bit locator */
			if ((curr_ch >= '0') && (curr_ch <= '7')) {
				curr_token_num = (curr_token_num*8) +
					(curr_ch-'0');
			} else {
				do_warning("invalid dot-specifier");
			}
			curr_token = tok_num;
			state = state_done;
			break;
		case state_str:
			if (curr_ch == EOF) {
				do_warning("unexpected EOF in string");
				curr_token = tok_eof;
				return curr_token;
			} else if (curr_ch == '"' || buf_pos >= MAX_LEN) {
				buf[buf_pos] = 0;
				curr_token_str = buf;
				curr_token_strlen = buf_pos;
				curr_token = tok_str;
				state = state_done;
				if (buf_pos >= MAX_LEN) {
					do_warning("string too long: %s",buf);
				}
			} else if (curr_ch == '\\') {
				state = state_str_escape1;
			} else {
				buf[buf_pos++] = curr_ch;
			}
			break;
		case state_str_escape1:
			/* we always have space for at least one more,
			 * or we wouldn't be in this state. */
			if (isxdigit(curr_ch)) {
				/* hex escape.. */
				buf[buf_pos] = hexval(curr_ch)<<4;
				state = state_str_escape2;
				break;
			}
			switch (curr_ch) {
			case EOF: curr_token = tok_eof; return curr_token;
			case 'r': buf[buf_pos++] = '\r'; break;
			case 'n': buf[buf_pos++] = '\n'; break;
			case 't': buf[buf_pos++] = '\t'; break;
			default: buf[buf_pos++] = curr_ch; break;
			}
			state = state_str;
			break;
		case state_str_escape2:
			buf[buf_pos++] |= hexval(curr_ch);
			state = state_str;
			break;
		case state_comment:	/* comment goes to end of line */
			if (curr_ch == '\n' || curr_ch == EOF) {
				state = state_start;
				goto retry;
			}
			break;
		case state_done:
			return curr_token;
		}
		curr_ch = lex_getchar();
	}
}


/*
 * Convert one hexadecimal digit to its value.
 *
 * c is expected to be 0-9, a-f, or A-F and is converted to 0-15.
 * Any other character yields 0.
 *
 * Plain range comparisons are used instead of isdigit(): c is a plain
 * char, which may be negative, and passing a negative value other than
 * EOF to a <ctype.h> function is undefined behaviour.
 */
static int
hexval(char c)
{
	if ((c >= '0') && (c <= '9')) {
		return c-'0';
	} else if ((c >= 'a') && (c <= 'f')) {
		return 10 + (c-'a');
	} else if ((c >= 'A') && (c <= 'F')) {
		return 10 + (c-'A');
	}
	return 0;
}

/*
 * Return a printable description of token kind `tok`, for debugging.
 *
 * For tok_num, tok_str, tok_sym and tok_op the text includes the
 * matching curr_token_* value, so the result is only meaningful when
 * `tok` is the token most recently returned by next_token().
 *
 * The returned pointer is either a string literal or a static buffer
 * that the next call overwrites; the caller must not modify or free it.
 * Token kinds without a case here (tok_include, for example) are
 * reported as "unknown".
 */
char *
dump_token(enum token tok)
{
	static char buf[MAX_LEN+128];
	switch (tok) {
		case tok_error:
			return "error";
		case tok_eof:
			return "EOF";
		case tok_pound:
			return "#";
		case tok_lparen:
			return "(";
		case tok_rparen:
			return ")";
		case tok_comma:
			return ",";
		case tok_equals:
			return "=";
		case tok_plus:
			return "+";
		case tok_minus:
			return "-";
		case tok_star:
			return "*";
		case tok_slash:
			return "/";
		case tok_percent:
			return "%";
		case tok_and:
			return "&";
		case tok_or:
			return "|";
		case tok_xor:
			return "^";
		case tok_dot:
			return ".";
		case tok_w:
			return "w";
		/* snprintf keeps an over-long label or name from running
		 * past the end of buf; the text is truncated instead. */
		case tok_num:
			snprintf(buf,sizeof buf,"num(%d)",curr_token_num);
			return buf;
		case tok_str:
			snprintf(buf,sizeof buf,"str%d(%s)",
				 curr_token_strlen,curr_token_str);
			return buf;
		case tok_sym:
			snprintf(buf,sizeof buf,"sym(%s)",curr_token_sym->label);
			return buf;
		case tok_op:
			snprintf(buf,sizeof buf,"op(%s)",
				 opinfo[curr_token_op].name);
			return buf;
		case tok_newline:
			/* return directly; falling into the default case
			 * would report a newline as "unknown" */
			return "newline";
		default:
			return "unknown";
	}
}

/*
 * Print a warning on stderr.
 *
 * The message is "warning:<line_no>: ", followed by the printf-style
 * format `fmt` expanded with the remaining arguments, followed by a
 * newline.
 */
void
do_warning(const char *fmt,...)
{
	va_list args;

	va_start(args, fmt);
	fprintf(stderr, "warning:%d: ", line_no);	/* prefix */
	vfprintf(stderr, fmt, args);			/* caller's message */
	fputs("\n", stderr);				/* terminator */
	va_end(args);
}
