about summary refs log tree commit diff stats
path: root/wrapperhelper/src/prepare.c
diff options
context:
space:
mode:
Diffstat (limited to 'wrapperhelper/src/prepare.c')
-rw-r--r-- wrapperhelper/src/prepare.c 372
1 files changed, 372 insertions, 0 deletions
diff --git a/wrapperhelper/src/prepare.c b/wrapperhelper/src/prepare.c
new file mode 100644
index 00000000..090da0b0
--- /dev/null
+++ b/wrapperhelper/src/prepare.c
@@ -0,0 +1,372 @@
+#include "prepare.h"
+
+#include <string.h>
+
+// State of a character reader which splits a source file into preprocessor tokens
+struct prepare_s {
+	FILE *f; // Input stream, closed by prepare_del
+	int buf[4]; // Pushed-back characters: unget_char stores at the end, get_char takes back the last one stored
+	int buf_len; // <= 4 (though 3 *should* be enough)
+	char *srcn; // Copy (strdup) of the file name, printed in the diagnostic messages
+	// Context of the last token read, used to recognize '#include' and '#define' lines
+	enum prepare_state {
+		PREPST_NONE = 0, // No particular context
+		PREPST_NL, // Initial state, also set every time a newline token is returned
+		PREPST_HASH, // Set when the '#' symbol is read while in PREPST_NL
+		PREPST_INCL, // Set when 'include' or 'include_next' is read while in PREPST_HASH
+		PREPST_DEF, // Set when 'define' is read while in PREPST_HASH
+		PREPST_DEFID, // Set when an identifier is read while in PREPST_DEF
+	} st;
+};
+
+// Creates a reader for the already opened stream f.
+// filename is copied and is only used in diagnostic messages; a NULL filename is replaced by a placeholder.
+// The stream is closed here if the creation fails, and by prepare_del otherwise.
+// Returns NULL if an allocation fails.
+prepare_t *prepare_new_file(FILE *f, const char *filename) {
+	prepare_t *ret = malloc(sizeof *ret);
+	if (!ret) {
+		fclose(f);
+		return NULL;
+	}
+	// strdup(NULL) is undefined, so fall back on a placeholder name
+	char *srcn = strdup(filename ? filename : "<unknown filename>");
+	if (!srcn) {
+		// srcn is printed with %s in the diagnostics, it must never be NULL
+		free(ret);
+		fclose(f);
+		return NULL;
+	}
+	*ret = (prepare_t){
+		.f = f,
+		.buf = {0, 0, 0, 0},
+		.buf_len = 0,
+		.srcn = srcn,
+		.st = PREPST_NL,
+	};
+	return ret;
+}
+
+// Releases a reader: closes its stream (if any), then frees the file name copy and the reader itself.
+// prep must not be NULL.
+void prepare_del(prepare_t *prep) {
+	FILE *stream = prep->f;
+	if (stream != NULL) {
+		fclose(stream);
+	}
+	// free(NULL) is a no-op, no need to test srcn
+	free(prep->srcn);
+	free(prep);
+}
+
+// Returns the next character of the source, or EOF.
+// Pushed-back characters (see unget_char) are returned first, the last one pushed coming out first;
+// the other pending characters are kept, since callers may push back two characters in a row.
+// A backslash immediately followed by a newline (line continuation) is skipped.
+static int get_char(prepare_t *src) {
+	for (;;) {
+		int c = src->buf_len ? src->buf[--src->buf_len] : getc(src->f);
+		if (c != '\\') return c;
+		// Look at the character following the backslash
+		c = src->buf_len ? src->buf[--src->buf_len] : getc(src->f);
+		if (c != '\n') {
+			// Not a line continuation: keep the character for the next call
+			src->buf[src->buf_len++] = c;
+			return '\\';
+		}
+		// Backslash-newline: drop both characters and read again
+	}
+}
+// Pushes c back: the next get_char returns it before anything pushed earlier.
+// The pushback buffer is not bounds-checked.
+// Do not call this more than twice in a row if the last character retrieved is '\\'
+static void unget_char(prepare_t *src, int c) {
+	int top = src->buf_len;
+	src->buf[top] = c;
+	src->buf_len = top + 1;
+}
+
+// Appends to buf every following character in [_0-9A-Za-z].
+// The first character which does not match is pushed back.
+static void fill_ident(prepare_t *src, string_t *buf) {
+	for (;;) {
+		int ch = get_char(src);
+		int is_digit = (ch >= '0') && (ch <= '9');
+		int is_upper = (ch >= 'A') && (ch <= 'Z');
+		int is_lower = (ch >= 'a') && (ch <= 'z');
+		if (!((ch == '_') || is_digit || is_upper || is_lower)) {
+			unget_char(src, ch);
+			return;
+		}
+		string_add_char(buf, (char)ch);
+	}
+}
+
+// Appends to buf the rest of a preprocessing number: characters in [_.0-9A-Za-z],
+// plus a '+' or '-' when it directly follows one of 'e', 'E', 'p' or 'P'.
+// The first character which does not match is pushed back.
+static void fill_num(prepare_t *src, string_t *buf) {
+	int after_exp = 0; // Whether the previous character was an exponent marker
+	for (;;) {
+		int ch = get_char(src);
+		int is_digit = (ch >= '0') && (ch <= '9');
+		int is_upper = (ch >= 'A') && (ch <= 'Z');
+		int is_lower = (ch >= 'a') && (ch <= 'z');
+		int is_sign = (ch == '+') || (ch == '-');
+		if (!((ch == '_') || (ch == '.') || is_digit || is_upper || is_lower || (after_exp && is_sign))) {
+			unget_char(src, ch);
+			return;
+		}
+		after_exp = (ch == 'e') || (ch == 'E') || (ch == 'p') || (ch == 'P');
+		string_add_char(buf, (char)ch);
+	}
+}
+
+// Appends to buf the characters read from src up to the closing delimiter end_c, which is consumed but not stored.
+// If can_esc is nonzero, a backslash escapes the following character (both are stored), so an escaped end_c
+// does not end the reading; otherwise backslashes are stored like any other character.
+// The reading also stops on a newline, on EOF and on any character outside of 0..0x7F; that character is pushed back.
+static void fill_str(prepare_t *src, string_t *buf, char end_c, int can_esc) {
+	int has_esc = 0;
+	while (1) {
+		int c = get_char(src);
+		if (has_esc && (c >= 0) && (c <= 0x7F) && (c != '\n')) {
+			// Not technically standard compliant (should support \ooo, \x..., \u..., \U...)
+			// Since we don't really care about parsing the content, only the delimiters, this is good enough
+			string_add_char(buf, '\\');
+			string_add_char(buf, (char)c);
+			has_esc = 0;
+		} else if (c == '\\') {
+			if (can_esc) {
+				has_esc = 1;
+			} else {
+				string_add_char(buf, '\\');
+			}
+		} else if ((c >= 0) && (c <= 0x7F) && (c != '\n') && (c != end_c)) {
+			// A raw newline is never part of the content (line continuations are already removed by get_char),
+			// otherwise an unterminated literal would swallow the following lines
+			has_esc = 0;
+			string_add_char(buf, (char)c);
+		} else {
+			if (has_esc) {
+				// c is invalid or a '\n', or can_esc = 0 and c = end_c
+				string_add_char(buf, '\\');
+			}
+			if (c != end_c)
+				unget_char(src, c);
+			return;
+		}
+	}
+}
+
+#define BASE_NSYMS 25
+static const struct symbs_s {
+	char c;
+	enum token_sym_type_e sym;
+	int nnext;
+	const struct symbs_s *next;
+} *symbs = (struct symbs_s[BASE_NSYMS]){
+#define TERM(ch, t) { .c = ch, .sym = t, .nnext = 0, .next = NULL }
+#define NONTERM(ch, t, n, ...) { .c = ch, .sym = t, .nnext = n, .next = (struct symbs_s[n]){__VA_ARGS__} }
+	// Only '..' must have a sym > LAST_SYM; change next_token if this is not the case
+	NONTERM('.', SYM_DOT, 1, NONTERM('.', LAST_SYM + 1, 1, TERM('.', SYM_VARIADIC))),
+	TERM('{', SYM_LBRACKET),
+	TERM('}', SYM_RBRACKET),
+	TERM('[', SYM_LSQBRACKET),
+	TERM(']', SYM_RSQBRACKET),
+	TERM('(', SYM_LPAREN),
+	TERM(')', SYM_RPAREN),
+	NONTERM('#', SYM_HASH, 1, TERM('#', SYM_HASHHASH)),
+	TERM(';', SYM_SEMICOLON),
+	NONTERM(':', SYM_COLON, 1, TERM(':', SYM_COLONCOLON)),
+	TERM('?', SYM_QUESTION),
+	TERM('~', SYM_TILDE),
+	NONTERM('!', SYM_EXCL, 1, TERM('=', SYM_EXCLEQ)),
+	NONTERM('+', SYM_PLUS, 2, TERM('=', SYM_PLUSEQ), TERM('+', SYM_PLUSPLUS)),
+	NONTERM('-', SYM_DASH, 3, TERM('=', SYM_DASHEQ), TERM('-', SYM_DASHDASH), TERM('>', SYM_DASHGT)),
+	NONTERM('*', SYM_STAR, 1, TERM('=', SYM_STAREQ)),
+	NONTERM('/', SYM_SLASH, 1, TERM('=', SYM_SLASHEQ)),
+	NONTERM('%', SYM_PERCENT, 1, TERM('=', SYM_PERCENTEQ)),
+	NONTERM('^', SYM_HAT, 1, TERM('=', SYM_HATEQ)),
+	NONTERM('&', SYM_AMP, 2, TERM('=', SYM_AMPEQ), TERM('&', SYM_AMPAMP)),
+	NONTERM('|', SYM_PIPE, 2, TERM('=', SYM_PIPEEQ), TERM('|', SYM_PIPEPIPE)),
+	NONTERM('=', SYM_EQ, 1, TERM('=', SYM_EQEQ)),
+	NONTERM('<', SYM_LT, 2, TERM('=', SYM_LTEQ), NONTERM('<', SYM_LTLT, 1, TERM('=', SYM_LTLTEQ))),
+	NONTERM('>', SYM_GT, 2, TERM('=', SYM_GTEQ), NONTERM('>', SYM_GTGT, 1, TERM('=', SYM_GTGTEQ))),
+	TERM(',', SYM_COMMA),
+#undef NONTERM
+#undef TERM
+};
+
+// Reads and returns the next preprocessor token of src.
+// Blanks are skipped, except a blank read right after the identifier following '#define', which is
+// returned as a PPTOK_BLANK. Block comments are skipped. A line comment is either reported as a
+// PPTOK_START_LINE_COMMENT (allow_comments != 0, the content of the comment is left unread) or skipped
+// up to its newline (allow_comments == 0).
+// At EOF, a PPTOK_NEWLINE is returned first unless the state is already PREPST_NL, then PPTOK_EOF is returned.
+// An unknown character or a comment interrupted by EOF prints a message and gives a PPTOK_INVALID.
+// Identifier, number, string and include tokens hold a newly created string
+// (presumably to be released by the caller -- TODO confirm).
+preproc_token_t pre_next_token(prepare_t *src, int allow_comments) {
+start_next_token:
+	int c = get_char(src);
+	if (c == EOF) {
+		if (src->st == PREPST_NL) {
+			return (preproc_token_t){
+				.tokt = PPTOK_EOF,
+				.tokv.c = (char)c
+			};
+		} else {
+			// Force newline at EOF
+			// (EOF is pushed back so that the next call returns PPTOK_EOF)
+			unget_char(src, c);
+			src->st = PREPST_NL;
+			return (preproc_token_t){
+				.tokt = PPTOK_NEWLINE,
+				.tokv.c = (char)c
+			};
+		}
+	}
+	
+	// After '#include' or '#include_next', a '<' opens a header name ending at '>' (backslashes are not escapes)
+	if (src->st == PREPST_INCL && (c == '<')) {
+		src->st = PREPST_NONE;
+		preproc_token_t ret;
+		ret.tokt = PPTOK_INCL;
+		ret.tokv.sisstr = 0;
+		ret.tokv.sstr = string_new();
+		fill_str(src, ret.tokv.sstr, '>', 0);
+		return ret;
+	}
+	// Character literal: a PPTOK_STRING with sisstr = 0
+	if (c == '\'') {
+		src->st = PREPST_NONE;
+		preproc_token_t ret;
+		ret.tokt = PPTOK_STRING;
+		ret.tokv.sisstr = 0;
+		ret.tokv.sstr = string_new_cap(1); // Usually only one character is inside a char literal
+		fill_str(src, ret.tokv.sstr, '\'', 1);
+		return ret;
+	}
+	// String literal (sisstr = 1), or quoted header name after '#include' (in which case escapes are disabled)
+	if (c == '"') {
+		preproc_token_t ret;
+		ret.tokt = (src->st == PREPST_INCL) ? PPTOK_INCL : PPTOK_STRING;
+		src->st = PREPST_NONE;
+		ret.tokv.sisstr = 1;
+		ret.tokv.sstr = string_new();
+		fill_str(src, ret.tokv.sstr, '"', ret.tokt == PPTOK_STRING);
+		return ret;
+	}
+	// Blanks are only reported right after the identifier following '#define', otherwise they are skipped
+	// without changing the state
+	if ((c == ' ') || (c == '\f') || (c == '\t') || (c == '\v')) {
+		if (src->st == PREPST_DEFID) {
+			src->st = PREPST_NONE;
+			return (preproc_token_t){
+				.tokt = PPTOK_BLANK,
+				.tokv.c = (char)c
+			};
+		} else goto start_next_token;
+	}
+	if (c == '\n') {
+		src->st = PREPST_NL;
+		return (preproc_token_t){
+			.tokt = PPTOK_NEWLINE,
+			.tokv.c = (char)c
+		};
+	}
+	// Identifier; the new state depends on the previous state and on the identifier read
+	if ((c == '_') || ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))) {
+		preproc_token_t ret;
+		ret.tokt = PPTOK_IDENT;
+		ret.tokv.str = string_new_cap(1);
+		string_add_char(ret.tokv.str, (char)c);
+		fill_ident(src, ret.tokv.str);
+		src->st =
+			((src->st == PREPST_HASH) && (!strcmp(string_content(ret.tokv.str), "include"))) ? PREPST_INCL :
+			((src->st == PREPST_HASH) && (!strcmp(string_content(ret.tokv.str), "include_next"))) ? PREPST_INCL :
+			((src->st == PREPST_HASH) && (!strcmp(string_content(ret.tokv.str), "define"))) ? PREPST_DEF :
+			(src->st == PREPST_DEF) ? PREPST_DEFID :
+			PREPST_NONE;
+		return ret;
+	}
+	// Number starting with a digit
+	if ((c >= '0') && (c <= '9')) {
+		src->st = PREPST_NONE;
+		preproc_token_t ret;
+		ret.tokt = PPTOK_NUM;
+		ret.tokv.str = string_new_cap(1);
+		string_add_char(ret.tokv.str, (char)c);
+		fill_num(src, ret.tokv.str);
+		return ret;
+	}
+	// A '.' followed by a digit also starts a number; otherwise the '.' is handled as a symbol below
+	if (c == '.') {
+		c = get_char(src);
+		if ((c >= '0') && (c <= '9')) {
+			src->st = PREPST_NONE;
+			preproc_token_t ret;
+			ret.tokt = PPTOK_NUM;
+			ret.tokv.str = string_new_cap(2);
+			string_add_char(ret.tokv.str, '.');
+			string_add_char(ret.tokv.str, (char)c);
+			fill_num(src, ret.tokv.str);
+			return ret;
+		} else {
+			unget_char(src, c);
+			c = '.';
+		}
+	}
+	// A '/' may start a comment; otherwise it is handled as a symbol below
+	if (c == '/') {
+		c = get_char(src);
+		if (c == '/') {
+			if (allow_comments) {
+				// Only the start of the comment is reported, its content is not read here
+				src->st = PREPST_NONE;
+				return (preproc_token_t){
+					.tokt = PPTOK_START_LINE_COMMENT,
+					.tokv.c = '/'
+				};
+			}
+			
+			// Skip the comment up to (and including) the end of the line
+			do {
+				c = get_char(src);
+			} while ((c != EOF) && (c != '\n'));
+			if (c != EOF) {
+				// The newline is not reported if the previous token was already a newline
+				if (src->st == PREPST_NL)
+					goto start_next_token;
+				else {
+					src->st = PREPST_NL;
+					return (preproc_token_t){
+						.tokt = PPTOK_NEWLINE,
+						.tokv.c = (char)c
+					};
+				}
+			}
+			
+			// EOF was reached before the end of the line
+			src->st = PREPST_NONE;
+			printf("Unfinished comment while preparing %s\n", src->srcn);
+			return (preproc_token_t){
+				.tokt = PPTOK_INVALID,
+				.tokv.c = (char)c
+			};
+		} else if (c == '*') {
+			c = get_char(src);
+			// last_star: whether the character before c is a '*'; the comment ends on a '/' following a '*'
+			int last_star = 0;
+			while ((c != EOF) && (!last_star || (c != '/'))) {
+				last_star = c == '*';
+				c = get_char(src);
+			}
+			// A finished block comment is skipped without changing the state
+			if (c != EOF) goto start_next_token;
+			
+			src->st = PREPST_NONE;
+			printf("Unfinished comment while preparing %s\n", src->srcn);
+			return (preproc_token_t){
+				.tokt = PPTOK_INVALID,
+				.tokv.c = (char)c
+			};
+		} else {
+			unget_char(src, c);
+			c = '/';
+		}
+	}
+	
+	// Symbols: look for the first character in the first level of symbs...
+	struct symbs_s const *sym = NULL;
+	for (int i = 0; i < BASE_NSYMS; ++i) {
+		if (c == symbs[i].c) {
+			sym = &symbs[i];
+			break;
+		}
+	}
+	if (sym) {
+		// ... then extend the symbol as long as the next character matches one of the children of the current node
+		while (sym->nnext) {
+			c = get_char(src);
+			int found = 0;
+			for (int i = 0; i < sym->nnext; ++i) {
+				if (c == sym->next[i].c) {
+					found = 1;
+					sym = &sym->next[i];
+					break;
+				}
+			}
+			if (!found) {
+				unget_char(src, c);
+				break;
+			}
+		}
+		// '..' not followed by a third '.' is not a symbol: push the second '.' back and return a single '.'
+		if (sym->sym == LAST_SYM + 1) {
+			unget_char(src, sym->c);
+			sym = &symbs[0]; // This is where no check is made (see comment in the definition of symbs)
+		}
+		// A '#' read at the start of a line may start a directive
+		src->st = ((src->st == PREPST_NL) && (sym->sym == SYM_HASH)) ? PREPST_HASH : PREPST_NONE;
+		return (preproc_token_t){
+			.tokt = PPTOK_SYM,
+			.tokv.sym = sym->sym
+		};
+	}
+	
+	// Nothing matched
+	src->st = PREPST_NONE;
+	printf("Invalid character 0x%X (%c) while preparing %s\n", (unsigned)c, (c >= 0x20) && (c < 127) ? c : '?', src->srcn);
+	return (preproc_token_t){
+		.tokt = PPTOK_INVALID,
+		.tokv.c = (char)c
+	};
+}
+// Skips the rest of the current line and returns a PPTOK_NEWLINE token.
+// EOF also ends the line; it is pushed back so that it is read again by the next call.
+// The state is set to PREPST_NL in both cases.
+preproc_token_t pre_next_newline_token(prepare_t *src) {
+	int last;
+	do {
+		last = get_char(src);
+	} while ((last != EOF) && (last != '\n'));
+	if (last == EOF) {
+		// Force newline at EOF
+		unget_char(src, last);
+	}
+	src->st = PREPST_NL;
+	preproc_token_t tok = {
+		.tokt = PPTOK_NEWLINE,
+		.tokv.c = (char)last
+	};
+	return tok;
+}