about summary refs log tree commit diff stats
path: root/wrapperhelper/src/lang.c
diff options
context:
space:
mode:
authorrajdakin <rajdakin@gmail.com>2024-09-06 15:07:38 +0200
committerGitHub <noreply@github.com>2024-09-06 15:07:38 +0200
commit6044feb7fd58ff69f63f5418f516b1f3ded346e9 (patch)
tree9935288f145df5b80f6d118c8a487b0a10afb1e8 /wrapperhelper/src/lang.c
parent7dc59ac342d315dbb352becbeabc4e7057992de0 (diff)
downloadbox64-6044feb7fd58ff69f63f5418f516b1f3ded346e9.tar.gz
box64-6044feb7fd58ff69f63f5418f516b1f3ded346e9.zip
Wrapper helper (#1799)
* [WRAPPERHELPER] Added wrapperhelper v0.1, tested on libc

* [WRAPPED] Removed updates to libc from this branch

* [WRAPPERHELPER] Removed GPL header and added modified LGPL header, added notes about licensing issues
Diffstat (limited to 'wrapperhelper/src/lang.c')
-rw-r--r--wrapperhelper/src/lang.c1164
1 files changed, 1164 insertions, 0 deletions
diff --git a/wrapperhelper/src/lang.c b/wrapperhelper/src/lang.c
new file mode 100644
index 00000000..9ca51b15
--- /dev/null
+++ b/wrapperhelper/src/lang.c
@@ -0,0 +1,1164 @@
+#include "lang.h"
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#if 1
+#define DISP_ADDR_FMT ""
+#define DISP_ADDR_ARG(v) 
+#else
+#define DISP_ADDR_FMT "%p "
+#define DISP_ADDR_ARG(v) v,
+#endif
+
+void preproc_token_del(preproc_token_t *tok) {
+	switch (tok->tokt) {
+	case PPTOK_IDENT:
+	case PPTOK_IDENT_UNEXP:
+	case PPTOK_NUM:
+		string_del(tok->tokv.str);
+		break;
+	case PPTOK_STRING:
+	case PPTOK_INCL:
+		string_del(tok->tokv.sstr);
+		break;
+	case PPTOK_INVALID:
+	case PPTOK_SYM:
+	case PPTOK_NEWLINE:
+	case PPTOK_BLANK:
+	case PPTOK_START_LINE_COMMENT:
+	case PPTOK_EOF:
+		break;
+	}
+}
+
+VECTOR_IMPL(preproc, preproc_token_del)
+
+void proc_token_del(proc_token_t *tok) {
+	switch (tok->tokt) {
+	case PTOK_IDENT:
+	case PTOK_NUM:
+		string_del(tok->tokv.str);
+		break;
+	case PTOK_STRING:
+		string_del(tok->tokv.sstr);
+		break;
+	case PTOK_PRAGMA:
+		switch (tok->tokv.pragma.typ) {
+		case PRAGMA_MARK_SIMPLE:
+			string_del(tok->tokv.pragma.val);
+			break;
+		case PRAGMA_ALLOW_INTS:
+			break;
+		}
+		break;
+	case PTOK_INVALID:
+	case PTOK_KEYWORD:
+	case PTOK_SYM:
+	case PTOK_EOF:
+		break;
+	}
+}
+
+VECTOR_IMPL(proc, proc_token_del)
+
+const char *sym2str[LAST_SYM + 1] = {
+	[SYM_LBRACKET] = "{",
+	[SYM_RBRACKET] = "}",
+	[SYM_LSQBRACKET] = "[",
+	[SYM_RSQBRACKET] = "]",
+	[SYM_LPAREN] = "(",
+	[SYM_RPAREN] = ")",
+	[SYM_HASH] = "#",
+	[SYM_HASHHASH] = "##",
+	[SYM_SEMICOLON] = ";",
+	[SYM_COLON] = ":",
+	[SYM_COLONCOLON] = "::",
+	[SYM_VARIADIC] = "...",
+	[SYM_QUESTION] = "?",
+	[SYM_DOT] = ".",
+	[SYM_DASHGT] = "->",
+	[SYM_TILDE] = "~",
+	[SYM_EXCL] = "!",
+	[SYM_PLUS] = "+",
+	[SYM_DASH] = "-",
+	[SYM_STAR] = "*",
+	[SYM_SLASH] = "/",
+	[SYM_PERCENT] = "%",
+	[SYM_HAT] = "^",
+	[SYM_AMP] = "&",
+	[SYM_PIPE] = "|",
+	[SYM_EQ] = "=",
+	[SYM_PLUSEQ] = "+=",
+	[SYM_DASHEQ] = "-=",
+	[SYM_STAREQ] = "*=",
+	[SYM_SLASHEQ] = "/=",
+	[SYM_PERCENTEQ] = "%=",
+	[SYM_HATEQ] = "^=",
+	[SYM_AMPEQ] = "&=",
+	[SYM_PIPEEQ] = "|=",
+	[SYM_EQEQ] = "==",
+	[SYM_EXCLEQ] = "!=",
+	[SYM_LT] = "<",
+	[SYM_GT] = ">",
+	[SYM_LTEQ] = "<=",
+	[SYM_GTEQ] = ">=",
+	[SYM_AMPAMP] = "&&",
+	[SYM_PIPEPIPE] = "||",
+	[SYM_LTLT] = "<<",
+	[SYM_GTGT] = ">>",
+	[SYM_LTLTEQ] = "<<=",
+	[SYM_GTGTEQ] = ">>=",
+	[SYM_PLUSPLUS] = "++",
+	[SYM_DASHDASH] = "--",
+	[SYM_COMMA] = ",",
+};
+
+void preproc_token_print(const preproc_token_t *tok) {
+	switch (tok->tokt) {
+	case PPTOK_INVALID:
+		printf("Token: %7s %hhd (%c)\n", "#INVAL#", tok->tokv.c, (tok->tokv.c >= 0x20) && (tok->tokv.c < 0x7F) ? tok->tokv.c : '?');
+		break;
+	case PPTOK_IDENT:
+		printf("Token: %7s '%s'\n", "IDENT", string_content(tok->tokv.str));
+		break;
+	case PPTOK_IDENT_UNEXP:
+		printf("Token: %7s '%s'\n", "IDENT'", string_content(tok->tokv.str));
+		break;
+	case PPTOK_NUM:
+		printf("Token: %7s '%s'\n", "NUM", string_content(tok->tokv.str));
+		break;
+	case PPTOK_STRING:
+		printf("Token: %7s %c%s%c\n", "STRING",
+			tok->tokv.sisstr ? '"' : '\'', string_content(tok->tokv.sstr), tok->tokv.sisstr ? '"' : '\'');
+		break;
+	case PPTOK_INCL:
+		printf("Token: %7s %c%s%c\n", "INCL",
+			tok->tokv.sisstr ? '"' : '<', string_content(tok->tokv.sstr), tok->tokv.sisstr ? '"' : '>');
+		break;
+	case PPTOK_SYM:
+		printf("Token: %7s  %-3s (%u)\n", "SYM", sym2str[tok->tokv.sym], tok->tokv.sym);
+		break;
+	case PPTOK_NEWLINE:
+		printf("Token: %7s\n", "NEWLINE");
+		break;
+	case PPTOK_BLANK:
+		printf("Token: %7s\n", "\e[2;31m(blank)\e[m");
+		break;
+	case PPTOK_START_LINE_COMMENT:
+		printf("Token: %7s\n", "\e[2;31m( // ) \e[m");
+		break;
+	case PPTOK_EOF:
+		printf("Token: %7s\n", "EOF");
+		break;
+	default:
+		printf("Token: ??? %u\n", tok->tokt);
+	}
+}
+
+int preproc_token_isend(const preproc_token_t *tok) {
+	switch (tok->tokt) {
+	case PPTOK_IDENT:
+	case PPTOK_IDENT_UNEXP:
+	case PPTOK_NUM:
+	case PPTOK_STRING:
+	case PPTOK_INCL:
+	case PPTOK_SYM:
+	case PPTOK_NEWLINE:
+	case PPTOK_BLANK:
+	case PPTOK_START_LINE_COMMENT:
+		return 0;
+	case PPTOK_INVALID:
+	case PPTOK_EOF:
+	default:
+		return 1;
+	}
+}
+
+const char *kw2str[LAST_KEYWORD + 1] = {
+	[KW_ALIGNAS] = "_Alignas",
+	[KW_ALIGNOF] = "_Alignof",
+	[KW_ATOMIC] = "_Atomic",
+	[KW_AUTO] = "auto",
+	[KW_BOOL] = "_Bool",
+	[KW_BREAK] = "break",
+	[KW_CASE] = "case",
+	[KW_CHAR] = "char",
+	[KW_COMPLEX] = "_Complex",
+	[KW_CONST] = "const",
+	[KW_CONTINUE] = "continue",
+	[KW_DEFAULT] = "default",
+	[KW_DO] = "do",
+	[KW_DOUBLE] = "double",
+	[KW_ELSE] = "else",
+	[KW_ENUM] = "enum",
+	[KW_EXTERN] = "extern",
+	[KW_FLOAT] = "float",
+	[KW_FOR] = "for",
+	[KW_GENERIC] = "_Generic",
+	[KW_GOTO] = "goto",
+	[KW_IF] = "if",
+	[KW_IMAGINARY] = "_Imaginary",
+	[KW_INLINE] = "inline",
+	[KW_INT] = "int",
+	[KW_INT128] = "__int128",
+	[KW_LONG] = "long",
+	[KW_NORETURN] = "_Noreturn",
+	[KW_REGISTER] = "register",
+	[KW_RESTRICT] = "restrict",
+	[KW_RETURN] = "return",
+	[KW_SHORT] = "short",
+	[KW_SIGNED] = "signed",
+	[KW_SIZEOF] = "sizeof",
+	[KW_STATIC] = "static",
+	[KW_STATIC_ASSERT] = "_Static_assert",
+	[KW_STRUCT] = "struct",
+	[KW_SWITCH] = "switch",
+	[KW_THREAD_LOCAL] = "_Thread_local",
+	[KW_TYPEDEF] = "typedef",
+	[KW_UNION] = "union",
+	[KW_UNSIGNED] = "unsigned",
+	[KW_VOID] = "void",
+	[KW_VOLATILE] = "volatile",
+	[KW_WHILE] = "while",
+};
+
+void proc_token_print(const proc_token_t *tok) {
+	switch (tok->tokt) {
+	case PTOK_INVALID:
+		printf("Token: %7s %hhd (%c)\n", "#INVAL#", tok->tokv.c, (tok->tokv.c >= 0x20) && (tok->tokv.c < 0x7F) ? tok->tokv.c : '?');
+		break;
+	case PTOK_IDENT:
+		printf("Token: %7s '%s'\n", "IDENT", string_content(tok->tokv.str));
+		break;
+	case PTOK_KEYWORD:
+		printf("Token: %7s '%s' (%u)\n", "KEYWORD", kw2str[tok->tokv.kw], tok->tokv.kw);
+		break;
+	case PTOK_NUM:
+		printf("Token: %7s '%s'\n", "NUM", string_content(tok->tokv.str));
+		break;
+	case PTOK_STRING:
+		printf("Token: %7s %c%s%c\n", "STRING",
+			tok->tokv.sisstr ? '"' : '\'', string_content(tok->tokv.sstr), tok->tokv.sisstr ? '"' : '\'');
+		break;
+	case PTOK_SYM:
+		printf("Token: %7s  %-3s (%u)\n", "SYM", sym2str[tok->tokv.sym], tok->tokv.sym);
+		break;
+	case PTOK_PRAGMA:
+		switch (tok->tokv.pragma.typ) {
+		case PRAGMA_ALLOW_INTS:
+			printf("Token: %7s Allow ints\n", "PRAGMA");
+			break;
+		case PRAGMA_MARK_SIMPLE:
+			printf("Token: %7s Mark simple: %s\n", "PRAGMA", string_content(tok->tokv.pragma.val));
+			break;
+		default:
+			printf("Token: %7s ???\n", "PRAGMA");
+		}
+		break;
+	case PTOK_EOF:
+		printf("Token: %7s\n", "EOF");
+		break;
+	default:
+		printf("Token: ??? %u\n", tok->tokt);
+	}
+}
+
+int proc_token_iserror(const proc_token_t *tok) {
+	switch (tok->tokt) {
+	case PTOK_IDENT:
+	case PTOK_KEYWORD:
+	case PTOK_NUM:
+	case PTOK_STRING:
+	case PTOK_SYM:
+	case PTOK_PRAGMA:
+	case PTOK_EOF:
+		return 0;
+	case PTOK_INVALID:
+	default:
+		return 1;
+	}
+}
+
+int proc_token_isend(const proc_token_t *tok) {
+	switch (tok->tokt) {
+	case PTOK_IDENT:
+	case PTOK_KEYWORD:
+	case PTOK_NUM:
+	case PTOK_STRING:
+	case PTOK_SYM:
+	case PTOK_PRAGMA:
+		return 0;
+	case PTOK_INVALID:
+	case PTOK_EOF:
+	default:
+		return 1;
+	}
+}
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wanalyzer-null-dereference"
+KHASH_MAP_IMPL_STR(str2kw, enum token_keyword_type_e)
+KHASH_MAP_IMPL_STR(type_map, type_t*)
+KHASH_MAP_IMPL_STR(struct_map, struct_t*)
+KHASH_MAP_IMPL_STR(const_map, num_constant_t)
+#pragma GCC diagnostic pop
+khash_t(str2kw) *str2kw;
+
+int init_str2kw(void) {
+	if (!(str2kw = kh_init(str2kw))) {
+		printf("Failed to create the string to keyword map (init)\n");
+		return 0;
+	}
+	for (enum token_keyword_type_e kw = 0; kw <= LAST_KEYWORD; ++kw) {
+		int iret;
+		khiter_t it = kh_put(str2kw, str2kw, kw2str[kw], &iret);
+		if (iret < 0) {
+			printf("Failed to create the string to keyword map (keyword %u)\n", kw);
+			kh_destroy(str2kw, str2kw);
+			return 0;
+		}
+		kh_val(str2kw, it) = kw;
+	}
+	return 1;
+}
+void del_str2kw(void) {
+	kh_destroy(str2kw, str2kw);
+}
+
+int num_constant_convert(string_t *str, num_constant_t *cst) {
+	if (string_len(str) == 0) return 0; // Should never happen
+#define contains(c) strchr(string_content(str), c)
+	if (contains('.')
+	     || (!(contains('X') || contains('x')) && (contains('E') || contains('e')))
+	     || ((contains('X') || contains('x')) && (contains('P') || contains('p')))) {
+#undef contains
+		int ok;
+		if ((string_end(str)[-1] == 'l') || (string_end(str)[-1] == 'L')) {
+			cst->typ = NCT_LDOUBLE;
+			char *endc;
+			cst->val.l = strtold(string_content(str), &endc);
+			ok = (endc == string_end(str) - 1);
+		} else if ((string_end(str)[-1] == 'f') || (string_end(str)[-1] == 'F')) {
+			cst->typ = NCT_FLOAT;
+			char *endc;
+			cst->val.f = strtof(string_content(str), &endc);
+			ok = (endc == string_end(str) - 1);
+		} else {
+			cst->typ = NCT_DOUBLE;
+			char *endc;
+			cst->val.d = strtod(string_content(str), &endc);
+			ok = (endc == string_end(str));
+		}
+		if (!ok) {
+			printf("Error: '%s' is not a valid number\n", string_content(str));
+			return 0;
+		} else if (errno == ERANGE) {
+			printf("Warning: floating point constant is too large\n");
+			return 1;
+		}
+		return 1;
+	} else {
+		uint64_t ret = 0;
+		unsigned base = (string_content(str)[0] == '0') ? (string_content(str)[1] == 'x') ? 16 : 8 : 10;
+		size_t startidx = (base == 16) ? 2 : 0;
+		size_t endidx = string_len(str);
+		int suffix_type = 0;
+#define SUFFIX_U 1
+#define SUFFIX_L 2
+#define SUFFIX_LL 4
+#define SUFFIX_SGN SUFFIX_U
+#define SUFFIX_SZ (SUFFIX_L | SUFFIX_LL)
+	remove_suffix:
+		if ((string_content(str)[endidx - 1] == 'l') || (string_content(str)[endidx - 1] == 'L')) {
+			if (suffix_type & SUFFIX_SZ) {
+				printf("Error: '%s' is not a valid number (invalid suffix)\n", string_content(str));
+				return 0;
+			}
+			if (endidx == 1) return 0; // Should never happen
+			if ((string_content(str)[endidx - 2] == 'l') || (string_content(str)[endidx - 2] == 'L')) {
+				if (endidx == 2) return 0; // Should never happen
+				if (string_content(str)[endidx - 2] != string_content(str)[endidx - 1]) {
+					printf("Error: '%s' is not a valid number (invalid suffix)\n", string_content(str));
+					return 0;
+				}
+				endidx -= 2;
+				suffix_type |= SUFFIX_LL;
+			} else {
+				endidx -= 1;
+				suffix_type |= SUFFIX_L;
+			}
+			goto remove_suffix;
+		}
+		if ((string_content(str)[endidx - 1] == 'u') || (string_content(str)[endidx - 1] == 'U')) {
+			if (suffix_type & SUFFIX_SGN) {
+				printf("Error: '%s' is not a valid number (invalid suffix)\n", string_content(str));
+				return 0;
+			}
+			endidx -= 1;
+			suffix_type |= SUFFIX_U;
+			goto remove_suffix;
+		}
+		// 0u has startidx=0 endidx=1
+		if (endidx <= startidx) {
+			printf("Error: '%s' is not a valid number\n", string_content(str));
+		}
+		for (size_t i = startidx; i < endidx; ++i) {
+			if ((string_content(str)[i] >= '0') && (string_content(str)[i] <= ((base == 8) ? '7' : '9'))) {
+				ret = base * ret + (unsigned)(string_content(str)[i] - '0');
+			} else if ((base == 16) && (string_content(str)[i] >= 'A') && (string_content(str)[i] <= 'F')) {
+				ret = base * ret + 10 + (unsigned)(string_content(str)[i] - 'A');
+			} else if ((base == 16) && (string_content(str)[i] >= 'a') && (string_content(str)[i] <= 'f')) {
+				ret = base * ret + 10 + (unsigned)(string_content(str)[i] - 'a');
+			} else {
+				printf("Error: '%s' is not a valid number\n", string_content(str));
+				return 0;
+			}
+		}
+		// If base == 10, keep the signness; in any case, try 32 bits if available else use 64 bits
+		cst->typ =
+			((suffix_type & SUFFIX_SGN) == SUFFIX_U) ?
+				((suffix_type & SUFFIX_SZ) == SUFFIX_L) ? LONG_IS_32BITS ? NCT_UINT32 : NCT_UINT64 :
+				((suffix_type & SUFFIX_SZ) == SUFFIX_LL) ? NCT_UINT64 :
+					NCT_UINT32 :
+				((suffix_type & SUFFIX_SZ) == SUFFIX_L) ? LONG_IS_32BITS ? NCT_INT32 : NCT_INT64 :
+				((suffix_type & SUFFIX_SZ) == SUFFIX_LL) ? NCT_INT64 :
+					NCT_INT32;
+		if (cst->typ == NCT_INT32) {
+			if (ret < 1ULL << 31) {
+				cst->val.i32 = (int32_t)ret;
+				return 1;
+			}
+			// Not in signed 32-bits = int => try long/long long for decimal, unsigned for hexadecimal/octal
+			if (base != 10) cst->typ = NCT_UINT32;
+			else cst->typ = NCT_INT64;
+		}
+		if (cst->typ == NCT_UINT32) {
+			if (ret < 1ULL << 32) {
+				cst->val.u32 = (uint32_t)ret;
+				return 1;
+			}
+			// Not in unsigned 32-bits = unsigned => try unsigned long/long long for decimal and unsigned, long/long long for signed hexadecimal/octal
+			if ((base != 10) && ((suffix_type & SUFFIX_SGN) != SUFFIX_U)) cst->typ = NCT_INT64;
+			else cst->typ = NCT_UINT64;
+		}
+		if (cst->typ == NCT_INT64) {
+			if (ret < 1ULL << 63) {
+				cst->val.i64 = (int64_t)ret;
+				return 1;
+			}
+			// Not in signed 64-bits = long/long long => fail for decimal, try unsigned long/long long for hexadecimal/octal
+			if (base != 10) cst->typ = NCT_UINT64;
+		}
+		if (cst->typ == NCT_UINT64) {
+			cst->val.u64 = ret;
+			return 1;
+		}
+		// The constant cannot be typed... (Or internal error)
+		return 0;
+	}
+}
+
+void expr_del(expr_t *e) {
+	switch (e->typ) {
+	case ETY_VAR:
+		string_del(e->val.var);
+		break;
+		
+	case ETY_CONST:
+		break;
+		
+	// case ETY_GENERIC:
+	// TODO
+		
+	case ETY_CALL:
+		expr_del(e->val.call.fun);
+		for (size_t i = 0; i < e->val.call.nargs; ++i) {
+			expr_del(e->val.call.args[i]);
+		}
+		if (e->val.call.args) free(e->val.call.args);
+		break;
+		
+	case ETY_ACCESS:
+	case ETY_PTRACCESS:
+		expr_del(e->val.access.val);
+		string_del(e->val.access.member);
+		break;
+		
+	case ETY_UNARY:
+		expr_del(e->val.unary.e);
+		break;
+		
+	case ETY_BINARY:
+		expr_del(e->val.binary.e1);
+		expr_del(e->val.binary.e2);
+		break;
+		
+	case ETY_TERNARY:
+		expr_del(e->val.ternary.e1);
+		expr_del(e->val.ternary.e2);
+		expr_del(e->val.ternary.e3);
+		break;
+		
+	// case ETY_INIT_LIST:
+	// TODO
+		
+	case ETY_CAST:
+		type_del(e->val.cast.typ);
+		expr_del(e->val.cast.e);
+		break;
+	}
+	free(e);
+}
+
+void struct_del_weak(struct_t *st) {
+	if (--st->nrefs) return;
+	if (!st->tag) struct_del(st);
+}
+void type_del(type_t *typ) {
+	if (--typ->nrefs) return;
+	switch (typ->typ) {
+	case TYPE_BUILTIN:
+		break;
+	case TYPE_ARRAY:
+		type_del(typ->val.array.typ);
+		break;
+	case TYPE_STRUCT_UNION:
+		struct_del_weak(typ->val.st);
+		break;
+	case TYPE_ENUM:
+	case TYPE_PTR:
+		type_del(typ->val.typ);
+		break;
+	case TYPE_FUNCTION:
+		type_del(typ->val.fun.ret);
+		if (typ->val.fun.nargs != (size_t)-1) {
+			for (size_t i = 0; i < typ->val.fun.nargs; ++i) {
+				type_del(typ->val.fun.args[i]);
+			}
+		}
+		if (typ->val.fun.args) free(typ->val.fun.args);
+		break;
+	}
+	free(typ);
+}
+void type_map_del(khash_t(type_map) *map) {
+	kh_cstr_t str;
+	type_t **it;
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-qual"
+	kh_foreach_key_value_ref(map, str, it, free((void*)str); type_del(*it))
+#pragma GCC diagnostic pop
+	kh_destroy(type_map, map);
+}
+void type_set_del(khash_t(type_set) *set) {
+	type_t *it;
+	kh_foreach_key(set, it, type_del(it))
+	kh_destroy(type_set, set);
+}
+void const_map_del(khash_t(const_map) *map) {
+	kh_cstr_t str;
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-qual"
+	kh_foreach_key(map, str, free((void*)str))
+#pragma GCC diagnostic pop
+	kh_destroy(const_map, map);
+}
+
+void st_member_del(st_member_t *member) {
+	if (member->name) string_del(member->name);
+	type_del(member->typ);
+}
+
+void struct_del(struct_t *st) {
+	if (st->tag) {
+		string_del(st->tag);
+		st->tag = NULL;
+	}
+	if (st->is_defined) {
+		for (size_t i = 0; i < st->nmembers; ++i) {
+			st_member_del(&st->members[i]);
+		}
+		free(st->members);
+		st->is_defined = 0;
+	}
+	if (!st->nrefs) free(st);
+}
+void struct_map_del(khash_t(struct_map) *map) {
+	// The keys are the tag in the struct_t
+	struct_t **it;
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-qual"
+	kh_foreach_value_ref(map, it, struct_del(*it))
+#pragma GCC diagnostic pop
+	kh_destroy(struct_map, map);
+}
+
+type_t *type_new(void) {
+	type_t *ret = malloc(sizeof *ret);
+	if (!ret) {
+		printf("Failed to create a new type\n");
+		return NULL;
+	}
+	ret->szinfo.align = ret->szinfo.size = 0;
+	ret->is_atomic = ret->is_const = ret->is_restrict = ret->is_volatile = ret->is_incomplete = ret->is_validated = ret->_internal_use = 0;
+	ret->nrefs = 1;
+	ret->typ = TYPE_BUILTIN;
+	ret->val.builtin = BTT_INT;
+	return ret;
+}
+type_t *type_new_ptr(type_t *target) {
+	type_t *ret = malloc(sizeof *ret);
+	if (!ret) {
+		printf("Failed to create a new pointer type\n");
+		return NULL;
+	}
+	ret->szinfo.align = ret->szinfo.size = 0;
+	ret->is_atomic = ret->is_const = ret->is_restrict = ret->is_volatile = ret->is_incomplete = ret->is_validated = ret->_internal_use = 0;
+	ret->nrefs = 1;
+	ret->typ = TYPE_PTR;
+	ret->val.typ = target;
+	return ret;
+}
+// The following functions do not work for functions (val.args needs to be duplicated)
+/* type_t *type_do_copy(type_t *ref) {
+	type_t *ret = type_do_copy_nodec(ref);
+	if (!ret) return NULL;
+	type_del(ref);
+	return ret;
+}
+type_t *type_do_copy_nodec(const type_t *ref) {
+	type_t *ret = malloc(sizeof *ret);
+	if (!ret) {
+		printf("Failed to duplicate type\n");
+		return NULL;
+	}
+	memcpy(ret, ref, sizeof *ret);
+	switch (ref->typ) {
+	case TYPE_BUILTIN:
+		break;
+	case TYPE_ARRAY:
+		++ref->val.array.typ->nrefs;
+		break;
+	case TYPE_STRUCT_UNION:
+		++ret->val.st->nrefs;
+		break;
+	case TYPE_ENUM:
+	case TYPE_PTR:
+		++ref->val.typ->nrefs;
+		break;
+	case TYPE_FUNCTION:
+		++ref->val.fun.ret->nrefs;
+		if (ref->val.fun.nargs != (size_t)-1) {
+			for (size_t i = 0; i < ref->val.fun.nargs; ++i) {
+				++ref->val.fun.args[i]->nrefs;
+			}
+		}
+		break;
+	}
+	ret->nrefs = 1;
+	return ret;
+}
+type_t *type_maybe_copy(type_t *ref) {
+	if (ref->nrefs == 1) return ref;
+	else return type_do_copy(ref);
+} */
+int type_copy_into(type_t *dest, const type_t *ref) {
+	size_t nrefs = dest->nrefs;
+	_Bool is_atomic = dest->is_atomic;
+	_Bool is_const = dest->is_const;
+	_Bool is_restrict = dest->is_restrict;
+	_Bool is_volatile = dest->is_volatile;
+	memcpy(dest, ref, sizeof *dest);
+	switch (ref->typ) {
+	case TYPE_BUILTIN:
+		break;
+	case TYPE_ARRAY:
+		++ref->val.array.typ->nrefs;
+		break;
+	case TYPE_STRUCT_UNION:
+		++dest->val.st->nrefs;
+		break;
+	case TYPE_ENUM:
+	case TYPE_PTR:
+		++ref->val.typ->nrefs;
+		break;
+	case TYPE_FUNCTION:
+		++ref->val.fun.ret->nrefs;
+		if (ref->val.fun.nargs != (size_t)-1) {
+			if (!ref->val.fun.args) {
+				dest->typ = TYPE_BUILTIN;
+				return 0;
+			}
+			type_t **args = malloc(sizeof *args * ref->val.fun.nargs);
+			if (!args) {
+				printf("Error: failed to allocate new argument array\n");
+				return 0;
+			}
+			for (size_t i = 0; i < ref->val.fun.nargs; ++i) {
+				++ref->val.fun.args[i]->nrefs;
+				args[i] = ref->val.fun.args[i];
+			}
+			dest->val.fun.args = args;
+		}
+		break;
+	}
+	dest->is_atomic |= is_atomic;
+	dest->is_const |= is_const;
+	dest->is_restrict |= is_restrict;
+	dest->is_volatile |= is_volatile;
+	dest->nrefs = nrefs;
+	return 1;
+}
+
+struct_t *struct_new(int is_struct, string_t *tag) {
+	struct_t *ret = malloc(sizeof *ret);
+	if (!ret) {
+		printf("Failed to create a new struct\n");
+		return NULL;
+	}
+	ret->is_struct = is_struct;
+	ret->tag = tag;
+	ret->is_defined = 0;
+	ret->nrefs = 1;
+	ret->explicit_simple = 0;
+	return ret;
+}
+
+_Static_assert(sizeof(type_t*) == sizeof(khint64_t), "Not a 64-bits machine");
+khint_t type_t_hash(type_t *typ) {
+	switch (typ->typ) {
+	case TYPE_BUILTIN: return kh_int_hash_func(typ->val.builtin);
+	case TYPE_ARRAY: return kh_int_hash_func((typ->val.array.array_sz << 12) + type_t_hash(typ->val.array.typ));
+	case TYPE_STRUCT_UNION:
+		if (typ->val.st->tag) {
+			return kh_str_hash_func(string_content(typ->val.st->tag));
+		} else {
+			khint64_t acc = (typ->val.st->nmembers << 5) + 1;
+			for (size_t i = 0; i < typ->val.st->nmembers; ++i) {
+				acc += (khint64_t)type_t_hash(typ->val.st->members[i].typ) << ((1ull << (i % 64)) % 61);
+			}
+			return kh_int64_hash_func(acc);
+		}
+	case TYPE_ENUM:
+	case TYPE_PTR:
+		return kh_int_hash_func(type_t_hash(typ->val.typ));
+	case TYPE_FUNCTION: {
+		khint64_t acc = type_t_hash(typ->val.fun.ret) + typ->val.fun.nargs * 73 + (size_t)typ->val.fun.has_varargs * 77;
+		if (typ->val.fun.nargs != (size_t)-1) {
+			for (size_t i = 0; i < typ->val.fun.nargs; ++i) {
+				acc += (khint64_t)type_t_hash(typ->val.fun.args[i]) << ((3ull << (i % 64)) % 53);
+			}
+		}
+		return kh_int64_hash_func(acc); }
+	default: return 0;
+	}
+}
+int type_t_equal(type_t *typ1, type_t *typ2) {
+	if (typ1->typ != typ2->typ) return 0;
+	if ((typ1->is_atomic != typ2->is_atomic) || (typ1->is_const != typ2->is_const)
+	 || (typ1->is_restrict != typ2->is_restrict) || (typ1->is_volatile != typ2->is_volatile)) {
+		return 0;
+	}
+	switch (typ1->typ) {
+	case TYPE_BUILTIN: return typ1->val.builtin == typ2->val.builtin;
+	case TYPE_ARRAY: return (typ1->val.array.array_sz == typ2->val.array.array_sz) && type_t_equal(typ1->val.array.typ, typ2->val.array.typ);
+	case TYPE_STRUCT_UNION:
+		// This will not do an infinite recursion since only unnamed struct/unions will compare their members
+		if (!typ1->val.st->tag != !typ2->val.st->tag) return 0;
+		if (typ1->val.st->tag) return !strcmp(string_content(typ1->val.st->tag), string_content(typ2->val.st->tag));
+		if (!typ1->val.st->is_defined || !typ2->val.st->is_defined) {
+			printf("Warning: incomplete anonymous structure/union\n");
+			return 0;
+		}
+		if (typ1->val.st->nmembers != typ2->val.st->nmembers) return 0;
+		for (size_t i = 0; i < typ1->val.st->nmembers; ++i) {
+			if (!!typ1->val.st->members[i].name != !!typ2->val.st->members[i].name) return 0;
+			if (typ1->val.st->members[i].name
+			     && strcmp(string_content(typ1->val.st->members[i].name), string_content(typ2->val.st->members[i].name)))
+				return 0;
+			if (!type_t_equal(typ1->val.st->members[i].typ, typ2->val.st->members[i].typ)) return 0;
+		}
+		return 1;
+	case TYPE_ENUM:
+	case TYPE_PTR:
+		return type_t_equal(typ1->val.typ, typ2->val.typ);
+	case TYPE_FUNCTION:
+		if (typ1->val.fun.nargs != typ2->val.fun.nargs) return 0;
+		if (typ1->val.fun.has_varargs != typ2->val.fun.has_varargs) return 0;
+		if (!type_t_equal(typ1->val.fun.ret, typ2->val.fun.ret)) return 0;
+		if (typ1->val.fun.nargs != (size_t)-1) {
+			for (size_t i = 0; i < typ1->val.fun.nargs; ++i) {
+				if (!type_t_equal(typ1->val.fun.args[i], typ2->val.fun.args[i])) return 0;
+			}
+		}
+		return 1;
+	default: return 0;
+	}
+}
+__KHASH_IMPL(type_set, , type_t*, char, 0, type_t_hash, type_t_equal)
+
+type_t *type_try_merge(type_t *typ, khash_t(type_set) *set) {
+	int iret;
+	khiter_t it = kh_put(type_set, set, typ, &iret);
+	if (iret < 0) {
+		printf("Error: failed to add type to type_set\n");
+		return NULL;
+	} else if (iret == 0) {
+		if (typ == kh_key(set, it)) return typ;
+		kh_key(set, it)->is_incomplete &= typ->is_incomplete; // In case we have a recursive structure
+		type_del(typ);
+		++kh_key(set, it)->nrefs;
+		return kh_key(set, it);
+	} else {
+		++typ->nrefs;
+	}
+	type_t *typ2;
+	switch (typ->typ) {
+	case TYPE_BUILTIN:
+		return typ;
+	case TYPE_ARRAY:
+		typ2 = type_try_merge(typ->val.array.typ, set);
+		if (typ2) typ->val.array.typ = typ2;
+		else return NULL;
+		return typ;
+	case TYPE_STRUCT_UNION:
+		if (typ->val.st->is_defined) {
+			for (size_t i = 0; i < typ->val.st->nmembers; ++i) {
+				typ2 = type_try_merge(typ->val.st->members[i].typ, set);
+				if (typ2) typ->val.st->members[i].typ = typ2;
+				else return NULL;
+			}
+		}
+		return typ;
+	case TYPE_ENUM:
+	case TYPE_PTR:
+		typ2 = type_try_merge(typ->val.typ, set);
+		if (typ2) typ->val.typ = typ2;
+		else return NULL;
+		return typ;
+	case TYPE_FUNCTION:
+		typ2 = type_try_merge(typ->val.fun.ret, set);
+		if (typ2) typ->val.fun.ret = typ2;
+		else return NULL;
+		
+		if (typ->val.fun.nargs != (size_t)-1) {
+			for (size_t i = 0; i < typ->val.fun.nargs; ++i) {
+				typ2 = type_try_merge(typ->val.fun.args[i], set);
+				if (typ2) typ->val.fun.args[i] = typ2;
+				else return NULL;
+			}
+		}
+		return typ;
+		
+	default: return NULL;
+	}
+}
+
+const char *builtin2str[LAST_BUILTIN + 1] = {
+	[BTT_VOID] = "void",
+	[BTT_BOOL] = "_Bool",
+	[BTT_CHAR] = "char",
+	[BTT_SCHAR] = "signed char",
+	[BTT_UCHAR] = "unsigned char",
+	[BTT_SHORT] = "short",
+	[BTT_SSHORT] = "signed short",
+	[BTT_USHORT] = "unsigned short",
+	[BTT_INT] = "int",
+	[BTT_SINT] = "signed",
+	[BTT_UINT] = "unsigned",
+	[BTT_LONG] = "long",
+	[BTT_SLONG] = "signed long",
+	[BTT_ULONG] = "unsigned long",
+	[BTT_LONGLONG] = "long long",
+	[BTT_SLONGLONG] = "signed long long",
+	[BTT_ULONGLONG] = "unsigned long long",
+	[BTT_INT128] = "__int128",
+	[BTT_SINT128] = "signed __int128",
+	[BTT_UINT128] = "unsigned __int128",
+	[BTT_S8] = "int8_t",
+	[BTT_U8] = "uint8_t",
+	[BTT_S16] = "int16_t",
+	[BTT_U16] = "uint16_t",
+	[BTT_S32] = "int32_t",
+	[BTT_U32] = "uint32_t",
+	[BTT_S64] = "int64_t",
+	[BTT_U64] = "uint64_t",
+	[BTT_FLOAT] = "float",
+	[BTT_CFLOAT] = "float _Complex",
+	[BTT_IFLOAT] = "float _Imaginary",
+	[BTT_DOUBLE] = "double",
+	[BTT_CDOUBLE] = "double _Complex",
+	[BTT_IDOUBLE] = "double _Imaginary",
+	[BTT_LONGDOUBLE] = "long double",
+	[BTT_CLONGDOUBLE] = "long double _Complex",
+	[BTT_ILONGDOUBLE] = "long double _Imaginary",
+	[BTT_VA_LIST] = "__builtin_va_list",
+};
+void type_print(type_t *typ) {
+	if (typ->_internal_use) {
+		printf("...%p", typ);
+		return;
+	}
+	typ->_internal_use = 1;
+	printf("<" DISP_ADDR_FMT "n_uses=%zu> ", DISP_ADDR_ARG(typ) typ->nrefs);
+	if (!typ->is_validated) printf("!<not validated> ");
+	if (typ->is_incomplete) printf("<incomplete> ");
+	if (typ->is_validated && !typ->is_incomplete) printf("<size=%zu align=%zu> ", typ->szinfo.size, typ->szinfo.align);
+	if (typ->is_const) printf("const ");
+	if (typ->is_restrict) printf("restrict ");
+	if (typ->is_volatile) printf("volatile ");
+	if (typ->is_atomic) printf("_Atomic ");
+	switch (typ->typ) {
+	case TYPE_BUILTIN:
+		printf("<builtin %s (%u)>", builtin2str[typ->val.builtin], typ->val.builtin);
+		break;
+	case TYPE_ARRAY:
+		if (typ->val.array.array_sz == (size_t)-1)
+			printf("[] ");
+		else printf("[%zu] ", typ->val.array.array_sz);
+		type_print(typ->val.array.typ);
+		break;
+	case TYPE_STRUCT_UNION:
+		struct_print(typ->val.st);
+		break;
+	case TYPE_ENUM:
+		printf("<enum> ");
+		type_print(typ->val.typ);
+		break;
+	case TYPE_PTR:
+		printf("*");
+		type_print(typ->val.typ);
+		break;
+	case TYPE_FUNCTION:
+		printf("<function type, ret=");
+		type_print(typ->val.fun.ret);
+		if (typ->val.fun.nargs == (size_t)-1) {
+			printf(", no argument spec>");
+		} else {
+			printf(", args=(");
+			for (size_t i = 0; i < typ->val.fun.nargs; ++i) {
+				if (i) printf("), (");
+				type_print(typ->val.fun.args[i]);
+			}
+			if (typ->val.fun.has_varargs && typ->val.fun.nargs) printf(", ...)>");
+			else if (typ->val.fun.has_varargs && !typ->val.fun.nargs) printf("...)>");
+			else printf(")>");
+		}
+		break;
+	}
+	typ->_internal_use = 0;
+}
+void struct_print(const struct_t *st) {
+	printf("<" DISP_ADDR_FMT "n_uses=%zu> ", DISP_ADDR_ARG(st) st->nrefs);
+	if (st->explicit_simple) {
+		printf("<explicitly simple> ");
+	}
+	if (st->is_defined) {
+		printf(
+			"%s %s <with %zu members%s> { ",
+			st->is_struct ? "struct" : "union",
+			st->tag ? string_content(st->tag) : "<no tag>",
+			st->nmembers,
+			st->has_incomplete ? ", with incomplete" : "");
+		for (size_t i = 0; i < st->nmembers; ++i) {
+			if (i) printf(", ");
+			type_print(st->members[i].typ);
+			printf(" %s", st->members[i].name ? string_content(st->members[i].name) : "<no name>");
+			if (st->members[i].is_bitfield) {
+				printf(" : %zu", st->members[i].bitfield_width);
+			}
+		}
+		printf(" }");
+	} else {
+		printf("%s %s <undefined>", st->is_struct ? "struct" : "union", st->tag ? string_content(st->tag) : "<no tag>");
+	}
+}
+
+// TODO: per-arch array
+size_t sizeof_btt[LAST_BUILTIN + 1] = {
+	[BTT_VOID] = 0,
+	[BTT_BOOL] = 1,
+	[BTT_CHAR] = 1,
+	[BTT_SCHAR] = 1,
+	[BTT_UCHAR] = 1,
+	[BTT_SHORT] = 2,
+	[BTT_SSHORT] = 2,
+	[BTT_USHORT] = 2,
+	[BTT_INT] = 4,
+	[BTT_SINT] = 4,
+	[BTT_UINT] = 4,
+	[BTT_LONG] = LONG_IS_32BITS ? 4 : 8,
+	[BTT_SLONG] = LONG_IS_32BITS ? 4 : 8,
+	[BTT_ULONG] = LONG_IS_32BITS ? 4 : 8,
+	[BTT_LONGLONG] = 8,
+	[BTT_SLONGLONG] = 8,
+	[BTT_ULONGLONG] = 8,
+	[BTT_INT128] = 16,
+	[BTT_SINT128] = 16,
+	[BTT_UINT128] = 16,
+	[BTT_S8] = 1,
+	[BTT_U8] = 1,
+	[BTT_S16] = 2,
+	[BTT_U16] = 2,
+	[BTT_S32] = 4,
+	[BTT_U32] = 4,
+	[BTT_S64] = 8,
+	[BTT_U64] = 8,
+	[BTT_FLOAT] = 4,
+	[BTT_CFLOAT] = 8,
+	[BTT_IFLOAT] = 4,
+	[BTT_DOUBLE] = 8,
+	[BTT_CDOUBLE] = 16,
+	[BTT_IDOUBLE] = 8,
+	[BTT_LONGDOUBLE] = 16,
+	[BTT_CLONGDOUBLE] = 32,
+	[BTT_ILONGDOUBLE] = 16,
+	[BTT_VA_LIST] = LONG_IS_32BITS ? 0 : 24, // TODO
+};
+// The following assumes sizeof(unsigned long) == sizeof(void*)
+file_t *file_new(void) {
+	file_t *ret = malloc(sizeof *ret);
+	if (!ret) {
+		printf("Failed to create a new translation unit structure (init)\n");
+		return NULL;
+	}
+	if (!(ret->struct_map = kh_init(struct_map))) {
+		printf("Failed to create a new translation unit structure (structure map)\n");
+		free(ret);
+		return NULL;
+	}
+	if (!(ret->type_map = kh_init(type_map))) {
+		printf("Failed to create a new translation unit structure (type map)\n");
+		kh_destroy(struct_map, ret->struct_map);
+		free(ret);
+		return NULL;
+	}
+	if (!(ret->enum_map = kh_init(type_map))) {
+		printf("Failed to create a new translation unit structure (enumeration map)\n");
+		kh_destroy(struct_map, ret->struct_map);
+		kh_destroy(type_map, ret->type_map);
+		free(ret);
+		return NULL;
+	}
+	if (!(ret->decl_map = kh_init(type_map))) {
+		printf("Failed to create a new translation unit structure (declaration map)\n");
+		kh_destroy(struct_map, ret->struct_map);
+		kh_destroy(type_map, ret->type_map);
+		kh_destroy(type_map, ret->enum_map);
+		free(ret);
+		return NULL;
+	}
+	if (!(ret->type_set = kh_init(type_set))) {
+		printf("Failed to create a new translation unit structure (type set)\n");
+		kh_destroy(struct_map, ret->struct_map);
+		kh_destroy(type_map, ret->type_map);
+		kh_destroy(type_map, ret->enum_map);
+		kh_destroy(type_map, ret->decl_map);
+		free(ret);
+		return NULL;
+	}
+	if (!(ret->const_map = kh_init(const_map))) {
+		printf("Failed to create a new translation unit structure (const map)\n");
+		kh_destroy(struct_map, ret->struct_map);
+		kh_destroy(type_map, ret->type_map);
+		kh_destroy(type_map, ret->enum_map);
+		kh_destroy(type_map, ret->decl_map);
+		kh_destroy(type_set, ret->type_set);
+		free(ret);
+		return NULL;
+	}
+	
+	// Now fill in the builtin types
+	int iret;
+	for (enum type_builtin_e i = 0; i < LAST_BUILTIN + 1; ++i) {
+		type_t *t = malloc(sizeof *t);
+		if (!t) {
+			printf("Failed to create a new translation unit structure (builtin type)\n");
+			for (; i--;) {
+				free(ret->builtins[i]);
+			}
+			kh_destroy(struct_map, ret->struct_map);
+			kh_destroy(type_map, ret->type_map);
+			kh_destroy(type_map, ret->enum_map);
+			kh_destroy(type_map, ret->decl_map);
+			kh_destroy(type_set, ret->type_set);
+			kh_destroy(const_map, ret->const_map);
+			free(ret);
+			return NULL;
+		}
+		t->is_atomic = t->is_const = t->is_restrict = t->is_volatile = t->_internal_use = 0;
+		t->is_incomplete = (i == BTT_VOID);
+		t->is_validated = 1;
+		t->nrefs = 2;
+		t->typ = TYPE_BUILTIN;
+		t->val.builtin = i;
+		t->szinfo.align = t->szinfo.size = sizeof_btt[i];
+		ret->builtins[i] = t;
+		kh_put(type_set, ret->type_set, t, &iret);
+		if (iret < 0) {
+			printf("Failed to create a new translation unit structure (failed to add intrinsic type to type_set)\n");
+			kh_destroy(struct_map, ret->struct_map);
+			kh_destroy(type_map, ret->type_map);
+			kh_destroy(type_map, ret->enum_map);
+			kh_destroy(type_map, ret->decl_map);
+			kh_destroy(type_set, ret->type_set);
+			kh_destroy(const_map, ret->const_map);
+			free(ret);
+			return NULL;
+		} else if (iret == 0) {
+			printf("Failed to create a new translation unit structure (duplicate intrinsic type in type_set)\n");
+			for (++i; i--;) {
+				free(ret->builtins[i]);
+			}
+			kh_destroy(struct_map, ret->struct_map);
+			kh_destroy(type_map, ret->type_map);
+			kh_destroy(type_map, ret->enum_map);
+			kh_destroy(type_map, ret->decl_map);
+			kh_destroy(type_set, ret->type_set);
+			kh_destroy(const_map, ret->const_map);
+			free(ret);
+			return NULL;
+		}
+	}
+	// ret is valid
+	
+	// Add __builtin_va_list as a typedef
+	char *sdup = strdup("__builtin_va_list");
+	if (!sdup) {
+		printf("Failed to create a new translation unit structure (va_list name)\n");
+		file_del(ret);
+		return NULL;
+	}
+	khiter_t it = kh_put(type_map, ret->type_map, sdup, &iret);
+	if (iret < 0) {
+		printf("Failed to create a new translation unit structure (add va_list typedef)\n");
+		free(sdup);
+		file_del(ret);
+		return NULL;
+	} else if (iret == 0) {
+		printf("Failed to create a new translation unit structure (__builtin_va_list is already a typedef)\n");
+		file_del(ret);
+		return NULL;
+	}
+	kh_val(ret->type_map, it) = ret->builtins[BTT_VA_LIST];
+	++ret->builtins[BTT_VA_LIST]->nrefs;
+	
+	return ret;
+}
+void file_del(file_t *f) {
+	struct_map_del(f->struct_map);
+	type_map_del(f->type_map);
+	type_map_del(f->enum_map);
+	type_map_del(f->decl_map);
+	type_set_del(f->type_set);
+	const_map_del(f->const_map);
+	for (enum type_builtin_e i = 0; i < LAST_BUILTIN + 1; ++i) {
+		type_del(f->builtins[i]);
+	}
+	free(f);
+}