diff options
Diffstat (limited to 'wrapperhelper/src/lang.c')
| -rw-r--r-- | wrapperhelper/src/lang.c | 1164 |
1 files changed, 1164 insertions, 0 deletions
diff --git a/wrapperhelper/src/lang.c b/wrapperhelper/src/lang.c new file mode 100644 index 00000000..9ca51b15 --- /dev/null +++ b/wrapperhelper/src/lang.c @@ -0,0 +1,1164 @@ +#include "lang.h" + +#include <errno.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#if 1 +#define DISP_ADDR_FMT "" +#define DISP_ADDR_ARG(v) +#else +#define DISP_ADDR_FMT "%p " +#define DISP_ADDR_ARG(v) v, +#endif + +void preproc_token_del(preproc_token_t *tok) { + switch (tok->tokt) { + case PPTOK_IDENT: + case PPTOK_IDENT_UNEXP: + case PPTOK_NUM: + string_del(tok->tokv.str); + break; + case PPTOK_STRING: + case PPTOK_INCL: + string_del(tok->tokv.sstr); + break; + case PPTOK_INVALID: + case PPTOK_SYM: + case PPTOK_NEWLINE: + case PPTOK_BLANK: + case PPTOK_START_LINE_COMMENT: + case PPTOK_EOF: + break; + } +} + +VECTOR_IMPL(preproc, preproc_token_del) + +void proc_token_del(proc_token_t *tok) { + switch (tok->tokt) { + case PTOK_IDENT: + case PTOK_NUM: + string_del(tok->tokv.str); + break; + case PTOK_STRING: + string_del(tok->tokv.sstr); + break; + case PTOK_PRAGMA: + switch (tok->tokv.pragma.typ) { + case PRAGMA_MARK_SIMPLE: + string_del(tok->tokv.pragma.val); + break; + case PRAGMA_ALLOW_INTS: + break; + } + break; + case PTOK_INVALID: + case PTOK_KEYWORD: + case PTOK_SYM: + case PTOK_EOF: + break; + } +} + +VECTOR_IMPL(proc, proc_token_del) + +const char *sym2str[LAST_SYM + 1] = { + [SYM_LBRACKET] = "{", + [SYM_RBRACKET] = "}", + [SYM_LSQBRACKET] = "[", + [SYM_RSQBRACKET] = "]", + [SYM_LPAREN] = "(", + [SYM_RPAREN] = ")", + [SYM_HASH] = "#", + [SYM_HASHHASH] = "##", + [SYM_SEMICOLON] = ";", + [SYM_COLON] = ":", + [SYM_COLONCOLON] = "::", + [SYM_VARIADIC] = "...", + [SYM_QUESTION] = "?", + [SYM_DOT] = ".", + [SYM_DASHGT] = "->", + [SYM_TILDE] = "~", + [SYM_EXCL] = "!", + [SYM_PLUS] = "+", + [SYM_DASH] = "-", + [SYM_STAR] = "*", + [SYM_SLASH] = "/", + [SYM_PERCENT] = "%", + [SYM_HAT] = "^", + [SYM_AMP] = "&", + [SYM_PIPE] = "|", + [SYM_EQ] = "=", + [SYM_PLUSEQ] = "+=", + [SYM_DASHEQ] = "-=", + [SYM_STAREQ] = "*=", + [SYM_SLASHEQ] = "/=", + [SYM_PERCENTEQ] = "%=", + [SYM_HATEQ] = "^=", + [SYM_AMPEQ] = "&=", + [SYM_PIPEEQ] = "|=", + [SYM_EQEQ] = "==", + [SYM_EXCLEQ] = "!=", + [SYM_LT] = "<", + [SYM_GT] = ">", + [SYM_LTEQ] = "<=", + [SYM_GTEQ] = ">=", + [SYM_AMPAMP] = "&&", + [SYM_PIPEPIPE] = "||", + [SYM_LTLT] = "<<", + [SYM_GTGT] = ">>", + [SYM_LTLTEQ] = "<<=", + [SYM_GTGTEQ] = ">>=", + [SYM_PLUSPLUS] = "++", + [SYM_DASHDASH] = "--", + [SYM_COMMA] = ",", +}; + +void preproc_token_print(const preproc_token_t *tok) { + switch (tok->tokt) { + case PPTOK_INVALID: + printf("Token: %7s %hhd (%c)\n", "#INVAL#", tok->tokv.c, (tok->tokv.c >= 0x20) && (tok->tokv.c < 0x7F) ? tok->tokv.c : '?'); + break; + case PPTOK_IDENT: + printf("Token: %7s '%s'\n", "IDENT", string_content(tok->tokv.str)); + break; + case PPTOK_IDENT_UNEXP: + printf("Token: %7s '%s'\n", "IDENT'", string_content(tok->tokv.str)); + break; + case PPTOK_NUM: + printf("Token: %7s '%s'\n", "NUM", string_content(tok->tokv.str)); + break; + case PPTOK_STRING: + printf("Token: %7s %c%s%c\n", "STRING", + tok->tokv.sisstr ? '"' : '\'', string_content(tok->tokv.sstr), tok->tokv.sisstr ? '"' : '\''); + break; + case PPTOK_INCL: + printf("Token: %7s %c%s%c\n", "INCL", + tok->tokv.sisstr ? '"' : '<', string_content(tok->tokv.sstr), tok->tokv.sisstr ? '"' : '>'); + break; + case PPTOK_SYM: + printf("Token: %7s %-3s (%u)\n", "SYM", sym2str[tok->tokv.sym], tok->tokv.sym); + break; + case PPTOK_NEWLINE: + printf("Token: %7s\n", "NEWLINE"); + break; + case PPTOK_BLANK: + printf("Token: %7s\n", "\e[2;31m(blank)\e[m"); + break; + case PPTOK_START_LINE_COMMENT: + printf("Token: %7s\n", "\e[2;31m( // ) \e[m"); + break; + case PPTOK_EOF: + printf("Token: %7s\n", "EOF"); + break; + default: + printf("Token: ??? %u\n", tok->tokt); + } +} + +int preproc_token_isend(const preproc_token_t *tok) { + switch (tok->tokt) { + case PPTOK_IDENT: + case PPTOK_IDENT_UNEXP: + case PPTOK_NUM: + case PPTOK_STRING: + case PPTOK_INCL: + case PPTOK_SYM: + case PPTOK_NEWLINE: + case PPTOK_BLANK: + case PPTOK_START_LINE_COMMENT: + return 0; + case PPTOK_INVALID: + case PPTOK_EOF: + default: + return 1; + } +} + +const char *kw2str[LAST_KEYWORD + 1] = { + [KW_ALIGNAS] = "_Alignas", + [KW_ALIGNOF] = "_Alignof", + [KW_ATOMIC] = "_Atomic", + [KW_AUTO] = "auto", + [KW_BOOL] = "_Bool", + [KW_BREAK] = "break", + [KW_CASE] = "case", + [KW_CHAR] = "char", + [KW_COMPLEX] = "_Complex", + [KW_CONST] = "const", + [KW_CONTINUE] = "continue", + [KW_DEFAULT] = "default", + [KW_DO] = "do", + [KW_DOUBLE] = "double", + [KW_ELSE] = "else", + [KW_ENUM] = "enum", + [KW_EXTERN] = "extern", + [KW_FLOAT] = "float", + [KW_FOR] = "for", + [KW_GENERIC] = "_Generic", + [KW_GOTO] = "goto", + [KW_IF] = "if", + [KW_IMAGINARY] = "_Imaginary", + [KW_INLINE] = "inline", + [KW_INT] = "int", + [KW_INT128] = "__int128", + [KW_LONG] = "long", + [KW_NORETURN] = "_Noreturn", + [KW_REGISTER] = "register", + [KW_RESTRICT] = "restrict", + [KW_RETURN] = "return", + [KW_SHORT] = "short", + [KW_SIGNED] = "signed", + [KW_SIZEOF] = "sizeof", + [KW_STATIC] = "static", + [KW_STATIC_ASSERT] = "_Static_assert", + [KW_STRUCT] = "struct", + [KW_SWITCH] = "switch", + [KW_THREAD_LOCAL] = "_Thread_local", + [KW_TYPEDEF] = "typedef", + [KW_UNION] = "union", + [KW_UNSIGNED] = "unsigned", + [KW_VOID] = "void", + [KW_VOLATILE] = "volatile", + [KW_WHILE] = "while", +}; + +void proc_token_print(const proc_token_t *tok) { + switch (tok->tokt) { + case PTOK_INVALID: + printf("Token: %7s %hhd (%c)\n", "#INVAL#", tok->tokv.c, (tok->tokv.c >= 0x20) && (tok->tokv.c < 0x7F) ? tok->tokv.c : '?'); + break; + case PTOK_IDENT: + printf("Token: %7s '%s'\n", "IDENT", string_content(tok->tokv.str)); + break; + case PTOK_KEYWORD: + printf("Token: %7s '%s' (%u)\n", "KEYWORD", kw2str[tok->tokv.kw], tok->tokv.kw); + break; + case PTOK_NUM: + printf("Token: %7s '%s'\n", "NUM", string_content(tok->tokv.str)); + break; + case PTOK_STRING: + printf("Token: %7s %c%s%c\n", "STRING", + tok->tokv.sisstr ? '"' : '\'', string_content(tok->tokv.sstr), tok->tokv.sisstr ? '"' : '\''); + break; + case PTOK_SYM: + printf("Token: %7s %-3s (%u)\n", "SYM", sym2str[tok->tokv.sym], tok->tokv.sym); + break; + case PTOK_PRAGMA: + switch (tok->tokv.pragma.typ) { + case PRAGMA_ALLOW_INTS: + printf("Token: %7s Allow ints\n", "PRAGMA"); + break; + case PRAGMA_MARK_SIMPLE: + printf("Token: %7s Mark simple: %s\n", "PRAGMA", string_content(tok->tokv.pragma.val)); + break; + default: + printf("Token: %7s ???\n", "PRAGMA"); + } + break; + case PTOK_EOF: + printf("Token: %7s\n", "EOF"); + break; + default: + printf("Token: ??? %u\n", tok->tokt); + } +} + +int proc_token_iserror(const proc_token_t *tok) { + switch (tok->tokt) { + case PTOK_IDENT: + case PTOK_KEYWORD: + case PTOK_NUM: + case PTOK_STRING: + case PTOK_SYM: + case PTOK_PRAGMA: + case PTOK_EOF: + return 0; + case PTOK_INVALID: + default: + return 1; + } +} + +int proc_token_isend(const proc_token_t *tok) { + switch (tok->tokt) { + case PTOK_IDENT: + case PTOK_KEYWORD: + case PTOK_NUM: + case PTOK_STRING: + case PTOK_SYM: + case PTOK_PRAGMA: + return 0; + case PTOK_INVALID: + case PTOK_EOF: + default: + return 1; + } +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wanalyzer-null-dereference" +KHASH_MAP_IMPL_STR(str2kw, enum token_keyword_type_e) +KHASH_MAP_IMPL_STR(type_map, type_t*) +KHASH_MAP_IMPL_STR(struct_map, struct_t*) +KHASH_MAP_IMPL_STR(const_map, num_constant_t) +#pragma GCC diagnostic pop +khash_t(str2kw) *str2kw; + +int init_str2kw(void) { + if (!(str2kw = kh_init(str2kw))) { + printf("Failed to create the string to keyword map (init)\n"); + return 0; + } + for (enum token_keyword_type_e kw = 0; kw <= LAST_KEYWORD; ++kw) { + int iret; + khiter_t it = kh_put(str2kw, str2kw, kw2str[kw], &iret); + if (iret < 0) { + printf("Failed to create the string to keyword map (keyword %u)\n", kw); + kh_destroy(str2kw, str2kw); + return 0; + } + kh_val(str2kw, it) = kw; + } + return 1; +} +void del_str2kw(void) { + kh_destroy(str2kw, str2kw); +} + +int num_constant_convert(string_t *str, num_constant_t *cst) { + if (string_len(str) == 0) return 0; // Should never happen +#define contains(c) strchr(string_content(str), c) + if (contains('.') + || (!(contains('X') || contains('x')) && (contains('E') || contains('e'))) + || ((contains('X') || contains('x')) && (contains('P') || contains('p')))) { +#undef contains + int ok; + if ((string_end(str)[-1] == 'l') || (string_end(str)[-1] == 'L')) { + cst->typ = NCT_LDOUBLE; + char *endc; + cst->val.l = strtold(string_content(str), &endc); + ok = (endc == string_end(str) - 1); + } else if ((string_end(str)[-1] == 'f') || (string_end(str)[-1] == 'F')) { + cst->typ = NCT_FLOAT; + char *endc; + cst->val.f = strtof(string_content(str), &endc); + ok = (endc == string_end(str) - 1); + } else { + cst->typ = NCT_DOUBLE; + char *endc; + cst->val.d = strtod(string_content(str), &endc); + ok = (endc == string_end(str)); + } + if (!ok) { + printf("Error: '%s' is not a valid number\n", string_content(str)); + return 0; + } else if (errno == ERANGE) { + printf("Warning: floating point constant is too large\n"); + return 1; + } + return 1; + } else { + uint64_t ret = 0; + unsigned base = (string_content(str)[0] == '0') ? (string_content(str)[1] == 'x') ? 16 : 8 : 10; + size_t startidx = (base == 16) ? 2 : 0; + size_t endidx = string_len(str); + int suffix_type = 0; +#define SUFFIX_U 1 +#define SUFFIX_L 2 +#define SUFFIX_LL 4 +#define SUFFIX_SGN SUFFIX_U +#define SUFFIX_SZ (SUFFIX_L | SUFFIX_LL) + remove_suffix: + if ((string_content(str)[endidx - 1] == 'l') || (string_content(str)[endidx - 1] == 'L')) { + if (suffix_type & SUFFIX_SZ) { + printf("Error: '%s' is not a valid number (invalid suffix)\n", string_content(str)); + return 0; + } + if (endidx == 1) return 0; // Should never happen + if ((string_content(str)[endidx - 2] == 'l') || (string_content(str)[endidx - 2] == 'L')) { + if (endidx == 2) return 0; // Should never happen + if (string_content(str)[endidx - 2] != string_content(str)[endidx - 1]) { + printf("Error: '%s' is not a valid number (invalid suffix)\n", string_content(str)); + return 0; + } + endidx -= 2; + suffix_type |= SUFFIX_LL; + } else { + endidx -= 1; + suffix_type |= SUFFIX_L; + } + goto remove_suffix; + } + if ((string_content(str)[endidx - 1] == 'u') || (string_content(str)[endidx - 1] == 'U')) { + if (suffix_type & SUFFIX_SGN) { + printf("Error: '%s' is not a valid number (invalid suffix)\n", string_content(str)); + return 0; + } + endidx -= 1; + suffix_type |= SUFFIX_U; + goto remove_suffix; + } + // 0u has startidx=0 endidx=1 + if (endidx <= startidx) { + printf("Error: '%s' is not a valid number\n", string_content(str)); + } + for (size_t i = startidx; i < endidx; ++i) { + if ((string_content(str)[i] >= '0') && (string_content(str)[i] <= ((base == 8) ? '7' : '9'))) { + ret = base * ret + (unsigned)(string_content(str)[i] - '0'); + } else if ((base == 16) && (string_content(str)[i] >= 'A') && (string_content(str)[i] <= 'F')) { + ret = base * ret + 10 + (unsigned)(string_content(str)[i] - 'A'); + } else if ((base == 16) && (string_content(str)[i] >= 'a') && (string_content(str)[i] <= 'f')) { + ret = base * ret + 10 + (unsigned)(string_content(str)[i] - 'a'); + } else { + printf("Error: '%s' is not a valid number\n", string_content(str)); + return 0; + } + } + // If base == 10, keep the signness; in any case, try 32 bits if available else use 64 bits + cst->typ = + ((suffix_type & SUFFIX_SGN) == SUFFIX_U) ? + ((suffix_type & SUFFIX_SZ) == SUFFIX_L) ? LONG_IS_32BITS ? NCT_UINT32 : NCT_UINT64 : + ((suffix_type & SUFFIX_SZ) == SUFFIX_LL) ? NCT_UINT64 : + NCT_UINT32 : + ((suffix_type & SUFFIX_SZ) == SUFFIX_L) ? LONG_IS_32BITS ? NCT_INT32 : NCT_INT64 : + ((suffix_type & SUFFIX_SZ) == SUFFIX_LL) ? NCT_INT64 : + NCT_INT32; + if (cst->typ == NCT_INT32) { + if (ret < 1ULL << 31) { + cst->val.i32 = (int32_t)ret; + return 1; + } + // Not in signed 32-bits = int => try long/long long for decimal, unsigned for hexadecimal/octal + if (base != 10) cst->typ = NCT_UINT32; + else cst->typ = NCT_INT64; + } + if (cst->typ == NCT_UINT32) { + if (ret < 1ULL << 32) { + cst->val.u32 = (uint32_t)ret; + return 1; + } + // Not in unsigned 32-bits = unsigned => try unsigned long/long long for decimal and unsigned, long/long long for signed hexadecimal/octal + if ((base != 10) && ((suffix_type & SUFFIX_SGN) != SUFFIX_U)) cst->typ = NCT_INT64; + else cst->typ = NCT_UINT64; + } + if (cst->typ == NCT_INT64) { + if (ret < 1ULL << 63) { + cst->val.i64 = (int64_t)ret; + return 1; + } + // Not in signed 64-bits = long/long long => fail for decimal, try unsigned long/long long for hexadecimal/octal + if (base != 10) cst->typ = NCT_UINT64; + } + if (cst->typ == NCT_UINT64) { + cst->val.u64 = ret; + return 1; + } + // The constant cannot be typed... (Or internal error) + return 0; + } +} + +void expr_del(expr_t *e) { + switch (e->typ) { + case ETY_VAR: + string_del(e->val.var); + break; + + case ETY_CONST: + break; + + // case ETY_GENERIC: + // TODO + + case ETY_CALL: + expr_del(e->val.call.fun); + for (size_t i = 0; i < e->val.call.nargs; ++i) { + expr_del(e->val.call.args[i]); + } + if (e->val.call.args) free(e->val.call.args); + break; + + case ETY_ACCESS: + case ETY_PTRACCESS: + expr_del(e->val.access.val); + string_del(e->val.access.member); + break; + + case ETY_UNARY: + expr_del(e->val.unary.e); + break; + + case ETY_BINARY: + expr_del(e->val.binary.e1); + expr_del(e->val.binary.e2); + break; + + case ETY_TERNARY: + expr_del(e->val.ternary.e1); + expr_del(e->val.ternary.e2); + expr_del(e->val.ternary.e3); + break; + + // case ETY_INIT_LIST: + // TODO + + case ETY_CAST: + type_del(e->val.cast.typ); + expr_del(e->val.cast.e); + break; + } + free(e); +} + +void struct_del_weak(struct_t *st) { + if (--st->nrefs) return; + if (!st->tag) struct_del(st); +} +void type_del(type_t *typ) { + if (--typ->nrefs) return; + switch (typ->typ) { + case TYPE_BUILTIN: + break; + case TYPE_ARRAY: + type_del(typ->val.array.typ); + break; + case TYPE_STRUCT_UNION: + struct_del_weak(typ->val.st); + break; + case TYPE_ENUM: + case TYPE_PTR: + type_del(typ->val.typ); + break; + case TYPE_FUNCTION: + type_del(typ->val.fun.ret); + if (typ->val.fun.nargs != (size_t)-1) { + for (size_t i = 0; i < typ->val.fun.nargs; ++i) { + type_del(typ->val.fun.args[i]); + } + } + if (typ->val.fun.args) free(typ->val.fun.args); + break; + } + free(typ); +} +void type_map_del(khash_t(type_map) *map) { + kh_cstr_t str; + type_t **it; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-qual" + kh_foreach_key_value_ref(map, str, it, free((void*)str); type_del(*it)) +#pragma GCC diagnostic pop + kh_destroy(type_map, map); +} +void type_set_del(khash_t(type_set) *set) { + type_t *it; + kh_foreach_key(set, it, type_del(it)) + kh_destroy(type_set, set); +} +void const_map_del(khash_t(const_map) *map) { + kh_cstr_t str; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-qual" + kh_foreach_key(map, str, free((void*)str)) +#pragma GCC diagnostic pop + kh_destroy(const_map, map); +} + +void st_member_del(st_member_t *member) { + if (member->name) string_del(member->name); + type_del(member->typ); +} + +void struct_del(struct_t *st) { + if (st->tag) { + string_del(st->tag); + st->tag = NULL; + } + if (st->is_defined) { + for (size_t i = 0; i < st->nmembers; ++i) { + st_member_del(&st->members[i]); + } + free(st->members); + st->is_defined = 0; + } + if (!st->nrefs) free(st); +} +void struct_map_del(khash_t(struct_map) *map) { + // The keys are the tag in the struct_t + struct_t **it; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-qual" + kh_foreach_value_ref(map, it, struct_del(*it)) +#pragma GCC diagnostic pop + kh_destroy(struct_map, map); +} + +type_t *type_new(void) { + type_t *ret = malloc(sizeof *ret); + if (!ret) { + printf("Failed to create a new type\n"); + return NULL; + } + ret->szinfo.align = ret->szinfo.size = 0; + ret->is_atomic = ret->is_const = ret->is_restrict = ret->is_volatile = ret->is_incomplete = ret->is_validated = ret->_internal_use = 0; + ret->nrefs = 1; + ret->typ = TYPE_BUILTIN; + ret->val.builtin = BTT_INT; + return ret; +} +type_t *type_new_ptr(type_t *target) { + type_t *ret = malloc(sizeof *ret); + if (!ret) { + printf("Failed to create a new pointer type\n"); + return NULL; + } + ret->szinfo.align = ret->szinfo.size = 0; + ret->is_atomic = ret->is_const = ret->is_restrict = ret->is_volatile = ret->is_incomplete = ret->is_validated = ret->_internal_use = 0; + ret->nrefs = 1; + ret->typ = TYPE_PTR; + ret->val.typ = target; + return ret; +} +// The following functions do not work for functions (val.args needs to be duplicated) +/* type_t *type_do_copy(type_t *ref) { + type_t *ret = type_do_copy_nodec(ref); + if (!ret) return NULL; + type_del(ref); + return ret; +} +type_t *type_do_copy_nodec(const type_t *ref) { + type_t *ret = malloc(sizeof *ret); + if (!ret) { + printf("Failed to duplicate type\n"); + return NULL; + } + memcpy(ret, ref, sizeof *ret); + switch (ref->typ) { + case TYPE_BUILTIN: + break; + case TYPE_ARRAY: + ++ref->val.array.typ->nrefs; + break; + case TYPE_STRUCT_UNION: + ++ret->val.st->nrefs; + break; + case TYPE_ENUM: + case TYPE_PTR: + ++ref->val.typ->nrefs; + break; + case TYPE_FUNCTION: + ++ref->val.fun.ret->nrefs; + if (ref->val.fun.nargs != (size_t)-1) { + for (size_t i = 0; i < ref->val.fun.nargs; ++i) { + ++ref->val.fun.args[i]->nrefs; + } + } + break; + } + ret->nrefs = 1; + return ret; +} +type_t *type_maybe_copy(type_t *ref) { + if (ref->nrefs == 1) return ref; + else return type_do_copy(ref); +} */ +int type_copy_into(type_t *dest, const type_t *ref) { + size_t nrefs = dest->nrefs; + _Bool is_atomic = dest->is_atomic; + _Bool is_const = dest->is_const; + _Bool is_restrict = dest->is_restrict; + _Bool is_volatile = dest->is_volatile; + memcpy(dest, ref, sizeof *dest); + switch (ref->typ) { + case TYPE_BUILTIN: + break; + case TYPE_ARRAY: + ++ref->val.array.typ->nrefs; + break; + case TYPE_STRUCT_UNION: + ++dest->val.st->nrefs; + break; + case TYPE_ENUM: + case TYPE_PTR: + ++ref->val.typ->nrefs; + break; + case TYPE_FUNCTION: + ++ref->val.fun.ret->nrefs; + if (ref->val.fun.nargs != (size_t)-1) { + if (!ref->val.fun.args) { + dest->typ = TYPE_BUILTIN; + return 0; + } + type_t **args = malloc(sizeof *args * ref->val.fun.nargs); + if (!args) { + printf("Error: failed to allocate new argument array\n"); + return 0; + } + for (size_t i = 0; i < ref->val.fun.nargs; ++i) { + ++ref->val.fun.args[i]->nrefs; + args[i] = ref->val.fun.args[i]; + } + dest->val.fun.args = args; + } + break; + } + dest->is_atomic |= is_atomic; + dest->is_const |= is_const; + dest->is_restrict |= is_restrict; + dest->is_volatile |= is_volatile; + dest->nrefs = nrefs; + return 1; +} + +struct_t *struct_new(int is_struct, string_t *tag) { + struct_t *ret = malloc(sizeof *ret); + if (!ret) { + printf("Failed to create a new struct\n"); + return NULL; + } + ret->is_struct = is_struct; + ret->tag = tag; + ret->is_defined = 0; + ret->nrefs = 1; + ret->explicit_simple = 0; + return ret; +} + +_Static_assert(sizeof(type_t*) == sizeof(khint64_t), "Not a 64-bits machine"); +khint_t type_t_hash(type_t *typ) { + switch (typ->typ) { + case TYPE_BUILTIN: return kh_int_hash_func(typ->val.builtin); + case TYPE_ARRAY: return kh_int_hash_func((typ->val.array.array_sz << 12) + type_t_hash(typ->val.array.typ)); + case TYPE_STRUCT_UNION: + if (typ->val.st->tag) { + return kh_str_hash_func(string_content(typ->val.st->tag)); + } else { + khint64_t acc = (typ->val.st->nmembers << 5) + 1; + for (size_t i = 0; i < typ->val.st->nmembers; ++i) { + acc += (khint64_t)type_t_hash(typ->val.st->members[i].typ) << ((1ull << (i % 64)) % 61); + } + return kh_int64_hash_func(acc); + } + case TYPE_ENUM: + case TYPE_PTR: + return kh_int_hash_func(type_t_hash(typ->val.typ)); + case TYPE_FUNCTION: { + khint64_t acc = type_t_hash(typ->val.fun.ret) + typ->val.fun.nargs * 73 + (size_t)typ->val.fun.has_varargs * 77; + if (typ->val.fun.nargs != (size_t)-1) { + for (size_t i = 0; i < typ->val.fun.nargs; ++i) { + acc += (khint64_t)type_t_hash(typ->val.fun.args[i]) << ((3ull << (i % 64)) % 53); + } + } + return kh_int64_hash_func(acc); } + default: return 0; + } +} +int type_t_equal(type_t *typ1, type_t *typ2) { + if (typ1->typ != typ2->typ) return 0; + if ((typ1->is_atomic != typ2->is_atomic) || (typ1->is_const != typ2->is_const) + || (typ1->is_restrict != typ2->is_restrict) || (typ1->is_volatile != typ2->is_volatile)) { + return 0; + } + switch (typ1->typ) { + case TYPE_BUILTIN: return typ1->val.builtin == typ2->val.builtin; + case TYPE_ARRAY: return (typ1->val.array.array_sz == typ2->val.array.array_sz) && type_t_equal(typ1->val.array.typ, typ2->val.array.typ); + case TYPE_STRUCT_UNION: + // This will not do an infinite recursion since only unnamed struct/unions will compare their members + if (!typ1->val.st->tag != !typ2->val.st->tag) return 0; + if (typ1->val.st->tag) return !strcmp(string_content(typ1->val.st->tag), string_content(typ2->val.st->tag)); + if (!typ1->val.st->is_defined || !typ2->val.st->is_defined) { + printf("Warning: incomplete anonymous structure/union\n"); + return 0; + } + if (typ1->val.st->nmembers != typ2->val.st->nmembers) return 0; + for (size_t i = 0; i < typ1->val.st->nmembers; ++i) { + if (!!typ1->val.st->members[i].name != !!typ2->val.st->members[i].name) return 0; + if (typ1->val.st->members[i].name + && strcmp(string_content(typ1->val.st->members[i].name), string_content(typ2->val.st->members[i].name))) + return 0; + if (!type_t_equal(typ1->val.st->members[i].typ, typ2->val.st->members[i].typ)) return 0; + } + return 1; + case TYPE_ENUM: + case TYPE_PTR: + return type_t_equal(typ1->val.typ, typ2->val.typ); + case TYPE_FUNCTION: + if (typ1->val.fun.nargs != typ2->val.fun.nargs) return 0; + if (typ1->val.fun.has_varargs != typ2->val.fun.has_varargs) return 0; + if (!type_t_equal(typ1->val.fun.ret, typ2->val.fun.ret)) return 0; + if (typ1->val.fun.nargs != (size_t)-1) { + for (size_t i = 0; i < typ1->val.fun.nargs; ++i) { + if (!type_t_equal(typ1->val.fun.args[i], typ2->val.fun.args[i])) return 0; + } + } + return 1; + default: return 0; + } +} +__KHASH_IMPL(type_set, , type_t*, char, 0, type_t_hash, type_t_equal) + +type_t *type_try_merge(type_t *typ, khash_t(type_set) *set) { + int iret; + khiter_t it = kh_put(type_set, set, typ, &iret); + if (iret < 0) { + printf("Error: failed to add type to type_set\n"); + return NULL; + } else if (iret == 0) { + if (typ == kh_key(set, it)) return typ; + kh_key(set, it)->is_incomplete &= typ->is_incomplete; // In case we have a recursive structure + type_del(typ); + ++kh_key(set, it)->nrefs; + return kh_key(set, it); + } else { + ++typ->nrefs; + } + type_t *typ2; + switch (typ->typ) { + case TYPE_BUILTIN: + return typ; + case TYPE_ARRAY: + typ2 = type_try_merge(typ->val.array.typ, set); + if (typ2) typ->val.array.typ = typ2; + else return NULL; + return typ; + case TYPE_STRUCT_UNION: + if (typ->val.st->is_defined) { + for (size_t i = 0; i < typ->val.st->nmembers; ++i) { + typ2 = type_try_merge(typ->val.st->members[i].typ, set); + if (typ2) typ->val.st->members[i].typ = typ2; + else return NULL; + } + } + return typ; + case TYPE_ENUM: + case TYPE_PTR: + typ2 = type_try_merge(typ->val.typ, set); + if (typ2) typ->val.typ = typ2; + else return NULL; + return typ; + case TYPE_FUNCTION: + typ2 = type_try_merge(typ->val.fun.ret, set); + if (typ2) typ->val.fun.ret = typ2; + else return NULL; + + if (typ->val.fun.nargs != (size_t)-1) { + for (size_t i = 0; i < typ->val.fun.nargs; ++i) { + typ2 = type_try_merge(typ->val.fun.args[i], set); + if (typ2) typ->val.fun.args[i] = typ2; + else return NULL; + } + } + return typ; + + default: return NULL; + } +} + +const char *builtin2str[LAST_BUILTIN + 1] = { + [BTT_VOID] = "void", + [BTT_BOOL] = "_Bool", + [BTT_CHAR] = "char", + [BTT_SCHAR] = "signed char", + [BTT_UCHAR] = "unsigned char", + [BTT_SHORT] = "short", + [BTT_SSHORT] = "signed short", + [BTT_USHORT] = "unsigned short", + [BTT_INT] = "int", + [BTT_SINT] = "signed", + [BTT_UINT] = "unsigned", + [BTT_LONG] = "long", + [BTT_SLONG] = "signed long", + [BTT_ULONG] = "unsigned long", + [BTT_LONGLONG] = "long long", + [BTT_SLONGLONG] = "signed long long", + [BTT_ULONGLONG] = "unsigned long long", + [BTT_INT128] = "__int128", + [BTT_SINT128] = "signed __int128", + [BTT_UINT128] = "unsigned __int128", + [BTT_S8] = "int8_t", + [BTT_U8] = "uint8_t", + [BTT_S16] = "int16_t", + [BTT_U16] = "uint16_t", + [BTT_S32] = "int32_t", + [BTT_U32] = "uint32_t", + [BTT_S64] = "int64_t", + [BTT_U64] = "uint64_t", + [BTT_FLOAT] = "float", + [BTT_CFLOAT] = "float _Complex", + [BTT_IFLOAT] = "float _Imaginary", + [BTT_DOUBLE] = "double", + [BTT_CDOUBLE] = "double _Complex", + [BTT_IDOUBLE] = "double _Imaginary", + [BTT_LONGDOUBLE] = "long double", + [BTT_CLONGDOUBLE] = "long double _Complex", + [BTT_ILONGDOUBLE] = "long double _Imaginary", + [BTT_VA_LIST] = "__builtin_va_list", +}; +void type_print(type_t *typ) { + if (typ->_internal_use) { + printf("...%p", typ); + return; + } + typ->_internal_use = 1; + printf("<" DISP_ADDR_FMT "n_uses=%zu> ", DISP_ADDR_ARG(typ) typ->nrefs); + if (!typ->is_validated) printf("!<not validated> "); + if (typ->is_incomplete) printf("<incomplete> "); + if (typ->is_validated && !typ->is_incomplete) printf("<size=%zu align=%zu> ", typ->szinfo.size, typ->szinfo.align); + if (typ->is_const) printf("const "); + if (typ->is_restrict) printf("restrict "); + if (typ->is_volatile) printf("volatile "); + if (typ->is_atomic) printf("_Atomic "); + switch (typ->typ) { + case TYPE_BUILTIN: + printf("<builtin %s (%u)>", builtin2str[typ->val.builtin], typ->val.builtin); + break; + case TYPE_ARRAY: + if (typ->val.array.array_sz == (size_t)-1) + printf("[] "); + else printf("[%zu] ", typ->val.array.array_sz); + type_print(typ->val.array.typ); + break; + case TYPE_STRUCT_UNION: + struct_print(typ->val.st); + break; + case TYPE_ENUM: + printf("<enum> "); + type_print(typ->val.typ); + break; + case TYPE_PTR: + printf("*"); + type_print(typ->val.typ); + break; + case TYPE_FUNCTION: + printf("<function type, ret="); + type_print(typ->val.fun.ret); + if (typ->val.fun.nargs == (size_t)-1) { + printf(", no argument spec>"); + } else { + printf(", args=("); + for (size_t i = 0; i < typ->val.fun.nargs; ++i) { + if (i) printf("), ("); + type_print(typ->val.fun.args[i]); + } + if (typ->val.fun.has_varargs && typ->val.fun.nargs) printf(", ...)>"); + else if (typ->val.fun.has_varargs && !typ->val.fun.nargs) printf("...)>"); + else printf(")>"); + } + break; + } + typ->_internal_use = 0; +} +void struct_print(const struct_t *st) { + printf("<" DISP_ADDR_FMT "n_uses=%zu> ", DISP_ADDR_ARG(st) st->nrefs); + if (st->explicit_simple) { + printf("<explicitly simple> "); + } + if (st->is_defined) { + printf( + "%s %s <with %zu members%s> { ", + st->is_struct ? "struct" : "union", + st->tag ? string_content(st->tag) : "<no tag>", + st->nmembers, + st->has_incomplete ? ", with incomplete" : ""); + for (size_t i = 0; i < st->nmembers; ++i) { + if (i) printf(", "); + type_print(st->members[i].typ); + printf(" %s", st->members[i].name ? string_content(st->members[i].name) : "<no name>"); + if (st->members[i].is_bitfield) { + printf(" : %zu", st->members[i].bitfield_width); + } + } + printf(" }"); + } else { + printf("%s %s <undefined>", st->is_struct ? "struct" : "union", st->tag ? string_content(st->tag) : "<no tag>"); + } +} + +// TODO: per-arch array +size_t sizeof_btt[LAST_BUILTIN + 1] = { + [BTT_VOID] = 0, + [BTT_BOOL] = 1, + [BTT_CHAR] = 1, + [BTT_SCHAR] = 1, + [BTT_UCHAR] = 1, + [BTT_SHORT] = 2, + [BTT_SSHORT] = 2, + [BTT_USHORT] = 2, + [BTT_INT] = 4, + [BTT_SINT] = 4, + [BTT_UINT] = 4, + [BTT_LONG] = LONG_IS_32BITS ? 4 : 8, + [BTT_SLONG] = LONG_IS_32BITS ? 4 : 8, + [BTT_ULONG] = LONG_IS_32BITS ? 4 : 8, + [BTT_LONGLONG] = 8, + [BTT_SLONGLONG] = 8, + [BTT_ULONGLONG] = 8, + [BTT_INT128] = 16, + [BTT_SINT128] = 16, + [BTT_UINT128] = 16, + [BTT_S8] = 1, + [BTT_U8] = 1, + [BTT_S16] = 2, + [BTT_U16] = 2, + [BTT_S32] = 4, + [BTT_U32] = 4, + [BTT_S64] = 8, + [BTT_U64] = 8, + [BTT_FLOAT] = 4, + [BTT_CFLOAT] = 8, + [BTT_IFLOAT] = 4, + [BTT_DOUBLE] = 8, + [BTT_CDOUBLE] = 16, + [BTT_IDOUBLE] = 8, + [BTT_LONGDOUBLE] = 16, + [BTT_CLONGDOUBLE] = 32, + [BTT_ILONGDOUBLE] = 16, + [BTT_VA_LIST] = LONG_IS_32BITS ? 0 : 24, // TODO +}; +// The following assumes sizeof(unsigned long) == sizeof(void*) +file_t *file_new(void) { + file_t *ret = malloc(sizeof *ret); + if (!ret) { + printf("Failed to create a new translation unit structure (init)\n"); + return NULL; + } + if (!(ret->struct_map = kh_init(struct_map))) { + printf("Failed to create a new translation unit structure (structure map)\n"); + free(ret); + return NULL; + } + if (!(ret->type_map = kh_init(type_map))) { + printf("Failed to create a new translation unit structure (type map)\n"); + kh_destroy(struct_map, ret->struct_map); + free(ret); + return NULL; + } + if (!(ret->enum_map = kh_init(type_map))) { + printf("Failed to create a new translation unit structure (enumeration map)\n"); + kh_destroy(struct_map, ret->struct_map); + kh_destroy(type_map, ret->type_map); + free(ret); + return NULL; + } + if (!(ret->decl_map = kh_init(type_map))) { + printf("Failed to create a new translation unit structure (declaration map)\n"); + kh_destroy(struct_map, ret->struct_map); + kh_destroy(type_map, ret->type_map); + kh_destroy(type_map, ret->enum_map); + free(ret); + return NULL; + } + if (!(ret->type_set = kh_init(type_set))) { + printf("Failed to create a new translation unit structure (type set)\n"); + kh_destroy(struct_map, ret->struct_map); + kh_destroy(type_map, ret->type_map); + kh_destroy(type_map, ret->enum_map); + kh_destroy(type_map, ret->decl_map); + free(ret); + return NULL; + } + if (!(ret->const_map = kh_init(const_map))) { + printf("Failed to create a new translation unit structure (const map)\n"); + kh_destroy(struct_map, ret->struct_map); + kh_destroy(type_map, ret->type_map); + kh_destroy(type_map, ret->enum_map); + kh_destroy(type_map, ret->decl_map); + kh_destroy(type_set, ret->type_set); + free(ret); + return NULL; + } + + // Now fill in the builtin types + int iret; + for (enum type_builtin_e i = 0; i < LAST_BUILTIN + 1; ++i) { + type_t *t = malloc(sizeof *t); + if (!t) { + printf("Failed to create a new translation unit structure (builtin type)\n"); + for (; i--;) { + free(ret->builtins[i]); + } + kh_destroy(struct_map, ret->struct_map); + kh_destroy(type_map, ret->type_map); + kh_destroy(type_map, ret->enum_map); + kh_destroy(type_map, ret->decl_map); + kh_destroy(type_set, ret->type_set); + kh_destroy(const_map, ret->const_map); + free(ret); + return NULL; + } + t->is_atomic = t->is_const = t->is_restrict = t->is_volatile = t->_internal_use = 0; + t->is_incomplete = (i == BTT_VOID); + t->is_validated = 1; + t->nrefs = 2; + t->typ = TYPE_BUILTIN; + t->val.builtin = i; + t->szinfo.align = t->szinfo.size = sizeof_btt[i]; + ret->builtins[i] = t; + kh_put(type_set, ret->type_set, t, &iret); + if (iret < 0) { + printf("Failed to create a new translation unit structure (failed to add intrinsic type to type_set)\n"); + kh_destroy(struct_map, ret->struct_map); + kh_destroy(type_map, ret->type_map); + kh_destroy(type_map, ret->enum_map); + kh_destroy(type_map, ret->decl_map); + kh_destroy(type_set, ret->type_set); + kh_destroy(const_map, ret->const_map); + free(ret); + return NULL; + } else if (iret == 0) { + printf("Failed to create a new translation unit structure (duplicate intrinsic type in type_set)\n"); + for (++i; i--;) { + free(ret->builtins[i]); + } + kh_destroy(struct_map, ret->struct_map); + kh_destroy(type_map, ret->type_map); + kh_destroy(type_map, ret->enum_map); + kh_destroy(type_map, ret->decl_map); + kh_destroy(type_set, ret->type_set); + kh_destroy(const_map, ret->const_map); + free(ret); + return NULL; + } + } + // ret is valid + + // Add __builtin_va_list as a typedef + char *sdup = strdup("__builtin_va_list"); + if (!sdup) { + printf("Failed to create a new translation unit structure (va_list name)\n"); + file_del(ret); + return NULL; + } + khiter_t it = kh_put(type_map, ret->type_map, sdup, &iret); + if (iret < 0) { + printf("Failed to create a new translation unit structure (add va_list typedef)\n"); + free(sdup); + file_del(ret); + return NULL; + } else if (iret == 0) { + printf("Failed to create a new translation unit structure (__builtin_va_list is already a typedef)\n"); + file_del(ret); + return NULL; + } + kh_val(ret->type_map, it) = ret->builtins[BTT_VA_LIST]; + ++ret->builtins[BTT_VA_LIST]->nrefs; + + return ret; +} +void file_del(file_t *f) { + struct_map_del(f->struct_map); + type_map_del(f->type_map); + type_map_del(f->enum_map); + type_map_del(f->decl_map); + type_set_del(f->type_set); + const_map_del(f->const_map); + for (enum type_builtin_e i = 0; i < LAST_BUILTIN + 1; ++i) { + type_del(f->builtins[i]); + } + free(f); +} |