#include "lang.h"

// NOTE(review): the system header names were missing from this file as received;
// the list below is derived from the library functions used in this file
// (errno/ERANGE, size_t, fixed-width ints, printf, malloc/free/strto*, str*/memcpy).
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Switch to the #else branch to prefix type/struct dumps with their address.
#if 1
#define DISP_ADDR_FMT ""
#define DISP_ADDR_ARG(v)
#else
#define DISP_ADDR_FMT "%p "
#define DISP_ADDR_ARG(v) v,
#endif

/*
 * Duplicate a preprocessor token, deep-copying any string payload.
 * The source token is left untouched and remains owned by the caller.
 * On string duplication failure, a PPTOK_INVALID token with c == '\0' is returned.
 */
preproc_token_t preproc_token_dup(preproc_token_t tok) {
    preproc_token_t ret;
    ret.tokt = tok.tokt;
    switch (tok.tokt) {
    case PPTOK_IDENT:
    case PPTOK_IDENT_UNEXP:
    case PPTOK_NUM:
        ret.tokv.str = string_dup(tok.tokv.str);
        if (!ret.tokv.str) {
            ret.tokt = PPTOK_INVALID;
            ret.tokv.c = '\0';
        }
        break;
    case PPTOK_STRING:
    case PPTOK_INCL:
        ret.tokv.sisstr = tok.tokv.sisstr;
        ret.tokv.sstr = string_dup(tok.tokv.sstr);
        if (!ret.tokv.sstr) {
            ret.tokt = PPTOK_INVALID;
            ret.tokv.c = '\0';
        }
        // The source string must NOT be released here: a duplicate does not
        // consume its input (the IDENT/NUM case above does not either).
        break;
    case PPTOK_INVALID:
    case PPTOK_SYM:
    case PPTOK_NEWLINE:
    case PPTOK_BLANK:
    case PPTOK_START_LINE_COMMENT:
    case PPTOK_EOF:
        ret.tokv.c = tok.tokv.c;
        break;
    }
    return ret;
}

/* Release the string payload (if any) of a preprocessor token; the token itself is not freed. */
void preproc_token_del(preproc_token_t *tok) {
    switch (tok->tokt) {
    case PPTOK_IDENT:
    case PPTOK_IDENT_UNEXP:
    case PPTOK_NUM:
        string_del(tok->tokv.str);
        break;
    case PPTOK_STRING:
    case PPTOK_INCL:
        string_del(tok->tokv.sstr);
        break;
    case PPTOK_INVALID:
    case PPTOK_SYM:
    case PPTOK_NEWLINE:
    case PPTOK_BLANK:
    case PPTOK_START_LINE_COMMENT:
    case PPTOK_EOF:
        break;
    }
}

VECTOR_IMPL(preproc, preproc_token_del)

/* Release the string payload (if any) of a processed token; the token itself is not freed. */
void proc_token_del(proc_token_t *tok) {
    switch (tok->tokt) {
    case PTOK_IDENT:
    case PTOK_NUM:
        string_del(tok->tokv.str);
        break;
    case PTOK_STRING:
        string_del(tok->tokv.sstr);
        break;
    case PTOK_PRAGMA:
        switch (tok->tokv.pragma.typ) {
        case PRAGMA_EXPLICIT_CONV:
            string_del(tok->tokv.pragma.val);
            break;
        case PRAGMA_ALLOW_INTS:
            break;
        }
        break;
    case PTOK_INVALID:
    case PTOK_KEYWORD:
    case PTOK_SYM:
    case PTOK_EOF:
        break;
    }
}

VECTOR_IMPL(proc, proc_token_del)

/* Spelling of every symbol token, indexed by symbol enumerator. */
const char *sym2str[LAST_SYM + 1] = {
    [SYM_LBRACKET] = "{",
    [SYM_RBRACKET] = "}",
    [SYM_LSQBRACKET] = "[",
    [SYM_RSQBRACKET] = "]",
    [SYM_LPAREN] = "(",
    [SYM_RPAREN] = ")",
    [SYM_HASH] = "#",
    [SYM_HASHHASH] = "##",
    [SYM_SEMICOLON] = ";",
    [SYM_COLON] = ":",
    [SYM_COLONCOLON] = "::",
    [SYM_VARIADIC] = "...",
    [SYM_QUESTION] = "?",
    [SYM_DOT] = ".",
    [SYM_DASHGT] = "->",
    [SYM_TILDE] = "~",
    [SYM_EXCL] = "!",
    [SYM_PLUS] = "+",
    [SYM_DASH] = "-",
    [SYM_STAR] = "*",
    [SYM_SLASH] = "/",
    [SYM_PERCENT] = "%",
    [SYM_HAT] = "^",
    [SYM_AMP] = "&",
    [SYM_PIPE] = "|",
    [SYM_EQ] = "=",
    [SYM_PLUSEQ] = "+=",
    [SYM_DASHEQ] = "-=",
    [SYM_STAREQ] = "*=",
    [SYM_SLASHEQ] = "/=",
    [SYM_PERCENTEQ] = "%=",
    [SYM_HATEQ] = "^=",
    [SYM_AMPEQ] = "&=",
    [SYM_PIPEEQ] = "|=",
    [SYM_EQEQ] = "==",
    [SYM_EXCLEQ] = "!=",
    [SYM_LT] = "<",
    [SYM_GT] = ">",
    [SYM_LTEQ] = "<=",
    [SYM_GTEQ] = ">=",
    [SYM_AMPAMP] = "&&",
    [SYM_PIPEPIPE] = "||",
    [SYM_LTLT] = "<<",
    [SYM_GTGT] = ">>",
    [SYM_LTLTEQ] = "<<=",
    [SYM_GTGTEQ] = ">>=",
    [SYM_PLUSPLUS] = "++",
    [SYM_DASHDASH] = "--",
    [SYM_COMMA] = ",",
};

/* Print a one-line, human-readable description of a preprocessor token to stdout. */
void preproc_token_print(const preproc_token_t *tok) {
    switch (tok->tokt) {
    case PPTOK_INVALID:
        // Non-printable characters are shown as '?'
        printf("Token: %7s %hhd (%c)\n", "#INVAL#", tok->tokv.c,
               (tok->tokv.c >= 0x20) && (tok->tokv.c < 0x7F) ? tok->tokv.c : '?');
        break;
    case PPTOK_IDENT:
        printf("Token: %7s '%s'\n", "IDENT", string_content(tok->tokv.str));
        break;
    case PPTOK_IDENT_UNEXP:
        printf("Token: %7s '%s'\n", "IDENT'", string_content(tok->tokv.str));
        break;
    case PPTOK_NUM:
        printf("Token: %7s '%s'\n", "NUM", string_content(tok->tokv.str));
        break;
    case PPTOK_STRING:
        printf("Token: %7s %c%s%c\n", "STRING",
               tok->tokv.sisstr ? '"' : '\'',
               string_content(tok->tokv.sstr),
               tok->tokv.sisstr ? '"' : '\'');
        break;
    case PPTOK_INCL:
        printf("Token: %7s %c%s%c\n", "INCL",
               tok->tokv.sisstr ? '"' : '<',
               string_content(tok->tokv.sstr),
               tok->tokv.sisstr ? '"' : '>');
        break;
    case PPTOK_SYM:
        printf("Token: %7s %-3s (%u)\n", "SYM", sym2str[tok->tokv.sym], tok->tokv.sym);
        break;
    case PPTOK_NEWLINE:
        printf("Token: %7s\n", "NEWLINE");
        break;
    case PPTOK_BLANK:
        printf("Token: %7s\n", "\e[2;31m(blank)\e[m");
        break;
    case PPTOK_START_LINE_COMMENT:
        printf("Token: %7s\n", "\e[2;31m( // ) \e[m");
        break;
    case PPTOK_EOF:
        printf("Token: %7s\n", "EOF");
        break;
    default:
        printf("Token: ??? %u\n", tok->tokt);
    }
}

/* Return 1 if the token terminates the stream (invalid, EOF or unknown kind), 0 otherwise. */
int preproc_token_isend(const preproc_token_t *tok) {
    switch (tok->tokt) {
    case PPTOK_IDENT:
    case PPTOK_IDENT_UNEXP:
    case PPTOK_NUM:
    case PPTOK_STRING:
    case PPTOK_INCL:
    case PPTOK_SYM:
    case PPTOK_NEWLINE:
    case PPTOK_BLANK:
    case PPTOK_START_LINE_COMMENT:
        return 0;
    case PPTOK_INVALID:
    case PPTOK_EOF:
    default:
        return 1;
    }
}

/* Spelling of every keyword, indexed by keyword enumerator. */
const char *kw2str[LAST_KEYWORD + 1] = {
    [KW_ALIGNAS] = "_Alignas",
    [KW_ALIGNOF] = "_Alignof",
    [KW_ATOMIC] = "_Atomic",
    [KW_AUTO] = "auto",
    [KW_BOOL] = "_Bool",
    [KW_BREAK] = "break",
    [KW_CASE] = "case",
    [KW_CHAR] = "char",
    [KW_COMPLEX] = "_Complex",
    [KW_CONST] = "const",
    [KW_CONTINUE] = "continue",
    [KW_DEFAULT] = "default",
    [KW_DO] = "do",
    [KW_DOUBLE] = "double",
    [KW_ELSE] = "else",
    [KW_ENUM] = "enum",
    [KW_EXTERN] = "extern",
    [KW_FLOAT] = "float",
    [KW_FOR] = "for",
    [KW_GENERIC] = "_Generic",
    [KW_GOTO] = "goto",
    [KW_IF] = "if",
    [KW_IMAGINARY] = "_Imaginary",
    [KW_INLINE] = "inline",
    [KW_INT] = "int",
    [KW_INT128] = "__int128",
    [KW_LONG] = "long",
    [KW_NORETURN] = "_Noreturn",
    [KW_REGISTER] = "register",
    [KW_RESTRICT] = "restrict",
    [KW_RETURN] = "return",
    [KW_SHORT] = "short",
    [KW_SIGNED] = "signed",
    [KW_SIZEOF] = "sizeof",
    [KW_STATIC] = "static",
    [KW_STATIC_ASSERT] = "_Static_assert",
    [KW_STRUCT] = "struct",
    [KW_SWITCH] = "switch",
    [KW_THREAD_LOCAL] = "_Thread_local",
    [KW_TYPEDEF] = "typedef",
    [KW_UNION] = "union",
    [KW_UNSIGNED] = "unsigned",
    [KW_VOID] = "void",
    [KW_VOLATILE] = "volatile",
    [KW_WHILE] = "while",
};

/* Print a one-line, human-readable description of a processed token to stdout. */
void proc_token_print(const proc_token_t *tok) {
    switch (tok->tokt) {
    case PTOK_INVALID:
        // Non-printable characters are shown as '?'
        printf("Token: %7s %hhd (%c)\n", "#INVAL#", tok->tokv.c,
               (tok->tokv.c >= 0x20) && (tok->tokv.c < 0x7F) ? tok->tokv.c : '?');
        break;
    case PTOK_IDENT:
        printf("Token: %7s '%s'\n", "IDENT", string_content(tok->tokv.str));
        break;
    case PTOK_KEYWORD:
        printf("Token: %7s '%s' (%u)\n", "KEYWORD", kw2str[tok->tokv.kw], tok->tokv.kw);
        break;
    case PTOK_NUM:
        printf("Token: %7s '%s'\n", "NUM", string_content(tok->tokv.str));
        break;
    case PTOK_STRING:
        printf("Token: %7s %c%s%c\n", "STRING",
               tok->tokv.sisstr ? '"' : '\'',
               string_content(tok->tokv.sstr),
               tok->tokv.sisstr ? '"' : '\'');
        break;
    case PTOK_SYM:
        printf("Token: %7s %-3s (%u)\n", "SYM", sym2str[tok->tokv.sym], tok->tokv.sym);
        break;
    case PTOK_PRAGMA:
        switch (tok->tokv.pragma.typ) {
        case PRAGMA_ALLOW_INTS:
            printf("Token: %7s Allow ints\n", "PRAGMA");
            break;
        case PRAGMA_EXPLICIT_CONV:
            printf("Token: %7s Explicit conversion: destination is %s\n", "PRAGMA",
                   string_content(tok->tokv.pragma.val));
            break;
        default:
            printf("Token: %7s ???\n", "PRAGMA");
        }
        break;
    case PTOK_EOF:
        printf("Token: %7s\n", "EOF");
        break;
    default:
        printf("Token: ??? %u\n", tok->tokt);
    }
}

/* Return 1 if the token is invalid (or of unknown kind), 0 otherwise. EOF is not an error. */
int proc_token_iserror(const proc_token_t *tok) {
    switch (tok->tokt) {
    case PTOK_IDENT:
    case PTOK_KEYWORD:
    case PTOK_NUM:
    case PTOK_STRING:
    case PTOK_SYM:
    case PTOK_PRAGMA:
    case PTOK_EOF:
        return 0;
    case PTOK_INVALID:
    default:
        return 1;
    }
}

/* Return 1 if the token terminates the stream (invalid, EOF or unknown kind), 0 otherwise. */
int proc_token_isend(const proc_token_t *tok) {
    switch (tok->tokt) {
    case PTOK_IDENT:
    case PTOK_KEYWORD:
    case PTOK_NUM:
    case PTOK_STRING:
    case PTOK_SYM:
    case PTOK_PRAGMA:
        return 0;
    case PTOK_INVALID:
    case PTOK_EOF:
    default:
        return 1;
    }
}

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wanalyzer-null-dereference"
KHASH_MAP_IMPL_STR(str2kw, enum token_keyword_type_e)
KHASH_MAP_IMPL_STR(type_map, type_t*)
KHASH_MAP_IMPL_STR(struct_map, struct_t*)
KHASH_MAP_IMPL_STR(const_map, num_constant_t)
#pragma GCC diagnostic pop

/* Global keyword lookup table: keyword spelling (and GNU aliases) -> keyword enumerator. */
khash_t(str2kw) *str2kw;

/*
 * Build the global str2kw map from kw2str plus a set of GNU-style aliases.
 * Returns 1 on success; on failure prints a message, destroys the map and returns 0.
 */
int init_str2kw(void) {
    if (!(str2kw = kh_init(str2kw))) {
        printf("Failed to create the string to keyword map (init)\n");
        return 0;
    }
    int iret;
    khiter_t it;
    for (enum token_keyword_type_e kw = 0; kw <= LAST_KEYWORD; ++kw) {
        it = kh_put(str2kw, str2kw, kw2str[kw], &iret);
        if (iret < 0) {
            printf("Failed to create the string to keyword map (keyword %u)\n", kw);
            kh_destroy(str2kw, str2kw);
            return 0;
        }
        kh_val(str2kw, it) = kw;
    }
    // Register `alias` as another spelling of keyword KW_<k>; bails out of init_str2kw on failure.
#define ADD_ALIAS(alias, k) \
    it = kh_put(str2kw, str2kw, #alias, &iret); \
    if (iret < 0) { \
        printf("Failed to create the string to keyword map (keyword alias " #alias ")\n"); \
        kh_destroy(str2kw, str2kw); \
        return 0; \
    } \
    kh_val(str2kw, it) = KW_ ## k;
    ADD_ALIAS(__alignof__,  ALIGNOF)
    // ADD_ALIAS(__asm,        asm)
    // ADD_ALIAS(__asm__,      asm)
    ADD_ALIAS(__complex,    COMPLEX)
    ADD_ALIAS(__complex__,  COMPLEX)
    ADD_ALIAS(__const,      CONST)
    ADD_ALIAS(__const__,    CONST)
    // ADD_ALIAS(__decltype,   decltype)
    // ADD_ALIAS(__imag__,     __imag)
    ADD_ALIAS(__inline,     INLINE)
    ADD_ALIAS(__inline__,   INLINE)
    // ADD_ALIAS(__nullptr,    nullptr)
    // ADD_ALIAS(__real__,     __real)
    ADD_ALIAS(__restrict,   RESTRICT)
    ADD_ALIAS(__restrict__, RESTRICT)
    ADD_ALIAS(__signed,     SIGNED)
    ADD_ALIAS(__signed__,   SIGNED)
    // ADD_ALIAS(__typeof,     typeof)
    // ADD_ALIAS(__typeof__,   typeof)
    ADD_ALIAS(__volatile,   VOLATILE)
    ADD_ALIAS(__volatile__, VOLATILE)
    return 1;
}

/* Destroy the global str2kw map. */
void del_str2kw(void) {
    kh_destroy(str2kw, str2kw);
}

/*
 * Parse the numeric literal in `str` into `cst` (type and value).
 *
 * A literal is treated as floating point if it contains '.', or a decimal
 * exponent (e/E without an x/X), or a hexadecimal exponent (p/P with an x/X);
 * otherwise it is parsed as an integer with optional u/U, l/L, ll/LL suffixes.
 *
 * Returns 1 on success, 0 if the literal is malformed or cannot be typed.
 * Diagnostics are printed to stdout.
 */
int num_constant_convert(string_t *str, num_constant_t *cst) {
    if (string_len(str) == 0) return 0; // Should never happen
#define contains(c) strchr(string_content(str), c)
    if (contains('.')
        || (!(contains('X') || contains('x')) && (contains('E') || contains('e')))
        || ((contains('X') || contains('x')) && (contains('P') || contains('p')))) {
#undef contains
        int ok;
        char *endc;
        char last = string_end(str)[-1];
        // strto* only set errno on failure; clear it so the ERANGE test below is not stale.
        errno = 0;
        if ((last == 'l') || (last == 'L')) {
            cst->typ = NCT_LDOUBLE;
            cst->val.l = strtold(string_content(str), &endc);
            ok = (endc == string_end(str) - 1); // Everything but the suffix must be consumed
        } else if ((last == 'f') || (last == 'F')) {
            cst->typ = NCT_FLOAT;
            cst->val.f = strtof(string_content(str), &endc);
            ok = (endc == string_end(str) - 1); // Everything but the suffix must be consumed
        } else {
            cst->typ = NCT_DOUBLE;
            cst->val.d = strtod(string_content(str), &endc);
            ok = (endc == string_end(str));
        }
        if (!ok) {
            printf("Error: '%s' is not a valid number\n", string_content(str));
            return 0;
        }
        if (errno == ERANGE) {
            printf("Warning: floating point constant is too large\n");
        }
        return 1;
    } else {
        const char *s = string_content(str);
        uint64_t ret = 0;
        // Accept both "0x" and "0X", consistently with the floating point detection above
        unsigned base = (s[0] == '0') ? (((s[1] == 'x') || (s[1] == 'X')) ? 16 : 8) : 10;
        size_t startidx = (base == 16) ? 2 : 0;
        size_t endidx = string_len(str);
        int suffix_type = 0;
#define SUFFIX_U 1
#define SUFFIX_L 2
#define SUFFIX_LL 4
#define SUFFIX_SGN SUFFIX_U
#define SUFFIX_SZ (SUFFIX_L | SUFFIX_LL)
    remove_suffix:
        // Suffixes are peeled off from the end, one group per pass
        if (endidx == 0) {
            // The literal was made only of suffix characters; do not read s[-1]
            printf("Error: '%s' is not a valid number\n", s);
            return 0;
        }
        if ((s[endidx - 1] == 'l') || (s[endidx - 1] == 'L')) {
            if (suffix_type & SUFFIX_SZ) {
                printf("Error: '%s' is not a valid number (invalid suffix)\n", s);
                return 0;
            }
            if (endidx == 1) return 0; // Should never happen
            if ((s[endidx - 2] == 'l') || (s[endidx - 2] == 'L')) {
                if (endidx == 2) return 0; // Should never happen
                // "lL" and "Ll" are not valid suffixes
                if (s[endidx - 2] != s[endidx - 1]) {
                    printf("Error: '%s' is not a valid number (invalid suffix)\n", s);
                    return 0;
                }
                endidx -= 2;
                suffix_type |= SUFFIX_LL;
            } else {
                endidx -= 1;
                suffix_type |= SUFFIX_L;
            }
            goto remove_suffix;
        }
        if ((s[endidx - 1] == 'u') || (s[endidx - 1] == 'U')) {
            if (suffix_type & SUFFIX_SGN) {
                printf("Error: '%s' is not a valid number (invalid suffix)\n", s);
                return 0;
            }
            endidx -= 1;
            suffix_type |= SUFFIX_U;
            goto remove_suffix;
        }
        // 0u has startidx=0 endidx=1
        if (endidx <= startidx) {
            // No digit left (e.g. "0x"): this is an error, not the value 0
            printf("Error: '%s' is not a valid number\n", s);
            return 0;
        }
        for (size_t i = startidx; i < endidx; ++i) {
            if ((s[i] >= '0') && (s[i] <= ((base == 8) ? '7' : '9'))) {
                ret = base * ret + (unsigned)(s[i] - '0');
            } else if ((base == 16) && (s[i] >= 'A') && (s[i] <= 'F')) {
                ret = base * ret + 10 + (unsigned)(s[i] - 'A');
            } else if ((base == 16) && (s[i] >= 'a') && (s[i] <= 'f')) {
                ret = base * ret + 10 + (unsigned)(s[i] - 'a');
            } else {
                printf("Error: '%s' is not a valid number\n", s);
                return 0;
            }
        }
        // If base == 10, keep the signness; in any case, try 32 bits if available else use 64 bits
        int is_unsigned = ((suffix_type & SUFFIX_SGN) == SUFFIX_U);
        int size_suffix = suffix_type & SUFFIX_SZ;
        if (size_suffix == SUFFIX_L) {
            if (LONG_IS_32BITS) cst->typ = is_unsigned ? NCT_UINT32 : NCT_INT32;
            else cst->typ = is_unsigned ? NCT_UINT64 : NCT_INT64;
        } else if (size_suffix == SUFFIX_LL) {
            cst->typ = is_unsigned ? NCT_UINT64 : NCT_INT64;
        } else {
            cst->typ = is_unsigned ? NCT_UINT32 : NCT_INT32;
        }
        if (cst->typ == NCT_INT32) {
            if (ret < 1ULL << 31) {
                cst->val.i32 = (int32_t)ret;
                return 1;
            }
            // Not in signed 32-bits = int => try long/long long for decimal, unsigned for hexadecimal/octal
            if (base != 10) cst->typ = NCT_UINT32;
            else cst->typ = NCT_INT64;
        }
        if (cst->typ == NCT_UINT32) {
            if (ret < 1ULL << 32) {
                cst->val.u32 = (uint32_t)ret;
                return 1;
            }
            // Not in unsigned 32-bits = unsigned => try unsigned long/long long for decimal and unsigned,
            // long/long long for signed hexadecimal/octal
            if ((base != 10) && !is_unsigned) cst->typ = NCT_INT64;
            else cst->typ = NCT_UINT64;
        }
        if (cst->typ == NCT_INT64) {
            if (ret < 1ULL << 63) {
                cst->val.i64 = (int64_t)ret;
                return 1;
            }
            // Not in signed 64-bits = long/long long => fail for decimal, try unsigned long/long long for hexadecimal/octal
            if (base != 10) cst->typ = NCT_UINT64;
        }
        if (cst->typ == NCT_UINT64) {
            cst->val.u64 = ret;
            return 1;
        }
        // The constant cannot be typed... (Or internal error)
        return 0;
    }
}

/* Recursively destroy an expression tree, including the node `e` itself. */
void expr_del(expr_t *e) {
    switch (e->typ) {
    case ETY_VAR:
        string_del(e->val.var);
        break;
    case ETY_CONST:
        break;
    // case ETY_GENERIC: // TODO
    case ETY_CALL:
        expr_del(e->val.call.fun);
        for (size_t i = 0; i < e->val.call.nargs; ++i) {
            expr_del(e->val.call.args[i]);
        }
        free(e->val.call.args); // free(NULL) is a no-op
        break;
    case ETY_ACCESS:
    case ETY_PTRACCESS:
        expr_del(e->val.access.val);
        string_del(e->val.access.member);
        break;
    case ETY_UNARY:
        expr_del(e->val.unary.e);
        break;
    case ETY_BINARY:
        expr_del(e->val.binary.e1);
        expr_del(e->val.binary.e2);
        break;
    case ETY_TERNARY:
        expr_del(e->val.ternary.e1);
        expr_del(e->val.ternary.e2);
        expr_del(e->val.ternary.e3);
        break;
    // case ETY_INIT_LIST: // TODO
    case ETY_CAST:
        type_del(e->val.cast.typ);
        expr_del(e->val.cast.e);
        break;
    }
    free(e);
}

/*
 * Drop one reference to a structure from a type.
 * Only untagged structures are destroyed when the count reaches zero;
 * tagged ones are left for struct_map_del (the map keys are the tags).
 */
void struct_del_weak(struct_t *st) {
    if (--st->nrefs) return;
    if (!st->tag) struct_del(st);
}

/* Drop one reference to a type; when the count reaches zero, release its children and free it. */
void type_del(type_t *typ) {
    if (--typ->nrefs) return;
    if (typ->converted) string_del(typ->converted);
    switch (typ->typ) {
    case TYPE_BUILTIN:
        break;
    case TYPE_ARRAY:
        type_del(typ->val.array.typ);
        break;
    case TYPE_STRUCT_UNION:
        struct_del_weak(typ->val.st);
        break;
    case TYPE_ENUM:
    case TYPE_PTR:
        type_del(typ->val.typ);
        break;
    case TYPE_FUNCTION:
        type_del(typ->val.fun.ret);
        // nargs == (size_t)-1 means there is no argument list to walk
        if (typ->val.fun.nargs != (size_t)-1) {
            for (size_t i = 0; i < typ->val.fun.nargs; ++i) {
                type_del(typ->val.fun.args[i]);
            }
        }
        free(typ->val.fun.args); // free(NULL) is a no-op
        break;
    }
    free(typ);
}

/* Free every key, drop one reference on every value, then destroy the map. */
void type_map_del(khash_t(type_map) *map) {
    kh_cstr_t str;
    type_t **it;
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-qual"
    kh_foreach_key_value_ref(map, str, it, free((void*)str); type_del(*it))
#pragma GCC diagnostic pop
    kh_destroy(type_map, map);
}

/* Drop one reference on every type in the set, then destroy the set. */
void type_set_del(khash_t(type_set) *set) {
    type_t *it;
    kh_foreach_key(set, it, type_del(it))
    kh_destroy(type_set, set);
}

/* Free every key, then destroy the map. */
void const_map_del(khash_t(const_map) *map) {
    kh_cstr_t str;
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-qual"
    kh_foreach_key(map, str, free((void*)str))
#pragma GCC diagnostic pop
    kh_destroy(const_map, map);
}

/* Release the name (if any) and the type reference of a structure member; the member itself is not freed. */
void st_member_del(st_member_t *member) {
    if (member->name) string_del(member->name);
    type_del(member->typ);
}

/*
 * Release the tag and members of a structure.
 * The structure itself is only freed when no reference to it remains;
 * otherwise it is left as an untagged, undefined shell for the last
 * struct_del_weak to destroy.
 */
void struct_del(struct_t *st) {
    if (st->tag) {
        string_del(st->tag);
        st->tag = NULL;
    }
    if (st->is_defined) {
        for (size_t i = 0; i < st->nmembers; ++i) {
            st_member_del(&st->members[i]);
        }
        free(st->members);
        st->is_defined = 0;
    }
    if (!st->nrefs) free(st);
}

/* Destroy every structure in the map, then the map itself. */
void struct_map_del(khash_t(struct_map) *map) {
    // The keys are the tag in the struct_t
    struct_t **it;
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-qual"
    kh_foreach_value_ref(map, it, struct_del(*it))
#pragma GCC diagnostic pop
    kh_destroy(struct_map, map);
}

/* Allocate a new unqualified, unvalidated `int` type with one reference. Returns NULL on allocation failure. */
type_t *type_new(void) {
    type_t *ret = malloc(sizeof *ret);
    if (!ret) {
        printf("Failed to create a new type\n");
        return NULL;
    }
    ret->szinfo.align = ret->szinfo.size = 0;
    ret->is_atomic = ret->is_const = ret->is_restrict = ret->is_volatile = ret->is_incomplete = ret->is_validated = ret->_internal_use = 0;
    ret->converted = NULL;
    ret->nrefs = 1;
    ret->typ = TYPE_BUILTIN;
    ret->val.builtin = BTT_INT;
    return ret;
}

/*
 * Allocate a new unqualified, unvalidated pointer type to `target`, with one reference.
 * The reference count of `target` is not changed. Returns NULL on allocation failure.
 */
type_t *type_new_ptr(type_t *target) {
    type_t *ret = malloc(sizeof *ret);
    if (!ret) {
        printf("Failed to create a new pointer type\n");
        return NULL;
    }
    ret->szinfo.align = ret->szinfo.size = 0;
    ret->is_atomic = ret->is_const = ret->is_restrict = ret->is_volatile = ret->is_incomplete = ret->is_validated = ret->_internal_use = 0;
    ret->converted = NULL;
    ret->nrefs = 1;
    ret->typ = TYPE_PTR;
    ret->val.typ = target;
    return ret;
}

// The following functions do not work for functions (val.args needs to be duplicated)
/* type_t *type_do_copy(type_t *ref) {
    type_t *ret = type_do_copy_nodec(ref);
    if (!ret) return NULL;
    type_del(ref);
    return ret;
}
type_t *type_do_copy_nodec(const type_t *ref) {
    type_t *ret = malloc(sizeof *ret);
    if (!ret) {
        printf("Failed to duplicate type\n");
        return NULL;
    }
    memcpy(ret, ref, sizeof *ret);
    switch (ref->typ) {
    case TYPE_BUILTIN:
        break;
    case TYPE_ARRAY:
        ++ref->val.array.typ->nrefs;
        break;
    case TYPE_STRUCT_UNION:
        ++ret->val.st->nrefs;
        break;
    case TYPE_ENUM:
    case TYPE_PTR:
        ++ref->val.typ->nrefs;
        break;
    case TYPE_FUNCTION:
        ++ref->val.fun.ret->nrefs;
        if (ref->val.fun.nargs != (size_t)-1) {
            for (size_t i = 0; i < ref->val.fun.nargs; ++i) {
                ++ref->val.fun.args[i]->nrefs;
            }
        }
        break;
    }
    ret->nrefs = 1;
    return ret;
}
type_t *type_maybe_copy(type_t *ref) {
    if (ref->nrefs == 1) return ref;
    else return type_do_copy(ref);
} */

/*
 * Overwrite `dest` with a copy of `ref`, taking a new reference on each child
 * type/structure and duplicating the function argument array.
 * The reference count of `dest` is preserved and its previous qualifiers
 * are OR-ed into the copied ones.
 * Returns 1 on success, 0 on failure (dest is then partially overwritten).
 */
int type_copy_into(type_t *dest, const type_t *ref) {
    size_t nrefs = dest->nrefs;
    _Bool is_atomic = dest->is_atomic;
    _Bool is_const = dest->is_const;
    _Bool is_restrict = dest->is_restrict;
    _Bool is_volatile = dest->is_volatile;
    memcpy(dest, ref, sizeof *dest);
    switch (ref->typ) {
    case TYPE_BUILTIN:
        break;
    case TYPE_ARRAY:
        ++ref->val.array.typ->nrefs;
        break;
    case TYPE_STRUCT_UNION:
        ++dest->val.st->nrefs;
        break;
    case TYPE_ENUM:
    case TYPE_PTR:
        ++ref->val.typ->nrefs;
        break;
    case TYPE_FUNCTION:
        ++ref->val.fun.ret->nrefs;
        if (ref->val.fun.nargs != (size_t)-1) {
            if (!ref->val.fun.args) {
                dest->typ = TYPE_BUILTIN;
                return 0;
            }
            // dest must own its own array: type_del frees val.fun.args
            type_t **args = malloc(sizeof *args * ref->val.fun.nargs);
            if (!args) {
                printf("Error: failed to allocate new argument array\n");
                return 0;
            }
            for (size_t i = 0; i < ref->val.fun.nargs; ++i) {
                ++ref->val.fun.args[i]->nrefs;
                args[i] = ref->val.fun.args[i];
            }
            dest->val.fun.args = args;
        }
        break;
    }
    dest->is_atomic |= is_atomic;
    dest->is_const |= is_const;
    dest->is_restrict |= is_restrict;
    dest->is_volatile |= is_volatile;
    dest->nrefs = nrefs;
    return 1;
}

/*
 * Allocate a new, undefined structure (is_struct != 0) or union (is_struct == 0)
 * with one reference. `tag` is stored as-is (may be NULL for an untagged type).
 * Returns NULL on allocation failure.
 */
struct_t *struct_new(int is_struct, string_t *tag) {
    struct_t *ret = malloc(sizeof *ret);
    if (!ret) {
        printf("Failed to create a new struct\n");
        return NULL;
    }
    ret->is_struct = is_struct;
    ret->tag = tag;
    ret->is_defined = 0;
    ret->nrefs = 1;
    return ret;
}

_Static_assert(sizeof(type_t*) == sizeof(khint64_t), "Not a 64-bits machine");

/*
 * Structural hash of a type, for use as the type_set hash function.
 * Tagged structures hash on their tag only; qualifiers are not hashed.
 */
khint_t type_t_hash(type_t *typ) {
    switch (typ->typ) {
    case TYPE_BUILTIN:
        return kh_int_hash_func(typ->val.builtin);
    case TYPE_ARRAY:
        return kh_int_hash_func((typ->val.array.array_sz << 12) + type_t_hash(typ->val.array.typ));
    case TYPE_STRUCT_UNION:
        if (typ->val.st->tag) {
            return kh_str_hash_func(string_content(typ->val.st->tag));
        } else {
            khint64_t acc = (typ->val.st->nmembers << 5) + 1;
            for (size_t i = 0; i < typ->val.st->nmembers; ++i) {
                // Position-dependent shift (always < 61) so that member order matters
                acc += (khint64_t)type_t_hash(typ->val.st->members[i].typ) << ((1ull << (i % 64)) % 61);
            }
            return kh_int64_hash_func(acc);
        }
    case TYPE_ENUM:
    case TYPE_PTR:
        return kh_int_hash_func(type_t_hash(typ->val.typ));
    case TYPE_FUNCTION: {
        khint64_t acc = type_t_hash(typ->val.fun.ret) + typ->val.fun.nargs * 73 + (size_t)typ->val.fun.has_varargs * 77;
        if (typ->val.fun.nargs != (size_t)-1) {
            for (size_t i = 0; i < typ->val.fun.nargs; ++i) {
                // Position-dependent shift (always < 53) so that argument order matters
                acc += (khint64_t)type_t_hash(typ->val.fun.args[i]) << ((3ull << (i % 64)) % 53);
            }
        }
        return kh_int64_hash_func(acc); }
    default:
        return 0;
    }
}

/*
 * Structural equality of two types, for use as the type_set equality function.
 * Returns 1 if equal, 0 otherwise. Qualifiers must match; tagged structures
 * compare by tag, untagged ones member by member.
 */
int type_t_equal(type_t *typ1, type_t *typ2) {
    if (typ1->typ != typ2->typ) return 0;
    if ((typ1->is_atomic != typ2->is_atomic) || (typ1->is_const != typ2->is_const)
     || (typ1->is_restrict != typ2->is_restrict) || (typ1->is_volatile != typ2->is_volatile)) {
        return 0;
    }
    switch (typ1->typ) {
    case TYPE_BUILTIN:
        return typ1->val.builtin == typ2->val.builtin;
    case TYPE_ARRAY:
        return (typ1->val.array.array_sz == typ2->val.array.array_sz)
            && type_t_equal(typ1->val.array.typ, typ2->val.array.typ);
    case TYPE_STRUCT_UNION:
        // This will not do an infinite recursion since only unnamed struct/unions will compare their members
        if (!typ1->val.st->tag != !typ2->val.st->tag) return 0;
        if (typ1->val.st->tag) return !strcmp(string_content(typ1->val.st->tag), string_content(typ2->val.st->tag));
        if (!typ1->val.st->is_defined || !typ2->val.st->is_defined) {
            printf("Warning: incomplete anonymous structure/union\n");
            return 0;
        }
        if (typ1->val.st->nmembers != typ2->val.st->nmembers) return 0;
        for (size_t i = 0; i < typ1->val.st->nmembers; ++i) {
            if (!!typ1->val.st->members[i].name != !!typ2->val.st->members[i].name) return 0;
            if (typ1->val.st->members[i].name
                && strcmp(string_content(typ1->val.st->members[i].name), string_content(typ2->val.st->members[i].name)))
                return 0;
            if (!type_t_equal(typ1->val.st->members[i].typ, typ2->val.st->members[i].typ)) return 0;
        }
        return 1;
    case TYPE_ENUM:
    case TYPE_PTR:
        return type_t_equal(typ1->val.typ, typ2->val.typ);
    case TYPE_FUNCTION:
        if (typ1->val.fun.nargs != typ2->val.fun.nargs) return 0;
        if (typ1->val.fun.has_varargs != typ2->val.fun.has_varargs) return 0;
        if (!type_t_equal(typ1->val.fun.ret, typ2->val.fun.ret)) return 0;
        if (typ1->val.fun.nargs != (size_t)-1) {
            for (size_t i = 0; i < typ1->val.fun.nargs; ++i) {
                if (!type_t_equal(typ1->val.fun.args[i], typ2->val.fun.args[i])) return 0;
            }
        }
        return 1;
    default:
        return 0;
    }
}

__KHASH_IMPL(type_set, , type_t*, char, 0, type_t_hash, type_t_equal)

/*
 * Intern `typ` (and, recursively, its children) in `set`.
 * If an equal type is already in the set, `typ` loses one reference and the
 * stored type is returned with one more reference. Otherwise `typ` is added
 * (the set takes one reference) and returned after its children are merged.
 * Returns NULL on failure.
 */
type_t *type_try_merge(type_t *typ, khash_t(type_set) *set) {
    int iret;
    khiter_t it = kh_put(type_set, set, typ, &iret);
    if (iret < 0) {
        printf("Error: failed to add type to type_set\n");
        return NULL;
    } else if (iret == 0) {
        // An equal type is already present
        if (typ == kh_key(set, it)) return typ;
        kh_key(set, it)->is_incomplete &= typ->is_incomplete; // In case we have a recursive structure
        type_del(typ);
        ++kh_key(set, it)->nrefs;
        return kh_key(set, it);
    } else {
        // Newly inserted: the set now holds a reference
        ++typ->nrefs;
    }

    type_t *typ2;
    switch (typ->typ) {
    case TYPE_BUILTIN:
        return typ;
    case TYPE_ARRAY:
        typ2 = type_try_merge(typ->val.array.typ, set);
        if (typ2) typ->val.array.typ = typ2;
        else return NULL;
        return typ;
    case TYPE_STRUCT_UNION:
        if (typ->val.st->is_defined) {
            for (size_t i = 0; i < typ->val.st->nmembers; ++i) {
                typ2 = type_try_merge(typ->val.st->members[i].typ, set);
                if (typ2) typ->val.st->members[i].typ = typ2;
                else return NULL;
            }
        }
        return typ;
    case TYPE_ENUM:
    case TYPE_PTR:
        typ2 = type_try_merge(typ->val.typ, set);
        if (typ2) typ->val.typ = typ2;
        else return NULL;
        return typ;
    case TYPE_FUNCTION:
        typ2 = type_try_merge(typ->val.fun.ret, set);
        if (typ2) typ->val.fun.ret = typ2;
        else return NULL;

        if (typ->val.fun.nargs != (size_t)-1) {
            for (size_t i = 0; i < typ->val.fun.nargs; ++i) {
                typ2 = type_try_merge(typ->val.fun.args[i], set);
                if (typ2) typ->val.fun.args[i] = typ2;
                else return NULL;
            }
        }
        return typ;

    default: return NULL;
    }
}

/* Spelling of every builtin type, indexed by builtin type enumerator. */
const char *builtin2str[LAST_BUILTIN + 1] = {
    [BTT_VOID] = "void",
    [BTT_BOOL] = "_Bool",
    [BTT_CHAR] = "char",
    [BTT_SCHAR] = "signed char",
    [BTT_UCHAR] = "unsigned char",
    [BTT_SHORT] = "short",
    [BTT_SSHORT] = "signed short",
    [BTT_USHORT] = "unsigned short",
    [BTT_INT] = "int",
    [BTT_SINT] = "signed",
    [BTT_UINT] = "unsigned",
    [BTT_LONG] = "long",
    [BTT_SLONG] = "signed long",
    [BTT_ULONG] = "unsigned long",
    [BTT_LONGLONG] = "long long",
    [BTT_SLONGLONG] = "signed long long",
    [BTT_ULONGLONG] = "unsigned long long",
    [BTT_INT128] = "__int128",
    [BTT_SINT128] = "signed __int128",
    [BTT_UINT128] = "unsigned __int128",
    [BTT_S8] = "int8_t",
    [BTT_U8] = "uint8_t",
    [BTT_S16] = "int16_t",
    [BTT_U16] = "uint16_t",
    [BTT_S32] = "int32_t",
    [BTT_U32] = "uint32_t",
    [BTT_S64] = "int64_t",
    [BTT_U64] = "uint64_t",
    [BTT_FLOAT] = "float",
    [BTT_CFLOAT] = "float _Complex",
    [BTT_IFLOAT] = "float _Imaginary",
    [BTT_DOUBLE] = "double",
    [BTT_CDOUBLE] = "double _Complex",
    [BTT_IDOUBLE] = "double _Imaginary",
    [BTT_LONGDOUBLE] = "long double",
    [BTT_CLONGDOUBLE] = "long double _Complex",
    [BTT_ILONGDOUBLE] = "long double _Imaginary",
    [BTT_VA_LIST] = "__builtin_va_list",
};

/*
 * Print a human-readable dump of a type to stdout (no trailing newline).
 * _internal_use is set for the duration of the call so that a type reached
 * again through recursion is printed as "...<address>" instead of looping.
 *
 * NOTE(review): the angle-bracketed labels in the format strings below were
 * missing from this file as received (arguments were passed to formats without
 * conversion specifiers, and the function case had an `else` without its `if`).
 * They were reconstructed from the surviving arguments and the ")>" terminators;
 * confirm the exact wording against the upstream file.
 */
void type_print(type_t *typ) {
    if (typ->_internal_use) {
        printf("...%p", (void *)typ);
        return;
    }
    typ->_internal_use = 1;
    printf("<" DISP_ADDR_FMT "n_uses=%zu> ", DISP_ADDR_ARG(typ) typ->nrefs);
    if (!typ->is_validated) printf("! ");
    if (typ->is_incomplete) printf("<incomplete> ");
    if (typ->is_validated && !typ->is_incomplete) printf("<size=%zu align=%zu> ", typ->szinfo.size, typ->szinfo.align);
    if (typ->converted) printf("<converted: %s> ", string_content(typ->converted));
    if (typ->is_const) printf("const ");
    if (typ->is_restrict) printf("restrict ");
    if (typ->is_volatile) printf("volatile ");
    if (typ->is_atomic) printf("_Atomic ");
    switch (typ->typ) {
    case TYPE_BUILTIN:
        printf("<builtin %s %u>", builtin2str[typ->val.builtin], typ->val.builtin);
        break;
    case TYPE_ARRAY:
        // array_sz == (size_t)-1 denotes an array of unspecified size
        if (typ->val.array.array_sz == (size_t)-1) printf("[] ");
        else printf("[%zu] ", typ->val.array.array_sz);
        type_print(typ->val.array.typ);
        break;
    case TYPE_STRUCT_UNION:
        struct_print(typ->val.st);
        break;
    case TYPE_ENUM:
        printf("<enum> ");
        type_print(typ->val.typ);
        break;
    case TYPE_PTR:
        printf("*");
        type_print(typ->val.typ);
        break;
    case TYPE_FUNCTION:
        printf("<function type returning ");
        type_print(typ->val.fun.ret);
        if (typ->val.fun.nargs == (size_t)-1) {
            printf(", no argument spec>");
        } else {
            printf(", args=(");
            for (size_t i = 0; i < typ->val.fun.nargs; ++i) {
                if (i) printf("), (");
                type_print(typ->val.fun.args[i]);
            }
            if (typ->val.fun.has_varargs && typ->val.fun.nargs) printf(", ...)>");
            else if (typ->val.fun.has_varargs && !typ->val.fun.nargs) printf("...)>");
            else printf(")>");
        }
        break;
    }
    typ->_internal_use = 0;
}

/*
 * Print a human-readable dump of a structure/union to stdout (no trailing newline).
 *
 * NOTE(review): as in type_print, the angle-bracketed labels were missing from
 * this file as received and were reconstructed from the surviving arguments;
 * confirm the exact wording against the upstream file.
 */
void struct_print(const struct_t *st) {
    printf("<" DISP_ADDR_FMT "n_uses=%zu> ", DISP_ADDR_ARG(st) st->nrefs);
    if (st->is_defined) {
        printf(
            "%s %s <with %zu members%s> { ",
            st->is_struct ? "struct" : "union",
            st->tag ? string_content(st->tag) : "<no tag>",
            st->nmembers,
            st->has_incomplete ? ", with incomplete" : "");
        for (size_t i = 0; i < st->nmembers; ++i) {
            if (i) printf(", ");
            type_print(st->members[i].typ);
            printf(" %s", st->members[i].name ? string_content(st->members[i].name) : "<no name>");
            if (st->members[i].is_bitfield) {
                printf(" : %zu", st->members[i].bitfield_width);
            }
        }
        printf(" }");
    } else {
        printf("%s %s <not defined>",
            st->is_struct ? "struct" : "union",
            st->tag ? string_content(st->tag) : "<no tag>");
    }
}

// TODO: per-arch array
/* Size in bytes of every builtin type (also used as its alignment by file_new). */
size_t sizeof_btt[LAST_BUILTIN + 1] = {
    [BTT_VOID] = 0,
    [BTT_BOOL] = 1,
    [BTT_CHAR] = 1,
    [BTT_SCHAR] = 1,
    [BTT_UCHAR] = 1,
    [BTT_SHORT] = 2,
    [BTT_SSHORT] = 2,
    [BTT_USHORT] = 2,
    [BTT_INT] = 4,
    [BTT_SINT] = 4,
    [BTT_UINT] = 4,
    [BTT_LONG] = LONG_IS_32BITS ? 4 : 8,
    [BTT_SLONG] = LONG_IS_32BITS ? 4 : 8,
    [BTT_ULONG] = LONG_IS_32BITS ? 4 : 8,
    [BTT_LONGLONG] = 8,
    [BTT_SLONGLONG] = 8,
    [BTT_ULONGLONG] = 8,
    [BTT_INT128] = 16,
    [BTT_SINT128] = 16,
    [BTT_UINT128] = 16,
    [BTT_S8] = 1,
    [BTT_U8] = 1,
    [BTT_S16] = 2,
    [BTT_U16] = 2,
    [BTT_S32] = 4,
    [BTT_U32] = 4,
    [BTT_S64] = 8,
    [BTT_U64] = 8,
    [BTT_FLOAT] = 4,
    [BTT_CFLOAT] = 8,
    [BTT_IFLOAT] = 4,
    [BTT_DOUBLE] = 8,
    [BTT_CDOUBLE] = 16,
    [BTT_IDOUBLE] = 8,
    [BTT_LONGDOUBLE] = 16,
    [BTT_CLONGDOUBLE] = 32,
    [BTT_ILONGDOUBLE] = 16,
    [BTT_VA_LIST] = LONG_IS_32BITS ? 0 : 24, // TODO
};
// The following assumes sizeof(unsigned long) == sizeof(void*)

/*
 * Create a new translation unit structure: empty maps, one type_t per builtin
 * type (each registered in the type set), and the __builtin_va_list,
 * __int128_t and __uint128_t typedefs.
 * Returns NULL on failure, after releasing everything allocated so far.
 */
file_t *file_new(void) {
    int iret;
    khiter_t it;
    size_t nbuiltins = 0; // Number of entries of ret->builtins that are allocated

    file_t *ret = malloc(sizeof *ret);
    if (!ret) {
        printf("Failed to create a new translation unit structure (init)\n");
        return NULL;
    }
    if (!(ret->struct_map = kh_init(struct_map))) {
        printf("Failed to create a new translation unit structure (structure map)\n");
        goto fail_file;
    }
    if (!(ret->type_map = kh_init(type_map))) {
        printf("Failed to create a new translation unit structure (type map)\n");
        goto fail_struct_map;
    }
    if (!(ret->enum_map = kh_init(type_map))) {
        printf("Failed to create a new translation unit structure (enumeration map)\n");
        goto fail_type_map;
    }
    if (!(ret->decl_map = kh_init(type_map))) {
        printf("Failed to create a new translation unit structure (declaration map)\n");
        goto fail_enum_map;
    }
    if (!(ret->type_set = kh_init(type_set))) {
        printf("Failed to create a new translation unit structure (type set)\n");
        goto fail_decl_map;
    }
    if (!(ret->const_map = kh_init(const_map))) {
        printf("Failed to create a new translation unit structure (const map)\n");
        goto fail_type_set;
    }

    // Now fill in the builtin types
    for (enum type_builtin_e i = 0; i < LAST_BUILTIN + 1; ++i) {
        type_t *t = malloc(sizeof *t);
        if (!t) {
            printf("Failed to create a new translation unit structure (builtin type)\n");
            goto fail_builtins;
        }
        t->is_atomic = t->is_const = t->is_restrict = t->is_volatile = t->_internal_use = 0;
        t->is_incomplete = (i == BTT_VOID);
        t->converted = NULL; // Maybe should be something else?
        t->is_validated = 1;
        t->nrefs = 2; // One for ret->builtins, one for ret->type_set
        t->typ = TYPE_BUILTIN;
        t->val.builtin = i;
        t->szinfo.align = t->szinfo.size = sizeof_btt[i];
        ret->builtins[i] = t;
        ++nbuiltins;

        kh_put(type_set, ret->type_set, t, &iret);
        if (iret < 0) {
            printf("Failed to create a new translation unit structure (failed to add intrinsic type to type_set)\n");
            goto fail_builtins;
        } else if (iret == 0) {
            printf("Failed to create a new translation unit structure (duplicate intrinsic type in type_set)\n");
            goto fail_builtins;
        }
    }
    // ret is valid

    // Add __builtin_va_list, __int128_t, __uint128_t as typedef
    char *sdup;
#define ADD_TYPEDEF(name, btt) \
    sdup = strdup(#name); \
    if (!sdup) { \
        printf("Failed to create a new translation unit structure (" #name " name)\n"); \
        file_del(ret); \
        return NULL; \
    } \
    it = kh_put(type_map, ret->type_map, sdup, &iret); \
    if (iret < 0) { \
        printf("Failed to create a new translation unit structure (add " #name " typedef)\n"); \
        free(sdup); \
        file_del(ret); \
        return NULL; \
    } else if (iret == 0) { \
        printf("Failed to create a new translation unit structure (" #name " is already a typedef)\n"); \
        free(sdup); \
        file_del(ret); \
        return NULL; \
    } \
    kh_val(ret->type_map, it) = ret->builtins[BTT_ ## btt]; \
    ++ret->builtins[BTT_ ## btt]->nrefs;
    ADD_TYPEDEF(__builtin_va_list, VA_LIST)
    ADD_TYPEDEF(__int128_t, INT128)
    ADD_TYPEDEF(__uint128_t, UINT128)

    return ret;

    // Failure paths: each label releases what was acquired before the next one
fail_builtins:
    // The set does not own the types, so they are freed directly
    while (nbuiltins--) {
        free(ret->builtins[nbuiltins]);
    }
    kh_destroy(const_map, ret->const_map);
fail_type_set:
    kh_destroy(type_set, ret->type_set);
fail_decl_map:
    kh_destroy(type_map, ret->decl_map);
fail_enum_map:
    kh_destroy(type_map, ret->enum_map);
fail_type_map:
    kh_destroy(type_map, ret->type_map);
fail_struct_map:
    kh_destroy(struct_map, ret->struct_map);
fail_file:
    free(ret);
    return NULL;
}

/* Destroy a translation unit structure created by file_new, including all its maps and builtin types. */
void file_del(file_t *f) {
    struct_map_del(f->struct_map);
    type_map_del(f->type_map);
    type_map_del(f->enum_map);
    type_map_del(f->decl_map);
    type_set_del(f->type_set);
    const_map_del(f->const_map);
    for (enum type_builtin_e i = 0; i < LAST_BUILTIN + 1; ++i) {
        type_del(f->builtins[i]);
    }
    free(f);
}