diff options
Diffstat (limited to 'tcg/arm/tcg-target.c')
| -rw-r--r-- | tcg/arm/tcg-target.c | 98 |
1 files changed, 60 insertions, 38 deletions
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index e790bf04b4..47612fe260 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -611,6 +611,22 @@ static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn) } } +/* swap the two low bytes assuming that the two high input bytes and the + two high output bit can hold any value. */ +static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn) +{ + if (use_armv6_instructions) { + /* rev16 */ + tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_R8, 0, rn, SHIFT_IMM_LSR(8)); + tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_R8, TCG_REG_R8, 0xff); + tcg_out_dat_reg(s, cond, ARITH_ORR, + rd, TCG_REG_R8, rn, SHIFT_IMM_LSL(8)); + } +} + static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn) { if (use_armv6_instructions) { @@ -639,6 +655,22 @@ static inline void tcg_out_ld32_12(TCGContext *s, int cond, (rn << 16) | (rd << 12) | ((-im) & 0xfff)); } +/* Offset pre-increment with base writeback. */ +static inline void tcg_out_ld32_12wb(TCGContext *s, int cond, + int rd, int rn, tcg_target_long im) +{ + /* ldr with writeback and both register equals is UNPREDICTABLE */ + assert(rd != rn); + + if (im >= 0) { + tcg_out32(s, (cond << 28) | 0x05b00000 | + (rn << 16) | (rd << 12) | (im & 0xfff)); + } else { + tcg_out32(s, (cond << 28) | 0x05300000 | + (rn << 16) | (rd << 12) | ((-im) & 0xfff)); + } +} + static inline void tcg_out_st32_12(TCGContext *s, int cond, int rd, int rn, tcg_target_long im) { @@ -1071,7 +1103,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) { int addr_reg, data_reg, data_reg2, bswap; #ifdef CONFIG_SOFTMMU - int mem_index, s_bits; + int mem_index, s_bits, tlb_offset; TCGReg argreg; # if TARGET_LONG_BITS == 64 int addr_reg2; @@ -1111,19 +1143,15 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1); tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0, TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); - /* In the - * ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_read))] - * below, the offset is likely to exceed 12 bits if mem_index != 0 and - * not exceed otherwise, so use an - * add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table) - * before. - */ - if (mem_index) + /* We assume that the offset is contained within 20 bits. */ + tlb_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read); + assert(tlb_offset & ~0xfffff == 0); + if (tlb_offset > 0xfff) { tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0, - (mem_index << (TLB_SHIFT & 1)) | - ((16 - (TLB_SHIFT >> 1)) << 8)); - tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0, - offsetof(CPUArchState, tlb_table[0][0].addr_read)); + 0xa00 | (tlb_offset >> 12)); + tlb_offset &= 0xfff; + } + tcg_out_ld32_12wb(s, COND_AL, TCG_REG_R1, TCG_REG_R0, tlb_offset); tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1, TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); /* Check alignment. */ @@ -1131,15 +1159,14 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) tcg_out_dat_imm(s, COND_EQ, ARITH_TST, 0, addr_reg, (1 << s_bits) - 1); # if TARGET_LONG_BITS == 64 - /* XXX: possibly we could use a block data load or writeback in - * the first access. */ - tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, - offsetof(CPUArchState, tlb_table[0][0].addr_read) + 4); + /* XXX: possibly we could use a block data load in the first access. */ + tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 4); tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0)); # endif tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, - offsetof(CPUArchState, tlb_table[0][0].addend)); + offsetof(CPUTLBEntry, addend) + - offsetof(CPUTLBEntry, addr_read)); switch (opc) { case 0: @@ -1288,7 +1315,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) { int addr_reg, data_reg, data_reg2, bswap; #ifdef CONFIG_SOFTMMU - int mem_index, s_bits; + int mem_index, s_bits, tlb_offset; TCGReg argreg; # if TARGET_LONG_BITS == 64 int addr_reg2; @@ -1325,19 +1352,15 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1); tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0, TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); - /* In the - * ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_write))] - * below, the offset is likely to exceed 12 bits if mem_index != 0 and - * not exceed otherwise, so use an - * add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table) - * before. - */ - if (mem_index) + /* We assume that the offset is contained within 20 bits. */ + tlb_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); + assert(tlb_offset & ~0xfffff == 0); + if (tlb_offset > 0xfff) { tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0, - (mem_index << (TLB_SHIFT & 1)) | - ((16 - (TLB_SHIFT >> 1)) << 8)); - tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0, - offsetof(CPUArchState, tlb_table[0][0].addr_write)); + 0xa00 | (tlb_offset >> 12)); + tlb_offset &= 0xfff; + } + tcg_out_ld32_12wb(s, COND_AL, TCG_REG_R1, TCG_REG_R0, tlb_offset); tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1, TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); /* Check alignment. */ @@ -1345,15 +1368,14 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) tcg_out_dat_imm(s, COND_EQ, ARITH_TST, 0, addr_reg, (1 << s_bits) - 1); # if TARGET_LONG_BITS == 64 - /* XXX: possibly we could use a block data load or writeback in - * the first access. */ - tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, - offsetof(CPUArchState, tlb_table[0][0].addr_write) + 4); + /* XXX: possibly we could use a block data load in the first access. */ + tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 4); tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0)); # endif tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, - offsetof(CPUArchState, tlb_table[0][0].addend)); + offsetof(CPUTLBEntry, addend) + - offsetof(CPUTLBEntry, addr_write)); switch (opc) { case 0: @@ -1361,7 +1383,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) break; case 1: if (bswap) { - tcg_out_bswap16(s, COND_EQ, TCG_REG_R0, data_reg); + tcg_out_bswap16st(s, COND_EQ, TCG_REG_R0, data_reg); tcg_out_st16_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1); } else { tcg_out_st16_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); @@ -1447,7 +1469,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) break; case 1: if (bswap) { - tcg_out_bswap16(s, COND_AL, TCG_REG_R0, data_reg); + tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, data_reg); tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addr_reg, 0); } else { tcg_out_st16_8(s, COND_AL, data_reg, addr_reg, 0); |