summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorAurelien Jarno <aurelien@aurel32.net>2015-07-05 18:50:09 +0200
committerAurelien Jarno <aurelien@aurel32.net>2015-09-13 23:08:51 +0200
commiteb6ca2b4a69325e95526bc0f2897791df04e44dc (patch)
tree1501d1dabef3ec5639070bec1ad1bd54deb8d3a5
parent218fd7301f88df440da3e16b9cfca000cd2fe111 (diff)
downloadfocaccia-qemu-eb6ca2b4a69325e95526bc0f2897791df04e44dc.tar.gz
focaccia-qemu-eb6ca2b4a69325e95526bc0f2897791df04e44dc.zip
target-sh4: improve cmp/str instruction
Instead of testing bytes one by one, we can use the following trick
from https://graphics.stanford.edu/~seander/bithacks.html:

  haszero(v) = (v - 0x01010101) & ~v & 0x80808080

The subexpression v - 0x01010101, evaluates to a high bit set in any
byte whenever the corresponding byte in v is zero or greater than 0x80.
The sub-expression ~v & 0x80808080 evaluates to high bits set in bytes
where the byte of v doesn't have its high bit set (so the byte was less
than 0x80). Finally, by ANDing these two sub-expressions the result is
the high bits set where the bytes in v were zero, since the high bits
set due to a value greater than 0x80 in the first sub-expression are
masked off by the second.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
-rw-r--r--target-sh4/translate.c17
1 files changed, 5 insertions, 12 deletions
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 50043cf5b5..ca6ef5aca7 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -688,18 +688,11 @@ static void _decode_opc(DisasContext * ctx)
 	{
 	    TCGv cmp1 = tcg_temp_new();
 	    TCGv cmp2 = tcg_temp_new();
-	    tcg_gen_xor_i32(cmp1, REG(B7_4), REG(B11_8));
-	    tcg_gen_andi_i32(cmp2, cmp1, 0xff000000);
-            tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, cmp2, 0);
-	    tcg_gen_andi_i32(cmp2, cmp1, 0x00ff0000);
-	    tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0);
-            tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2);
-	    tcg_gen_andi_i32(cmp2, cmp1, 0x0000ff00);
-	    tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0);
-            tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2);
-	    tcg_gen_andi_i32(cmp2, cmp1, 0x000000ff);
-	    tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0);
-            tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2);
+            tcg_gen_xor_i32(cmp2, REG(B7_4), REG(B11_8));
+            tcg_gen_subi_i32(cmp1, cmp2, 0x01010101);
+            tcg_gen_andc_i32(cmp1, cmp1, cmp2);
+            tcg_gen_andi_i32(cmp1, cmp1, 0x80808080);
+            tcg_gen_setcondi_i32(TCG_COND_NE, cpu_sr_t, cmp1, 0);
 	    tcg_temp_free(cmp2);
 	    tcg_temp_free(cmp1);
 	}