diff mbox series

[2/2] target/i386: Fix carry flag for BLSI

Message ID 20240801075845.573075-3-richard.henderson@linaro.org
State New
Headers show
Series target/i386: Fix carry flag for BLSI | expand

Commit Message

Richard Henderson Aug. 1, 2024, 7:58 a.m. UTC
BLSI has inverted semantics for C as compared to the other two
BMI1 instructions, BLSMSK and BLSR.  Introduce CC_OP_BLSI* for
this purpose.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2175
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/i386/cpu.h                        |  5 +++++
 target/i386/tcg/cc_helper.c              | 18 ++++++++++++++++++
 target/i386/tcg/translate.c              |  5 +++++
 tests/tcg/x86_64/test-2175.c             | 24 ++++++++++++++++++++++++
 target/i386/tcg/cc_helper_template.h.inc | 18 ++++++++++++++++++
 target/i386/tcg/emit.c.inc               |  2 +-
 tests/tcg/x86_64/Makefile.target         |  1 +
 7 files changed, 72 insertions(+), 1 deletion(-)
 create mode 100644 tests/tcg/x86_64/test-2175.c

Comments

Philippe Mathieu-Daudé Aug. 16, 2024, 9:30 a.m. UTC | #1
On 1/8/24 09:58, Richard Henderson wrote:
> BLSI has inverted semantics for C as compared to the other two
> BMI1 instructions, BLSMSK and BLSR.  Introduce CC_OP_BLSI* for
> this purpose.

Fixes: 1d0b926150 ("target/i386: move scalar 0F 38 and 0F 3A instruction 
to new decoder")

or even older?

> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2175
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   target/i386/cpu.h                        |  5 +++++
>   target/i386/tcg/cc_helper.c              | 18 ++++++++++++++++++
>   target/i386/tcg/translate.c              |  5 +++++
>   tests/tcg/x86_64/test-2175.c             | 24 ++++++++++++++++++++++++
>   target/i386/tcg/cc_helper_template.h.inc | 18 ++++++++++++++++++
>   target/i386/tcg/emit.c.inc               |  2 +-
>   tests/tcg/x86_64/Makefile.target         |  1 +
>   7 files changed, 72 insertions(+), 1 deletion(-)
>   create mode 100644 tests/tcg/x86_64/test-2175.c


> +static int glue(compute_all_blsi, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
> +{
> +    int cf, pf, af, zf, sf, of;
> +
> +    cf = (src1 != 0);
> +    pf = 0; /* undefined */
> +    af = 0; /* undefined */
> +    zf = (dst == 0) * CC_Z;
> +    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
> +    of = 0;
> +    return cf | pf | af | zf | sf | of;
> +}

Except this function which I'm not reviewing, for the rest
to the best of my x86 knowledge:
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
diff mbox series

Patch

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index c6cc035df3..14edd57a37 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1339,6 +1339,11 @@  typedef enum {
     CC_OP_BMILGL,
     CC_OP_BMILGQ,
 
+    CC_OP_BLSIB, /* Z,S via CC_DST, C = SRC!=0; O=0; P,A undefined */
+    CC_OP_BLSIW,
+    CC_OP_BLSIL,
+    CC_OP_BLSIQ,
+
     /*
      * Note that only CC_OP_POPCNT (i.e. the one with MO_TL size)
      * is used or implemented, because the translation needs
diff --git a/target/i386/tcg/cc_helper.c b/target/i386/tcg/cc_helper.c
index 301ed95406..dbddaa2fcb 100644
--- a/target/i386/tcg/cc_helper.c
+++ b/target/i386/tcg/cc_helper.c
@@ -186,6 +186,13 @@  target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
     case CC_OP_BMILGL:
         return compute_all_bmilgl(dst, src1);
 
+    case CC_OP_BLSIB:
+        return compute_all_blsib(dst, src1);
+    case CC_OP_BLSIW:
+        return compute_all_blsiw(dst, src1);
+    case CC_OP_BLSIL:
+        return compute_all_blsil(dst, src1);
+
     case CC_OP_ADCX:
         return compute_all_adcx(dst, src1, src2);
     case CC_OP_ADOX:
@@ -216,6 +223,8 @@  target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
         return compute_all_sarq(dst, src1);
     case CC_OP_BMILGQ:
         return compute_all_bmilgq(dst, src1);
+    case CC_OP_BLSIQ:
+        return compute_all_blsiq(dst, src1);
 #endif
     }
 }
@@ -308,6 +317,13 @@  target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
     case CC_OP_BMILGL:
         return compute_c_bmilgl(dst, src1);
 
+    case CC_OP_BLSIB:
+        return compute_c_blsib(dst, src1);
+    case CC_OP_BLSIW:
+        return compute_c_blsiw(dst, src1);
+    case CC_OP_BLSIL:
+        return compute_c_blsil(dst, src1);
+
 #ifdef TARGET_X86_64
     case CC_OP_ADDQ:
         return compute_c_addq(dst, src1);
@@ -321,6 +337,8 @@  target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
         return compute_c_shlq(dst, src1);
     case CC_OP_BMILGQ:
         return compute_c_bmilgq(dst, src1);
+    case CC_OP_BLSIQ:
+        return compute_c_blsiq(dst, src1);
 #endif
     }
 }
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index e62ffa2858..fb0d01b356 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -304,6 +304,7 @@  static const uint8_t cc_op_live[CC_OP_NB] = {
     [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_BLSIB ... CC_OP_BLSIQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
     [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
@@ -922,6 +923,10 @@  static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
         size = s->cc_op - CC_OP_BMILGB;
         return gen_prepare_val_nz(cpu_cc_src, size, true);
 
+    case CC_OP_BLSIB ... CC_OP_BLSIQ:
+        size = s->cc_op - CC_OP_BLSIB;
+        return gen_prepare_val_nz(cpu_cc_src, size, false);
+
     case CC_OP_ADCX:
     case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
diff --git a/tests/tcg/x86_64/test-2175.c b/tests/tcg/x86_64/test-2175.c
new file mode 100644
index 0000000000..aafd037bce
--- /dev/null
+++ b/tests/tcg/x86_64/test-2175.c
@@ -0,0 +1,24 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* See https://gitlab.com/qemu-project/qemu/-/issues/2185 */
+
+#include <assert.h>
+
+int test_setc(unsigned int x, unsigned int y)
+{
+    asm("blsi %1, %0; setc %b0" : "+r"(x) : "r"(y));
+    return (unsigned char)x;
+}
+
+int test_pushf(unsigned int x, unsigned int y)
+{
+    asm("blsi %1, %0; pushf; pop %q0" : "+r"(x) : "r"(y));
+    return x & 1;
+}
+
+int main()
+{
+    assert(test_setc(1, 0xedbf530a));
+    assert(test_pushf(1, 0xedbf530a));
+    return 0;
+}
+
diff --git a/target/i386/tcg/cc_helper_template.h.inc b/target/i386/tcg/cc_helper_template.h.inc
index bb611feb04..c5425e57cf 100644
--- a/target/i386/tcg/cc_helper_template.h.inc
+++ b/target/i386/tcg/cc_helper_template.h.inc
@@ -235,6 +235,24 @@  static int glue(compute_c_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
     return src1 == 0;
 }
 
+static int glue(compute_all_blsi, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+{
+    int cf, pf, af, zf, sf, of;
+
+    cf = (src1 != 0);
+    pf = 0; /* undefined */
+    af = 0; /* undefined */
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    of = 0;
+    return cf | pf | af | zf | sf | of;
+}
+
+static int glue(compute_c_blsi, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+{
+    return src1 != 0;
+}
+
 #undef DATA_BITS
 #undef SIGN_MASK
 #undef DATA_TYPE
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 016dce8146..3e867135dd 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1299,7 +1299,7 @@  static void gen_BLSI(DisasContext *s, X86DecodedInsn *decode)
     /* input in T1, which is ready for prepare_update2_cc  */
     tcg_gen_neg_tl(s->T0, s->T1);
     tcg_gen_and_tl(s->T0, s->T0, s->T1);
-    prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
+    prepare_update2_cc(decode, s, CC_OP_BLSIB + ot);
 }
 
 static void gen_BLSMSK(DisasContext *s, X86DecodedInsn *decode)
diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target
index eda9bd7396..783ab5b21a 100644
--- a/tests/tcg/x86_64/Makefile.target
+++ b/tests/tcg/x86_64/Makefile.target
@@ -16,6 +16,7 @@  X86_64_TESTS += noexec
 X86_64_TESTS += cmpxchg
 X86_64_TESTS += adox
 X86_64_TESTS += test-1648
+X86_64_TESTS += test-2175
 TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64
 else
 TESTS=$(MULTIARCH_TESTS)