diff mbox series

[2/8] target/sparc: Fix VIS fmul8x16au instruction.

Message ID 20230925050545.30912-3-nbowler@draconx.ca
State New
Headers show
Series SPARC VIS fixes | expand

Commit Message

Nick Bowler Sept. 25, 2023, 5:03 a.m. UTC
On a real UltraSparc II, the fmul8x16au instruction takes two single-
precision input operands and returns a double-precision result.  For
the second operand, bits 31:16 are used, and bits 15:0 are ignored.

However, the emulation is taking two double-precision input operands,
and furthermore it is using bits 15:0 of the second operand (ignoring
bits 31:16).  These are unlikely to contain the correct values.

Even still, the emulator overwrites the second input before all outputs
are calculated, so even if by chance the data loaded in happens to be
correct, the results are just garbage except in trivial cases.

Signed-off-by: Nick Bowler <nbowler@draconx.ca>
---
 target/sparc/helper.h     |  2 +-
 target/sparc/translate.c  | 19 ++++++++++++++++++-
 target/sparc/vis_helper.c | 14 +++++++++-----
 3 files changed, 28 insertions(+), 7 deletions(-)

Comments

Richard Henderson Sept. 28, 2023, 9:32 p.m. UTC | #1
On 9/24/23 01:03, Nick Bowler wrote:
> On a real UltraSparc II, the fmul8x16au instruction takes two single-
> precision input operands and returns a double-precision result.  For
> the second operand, bits 31:16 are used, and bits 15:0 are ignored.
> 
> However, the emulation is taking two double-precision input operands,
> and furthermore it is using bits 15:0 of the second operand (ignoring
> bits 31:16).  These are unlikely to contain the correct values.
> 
> Even still, the emulator overwrites the second input before all outputs
> are calculated, so even if by chance the data loaded in happens to be
> correct, the results are just garbage except in trivial cases.
> 
> Signed-off-by: Nick Bowler <nbowler@draconx.ca>
> ---
>   target/sparc/helper.h     |  2 +-
>   target/sparc/translate.c  | 19 ++++++++++++++++++-
>   target/sparc/vis_helper.c | 14 +++++++++-----
>   3 files changed, 28 insertions(+), 7 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

>   #define PMUL(r)                                                 \
> -    tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r);       \
> +    tmp = (int32_t)s2.VIS_SW32(1) * (int32_t)s1.VIS_B64(r);     \
>       if ((tmp & 0xff) > 0x7f) {                                  \
>           tmp += 0x100;                                           \
>       }                                                           \

Belated follow-up suggestion:

-   if ((tmp & 0xff) > 0x7f) {
-       tmp += 0x100;
-   }
+   tmp += 0x80;

7 occurrences throughout vis_helper.c.


r~
Nick Bowler Sept. 29, 2023, 12:41 a.m. UTC | #2
On 2023-09-28, Richard Henderson <richard.henderson@linaro.org> wrote:
> Belated follow-up suggestion:
>
> -   if ((tmp & 0xff) > 0x7f) {
> -       tmp += 0x100;
> -   }
> +   tmp += 0x80;
>
> 7 occurrences throughout vis_helper.c.

I agree with making this particular change but I think since it doesn't
fix a bug, it should go in a separate patch.

So I will include a patch to do that in series v2 and keep this one
as-is with your Reviewed-by.

Thanks,
  Nick
Richard Henderson Sept. 29, 2023, 1:04 a.m. UTC | #3
On 9/28/23 17:41, Nick Bowler wrote:
> On 2023-09-28, Richard Henderson <richard.henderson@linaro.org> wrote:
>> Belated follow-up suggestion:
>>
>> -   if ((tmp & 0xff) > 0x7f) {
>> -       tmp += 0x100;
>> -   }
>> +   tmp += 0x80;
>>
>> 7 occurrences throughout vis_helper.c.
> 
> I agree with making this particular change but I think since it doesn't
> fix a bug, it should go in a separate patch.

Of course.  That's what I meant by followup.


r~
diff mbox series

Patch

diff --git a/target/sparc/helper.h b/target/sparc/helper.h
index ace731a22c..76e06b8ea5 100644
--- a/target/sparc/helper.h
+++ b/target/sparc/helper.h
@@ -128,7 +128,7 @@  DEF_HELPER_FLAGS_1(fqtox, TCG_CALL_NO_RWG, s64, env)
 DEF_HELPER_FLAGS_2(fpmerge, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(fmul8x16, TCG_CALL_NO_RWG_SE, i64, i32, i64)
 DEF_HELPER_FLAGS_2(fmul8x16al, TCG_CALL_NO_RWG_SE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(fmul8x16au, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmul8x16au, TCG_CALL_NO_RWG_SE, i64, i32, i32)
 DEF_HELPER_FLAGS_2(fmul8sux16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(fmul8ulx16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(fmuld8sux16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index bb65b8daf8..ca81b35a25 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -1786,6 +1786,23 @@  static void gen_fop_DFF(DisasContext *dc, int rd, int rs1, int rs2,
     gen_store_fpr_D(dc, rd, dst);
 }
 
+#ifdef TARGET_SPARC64
+static void gen_ne_fop_DFF(DisasContext *dc, int rd, int rs1, int rs2,
+                           void (*gen)(TCGv_i64, TCGv_i32, TCGv_i32))
+{
+    TCGv_i64 dst;
+    TCGv_i32 src1, src2;
+
+    src1 = gen_load_fpr_F(dc, rs1);
+    src2 = gen_load_fpr_F(dc, rs2);
+    dst = gen_dest_fpr_D(dc, rd);
+
+    gen(dst, src1, src2);
+
+    gen_store_fpr_D(dc, rd, dst);
+}
+#endif
+
 static void gen_fop_QDD(DisasContext *dc, int rd, int rs1, int rs2,
                         void (*gen)(TCGv_ptr, TCGv_i64, TCGv_i64))
 {
@@ -4758,7 +4775,7 @@  static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     break;
                 case 0x033: /* VIS I fmul8x16au */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fmul8x16au);
+                    gen_ne_fop_DFF(dc, rd, rs1, rs2, gen_helper_fmul8x16au);
                     break;
                 case 0x035: /* VIS I fmul8x16al */
                     CHECK_FPU_FEATURE(dc, VIS1);
diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c
index d158b39b85..2fc783a054 100644
--- a/target/sparc/vis_helper.c
+++ b/target/sparc/vis_helper.c
@@ -49,6 +49,7 @@  target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
 #define VIS_L64(n) l[1 - (n)]
 #define VIS_B32(n) b[3 - (n)]
 #define VIS_W32(n) w[1 - (n)]
+#define VIS_SW32(n) sw[1 - (n)]
 #else
 #define VIS_B64(n) b[n]
 #define VIS_W64(n) w[n]
@@ -56,6 +57,7 @@  target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
 #define VIS_L64(n) l[n]
 #define VIS_B32(n) b[n]
 #define VIS_W32(n) w[n]
+#define VIS_SW32(n) sw[n]
 #endif
 
 typedef union {
@@ -70,6 +72,7 @@  typedef union {
 typedef union {
     uint8_t b[4];
     uint16_t w[2];
+    int16_t sw[2];
     uint32_t l;
     float32 f;
 } VIS32;
@@ -143,16 +146,17 @@  uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
     return d.ll;
 }
 
-uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
+uint64_t helper_fmul8x16au(uint32_t src1, uint32_t src2)
 {
-    VIS64 s, d;
+    VIS32 s1, s2;
+    VIS64 d;
     uint32_t tmp;
 
-    s.ll = src1;
-    d.ll = src2;
+    s1.l = src1;
+    s2.l = src2;
 
 #define PMUL(r)                                                 \
-    tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r);       \
+    tmp = (int32_t)s2.VIS_SW32(1) * (int32_t)s1.VIS_B64(r);     \
     if ((tmp & 0xff) > 0x7f) {                                  \
         tmp += 0x100;                                           \
     }                                                           \