diff mbox series

target: Fix asm codegen for vfpclasss* and vcvtph2* instructions

Message ID bf4464e4-ca92-4ffd-8adb-b71628fb6895@zoho.com
State New
Headers show
Series target: Fix asm codegen for vfpclasss* and vcvtph2* instructions | expand

Commit Message

Antoni Boucher Oct. 18, 2024, 1:08 a.m. UTC
Hi.
This is a patch for the bug 116725.
I'm not sure if it is a good fix, but it seems to do the job.
If you have suggestions for better comments than what I wrote that would 
explain what's happening, I'm open to suggestions.

Here are the tests results:

		=== gcc Summary ===

# of expected passes		208652
# of unexpected failures	120
# of unexpected successes	25
# of expected failures		1545
# of unsupported tests		3518

		=== g++ Summary ===

# of expected passes		234412
# of unexpected failures	41
# of expected failures		2215
# of unsupported tests		2061

They are the same on master:

# Comparing directories
## Dir1=/home/bouanto/tmp/master/: 2 sum files
## Dir2=/home/bouanto/tmp/branch/: 2 sum files

# Comparing 2 common sum files
## /bin/sh ./contrib/compare_tests  /tmp/gxx-sum1.29654 /tmp/gxx-sum2.29654
New tests that PASS (1 tests):

gcc: gcc.target/i386/pr116725.c (test for excess errors)

Old tests that passed, that have disappeared (136 tests): (Eeek!)

g++: c-c++-common/tsan/atomic_stack.c   -O0  execution test
[…]

Old tests that failed, that have disappeared (69 tests): (Eeek!)

g++: c-c++-common/tsan/atomic_stack.c   -O0  output pattern test
[…]

# No differences found in 2 common sum files

Thanks.

Comments

Hongtao Liu Oct. 18, 2024, 1:50 a.m. UTC | #1
On Fri, Oct 18, 2024 at 9:08 AM Antoni Boucher <bouanto@zoho.com> wrote:
>
> Hi.
> This is a patch for the bug 116725.
> I'm not sure if it is a good fix, but it seems to do the job.
> If you have suggestions for better comments than what I wrote that would
> explain what's happening, I'm open to suggestions.

>@@ -7548,7 +7548,8 @@ (define_insn "avx512fp16_vcvtph2<sseintconvertsignprefix><sseintconvert>_<mode><
>     [(match_operand:<ssePHmode> 1 "<round_nimm_predicate>" "<round_constraint>")]
>     UNSPEC_US_FIX_NOTRUNC))]
>   "TARGET_AVX512FP16 && <round_mode_condition>"
>-  "vcvtph2<sseintconvertsignprefix><sseintconvert>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
>+;; %X1 so that we don't emit any *WORD PTR for -masm=intel.
>+  "vcvtph2<sseintconvertsignprefix><sseintconvert>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %X1<round_mask_op2>}"
Could you define something like

 ;; Pointer size override for 16-bit upper-convert modes (Intel asm dialect)
 (define_mode_attr iptrh
  [(V32HI "") (V16SI "") (V8DI "")
   (V16HI "") (V8SI "") (V4DI "q")
   (V8HI "") (V4SI "q") (V2DI "k")])

And use
+  "vcvtph2<sseintconvertsignprefix><sseintconvert>\t{<round_mask_op2>%1,
%0<mask_operand2>|%0<mask_operand2>, %<iptrh>1<round_mask_op2>}"

>   [(set_attr "type" "ssecvt")
>    (set_attr "prefix" "evex")
>    (set_attr "mode" "<sseinsnmode>")])
>@@ -29854,7 +29855,8 @@ (define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>"
>      UNSPEC_FPCLASS)
>    (const_int 1)))]
>    "TARGET_AVX512DQ || VALID_AVX512FP16_REG_MODE(<MODE>mode)"
>-   "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
>+;; %X1 so that we don't emit any *WORD PTR for -masm=intel.
>+   "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %X1, %2}";

For scaar memory operand rewrite, we usually use <iptr>, so
   "vfpclass<ssescalarmodesuffix>\t{%2, %1,
%0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>,
%<iptr>1, %2}";
diff mbox series

Patch

From 20ba6ec63d29b5d1ac93bdb9d461527eaf8962f5 Mon Sep 17 00:00:00 2001
From: Antoni Boucher <bouanto@zoho.com>
Date: Mon, 23 Sep 2024 18:58:47 -0400
Subject: [PATCH] target: Fix asm codegen for vfpclasss* and vcvtph2*
 instructions

This only happens when using -masm=intel.

    gcc/ChangeLog:
            PR target/116725
            * config/i386/sse.md: Fix asm generation.

    gcc/testsuite/ChangeLog:
            PR target/116725
            * gcc.target/i386/pr116725.c: Add test using those AVX builtins.
---
 gcc/config/i386/sse.md                   |  6 ++--
 gcc/testsuite/gcc.target/i386/pr116725.c | 40 ++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr116725.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 685bce3094a..040f3ed3d14 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -7548,7 +7548,8 @@  (define_insn "avx512fp16_vcvtph2<sseintconvertsignprefix><sseintconvert>_<mode><
 	   [(match_operand:<ssePHmode> 1 "<round_nimm_predicate>" "<round_constraint>")]
 	   UNSPEC_US_FIX_NOTRUNC))]
   "TARGET_AVX512FP16 && <round_mode_condition>"
-  "vcvtph2<sseintconvertsignprefix><sseintconvert>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+;; %X1 so that we don't emit any *WORD PTR for -masm=intel.
+  "vcvtph2<sseintconvertsignprefix><sseintconvert>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %X1<round_mask_op2>}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -29854,7 +29855,8 @@  (define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>"
 	    UNSPEC_FPCLASS)
 	  (const_int 1)))]
    "TARGET_AVX512DQ || VALID_AVX512FP16_REG_MODE(<MODE>mode)"
-   "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
+;; %X1 so that we don't emit any *WORD PTR for -masm=intel.
+   "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %X1, %2}";
   [(set_attr "type" "sse")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "evex")
diff --git a/gcc/testsuite/gcc.target/i386/pr116725.c b/gcc/testsuite/gcc.target/i386/pr116725.c
new file mode 100644
index 00000000000..9e5070e16e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr116725.c
@@ -0,0 +1,40 @@ 
+/* PR gcc/116725 */
+/* { dg-do assemble } */
+/* { dg-options "-masm=intel -mavx512dq -mavx512fp16 -mavx512vl" } */
+/* { dg-require-effective-target masm_intel } */
+
+#include <stdio.h>
+
+typedef double __m128d __attribute__ ((__vector_size__ (16)));
+typedef float __m128f __attribute__ ((__vector_size__ (16)));
+typedef int __v16si __attribute__ ((__vector_size__ (64)));
+typedef _Float16 __m256h __attribute__ ((__vector_size__ (32)));
+typedef long long __m512i __attribute__((__vector_size__(64)));
+typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
+typedef long long __m128i __attribute__ ((__vector_size__ (16)));
+
+int main(void) {
+    __m128d vec = {1.0, 2.0};
+    char res = __builtin_ia32_fpclasssd_mask(vec, 1, 1);
+    printf("%d\n", res);
+
+    __m128f vec2 = {1.0, 2.0, 3.0, 4.0};
+    char res2 = __builtin_ia32_fpclassss_mask(vec2, 1, 1);
+    printf("%d\n", res2);
+
+    __m128h vec3 = {2.0, 1.0, 3.0};
+    __v4si vec4 = {};
+    __v4si res3 = __builtin_ia32_vcvtph2dq128_mask(vec3, vec4, -1);
+    printf("%d\n", res3[0]);
+
+    __v4si res4 = __builtin_ia32_vcvtph2udq128_mask(vec3, vec4, -1);
+    printf("%d\n", res4[0]);
+
+    __m128i vec5 = {};
+    __m128i res5 = __builtin_ia32_vcvtph2qq128_mask(vec3, vec5, -1);
+    printf("%d\n", res5[0]);
+
+    __m128i res6 = __builtin_ia32_vcvtph2uqq128_mask(vec3, vec5, -1);
+    printf("%d\n", res6[0]);
+}
-- 
2.47.0