From 69ee4f981e090fb06ff5f27692fa4f79be7d54ea Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Mon, 20 Jul 2020 10:13:58 +0800
Subject: [PATCH] Using UNSPEC for vector compare to mask register.
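For an rtx like (eq:HI (V8SI 90) (V8SI 91)), cse treats the result as a
boolean value and tries to optimize it accordingly.  That assumption does
not hold for a vector compare whose result is written to a mask register,
and other rtl passes make the same assumption.  Represent such compares
with an UNSPEC instead so that they are not misinterpreted.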
2020-07-20 Hongtao Liu <hongtao.liu@intel.com>
gcc/
* config/i386/i386-expand.c (ix86_expand_sse_cmp): Refine for
maskcmp.
(ix86_expand_mask_vec_cmp): Change prototype.
* config/i386/i386-protos.h (ix86_expand_mask_vec_cmp): Change prototype.
* config/i386/i386.c (ix86_print_operand): Remove operand
modifier 'I'.
* config/i386/sse.md
(*<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>,
*<avx512>_cmp<mode>3<mask_scalar_merge_name>,
*<avx512>_ucmp<mode>3<mask_scalar_merge_name>,
*<avx512>_ucmp<mode>3<mask_scalar_merge_name>,
avx512f_maskcmp<mode>3): Deleted.
gcc/testsuite
* gcc.target/i386/pr92865-1.c: Adjust testcase.
---
gcc/config/i386/i386-expand.c | 19 +++---
gcc/config/i386/i386-protos.h | 2 +-
gcc/config/i386/i386.c | 35 -----------
gcc/config/i386/sse.md | 72 +++--------------------
gcc/testsuite/gcc.target/i386/pr92865-1.c | 10 ++--
5 files changed, 26 insertions(+), 112 deletions(-)
@@ -3480,6 +3480,13 @@ ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
|| (op_false && reg_overlap_mentioned_p (dest, op_false)))
dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
+ if (maskcmp)
+ {
+ bool ok = ix86_expand_mask_vec_cmp (dest, code, cmp_op0, cmp_op1);
+ gcc_assert (ok);
+ return dest;
+ }
+
x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
if (cmp_mode != mode && !maskcmp)
@@ -3915,11 +3922,10 @@ ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
/* Expand AVX-512 vector comparison. */
bool
-ix86_expand_mask_vec_cmp (rtx operands[])
+ix86_expand_mask_vec_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1)
{
- machine_mode mask_mode = GET_MODE (operands[0]);
- machine_mode cmp_mode = GET_MODE (operands[2]);
- enum rtx_code code = GET_CODE (operands[1]);
+ machine_mode mask_mode = GET_MODE (dest);
+ machine_mode cmp_mode = GET_MODE (cmp_op0);
rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
int unspec_code;
rtx unspec;
@@ -3937,10 +3943,9 @@ ix86_expand_mask_vec_cmp (rtx operands[])
unspec_code = UNSPEC_PCMP;
}
- unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
- operands[3], imm),
+ unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, cmp_op0, cmp_op1, imm),
unspec_code);
- emit_insn (gen_rtx_SET (operands[0], unspec));
+ emit_insn (gen_rtx_SET (dest, unspec));
return true;
}
@@ -143,7 +143,7 @@ extern bool ix86_expand_fp_movcc (rtx[]);
extern bool ix86_expand_fp_vcond (rtx[]);
extern bool ix86_expand_int_vcond (rtx[]);
extern void ix86_expand_vec_perm (rtx[]);
-extern bool ix86_expand_mask_vec_cmp (rtx[]);
+extern bool ix86_expand_mask_vec_cmp (rtx, enum rtx_code, rtx, rtx);
extern bool ix86_expand_int_vec_cmp (rtx[]);
extern bool ix86_expand_fp_vec_cmp (rtx[]);
extern void ix86_expand_sse_movcc (rtx, rtx, rtx, rtx);
@@ -12544,7 +12544,6 @@ print_reg (rtx x, int code, FILE *file)
M -- print addr32 prefix for TARGET_X32 with VSIB address.
! -- print NOTRACK prefix for jxx/call/ret instructions if required.
N -- print maskz if it's constant 0 operand.
- I -- print comparision predicate operand for sse cmp condition.
*/
void
@@ -12774,40 +12773,6 @@ ix86_print_operand (FILE *file, rtx x, int code)
}
return;
- case 'I':
- if (ASSEMBLER_DIALECT == ASM_ATT)
- putc ('$', file);
- switch (GET_CODE (x))
- {
- case EQ:
- putc ('0', file);
- break;
- case NE:
- putc ('4', file);
- break;
- case GE:
- case GEU:
- putc ('5', file);
- break;
- case GT:
- case GTU:
- putc ('6', file);
- break;
- case LE:
- case LEU:
- putc ('2', file);
- break;
- case LT:
- case LTU:
- putc ('1', file);
- break;
- default:
- output_operand_lossage ("operand is not a condition code, "
- "invalid operand code 'I'");
- return;
- }
- return;
-
case 'Y':
switch (GET_CODE (x))
{
@@ -2947,18 +2947,6 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "*<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
- [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
- (match_operator:<avx512fmaskmode> 3 "ix86_comparison_int_operator"
- [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
- (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")]))]
- "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
- "vpcmp<ssemodesuffix>\t{%I3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %I3}"
- [(set_attr "type" "ssecmp")
- (set_attr "length_immediate" "1")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
@@ -2973,18 +2961,6 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "*<avx512>_cmp<mode>3<mask_scalar_merge_name>"
- [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
- (match_operator:<avx512fmaskmode> 3 "ix86_comparison_int_operator"
- [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
- (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]))]
- "TARGET_AVX512BW"
- "vpcmp<ssemodesuffix>\t{%I3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %I3}"
- [(set_attr "type" "ssecmp")
- (set_attr "length_immediate" "1")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
@@ -2999,18 +2975,6 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "*<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
- [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
- (match_operator:<avx512fmaskmode> 3 "ix86_comparison_uns_operator"
- [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
- (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]))]
- "TARGET_AVX512BW"
- "vpcmpu<ssemodesuffix>\t{%I3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %I3}"
- [(set_attr "type" "ssecmp")
- (set_attr "length_immediate" "1")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
@@ -3025,18 +2989,6 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "*<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
- [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
- (match_operator:<avx512fmaskmode> 3 "ix86_comparison_uns_operator"
- [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
- (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]))]
- "TARGET_AVX512F"
- "vpcmpu<ssemodesuffix>\t{%I3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %I3}"
- [(set_attr "type" "ssecmp")
- (set_attr "length_immediate" "1")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
(define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(and:<avx512fmaskmode>
@@ -3071,18 +3023,6 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
-(define_insn "avx512f_maskcmp<mode>3"
- [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
- (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
- [(match_operand:VF_AVX512VL 1 "register_operand" "v")
- (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "vm")]))]
- "TARGET_AVX512F"
- "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssecmp")
- (set_attr "length_immediate" "1")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
(define_insn "<sse>_<unord>comi<round_saeonly_name>"
[(set (reg:CCFP FLAGS_REG)
(compare:CCFP
@@ -3110,7 +3050,8 @@
(match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
"TARGET_AVX512F"
{
- bool ok = ix86_expand_mask_vec_cmp (operands);
+ bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3]);
gcc_assert (ok);
DONE;
})
@@ -3122,7 +3063,8 @@
(match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
"TARGET_AVX512BW"
{
- bool ok = ix86_expand_mask_vec_cmp (operands);
+ bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3]);
gcc_assert (ok);
DONE;
})
@@ -3194,7 +3136,8 @@
(match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
"TARGET_AVX512F"
{
- bool ok = ix86_expand_mask_vec_cmp (operands);
+ bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3]);
gcc_assert (ok);
DONE;
})
@@ -3206,7 +3149,8 @@
(match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
"TARGET_AVX512BW"
{
- bool ok = ix86_expand_mask_vec_cmp (operands);
+ bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3]);
gcc_assert (ok);
DONE;
})
@@ -1,12 +1,12 @@
/* PR target/92865 */
/* { dg-do compile } */
-/* { dg-options "-Ofast -mavx512f -mavx512bw -mxop" } */
+/* { dg-options "-Ofast -mavx512bw -mxop" } */
/* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 4 } } */
/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 4 } } */
-/* { dg-final { scan-assembler-times "vmovdq\[au\]8\[\t ]" 4 } } */
-/* { dg-final { scan-assembler-times "vmovdq\[au\]16\[\t ]" 4 } } *
-/* { dg-final { scan-assembler-times "vmovdq\[au\]32\[\t ]" 4 } } */
-/* { dg-final { scan-assembler-times "vmovdq\[au\]64\[\t ]" 4 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]8\[\t ]" 6 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]16\[\t ]" 6 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]32\[\t ]" 6 } } */
+/* { dg-final { scan-assembler-times "vmovdq\[au\]64\[\t ]" 6 } } */
extern char arraysb[64];
extern short arraysw[32];
--
2.18.1