@@ -2317,21 +2317,15 @@ ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
case E_DImode:
if (TARGET_64BIT)
goto simple;
- /* For 32-bit target DI comparison may be performed on
- SSE registers. To allow this we should avoid split
- to SI mode which is achieved by doing xor in DI mode
- and then comparing with zero (which is recognized by
- STV pass). We don't compare using xor when optimizing
- for size. */
- if (!optimize_insn_for_size_p ()
- && TARGET_STV
- && (code == EQ || code == NE))
- {
- op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1));
- op1 = const0_rtx;
- }
/* FALLTHRU */
case E_TImode:
+ /* DI and TI mode equality/inequality comparisons may be performed
+ on SSE registers. Avoid splitting them, except when optimizing
+ for size. */
+ if ((code == EQ || code == NE)
+ && !optimize_insn_for_size_p ())
+ goto simple;
+
/* Expand DImode branch into multiple compare+branch. */
{
rtx lo[2], hi[2];
@@ -2350,34 +2344,7 @@ ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
submode = mode == DImode ? SImode : DImode;
- /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
- avoid two branches. This costs one extra insn, so disable when
- optimizing for size. */
-
- if ((code == EQ || code == NE)
- && (!optimize_insn_for_size_p ()
- || hi[1] == const0_rtx || lo[1] == const0_rtx))
- {
- rtx xor0, xor1;
-
- xor1 = hi[0];
- if (hi[1] != const0_rtx)
- xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
- NULL_RTX, 0, OPTAB_WIDEN);
-
- xor0 = lo[0];
- if (lo[1] != const0_rtx)
- xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
- NULL_RTX, 0, OPTAB_WIDEN);
-
- tmp = expand_binop (submode, ior_optab, xor1, xor0,
- NULL_RTX, 0, OPTAB_WIDEN);
-
- ix86_expand_branch (code, tmp, const0_rtx, label);
- return;
- }
-
- /* Otherwise, if we are doing less-than or greater-or-equal-than,
+ /* If we are doing less-than or greater-or-equal-than,
op1 is a constant and the low word is zero, then we can just
examine the high word. Similarly for low word -1 and
less-or-equal-than or greater-than. */
@@ -711,8 +711,7 @@ gen_gpr_to_xmm_move_src (enum machine_mode vmode, rtx gpr)
switch (GET_MODE_NUNITS (vmode))
{
case 1:
- /* We are not using this case currently. */
- gcc_unreachable ();
+ return gen_rtx_SUBREG (vmode, gpr, 0);
case 2:
return gen_rtx_VEC_CONCAT (vmode, gpr,
CONST0_RTX (GET_MODE_INNER (vmode)));
@@ -932,6 +931,48 @@ general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
}
}
+/* Convert a COMPARE of OP1 against OP2 to vector mode and return the rtx
+ the caller should use as the new comparison source.
+
+ OP1 (and OP2, unless it is const0_rtx) are rewritten with convert_op.
+ The value that gets tested is OP1 itself for a comparison against zero
+ and OP1 ^ OP2 otherwise; it is stored into a fresh VMODE pseudo by an
+ insn emitted before INSN. For V2DImode and V4SImode chains one more
+ insn (vec_interleave_lowv2di, resp. sse2_pshufd with selector 0) is
+ emitted before INSN that rewrites the pseudo from itself -- presumably
+ to replicate the low element across the whole register so that only
+ the scalar value takes part in the test; confirm against the insn
+ patterns.
+
+ The returned rtx is an UNSPEC_PTEST in CCmode whose two operands are
+ both that pseudo. */
+
+rtx
+general_scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn)
+{
+ rtx tmp = gen_reg_rtx (vmode);
+ rtx src;
+ convert_op (&op1, insn);
+ /* Comparison against anything other than zero requires an XOR:
+ OP1 equals OP2 exactly when OP1 ^ OP2 is zero. */
+ if (op2 != const0_rtx)
+ {
+ convert_op (&op2, insn);
+ /* If both operands are MEMs, explicitly load OP1 into TMP first, so
+ that the XOR built below has at most one memory operand. */
+ if (MEM_P (op1) && MEM_P (op2))
+ {
+ emit_insn_before (gen_rtx_SET (tmp, op1), insn);
+ src = tmp;
+ }
+ else
+ src = op1;
+ src = gen_rtx_XOR (vmode, src, op2);
+ }
+ else
+ src = op1;
+ emit_insn_before (gen_rtx_SET (tmp, src), insn);
+
+ if (vmode == V2DImode)
+ emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (tmp),
+ copy_rtx_if_shared (tmp),
+ copy_rtx_if_shared (tmp)),
+ insn);
+ else if (vmode == V4SImode)
+ emit_insn_before (gen_sse2_pshufd (copy_rtx_if_shared (tmp),
+ copy_rtx_if_shared (tmp),
+ const0_rtx),
+ insn);
+
+ return gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (tmp),
+ copy_rtx_if_shared (tmp)),
+ UNSPEC_PTEST);
+}
+
/* Convert INSN to vector mode. */
void
@@ -1090,19 +1131,8 @@ general_scalar_chain::convert_insn (rtx_insn *insn)
break;
case COMPARE:
- src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
-
- gcc_assert (REG_P (src) && GET_MODE (src) == DImode);
- subreg = gen_rtx_SUBREG (V2DImode, src, 0);
- emit_insn_before (gen_vec_interleave_lowv2di
- (copy_rtx_if_shared (subreg),
- copy_rtx_if_shared (subreg),
- copy_rtx_if_shared (subreg)),
- insn);
dst = gen_rtx_REG (CCmode, FLAGS_REG);
- src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (subreg),
- copy_rtx_if_shared (subreg)),
- UNSPEC_PTEST);
+ src = convert_compare (XEXP (src, 0), XEXP (src, 1), insn);
break;
case CONST_INT:
@@ -1339,20 +1369,14 @@ pseudo_reg_set (rtx_insn *insn)
return set;
}
-/* Check if comparison INSN may be transformed
- into vector comparison. Currently we transform
- zero checks only which look like:
-
- (set (reg:CCZ 17 flags)
- (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
- (subreg:SI (reg:DI x) 0))
- (const_int 0 [0]))) */
+/* Check if comparison INSN may be transformed into vector comparison.
+ Currently we transform equality/inequality checks which look like:
+ (set (reg:CCZ 17 flags) (compare:CCZ (reg:TI x) (reg:TI y))) */
static bool
convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
{
- /* ??? Currently convertible for double-word DImode chain only. */
- if (TARGET_64BIT || mode != DImode)
+ if (mode != (TARGET_64BIT ? TImode : DImode))
return false;
if (!TARGET_SSE4_1)
@@ -1375,31 +1399,14 @@ convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
rtx op1 = XEXP (src, 0);
rtx op2 = XEXP (src, 1);
- if (op2 != CONST0_RTX (GET_MODE (op2)))
+ if (!CONST_INT_P (op1)
+ && ((!REG_P (op1) && !MEM_P (op1))
+ || GET_MODE (op1) != mode))
return false;
- if (GET_CODE (op1) != IOR)
- return false;
-
- op2 = XEXP (op1, 1);
- op1 = XEXP (op1, 0);
-
- if (!SUBREG_P (op1)
- || !SUBREG_P (op2)
- || GET_MODE (op1) != SImode
- || GET_MODE (op2) != SImode
- || ((SUBREG_BYTE (op1) != 0
- || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
- && (SUBREG_BYTE (op2) != 0
- || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
- return false;
-
- op1 = SUBREG_REG (op1);
- op2 = SUBREG_REG (op2);
-
- if (op1 != op2
- || !REG_P (op1)
- || GET_MODE (op1) != DImode)
+ if (!CONST_INT_P (op2)
+ && ((!REG_P (op2) && !MEM_P (op2))
+ || GET_MODE (op2) != mode))
return false;
return true;
@@ -181,6 +181,7 @@ class general_scalar_chain : public scalar_chain
void convert_reg (rtx_insn *insn, rtx dst, rtx src);
void make_vector_copies (rtx_insn *, rtx);
void convert_registers ();
+ rtx convert_compare (rtx op1, rtx op2, rtx_insn *insn);
int vector_const_cost (rtx exp);
};
@@ -1357,14 +1357,20 @@
(define_expand "cstore<mode>4"
[(set (reg:CC FLAGS_REG)
- (compare:CC (match_operand:SWIM 2 "nonimmediate_operand")
- (match_operand:SWIM 3 "<general_operand>")))
+ (compare:CC (match_operand:SDWIM 2 "nonimmediate_operand")
+ (match_operand:SDWIM 3 "<general_operand>")))
(set (match_operand:QI 0 "register_operand")
(match_operator 1 "ordered_comparison_operator"
[(reg:CC FLAGS_REG) (const_int 0)]))]
""
{
- if (MEM_P (operands[2]) && MEM_P (operands[3]))
+ if (<MODE>mode == (TARGET_64BIT ? TImode : DImode))
+ {
+ if (GET_CODE (operands[1]) != EQ
+ && GET_CODE (operands[1]) != NE)
+ FAIL;
+ }
+ else if (MEM_P (operands[2]) && MEM_P (operands[3]))
operands[2] = force_reg (<MODE>mode, operands[2]);
ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
operands[2], operands[3]);
@@ -1500,6 +1506,52 @@
[(set_attr "type" "icmp")
(set_attr "mode" "QI")])
+(define_insn_and_split "*cmp<dwi>_doubleword"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_operand:<DWI> 0 "nonimmediate_operand")
+ (match_operand:<DWI> 1 "x86_64_general_operand")))]
+ "ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(parallel [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (ior:DWIH (match_dup 4) (match_dup 5))
+ (const_int 0)))
+ (set (match_dup 4) (ior:DWIH (match_dup 4) (match_dup 5)))])]
+{
+ split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);
+ /* After the call above, operands[0..3] hold the word-sized pieces of
+ the two double-word operands. Below, operands[0]/operands[1] are
+ compared as one pair and operands[2]/operands[3] as the other
+ (presumably the low and the high halves respectively -- confirm
+ against split_double_mode). The difference of the first pair goes
+ into operands[4], that of the second pair into operands[5], and the
+ replacement pattern ORs the two and tests the result against zero:
+ the double-word values are equal exactly when both differences are
+ zero.
+
+ Placing the SUBREG pieces in pseudos helps reload. */
+ for (int i = 0; i < 4; i++)
+ if (SUBREG_P (operands[i]))
+ operands[i] = force_reg (<MODE>mode, operands[i]);
+
+ operands[4] = gen_reg_rtx (<MODE>mode); /* Difference of the first pair. */
+ if (operands[1] == const0_rtx) /* x ^ 0 is x: a plain move suffices. */
+ emit_move_insn (operands[4], operands[0]);
+ else if (operands[0] == const0_rtx)
+ emit_move_insn (operands[4], operands[1]);
+ else if (operands[1] == constm1_rtx) /* x ^ -1 is ~x. */
+ emit_insn (gen_one_cmpl<mode>2 (operands[4], operands[0]));
+ else if (operands[0] == constm1_rtx)
+ emit_insn (gen_one_cmpl<mode>2 (operands[4], operands[1]));
+ else
+ emit_insn (gen_xor<mode>3 (operands[4], operands[0], operands[1]));
+
+ if (operands[3] == const0_rtx) /* No new pseudo: reuse the other piece. */
+ operands[5] = operands[2];
+ else if (operands[2] == const0_rtx)
+ operands[5] = operands[3];
+ else
+ {
+ operands[5] = gen_reg_rtx (<MODE>mode); /* Difference of the second pair. */
+ if (operands[3] == constm1_rtx)
+ emit_insn (gen_one_cmpl<mode>2 (operands[5], operands[2]));
+ else if (operands[2] == constm1_rtx)
+ emit_insn (gen_one_cmpl<mode>2 (operands[5], operands[3]));
+ else
+ emit_insn (gen_xor<mode>3 (operands[5], operands[2], operands[3]));
+ }
+})
+
;; These implement float point compares.
;; %%% See if we can get away with VOIDmode operands on the actual insns,
;; which would allow mix and match FP modes on the compares. Which is what
@@ -466,9 +466,9 @@
;; All DImode vector integer modes
(define_mode_iterator V_AVX
- [V16QI V8HI V4SI V2DI V4SF V2DF
+ [V16QI V8HI V4SI V2DI V1TI V4SF V2DF
(V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
- (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
+ (V8SI "TARGET_AVX") (V4DI "TARGET_AVX") (V2TI "TARGET_AVX")
(V8SF "TARGET_AVX") (V4DF"TARGET_AVX")])
(define_mode_iterator VI48_AVX
@@ -890,6 +890,7 @@
[(V4SF "sse4_1") (V2DF "sse4_1")
(V8SF "avx") (V4DF "avx")
(V8DF "avx512f")
+ (V2TI "avx") (V1TI "sse4_1")
(V4DI "avx") (V2DI "sse4_1")
(V8SI "avx") (V4SI "sse4_1")
(V16QI "sse4_1") (V32QI "avx")
new file mode 100644
@@ -0,0 +1,10 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2" } */
+
+void foo (long long ixi) /* long long inequality test, compiled for ia32 only. */
+{
+ if (ixi != 14348907) /* The constant fits in 32 bits, so its upper word is zero. */
+ __builtin_abort ();
+}
+
+/* { dg-final { scan-assembler-times "mov" 1 } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -msse4.1" } */
+long long a[1024];
+long long b[1024];
+
+int foo() /* Returns 1 if any combined element is zero, else 0. */
+{
+ for (int i=0; i<1024; i++)
+ {
+ long long t = (a[i]<<8) | (b[i]<<24); /* long long shifts and OR feeding the zero test. */
+ if (t == 0) /* The dg-final checks expect ptest and no pxor in the output. */
+ return 1;
+ }
+ return 0;
+}
+
+/* { dg-final { scan-assembler "ptest" } } */
+/* { dg-final { scan-assembler-not "pxor" } } */