@@ -6678,6 +6678,142 @@ ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
}
}
+/* Split the TImode left shift OPERANDS[0] = OPERANDS[1] << OPERANDS[2]
+   into DImode instructions for APX NDD, where the emitted patterns take
+   the source halves and the destination halves as separate operands.
+   SCRATCH may be NULL_RTX; when it is non-null and TARGET_CMOVE holds it
+   is cleared and handed to the x86_shift_adj_1 fix-up for variable
+   counts.  */
+void
+ix86_split_ashl_ndd (rtx *operands, rtx scratch)
+{
+  gcc_assert (TARGET_APX_NDD);
+
+  rtx lo[2], hi[2];
+  int half_bits = GET_MODE_BITSIZE (TImode) >> 1;
+
+  split_double_mode (TImode, operands, 2, lo, hi);
+
+  /* Variable count: shift both halves, then emit one of the
+     x86_shift_adj_* fix-ups on the result halves.  */
+  if (!CONST_INT_P (operands[2]))
+    {
+      emit_insn (gen_x86_64_shld_ndd (hi[0], hi[1], lo[1], operands[2]));
+      emit_insn (gen_ashldi3 (lo[0], lo[1], operands[2]));
+
+      if (!TARGET_CMOVE || !scratch)
+        {
+          emit_insn (gen_x86_shift_adj_2 (DImode, hi[0], lo[0],
+                                          operands[2]));
+          return;
+        }
+
+      ix86_expand_clear (scratch);
+      emit_insn (gen_x86_shift_adj_1 (DImode, hi[0], lo[0],
+                                      operands[2], scratch));
+      return;
+    }
+
+  /* Constant count, reduced modulo the TImode width.  */
+  int nbits = INTVAL (operands[2]) & (GET_MODE_BITSIZE (TImode) - 1);
+
+  if (nbits >= half_bits)
+    {
+      /* Only the low source half contributes; it lands in the high
+         destination half and the low destination half becomes zero.  */
+      int rest = nbits - half_bits;
+
+      switch (rest)
+        {
+        case 0:
+          if (!rtx_equal_p (hi[0], lo[1]))
+            emit_move_insn (hi[0], lo[1]);
+          break;
+
+        case 1:
+          emit_insn (gen_adddi3 (hi[0], lo[1], lo[1]));
+          break;
+
+        default:
+          emit_insn (gen_ashldi3 (hi[0], lo[1], GEN_INT (rest)));
+          break;
+        }
+
+      ix86_expand_clear (lo[0]);
+      return;
+    }
+
+  if (nbits == 1)
+    {
+      /* Shift by one as low + low followed by high + high + carry.  */
+      rtx flags = gen_rtx_REG (CCCmode, FLAGS_REG);
+      rtx carry = gen_rtx_LTU (TImode, flags, const0_rtx);
+
+      emit_insn (gen_add3_cc_overflow_1 (DImode, lo[0], lo[1], lo[1]));
+      emit_insn (gen_add3_carry (DImode, hi[0], hi[1], hi[1],
+                                 flags, carry));
+      return;
+    }
+
+  emit_insn (gen_x86_64_shld_ndd (hi[0], hi[1], lo[1], GEN_INT (nbits)));
+  emit_insn (gen_ashldi3 (lo[0], lo[1], GEN_INT (nbits)));
+}
+
+/* Split the TImode right shift OPERANDS[0] = OPERANDS[1] >> OPERANDS[2]
+   into DImode instructions for APX NDD.  CODE selects the flavour:
+   ASHIFTRT gives an arithmetic shift, anything else is handled as a
+   logical shift.  SCRATCH may be NULL_RTX; when it is non-null and
+   TARGET_CMOVE holds it is loaded with the fill value (sign copies for
+   an arithmetic shift, zero otherwise) and handed to the
+   x86_shift_adj_1 fix-up for variable counts.  */
+void
+ix86_split_rshift_ndd (enum rtx_code code, rtx *operands, rtx scratch)
+{
+  gcc_assert (TARGET_APX_NDD);
+
+  const bool arith_p = (code == ASHIFTRT);
+  int half_bits = GET_MODE_BITSIZE (TImode) >> 1;
+
+  /* DImode shift generator matching the requested flavour.  */
+  rtx (*gen_half_shift) (rtx, rtx, rtx);
+  if (arith_p)
+    gen_half_shift = gen_ashrdi3;
+  else
+    gen_half_shift = gen_lshrdi3;
+
+  rtx lo[2], hi[2];
+  split_double_mode (TImode, operands, 2, lo, hi);
+
+  /* Variable count: shift both halves, then emit one of the
+     x86_shift_adj_* fix-ups on the result halves.  */
+  if (!CONST_INT_P (operands[2]))
+    {
+      emit_insn (gen_x86_64_shrd_ndd (lo[0], lo[1], hi[1], operands[2]));
+      emit_insn (gen_half_shift (hi[0], hi[1], operands[2]));
+
+      if (TARGET_CMOVE && scratch)
+        {
+          if (arith_p)
+            {
+              emit_move_insn (scratch, hi[0]);
+              emit_insn (gen_half_shift (scratch, scratch,
+                                         GEN_INT (half_bits - 1)));
+            }
+          else
+            ix86_expand_clear (scratch);
+
+          emit_insn (gen_x86_shift_adj_1 (DImode, lo[0], hi[0],
+                                          operands[2], scratch));
+        }
+      else if (arith_p)
+        emit_insn (gen_x86_shift_adj_3 (DImode, lo[0], hi[0],
+                                        operands[2]));
+      else
+        emit_insn (gen_x86_shift_adj_2 (DImode, lo[0], hi[0],
+                                        operands[2]));
+      return;
+    }
+
+  /* Constant count, reduced modulo the TImode width.  */
+  int nbits = INTVAL (operands[2]) & (GET_MODE_BITSIZE (TImode) - 1);
+
+  if (arith_p && nbits == GET_MODE_BITSIZE (TImode) - 1)
+    {
+      /* Arithmetic shift by width - 1: both result halves are the high
+         source half shifted right by half_bits - 1.  */
+      emit_insn (gen_half_shift (hi[0], hi[1], GEN_INT (half_bits - 1)));
+      emit_move_insn (lo[0], hi[0]);
+      return;
+    }
+
+  if (nbits < half_bits)
+    {
+      emit_insn (gen_x86_64_shrd_ndd (lo[0], lo[1], hi[1],
+                                      GEN_INT (nbits)));
+      emit_insn (gen_half_shift (hi[0], hi[1], GEN_INT (nbits)));
+      return;
+    }
+
+  /* Count of at least half the width: the high result half is pure
+     fill, and the low result half comes from the high source half.  */
+  if (arith_p)
+    emit_insn (gen_half_shift (hi[0], hi[1], GEN_INT (half_bits - 1)));
+  else
+    ix86_expand_clear (hi[0]);
+
+  if (nbits == half_bits)
+    emit_move_insn (lo[0], hi[1]);
+  else
+    emit_insn (gen_half_shift (lo[0], hi[1],
+                               GEN_INT (nbits - half_bits)));
+}
+
/* Expand move of V1TI mode register X to a new TI mode register. */
static rtx
ix86_expand_v1ti_to_ti (rtx x)
@@ -174,8 +174,10 @@ extern void x86_initialize_trampoline (rtx, rtx, rtx);
extern rtx ix86_zero_extend_to_Pmode (rtx);
extern void ix86_split_long_move (rtx[]);
extern void ix86_split_ashl (rtx *, rtx, machine_mode);
+extern void ix86_split_ashl_ndd (rtx *, rtx);
extern void ix86_split_ashr (rtx *, rtx, machine_mode);
extern void ix86_split_lshr (rtx *, rtx, machine_mode);
+extern void ix86_split_rshift_ndd (enum rtx_code, rtx *, rtx);
extern void ix86_expand_v1ti_shift (enum rtx_code, rtx[]);
extern void ix86_expand_v1ti_rotate (enum rtx_code, rtx[]);
extern void ix86_expand_v1ti_ashiftrt (rtx[]);
@@ -14420,13 +14420,14 @@ (define_insn_and_split "*ashl<dwi>3_doubleword_mask_1"
})
+;; Double-word left shift, always emitted as "#" and split later.
+;; Operand 0 is earlyclobber in BOTH alternatives: the split sequences
+;; write one half of the destination and afterwards still read a half of
+;; operand 1 and the count in operand 2, so the destination must never
+;; partially overlap an input.  The second alternative (source in a
+;; different register) is only enabled for the apx_ndd isa.
(define_insn "ashl<mode>3_doubleword"
- [(set (match_operand:DWI 0 "register_operand" "=&r")
- (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n")
- (match_operand:QI 2 "nonmemory_operand" "<S>c")))
+ [(set (match_operand:DWI 0 "register_operand" "=&r,&r")
+ (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n,r")
+ (match_operand:QI 2 "nonmemory_operand" "<S>c,<S>c")))
(clobber (reg:CC FLAGS_REG))]
""
"#"
- [(set_attr "type" "multi")])
+ [(set_attr "type" "multi")
+ (set_attr "isa" "*,apx_ndd")])
(define_split
[(set (match_operand:DWI 0 "register_operand")
@@ -14435,7 +14436,15 @@ (define_split
(clobber (reg:CC FLAGS_REG))]
"epilogue_completed"
[(const_int 0)]
- "ix86_split_ashl (operands, NULL_RTX, <MODE>mode); DONE;")
+{
+ /* Use the NDD splitter only when APX NDD is enabled and operand 1 is a
+    register different from operand 0; every other case (including a
+    constant operand 1) goes through ix86_split_ashl.  No scratch
+    register is supplied on this path.
+    NOTE(review): ix86_split_ashl_ndd always splits as TImode, while this
+    pattern iterates over DWI -- presumably TARGET_APX_NDD implies a
+    64-bit target so <MODE>mode is TImode here; confirm.  */
+ if (TARGET_APX_NDD
+ && !rtx_equal_p (operands[0], operands[1])
+ && REG_P (operands[1]))
+ ix86_split_ashl_ndd (operands, NULL_RTX);
+ else
+ ix86_split_ashl (operands, NULL_RTX, <MODE>mode);
+ DONE;
+})
;; By default we don't ask for a scratch register, because when DWImode
;; values are manipulated, registers are already at a premium. But if
@@ -14451,7 +14460,15 @@ (define_peephole2
(match_dup 3)]
"TARGET_CMOVE"
[(const_int 0)]
- "ix86_split_ashl (operands, operands[3], <DWI>mode); DONE;")
+{
+ if (TARGET_APX_NDD
+ && !rtx_equal_p (operands[0], operands[1])
+ && (REG_P (operands[1])))
+ ix86_split_ashl_ndd (operands, operands[3]);
+ else
+ ix86_split_ashl (operands, operands[3], <DWI>mode);
+ DONE;
+})
(define_insn_and_split "*ashl<dwi>3_doubleword_highpart"
[(set (match_operand:<DWI> 0 "register_operand" "=r")
@@ -15708,16 +15725,24 @@ (define_insn_and_split "*<insn><dwi>3_doubleword_mask_1"
})
+;; Double-word right shifts (any_shiftrt), always emitted as "#" and
+;; split once epilogue_completed holds.  Operand 0 is earlyclobber in
+;; BOTH alternatives: the split sequences write the low half of the
+;; destination and afterwards still read the high half of operand 1 and
+;; the count in operand 2, so the destination must never partially
+;; overlap an input.  The second alternative (source in a different
+;; register) is only enabled for the apx_ndd isa.
(define_insn_and_split "<insn><mode>3_doubleword"
- [(set (match_operand:DWI 0 "register_operand" "=&r")
- (any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0")
- (match_operand:QI 2 "nonmemory_operand" "<S>c")))
+ [(set (match_operand:DWI 0 "register_operand" "=&r,&r")
+ (any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0,r")
+ (match_operand:QI 2 "nonmemory_operand" "<S>c,<S>c")))
(clobber (reg:CC FLAGS_REG))]
""
"#"
"epilogue_completed"
[(const_int 0)]
- "ix86_split_<insn> (operands, NULL_RTX, <MODE>mode); DONE;"
- [(set_attr "type" "multi")])
+{
+ /* Use the NDD splitter only when APX NDD is enabled and the source and
+    destination differ; otherwise keep the existing in-place splitter.  */
+ if (TARGET_APX_NDD
+ && !rtx_equal_p (operands[0], operands[1]))
+ ix86_split_rshift_ndd (<CODE>, operands, NULL_RTX);
+ else
+ ix86_split_<insn> (operands, NULL_RTX, <MODE>mode);
+ DONE;
+}
+ [(set_attr "type" "multi")
+ (set_attr "isa" "*,apx_ndd")])
;; By default we don't ask for a scratch register, because when DWImode
;; values are manipulated, registers are already at a premium. But if
@@ -15733,7 +15758,14 @@ (define_peephole2
(match_dup 3)]
"TARGET_CMOVE"
[(const_int 0)]
- "ix86_split_<insn> (operands, operands[3], <DWI>mode); DONE;")
+{
+ if (TARGET_APX_NDD
+ && !rtx_equal_p (operands[0], operands[1]))
+ ix86_split_rshift_ndd (<CODE>, operands, operands[3]);
+ else
+ ix86_split_<insn> (operands, operands[3], <DWI>mode);
+ DONE;
+})
;; Split truncations of double word right shifts into x86_shrd_1.
(define_insn_and_split "<insn><dwi>3_doubleword_lowpart"
new file mode 100644
@@ -0,0 +1,91 @@
+/* { dg-do run { target { int128 && { ! ia32 } } } } */
+/* { dg-require-effective-target apxf } */
+/* { dg-options "-O2" } */
+
+#include <stdlib.h>
+
+#define APX_TARGET __attribute__((noinline, target("apxf")))
+#define NO_APX __attribute__((noinline, target("no-apxf")))
+typedef __uint128_t u128;
+typedef __int128 i128;
+
+/* Define two out-of-line helpers that compute "a op b" on TYPE with a
+   run-time count B: apx_<name><TYPE> is compiled with APX_TARGET and
+   noapx_<name><TYPE> with NO_APX, so the second serves as the reference
+   result regardless of the command-line target flags.  */
+#define TI_SHIFT_FUNC(TYPE, op, name) \
+APX_TARGET \
+TYPE apx_##name##TYPE (TYPE a, char b) \
+{ \
+  return a op b; \
+} \
+NO_APX \
+TYPE noapx_##name##TYPE (TYPE a, char b) \
+{ \
+  return a op b; \
+}
+
+/* Define two helpers that compute "a op i" on TYPE for the constant
+   count I: apx_<name><TYPE>_const is compiled with APX_TARGET and
+   noapx_<name><TYPE>_const with NO_APX.  */
+#define TI_SHIFT_FUNC_CONST(TYPE, i, op, name) \
+APX_TARGET \
+TYPE apx_##name##TYPE##_const (TYPE a) \
+{ \
+ return a op i; \
+} \
+NO_APX \
+TYPE noapx_##name##TYPE##_const (TYPE a) \
+{ \
+ return a op i; \
+}
+
+/* Abort unless apx_<name><TYPE> and noapx_<name><TYPE> agree on VAL.
+   The shift count is taken from a variable named "b" that must be in
+   scope at the point of use.  */
+#define TI_SHIFT_TEST(TYPE, name, val) \
+{\
+ if (apx_##name##TYPE (val, b) != noapx_##name##TYPE (val, b)) \
+ abort (); \
+}
+
+/* Abort unless the apx_ and noapx_ constant-count helpers agree on VAL
+   for each of the four variants <name>1 .. <name>4, which must have been
+   defined with TI_SHIFT_FUNC_CONST.  */
+#define TI_SHIFT_CONST_TEST(TYPE, name, val) \
+{\
+ if (apx_##name##1##TYPE##_const (val) \
+ != noapx_##name##1##TYPE##_const (val)) \
+ abort (); \
+ if (apx_##name##2##TYPE##_const (val) \
+ != noapx_##name##2##TYPE##_const (val)) \
+ abort (); \
+ if (apx_##name##3##TYPE##_const (val) \
+ != noapx_##name##3##TYPE##_const (val)) \
+ abort (); \
+ if (apx_##name##4##TYPE##_const (val) \
+ != noapx_##name##4##TYPE##_const (val)) \
+ abort (); \
+}
+
+/* Variable-count helpers for the three TImode shift kinds.  */
+TI_SHIFT_FUNC(i128, <<, ashl)
+TI_SHIFT_FUNC(i128, >>, ashr)
+TI_SHIFT_FUNC(u128, >>, lshr)
+
+/* Constant-count helpers.  The left-shift counts match the separate
+   constant cases in ix86_split_ashl_ndd: 1 (add/add-with-carry),
+   65 (half width + 1), 64 (exactly half width) and 87 (more than half
+   width).  */
+TI_SHIFT_FUNC_CONST(i128, 1, <<, ashl1)
+TI_SHIFT_FUNC_CONST(i128, 65, <<, ashl2)
+TI_SHIFT_FUNC_CONST(i128, 64, <<, ashl3)
+TI_SHIFT_FUNC_CONST(i128, 87, <<, ashl4)
+/* The right-shift counts match the constant cases in
+   ix86_split_rshift_ndd: 127 (width - 1, special-cased for the
+   arithmetic shift), 87 (more than half width), 27 (less than half
+   width) and 64 (exactly half width).  */
+TI_SHIFT_FUNC_CONST(i128, 127, >>, ashr1)
+TI_SHIFT_FUNC_CONST(i128, 87, >>, ashr2)
+TI_SHIFT_FUNC_CONST(i128, 27, >>, ashr3)
+TI_SHIFT_FUNC_CONST(i128, 64, >>, ashr4)
+TI_SHIFT_FUNC_CONST(u128, 127, >>, lshr1)
+TI_SHIFT_FUNC_CONST(u128, 87, >>, lshr2)
+TI_SHIFT_FUNC_CONST(u128, 27, >>, lshr3)
+TI_SHIFT_FUNC_CONST(u128, 64, >>, lshr4)
+
+/* Compare every APX-compiled shift helper with its non-APX counterpart
+   and abort on the first mismatch.  Returns 0 without testing anything
+   when the running CPU does not report "apxf".  */
+int main (void)
+{
+  if (!__builtin_cpu_supports ("apxf"))
+    return 0;
+
+  /* Both 64-bit halves of each operand are non-zero, and the signed
+     operand is negative, so bits that cross between the halves and the
+     sign fill of the arithmetic shift are visible in the results.  The
+     u128 -> i128 conversion relies on GCC's modulo semantics.  */
+  i128 ival = (i128) (((u128) 0x8234567887654321ULL << 64)
+                      | 0x123456788765432FULL);
+  u128 uval = ((u128) 0xF234567887654321ULL << 64)
+              | 0x0FEDCBA987654321ULL;
+
+  /* Run-time counts on both sides of, and exactly at, half the width.  */
+  static const char counts[] = { 0, 1, 28, 63, 64, 65, 100, 127 };
+
+  for (unsigned int i = 0; i < sizeof counts / sizeof counts[0]; i++)
+    {
+      /* TI_SHIFT_TEST picks up the count from the variable "b".  */
+      char b = counts[i];
+
+      TI_SHIFT_TEST(i128, ashl, ival)
+      TI_SHIFT_TEST(i128, ashr, ival)
+      TI_SHIFT_TEST(u128, lshr, uval)
+    }
+
+  TI_SHIFT_CONST_TEST(i128, ashl, ival)
+  TI_SHIFT_CONST_TEST(i128, ashr, ival)
+  TI_SHIFT_CONST_TEST(u128, lshr, uval)
+
+  return 0;
+}