@@ -119,7 +119,6 @@ (define_c_enum "unspec"
UNSPEC_STVLXL
UNSPEC_STVRX
UNSPEC_STVRXL
- UNSPEC_VADU
UNSPEC_VSLV
UNSPEC_VSRV
UNSPEC_VMULWHUB
@@ -4323,19 +4322,15 @@ (define_insn "*p8v_clz<mode>2"
[(set_attr "type" "vecsimple")])
;; Vector absolute difference unsigned
-(define_expand "vadu<mode>3"
- [(set (match_operand:VI 0 "register_operand")
- (unspec:VI [(match_operand:VI 1 "register_operand")
- (match_operand:VI 2 "register_operand")]
- UNSPEC_VADU))]
- "TARGET_P9_VECTOR")
-
-;; Vector absolute difference unsigned
-(define_insn "p9_vadu<mode>3"
+(define_insn "uabd<mode>3"
[(set (match_operand:VI 0 "register_operand" "=v")
- (unspec:VI [(match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")]
- UNSPEC_VADU))]
+ (minus:VI
+ (umax:VI
+ (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v"))
+ (umin:VI
+ (match_dup 1)
+ (match_dup 2))))]
"TARGET_P9_VECTOR"
"vabsdu<wd> %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -4500,7 +4495,7 @@ (define_expand "usadv16qi"
rtx zero = gen_reg_rtx (V4SImode);
rtx psum = gen_reg_rtx (V4SImode);
- emit_insn (gen_p9_vaduv16qi3 (absd, operands[1], operands[2]));
+ emit_insn (gen_uabdv16qi3 (absd, operands[1], operands[2]));
emit_insn (gen_altivec_vspltisw (zero, const0_rtx));
emit_insn (gen_altivec_vsum4ubs (psum, absd, zero));
emit_insn (gen_addv4si3 (operands[0], psum, operands[3]));
@@ -4521,7 +4516,7 @@ (define_expand "usadv8hi"
rtx zero = gen_reg_rtx (V4SImode);
rtx psum = gen_reg_rtx (V4SImode);
- emit_insn (gen_p9_vaduv8hi3 (absd, operands[1], operands[2]));
+ emit_insn (gen_uabdv8hi3 (absd, operands[1], operands[2]));
emit_insn (gen_altivec_vspltisw (zero, const0_rtx));
emit_insn (gen_altivec_vsum4shs (psum, absd, zero));
emit_insn (gen_addv4si3 (operands[0], psum, operands[3]));
@@ -2377,13 +2377,13 @@
VFIRSTMISMATCHOREOSINDEX_V4SI first_mismatch_or_eos_index_v4si {}
const vsc __builtin_altivec_vadub (vsc, vsc);
- VADUB vaduv16qi3 {}
+ VADUB uabdv16qi3 {}
const vss __builtin_altivec_vaduh (vss, vss);
- VADUH vaduv8hi3 {}
+ VADUH uabdv8hi3 {}
const vsi __builtin_altivec_vaduw (vsi, vsi);
- VADUW vaduv4si3 {}
+ VADUW uabdv4si3 {}
const vsll __builtin_altivec_vbpermd (vsll, vsc);
VBPERMD altivec_vbpermd {}
new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+/* Exploit vector absolute difference unsigned. */
+
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+#define N 128
+#define PRAGMA(X) _Pragma (#X)
+#define UNROLL0 PRAGMA (GCC unroll 0)
+
+#define TEST(T) \
+ void uabd_##T (unsigned T *restrict a, unsigned T *restrict b, \
+ unsigned T *restrict out) \
+ { \
+ UNROLL0 \
+ for (int i = 0; i < N; i++) \
+ out[i] = MAX (a[i], b[i]) - MIN (a[i], b[i]); \
+ }
+
+TEST(char)
+TEST(short)
+TEST(int)
+
+/* { dg-final { scan-assembler-times {\mvabsdub\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvabsduh\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvabsduw\M} 1 } } */
new file mode 100644
@@ -0,0 +1,37 @@
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+/* Exploit vector absolute difference unsigned. */
+
+#define N 128
+#define PRAGMA(X) _Pragma (#X)
+#define UNROLL0 PRAGMA (GCC unroll 0)
+
+#define TEST1(TYPE) \
+ void test1_##TYPE (unsigned TYPE *restrict a, unsigned TYPE *restrict b, \
+ unsigned TYPE *restrict out) \
+ { \
+ UNROLL0 \
+ for (int i = 0; i < N; i++) \
+ out[i] = __builtin_abs (a[i] - b[i]); \
+ }
+
+TEST1(char)
+TEST1(short)
+
+#define TEST2(TYPE1, TYPE2, FUNC) \
+ void test2_##TYPE1 (unsigned TYPE1 *restrict a, unsigned TYPE1 *restrict b, \
+ unsigned TYPE1 *restrict out) \
+ { \
+ UNROLL0 \
+ for (int i = 0; i < N; i++) \
+ out[i] = __builtin_##FUNC ((TYPE2) a[i] - (TYPE2) b[i]); \
+ }
+
+TEST2(char, int, abs)
+TEST2(short, int, abs)
+TEST2(int, long long, llabs)
+
+/* { dg-final { scan-assembler-times {\mvabsdub\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvabsduh\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvabsduw\M} 1 } } */