commit 53142c1282ab6a902ed8c5c1afc5089657c4437a
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Tue Jun 17 13:33:57 2014 +0100
[AArch64] Implement some saturating math *laneq_s* intrinsics
@@ -192,9 +192,9 @@
BUILTIN_VSDQ_HSI (BINOP, sqrdmulh, 0)
/* Implemented by aarch64_sq<r>dmulh_lane<q><mode>. */
BUILTIN_VDQHS (TERNOP, sqdmulh_lane, 0)
- BUILTIN_VDQHS (TERNOP, sqdmulh_laneq, 0)
+ BUILTIN_VSDQ_HSI (TERNOP, sqdmulh_laneq, 0)
BUILTIN_VDQHS (TERNOP, sqrdmulh_lane, 0)
- BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq, 0)
+ BUILTIN_VSDQ_HSI (TERNOP, sqrdmulh_laneq, 0)
BUILTIN_SD_HSI (TERNOP, sqdmulh_lane, 0)
BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane, 0)
@@ -2793,8 +2793,8 @@
)
(define_expand "aarch64_sqdmulh_laneq<mode>"
- [(match_operand:VDQHS 0 "register_operand" "")
- (match_operand:VDQHS 1 "register_operand" "")
+ [(match_operand:VSDQ_HSI 0 "register_operand" "")
+ (match_operand:VSDQ_HSI 1 "register_operand" "")
(match_operand:<VCONQ> 2 "register_operand" "")
(match_operand:SI 3 "immediate_operand" "")]
"TARGET_SIMD"
@@ -2810,8 +2810,8 @@
)
(define_expand "aarch64_sqrdmulh_laneq<mode>"
- [(match_operand:VDQHS 0 "register_operand" "")
- (match_operand:VDQHS 1 "register_operand" "")
+ [(match_operand:VSDQ_HSI 0 "register_operand" "")
+ (match_operand:VSDQ_HSI 1 "register_operand" "")
(match_operand:<VCONQ> 2 "register_operand" "")
(match_operand:SI 3 "immediate_operand" "")]
"TARGET_SIMD"
@@ -2890,6 +2890,21 @@
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
+(define_insn "aarch64_sq<r>dmulh_laneq<mode>_internal"
+ [(set (match_operand:SD_HSI 0 "register_operand" "=w")
+ (unspec:SD_HSI
+ [(match_operand:SD_HSI 1 "register_operand" "w")
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
+ VQDMULH))]
+ "TARGET_SIMD"
+ "*
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
+ [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
+)
+
;; vqdml[sa]l
(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
@@ -19426,6 +19426,12 @@ vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
}
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqdmlalh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
+{
+ return __builtin_aarch64_sqdmlal_laneqhi (__a, __b, __c, __d);
+}
+
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlals_s32 (int64_t __a, int32x1_t __b, int32x1_t __c)
{
@@ -19439,6 +19445,12 @@ vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d)
{__builtin_aarch64_sqdmlal_lanesi (__a[0], __b, __c, __d)};
}
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vqdmlals_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqdmlal_laneqsi (__a, __b, __c, __d);
+}
+
/* vqdmlsl */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
@@ -19553,6 +19565,12 @@ vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
}
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqdmlslh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
+{
+ return __builtin_aarch64_sqdmlsl_laneqhi (__a, __b, __c, __d);
+}
+
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlsls_s32 (int64_t __a, int32x1_t __b, int32x1_t __c)
{
@@ -19565,6 +19583,12 @@ vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d)
return (int64x1_t) {__builtin_aarch64_sqdmlsl_lanesi (__a[0], __b, __c, __d)};
}
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vqdmlsls_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqdmlsl_laneqsi (__a, __b, __c, __d);
+}
+
/* vqdmulh */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
@@ -19603,6 +19627,12 @@ vqdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
}
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
+{
+ return __builtin_aarch64_sqdmulh_laneqhi (__a, __b, __c);
+}
+
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
{
@@ -19615,6 +19645,12 @@ vqdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
}
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
+{
+ return __builtin_aarch64_sqdmulh_laneqsi (__a, __b, __c);
+}
+
/* vqdmull */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
@@ -19919,6 +19955,12 @@ vqrdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
}
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqrdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
+{
+ return __builtin_aarch64_sqrdmulh_laneqhi (__a, __b, __c);
+}
+
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
{
@@ -19931,6 +19973,12 @@ vqrdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
}
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqrdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
+{
+ return __builtin_aarch64_sqrdmulh_laneqsi (__a, __b, __c);
+}
+
/* vqrshl */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
new file mode 100644
@@ -0,0 +1,35 @@
+/* Test the vqdmlalh_laneq_s16 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+int
+main (void)
+{
+ int32_t arg1;
+ int16_t arg2;
+ int16x8_t arg3;
+ int32_t actual;
+ int32_t expected;
+
+ arg1 = 0x80000000;
+ arg2 = -24497;
+ arg3 = vcombine_s16 (vcreate_s16 (0x008a80007fff7fffULL),
+ vcreate_s16 (0xfffffa797fff8000ULL));
+
+ actual = vqdmlalh_laneq_s16 (arg1, arg2, arg3, 7);
+ expected = -2147434654;
+
+ if (expected != actual)
+ abort ();
+
+ return 0;
+}
+
+
+/* { dg-final { scan-assembler-times "sqdmlal\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[7\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
new file mode 100644
@@ -0,0 +1,35 @@
+/* Test the vqdmlals_laneq_s32 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+int
+main (void)
+{
+ int64_t arg1;
+ int32_t arg2;
+ int32x4_t arg3;
+ int64_t actual;
+ int64_t expected;
+
+ arg1 = -9223182289494545592LL;
+ arg2 = 32768;
+ arg3 = vcombine_s32 (vcreate_s32 (0xffff7fff8000ffffULL),
+ vcreate_s32 (0x80000000ffff0000ULL));
+
+ actual = vqdmlals_laneq_s32 (arg1, arg2, arg3, 3);
+ expected = -9223323026982900920LL;
+
+ if (expected != actual)
+ abort ();
+
+ return 0;
+}
+
+
+/* { dg-final { scan-assembler-times "sqdmlal\[ \t\]+\[dD\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
new file mode 100644
@@ -0,0 +1,35 @@
+/* Test the vqdmlslh_laneq_s16 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+int
+main (void)
+{
+ int32_t arg1;
+ int16_t arg2;
+ int16x8_t arg3;
+ int32_t actual;
+ int32_t expected;
+
+ arg1 = -2147450881;
+ arg2 = 32767;
+ arg3 = vcombine_s16 (vcreate_s16 (0x359d7fff00007fffULL),
+ vcreate_s16 (0xe678ffff00008000ULL));
+
+ actual = vqdmlslh_laneq_s16 (arg1, arg2, arg3, 4);
+ expected = -32769;
+
+ if (expected != actual)
+ abort ();
+
+ return 0;
+}
+
+
+/* { dg-final { scan-assembler-times "sqdmlsl\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[4\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
new file mode 100644
@@ -0,0 +1,35 @@
+/* Test the vqdmlsls_laneq_s32 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+int
+main (void)
+{
+ int64_t arg1;
+ int32_t arg2;
+ int32x4_t arg3;
+ int64_t actual;
+ int64_t expected;
+
+ arg1 = 140733193453567LL;
+ arg2 = 25544;
+ arg3 = vcombine_s32 (vcreate_s32 (0x417b8000ffff8397LL),
+ vcreate_s32 (0x7fffffff58488000LL));
+
+
+ actual = vqdmlsls_laneq_s32 (arg1, arg2, arg3, 3);
+ expected = 31022548895631LL;
+
+ if (expected != actual)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "sqdmlsl\[ \t\]+\[dD\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
new file mode 100644
@@ -0,0 +1,33 @@
+/* Test the vqdmulhh_laneq_s16 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+int
+main (void)
+{
+ int16_t arg1;
+ int16x8_t arg2;
+ int16_t actual;
+ int16_t expected;
+
+ arg1 = 268;
+ arg2 = vcombine_s16 (vcreate_s16 (0xffffffff00000000ULL),
+ vcreate_s16 (0x0000800018410000ULL));
+
+ actual = vqdmulhh_laneq_s16 (arg1, arg2, 7);
+ expected = 0;
+
+ if (expected != actual)
+ abort ();
+
+ return 0;
+}
+
+
+/* { dg-final { scan-assembler-times "sqdmulh\[ \t\]+\[hH\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[7\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
new file mode 100644
@@ -0,0 +1,33 @@
+/* Test the vqdmulhs_laneq_s32 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+int
+main (void)
+{
+ int32_t arg1;
+ int32x4_t arg2;
+ int32_t actual;
+ int32_t expected;
+
+ arg1 = 0x80000000;
+ arg2 = vcombine_s32 (vcreate_s32 (0x950dffffc4f40000ULL),
+ vcreate_s32 (0x7fff8000274a8000ULL));
+
+ actual = vqdmulhs_laneq_s32 (arg1, arg2, 3);
+ expected = -2147450880;
+
+ if (expected != actual)
+ abort ();
+
+ return 0;
+}
+
+
+/* { dg-final { scan-assembler-times "sqdmulh\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
new file mode 100644
@@ -0,0 +1,33 @@
+/* Test the vqrdmulhh_laneq_s16 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+int
+main (void)
+{
+ int16_t arg1;
+ int16x8_t arg2;
+ int16_t actual;
+ int16_t expected;
+
+ arg1 = 0;
+ arg2 = vcombine_s16 (vcreate_s16 (0x7fffffffa7908000ULL),
+ vcreate_s16 (0x8000d2607fff0000ULL));
+
+ actual = vqrdmulhh_laneq_s16 (arg1, arg2, 7);
+ expected = 0;
+
+ if (expected != actual)
+ abort ();
+
+ return 0;
+}
+
+
+/* { dg-final { scan-assembler-times "sqrdmulh\[ \t\]+\[hH\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[7\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
new file mode 100644
@@ -0,0 +1,32 @@
+/* Test the vqrdmulhs_laneq_s32 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+int
+main (void)
+{
+ int32_t arg1;
+ int32x4_t arg2;
+ int32_t actual;
+ int32_t expected;
+
+ arg1 = 32768;
+ arg2 = vcombine_s32 (vcreate_s32 (0x8000ffffffffcd5bULL),
+ vcreate_s32 (0x7fffffffffffffffULL));
+
+ actual = vqrdmulhs_laneq_s32 (arg1, arg2, 3);
+ expected = 32768;
+
+ if (expected != actual)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "sqrdmulh\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */