commit c1ba193030ba81fd69669036c7f706a957f44b5d
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Wed Jun 25 15:00:35 2014 +0100
[Needs-tests][AArch64] Implement some vmul* intrinsics
@@ -1277,6 +1277,12 @@ vmul_f32 (float32x2_t __a, float32x2_t __b)
return __a * __b;
}
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vmul_f64 (float64x1_t __a, float64x1_t __b)
+{
+ return __a * __b;
+}
+
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmul_u8 (uint8x8_t __a, uint8x8_t __b)
{
@@ -8299,19 +8305,6 @@ vmul_n_u32 (uint32x2_t a, uint32_t b)
return result;
}
-#define vmuld_lane_f64(a, b, c) \
- __extension__ \
- ({ \
- float64x2_t b_ = (b); \
- float64_t a_ = (a); \
- float64_t result; \
- __asm__ ("fmul %d0,%d1,%2.d[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
#define vmull_high_lane_s16(a, b, c) \
__extension__ \
({ \
@@ -8828,19 +8821,6 @@ vmulq_n_u32 (uint32x4_t a, uint32_t b)
return result;
}
-#define vmuls_lane_f32(a, b, c) \
- __extension__ \
- ({ \
- float32x4_t b_ = (b); \
- float32_t a_ = (a); \
- float32_t result; \
- __asm__ ("fmul %s0,%s1,%2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmulx_f32 (float32x2_t a, float32x2_t b)
{
@@ -19041,6 +19021,34 @@ vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
return __a * __aarch64_vget_lane_u32 (__b, __lane);
}
+/* vmuld_lane */
+
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vmuld_lane_f64 (float64_t __a, float64x1_t __b, const int __lane)
+{
+ return __a * vget_lane_f64 (__b, __lane);
+}
+
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vmuld_laneq_f64 (float64_t __a, float64x2_t __b, const int __lane)
+{
+ return __a * vgetq_lane_f64 (__b, __lane);
+}
+
+/* vmuls_lane */
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vmuls_lane_f32 (float32_t __a, float32x2_t __b, const int __lane)
+{
+ return __a * vget_lane_f32 (__b, __lane);
+}
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vmuls_laneq_f32 (float32_t __a, float32x4_t __b, const int __lane)
+{
+ return __a * vgetq_lane_f32 (__b, __lane);
+}
+
/* vmul_laneq */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
@@ -19079,6 +19087,14 @@ vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
}
+/* vmul_n */
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vmul_n_f64 (float64x1_t __a, float64_t __b)
+{
+ return (float64x1_t) { vget_lane_f64 (__a, 0) * __b };
+}
+
/* vmulq_lane */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
new file mode 100644
@@ -0,0 +1,30 @@
+/* Test the vmul_f64 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+int
+main (void)
+{
+ volatile float64_t minus_e, pi;
+ float64_t expected, actual;
+
+ pi = 3.14159265359;
+ minus_e = -2.71828;
+
+ expected = pi * minus_e;
+
+ actual = vget_lane_f64 (vmul_f64 ((float64x1_t) { pi },
+ (float64x1_t) { minus_e }), 0);
+ if (expected != actual)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler "fmul\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
new file mode 100644
@@ -0,0 +1,27 @@
+/* Test the vmul_n_f64 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+int
+main (void)
+{
+ volatile float64_t minus_e, pi;
+ float64_t expected, actual;
+
+ pi = 3.14159265359;
+ minus_e = -2.71828;
+
+ expected = pi * minus_e;
+
+ actual = vget_lane_f64 (vmul_n_f64 ((float64x1_t) { pi },
+ minus_e), 0);
+ if (expected != actual)
+ abort ();
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,26 @@
+/* Test the vmuld_lane_f64 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options " -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+int
+main (void)
+{
+ volatile float64_t minus_e, pi;
+ float64_t expected, actual;
+
+ pi = 3.14159265359;
+ minus_e = -2.71828;
+
+ expected = pi * minus_e;
+
+ actual = vmuld_lane_f64 (pi, (float64x1_t) { minus_e }, 0);
+ if (expected != actual)
+ abort ();
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,36 @@
+/* Test the vmuld_laneq_f64 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options " -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+int
+main (void)
+{
+ volatile float64_t minus_e, pi, ln2;
+ float64_t expected, actual;
+ float64x2_t arg2;
+ float64_t arr[2];
+
+ pi = 3.14159265359;
+ arr[0] = minus_e = -2.71828;
+ arr[1] = ln2 = 0.69314718056;
+
+ arg2 = vld1q_f64 (arr);
+ actual = vmuld_laneq_f64 (pi, arg2, 0);
+ expected = pi * minus_e;
+
+ if (expected != actual)
+ abort ();
+
+ expected = pi * ln2;
+ actual = vmuld_laneq_f64 (pi, arg2, 1);
+
+ if (expected != actual)
+ abort ();
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,36 @@
+/* Test the vmuls_lane_f32 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options " -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+int
+main (void)
+{
+ volatile float32_t minus_e, pi, ln2;
+ float32_t expected, actual;
+ float32x2_t arg2;
+ float32_t arr[2];
+
+ pi = 3.14159265359;
+ arr[0] = minus_e = -2.71828;
+ arr[1] = ln2 = 0.69314718056;
+
+ arg2 = vld1_f32 (arr);
+ actual = vmuls_lane_f32 (pi, arg2, 0);
+ expected = pi * minus_e;
+
+ if (expected != actual)
+ abort ();
+
+ expected = pi * ln2;
+ actual = vmuls_lane_f32 (pi, arg2, 1);
+
+ if (expected != actual)
+ abort ();
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,50 @@
+/* Test the vmuls_laneq_f32 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options " -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+int
+main (void)
+{
+ volatile float32_t minus_e, pi, ln2, sqrt2, phi;
+ float32_t expected, actual;
+ float32x4_t arg2;
+ float32_t arr[4];
+
+ pi = 3.14159265359;
+ arr[0] = minus_e = -2.71828;
+ arr[1] = ln2 = 0.69314718056;
+ arr[2] = sqrt2 = 1.41421356237;
+ arr[3] = phi = 1.61803398874;
+
+ arg2 = vld1q_f32 (arr);
+ actual = vmuls_laneq_f32 (pi, arg2, 0);
+ expected = pi * minus_e;
+
+ if (expected != actual)
+ abort ();
+
+ expected = pi * ln2;
+ actual = vmuls_laneq_f32 (pi, arg2, 1);
+
+ if (expected != actual)
+ abort ();
+
+ expected = pi * sqrt2;
+ actual = vmuls_laneq_f32 (pi, arg2, 2);
+
+ if (expected != actual)
+ abort ();
+
+ expected = pi * phi;
+ actual = vmuls_laneq_f32 (pi, arg2, 3);
+
+ if (expected != actual)
+ abort ();
+
+ return 0;
+}