@@ -63,6 +63,7 @@
#define v8qi_UP V8QImode
#define v4hi_UP V4HImode
+#define v4hf_UP V4HFmode
#define v2si_UP V2SImode
#define v2sf_UP V2SFmode
#define v1df_UP V1DFmode
@@ -70,6 +71,7 @@
#define df_UP DFmode
#define v16qi_UP V16QImode
#define v8hi_UP V8HImode
+#define v8hf_UP V8HFmode
#define v4si_UP V4SImode
#define v4sf_UP V4SFmode
#define v2di_UP V2DImode
@@ -522,6 +524,8 @@ aarch64_simd_builtin_std_type (enum machine_mode mode,
return aarch64_simd_intCI_type_node;
case XImode:
return aarch64_simd_intXI_type_node;
+ case HFmode:
+ return aarch64_fp16_type_node;
case SFmode:
return float_type_node;
case DFmode:
@@ -606,6 +610,8 @@ aarch64_init_simd_builtin_types (void)
aarch64_simd_types[Poly64x2_t].eltype = aarch64_simd_types[Poly64_t].itype;
/* Continue with standard types. */
+ aarch64_simd_types[Float16x4_t].eltype = aarch64_fp16_type_node;
+ aarch64_simd_types[Float16x8_t].eltype = aarch64_fp16_type_node;
aarch64_simd_types[Float32x2_t].eltype = float_type_node;
aarch64_simd_types[Float32x4_t].eltype = float_type_node;
aarch64_simd_types[Float64x1_t].eltype = double_type_node;
@@ -44,6 +44,8 @@
ENTRY (Poly16x8_t, V8HI, poly, 12)
ENTRY (Poly64x1_t, DI, poly, 12)
ENTRY (Poly64x2_t, V2DI, poly, 12)
+ ENTRY (Float16x4_t, V4HF, none, 13)
+ ENTRY (Float16x8_t, V8HF, none, 13)
ENTRY (Float32x2_t, V2SF, none, 13)
ENTRY (Float32x4_t, V4SF, none, 13)
ENTRY (Float64x1_t, V1DF, none, 13)
@@ -345,11 +345,11 @@
VAR1 (UNOP, float_extend_lo_, 0, v2df)
VAR1 (UNOP, float_truncate_lo_, 0, v2sf)
- /* Implemented by aarch64_ld1<VALL:mode>. */
- BUILTIN_VALL (LOAD1, ld1, 0)
+ /* Implemented by aarch64_ld1<VALL_F16:mode>. */
+ BUILTIN_VALL_F16 (LOAD1, ld1, 0)
- /* Implemented by aarch64_st1<VALL:mode>. */
- BUILTIN_VALL (STORE1, st1, 0)
+ /* Implemented by aarch64_st1<VALL_F16:mode>. */
+ BUILTIN_VALL_F16 (STORE1, st1, 0)
/* Implemented by fma<mode>4. */
BUILTIN_VDQF (TERNOP, fma, 4)
@@ -19,8 +19,8 @@
;; <http://www.gnu.org/licenses/>.
(define_expand "mov<mode>"
- [(set (match_operand:VALL 0 "nonimmediate_operand" "")
- (match_operand:VALL 1 "general_operand" ""))]
+ [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
+ (match_operand:VALL_F16 1 "general_operand" ""))]
"TARGET_SIMD"
"
if (GET_CODE (operands[0]) == MEM)
@@ -2397,7 +2397,7 @@
(define_insn "aarch64_get_lane<mode>"
[(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
(vec_select:<VEL>
- (match_operand:VALL 1 "register_operand" "w, w, w")
+ (match_operand:VALL_F16 1 "register_operand" "w, w, w")
(parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
"TARGET_SIMD"
{
@@ -4036,8 +4036,9 @@
)
(define_insn "aarch64_be_ld1<mode>"
- [(set (match_operand:VALLDI 0 "register_operand" "=w")
- (unspec:VALLDI [(match_operand:VALLDI 1 "aarch64_simd_struct_operand" "Utv")]
+ [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
+ (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
+ "aarch64_simd_struct_operand" "Utv")]
UNSPEC_LD1))]
"TARGET_SIMD"
"ld1\\t{%0<Vmtype>}, %1"
@@ -4045,8 +4046,8 @@
)
(define_insn "aarch64_be_st1<mode>"
- [(set (match_operand:VALLDI 0 "aarch64_simd_struct_operand" "=Utv")
- (unspec:VALLDI [(match_operand:VALLDI 1 "register_operand" "w")]
+ [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
+ (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
UNSPEC_ST1))]
"TARGET_SIMD"
"st1\\t{%1<Vmtype>}, %0"
@@ -4303,16 +4304,16 @@
DONE;
})
-(define_expand "aarch64_ld1<VALL:mode>"
- [(match_operand:VALL 0 "register_operand")
+(define_expand "aarch64_ld1<VALL_F16:mode>"
+ [(match_operand:VALL_F16 0 "register_operand")
(match_operand:DI 1 "register_operand")]
"TARGET_SIMD"
{
- machine_mode mode = <VALL:MODE>mode;
+ machine_mode mode = <VALL_F16:MODE>mode;
rtx mem = gen_rtx_MEM (mode, operands[1]);
if (BYTES_BIG_ENDIAN)
- emit_insn (gen_aarch64_be_ld1<VALL:mode> (operands[0], mem));
+ emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
else
emit_move_insn (operands[0], mem);
DONE;
@@ -4671,16 +4672,16 @@
DONE;
})
-(define_expand "aarch64_st1<VALL:mode>"
+(define_expand "aarch64_st1<VALL_F16:mode>"
[(match_operand:DI 0 "register_operand")
- (match_operand:VALL 1 "register_operand")]
+ (match_operand:VALL_F16 1 "register_operand")]
"TARGET_SIMD"
{
- machine_mode mode = <VALL:MODE>mode;
+ machine_mode mode = <VALL_F16:MODE>mode;
rtx mem = gen_rtx_MEM (mode, operands[0]);
if (BYTES_BIG_ENDIAN)
- emit_insn (gen_aarch64_be_st1<VALL:mode> (mem, operands[1]));
+ emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
else
emit_move_insn (mem, operands[1]);
DONE;
@@ -1003,6 +1003,9 @@ aarch64_split_simd_move (rtx dst, rtx src)
case V2DImode:
gen = gen_aarch64_split_simd_movv2di;
break;
+ case V8HFmode:
+ gen = gen_aarch64_split_simd_movv8hf;
+ break;
case V4SFmode:
gen = gen_aarch64_split_simd_movv4sf;
break;
@@ -7792,6 +7795,7 @@ aarch64_vector_mode_supported_p (machine_mode mode)
|| mode == V2SImode || mode == V4HImode
|| mode == V8QImode || mode == V2SFmode
|| mode == V4SFmode || mode == V2DFmode
+ || mode == V4HFmode || mode == V8HFmode
|| mode == V1DFmode))
return true;
@@ -36,6 +36,7 @@ typedef __Int8x8_t int8x8_t;
typedef __Int16x4_t int16x4_t;
typedef __Int32x2_t int32x2_t;
typedef __Int64x1_t int64x1_t;
+typedef __Float16x4_t float16x4_t;
typedef __Float32x2_t float32x2_t;
typedef __Poly8x8_t poly8x8_t;
typedef __Poly16x4_t poly16x4_t;
@@ -48,6 +49,7 @@ typedef __Int8x16_t int8x16_t;
typedef __Int16x8_t int16x8_t;
typedef __Int32x4_t int32x4_t;
typedef __Int64x2_t int64x2_t;
+typedef __Float16x8_t float16x8_t;
typedef __Float32x4_t float32x4_t;
typedef __Float64x2_t float64x2_t;
typedef __Poly8x16_t poly8x16_t;
@@ -63,6 +65,7 @@ typedef __Poly16_t poly16_t;
typedef __Poly64_t poly64_t;
typedef __Poly128_t poly128_t;
+typedef __fp16 float16_t;
typedef float float32_t;
typedef double float64_t;
@@ -2447,6 +2450,12 @@ vcreate_p16 (uint64_t __a)
/* vget_lane */
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vget_lane_f16 (float16x4_t __a, const int __b)
+{
+ return __aarch64_vget_lane_any (__a, __b);
+}
+
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vget_lane_f32 (float32x2_t __a, const int __b)
{
@@ -2521,6 +2530,12 @@ vget_lane_u64 (uint64x1_t __a, const int __b)
/* vgetq_lane */
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vgetq_lane_f16 (float16x8_t __a, const int __b)
+{
+ return __aarch64_vget_lane_any (__a, __b);
+}
+
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vgetq_lane_f32 (float32x4_t __a, const int __b)
{
@@ -4181,6 +4196,12 @@ vreinterpretq_u32_p16 (poly16x8_t __a)
/* vset_lane */
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vset_lane_f16 (float16_t __elem, float16x4_t __vec, const int __index)
+{
+ return __aarch64_vset_lane_any (__elem, __vec, __index);
+}
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vset_lane_f32 (float32_t __elem, float32x2_t __vec, const int __index)
{
@@ -4255,6 +4276,12 @@ vset_lane_u64 (uint64_t __elem, uint64x1_t __vec, const int __index)
/* vsetq_lane */
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vsetq_lane_f16 (float16_t __elem, float16x8_t __vec, const int __index)
+{
+ return __aarch64_vset_lane_any (__elem, __vec, __index);
+}
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vsetq_lane_f32 (float32_t __elem, float32x4_t __vec, const int __index)
{
@@ -15372,6 +15399,12 @@ vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
/* vld1 */
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vld1_f16 (const float16_t *__a)
+{
+ return __builtin_aarch64_ld1v4hf (__a);
+}
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_f32 (const float32_t *a)
{
@@ -15451,6 +15484,12 @@ vld1_u64 (const uint64_t *a)
/* vld1q */
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vld1q_f16 (const float16_t *__a)
+{
+ return __builtin_aarch64_ld1v8hf (__a);
+}
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_f32 (const float32_t *a)
{
@@ -15679,6 +15718,12 @@ vld1q_dup_u64 (const uint64_t* __a)
/* vld1_lane */
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vld1_lane_f16 (const float16_t *__src, float16x4_t __vec, const int __lane)
+{
+ return __aarch64_vset_lane_any (*__src, __vec, __lane);
+}
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_lane_f32 (const float32_t *__src, float32x2_t __vec, const int __lane)
{
@@ -15753,6 +15798,12 @@ vld1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
/* vld1q_lane */
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vld1q_lane_f16 (const float16_t *__src, float16x8_t __vec, const int __lane)
+{
+ return __aarch64_vset_lane_any (*__src, __vec, __lane);
+}
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_lane_f32 (const float32_t *__src, float32x4_t __vec, const int __lane)
{
@@ -22416,6 +22467,12 @@ vsrid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
/* vst1 */
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_f16 (float16_t *__a, float16x4_t __b)
+{
+ __builtin_aarch64_st1v4hf (__a, __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_f32 (float32_t *a, float32x2_t b)
{
__builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
@@ -22495,6 +22552,12 @@ vst1_u64 (uint64_t *a, uint64x1_t b)
/* vst1q */
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_f16 (float16_t *__a, float16x8_t __b)
+{
+ __builtin_aarch64_st1v8hf (__a, __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_f32 (float32_t *a, float32x4_t b)
{
__builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
@@ -22575,6 +22638,12 @@ vst1q_u64 (uint64_t *a, uint64x2_t b)
/* vst1_lane */
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_f16 (float16_t *__a, float16x4_t __b, const int __lane)
+{
+ *__a = __aarch64_vget_lane_any (__b, __lane);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_f32 (float32_t *__a, float32x2_t __b, const int __lane)
{
*__a = __aarch64_vget_lane_any (__b, __lane);
@@ -22649,6 +22718,12 @@ vst1_lane_u64 (uint64_t *__a, uint64x1_t __b, const int __lane)
/* vst1q_lane */
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_f16 (float16_t *__a, float16x8_t __b, const int __lane)
+{
+ *__a = __aarch64_vget_lane_any (__b, __lane);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_f32 (float32_t *__a, float32x4_t __b, const int __lane)
{
*__a = __aarch64_vget_lane_any (__b, __lane);
@@ -58,7 +58,7 @@
(define_mode_iterator VSDQ_I_DI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI DI])
;; Double vector modes.
-(define_mode_iterator VD [V8QI V4HI V2SI V2SF])
+(define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF])
;; vector, 64-bit container, all integer modes
(define_mode_iterator VD_BHSI [V8QI V4HI V2SI])
@@ -67,10 +67,10 @@
(define_mode_iterator VDQ_BHSI [V8QI V16QI V4HI V8HI V2SI V4SI])
;; Quad vector modes.
-(define_mode_iterator VQ [V16QI V8HI V4SI V2DI V4SF V2DF])
+(define_mode_iterator VQ [V16QI V8HI V4SI V2DI V8HF V4SF V2DF])
;; VQ without 2 element modes.
-(define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V4SF])
+(define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V8HF V4SF])
;; Quad vector with only 2 element modes.
(define_mode_iterator VQ_2E [V2DI V2DF])
@@ -113,12 +113,20 @@
;; Vector Float modes with 2 elements.
(define_mode_iterator V2F [V2SF V2DF])
-;; All modes.
+;; All vector modes on which we support any arithmetic operations.
(define_mode_iterator VALL [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF])
-;; All vector modes and DI.
+;; All vector modes, including HF modes on which we cannot operate
+(define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
+ V4HF V8HF V2SF V4SF V2DF])
+
+;; All vector modes barring F16, plus DI.
(define_mode_iterator VALLDI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF DI])
+;; All vector modes and DI.
+(define_mode_iterator VALLDI_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
+ V4HF V8HF V2SF V4SF V2DF DI])
+
;; All vector modes and DI and DF.
(define_mode_iterator VALLDIF [V8QI V16QI V4HI V8HI V2SI V4SI
V2DI V2SF V4SF V2DF DI DF])
@@ -380,7 +388,8 @@
(define_mode_attr Vmtype [(V8QI ".8b") (V16QI ".16b")
(V4HI ".4h") (V8HI ".8h")
(V2SI ".2s") (V4SI ".4s")
- (V2DI ".2d") (V2SF ".2s")
+ (V2DI ".2d") (V4HF ".4h")
+ (V8HF ".8h") (V2SF ".2s")
(V4SF ".4s") (V2DF ".2d")
(DI "") (SI "")
(HI "") (QI "")
@@ -416,6 +425,7 @@
(V4HI "HI") (V8HI "HI")
(V2SI "SI") (V4SI "SI")
(DI "DI") (V2DI "DI")
+ (V4HF "HF") (V8HF "HF")
(V2SF "SF") (V4SF "SF")
(V2DF "DF") (DF "DF")
(SI "SI") (HI "HI")
@@ -434,6 +444,7 @@
(V4HI "V8HI") (V8HI "V8HI")
(V2SI "V4SI") (V4SI "V4SI")
(DI "V2DI") (V2DI "V2DI")
+ (V4HF "V8HF") (V8HF "V8HF")
(V2SF "V2SF") (V4SF "V4SF")
(V2DF "V2DF") (SI "V4SI")
(HI "V8HI") (QI "V16QI")])
@@ -443,10 +454,12 @@
(V4HI "V2HI") (V8HI "V4HI")
(V2SI "SI") (V4SI "V2SI")
(V2DI "DI") (V2SF "SF")
- (V4SF "V2SF") (V2DF "DF")])
+ (V4SF "V2SF") (V4HF "V2HF")
+ (V8HF "V4HF") (V2DF "DF")])
;; Double modes of vector modes.
(define_mode_attr VDBL [(V8QI "V16QI") (V4HI "V8HI")
+ (V4HF "V8HF")
(V2SI "V4SI") (V2SF "V4SF")
(SI "V2SI") (DI "V2DI")
(DF "V2DF")])
@@ -557,6 +570,7 @@
(define_mode_attr nregs [(OI "2") (CI "3") (XI "4")])
(define_mode_attr VRL2 [(V8QI "V32QI") (V4HI "V16HI")
+ (V4HF "V16HF")
(V2SI "V8SI") (V2SF "V8SF")
(DI "V4DI") (DF "V4DF")
(V16QI "V32QI") (V8HI "V16HI")
@@ -564,16 +578,20 @@
(V2DI "V4DI") (V2DF "V4DF")])
(define_mode_attr VRL3 [(V8QI "V48QI") (V4HI "V24HI")
+ (V4HF "V24HF")
(V2SI "V12SI") (V2SF "V12SF")
(DI "V6DI") (DF "V6DF")
(V16QI "V48QI") (V8HI "V24HI")
+ (V8HF "V48HF")
(V4SI "V12SI") (V4SF "V12SF")
(V2DI "V6DI") (V2DF "V6DF")])
(define_mode_attr VRL4 [(V8QI "V64QI") (V4HI "V32HI")
+ (V4HF "V32HF")
(V2SI "V16SI") (V2SF "V16SF")
(DI "V8DI") (DF "V8DF")
(V16QI "V64QI") (V8HI "V32HI")
+ (V8HF "V32HF")
(V4SI "V16SI") (V4SF "V16SF")
(V2DI "V8DI") (V2DF "V8DF")])
@@ -586,6 +604,7 @@
(V2SI "V2SI") (V4SI "V2SI")
(DI "V2DI") (V2DI "V2DI")
(V2SF "V2SF") (V4SF "V2SF")
+ (V4HF "SF") (V8HF "SF")
(DF "V2DI") (V2DF "V2DI")])
;; Similar, for three elements.
@@ -594,6 +613,7 @@
(V2SI "BLK") (V4SI "BLK")
(DI "EI") (V2DI "EI")
(V2SF "BLK") (V4SF "BLK")
+ (V4HF "BLK") (V8HF "BLK")
(DF "EI") (V2DF "EI")])
;; Similar, for four elements.
@@ -602,6 +622,7 @@
(V2SI "V4SI") (V4SI "V4SI")
(DI "OI") (V2DI "OI")
(V2SF "V4SF") (V4SF "V4SF")
+ (V4HF "V4HF") (V8HF "V4HF")
(DF "OI") (V2DF "OI")])
@@ -660,6 +681,7 @@
(V4HI "") (V8HI "_q")
(V2SI "") (V4SI "_q")
(DI "") (V2DI "_q")
+ (V4HF "") (V8HF "_q")
(V2SF "") (V4SF "_q")
(V2DF "_q")
(QI "") (HI "") (SI "") (DI "") (SF "") (DF "")])
@@ -13,6 +13,7 @@ void f3 (uint8x8_t a) {}
void f4 (uint16x4_t a) {}
void f5 (uint32x2_t a) {}
void f23 (uint64x1_t a) {}
+void f61 (float16x4_t a) {}
void f6 (float32x2_t a) {}
void f7 (poly8x8_t a) {}
void f8 (poly16x4_t a) {}
@@ -25,6 +26,7 @@ void f13 (uint8x16_t a) {}
void f14 (uint16x8_t a) {}
void f15 (uint32x4_t a) {}
void f16 (uint64x2_t a) {}
+void f171 (float16x8_t a) {}
void f17 (float32x4_t a) {}
void f18 (float64x2_t a) {}
void f19 (poly8x16_t a) {}
@@ -42,6 +44,7 @@ void g1 (int8x16_t, int8x16_t) {}
// { dg-final { scan-assembler "_Z2f412__Uint16x4_t:" } }
// { dg-final { scan-assembler "_Z2f512__Uint32x2_t:" } }
// { dg-final { scan-assembler "_Z3f2312__Uint64x1_t:" } }
+// { dg-final { scan-assembler "_Z3f6113__Float16x4_t:" } }
// { dg-final { scan-assembler "_Z2f613__Float32x2_t:" } }
// { dg-final { scan-assembler "_Z2f711__Poly8x8_t:" } }
// { dg-final { scan-assembler "_Z2f812__Poly16x4_t:" } }
@@ -53,6 +56,7 @@ void g1 (int8x16_t, int8x16_t) {}
// { dg-final { scan-assembler "_Z3f1412__Uint16x8_t:" } }
// { dg-final { scan-assembler "_Z3f1512__Uint32x4_t:" } }
// { dg-final { scan-assembler "_Z3f1612__Uint64x2_t:" } }
+// { dg-final { scan-assembler "_Z4f17113__Float16x8_t:" } }
// { dg-final { scan-assembler "_Z3f1713__Float32x4_t:" } }
// { dg-final { scan-assembler "_Z3f1813__Float64x2_t:" } }
// { dg-final { scan-assembler "_Z3f1912__Poly8x16_t:" } }
@@ -31,6 +31,7 @@ THING (int8x8_t, 8, int8_t, _s8) \
THING (uint8x8_t, 8, uint8_t, _u8) \
THING (int16x4_t, 4, int16_t, _s16) \
THING (uint16x4_t, 4, uint16_t, _u16) \
+THING (float16x4_t, 4, float16_t, _f16) \
THING (int32x2_t, 2, int32_t, _s32) \
THING (uint32x2_t, 2, uint32_t, _u32) \
THING (float32x2_t, 2, float32_t, _f32) \
@@ -38,8 +39,10 @@ THING (int8x16_t, 16, int8_t, q_s8) \
THING (uint8x16_t, 16, uint8_t, q_u8) \
THING (int16x8_t, 8, int16_t, q_s16) \
THING (uint16x8_t, 8, uint16_t, q_u16) \
+THING (float16x8_t, 8, float16_t, q_f16)\
THING (int32x4_t, 4, int32_t, q_s32) \
THING (uint32x4_t, 4, uint32_t, q_u32) \
+THING (float32x4_t, 4, float32_t, q_f32)\
THING (int64x2_t, 2, int64_t, q_s64) \
THING (uint64x2_t, 2, uint64_t, q_u64) \
THING (float64x2_t, 2, float64_t, q_f64)
@@ -16,6 +16,7 @@ VARIANT (int32, , 2, _s32, 0) \
VARIANT (int64, , 1, _s64, 0) \
VARIANT (poly8, , 8, _p8, 7) \
VARIANT (poly16, , 4, _p16, 2) \
+VARIANT (float16, , 4, _f16, 3) \
VARIANT (float32, , 2, _f32, 1) \
VARIANT (float64, , 1, _f64, 0) \
VARIANT (uint8, q, 16, _u8, 13) \
@@ -28,6 +29,7 @@ VARIANT (int32, q, 4, _s32, 1) \
VARIANT (int64, q, 2, _s64, 1) \
VARIANT (poly8, q, 16, _p8, 7) \
VARIANT (poly16, q, 8, _p16, 4) \
+VARIANT (float16, q, 8, _f16, 3)\
VARIANT (float32, q, 4, _f32, 2)\
VARIANT (float64, q, 2, _f64, 1)
@@ -56,7 +58,7 @@ VARIANTS (TESTMETH)
#define CHECK(BASE, Q, ELTS, SUFFIX, LANE) \
if (test_vld1##Q##_lane##SUFFIX ((const BASE##_t *)orig_data, \
- BASE##_data) != 0) \
+ & BASE##_data) != 0) \
abort ();
int
@@ -65,20 +67,20 @@ main (int argc, char **argv)
/* Original data for all vector formats. */
uint64_t orig_data[2] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL};
- /* Data with which vldN_lane will overwrite some of previous. */
- uint8_t uint8_data[4] = { 7, 11, 13, 17 };
- uint16_t uint16_data[4] = { 257, 263, 269, 271 };
- uint32_t uint32_data[4] = { 65537, 65539, 65543, 65551 };
- uint64_t uint64_data[4] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL,
- 0xfedcba9876543210LL, 0xdeadbabecafebeefLL };
- int8_t int8_data[4] = { -1, 3, -5, 7 };
- int16_t int16_data[4] = { 257, -259, 261, -263 };
- int32_t int32_data[4] = { 123456789, -987654321, -135792468, 975318642 };
- int64_t *int64_data = (int64_t *)uint64_data;
- poly8_t poly8_data[4] = { 0, 7, 13, 18, };
- poly16_t poly16_data[4] = { 11111, 2222, 333, 44 };
- float32_t float32_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
- float64_t float64_data[4] = { 1.010010001, 12345.6789, -9876.54321, 1.618 };
+ /* Data with which vld1_lane will overwrite one element of previous. */
+ uint8_t uint8_data = 7;
+ uint16_t uint16_data = 257;
+ uint32_t uint32_data = 65537;
+ uint64_t uint64_data = 0xdeadbeefcafebabeULL;
+ int8_t int8_data = -1;
+ int16_t int16_data = -259;
+ int32_t int32_data = -987654321;
+ int64_t int64_data = 0x1234567890abcdefLL;
+ poly8_t poly8_data = 13;
+ poly16_t poly16_data = 11111;
+ float16_t float16_data = 8.75;
+ float32_t float32_data = 3.14159;
+ float64_t float64_data = 1.010010001;
VARIANTS (CHECK);
return 0;
@@ -16,6 +16,7 @@ VARIANT (int32_t, , 2, int32x2_t, _s32, 0) \
VARIANT (int64_t, , 1, int64x1_t, _s64, 0) \
VARIANT (poly8_t, , 8, poly8x8_t, _p8, 6) \
VARIANT (poly16_t, , 4, poly16x4_t, _p16, 2) \
+VARIANT (float16_t, , 4, float16x4_t, _f16, 3) \
VARIANT (float32_t, , 2, float32x2_t, _f32, 1) \
VARIANT (float64_t, , 1, float64x1_t, _f64, 0) \
VARIANT (uint8_t, q, 16, uint8x16_t, _u8, 11) \
@@ -28,6 +29,7 @@ VARIANT (int32_t, q, 4, int32x4_t, _s32, 3) \
VARIANT (int64_t, q, 2, int64x2_t, _s64, 0) \
VARIANT (poly8_t, q, 16, poly8x16_t, _p8, 14) \
VARIANT (poly16_t, q, 8, poly16x8_t, _p16, 6) \
+VARIANT (float16_t, q, 8, float16x8_t, _f16, 6) \
VARIANT (float32_t, q, 4, float32x4_t, _f32, 2) \
VARIANT (float64_t, q, 2, float64x2_t, _f64, 1)
@@ -76,6 +78,9 @@ main (int argc, char **argv)
poly8_t poly8_t_data[16] =
{ 0, 7, 13, 18, 22, 25, 27, 28, 29, 31, 34, 38, 43, 49, 56, 64 };
poly16_t poly16_t_data[8] = { 11111, 2222, 333, 44, 5, 65432, 54321, 43210 };
+ float16_t float16_t_data[8] = { 1.25, 4.5, 7.875, 2.3125, 5.675, 8.875,
+ 3.6875, 6.75};
+
float32_t float32_t_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
float64_t float64_t_data[2] = { 1.01001000100001, 12345.6789 };