@@ -169,6 +169,14 @@ aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers)
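+/* The *_P qualifier sets below are poly variants of the existing sets:
+   operands marked qualifier_poly give the registered builtins
+   poly-typed signatures, and hence suffixed names such as
+   __builtin_aarch64_get_dregoidi_pss used in arm_neon.h.  */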
static enum aarch64_type_qualifiers
+aarch64_types_binop_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_poly, qualifier_none, qualifier_immediate };
+#define TYPES_GETREGP (aarch64_types_binop_imm_p_qualifiers)
+static enum aarch64_type_qualifiers
aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_immediate };
#define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
@@ -187,11 +191,20 @@ aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
static enum aarch64_type_qualifiers
-aarch64_types_ternop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate };
-#define TYPES_SETREG (aarch64_types_ternop_imm_qualifiers)
-#define TYPES_SHIFTINSERT (aarch64_types_ternop_imm_qualifiers)
-#define TYPES_SHIFTACC (aarch64_types_ternop_imm_qualifiers)
+aarch64_types_ternop_s_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_none, qualifier_none, qualifier_poly, qualifier_immediate };
+#define TYPES_SETREGP (aarch64_types_ternop_s_imm_p_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate };
+#define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
+#define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
+#define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)
+
+static enum aarch64_type_qualifiers
+aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_immediate };
+#define TYPES_SHIFTINSERTP (aarch64_types_ternop_p_imm_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS]
@@ -206,6 +219,15 @@ aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define TYPES_COMBINE (aarch64_types_combine_qualifiers)
static enum aarch64_type_qualifiers
+aarch64_types_combine_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_poly, qualifier_poly, qualifier_poly };
+#define TYPES_COMBINEP (aarch64_types_combine_p_qualifiers)
+
+static enum aarch64_type_qualifiers
aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_const_pointer_map_mode };
#define TYPES_LOAD1 (aarch64_types_load1_qualifiers)
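+static enum aarch64_type_qualifiers
+aarch64_types_load1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_poly, qualifier_const_pointer_map_mode };
+#define TYPES_LOAD1P (aarch64_types_load1_p_qualifiers)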
@@ -238,6 +256,10 @@ aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
qualifier_map_mode | qualifier_pointer to build a pointer to the
element type of the vector. */
static enum aarch64_type_qualifiers
+aarch64_types_store1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_void, qualifier_pointer_map_mode, qualifier_poly };
+#define TYPES_STORE1P (aarch64_types_store1_p_qualifiers)
+static enum aarch64_type_qualifiers
aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
#define TYPES_STORE1 (aarch64_types_store1_qualifiers)
@@ -40,6 +40,7 @@
10 - CODE_FOR_<name><mode>. */
BUILTIN_VDC (COMBINE, combine, 0)
+ VAR1 (COMBINEP, combine, 0, di)
BUILTIN_VB (BINOP, pmul, 0)
BUILTIN_VHSDF_HSDF (BINOP, fmulx, 0)
BUILTIN_VHSDF_DF (UNOP, sqrt, 2)
@@ -68,14 +69,23 @@
BUILTIN_VDC (GETREG, get_dregoi, 0)
BUILTIN_VDC (GETREG, get_dregci, 0)
BUILTIN_VDC (GETREG, get_dregxi, 0)
+ VAR1 (GETREGP, get_dregoi, 0, di)
+ VAR1 (GETREGP, get_dregci, 0, di)
+ VAR1 (GETREGP, get_dregxi, 0, di)
/* Implemented by aarch64_get_qreg<VSTRUCT:mode><VQ:mode>. */
BUILTIN_VQ (GETREG, get_qregoi, 0)
BUILTIN_VQ (GETREG, get_qregci, 0)
BUILTIN_VQ (GETREG, get_qregxi, 0)
+ VAR1 (GETREGP, get_qregoi, 0, v2di)
+ VAR1 (GETREGP, get_qregci, 0, v2di)
+ VAR1 (GETREGP, get_qregxi, 0, v2di)
/* Implemented by aarch64_set_qreg<VSTRUCT:mode><VQ:mode>. */
BUILTIN_VQ (SETREG, set_qregoi, 0)
BUILTIN_VQ (SETREG, set_qregci, 0)
BUILTIN_VQ (SETREG, set_qregxi, 0)
+ VAR1 (SETREGP, set_qregoi, 0, v2di)
+ VAR1 (SETREGP, set_qregci, 0, v2di)
+ VAR1 (SETREGP, set_qregxi, 0, v2di)
/* Implemented by aarch64_ld<VSTRUCT:nregs><VDC:mode>. */
BUILTIN_VDC (LOADSTRUCT, ld2, 0)
BUILTIN_VDC (LOADSTRUCT, ld3, 0)
@@ -224,6 +234,7 @@
BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0)
BUILTIN_VSDQ_I_DI (USHIFTACC, usri_n, 0)
BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0)
+ VAR2 (SHIFTINSERTP, ssli_n, 0, di, v2di)
BUILTIN_VSDQ_I_DI (USHIFTACC, usli_n, 0)
/* Implemented by aarch64_<sur>qshl<u>_n<mode>. */
BUILTIN_VSDQ_I (SHIFTIMM_USS, sqshlu_n, 0)
@@ -416,9 +427,11 @@
/* Implemented by aarch64_ld1<VALL_F16:mode>. */
BUILTIN_VALL_F16 (LOAD1, ld1, 0)
+ VAR1 (LOAD1P, ld1, 0, v2di)
/* Implemented by aarch64_st1<VALL_F16:mode>. */
BUILTIN_VALL_F16 (STORE1, st1, 0)
+ VAR1 (STORE1P, st1, 0, v2di)
/* Implemented by fma<mode>4. */
BUILTIN_VHSDF (TERNOP, fma, 4)
@@ -5115,7 +5115,8 @@
rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
int offset = part * 16;
- emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
+ emit_move_insn (temp,
+ gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
DONE;
})
@@ -58,6 +58,7 @@ typedef __Float64x2_t float64x2_t;
typedef __Poly8x16_t poly8x16_t;
typedef __Poly16x8_t poly16x8_t;
typedef __Poly64x2_t poly64x2_t;
+typedef __Poly64x1_t poly64x1_t;
typedef __Uint8x16_t uint8x16_t;
typedef __Uint16x8_t uint16x8_t;
typedef __Uint32x4_t uint32x4_t;
@@ -202,6 +203,36 @@ typedef struct poly16x8x2_t
poly16x8_t val[2];
} poly16x8x2_t;
+typedef struct poly64x1x2_t
+{
+ poly64x1_t val[2];
+} poly64x1x2_t;
+
+typedef struct poly64x1x3_t
+{
+ poly64x1_t val[3];
+} poly64x1x3_t;
+
+typedef struct poly64x1x4_t
+{
+ poly64x1_t val[4];
+} poly64x1x4_t;
+
+typedef struct poly64x2x2_t
+{
+ poly64x2_t val[2];
+} poly64x2x2_t;
+
+typedef struct poly64x2x3_t
+{
+ poly64x2_t val[3];
+} poly64x2x3_t;
+
+typedef struct poly64x2x4_t
+{
+ poly64x2_t val[4];
+} poly64x2x4_t;
+
typedef struct int8x8x3_t
{
int8x8_t val[3];
@@ -476,6 +507,8 @@ typedef struct poly16x8x4_t
__aarch64_vdup_lane_any (p8, , __a, __b)
#define __aarch64_vdup_lane_p16(__a, __b) \
__aarch64_vdup_lane_any (p16, , __a, __b)
+#define __aarch64_vdup_lane_p64(__a, __b) \
+ __aarch64_vdup_lane_any (p64, , __a, __b)
#define __aarch64_vdup_lane_s8(__a, __b) \
__aarch64_vdup_lane_any (s8, , __a, __b)
#define __aarch64_vdup_lane_s16(__a, __b) \
@@ -504,6 +537,8 @@ typedef struct poly16x8x4_t
__aarch64_vdup_lane_any (p8, , __a, __b)
#define __aarch64_vdup_laneq_p16(__a, __b) \
__aarch64_vdup_lane_any (p16, , __a, __b)
+#define __aarch64_vdup_laneq_p64(__a, __b) \
+ __aarch64_vdup_lane_any (p64, , __a, __b)
#define __aarch64_vdup_laneq_s8(__a, __b) \
__aarch64_vdup_lane_any (s8, , __a, __b)
#define __aarch64_vdup_laneq_s16(__a, __b) \
@@ -532,6 +567,8 @@ typedef struct poly16x8x4_t
__aarch64_vdup_lane_any (p8, q, __a, __b)
#define __aarch64_vdupq_lane_p16(__a, __b) \
__aarch64_vdup_lane_any (p16, q, __a, __b)
+#define __aarch64_vdupq_lane_p64(__a, __b) \
+ __aarch64_vdup_lane_any (p64, q, __a, __b)
#define __aarch64_vdupq_lane_s8(__a, __b) \
__aarch64_vdup_lane_any (s8, q, __a, __b)
#define __aarch64_vdupq_lane_s16(__a, __b) \
@@ -560,6 +597,8 @@ typedef struct poly16x8x4_t
__aarch64_vdup_lane_any (p8, q, __a, __b)
#define __aarch64_vdupq_laneq_p16(__a, __b) \
__aarch64_vdup_lane_any (p16, q, __a, __b)
+#define __aarch64_vdupq_laneq_p64(__a, __b) \
+ __aarch64_vdup_lane_any (p64, q, __a, __b)
#define __aarch64_vdupq_laneq_s8(__a, __b) \
__aarch64_vdup_lane_any (s8, q, __a, __b)
#define __aarch64_vdupq_laneq_s16(__a, __b) \
@@ -2735,6 +2774,12 @@ vcreate_p16 (uint64_t __a)
return (poly16x4_t) __a;
}
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vcreate_p64 (uint64_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
/* vget_lane */
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
@@ -2767,6 +2812,12 @@ vget_lane_p16 (poly16x4_t __a, const int __b)
return __aarch64_vget_lane_any (__a, __b);
}
+__extension__ static __inline poly64_t __attribute__ ((__always_inline__))
+vget_lane_p64 (poly64x1_t __a, const int __b)
+{
+ return __aarch64_vget_lane_any (__a, __b);
+}
+
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vget_lane_s8 (int8x8_t __a, const int __b)
{
@@ -2847,6 +2898,12 @@ vgetq_lane_p16 (poly16x8_t __a, const int __b)
return __aarch64_vget_lane_any (__a, __b);
}
+__extension__ static __inline poly64_t __attribute__ ((__always_inline__))
+vgetq_lane_p64 (poly64x2_t __a, const int __b)
+{
+ return __aarch64_vget_lane_any (__a, __b);
+}
+
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vgetq_lane_s8 (int8x16_t __a, const int __b)
{
@@ -2969,6 +3026,12 @@ vreinterpret_p8_p16 (poly16x4_t __a)
return (poly8x8_t) __a;
}
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vreinterpret_p8_p64 (poly64x1_t __a)
+{
+ return (poly8x8_t) __a;
+}
+
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_f64 (float64x2_t __a)
{
@@ -3041,6 +3104,12 @@ vreinterpretq_p8_p16 (poly16x8_t __a)
return (poly8x16_t) __a;
}
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_p8_p64 (poly64x2_t __a)
+{
+ return (poly8x16_t) __a;
+}
+
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_f16 (float16x4_t __a)
{
@@ -3113,6 +3182,12 @@ vreinterpret_p16_p8 (poly8x8_t __a)
return (poly16x4_t) __a;
}
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vreinterpret_p16_p64 (poly64x1_t __a)
+{
+ return (poly16x4_t) __a;
+}
+
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_f64 (float64x2_t __a)
{
@@ -3185,6 +3260,156 @@ vreinterpretq_p16_p8 (poly8x16_t __a)
return (poly16x8_t) __a;
}
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_p16_p64 (poly64x2_t __a)
+{
+ return (poly16x8_t) __a;
+}
+
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vreinterpret_p64_f16 (float16x4_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vreinterpret_p64_f64 (float64x1_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vreinterpret_p64_s8 (int8x8_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vreinterpret_p64_s16 (int16x4_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vreinterpret_p64_s32 (int32x2_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vreinterpret_p64_s64 (int64x1_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vreinterpret_p64_f32 (float32x2_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vreinterpret_p64_u8 (uint8x8_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vreinterpret_p64_u16 (uint16x4_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vreinterpret_p64_u32 (uint32x2_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vreinterpret_p64_u64 (uint64x1_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vreinterpret_p64_p8 (poly8x8_t __a)
+{
+ return (poly64x1_t) __a;
+}
+
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_p64_f64 (float64x2_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_p64_s8 (int8x16_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_p64_s16 (int16x8_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_p64_s32 (int32x4_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_p64_s64 (int64x2_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_p64_f16 (float16x8_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_p64_f32 (float32x4_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_p64_u8 (uint8x16_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_p64_u16 (uint16x8_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_p64_u32 (uint32x4_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_p64_u64 (uint64x2_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_p64_p8 (poly8x16_t __a)
+{
+ return (poly64x2_t) __a;
+}
+
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vreinterpret_f16_f64 (float64x1_t __a)
{
@@ -3257,6 +3482,12 @@ vreinterpret_f16_p16 (poly16x4_t __a)
return (float16x4_t) __a;
}
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vreinterpret_f16_p64 (poly64x1_t __a)
+{
+ return (float16x4_t) __a;
+}
+
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_f64 (float64x2_t __a)
{
@@ -3329,6 +3560,12 @@ vreinterpretq_f16_p16 (poly16x8_t __a)
return (float16x8_t) __a;
}
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_f16_p64 (poly64x2_t __a)
+{
+ return (float16x8_t) __a;
+}
+
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_f16 (float16x4_t __a)
{
@@ -3401,6 +3638,12 @@ vreinterpret_f32_p16 (poly16x4_t __a)
return (float32x2_t) __a;
}
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vreinterpret_f32_p64 (poly64x1_t __a)
+{
+ return (float32x2_t) __a;
+}
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_f16 (float16x8_t __a)
{
@@ -3473,6 +3716,12 @@ vreinterpretq_f32_p16 (poly16x8_t __a)
return (float32x4_t) __a;
}
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_f32_p64 (poly64x2_t __a)
+{
+ return (float32x4_t) __a;
+}
+
__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_f16 (float16x4_t __a)
{
@@ -3498,6 +3747,12 @@ vreinterpret_f64_p16 (poly16x4_t __a)
}
__extension__ static __inline float64x1_t __attribute__((__always_inline__))
+vreinterpret_f64_p64 (poly64x1_t __a)
+{
+ return (float64x1_t) __a;
+}
+
+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_s8 (int8x8_t __a)
{
return (float64x1_t) __a;
@@ -3570,6 +3825,12 @@ vreinterpretq_f64_p16 (poly16x8_t __a)
}
__extension__ static __inline float64x2_t __attribute__((__always_inline__))
+vreinterpretq_f64_p64 (poly64x2_t __a)
+{
+ return (float64x2_t) __a;
+}
+
+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_s8 (int8x16_t __a)
{
return (float64x2_t) __a;
@@ -3689,6 +3950,12 @@ vreinterpret_s64_p16 (poly16x4_t __a)
return (int64x1_t) __a;
}
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vreinterpret_s64_p64 (poly64x1_t __a)
+{
+ return (int64x1_t) __a;
+}
+
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_f64 (float64x2_t __a)
{
@@ -3761,6 +4028,12 @@ vreinterpretq_s64_p16 (poly16x8_t __a)
return (int64x2_t) __a;
}
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_s64_p64 (poly64x2_t __a)
+{
+ return (int64x2_t) __a;
+}
+
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_f16 (float16x4_t __a)
{
@@ -3833,6 +4106,12 @@ vreinterpret_u64_p16 (poly16x4_t __a)
return (uint64x1_t) __a;
}
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vreinterpret_u64_p64 (poly64x1_t __a)
+{
+ return (uint64x1_t) __a;
+}
+
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_f64 (float64x2_t __a)
{
@@ -3905,6 +4184,12 @@ vreinterpretq_u64_p16 (poly16x8_t __a)
return (uint64x2_t) __a;
}
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_u64_p64 (poly64x2_t __a)
+{
+ return (uint64x2_t) __a;
+}
+
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_f16 (float16x4_t __a)
{
@@ -3977,6 +4262,12 @@ vreinterpret_s8_p16 (poly16x4_t __a)
return (int8x8_t) __a;
}
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vreinterpret_s8_p64 (poly64x1_t __a)
+{
+ return (int8x8_t) __a;
+}
+
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_f64 (float64x2_t __a)
{
@@ -4049,6 +4340,12 @@ vreinterpretq_s8_p16 (poly16x8_t __a)
return (int8x16_t) __a;
}
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_s8_p64 (poly64x2_t __a)
+{
+ return (int8x16_t) __a;
+}
+
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_f16 (float16x4_t __a)
{
@@ -4121,6 +4418,12 @@ vreinterpret_s16_p16 (poly16x4_t __a)
return (int16x4_t) __a;
}
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vreinterpret_s16_p64 (poly64x1_t __a)
+{
+ return (int16x4_t) __a;
+}
+
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_f64 (float64x2_t __a)
{
@@ -4193,6 +4496,12 @@ vreinterpretq_s16_p16 (poly16x8_t __a)
return (int16x8_t) __a;
}
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_s16_p64 (poly64x2_t __a)
+{
+ return (int16x8_t) __a;
+}
+
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_f16 (float16x4_t __a)
{
@@ -4265,6 +4574,12 @@ vreinterpret_s32_p16 (poly16x4_t __a)
return (int32x2_t) __a;
}
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vreinterpret_s32_p64 (poly64x1_t __a)
+{
+ return (int32x2_t) __a;
+}
+
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_f64 (float64x2_t __a)
{
@@ -4337,6 +4652,12 @@ vreinterpretq_s32_p16 (poly16x8_t __a)
return (int32x4_t) __a;
}
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_s32_p64 (poly64x2_t __a)
+{
+ return (int32x4_t) __a;
+}
+
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_f16 (float16x4_t __a)
{
@@ -4409,6 +4730,12 @@ vreinterpret_u8_p16 (poly16x4_t __a)
return (uint8x8_t) __a;
}
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vreinterpret_u8_p64 (poly64x1_t __a)
+{
+ return (uint8x8_t) __a;
+}
+
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_f64 (float64x2_t __a)
{
@@ -4481,6 +4808,12 @@ vreinterpretq_u8_p16 (poly16x8_t __a)
return (uint8x16_t) __a;
}
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_u8_p64 (poly64x2_t __a)
+{
+ return (uint8x16_t) __a;
+}
+
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_f16 (float16x4_t __a)
{
@@ -4553,6 +4886,12 @@ vreinterpret_u16_p16 (poly16x4_t __a)
return (uint16x4_t) __a;
}
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vreinterpret_u16_p64 (poly64x1_t __a)
+{
+ return (uint16x4_t) __a;
+}
+
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_f64 (float64x2_t __a)
{
@@ -4625,6 +4964,12 @@ vreinterpretq_u16_p16 (poly16x8_t __a)
return (uint16x8_t) __a;
}
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_u16_p64 (poly64x2_t __a)
+{
+ return (uint16x8_t) __a;
+}
+
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_f16 (float16x4_t __a)
{
@@ -4697,6 +5042,12 @@ vreinterpret_u32_p16 (poly16x4_t __a)
return (uint32x2_t) __a;
}
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vreinterpret_u32_p64 (poly64x1_t __a)
+{
+ return (uint32x2_t) __a;
+}
+
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_f64 (float64x2_t __a)
{
@@ -4769,6 +5120,12 @@ vreinterpretq_u32_p16 (poly16x8_t __a)
return (uint32x4_t) __a;
}
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_u32_p64 (poly64x2_t __a)
+{
+ return (uint32x4_t) __a;
+}
+
/* vset_lane */
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
@@ -4801,6 +5158,12 @@ vset_lane_p16 (poly16_t __elem, poly16x4_t __vec, const int __index)
return __aarch64_vset_lane_any (__elem, __vec, __index);
}
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vset_lane_p64 (poly64_t __elem, poly64x1_t __vec, const int __index)
+{
+ return __aarch64_vset_lane_any (__elem, __vec, __index);
+}
+
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vset_lane_s8 (int8_t __elem, int8x8_t __vec, const int __index)
{
@@ -4881,6 +5244,12 @@ vsetq_lane_p16 (poly16_t __elem, poly16x8_t __vec, const int __index)
return __aarch64_vset_lane_any (__elem, __vec, __index);
}
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vsetq_lane_p64 (poly64_t __elem, poly64x2_t __vec, const int __index)
+{
+ return __aarch64_vset_lane_any (__elem, __vec, __index);
+}
+
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsetq_lane_s8 (int8_t __elem, int8x16_t __vec, const int __index)
{
@@ -4964,6 +5333,12 @@ vget_low_p16 (poly16x8_t __a)
__GET_LOW (p16);
}
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vget_low_p64 (poly64x2_t __a)
+{
+ __GET_LOW (p64);
+}
+
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vget_low_s8 (int8x16_t __a)
{
@@ -5049,6 +5424,12 @@ vget_high_p16 (poly16x8_t __a)
__GET_HIGH (p16);
}
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vget_high_p64 (poly64x2_t __a)
+{
+ __GET_HIGH (p64);
+}
+
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vget_high_s8 (int8x16_t __a)
{
@@ -5182,6 +5563,15 @@ vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
(int16x4_t) __b);
}
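+/* The DImode combine builtin takes scalar operands, so the two single
+   lanes are extracted before the call and the result is cast back to
+   poly64x2_t.  */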
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vcombine_p64 (poly64x1_t __a, poly64x1_t __b)
+{
+ return (poly64x2_t) __builtin_aarch64_combinedi_ppp (__a[0], __b[0]);
+}
+
/* Start of temporary inline asm implementations. */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
@@ -9367,6 +9754,8 @@ __ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8,
int8x16_t)
__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, p16,
int16x8_t)
+__ST2_LANE_FUNC (poly64x1x2_t, poly64x2x2_t, poly64_t, di, v2di_ssps, di, p64,
+ poly64x2_t)
__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8,
int8x16_t)
__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16,
@@ -9402,6 +9791,7 @@ __ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16)
+__ST2_LANE_FUNC (poly64x2x2_t, poly64_t, v2di, di, p64)
__ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8)
__ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16)
__ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32)
@@ -9449,6 +9839,8 @@ __ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8,
int8x16_t)
__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, p16,
int16x8_t)
+__ST3_LANE_FUNC (poly64x1x3_t, poly64x2x3_t, poly64_t, di, v2di_ssps, di, p64,
+ poly64x2_t)
__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8,
int8x16_t)
__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16,
@@ -9484,6 +9876,7 @@ __ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16)
+__ST3_LANE_FUNC (poly64x2x3_t, poly64_t, v2di, di, p64)
__ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8)
__ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16)
__ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32)
@@ -9536,6 +9929,8 @@ __ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8,
int8x16_t)
__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, p16,
int16x8_t)
+__ST4_LANE_FUNC (poly64x1x4_t, poly64x2x4_t, poly64_t, di, v2di_ssps, di, p64,
+ poly64x2_t)
__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8,
int8x16_t)
__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16,
@@ -9571,6 +9966,7 @@ __ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16)
+__ST4_LANE_FUNC (poly64x2x4_t, poly64_t, v2di, di, p64)
__ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8)
__ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16)
__ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32)
@@ -12254,6 +12650,14 @@ vcopy_lane_p16 (poly16x4_t __a, const int __lane1,
__a, __lane1);
}
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vcopy_lane_p64 (poly64x1_t __a, const int __lane1,
+ poly64x1_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcopy_lane_s8 (int8x8_t __a, const int __lane1,
int8x8_t __b, const int __lane2)
@@ -12352,6 +12756,14 @@ vcopy_laneq_p16 (poly16x4_t __a, const int __lane1,
__a, __lane1);
}
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vcopy_laneq_p64 (poly64x1_t __a, const int __lane1,
+ poly64x2_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcopy_laneq_s8 (int8x8_t __a, const int __lane1,
int8x16_t __b, const int __lane2)
@@ -12450,6 +12862,14 @@ vcopyq_lane_p16 (poly16x8_t __a, const int __lane1,
__a, __lane1);
}
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vcopyq_lane_p64 (poly64x2_t __a, const int __lane1,
+ poly64x1_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vcopyq_lane_s8 (int8x16_t __a, const int __lane1,
int8x8_t __b, const int __lane2)
@@ -12548,6 +12968,14 @@ vcopyq_laneq_p16 (poly16x8_t __a, const int __lane1,
__a, __lane1);
}
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vcopyq_laneq_p64 (poly64x2_t __a, const int __lane1,
+ poly64x2_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vcopyq_laneq_s8 (int8x16_t __a, const int __lane1,
int8x16_t __b, const int __lane2)
@@ -13293,6 +13721,12 @@ vdup_n_p16 (poly16_t __a)
return (poly16x4_t) {__a, __a, __a, __a};
}
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vdup_n_p64 (poly64_t __a)
+{
+ return (poly64x1_t) {__a};
+}
+
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_n_s8 (int8_t __a)
{
@@ -13374,6 +13808,12 @@ vdupq_n_p16 (uint32_t __a)
return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vdupq_n_p64 (poly64_t __a)
+{
+ return (poly64x2_t) {__a, __a};
+}
+
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_n_s8 (int32_t __a)
{
@@ -13456,6 +13896,12 @@ vdup_lane_p16 (poly16x4_t __a, const int __b)
return __aarch64_vdup_lane_p16 (__a, __b);
}
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vdup_lane_p64 (poly64x1_t __a, const int __b)
+{
+ return __aarch64_vdup_lane_p64 (__a, __b);
+}
+
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_lane_s8 (int8x8_t __a, const int __b)
{
@@ -13536,6 +13982,12 @@ vdup_laneq_p16 (poly16x8_t __a, const int __b)
return __aarch64_vdup_laneq_p16 (__a, __b);
}
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vdup_laneq_p64 (poly64x2_t __a, const int __b)
+{
+ return __aarch64_vdup_laneq_p64 (__a, __b);
+}
+
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_laneq_s8 (int8x16_t __a, const int __b)
{
@@ -13616,6 +14068,12 @@ vdupq_lane_p16 (poly16x4_t __a, const int __b)
return __aarch64_vdupq_lane_p16 (__a, __b);
}
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vdupq_lane_p64 (poly64x1_t __a, const int __b)
+{
+ return __aarch64_vdupq_lane_p64 (__a, __b);
+}
+
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_lane_s8 (int8x8_t __a, const int __b)
{
@@ -13696,6 +14154,12 @@ vdupq_laneq_p16 (poly16x8_t __a, const int __b)
return __aarch64_vdupq_laneq_p16 (__a, __b);
}
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vdupq_laneq_p64 (poly64x2_t __a, const int __b)
+{
+ return __aarch64_vdupq_laneq_p64 (__a, __b);
+}
+
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_laneq_s8 (int8x16_t __a, const int __b)
{
@@ -14570,6 +15034,12 @@ vld1_p16 (const poly16_t *a)
__builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
}
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vld1_p64 (const poly64_t *a)
+{
+ return (poly64x1_t) {*a};
+}
+
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_s8 (const int8_t *a)
{
@@ -14655,6 +15125,13 @@ vld1q_p16 (const poly16_t *a)
__builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
}
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vld1q_p64 (const poly64_t *a)
+{
+ return (poly64x2_t)
+ __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
+}
+
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_s8 (const int8_t *a)
{
@@ -14739,6 +15216,12 @@ vld1_dup_p16 (const poly16_t* __a)
return vdup_n_p16 (*__a);
}
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vld1_dup_p64 (const poly64_t* __a)
+{
+ return vdup_n_p64 (*__a);
+}
+
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_dup_s8 (const int8_t* __a)
{
@@ -14819,6 +15302,12 @@ vld1q_dup_p16 (const poly16_t* __a)
return vdupq_n_p16 (*__a);
}
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vld1q_dup_p64 (const poly64_t* __a)
+{
+ return vdupq_n_p64 (*__a);
+}
+
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_dup_s8 (const int8_t* __a)
{
@@ -14899,6 +15388,12 @@ vld1_lane_p16 (const poly16_t *__src, poly16x4_t __vec, const int __lane)
return __aarch64_vset_lane_any (*__src, __vec, __lane);
}
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vld1_lane_p64 (const poly64_t *__src, poly64x1_t __vec, const int __lane)
+{
+ return __aarch64_vset_lane_any (*__src, __vec, __lane);
+}
+
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_lane_s8 (const int8_t *__src, int8x8_t __vec, const int __lane)
{
@@ -14979,6 +15474,12 @@ vld1q_lane_p16 (const poly16_t *__src, poly16x8_t __vec, const int __lane)
return __aarch64_vset_lane_any (*__src, __vec, __lane);
}
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vld1q_lane_p64 (const poly64_t *__src, poly64x2_t __vec, const int __lane)
+{
+ return __aarch64_vset_lane_any (*__src, __vec, __lane);
+}
+
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_lane_s8 (const int8_t *__src, int8x16_t __vec, const int __lane)
{
@@ -15084,6 +15585,20 @@ vld2_p8 (const poly8_t * __a)
return ret;
}
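+/* The vld2/3/4 builtins return their result in an opaque structure
+   mode (OI, CI or XI); the individual poly64x1_t values are then
+   extracted with the poly-qualified get_dreg/get_qreg builtins.  */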
+__extension__ static __inline poly64x1x2_t __attribute__ ((__always_inline__))
+vld2_p64 (const poly64_t * __a)
+{
+ poly64x1x2_t ret;
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 0);
+ ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 1);
+ return ret;
+}
+
__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vld2_s16 (const int16_t * __a)
{
@@ -15216,6 +15728,17 @@ vld2q_p16 (const poly16_t * __a)
return ret;
}
+__extension__ static __inline poly64x2x2_t __attribute__ ((__always_inline__))
+vld2q_p64 (const poly64_t * __a)
+{
+ poly64x2x2_t ret;
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 0);
+ ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 1);
+ return ret;
+}
+
__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
vld2q_s32 (const int32_t * __a)
{
@@ -15471,6 +15994,18 @@ vld3_f32 (const float32_t * __a)
return ret;
}
+__extension__ static __inline poly64x1x3_t __attribute__ ((__always_inline__))
+vld3_p64 (const poly64_t * __a)
+{
+ poly64x1x3_t ret;
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 0);
+ ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 1);
+ ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 2);
+ return ret;
+}
+
__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
vld3q_s8 (const int8_t * __a)
{
@@ -15627,6 +16162,18 @@ vld3q_f64 (const float64_t * __a)
return ret;
}
+__extension__ static __inline poly64x2x3_t __attribute__ ((__always_inline__))
+vld3q_p64 (const poly64_t * __a)
+{
+ poly64x2x3_t ret;
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 0);
+ ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 1);
+ ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 2);
+ return ret;
+}
+
__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
vld4_s64 (const int64_t * __a)
{
@@ -15796,6 +16343,19 @@ vld4_f32 (const float32_t * __a)
return ret;
}
+__extension__ static __inline poly64x1x4_t __attribute__ ((__always_inline__))
+vld4_p64 (const poly64_t * __a)
+{
+ poly64x1x4_t ret;
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 0);
+ ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 1);
+ ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 2);
+ ret.val[3] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 3);
+ return ret;
+}
+
__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
vld4q_s8 (const int8_t * __a)
{
@@ -15965,6 +16525,19 @@ vld4q_f64 (const float64_t * __a)
return ret;
}
+__extension__ static __inline poly64x2x4_t __attribute__ ((__always_inline__))
+vld4q_p64 (const poly64_t * __a)
+{
+ poly64x2x4_t ret;
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 0);
+ ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 1);
+ ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 2);
+ ret.val[3] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 3);
+ return ret;
+}
+
/* vldn_dup */
__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
@@ -16088,6 +16661,17 @@ vld2_dup_p16 (const poly16_t * __a)
return ret;
}
+__extension__ static __inline poly64x1x2_t __attribute__ ((__always_inline__))
+vld2_dup_p64 (const poly64_t * __a)
+{
+ poly64x1x2_t ret;
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 0);
+ ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 1);
+ return ret;
+}
+
__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
vld2_dup_s64 (const int64_t * __a)
{
@@ -16253,6 +16838,17 @@ vld2q_dup_f64 (const float64_t * __a)
return ret;
}
+__extension__ static __inline poly64x2x2_t __attribute__ ((__always_inline__))
+vld2q_dup_p64 (const poly64_t * __a)
+{
+ poly64x2x2_t ret;
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 0);
+ ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 1);
+ return ret;
+}
+
__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
vld3_dup_s64 (const int64_t * __a)
{
@@ -16409,6 +17005,18 @@ vld3_dup_f32 (const float32_t * __a)
return ret;
}
+__extension__ static __inline poly64x1x3_t __attribute__ ((__always_inline__))
+vld3_dup_p64 (const poly64_t * __a)
+{
+ poly64x1x3_t ret;
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 0);
+ ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 1);
+ ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 2);
+ return ret;
+}
+
__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
vld3q_dup_s8 (const int8_t * __a)
{
@@ -16565,6 +17173,18 @@ vld3q_dup_f64 (const float64_t * __a)
return ret;
}
+__extension__ static __inline poly64x2x3_t __attribute__ ((__always_inline__))
+vld3q_dup_p64 (const poly64_t * __a)
+{
+ poly64x2x3_t ret;
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 0);
+ ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 1);
+ ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 2);
+ return ret;
+}
+
__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
vld4_dup_s64 (const int64_t * __a)
{
@@ -16734,6 +17354,19 @@ vld4_dup_f32 (const float32_t * __a)
return ret;
}
+__extension__ static __inline poly64x1x4_t __attribute__ ((__always_inline__))
+vld4_dup_p64 (const poly64_t * __a)
+{
+ poly64x1x4_t ret;
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 0);
+ ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 1);
+ ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 2);
+ ret.val[3] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 3);
+ return ret;
+}
+
__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
vld4q_dup_s8 (const int8_t * __a)
{
@@ -16903,6 +17536,19 @@ vld4q_dup_f64 (const float64_t * __a)
return ret;
}
+__extension__ static __inline poly64x2x4_t __attribute__ ((__always_inline__))
+vld4q_dup_p64 (const poly64_t * __a)
+{
+ poly64x2x4_t ret;
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
+ ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 0);
+ ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 1);
+ ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 2);
+ ret.val[3] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 3);
+ return ret;
+}
+
/* vld2_lane */
#define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \
@@ -16939,6 +17585,8 @@ __LD2_LANE_FUNC (poly8x8x2_t, poly8x8_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi,
int8x16_t)
__LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi,
p16, int16x8_t)
+__LD2_LANE_FUNC (poly64x1x2_t, poly64x1_t, poly64x2x2_t, poly64_t, di,
+ v2di_ssps, di, p64, poly64x2_t)
__LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8,
int8x16_t)
__LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16,
@@ -16980,6 +17628,7 @@ __LD2_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD2_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64)
__LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD2_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16)
+__LD2_LANE_FUNC (poly64x2x2_t, poly64x2_t, poly64_t, v2di, di, p64)
__LD2_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD2_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD2_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32)
@@ -17033,6 +17682,8 @@ __LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi,
int8x16_t)
__LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi,
p16, int16x8_t)
+__LD3_LANE_FUNC (poly64x1x3_t, poly64x1_t, poly64x2x3_t, poly64_t, di,
+ v2di_ssps, di, p64, poly64x2_t)
__LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8,
int8x16_t)
__LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16,
@@ -17076,6 +17727,7 @@ __LD3_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD3_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
__LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD3_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16)
+__LD3_LANE_FUNC (poly64x2x3_t, poly64x2_t, poly64_t, v2di, di, p64)
__LD3_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD3_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD3_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32)
@@ -17137,6 +17789,8 @@ __LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi,
int8x16_t)
__LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi,
p16, int16x8_t)
+__LD4_LANE_FUNC (poly64x1x4_t, poly64x1_t, poly64x2x4_t, poly64_t, di,
+ v2di_ssps, di, p64, poly64x2_t)
__LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8,
int8x16_t)
__LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16,
@@ -17182,6 +17836,7 @@ __LD4_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD4_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64)
__LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD4_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16)
+__LD4_LANE_FUNC (poly64x2x4_t, poly64x2_t, poly64_t, v2di, di, p64)
__LD4_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD4_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD4_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32)
@@ -18457,6 +19112,12 @@ vmov_n_p16 (poly16_t __a)
return vdup_n_p16 (__a);
}
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vmov_n_p64 (poly64_t __a)
+{
+ return vdup_n_p64 (__a);
+}
+
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmov_n_s8 (int8_t __a)
{
@@ -18535,6 +19196,12 @@ vmovq_n_p16 (poly16_t __a)
return vdupq_n_p16 (__a);
}
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vmovq_n_p64 (poly64_t __a)
+{
+ return vdupq_n_p64 (__a);
+}
+
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmovq_n_s8 (int8_t __a)
{
@@ -22510,6 +23177,12 @@ vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
return (uint64x1_t) {__builtin_aarch64_usli_ndi_uuus (__a[0], __b[0], __c)};
}
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vsli_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c)
+{
+ return (poly64x1_t) {__builtin_aarch64_ssli_ndi_ppps (__a[0], __b[0], __c)};
+}
+
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
@@ -22558,6 +23231,12 @@ vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c);
}
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vsliq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c)
+{
+ return __builtin_aarch64_ssli_nv2di_ppps (__a, __b, __c);
+}
+
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vslid_n_s64 (int64_t __a, int64_t __b, const int __c)
{
@@ -22924,6 +23603,12 @@ vst1_p16 (poly16_t *a, poly16x4_t b)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_p64 (poly64_t *a, poly64x1_t b)
+{
+ *a = b[0];
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_s8 (int8_t *a, int8x8_t b)
{
__builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
@@ -23009,6 +23694,13 @@ vst1q_p16 (poly16_t *a, poly16x8_t b)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_p64 (poly64_t *a, poly64x2_t b)
+{
+ __builtin_aarch64_st1v2di_sp ((__builtin_aarch64_simd_di *) a,
+ (poly64x2_t) b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_s8 (int8_t *a, int8x16_t b)
{
__builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
@@ -23093,6 +23785,12 @@ vst1_lane_p16 (poly16_t *__a, poly16x4_t __b, const int __lane)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_p64 (poly64_t *__a, poly64x1_t __b, const int __lane)
+{
+ *__a = __aarch64_vget_lane_any (__b, __lane);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_s8 (int8_t *__a, int8x8_t __b, const int __lane)
{
*__a = __aarch64_vget_lane_any (__b, __lane);
@@ -23173,6 +23871,12 @@ vst1q_lane_p16 (poly16_t *__a, poly16x8_t __b, const int __lane)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_p64 (poly64_t *__a, poly64x2_t __b, const int __lane)
+{
+ *__a = __aarch64_vget_lane_any (__b, __lane);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_s8 (int8_t *__a, int8x16_t __b, const int __lane)
{
*__a = __aarch64_vget_lane_any (__b, __lane);
@@ -23379,6 +24083,23 @@ vst2_f32 (float32_t * __a, float32x2x2_t val)
}
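+/* As for the other 64-bit vst2 variants, each D-register value is
+   zero-extended into a Q register to build the OI tuple; st2di then
+   effectively stores only the low 64 bits of each vector.  */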
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_p64 (poly64_t * __a, poly64x1x2_t val)
+{
+ __builtin_aarch64_simd_oi __o;
+ poly64x2x2_t temp;
+ temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
+ (poly64x2_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
+ (poly64x2_t) temp.val[1], 1);
+ __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_s8 (int8_t * __a, int8x16x2_t val)
{
__builtin_aarch64_simd_oi __o;
@@ -23495,6 +24213,17 @@ vst2q_f64 (float64_t * __a, float64x2x2_t val)
__builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
}
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_p64 (poly64_t * __a, poly64x2x2_t val)
+{
+ __builtin_aarch64_simd_oi __o;
+ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
+ (poly64x2_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
+ (poly64x2_t) val.val[1], 1);
+ __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
+}
+
__extension__ static __inline void
vst3_s64 (int64_t * __a, int64x1x3_t val)
{
@@ -23678,6 +24407,23 @@ vst3_f32 (float32_t * __a, float32x2x3_t val)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_p64 (poly64_t * __a, poly64x1x3_t val)
+{
+ __builtin_aarch64_simd_ci __o;
+ poly64x2x3_t temp;
+ temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_p64 (val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
+ (poly64x2_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
+ (poly64x2_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
+ (poly64x2_t) temp.val[2], 2);
+ __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_s8 (int8_t * __a, int8x16x3_t val)
{
__builtin_aarch64_simd_ci __o;
@@ -23807,6 +24553,19 @@ vst3q_f64 (float64_t * __a, float64x2x3_t val)
__builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
}
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_p64 (poly64_t * __a, poly64x2x3_t val)
+{
+ __builtin_aarch64_simd_ci __o;
+ __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
+ (poly64x2_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
+ (poly64x2_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
+ (poly64x2_t) val.val[2], 2);
+ __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
+}
+
__extension__ static __inline void
vst4_s64 (int64_t * __a, int64x1x4_t val)
{
@@ -24016,6 +24775,26 @@ vst4_f32 (float32_t * __a, float32x2x4_t val)
}
__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_p64 (poly64_t * __a, poly64x1x4_t val)
+{
+ __builtin_aarch64_simd_xi __o;
+ poly64x2x4_t temp;
+ temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0)));
+ temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0)));
+ temp.val[2] = vcombine_p64 (val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0)));
+ temp.val[3] = vcombine_p64 (val.val[3], vcreate_p64 (__AARCH64_UINT64_C (0)));
+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
+ (poly64x2_t) temp.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
+ (poly64x2_t) temp.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
+ (poly64x2_t) temp.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
+ (poly64x2_t) temp.val[3], 3);
+ __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s8 (int8_t * __a, int8x16x4_t val)
{
__builtin_aarch64_simd_xi __o;
@@ -24158,6 +24937,21 @@ vst4q_f64 (float64_t * __a, float64x2x4_t val)
__builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
}
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_p64 (poly64_t * __a, poly64x2x4_t val)
+{
+ __builtin_aarch64_simd_xi __o;
+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
+ (poly64x2_t) val.val[0], 0);
+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
+ (poly64x2_t) val.val[1], 1);
+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
+ (poly64x2_t) val.val[2], 2);
+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
+ (poly64x2_t) val.val[3], 3);
+ __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
+}
+
/* vsub */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
@@ -84,6 +84,16 @@ extern size_t strlen(const char *);
fprintf(stderr, "CHECKED %s %s\n", STR(VECT_TYPE(T, W, N)), MSG); \
}
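+/* Crypto variant: like CHECK, but compiled out unless the crypto
+   extension is available; used for the poly64 results, e.g.
+   CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, expected, comment).  */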
+#if defined (__ARM_FEATURE_CRYPTO)
+#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
+ CHECK(MSG,T,W,N,FMT,EXPECTED,COMMENT)
+#else
+#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT)
+#endif
+
/* Floating-point variant. */
#define CHECK_FP(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
{ \
@@ -176,6 +183,9 @@ extern ARRAY(expected, uint, 32, 2);
extern ARRAY(expected, uint, 64, 1);
extern ARRAY(expected, poly, 8, 8);
extern ARRAY(expected, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+extern ARRAY(expected, poly, 64, 1);
+#endif
extern ARRAY(expected, hfloat, 16, 4);
extern ARRAY(expected, hfloat, 32, 2);
extern ARRAY(expected, hfloat, 64, 1);
@@ -189,6 +199,9 @@ extern ARRAY(expected, uint, 32, 4);
extern ARRAY(expected, uint, 64, 2);
extern ARRAY(expected, poly, 8, 16);
extern ARRAY(expected, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+extern ARRAY(expected, poly, 64, 2);
+#endif
extern ARRAY(expected, hfloat, 16, 8);
extern ARRAY(expected, hfloat, 32, 4);
extern ARRAY(expected, hfloat, 64, 2);
@@ -205,6 +218,7 @@ extern ARRAY(expected, hfloat, 64, 2);
CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \
CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \
\
CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \
@@ -217,6 +231,7 @@ extern ARRAY(expected, hfloat, 64, 2);
CHECK(test_name, uint, 64, 2, PRIx64, EXPECTED, comment); \
CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 2, PRIx64, EXPECTED, comment); \
CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
} \
@@ -390,6 +405,9 @@ static void clean_results (void)
CLEAN(result, uint, 64, 1);
CLEAN(result, poly, 8, 8);
CLEAN(result, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ CLEAN(result, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CLEAN(result, float, 16, 4);
#endif
@@ -405,6 +423,9 @@ static void clean_results (void)
CLEAN(result, uint, 64, 2);
CLEAN(result, poly, 8, 16);
CLEAN(result, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ CLEAN(result, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CLEAN(result, float, 16, 8);
#endif
@@ -430,6 +451,14 @@ static void clean_results (void)
#define DECL_VARIABLE(VAR, T1, W, N) \
VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
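+/* Declare a poly64 variable only when the crypto extension is available.  */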
+#if defined (__ARM_FEATURE_CRYPTO)
+#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N) \
+ DECL_VARIABLE(VAR, T1, W, N)
+#else
+#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N)
+#endif
+
/* Declare only 64 bits signed variants. */
#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR) \
DECL_VARIABLE(VAR, int, 8, 8); \
@@ -465,6 +493,7 @@ static void clean_results (void)
DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 8); \
DECL_VARIABLE(VAR, poly, 16, 4); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1); \
DECL_VARIABLE(VAR, float, 16, 4); \
DECL_VARIABLE(VAR, float, 32, 2)
#else
@@ -473,6 +502,7 @@ static void clean_results (void)
DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 8); \
DECL_VARIABLE(VAR, poly, 16, 4); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1); \
DECL_VARIABLE(VAR, float, 32, 2)
#endif
@@ -483,6 +513,7 @@ static void clean_results (void)
DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 16); \
DECL_VARIABLE(VAR, poly, 16, 8); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2); \
DECL_VARIABLE(VAR, float, 16, 8); \
DECL_VARIABLE(VAR, float, 32, 4)
#else
@@ -491,6 +522,7 @@ static void clean_results (void)
DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 16); \
DECL_VARIABLE(VAR, poly, 16, 8); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2); \
DECL_VARIABLE(VAR, float, 32, 4)
#endif
/* Declare all variants. */
@@ -532,6 +564,13 @@ static void clean_results (void)
/* Helpers to call macros with 1 constant and 5 variable
arguments. */
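+/* Expand MACRO only when the crypto extension is available, so that the
+   poly64 variants can be appended to the variant-iteration macros below.  */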
+#if defined (__ARM_FEATURE_CRYPTO)
+#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N) \
+ MACRO(VAR1, VAR2, T1, T2, T3, W, N)
+#else
+#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N)
+#endif
+
#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \
MACRO(VAR, , int, s, 8, 8); \
MACRO(VAR, , int, s, 16, 4); \
@@ -602,13 +641,15 @@ static void clean_results (void)
TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
MACRO(VAR1, VAR2, , poly, p, 8, 8); \
- MACRO(VAR1, VAR2, , poly, p, 16, 4)
+ MACRO(VAR1, VAR2, , poly, p, 16, 4); \
+ MACRO_CRYPTO(MACRO, VAR1, VAR2, , poly, p, 64, 1)
#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \
TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
MACRO(VAR1, VAR2, q, poly, p, 8, 16); \
- MACRO(VAR1, VAR2, q, poly, p, 16, 8)
+ MACRO(VAR1, VAR2, q, poly, p, 16, 8); \
+ MACRO_CRYPTO(MACRO, VAR1, VAR2, q, poly, p, 64, 2)
#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \
TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2); \
new file mode 100644
@@ -0,0 +1,302 @@
+/* This file contains tests for the VLD1, VLD1_DUP, VLD{X}, VLD{X}_DUP
+   and VSLI intrinsics with poly64_t data.  */
+
+/* { dg-options "-march=armv8-a+crypto" } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results: vld1. */
+VECT_VAR_DECL (vld1_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld1_expected,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+
+/* Expected results: vld1_dup. */
+VECT_VAR_DECL (vld1_dup_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld1_dup_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld1_dup_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld1_dup_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+ 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld1_dup_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld1_dup_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff2 };
+
+/* Expected results: vldX. */
+VECT_VAR_DECL (vld2_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld2_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld3_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld3_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld3_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld4_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld4_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld4_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld4_expected_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+
+/* Expected results: vldX_dup. */
+VECT_VAR_DECL (vld2_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld2_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld3_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld3_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld3_dup_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld4_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld4_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld4_dup_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld4_dup_expected_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+
+/* Expected results: vsli. */
+VECT_VAR_DECL (vsli_expected,poly,64,1) [] = { 0x10 };
+VECT_VAR_DECL (vsli_expected,poly,64,2) [] = { 0x7ffffffffffff0,
+ 0x7ffffffffffff1 };
+VECT_VAR_DECL (vsli_expected_max_shift,poly,64,1) [] = { 0x7ffffffffffffff0 };
+VECT_VAR_DECL (vsli_expected_max_shift,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
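+
+/* VSLI inserts the second operand, shifted left by the immediate, into
+   the first: per lane, result = (op2 << n) | (op1 & ((1ULL << n) - 1)).
+   E.g. with op1 = 0xfffffffffffffff0, op2 = 2 and n = 3 the result is
+   (2 << 3) | 0 = 0x10, as in vsli_expected above.  */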
+
+int main (void)
+{
+ int i;
+
+ /* vld1_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VLD1/VLD1Q"
+
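+/* Load a vector from BUF with vld1/vld1q, then store it back with
+   vst1/vst1q into the result buffer for comparison.  */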
+#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N) \
+ VECT_VAR (VAR, T1, W, N) = vld1##Q##_##T2##W (VECT_VAR (BUF, T1, W, N)); \
+ vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), VECT_VAR (VAR, T1, W, N))
+
+ DECL_VARIABLE (vld1_vector, poly, 64, 1);
+ DECL_VARIABLE (vld1_vector, poly, 64, 2);
+
+ CLEAN (result, poly, 64, 1);
+ CLEAN (result, poly, 64, 2);
+
+ VLOAD (vld1_vector, buffer, , poly, p, 64, 1);
+ VLOAD (vld1_vector, buffer, q, poly, p, 64, 2);
+
+ TEST_VLD1 (vld1_vector, buffer, , poly, p, 64, 1);
+ TEST_VLD1 (vld1_vector, buffer, q, poly, p, 64, 2);
+
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld1_expected, "");
+ CHECK (TEST_MSG, poly, 64, 2, PRIx64, vld1_expected, "");
+
+ /* vld1_dup_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VLD1_DUP/VLD1_DUPQ"
+
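+/* Load element i of BUF (i comes from the enclosing loop) with
+   vld1_dup/vld1q_dup, duplicating it across all lanes, and store the
+   result for comparison.  */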
+#define TEST_VLD1_DUP(VAR, BUF, Q, T1, T2, W, N) \
+ VECT_VAR (VAR, T1, W, N) = \
+ vld1##Q##_dup_##T2##W (&VECT_VAR (BUF, T1, W, N)[i]); \
+ vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), VECT_VAR (VAR, T1, W, N))
+
+ DECL_VARIABLE (vld1_dup_vector, poly, 64, 1);
+ DECL_VARIABLE (vld1_dup_vector, poly, 64, 2);
+
+ /* Try to read different places from the input buffer. */
+ for (i=0; i<3; i++)
+ {
+ CLEAN (result, poly, 64, 1);
+ CLEAN (result, poly, 64, 2);
+
+ TEST_VLD1_DUP (vld1_dup_vector, buffer_dup, , poly, p, 64, 1);
+ TEST_VLD1_DUP (vld1_dup_vector, buffer_dup, q, poly, p, 64, 2);
+
+ switch (i)
+ {
+ case 0:
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected0, "");
+ CHECK (TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected0, "");
+ break;
+ case 1:
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected1, "");
+ CHECK (TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected1, "");
+ break;
+ case 2:
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected2, "");
+ CHECK (TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected2, "");
+ break;
+ default:
+ abort ();
+ }
+ }
+
+ /* vldX_p64 tests. */
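+ /* The vldX tests load X poly64x1_t vectors at once, store them all
+ into vldX_result_bis, and then compare one chunk (one vector) at a
+ time against the expected values. */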
+#define DECL_VLDX(T1, W, N, X) \
+ VECT_ARRAY_TYPE (T1, W, N, X) VECT_ARRAY_VAR (vldX_vector, T1, W, N, X); \
+ VECT_VAR_DECL (vldX_result_bis_##X, T1, W, N)[X * N]
+
+#define TEST_VLDX(Q, T1, T2, W, N, X) \
+ VECT_ARRAY_VAR (vldX_vector, T1, W, N, X) = \
+ /* Use dedicated init buffer, of size X. */ \
+ vld##X##Q##_##T2##W (VECT_ARRAY_VAR (buffer_vld##X, T1, W, N, X)); \
+ vst##X##Q##_##T2##W (VECT_VAR (vldX_result_bis_##X, T1, W, N), \
+ VECT_ARRAY_VAR (vldX_vector, T1, W, N, X)); \
+ memcpy (VECT_VAR (result, T1, W, N), \
+ VECT_VAR (vldX_result_bis_##X, T1, W, N), \
+ sizeof (VECT_VAR (result, T1, W, N)));
+
+ /* Overwrite "result" with the contents of "result_bis"[Y]. */
+#define TEST_EXTRA_CHUNK(T1, W, N, X,Y) \
+ memcpy (VECT_VAR (result, T1, W, N), \
+ &(VECT_VAR (vldX_result_bis_##X, T1, W, N)[Y*N]), \
+ sizeof (VECT_VAR (result, T1, W, N)));
+
+ DECL_VLDX (poly, 64, 1, 2);
+ DECL_VLDX (poly, 64, 1, 3);
+ DECL_VLDX (poly, 64, 1, 4);
+
+ VECT_ARRAY_INIT2 (buffer_vld2, poly, 64, 1);
+ PAD (buffer_vld2_pad, poly, 64, 1);
+ VECT_ARRAY_INIT3 (buffer_vld3, poly, 64, 1);
+ PAD (buffer_vld3_pad, poly, 64, 1);
+ VECT_ARRAY_INIT4 (buffer_vld4, poly, 64, 1);
+ PAD (buffer_vld4_pad, poly, 64, 1);
+
+#undef TEST_MSG
+#define TEST_MSG "VLD2/VLD2Q"
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX (, poly, p, 64, 1, 2);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld2_expected_0, "chunk 0");
+ CLEAN (result, poly, 64, 1);
+ TEST_EXTRA_CHUNK (poly, 64, 1, 2, 1);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld2_expected_1, "chunk 1");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD3/VLD3Q"
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX (, poly, p, 64, 1, 3);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_0, "chunk 0");
+ CLEAN (result, poly, 64, 1);
+ TEST_EXTRA_CHUNK (poly, 64, 1, 3, 1);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_1, "chunk 1");
+ CLEAN (result, poly, 64, 1);
+ TEST_EXTRA_CHUNK (poly, 64, 1, 3, 2);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_2, "chunk 2");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD4/VLD4Q"
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX (, poly, p, 64, 1, 4);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_0, "chunk 0");
+ CLEAN (result, poly, 64, 1);
+ TEST_EXTRA_CHUNK (poly, 64, 1, 4, 1);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_1, "chunk 1");
+ CLEAN (result, poly, 64, 1);
+ TEST_EXTRA_CHUNK (poly, 64, 1, 4, 2);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_2, "chunk 2");
+ CLEAN (result, poly, 64, 1);
+ TEST_EXTRA_CHUNK (poly, 64, 1, 4, 3);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_3, "chunk 3");
+
+ /* vldX_dup_p64 tests. */
+#define DECL_VLDX_DUP(T1, W, N, X) \
+ VECT_ARRAY_TYPE (T1, W, N, X) VECT_ARRAY_VAR (vldX_dup_vector, T1, W, N, X); \
+ VECT_VAR_DECL (vldX_dup_result_bis_##X, T1, W, N)[X * N]
+
+#define TEST_VLDX_DUP(Q, T1, T2, W, N, X) \
+ VECT_ARRAY_VAR (vldX_dup_vector, T1, W, N, X) = \
+ vld##X##Q##_dup_##T2##W (&VECT_VAR (buffer_dup, T1, W, N)[0]); \
+ \
+ vst##X##Q##_##T2##W (VECT_VAR (vldX_dup_result_bis_##X, T1, W, N), \
+ VECT_ARRAY_VAR (vldX_dup_vector, T1, W, N, X)); \
+ memcpy (VECT_VAR (result, T1, W, N), \
+ VECT_VAR (vldX_dup_result_bis_##X, T1, W, N), \
+ sizeof (VECT_VAR (result, T1, W, N)));
+
+ /* Overwrite "result" with the contents of "result_bis"[Y]. */
+#define TEST_VLDX_DUP_EXTRA_CHUNK(T1, W, N, X,Y) \
+ memcpy (VECT_VAR (result, T1, W, N), \
+ &(VECT_VAR (vldX_dup_result_bis_##X, T1, W, N)[Y*N]), \
+ sizeof (VECT_VAR (result, T1, W, N)));
+
+ DECL_VLDX_DUP (poly, 64, 1, 2);
+ DECL_VLDX_DUP (poly, 64, 1, 3);
+ DECL_VLDX_DUP (poly, 64, 1, 4);
+
+#undef TEST_MSG
+#define TEST_MSG "VLD2_DUP/VLD2Q_DUP"
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP (, poly, p, 64, 1, 2);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld2_dup_expected_0, "chunk 0");
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 2, 1);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld2_dup_expected_1, "chunk 1");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD3_DUP/VLD3Q_DUP"
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP (, poly, p, 64, 1, 3);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_0, "chunk 0");
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 3, 1);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_1, "chunk 1");
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 3, 2);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_2, "chunk 2");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD4_DUP/VLD4Q_DUP"
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP (, poly, p, 64, 1, 4);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_0, "chunk 0");
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 4, 1);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_1, "chunk 1");
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 4, 2);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_2, "chunk 2");
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 4, 3);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_3, "chunk 3");
+
+ /* vsli_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VSLI"
+
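+/* TEST_VSXI1 does the actual shift-insert and stores the result;
+   TEST_VSXI adds the usual extra level of expansion so that INSN is
+   fully expanded before token pasting.  */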
+#define TEST_VSXI1(INSN, Q, T1, T2, W, N, V) \
+ VECT_VAR (vsXi_vector_res, T1, W, N) = \
+ INSN##Q##_n_##T2##W (VECT_VAR (vsXi_vector, T1, W, N), \
+ VECT_VAR (vsXi_vector2, T1, W, N), \
+ V); \
+ vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), \
+ VECT_VAR (vsXi_vector_res, T1, W, N))
+
+#define TEST_VSXI(INSN, Q, T1, T2, W, N, V) \
+ TEST_VSXI1 (INSN, Q, T1, T2, W, N, V)
+
+ DECL_VARIABLE (vsXi_vector, poly, 64, 1);
+ DECL_VARIABLE (vsXi_vector, poly, 64, 2);
+ DECL_VARIABLE (vsXi_vector2, poly, 64, 1);
+ DECL_VARIABLE (vsXi_vector2, poly, 64, 2);
+ DECL_VARIABLE (vsXi_vector_res, poly, 64, 1);
+ DECL_VARIABLE (vsXi_vector_res, poly, 64, 2);
+
+ CLEAN (result, poly, 64, 1);
+ CLEAN (result, poly, 64, 2);
+
+ VLOAD (vsXi_vector, buffer, , poly, p, 64, 1);
+ VLOAD (vsXi_vector, buffer, q, poly, p, 64, 2);
+
+ VDUP (vsXi_vector2, , poly, p, 64, 1, 2);
+ VDUP (vsXi_vector2, q, poly, p, 64, 2, 3);
+
+ TEST_VSXI (vsli, , poly, p, 64, 1, 3);
+ TEST_VSXI (vsli, q, poly, p, 64, 2, 53);
+
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vsli_expected, "");
+ CHECK (TEST_MSG, poly, 64, 2, PRIx64, vsli_expected, "");
+
+ /* Test cases with maximum shift amount. */
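+ /* With n = 63 only bit 63 of the second operand can be inserted:
+ (2 << 63) wraps to 0 in 64 bits, so the 64x1 result keeps the low
+ 63 bits of the first operand, 0x7ffffffffffffff0. */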
+ CLEAN (result, poly, 64, 1);
+ CLEAN (result, poly, 64, 2);
+
+ TEST_VSXI (vsli, , poly, p, 64, 1, 63);
+ TEST_VSXI (vsli, q, poly, p, 64, 2, 63);
+
+#define COMMENT "(max shift amount)"
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vsli_expected_max_shift, COMMENT);
+ CHECK (TEST_MSG, poly, 64, 2, PRIx64, vsli_expected_max_shift, COMMENT);
+
+ return 0;
+}
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -25,6 +26,9 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0x55, 0x55, 0x55, 0x55 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0x66, 0x66, 0x66, 0x66 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x77 };
+#endif
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0x40533333, 0x40533333 };
VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
@@ -62,6 +66,9 @@ void exec_vcombine (void)
VDUP(vector64_b, , uint, u, 64, 1, 0x88);
VDUP(vector64_b, , poly, p, 8, 8, 0x55);
VDUP(vector64_b, , poly, p, 16, 4, 0x66);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VDUP(vector64_b, , poly, p, 64, 1, 0x77);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VDUP(vector64_b, , float, f, 16, 4, 2.25);
#endif
@@ -80,6 +87,9 @@ void exec_vcombine (void)
TEST_VCOMBINE(uint, u, 64, 1, 2);
TEST_VCOMBINE(poly, p, 8, 8, 16);
TEST_VCOMBINE(poly, p, 16, 4, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VCOMBINE(poly, p, 64, 1, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VCOMBINE(float, f, 16, 4, 8);
#endif
@@ -95,6 +105,9 @@ void exec_vcombine (void)
CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, "");
CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, "");
CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected, "");
+#if defined (__ARM_FEATURE_CRYPTO)
+ CHECK(TEST_MSG, poly, 64, 2, PRIx64, expected, "");
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, "");
#endif
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0x123456789abcdef0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a,
0x78, 0x56, 0x34, 0x12 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0x123456789abcdef0 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x9abcdef0, 0x12345678 };
@@ -49,6 +53,9 @@ FNNAME (INSN_NAME)
DECL_VAL(val, uint, 64, 1);
DECL_VAL(val, poly, 8, 8);
DECL_VAL(val, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ DECL_VAL(val, poly, 64, 1);
+#endif
DECL_VARIABLE(vector_res, int, 8, 8);
DECL_VARIABLE(vector_res, int, 16, 4);
@@ -64,6 +71,9 @@ FNNAME (INSN_NAME)
DECL_VARIABLE(vector_res, uint, 64, 1);
DECL_VARIABLE(vector_res, poly, 8, 8);
DECL_VARIABLE(vector_res, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ DECL_VARIABLE(vector_res, poly, 64, 1);
+#endif
clean_results ();
@@ -82,6 +92,9 @@ FNNAME (INSN_NAME)
VECT_VAR(val, uint, 64, 1) = 0x123456789abcdef0ULL;
VECT_VAR(val, poly, 8, 8) = 0x123456789abcdef0ULL;
VECT_VAR(val, poly, 16, 4) = 0x123456789abcdef0ULL;
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_VAR(val, poly, 64, 1) = 0x123456789abcdef0ULL;
+#endif
TEST_VCREATE(int, s, 8, 8);
TEST_VCREATE(int, s, 16, 4);
@@ -97,6 +110,9 @@ FNNAME (INSN_NAME)
TEST_VCREATE(uint, u, 64, 1);
TEST_VCREATE(poly, p, 8, 8);
TEST_VCREATE(poly, p, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VCREATE(poly, p, 64, 1);
+#endif
CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, "");
CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, "");
@@ -108,6 +124,9 @@ FNNAME (INSN_NAME)
CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, "");
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, "");
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, "");
+#if defined (__ARM_FEATURE_CRYPTO)
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected, "");
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, "");
#endif
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* We test vdup and vmov in the same place since they are aliases. */
@@ -19,6 +20,11 @@ VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0 };
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0,
@@ -63,6 +69,11 @@ VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1 };
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1,
@@ -107,6 +118,11 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2 };
VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2,
@@ -171,6 +187,9 @@ void exec_vdup_vmov (void)
TEST_VDUP(, uint, u, 64, 1);
TEST_VDUP(, poly, p, 8, 8);
TEST_VDUP(, poly, p, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VDUP(, poly, p, 64, 1);
+#endif
TEST_VDUP(, float, f, 32, 2);
TEST_VDUP(q, int, s, 8, 16);
@@ -183,9 +202,13 @@ void exec_vdup_vmov (void)
TEST_VDUP(q, uint, u, 64, 2);
TEST_VDUP(q, poly, p, 8, 16);
TEST_VDUP(q, poly, p, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VDUP(q, poly, p, 64, 2);
+#endif
TEST_VDUP(q, float, f, 32, 4);
- switch (i) {
+ switch (i)
+ {
case 0:
CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
break;
@@ -203,7 +226,8 @@ void exec_vdup_vmov (void)
/* Do the same tests with vmov. Use the same expected results. */
#undef TEST_MSG
#define TEST_MSG "VMOV/VMOVQ"
- for (i=0; i< 3; i++) {
+ for (i=0; i< 3; i++)
+ {
clean_results ();
TEST_VMOV(, int, s, 8, 8);
@@ -216,6 +240,9 @@ void exec_vdup_vmov (void)
TEST_VMOV(, uint, u, 64, 1);
TEST_VMOV(, poly, p, 8, 8);
TEST_VMOV(, poly, p, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VMOV(, poly, p, 64, 1);
+#endif
TEST_VMOV(, float, f, 32, 2);
TEST_VMOV(q, int, s, 8, 16);
@@ -228,9 +255,13 @@ void exec_vdup_vmov (void)
TEST_VMOV(q, uint, u, 64, 2);
TEST_VMOV(q, poly, p, 8, 16);
TEST_VMOV(q, poly, p, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VMOV(q, poly, p, 64, 2);
+#endif
TEST_VMOV(q, float, f, 32, 4);
- switch (i) {
+ switch (i)
+ {
case 0:
CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
break;
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
@@ -27,6 +28,10 @@ VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
0xfffffff1, 0xfffffff1 };
VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0,
0xfffffffffffffff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5,
0xf5, 0xf5, 0xf5, 0xf5,
0xf5, 0xf5, 0xf5, 0xf5,
@@ -43,6 +48,9 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5,
0xf5, 0xf5, 0xf5, 0xf5 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
0xc1700000, 0xc1700000 };
@@ -76,6 +84,9 @@ void exec_vdup_lane (void)
TEST_VDUP_LANE(, uint, u, 64, 1, 1, 0);
TEST_VDUP_LANE(, poly, p, 8, 8, 8, 7);
TEST_VDUP_LANE(, poly, p, 16, 4, 4, 3);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VDUP_LANE(, poly, p, 64, 1, 1, 0);
+#endif
TEST_VDUP_LANE(, float, f, 32, 2, 2, 1);
TEST_VDUP_LANE(q, int, s, 8, 16, 8, 2);
@@ -88,6 +99,9 @@ void exec_vdup_lane (void)
TEST_VDUP_LANE(q, uint, u, 64, 2, 1, 0);
TEST_VDUP_LANE(q, poly, p, 8, 16, 8, 5);
TEST_VDUP_LANE(q, poly, p, 16, 8, 4, 1);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VDUP_LANE(q, poly, p, 64, 2, 1, 0);
+#endif
TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1);
CHECK_RESULTS_NO_FP16 (TEST_MSG, "");
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
@@ -50,6 +54,9 @@ void exec_vget_high (void)
TEST_VGET_HIGH(uint, u, 64, 1, 2);
TEST_VGET_HIGH(poly, p, 8, 8, 16);
TEST_VGET_HIGH(poly, p, 16, 4, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VGET_HIGH(poly, p, 64, 1, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VGET_HIGH(float, f, 16, 4, 8);
#endif
@@ -65,6 +72,7 @@ void exec_vget_high (void)
CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, "");
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, "");
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, "");
+ CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected, "");
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, "");
}
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
int8_t expected_s8 = 0xf7;
@@ -13,6 +14,9 @@ uint32_t expected_u32 = 0xfffffff1;
uint64_t expected_u64 = 0xfffffffffffffff0;
poly8_t expected_p8 = 0xf6;
poly16_t expected_p16 = 0xfff2;
+#if defined (__ARM_FEATURE_CRYPTO)
+poly64_t expected_p64 = 0xfffffffffffffff0;
+#endif
hfloat16_t expected_f16 = 0xcb80;
hfloat32_t expected_f32 = 0xc1700000;
@@ -26,6 +30,9 @@ uint32_t expectedq_u32 = 0xfffffff2;
uint64_t expectedq_u64 = 0xfffffffffffffff1;
poly8_t expectedq_p8 = 0xfe;
poly16_t expectedq_p16 = 0xfff6;
+#if defined (__ARM_FEATURE_CRYPTO)
+poly64_t expectedq_p64 = 0xfffffffffffffff1;
+#endif
hfloat16_t expectedq_f16 = 0xca80;
hfloat32_t expectedq_f32 = 0xc1500000;
@@ -89,6 +96,9 @@ void exec_vget_lane (void)
VAR_DECL(var, uint, 64);
VAR_DECL(var, poly, 8);
VAR_DECL(var, poly, 16);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VAR_DECL(var, poly, 64);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VAR_DECL(var, float, 16);
#endif
@@ -114,6 +124,9 @@ void exec_vget_lane (void)
TEST_VGET_LANE(, uint, u, 64, 1, 0);
TEST_VGET_LANE(, poly, p, 8, 8, 6);
TEST_VGET_LANE(, poly, p, 16, 4, 2);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VGET_LANE(, poly, p, 64, 1, 0);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VGET_LANE_FP(, float, f, 16, 4, 1);
#endif
@@ -129,6 +142,9 @@ void exec_vget_lane (void)
TEST_VGET_LANE(q, uint, u, 64, 2, 1);
TEST_VGET_LANE(q, poly, p, 8, 16, 14);
TEST_VGET_LANE(q, poly, p, 16, 8, 6);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VGET_LANE(q, poly, p, 64, 2, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VGET_LANE_FP(q, float, f, 16, 8, 3);
#endif
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
@@ -50,6 +54,9 @@ void exec_vget_low (void)
TEST_VGET_LOW(uint, u, 64, 1, 2);
TEST_VGET_LOW(poly, p, 8, 8, 16);
TEST_VGET_LOW(poly, p, 16, 4, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VGET_LOW(poly, p, 64, 1, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VGET_LOW(float, f, 16, 4, 8);
#endif
@@ -65,6 +72,9 @@ void exec_vget_low (void)
CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, "");
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, "");
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, "");
+ CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected, "");
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, "");
#endif
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -33,7 +37,7 @@ VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2,
- 0xfff3, 0xfff4, 0xfff5,
+ 0xfff3, 0xfff4, 0xfff5,
0xfff6, 0xfff7 };
VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
0xfffffff2, 0xfffffff3 };
@@ -45,6 +49,10 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
/* Chunk 0. */
@@ -17,6 +18,9 @@ VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0 };
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected0,hfloat,16,4) [] = { 0xcc00, 0xcc00, 0xcc00, 0xcc00 };
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
@@ -45,6 +49,10 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0 };
VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected0,hfloat,16,8) [] = { 0xcc00, 0xcc00, 0xcc00, 0xcc00,
0xcc00, 0xcc00, 0xcc00, 0xcc00 };
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000,
@@ -64,6 +72,9 @@ VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1 };
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected1,hfloat,16,4) [] = { 0xcb80, 0xcb80, 0xcb80, 0xcb80 };
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
@@ -92,6 +103,10 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1 };
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected1,hfloat,16,8) [] = { 0xcb80, 0xcb80, 0xcb80, 0xcb80,
0xcb80, 0xcb80, 0xcb80, 0xcb80 };
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
@@ -111,6 +126,9 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2 };
VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected2,hfloat,16,4) [] = { 0xcb00, 0xcb00, 0xcb00, 0xcb00 };
VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
@@ -139,6 +157,10 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2 };
VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected2,hfloat,16,8) [] = { 0xcb00, 0xcb00, 0xcb00, 0xcb00,
0xcb00, 0xcb00, 0xcb00, 0xcb00 };
VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
@@ -18,6 +19,11 @@ VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld2_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -61,6 +67,11 @@ VECT_VAR_DECL(expected_vld2_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld2_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff3 };
+#endif
VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
@@ -104,6 +115,11 @@ VECT_VAR_DECL(expected_vld3_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld3_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld3_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -147,6 +163,11 @@ VECT_VAR_DECL(expected_vld3_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld3_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff3 };
+#endif
VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld3_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
@@ -193,6 +214,11 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7 };
VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff8, 0xfff9,
0xfffa, 0xfffb };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld3_2,poly,64,2) [] = { 0xfffffffffffffff4,
+ 0xfffffffffffffff5 };
+#endif
VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
VECT_VAR_DECL(expected_vld3_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
@@ -238,6 +264,11 @@ VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld4_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld4_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -281,6 +312,11 @@ VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld4_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff3 };
+#endif
VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld4_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
@@ -324,6 +360,11 @@ VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7 };
VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld4_2,poly,64,2) [] = { 0xfffffffffffffff4,
+ 0xfffffffffffffff5 };
+#endif
VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
VECT_VAR_DECL(expected_vld4_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
@@ -367,6 +408,11 @@ VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 };
VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0x8, 0x9, 0xa, 0xb,
0xc, 0xd, 0xe, 0xf };
VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld4_3,poly,64,2) [] = { 0xfffffffffffffff6,
+ 0xfffffffffffffff7 };
+#endif
VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xc400, 0xc200, 0xc000, 0xbc00 };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1200000, 0xc1100000 };
VECT_VAR_DECL(expected_vld4_3,int,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
@@ -424,6 +470,19 @@ void exec_vldX (void)
&(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
sizeof(VECT_VAR(result, T1, W, N)));
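+/* Crypto-only wrappers: forward to the underlying macros when poly64
+   types are available, expand to nothing otherwise.  */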
+#if defined (__ARM_FEATURE_CRYPTO)
+#define DECL_VLDX_CRYPTO(T1, W, N, X) \
+ DECL_VLDX(T1, W, N, X)
+#define TEST_VLDX_CRYPTO(Q, T1, T2, W, N, X) \
+ TEST_VLDX(Q, T1, T2, W, N, X)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X,Y) \
+ TEST_EXTRA_CHUNK(T1, W, N, X,Y)
+#else
+#define DECL_VLDX_CRYPTO(T1, W, N, X)
+#define TEST_VLDX_CRYPTO(Q, T1, T2, W, N, X)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X,Y)
+#endif
+
/* We need all variants in 64 bits, but there is no 64x2 variant. */
#define DECL_ALL_VLDX_NO_FP16(X) \
DECL_VLDX(int, 8, 8, X); \
@@ -436,6 +495,7 @@ void exec_vldX (void)
DECL_VLDX(uint, 64, 1, X); \
DECL_VLDX(poly, 8, 8, X); \
DECL_VLDX(poly, 16, 4, X); \
+ DECL_VLDX_CRYPTO(poly, 64, 1, X); \
DECL_VLDX(float, 32, 2, X); \
DECL_VLDX(int, 8, 16, X); \
DECL_VLDX(int, 16, 8, X); \
@@ -445,6 +505,7 @@ void exec_vldX (void)
DECL_VLDX(uint, 32, 4, X); \
DECL_VLDX(poly, 8, 16, X); \
DECL_VLDX(poly, 16, 8, X); \
+ DECL_VLDX_CRYPTO(poly, 64, 2, X); \
DECL_VLDX(float, 32, 4, X)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -467,6 +528,7 @@ void exec_vldX (void)
TEST_VLDX(, uint, u, 64, 1, X); \
TEST_VLDX(, poly, p, 8, 8, X); \
TEST_VLDX(, poly, p, 16, 4, X); \
+ TEST_VLDX_CRYPTO(, poly, p, 64, 1, X); \
TEST_VLDX(, float, f, 32, 2, X); \
TEST_VLDX(q, int, s, 8, 16, X); \
TEST_VLDX(q, int, s, 16, 8, X); \
@@ -476,6 +538,7 @@ void exec_vldX (void)
TEST_VLDX(q, uint, u, 32, 4, X); \
TEST_VLDX(q, poly, p, 8, 16, X); \
TEST_VLDX(q, poly, p, 16, 8, X); \
+ TEST_VLDX_CRYPTO(q, poly, p, 64, 2, X); \
TEST_VLDX(q, float, f, 32, 4, X)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -498,6 +561,7 @@ void exec_vldX (void)
TEST_EXTRA_CHUNK(uint, 64, 1, X, Y); \
TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \
TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \
+ TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 1, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \
TEST_EXTRA_CHUNK(int, 8, 16, X, Y); \
TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \
@@ -507,6 +571,7 @@ void exec_vldX (void)
TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \
TEST_EXTRA_CHUNK(poly, 8, 16, X, Y); \
TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \
+ TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 2, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -530,6 +595,7 @@ void exec_vldX (void)
CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \
CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \
\
CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \
@@ -540,6 +606,7 @@ void exec_vldX (void)
CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \
CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 2, PRIx64, EXPECTED, comment); \
CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -580,6 +647,10 @@ void exec_vldX (void)
PAD(buffer_vld2_pad, poly, 8, 8);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4);
PAD(buffer_vld2_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 1);
+ PAD(buffer_vld2_pad, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT2(buffer_vld2, float, 16, 4);
PAD(buffer_vld2_pad, float, 16, 4);
@@ -607,6 +678,10 @@ void exec_vldX (void)
PAD(buffer_vld2_pad, poly, 8, 16);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8);
PAD(buffer_vld2_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 2);
+ PAD(buffer_vld2_pad, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT2(buffer_vld2, float, 16, 8);
PAD(buffer_vld2_pad, float, 16, 8);
@@ -635,6 +710,10 @@ void exec_vldX (void)
PAD(buffer_vld3_pad, poly, 8, 8);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4);
PAD(buffer_vld3_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 1);
+ PAD(buffer_vld3_pad, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT3(buffer_vld3, float, 16, 4);
PAD(buffer_vld3_pad, float, 16, 4);
@@ -662,6 +741,10 @@ void exec_vldX (void)
PAD(buffer_vld3_pad, poly, 8, 16);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8);
PAD(buffer_vld3_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 2);
+ PAD(buffer_vld3_pad, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT3(buffer_vld3, float, 16, 8);
PAD(buffer_vld3_pad, float, 16, 8);
@@ -690,6 +773,10 @@ void exec_vldX (void)
PAD(buffer_vld4_pad, poly, 8, 8);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4);
PAD(buffer_vld4_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 1);
+ PAD(buffer_vld4_pad, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT4(buffer_vld4, float, 16, 4);
PAD(buffer_vld4_pad, float, 16, 4);
@@ -717,6 +804,10 @@ void exec_vldX (void)
PAD(buffer_vld4_pad, poly, 8, 16);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8);
PAD(buffer_vld4_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 2);
+ PAD(buffer_vld4_pad, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT4(buffer_vld4, float, 16, 8);
PAD(buffer_vld4_pad, float, 16, 8);
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
@@ -18,6 +19,9 @@ VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
0xf0, 0xf1, 0xf0, 0xf1 };
VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = {0xcc00, 0xcb80, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
@@ -36,6 +40,9 @@ VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
0xf0, 0xf1, 0xf0, 0xf1 };
VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff0, 0xfff1,
0xfff0, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
@@ -56,6 +63,9 @@ VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0,
0xf1, 0xf2, 0xf0, 0xf1 };
VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1,
0xfff2, 0xfff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xcc00 };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
@@ -76,6 +86,9 @@ VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2,
0xf0, 0xf1, 0xf2, 0xf0 };
VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff1, 0xfff2,
0xfff0, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xcb80, 0xcb00, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1800000 };
@@ -96,6 +109,9 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1,
0xf2, 0xf0, 0xf1, 0xf2 };
VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff2, 0xfff0,
0xfff1, 0xfff2 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xcb00, 0xcc00, 0xcb80, 0xcb00 };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1700000, 0xc1600000 };
@@ -114,6 +130,9 @@ VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
@@ -131,6 +150,9 @@ VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
@@ -148,6 +170,9 @@ VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
@@ -165,6 +190,9 @@ VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 };
VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+#endif
VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
@@ -197,6 +225,16 @@ void exec_vldX_dup (void)
&(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
sizeof(VECT_VAR(result, T1, W, N)));
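+/* Crypto-only wrappers, as in the vldX tests: forward to the underlying
+   macros when poly64 types are available, expand to nothing otherwise.  */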
+#if defined (__ARM_FEATURE_CRYPTO)
+#define TEST_VLDX_DUP_CRYPTO(Q, T1, T2, W, N, X) TEST_VLDX_DUP(Q, T1, T2, W, N, X)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X,Y) TEST_EXTRA_CHUNK(T1, W, N, X,Y)
+#define DECL_VLDX_DUP_CRYPTO(T1, W, N, X) DECL_VLDX_DUP(T1, W, N, X)
+#else
+#define TEST_VLDX_DUP_CRYPTO(Q, T1, T2, W, N, X)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X,Y)
+#define DECL_VLDX_DUP_CRYPTO(T1, W, N, X)
+#endif
+
#define DECL_ALL_VLDX_DUP_NO_FP16(X) \
DECL_VLDX_DUP(int, 8, 8, X); \
DECL_VLDX_DUP(int, 16, 4, X); \
@@ -208,6 +246,7 @@ void exec_vldX_dup (void)
DECL_VLDX_DUP(uint, 64, 1, X); \
DECL_VLDX_DUP(poly, 8, 8, X); \
DECL_VLDX_DUP(poly, 16, 4, X); \
+ DECL_VLDX_DUP_CRYPTO(poly, 64, 1, X); \
DECL_VLDX_DUP(float, 32, 2, X)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -229,6 +268,7 @@ void exec_vldX_dup (void)
TEST_VLDX_DUP(, uint, u, 64, 1, X); \
TEST_VLDX_DUP(, poly, p, 8, 8, X); \
TEST_VLDX_DUP(, poly, p, 16, 4, X); \
+ TEST_VLDX_DUP_CRYPTO(, poly, p, 64, 1, X); \
TEST_VLDX_DUP(, float, f, 32, 2, X)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -250,6 +290,7 @@ void exec_vldX_dup (void)
TEST_EXTRA_CHUNK(uint, 64, 1, X, Y); \
TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \
TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \
+ TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 1, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 2, X, Y)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -272,6 +313,7 @@ void exec_vldX_dup (void)
CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \
CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -313,6 +355,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld2_pad, poly, 8, 8);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4);
PAD(buffer_vld2_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 1);
+ PAD(buffer_vld2_pad, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT2(buffer_vld2, float, 16, 4);
PAD(buffer_vld2_pad, float, 16, 4);
@@ -340,6 +386,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld2_pad, poly, 8, 16);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8);
PAD(buffer_vld2_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 2);
+ PAD(buffer_vld2_pad, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT2(buffer_vld2, float, 16, 8);
PAD(buffer_vld2_pad, float, 16, 8);
@@ -368,6 +418,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld3_pad, poly, 8, 8);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4);
PAD(buffer_vld3_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 1);
+ PAD(buffer_vld3_pad, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT3(buffer_vld3, float, 16, 4);
PAD(buffer_vld3_pad, float, 16, 4);
@@ -395,6 +449,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld3_pad, poly, 8, 16);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8);
PAD(buffer_vld3_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 2);
+ PAD(buffer_vld3_pad, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT3(buffer_vld3, float, 16, 8);
PAD(buffer_vld3_pad, float, 16, 8);
@@ -423,6 +481,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld4_pad, poly, 8, 8);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4);
PAD(buffer_vld4_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 1);
+ PAD(buffer_vld4_pad, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT4(buffer_vld4, float, 16, 4);
PAD(buffer_vld4_pad, float, 16, 4);
@@ -450,6 +512,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld4_pad, poly, 8, 16);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8);
PAD(buffer_vld4_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 2);
+ PAD(buffer_vld4_pad, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT4(buffer_vld4, float, 16, 8);
PAD(buffer_vld4_pad, float, 16, 8);
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
@@ -18,6 +19,11 @@ VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld2_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -47,6 +53,11 @@ VECT_VAR_DECL(expected_vld2_1,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld2_1,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+#endif
VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -76,6 +87,11 @@ VECT_VAR_DECL(expected_vld3_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld3_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -105,6 +121,11 @@ VECT_VAR_DECL(expected_vld3_1,uint,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 };
VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xf0, 0xf1, 0xf2, 0xaa };
VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld3_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xaaaaaaaaaaaaaaaa };
+#endif
VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -134,6 +155,11 @@ VECT_VAR_DECL(expected_vld3_2,uint,32,2) [] = { 0xfffffff1, 0xfffffff2 };
VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld3_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+#endif
VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xcb00, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1,
@@ -163,6 +189,11 @@ VECT_VAR_DECL(expected_vld4_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld4_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -192,6 +223,11 @@ VECT_VAR_DECL(expected_vld4_1,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld4_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff3 };
+#endif
VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -221,6 +257,11 @@ VECT_VAR_DECL(expected_vld4_2,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld4_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+#endif
VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -250,6 +291,11 @@ VECT_VAR_DECL(expected_vld4_3,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+#endif
VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
@@ -279,6 +325,9 @@ VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 16, 2);
#endif
@@ -295,6 +344,9 @@ VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 64, 3);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 16, 3);
#endif
@@ -311,6 +363,9 @@ VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 64, 4);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 16, 4);
#endif
@@ -356,6 +411,16 @@ void exec_vldX_lane (void)
&(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
sizeof(VECT_VAR(result, T1, W, N)));
+#if defined (__ARM_FEATURE_CRYPTO)
+#define DECL_VLDX_LANE_CRYPTO(T1, W, N, X) \
+  DECL_VLDX_LANE(T1, W, N, X)
+#define TEST_VLDX_LANE_CRYPTO(Q, T1, T2, W, N, X, L) \
+  TEST_VLDX_LANE(Q, T1, T2, W, N, X, L)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y) \
+  TEST_EXTRA_CHUNK(T1, W, N, X, Y)
+#else
+#define DECL_VLDX_LANE_CRYPTO(T1, W, N, X)
+#define TEST_VLDX_LANE_CRYPTO(Q, T1, T2, W, N, X, L)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y)
+#endif
+
/* We need all variants in 64 bits, but there is no 64x2 variant. */
#define DECL_ALL_VLDX_LANE_NO_FP16(X) \
DECL_VLDX_LANE(int, 8, 8, X); \
@@ -366,11 +431,13 @@ void exec_vldX_lane (void)
DECL_VLDX_LANE(uint, 32, 2, X); \
DECL_VLDX_LANE(poly, 8, 8, X); \
DECL_VLDX_LANE(poly, 16, 4, X); \
+  DECL_VLDX_LANE_CRYPTO(poly, 64, 1, X); \
DECL_VLDX_LANE(int, 16, 8, X); \
DECL_VLDX_LANE(int, 32, 4, X); \
DECL_VLDX_LANE(uint, 16, 8, X); \
DECL_VLDX_LANE(uint, 32, 4, X); \
DECL_VLDX_LANE(poly, 16, 8, X); \
+ DECL_VLDX_LANE_CRYPTO(poly, 64, 2, X); \
DECL_VLDX_LANE(float, 32, 2, X); \
DECL_VLDX_LANE(float, 32, 4, X)
@@ -400,11 +467,13 @@ void exec_vldX_lane (void)
TEST_VLDX_LANE(, uint, u, 32, 2, X, 1); \
TEST_VLDX_LANE(, poly, p, 8, 8, X, 4); \
TEST_VLDX_LANE(, poly, p, 16, 4, X, 3); \
+  TEST_VLDX_LANE_CRYPTO(, poly, p, 64, 1, X, 0); \
TEST_VLDX_LANE(q, int, s, 16, 8, X, 6); \
TEST_VLDX_LANE(q, int, s, 32, 4, X, 2); \
TEST_VLDX_LANE(q, uint, u, 16, 8, X, 5); \
TEST_VLDX_LANE(q, uint, u, 32, 4, X, 0); \
TEST_VLDX_LANE(q, poly, p, 16, 8, X, 5); \
+  TEST_VLDX_LANE_CRYPTO(q, poly, p, 64, 2, X, 0); \
TEST_VLDX_LANE(, float, f, 32, 2, X, 0); \
TEST_VLDX_LANE(q, float, f, 32, 4, X, 2)
@@ -426,11 +495,13 @@ void exec_vldX_lane (void)
TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \
TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \
TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \
+ TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 1, X, Y); \
TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \
TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \
TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \
TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \
TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \
+ TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 2, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
@@ -453,12 +524,14 @@ void exec_vldX_lane (void)
CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \
CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \
CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \
CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \
CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \
CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \
CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 2, PRIx64, EXPECTED, comment); \
CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -489,11 +562,17 @@ void exec_vldX_lane (void)
DUMMY_ARRAY(buffer_src, uint, 32, 2, 4);
DUMMY_ARRAY(buffer_src, poly, 8, 8, 4);
DUMMY_ARRAY(buffer_src, poly, 16, 4, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ DUMMY_ARRAY(buffer_src, poly, 64, 1, 4);
+#endif
DUMMY_ARRAY(buffer_src, int, 16, 8, 4);
DUMMY_ARRAY(buffer_src, int, 32, 4, 4);
DUMMY_ARRAY(buffer_src, uint, 16, 8, 4);
DUMMY_ARRAY(buffer_src, uint, 32, 4, 4);
DUMMY_ARRAY(buffer_src, poly, 16, 8, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ DUMMY_ARRAY(buffer_src, poly, 64, 2, 4);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
DUMMY_ARRAY(buffer_src, float, 16, 4, 4);
DUMMY_ARRAY(buffer_src, float, 16, 8, 4);
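(Aside, not part of the patch: a minimal sketch of the lane-wise reload these vldX_lane cases exercise, assuming the AArch64 forms of the intrinsics; the function name is illustrative. Lane 0 is the only valid lane for the 64x1 variants, which is why the TEST_VLDX_LANE_CRYPTO invocations above pass 0.)

#include <arm_neon.h>

#if defined (__ARM_FEATURE_CRYPTO)
/* Replace lane 0 of each sub-vector of VAL with the two poly64_t
   values read from SRC, leaving any other lanes untouched.  */
poly64x1x2_t
demo_vld2_lane_p64 (const poly64_t *src, poly64x1x2_t val)
{
  return vld2_lane_p64 (src, val, 0);
}
#endif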
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf7, 0x33, 0x33, 0x33,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0x3333, 0x3333, 0x3333 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcb80, 0x3333, 0x3333, 0x3333 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x33333333 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xff, 0x33, 0x33, 0x33,
@@ -25,7 +29,7 @@ VECT_VAR_DECL(expected,int,8,16) [] = { 0xff, 0x33, 0x33, 0x33,
VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff5, 0x3333, 0x3333, 0x3333,
0x3333, 0x3333, 0x3333, 0x3333 };
VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0x33333333,
- 0x33333333, 0x33333333 };
+ 0x33333333, 0x33333333 };
VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff1, 0x3333333333333333 };
VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfa, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33,
@@ -43,6 +47,10 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfa, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff4, 0x3333, 0x3333, 0x3333,
0x3333, 0x3333, 0x3333, 0x3333 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0x3333333333333333 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xc900, 0x3333, 0x3333, 0x3333,
0x3333, 0x3333, 0x3333, 0x3333 };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0x33333333,
@@ -72,6 +80,9 @@ void exec_vst1_lane (void)
TEST_VST1_LANE(, uint, u, 64, 1, 0);
TEST_VST1_LANE(, poly, p, 8, 8, 6);
TEST_VST1_LANE(, poly, p, 16, 4, 2);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VST1_LANE(, poly, p, 64, 1, 0);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VST1_LANE(, float, f, 16, 4, 1);
#endif
@@ -87,6 +98,9 @@ void exec_vst1_lane (void)
TEST_VST1_LANE(q, uint, u, 64, 2, 0);
TEST_VST1_LANE(q, poly, p, 8, 16, 10);
TEST_VST1_LANE(q, poly, p, 16, 8, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VST1_LANE(q, poly, p, 64, 2, 0);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VST1_LANE(q, float, f, 16, 8, 6);
#endif
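(Aside, not part of the patch: the store-single-lane operation the new vst1_lane cases cover, as a minimal sketch with an illustrative name. The q-form writes exactly one 64-bit lane of the 128-bit vector, matching the TEST_VST1_LANE(q, poly, p, 64, 2, 0) case above.)

#include <arm_neon.h>

#if defined (__ARM_FEATURE_CRYPTO)
/* Store only lane 0 of VAL to DST; the other lane is not written.  */
void
demo_vst1q_lane_p64 (poly64_t *dst, poly64x2_t val)
{
  vst1q_lane_p64 (dst, val, 0);
}
#endif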
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results for vst2, chunk 0. */
VECT_VAR_DECL(expected_st2_0,int,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0,
@@ -14,6 +15,9 @@ VECT_VAR_DECL(expected_st2_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_st2_0,poly,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected_st2_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0x0, 0x0 };
VECT_VAR_DECL(expected_st2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_st2_0,int,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0,
@@ -42,6 +46,9 @@ VECT_VAR_DECL(expected_st2_1,uint,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st2_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st2_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_st2_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st2_1,hfloat,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st2_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -68,6 +75,9 @@ VECT_VAR_DECL(expected_st3_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_st3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected_st3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0x0 };
VECT_VAR_DECL(expected_st3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_st3_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0,
@@ -97,6 +107,9 @@ VECT_VAR_DECL(expected_st3_1,uint,32,2) [] = { 0xfffffff2, 0x0 };
VECT_VAR_DECL(expected_st3_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_st3_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_1,hfloat,32,2) [] = { 0xc1600000, 0x0 };
VECT_VAR_DECL(expected_st3_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -123,6 +136,9 @@ VECT_VAR_DECL(expected_st3_2,uint,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_2,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_2,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected_st3_2,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_2,hfloat,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_2,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -149,6 +165,9 @@ VECT_VAR_DECL(expected_st4_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_st4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected_st4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_st4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_st4_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
@@ -178,6 +197,9 @@ VECT_VAR_DECL(expected_st4_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
VECT_VAR_DECL(expected_st4_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_st4_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_st4_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -204,6 +226,9 @@ VECT_VAR_DECL(expected_st4_2,uint,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_2,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_2,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected_st4_2,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_2,hfloat,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_2,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -230,6 +255,9 @@ VECT_VAR_DECL(expected_st4_3,uint,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_3,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_3,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+#endif
VECT_VAR_DECL(expected_st4_3,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_3,hfloat,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_3,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -256,6 +284,9 @@ VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 16, 2);
#endif
@@ -272,6 +303,9 @@ VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 64, 3);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 16, 3);
#endif
@@ -288,6 +322,9 @@ VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 64, 4);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 16, 4);
#endif
@@ -336,6 +373,19 @@ void exec_vstX_lane (void)
&(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
sizeof(VECT_VAR(result, T1, W, N)));
+#if defined (__ARM_FEATURE_CRYPTO)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y) \
+ TEST_EXTRA_CHUNK(T1, W, N, X, Y)
+#define TEST_VSTX_LANE_CRYPTO(Q, T1, T2, W, N, X, L) \
+ TEST_VSTX_LANE(Q, T1, T2, W, N, X, L)
+#define DECL_VSTX_LANE_CRYPTO(T1, W, N, X) \
+ DECL_VSTX_LANE(T1, W, N, X)
+#else
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y)
+#define TEST_VSTX_LANE_CRYPTO(Q, T1, T2, W, N, X, L)
+#define DECL_VSTX_LANE_CRYPTO(T1, W, N, X)
+#endif
+
/* We need all variants in 64 bits, but there is no 64x2 variant,
nor 128 bits vectors of int8/uint8/poly8. */
#define DECL_ALL_VSTX_LANE_NO_FP16(X) \
@@ -347,12 +397,14 @@ void exec_vstX_lane (void)
DECL_VSTX_LANE(uint, 32, 2, X); \
DECL_VSTX_LANE(poly, 8, 8, X); \
DECL_VSTX_LANE(poly, 16, 4, X); \
+ DECL_VSTX_LANE_CRYPTO(poly, 64, 1, X); \
DECL_VSTX_LANE(float, 32, 2, X); \
DECL_VSTX_LANE(int, 16, 8, X); \
DECL_VSTX_LANE(int, 32, 4, X); \
DECL_VSTX_LANE(uint, 16, 8, X); \
DECL_VSTX_LANE(uint, 32, 4, X); \
DECL_VSTX_LANE(poly, 16, 8, X); \
+ DECL_VSTX_LANE_CRYPTO(poly, 64, 2, X); \
DECL_VSTX_LANE(float, 32, 4, X)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -378,6 +430,7 @@ void exec_vstX_lane (void)
TEST_VSTX_LANE(, uint, u, 32, 2, X, 1); \
TEST_VSTX_LANE(, poly, p, 8, 8, X, 4); \
TEST_VSTX_LANE(, poly, p, 16, 4, X, 3); \
+  TEST_VSTX_LANE_CRYPTO(, poly, p, 64, 1, X, 0); \
TEST_VSTX_LANE(q, int, s, 16, 8, X, 6); \
TEST_VSTX_LANE(q, int, s, 32, 4, X, 2); \
TEST_VSTX_LANE(q, uint, u, 16, 8, X, 5); \
@@ -403,6 +456,7 @@ void exec_vstX_lane (void)
TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \
TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \
TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \
+ TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 1, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \
TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \
TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \
@@ -434,6 +488,9 @@ void exec_vstX_lane (void)
DUMMY_ARRAY(buffer_src, uint, 32, 2, 4);
DUMMY_ARRAY(buffer_src, poly, 8, 8, 4);
DUMMY_ARRAY(buffer_src, poly, 16, 4, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ DUMMY_ARRAY(buffer_src, poly, 64, 1, 4);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
DUMMY_ARRAY(buffer_src, float, 16, 4, 4);
#endif
@@ -462,6 +519,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_0, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_0, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_0, CMT);
+ CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st2_0, CMT);
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_0, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_0, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_0, CMT);
@@ -485,6 +543,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_1, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_1, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_1, CMT);
+ CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st2_1, CMT);
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_1, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_1, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_1, CMT);
@@ -514,6 +573,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_0, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_0, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_0, CMT);
+ CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st3_0, CMT);
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_0, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_0, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_0, CMT);
@@ -538,6 +598,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_1, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_1, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_1, CMT);
+ CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st3_1, CMT);
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_1, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_1, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_1, CMT);
@@ -562,6 +623,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_2, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_2, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_2, CMT);
+ CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st3_2, CMT);
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_2, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_2, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_2, CMT);
@@ -591,6 +653,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_0, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_0, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_0, CMT);
+ CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st4_0, CMT);
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_0, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_0, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_0, CMT);
@@ -615,6 +678,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_1, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_1, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_1, CMT);
+ CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st4_1, CMT);
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_1, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_1, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_1, CMT);
@@ -639,6 +703,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_2, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_2, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_2, CMT);
+ CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st4_2, CMT);
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_2, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_2, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_2, CMT);
@@ -663,6 +728,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_3, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_3, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_3, CMT);
+ CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st4_3, CMT);
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_3, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_3, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_3, CMT);
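(CHECK_CRYPTO is used throughout the vldX_lane and vstX_lane checks above but is defined elsewhere in this series, in arm-neon-ref.h. A plausible shape, assuming it follows the same convention as the *_CRYPTO wrappers introduced in these files: a passthrough when the crypto extension, and hence the poly64 types, is available, and an empty expansion otherwise.)

/* Sketch only: expands to a normal CHECK under __ARM_FEATURE_CRYPTO,
   and to nothing otherwise, so the poly64 expected-value tables can
   stay #ifdef-guarded.  */
#if defined (__ARM_FEATURE_CRYPTO)
#define CHECK_CRYPTO(MSG, T, W, N, FMT, EXPECTED, COMMENT) \
  CHECK(MSG, T, W, N, FMT, EXPECTED, COMMENT)
#else
#define CHECK_CRYPTO(MSG, T, W, N, FMT, EXPECTED, COMMENT)
#endif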