@@ -1591,6 +1591,9 @@ aarch64_init_simd_builtin_functions (bool called_from_pragma)
enum class aarch64_builtin_signatures
{
binary,
+ binary_fpm,
+ ternary_fpm,
+ unary_fpm,
};
namespace {
@@ -1602,6 +1605,9 @@ struct simd_type {
namespace simd_types {
+ constexpr simd_type f8 { V8QImode, qualifier_modal_float };
+ constexpr simd_type f8q { V16QImode, qualifier_modal_float };
+
constexpr simd_type s8 { V8QImode, qualifier_none };
constexpr simd_type u8 { V8QImode, qualifier_unsigned };
constexpr simd_type s8q { V16QImode, qualifier_none };
@@ -1612,6 +1618,11 @@ namespace simd_types {
constexpr simd_type s16q { V8HImode, qualifier_none };
constexpr simd_type u16q { V8HImode, qualifier_unsigned };
+ constexpr simd_type s32 { V2SImode, qualifier_none };
+ constexpr simd_type s32q { V4SImode, qualifier_none };
+
+ constexpr simd_type s64q { V2DImode, qualifier_none };
+
constexpr simd_type p8 { V8QImode, qualifier_poly };
constexpr simd_type p8q { V16QImode, qualifier_poly };
constexpr simd_type p16 { V4HImode, qualifier_poly };
@@ -1655,7 +1666,7 @@ static aarch64_pragma_builtins_data aarch64_pragma_builtins[] = {
static tree
aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data)
{
- tree type0, type1, type2;
+ tree type0, type1, type2, type3;
switch (builtin_data.signature)
{
@@ -1668,6 +1679,36 @@ aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data)
builtin_data.types[2].qualifiers);
return build_function_type_list (type0, type1, type2, NULL_TREE);
+ case aarch64_builtin_signatures::binary_fpm:
+ type0 = aarch64_simd_builtin_type (builtin_data.types[0].mode,
+ builtin_data.types[0].qualifiers);
+ type1 = aarch64_simd_builtin_type (builtin_data.types[1].mode,
+ builtin_data.types[1].qualifiers);
+ type2 = aarch64_simd_builtin_type (builtin_data.types[2].mode,
+ builtin_data.types[2].qualifiers);
+ return build_function_type_list (type0, type1, type2, uint64_type_node,
+ NULL_TREE);
+
+ case aarch64_builtin_signatures::ternary_fpm:
+ type0 = aarch64_simd_builtin_type (builtin_data.types[0].mode,
+ builtin_data.types[0].qualifiers);
+ type1 = aarch64_simd_builtin_type (builtin_data.types[1].mode,
+ builtin_data.types[1].qualifiers);
+ type2 = aarch64_simd_builtin_type (builtin_data.types[2].mode,
+ builtin_data.types[2].qualifiers);
+ type3 = aarch64_simd_builtin_type (builtin_data.types[3].mode,
+ builtin_data.types[3].qualifiers);
+ return build_function_type_list (type0, type1, type2, type3,
+ uint64_type_node, NULL_TREE);
+
+ case aarch64_builtin_signatures::unary_fpm:
+ type0 = aarch64_simd_builtin_type (builtin_data.types[0].mode,
+ builtin_data.types[0].qualifiers);
+ type1 = aarch64_simd_builtin_type (builtin_data.types[1].mode,
+ builtin_data.types[1].qualifiers);
+ return build_function_type_list (type0, type1, uint64_type_node,
+ NULL_TREE);
+
default:
gcc_unreachable ();
}
@@ -3383,24 +3424,89 @@ static rtx
aarch64_expand_pragma_builtin (tree exp, rtx target,
const aarch64_pragma_builtins_data *builtin_data)
{
- expand_operand ops[3];
- auto op1 = expand_normal (CALL_EXPR_ARG (exp, 0));
- auto op2 = expand_normal (CALL_EXPR_ARG (exp, 1));
- create_output_operand (&ops[0], target, builtin_data->types[0].mode);
- create_input_operand (&ops[1], op1, builtin_data->types[1].mode);
- create_input_operand (&ops[2], op2, builtin_data->types[2].mode);
-
auto unspec = builtin_data->unspec;
- insn_code icode;
+ expand_operand ops[4];
switch (builtin_data->signature)
{
case aarch64_builtin_signatures::binary:
- icode = code_for_aarch64 (unspec, builtin_data->types[0].mode);
- expand_insn (icode, 3, ops);
- break;
+ {
+ auto input1 = expand_normal (CALL_EXPR_ARG (exp, 0));
+ auto input2 = expand_normal (CALL_EXPR_ARG (exp, 1));
+
+ create_output_operand (&ops[0], target, builtin_data->types[0].mode);
+ create_input_operand (&ops[1], input1, builtin_data->types[1].mode);
+ create_input_operand (&ops[2], input2, builtin_data->types[2].mode);
+
+ auto icode = code_for_aarch64 (unspec,
+ builtin_data->types[1].mode,
+ builtin_data->types[2].mode);
+ expand_insn (icode, 3, ops);
+ break;
+ }
+
+ case aarch64_builtin_signatures::binary_fpm:
+ {
+ auto input1 = expand_normal (CALL_EXPR_ARG (exp, 0));
+ auto input2 = expand_normal (CALL_EXPR_ARG (exp, 1));
+ auto fpm_input = expand_normal (CALL_EXPR_ARG (exp, 2));
+
+ auto fpmr = gen_rtx_REG (DImode, FPM_REGNUM);
+ emit_move_insn (fpmr, fpm_input);
+
+ create_output_operand (&ops[0], target, builtin_data->types[0].mode);
+ create_input_operand (&ops[1], input1, builtin_data->types[1].mode);
+ create_input_operand (&ops[2], input2, builtin_data->types[2].mode);
+ auto icode = code_for_aarch64 (unspec,
+ builtin_data->types[0].mode,
+ builtin_data->types[1].mode,
+ builtin_data->types[2].mode);
+ expand_insn (icode, 3, ops);
+ break;
+ }
+
+ case aarch64_builtin_signatures::ternary_fpm:
+ {
+ auto input1 = expand_normal (CALL_EXPR_ARG (exp, 0));
+ auto input2 = expand_normal (CALL_EXPR_ARG (exp, 1));
+ auto input3 = expand_normal (CALL_EXPR_ARG (exp, 2));
+ auto fpm_input = expand_normal (CALL_EXPR_ARG (exp, 3));
+
+ auto fpmr = gen_rtx_REG (DImode, FPM_REGNUM);
+ emit_move_insn (fpmr, fpm_input);
+
+ create_output_operand (&ops[0], target, builtin_data->types[0].mode);
+ create_input_operand (&ops[1], input1, builtin_data->types[1].mode);
+ create_input_operand (&ops[2], input2, builtin_data->types[2].mode);
+ create_input_operand (&ops[3], input3, builtin_data->types[3].mode);
+ auto icode = code_for_aarch64 (unspec,
+ builtin_data->types[0].mode,
+ builtin_data->types[1].mode,
+ builtin_data->types[2].mode,
+ builtin_data->types[3].mode);
+ expand_insn (icode, 4, ops);
+ break;
+ }
+
+ case aarch64_builtin_signatures::unary_fpm:
+ {
+ auto input = expand_normal (CALL_EXPR_ARG (exp, 0));
+ auto fpm_input = expand_normal (CALL_EXPR_ARG (exp, 1));
+
+ auto fpmr = gen_rtx_REG (DImode, FPM_REGNUM);
+ emit_move_insn (fpmr, fpm_input);
+
+ create_output_operand (&ops[0], target, builtin_data->types[0].mode);
+ create_input_operand (&ops[1], input, builtin_data->types[1].mode);
+ auto icode = code_for_aarch64 (unspec,
+ builtin_data->types[0].mode,
+ builtin_data->types[1].mode);
+ expand_insn (icode, 2, ops);
+ break;
+ }
+
default:
- gcc_unreachable();
+ gcc_unreachable ();
}
return target;
}
@@ -257,6 +257,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
aarch64_def_or_undef (TARGET_SVE_BF16,
"__ARM_FEATURE_SVE_BF16", pfile);
+ aarch64_def_or_undef (TARGET_FP8, "__ARM_FEATURE_FP8", pfile);
+
aarch64_def_or_undef (TARGET_LS64,
"__ARM_FEATURE_LS64", pfile);
aarch64_def_or_undef (TARGET_RCPC, "__ARM_FEATURE_RCPC", pfile);
@@ -23,6 +23,16 @@
#define ENTRY_BINARY(N, S, T0, T1, T2, U) \
ENTRY (N, S, T0, T1, T2, none, U)
+#undef ENTRY_BINARY_FPM
+#define ENTRY_BINARY_FPM(N, S, T0, T1, T2, U) \
+  ENTRY (N, S, T0, T1, T2, none, U)
+
+/* This header is re-included with different ENTRY definitions, so every
+   helper macro must be #undef'd first, as ENTRY_BINARY and ENTRY_VHSDF
+   already are.  */
+#undef ENTRY_TERNARY_FPM
+#define ENTRY_TERNARY_FPM(N, S, T0, T1, T2, T3, U) \
+  ENTRY (N, S, T0, T1, T2, T3, U)
+
+#undef ENTRY_UNARY_FPM
+#define ENTRY_UNARY_FPM(N, S, T0, T1, U) \
+  ENTRY (N, S, T0, T1, none, none, U)
+
+
#undef ENTRY_VHSDF
#define ENTRY_VHSDF(NAME, SIGNATURE, UNSPEC) \
ENTRY_BINARY (NAME##_f16, SIGNATURE, f16, f16, f16, UNSPEC) \
@@ -31,8 +41,54 @@
ENTRY_BINARY (NAME##q_f32, SIGNATURE, f32q, f32q, f32q, UNSPEC) \
ENTRY_BINARY (NAME##q_f64, SIGNATURE, f64q, f64q, f64q, UNSPEC)
+#undef ENTRY_VHSDF_VHSDI
+#define ENTRY_VHSDF_VHSDI(NAME, SIGNATURE, UNSPEC) \
+ ENTRY_BINARY (NAME##_f16, SIGNATURE, f16, f16, s16, UNSPEC) \
+ ENTRY_BINARY (NAME##q_f16, SIGNATURE, f16q, f16q, s16q, UNSPEC) \
+ ENTRY_BINARY (NAME##_f32, SIGNATURE, f32, f32, s32, UNSPEC) \
+ ENTRY_BINARY (NAME##q_f32, SIGNATURE, f32q, f32q, s32q, UNSPEC) \
+ ENTRY_BINARY (NAME##q_f64, SIGNATURE, f64q, f64q, s64q, UNSPEC)
+
// faminmax
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FAMINMAX)
ENTRY_VHSDF (vamax, binary, UNSPEC_FAMAX)
ENTRY_VHSDF (vamin, binary, UNSPEC_FAMIN)
#undef REQUIRED_EXTENSIONS
+
+// fpm conversion
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8)
+ENTRY_UNARY_FPM (vcvt1_bf16_mf8_fpm, unary_fpm, bf16q, f8, UNSPEC_VCVT1_BF16)
+ENTRY_UNARY_FPM (vcvt1_high_bf16_mf8_fpm, unary_fpm, bf16q, f8q, \
+ UNSPEC_VCVT1_HIGH_BF16)
+ENTRY_UNARY_FPM (vcvt1_low_bf16_mf8_fpm, unary_fpm, bf16q, f8q, \
+ UNSPEC_VCVT1_LOW_BF16)
+ENTRY_UNARY_FPM (vcvt1_f16_mf8_fpm, unary_fpm, f16q, f8, UNSPEC_VCVT1_F16)
+ENTRY_UNARY_FPM (vcvt1_high_f16_mf8_fpm, unary_fpm, f16q, f8q, \
+ UNSPEC_VCVT1_HIGH_F16)
+ENTRY_UNARY_FPM (vcvt1_low_f16_mf8_fpm, unary_fpm, f16q, f8q, \
+ UNSPEC_VCVT1_LOW_F16)
+ENTRY_UNARY_FPM (vcvt2_bf16_mf8_fpm, unary_fpm, bf16q, f8, UNSPEC_VCVT2_BF16)
+ENTRY_UNARY_FPM (vcvt2_high_bf16_mf8_fpm, unary_fpm, bf16q, f8q, \
+ UNSPEC_VCVT2_HIGH_BF16)
+ENTRY_UNARY_FPM (vcvt2_low_bf16_mf8_fpm, unary_fpm, bf16q, f8q, \
+ UNSPEC_VCVT2_LOW_BF16)
+ENTRY_UNARY_FPM (vcvt2_f16_mf8_fpm, unary_fpm, f16q, f8, UNSPEC_VCVT2_F16)
+ENTRY_UNARY_FPM (vcvt2_high_f16_mf8_fpm, unary_fpm, f16q, f8q, \
+ UNSPEC_VCVT2_HIGH_F16)
+ENTRY_UNARY_FPM (vcvt2_low_f16_mf8_fpm, unary_fpm, f16q, f8q, \
+ UNSPEC_VCVT2_LOW_F16)
+
+ENTRY_BINARY_FPM (vcvt_mf8_f16_fpm, binary_fpm, f8, f16, f16, UNSPEC_VCVT_F16)
+ENTRY_BINARY_FPM (vcvtq_mf8_f16_fpm, binary_fpm, f8q, f16q, f16q, \
+ UNSPEC_VCVTQ_F16)
+ENTRY_BINARY_FPM (vcvt_mf8_f32_fpm, binary_fpm, f8, f32q, f32q, \
+ UNSPEC_VCVT_F32)
+
+ENTRY_TERNARY_FPM (vcvt_high_mf8_f32_fpm, ternary_fpm, f8q, f8, f32q, f32q, \
+ UNSPEC_VCVT_HIGH_F32)
+#undef REQUIRED_EXTENSIONS
+
+// fpm scaling
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8)
+ENTRY_VHSDF_VHSDI (vscale, binary, UNSPEC_FSCALE)
+#undef REQUIRED_EXTENSIONS
@@ -9982,13 +9982,13 @@
)
;; faminmax
-(define_insn "@aarch64_<faminmax_uns_op><mode>"
+(define_insn "@aarch64_<faminmax_uns_op><VHSDF:mode><VHSDF:mode>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
(match_operand:VHSDF 2 "register_operand" "w")]
FAMINMAX_UNS))]
"TARGET_FAMINMAX"
- "<faminmax_uns_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+ "<faminmax_uns_op>\t%0.<Vtype>, %1.<VHSDF:Vtype>, %2.<VHSDF:Vtype>"
)
(define_insn "*aarch64_faminmax_fused"
@@ -9999,3 +9999,71 @@
"TARGET_FAMINMAX"
"<faminmax_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
)
+
+;; fpm unary instructions.
+(define_insn "@aarch64_<fpm_uns_name><V8HFBF:mode><VB:mode>"
+ [(set (match_operand:V8HFBF 0 "register_operand" "=w")
+ (unspec:V8HFBF
+ [(match_operand:VB 1 "register_operand" "w")
+ (reg:DI FPM_REGNUM)]
+ FPM_UNARY_UNS))]
+ "TARGET_FP8"
+ "<fpm_uns_op>\t%0.<V8HFBF:Vtype>, %1.<VB:Vtype>"
+)
+
+;; fpm unary instructions, where the input is lowered from V16QI to
+;; V8QI.
+(define_insn "@aarch64_<fpm_uns_name><V8HFBF:mode><V16QI_ONLY:mode>"
+  [(set (match_operand:V8HFBF 0 "register_operand" "=w")
+	(unspec:V8HFBF
+	  [(match_operand:V16QI_ONLY 1 "register_operand" "w")
+	   (reg:DI FPM_REGNUM)]
+	  FPM_UNARY_LOW_UNS))]
+  "TARGET_FP8"
+  {
+    /* Reinterpret the input as V8QI so that only the low 64 bits are
+       read.  lowpart_subreg is a pure rtx rewrite and so is safe in an
+       output template (force_lowpart_subreg may emit insns, which is
+       not allowed during final); name the V16QI mode directly rather
+       than reaching into recog_data and the rtx's mode field.  */
+    operands[1] = lowpart_subreg (V8QImode, operands[1], V16QImode);
+    return "<fpm_uns_op>\t%0.<V8HFBF:Vtype>, %1.8b";
+  }
+)
+
+;; fpm binary instructions.
+(define_insn
+ "@aarch64_<fpm_uns_name><VB:mode><VCVTFPM:mode><VH_SF:mode>"
+ [(set (match_operand:VB 0 "register_operand" "=w")
+ (unspec:VB
+ [(match_operand:VCVTFPM 1 "register_operand" "w")
+ (match_operand:VH_SF 2 "register_operand" "w")
+ (reg:DI FPM_REGNUM)]
+ FPM_BINARY_UNS))]
+ "TARGET_FP8"
+ "<fpm_uns_op>\t%0.<VB:Vtype>, %1.<VCVTFPM:Vtype>, %2.<VH_SF:Vtype>"
+)
+
+;; fpm ternary instructions.
+(define_insn
+ "@aarch64_<fpm_uns_name><V16QI_ONLY:mode><V8QI_ONLY:mode><V4SF_ONLY:mode><V4SF_ONLY:mode>"
+ [(set (match_operand:V16QI_ONLY 0 "register_operand" "=w")
+ (unspec:V16QI_ONLY
+ [(match_operand:V8QI_ONLY 1 "register_operand" "w")
+ (match_operand:V4SF_ONLY 2 "register_operand" "w")
+ (match_operand:V4SF_ONLY 3 "register_operand" "w")
+ (reg:DI FPM_REGNUM)]
+ FPM_TERNARY_VCVT_UNS))]
+ "TARGET_FP8"
+ {
+ operands[1] = force_reg (V16QImode, operands[1]);
+ return "<fpm_uns_op>\t%1.16b, %2.<V4SF_ONLY:Vtype>, %3.<V4SF_ONLY:Vtype>";
+ }
+)
+
+;; fpm scale instructions
+(define_insn "@aarch64_<fpm_uns_op><VHSDF:mode><VHSDI:mode>"
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDI 2 "register_operand" "w")]
+ FPM_SCALE_UNS))]
+ "TARGET_FP8"
+ "<fpm_uns_op>\t%0.<VHSDF:Vtype>, %1.<VHSDF:Vtype>, %2.<VHSDI:Vtype>"
+)
@@ -41,6 +41,9 @@
;; Iterators for single modes, for "@" patterns.
(define_mode_iterator SI_ONLY [SI])
(define_mode_iterator DI_ONLY [DI])
+(define_mode_iterator V8QI_ONLY [V8QI])
+(define_mode_iterator V16QI_ONLY [V16QI])
+(define_mode_iterator V4SF_ONLY [V4SF])
;; Iterator for all integer modes (up to 64-bit)
(define_mode_iterator ALLI [QI HI SI DI])
@@ -163,6 +166,12 @@
(define_mode_iterator VHSDF [(V4HF "TARGET_SIMD_F16INST")
(V8HF "TARGET_SIMD_F16INST")
V2SF V4SF V2DF])
+(define_mode_iterator VH_SF [(V4HF "TARGET_SIMD_F16INST")
+ (V8HF "TARGET_SIMD_F16INST")
+ V4SF])
+
+;; Advanced SIMD Integer modes.
+(define_mode_iterator VHSDI [V4HI V8HI V2SI V4SI V2DI])
;; Advanced SIMD Float modes, and DF.
(define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF])
@@ -426,6 +435,12 @@
(V8HF "TARGET_SIMD_F16INST")
V2SF V4SF])
+;; Modes available for Advanced SIMD FP8 conversion operations.
+(define_mode_iterator VCVTFPM [V8QI
+ (V4HF "TARGET_SIMD_F16INST")
+ (V8HF "TARGET_SIMD_F16INST")
+ V4SF])
+
;; Iterators for single modes, for "@" patterns.
(define_mode_iterator VNx16QI_ONLY [VNx16QI])
(define_mode_iterator VNx16SI_ONLY [VNx16SI])
@@ -630,6 +645,9 @@
;; Bfloat16 modes to which V4SF can be converted
(define_mode_iterator V4SF_TO_BF [V4BF V8BF])
+;; Float16 and Bfloat16 modes
+(define_mode_iterator V8HFBF [V8HF V8BF])
+
(define_mode_iterator SVE_BHSx24 [VNx32QI VNx16HI VNx8SI
VNx16BF VNx16HF VNx8SF
VNx64QI VNx32HI VNx16SI
@@ -694,6 +712,7 @@
UNSPEC_FMINV ; Used in aarch64-simd.md.
UNSPEC_FADDV ; Used in aarch64-simd.md.
UNSPEC_FNEG ; Used in aarch64-simd.md.
+ UNSPEC_FSCALE ; Used in aarch64-simd.md.
UNSPEC_ADDV ; Used in aarch64-simd.md.
UNSPEC_SMAXV ; Used in aarch64-simd.md.
UNSPEC_SMINV ; Used in aarch64-simd.md.
@@ -731,6 +750,22 @@
UNSPEC_SSHLL ; Used in aarch64-simd.md.
UNSPEC_USHLL ; Used in aarch64-simd.md.
UNSPEC_ADDP ; Used in aarch64-simd.md.
+ UNSPEC_VCVT_F16 ; Used in aarch64-simd.md.
+ UNSPEC_VCVTQ_F16 ; Used in aarch64-simd.md.
+ UNSPEC_VCVT_F32 ; Used in aarch64-simd.md.
+ UNSPEC_VCVT_HIGH_F32 ; Used in aarch64-simd.md.
+ UNSPEC_VCVT1_BF16 ; Used in aarch64-simd.md.
+ UNSPEC_VCVT1_F16 ; Used in aarch64-simd.md.
+ UNSPEC_VCVT1_HIGH_BF16 ; Used in aarch64-simd.md.
+ UNSPEC_VCVT1_HIGH_F16 ; Used in aarch64-simd.md.
+ UNSPEC_VCVT1_LOW_BF16 ; Used in aarch64-simd.md.
+ UNSPEC_VCVT1_LOW_F16 ; Used in aarch64-simd.md.
+ UNSPEC_VCVT2_BF16 ; Used in aarch64-simd.md.
+ UNSPEC_VCVT2_F16 ; Used in aarch64-simd.md.
+ UNSPEC_VCVT2_HIGH_BF16 ; Used in aarch64-simd.md.
+ UNSPEC_VCVT2_HIGH_F16 ; Used in aarch64-simd.md.
+ UNSPEC_VCVT2_LOW_BF16 ; Used in aarch64-simd.md.
+ UNSPEC_VCVT2_LOW_F16 ; Used in aarch64-simd.md.
UNSPEC_TBL ; Used in vector permute patterns.
UNSPEC_TBX ; Used in vector permute patterns.
UNSPEC_CONCAT ; Used in vector permute patterns.
@@ -4534,3 +4569,67 @@
(define_code_attr faminmax_op
[(smax "famax") (smin "famin")])
+
+;; Iterators and attributes for fpm instructions
+
+(define_int_iterator FPM_UNARY_UNS
+ [UNSPEC_VCVT1_BF16
+ UNSPEC_VCVT1_F16
+ UNSPEC_VCVT1_HIGH_BF16
+ UNSPEC_VCVT1_HIGH_F16
+ UNSPEC_VCVT2_BF16
+ UNSPEC_VCVT2_F16
+ UNSPEC_VCVT2_HIGH_BF16
+ UNSPEC_VCVT2_HIGH_F16])
+
+(define_int_iterator FPM_UNARY_LOW_UNS
+ [UNSPEC_VCVT1_LOW_BF16
+ UNSPEC_VCVT1_LOW_F16
+ UNSPEC_VCVT2_LOW_BF16
+ UNSPEC_VCVT2_LOW_F16])
+
+(define_int_iterator FPM_BINARY_UNS
+ [UNSPEC_VCVT_F16
+ UNSPEC_VCVTQ_F16
+ UNSPEC_VCVT_F32])
+
+(define_int_iterator FPM_SCALE_UNS [UNSPEC_FSCALE])
+
+(define_int_iterator FPM_TERNARY_VCVT_UNS [UNSPEC_VCVT_HIGH_F32])
+
+(define_int_attr fpm_uns_op
+ [(UNSPEC_FSCALE "fscale")
+ (UNSPEC_VCVT_F16 "fcvtn")
+ (UNSPEC_VCVTQ_F16 "fcvtn")
+ (UNSPEC_VCVT_F32 "fcvtn")
+ (UNSPEC_VCVT_HIGH_F32 "fcvtn2")
+ (UNSPEC_VCVT1_BF16 "bf1cvtl")
+ (UNSPEC_VCVT1_F16 "f1cvtl")
+ (UNSPEC_VCVT1_HIGH_BF16 "bf1cvtl2")
+ (UNSPEC_VCVT1_HIGH_F16 "f1cvtl2")
+ (UNSPEC_VCVT1_LOW_BF16 "bf1cvtl")
+ (UNSPEC_VCVT1_LOW_F16 "f1cvtl")
+ (UNSPEC_VCVT2_BF16 "bf2cvtl")
+ (UNSPEC_VCVT2_F16 "f2cvtl")
+ (UNSPEC_VCVT2_HIGH_BF16 "bf2cvtl2")
+ (UNSPEC_VCVT2_HIGH_F16 "f2cvtl2")
+ (UNSPEC_VCVT2_LOW_BF16 "bf2cvtl")
+ (UNSPEC_VCVT2_LOW_F16 "f2cvtl")])
+
+(define_int_attr fpm_uns_name
+ [(UNSPEC_VCVT_F16 "vcvt_mf8_f16_fpm")
+ (UNSPEC_VCVTQ_F16 "vcvtq_mf8_f16_fpm")
+ (UNSPEC_VCVT_F32 "vcvt_mf8_f32_fpm")
+ (UNSPEC_VCVT_HIGH_F32 "vcvt_high_mf8_f32_fpm")
+ (UNSPEC_VCVT1_BF16 "vcvt1_bf16_mf8_fpm")
+ (UNSPEC_VCVT1_F16 "vcvt1_f16_mf8_fpm")
+ (UNSPEC_VCVT1_HIGH_BF16 "vcvt1_high_bf16_mf8_fpm")
+ (UNSPEC_VCVT1_HIGH_F16 "vcvt1_high_f16_mf8_fpm")
+ (UNSPEC_VCVT1_LOW_BF16 "vcvt1_low_bf16_mf8_fpm")
+ (UNSPEC_VCVT1_LOW_F16 "vcvt1_low_f16_mf8_fpm")
+ (UNSPEC_VCVT2_BF16 "vcvt2_bf16_mf8_fpm")
+ (UNSPEC_VCVT2_F16 "vcvt2_f16_mf8_fpm")
+ (UNSPEC_VCVT2_HIGH_BF16 "vcvt2_high_bf16_mf8_fpm")
+ (UNSPEC_VCVT2_HIGH_F16 "vcvt2_high_f16_mf8_fpm")
+ (UNSPEC_VCVT2_LOW_BF16 "vcvt2_low_bf16_mf8_fpm")
+ (UNSPEC_VCVT2_LOW_F16 "vcvt2_low_f16_mf8_fpm")])
@@ -5,19 +5,9 @@
#include <arm_acle.h>
-#ifdef __ARM_FEATURE_FP8
-#error "__ARM_FEATURE_FP8 feature macro defined."
-#endif
-
#pragma GCC push_options
#pragma GCC target("arch=armv9.4-a+fp8")
-/* We do not define __ARM_FEATURE_FP8 until all
- relevant features have been added. */
-#ifdef __ARM_FEATURE_FP8
-#error "__ARM_FEATURE_FP8 feature macro defined."
-#endif
-
/*
**test_write_fpmr_sysreg_asm_64:
** msr fpmr, x0
new file mode 100644
@@ -0,0 +1,60 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv9-a+fp8" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+** test_vscale_f16:
+** fscale v0.4h, v0.4h, v1.4h
+** ret
+*/
+float16x4_t
+test_vscale_f16 (float16x4_t a, int16x4_t b)
+{
+ return vscale_f16 (a, b);
+}
+
+/*
+** test_vscaleq_f16:
+** fscale v0.8h, v0.8h, v1.8h
+** ret
+*/
+float16x8_t
+test_vscaleq_f16 (float16x8_t a, int16x8_t b)
+{
+ return vscaleq_f16 (a, b);
+}
+
+/*
+** test_vscale_f32:
+** fscale v0.2s, v0.2s, v1.2s
+** ret
+*/
+float32x2_t
+test_vscale_f32 (float32x2_t a, int32x2_t b)
+{
+ return vscale_f32 (a, b);
+}
+
+/*
+** test_vscaleq_f32:
+** fscale v0.4s, v0.4s, v1.4s
+** ret
+*/
+float32x4_t
+test_vscaleq_f32 (float32x4_t a, int32x4_t b)
+{
+ return vscaleq_f32 (a, b);
+}
+
+/*
+** test_vscaleq_f64:
+** fscale v0.2d, v0.2d, v1.2d
+** ret
+*/
+float64x2_t
+test_vscaleq_f64 (float64x2_t a, int64x2_t b)
+{
+ return vscaleq_f64 (a, b);
+}
new file mode 100644
@@ -0,0 +1,197 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv9-a+fp8" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+** test_vcvt1_bf16:
+** msr fpmr, x0
+** bf1cvtl v0.8h, v0.8b
+** ret
+*/
+bfloat16x8_t
+test_vcvt1_bf16 (mfloat8x8_t a, fpm_t b)
+{
+ return vcvt1_bf16_mf8_fpm(a, b);
+}
+
+/*
+** test_high_vcvt1_bf16:
+** msr fpmr, x0
+** bf1cvtl2 v0.8h, v0.16b
+** ret
+*/
+bfloat16x8_t
+test_high_vcvt1_bf16 (mfloat8x16_t a, fpm_t b)
+{
+ return vcvt1_high_bf16_mf8_fpm(a, b);
+}
+
+/*
+** test_low_vcvt1_bf16:
+** msr fpmr, x0
+** bf1cvtl v0.8h, v0.8b
+** ret
+*/
+bfloat16x8_t
+test_low_vcvt1_bf16 (mfloat8x16_t a, fpm_t b)
+{
+ return vcvt1_low_bf16_mf8_fpm(a, b);
+}
+
+/*
+** test_vcvt1_f16:
+** msr fpmr, x0
+** f1cvtl v0.8h, v0.8b
+** ret
+*/
+float16x8_t
+test_vcvt1_f16 (mfloat8x8_t a, fpm_t b)
+{
+ return vcvt1_f16_mf8_fpm(a, b);
+}
+
+/*
+** test_high_vcvt1_f16:
+** msr fpmr, x0
+** f1cvtl2 v0.8h, v0.16b
+** ret
+*/
+float16x8_t
+test_high_vcvt1_f16 (mfloat8x16_t a, fpm_t b)
+{
+ return vcvt1_high_f16_mf8_fpm(a, b);
+}
+
+/*
+** test_low_vcvt1_f16:
+** msr fpmr, x0
+** f1cvtl v0.8h, v0.8b
+** ret
+*/
+float16x8_t
+test_low_vcvt1_f16 (mfloat8x16_t a, fpm_t b)
+{
+ return vcvt1_low_f16_mf8_fpm(a, b);
+}
+
+/*
+** test_vcvt2_bf16:
+** msr fpmr, x0
+** bf2cvtl v0.8h, v0.8b
+** ret
+*/
+bfloat16x8_t
+test_vcvt2_bf16 (mfloat8x8_t a, fpm_t b)
+{
+ return vcvt2_bf16_mf8_fpm(a, b);
+}
+
+/*
+** test_high_vcvt2_bf16:
+** msr fpmr, x0
+** bf2cvtl2 v0.8h, v0.16b
+** ret
+*/
+bfloat16x8_t
+test_high_vcvt2_bf16 (mfloat8x16_t a, fpm_t b)
+{
+ return vcvt2_high_bf16_mf8_fpm(a, b);
+}
+
+/*
+** test_low_vcvt2_bf16:
+** msr fpmr, x0
+** bf2cvtl v0.8h, v0.8b
+** ret
+*/
+/* Must exercise the vcvt2 low intrinsic (bf2cvtl), not duplicate the
+   vcvt1 test above.  */
+bfloat16x8_t
+test_low_vcvt2_bf16 (mfloat8x16_t a, fpm_t b)
+{
+ return vcvt2_low_bf16_mf8_fpm(a, b);
+}
+
+/*
+** test_vcvt2_f16:
+** msr fpmr, x0
+** f2cvtl v0.8h, v0.8b
+** ret
+*/
+float16x8_t
+test_vcvt2_f16 (mfloat8x8_t a, fpm_t b)
+{
+ return vcvt2_f16_mf8_fpm(a, b);
+}
+
+/*
+** test_high_vcvt2_f16:
+** msr fpmr, x0
+** f2cvtl2 v0.8h, v0.16b
+** ret
+*/
+float16x8_t
+test_high_vcvt2_f16 (mfloat8x16_t a, fpm_t b)
+{
+ return vcvt2_high_f16_mf8_fpm(a, b);
+}
+
+/*
+** test_low_vcvt2_f16:
+** msr fpmr, x0
+** f2cvtl v0.8h, v0.8b
+** ret
+*/
+/* Must exercise the vcvt2 low intrinsic (f2cvtl), not duplicate the
+   vcvt1 test above.  */
+float16x8_t
+test_low_vcvt2_f16 (mfloat8x16_t a, fpm_t b)
+{
+ return vcvt2_low_f16_mf8_fpm(a, b);
+}
+
+/*
+** test_vcvt_f16:
+** msr fpmr, x0
+** fcvtn v0.8b, v0.4h, v1.4h
+** ret
+*/
+mfloat8x8_t
+test_vcvt_f16 (float16x4_t a, float16x4_t b, fpm_t c)
+{
+ return vcvt_mf8_f16_fpm(a, b, c);
+}
+
+/*
+** test_vcvtq_f16:
+** msr fpmr, x0
+** fcvtn v0.16b, v0.8h, v1.8h
+** ret
+*/
+mfloat8x16_t
+test_vcvtq_f16 (float16x8_t a, float16x8_t b, fpm_t c)
+{
+ return vcvtq_mf8_f16_fpm(a, b, c);
+}
+
+/*
+** test_vcvt_f32:
+** msr fpmr, x0
+** fcvtn v0.8b, v0.4s, v1.4s
+** ret
+*/
+mfloat8x8_t
+test_vcvt_f32 (float32x4_t a, float32x4_t b, fpm_t c)
+{
+ return vcvt_mf8_f32_fpm(a, b, c);
+}
+
+/*
+** test_vcvt_high_f32:
+** msr fpmr, x0
+** fcvtn2 v0.16b, v1.4s, v2.4s
+** ret
+*/
+mfloat8x16_t
+test_vcvt_high_f32 (mfloat8x8_t a, float32x4_t b, float32x4_t c, fpm_t d)
+{
+ return vcvt_high_mf8_f32_fpm(a, b, c, d);
+}