@@ -829,6 +829,17 @@ enum aarch64_builtins
AARCH64_RBIT,
AARCH64_RBITL,
AARCH64_RBITLL,
+ /* FAMINMAX builtins. */
+ AARCH64_FAMINMAX_BUILTIN_FAMAX4H,
+ AARCH64_FAMINMAX_BUILTIN_FAMAX8H,
+ AARCH64_FAMINMAX_BUILTIN_FAMAX2S,
+ AARCH64_FAMINMAX_BUILTIN_FAMAX4S,
+ AARCH64_FAMINMAX_BUILTIN_FAMAX2D,
+ AARCH64_FAMINMAX_BUILTIN_FAMIN4H,
+ AARCH64_FAMINMAX_BUILTIN_FAMIN8H,
+ AARCH64_FAMINMAX_BUILTIN_FAMIN2S,
+ AARCH64_FAMINMAX_BUILTIN_FAMIN4S,
+ AARCH64_FAMINMAX_BUILTIN_FAMIN2D,
/* System register builtins. */
AARCH64_RSR,
AARCH64_RSRP,
@@ -1547,6 +1558,66 @@ aarch64_init_simd_builtin_functions (bool called_from_pragma)
}
}
+/* Initialize the absolute maximum/minimum (FAMINMAX) builtins. */
+
+typedef struct
+{
+ const char *name;
+ unsigned int code;
+ tree eltype;
+ machine_mode mode;
+} faminmax_builtins_data;
+
+static void
+aarch64_init_faminmax_builtins ()
+{
+ faminmax_builtins_data data[] = {
+ /* Absolute maximum. */
+ {"vamax_f16", AARCH64_FAMINMAX_BUILTIN_FAMAX4H,
+ aarch64_simd_types[Float16x4_t].eltype,
+ aarch64_simd_types[Float16x4_t].mode},
+ {"vamaxq_f16", AARCH64_FAMINMAX_BUILTIN_FAMAX8H,
+ aarch64_simd_types[Float16x8_t].eltype,
+ aarch64_simd_types[Float16x8_t].mode},
+ {"vamax_f32", AARCH64_FAMINMAX_BUILTIN_FAMAX2S,
+ aarch64_simd_types[Float32x2_t].eltype,
+ aarch64_simd_types[Float32x2_t].mode},
+ {"vamaxq_f32", AARCH64_FAMINMAX_BUILTIN_FAMAX4S,
+ aarch64_simd_types[Float32x4_t].eltype,
+ aarch64_simd_types[Float32x4_t].mode},
+ {"vamaxq_f64", AARCH64_FAMINMAX_BUILTIN_FAMAX2D,
+ aarch64_simd_types[Float64x2_t].eltype,
+ aarch64_simd_types[Float64x2_t].mode},
+ /* Absolute minimum. */
+ {"vamin_f16", AARCH64_FAMINMAX_BUILTIN_FAMIN4H,
+ aarch64_simd_types[Float16x4_t].eltype,
+ aarch64_simd_types[Float16x4_t].mode},
+ {"vaminq_f16", AARCH64_FAMINMAX_BUILTIN_FAMIN8H,
+ aarch64_simd_types[Float16x8_t].eltype,
+ aarch64_simd_types[Float16x8_t].mode},
+ {"vamin_f32", AARCH64_FAMINMAX_BUILTIN_FAMIN2S,
+ aarch64_simd_types[Float32x2_t].eltype,
+ aarch64_simd_types[Float32x2_t].mode},
+ {"vaminq_f32", AARCH64_FAMINMAX_BUILTIN_FAMIN4S,
+ aarch64_simd_types[Float32x4_t].eltype,
+ aarch64_simd_types[Float32x4_t].mode},
+ {"vaminq_f64", AARCH64_FAMINMAX_BUILTIN_FAMIN2D,
+ aarch64_simd_types[Float64x2_t].eltype,
+ aarch64_simd_types[Float64x2_t].mode},
+ };
+
+ for (size_t i = 0; i < ARRAY_SIZE (data); ++i)
+ {
+ tree type
+ = build_vector_type (data[i].eltype, GET_MODE_NUNITS (data[i].mode));
+ tree fntype = build_function_type_list (type, type, type, NULL_TREE);
+ unsigned int code = data[i].code;
+ const char *name = data[i].name;
+ aarch64_builtin_decls[code]
+ = aarch64_general_simulate_builtin (name, fntype, code);
+ }
+}
+
/* Register the tuple type that contains NUM_VECTORS of the AdvSIMD type
indexed by TYPE_INDEX. */
static void
@@ -1640,6 +1711,7 @@ handle_arm_neon_h (void)
aarch64_init_simd_builtin_functions (true);
aarch64_init_simd_intrinsics ();
+ aarch64_init_faminmax_builtins ();
}
static void
@@ -2317,6 +2389,19 @@ aarch64_general_check_builtin_call (location_t location, vec<location_t>,
return aarch64_check_required_extensions (location, decl,
AARCH64_FL_LS64);
+ case AARCH64_FAMINMAX_BUILTIN_FAMAX4H:
+ case AARCH64_FAMINMAX_BUILTIN_FAMAX8H:
+ case AARCH64_FAMINMAX_BUILTIN_FAMAX2S:
+ case AARCH64_FAMINMAX_BUILTIN_FAMAX4S:
+ case AARCH64_FAMINMAX_BUILTIN_FAMAX2D:
+ case AARCH64_FAMINMAX_BUILTIN_FAMIN4H:
+ case AARCH64_FAMINMAX_BUILTIN_FAMIN8H:
+ case AARCH64_FAMINMAX_BUILTIN_FAMIN2S:
+ case AARCH64_FAMINMAX_BUILTIN_FAMIN4S:
+ case AARCH64_FAMINMAX_BUILTIN_FAMIN2D:
+ return aarch64_check_required_extensions (location, decl,
+ AARCH64_FL_FAMINMAX);
+
default:
break;
}
@@ -3189,6 +3274,44 @@ aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target)
return ops[0].value;
}
+static rtx
+aarch64_expand_builtin_faminmax (unsigned int fcode, tree exp, rtx target)
+{
+ machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
+ rtx op0 = force_reg (mode, expand_normal (CALL_EXPR_ARG (exp, 0)));
+ rtx op1 = force_reg (mode, expand_normal (CALL_EXPR_ARG (exp, 1)));
+
+ enum insn_code icode;
+ if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMAX4H)
+ icode = CODE_FOR_aarch64_famaxv4hf;
+ else if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMAX8H)
+ icode = CODE_FOR_aarch64_famaxv8hf;
+ else if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMAX2S)
+ icode = CODE_FOR_aarch64_famaxv2sf;
+ else if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMAX4S)
+ icode = CODE_FOR_aarch64_famaxv4sf;
+ else if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMAX2D)
+ icode = CODE_FOR_aarch64_famaxv2df;
+ else if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMIN4H)
+ icode = CODE_FOR_aarch64_faminv4hf;
+ else if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMIN8H)
+ icode = CODE_FOR_aarch64_faminv8hf;
+ else if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMIN2S)
+ icode = CODE_FOR_aarch64_faminv2sf;
+ else if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMIN4S)
+ icode = CODE_FOR_aarch64_faminv4sf;
+ else if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMIN2D)
+ icode = CODE_FOR_aarch64_faminv2df;
+ else
+ gcc_unreachable ();
+
+ rtx pat = GEN_FCN (icode) (target, op0, op1);
+
+ emit_insn (pat);
+
+ return target;
+}
+
/* Expand an expression EXP as fpsr or fpcr setter (depending on
UNSPEC) using MODE. */
static void
@@ -3368,6 +3491,9 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
if (fcode >= AARCH64_REV16
&& fcode <= AARCH64_RBITLL)
return aarch64_expand_builtin_data_intrinsic (fcode, exp, target);
+ if (fcode >= AARCH64_FAMINMAX_BUILTIN_FAMAX4H
+ && fcode <= AARCH64_FAMINMAX_BUILTIN_FAMIN2D)
+ return aarch64_expand_builtin_faminmax (fcode, exp, target);
gcc_unreachable ();
}
@@ -234,6 +234,8 @@ AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs")
AARCH64_OPT_EXTENSION("fp8", FP8, (SIMD), (), (), "fp8")
+AARCH64_OPT_EXTENSION("faminmax", FAMINMAX, (SIMD), (), (), "faminmax")
+
#undef AARCH64_OPT_FMV_EXTENSION
#undef AARCH64_OPT_EXTENSION
#undef AARCH64_FMV_FEATURE
@@ -9910,3 +9910,14 @@
"shl\\t%d0, %d1, #16"
[(set_attr "type" "neon_shift_imm")]
)
+
+;; faminmax
+(define_insn "aarch64_<faminmax_uns_op><mode>"
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")]
+ FAMINMAX_UNS))]
+ "TARGET_FAMINMAX"
+ "<faminmax_uns_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+ [(set_attr "type" "neon_fp_aminmax<q>")]
+)
@@ -457,6 +457,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
enabled through +gcs. */
#define TARGET_GCS AARCH64_HAVE_ISA (GCS)
+/* Floating Point Absolute Maximum/Minimum extension instructions are
+ enabled through +faminmax. */
+#define TARGET_FAMINMAX AARCH64_HAVE_ISA (FAMINMAX)
+
/* Prefer different predicate registers for the output of a predicated
operation over re-using an existing input predicate. */
#define TARGET_SVE_PRED_CLOBBER (TARGET_SVE \
@@ -1057,6 +1057,8 @@
UNSPEC_BFCVTN2 ; Used in aarch64-simd.md.
UNSPEC_BFCVT ; Used in aarch64-simd.md.
UNSPEC_FCVTXN ; Used in aarch64-simd.md.
+ UNSPEC_FAMAX ; Used in aarch64-simd.md.
+ UNSPEC_FAMIN ; Used in aarch64-simd.md.
;; All used in aarch64-sve2.md
UNSPEC_FCVTN
@@ -4463,3 +4465,10 @@
(UNSPECV_SET_FPCR "fpcr")])
(define_int_attr bits_etype [(8 "b") (16 "h") (32 "s") (64 "d")])
+
+;; Iterators and attributes for faminmax
+
+(define_int_iterator FAMINMAX_UNS [UNSPEC_FAMAX UNSPEC_FAMIN])
+
+(define_int_attr faminmax_uns_op
+ [(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")])
@@ -492,6 +492,8 @@
; neon_fp_reduc_minmax_s_q
; neon_fp_reduc_minmax_d
; neon_fp_reduc_minmax_d_q
+; neon_fp_aminmax
+; neon_fp_aminmax_q
; neon_fp_cvt_narrow_s_q
; neon_fp_cvt_narrow_d_q
; neon_fp_cvt_widen_h
@@ -1044,6 +1046,8 @@
neon_fp_reduc_minmax_d,\
neon_fp_reduc_minmax_d_q,\
\
+ neon_fp_aminmax,\
+ neon_fp_aminmax_q,\
neon_fp_cvt_narrow_s_q,\
neon_fp_cvt_narrow_d_q,\
neon_fp_cvt_widen_h,\
@@ -1264,6 +1268,7 @@
 neon_fp_reduc_add_d_q, neon_fp_reduc_minmax_s,
 neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d,\
 neon_fp_reduc_minmax_d_q,\
+ neon_fp_aminmax, neon_fp_aminmax_q,\
neon_fp_cvt_narrow_s_q, neon_fp_cvt_narrow_d_q,\
neon_fp_cvt_widen_h, neon_fp_cvt_widen_s, neon_fp_to_int_s,\
neon_fp_to_int_s_q, neon_int_to_fp_s, neon_int_to_fp_s_q,\
@@ -21865,6 +21865,8 @@ Enable support for Armv8.9-a/9.4-a translation hardening extension.
Enable the RCpc3 (Release Consistency) extension.
@item fp8
Enable the fp8 (8-bit floating point) extension.
+@item faminmax
+Enable the Floating Point Absolute Maximum/Minimum extension.
@end table
new file mode 100644
@@ -0,0 +1,10 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-march=armv9-a" } */
+
+#include "arm_neon.h"
+
+void
+test (float32x4_t a, float32x4_t b)
+{
+ vamaxq_f32 (a, b); /* { dg-error {ACLE function 'vamaxq_f32' requires ISA extension 'faminmax'} } */
+}
new file mode 100644
@@ -0,0 +1,115 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-O3 -march=armv9-a+faminmax" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+** test_vamax_f16:
+** famax v0.4h, v0.4h, v1.4h
+** ret
+*/
+float16x4_t
+test_vamax_f16 (float16x4_t a, float16x4_t b)
+{
+ return vamax_f16 (a, b);
+}
+
+/*
+** test_vamaxq_f16:
+** famax v0.8h, v0.8h, v1.8h
+** ret
+*/
+float16x8_t
+test_vamaxq_f16 (float16x8_t a, float16x8_t b)
+{
+ return vamaxq_f16 (a, b);
+}
+
+/*
+** test_vamax_f32:
+** famax v0.2s, v0.2s, v1.2s
+** ret
+*/
+float32x2_t
+test_vamax_f32 (float32x2_t a, float32x2_t b)
+{
+ return vamax_f32 (a, b);
+}
+
+/*
+** test_vamaxq_f32:
+** famax v0.4s, v0.4s, v1.4s
+** ret
+*/
+float32x4_t
+test_vamaxq_f32 (float32x4_t a, float32x4_t b)
+{
+ return vamaxq_f32 (a, b);
+}
+
+/*
+** test_vamaxq_f64:
+** famax v0.2d, v0.2d, v1.2d
+** ret
+*/
+float64x2_t
+test_vamaxq_f64 (float64x2_t a, float64x2_t b)
+{
+ return vamaxq_f64 (a, b);
+}
+
+/*
+** test_vamin_f16:
+** famin v0.4h, v0.4h, v1.4h
+** ret
+*/
+float16x4_t
+test_vamin_f16 (float16x4_t a, float16x4_t b)
+{
+ return vamin_f16 (a, b);
+}
+
+/*
+** test_vaminq_f16:
+** famin v0.8h, v0.8h, v1.8h
+** ret
+*/
+float16x8_t
+test_vaminq_f16 (float16x8_t a, float16x8_t b)
+{
+ return vaminq_f16 (a, b);
+}
+
+/*
+** test_vamin_f32:
+** famin v0.2s, v0.2s, v1.2s
+** ret
+*/
+float32x2_t
+test_vamin_f32 (float32x2_t a, float32x2_t b)
+{
+ return vamin_f32 (a, b);
+}
+
+/*
+** test_vaminq_f32:
+** famin v0.4s, v0.4s, v1.4s
+** ret
+*/
+float32x4_t
+test_vaminq_f32 (float32x4_t a, float32x4_t b)
+{
+ return vaminq_f32 (a, b);
+}
+
+/*
+** test_vaminq_f64:
+** famin v0.2d, v0.2d, v1.2d
+** ret
+*/
+float64x2_t
+test_vaminq_f64 (float64x2_t a, float64x2_t b)
+{
+ return vaminq_f64 (a, b);
+}