@@ -2561,6 +2561,17 @@ vector_cst_all_same (tree v, unsigned int step)
return true;
}
+/* Return true if V is a constant predicate that acts as a ptrue when
+   predicating STEP-byte elements.
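+   For example, the constant generated for svptrue_b32 () acts as a
+   ptrue when predicating 4-byte elements, but not when predicating
+   1-byte or 2-byte elements.  */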
+bool
+is_ptrue (tree v, unsigned int step)
+{
+ return (TREE_CODE (v) == VECTOR_CST
+ && TYPE_MODE (TREE_TYPE (v)) == VNx16BImode
+ && integer_nonzerop (VECTOR_CST_ENCODED_ELT (v, 0))
+ && vector_cst_all_same (v, step));
+}
+
gimple_folder::gimple_folder (const function_instance &instance, tree fndecl,
gimple_stmt_iterator *gsi_in, gcall *call_in)
: function_call_info (gimple_location (call_in), instance, fndecl),
@@ -2635,6 +2646,37 @@ gimple_folder::redirect_call (const function_instance &instance)
return call;
}
+/* Redirect _z and _m calls to _x functions if the predicate is all-true.
+   This allows us to use unpredicated instructions, where available.
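+   For example, svadd_z (svptrue_b8 (), x, y) computes the same value
+   in every lane as svadd_x (svptrue_b8 (), x, y), and the latter can
+   use an unpredicated ADD.  */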
+gimple *
+gimple_folder::redirect_pred_x ()
+{
+ if (pred != PRED_z && pred != PRED_m)
+ return nullptr;
+
+ if (gimple_call_num_args (call) < 2)
+ return nullptr;
+
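+  /* Punt if the result or the leading arguments are not vectors, since
+     the element size reasoning below does not apply to them.  */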
+ tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
+ tree arg0_type = type_argument_type (TREE_TYPE (fndecl), 1);
+ tree arg1_type = type_argument_type (TREE_TYPE (fndecl), 2);
+ if (!VECTOR_TYPE_P (lhs_type)
+ || !VECTOR_TYPE_P (arg0_type)
+ || !VECTOR_TYPE_P (arg1_type))
+ return nullptr;
+
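+  /* The predicate must act as a ptrue for the widest element size
+     involved (hence the MAX below); a ptrue for a wider element size
+     would leave some of those lanes inactive.  */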
+ auto lhs_step = element_precision (lhs_type);
+ auto rhs_step = element_precision (arg1_type);
+ auto step = MAX (lhs_step, rhs_step);
+ if (!multiple_p (step, BITS_PER_UNIT)
+ || !is_ptrue (gimple_call_arg (call, 0), step / BITS_PER_UNIT))
+ return nullptr;
+
+ function_instance instance (*this);
+ instance.pred = PRED_x;
+ return redirect_call (instance);
+}
+
/* Fold the call to constant VAL. */
gimple *
gimple_folder::fold_to_cstu (poly_uint64 val)
@@ -2707,6 +2749,10 @@ gimple_folder::fold ()
if (!lhs && TREE_TYPE (gimple_call_fntype (call)) != void_type_node)
return NULL;
+ /* First try some simplifications that are common to many functions. */
+ if (auto *call = redirect_pred_x ())
+ return call;
+
return base->fold (*this);
}
@@ -500,6 +500,8 @@ public:
tree load_store_cookie (tree);
gimple *redirect_call (const function_instance &);
+ gimple *redirect_pred_x ();
+
gimple *fold_to_cstu (poly_uint64);
gimple *fold_to_pfalse ();
gimple *fold_to_ptrue ();
@@ -673,6 +675,7 @@ extern tree acle_svpattern;
extern tree acle_svprfop;
bool vector_cst_all_same (tree, unsigned int);
+bool is_ptrue (tree, unsigned int);
/* Return the ACLE type svbool_t. */
inline tree
new file mode 100644
@@ -0,0 +1,378 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
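+/* Test that _z and _m calls whose governing predicate is an all-true
+   constant for the element sizes involved are treated like the
+   corresponding _x calls, allowing unpredicated instructions to be
+   used where available, while calls whose ptrue is for a wider element
+   size keep their original predication.  */
+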
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** add1:
+** add z0\.s, (z1\.s, z0\.s|z0\.s, z1\.s)
+** ret
+*/
+svint32_t
+add1 (svint32_t x, svint32_t y)
+{
+ return svadd_z (svptrue_b8 (), x, y);
+}
+
+/*
+** add2:
+** add z0\.s, (z1\.s, z0\.s|z0\.s, z1\.s)
+** ret
+*/
+svint32_t
+add2 (svint32_t x, svint32_t y)
+{
+ return svadd_z (svptrue_b16 (), x, y);
+}
+
+/*
+** add3:
+** add z0\.s, (z1\.s, z0\.s|z0\.s, z1\.s)
+** ret
+*/
+svint32_t
+add3 (svint32_t x, svint32_t y)
+{
+ return svadd_z (svptrue_b32 (), x, y);
+}
+
+/*
+** add4:
+** ...
+** movprfx [^\n]+
+** ...
+** ret
+*/
+svint32_t
+add4 (svint32_t x, svint32_t y)
+{
+ return svadd_z (svptrue_b64 (), x, y);
+}
+
+/*
+** add5:
+** add z0\.s, (z1\.s, z0\.s|z0\.s, z1\.s)
+** ret
+*/
+svint32_t
+add5 (svint32_t x, svint32_t y)
+{
+ return svadd_m (svptrue_b8 (), x, y);
+}
+
+/*
+** add6:
+** add z0\.s, (z1\.s, z0\.s|z0\.s, z1\.s)
+** ret
+*/
+svint32_t
+add6 (svint32_t x, svint32_t y)
+{
+ return svadd_m (svptrue_b16 (), x, y);
+}
+
+/*
+** add7:
+** add z0\.s, (z1\.s, z0\.s|z0\.s, z1\.s)
+** ret
+*/
+svint32_t
+add7 (svint32_t x, svint32_t y)
+{
+ return svadd_m (svptrue_b32 (), x, y);
+}
+
+/*
+** add8:
+** ptrue (p[0-7])\.d(?:, all)?
+** add z0\.s, \1/m, z0\.s, z1\.s
+** ret
+*/
+svint32_t
+add8 (svint32_t x, svint32_t y)
+{
+ return svadd_m (svptrue_b64 (), x, y);
+}
+
+/*
+** add9:
+** ptrue (p[0-7])\.s(?:, all)?
+** add z0\.h, \1/m, z0\.h, z1\.h
+** ret
+*/
+svint16_t
+add9 (svint16_t x, svint16_t y)
+{
+ return svadd_m (svptrue_b32 (), x, y);
+}
+
+/*
+** and1:
+** and z0\.s, z0\.s, #(?:0x)?1
+** ret
+*/
+svint32_t
+and1 (svint32_t x)
+{
+ return svand_z (svptrue_b8 (), x, 1);
+}
+
+/*
+** and2:
+** and z0\.s, z0\.s, #(?:0x)?1
+** ret
+*/
+svint32_t
+and2 (svint32_t x)
+{
+ return svand_z (svptrue_b16 (), x, 1);
+}
+
+/*
+** and3:
+** and z0\.s, z0\.s, #(?:0x)?1
+** ret
+*/
+svint32_t
+and3 (svint32_t x)
+{
+ return svand_z (svptrue_b32 (), x, 1);
+}
+
+/*
+** and4:
+** (?!and z0\.s, z0\.s, #).*
+** ret
+*/
+svint32_t
+and4 (svint32_t x)
+{
+ return svand_z (svptrue_b64 (), x, 1);
+}
+
+/*
+** and5:
+** and z0\.s, z0\.s, #(?:0x)?1
+** ret
+*/
+svint32_t
+and5 (svint32_t x)
+{
+ return svand_m (svptrue_b8 (), x, 1);
+}
+
+/*
+** and6:
+** and z0\.s, z0\.s, #(?:0x)?1
+** ret
+*/
+svint32_t
+and6 (svint32_t x)
+{
+ return svand_m (svptrue_b16 (), x, 1);
+}
+
+/*
+** and7:
+** and z0\.s, z0\.s, #(?:0x)?1
+** ret
+*/
+svint32_t
+and7 (svint32_t x)
+{
+ return svand_m (svptrue_b32 (), x, 1);
+}
+
+/*
+** and8:
+** (?!and z0\.s, z0\.s, #).*
+** ret
+*/
+svint32_t
+and8 (svint32_t x)
+{
+ return svand_m (svptrue_b64 (), x, 1);
+}
+
+/*
+** and9:
+** (
+** and p0\.b, p0/z, p1\.b, p1\.b
+** |
+** and p0\.b, p1/z, p0\.b, p0\.b
+** )
+** ret
+*/
+svbool_t
+and9 (svbool_t x, svbool_t y)
+{
+ return svand_z (svptrue_b8 (), x, y);
+}
+
+/*
+** not1:
+** ptrue (p[0-7])\.b(?:, all)?
+** not z0\.s, \1/m, z1\.s
+** ret
+*/
+svint32_t
+not1 (svint32_t x, svint32_t y)
+{
+ return svnot_m (x, svptrue_b8 (), y);
+}
+
+/*
+** cvt1:
+** ptrue (p[0-7])\.b(?:, all)?
+** fcvtzs z0\.s, \1/m, z0\.h
+** ret
+*/
+svint32_t
+cvt1 (svfloat16_t x)
+{
+ return svcvt_s32_z (svptrue_b8 (), x);
+}
+
+/*
+** cvt2:
+** ptrue (p[0-7])\.b(?:, all)?
+** fcvtzs z0\.s, \1/m, z0\.h
+** ret
+*/
+svint32_t
+cvt2 (svfloat16_t x)
+{
+ return svcvt_s32_z (svptrue_b16 (), x);
+}
+
+/*
+** cvt3:
+** ptrue (p[0-7])\.b(?:, all)?
+** fcvtzs z0\.s, \1/m, z0\.h
+** ret
+*/
+svint32_t
+cvt3 (svfloat16_t x)
+{
+ return svcvt_s32_z (svptrue_b32 (), x);
+}
+
+/*
+** cvt4:
+** ...
+** movprfx [^\n]+
+** ...
+** ret
+*/
+svint32_t
+cvt4 (svfloat16_t x)
+{
+ return svcvt_s32_z (svptrue_b64 (), x);
+}
+
+/*
+** cvt5:
+** ptrue (p[0-7])\.b(?:, all)?
+** fcvt z0\.h, \1/m, z0\.s
+** ret
+*/
+svfloat16_t
+cvt5 (svfloat32_t x)
+{
+ return svcvt_f16_z (svptrue_b8 (), x);
+}
+
+/*
+** cvt6:
+** ptrue (p[0-7])\.b(?:, all)?
+** fcvt z0\.h, \1/m, z0\.s
+** ret
+*/
+svfloat16_t
+cvt6 (svfloat32_t x)
+{
+ return svcvt_f16_z (svptrue_b16 (), x);
+}
+
+/*
+** cvt7:
+** ptrue (p[0-7])\.b(?:, all)?
+** fcvt z0\.h, \1/m, z0\.s
+** ret
+*/
+svfloat16_t
+cvt7 (svfloat32_t x)
+{
+ return svcvt_f16_z (svptrue_b32 (), x);
+}
+
+/*
+** cvt8:
+** ...
+** movprfx [^\n]+
+** ...
+** ret
+*/
+svfloat16_t
+cvt8 (svfloat32_t x)
+{
+ return svcvt_f16_z (svptrue_b64 (), x);
+}
+
+/*
+** cvt9:
+** ptrue (p[0-7])\.b(?:, all)?
+** scvtf z0\.h, \1/m, z0\.h
+** ret
+*/
+svfloat16_t
+cvt9 (svint16_t x)
+{
+ return svcvt_f16_z (svptrue_b8 (), x);
+}
+
+/*
+** cvt10:
+** ptrue (p[0-7])\.b(?:, all)?
+** scvtf z0\.h, \1/m, z0\.h
+** ret
+*/
+svfloat16_t
+cvt10 (svint16_t x)
+{
+ return svcvt_f16_z (svptrue_b16 (), x);
+}
+
+/*
+** cvt11:
+** ...
+** movprfx [^\n]+
+** ...
+** ret
+*/
+svfloat16_t
+cvt11 (svint16_t x)
+{
+ return svcvt_f16_z (svptrue_b32 (), x);
+}
+
+/*
+** cvt12:
+** ...
+** movprfx [^\n]+
+** ...
+** ret
+*/
+svfloat16_t
+cvt12 (svint16_t x)
+{
+ return svcvt_f16_z (svptrue_b64 (), x);
+}
+
+#ifdef __cplusplus
+}
+#endif