@@ -47,11 +47,31 @@
#include "aarch64-builtins.h"
#include "ssa.h"
#include "gimple-fold.h"
+#include "tree-ssa.h"
using namespace aarch64_sve;
namespace {
+/* Return true if VAL is undefined: an uninitialized SSA name or svundef.  */
+static bool
+is_undef (tree val)
+{
+ if (TREE_CODE (val) == SSA_NAME)
+ {
+ if (ssa_undefined_value_p (val, false))
+ return true;
+
+ gimple *def = SSA_NAME_DEF_STMT (val);
+ if (gcall *call = dyn_cast<gcall *> (def))
+ if (tree fndecl = gimple_call_fndecl (call))
+ if (const function_instance *instance = lookup_fndecl (fndecl))
+ if (instance->base == functions::svundef)
+ return true;
+ }
+ return false;
+}
+
/* Return the UNSPEC_CMLA* unspec for rotation amount ROT. */
static int
unspec_cmla (int rot)
@@ -1142,6 +1162,13 @@ public:
expand (function_expander &e) const override
{
machine_mode mode = e.vector_mode (0);
+
+ /* If the SVE argument is undefined, we just need to reinterpret the
+ Advanced SIMD argument as an SVE vector. */
+ if (!BYTES_BIG_ENDIAN
+ && is_undef (CALL_EXPR_ARG (e.call_expr, 0)))
+ return simplify_gen_subreg (mode, e.args[1], GET_MODE (e.args[1]), 0);
+
rtx_vector_builder builder (VNx16BImode, 16, 2);
for (unsigned int i = 0; i < 16; i++)
builder.quick_push (CONST1_RTX (BImode));
@@ -1055,6 +1055,22 @@ get_vector_type (sve_type type)
return acle_vector_types[type.num_vectors - 1][vector_type];
}
+/* If FNDECL is an SVE built-in function, return the function_instance
+   that describes it, otherwise return null.  */
+const function_instance *
+lookup_fndecl (tree fndecl)
+{
+ if (!fndecl_built_in_p (fndecl, BUILT_IN_MD))
+ return nullptr;
+
+ unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
+ if ((code & AARCH64_BUILTIN_CLASS) != AARCH64_BUILTIN_SVE)
+ return nullptr;
+
+ unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT;
+ return &(*registered_functions)[subcode]->instance;
+}
+
/* Report an error against LOCATION that the user has tried to use
function FNDECL when extension EXTENSION is disabled. */
static void
@@ -810,6 +810,7 @@ extern tree acle_svprfop;
bool vector_cst_all_same (tree, unsigned int);
bool is_ptrue (tree, unsigned int);
+const function_instance *lookup_fndecl (tree);
/* Try to find a mode with the given mode_suffix_info fields. Return the
mode on success or MODE_none on failure. */
new file mode 100644
@@ -0,0 +1,94 @@
+/* { dg-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon_sve_bridge.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+svint32_t svundef_foo ();
+
+/*
+** f1: { target aarch64_little_endian }
+** ldr q0, \[x0\]
+** ret
+*/
+svint32_t
+f1 (int *a)
+{
+ return svset_neonq (svundef_s32 (), vld1q_s32 (a));
+}
+
+/*
+** f2: { target aarch64_little_endian }
+** ldr q0, \[x0\]
+** ret
+*/
+svint32_t
+f2 (int *a)
+{
+ svint32_t undef;
+ return svset_neonq (undef, vld1q_s32 (a));
+}
+
+/*
+** f3: { target aarch64_little_endian }
+** mov [vz]0.[^\n]+, [vz]1.[^\n]+
+** ret
+*/
+svint32_t
+f3 (int32x4_t v0, int32x4_t v1)
+{
+ return svset_neonq (svundef_s32 (), v1);
+}
+
+/*
+** f4: { target aarch64_little_endian }
+** uzp1 z([0-9]+)\.s, z0\.s, z1\.s
+** ldr q([0-9]+), \[x0\]
+** ptrue p([0-7])\.s, vl4
+** sel z0\.s, p\3, z\2\.s, z\1\.s
+** ret
+*/
+svint32_t
+f4 (int *a, svint32_t x, svint32_t y)
+{
+ x = svuzp1 (x, y);
+ int32x4_t z = vld1q_s32 (a);
+ return svset_neonq (x, z);
+}
+
+/*
+** f5:
+** ...
+** bl svundef_foo
+** ...
+** sel z0\.s, [^\n]+
+** ...
+** ret
+*/
+svint32_t
+f5 (int *a)
+{
+ return svset_neonq (svundef_foo (), vld1q_s32 (a));
+}
+
+/*
+** f6:
+** ...
+** blr x[0-9]+
+** ...
+** sel z0\.s, [^\n]+
+** ...
+** ret
+*/
+svint32_t
+f6 (int *a, svint32_t (*svundef_s32) ())
+{
+ return svset_neonq (svundef_s32 (), vld1q_s32 (a));
+}
+
+#ifdef __cplusplus
+}
+#endif
new file mode 100644
@@ -0,0 +1,46 @@
+/* { dg-options "-O -msve-vector-bits=256" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon_sve_bridge.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** f1: { target aarch64_little_endian }
+** ldr q0, \[x0\]
+** ret
+*/
+svint32_t
+f1 (int *a)
+{
+ return svset_neonq (svundef_s32 (), vld1q_s32 (a));
+}
+
+/*
+** f2: { target aarch64_little_endian }
+** ldr q0, \[x0\]
+** ret
+*/
+svint32_t
+f2 (int *a)
+{
+ svint32_t undef;
+ return svset_neonq (undef, vld1q_s32 (a));
+}
+
+/*
+** f3: { target aarch64_little_endian }
+** mov [vz]0.[^\n]+, [vz]1.[^\n]+
+** ret
+*/
+svint32_t
+f3 (int32x4_t v0, int32x4_t v1)
+{
+ return svset_neonq (svundef_s32 (), v1);
+}
+
+#ifdef __cplusplus
+}
+#endif