@@ -84,6 +84,7 @@
#include "aarch64-feature-deps.h"
#include "config/arm/aarch-common.h"
#include "config/arm/aarch-common-protos.h"
+#include "ssa.h"
/* This file should be included last. */
#include "target-def.h"
@@ -16411,20 +16412,20 @@ aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info,
if (code != PLUS_EXPR && code != MINUS_EXPR)
return false;
- for (int i = 1; i < 3; ++i)
+ auto is_mul_result = [&](int i)
{
tree rhs = gimple_op (assign, i);
/* ??? Should we try to check for a single use as well? */
if (TREE_CODE (rhs) != SSA_NAME)
- continue;
+ return false;
stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
if (!def_stmt_info
|| STMT_VINFO_DEF_TYPE (def_stmt_info) != vect_internal_def)
- continue;
+ return false;
gassign *rhs_assign = dyn_cast<gassign *> (def_stmt_info->stmt);
if (!rhs_assign || gimple_assign_rhs_code (rhs_assign) != MULT_EXPR)
- continue;
+ return false;
if (vec_flags & VEC_ADVSIMD)
{
@@ -16444,8 +16445,19 @@ aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info,
}
return true;
- }
- return false;
+ };
+
+ if (code == MINUS_EXPR && (vec_flags & VEC_ADVSIMD))
+ /* Advanced SIMD doesn't have FNMADD/FNMSUB/FNMLA/FNMLS, so the
+ multiplication must be on the second operand (to form an FMLS).
+ But if both operands are multiplications and the second operand
+ is used more than once, we'll instead negate the second operand
+ and use it as an accumulator for the first operand. */
+ return (is_mul_result (2)
+ && (has_single_use (gimple_assign_rhs2 (assign))
+ || !is_mul_result (1)));
+
+ return is_mul_result (1) || is_mul_result (2);
}
/* Return true if STMT_INFO is the second part of a two-statement boolean AND
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-options "-O2 -mcpu=neoverse-v1 --param aarch64-autovec-preference=1 -fdump-tree-vect-details" } */
+
+void /* Test kernel: fills rows 0 and 1 of x from products of rows of y.  */
+f (float x[restrict][100], float y[restrict][100]) /* x is written, y is only read.  */
+{
+ for (int i = 0; i < 100; ++i) /* One element of each output row per iteration.  */
+ {
+ x[0][i] = y[0][i] * y[1][i] - y[3][i] * y[4][i]; /* Product minus product.  */
+ x[1][i] = y[1][i] * y[2][i] - y[3][i] * y[4][i]; /* Same subtrahend y[3][i] * y[4][i] as the statement above.  */
+ }
+}
+
+/* { dg-final { scan-tree-dump {_[0-9]+ - _[0-9]+ 1 times vector_stmt costs 2 } "vect" } } */
+/* { dg-final { scan-tree-dump-not {vector_stmt costs 0 } "vect" } } */
+/* { dg-final { scan-tree-dump {_[0-9]+ - _[0-9]+ 1 times scalar_stmt costs 0 } "vect" } } */
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-options "-O2 -mcpu=neoverse-v1 --param aarch64-autovec-preference=2 -fdump-tree-vect-details" } */
+
+void /* Test kernel: fills rows 0 and 1 of x from products of rows of y.  */
+f (float x[restrict][100], float y[restrict][100]) /* x is written, y is only read.  */
+{
+ for (int i = 0; i < 100; ++i) /* One element of each output row per iteration.  */
+ {
+ x[0][i] = y[0][i] * y[1][i] - y[3][i] * y[4][i]; /* Product minus product.  */
+ x[1][i] = y[1][i] * y[2][i] - y[3][i] * y[4][i]; /* Same subtrahend y[3][i] * y[4][i] as the statement above.  */
+ }
+}
+
+/* { dg-final { scan-tree-dump {_[0-9]+ - _[0-9]+ 1 times vector_stmt costs 0 } "vect" } } */
+/* { dg-final { scan-tree-dump {_[0-9]+ - _[0-9]+ 1 times scalar_stmt costs 0 } "vect" } } */