@@ -45,6 +45,8 @@
#include "arm-builtins.h"
#include "stringpool.h"
#include "attribs.h"
+#include "basic-block.h"
+#include "gimple.h"
#define SIMD_MAX_BUILTIN_ARGS 7
@@ -1298,6 +1300,13 @@ enum arm_builtins
#define VAR1(T, N, X) \
ARM_BUILTIN_##N,
+ ARM_BUILTIN_NEON_SDOTV8QI,
+ ARM_BUILTIN_NEON_SDOTV16QI,
+ ARM_BUILTIN_NEON_UDOTV8QI,
+ ARM_BUILTIN_NEON_UDOTV16QI,
+ ARM_BUILTIN_NEON_USDOTV8QI,
+ ARM_BUILTIN_NEON_USDOTV16QI,
+
ARM_BUILTIN_ACLE_BASE,
ARM_BUILTIN_SAT_IMM_CHECK = ARM_BUILTIN_ACLE_BASE,
@@ -2648,6 +2657,63 @@ arm_init_fp16_builtins (void)
		   "__fp16");
 }
+/* Register the Advanced SIMD dot product builtins (sdot, udot and
+   usdot, in both 64-bit and 128-bit vector forms) and record their
+   decls in arm_builtin_decls, keyed by their ARM_BUILTIN_NEON_* code.  */
+static void
+arm_init_dotprod_builtins (void)
+{
+  tree fndecl = NULL;
+  tree ftype = NULL;
+
+  tree uv8qi = arm_simd_builtin_type (V8QImode, qualifier_unsigned);
+  tree sv8qi = arm_simd_builtin_type (V8QImode, qualifier_none);
+  tree uv16qi = arm_simd_builtin_type (V16QImode, qualifier_unsigned);
+  tree sv16qi = arm_simd_builtin_type (V16QImode, qualifier_none);
+  tree uv2si = arm_simd_builtin_type (V2SImode, qualifier_unsigned);
+  tree sv2si = arm_simd_builtin_type (V2SImode, qualifier_none);
+  tree uv4si = arm_simd_builtin_type (V4SImode, qualifier_unsigned);
+  tree sv4si = arm_simd_builtin_type (V4SImode, qualifier_none);
+
+  struct builtin_decls_data
+  {
+    tree out_type_node;
+    tree in_type1_node;
+    tree in_type2_node;
+    const char *builtin_name;
+    int function_code;
+  };
+
+#define NAME(A) "__builtin_neon_" #A
+#define ENUM(B) ARM_BUILTIN_NEON_##B
+
+  builtin_decls_data bdda[] =
+  {
+    { sv2si, sv8qi, sv8qi, NAME (sdotv8qi), ENUM (SDOTV8QI) },
+    { uv2si, uv8qi, uv8qi, NAME (udotv8qi_uuuu), ENUM (UDOTV8QI) },
+    { sv2si, uv8qi, sv8qi, NAME (usdotv8qi_ssus), ENUM (USDOTV8QI) },
+    { sv4si, sv16qi, sv16qi, NAME (sdotv16qi), ENUM (SDOTV16QI) },
+    { uv4si, uv16qi, uv16qi, NAME (udotv16qi_uuuu), ENUM (UDOTV16QI) },
+    { sv4si, uv16qi, sv16qi, NAME (usdotv16qi_ssus), ENUM (USDOTV16QI) },
+  };
+
+#undef NAME
+#undef ENUM
+
+  builtin_decls_data *bdd = bdda;
+  builtin_decls_data *bdd_end = bdd + (ARRAY_SIZE (bdda));
+
+  for (; bdd < bdd_end; bdd++)
+  {
+    ftype = build_function_type_list (bdd->out_type_node, bdd->out_type_node,
+				      bdd->in_type1_node, bdd->in_type2_node,
+				      NULL_TREE);
+    fndecl = arm_general_add_builtin_function (bdd->builtin_name,
+					       ftype, bdd->function_code);
+    arm_builtin_decls[bdd->function_code] = fndecl;
+  }
+}
+
void
arm_init_builtins (void)
{
@@ -2676,6 +2739,7 @@ arm_init_builtins (void)
arm_init_neon_builtins ();
arm_init_vfp_builtins ();
arm_init_crypto_builtins ();
+ arm_init_dotprod_builtins ();
}
if (TARGET_CDE)
@@ -2738,6 +2802,42 @@ arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
     }
 }
+/* Try to fold STMT, given that it's a call to the built-in function with
+   subcode FCODE.  Return the new statement on success and null on
+   failure.  */
+gimple *
+arm_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
+				 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED)
+{
+  gimple *new_stmt = NULL;
+  unsigned nargs = gimple_call_num_args (stmt);
+  tree *args = (nargs > 0
+		? gimple_call_arg_ptr (stmt, 0)
+		: &error_mark_node);
+
+  switch (fcode)
+    {
+    case ARM_BUILTIN_NEON_SDOTV8QI:
+    case ARM_BUILTIN_NEON_SDOTV16QI:
+    case ARM_BUILTIN_NEON_UDOTV8QI:
+    case ARM_BUILTIN_NEON_UDOTV16QI:
+    case ARM_BUILTIN_NEON_USDOTV8QI:
+    case ARM_BUILTIN_NEON_USDOTV16QI:
+      /* A call whose result is unused has a null lhs; building an
+	 assignment from it would create invalid GIMPLE, so leave the
+	 statement alone in that case.  */
+      if (!gimple_call_lhs (stmt))
+	break;
+      new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
+				      DOT_PROD_EXPR, args[1],
+				      args[2], args[0]);
+      break;
+    default:
+      break;
+    }
+  return new_stmt;
+}
+
/* Errors in the source file can cause expand_expr to return const0_rtx
where we expect a vector. To avoid crashing, use one of the vector
clear instructions. */
@@ -57,6 +57,9 @@ extern rtx arm_expand_builtin (tree exp, rtx target, rtx subtarget
extern tree arm_builtin_decl (unsigned code, bool initialize_p
ATTRIBUTE_UNUSED);
extern void arm_init_builtins (void);
+extern gimple *arm_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
+ gimple_stmt_iterator *gsi
+ ATTRIBUTE_UNUSED);
extern void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
extern rtx arm_simd_vect_par_cnst_half (machine_mode mode, bool high);
extern bool arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
@@ -2873,6 +2873,7 @@ arm_gimple_fold_builtin (gimple_stmt_iterator *gsi)
switch (code & ARM_BUILTIN_CLASS)
{
case ARM_BUILTIN_GENERAL:
+ new_stmt = arm_general_gimple_fold_builtin (subcode, stmt, gsi);
break;
case ARM_BUILTIN_MVE:
new_stmt = arm_mve::gimple_fold_builtin (subcode, stmt);
@@ -349,14 +349,11 @@ VAR13 (STORE1, vst4,
v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
VAR11 (STORE1LANE, vst4_lane,
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
-VAR2 (TERNOP, sdot, v8qi, v16qi)
-VAR2 (UTERNOP, udot, v8qi, v16qi)
VAR2 (MAC_LANE, sdot_lane, v8qi, v16qi)
VAR2 (UMAC_LANE, udot_lane, v8qi, v16qi)
VAR2 (MAC_LANE, sdot_laneq, v8qi, v16qi)
VAR2 (UMAC_LANE, udot_laneq, v8qi, v16qi)
-VAR2 (USTERNOP, usdot, v8qi, v16qi)
VAR2 (USMAC_LANE_QUADTUP, usdot_lane, v8qi, v16qi)
VAR2 (SUMAC_LANE_QUADTUP, sudot_lane, v8qi, v16qi)
VAR2 (USMAC_LANE_QUADTUP, usdot_laneq, v8qi, v16qi)
@@ -2989,7 +2989,7 @@ (define_expand "cmul<conj_op><mode>3"
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
-(define_insn "<sup>dot_prod<vsi2qi>"
+(define_insn "<sup>dot_prod<mode><vsi2qi>"
[(set (match_operand:VCVTI 0 "register_operand" "=w")
(plus:VCVTI
(unspec:VCVTI [(match_operand:<VSI2QI> 1 "register_operand" "w")
@@ -3013,7 +3013,7 @@ (define_expand "neon_<sup>dot<vsi2qi>"
)
;; These instructions map to the __builtins for the Dot Product operations.
-(define_insn "neon_usdot<vsi2qi>"
+(define_insn "neon_usdot<mode><vsi2qi>"
[(set (match_operand:VCVTI 0 "register_operand" "=w")
(plus:VCVTI
(unspec:VCVTI