@@ -4783,29 +4783,49 @@ it is unspecified which of the two operands is returned as the result.
@cindex @code{reduc_smax_@var{m}} instruction pattern
@item @samp{reduc_smin_@var{m}}, @samp{reduc_smax_@var{m}}
Find the signed minimum/maximum of the elements of a vector. The vector is
-operand 1, and the scalar result is stored in the least significant bits of
+operand 1, and the result is stored in the least significant bits of
operand 0 (also a vector). The output and input vector should have the same
-modes.
+modes. These are legacy optabs, and platforms should prefer to implement
+@samp{reduc_smin_scal_@var{m}} and @samp{reduc_smax_scal_@var{m}}.
@cindex @code{reduc_umin_@var{m}} instruction pattern
@cindex @code{reduc_umax_@var{m}} instruction pattern
@item @samp{reduc_umin_@var{m}}, @samp{reduc_umax_@var{m}}
Find the unsigned minimum/maximum of the elements of a vector. The vector is
-operand 1, and the scalar result is stored in the least significant bits of
+operand 1, and the result is stored in the least significant bits of
operand 0 (also a vector). The output and input vector should have the same
-modes.
+modes. These are legacy optabs, and platforms should prefer to implement
+@samp{reduc_umin_scal_@var{m}} and @samp{reduc_umax_scal_@var{m}}.
@cindex @code{reduc_splus_@var{m}} instruction pattern
-@item @samp{reduc_splus_@var{m}}
-Compute the sum of the signed elements of a vector. The vector is operand 1,
-and the scalar result is stored in the least significant bits of operand 0
-(also a vector). The output and input vector should have the same modes.
-
@cindex @code{reduc_uplus_@var{m}} instruction pattern
-@item @samp{reduc_uplus_@var{m}}
-Compute the sum of the unsigned elements of a vector. The vector is operand 1,
-and the scalar result is stored in the least significant bits of operand 0
+@item @samp{reduc_splus_@var{m}}, @samp{reduc_uplus_@var{m}}
+Compute the sum of the signed/unsigned elements of a vector. The vector is
+operand 1, and the result is stored in the least significant bits of operand 0
(also a vector). The output and input vector should have the same modes.
+These are legacy optabs, and platforms should prefer to implement
+@samp{reduc_plus_scal_@var{m}@var{n}}.
+
+@cindex @code{reduc_smin_scal_@var{m}} instruction pattern
+@cindex @code{reduc_smax_scal_@var{m}} instruction pattern
+@item @samp{reduc_smin_scal_@var{m}}, @samp{reduc_smax_scal_@var{m}}
+Find the signed minimum/maximum of the elements of a vector. The vector is
+operand 1, and operand 0 is the scalar result, with mode equal to the mode of
+the elements of the input vector.
+
+@cindex @code{reduc_umin_scal_@var{m}} instruction pattern
+@cindex @code{reduc_umax_scal_@var{m}} instruction pattern
+@item @samp{reduc_umin_scal_@var{m}}, @samp{reduc_umax_scal_@var{m}}
+Find the unsigned minimum/maximum of the elements of a vector. The vector is
+operand 1, and operand 0 is the scalar result, with mode equal to the mode of
+the elements of the input vector.
+
+@cindex @code{reduc_plus_scal_@var{m}@var{n}} instruction pattern
+@item @samp{reduc_plus_scal_@var{m}@var{n}}
+Compute the sum of the elements of a vector. The vector, of mode @var{m}, is
+operand 1, and operand 0 is the scalar result, of mode @var{n}. Note that at
+present the vectorizer only looks for patterns where @var{n} is the mode of the
+elements of @var{m}.
@cindex @code{sdot_prod@var{m}} instruction pattern
@item @samp{sdot_prod@var{m}}
@@ -9045,6 +9045,23 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
op0 = expand_normal (treeop0);
this_optab = optab_for_tree_code (code, type, optab_default);
enum machine_mode vec_mode = TYPE_MODE (TREE_TYPE (treeop0));
+ enum insn_code icode = reduction_optab_handler (this_optab, vec_mode);
+ if (icode != CODE_FOR_nothing)
+ {
+ struct expand_operand ops[2];
+
+ create_output_operand (&ops[0], target, mode);
+ create_input_operand (&ops[1], op0, vec_mode);
+ if (maybe_expand_insn (icode, 2, ops))
+ {
+ target = ops[0].value;
+ if (GET_MODE (target) != mode)
+ return gen_lowpart (tmode, target);
+ return target;
+ }
+ }
+ /* Fall back to optab with vector result, and then extract scalar. */
+ this_optab = scalar_reduc_to_vector (this_optab, type);
temp = expand_unop (vec_mode, this_optab, op0, NULL_RTX, unsignedp);
gcc_assert (temp);
/* The tree code produces a scalar result, but (somewhat by convention)
@@ -506,13 +506,15 @@ optab_for_tree_code (enum tree_code code, const_tree type,
return fma_optab;
case REDUC_MAX_EXPR:
- return TYPE_UNSIGNED (type) ? reduc_umax_optab : reduc_smax_optab;
+ return TYPE_UNSIGNED (type)
+ ? reduc_umax_scal_optab : reduc_smax_scal_optab;
case REDUC_MIN_EXPR:
- return TYPE_UNSIGNED (type) ? reduc_umin_optab : reduc_smin_optab;
+ return TYPE_UNSIGNED (type)
+ ? reduc_umin_scal_optab : reduc_smin_scal_optab;
case REDUC_PLUS_EXPR:
- return TYPE_UNSIGNED (type) ? reduc_uplus_optab : reduc_splus_optab;
+ return reduc_plus_scal_optab;
case VEC_LSHIFT_EXPR:
return vec_shl_optab;
@@ -608,7 +610,49 @@ optab_for_tree_code (enum tree_code code, const_tree type,
return unknown_optab;
}
}
-
+
+/* Given optab UNOPTAB that reduces a vector to a scalar, return the legacy
+ optab that produces a vector with the reduction result in one element, for
+ a tree with type TYPE. Return unknown_optab for any other UNOPTAB. */
+
+optab
+scalar_reduc_to_vector (optab unoptab, const_tree type)
+{
+ switch (unoptab)
+ {
+ case reduc_plus_scal_optab: /* Legacy sum optabs are split by signedness. */
+ return TYPE_UNSIGNED (type) ? reduc_uplus_optab : reduc_splus_optab;
+
+ case reduc_smin_scal_optab: return reduc_smin_optab;
+ case reduc_umin_scal_optab: return reduc_umin_optab;
+ case reduc_smax_scal_optab: return reduc_smax_optab;
+ case reduc_umax_scal_optab: return reduc_umax_optab;
+ default: return unknown_optab;
+ }
+}
+
+/* Return the handler of reduction optab OPTAB that reduces a vector of mode
+ VEC_MODE to a scalar of its element mode, or CODE_FOR_nothing if none. */
+
+insn_code
+reduction_optab_handler (optab optab, enum machine_mode vec_mode)
+{
+ gcc_assert (VECTOR_MODE_P (vec_mode));
+ switch (optab)
+ {
+ case reduc_plus_scal_optab:
+ /* The optab allows the scalar result mode to differ from (e.g. be wider
+ than) the vector element mode. However, we don't yet exploit this. */
+ return convert_optab_handler (optab, vec_mode, GET_MODE_INNER (vec_mode));
+ case reduc_smin_scal_optab:
+ case reduc_umin_scal_optab:
+ case reduc_smax_scal_optab:
+ case reduc_umax_scal_optab: /* These are keyed on the vector mode alone. */
+ return optab_handler (optab, vec_mode);
+ default:
+ return CODE_FOR_nothing;
+ }
+}
/* Expand vector widening operations.
@@ -61,6 +61,9 @@ OPTAB_CD(vec_load_lanes_optab, "vec_load_lanes$a$b")
OPTAB_CD(vec_store_lanes_optab, "vec_store_lanes$a$b")
OPTAB_CD(vcond_optab, "vcond$a$b")
OPTAB_CD(vcondu_optab, "vcondu$a$b")
+/* Vector reduction to a scalar, possibly widening. The second mode is the
+ result mode: usually the input vector's element mode, but possibly wider. */
+OPTAB_CD (reduc_plus_scal_optab, "reduc_plus_scal_$a$b")
OPTAB_NL(add_optab, "add$P$a3", PLUS, "add", '3', gen_int_fp_fixed_libfunc)
OPTAB_NX(add_optab, "add$F$a3")
@@ -243,12 +246,19 @@ OPTAB_D (sin_optab, "sin$a2")
OPTAB_D (sincos_optab, "sincos$a3")
OPTAB_D (tan_optab, "tan$a2")
+/* Vector reduction to a scalar. */
+OPTAB_D (reduc_smax_scal_optab, "reduc_smax_scal_$a")
+OPTAB_D (reduc_smin_scal_optab, "reduc_smin_scal_$a")
+OPTAB_D (reduc_umax_scal_optab, "reduc_umax_scal_$a")
+OPTAB_D (reduc_umin_scal_optab, "reduc_umin_scal_$a")
+/* (Old) Vector reduction, returning a vector with the result in one lane. */
OPTAB_D (reduc_smax_optab, "reduc_smax_$a")
OPTAB_D (reduc_smin_optab, "reduc_smin_$a")
OPTAB_D (reduc_splus_optab, "reduc_splus_$a")
OPTAB_D (reduc_umax_optab, "reduc_umax_$a")
OPTAB_D (reduc_umin_optab, "reduc_umin_$a")
OPTAB_D (reduc_uplus_optab, "reduc_uplus_$a")
+
OPTAB_D (sdot_prod_optab, "sdot_prod$I$a")
OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
OPTAB_D (udot_prod_optab, "udot_prod$I$a")
@@ -162,6 +162,15 @@ enum optab_subtype
vector shifts and rotates */
extern optab optab_for_tree_code (enum tree_code, const_tree, enum optab_subtype);
+/* Given an optab that reduces a vector to a scalar, find instead the old
+ optab that produces a vector with the reduction result in one element,
+ for a tree with the specified type. */
+extern optab scalar_reduc_to_vector (optab, const_tree type);
+
+/* Given an optab that reduces a vector to a scalar, find the handler for the
+ specified vector mode, or CODE_FOR_nothing if there is none. */
+extern insn_code reduction_optab_handler (optab, enum machine_mode);
+
/* The various uses that a comparison can have; used by can_compare_p:
jumps, conditional moves, store flag operations. */
enum can_compare_purpose
@@ -5102,16 +5102,21 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
epilog_reduc_code = ERROR_MARK;
}
-
- if (reduc_optab
- && optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
+ else
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "reduc op not supported by target.\n");
+ if (!reduction_optab_handler (reduc_optab, vec_mode))
+ {
+ optab = scalar_reduc_to_vector (reduc_optab, vectype_out);
+ if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "reduc op not supported by target.\n");
- epilog_reduc_code = ERROR_MARK;
- }
+ epilog_reduc_code = ERROR_MARK;
+ }
+ }
+ }
}
else
{