@@ -2612,3 +2612,20 @@ (define_expand "rawmemchr<ANYI:mode>"
DONE;
}
)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Saturation ALU.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - add
+;; -------------------------------------------------------------------------
+;; Unsigned saturating add (standard name usadd<mode>3) for the V_VLSI modes.
+;; All three operands are registers of the same mode.  The pattern is only
+;; enabled under TARGET_VECTOR and hands the whole expansion over to
+;; riscv_vector::expand_vec_usadd.
+(define_expand "usadd<mode>3"
+ [(match_operand:V_VLSI 0 "register_operand")
+ (match_operand:V_VLSI 1 "register_operand")
+ (match_operand:V_VLSI 2 "register_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_vec_usadd (operands[0], operands[1], operands[2], <MODE>mode);
+ DONE;
+ }
+)
@@ -132,6 +132,7 @@ extern void riscv_asm_output_external (FILE *, const tree, const char *);
extern bool
riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
+extern void riscv_expand_usadd (rtx, rtx, rtx);
#ifdef RTX_CODE
extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0);
@@ -619,6 +620,7 @@ void expand_vec_lrint (rtx, rtx, machine_mode, machine_mode, machine_mode);
void expand_vec_lround (rtx, rtx, machine_mode, machine_mode, machine_mode);
void expand_vec_lceil (rtx, rtx, machine_mode, machine_mode);
void expand_vec_lfloor (rtx, rtx, machine_mode, machine_mode);
+void expand_vec_usadd (rtx, rtx, rtx, machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
bool, void (*)(rtx *, rtx), enum avl_type);
@@ -4635,6 +4635,16 @@ emit_vec_cvt_x_f_rtz (rtx op_dest, rtx op_src, rtx mask,
}
}
+/* Emit a VLMAX predicated US_PLUS insn in VEC_MODE that computes
+   OP_DEST from OP_1 and OP_2.  TYPE is the insn type forwarded to
+   emit_vlmax_insn.  */
+static void
+emit_vec_saddu (rtx op_dest, rtx op_1, rtx op_2, insn_type type,
+                machine_mode vec_mode)
+{
+  /* Destination first, then the two sources.  */
+  rtx sat_ops[] = {op_dest, op_1, op_2};
+
+  emit_vlmax_insn (code_for_pred (US_PLUS, vec_mode), type, sat_ops);
+}
+
void
expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
machine_mode vec_int_mode)
@@ -4862,6 +4872,12 @@ expand_vec_lfloor (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
vec_int_mode);
}
+/* Expand the vector unsigned saturating add for VEC_MODE: OP_0 is the
+   destination, OP_1 and OP_2 are the sources.  The work is done by
+   emit_vec_saddu, which is asked to emit a BINARY_OP insn.  */
+void
+expand_vec_usadd (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode)
+{
+ emit_vec_saddu (op_0, op_1, op_2, BINARY_OP, vec_mode);
+}
+
/* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as
well. */
void
@@ -10840,6 +10840,53 @@ riscv_vector_mode_supported_any_target_p (machine_mode)
return true;
}
+/* Implements the unsigned saturation add standard name usadd for int mode.
+
+   DEST, X and Y are registers of the same scalar integer mode, which may be
+   narrower than Xmode.  The expansion is branchless and is carried out in
+   Xmode:
+
+     sum  = x + y
+     lt   = sum < x      (1 when the addition wrapped, 0 otherwise)
+     dest = sum | -lt    (all ones on overflow, the plain sum otherwise)
+
+   For modes narrower than Xmode both operands are zero-extended first and
+   the sum is truncated back to the operand width, so the unsigned compare
+   only ever sees bits that belong to the original mode.  A lowpart
+   (paradoxical) subreg is not enough for that, because it leaves the bits
+   above the operand width unspecified and a stale high bit in X would
+   corrupt the overflow check.  */
+
+void
+riscv_expand_usadd (rtx dest, rtx x, rtx y)
+{
+  machine_mode mode = GET_MODE (dest);
+  rtx xmode_sum = gen_reg_rtx (Xmode);
+  rtx xmode_lt = gen_reg_rtx (Xmode);
+  rtx xmode_dest = gen_reg_rtx (Xmode);
+  rtx xmode_x = x;
+  rtx xmode_y = y;
+
+  /* Step-0: make sure the high bits of the operands are clean.  */
+  if (mode != Xmode)
+    {
+      xmode_x = gen_reg_rtx (Xmode);
+      xmode_y = gen_reg_rtx (Xmode);
+
+      riscv_emit_unary (ZERO_EXTEND, xmode_x, x);
+      riscv_emit_unary (ZERO_EXTEND, xmode_y, y);
+    }
+
+  /* Step-1: sum = x + y  */
+  riscv_emit_binary (PLUS, xmode_sum, xmode_x, xmode_y);
+
+  /* Step-1.1: truncate the sum to the operand width for every mode that is
+     narrower than Xmode, so that a carry out of the operand width shows up
+     as sum < x in the compare below.  */
+  if (mode != Xmode)
+    {
+      int shift_bits = GET_MODE_BITSIZE (Xmode)
+        - GET_MODE_BITSIZE (mode).to_constant ();
+
+      gcc_assert (shift_bits > 0);
+
+      riscv_emit_binary (ASHIFT, xmode_sum, xmode_sum, GEN_INT (shift_bits));
+      riscv_emit_binary (LSHIFTRT, xmode_sum, xmode_sum, GEN_INT (shift_bits));
+    }
+
+  /* Step-2: lt = sum < x  */
+  riscv_emit_binary (LTU, xmode_lt, xmode_sum, xmode_x);
+
+  /* Step-3: lt = -lt, i.e. all ones when the addition overflowed.  */
+  riscv_emit_unary (NEG, xmode_lt, xmode_lt);
+
+  /* Step-4: xmode_dest = sum | lt  */
+  riscv_emit_binary (IOR, xmode_dest, xmode_lt, xmode_sum);
+
+  /* Step-5: dest = xmode_dest  */
+  emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -3839,6 +3839,17 @@ (define_insn "*large_load_address"
[(set_attr "type" "load")
(set (attr "length") (const_int 8))])
+;; Unsigned saturating add (standard name usadd<mode>3) for the scalar ANYI
+;; modes.  All three operands are registers of the same mode and the
+;; condition string is empty.  The insn sequence itself is produced by
+;; riscv_expand_usadd.
+(define_expand "usadd<mode>3"
+ [(match_operand:ANYI 0 "register_operand")
+ (match_operand:ANYI 1 "register_operand")
+ (match_operand:ANYI 2 "register_operand")]
+ ""
+ {
+ riscv_expand_usadd (operands[0], operands[1], operands[2]);
+ DONE;
+ }
+)
+
(include "bitmanip.md")
(include "crypto.md")
(include "sync.md")
@@ -4073,8 +4073,8 @@ (define_insn "@pred_trunc<mode>"
;; Saturating Add and Subtract
(define_insn "@pred_<optab><mode>"
- [(set (match_operand:VI 0 "register_operand" "=vd, vd, vr, vr, vd, vd, vr, vr")
- (if_then_else:VI
+ [(set (match_operand:V_VLSI 0 "register_operand" "=vd, vd, vr, vr, vd, vd, vr, vr")
+ (if_then_else:V_VLSI
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1, vm, vm,Wc1,Wc1")
(match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK")
@@ -4083,10 +4083,10 @@ (define_insn "@pred_<optab><mode>"
(match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (any_sat_int_binop:VI
- (match_operand:VI 3 "<binop_rhs1_predicate>" " vr, vr, vr, vr, vr, vr, vr, vr")
- (match_operand:VI 4 "<binop_rhs2_predicate>" "<binop_rhs2_constraint>"))
- (match_operand:VI 2 "vector_merge_operand" " vu, 0, vu, 0, vu, 0, vu, 0")))]
+ (any_sat_int_binop:V_VLSI
+ (match_operand:V_VLSI 3 "<binop_rhs1_predicate>" " vr, vr, vr, vr, vr, vr, vr, vr")
+ (match_operand:V_VLSI 4 "<binop_rhs2_predicate>" "<binop_rhs2_constraint>"))
+ (match_operand:V_VLSI 2 "vector_merge_operand" " vu, 0, vu, 0, vu, 0, vu, 0")))]
"TARGET_VECTOR"
"@
v<insn>.vv\t%0,%3,%4%p1
@@ -4181,6 +4181,7 @@ commutative_binary_fn_p (internal_fn fn)
case IFN_UBSAN_CHECK_MUL:
case IFN_ADD_OVERFLOW:
case IFN_MUL_OVERFLOW:
+ case IFN_SAT_ADD:
case IFN_VEC_WIDEN_PLUS:
case IFN_VEC_WIDEN_PLUS_LO:
case IFN_VEC_WIDEN_PLUS_HI:
@@ -275,6 +275,9 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | ECF_NOTHROW, first,
DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
smulhrs, umulhrs, binary)
+/* Saturating addition (IFN_SAT_ADD), a binary function backed by the ssadd
+   and usadd optabs.  */
+DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST | ECF_NOTHROW, first,
+ ssadd, usadd, binary)
+
DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
@@ -3043,6 +3043,70 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|| POINTER_TYPE_P (itype))
&& wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
+/* Unsigned Saturation Add */
+/* Helper predicates for the unsigned saturating add forms.  All three
+   require an integral result type that matches the types of both operands,
+   and an unsigned type for @0.  */
+
+/* usadd_left_part_1 matches the plain sum X + Y (operands in either
+   order).  */
+(match (usadd_left_part_1 @0 @1)
+ (plus:c @0 @1)
+ (if (INTEGRAL_TYPE_P (type)
+ && TYPE_UNSIGNED (TREE_TYPE (@0))
+ && types_match (type, TREE_TYPE (@0))
+ && types_match (type, TREE_TYPE (@1)))))
+
+/* usadd_right_part_1 matches -(T)((X + Y) < X), the negated and converted
+   result of the wrap-around check written with the sum on the left.  */
+(match (usadd_right_part_1 @0 @1)
+ (negate (convert (lt (plus:c @0 @1) @0)))
+ (if (INTEGRAL_TYPE_P (type)
+ && TYPE_UNSIGNED (TREE_TYPE (@0))
+ && types_match (type, TREE_TYPE (@0))
+ && types_match (type, TREE_TYPE (@1)))))
+
+/* usadd_right_part_2 matches -(T)(X > (X + Y)), the same check written with
+   the sum on the right.  */
+(match (usadd_right_part_2 @0 @1)
+ (negate (convert (gt @0 (plus:c @0 @1))))
+ (if (INTEGRAL_TYPE_P (type)
+ && TYPE_UNSIGNED (TREE_TYPE (@0))
+ && types_match (type, TREE_TYPE (@0))
+ && types_match (type, TREE_TYPE (@1)))))
+
+/* Unsigned saturation add. Case 1 (branchless):
+ SAT_U_ADD = (X + Y) | - ((X + Y) < X) or
+ SAT_U_ADD = (X + Y) | - (X > (X + Y)).
+ Both forms are rewritten to IFN_SAT_ADD (X, Y), but only when optimizing;
+ the operands of the bit_ior may appear in either order. */
+(simplify
+ (bit_ior:c
+ (usadd_left_part_1 @0 @1)
+ (usadd_right_part_1 @0 @1))
+ (if (optimize) (IFN_SAT_ADD @0 @1)))
+(simplify
+ (bit_ior:c
+ (usadd_left_part_1 @0 @1)
+ (usadd_right_part_2 @0 @1))
+ (if (optimize) (IFN_SAT_ADD @0 @1)))
+
+/* Unsigned saturation add. Case 2 (branch):
+ SAT_U_ADD = (X + Y) >= X ? (X + Y) : -1 or
+ SAT_U_ADD = X <= (X + Y) ? (X + Y) : -1.
+ @2 captures the sum so that the selected value must be the very sum that
+ was compared; both forms are rewritten to IFN_SAT_ADD (X, Y), but only
+ when optimizing. */
+(simplify
+ (cond (ge (usadd_left_part_1@2 @0 @1) @0) @2 integer_minus_onep)
+ (if (optimize) (IFN_SAT_ADD @0 @1)))
+(simplify
+ (cond (le @0 (usadd_left_part_1@2 @0 @1)) @2 integer_minus_onep)
+ (if (optimize) (IFN_SAT_ADD @0 @1)))
+
+/* The vectorizer's pattern recognition will leverage
+ unsigned_integer_sat_add.  It accepts four spellings, each only when
+ optimizing:
+ (X + Y) | - ((X + Y) < X)
+ (X + Y) | - (X > (X + Y))
+ (X + Y) >= X ? (X + Y) : -1
+ X <= (X + Y) ? (X + Y) : -1. */
+(match (unsigned_integer_sat_add @0 @1)
+ (bit_ior:c
+ (usadd_left_part_1 @0 @1)
+ (usadd_right_part_1 @0 @1))
+ (if (optimize)))
+(match (unsigned_integer_sat_add @0 @1)
+ (bit_ior:c
+ (usadd_left_part_1 @0 @1)
+ (usadd_right_part_2 @0 @1))
+ (if (optimize)))
+(match (unsigned_integer_sat_add @0 @1)
+ (cond (ge (usadd_left_part_1@2 @0 @1) @0) @2 integer_minus_onep)
+ (if (optimize)))
+(match (unsigned_integer_sat_add @0 @1)
+ (cond (le @0 (usadd_left_part_1@2 @0 @1)) @2 integer_minus_onep)
+ (if (optimize)))
+
/* x > y && x != XXX_MIN --> x > y
x > y && x == XXX_MIN --> false . */
(for eqne (eq ne)
@@ -111,8 +111,8 @@ OPTAB_NX(add_optab, "add$F$a3")
OPTAB_NX(add_optab, "add$Q$a3")
OPTAB_VL(addv_optab, "addv$I$a3", PLUS, "add", '3', gen_intv_fp_libfunc)
OPTAB_VX(addv_optab, "add$F$a3")
-OPTAB_NL(ssadd_optab, "ssadd$Q$a3", SS_PLUS, "ssadd", '3', gen_signed_fixed_libfunc)
-OPTAB_NL(usadd_optab, "usadd$Q$a3", US_PLUS, "usadd", '3', gen_unsigned_fixed_libfunc)
+OPTAB_NL(ssadd_optab, "ssadd$a3", SS_PLUS, "ssadd", '3', gen_signed_fixed_libfunc)
+OPTAB_NL(usadd_optab, "usadd$a3", US_PLUS, "usadd", '3', gen_unsigned_fixed_libfunc)
OPTAB_NL(sub_optab, "sub$P$a3", MINUS, "sub", '3', gen_int_fp_fixed_libfunc)
OPTAB_NX(sub_optab, "sub$F$a3")
OPTAB_NX(sub_optab, "sub$Q$a3")
new file mode 100644
@@ -0,0 +1,33 @@
+#ifndef HAVE_DEFINED_VEC_SAT_BINARY
+#define HAVE_DEFINED_VEC_SAT_BINARY
+
+/* To use this header file in a run test, you need to:
+ 1. define T as the element type, for example uint8_t,
+ 2. define N as the test array size, for example 16,
+ 3. define RUN_VEC_SAT_BINARY as the run function,
+ 4. prepare the test_data for the test cases.
+ */
+
+/* Run RUN_VEC_SAT_BINARY over every case in test_data and abort as soon as
+   one output element differs from the expected value.  */
+int
+main ()
+{
+  const unsigned case_count = sizeof (test_data) / sizeof (test_data[0]);
+  unsigned c, e;
+  T out[N];
+
+  for (c = 0; c < case_count; c++)
+    {
+      /* Each case holds the two operand arrays followed by the expected
+         result array.  */
+      T *lhs = test_data[c][0];
+      T *rhs = test_data[c][1];
+      T *want = test_data[c][2];
+
+      RUN_VEC_SAT_BINARY (T, out, lhs, rhs, N);
+
+      for (e = 0; e < N; e++)
+        {
+          if (out[e] == want[e])
+            continue;
+
+          __builtin_abort ();
+        }
+    }
+
+  return 0;
+}
+
+#endif
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../../../sat_arith.h"
+
+/*
+** vec_sat_u_add_uint8_t_fmt_1:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*m1,\s*ta,\s*ma
+** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_1(uint8_t)
+
+/*
+** vec_sat_u_add_uint8_t_fmt_2:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*m1,\s*ta,\s*ma
+** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_2(uint8_t)
+
+/*
+** vec_sat_u_add_uint8_t_fmt_3:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*m1,\s*ta,\s*ma
+** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_3(uint8_t, 0xffu)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 12 "expand" } } */
new file mode 100644
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../../../sat_arith.h"
+
+/*
+** vec_sat_u_add_uint16_t_fmt_1:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*m1,\s*ta,\s*ma
+** ...
+** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_1(uint16_t)
+
+/*
+** vec_sat_u_add_uint16_t_fmt_2:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*m1,\s*ta,\s*ma
+** ...
+** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_2(uint16_t)
+
+/*
+** vec_sat_u_add_uint16_t_fmt_3:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*m1,\s*ta,\s*ma
+** ...
+** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_3(uint16_t, 0xffffu)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 12 "expand" } } */
new file mode 100644
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../../../sat_arith.h"
+
+/*
+** vec_sat_u_add_uint32_t_fmt_1:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*m1,\s*ta,\s*ma
+** ...
+** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_1(uint32_t)
+
+/*
+** vec_sat_u_add_uint32_t_fmt_2:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*m1,\s*ta,\s*ma
+** ...
+** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_2(uint32_t)
+
+/*
+** vec_sat_u_add_uint32_t_fmt_3:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*m1,\s*ta,\s*ma
+** ...
+** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_3(uint32_t, 0xffffffffu)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 12 "expand" } } */
new file mode 100644
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../../../sat_arith.h"
+
+/*
+** vec_sat_u_add_uint64_t_fmt_1:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e64,\s*m1,\s*ta,\s*ma
+** ...
+** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_1(uint64_t)
+
+/*
+** vec_sat_u_add_uint64_t_fmt_2:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e64,\s*m1,\s*ta,\s*ma
+** ...
+** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_2(uint64_t)
+
+/*
+** vec_sat_u_add_uint64_t_fmt_3:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e64,\s*m1,\s*ta,\s*ma
+** ...
+** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_3(uint64_t, 0xffffffffffffffffu)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 12 "expand" } } */
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../../../sat_arith.h"
+
+#define T uint8_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_1
+
+DEF_VEC_SAT_U_ADD_FMT_1(T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ },
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ },
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 254, 255, 9,
+ },
+ {
+ 0, 1, 1, 254,
+ 254, 254, 254, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 9,
+ },
+ {
+ 0, 1, 2, 254,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary.h"
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../../../sat_arith.h"
+
+#define T uint16_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_3
+
+DEF_VEC_SAT_U_ADD_FMT_3(T, 0xffffu)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 65534, 65535, 9,
+ },
+ {
+ 0, 1, 1, 65534,
+ 65534, 65534, 65534, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 9,
+ },
+ {
+ 0, 1, 2, 65534,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary.h"
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../../../sat_arith.h"
+
+#define T uint32_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_3
+
+DEF_VEC_SAT_U_ADD_FMT_3(T, 0xffffffffu)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ },
+ {
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ },
+ {
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 4294967294, 4294967295, 9,
+ },
+ {
+ 0, 1, 1, 4294967294,
+ 4294967294, 4294967294, 4294967294, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 9,
+ },
+ {
+ 0, 1, 2, 4294967294,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary.h"
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../../../sat_arith.h"
+
+#define T uint64_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_3
+
+DEF_VEC_SAT_U_ADD_FMT_3(T, 0xffffffffffffffffu)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ },
+ {
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ },
+ {
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 18446744073709551614u, 18446744073709551615u, 9,
+ },
+ {
+ 0, 1, 1, 18446744073709551614u,
+ 18446744073709551614u, 18446744073709551614u, 18446744073709551614u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 9,
+ },
+ {
+ 0, 1, 2, 18446744073709551614u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary.h"
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../../../sat_arith.h"
+
+#define T uint16_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_1
+
+DEF_VEC_SAT_U_ADD_FMT_1(T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 65534, 65535, 9,
+ },
+ {
+ 0, 1, 1, 65534,
+ 65534, 65534, 65534, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 9,
+ },
+ {
+ 0, 1, 2, 65534,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary.h"
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../../../sat_arith.h"
+
+#define T uint32_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_1
+
+DEF_VEC_SAT_U_ADD_FMT_1(T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ },
+ {
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ },
+ {
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 4294967294, 4294967295, 9,
+ },
+ {
+ 0, 1, 1, 4294967294,
+ 4294967294, 4294967294, 4294967294, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 9,
+ },
+ {
+ 0, 1, 2, 4294967294,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary.h"
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../../../sat_arith.h"
+
+#define T uint64_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_1
+
+DEF_VEC_SAT_U_ADD_FMT_1(T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ },
+ {
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ },
+ {
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 18446744073709551614u, 18446744073709551615u, 9,
+ },
+ {
+ 0, 1, 1, 18446744073709551614u,
+ 18446744073709551614u, 18446744073709551614u, 18446744073709551614u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 9,
+ },
+ {
+ 0, 1, 2, 18446744073709551614u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary.h"
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../../../sat_arith.h"
+
+#define T uint8_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_2
+
+DEF_VEC_SAT_U_ADD_FMT_2(T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ },
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ },
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 254, 255, 9,
+ },
+ {
+ 0, 1, 1, 254,
+ 254, 254, 254, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 9,
+ },
+ {
+ 0, 1, 2, 254,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary.h"
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../../../sat_arith.h"
+
+#define T uint16_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_2
+
+DEF_VEC_SAT_U_ADD_FMT_2(T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 65534, 65535, 9,
+ },
+ {
+ 0, 1, 1, 65534,
+ 65534, 65534, 65534, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 9,
+ },
+ {
+ 0, 1, 2, 65534,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary.h"
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../../../sat_arith.h"
+
+#define T uint32_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_2
+
+DEF_VEC_SAT_U_ADD_FMT_2(T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ },
+ {
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ },
+ {
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 4294967294, 4294967295, 9,
+ },
+ {
+ 0, 1, 1, 4294967294,
+ 4294967294, 4294967294, 4294967294, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 9,
+ },
+ {
+ 0, 1, 2, 4294967294,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary.h"
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../../../sat_arith.h"
+
+#define T uint64_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_2
+
+DEF_VEC_SAT_U_ADD_FMT_2(T)
+
+T test_data[][3][N] = { /* [case][arg_0, arg_1, expect][N elements] */
+ { /* case 0: all-zero operands, every sum stays 0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ { /* case 1: max + max in every element, every result saturates to max */
+ {
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ }, /* arg_0 */
+ {
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ }, /* arg_1 */
+ {
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ }, /* expect */
+ },
+ { /* case 2: mixed elements, some sums exact and some saturating */
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 18446744073709551614u, 18446744073709551615u, 9,
+ }, /* arg_0 */
+ {
+ 0, 1, 1, 18446744073709551614u,
+ 18446744073709551614u, 18446744073709551614u, 18446744073709551614u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 9,
+ }, /* arg_1 */
+ {
+ 0, 1, 2, 18446744073709551614u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18,
+ }, /* expect */
+ },
+};
+
+#include "vec_sat_binary.h"
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../../../sat_arith.h"
+
+#define T uint8_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_3
+
+DEF_VEC_SAT_U_ADD_FMT_3(T, 0xffu)
+
+T test_data[][3][N] = { /* [case][arg_0, arg_1, expect][N elements] */
+ { /* case 0: all-zero operands, every sum stays 0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ { /* case 1: max + max in every element, every result saturates to max */
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ }, /* arg_0 */
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ }, /* arg_1 */
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ }, /* expect */
+ },
+ { /* case 2: mixed elements, some sums exact and some saturating */
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 254, 255, 9,
+ }, /* arg_0 */
+ {
+ 0, 1, 1, 254,
+ 254, 254, 254, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 9,
+ }, /* arg_1 */
+ {
+ 0, 1, 2, 254,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 18,
+ }, /* expect */
+ },
+};
+
+#include "vec_sat_binary.h"
new file mode 100644
@@ -0,0 +1,79 @@
+#ifndef HAVE_SAT_ARITH
+#define HAVE_SAT_ARITH
+
+#include <stdint-gcc.h>
+
+#define DEF_SAT_U_ADD_FMT_1(T) /* Defines sat_u_add_<T>_fmt_1: scalar form 1 (branchless).  */ \
+T __attribute__((noinline)) \
+sat_u_add_##T##_fmt_1 (T x, T y) \
+{ \
+ return (x + y) | (-(T)((T)(x + y) < x)); /* truncated sum < x means it wrapped: OR in an all-ones mask */ \
+}
+
+#define DEF_SAT_U_ADD_FMT_2(T) /* Defines sat_u_add_<T>_fmt_2: scalar form 2 (conditional, -1 as limit).  */ \
+T __attribute__((noinline)) \
+sat_u_add_##T##_fmt_2 (T x, T y) \
+{ \
+ return (T)(x + y) >= x ? (x + y) : -1; /* for unsigned T, -1 converts to the all-ones value on return */ \
+}
+
+#define DEF_SAT_U_ADD_FMT_3(T, MAX) /* Defines sat_u_add_<T>_fmt_3: scalar form 3 (conditional, explicit limit).  */ \
+T __attribute__((noinline)) \
+sat_u_add_##T##_fmt_3 (T x, T y) \
+{ \
+ return (T)(x + y) >= x ? (x + y) : MAX; /* MAX is the caller-supplied value returned when the sum wrapped */ \
+}
+
+#define DEF_VEC_SAT_U_ADD_FMT_1(T) /* Defines vec_sat_u_add_<T>_fmt_1: form 1 applied element by element.  */ \
+void __attribute__((noinline)) \
+vec_sat_u_add_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
+{ \
+ unsigned i; \
+ for (i = 0; i < limit; i++) /* writes out[0] .. out[limit - 1] */ \
+ { \
+ T x = op_1[i]; \
+ T y = op_2[i]; \
+ out[i] = (x + y) | (-(T)((T)(x + y) < x)); /* truncated sum < x means it wrapped: OR in an all-ones mask */ \
+ } \
+}
+
+#define DEF_VEC_SAT_U_ADD_FMT_2(T) /* Defines vec_sat_u_add_<T>_fmt_2: form 2 applied element by element.  */ \
+void __attribute__((noinline)) \
+vec_sat_u_add_##T##_fmt_2 (T *out, T *op_1, T *op_2, unsigned limit) \
+{ \
+ unsigned i; \
+ for (i = 0; i < limit; i++) /* writes out[0] .. out[limit - 1] */ \
+ { \
+ T x = op_1[i]; \
+ T y = op_2[i]; \
+ out[i] = (T)(x + y) >= x ? (x + y) : -1; /* for unsigned T, -1 is stored as the all-ones value */ \
+ } \
+}
+
+#define DEF_VEC_SAT_U_ADD_FMT_3(T, MAX) /* Defines vec_sat_u_add_<T>_fmt_3: form 3 applied element by element.  */ \
+void __attribute__((noinline)) \
+vec_sat_u_add_##T##_fmt_3 (T *out, T *op_1, T *op_2, unsigned limit) \
+{ \
+ unsigned i; \
+ for (i = 0; i < limit; i++) /* writes out[0] .. out[limit - 1] */ \
+ { \
+ T x = op_1[i]; \
+ T y = op_2[i]; \
+ out[i] = (T)(x + y) >= x ? (x + y) : MAX; /* MAX is the caller-supplied value stored when the sum wrapped */ \
+ } \
+}
+
+#define RUN_SAT_U_ADD_FMT_1(T, a, b) sat_u_add_##T##_fmt_1 (a, b) /* call scalar form 1 */
+#define RUN_SAT_U_ADD_FMT_2(T, a, b) sat_u_add_##T##_fmt_2 (a, b) /* call scalar form 2 */
+#define RUN_SAT_U_ADD_FMT_3(T, a, b) sat_u_add_##T##_fmt_3 (a, b) /* call scalar form 3 */
+
+#define RUN_VEC_SAT_U_ADD_FMT_1(T, dst, lhs, rhs, count) /* call vector form 1 */ \
+  vec_sat_u_add_##T##_fmt_1 (dst, lhs, rhs, count)
+
+#define RUN_VEC_SAT_U_ADD_FMT_2(T, dst, lhs, rhs, count) /* call vector form 2 */ \
+  vec_sat_u_add_##T##_fmt_2 (dst, lhs, rhs, count)
+
+#define RUN_VEC_SAT_U_ADD_FMT_3(T, dst, lhs, rhs, count) /* call vector form 3 */ \
+  vec_sat_u_add_##T##_fmt_3 (dst, lhs, rhs, count)
+
+#endif
new file mode 100644
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_uint8_t_fmt_1:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+DEF_SAT_U_ADD_FMT_1(uint8_t)
+
+/*
+** sat_u_add_uint8_t_fmt_2:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+DEF_SAT_U_ADD_FMT_2(uint8_t)
+
+/*
+** sat_u_add_uint8_t_fmt_3:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+DEF_SAT_U_ADD_FMT_3(uint8_t, 0xffu)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 6 "expand" } } */
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_uint16_t_fmt_1:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_ADD_FMT_1(uint16_t)
+
+/*
+** sat_u_add_uint16_t_fmt_2:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_ADD_FMT_2(uint16_t)
+
+/*
+** sat_u_add_uint16_t_fmt_3:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_ADD_FMT_3(uint16_t, 0xffffu)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 6 "expand" } } */
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_uint32_t_fmt_1:
+** addw\s+[atx][0-9]+,\s*a0,\s*a1
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** sext.w\s+a0,\s*a0
+** ret
+*/
+DEF_SAT_U_ADD_FMT_1(uint32_t)
+
+/*
+** sat_u_add_uint32_t_fmt_2:
+** addw\s+[atx][0-9]+,\s*a0,\s*a1
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** sext.w\s+a0,\s*a0
+** ret
+*/
+DEF_SAT_U_ADD_FMT_2(uint32_t)
+
+/*
+** sat_u_add_uint32_t_fmt_3:
+** addw\s+[atx][0-9]+,\s*a0,\s*a1
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** sext.w\s+a0,\s*a0
+** ret
+*/
+DEF_SAT_U_ADD_FMT_3(uint32_t, 0xffffffffu)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 6 "expand" } } */
new file mode 100644
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_uint64_t_fmt_1:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
+** ret
+*/
+DEF_SAT_U_ADD_FMT_1(uint64_t)
+
+/*
+** sat_u_add_uint64_t_fmt_2:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
+** ret
+*/
+DEF_SAT_U_ADD_FMT_2(uint64_t)
+
+/*
+** sat_u_add_uint64_t_fmt_3:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
+** ret
+*/
+DEF_SAT_U_ADD_FMT_3(uint64_t, 0xffffffffffffffffu) /* 'u' suffix keeps MAX explicitly unsigned, as in the other FMT_3 instantiations */
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 6 "expand" } } */
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint8_t
+#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_1
+
+DEF_SAT_U_ADD_FMT_1(T)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, }, /* exact: no overflow */
+ { 0, 1, 1, }, /* exact */
+ { 1, 1, 2, }, /* exact */
+ { 0, 254, 254, }, /* exact: one below max */
+ { 1, 254, 255, }, /* exact: reaches max without overflow */
+ { 2, 254, 255, }, /* overflow: saturates to max */
+ { 0, 255, 255, }, /* exact: max + 0 */
+ { 1, 255, 255, }, /* overflow: saturates to max */
+ { 2, 255, 255, }, /* overflow: saturates to max */
+ { 255, 255, 255, }, /* overflow: max + max saturates to max */
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint16_t
+#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_3
+
+DEF_SAT_U_ADD_FMT_3(T, 0xffffu)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, }, /* exact: no overflow */
+ { 0, 1, 1, }, /* exact */
+ { 1, 1, 2, }, /* exact */
+ { 0, 65534, 65534, }, /* exact: one below max */
+ { 1, 65534, 65535, }, /* exact: reaches max without overflow */
+ { 2, 65534, 65535, }, /* overflow: saturates to max */
+ { 0, 65535, 65535, }, /* exact: max + 0 */
+ { 1, 65535, 65535, }, /* overflow: saturates to max */
+ { 2, 65535, 65535, }, /* overflow: saturates to max */
+ { 65535, 65535, 65535, }, /* overflow: max + max saturates to max */
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint32_t
+#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_3
+
+DEF_SAT_U_ADD_FMT_3(T, 0xffffffffu)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, }, /* exact: no overflow */
+ { 0, 1, 1, }, /* exact */
+ { 1, 1, 2, }, /* exact */
+ { 0, 4294967294, 4294967294, }, /* exact: one below max */
+ { 1, 4294967294, 4294967295, }, /* exact: reaches max without overflow */
+ { 2, 4294967294, 4294967295, }, /* overflow: saturates to max */
+ { 0, 4294967295, 4294967295, }, /* exact: max + 0 */
+ { 1, 4294967295, 4294967295, }, /* overflow: saturates to max */
+ { 2, 4294967295, 4294967295, }, /* overflow: saturates to max */
+ { 4294967295, 4294967295, 4294967295, }, /* overflow: max + max saturates to max */
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint64_t
+#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_3
+
+DEF_SAT_U_ADD_FMT_3(T, 0xffffffffffffffffu)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, }, /* exact: no overflow */
+ { 0, 1, 1, }, /* exact */
+ { 1, 1, 2, }, /* exact */
+ { 0, 18446744073709551614u, 18446744073709551614u, }, /* exact: one below max */
+ { 1, 18446744073709551614u, 18446744073709551615u, }, /* exact: reaches max without overflow */
+ { 2, 18446744073709551614u, 18446744073709551615u, }, /* overflow: saturates to max */
+ { 0, 18446744073709551615u, 18446744073709551615u, }, /* exact: max + 0 */
+ { 1, 18446744073709551615u, 18446744073709551615u, }, /* overflow: saturates to max */
+ { 2, 18446744073709551615u, 18446744073709551615u, }, /* overflow: saturates to max */
+ { 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, }, /* overflow: max + max saturates to max */
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint16_t
+#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_1
+
+DEF_SAT_U_ADD_FMT_1(T)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, }, /* exact: no overflow */
+ { 0, 1, 1, }, /* exact */
+ { 1, 1, 2, }, /* exact */
+ { 0, 65534, 65534, }, /* exact: one below max */
+ { 1, 65534, 65535, }, /* exact: reaches max without overflow */
+ { 2, 65534, 65535, }, /* overflow: saturates to max */
+ { 0, 65535, 65535, }, /* exact: max + 0 */
+ { 1, 65535, 65535, }, /* overflow: saturates to max */
+ { 2, 65535, 65535, }, /* overflow: saturates to max */
+ { 65535, 65535, 65535, }, /* overflow: max + max saturates to max */
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint32_t
+#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_1
+
+DEF_SAT_U_ADD_FMT_1(T)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, }, /* exact: no overflow */
+ { 0, 1, 1, }, /* exact */
+ { 1, 1, 2, }, /* exact */
+ { 0, 4294967294, 4294967294, }, /* exact: one below max */
+ { 1, 4294967294, 4294967295, }, /* exact: reaches max without overflow */
+ { 2, 4294967294, 4294967295, }, /* overflow: saturates to max */
+ { 0, 4294967295, 4294967295, }, /* exact: max + 0 */
+ { 1, 4294967295, 4294967295, }, /* overflow: saturates to max */
+ { 2, 4294967295, 4294967295, }, /* overflow: saturates to max */
+ { 4294967295, 4294967295, 4294967295, }, /* overflow: max + max saturates to max */
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint64_t
+#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_1
+
+DEF_SAT_U_ADD_FMT_1(T)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, }, /* exact: no overflow */
+ { 0, 1, 1, }, /* exact */
+ { 1, 1, 2, }, /* exact */
+ { 0, 18446744073709551614u, 18446744073709551614u, }, /* exact: one below max */
+ { 1, 18446744073709551614u, 18446744073709551615u, }, /* exact: reaches max without overflow */
+ { 2, 18446744073709551614u, 18446744073709551615u, }, /* overflow: saturates to max */
+ { 0, 18446744073709551615u, 18446744073709551615u, }, /* exact: max + 0 */
+ { 1, 18446744073709551615u, 18446744073709551615u, }, /* overflow: saturates to max */
+ { 2, 18446744073709551615u, 18446744073709551615u, }, /* overflow: saturates to max */
+ { 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, }, /* overflow: max + max saturates to max */
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint8_t
+#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_2
+
+DEF_SAT_U_ADD_FMT_2(T)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, }, /* exact: no overflow */
+ { 0, 1, 1, }, /* exact */
+ { 1, 1, 2, }, /* exact */
+ { 0, 254, 254, }, /* exact: one below max */
+ { 1, 254, 255, }, /* exact: reaches max without overflow */
+ { 2, 254, 255, }, /* overflow: saturates to max */
+ { 0, 255, 255, }, /* exact: max + 0 */
+ { 1, 255, 255, }, /* overflow: saturates to max */
+ { 2, 255, 255, }, /* overflow: saturates to max */
+ { 255, 255, 255, }, /* overflow: max + max saturates to max */
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint16_t
+#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_2
+
+DEF_SAT_U_ADD_FMT_2(T)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, }, /* exact: no overflow */
+ { 0, 1, 1, }, /* exact */
+ { 1, 1, 2, }, /* exact */
+ { 0, 65534, 65534, }, /* exact: one below max */
+ { 1, 65534, 65535, }, /* exact: reaches max without overflow */
+ { 2, 65534, 65535, }, /* overflow: saturates to max */
+ { 0, 65535, 65535, }, /* exact: max + 0 */
+ { 1, 65535, 65535, }, /* overflow: saturates to max */
+ { 2, 65535, 65535, }, /* overflow: saturates to max */
+ { 65535, 65535, 65535, }, /* overflow: max + max saturates to max */
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint32_t
+#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_2
+
+DEF_SAT_U_ADD_FMT_2(T)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, }, /* exact: no overflow */
+ { 0, 1, 1, }, /* exact */
+ { 1, 1, 2, }, /* exact */
+ { 0, 4294967294, 4294967294, }, /* exact: one below max */
+ { 1, 4294967294, 4294967295, }, /* exact: reaches max without overflow */
+ { 2, 4294967294, 4294967295, }, /* overflow: saturates to max */
+ { 0, 4294967295, 4294967295, }, /* exact: max + 0 */
+ { 1, 4294967295, 4294967295, }, /* overflow: saturates to max */
+ { 2, 4294967295, 4294967295, }, /* overflow: saturates to max */
+ { 4294967295, 4294967295, 4294967295, }, /* overflow: max + max saturates to max */
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint64_t
+#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_2
+
+DEF_SAT_U_ADD_FMT_2(T)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, }, /* exact: no overflow */
+ { 0, 1, 1, }, /* exact */
+ { 1, 1, 2, }, /* exact */
+ { 0, 18446744073709551614u, 18446744073709551614u, }, /* exact: one below max */
+ { 1, 18446744073709551614u, 18446744073709551615u, }, /* exact: reaches max without overflow */
+ { 2, 18446744073709551614u, 18446744073709551615u, }, /* overflow: saturates to max */
+ { 0, 18446744073709551615u, 18446744073709551615u, }, /* exact: max + 0 */
+ { 1, 18446744073709551615u, 18446744073709551615u, }, /* overflow: saturates to max */
+ { 2, 18446744073709551615u, 18446744073709551615u, }, /* overflow: saturates to max */
+ { 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, }, /* overflow: max + max saturates to max */
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint8_t
+#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_3
+
+DEF_SAT_U_ADD_FMT_3(T, 0xffu)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, }, /* exact: no overflow */
+ { 0, 1, 1, }, /* exact */
+ { 1, 1, 2, }, /* exact */
+ { 0, 254, 254, }, /* exact: one below max */
+ { 1, 254, 255, }, /* exact: reaches max without overflow */
+ { 2, 254, 255, }, /* overflow: saturates to max */
+ { 0, 255, 255, }, /* exact: max + 0 */
+ { 1, 255, 255, }, /* overflow: saturates to max */
+ { 2, 255, 255, }, /* overflow: saturates to max */
+ { 255, 255, 255, }, /* overflow: max + max saturates to max */
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,27 @@
+#ifndef HAVE_DEFINED_SCALAR_SAT_BINARY
+#define HAVE_DEFINED_SCALAR_SAT_BINARY
+
+/* To use this header file for a run test, you need to:
+ 1. define T as the operand type, for example uint8_t,
+ 2. define RUN_SAT_BINARY as the macro that calls the function under test,
+ 3. define test_data as an array of { arg_0, arg_1, expect } rows.
+ */
+
+int
+main ()
+{
+  /* Check every { arg_0, arg_1, expect } row; abort on the first mismatch.  */
+  const unsigned count = sizeof (test_data) / sizeof (test_data[0]);
+  unsigned row = 0;
+
+  while (row < count)
+    {
+      T *entry = test_data[row++];
+
+      if (RUN_SAT_BINARY (T, entry[0], entry[1]) != entry[2])
+        __builtin_abort ();
+    }
+  return 0;
+}
+
+#endif
@@ -53,6 +53,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-range.h"
+#include "gimple-match-auto.h"
/* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
@@ -4498,6 +4499,65 @@ vect_recog_mult_pattern (vec_info *vinfo,
return pattern_stmt;
}
+/* Build the call "tmp = .SAT_ADD (OP_0, OP_1)", located at LAST_STMT, and
+   store the vector type for the scalar type of OP_0 in *TYPE_OUT.  Return
+   NULL, leaving *TYPE_OUT untouched, if that scalar type has no vector type
+   or IFN_SAT_ADD is not directly supported for the vector type.  */
+static gimple *
+vect_sat_add_build_call (vec_info *vinfo, gimple *last_stmt, tree *type_out,
+                         tree op_0, tree op_1)
+{
+  tree scalar_type = TREE_TYPE (op_0);
+  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+
+  if (vectype == NULL_TREE
+      || !direct_internal_fn_supported_p (IFN_SAT_ADD, vectype,
+                                          OPTIMIZE_FOR_SPEED))
+    return NULL;
+
+  gcall *sat_add = gimple_build_call_internal (IFN_SAT_ADD, 2, op_0, op_1);
+  gimple_call_set_lhs (sat_add, vect_recog_temp_ssa_var (scalar_type, NULL));
+  gimple_call_set_nothrow (sat_add, /* nothrow_p */ true);
+  gimple_set_location (sat_add, gimple_location (last_stmt));
+  *type_out = vectype;
+  vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
+  return sat_add;
+}
+
+/* Function vect_recog_sat_add_pattern
+
+   Try to recognize an unsigned saturating addition (SAT_ADD) whose result
+   is defined by STMT_VINFO, for example the gimple sequence:
+
+     _7 = _4 + _6;
+     _8 = _4 > _7;
+     _9 = (long unsigned int) _8;
+     _10 = -_9;
+     _12 = _7 | _10;
+
+   which is then simplified to:
+
+     _12 = .SAT_ADD (_4, _6);
+
+   Return the .SAT_ADD call and set *TYPE_OUT on success.  Return NULL when
+   STMT_VINFO is not an assignment, when its result does not match the
+   SAT_ADD form, or when the call cannot be built.  */
+static gimple *
+vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
+                            tree *type_out)
+{
+  gimple *stmt = stmt_vinfo->stmt;
+
+  if (!is_gimple_assign (stmt))
+    return NULL;
+
+  tree ops[2];
+  if (!gimple_unsigned_integer_sat_add (gimple_assign_lhs (stmt), ops, NULL))
+    return NULL;
+
+  return vect_sat_add_build_call (vinfo, stmt, type_out, ops[0], ops[1]);
+}
+
/* Detect a signed division by a constant that wouldn't be
otherwise vectorized:
@@ -6998,6 +7058,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
{ vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
{ vect_recog_divmod_pattern, "divmod" },
{ vect_recog_mult_pattern, "mult" },
+ { vect_recog_sat_add_pattern, "sat_add" },
{ vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
{ vect_recog_gcond_pattern, "gcond" },
{ vect_recog_bool_pattern, "bool" },