@@ -134,6 +134,7 @@ extern bool
riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
extern void riscv_expand_usadd (rtx, rtx, rtx);
+extern void riscv_expand_ussub (rtx, rtx, rtx);
#ifdef RTX_CODE
extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0);
@@ -11612,6 +11612,41 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
}
+/* Implements the unsigned saturation sub standard name usadd for int mode.
+
+ z = SAT_SUB(x, y).
+ =>
+ 1. minus = x - y.
+ 2. lt = x < y.
+ 3. lt = lt - 1.
+ 4. z = minus & lt. */
+
+void
+riscv_expand_ussub (rtx dest, rtx x, rtx y)
+{
+ machine_mode mode = GET_MODE (dest);
+ rtx pmode_x = gen_lowpart (Pmode, x);
+ rtx pmode_y = gen_lowpart (Pmode, y);
+ rtx pmode_lt = gen_reg_rtx (Pmode);
+ rtx pmode_minus = gen_reg_rtx (Pmode);
+ rtx pmode_dest = gen_reg_rtx (Pmode);
+
+ /* Step-1: minus = x - y */
+ riscv_emit_binary (MINUS, pmode_minus, pmode_x, pmode_y);
+
+ /* Step-2: lt = x < y */
+ riscv_emit_binary (LTU, pmode_lt, pmode_x, pmode_y);
+
+ /* Step-3: lt = lt - 1 (lt + (-1)) */
+ riscv_emit_binary (PLUS, pmode_lt, pmode_lt, CONSTM1_RTX (Pmode));
+
+ /* Step-4: pmode_dest = minus & lt */
+ riscv_emit_binary (AND, pmode_dest, pmode_lt, pmode_minus);
+
+ /* Step-5: dest = pmode_dest */
+ emit_move_insn (dest, gen_lowpart (mode, pmode_dest));
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -4228,6 +4228,17 @@ (define_expand "usadd<mode>3"
}
)
+(define_expand "ussub<mode>3"
+ [(match_operand:ANYI 0 "register_operand")
+ (match_operand:ANYI 1 "register_operand")
+ (match_operand:ANYI 2 "register_operand")]
+ ""
+ {
+ riscv_expand_ussub (operands[0], operands[1], operands[2]);
+ DONE;
+ }
+)
+
;; These are forms of (x << C1) + C2, potentially canonicalized from
;; ((x + C2') << C1. Depending on the cost to load C2 vs C2' we may
;; want to go ahead and recognize this form as C2 may be cheaper to
@@ -3,6 +3,9 @@
#include <stdint-gcc.h>
+/******************************************************************************/
+/* Saturation Add (unsigned and signed) */
+/******************************************************************************/
#define DEF_SAT_U_ADD_FMT_1(T) \
T __attribute__((noinline)) \
sat_u_add_##T##_fmt_1 (T x, T y) \
@@ -72,4 +75,24 @@ vec_sat_u_add_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
#define RUN_VEC_SAT_U_ADD_FMT_1(T, out, op_1, op_2, N) \
vec_sat_u_add_##T##_fmt_1(out, op_1, op_2, N)
+/******************************************************************************/
+/* Saturation Sub (Unsigned and Signed) */
+/******************************************************************************/
+#define DEF_SAT_U_SUB_FMT_1(T) \
+T __attribute__((noinline)) \
+sat_u_sub_##T##_fmt_1 (T x, T y) \
+{ \
+ return (x - y) & (-(T)(x >= y)); \
+}
+
+#define DEF_SAT_U_SUB_FMT_2(T) \
+T __attribute__((noinline)) \
+sat_u_sub_##T##_fmt_2 (T x, T y) \
+{ \
+ return (x - y) & (-(T)(x > y)); \
+}
+
+#define RUN_SAT_U_SUB_FMT_1(T, x, y) sat_u_sub_##T##_fmt_1(x, y)
+#define RUN_SAT_U_SUB_FMT_2(T, x, y) sat_u_sub_##T##_fmt_2(x, y)
+
#endif
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_uint8_t_fmt_1:
+** sub\s+[atx][0-9]+,\s*a0,\s*a1
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+DEF_SAT_U_SUB_FMT_1(uint8_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_uint16_t_fmt_1:
+** sub\s+[atx][0-9]+,\s*a0,\s*a1
+** sltu\s+[atx][0-9]+,\s*a0,\s*a1
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** and\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_SUB_FMT_1(uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_uint32_t_fmt_1:
+** sub\s+[atx][0-9]+,\s*a0,\s*a1
+** sltu\s+[atx][0-9]+,\s*a0,\s*a1
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** and\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
+** sext.w\s+a0,\s*a0
+** ret
+*/
+DEF_SAT_U_SUB_FMT_1(uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_uint64_t_fmt_1:
+** sub\s+[atx][0-9]+,\s*a0,\s*a1
+** sltu\s+[atx][0-9]+,\s*a0,\s*a1
+** addi\s+a0,\s*[atx][0-9]+,\s*-1
+** and\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
+** ret
+*/
+DEF_SAT_U_SUB_FMT_1(uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_uint8_t_fmt_2:
+** sub\s+[atx][0-9]+,\s*a0,\s*a1
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+DEF_SAT_U_SUB_FMT_2(uint8_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_uint16_t_fmt_2:
+** sub\s+[atx][0-9]+,\s*a0,\s*a1
+** sltu\s+[atx][0-9]+,\s*a0,\s*a1
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** and\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_SUB_FMT_2(uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_uint32_t_fmt_2:
+** sub\s+[atx][0-9]+,\s*a0,\s*a1
+** sltu\s+[atx][0-9]+,\s*a0,\s*a1
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** and\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
+** sext.w\s+a0,\s*a0
+** ret
+*/
+DEF_SAT_U_SUB_FMT_2(uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_uint64_t_fmt_2:
+** sub\s+[atx][0-9]+,\s*a0,\s*a1
+** sltu\s+[atx][0-9]+,\s*a0,\s*a1
+** addi\s+a0,\s*[atx][0-9]+,\s*-1
+** and\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
+** ret
+*/
+DEF_SAT_U_SUB_FMT_2(uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint8_t
+#define RUN_SAT_BINARY RUN_SAT_U_SUB_FMT_1
+
+DEF_SAT_U_SUB_FMT_1(T)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, },
+ { 0, 1, 0, },
+ { 1, 1, 0, },
+ { 255, 254, 1, },
+ { 255, 255, 0, },
+ { 254, 255, 0, },
+ { 253, 254, 0, },
+ { 0, 255, 0, },
+ { 1, 255, 0, },
+ { 32, 5, 27, },
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint16_t
+#define RUN_SAT_BINARY RUN_SAT_U_SUB_FMT_1
+
+DEF_SAT_U_SUB_FMT_1(T)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, },
+ { 0, 1, 0, },
+ { 1, 1, 0, },
+ { 65535, 65534, 1, },
+ { 65535, 65535, 0, },
+ { 65534, 65535, 0, },
+ { 65533, 65534, 0, },
+ { 0, 65535, 0, },
+ { 1, 65535, 0, },
+ { 35, 5, 30, },
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint32_t
+#define RUN_SAT_BINARY RUN_SAT_U_SUB_FMT_1
+
+DEF_SAT_U_SUB_FMT_1(T)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, },
+ { 0, 1, 0, },
+ { 1, 1, 0, },
+ { 4294967295, 4294967294, 1, },
+ { 4294967295, 4294967295, 0, },
+ { 4294967294, 4294967295, 0, },
+ { 4294967293, 4294967294, 0, },
+ { 1, 4294967295, 0, },
+ { 2, 4294967295, 0, },
+ { 5, 1, 4, },
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint64_t
+#define RUN_SAT_BINARY RUN_SAT_U_SUB_FMT_1
+
+DEF_SAT_U_SUB_FMT_1(T)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, },
+ { 0, 1, 0, },
+ { 1, 1, 0, },
+ { 18446744073709551615u, 18446744073709551614u, 1, },
+ { 18446744073709551615u, 18446744073709551615u, 0, },
+ { 18446744073709551614u, 18446744073709551615u, 0, },
+ { 18446744073709551613u, 18446744073709551614u, 0, },
+ { 0, 18446744073709551615u, 0, },
+ { 1, 18446744073709551615u, 0, },
+ { 43, 11, 32, },
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint8_t
+#define RUN_SAT_BINARY RUN_SAT_U_SUB_FMT_2
+
+DEF_SAT_U_SUB_FMT_2(T)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, },
+ { 0, 1, 0, },
+ { 1, 1, 0, },
+ { 255, 254, 1, },
+ { 255, 255, 0, },
+ { 254, 255, 0, },
+ { 253, 254, 0, },
+ { 0, 255, 0, },
+ { 1, 255, 0, },
+ { 32, 5, 27, },
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint16_t
+#define RUN_SAT_BINARY RUN_SAT_U_SUB_FMT_2
+
+DEF_SAT_U_SUB_FMT_2(T)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, },
+ { 0, 1, 0, },
+ { 1, 1, 0, },
+ { 65535, 65534, 1, },
+ { 65535, 65535, 0, },
+ { 65534, 65535, 0, },
+ { 65533, 65534, 0, },
+ { 0, 65535, 0, },
+ { 1, 65535, 0, },
+ { 35, 5, 30, },
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint32_t
+#define RUN_SAT_BINARY RUN_SAT_U_SUB_FMT_2
+
+DEF_SAT_U_SUB_FMT_2(T)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, },
+ { 0, 1, 0, },
+ { 1, 1, 0, },
+ { 4294967295, 4294967294, 1, },
+ { 4294967295, 4294967295, 0, },
+ { 4294967294, 4294967295, 0, },
+ { 4294967293, 4294967294, 0, },
+ { 1, 4294967295, 0, },
+ { 2, 4294967295, 0, },
+ { 5, 1, 4, },
+};
+
+#include "scalar_sat_binary.h"
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T uint64_t
+#define RUN_SAT_BINARY RUN_SAT_U_SUB_FMT_2
+
+DEF_SAT_U_SUB_FMT_2(T)
+
+T test_data[][3] = {
+ /* arg_0, arg_1, expect */
+ { 0, 0, 0, },
+ { 0, 1, 0, },
+ { 1, 1, 0, },
+ { 18446744073709551615u, 18446744073709551614u, 1, },
+ { 18446744073709551615u, 18446744073709551615u, 0, },
+ { 18446744073709551614u, 18446744073709551615u, 0, },
+ { 18446744073709551613u, 18446744073709551614u, 0, },
+ { 0, 18446744073709551615u, 0, },
+ { 1, 18446744073709551615u, 0, },
+ { 43, 11, 32, },
+};
+
+#include "scalar_sat_binary.h"