new file mode 100644
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details" } */
+/* Compile-only test: instantiates the FMT_4 signed saturating subtract loop for int16_t.  */
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_SUB_FMT_4(int16_t, uint16_t, INT16_MIN, INT16_MAX)
+/* Expect ".SAT_SUB " exactly twice in the expand RTL dump and vssub.vv exactly once in the assembly.  */
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vssub\.vv} 1 } } */
new file mode 100644
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details" } */
+/* Compile-only test: instantiates the FMT_4 signed saturating subtract loop for int32_t.  */
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_SUB_FMT_4(int32_t, uint32_t, INT32_MIN, INT32_MAX)
+/* Expect ".SAT_SUB " exactly twice in the expand RTL dump and vssub.vv exactly once in the assembly.  */
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vssub\.vv} 1 } } */
new file mode 100644
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details" } */
+/* Compile-only test: instantiates the FMT_4 signed saturating subtract loop for int64_t.  */
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_SUB_FMT_4(int64_t, uint64_t, INT64_MIN, INT64_MAX)
+/* Expect ".SAT_SUB " exactly twice in the expand RTL dump and vssub.vv exactly once in the assembly.  */
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vssub\.vv} 1 } } */
new file mode 100644
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details" } */
+/* Compile-only test: instantiates the FMT_4 signed saturating subtract loop for int8_t.  */
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_SUB_FMT_4(int8_t, uint8_t, INT8_MIN, INT8_MAX)
+/* Expect ".SAT_SUB " exactly twice in the expand RTL dump and vssub.vv exactly once in the assembly.  */
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vssub\.vv} 1 } } */
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+/* Run test for the FMT_4 signed saturating subtract loop with int16_t elements.  */
+#include "../vec_sat_arith.h"
+#include "vec_sat_data.h"
+/* T, T1 and T2 are the type arguments handed to the macros below.  */
+#define T int16_t
+#define T1 int16_t
+#define T2 uint16_t
+/* The _WRAP form lets T1/T2 be macro-expanded before the inner macro pastes the type into the function name.  */
+DEF_VEC_SAT_S_SUB_FMT_4_WRAP (T1, T2, INT16_MIN, INT16_MAX)
+/* test_data and RUN_VEC_SAT_BINARY are set up before vec_sat_binary_vvv_run.h; presumably that header consumes them -- confirm there.  */
+#define test_data TEST_BINARY_DATA_NAME_WRAP(T, T, sssub)
+#define RUN_VEC_SAT_BINARY(T, out, op_1, op_2, N) \
+ RUN_VEC_SAT_S_SUB_FMT_4_WRAP(T, out, op_1, op_2, N)
+
+#include "vec_sat_binary_vvv_run.h"
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+/* Run test for the FMT_4 signed saturating subtract loop with int32_t elements.  */
+#include "../vec_sat_arith.h"
+#include "vec_sat_data.h"
+/* T, T1 and T2 are the type arguments handed to the macros below.  */
+#define T int32_t
+#define T1 int32_t
+#define T2 uint32_t
+/* The _WRAP form lets T1/T2 be macro-expanded before the inner macro pastes the type into the function name.  */
+DEF_VEC_SAT_S_SUB_FMT_4_WRAP (T1, T2, INT32_MIN, INT32_MAX)
+/* test_data and RUN_VEC_SAT_BINARY are set up before vec_sat_binary_vvv_run.h; presumably that header consumes them -- confirm there.  */
+#define test_data TEST_BINARY_DATA_NAME_WRAP(T, T, sssub)
+#define RUN_VEC_SAT_BINARY(T, out, op_1, op_2, N) \
+ RUN_VEC_SAT_S_SUB_FMT_4_WRAP(T, out, op_1, op_2, N)
+
+#include "vec_sat_binary_vvv_run.h"
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+/* Run test for the FMT_4 signed saturating subtract loop with int64_t elements.  */
+#include "../vec_sat_arith.h"
+#include "vec_sat_data.h"
+/* T, T1 and T2 are the type arguments handed to the macros below.  */
+#define T int64_t
+#define T1 int64_t
+#define T2 uint64_t
+/* The _WRAP form lets T1/T2 be macro-expanded before the inner macro pastes the type into the function name.  */
+DEF_VEC_SAT_S_SUB_FMT_4_WRAP (T1, T2, INT64_MIN, INT64_MAX)
+/* test_data and RUN_VEC_SAT_BINARY are set up before vec_sat_binary_vvv_run.h; presumably that header consumes them -- confirm there.  */
+#define test_data TEST_BINARY_DATA_NAME_WRAP(T, T, sssub)
+#define RUN_VEC_SAT_BINARY(T, out, op_1, op_2, N) \
+ RUN_VEC_SAT_S_SUB_FMT_4_WRAP(T, out, op_1, op_2, N)
+
+#include "vec_sat_binary_vvv_run.h"
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+/* Run test for the FMT_4 signed saturating subtract loop with int8_t elements.  */
+#include "../vec_sat_arith.h"
+#include "vec_sat_data.h"
+/* T, T1 and T2 are the type arguments handed to the macros below.  */
+#define T int8_t
+#define T1 int8_t
+#define T2 uint8_t
+/* The _WRAP form lets T1/T2 be macro-expanded before the inner macro pastes the type into the function name.  */
+DEF_VEC_SAT_S_SUB_FMT_4_WRAP (T1, T2, INT8_MIN, INT8_MAX)
+/* test_data and RUN_VEC_SAT_BINARY are set up before vec_sat_binary_vvv_run.h; presumably that header consumes them -- confirm there.  */
+#define test_data TEST_BINARY_DATA_NAME_WRAP(T, T, sssub)
+#define RUN_VEC_SAT_BINARY(T, out, op_1, op_2, N) \
+ RUN_VEC_SAT_S_SUB_FMT_4_WRAP(T, out, op_1, op_2, N)
+
+#include "vec_sat_binary_vvv_run.h"
@@ -500,6 +500,23 @@ vec_sat_s_sub_##T##_fmt_3 (T *out, T *op_1, T *op_2, unsigned limit) \
#define DEF_VEC_SAT_S_SUB_FMT_3_WRAP(T, UT, MIN, MAX) \
DEF_VEC_SAT_S_SUB_FMT_3(T, UT, MIN, MAX)
+#define DEF_VEC_SAT_S_SUB_FMT_4(T, UT, MIN, MAX) /* Defines vec_sat_s_sub_<T>_fmt_4; UT is not referenced in the expansion.  */ \
+void __attribute__((noinline)) \
+vec_sat_s_sub_##T##_fmt_4 (T *out, T *op_1, T *op_2, unsigned limit) \
+{ \
+ unsigned i; \
+ for (i = 0; i < limit; i++) /* Handles elements 0 .. limit-1 of op_1, op_2 and out.  */ \
+ { \
+ T x = op_1[i]; \
+ T y = op_2[i]; \
+ T minus; /* Receives the result of x - y from the builtin below.  */ \
+ bool overflow = __builtin_sub_overflow (x, y, &minus); /* True when x - y does not fit in T.  */ \
+ out[i] = !overflow ? minus : x < 0 ? MIN : MAX; /* On overflow: MIN when x is negative, MAX otherwise.  */ \
+ } \
+}
+#define DEF_VEC_SAT_S_SUB_FMT_4_WRAP(T, UT, MIN, MAX) /* Forwarding wrapper: a macro passed as T is expanded here, before DEF_VEC_SAT_S_SUB_FMT_4 pastes it with ##.  */ \
+ DEF_VEC_SAT_S_SUB_FMT_4(T, UT, MIN, MAX)
+
#define RUN_VEC_SAT_U_SUB_FMT_1(T, out, op_1, op_2, N) \
vec_sat_u_sub_##T##_fmt_1(out, op_1, op_2, N)
@@ -550,6 +567,11 @@ vec_sat_s_sub_##T##_fmt_3 (T *out, T *op_1, T *op_2, unsigned limit) \
#define RUN_VEC_SAT_S_SUB_FMT_3_WRAP(T, out, op_1, op_2, N) \
RUN_VEC_SAT_S_SUB_FMT_3(T, out, op_1, op_2, N)
+#define RUN_VEC_SAT_S_SUB_FMT_4(T, out, op_1, op_2, N) /* Expands to a call of vec_sat_s_sub_<T>_fmt_4 with N as the last argument.  */ \
+ vec_sat_s_sub_##T##_fmt_4(out, op_1, op_2, N)
+#define RUN_VEC_SAT_S_SUB_FMT_4_WRAP(T, out, op_1, op_2, N) /* Forwarding wrapper: a macro passed as T is expanded here, before RUN_VEC_SAT_S_SUB_FMT_4 pastes it with ##.  */ \
+ RUN_VEC_SAT_S_SUB_FMT_4(T, out, op_1, op_2, N)
+
/******************************************************************************/
/* Saturation Sub Truncated (Unsigned and Signed) */
/******************************************************************************/