@@ -598,4 +598,268 @@ int64_t TEST_BINARY_DATA_NAME(int64_t, int64_t, ssadd)[][3][N] =
},
};
+int8_t TEST_BINARY_DATA_NAME(int8_t, int8_t, sssub)[][3][N] = /* int8_t signed saturating-subtract vectors: each group is three N-element rows */
+{
+ { /* group 0: zero, a small negative result, and positive overflow */
+ { /* row 0: minuend */
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ 126, 126, 126, 126,
+ 127, 127, 127, 127,
+ },
+ { /* row 1: subtrahend */
+ 0, 0, 0, 0,
+ 4, 4, 4, 4,
+ -2, -2, -2, -2,
+ -127, -127, -127, -127,
+ },
+ { /* row 2: row 0 - row 1 clamped to [-128, 127]; presumably the expected output checked by the run driver (not visible here) */
+ 0, 0, 0, 0,
+ -2, -2, -2, -2,
+ 127, 127, 127, 127, /* 126 - (-2) = 128 clamps to 127 */
+ 127, 127, 127, 127, /* 127 - (-127) = 254 clamps to 127 */
+ },
+ },
+
+ { /* group 1: negative overflow */
+ { /* row 0: minuend */
+ -7, -7, -7, -7,
+ -128, -128, -128, -128,
+ -127, -127, -127, -127,
+ -128, -128, -128, -128,
+ },
+ { /* row 1: subtrahend */
+ -4, -4, -4, -4,
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 127, 127, 127, 127,
+ },
+ { /* row 2: clamped difference */
+ -3, -3, -3, -3,
+ -128, -128, -128, -128, /* -128 - 1 = -129 clamps to -128 */
+ -128, -128, -128, -128, /* -127 - 1 = -128, exactly the minimum (no clamp needed) */
+ -128, -128, -128, -128, /* -128 - 127 = -255 clamps to -128 */
+ },
+ },
+
+ { /* group 2: extremes in both directions plus near-boundary exact results */
+ { /* row 0: minuend */
+ -128, -128, -128, -128,
+ 127, 127, 127, 127,
+ -125, -125, -125, -125,
+ 126, 126, 126, 126,
+ },
+ { /* row 1: subtrahend */
+ 127, 127, 127, 127,
+ -127, -127, -127, -127,
+ -127, -127, -127, -127,
+ 127, 127, 127, 127,
+ },
+ { /* row 2: clamped difference */
+ -128, -128, -128, -128, /* -128 - 127 clamps to -128 */
+ 127, 127, 127, 127, /* 127 - (-127) clamps to 127 */
+ 2, 2, 2, 2, /* -125 - (-127) = 2, operands share a sign so no overflow */
+ -1, -1, -1, -1, /* 126 - 127 = -1 */
+ },
+ },
+};
+
+int16_t TEST_BINARY_DATA_NAME(int16_t, int16_t, sssub)[][3][N] = /* int16_t signed saturating-subtract vectors: each group is three N-element rows */
+{
+ { /* group 0: zero, a small negative result, and positive overflow */
+ { /* row 0: minuend */
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ 32766, 32766, 32766, 32766,
+ 32767, 32767, 32767, 32767,
+ },
+ { /* row 1: subtrahend */
+ 0, 0, 0, 0,
+ 4, 4, 4, 4,
+ -2, -2, -2, -2,
+ -32767, -32767, -32767, -32767,
+ },
+ { /* row 2: row 0 - row 1 clamped to [-32768, 32767]; presumably the expected output checked by the run driver (not visible here) */
+ 0, 0, 0, 0,
+ -2, -2, -2, -2,
+ 32767, 32767, 32767, 32767, /* 32766 - (-2) = 32768 clamps to 32767 */
+ 32767, 32767, 32767, 32767, /* 32767 - (-32767) = 65534 clamps to 32767 */
+ },
+ },
+
+ { /* group 1: negative overflow */
+ { /* row 0: minuend */
+ -7, -7, -7, -7,
+ -32768, -32768, -32768, -32768,
+ -32767, -32767, -32767, -32767,
+ -32768, -32768, -32768, -32768,
+ },
+ { /* row 1: subtrahend */
+ -4, -4, -4, -4,
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 32767, 32767, 32767, 32767,
+ },
+ { /* row 2: clamped difference */
+ -3, -3, -3, -3,
+ -32768, -32768, -32768, -32768, /* -32768 - 1 = -32769 clamps to -32768 */
+ -32768, -32768, -32768, -32768, /* -32767 - 1 = -32768, exactly the minimum (no clamp needed) */
+ -32768, -32768, -32768, -32768, /* -32768 - 32767 = -65535 clamps to -32768 */
+ },
+ },
+
+ { /* group 2: extremes in both directions plus near-boundary exact results */
+ { /* row 0: minuend */
+ -32768, -32768, -32768, -32768,
+ 32767, 32767, 32767, 32767,
+ -32765, -32765, -32765, -32765,
+ 32766, 32766, 32766, 32766,
+ },
+ { /* row 1: subtrahend */
+ 32767, 32767, 32767, 32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ 32767, 32767, 32767, 32767,
+ },
+ { /* row 2: clamped difference */
+ -32768, -32768, -32768, -32768, /* -32768 - 32767 clamps to -32768 */
+ 32767, 32767, 32767, 32767, /* 32767 - (-32767) clamps to 32767 */
+ 2, 2, 2, 2, /* -32765 - (-32767) = 2, operands share a sign so no overflow */
+ -1, -1, -1, -1, /* 32766 - 32767 = -1 */
+ },
+ },
+};
+
+int32_t TEST_BINARY_DATA_NAME(int32_t, int32_t, sssub)[][3][N] = /* int32_t signed saturating-subtract vectors: each group is three N-element rows */
+{
+ { /* group 0: zero, a small negative result, and positive overflow */
+ { /* row 0: minuend */
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ 2147483646, 2147483646, 2147483646, 2147483646,
+ 2147483647, 2147483647, 2147483647, 2147483647,
+ },
+ { /* row 1: subtrahend */
+ 0, 0, 0, 0,
+ 4, 4, 4, 4,
+ -2, -2, -2, -2,
+ -2147483647, -2147483647, -2147483647, -2147483647,
+ },
+ { /* row 2: row 0 - row 1 clamped to [-2147483648, 2147483647]; presumably the expected output checked by the run driver (not visible here) */
+ 0, 0, 0, 0,
+ -2, -2, -2, -2,
+ 2147483647, 2147483647, 2147483647, 2147483647, /* 2147483646 - (-2) = 2147483648 clamps to 2147483647 */
+ 2147483647, 2147483647, 2147483647, 2147483647, /* 2147483647 - (-2147483647) = 4294967294 clamps to 2147483647 */
+ },
+ },
+
+ { /* group 1: negative overflow */
+ { /* row 0: minuend */
+ -7, -7, -7, -7,
+ -2147483648, -2147483648, -2147483648, -2147483648,
+ -2147483647, -2147483647, -2147483647, -2147483647,
+ -2147483648, -2147483648, -2147483648, -2147483648,
+ },
+ { /* row 1: subtrahend */
+ -4, -4, -4, -4,
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 2147483647, 2147483647, 2147483647, 2147483647,
+ },
+ { /* row 2: clamped difference */
+ -3, -3, -3, -3,
+ -2147483648, -2147483648, -2147483648, -2147483648, /* -2147483648 - 1 = -2147483649 clamps to -2147483648 */
+ -2147483648, -2147483648, -2147483648, -2147483648, /* -2147483647 - 1 = -2147483648, exactly the minimum (no clamp needed) */
+ -2147483648, -2147483648, -2147483648, -2147483648, /* -2147483648 - 2147483647 = -4294967295 clamps to -2147483648 */
+ },
+ },
+
+ { /* group 2: extremes in both directions plus near-boundary exact results */
+ { /* row 0: minuend */
+ -2147483648, -2147483648, -2147483648, -2147483648,
+ 2147483647, 2147483647, 2147483647, 2147483647,
+ -2147483645, -2147483645, -2147483645, -2147483645,
+ 2147483646, 2147483646, 2147483646, 2147483646,
+ },
+ { /* row 1: subtrahend */
+ 2147483647, 2147483647, 2147483647, 2147483647,
+ -2147483647, -2147483647, -2147483647, -2147483647,
+ -2147483647, -2147483647, -2147483647, -2147483647,
+ 2147483647, 2147483647, 2147483647, 2147483647,
+ },
+ { /* row 2: clamped difference */
+ -2147483648, -2147483648, -2147483648, -2147483648, /* -2147483648 - 2147483647 clamps to -2147483648 */
+ 2147483647, 2147483647, 2147483647, 2147483647, /* 2147483647 - (-2147483647) clamps to 2147483647 */
+ 2, 2, 2, 2, /* -2147483645 - (-2147483647) = 2, operands share a sign so no overflow */
+ -1, -1, -1, -1, /* 2147483646 - 2147483647 = -1 */
+ },
+ },
+};
+
+int64_t TEST_BINARY_DATA_NAME(int64_t, int64_t, sssub)[][3][N] = /* int64_t signed saturating-subtract vectors: each group is three N-element rows */
+{
+ { /* group 0: zero, a small negative result, and positive overflow */
+ { /* row 0: minuend */
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ 9223372036854775806ll, 9223372036854775806ll, 9223372036854775806ll, 9223372036854775806ll,
+ 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll,
+ },
+ { /* row 1: subtrahend */
+ 0, 0, 0, 0,
+ 4, 4, 4, 4,
+ -2, -2, -2, -2,
+ -9223372036854775807ll, -9223372036854775807ll, -9223372036854775807ll, -9223372036854775807ll,
+ },
+ { /* row 2: row 0 - row 1 clamped to the int64_t range; presumably the expected output checked by the run driver (not visible here) */
+ 0, 0, 0, 0,
+ -2, -2, -2, -2,
+ 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll, /* (max - 1) - (-2) exceeds the maximum by one, clamps to max */
+ 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll, /* max - (-max) clamps to max */
+ },
+ },
+
+ { /* group 1: negative overflow */
+ { /* row 0: minuend */
+ -7, -7, -7, -7,
+ -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, /* ull suffix: 9223372036854775808 does not fit a signed 64-bit literal; the negated unsigned value is 2^63, which becomes the int64_t minimum on conversion (modular conversion, as GCC implements it) */
+ -9223372036854775807ll, -9223372036854775807ll, -9223372036854775807ll, -9223372036854775807ll,
+ -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull,
+ },
+ { /* row 1: subtrahend */
+ -4, -4, -4, -4,
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll,
+ },
+ { /* row 2: clamped difference */
+ -3, -3, -3, -3,
+ -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, /* min - 1 clamps to min */
+ -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, /* (min + 1) - 1 = min exactly (no clamp needed) */
+ -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, /* min - max clamps to min */
+ },
+ },
+
+ { /* group 2: extremes in both directions plus near-boundary exact results */
+ { /* row 0: minuend */
+ -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull,
+ 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll,
+ -9223372036854775805ll, -9223372036854775805ll, -9223372036854775805ll, -9223372036854775805ll,
+ 9223372036854775806ll, 9223372036854775806ll, 9223372036854775806ll, 9223372036854775806ll,
+ },
+ { /* row 1: subtrahend */
+ 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll,
+ -9223372036854775807ll, -9223372036854775807ll, -9223372036854775807ll, -9223372036854775807ll,
+ -9223372036854775807ll, -9223372036854775807ll, -9223372036854775807ll, -9223372036854775807ll,
+ 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll,
+ },
+ { /* row 2: clamped difference */
+ -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, /* min - max clamps to min */
+ 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll, /* max - (-max) clamps to max */
+ 2, 2, 2, 2, /* -9223372036854775805 - (-9223372036854775807) = 2, operands share a sign so no overflow */
+ -1, -1, -1, -1, /* (max - 1) - max = -1 */
+ },
+ },
+};
+
#endif
new file mode 100644
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_SUB_FMT_1(int16_t, uint16_t, INT16_MIN, INT16_MAX) /* defines vec_sat_s_sub_int16_t_fmt_1; the final checks below require two '.SAT_SUB ' matches in the expand dump and one vssub.vv in the assembly */
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vssub\.vv} 1 } } */
new file mode 100644
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_SUB_FMT_1(int32_t, uint32_t, INT32_MIN, INT32_MAX) /* defines vec_sat_s_sub_int32_t_fmt_1; the final checks below require two '.SAT_SUB ' matches in the expand dump and one vssub.vv in the assembly */
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vssub\.vv} 1 } } */
new file mode 100644
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_SUB_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX) /* defines vec_sat_s_sub_int64_t_fmt_1; the final checks below require two '.SAT_SUB ' matches in the expand dump and one vssub.vv in the assembly */
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vssub\.vv} 1 } } */
new file mode 100644
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX) /* defines vec_sat_s_sub_int8_t_fmt_1; the final checks below require two '.SAT_SUB ' matches in the expand dump and one vssub.vv in the assembly */
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vssub\.vv} 1 } } */
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../vec_sat_arith.h"
+#include "vec_sat_data.h"
+
+#define T int16_t /* element type under test */
+#define T1 int16_t /* signed type handed to the DEF macro as its T */
+#define T2 uint16_t /* unsigned type handed to the DEF macro as its UT */
+
+DEF_VEC_SAT_S_SUB_FMT_1_WRAP (T1, T2, INT16_MIN, INT16_MAX) /* _WRAP form so T1/T2 are macro-expanded before the DEF macro token-pastes them */
+
+#define test_data TEST_BINARY_DATA_NAME_WRAP(T, T, sssub) /* names the sssub data table for T */
+#define RUN_VEC_SAT_BINARY(T, out, op_1, op_2, N) /* binds the generic hook name to the signed saturating-subtract function */ \
+ RUN_VEC_SAT_S_SUB_FMT_1_WRAP(T, out, op_1, op_2, N)
+
+#include "vec_sat_binary_vvv_run.h" /* presumably the shared driver that consumes T, test_data and RUN_VEC_SAT_BINARY -- not visible here, confirm */
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../vec_sat_arith.h"
+#include "vec_sat_data.h"
+
+#define T int32_t /* element type under test */
+#define T1 int32_t /* signed type handed to the DEF macro as its T */
+#define T2 uint32_t /* unsigned type handed to the DEF macro as its UT */
+
+DEF_VEC_SAT_S_SUB_FMT_1_WRAP (T1, T2, INT32_MIN, INT32_MAX) /* _WRAP form so T1/T2 are macro-expanded before the DEF macro token-pastes them */
+
+#define test_data TEST_BINARY_DATA_NAME_WRAP(T, T, sssub) /* names the sssub data table for T */
+#define RUN_VEC_SAT_BINARY(T, out, op_1, op_2, N) /* binds the generic hook name to the signed saturating-subtract function */ \
+ RUN_VEC_SAT_S_SUB_FMT_1_WRAP(T, out, op_1, op_2, N)
+
+#include "vec_sat_binary_vvv_run.h" /* presumably the shared driver that consumes T, test_data and RUN_VEC_SAT_BINARY -- not visible here, confirm */
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../vec_sat_arith.h"
+#include "vec_sat_data.h"
+
+#define T int64_t /* element type under test */
+#define T1 int64_t /* signed type handed to the DEF macro as its T */
+#define T2 uint64_t /* unsigned type handed to the DEF macro as its UT */
+
+DEF_VEC_SAT_S_SUB_FMT_1_WRAP (T1, T2, INT64_MIN, INT64_MAX) /* _WRAP form so T1/T2 are macro-expanded before the DEF macro token-pastes them */
+
+#define test_data TEST_BINARY_DATA_NAME_WRAP(T, T, sssub) /* names the sssub data table for T */
+#define RUN_VEC_SAT_BINARY(T, out, op_1, op_2, N) /* binds the generic hook name to the signed saturating-subtract function */ \
+ RUN_VEC_SAT_S_SUB_FMT_1_WRAP(T, out, op_1, op_2, N)
+
+#include "vec_sat_binary_vvv_run.h" /* presumably the shared driver that consumes T, test_data and RUN_VEC_SAT_BINARY -- not visible here, confirm */
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../vec_sat_arith.h"
+#include "vec_sat_data.h"
+
+#define T int8_t /* element type under test */
+#define T1 int8_t /* signed type handed to the DEF macro as its T */
+#define T2 uint8_t /* unsigned type handed to the DEF macro as its UT */
+
+DEF_VEC_SAT_S_SUB_FMT_1_WRAP (T1, T2, INT8_MIN, INT8_MAX) /* _WRAP form so T1/T2 are macro-expanded before the DEF macro token-pastes them */
+
+#define test_data TEST_BINARY_DATA_NAME_WRAP(T, T, sssub) /* names the sssub data table for T */
+#define RUN_VEC_SAT_BINARY(T, out, op_1, op_2, N) /* binds the generic hook name to the signed saturating-subtract function */ \
+ RUN_VEC_SAT_S_SUB_FMT_1_WRAP(T, out, op_1, op_2, N)
+
+#include "vec_sat_binary_vvv_run.h" /* presumably the shared driver that consumes T, test_data and RUN_VEC_SAT_BINARY -- not visible here, confirm */
@@ -446,6 +446,26 @@ vec_sat_u_sub_##T1##_##T2##_fmt_zip (T1 *x, T2 b, unsigned limit) \
}
#define DEF_VEC_SAT_U_SUB_ZIP_WRAP(T1, T2) DEF_VEC_SAT_U_SUB_ZIP(T1, T2)
+#define DEF_VEC_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) /* generates vec_sat_s_sub_<T>_fmt_1: element-wise signed saturating subtract; T = signed element type, UT = unsigned type used for the raw subtract, MIN/MAX = clamp values */ \
+void __attribute__((noinline)) \
+vec_sat_s_sub_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
+{ \
+ unsigned i; \
+ for (i = 0; i < limit; i++) /* limit = number of elements processed */ \
+ { \
+ T x = op_1[i]; \
+ T y = op_2[i]; \
+ T minus = (UT)x - (UT)y; /* subtract the UT-converted operands, then narrow back to T: the raw, possibly wrapped, difference */ \
+ out[i] = (x ^ y) >= 0 /* sign bits equal: x - y cannot overflow */ \
+ ? minus \
+ : (minus ^ x) >= 0 /* signs differ but the result kept x's sign: still no overflow */ \
+ ? minus \
+ : x < 0 ? MIN : MAX; /* overflow: clamp in the direction of x's sign */ \
+ } \
+}
+#define DEF_VEC_SAT_S_SUB_FMT_1_WRAP(T, UT, MIN, MAX) /* forwarding layer: arguments that are themselves macros get expanded before the ## pasting above */ \
+ DEF_VEC_SAT_S_SUB_FMT_1(T, UT, MIN, MAX)
+
#define RUN_VEC_SAT_U_SUB_FMT_1(T, out, op_1, op_2, N) \
vec_sat_u_sub_##T##_fmt_1(out, op_1, op_2, N)
@@ -481,6 +501,11 @@ vec_sat_u_sub_##T1##_##T2##_fmt_zip (T1 *x, T2 b, unsigned limit) \
#define RUN_VEC_SAT_U_SUB_FMT_ZIP_WRAP(T1, T2, x, b, N) \
RUN_VEC_SAT_U_SUB_FMT_ZIP(T1, T2, x, b, N) \
+#define RUN_VEC_SAT_S_SUB_FMT_1(T, out, op_1, op_2, N) /* calls vec_sat_s_sub_<T>_fmt_1, the name produced by DEF_VEC_SAT_S_SUB_FMT_1 for T; N is passed as the element count */ \
+ vec_sat_s_sub_##T##_fmt_1(out, op_1, op_2, N)
+#define RUN_VEC_SAT_S_SUB_FMT_1_WRAP(T, out, op_1, op_2, N) /* forwarding layer so a macro passed as T is expanded before the ## pasting */ \
+ RUN_VEC_SAT_S_SUB_FMT_1(T, out, op_1, op_2, N)
+
/******************************************************************************/
/* Saturation Sub Truncated (Unsigned and Signed) */
/******************************************************************************/