@@ -219,6 +219,21 @@ vec_sat_u_sub_##T##_fmt_6 (T *out, T *op_1, T *op_2, unsigned limit) \
} \
}
+#define DEF_VEC_SAT_U_SUB_FMT_7(T) \
+void __attribute__((noinline)) \
+vec_sat_u_sub_##T##_fmt_7 (T *out, T *op_1, T *op_2, unsigned limit) \
+{ \
+ unsigned i; \
+ for (i = 0; i < limit; i++) \
+ { \
+ T x = op_1[i]; \
+ T y = op_2[i]; \
+ T ret; \
+ T overflow = __builtin_sub_overflow (x, y, &ret); \
+ out[i] = ret & (T)(overflow - 1); \
+ } \
+}
+
#define RUN_VEC_SAT_U_SUB_FMT_1(T, out, op_1, op_2, N) \
vec_sat_u_sub_##T##_fmt_1(out, op_1, op_2, N)
@@ -237,4 +252,7 @@ vec_sat_u_sub_##T##_fmt_6 (T *out, T *op_1, T *op_2, unsigned limit) \
#define RUN_VEC_SAT_U_SUB_FMT_6(T, out, op_1, op_2, N) \
vec_sat_u_sub_##T##_fmt_6(out, op_1, op_2, N)
+#define RUN_VEC_SAT_U_SUB_FMT_7(T, out, op_1, op_2, N) \
+ vec_sat_u_sub_##T##_fmt_7(out, op_1, op_2, N)
+
#endif
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "vec_sat_arith.h"
+
+/*
+** vec_sat_u_sub_uint8_t_fmt_7:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*m1,\s*ta,\s*ma
+** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vssubu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_SUB_FMT_7(uint8_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 4 "expand" } } */
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "vec_sat_arith.h"
+
+/*
+** vec_sat_u_sub_uint16_t_fmt_7:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*m1,\s*ta,\s*ma
+** ...
+** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vssubu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_SUB_FMT_7(uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 4 "expand" } } */
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "vec_sat_arith.h"
+
+/*
+** vec_sat_u_sub_uint32_t_fmt_7:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*m1,\s*ta,\s*ma
+** ...
+** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vssubu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_SUB_FMT_7(uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 4 "expand" } } */
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "vec_sat_arith.h"
+
+/*
+** vec_sat_u_sub_uint64_t_fmt_7:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e64,\s*m1,\s*ta,\s*ma
+** ...
+** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vssubu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_SUB_FMT_7(uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 4 "expand" } } */
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "vec_sat_arith.h"
+
+#define T uint8_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_SUB_FMT_7
+
+DEF_VEC_SAT_U_SUB_FMT_7(T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 0, 255, 255, 255,
+ 0, 255, 255, 255,
+ 0, 255, 255, 255,
+ 0, 255, 255, 255,
+ },
+ {
+ 1, 255, 254, 251,
+ 1, 255, 254, 251,
+ 1, 255, 254, 251,
+ 1, 255, 254, 251,
+ },
+ {
+ 0, 0, 1, 4,
+ 0, 0, 1, 4,
+ 0, 0, 1, 4,
+ 0, 0, 1, 4,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 255,
+ 5, 254, 255, 9,
+ },
+ {
+ 0, 1, 0, 254,
+ 254, 254, 254, 255,
+ 255, 255, 0, 252,
+ 255, 255, 255, 1,
+ },
+ {
+ 0, 0, 1, 0,
+ 0, 0, 0, 0,
+ 0, 0, 3, 3,
+ 0, 0, 0, 8,
+ },
+ },
+};
+
+#include "vec_sat_binary.h"
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "vec_sat_arith.h"
+
+#define T uint16_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_SUB_FMT_7
+
+DEF_VEC_SAT_U_SUB_FMT_7(T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ {
+ 55535, 45535, 35535, 25535,
+ 55535, 45535, 35535, 25535,
+ 55535, 45535, 35535, 25535,
+ 55535, 45535, 35535, 25535,
+ },
+ {
+ 10000, 20000, 30000, 40000,
+ 10000, 20000, 30000, 40000,
+ 10000, 20000, 30000, 40000,
+ 10000, 20000, 30000, 40000,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 65535, 3, 65535,
+ 5, 65534, 65535, 9,
+ },
+ {
+ 0, 1, 1, 65534,
+ 65534, 65534, 1, 65535,
+ 0, 65535, 65535, 0,
+ 65535, 65535, 1, 2,
+ },
+ {
+ 0, 0, 0, 0,
+ 0, 0, 2, 0,
+ 1, 0, 0, 65535,
+ 0, 0, 65534, 7,
+ },
+ },
+};
+
+#include "vec_sat_binary.h"
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "vec_sat_arith.h"
+
+#define T uint32_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_SUB_FMT_7
+
+DEF_VEC_SAT_U_SUB_FMT_7(T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 4, 0,
+ 0, 0, 4, 0,
+ 0, 0, 4, 0,
+ 0, 0, 4, 0,
+ }, /* arg_0 */
+ {
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ }, /* arg_1 */
+ {
+ 0, 0, 2, 0,
+ 0, 0, 2, 0,
+ 0, 0, 2, 0,
+ 0, 0, 2, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ },
+ {
+ 1294967295, 2294967295, 3294967295, 4294967295,
+ 1294967295, 2294967295, 3294967295, 4294967295,
+ 1294967295, 2294967295, 3294967295, 4294967295,
+ 1294967295, 2294967295, 3294967295, 4294967295,
+ },
+ {
+ 3000000000, 2000000000, 1000000000, 0,
+ 3000000000, 2000000000, 1000000000, 0,
+ 3000000000, 2000000000, 1000000000, 0,
+ 3000000000, 2000000000, 1000000000, 0,
+ },
+ },
+ {
+ {
+ 0, 0, 9, 0,
+ 1, 4294967295, 3, 0,
+ 1, 2, 3, 4,
+ 5, 4294967294, 4294967295, 4294967295,
+ },
+ {
+ 0, 1, 1, 4294967294,
+ 1, 2, 4294967294, 4294967295,
+ 1, 4294967295, 4294967295, 1,
+ 1, 4294967295, 4294967290, 9,
+ },
+ {
+ 0, 0, 8, 0,
+ 0, 4294967293, 0, 0,
+ 0, 0, 0, 3,
+ 4, 0, 5, 4294967286,
+ },
+ },
+};
+
+#include "vec_sat_binary.h"
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "vec_sat_arith.h"
+
+#define T uint64_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_SUB_FMT_7
+
+DEF_VEC_SAT_U_SUB_FMT_7(T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 9, 0, 0,
+ 0, 9, 0, 0,
+ 0, 9, 0, 0,
+ 0, 9, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 2, 3, 1,
+ 0, 2, 3, 1,
+ 0, 2, 3, 1,
+ 0, 2, 3, 1,
+ }, /* arg_1 */
+ {
+ 0, 7, 0, 0,
+ 0, 7, 0, 0,
+ 0, 7, 0, 0,
+ 0, 7, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ },
+ {
+ 10446744073709551615u, 11446744073709551615u, 12446744073709551615u, 18446744073709551615u,
+ 10446744073709551615u, 11446744073709551615u, 12446744073709551615u, 18446744073709551615u,
+ 10446744073709551615u, 11446744073709551615u, 12446744073709551615u, 18446744073709551615u,
+ 10446744073709551615u, 11446744073709551615u, 12446744073709551615u, 18446744073709551615u,
+ },
+ {
+ 8000000000000000000u, 7000000000000000000u, 6000000000000000000u, 0u,
+ 8000000000000000000u, 7000000000000000000u, 6000000000000000000u, 0u,
+ 8000000000000000000u, 7000000000000000000u, 6000000000000000000u, 0u,
+ 8000000000000000000u, 7000000000000000000u, 6000000000000000000u, 0u,
+ },
+ },
+ {
+ {
+ 0, 18446744073709551615u, 1, 0,
+ 1, 18446744073709551615u, 3, 0,
+ 1, 18446744073709551614u, 3, 4,
+ 5, 18446744073709551614u, 18446744073709551615u, 9,
+ },
+ {
+ 0, 1, 1, 18446744073709551614u,
+ 18446744073709551614u, 18446744073709551614u, 18446744073709551614u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 1,
+ },
+ {
+ 0, 18446744073709551614u, 0, 0,
+ 0, 1, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 8,
+ },
+ },
+};
+
+#include "vec_sat_binary.h"