@@ -61,6 +61,20 @@ vec_sat_u_add_##T##_fmt_4 (T *out, T *op_1, T *op_2, unsigned limit) \
} \
}
+#define DEF_VEC_SAT_U_ADD_FMT_5(T) \
+void __attribute__((noinline)) \
+vec_sat_u_add_##T##_fmt_5 (T *out, T *op_1, T *op_2, unsigned limit) \
+{ \
+ unsigned i; \
+ for (i = 0; i < limit; i++) \
+ { \
+ T x = op_1[i]; \
+ T y = op_2[i]; \
+ T ret; \
+ out[i] = __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
+ } \
+}
+
#define RUN_VEC_SAT_U_ADD_FMT_1(T, out, op_1, op_2, N) \
vec_sat_u_add_##T##_fmt_1(out, op_1, op_2, N)
@@ -73,6 +87,9 @@ vec_sat_u_add_##T##_fmt_4 (T *out, T *op_1, T *op_2, unsigned limit) \
#define RUN_VEC_SAT_U_ADD_FMT_4(T, out, op_1, op_2, N) \
vec_sat_u_add_##T##_fmt_4(out, op_1, op_2, N)
+#define RUN_VEC_SAT_U_ADD_FMT_5(T, out, op_1, op_2, N) \
+ vec_sat_u_add_##T##_fmt_5(out, op_1, op_2, N)
+
/******************************************************************************/
/* Saturation Sub (Unsigned and Signed) */
/******************************************************************************/
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "vec_sat_arith.h"
+
+/*
+** vec_sat_u_add_uint8_t_fmt_5:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*m1,\s*ta,\s*ma
+** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_5(uint8_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "vec_sat_arith.h"
+
+/*
+** vec_sat_u_add_uint16_t_fmt_5:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*m1,\s*ta,\s*ma
+** ...
+** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_5(uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "vec_sat_arith.h"
+
+/*
+** vec_sat_u_add_uint32_t_fmt_5:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*m1,\s*ta,\s*ma
+** ...
+** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_5(uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "vec_sat_arith.h"
+
+/*
+** vec_sat_u_add_uint64_t_fmt_5:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e64,\s*m1,\s*ta,\s*ma
+** ...
+** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_5(uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "vec_sat_arith.h"
+
+#define T uint8_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_5
+
+DEF_VEC_SAT_U_ADD_FMT_5(T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ },
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ },
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 254, 255, 9,
+ },
+ {
+ 0, 1, 1, 254,
+ 254, 254, 254, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 9,
+ },
+ {
+ 0, 1, 2, 254,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary.h"
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "vec_sat_arith.h"
+
+#define T uint16_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_5
+
+DEF_VEC_SAT_U_ADD_FMT_5(T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 65534, 65535, 9,
+ },
+ {
+ 0, 1, 1, 65534,
+ 65534, 65534, 65534, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 9,
+ },
+ {
+ 0, 1, 2, 65534,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary.h"
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "vec_sat_arith.h"
+
+#define T uint32_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_5
+
+DEF_VEC_SAT_U_ADD_FMT_5(T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ },
+ {
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ },
+ {
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 4294967294, 4294967295, 9,
+ },
+ {
+ 0, 1, 1, 4294967294,
+ 4294967294, 4294967294, 4294967294, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 9,
+ },
+ {
+ 0, 1, 2, 4294967294,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary.h"
new file mode 100644
@@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "vec_sat_arith.h"
+
+#define T uint64_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_5
+
+DEF_VEC_SAT_U_ADD_FMT_5(T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ },
+ {
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ },
+ {
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 18446744073709551614u, 18446744073709551615u, 9,
+ },
+ {
+ 0, 1, 1, 18446744073709551614u,
+ 18446744073709551614u, 18446744073709551614u, 18446744073709551614u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 9,
+ },
+ {
+ 0, 1, 2, 18446744073709551614u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary.h"