Message ID | 20240708044836.1303215-1-pan2.li@intel.com |
---|---|
State | New |
Headers | show |
Series | [v2] RISC-V: Implement .SAT_TRUNC for vector unsigned int | expand |
+ if (double_mode == E_VOIDmode && quad_mode == E_VOIDmode) Why we have VOID mode ? I still don't understand the codes. juzhe.zhong@rivai.ai From: pan2.li Date: 2024-07-08 12:48 To: gcc-patches CC: juzhe.zhong; kito.cheng; jeffreyalaw; rdapp.gcc; Pan Li Subject: [PATCH v2] RISC-V: Implement .SAT_TRUNC for vector unsigned int From: Pan Li <pan2.li@intel.com> This patch would like to implement the .SAT_TRUNC for the RISC-V backend. With the help of the RVV Vector Narrowing Fixed-Point Clip Instructions. The below SEW(S) are supported: * e64 => e32 * e64 => e16 * e64 => e8 * e32 => e16 * e32 => e8 * e16 => e8 Take below example to see the changes to asm. Form 1: #define DEF_VEC_SAT_U_TRUNC_FMT_1(NT, WT) \ void __attribute__((noinline)) \ vec_sat_u_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT x = in[i]; \ bool overflow = x > (WT)(NT)(-1); \ out[i] = ((NT)x) | (NT)-overflow; \ } \ } DEF_VEC_SAT_U_TRUNC_FMT_1 (uint32_t, uint64_t) Before this patch: .L3: vsetvli a5,a2,e64,m1,ta,ma vle64.v v1,0(a1) vmsgtu.vv v0,v1,v2 vsetvli zero,zero,e32,mf2,ta,ma vncvt.x.x.w v1,v1 vmerge.vim v1,v1,-1,v0 vse32.v v1,0(a0) slli a4,a5,3 add a1,a1,a4 slli a4,a5,2 add a0,a0,a4 sub a2,a2,a5 bne a2,zero,.L3 After this patch: .L3: vsetvli a5,a2,e32,mf2,ta,ma vle64.v v1,0(a1) vnclipu.wi v1,v1,0 vse32.v v1,0(a0) slli a4,a5,3 add a1,a1,a4 slli a4,a5,2 add a0,a0,a4 sub a2,a2,a5 bne a2,zero,.L3 Passed the rv64gcv fully regression tests. gcc/ChangeLog: * config/riscv/autovec.md (ustrunc<mode><v_double_trunc>2): Add new pattern for double truncation. (ustrunc<mode><v_quad_trunc>2): Ditto but for quad truncation. (ustrunc<mode><v_oct_trunc>2): Ditto but for oct truncation. * config/riscv/riscv-protos.h (expand_vec_ustrunc): Add new decl to expand vec ustrunc. * config/riscv/riscv-v.cc (expand_vec_double_ustrunc): Add new func impl to expand vector double ustrunc. (expand_vec_quad_ustrunc): Ditto but for quad. (expand_vec_oct_ustrunc): Ditto but for oct. (expand_vec_ustrunc): Add new func impl to expand vector ustrunc. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add helper test macros. * gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h: New test. Signed-off-by: Pan Li <pan2.li@intel.com> --- gcc/config/riscv/autovec.md | 34 ++ gcc/config/riscv/riscv-protos.h | 1 + gcc/config/riscv/riscv-v.cc | 54 +++ .../riscv/rvv/autovec/binop/vec_sat_arith.h | 22 + .../riscv/rvv/autovec/unop/vec_sat_data.h | 394 ++++++++++++++++++ .../rvv/autovec/unop/vec_sat_u_trunc-1.c | 19 + .../rvv/autovec/unop/vec_sat_u_trunc-2.c | 21 + .../rvv/autovec/unop/vec_sat_u_trunc-3.c | 23 + .../rvv/autovec/unop/vec_sat_u_trunc-4.c | 19 + .../rvv/autovec/unop/vec_sat_u_trunc-5.c | 21 + .../rvv/autovec/unop/vec_sat_u_trunc-6.c | 19 + .../rvv/autovec/unop/vec_sat_u_trunc-run-1.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-2.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-3.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-4.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-5.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-6.c | 16 + .../rvv/autovec/unop/vec_sat_unary_vv_run.h | 23 + 18 files changed, 746 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 66d70f678a6..bd34ad3e862 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2645,6 +2645,7 @@ (define_expand "rawmemchr<ANYI:mode>" ;; Includes: ;; - add ;; - sub +;; - trunc ;; ========================================================================= (define_expand "usadd<mode>3" [(match_operand:V_VLSI 0 "register_operand") @@ -2668,6 +2669,39 @@ (define_expand "ussub<mode>3" } ) +(define_expand "ustrunc<mode><v_double_trunc>2" + [(match_operand:<V_DOUBLE_TRUNC> 0 "register_operand") + (match_operand:VWEXTI 1 "register_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_vec_ustrunc (operands[0], operands[1], <MODE>mode, + E_VOIDmode, E_VOIDmode); + DONE; + } +) + +(define_expand "ustrunc<mode><v_quad_trunc>2" + [(match_operand:<V_QUAD_TRUNC> 0 "register_operand") + (match_operand:VQEXTI 1 "register_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_vec_ustrunc (operands[0], operands[1], <MODE>mode, + <V_DOUBLE_TRUNC>mode, E_VOIDmode); + DONE; + } +) + +(define_expand "ustrunc<mode><v_oct_trunc>2" + [(match_operand:<V_OCT_TRUNC> 0 "register_operand") + (match_operand:VOEXTI 1 "register_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_vec_ustrunc (operands[0], operands[1], <MODE>mode, + <V_DOUBLE_TRUNC>mode, <V_QUAD_TRUNC>mode); + DONE; + } +) + ;; ========================================================================= ;; == Early break auto-vectorization patterns ;; ========================================================================= diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index a8b76173fa0..3ca26e1107d 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -639,6 +639,7 @@ void expand_vec_lceil (rtx, rtx, machine_mode, machine_mode); void expand_vec_lfloor (rtx, rtx, machine_mode, machine_mode); void expand_vec_usadd (rtx, rtx, rtx, machine_mode); void expand_vec_ussub (rtx, rtx, rtx, machine_mode); +void expand_vec_ustrunc (rtx, rtx, machine_mode, machine_mode, machine_mode); #endif bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode, bool, void (*)(rtx *, rtx), enum avl_type); diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 5306711c1b7..f8da69f3f3c 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -4901,6 +4901,60 @@ expand_vec_ussub (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode) emit_vec_binary_alu (op_0, op_1, op_2, US_MINUS, vec_mode); } +static void +expand_vec_double_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode) +{ + insn_code icode; + rtx zero = CONST0_RTX (Xmode); + enum unspec unspec = UNSPEC_VNCLIPU; + rtx ops[] = {op_0, op_1, zero}; + + icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode); + emit_vlmax_insn (icode, BINARY_OP_VXRM_RNU, ops); +} + +static void +expand_vec_quad_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode, + machine_mode double_mode) +{ + rtx double_rtx = gen_reg_rtx (double_mode); + + expand_vec_double_ustrunc (double_rtx, op_1, vec_mode); + expand_vec_double_ustrunc (op_0, double_rtx, double_mode); +} + +static void +expand_vec_oct_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode, + machine_mode double_mode, machine_mode quad_mode) +{ + rtx double_rtx = gen_reg_rtx (double_mode); + rtx quad_rtx = gen_reg_rtx (quad_mode); + + expand_vec_double_ustrunc (double_rtx, op_1, vec_mode); + expand_vec_double_ustrunc (quad_rtx, double_rtx, double_mode); + expand_vec_double_ustrunc (op_0, quad_rtx, quad_mode); +} + +/* Expand the standard name ustrunc<m><n>2 for vector mode, we can leverage + the vector fixed point vector narrowing fixed-point clip directly. */ + +void +expand_vec_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode, + machine_mode double_mode, machine_mode quad_mode) +{ + if (double_mode == E_VOIDmode && quad_mode == E_VOIDmode) + /* Like DI => SI */ + expand_vec_double_ustrunc (op_0, op_1, vec_mode); + else if (double_mode != E_VOIDmode && quad_mode == E_VOIDmode) + /* Like DI => HI */ + expand_vec_quad_ustrunc (op_0, op_1, vec_mode, double_mode); + else if (double_mode != E_VOIDmode && quad_mode != E_VOIDmode) + /* Like DI => QI */ + expand_vec_oct_ustrunc (op_0, op_1, vec_mode, double_mode, quad_mode); + else + gcc_unreachable (); +} + /* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as well. */ void diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h index a3116033fb3..b55a589e019 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h @@ -329,4 +329,26 @@ vec_sat_u_sub_trunc_##OUT_T##_fmt_1 (OUT_T *out, IN_T *op_1, IN_T y, \ #define RUN_VEC_SAT_U_SUB_TRUNC_FMT_1(OUT_T, IN_T, out, op_1, y, N) \ vec_sat_u_sub_trunc_##OUT_T##_fmt_1(out, op_1, y, N) +/******************************************************************************/ +/* Saturation Truncation (Unsigned and Signed) */ +/******************************************************************************/ +#define DEF_VEC_SAT_U_TRUNC_FMT_1(NT, WT) \ +void __attribute__((noinline)) \ +vec_sat_u_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \ +{ \ + unsigned i; \ + for (i = 0; i < limit; i++) \ + { \ + WT x = in[i]; \ + bool overflow = x > (WT)(NT)(-1); \ + out[i] = ((NT)x) | (NT)-overflow; \ + } \ +} +#define DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(NT, WT) DEF_VEC_SAT_U_TRUNC_FMT_1(NT, WT) + +#define RUN_VEC_SAT_U_TRUNC_FMT_1(NT, WT, out, in, N) \ + vec_sat_u_trunc_##NT##_##WT##_fmt_1 (out, in, N) +#define RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(NT, WT, out, in, N) \ + RUN_VEC_SAT_U_TRUNC_FMT_1(NT, WT, out, in, N) + #endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h new file mode 100644 index 00000000000..6b23ec809f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h @@ -0,0 +1,394 @@ +#ifndef HAVE_DEFINE_VEC_SAT_DATA_H +#define HAVE_DEFINE_VEC_SAT_DATA_H + +#define N 16 + +#define TEST_UNARY_STRUCT_NAME(T1, T2) test_##T1##_##T2##_s +#define TEST_UNARY_STRUCT_DECL(T1, T2) struct TEST_UNARY_STRUCT_NAME(T1, T2) +#define TEST_UNARY_STRUCT(T1, T2) \ + struct TEST_UNARY_STRUCT_NAME(T1, T2) \ + { \ + T2 in[N]; \ + T1 expect[N]; \ + T1 out[N]; \ + }; + +#define TEST_UNARY_DATA(T1, T2) test_##T1##_##T2##_data +#define TEST_UNARY_DATA_WRAP(T1, T2) TEST_UNARY_DATA(T1, T2) + +TEST_UNARY_STRUCT(uint8_t, uint16_t) +TEST_UNARY_STRUCT(uint8_t, uint32_t) +TEST_UNARY_STRUCT(uint8_t, uint64_t) + +TEST_UNARY_STRUCT(uint16_t, uint32_t) +TEST_UNARY_STRUCT(uint16_t, uint64_t) + +TEST_UNARY_STRUCT(uint32_t, uint64_t) + +TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \ + TEST_UNARY_DATA(uint8_t, uint16_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + }, + { + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + }, + }, + { + { + 65534, 65535, 650, 24, + 65534, 65535, 650, 24, + 65534, 65535, 650, 24, + 65534, 65535, 650, 24, + }, + { + 255, 255, 255, 24, + 255, 255, 255, 24, + 255, 255, 255, 24, + 255, 255, 255, 24, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint8_t, uint32_t) \ + TEST_UNARY_DATA(uint8_t, uint32_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + }, + { + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + }, + }, + { + { + 65534, 65535, 65536, 4294967291, + 65534, 65535, 65537, 4294967292, + 65534, 65535, 65538, 4294967293, + 65534, 65535, 65539, 4294967294, + }, + { + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint8_t, uint64_t) \ + TEST_UNARY_DATA(uint8_t, uint64_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + }, + { + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + }, + }, + { + { + 65534, 65535, 4294967292, 4294967296, + 65534, 65536, 4294967293, 18446744073709551613u, + 65534, 65537, 4294967294, 18446744073709551614u, + 65534, 65538, 4294967295, 18446744073709551615u, + }, + { + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \ + TEST_UNARY_DATA(uint16_t, uint32_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, + { + { + 65534, 65535, 4294967295, 4294967291, + 65534, 65535, 4294967295, 4294967292, + 65534, 65535, 4294967295, 4294967293, + 65534, 65535, 4294967295, 4294967294, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint16_t, uint64_t) \ + TEST_UNARY_DATA(uint16_t, uint64_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, + { + { + 65534, 65535, 4294967294, 4294967298, + 65534, 65536, 4294967295, 18446744073709551613u, + 65534, 65537, 4294967296, 18446744073709551614u, + 65534, 65538, 4294967297, 18446744073709551615u, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint32_t, uint64_t) \ + TEST_UNARY_DATA(uint32_t, uint64_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + }, + { + { + 65534, 65535, 4294967294, 4294967298, + 65534, 65536, 4294967295, 18446744073709551613u, + 65534, 65537, 4294967296, 18446744073709551614u, + 65534, 65538, 4294967297, 18446744073709551615u, + }, + { + 65534, 65535, 4294967294, 4294967295, + 65534, 65536, 4294967295, 4294967295, + 65534, 65537, 4294967295, 4294967295, + 65534, 65538, 4294967295, 4294967295, + }, + }, +}; + +#endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c new file mode 100644 index 00000000000..dc9653947fc --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint16_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*mf2,\s*ta,\s*ma +** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint16_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c new file mode 100644 index 00000000000..03c1d709194 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint32_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*mf2,\s*ta,\s*ma +** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c new file mode 100644 index 00000000000..291dd0512fd --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint64_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*mf2,\s*ta,\s*ma +** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c new file mode 100644 index 00000000000..a3192bf0d76 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint16_t_uint32_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*mf2,\s*ta,\s*ma +** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint16_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c new file mode 100644 index 00000000000..b85276175bc --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint16_t_uint64_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*mf2,\s*ta,\s*ma +** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint16_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c new file mode 100644 index 00000000000..65b5ad3c219 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint32_t_uint64_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*mf2,\s*ta,\s*ma +** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint32_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c new file mode 100644 index 00000000000..cd896e2eb07 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint8_t +#define T2 uint16_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c new file mode 100644 index 00000000000..96272ca2943 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint8_t +#define T2 uint32_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c new file mode 100644 index 00000000000..671bbebf363 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint8_t +#define T2 uint64_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c new file mode 100644 index 00000000000..c7df27be15f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint16_t +#define T2 uint32_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c new file mode 100644 index 00000000000..c9067a1bcd0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint16_t +#define T2 uint64_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c new file mode 100644 index 00000000000..51d76be6743 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint32_t +#define T2 uint64_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h new file mode 100644 index 00000000000..e731e9f3f71 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h @@ -0,0 +1,23 @@ +#ifndef HAVE_DEFINE_VEC_SAT_UNARY_VV_RUN_H +#define HAVE_DEFINE_VEC_SAT_UNARY_VV_RUN_H + +int +main () +{ + unsigned i, k; + + for (i = 0; i < sizeof (DATA) / sizeof (DATA[0]); i++) + { + T *data = &DATA[i]; + + RUN_UNARY (data->out, data->in, N); + + for (k = 0; k < N; k++) + if (data->out[k] != data->expect[k]) + __builtin_abort (); + } + + return 0; +} + +#endif
> + if (double_mode == E_VOIDmode && quad_mode == E_VOIDmode) > Why we have VOID mode ? I still don't understand the codes. The vnclipu only has 1/2 truncation, thus we need 2 times truncation for 1/4 and 3 times for 1/8 truncation. Here take voidmode to tell it is 1/2, 1/4 or 1/8 truncation. For 1/2 truncation, we don't need the bridge mode and pass the VOIDmode from define pattern. Or shall we add 2 more truncation expand function for 1/4 and 1/8 truncation? To get ride of the VOID mess here. Pan From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai> Sent: Monday, July 8, 2024 1:58 PM To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org> Cc: kito.cheng <kito.cheng@gmail.com>; jeffreyalaw <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; Li, Pan2 <pan2.li@intel.com> Subject: Re: [PATCH v2] RISC-V: Implement .SAT_TRUNC for vector unsigned int + if (double_mode == E_VOIDmode && quad_mode == E_VOIDmode) Why we have VOID mode ? I still don't understand the codes.
>> Or shall we add 2 more truncation expand function for 1/4 and 1/8 truncation? To get ride of the VOID mess here. I prefer not to have VOID. juzhe.zhong@rivai.ai From: Li, Pan2 Date: 2024-07-08 14:05 To: juzhe.zhong@rivai.ai; gcc-patches CC: kito.cheng; jeffreyalaw; Robin Dapp Subject: RE: [PATCH v2] RISC-V: Implement .SAT_TRUNC for vector unsigned int > + if (double_mode == E_VOIDmode && quad_mode == E_VOIDmode) > Why we have VOID mode ? I still don't understand the codes. The vnclipu only has 1/2 truncation, thus we need 2 times truncation for 1/4 and 3 times for 1/8 truncation. Here take voidmode to tell it is 1/2, 1/4 or 1/8 truncation. For 1/2 truncation, we don’t need the bridge mode and pass the VOIDmode from define pattern. Or shall we add 2 more truncation expand function for 1/4 and 1/8 truncation? To get ride of the VOID mess here. Pan From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai> Sent: Monday, July 8, 2024 1:58 PM To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org> Cc: kito.cheng <kito.cheng@gmail.com>; jeffreyalaw <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; Li, Pan2 <pan2.li@intel.com> Subject: Re: [PATCH v2] RISC-V: Implement .SAT_TRUNC for vector unsigned int + if (double_mode == E_VOIDmode && quad_mode == E_VOIDmode) Why we have VOID mode ? I still don't understand the codes. juzhe.zhong@rivai.ai From: pan2.li Date: 2024-07-08 12:48 To: gcc-patches CC: juzhe.zhong; kito.cheng; jeffreyalaw; rdapp.gcc; Pan Li Subject: [PATCH v2] RISC-V: Implement .SAT_TRUNC for vector unsigned int From: Pan Li <pan2.li@intel.com> This patch would like to implement the .SAT_TRUNC for the RISC-V backend. With the help of the RVV Vector Narrowing Fixed-Point Clip Instructions. The below SEW(S) are supported: * e64 => e32 * e64 => e16 * e64 => e8 * e32 => e16 * e32 => e8 * e16 => e8 Take below example to see the changes to asm. Form 1: #define DEF_VEC_SAT_U_TRUNC_FMT_1(NT, WT) \ void __attribute__((noinline)) \ vec_sat_u_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT x = in[i]; \ bool overflow = x > (WT)(NT)(-1); \ out[i] = ((NT)x) | (NT)-overflow; \ } \ } DEF_VEC_SAT_U_TRUNC_FMT_1 (uint32_t, uint64_t) Before this patch: .L3: vsetvli a5,a2,e64,m1,ta,ma vle64.v v1,0(a1) vmsgtu.vv v0,v1,v2 vsetvli zero,zero,e32,mf2,ta,ma vncvt.x.x.w v1,v1 vmerge.vim v1,v1,-1,v0 vse32.v v1,0(a0) slli a4,a5,3 add a1,a1,a4 slli a4,a5,2 add a0,a0,a4 sub a2,a2,a5 bne a2,zero,.L3 After this patch: .L3: vsetvli a5,a2,e32,mf2,ta,ma vle64.v v1,0(a1) vnclipu.wi v1,v1,0 vse32.v v1,0(a0) slli a4,a5,3 add a1,a1,a4 slli a4,a5,2 add a0,a0,a4 sub a2,a2,a5 bne a2,zero,.L3 Passed the rv64gcv fully regression tests. gcc/ChangeLog: * config/riscv/autovec.md (ustrunc<mode><v_double_trunc>2): Add new pattern for double truncation. (ustrunc<mode><v_quad_trunc>2): Ditto but for quad truncation. (ustrunc<mode><v_oct_trunc>2): Ditto but for oct truncation. * config/riscv/riscv-protos.h (expand_vec_ustrunc): Add new decl to expand vec ustrunc. * config/riscv/riscv-v.cc (expand_vec_double_ustrunc): Add new func impl to expand vector double ustrunc. (expand_vec_quad_ustrunc): Ditto but for quad. (expand_vec_oct_ustrunc): Ditto but for oct. (expand_vec_ustrunc): Add new func impl to expand vector ustrunc. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add helper test macros. * gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h: New test. Signed-off-by: Pan Li <pan2.li@intel.com> --- gcc/config/riscv/autovec.md | 34 ++ gcc/config/riscv/riscv-protos.h | 1 + gcc/config/riscv/riscv-v.cc | 54 +++ .../riscv/rvv/autovec/binop/vec_sat_arith.h | 22 + .../riscv/rvv/autovec/unop/vec_sat_data.h | 394 ++++++++++++++++++ .../rvv/autovec/unop/vec_sat_u_trunc-1.c | 19 + .../rvv/autovec/unop/vec_sat_u_trunc-2.c | 21 + .../rvv/autovec/unop/vec_sat_u_trunc-3.c | 23 + .../rvv/autovec/unop/vec_sat_u_trunc-4.c | 19 + .../rvv/autovec/unop/vec_sat_u_trunc-5.c | 21 + .../rvv/autovec/unop/vec_sat_u_trunc-6.c | 19 + .../rvv/autovec/unop/vec_sat_u_trunc-run-1.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-2.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-3.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-4.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-5.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-6.c | 16 + .../rvv/autovec/unop/vec_sat_unary_vv_run.h | 23 + 18 files changed, 746 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 66d70f678a6..bd34ad3e862 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2645,6 +2645,7 @@ (define_expand "rawmemchr<ANYI:mode>" ;; Includes: ;; - add ;; - sub +;; - trunc ;; ========================================================================= (define_expand "usadd<mode>3" [(match_operand:V_VLSI 0 "register_operand") @@ -2668,6 +2669,39 @@ (define_expand "ussub<mode>3" } ) +(define_expand "ustrunc<mode><v_double_trunc>2" + [(match_operand:<V_DOUBLE_TRUNC> 0 "register_operand") + (match_operand:VWEXTI 1 "register_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_vec_ustrunc (operands[0], operands[1], <MODE>mode, + E_VOIDmode, E_VOIDmode); + DONE; + } +) + +(define_expand "ustrunc<mode><v_quad_trunc>2" + [(match_operand:<V_QUAD_TRUNC> 0 "register_operand") + (match_operand:VQEXTI 1 "register_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_vec_ustrunc (operands[0], operands[1], <MODE>mode, + <V_DOUBLE_TRUNC>mode, E_VOIDmode); + DONE; + } +) + +(define_expand "ustrunc<mode><v_oct_trunc>2" + [(match_operand:<V_OCT_TRUNC> 0 "register_operand") + (match_operand:VOEXTI 1 "register_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_vec_ustrunc (operands[0], operands[1], <MODE>mode, + <V_DOUBLE_TRUNC>mode, <V_QUAD_TRUNC>mode); + DONE; + } +) + ;; ========================================================================= ;; == Early break auto-vectorization patterns ;; ========================================================================= diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index a8b76173fa0..3ca26e1107d 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -639,6 +639,7 @@ void expand_vec_lceil (rtx, rtx, machine_mode, machine_mode); void expand_vec_lfloor (rtx, rtx, machine_mode, machine_mode); void expand_vec_usadd (rtx, rtx, rtx, machine_mode); void expand_vec_ussub (rtx, rtx, rtx, machine_mode); +void expand_vec_ustrunc (rtx, rtx, machine_mode, machine_mode, machine_mode); #endif bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode, bool, void (*)(rtx *, rtx), enum avl_type); diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 5306711c1b7..f8da69f3f3c 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -4901,6 +4901,60 @@ expand_vec_ussub (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode) emit_vec_binary_alu (op_0, op_1, op_2, US_MINUS, vec_mode); } +static void +expand_vec_double_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode) +{ + insn_code icode; + rtx zero = CONST0_RTX (Xmode); + enum unspec unspec = UNSPEC_VNCLIPU; + rtx ops[] = {op_0, op_1, zero}; + + icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode); + emit_vlmax_insn (icode, BINARY_OP_VXRM_RNU, ops); +} + +static void +expand_vec_quad_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode, + machine_mode double_mode) +{ + rtx double_rtx = gen_reg_rtx (double_mode); + + expand_vec_double_ustrunc (double_rtx, op_1, vec_mode); + expand_vec_double_ustrunc (op_0, double_rtx, double_mode); +} + +static void +expand_vec_oct_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode, + machine_mode double_mode, machine_mode quad_mode) +{ + rtx double_rtx = gen_reg_rtx (double_mode); + rtx quad_rtx = gen_reg_rtx (quad_mode); + + expand_vec_double_ustrunc (double_rtx, op_1, vec_mode); + expand_vec_double_ustrunc (quad_rtx, double_rtx, double_mode); + expand_vec_double_ustrunc (op_0, quad_rtx, quad_mode); +} + +/* Expand the standard name ustrunc<m><n>2 for vector mode, we can leverage + the vector fixed point vector narrowing fixed-point clip directly. */ + +void +expand_vec_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode, + machine_mode double_mode, machine_mode quad_mode) +{ + if (double_mode == E_VOIDmode && quad_mode == E_VOIDmode) + /* Like DI => SI */ + expand_vec_double_ustrunc (op_0, op_1, vec_mode); + else if (double_mode != E_VOIDmode && quad_mode == E_VOIDmode) + /* Like DI => HI */ + expand_vec_quad_ustrunc (op_0, op_1, vec_mode, double_mode); + else if (double_mode != E_VOIDmode && quad_mode != E_VOIDmode) + /* Like DI => QI */ + expand_vec_oct_ustrunc (op_0, op_1, vec_mode, double_mode, quad_mode); + else + gcc_unreachable (); +} + /* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as well. */ void diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h index a3116033fb3..b55a589e019 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h @@ -329,4 +329,26 @@ vec_sat_u_sub_trunc_##OUT_T##_fmt_1 (OUT_T *out, IN_T *op_1, IN_T y, \ #define RUN_VEC_SAT_U_SUB_TRUNC_FMT_1(OUT_T, IN_T, out, op_1, y, N) \ vec_sat_u_sub_trunc_##OUT_T##_fmt_1(out, op_1, y, N) +/******************************************************************************/ +/* Saturation Truncation (Unsigned and Signed) */ +/******************************************************************************/ +#define DEF_VEC_SAT_U_TRUNC_FMT_1(NT, WT) \ +void __attribute__((noinline)) \ +vec_sat_u_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \ +{ \ + unsigned i; \ + for (i = 0; i < limit; i++) \ + { \ + WT x = in[i]; \ + bool overflow = x > (WT)(NT)(-1); \ + out[i] = ((NT)x) | (NT)-overflow; \ + } \ +} +#define DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(NT, WT) DEF_VEC_SAT_U_TRUNC_FMT_1(NT, WT) + +#define RUN_VEC_SAT_U_TRUNC_FMT_1(NT, WT, out, in, N) \ + vec_sat_u_trunc_##NT##_##WT##_fmt_1 (out, in, N) +#define RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(NT, WT, out, in, N) \ + RUN_VEC_SAT_U_TRUNC_FMT_1(NT, WT, out, in, N) + #endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h new file mode 100644 index 00000000000..6b23ec809f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h @@ -0,0 +1,394 @@ +#ifndef HAVE_DEFINE_VEC_SAT_DATA_H +#define HAVE_DEFINE_VEC_SAT_DATA_H + +#define N 16 + +#define TEST_UNARY_STRUCT_NAME(T1, T2) test_##T1##_##T2##_s +#define TEST_UNARY_STRUCT_DECL(T1, T2) struct TEST_UNARY_STRUCT_NAME(T1, T2) +#define TEST_UNARY_STRUCT(T1, T2) \ + struct TEST_UNARY_STRUCT_NAME(T1, T2) \ + { \ + T2 in[N]; \ + T1 expect[N]; \ + T1 out[N]; \ + }; + +#define TEST_UNARY_DATA(T1, T2) test_##T1##_##T2##_data +#define TEST_UNARY_DATA_WRAP(T1, T2) TEST_UNARY_DATA(T1, T2) + +TEST_UNARY_STRUCT(uint8_t, uint16_t) +TEST_UNARY_STRUCT(uint8_t, uint32_t) +TEST_UNARY_STRUCT(uint8_t, uint64_t) + +TEST_UNARY_STRUCT(uint16_t, uint32_t) +TEST_UNARY_STRUCT(uint16_t, uint64_t) + +TEST_UNARY_STRUCT(uint32_t, uint64_t) + +TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \ + TEST_UNARY_DATA(uint8_t, uint16_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + }, + { + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + }, + }, + { + { + 65534, 65535, 650, 24, + 65534, 65535, 650, 24, + 65534, 65535, 650, 24, + 65534, 65535, 650, 24, + }, + { + 255, 255, 255, 24, + 255, 255, 255, 24, + 255, 255, 255, 24, + 255, 255, 255, 24, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint8_t, uint32_t) \ + TEST_UNARY_DATA(uint8_t, uint32_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + }, + { + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + }, + }, + { + { + 65534, 65535, 65536, 4294967291, + 65534, 65535, 65537, 4294967292, + 65534, 65535, 65538, 4294967293, + 65534, 65535, 65539, 4294967294, + }, + { + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint8_t, uint64_t) \ + TEST_UNARY_DATA(uint8_t, uint64_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + }, + { + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + }, + }, + { + { + 65534, 65535, 4294967292, 4294967296, + 65534, 65536, 4294967293, 18446744073709551613u, + 65534, 65537, 4294967294, 18446744073709551614u, + 65534, 65538, 4294967295, 18446744073709551615u, + }, + { + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \ + TEST_UNARY_DATA(uint16_t, uint32_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, + { + { + 65534, 65535, 4294967295, 4294967291, + 65534, 65535, 4294967295, 4294967292, + 65534, 65535, 4294967295, 4294967293, + 65534, 65535, 4294967295, 4294967294, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint16_t, uint64_t) \ + TEST_UNARY_DATA(uint16_t, uint64_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, + { + { + 65534, 65535, 4294967294, 4294967298, + 65534, 65536, 4294967295, 18446744073709551613u, + 65534, 65537, 4294967296, 18446744073709551614u, + 65534, 65538, 4294967297, 18446744073709551615u, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint32_t, uint64_t) \ + TEST_UNARY_DATA(uint32_t, uint64_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + }, + { + { + 65534, 65535, 4294967294, 4294967298, + 65534, 65536, 4294967295, 18446744073709551613u, + 65534, 65537, 4294967296, 18446744073709551614u, + 65534, 65538, 4294967297, 18446744073709551615u, + }, + { + 65534, 65535, 4294967294, 4294967295, + 65534, 65536, 4294967295, 4294967295, + 65534, 65537, 4294967295, 4294967295, + 65534, 65538, 4294967295, 4294967295, + }, + }, +}; + +#endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c new file mode 100644 index 00000000000..dc9653947fc --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint16_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*mf2,\s*ta,\s*ma +** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint16_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c new file mode 100644 index 00000000000..03c1d709194 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint32_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*mf2,\s*ta,\s*ma +** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c new file mode 100644 index 00000000000..291dd0512fd --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint64_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*mf2,\s*ta,\s*ma +** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c new file mode 100644 index 00000000000..a3192bf0d76 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint16_t_uint32_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*mf2,\s*ta,\s*ma +** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint16_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c new file mode 100644 index 00000000000..b85276175bc --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint16_t_uint64_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*mf2,\s*ta,\s*ma +** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint16_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c new file mode 100644 index 00000000000..65b5ad3c219 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint32_t_uint64_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*mf2,\s*ta,\s*ma +** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint32_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c new file mode 100644 index 00000000000..cd896e2eb07 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint8_t +#define T2 uint16_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c new file mode 100644 index 00000000000..96272ca2943 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint8_t +#define T2 uint32_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c new file mode 100644 index 00000000000..671bbebf363 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint8_t +#define T2 uint64_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c new file mode 100644 index 00000000000..c7df27be15f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint16_t +#define T2 uint32_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c new file mode 100644 index 00000000000..c9067a1bcd0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint16_t +#define T2 uint64_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c new file mode 100644 index 00000000000..51d76be6743 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint32_t +#define T2 uint64_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h new file mode 100644 index 00000000000..e731e9f3f71 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h @@ -0,0 +1,23 @@ +#ifndef HAVE_DEFINE_VEC_SAT_UNARY_VV_RUN_H +#define HAVE_DEFINE_VEC_SAT_UNARY_VV_RUN_H + +int +main () +{ + unsigned i, k; + + for (i = 0; i < sizeof (DATA) / sizeof (DATA[0]); i++) + { + T *data = &DATA[i]; + + RUN_UNARY (data->out, data->in, N); + + for (k = 0; k < N; k++) + if (data->out[k] != data->expect[k]) + __builtin_abort (); + } + + return 0; +} + +#endif
> I prefer not to have VOID. Sure thing, will update in v3. Pan From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai> Sent: Monday, July 8, 2024 2:14 PM To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org> Cc: kito.cheng <kito.cheng@gmail.com>; jeffreyalaw <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com> Subject: Re: RE: [PATCH v2] RISC-V: Implement .SAT_TRUNC for vector unsigned int >> Or shall we add 2 more truncation expand function for 1/4 and 1/8 truncation? To get ride of the VOID mess here. I prefer not to have VOID.
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 66d70f678a6..bd34ad3e862 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2645,6 +2645,7 @@ (define_expand "rawmemchr<ANYI:mode>" ;; Includes: ;; - add ;; - sub +;; - trunc ;; ========================================================================= (define_expand "usadd<mode>3" [(match_operand:V_VLSI 0 "register_operand") @@ -2668,6 +2669,39 @@ (define_expand "ussub<mode>3" } ) +(define_expand "ustrunc<mode><v_double_trunc>2" + [(match_operand:<V_DOUBLE_TRUNC> 0 "register_operand") + (match_operand:VWEXTI 1 "register_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_vec_ustrunc (operands[0], operands[1], <MODE>mode, + E_VOIDmode, E_VOIDmode); + DONE; + } +) + +(define_expand "ustrunc<mode><v_quad_trunc>2" + [(match_operand:<V_QUAD_TRUNC> 0 "register_operand") + (match_operand:VQEXTI 1 "register_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_vec_ustrunc (operands[0], operands[1], <MODE>mode, + <V_DOUBLE_TRUNC>mode, E_VOIDmode); + DONE; + } +) + +(define_expand "ustrunc<mode><v_oct_trunc>2" + [(match_operand:<V_OCT_TRUNC> 0 "register_operand") + (match_operand:VOEXTI 1 "register_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_vec_ustrunc (operands[0], operands[1], <MODE>mode, + <V_DOUBLE_TRUNC>mode, <V_QUAD_TRUNC>mode); + DONE; + } +) + ;; ========================================================================= ;; == Early break auto-vectorization patterns ;; ========================================================================= diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index a8b76173fa0..3ca26e1107d 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -639,6 +639,7 @@ void expand_vec_lceil (rtx, rtx, machine_mode, machine_mode); void expand_vec_lfloor (rtx, rtx, machine_mode, machine_mode); void expand_vec_usadd (rtx, rtx, rtx, machine_mode); void expand_vec_ussub (rtx, rtx, rtx, machine_mode); +void expand_vec_ustrunc (rtx, rtx, machine_mode, machine_mode, machine_mode); #endif bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode, bool, void (*)(rtx *, rtx), enum avl_type); diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 5306711c1b7..f8da69f3f3c 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -4901,6 +4901,60 @@ expand_vec_ussub (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode) emit_vec_binary_alu (op_0, op_1, op_2, US_MINUS, vec_mode); } +static void +expand_vec_double_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode) +{ + insn_code icode; + rtx zero = CONST0_RTX (Xmode); + enum unspec unspec = UNSPEC_VNCLIPU; + rtx ops[] = {op_0, op_1, zero}; + + icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode); + emit_vlmax_insn (icode, BINARY_OP_VXRM_RNU, ops); +} + +static void +expand_vec_quad_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode, + machine_mode double_mode) +{ + rtx double_rtx = gen_reg_rtx (double_mode); + + expand_vec_double_ustrunc (double_rtx, op_1, vec_mode); + expand_vec_double_ustrunc (op_0, double_rtx, double_mode); +} + +static void +expand_vec_oct_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode, + machine_mode double_mode, machine_mode quad_mode) +{ + rtx double_rtx = gen_reg_rtx (double_mode); + rtx quad_rtx = gen_reg_rtx (quad_mode); + + expand_vec_double_ustrunc (double_rtx, op_1, vec_mode); + expand_vec_double_ustrunc (quad_rtx, double_rtx, double_mode); + expand_vec_double_ustrunc (op_0, quad_rtx, quad_mode); +} + +/* Expand the standard name ustrunc<m><n>2 for vector mode, we can leverage + the vector fixed point vector narrowing fixed-point clip directly. */ + +void +expand_vec_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode, + machine_mode double_mode, machine_mode quad_mode) +{ + if (double_mode == E_VOIDmode && quad_mode == E_VOIDmode) + /* Like DI => SI */ + expand_vec_double_ustrunc (op_0, op_1, vec_mode); + else if (double_mode != E_VOIDmode && quad_mode == E_VOIDmode) + /* Like DI => HI */ + expand_vec_quad_ustrunc (op_0, op_1, vec_mode, double_mode); + else if (double_mode != E_VOIDmode && quad_mode != E_VOIDmode) + /* Like DI => QI */ + expand_vec_oct_ustrunc (op_0, op_1, vec_mode, double_mode, quad_mode); + else + gcc_unreachable (); +} + /* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as well. */ void diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h index a3116033fb3..b55a589e019 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h @@ -329,4 +329,26 @@ vec_sat_u_sub_trunc_##OUT_T##_fmt_1 (OUT_T *out, IN_T *op_1, IN_T y, \ #define RUN_VEC_SAT_U_SUB_TRUNC_FMT_1(OUT_T, IN_T, out, op_1, y, N) \ vec_sat_u_sub_trunc_##OUT_T##_fmt_1(out, op_1, y, N) +/******************************************************************************/ +/* Saturation Truncation (Unsigned and Signed) */ +/******************************************************************************/ +#define DEF_VEC_SAT_U_TRUNC_FMT_1(NT, WT) \ +void __attribute__((noinline)) \ +vec_sat_u_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \ +{ \ + unsigned i; \ + for (i = 0; i < limit; i++) \ + { \ + WT x = in[i]; \ + bool overflow = x > (WT)(NT)(-1); \ + out[i] = ((NT)x) | (NT)-overflow; \ + } \ +} +#define DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(NT, WT) DEF_VEC_SAT_U_TRUNC_FMT_1(NT, WT) + +#define RUN_VEC_SAT_U_TRUNC_FMT_1(NT, WT, out, in, N) \ + vec_sat_u_trunc_##NT##_##WT##_fmt_1 (out, in, N) +#define RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(NT, WT, out, in, N) \ + RUN_VEC_SAT_U_TRUNC_FMT_1(NT, WT, out, in, N) + #endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h new file mode 100644 index 00000000000..6b23ec809f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h @@ -0,0 +1,394 @@ +#ifndef HAVE_DEFINE_VEC_SAT_DATA_H +#define HAVE_DEFINE_VEC_SAT_DATA_H + +#define N 16 + +#define TEST_UNARY_STRUCT_NAME(T1, T2) test_##T1##_##T2##_s +#define TEST_UNARY_STRUCT_DECL(T1, T2) struct TEST_UNARY_STRUCT_NAME(T1, T2) +#define TEST_UNARY_STRUCT(T1, T2) \ + struct TEST_UNARY_STRUCT_NAME(T1, T2) \ + { \ + T2 in[N]; \ + T1 expect[N]; \ + T1 out[N]; \ + }; + +#define TEST_UNARY_DATA(T1, T2) test_##T1##_##T2##_data +#define TEST_UNARY_DATA_WRAP(T1, T2) TEST_UNARY_DATA(T1, T2) + +TEST_UNARY_STRUCT(uint8_t, uint16_t) +TEST_UNARY_STRUCT(uint8_t, uint32_t) +TEST_UNARY_STRUCT(uint8_t, uint64_t) + +TEST_UNARY_STRUCT(uint16_t, uint32_t) +TEST_UNARY_STRUCT(uint16_t, uint64_t) + +TEST_UNARY_STRUCT(uint32_t, uint64_t) + +TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \ + TEST_UNARY_DATA(uint8_t, uint16_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + }, + { + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + }, + }, + { + { + 65534, 65535, 650, 24, + 65534, 65535, 650, 24, + 65534, 65535, 650, 24, + 65534, 65535, 650, 24, + }, + { + 255, 255, 255, 24, + 255, 255, 255, 24, + 255, 255, 255, 24, + 255, 255, 255, 24, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint8_t, uint32_t) \ + TEST_UNARY_DATA(uint8_t, uint32_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + }, + { + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + }, + }, + { + { + 65534, 65535, 65536, 4294967291, + 65534, 65535, 65537, 4294967292, + 65534, 65535, 65538, 4294967293, + 65534, 65535, 65539, 4294967294, + }, + { + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint8_t, uint64_t) \ + TEST_UNARY_DATA(uint8_t, uint64_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + }, + { + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + }, + }, + { + { + 65534, 65535, 4294967292, 4294967296, + 65534, 65536, 4294967293, 18446744073709551613u, + 65534, 65537, 4294967294, 18446744073709551614u, + 65534, 65538, 4294967295, 18446744073709551615u, + }, + { + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \ + TEST_UNARY_DATA(uint16_t, uint32_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, + { + { + 65534, 65535, 4294967295, 4294967291, + 65534, 65535, 4294967295, 4294967292, + 65534, 65535, 4294967295, 4294967293, + 65534, 65535, 4294967295, 4294967294, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint16_t, uint64_t) \ + TEST_UNARY_DATA(uint16_t, uint64_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, + { + { + 65534, 65535, 4294967294, 4294967298, + 65534, 65536, 4294967295, 18446744073709551613u, + 65534, 65537, 4294967296, 18446744073709551614u, + 65534, 65538, 4294967297, 18446744073709551615u, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint32_t, uint64_t) \ + TEST_UNARY_DATA(uint32_t, uint64_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + }, + { + { + 65534, 65535, 4294967294, 4294967298, + 65534, 65536, 4294967295, 18446744073709551613u, + 65534, 65537, 4294967296, 18446744073709551614u, + 65534, 65538, 4294967297, 18446744073709551615u, + }, + { + 65534, 65535, 4294967294, 4294967295, + 65534, 65536, 4294967295, 4294967295, + 65534, 65537, 4294967295, 4294967295, + 65534, 65538, 4294967295, 4294967295, + }, + }, +}; + +#endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c new file mode 100644 index 00000000000..dc9653947fc --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint16_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*mf2,\s*ta,\s*ma +** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint16_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c new file mode 100644 index 00000000000..03c1d709194 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint32_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*mf2,\s*ta,\s*ma +** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c new file mode 100644 index 00000000000..291dd0512fd --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint64_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*mf2,\s*ta,\s*ma +** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c new file mode 100644 index 00000000000..a3192bf0d76 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint16_t_uint32_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*mf2,\s*ta,\s*ma +** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint16_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c new file mode 100644 index 00000000000..b85276175bc --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint16_t_uint64_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*mf2,\s*ta,\s*ma +** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint16_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c new file mode 100644 index 00000000000..65b5ad3c219 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint32_t_uint64_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*mf2,\s*ta,\s*ma +** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint32_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c new file mode 100644 index 00000000000..cd896e2eb07 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint8_t +#define T2 uint16_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c new file mode 100644 index 00000000000..96272ca2943 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint8_t +#define T2 uint32_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c new file mode 100644 index 00000000000..671bbebf363 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint8_t +#define T2 uint64_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c new file mode 100644 index 00000000000..c7df27be15f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint16_t +#define T2 uint32_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c new file mode 100644 index 00000000000..c9067a1bcd0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint16_t +#define T2 uint64_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c new file mode 100644 index 00000000000..51d76be6743 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint32_t +#define T2 uint64_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h new file mode 100644 index 00000000000..e731e9f3f71 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h @@ -0,0 +1,23 @@ +#ifndef HAVE_DEFINE_VEC_SAT_UNARY_VV_RUN_H +#define HAVE_DEFINE_VEC_SAT_UNARY_VV_RUN_H + +int +main () +{ + unsigned i, k; + + for (i = 0; i < sizeof (DATA) / sizeof (DATA[0]); i++) + { + T *data = &DATA[i]; + + RUN_UNARY (data->out, data->in, N); + + for (k = 0; k < N; k++) + if (data->out[k] != data->expect[k]) + __builtin_abort (); + } + + return 0; +} + +#endif