Message ID | 20240705012308.3367627-1-pan2.li@intel.com |
---|---|
State | New |
Headers | show |
Series | [v1] RISC-V: Implement .SAT_TRUNC for vector unsigned int | expand |
+/* Expand the standard name ustrunc<m><n>2 for vector mode, we can leverage + the vector fixed point vector narrowing fixed-point clip directly. */ + +void +expand_vec_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode, + machine_mode double_mode, machine_mode quad_mode) +{ + insn_code icode; + rtx double_rtx = NULL_RTX; + rtx quad_rtx = NULL_RTX; + rtx zero = CONST0_RTX (Xmode); + enum unspec unspec = UNSPEC_VNCLIPU; + + if (double_mode != E_VOIDmode) + double_rtx = gen_reg_rtx (double_mode); + + if (quad_mode != E_VOIDmode) + quad_rtx = gen_reg_rtx (quad_mode); + + if (double_rtx != NULL_RTX && quad_rtx != NULL_RTX) + { + icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode); + emit_vec_fixed_binary_rnu (double_rtx, op_1, zero, icode, vec_mode); + + icode = code_for_pred_narrow_clip_scalar (unspec, double_mode); + emit_vec_fixed_binary_rnu (quad_rtx, double_rtx, zero, icode, double_mode); + + icode = code_for_pred_narrow_clip_scalar (unspec, quad_mode); + emit_vec_fixed_binary_rnu (op_0, quad_rtx, zero, icode, quad_mode); + + return; + } + + if (double_rtx != NULL_RTX && quad_rtx == NULL_RTX) + { + icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode); + emit_vec_fixed_binary_rnu (double_rtx, op_1, zero, icode, vec_mode); + + icode = code_for_pred_narrow_clip_scalar (unspec, double_mode); + emit_vec_fixed_binary_rnu (op_0, double_rtx, zero, icode, double_mode); + + return; + } + + if (double_rtx == NULL_RTX && quad_rtx == NULL_RTX) + { + icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode); + emit_vec_fixed_binary_rnu (op_0, op_1, zero, icode, vec_mode); + + return; + } + + gcc_unreachable (); +} This code looks odd to me. Could you restructure it in a more straightforward way? 
juzhe.zhong@rivai.ai From: pan2.li Date: 2024-07-05 09:23 To: gcc-patches CC: juzhe.zhong; kito.cheng; jeffreyalaw; rdapp.gcc; Pan Li Subject: [PATCH v1] RISC-V: Implement .SAT_TRUNC for vector unsigned int From: Pan Li <pan2.li@intel.com> This patch implements .SAT_TRUNC for the RISC-V backend with the help of the RVV Vector Narrowing Fixed-Point Clip instructions. The following SEW conversions are supported: * e64 => e32 * e64 => e16 * e64 => e8 * e32 => e16 * e32 => e8 * e16 => e8 Take the example below to see the changes to the generated asm. Form 1: #define DEF_VEC_SAT_U_TRUNC_FMT_1(NT, WT) \ void __attribute__((noinline)) \ vec_sat_u_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT x = in[i]; \ bool overflow = x > (WT)(NT)(-1); \ out[i] = ((NT)x) | (NT)-overflow; \ } \ } DEF_VEC_SAT_U_TRUNC_FMT_1 (uint32_t, uint64_t) Before this patch: .L3: vsetvli a5,a2,e64,m1,ta,ma vle64.v v1,0(a1) vmsgtu.vv v0,v1,v2 vsetvli zero,zero,e32,mf2,ta,ma vncvt.x.x.w v1,v1 vmerge.vim v1,v1,-1,v0 vse32.v v1,0(a0) slli a4,a5,3 add a1,a1,a4 slli a4,a5,2 add a0,a0,a4 sub a2,a2,a5 bne a2,zero,.L3 After this patch: .L3: vsetvli a5,a2,e32,mf2,ta,ma vle64.v v1,0(a1) vnclipu.wi v1,v1,0 vse32.v v1,0(a0) slli a4,a5,3 add a1,a1,a4 slli a4,a5,2 add a0,a0,a4 sub a2,a2,a5 bne a2,zero,.L3 Passed the full rv64gcv regression tests. gcc/ChangeLog: * config/riscv/autovec.md (ustrunc<mode><v_double_trunc>2): Add new pattern for double truncation. (ustrunc<mode><v_quad_trunc>2): Ditto but for quad truncation. (ustrunc<mode><v_oct_trunc>2): Ditto but for oct truncation. * config/riscv/riscv-protos.h (expand_vec_ustrunc): Add new decl to expand vec ustrunc. * config/riscv/riscv-v.cc (emit_vec_fixed_binary_rnu): Add new help func to emit vnclipu. (expand_vec_ustrunc): Add new func impl to expand vector ustrunc. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add helper test macros. 
* gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h: New test. Signed-off-by: Pan Li <pan2.li@intel.com> --- gcc/config/riscv/autovec.md | 34 ++ gcc/config/riscv/riscv-protos.h | 1 + gcc/config/riscv/riscv-v.cc | 64 +++ .../riscv/rvv/autovec/binop/vec_sat_arith.h | 22 + .../riscv/rvv/autovec/unop/vec_sat_data.h | 394 ++++++++++++++++++ .../rvv/autovec/unop/vec_sat_u_trunc-1.c | 19 + .../rvv/autovec/unop/vec_sat_u_trunc-2.c | 21 + .../rvv/autovec/unop/vec_sat_u_trunc-3.c | 23 + .../rvv/autovec/unop/vec_sat_u_trunc-4.c | 19 + .../rvv/autovec/unop/vec_sat_u_trunc-5.c | 21 + .../rvv/autovec/unop/vec_sat_u_trunc-6.c | 19 + .../rvv/autovec/unop/vec_sat_u_trunc-run-1.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-2.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-3.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-4.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-5.c | 16 + .../rvv/autovec/unop/vec_sat_u_trunc-run-6.c | 16 + .../rvv/autovec/unop/vec_sat_unary_vv_run.h | 23 + 18 files changed, 756 insertions(+) create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 66d70f678a6..bd34ad3e862 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2645,6 +2645,7 @@ (define_expand "rawmemchr<ANYI:mode>" ;; Includes: ;; - add ;; - sub +;; - trunc ;; ========================================================================= (define_expand "usadd<mode>3" [(match_operand:V_VLSI 0 "register_operand") @@ -2668,6 +2669,39 @@ (define_expand "ussub<mode>3" } ) +(define_expand "ustrunc<mode><v_double_trunc>2" + [(match_operand:<V_DOUBLE_TRUNC> 0 "register_operand") + (match_operand:VWEXTI 1 "register_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_vec_ustrunc (operands[0], operands[1], <MODE>mode, + E_VOIDmode, E_VOIDmode); + DONE; + } 
+) + +(define_expand "ustrunc<mode><v_quad_trunc>2" + [(match_operand:<V_QUAD_TRUNC> 0 "register_operand") + (match_operand:VQEXTI 1 "register_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_vec_ustrunc (operands[0], operands[1], <MODE>mode, + <V_DOUBLE_TRUNC>mode, E_VOIDmode); + DONE; + } +) + +(define_expand "ustrunc<mode><v_oct_trunc>2" + [(match_operand:<V_OCT_TRUNC> 0 "register_operand") + (match_operand:VOEXTI 1 "register_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_vec_ustrunc (operands[0], operands[1], <MODE>mode, + <V_DOUBLE_TRUNC>mode, <V_QUAD_TRUNC>mode); + DONE; + } +) + ;; ========================================================================= ;; == Early break auto-vectorization patterns ;; ========================================================================= diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index a8b76173fa0..3ca26e1107d 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -639,6 +639,7 @@ void expand_vec_lceil (rtx, rtx, machine_mode, machine_mode); void expand_vec_lfloor (rtx, rtx, machine_mode, machine_mode); void expand_vec_usadd (rtx, rtx, rtx, machine_mode); void expand_vec_ussub (rtx, rtx, rtx, machine_mode); +void expand_vec_ustrunc (rtx, rtx, machine_mode, machine_mode, machine_mode); #endif bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode, bool, void (*)(rtx *, rtx), enum avl_type); diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 5306711c1b7..866fabf18ed 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -4656,6 +4656,15 @@ emit_vec_binary_alu (rtx op_dest, rtx op_1, rtx op_2, enum rtx_code rcode, emit_vlmax_insn (icode, BINARY_OP, ops); } +static void +emit_vec_fixed_binary_rnu (rtx op_dest, rtx op_1, rtx op_2, insn_code icode, + machine_mode vec_mode) +{ + rtx ops[] = {op_dest, op_1, op_2}; + + emit_vlmax_insn (icode, BINARY_OP_VXRM_RNU, ops); +} + void expand_vec_ceil 
(rtx op_0, rtx op_1, machine_mode vec_fp_mode, machine_mode vec_int_mode) @@ -4901,6 +4910,61 @@ expand_vec_ussub (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode) emit_vec_binary_alu (op_0, op_1, op_2, US_MINUS, vec_mode); } +/* Expand the standard name ustrunc<m><n>2 for vector mode, we can leverage + the vector fixed point vector narrowing fixed-point clip directly. */ + +void +expand_vec_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode, + machine_mode double_mode, machine_mode quad_mode) +{ + insn_code icode; + rtx double_rtx = NULL_RTX; + rtx quad_rtx = NULL_RTX; + rtx zero = CONST0_RTX (Xmode); + enum unspec unspec = UNSPEC_VNCLIPU; + + if (double_mode != E_VOIDmode) + double_rtx = gen_reg_rtx (double_mode); + + if (quad_mode != E_VOIDmode) + quad_rtx = gen_reg_rtx (quad_mode); + + if (double_rtx != NULL_RTX && quad_rtx != NULL_RTX) + { + icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode); + emit_vec_fixed_binary_rnu (double_rtx, op_1, zero, icode, vec_mode); + + icode = code_for_pred_narrow_clip_scalar (unspec, double_mode); + emit_vec_fixed_binary_rnu (quad_rtx, double_rtx, zero, icode, double_mode); + + icode = code_for_pred_narrow_clip_scalar (unspec, quad_mode); + emit_vec_fixed_binary_rnu (op_0, quad_rtx, zero, icode, quad_mode); + + return; + } + + if (double_rtx != NULL_RTX && quad_rtx == NULL_RTX) + { + icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode); + emit_vec_fixed_binary_rnu (double_rtx, op_1, zero, icode, vec_mode); + + icode = code_for_pred_narrow_clip_scalar (unspec, double_mode); + emit_vec_fixed_binary_rnu (op_0, double_rtx, zero, icode, double_mode); + + return; + } + + if (double_rtx == NULL_RTX && quad_rtx == NULL_RTX) + { + icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode); + emit_vec_fixed_binary_rnu (op_0, op_1, zero, icode, vec_mode); + + return; + } + + gcc_unreachable (); +} + /* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as well. 
*/ void diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h index a3116033fb3..b55a589e019 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h @@ -329,4 +329,26 @@ vec_sat_u_sub_trunc_##OUT_T##_fmt_1 (OUT_T *out, IN_T *op_1, IN_T y, \ #define RUN_VEC_SAT_U_SUB_TRUNC_FMT_1(OUT_T, IN_T, out, op_1, y, N) \ vec_sat_u_sub_trunc_##OUT_T##_fmt_1(out, op_1, y, N) +/******************************************************************************/ +/* Saturation Truncation (Unsigned and Signed) */ +/******************************************************************************/ +#define DEF_VEC_SAT_U_TRUNC_FMT_1(NT, WT) \ +void __attribute__((noinline)) \ +vec_sat_u_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \ +{ \ + unsigned i; \ + for (i = 0; i < limit; i++) \ + { \ + WT x = in[i]; \ + bool overflow = x > (WT)(NT)(-1); \ + out[i] = ((NT)x) | (NT)-overflow; \ + } \ +} +#define DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(NT, WT) DEF_VEC_SAT_U_TRUNC_FMT_1(NT, WT) + +#define RUN_VEC_SAT_U_TRUNC_FMT_1(NT, WT, out, in, N) \ + vec_sat_u_trunc_##NT##_##WT##_fmt_1 (out, in, N) +#define RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(NT, WT, out, in, N) \ + RUN_VEC_SAT_U_TRUNC_FMT_1(NT, WT, out, in, N) + #endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h new file mode 100644 index 00000000000..6b23ec809f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h @@ -0,0 +1,394 @@ +#ifndef HAVE_DEFINE_VEC_SAT_DATA_H +#define HAVE_DEFINE_VEC_SAT_DATA_H + +#define N 16 + +#define TEST_UNARY_STRUCT_NAME(T1, T2) test_##T1##_##T2##_s +#define TEST_UNARY_STRUCT_DECL(T1, T2) struct TEST_UNARY_STRUCT_NAME(T1, T2) +#define TEST_UNARY_STRUCT(T1, T2) \ + struct TEST_UNARY_STRUCT_NAME(T1, T2) \ + { \ + 
T2 in[N]; \ + T1 expect[N]; \ + T1 out[N]; \ + }; + +#define TEST_UNARY_DATA(T1, T2) test_##T1##_##T2##_data +#define TEST_UNARY_DATA_WRAP(T1, T2) TEST_UNARY_DATA(T1, T2) + +TEST_UNARY_STRUCT(uint8_t, uint16_t) +TEST_UNARY_STRUCT(uint8_t, uint32_t) +TEST_UNARY_STRUCT(uint8_t, uint64_t) + +TEST_UNARY_STRUCT(uint16_t, uint32_t) +TEST_UNARY_STRUCT(uint16_t, uint64_t) + +TEST_UNARY_STRUCT(uint32_t, uint64_t) + +TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \ + TEST_UNARY_DATA(uint8_t, uint16_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + }, + { + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + }, + }, + { + { + 65534, 65535, 650, 24, + 65534, 65535, 650, 24, + 65534, 65535, 650, 24, + 65534, 65535, 650, 24, + }, + { + 255, 255, 255, 24, + 255, 255, 255, 24, + 255, 255, 255, 24, + 255, 255, 255, 24, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint8_t, uint32_t) \ + TEST_UNARY_DATA(uint8_t, uint32_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + }, + { + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + }, + }, + { + { + 65534, 65535, 65536, 4294967291, + 65534, 65535, 65537, 4294967292, + 65534, 65535, 65538, 4294967293, + 65534, 65535, 65539, 4294967294, + }, + { + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 
255, 255, 255, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint8_t, uint64_t) \ + TEST_UNARY_DATA(uint8_t, uint64_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + }, + { + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + }, + }, + { + { + 65534, 65535, 4294967292, 4294967296, + 65534, 65536, 4294967293, 18446744073709551613u, + 65534, 65537, 4294967294, 18446744073709551614u, + 65534, 65538, 4294967295, 18446744073709551615u, + }, + { + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \ + TEST_UNARY_DATA(uint16_t, uint32_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, + { + { + 65534, 65535, 4294967295, 4294967291, + 65534, 65535, 4294967295, 4294967292, + 65534, 65535, 4294967295, 4294967293, + 65534, 65535, 4294967295, 4294967294, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint16_t, uint64_t) \ + TEST_UNARY_DATA(uint16_t, uint64_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 
0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, + { + { + 65534, 65535, 4294967294, 4294967298, + 65534, 65536, 4294967295, 18446744073709551613u, + 65534, 65537, 4294967296, 18446744073709551614u, + 65534, 65538, 4294967297, 18446744073709551615u, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint32_t, uint64_t) \ + TEST_UNARY_DATA(uint32_t, uint64_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + }, + { + { + 65534, 65535, 4294967294, 4294967298, + 65534, 65536, 4294967295, 18446744073709551613u, + 65534, 65537, 4294967296, 18446744073709551614u, + 65534, 65538, 4294967297, 18446744073709551615u, + }, + { + 65534, 65535, 4294967294, 4294967295, + 65534, 65536, 4294967295, 4294967295, + 65534, 65537, 4294967295, 4294967295, + 65534, 65538, 4294967295, 4294967295, + }, + }, +}; + +#endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c new file mode 100644 index 00000000000..dc9653947fc --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint16_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*mf2,\s*ta,\s*ma +** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint16_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c new file mode 100644 index 00000000000..03c1d709194 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint32_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*mf2,\s*ta,\s*ma +** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... 
+*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c new file mode 100644 index 00000000000..291dd0512fd --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint64_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*mf2,\s*ta,\s*ma +** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c new file mode 100644 index 00000000000..a3192bf0d76 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint16_t_uint32_t_fmt_1: +** ... 
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*mf2,\s*ta,\s*ma +** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint16_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c new file mode 100644 index 00000000000..b85276175bc --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint16_t_uint64_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*mf2,\s*ta,\s*ma +** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... 
+*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint16_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c new file mode 100644 index 00000000000..65b5ad3c219 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint32_t_uint64_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*mf2,\s*ta,\s*ma +** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint32_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c new file mode 100644 index 00000000000..cd896e2eb07 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint8_t +#define T2 uint16_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c new file mode 100644 index 00000000000..96272ca2943 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint8_t +#define T2 uint32_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c new file mode 100644 index 00000000000..671bbebf363 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint8_t +#define T2 uint64_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c new file mode 100644 index 00000000000..c7df27be15f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint16_t +#define T2 uint32_t + 
+DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c new file mode 100644 index 00000000000..c9067a1bcd0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint16_t +#define T2 uint64_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c new file mode 100644 index 00000000000..51d76be6743 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint32_t +#define T2 uint64_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h new file mode 100644 index 00000000000..e731e9f3f71 --- 
/dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h @@ -0,0 +1,23 @@ +#ifndef HAVE_DEFINE_VEC_SAT_UNARY_VV_RUN_H +#define HAVE_DEFINE_VEC_SAT_UNARY_VV_RUN_H + +int +main () +{ + unsigned i, k; + + for (i = 0; i < sizeof (DATA) / sizeof (DATA[0]); i++) + { + T *data = &DATA[i]; + + RUN_UNARY (data->out, data->in, N); + + for (k = 0; k < N; k++) + if (data->out[k] != data->expect[k]) + __builtin_abort (); + } + + return 0; +} + +#endif
> This code looks odd to me. Could you restructure it in a more straightforward way? Sure, let me split it into separate double, quad and oct truncation cases. Pan From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai> Sent: Monday, July 8, 2024 10:24 AM To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org> Cc: kito.cheng <kito.cheng@gmail.com>; jeffreyalaw <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; Li, Pan2 <pan2.li@intel.com> Subject: Re: [PATCH v1] RISC-V: Implement .SAT_TRUNC for vector unsigned int +/* Expand the standard name ustrunc<m><n>2 for vector mode, we can leverage + the vector fixed point vector narrowing fixed-point clip directly. */ + +void +expand_vec_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode, + machine_mode double_mode, machine_mode quad_mode) +{ + insn_code icode; + rtx double_rtx = NULL_RTX; + rtx quad_rtx = NULL_RTX; + rtx zero = CONST0_RTX (Xmode); + enum unspec unspec = UNSPEC_VNCLIPU; + + if (double_mode != E_VOIDmode) + double_rtx = gen_reg_rtx (double_mode); + + if (quad_mode != E_VOIDmode) + quad_rtx = gen_reg_rtx (quad_mode); + + if (double_rtx != NULL_RTX && quad_rtx != NULL_RTX) + { + icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode); + emit_vec_fixed_binary_rnu (double_rtx, op_1, zero, icode, vec_mode); + + icode = code_for_pred_narrow_clip_scalar (unspec, double_mode); + emit_vec_fixed_binary_rnu (quad_rtx, double_rtx, zero, icode, double_mode); + + icode = code_for_pred_narrow_clip_scalar (unspec, quad_mode); + emit_vec_fixed_binary_rnu (op_0, quad_rtx, zero, icode, quad_mode); + + return; + } + + if (double_rtx != NULL_RTX && quad_rtx == NULL_RTX) + { + icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode); + emit_vec_fixed_binary_rnu (double_rtx, op_1, zero, icode, vec_mode); + + icode = code_for_pred_narrow_clip_scalar (unspec, double_mode); + emit_vec_fixed_binary_rnu (op_0, double_rtx, zero, icode, double_mode); + + return; + } + + if (double_rtx == NULL_RTX && quad_rtx == NULL_RTX) + { 
+ icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode); + emit_vec_fixed_binary_rnu (op_0, op_1, zero, icode, vec_mode); + + return; + } + + gcc_unreachable (); +} This code looks odd to me. Could you restructure it in a more straightforward way?
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 66d70f678a6..bd34ad3e862 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2645,6 +2645,7 @@ (define_expand "rawmemchr<ANYI:mode>" ;; Includes: ;; - add ;; - sub +;; - trunc ;; ========================================================================= (define_expand "usadd<mode>3" [(match_operand:V_VLSI 0 "register_operand") @@ -2668,6 +2669,39 @@ (define_expand "ussub<mode>3" } ) +(define_expand "ustrunc<mode><v_double_trunc>2" + [(match_operand:<V_DOUBLE_TRUNC> 0 "register_operand") + (match_operand:VWEXTI 1 "register_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_vec_ustrunc (operands[0], operands[1], <MODE>mode, + E_VOIDmode, E_VOIDmode); + DONE; + } +) + +(define_expand "ustrunc<mode><v_quad_trunc>2" + [(match_operand:<V_QUAD_TRUNC> 0 "register_operand") + (match_operand:VQEXTI 1 "register_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_vec_ustrunc (operands[0], operands[1], <MODE>mode, + <V_DOUBLE_TRUNC>mode, E_VOIDmode); + DONE; + } +) + +(define_expand "ustrunc<mode><v_oct_trunc>2" + [(match_operand:<V_OCT_TRUNC> 0 "register_operand") + (match_operand:VOEXTI 1 "register_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_vec_ustrunc (operands[0], operands[1], <MODE>mode, + <V_DOUBLE_TRUNC>mode, <V_QUAD_TRUNC>mode); + DONE; + } +) + ;; ========================================================================= ;; == Early break auto-vectorization patterns ;; ========================================================================= diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index a8b76173fa0..3ca26e1107d 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -639,6 +639,7 @@ void expand_vec_lceil (rtx, rtx, machine_mode, machine_mode); void expand_vec_lfloor (rtx, rtx, machine_mode, machine_mode); void expand_vec_usadd (rtx, rtx, rtx, machine_mode); void expand_vec_ussub 
(rtx, rtx, rtx, machine_mode); +void expand_vec_ustrunc (rtx, rtx, machine_mode, machine_mode, machine_mode); #endif bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode, bool, void (*)(rtx *, rtx), enum avl_type); diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 5306711c1b7..866fabf18ed 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -4656,6 +4656,15 @@ emit_vec_binary_alu (rtx op_dest, rtx op_1, rtx op_2, enum rtx_code rcode, emit_vlmax_insn (icode, BINARY_OP, ops); } +static void +emit_vec_fixed_binary_rnu (rtx op_dest, rtx op_1, rtx op_2, insn_code icode, + machine_mode vec_mode) +{ + rtx ops[] = {op_dest, op_1, op_2}; + + emit_vlmax_insn (icode, BINARY_OP_VXRM_RNU, ops); +} + void expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode, machine_mode vec_int_mode) @@ -4901,6 +4910,61 @@ expand_vec_ussub (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode) emit_vec_binary_alu (op_0, op_1, op_2, US_MINUS, vec_mode); } +/* Expand the standard name ustrunc<m><n>2 for vector mode, we can leverage + the vector fixed point vector narrowing fixed-point clip directly. 
*/ + +void +expand_vec_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode, + machine_mode double_mode, machine_mode quad_mode) +{ + insn_code icode; + rtx double_rtx = NULL_RTX; + rtx quad_rtx = NULL_RTX; + rtx zero = CONST0_RTX (Xmode); + enum unspec unspec = UNSPEC_VNCLIPU; + + if (double_mode != E_VOIDmode) + double_rtx = gen_reg_rtx (double_mode); + + if (quad_mode != E_VOIDmode) + quad_rtx = gen_reg_rtx (quad_mode); + + if (double_rtx != NULL_RTX && quad_rtx != NULL_RTX) + { + icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode); + emit_vec_fixed_binary_rnu (double_rtx, op_1, zero, icode, vec_mode); + + icode = code_for_pred_narrow_clip_scalar (unspec, double_mode); + emit_vec_fixed_binary_rnu (quad_rtx, double_rtx, zero, icode, double_mode); + + icode = code_for_pred_narrow_clip_scalar (unspec, quad_mode); + emit_vec_fixed_binary_rnu (op_0, quad_rtx, zero, icode, quad_mode); + + return; + } + + if (double_rtx != NULL_RTX && quad_rtx == NULL_RTX) + { + icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode); + emit_vec_fixed_binary_rnu (double_rtx, op_1, zero, icode, vec_mode); + + icode = code_for_pred_narrow_clip_scalar (unspec, double_mode); + emit_vec_fixed_binary_rnu (op_0, double_rtx, zero, icode, double_mode); + + return; + } + + if (double_rtx == NULL_RTX && quad_rtx == NULL_RTX) + { + icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode); + emit_vec_fixed_binary_rnu (op_0, op_1, zero, icode, vec_mode); + + return; + } + + gcc_unreachable (); +} + /* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as well. 
*/ void diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h index a3116033fb3..b55a589e019 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h @@ -329,4 +329,26 @@ vec_sat_u_sub_trunc_##OUT_T##_fmt_1 (OUT_T *out, IN_T *op_1, IN_T y, \ #define RUN_VEC_SAT_U_SUB_TRUNC_FMT_1(OUT_T, IN_T, out, op_1, y, N) \ vec_sat_u_sub_trunc_##OUT_T##_fmt_1(out, op_1, y, N) +/******************************************************************************/ +/* Saturation Truncation (Unsigned and Signed) */ +/******************************************************************************/ +#define DEF_VEC_SAT_U_TRUNC_FMT_1(NT, WT) \ +void __attribute__((noinline)) \ +vec_sat_u_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \ +{ \ + unsigned i; \ + for (i = 0; i < limit; i++) \ + { \ + WT x = in[i]; \ + bool overflow = x > (WT)(NT)(-1); \ + out[i] = ((NT)x) | (NT)-overflow; \ + } \ +} +#define DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(NT, WT) DEF_VEC_SAT_U_TRUNC_FMT_1(NT, WT) + +#define RUN_VEC_SAT_U_TRUNC_FMT_1(NT, WT, out, in, N) \ + vec_sat_u_trunc_##NT##_##WT##_fmt_1 (out, in, N) +#define RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(NT, WT, out, in, N) \ + RUN_VEC_SAT_U_TRUNC_FMT_1(NT, WT, out, in, N) + #endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h new file mode 100644 index 00000000000..6b23ec809f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h @@ -0,0 +1,394 @@ +#ifndef HAVE_DEFINE_VEC_SAT_DATA_H +#define HAVE_DEFINE_VEC_SAT_DATA_H + +#define N 16 + +#define TEST_UNARY_STRUCT_NAME(T1, T2) test_##T1##_##T2##_s +#define TEST_UNARY_STRUCT_DECL(T1, T2) struct TEST_UNARY_STRUCT_NAME(T1, T2) +#define TEST_UNARY_STRUCT(T1, T2) \ + struct TEST_UNARY_STRUCT_NAME(T1, T2) \ + { \ + 
T2 in[N]; \ + T1 expect[N]; \ + T1 out[N]; \ + }; + +#define TEST_UNARY_DATA(T1, T2) test_##T1##_##T2##_data +#define TEST_UNARY_DATA_WRAP(T1, T2) TEST_UNARY_DATA(T1, T2) + +TEST_UNARY_STRUCT(uint8_t, uint16_t) +TEST_UNARY_STRUCT(uint8_t, uint32_t) +TEST_UNARY_STRUCT(uint8_t, uint64_t) + +TEST_UNARY_STRUCT(uint16_t, uint32_t) +TEST_UNARY_STRUCT(uint16_t, uint64_t) + +TEST_UNARY_STRUCT(uint32_t, uint64_t) + +TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \ + TEST_UNARY_DATA(uint8_t, uint16_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + }, + { + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + }, + }, + { + { + 65534, 65535, 650, 24, + 65534, 65535, 650, 24, + 65534, 65535, 650, 24, + 65534, 65535, 650, 24, + }, + { + 255, 255, 255, 24, + 255, 255, 255, 24, + 255, 255, 255, 24, + 255, 255, 255, 24, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint8_t, uint32_t) \ + TEST_UNARY_DATA(uint8_t, uint32_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + }, + { + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + }, + }, + { + { + 65534, 65535, 65536, 4294967291, + 65534, 65535, 65537, 4294967292, + 65534, 65535, 65538, 4294967293, + 65534, 65535, 65539, 4294967294, + }, + { + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 
255, 255, 255, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint8_t, uint64_t) \ + TEST_UNARY_DATA(uint8_t, uint64_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + 254, 255, 256, 257, + }, + { + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + 254, 255, 255, 255, + }, + }, + { + { + 65534, 65535, 4294967292, 4294967296, + 65534, 65536, 4294967293, 18446744073709551613u, + 65534, 65537, 4294967294, 18446744073709551614u, + 65534, 65538, 4294967295, 18446744073709551615u, + }, + { + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \ + TEST_UNARY_DATA(uint16_t, uint32_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, + { + { + 65534, 65535, 4294967295, 4294967291, + 65534, 65535, 4294967295, 4294967292, + 65534, 65535, 4294967295, 4294967293, + 65534, 65535, 4294967295, 4294967294, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint16_t, uint64_t) \ + TEST_UNARY_DATA(uint16_t, uint64_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 
0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, + { + { + 65534, 65535, 4294967294, 4294967298, + 65534, 65536, 4294967295, 18446744073709551613u, + 65534, 65537, 4294967296, 18446744073709551614u, + 65534, 65538, 4294967297, 18446744073709551615u, + }, + { + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + 65534, 65535, 65535, 65535, + }, + }, +}; + +TEST_UNARY_STRUCT_DECL(uint32_t, uint64_t) \ + TEST_UNARY_DATA(uint32_t, uint64_t)[] = +{ + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, + }, + { + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + { + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + 1, 2, 3, 4, + }, + }, + { + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + { + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + 65534, 65535, 65536, 65537, + }, + }, + { + { + 65534, 65535, 4294967294, 4294967298, + 65534, 65536, 4294967295, 18446744073709551613u, + 65534, 65537, 4294967296, 18446744073709551614u, + 65534, 65538, 4294967297, 18446744073709551615u, + }, + { + 65534, 65535, 4294967294, 4294967295, + 65534, 65536, 4294967295, 4294967295, + 65534, 65537, 4294967295, 4294967295, + 65534, 65538, 4294967295, 4294967295, + }, + }, +}; + +#endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c new file mode 100644 index 00000000000..dc9653947fc --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint16_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*mf2,\s*ta,\s*ma +** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint16_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c new file mode 100644 index 00000000000..03c1d709194 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint32_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*mf2,\s*ta,\s*ma +** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... 
+*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c new file mode 100644 index 00000000000..291dd0512fd --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint64_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*mf2,\s*ta,\s*ma +** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c new file mode 100644 index 00000000000..a3192bf0d76 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint16_t_uint32_t_fmt_1: +** ... 
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*mf2,\s*ta,\s*ma +** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint16_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c new file mode 100644 index 00000000000..b85276175bc --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint16_t_uint64_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*mf2,\s*ta,\s*ma +** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... 
+*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint16_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c new file mode 100644 index 00000000000..65b5ad3c219 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../binop/vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint32_t_uint64_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*mf2,\s*ta,\s*ma +** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... +*/ +DEF_VEC_SAT_U_TRUNC_FMT_1 (uint32_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c new file mode 100644 index 00000000000..cd896e2eb07 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint8_t +#define T2 uint16_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c new file mode 100644 index 00000000000..96272ca2943 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint8_t +#define T2 uint32_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c new file mode 100644 index 00000000000..671bbebf363 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint8_t +#define T2 uint64_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c new file mode 100644 index 00000000000..c7df27be15f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint16_t +#define T2 uint32_t + 
+DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c new file mode 100644 index 00000000000..c9067a1bcd0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint16_t +#define T2 uint64_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c new file mode 100644 index 00000000000..51d76be6743 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../binop/vec_sat_arith.h" +#include "vec_sat_data.h" + +#define T1 uint32_t +#define T2 uint64_t + +DEF_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2) + +#define T TEST_UNARY_STRUCT_DECL(T1, T2) +#define DATA TEST_UNARY_DATA_WRAP(T1, T2) +#define RUN_UNARY(out, in, N) RUN_VEC_SAT_U_TRUNC_FMT_1_WRAP(T1, T2, out, in, N) + +#include "vec_sat_unary_vv_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h new file mode 100644 index 00000000000..e731e9f3f71 --- 
/dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h @@ -0,0 +1,23 @@ +#ifndef HAVE_DEFINE_VEC_SAT_UNARY_VV_RUN_H +#define HAVE_DEFINE_VEC_SAT_UNARY_VV_RUN_H + +int +main () +{ + unsigned i, k; + + for (i = 0; i < sizeof (DATA) / sizeof (DATA[0]); i++) + { + T *data = &DATA[i]; + + RUN_UNARY (data->out, data->in, N); + + for (k = 0; k < N; k++) + if (data->out[k] != data->expect[k]) + __builtin_abort (); + } + + return 0; +} + +#endif