Message ID | 20230928055913.1782465-1-pan2.li@intel.com |
---|---|
State | New |
Headers | show |
Series | [v1] RISC-V: Support {U}INT64 to FP16 auto-vectorization | expand |
Please add "!flag_trapping_math". juzhe.zhong@rivai.ai From: pan2.li Date: 2023-09-28 13:59 To: gcc-patches CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng Subject: [PATCH v1] RISC-V: Support {U}INT64 to FP16 auto-vectorization From: Pan Li <pan2.li@intel.com> This patch adds support for auto-vectorization of the {U}INT64 to FP16 conversion. The conversion is performed in two steps: * INT64 to FP32. * FP32 to FP16. Given the sample code below: void test_func (int64_t * __restrict a, _Float16 *b, unsigned n) { for (unsigned i = 0; i < n; i++) b[i] = (_Float16) (a[i]); } Before this patch: test.c:6:26: missed: couldn't vectorize loop test.c:6:26: missed: not vectorized: unsupported data-type ld a0,0(s0) call __floatdihf fsh fa0,0(s1) addi s0,s0,8 addi s1,s1,2 bne s2,s0,.L3 ld ra,24(sp) ld s0,16(sp) ld s1,8(sp) ld s2,0(sp) addi sp,sp,32 After this patch: vsetvli a5,a2,e8,mf8,ta,ma vle64.v v1,0(a0) vsetvli a4,zero,e32,mf2,ta,ma vfncvt.f.x.w v1,v1 vsetvli zero,zero,e16,mf4,ta,ma vfncvt.f.f.w v1,v1 vsetvli zero,a2,e16,mf4,ta,ma vse16.v v1,0(a1) Please note that VLS modes are also handled by this patch and covered by the test cases. PR target/111506 gcc/ChangeLog: * config/riscv/autovec.md (<float_cvt><mode><vnnconvert>2): New pattern. * config/riscv/vector-iterators.md: New iterator (VWWCONVERTI) and mode attributes (VNNCONVERT, vnnconvert). gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/conversions/vfncvt-itof-rv32gcv.c: Adjust checker. * gcc.target/riscv/rvv/autovec/conversions/vfncvt-itof-rv64gcv.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/cvt-0.c: New test. * gcc.target/riscv/rvv/autovec/unop/cvt-1.c: New test. * gcc.target/riscv/rvv/autovec/vls/cvt-0.c: New test. 
Signed-off-by: Pan Li <pan2.li@intel.com> --- gcc/config/riscv/autovec.md | 24 ++++++++++ gcc/config/riscv/vector-iterators.md | 38 +++++++++++++++ .../autovec/conversions/vfncvt-itof-rv32gcv.c | 5 +- .../autovec/conversions/vfncvt-itof-rv64gcv.c | 5 +- .../gcc.target/riscv/rvv/autovec/unop/cvt-0.c | 21 +++++++++ .../gcc.target/riscv/rvv/autovec/unop/cvt-1.c | 22 +++++++++ .../gcc.target/riscv/rvv/autovec/vls/cvt-0.c | 47 +++++++++++++++++++ 7 files changed, 158 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-0.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/cvt-0.c diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index cd0cbdd2889..6dd3b96a423 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -974,6 +974,30 @@ (define_insn_and_split "<float_cvt><mode><vnconvert>2" } [(set_attr "type" "vfncvtitof")]) +;; This operation can be performed in the loop vectorizer but unfortunately +;; not applicable for now. We can remove this pattern after loop vectorizer +;; is able to take care of INT64 to FP16 conversion. +(define_insn_and_split "<float_cvt><mode><vnnconvert>2" + [(set (match_operand:<VNNCONVERT> 0 "register_operand") + (any_float:<VNNCONVERT> + (match_operand:VWWCONVERTI 1 "register_operand")))] + "TARGET_VECTOR && TARGET_ZVFH && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + rtx single = gen_reg_rtx (<VNCONVERT>mode); /* Get vector SF mode. */ + + /* Step-1, INT64 => FP32. */ + emit_insn (gen_<float_cvt><mode><vnconvert>2 (single, operands[1])); + /* Step-2, FP32 => FP16. 
*/ + emit_insn (gen_trunc<vnconvert><vnnconvert>2 (operands[0], single)); + + DONE; + } + [(set_attr "type" "vfncvtitof")] +) + ;; ========================================================================= ;; == Unary arithmetic ;; ========================================================================= diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index b6cd872eb42..c9a7344b1bc 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -1247,6 +1247,24 @@ (define_mode_iterator VWCONVERTI [ (V512DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 4096") ]) +(define_mode_iterator VWWCONVERTI [ + (RVVM8DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + (RVVM4DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + (RVVM2DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + (RVVM1DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + + (V1DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + (V2DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + (V4DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + (V8DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 64") + (V16DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 128") + (V32DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 256") + (V64DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 512") + (V128DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 1024") + (V256DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 2048") + (V512DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 4096") +]) + (define_mode_iterator VQEXTI [ RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") @@ -3243,6 +3261,26 @@ (define_mode_attr vnconvert [ (V512DF "v512si") ]) +;; NN 
indicates narrow twice +(define_mode_attr VNNCONVERT [ + (RVVM8DI "RVVM2HF") (RVVM4DI "RVVM1HF") (RVVM2DI "RVVMF2HF") + (RVVM1DI "RVVMF4HF") + + (V1DI "V1HF") (V2DI "V2HF") (V4DI "V4HF") (V8DI "V8HF") (V16DI "V16HF") + (V32DI "V32HF") (V64DI "V64HF") (V128DI "V128HF") (V256DI "V256HF") + (V512DI "V512HF") +]) + +;; nn indicates narrow twice +(define_mode_attr vnnconvert [ + (RVVM8DI "rvvm2hf") (RVVM4DI "rvvm1hf") (RVVM2DI "rvvmf2hf") + (RVVM1DI "rvvmf4hf") + + (V1DI "v1hf") (V2DI "v2hf") (V4DI "v4hf") (V8DI "v8hf") (V16DI "v16hf") + (V32DI "v32hf") (V64DI "v64hf") (V128DI "v128hf") (V256DI "v256hf") + (V512DI "v512hf") +]) + (define_mode_attr VDEMOTE [ (RVVM8DI "RVVM8SI") (RVVM4DI "RVVM4SI") (RVVM2DI "RVVM2SI") (RVVM1DI "RVVM1SI") (V1DI "V1SI") diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vfncvt-itof-rv32gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vfncvt-itof-rv32gcv.c index 73e4644658b..c0282dec815 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vfncvt-itof-rv32gcv.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vfncvt-itof-rv32gcv.c @@ -4,5 +4,6 @@ #include "vfncvt-itof-template.h" /* { dg-final { scan-assembler-times {\tvfcvt\.f\.x\.v} 2 } } */ -/* { dg-final { scan-assembler-times {\tvfncvt\.f\.x\.w} 2 } } */ -/* { dg-final { scan-assembler-times {\tvfncvt\.f\.xu\.w} 2 } } */ +/* { dg-final { scan-assembler-times {\tvfncvt\.f\.x\.w} 1 } } */ +/* { dg-final { scan-assembler-times {\tvfncvt\.f\.xu\.w} 3 } } */ +/* { dg-final { scan-assembler-times {\tvfncvt\.f\.f\.w} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vfncvt-itof-rv64gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vfncvt-itof-rv64gcv.c index e9d31a70e6a..b3f74708c84 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vfncvt-itof-rv64gcv.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vfncvt-itof-rv64gcv.c @@ -3,5 +3,6 @@ #include 
"vfncvt-itof-template.h" -/* { dg-final { scan-assembler-times {\tvfncvt\.f\.x\.w} 5 } } */ -/* { dg-final { scan-assembler-times {\tvfncvt\.f\.xu\.w} 1 } } */ +/* { dg-final { scan-assembler-times {\tvfncvt\.f\.x\.w} 2 } } */ +/* { dg-final { scan-assembler-times {\tvfncvt\.f\.xu\.w} 4 } } */ +/* { dg-final { scan-assembler-times {\tvfncvt\.f\.f\.w} 3 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-0.c new file mode 100644 index 00000000000..100d6a98729 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-0.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <stdint.h> + +/* +** test_int65_to_fp16: +** ... +** vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*mf2,\s*ta,\s*ma +** vfncvt\.f\.x\.w\s+v[0-9]+,\s*v[0-9]+ +** vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma +** vfncvt\.f\.f\.w\s+v[0-9]+,\s*v[0-9]+ +** ... +*/ +void +test_int65_to_fp16 (int64_t * __restrict a, _Float16 *b, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + b[i] = (_Float16) (a[i]); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-1.c new file mode 100644 index 00000000000..e625014d938 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-1.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <stdint.h> + +/* +** test_uint65_to_fp16: +** ... 
+** vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*mf2,\s*ta,\s*ma +** vfncvt\.f\.xu\.w\s+v[0-9]+,\s*v[0-9]+ +** vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma +** vfncvt\.f\.f\.w\s+v[0-9]+,\s*v[0-9]+ +** ... +*/ +void +test_uint65_to_fp16 (uint64_t * __restrict a, _Float16 *b, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + b[i] = (_Float16) (a[i]); +} + diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/cvt-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/cvt-0.c new file mode 100644 index 00000000000..6c15495ed8c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/cvt-0.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */ + +#include "def.h" + +DEF_CONVERT (fp16, int64_t, _Float16, 1) +DEF_CONVERT (fp16, int64_t, _Float16, 2) +DEF_CONVERT (fp16, int64_t, _Float16, 4) +DEF_CONVERT (fp16, int64_t, _Float16, 8) +DEF_CONVERT (fp16, int64_t, _Float16, 16) +DEF_CONVERT (fp16, int64_t, _Float16, 32) +DEF_CONVERT (fp16, int64_t, _Float16, 64) +DEF_CONVERT (fp16, int64_t, _Float16, 128) +DEF_CONVERT (fp16, int64_t, _Float16, 256) +DEF_CONVERT (fp16, int64_t, _Float16, 512) +DEF_CONVERT (fp16, int64_t, _Float16, 1024) +DEF_CONVERT (fp16, int64_t, _Float16, 2048) + +DEF_CONVERT (fp16, uint64_t, _Float16, 1) +DEF_CONVERT (fp16, uint64_t, _Float16, 2) +DEF_CONVERT (fp16, uint64_t, _Float16, 4) +DEF_CONVERT (fp16, uint64_t, _Float16, 8) +DEF_CONVERT (fp16, uint64_t, _Float16, 16) +DEF_CONVERT (fp16, uint64_t, _Float16, 32) +DEF_CONVERT (fp16, uint64_t, _Float16, 64) +DEF_CONVERT (fp16, uint64_t, _Float16, 128) +DEF_CONVERT (fp16, uint64_t, _Float16, 256) +DEF_CONVERT (fp16, uint64_t, _Float16, 512) +DEF_CONVERT (fp16, uint64_t, _Float16, 1024) +DEF_CONVERT (fp16, uint64_t, _Float16, 2048) + +/* { dg-final { scan-assembler-not {csrr} } } */ +/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ +/* { dg-final { scan-tree-dump-not 
"2,2" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */ +/* { dg-final { scan-assembler-times {vfncvt\.f\.x\.w\s+v[0-9]+,\s*v[0-9]+} 15 } } */ +/* { dg-final { scan-assembler-times {vfncvt\.f\.xu\.w\s+v[0-9]+,\s*v[0-9]+} 15 } } */ +/* { dg-final { scan-assembler-times {vfncvt\.f\.f\.w\s+v[0-9]+,\s*v[0-9]+} 30 } } */
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index cd0cbdd2889..6dd3b96a423 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -974,6 +974,30 @@ (define_insn_and_split "<float_cvt><mode><vnconvert>2" } [(set_attr "type" "vfncvtitof")]) +;; This operation can be performed in the loop vectorizer but unfortunately +;; not applicable for now. We can remove this pattern after loop vectorizer +;; is able to take care of INT64 to FP16 conversion. +(define_insn_and_split "<float_cvt><mode><vnnconvert>2" + [(set (match_operand:<VNNCONVERT> 0 "register_operand") + (any_float:<VNNCONVERT> + (match_operand:VWWCONVERTI 1 "register_operand")))] + "TARGET_VECTOR && TARGET_ZVFH && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + rtx single = gen_reg_rtx (<VNCONVERT>mode); /* Get vector SF mode. */ + + /* Step-1, INT64 => FP32. */ + emit_insn (gen_<float_cvt><mode><vnconvert>2 (single, operands[1])); + /* Step-2, FP32 => FP16. */ + emit_insn (gen_trunc<vnconvert><vnnconvert>2 (operands[0], single)); + + DONE; + } + [(set_attr "type" "vfncvtitof")] +) + ;; ========================================================================= ;; == Unary arithmetic ;; ========================================================================= diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index b6cd872eb42..c9a7344b1bc 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -1247,6 +1247,24 @@ (define_mode_iterator VWCONVERTI [ (V512DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 4096") ]) +(define_mode_iterator VWWCONVERTI [ + (RVVM8DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + (RVVM4DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + (RVVM2DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + (RVVM1DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + + (V1DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + (V2DI 
"TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + (V4DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + (V8DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 64") + (V16DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 128") + (V32DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 256") + (V64DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 512") + (V128DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 1024") + (V256DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 2048") + (V512DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 4096") +]) + (define_mode_iterator VQEXTI [ RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") @@ -3243,6 +3261,26 @@ (define_mode_attr vnconvert [ (V512DF "v512si") ]) +;; NN indicates narrow twice +(define_mode_attr VNNCONVERT [ + (RVVM8DI "RVVM2HF") (RVVM4DI "RVVM1HF") (RVVM2DI "RVVMF2HF") + (RVVM1DI "RVVMF4HF") + + (V1DI "V1HF") (V2DI "V2HF") (V4DI "V4HF") (V8DI "V8HF") (V16DI "V16HF") + (V32DI "V32HF") (V64DI "V64HF") (V128DI "V128HF") (V256DI "V256HF") + (V512DI "V512HF") +]) + +;; nn indicates narrow twice +(define_mode_attr vnnconvert [ + (RVVM8DI "rvvm2hf") (RVVM4DI "rvvm1hf") (RVVM2DI "rvvmf2hf") + (RVVM1DI "rvvmf4hf") + + (V1DI "v1hf") (V2DI "v2hf") (V4DI "v4hf") (V8DI "v8hf") (V16DI "v16hf") + (V32DI "v32hf") (V64DI "v64hf") (V128DI "v128hf") (V256DI "v256hf") + (V512DI "v512hf") +]) + (define_mode_attr VDEMOTE [ (RVVM8DI "RVVM8SI") (RVVM4DI "RVVM4SI") (RVVM2DI "RVVM2SI") (RVVM1DI "RVVM1SI") (V1DI "V1SI") diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vfncvt-itof-rv32gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vfncvt-itof-rv32gcv.c index 73e4644658b..c0282dec815 100644 --- 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vfncvt-itof-rv32gcv.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vfncvt-itof-rv32gcv.c @@ -4,5 +4,6 @@ #include "vfncvt-itof-template.h" /* { dg-final { scan-assembler-times {\tvfcvt\.f\.x\.v} 2 } } */ -/* { dg-final { scan-assembler-times {\tvfncvt\.f\.x\.w} 2 } } */ -/* { dg-final { scan-assembler-times {\tvfncvt\.f\.xu\.w} 2 } } */ +/* { dg-final { scan-assembler-times {\tvfncvt\.f\.x\.w} 1 } } */ +/* { dg-final { scan-assembler-times {\tvfncvt\.f\.xu\.w} 3 } } */ +/* { dg-final { scan-assembler-times {\tvfncvt\.f\.f\.w} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vfncvt-itof-rv64gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vfncvt-itof-rv64gcv.c index e9d31a70e6a..b3f74708c84 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vfncvt-itof-rv64gcv.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vfncvt-itof-rv64gcv.c @@ -3,5 +3,6 @@ #include "vfncvt-itof-template.h" -/* { dg-final { scan-assembler-times {\tvfncvt\.f\.x\.w} 5 } } */ -/* { dg-final { scan-assembler-times {\tvfncvt\.f\.xu\.w} 1 } } */ +/* { dg-final { scan-assembler-times {\tvfncvt\.f\.x\.w} 2 } } */ +/* { dg-final { scan-assembler-times {\tvfncvt\.f\.xu\.w} 4 } } */ +/* { dg-final { scan-assembler-times {\tvfncvt\.f\.f\.w} 3 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-0.c new file mode 100644 index 00000000000..100d6a98729 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-0.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <stdint.h> + +/* +** test_int65_to_fp16: +** ... 
+** vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*mf2,\s*ta,\s*ma +** vfncvt\.f\.x\.w\s+v[0-9]+,\s*v[0-9]+ +** vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma +** vfncvt\.f\.f\.w\s+v[0-9]+,\s*v[0-9]+ +** ... +*/ +void +test_int65_to_fp16 (int64_t * __restrict a, _Float16 *b, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + b[i] = (_Float16) (a[i]); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-1.c new file mode 100644 index 00000000000..e625014d938 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-1.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <stdint.h> + +/* +** test_uint65_to_fp16: +** ... +** vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*mf2,\s*ta,\s*ma +** vfncvt\.f\.xu\.w\s+v[0-9]+,\s*v[0-9]+ +** vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma +** vfncvt\.f\.f\.w\s+v[0-9]+,\s*v[0-9]+ +** ... 
+*/ +void +test_uint65_to_fp16 (uint64_t * __restrict a, _Float16 *b, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + b[i] = (_Float16) (a[i]); +} + diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/cvt-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/cvt-0.c new file mode 100644 index 00000000000..6c15495ed8c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/cvt-0.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */ + +#include "def.h" + +DEF_CONVERT (fp16, int64_t, _Float16, 1) +DEF_CONVERT (fp16, int64_t, _Float16, 2) +DEF_CONVERT (fp16, int64_t, _Float16, 4) +DEF_CONVERT (fp16, int64_t, _Float16, 8) +DEF_CONVERT (fp16, int64_t, _Float16, 16) +DEF_CONVERT (fp16, int64_t, _Float16, 32) +DEF_CONVERT (fp16, int64_t, _Float16, 64) +DEF_CONVERT (fp16, int64_t, _Float16, 128) +DEF_CONVERT (fp16, int64_t, _Float16, 256) +DEF_CONVERT (fp16, int64_t, _Float16, 512) +DEF_CONVERT (fp16, int64_t, _Float16, 1024) +DEF_CONVERT (fp16, int64_t, _Float16, 2048) + +DEF_CONVERT (fp16, uint64_t, _Float16, 1) +DEF_CONVERT (fp16, uint64_t, _Float16, 2) +DEF_CONVERT (fp16, uint64_t, _Float16, 4) +DEF_CONVERT (fp16, uint64_t, _Float16, 8) +DEF_CONVERT (fp16, uint64_t, _Float16, 16) +DEF_CONVERT (fp16, uint64_t, _Float16, 32) +DEF_CONVERT (fp16, uint64_t, _Float16, 64) +DEF_CONVERT (fp16, uint64_t, _Float16, 128) +DEF_CONVERT (fp16, uint64_t, _Float16, 256) +DEF_CONVERT (fp16, uint64_t, _Float16, 512) +DEF_CONVERT (fp16, uint64_t, _Float16, 1024) +DEF_CONVERT (fp16, uint64_t, _Float16, 2048) + +/* { dg-final { scan-assembler-not {csrr} } } */ +/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } 
*/ +/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */ +/* { dg-final { scan-assembler-times {vfncvt\.f\.x\.w\s+v[0-9]+,\s*v[0-9]+} 15 } } */ +/* { dg-final { scan-assembler-times {vfncvt\.f\.xu\.w\s+v[0-9]+,\s*v[0-9]+} 15 } } */ +/* { dg-final { scan-assembler-times {vfncvt\.f\.f\.w\s+v[0-9]+,\s*v[0-9]+} 30 } } */