Message ID | 20221024020853.29547-1-juzhe.zhong@rivai.ai
---|---
State | New
Series | RISC-V: Support load/store in mov<mode> pattern for RVV modes.
Verified and committed to trunk, thanks! And don't forget the changelog next time :P On Mon, Oct 24, 2022 at 10:09 AM <juzhe.zhong@rivai.ai> wrote: > > From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai> > > --- > gcc/config.gcc | 2 +- > gcc/config/riscv/constraints.md | 22 + > gcc/config/riscv/predicates.md | 23 ++ > gcc/config/riscv/riscv-protos.h | 14 + > gcc/config/riscv/riscv-v.cc | 180 ++++++++ > .../riscv/riscv-vector-builtins-bases.cc | 14 +- > gcc/config/riscv/riscv.cc | 67 ++- > gcc/config/riscv/riscv.h | 2 + > gcc/config/riscv/riscv.md | 9 +- > gcc/config/riscv/t-riscv | 4 + > gcc/config/riscv/vector-iterators.md | 58 +++ > gcc/config/riscv/vector.md | 279 ++++++++++++- > .../gcc.target/riscv/rvv/base/mov-1.c | 179 ++++++++ > .../gcc.target/riscv/rvv/base/mov-10.c | 385 ++++++++++++++++++ > .../gcc.target/riscv/rvv/base/mov-11.c | 385 ++++++++++++++++++ > .../gcc.target/riscv/rvv/base/mov-12.c | 159 ++++++++ > .../gcc.target/riscv/rvv/base/mov-13.c | 14 + > .../gcc.target/riscv/rvv/base/mov-2.c | 153 +++++++ > .../gcc.target/riscv/rvv/base/mov-3.c | 127 ++++++ > .../gcc.target/riscv/rvv/base/mov-4.c | 101 +++++ > .../gcc.target/riscv/rvv/base/mov-5.c | 66 +++ > .../gcc.target/riscv/rvv/base/mov-6.c | 53 +++ > .../gcc.target/riscv/rvv/base/mov-7.c | 13 + > .../gcc.target/riscv/rvv/base/mov-8.c | 96 +++++ > .../gcc.target/riscv/rvv/base/mov-9.c | 44 ++ > 25 files changed, 2421 insertions(+), 28 deletions(-) > create mode 100644 gcc/config/riscv/riscv-v.cc > create mode 100644 gcc/config/riscv/vector-iterators.md > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-1.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-10.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-11.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-12.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-13.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-2.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-3.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-4.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-5.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-6.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-7.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-8.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c > > diff --git a/gcc/config.gcc b/gcc/config.gcc > index 3826ae42803..0232e572a99 100644 > --- a/gcc/config.gcc > +++ b/gcc/config.gcc > @@ -515,7 +515,7 @@ pru-*-*) > ;; > riscv*) > cpu_type=riscv > - extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o" > + extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o riscv-v.o" > extra_objs="${extra_objs} riscv-vector-builtins.o riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o" > d_target_objs="riscv-d.o" > extra_headers="riscv_vector.h" > diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md > index 8997284f32e..951dcc52d6b 100644 > --- a/gcc/config/riscv/constraints.md > +++ b/gcc/config/riscv/constraints.md > @@ -128,3 +128,25 @@ > "POLY_INT" > (and (match_code "const_poly_int") > (match_test "known_eq (rtx_to_poly_int64 (op), BYTES_PER_RISCV_VECTOR)"))) > + > +(define_constraint "vu" > + "An undefined vector value."
> + (and (match_code "unspec") > + (match_test "XINT (op, 1) == UNSPEC_VUNDEF"))) > + > +(define_constraint "vi" > + "A vector 5-bit signed immediate." > + (and (match_code "const_vector") > + (match_test "riscv_vector::const_vec_all_same_in_range_p (op, -16, 15)"))) > + > +(define_constraint "Wc0" > + "@internal > + A constraint that matches a vector of immediate all zeros." > + (and (match_code "const_vector") > + (match_test "op == CONST0_RTX (GET_MODE (op))"))) > + > +(define_constraint "Wc1" > + "@internal > + A constraint that matches a vector of immediate all ones." > + (and (match_code "const_vector") > + (match_test "op == CONSTM1_RTX (GET_MODE (op))"))) > diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md > index 5e149b3a95f..e2bfafe8150 100644 > --- a/gcc/config/riscv/predicates.md > +++ b/gcc/config/riscv/predicates.md > @@ -262,3 +262,26 @@ > > return true; > }) > + > +;; Predicates for the V extension. > +(define_special_predicate "vector_length_operand" > + (ior (match_operand 0 "pmode_register_operand") > + (match_operand 0 "const_csr_operand"))) > + > +(define_predicate "reg_or_mem_operand" > + (ior (match_operand 0 "register_operand") > + (match_operand 0 "memory_operand"))) > + > +(define_predicate "vector_move_operand" > + (ior (match_operand 0 "nonimmediate_operand") > + (match_code "const_vector"))) > + > +(define_predicate "vector_mask_operand" > + (ior (match_operand 0 "register_operand") > + (match_test "op == CONSTM1_RTX (GET_MODE (op))"))) > + > +(define_predicate "vector_merge_operand" > + (ior (match_operand 0 "memory_operand") > + (ior (match_operand 0 "register_operand") > + (match_test "GET_CODE (op) == UNSPEC > + && (XINT (op, 1) == UNSPEC_VUNDEF)")))) > diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h > index f8c9932ff89..386c0027ff4 100644 > --- a/gcc/config/riscv/riscv-protos.h > +++ b/gcc/config/riscv/riscv-protos.h > @@ -76,6 +76,7 @@ extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *); > extern bool riscv_gpr_save_operation_p (rtx); > extern void riscv_reinit (void); > extern bool riscv_v_ext_enabled_vector_mode_p (machine_mode); > +extern poly_uint64 riscv_regmode_natural_size (machine_mode); > > /* Routines implemented in riscv-c.cc. */ > void riscv_cpu_cpp_builtins (cpp_reader *); > @@ -126,6 +127,19 @@ extern bool verify_type_context (location_t, type_context_kind, const_tree, bool > extern void handle_pragma_vector (void); > extern tree builtin_decl (unsigned, bool); > extern rtx expand_builtin (unsigned int, tree, rtx); > +extern bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); > +extern bool legitimize_move (rtx, rtx, machine_mode); > +enum tail_policy > +{ > + TAIL_UNDISTURBED = 0, > + TAIL_AGNOSTIC = 1, > +}; > + > +enum mask_policy > +{ > + MASK_UNDISTURBED = 0, > + MASK_AGNOSTIC = 1, > +}; > } > > /* We classify builtin types into two classes: > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc > new file mode 100644 > index 00000000000..6615a5c7ffe > --- /dev/null > +++ b/gcc/config/riscv/riscv-v.cc > @@ -0,0 +1,180 @@ > +/* Subroutines used for code generation for RISC-V 'V' Extension for GNU > + compiler. Copyright (C) 2022-2022 Free Software Foundation, Inc. Contributed > + by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd. > + > + This file is part of GCC. 
> + > + GCC is free software; you can redistribute it and/or modify it > + under the terms of the GNU General Public License as published by > + the Free Software Foundation; either version 3, or (at your option) > + any later version. > + > + GCC is distributed in the hope that it will be useful, but > + WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + General Public License for more details. > + > + You should have received a copy of the GNU General Public License > + along with GCC; see the file COPYING3. If not see > + <http://www.gnu.org/licenses/>. */ > + > +#define IN_TARGET_CODE 1 > + > +#include "config.h" > +#include "system.h" > +#include "coretypes.h" > +#include "tm.h" > +#include "backend.h" > +#include "rtl.h" > +#include "insn-config.h" > +#include "insn-attr.h" > +#include "recog.h" > +#include "alias.h" > +#include "tree.h" > +#include "stringpool.h" > +#include "attribs.h" > +#include "explow.h" > +#include "memmodel.h" > +#include "emit-rtl.h" > +#include "tm_p.h" > +#include "target.h" > +#include "expr.h" > +#include "optabs.h" > + > +using namespace riscv_vector; > + > +namespace riscv_vector { > + > +template <int MAX_OPERANDS> class insn_expander > +{ > +public: > + insn_expander () : m_opno (0) {} > + void add_output_operand (rtx x, machine_mode mode) > + { > + create_output_operand (&m_ops[m_opno++], x, mode); > + gcc_assert (m_opno <= MAX_OPERANDS); > + } > + void add_input_operand (rtx x, machine_mode mode) > + { > + create_input_operand (&m_ops[m_opno++], x, mode); > + gcc_assert (m_opno <= MAX_OPERANDS); > + } > + void add_all_one_mask_operand (machine_mode mode) > + { > + add_input_operand (CONSTM1_RTX (mode), mode); > + } > + void add_vundef_operand (machine_mode mode) > + { > + add_input_operand (gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), > + UNSPEC_VUNDEF), > + mode); > + } > + void add_policy_operand (enum tail_policy vta, enum mask_policy vma) > + { > + rtx tail_policy_rtx = vta == TAIL_UNDISTURBED ? const0_rtx : const1_rtx; > + rtx mask_policy_rtx = vma == MASK_UNDISTURBED ? const0_rtx : const1_rtx; > + add_input_operand (tail_policy_rtx, Pmode); > + add_input_operand (mask_policy_rtx, Pmode); > + } > + > + void expand (enum insn_code icode, bool temporary_volatile_p = false) > + { > + if (temporary_volatile_p) > + { > + temporary_volatile_ok v (true); > + expand_insn (icode, m_opno, m_ops); > + } > + else > + expand_insn (icode, m_opno, m_ops); > + } > + > +private: > + int m_opno; > + expand_operand m_ops[MAX_OPERANDS]; > +}; > + > +/* Return true if X is a const_vector with all duplicate elements, which is in > + the range between MINVAL and MAXVAL. */ > +bool > +const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT minval, > + HOST_WIDE_INT maxval) > +{ > + rtx elt; > + return (const_vec_duplicate_p (x, &elt) && CONST_INT_P (elt) > + && IN_RANGE (INTVAL (elt), minval, maxval)); > +} > + > +/* Emit an RVV unmask && vl mov from SRC to DEST. */ > +static void > +emit_pred_move (rtx dest, rtx src, rtx vl, machine_mode mask_mode) > +{ > + insn_expander<7> e; > + > + machine_mode mode = GET_MODE (dest); > + if (register_operand (src, mode) && register_operand (dest, mode)) > + { > + emit_move_insn (dest, src); > + return; > + } > + > + e.add_output_operand (dest, mode); > + e.add_all_one_mask_operand (mask_mode); > + /* For load operation, we create undef operand. > + For store operation, we make it depend on the dest memory to > + avoid potential bugs. 
*/ > + if (MEM_P (src)) > + e.add_vundef_operand (mode); > + else > + e.add_input_operand (dest, mode); > + > + e.add_input_operand (src, mode); > + e.add_input_operand (vl, Pmode); > + > + e.add_policy_operand (TAIL_AGNOSTIC, MASK_AGNOSTIC); > + > + enum insn_code icode; > + icode = code_for_pred_mov (mode); > + e.expand (icode, true); > +} > + > +/* Expand a pre-RA RVV data move from SRC to DEST. > + It expands move for RVV fractional vector modes. */ > +bool > +legitimize_move (rtx dest, rtx src, machine_mode mask_mode) > +{ > + machine_mode mode = GET_MODE (dest); > + /* For whole registers load/store or register-register move, > + we don't need to specially handle them, just let them go > + through "*mov<mode>" and then use the codegen directly. */ > + if ((known_ge (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR) > + && (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)) > + || (register_operand (src, mode) && register_operand (dest, mode))) > + { > + /* Need to force register if mem <- !reg. */ > + if (MEM_P (dest) && !REG_P (src)) > + src = force_reg (mode, src); > + return false; > + } > + > + rtx vlmax = gen_reg_rtx (Pmode); > + unsigned int sew = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL > + ? 8 > + : GET_MODE_BITSIZE (GET_MODE_INNER (mode)); > + emit_insn (gen_vsetvl_no_side_effects ( > + Pmode, vlmax, gen_rtx_REG (Pmode, 0), gen_int_mode (sew, Pmode), > + gen_int_mode ((unsigned int) mode, Pmode), const1_rtx, const1_rtx)); > + > + if (!register_operand (src, mode) && !register_operand (dest, mode)) > + { > + rtx tmp = gen_reg_rtx (mode); > + if (MEM_P (src)) > + emit_pred_move (tmp, src, vlmax, mask_mode); > + else > + emit_move_insn (tmp, src); > + src = tmp; > + } > + emit_pred_move (dest, src, vlmax, mask_mode); > + return true; > +} > + > +} // namespace riscv_vector > diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc > index 8582c0cae4c..231b63a610d 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc > +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc > @@ -72,18 +72,8 @@ public: > e.add_input_operand (Pmode, > gen_int_mode (GET_MODE_BITSIZE (inner_mode), Pmode)); > > - /* LMUL. Define the bitmap rule as follows: > - | 4 | 3 2 1 0 | > - | fractional_p | factor | > - */ > - bool fractional_p = known_lt (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR); > - unsigned int factor > - = fractional_p ? exact_div (BYTES_PER_RISCV_VECTOR, GET_MODE_SIZE (mode)) > - .to_constant () > - : exact_div (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR) > - .to_constant (); > - e.add_input_operand (Pmode, > - gen_int_mode ((fractional_p << 4) | factor, Pmode)); > + /* LMUL. */ > + e.add_input_operand (Pmode, gen_int_mode ((unsigned int) mode, Pmode)); > > /* TA. */ > e.add_input_operand (Pmode, gen_int_mode (1, Pmode)); > diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc > index f7694ba043c..98374a922d1 100644 > --- a/gcc/config/riscv/riscv.cc > +++ b/gcc/config/riscv/riscv.cc > @@ -4150,14 +4150,42 @@ riscv_print_operand (FILE *file, rtx op, int letter) > switch (letter) > { > case 'm': { > - if (code == CONST_INT) > + if (riscv_v_ext_vector_mode_p (mode)) > { > - /* LMUL. Define the bitmap rule as follows: > - | 4 | 3 2 1 0 | > - | fractional_p | factor | > - */ > - bool fractional_p = (UINTVAL (op) >> 4) & 0x1; > - unsigned int factor = UINTVAL (op) & 0xf; > + /* Calculate lmul according to mode and print the value. 
*/ > + poly_int64 size = GET_MODE_SIZE (mode); > + unsigned int lmul; > + if (known_lt (size, BYTES_PER_RISCV_VECTOR)) > + lmul = 1; > + else > + lmul = exact_div (size, BYTES_PER_RISCV_VECTOR).to_constant (); > + asm_fprintf (file, "%d", lmul); > + } > + else if (code == CONST_INT) > + { > + /* The value in the operand is the unsigned int value > + converted from (enum machine_mode). > + This RTX is generated as follows: > + > + machine_mode mode = XXXmode; > + operand = gen_int_mode ((unsigned int)mode, Pmode); > + > + So we convert it back into machine_mode and then calculate > + the LMUL according to GET_MODE_SIZE. */ > + > + machine_mode rvv_mode = (machine_mode) UINTVAL (op); > + /* For rvv mask modes, we cannot calculate LMUL simply according > + to BYTES_PER_RISCV_VECTOR. E.g., when rvv_mode = VNx4BImode: > + Set SEW = 8, LMUL = 1 by default if TARGET_MIN_VLEN == 32. > + Set SEW = 8, LMUL = 1 / 2 by default if TARGET_MIN_VLEN > 32. */ > + bool bool_p = GET_MODE_CLASS (rvv_mode) == MODE_VECTOR_BOOL; > + poly_int64 m1_size = BYTES_PER_RISCV_VECTOR; > + poly_int64 rvv_size > + = bool_p ? GET_MODE_NUNITS (rvv_mode) : GET_MODE_SIZE (rvv_mode); > + bool fractional_p = known_lt (rvv_size, BYTES_PER_RISCV_VECTOR); > + unsigned int factor > + = fractional_p ? exact_div (m1_size, rvv_size).to_constant () > + : exact_div (rvv_size, m1_size).to_constant (); asm_fprintf (file, "%s%d", fractional_p ? "mf" : "m", factor); > else > @@ -4165,7 +4193,15 @@ riscv_print_operand (FILE *file, rtx op, int letter) > break; > } > case 'p': { > - if (code == CONST_INT) > + if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) > + { > + /* Print for RVV mask operand. > + If op is reg, print ",v0.t". > + Otherwise, don't print anything. */ > + if (code == REG) > + fprintf (file, ",%s.t", reg_names[REGNO (op)]); > + } > + else if (code == CONST_INT) > { > /* Tail && Mask policy. */ > bool agnostic_p = UINTVAL (op) & 0x1; > @@ -6421,6 +6457,21 @@ riscv_vector_alignment (const_tree type) > return wi::umin (min_size, 128).to_uhwi (); > } > + > +/* Implement REGMODE_NATURAL_SIZE. */ > + > +poly_uint64 > +riscv_regmode_natural_size (machine_mode mode) > +{ > + /* The natural size for RVV data modes is one RVV data vector, > + and similarly for predicates. We can't independently modify > + anything smaller than that. */ > + /* ??? For now, only do this for variable-width RVV registers. > + Doing it for constant-sized registers breaks lower-subreg.c. */ > + if (!riscv_vector_chunks.is_constant () && riscv_v_ext_vector_mode_p (mode)) > + return BYTES_PER_RISCV_VECTOR; > + return UNITS_PER_WORD; > +} > + > /* Initialize the GCC target structure. */ > #undef TARGET_ASM_ALIGNED_HI_OP > #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" > diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h > index 37363e975e1..9dbc8463591 100644 > --- a/gcc/config/riscv/riscv.h > +++ b/gcc/config/riscv/riscv.h > @@ -1080,4 +1080,6 @@ extern void riscv_remove_unneeded_save_restore_calls (void); > > #define REGISTER_TARGET_PRAGMAS() riscv_register_pragmas () > > +#define REGMODE_NATURAL_SIZE(MODE) riscv_regmode_natural_size (MODE) > + > #endif /* !
GCC_RISCV_H */ > diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md > index b3654915fde..eb52e0e8b59 100644 > --- a/gcc/config/riscv/riscv.md > +++ b/gcc/config/riscv/riscv.md > @@ -152,7 +152,14 @@ > (const_string "unknown")) > > ;; Main data type used by the insn > -(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,HF,SF,DF,TF" > +(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,HF,SF,DF,TF, > + VNx1BI,VNx2BI,VNx4BI,VNx8BI,VNx16BI,VNx32BI,VNx64BI, > + VNx1QI,VNx2QI,VNx4QI,VNx8QI,VNx16QI,VNx32QI,VNx64QI, > + VNx1HI,VNx2HI,VNx4HI,VNx8HI,VNx16HI,VNx32HI, > + VNx1SI,VNx2SI,VNx4SI,VNx8SI,VNx16SI, > + VNx1DI,VNx2DI,VNx4DI,VNx8DI, > + VNx1SF,VNx2SF,VNx4SF,VNx8SF,VNx16SF, > + VNx1DF,VNx2DF,VNx4DF,VNx8DF" > (const_string "unknown")) > > ;; True if the main data type is twice the size of a word. > diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv > index 8f67676cad7..7997db3d898 100644 > --- a/gcc/config/riscv/t-riscv > +++ b/gcc/config/riscv/t-riscv > @@ -63,6 +63,10 @@ riscv-selftests.o: $(srcdir)/config/riscv/riscv-selftests.cc > $(COMPILE) $< > $(POSTCOMPILE) > > +riscv-v.o: $(srcdir)/config/riscv/riscv-v.cc > + $(COMPILE) $< > + $(POSTCOMPILE) > + > PASSES_EXTRA += $(srcdir)/config/riscv/riscv-passes.def > > $(common_out_file): $(srcdir)/config/riscv/riscv-cores.def \ > diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md > new file mode 100644 > index 00000000000..1255e33a6f8 > --- /dev/null > +++ b/gcc/config/riscv/vector-iterators.md > @@ -0,0 +1,58 @@ > +;; Iterators for RISC-V 'V' Extension for GNU compiler. > +;; Copyright (C) 2022-2022 Free Software Foundation, Inc. > +;; Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd. > + > +;; This file is part of GCC. > + > +;; GCC is free software; you can redistribute it and/or modify > +;; it under the terms of the GNU General Public License as published by > +;; the Free Software Foundation; either version 3, or (at your option) > +;; any later version. > + > +;; GCC is distributed in the hope that it will be useful, > +;; but WITHOUT ANY WARRANTY; without even the implied warranty of > +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > +;; GNU General Public License for more details. > + > +;; You should have received a copy of the GNU General Public License > +;; along with GCC; see the file COPYING3. If not see > +;; <http://www.gnu.org/licenses/>. 
> + > +(define_mode_iterator V [ > + VNx1QI VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") > + VNx1HI VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") > + VNx1SI VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") > + VNx1DI VNx2DI VNx4DI (VNx8DI "TARGET_MIN_VLEN > 32") > + (VNx1SF "TARGET_VECTOR_ELEN_FP_32") > + (VNx2SF "TARGET_VECTOR_ELEN_FP_32") > + (VNx4SF "TARGET_VECTOR_ELEN_FP_32") > + (VNx8SF "TARGET_VECTOR_ELEN_FP_32") > + (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") > + (VNx1DF "TARGET_VECTOR_ELEN_FP_64") > + (VNx2DF "TARGET_VECTOR_ELEN_FP_64") > + (VNx4DF "TARGET_VECTOR_ELEN_FP_64") > + (VNx8DF "TARGET_VECTOR_ELEN_FP_64") > +]) > + > +(define_mode_iterator VB [ > + VNx1BI VNx2BI VNx4BI VNx8BI VNx16BI VNx32BI > + (VNx64BI "TARGET_MIN_VLEN > 32") > +]) > + > +(define_mode_attr VM [ > + (VNx1QI "VNx1BI") (VNx2QI "VNx2BI") (VNx4QI "VNx4BI") (VNx8QI "VNx8BI") (VNx16QI "VNx16BI") (VNx32QI "VNx32BI") (VNx64QI "VNx64BI") > + (VNx1HI "VNx1BI") (VNx2HI "VNx2BI") (VNx4HI "VNx4BI") (VNx8HI "VNx8BI") (VNx16HI "VNx16BI") (VNx32HI "VNx32BI") > + (VNx1SI "VNx1BI") (VNx2SI "VNx2BI") (VNx4SI "VNx4BI") (VNx8SI "VNx8BI") (VNx16SI "VNx16BI") > + (VNx1DI "VNx1BI") (VNx2DI "VNx2BI") (VNx4DI "VNx4BI") (VNx8DI "VNx8BI") > + (VNx1SF "VNx1BI") (VNx2SF "VNx2BI") (VNx4SF "VNx4BI") (VNx8SF "VNx8BI") (VNx16SF "VNx16BI") > + (VNx1DF "VNx1BI") (VNx2DF "VNx2BI") (VNx4DF "VNx4BI") (VNx8DF "VNx8BI") > +]) > + > +(define_mode_attr sew [ > + (VNx1QI "8") (VNx2QI "8") (VNx4QI "8") (VNx8QI "8") (VNx16QI "8") (VNx32QI "8") (VNx64QI "8") > + (VNx1HI "16") (VNx2HI "16") (VNx4HI "16") (VNx8HI "16") (VNx16HI "16") (VNx32HI "16") > + (VNx1SI "32") (VNx2SI "32") (VNx4SI "32") (VNx8SI "32") (VNx16SI "32") > + (VNx1DI "64") (VNx2DI "64") (VNx4DI "64") (VNx8DI "64") > + (VNx1SF "32") (VNx2SF "32") (VNx4SF "32") (VNx8SF "32") (VNx16SF "32") > + (VNx1DF "64") (VNx2DF "64") (VNx4DF "64") (VNx8DF "64") > +]) > diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md > index 82ce902c0fb..19bb27560f8 100644 > --- a/gcc/config/riscv/vector.md > +++ b/gcc/config/riscv/vector.md > @@ -26,10 +26,71 @@ > ;; - Auto-vectorization (TBD) > ;; - Combine optimization (TBD) > > +(include "vector-iterators.md") > + > (define_c_enum "unspec" [ > UNSPEC_VSETVL > + UNSPEC_VUNDEF > + UNSPEC_VPREDICATE > ]) > > +;; ----------------------------------------------------------------- > +;; ---- Miscellaneous Operations > +;; ----------------------------------------------------------------- > + > +(define_insn "vundefined<mode>" > + [(set (match_operand:V 0 "register_operand" "=vr") > + (unspec:V [(const_int 0)] UNSPEC_VUNDEF))] > + "TARGET_VECTOR" > + "") > + > +;; ----------------------------------------------------------------- > +;; ---- Moves Operations > +;; ----------------------------------------------------------------- > + > +(define_expand "mov<mode>" > + [(set (match_operand:V 0 "reg_or_mem_operand") > + (match_operand:V 1 "vector_move_operand"))] > + "TARGET_VECTOR" > +{ > + if (riscv_vector::legitimize_move (operands[0], operands[1], <VM>mode)) > + DONE; > +}) > + > +;; This pattern is used for code-gen for whole register load/stores. > +;; Also applicable for all register moves. > +;; Fractional vector modes load/store are not allowed to match this pattern. > +;; Mask modes load/store are not allowed to match this pattern. 
> +(define_insn "*mov<mode>" > + [(set (match_operand:V 0 "reg_or_mem_operand" "=vr,m,vr") > + (match_operand:V 1 "reg_or_mem_operand" "m,vr,vr"))] > + "TARGET_VECTOR && ((register_operand (operands[0], <MODE>mode) > + && register_operand (operands[1], <MODE>mode)) > + || known_ge (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR))" > + "@ > + vl%m1re<sew>.v\t%0,%1 > + vs%m1r.v\t%1,%0 > + vmv%m1r.v\t%0,%1" > + [(set_attr "type" "vldr,vstr,vmov") > + (set_attr "mode" "<MODE>")]) > + > +(define_expand "mov<mode>" > + [(set (match_operand:VB 0 "reg_or_mem_operand") > + (match_operand:VB 1 "vector_move_operand"))] > + "TARGET_VECTOR" > +{ > + if (riscv_vector::legitimize_move (operands[0], operands[1], <MODE>mode)) > + DONE; > +}) > + > +(define_insn "*mov<mode>" > + [(set (match_operand:VB 0 "register_operand" "=vr") > + (match_operand:VB 1 "register_operand" "vr"))] > + "TARGET_VECTOR" > + "vmv1r.v\t%0,%1" > + [(set_attr "type" "vmov") > + (set_attr "mode" "<MODE>")]) > + > ;; ----------------------------------------------------------------- > ;; ---- 6. Configuration-Setting Instructions > ;; ----------------------------------------------------------------- > @@ -50,13 +111,98 @@ > ;; operands[3]: LMUL > ;; operands[4]: Tail policy 0 or 1 (undisturbed/agnostic) > ;; operands[5]: Mask policy 0 or 1 (undisturbed/agnostic) > + > +;; We define 2 types of "vsetvl*" instruction patterns: > + > +;; - "@vsetvl<mode>" is a parallel format which has side effects. > + > +;; - "@vsetvl<mode>_no_side_effects" has no side effects. > + > +;; - "@vsetvl<mode>" is used by "vsetvl" intrinsics and "insert-vsetvl" PASS. > + > +;; - "@vsetvl<mode>_no_side_effects" is used by GCC standard patterns. > + > +;; - "@vsetvl<mode>" includes VL/VTYPE global registers status (define set) > +;; and each RVV instruction includes VL/VTYPE global registers status (use) > +;; so that we can guarantee each RVV instruction can execute with correct > +;; VL/VTYPE global registers status after "insert-vsetvl" PASS. > + > +;; - "@vsetvl<mode>_no_side_effects" has no side effects and excludes VL/VTYPE > +;; global registers status (define set). It's only used by GCC standard pattern > +;; expansion. For example: "mov<mode>" pattern for fractional vector modes which > +;; need to set VL/VTYPE. Then we could manually call this pattern to gain benefits > +;; from the optimization of each GCC internal PASS. > + > +;; 1. void foo (float *in, float *out) > +;; { > +;; vfloat32mf2_t v = *(vfloat32mf2_t*)in; > +;; *(vfloat32mf2_t*)out = v; > +;; } > +;; We could eliminate the second "vsetvl" by calling "@vsetvl<mode>_no_side_effects". > +;; > +;; "@vsetvl<mode>": ;; "@vsetvl<mode>_no_side_effects": > +;; vsetvli a4,zero,e32,mf2,ta,ma ;; vsetvli a4,zero,e32,mf2,ta,ma > +;; vle32.v v24,(a0) ;; vle32.v v24,(a0) > +;; vsetvli a4,zero,e32,mf2,ta,ma ;; -- > +;; vse32.v v24,(a1) ;; vse32.v v24,(a1) > +;; ret ;; ret > + > +;; 2. void foo (int8_t *in, int8_t *out, int M) > +;; { > +;; for (int i = 0; i < M; i++){ > +;; vint8mf2_t v = *(vint8mf2_t*)(in + i); > +;; *(vint8mf2_t*)(out + i) = v; > +;; } > +;; } > +;; > +;; Hoist "vsetvl" instruction in LICM: > +;; "@vsetvl<mode>": ;; "@vsetvl<mode>_no_side_effects": > +;; - ;; vsetvli a4,zero,e32,mf2,ta,ma > +;; LOOP: ;; LOOP: > +;; vsetvli a4,zero,e32,mf2,ta,ma ;; - > +;; vle32.v v24,(a0) ;; vle32.v v24,(a0) > +;; vsetvli a4,zero,e32,mf2,ta,ma ;; - > +;; vse32.v v24,(a1) ;; vse32.v v24,(a1) > + > +;; However, it may produce wrong codegen if we exclude VL/VTYPE in "vsetvl<mode>". > +;; 3. void foo (int8_t *in, int8_t *out, int32_t *in2, int32_t *out2, int M) > +;; { > +;; for (int i = 0; i < M; i++){ > +;; vint8mf2_t v = *(vint8mf2_t*)(in + i); > +;; vint32mf2_t v2 = *(vint32mf2_t*)(in + i + i); > +;; *(vint8mf2_t*)(out + i) = v; > +;; *(vint32mf2_t*)(out + i + i) = v2; > +;; } > +;; } > +;; > +;; vsetvli a6,zero,e8,mf2,ta,ma > +;; vsetvli a2,zero,e32,mf2,ta,ma > +;; LOOP: > +;; vle8.v v25,(a0) > +;; vle32.v v24,(a5) > +;; addi a0,a0,1 > +;; vse8.v v25,(a1) > +;; vse32.v v24,(a3) > +;; > +;; Both vle8.v and vle32.v are using the wrong VL/VTYPE status. > +;; We leave it to "insert-vsetvl" PASS to correct this situation. > + > +;; The "insert-vsetvl" PASS mechanism: > +;; 1. Before "insert-vsetvl" PASS, only RVV instructions generated > +;; by GCC standard pattern expansion have the corresponding "vsetvl". > +;; We exploit each GCC internal optimization pass to optimize the "vsetvl". > +;; 2. Correct the VL/VTYPE status for each GCC standard pattern RVV instruction. > +;; Insert vsetvl for each RVV instruction that has no VL/VTYPE status if necessary. > +;; For example: RVV intrinsics. > +;; 3. Optimize "vsetvl" instructions. > + > (define_insn "@vsetvl<mode>" > - [(set (match_operand:P 0 "register_operand" "=r,r") > - (unspec:P [(match_operand:P 1 "csr_operand" "r,K") > - (match_operand 2 "const_int_operand" "i,i") > - (match_operand 3 "const_int_operand" "i,i") > - (match_operand 4 "const_int_operand" "i,i") > - (match_operand 5 "const_int_operand" "i,i")] UNSPEC_VSETVL)) > + [(set (match_operand:P 0 "register_operand" "=r") > + (unspec:P [(match_operand:P 1 "csr_operand" "rK") > + (match_operand 2 "const_int_operand" "i") > + (match_operand 3 "const_int_operand" "i") > + (match_operand 4 "const_int_operand" "i") > + (match_operand 5 "const_int_operand" "i")] UNSPEC_VSETVL)) > (set (reg:SI VL_REGNUM) > (unspec:SI [(match_dup 1) > (match_dup 2) > @@ -70,3 +216,124 @@ > "vset%i1vli\t%0,%1,e%2,%m3,t%p4,m%p5" > [(set_attr "type" "vsetvl") > (set_attr "mode" "<MODE>")]) > + > +;; We keep it free of side effects before reload_completed. > +;; In this case, we can gain benefits from different GCC > +;; internal PASS such as cprop, fwprop, combine, etc. > + > +;; Then recover it for "insert-vsetvl" and "sched2" PASS > +;; in order to get correct codegen.
> +(define_insn_and_split "@vsetvl<mode>_no_side_effects" > + [(set (match_operand:P 0 "register_operand" "=r") > + (unspec:P [(match_operand:P 1 "csr_operand" "rK") > + (match_operand 2 "const_int_operand" "i") > + (match_operand 3 "const_int_operand" "i") > + (match_operand 4 "const_int_operand" "i") > + (match_operand 5 "const_int_operand" "i")] UNSPEC_VSETVL))] > + "TARGET_VECTOR" > + "#" > + "&& reload_completed" > + [(parallel > + [(set (match_dup 0) > + (unspec:P [(match_dup 1) (match_dup 2) (match_dup 3) > + (match_dup 4) (match_dup 5)] UNSPEC_VSETVL)) > + (set (reg:SI VL_REGNUM) > + (unspec:SI [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_VSETVL)) > + (set (reg:SI VTYPE_REGNUM) > + (unspec:SI [(match_dup 2) (match_dup 3) (match_dup 4) > + (match_dup 5)] UNSPEC_VSETVL))])] > + "" > + [(set_attr "type" "vsetvl") > + (set_attr "mode" "<MODE>")]) > + > +;; RVV machine description matching format > +;; (define_insn "" > +;; [(set (match_operand:MODE 0) > +;; (if_then_else:MODE > +;; (unspec:<MODE:VM> > +;; [(match_operand:<VM> 1 "vector_mask_operand") > +;; (match_operand N + 4 "vector_length_operand") > +;; (match_operand N + 5 "const_int_operand") > +;; (match_operand N + 6 "const_int_operand") > +;; (reg:SI VL_REGNUM) > +;; (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) > +;; (instruction operation:MODE > +;; (match_operand 3 > +;; (match_operand 4 > +;; (match_operand 5 > +;; ................ > +;; (match_operand N + 3) > +;; (match_operand:MODE 2 "vector_reg_or_const0_operand")))] > +;; > +;; (unspec:[........] UNSPEC_VPREDICATE) is a predicate wrapper. > +;; Include mask predicate && length predicate && vector policy. > + > +;; ------------------------------------------------------------------------------- > +;; ---- Predicated Mov > +;; ------------------------------------------------------------------------------- > +;; Includes: > +;; - 7.4. Vector Unit-Stride Instructions > +;; - 11.16 Vector Integer Move Instructions > +;; - 13.16 Vector Floating-Point Move Instruction > +;; - 15.1 Vector Mask-Register Logical Instructions > +;; ------------------------------------------------------------------------------- > + > +;; vle.v/vse.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f. > +;; For vle.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f, we may need merge and mask operand. > +;; For vse.v, we don't need merge operand, so it should always match "vu". > +;; constraint alternative 0 ~ 1 match vle.v. > +;; constraint alternative 2 match vse.v. > +;; constraint alternative 3 match vmv.v.v. > +;; constraint alternative 4 match vmv.v.i. > +;; For vmv.v.i, we allow 2 following cases: > +;; 1. (const_vector:VNx1QI repeat [ > +;; (const_int:QI N)]), -15 <= N < 16. > +;; 2. (const_vector:VNx1SF repeat [ > +;; (const_double:SF 0.0 [0x0.0p+0])]). 
> +(define_insn "@pred_mov<mode>" > + [(set (match_operand:V 0 "nonimmediate_operand" "=vd, vr, m, vr, vr") > + (if_then_else:V > + (unspec:<VM> > + [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1, vmWc1, vmWc1, Wc1") > + (match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK") > + (match_operand 5 "const_int_operand" " i, i, i, i, i") > + (match_operand 6 "const_int_operand" " i, i, i, i, i") > + (reg:SI VL_REGNUM) > + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) > + (match_operand:V 3 "vector_move_operand" " m, m, vr, vr, viWc0") > + (match_operand:V 2 "vector_merge_operand" " 0, vu, 0, vu0, vu0")))] > + "TARGET_VECTOR" > + "@ > + vle<sew>.v\t%0,%3%p1 > + vle<sew>.v\t%0,%3%p1 > + vse<sew>.v\t%3,%0%p1 > + vmv.v.v\t%0,%3 > + vmv.v.i\t%0,v%3" > + [(set_attr "type" "vlde,vlde,vste,vimov,vimov") > + (set_attr "mode" "<MODE>")]) > + > +;; vlm.v/vsm.v/vmclr.m/vmset.m. > +;; constraint alternative 0 match vlm.v. > +;; constraint alternative 1 match vsm.v. > +;; constraint alternative 2 match vmclr.m. > +;; constraint alternative 3 match vmset.m. > +(define_insn "@pred_mov<mode>" > + [(set (match_operand:VB 0 "nonimmediate_operand" "=vr, m, vr, vr") > + (if_then_else:VB > + (unspec:VB > + [(match_operand:VB 1 "vector_mask_operand" "Wc1, Wc1, Wc1, Wc1") > + (match_operand 4 "vector_length_operand" " rK, rK, rK, rK") > + (match_operand 5 "const_int_operand" " i, i, i, i") > + (match_operand 6 "const_int_operand" " i, i, i, i") > + (reg:SI VL_REGNUM) > + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) > + (match_operand:VB 3 "vector_move_operand" " m, vr, Wc0, Wc1") > + (match_operand:VB 2 "vector_merge_operand" " vu, 0, vu, vu")))] > + "TARGET_VECTOR" > + "@ > + vlm.v\t%0,%3 > + vsm.v\t%3,%0 > + vmclr.m\t%0 > + vmset.m\t%0" > + [(set_attr "type" "vldm,vstm,vmalu,vmalu") > + (set_attr "mode" "<MODE>")]) > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-1.c > new file mode 100644 > index 00000000000..6a235e308f9 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-1.c > @@ -0,0 +1,179 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include <riscv_vector.h> > + > +/* > +** mov1: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au] > +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov1 (int8_t *in, int8_t *out) > +{ > + vint8mf8_t v = *(vint8mf8_t*)in; > + *(vint8mf8_t*)out = v; > +} > + > +/* > +** mov2: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au] > +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov2 (int8_t *in, int8_t *out) > +{ > + vint8mf4_t v = *(vint8mf4_t*)in; > + *(vint8mf4_t*)out = v; > +} > + > +/* > +** mov3: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au] > +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov3 (int8_t *in, int8_t *out) > +{ > + vint8mf2_t
v = *(vint8mf2_t*)in; > + *(vint8mf2_t*)out = v; > +} > + > +/* > +** mov4: > +** vl1re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov4 (int8_t *in, int8_t *out) > +{ > + vint8m1_t v = *(vint8m1_t*)in; > + *(vint8m1_t*)out = v; > +} > + > +/* > +** mov5: > +** vl2re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov5 (int8_t *in, int8_t *out) > +{ > + vint8m2_t v = *(vint8m2_t*)in; > + *(vint8m2_t*)out = v; > +} > + > +/* > +** mov6: > +** vl4re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov6 (int8_t *in, int8_t *out) > +{ > + vint8m4_t v = *(vint8m4_t*)in; > + *(vint8m4_t*)out = v; > +} > + > +/* > +** mov7: > +** vl8re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov7 (int8_t *in, int8_t *out) > +{ > + vint8m8_t v = *(vint8m8_t*)in; > + *(vint8m8_t*)out = v; > +} > + > +/* > +** mov8: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au] > +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov8 (uint8_t *in, uint8_t *out) > +{ > + vuint8mf8_t v = *(vuint8mf8_t*)in; > + *(vuint8mf8_t*)out = v; > +} > + > +/* > +** mov9: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au] > +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov9 (uint8_t *in, uint8_t *out) > +{ > + vuint8mf4_t v = *(vuint8mf4_t*)in; > + *(vuint8mf4_t*)out = v; > +} > + > +/* > +** mov10: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au] > +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov10 (uint8_t *in, uint8_t *out) > +{ > + vuint8mf2_t v = *(vuint8mf2_t*)in; > + *(vuint8mf2_t*)out = v; > +} > + > +/* > +** mov11: > +** vl1re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov11 (uint8_t *in, uint8_t *out) > +{ > + vuint8m1_t v = *(vuint8m1_t*)in; > + *(vuint8m1_t*)out = v; > +} > + > +/* > +** mov12: > +** vl2re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov12 (uint8_t *in, uint8_t *out) > +{ > + vuint8m2_t v = *(vuint8m2_t*)in; > + *(vuint8m2_t*)out = v; > +} > + > +/* > +** mov13: > +** 
vl4re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov13 (uint8_t *in, uint8_t *out) > +{ > + vuint8m4_t v = *(vuint8m4_t*)in; > + *(vuint8m4_t*)out = v; > +} > + > +/* > +** mov14: > +** vl8re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov14 (uint8_t *in, uint8_t *out) > +{ > + vuint8m8_t v = *(vuint8m8_t*)in; > + *(vuint8m8_t*)out = v; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-10.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-10.c > new file mode 100644 > index 00000000000..10aa8297c30 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-10.c > @@ -0,0 +1,385 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include <riscv_vector.h> > + > +/* > +** mov1: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au] > +** vle8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov1 (int8_t *in, int8_t *out) > +{ > + register vint8mf8_t v1 asm("v1") = *(vint8mf8_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vint8mf8_t v2 asm("v2") = v1; > + *(vint8mf8_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov2: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au] > +** vle8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov2 (int8_t *in, int8_t *out) > +{ > + register vint8mf4_t v1 asm("v1") = *(vint8mf4_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vint8mf4_t v2 asm("v2") = v1; > + *(vint8mf4_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov3: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au] > +** vle8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov3 (int8_t *in, int8_t *out) > +{ > + register vint8mf2_t v1 asm("v1") = *(vint8mf2_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vint8mf2_t v2 asm("v2") = v1; > + *(vint8mf2_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov4: > +** vl1re8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov4 (int8_t *in, int8_t *out) > +{ > + register vint8m1_t v1 asm("v1") = *(vint8m1_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vint8m1_t v2 asm("v2") = v1; > + *(vint8m1_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov5: > +** vl2re8\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv2r\.v\tv4,v2 > +** ... 
> +** ret > +*/ > +void mov5 (int8_t *in, int8_t *out) > +{ > + register vint8m2_t v2 asm("v2") = *(vint8m2_t*)in; > + asm volatile ("# %0"::"vr"(v2)); > + register vint8m2_t v4 asm("v4") = v2; > + *(vint8m2_t*)out = v4; > + asm volatile ("# %0"::"vr"(v4)); > +} > + > +/* > +** mov6: > +** vl4re8\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv4r\.v\tv8,v4 > +** ... > +** ret > +*/ > +void mov6 (int8_t *in, int8_t *out) > +{ > + register vint8m4_t v4 asm("v4") = *(vint8m4_t*)in; > + asm volatile ("# %0"::"vr"(v4)); > + register vint8m4_t v8 asm("v8") = v4; > + *(vint8m4_t*)out = v8; > + asm volatile ("# %0"::"vr"(v8)); > +} > + > +/* > +** mov7: > +** vl8re8\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv8r\.v\tv16,v8 > +** ... > +** ret > +*/ > +void mov7 (int8_t *in, int8_t *out) > +{ > + register vint8m8_t v8 asm("v8") = *(vint8m8_t*)in; > + asm volatile ("# %0"::"vr"(v8)); > + register vint8m8_t v16 asm("v16") = v8; > + *(vint8m8_t*)out = v16; > + asm volatile ("# %0"::"vr"(v16)); > +} > + > +/* > +** mov8: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au] > +** vle16\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov8 (int16_t *in, int16_t *out) > +{ > + register vint16mf4_t v1 asm("v1") = *(vint16mf4_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vint16mf4_t v2 asm("v2") = v1; > + *(vint16mf4_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov9: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf2,\s*t[au],\s*m[au] > +** vle16\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov9 (int16_t *in, int16_t *out) > +{ > + register vint16mf2_t v1 asm("v1") = *(vint16mf2_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vint16mf2_t v2 asm("v2") = v1; > + *(vint16mf2_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov10: > +** vl1re16\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov10 (int16_t *in, int16_t *out) > +{ > + register vint16m1_t v1 asm("v1") = *(vint16m1_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vint16m1_t v2 asm("v2") = v1; > + *(vint16m1_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov11: > +** vl2re16\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv2r\.v\tv4,v2 > +** ... > +** ret > +*/ > +void mov11 (int16_t *in, int16_t *out) > +{ > + register vint16m2_t v2 asm("v2") = *(vint16m2_t*)in; > + asm volatile ("# %0"::"vr"(v2)); > + register vint16m2_t v4 asm("v4") = v2; > + *(vint16m2_t*)out = v4; > + asm volatile ("# %0"::"vr"(v4)); > +} > + > +/* > +** mov12: > +** vl4re16\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv4r\.v\tv8,v4 > +** ... > +** ret > +*/ > +void mov12 (int16_t *in, int16_t *out) > +{ > + register vint16m4_t v4 asm("v4") = *(vint16m4_t*)in; > + asm volatile ("# %0"::"vr"(v4)); > + register vint16m4_t v8 asm("v8") = v4; > + *(vint16m4_t*)out = v8; > + asm volatile ("# %0"::"vr"(v8)); > +} > + > +/* > +** mov13: > +** vl8re16\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv8r\.v\tv16,v8 > +** ... 
> +** ret > +*/ > +void mov13 (int32_t *in, int32_t *out) > +{ > + register vint16m8_t v8 asm("v8") = *(vint16m8_t*)in; > + asm volatile ("# %0"::"vr"(v8)); > + register vint16m8_t v16 asm("v16") = v8; > + *(vint16m8_t*)out = v16; > + asm volatile ("# %0"::"vr"(v16)); > +} > + > +/* > +** mov14: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au] > +** vle32\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov14 (int32_t *in, int32_t *out) > +{ > + register vint32mf2_t v1 asm("v1") = *(vint32mf2_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vint32mf2_t v2 asm("v2") = v1; > + *(vint32mf2_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov15: > +** vl1re32\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov15 (int32_t *in, int32_t *out) > +{ > + register vint32m1_t v1 asm("v1") = *(vint32m1_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vint32m1_t v2 asm("v2") = v1; > + *(vint32m1_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov16: > +** vl2re32\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv2r\.v\tv4,v2 > +** ... > +** ret > +*/ > +void mov16 (int32_t *in, int32_t *out) > +{ > + register vint32m2_t v2 asm("v2") = *(vint32m2_t*)in; > + asm volatile ("# %0"::"vr"(v2)); > + register vint32m2_t v4 asm("v4") = v2; > + *(vint32m2_t*)out = v4; > + asm volatile ("# %0"::"vr"(v4)); > +} > + > +/* > +** mov17: > +** vl4re32\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv4r\.v\tv8,v4 > +** ... > +** ret > +*/ > +void mov17 (int32_t *in, int32_t *out) > +{ > + register vint32m4_t v4 asm("v4") = *(vint32m4_t*)in; > + asm volatile ("# %0"::"vr"(v4)); > + register vint32m4_t v8 asm("v8") = v4; > + *(vint32m4_t*)out = v8; > + asm volatile ("# %0"::"vr"(v8)); > +} > + > +/* > +** mov18: > +** vl8re32\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv8r\.v\tv16,v8 > +** ... > +** ret > +*/ > +void mov18 (int32_t *in, int32_t *out) > +{ > + register vint32m8_t v8 asm("v8") = *(vint32m8_t*)in; > + asm volatile ("# %0"::"vr"(v8)); > + register vint32m8_t v16 asm("v16") = v8; > + *(vint32m8_t*)out = v16; > + asm volatile ("# %0"::"vr"(v16)); > +} > + > +/* > +** mov19: > +** vl1re64\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov19 (int64_t *in, int64_t *out) > +{ > + register vint64m1_t v1 asm("v1") = *(vint64m1_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vint64m1_t v2 asm("v2") = v1; > + *(vint64m1_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov20: > +** vl2re64\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv2r\.v\tv4,v2 > +** ... > +** ret > +*/ > +void mov20 (int64_t *in, int64_t *out) > +{ > + register vint64m2_t v2 asm("v2") = *(vint64m2_t*)in; > + asm volatile ("# %0"::"vr"(v2)); > + register vint64m2_t v4 asm("v4") = v2; > + *(vint64m2_t*)out = v4; > + asm volatile ("# %0"::"vr"(v4)); > +} > + > +/* > +** mov21: > +** vl4re64\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv4r\.v\tv8,v4 > +** ... 
> +** ret > +*/ > +void mov21 (int64_t *in, int64_t *out) > +{ > + register vint64m4_t v4 asm("v4") = *(vint64m4_t*)in; > + asm volatile ("# %0"::"vr"(v4)); > + register vint64m4_t v8 asm("v8") = v4; > + *(vint64m4_t*)out = v8; > + asm volatile ("# %0"::"vr"(v8)); > +} > + > +/* > +** mov22: > +** vl8re64\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv8r\.v\tv16,v8 > +** ... > +** ret > +*/ > +void mov22 (int64_t *in, int64_t *out) > +{ > + register vint64m8_t v8 asm("v8") = *(vint64m8_t*)in; > + asm volatile ("# %0"::"vr"(v8)); > + register vint64m8_t v16 asm("v16") = v8; > + *(vint64m8_t*)out = v16; > + asm volatile ("# %0"::"vr"(v16)); > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-11.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-11.c > new file mode 100644 > index 00000000000..f8da5bb6b93 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-11.c > @@ -0,0 +1,385 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include <riscv_vector.h> > + > +/* > +** mov1: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au] > +** vle8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov1 (uint8_t *in, uint8_t *out) > +{ > + register vuint8mf8_t v1 asm("v1") = *(vuint8mf8_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vuint8mf8_t v2 asm("v2") = v1; > + *(vuint8mf8_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov2: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au] > +** vle8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov2 (uint8_t *in, uint8_t *out) > +{ > + register vuint8mf4_t v1 asm("v1") = *(vuint8mf4_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vuint8mf4_t v2 asm("v2") = v1; > + *(vuint8mf4_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov3: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au] > +** vle8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov3 (uint8_t *in, uint8_t *out) > +{ > + register vuint8mf2_t v1 asm("v1") = *(vuint8mf2_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vuint8mf2_t v2 asm("v2") = v1; > + *(vuint8mf2_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov4: > +** vl1re8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov4 (uint8_t *in, uint8_t *out) > +{ > + register vuint8m1_t v1 asm("v1") = *(vuint8m1_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vuint8m1_t v2 asm("v2") = v1; > + *(vuint8m1_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov5: > +** vl2re8\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv2r\.v\tv4,v2 > +** ... 
> +** ret > +*/ > +void mov5 (uint8_t *in, uint8_t *out) > +{ > + register vuint8m2_t v2 asm("v2") = *(vuint8m2_t*)in; > + asm volatile ("# %0"::"vr"(v2)); > + register vuint8m2_t v4 asm("v4") = v2; > + *(vuint8m2_t*)out = v4; > + asm volatile ("# %0"::"vr"(v4)); > +} > + > +/* > +** mov6: > +** vl4re8\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv4r\.v\tv8,v4 > +** ... > +** ret > +*/ > +void mov6 (uint8_t *in, uint8_t *out) > +{ > + register vuint8m4_t v4 asm("v4") = *(vuint8m4_t*)in; > + asm volatile ("# %0"::"vr"(v4)); > + register vuint8m4_t v8 asm("v8") = v4; > + *(vuint8m4_t*)out = v8; > + asm volatile ("# %0"::"vr"(v8)); > +} > + > +/* > +** mov7: > +** vl8re8\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv8r\.v\tv16,v8 > +** ... > +** ret > +*/ > +void mov7 (uint8_t *in, uint8_t *out) > +{ > + register vuint8m8_t v8 asm("v8") = *(vuint8m8_t*)in; > + asm volatile ("# %0"::"vr"(v8)); > + register vuint8m8_t v16 asm("v16") = v8; > + *(vuint8m8_t*)out = v16; > + asm volatile ("# %0"::"vr"(v16)); > +} > + > +/* > +** mov8: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au] > +** vle16\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov8 (uint16_t *in, uint16_t *out) > +{ > + register vuint16mf4_t v1 asm("v1") = *(vuint16mf4_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vuint16mf4_t v2 asm("v2") = v1; > + *(vuint16mf4_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov9: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf2,\s*t[au],\s*m[au] > +** vle16\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov9 (uint16_t *in, uint16_t *out) > +{ > + register vuint16mf2_t v1 asm("v1") = *(vuint16mf2_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vuint16mf2_t v2 asm("v2") = v1; > + *(vuint16mf2_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov10: > +** vl1re16\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov10 (uint16_t *in, uint16_t *out) > +{ > + register vuint16m1_t v1 asm("v1") = *(vuint16m1_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vuint16m1_t v2 asm("v2") = v1; > + *(vuint16m1_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov11: > +** vl2re16\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv2r\.v\tv4,v2 > +** ... > +** ret > +*/ > +void mov11 (uint16_t *in, uint16_t *out) > +{ > + register vuint16m2_t v2 asm("v2") = *(vuint16m2_t*)in; > + asm volatile ("# %0"::"vr"(v2)); > + register vuint16m2_t v4 asm("v4") = v2; > + *(vuint16m2_t*)out = v4; > + asm volatile ("# %0"::"vr"(v4)); > +} > + > +/* > +** mov12: > +** vl4re16\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv4r\.v\tv8,v4 > +** ... > +** ret > +*/ > +void mov12 (uint16_t *in, uint16_t *out) > +{ > + register vuint16m4_t v4 asm("v4") = *(vuint16m4_t*)in; > + asm volatile ("# %0"::"vr"(v4)); > + register vuint16m4_t v8 asm("v8") = v4; > + *(vuint16m4_t*)out = v8; > + asm volatile ("# %0"::"vr"(v8)); > +} > + > +/* > +** mov13: > +** vl8re16\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv8r\.v\tv16,v8 > +** ... 
> +** ret > +*/ > +void mov13 (uint32_t *in, uint32_t *out) > +{ > + register vuint16m8_t v8 asm("v8") = *(vuint16m8_t*)in; > + asm volatile ("# %0"::"vr"(v8)); > + register vuint16m8_t v16 asm("v16") = v8; > + *(vuint16m8_t*)out = v16; > + asm volatile ("# %0"::"vr"(v16)); > +} > + > +/* > +** mov14: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au] > +** vle32\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov14 (uint32_t *in, uint32_t *out) > +{ > + register vuint32mf2_t v1 asm("v1") = *(vuint32mf2_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vuint32mf2_t v2 asm("v2") = v1; > + *(vuint32mf2_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov15: > +** vl1re32\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov15 (uint32_t *in, uint32_t *out) > +{ > + register vuint32m1_t v1 asm("v1") = *(vuint32m1_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vuint32m1_t v2 asm("v2") = v1; > + *(vuint32m1_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov16: > +** vl2re32\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv2r\.v\tv4,v2 > +** ... > +** ret > +*/ > +void mov16 (uint32_t *in, uint32_t *out) > +{ > + register vuint32m2_t v2 asm("v2") = *(vuint32m2_t*)in; > + asm volatile ("# %0"::"vr"(v2)); > + register vuint32m2_t v4 asm("v4") = v2; > + *(vuint32m2_t*)out = v4; > + asm volatile ("# %0"::"vr"(v4)); > +} > + > +/* > +** mov17: > +** vl4re32\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv4r\.v\tv8,v4 > +** ... > +** ret > +*/ > +void mov17 (uint32_t *in, uint32_t *out) > +{ > + register vuint32m4_t v4 asm("v4") = *(vuint32m4_t*)in; > + asm volatile ("# %0"::"vr"(v4)); > + register vuint32m4_t v8 asm("v8") = v4; > + *(vuint32m4_t*)out = v8; > + asm volatile ("# %0"::"vr"(v8)); > +} > + > +/* > +** mov18: > +** vl8re32\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv8r\.v\tv16,v8 > +** ... > +** ret > +*/ > +void mov18 (uint32_t *in, uint32_t *out) > +{ > + register vuint32m8_t v8 asm("v8") = *(vuint32m8_t*)in; > + asm volatile ("# %0"::"vr"(v8)); > + register vuint32m8_t v16 asm("v16") = v8; > + *(vuint32m8_t*)out = v16; > + asm volatile ("# %0"::"vr"(v16)); > +} > + > +/* > +** mov19: > +** vl1re64\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov19 (uint64_t *in, uint64_t *out) > +{ > + register vuint64m1_t v1 asm("v1") = *(vuint64m1_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vuint64m1_t v2 asm("v2") = v1; > + *(vuint64m1_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov20: > +** vl2re64\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv2r\.v\tv4,v2 > +** ... > +** ret > +*/ > +void mov20 (uint64_t *in, uint64_t *out) > +{ > + register vuint64m2_t v2 asm("v2") = *(vuint64m2_t*)in; > + asm volatile ("# %0"::"vr"(v2)); > + register vuint64m2_t v4 asm("v4") = v2; > + *(vuint64m2_t*)out = v4; > + asm volatile ("# %0"::"vr"(v4)); > +} > + > +/* > +** mov21: > +** vl4re64\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv4r\.v\tv8,v4 > +** ... 
> +** ret > +*/ > +void mov21 (uint64_t *in, uint64_t *out) > +{ > + register vuint64m4_t v4 asm("v4") = *(vuint64m4_t*)in; > + asm volatile ("# %0"::"vr"(v4)); > + register vuint64m4_t v8 asm("v8") = v4; > + *(vuint64m4_t*)out = v8; > + asm volatile ("# %0"::"vr"(v8)); > +} > + > +/* > +** mov22: > +** vl8re64\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv8r\.v\tv16,v8 > +** ... > +** ret > +*/ > +void mov22 (uint64_t *in, uint64_t *out) > +{ > + register vuint64m8_t v8 asm("v8") = *(vuint64m8_t*)in; > + asm volatile ("# %0"::"vr"(v8)); > + register vuint64m8_t v16 asm("v16") = v8; > + *(vuint64m8_t*)out = v16; > + asm volatile ("# %0"::"vr"(v16)); > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-12.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-12.c > new file mode 100644 > index 00000000000..5b8ce40b62d > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-12.c > @@ -0,0 +1,159 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include <riscv_vector.h> > + > +/* > +** mov14: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au] > +** vle32\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov14 (float *in, float *out) > +{ > + register vfloat32mf2_t v1 asm("v1") = *(vfloat32mf2_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vfloat32mf2_t v2 asm("v2") = v1; > + *(vfloat32mf2_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov15: > +** vl1re32\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... > +** ret > +*/ > +void mov15 (float *in, float *out) > +{ > + register vfloat32m1_t v1 asm("v1") = *(vfloat32m1_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vfloat32m1_t v2 asm("v2") = v1; > + *(vfloat32m1_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov16: > +** vl2re32\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv2r\.v\tv4,v2 > +** ... > +** ret > +*/ > +void mov16 (float *in, float *out) > +{ > + register vfloat32m2_t v2 asm("v2") = *(vfloat32m2_t*)in; > + asm volatile ("# %0"::"vr"(v2)); > + register vfloat32m2_t v4 asm("v4") = v2; > + *(vfloat32m2_t*)out = v4; > + asm volatile ("# %0"::"vr"(v4)); > +} > + > +/* > +** mov17: > +** vl4re32\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv4r\.v\tv8,v4 > +** ... > +** ret > +*/ > +void mov17 (float *in, float *out) > +{ > + register vfloat32m4_t v4 asm("v4") = *(vfloat32m4_t*)in; > + asm volatile ("# %0"::"vr"(v4)); > + register vfloat32m4_t v8 asm("v8") = v4; > + *(vfloat32m4_t*)out = v8; > + asm volatile ("# %0"::"vr"(v8)); > +} > + > +/* > +** mov18: > +** vl8re32\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv8r\.v\tv16,v8 > +** ... > +** ret > +*/ > +void mov18 (float *in, float *out) > +{ > + register vfloat32m8_t v8 asm("v8") = *(vfloat32m8_t*)in; > + asm volatile ("# %0"::"vr"(v8)); > + register vfloat32m8_t v16 asm("v16") = v8; > + *(vfloat32m8_t*)out = v16; > + asm volatile ("# %0"::"vr"(v16)); > +} > + > +/* > +** mov19: > +** vl1re64\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv1r\.v\tv2,v1 > +** ... 
> +** ret > +*/ > +void mov19 (uint64_t *in, uint64_t *out) > +{ > + register vfloat64m1_t v1 asm("v1") = *(vfloat64m1_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vfloat64m1_t v2 asm("v2") = v1; > + *(vfloat64m1_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > + > +/* > +** mov20: > +** vl2re64\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv2r\.v\tv4,v2 > +** ... > +** ret > +*/ > +void mov20 (uint64_t *in, uint64_t *out) > +{ > + register vfloat64m2_t v2 asm("v2") = *(vfloat64m2_t*)in; > + asm volatile ("# %0"::"vr"(v2)); > + register vfloat64m2_t v4 asm("v4") = v2; > + *(vfloat64m2_t*)out = v4; > + asm volatile ("# %0"::"vr"(v4)); > +} > + > +/* > +** mov21: > +** vl4re64\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv4r\.v\tv8,v4 > +** ... > +** ret > +*/ > +void mov21 (uint64_t *in, uint64_t *out) > +{ > + register vfloat64m4_t v4 asm("v4") = *(vfloat64m4_t*)in; > + asm volatile ("# %0"::"vr"(v4)); > + register vfloat64m4_t v8 asm("v8") = v4; > + *(vfloat64m4_t*)out = v8; > + asm volatile ("# %0"::"vr"(v8)); > +} > + > +/* > +** mov22: > +** vl8re64\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ... > +** vmv8r\.v\tv16,v8 > +** ... > +** ret > +*/ > +void mov22 (uint64_t *in, uint64_t *out) > +{ > + register vfloat64m8_t v8 asm("v8") = *(vfloat64m8_t*)in; > + asm volatile ("# %0"::"vr"(v8)); > + register vfloat64m8_t v16 asm("v16") = v8; > + *(vfloat64m8_t*)out = v16; > + asm volatile ("# %0"::"vr"(v16)); > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-13.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-13.c > new file mode 100644 > index 00000000000..8c630f3bedb > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-13.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ > + > +#include <riscv_vector.h> > + > +void mov1 (int8_t *in, int8_t *out) > +{ > + register vint8mf8_t v1 asm("v1") = *(vint8mf8_t*)in; > + asm volatile ("# %0"::"vr"(v1)); > + register vint8mf8_t v2 asm("v2") = v1; > + asm volatile ("#":::"v2"); > + *(vint8mf8_t*)out = v2; > + asm volatile ("# %0"::"vr"(v2)); > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-2.c > new file mode 100644 > index 00000000000..b9bdd515747 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-2.c > @@ -0,0 +1,153 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include <riscv_vector.h> > + > +/* > +** mov2: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au] > +** vle16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov2 (int16_t *in, int16_t *out) > +{ > + vint16mf4_t v = *(vint16mf4_t*)in; > + *(vint16mf4_t*)out = v; > +} > + > +/* > +** mov3: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf2,\s*t[au],\s*m[au] > +** vle16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov3 (int16_t *in, int16_t *out) > +{ > + vint16mf2_t v = *(vint16mf2_t*)in; > + *(vint16mf2_t*)out = v; > +} > 
+ > +/* > +** mov4: > +** vl1re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov4 (int16_t *in, int16_t *out) > +{ > + vint16m1_t v = *(vint16m1_t*)in; > + *(vint16m1_t*)out = v; > +} > + > +/* > +** mov5: > +** vl2re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov5 (int16_t *in, int16_t *out) > +{ > + vint16m2_t v = *(vint16m2_t*)in; > + *(vint16m2_t*)out = v; > +} > + > +/* > +** mov6: > +** vl4re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov6 (int16_t *in, int16_t *out) > +{ > + vint16m4_t v = *(vint16m4_t*)in; > + *(vint16m4_t*)out = v; > +} > + > +/* > +** mov7: > +** vl8re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov7 (int16_t *in, int16_t *out) > +{ > + vint16m8_t v = *(vint16m8_t*)in; > + *(vint16m8_t*)out = v; > +} > + > +/* > +** mov8: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au] > +** vle16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov8 (uint16_t *in, uint16_t *out) > +{ > + vuint16mf4_t v = *(vuint16mf4_t*)in; > + *(vuint16mf4_t*)out = v; > +} > + > +/* > +** mov9: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf2,\s*t[au],\s*m[au] > +** vle16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov9 (uint16_t *in, uint16_t *out) > +{ > + vuint16mf2_t v = *(vuint16mf2_t*)in; > + *(vuint16mf2_t*)out = v; > +} > + > +/* > +** mov10: > +** vl1re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov10 (uint16_t *in, uint16_t *out) > +{ > + vuint16m1_t v = *(vuint16m1_t*)in; > + *(vuint16m1_t*)out = v; > +} > + > +/* > +** mov11: > +** vl2re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov11 (uint16_t *in, uint16_t *out) > +{ > + vuint16m2_t v = *(vuint16m2_t*)in; > + *(vuint16m2_t*)out = v; > +} > + > +/* > +** mov12: > +** vl4re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov12 (uint16_t *in, uint16_t *out) > +{ > + vuint16m4_t v = *(vuint16m4_t*)in; > + *(vuint16m4_t*)out = v; > +} > + > +/* > +** mov13: > +** vl8re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** 
vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov13 (uint16_t *in, uint16_t *out) > +{ > + vuint16m8_t v = *(vuint16m8_t*)in; > + *(vuint16m8_t*)out = v; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-3.c > new file mode 100644 > index 00000000000..a7a89db2735 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-3.c > @@ -0,0 +1,127 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include <riscv_vector.h> > + > +/* > +** mov3: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au] > +** vle32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov3 (int32_t *in, int32_t *out) > +{ > + vint32mf2_t v = *(vint32mf2_t*)in; > + *(vint32mf2_t*)out = v; > +} > + > +/* > +** mov4: > +** vl1re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov4 (int32_t *in, int32_t *out) > +{ > + vint32m1_t v = *(vint32m1_t*)in; > + *(vint32m1_t*)out = v; > +} > + > +/* > +** mov5: > +** vl2re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov5 (int32_t *in, int32_t *out) > +{ > + vint32m2_t v = *(vint32m2_t*)in; > + *(vint32m2_t*)out = v; > +} > + > +/* > +** mov6: > +** vl4re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov6 (int32_t *in, int32_t *out) > +{ > + vint32m4_t v = *(vint32m4_t*)in; > + *(vint32m4_t*)out = v; > +} > + > +/* > +** mov7: > +** vl8re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov7 (int32_t *in, int32_t *out) > +{ > + vint32m8_t v = *(vint32m8_t*)in; > + *(vint32m8_t*)out = v; > +} > + > +/* > +** mov8: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au] > +** vle32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov8 (uint32_t *in, uint32_t *out) > +{ > + vuint32mf2_t v = *(vuint32mf2_t*)in; > + *(vuint32mf2_t*)out = v; > +} > + > +/* > +** mov9: > +** vl1re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov9 (uint32_t *in, uint32_t *out) > +{ > + vuint32m1_t v = *(vuint32m1_t*)in; > + *(vuint32m1_t*)out = v; > +} > + > +/* > +** mov10: > +** vl2re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** 
vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov10 (uint32_t *in, uint32_t *out) > +{ > + vuint32m2_t v = *(vuint32m2_t*)in; > + *(vuint32m2_t*)out = v; > +} > + > +/* > +** mov11: > +** vl4re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov11 (uint32_t *in, uint32_t *out) > +{ > + vuint32m4_t v = *(vuint32m4_t*)in; > + *(vuint32m4_t*)out = v; > +} > + > +/* > +** mov12: > +** vl8re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov12 (uint32_t *in, uint32_t *out) > +{ > + vuint32m8_t v = *(vuint32m8_t*)in; > + *(vuint32m8_t*)out = v; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-4.c > new file mode 100644 > index 00000000000..e8cfb4b10b4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-4.c > @@ -0,0 +1,101 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include <riscv_vector.h> > + > +/* > +** mov4: > +** vl1re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov4 (int64_t *in, int64_t *out) > +{ > + vint64m1_t v = *(vint64m1_t*)in; > + *(vint64m1_t*)out = v; > +} > + > +/* > +** mov5: > +** vl2re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov5 (int64_t *in, int64_t *out) > +{ > + vint64m2_t v = *(vint64m2_t*)in; > + *(vint64m2_t*)out = v; > +} > + > +/* > +** mov6: > +** vl4re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov6 (int64_t *in, int64_t *out) > +{ > + vint64m4_t v = *(vint64m4_t*)in; > + *(vint64m4_t*)out = v; > +} > + > +/* > +** mov7: > +** vl8re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov7 (int64_t *in, int64_t *out) > +{ > + vint64m8_t v = *(vint64m8_t*)in; > + *(vint64m8_t*)out = v; > +} > + > +/* > +** mov8: > +** vl1re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov8 (uint64_t *in, uint64_t *out) > +{ > + vuint64m1_t v = *(vuint64m1_t*)in; > + *(vuint64m1_t*)out = v; > +} > + > +/* > +** mov9: > +** vl2re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov9 (uint64_t *in, uint64_t *out) > +{ > + vuint64m2_t v = *(vuint64m2_t*)in; > + *(vuint64m2_t*)out = v; > +} > + > +/* > +** mov10: > +** 
vl4re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov10 (uint64_t *in, uint64_t *out) > +{ > + vuint64m4_t v = *(vuint64m4_t*)in; > + *(vuint64m4_t*)out = v; > +} > + > +/* > +** mov11: > +** vl8re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov11 (uint64_t *in, uint64_t *out) > +{ > + vuint64m8_t v = *(vuint64m8_t*)in; > + *(vuint64m8_t*)out = v; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-5.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-5.c > new file mode 100644 > index 00000000000..5ca232ba867 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-5.c > @@ -0,0 +1,66 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include <riscv_vector.h> > + > +/* > +** mov3: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au] > +** vle32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov3 (float *in, float *out) > +{ > + vfloat32mf2_t v = *(vfloat32mf2_t*)in; > + *(vfloat32mf2_t*)out = v; > +} > + > +/* > +** mov4: > +** vl1re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov4 (float *in, float *out) > +{ > + vfloat32m1_t v = *(vfloat32m1_t*)in; > + *(vfloat32m1_t*)out = v; > +} > + > +/* > +** mov5: > +** vl2re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov5 (float *in, float *out) > +{ > + vfloat32m2_t v = *(vfloat32m2_t*)in; > + *(vfloat32m2_t*)out = v; > +} > + > +/* > +** mov6: > +** vl4re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov6 (float *in, float *out) > +{ > + vfloat32m4_t v = *(vfloat32m4_t*)in; > + *(vfloat32m4_t*)out = v; > +} > + > +/* > +** mov7: > +** vl8re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov7 (float *in, float *out) > +{ > + vfloat32m8_t v = *(vfloat32m8_t*)in; > + *(vfloat32m8_t*)out = v; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-6.c > new file mode 100644 > index 00000000000..41fc73bb099 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-6.c > @@ -0,0 +1,53 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include <riscv_vector.h> > + > +/* > +** mov4: > +** 
vl1re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov4 (double *in, double *out) > +{ > + vfloat64m1_t v = *(vfloat64m1_t*)in; > + *(vfloat64m1_t*)out = v; > +} > + > +/* > +** mov5: > +** vl2re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov5 (double *in, double *out) > +{ > + vfloat64m2_t v = *(vfloat64m2_t*)in; > + *(vfloat64m2_t*)out = v; > +} > + > +/* > +** mov6: > +** vl4re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov6 (double *in, double *out) > +{ > + vfloat64m4_t v = *(vfloat64m4_t*)in; > + *(vfloat64m4_t*)out = v; > +} > + > +/* > +** mov7: > +** vl8re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov7 (double *in, double *out) > +{ > + vfloat64m8_t v = *(vfloat64m8_t*)in; > + *(vfloat64m8_t*)out = v; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-7.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-7.c > new file mode 100644 > index 00000000000..d4636e0adfb > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-7.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ > + > +#include <riscv_vector.h> > + > +/* This testcase is testing whether RISC-V define REGMODE_NATURAL_SIZE. 
*/ > +void foo (int8_t *in, int8_t *out) > +{ > + vint8mf2_t v = *(vint8mf2_t*)in; > + vint32mf2_t v2 = *(vint32mf2_t*)in; > + *(vint8mf2_t*)out = v; > + *(vint32mf2_t*)(out + 16) = v2; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-8.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-8.c > new file mode 100644 > index 00000000000..9447b05899d > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-8.c > @@ -0,0 +1,96 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include <riscv_vector.h> > + > +/* > +** mov1: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au] > +** vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov1 (int8_t *in, int8_t *out) > +{ > + vbool64_t v = *(vbool64_t*)in; > + *(vbool64_t*)out = v; > +} > + > +/* > +** mov2: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au] > +** vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov2 (int8_t *in, int8_t *out) > +{ > + vbool32_t v = *(vbool32_t*)in; > + *(vbool32_t*)out = v; > +} > + > +/* > +** mov3: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au] > +** vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov3 (int8_t *in, int8_t *out) > +{ > + vbool16_t v = *(vbool16_t*)in; > + *(vbool16_t*)out = v; > +} > + > +/* > +** mov4: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*m1,\s*t[au],\s*m[au] > +** vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov4 (int8_t *in, int8_t *out) > +{ > + vbool8_t v = *(vbool8_t*)in; > + *(vbool8_t*)out = v; > +} > + > +/* > +** mov5: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*m2,\s*t[au],\s*m[au] > +** vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov5 (int8_t *in, int8_t *out) > +{ > + vbool4_t v = *(vbool4_t*)in; > + *(vbool4_t*)out = v; > +} > + > +/* > +** mov6: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*m4,\s*t[au],\s*m[au] > +** vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov6 (int8_t *in, int8_t *out) > +{ > + vbool2_t v = *(vbool2_t*)in; > + *(vbool2_t*)out = v; > +} > + > +/* > +** mov7: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*m8,\s*t[au],\s*m[au] > +** vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** 
vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov7 (int8_t *in, int8_t *out) > +{ > + vbool1_t v = *(vbool1_t*)in; > + *(vbool1_t*)out = v; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c > new file mode 100644 > index 00000000000..6d39e3c0f4d > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c > @@ -0,0 +1,44 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include <riscv_vector.h> > + > +/* Test tieable of RVV types with same LMUL. */ > +/* > +** mov1: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au] > +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),1 > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),2 > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov1 (int8_t *in, int8_t *out, int M) > +{ > + vint8mf2_t v1 = *(vint8mf2_t*)(in); > + vint16mf2_t v2 = *(vint16mf2_t*)(in); > + vint32mf2_t v3 = *(vint32mf2_t*)(in); > + *(vint8mf2_t*)(out) = v1; > + *(vint16mf2_t*)(out + 1) = v2; > + *(vint32mf2_t*)(out + 2) = v3; > +} > + > +/* > +** mov2: > +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au] > +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),1 > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) > +** ret > +*/ > +void mov2 (int8_t *in, int8_t *out, int M) > +{ > + vint8mf4_t v1 = *(vint8mf4_t*)(in); > + vint16mf4_t v2 = *(vint16mf4_t*)(in); > + *(vint8mf4_t*)(out) = v1; > + *(vint16mf4_t*)(out + 1) = v2; > +} > -- > 2.36.1 >
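A note on the testing scheme used throughout the testcases above: check-function-bodies compares each function's generated assembly against the quoted regular expressions, where (?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]) stands for any scalar register name and (?:v[0-9]|v[1-2][0-9]|v3[0-1]) for any vector register v0 through v31. The behavior the tests encode boils down to the following contrast, shown here as a minimal sketch mirroring mov-1.c (function names are illustrative; the assembly in the comments is the expectation established by the tests): whole-register modes (m1 and larger) move through dedicated vl<n>re<sew>.v/vs<n>r.v instructions with no vsetvli, while fractional modes need an explicit vsetvli before vle/vse.

#include <riscv_vector.h>

void copy_m1 (int8_t *in, int8_t *out)
{
  vint8m1_t v = *(vint8m1_t *)in;   /* vl1re8.v  v?,0(a0) */
  *(vint8m1_t *)out = v;            /* vs1r.v    v?,0(a1) */
}

void copy_mf2 (int8_t *in, int8_t *out)
{
  vint8mf2_t v = *(vint8mf2_t *)in; /* vsetvli  ?,zero,e8,mf2,ta,ma
                                       vle8.v   v?,0(a0) */
  *(vint8mf2_t *)out = v;           /* vse8.v   v?,0(a1) */
}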
On Oct 24 2022, juzhe.zhong@rivai.ai wrote:

> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-1.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-10.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-11.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-12.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-13.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-2.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-3.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-4.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-5.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-6.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-7.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-8.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c

They all fail if the ilp32d ABI is not available.
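A common way to keep such tests from failing on toolchains that lack the matching multilib is to gate them on an effective-target check rather than letting the hard-coded ABI fail; a hedged sketch of that approach (the riscv_v_ok effective-target name is hypothetical, for illustration only -- the actual fix is in the follow-up patch linked below):

/* { dg-do compile } */
/* Hypothetical guard: skip the test where the required multilib/ABI
   is not available, rather than letting it fail.  */
/* { dg-require-effective-target riscv_v_ok } */
/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */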
Hi, since these RVV testcases don't necessarily need a specific ABI configuration, I have fixed them in this patch:
https://gcc.gnu.org/pipermail/gcc-patches/2022-October/604642.html
Please verify and merge it. Thanks.

juzhe.zhong@rivai.ai

From: Andreas Schwab
Date: 2022-10-30 19:00
To: juzhe.zhong
CC: gcc-patches; kito.cheng
Subject: Re: [PATCH] RISC-V: Support load/store in mov<mode> pattern for RVV modes.

On Oct 24 2022, juzhe.zhong@rivai.ai wrote:
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-1.c
> [...]
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c

They all fail if the ilp32d ABI is not available.
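The full diff of the patch follows. One detail worth noting before reading it: mask modes (vbool*_t) take neither the whole-register path nor vle/vse; they are always moved with vlm.v/vsm.v at SEW=8, with LMUL derived from the mask ratio (see the riscv_print_operand change and the mov-8.c tests above). A minimal sketch of that contract (function name illustrative; expected assembly per mov-8.c in the comments):

#include <riscv_vector.h>

void copy_mask (int8_t *in, int8_t *out)
{
  vbool64_t v = *(vbool64_t *)in;   /* vsetvli ?,zero,e8,mf8,ta,ma
                                       vlm.v   v?,0(a0) */
  *(vbool64_t *)out = v;            /* vsm.v   v?,0(a1) */
}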
diff --git a/gcc/config.gcc b/gcc/config.gcc index 3826ae42803..0232e572a99 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -515,7 +515,7 @@ pru-*-*) ;; riscv*) cpu_type=riscv - extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o" + extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o riscv-v.o" extra_objs="${extra_objs} riscv-vector-builtins.o riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o" d_target_objs="riscv-d.o" extra_headers="riscv_vector.h" diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md index 8997284f32e..951dcc52d6b 100644 --- a/gcc/config/riscv/constraints.md +++ b/gcc/config/riscv/constraints.md @@ -128,3 +128,25 @@ "POLY_INT" (and (match_code "const_poly_int") (match_test "known_eq (rtx_to_poly_int64 (op), BYTES_PER_RISCV_VECTOR)"))) + +(define_constraint "vu" + "A undefined vector value." + (and (match_code "unspec") + (match_test "XINT (op, 1) == UNSPEC_VUNDEF"))) + +(define_constraint "vi" + "A vector 5-bit signed immediate." + (and (match_code "const_vector") + (match_test "riscv_vector::const_vec_all_same_in_range_p (op, -16, 15)"))) + +(define_constraint "Wc0" + "@internal + A constraint that matches a vector of immediate all zeros." + (and (match_code "const_vector") + (match_test "op == CONST0_RTX (GET_MODE (op))"))) + +(define_constraint "Wc1" + "@internal + A constraint that matches a vector of immediate all ones." + (and (match_code "const_vector") + (match_test "op == CONSTM1_RTX (GET_MODE (op))"))) diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index 5e149b3a95f..e2bfafe8150 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -262,3 +262,26 @@ return true; }) + +;; Predicates for the V extension. +(define_special_predicate "vector_length_operand" + (ior (match_operand 0 "pmode_register_operand") + (match_operand 0 "const_csr_operand"))) + +(define_predicate "reg_or_mem_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "memory_operand"))) + +(define_predicate "vector_move_operand" + (ior (match_operand 0 "nonimmediate_operand") + (match_code "const_vector"))) + +(define_predicate "vector_mask_operand" + (ior (match_operand 0 "register_operand") + (match_test "op == CONSTM1_RTX (GET_MODE (op))"))) + +(define_predicate "vector_merge_operand" + (ior (match_operand 0 "memory_operand") + (ior (match_operand 0 "register_operand") + (match_test "GET_CODE (op) == UNSPEC + && (XINT (op, 1) == UNSPEC_VUNDEF)")))) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index f8c9932ff89..386c0027ff4 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -76,6 +76,7 @@ extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *); extern bool riscv_gpr_save_operation_p (rtx); extern void riscv_reinit (void); extern bool riscv_v_ext_enabled_vector_mode_p (machine_mode); +extern poly_uint64 riscv_regmode_natural_size (machine_mode); /* Routines implemented in riscv-c.cc. 
*/ void riscv_cpu_cpp_builtins (cpp_reader *); @@ -126,6 +127,19 @@ extern bool verify_type_context (location_t, type_context_kind, const_tree, bool extern void handle_pragma_vector (void); extern tree builtin_decl (unsigned, bool); extern rtx expand_builtin (unsigned int, tree, rtx); +extern bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); +extern bool legitimize_move (rtx, rtx, machine_mode); +enum tail_policy +{ + TAIL_UNDISTURBED = 0, + TAIL_AGNOSTIC = 1, +}; + +enum mask_policy +{ + MASK_UNDISTURBED = 0, + MASK_AGNOSTIC = 1, +}; } /* We classify builtin types into two classes: diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc new file mode 100644 index 00000000000..6615a5c7ffe --- /dev/null +++ b/gcc/config/riscv/riscv-v.cc @@ -0,0 +1,180 @@ +/* Subroutines used for code generation for RISC-V 'V' Extension for GNU + compiler. Copyright (C) 2022-2022 Free Software Foundation, Inc. Contributed + by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "backend.h" +#include "rtl.h" +#include "insn-config.h" +#include "insn-attr.h" +#include "recog.h" +#include "alias.h" +#include "tree.h" +#include "stringpool.h" +#include "attribs.h" +#include "explow.h" +#include "memmodel.h" +#include "emit-rtl.h" +#include "tm_p.h" +#include "target.h" +#include "expr.h" +#include "optabs.h" + +using namespace riscv_vector; + +namespace riscv_vector { + +template <int MAX_OPERANDS> class insn_expander +{ +public: + insn_expander () : m_opno (0) {} + void add_output_operand (rtx x, machine_mode mode) + { + create_output_operand (&m_ops[m_opno++], x, mode); + gcc_assert (m_opno <= MAX_OPERANDS); + } + void add_input_operand (rtx x, machine_mode mode) + { + create_input_operand (&m_ops[m_opno++], x, mode); + gcc_assert (m_opno <= MAX_OPERANDS); + } + void add_all_one_mask_operand (machine_mode mode) + { + add_input_operand (CONSTM1_RTX (mode), mode); + } + void add_vundef_operand (machine_mode mode) + { + add_input_operand (gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), + UNSPEC_VUNDEF), + mode); + } + void add_policy_operand (enum tail_policy vta, enum mask_policy vma) + { + rtx tail_policy_rtx = vta == TAIL_UNDISTURBED ? const0_rtx : const1_rtx; + rtx mask_policy_rtx = vma == MASK_UNDISTURBED ? 
const0_rtx : const1_rtx; + add_input_operand (tail_policy_rtx, Pmode); + add_input_operand (mask_policy_rtx, Pmode); + } + + void expand (enum insn_code icode, bool temporary_volatile_p = false) + { + if (temporary_volatile_p) + { + temporary_volatile_ok v (true); + expand_insn (icode, m_opno, m_ops); + } + else + expand_insn (icode, m_opno, m_ops); + } + +private: + int m_opno; + expand_operand m_ops[MAX_OPERANDS]; +}; + +/* Return true if X is a const_vector with all duplicate elements, which is in + the range between MINVAL and MAXVAL. */ +bool +const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT minval, + HOST_WIDE_INT maxval) +{ + rtx elt; + return (const_vec_duplicate_p (x, &elt) && CONST_INT_P (elt) + && IN_RANGE (INTVAL (elt), minval, maxval)); +} + +/* Emit an RVV unmask && vl mov from SRC to DEST. */ +static void +emit_pred_move (rtx dest, rtx src, rtx vl, machine_mode mask_mode) +{ + insn_expander<7> e; + + machine_mode mode = GET_MODE (dest); + if (register_operand (src, mode) && register_operand (dest, mode)) + { + emit_move_insn (dest, src); + return; + } + + e.add_output_operand (dest, mode); + e.add_all_one_mask_operand (mask_mode); + /* For load operation, we create undef operand. + For store operation, we make it depend on the dest memory to + avoid potential bugs. */ + if (MEM_P (src)) + e.add_vundef_operand (mode); + else + e.add_input_operand (dest, mode); + + e.add_input_operand (src, mode); + e.add_input_operand (vl, Pmode); + + e.add_policy_operand (TAIL_AGNOSTIC, MASK_AGNOSTIC); + + enum insn_code icode; + icode = code_for_pred_mov (mode); + e.expand (icode, true); +} + +/* Expand a pre-RA RVV data move from SRC to DEST. + It expands move for RVV fractional vector modes. */ +bool +legitimize_move (rtx dest, rtx src, machine_mode mask_mode) +{ + machine_mode mode = GET_MODE (dest); + /* For whole registers load/store or register-register move, + we don't need to specially handle them, just let them go + through "*mov<mode>" and then use the codegen directly. */ + if ((known_ge (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR) + && (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)) + || (register_operand (src, mode) && register_operand (dest, mode))) + { + /* Need to force register if mem <- !reg. */ + if (MEM_P (dest) && !REG_P (src)) + src = force_reg (mode, src); + return false; + } + + rtx vlmax = gen_reg_rtx (Pmode); + unsigned int sew = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL + ? 8 + : GET_MODE_BITSIZE (GET_MODE_INNER (mode)); + emit_insn (gen_vsetvl_no_side_effects ( + Pmode, vlmax, gen_rtx_REG (Pmode, 0), gen_int_mode (sew, Pmode), + gen_int_mode ((unsigned int) mode, Pmode), const1_rtx, const1_rtx)); + + if (!register_operand (src, mode) && !register_operand (dest, mode)) + { + rtx tmp = gen_reg_rtx (mode); + if (MEM_P (src)) + emit_pred_move (tmp, src, vlmax, mask_mode); + else + emit_move_insn (tmp, src); + src = tmp; + } + emit_pred_move (dest, src, vlmax, mask_mode); + return true; +} + +} // namespace riscv_vector diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc index 8582c0cae4c..231b63a610d 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc @@ -72,18 +72,8 @@ public: e.add_input_operand (Pmode, gen_int_mode (GET_MODE_BITSIZE (inner_mode), Pmode)); - /* LMUL. 
Define the bitmap rule as follows: - | 4 | 3 2 1 0 | - | fractional_p | factor | - */ - bool fractional_p = known_lt (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR); - unsigned int factor - = fractional_p ? exact_div (BYTES_PER_RISCV_VECTOR, GET_MODE_SIZE (mode)) - .to_constant () - : exact_div (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR) - .to_constant (); - e.add_input_operand (Pmode, - gen_int_mode ((fractional_p << 4) | factor, Pmode)); + /* LMUL. */ + e.add_input_operand (Pmode, gen_int_mode ((unsigned int) mode, Pmode)); /* TA. */ e.add_input_operand (Pmode, gen_int_mode (1, Pmode)); diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index f7694ba043c..98374a922d1 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -4150,14 +4150,42 @@ riscv_print_operand (FILE *file, rtx op, int letter) switch (letter) { case 'm': { - if (code == CONST_INT) + if (riscv_v_ext_vector_mode_p (mode)) { - /* LMUL. Define the bitmap rule as follows: - | 4 | 3 2 1 0 | - | fractional_p | factor | - */ - bool fractional_p = (UINTVAL (op) >> 4) & 0x1; - unsigned int factor = UINTVAL (op) & 0xf; + /* Calculate lmul according to mode and print the value. */ + poly_int64 size = GET_MODE_SIZE (mode); + unsigned int lmul; + if (known_lt (size, BYTES_PER_RISCV_VECTOR)) + lmul = 1; + else + lmul = exact_div (size, BYTES_PER_RISCV_VECTOR).to_constant (); + asm_fprintf (file, "%d", lmul); + } + else if (code == CONST_INT) + { + /* The value in the operand is the unsigned int value + converted from (enum machine_mode). + This RTX is generated as follows: + + machine_mode mode = XXXmode; + operand = gen_int_mode ((unsigned int)mode, Pmode); + + So we convert it back into machine_mode and then calculate + the LMUL according to GET_MODE_SIZE. */ + + machine_mode rvv_mode = (machine_mode) UINTVAL (op); + /* For rvv mask modes, we can not calculate LMUL simpily according + to BYTES_PER_RISCV_VECTOR. When rvv_mode = VNx4BImode. + Set SEW = 8, LMUL = 1 by default if TARGET_MIN_VLEN == 32. + Set SEW = 8, LMUL = 1 / 2 by default if TARGET_MIN_VLEN > 32. */ + bool bool_p = GET_MODE_CLASS (rvv_mode) == MODE_VECTOR_BOOL; + poly_int64 m1_size = BYTES_PER_RISCV_VECTOR; + poly_int64 rvv_size + = bool_p ? GET_MODE_NUNITS (rvv_mode) : GET_MODE_SIZE (rvv_mode); + bool fractional_p = known_lt (rvv_size, BYTES_PER_RISCV_VECTOR); + unsigned int factor + = fractional_p ? exact_div (m1_size, rvv_size).to_constant () + : exact_div (rvv_size, m1_size).to_constant (); asm_fprintf (file, "%s%d", fractional_p ? "mf" : "m", factor); } else @@ -4165,7 +4193,15 @@ riscv_print_operand (FILE *file, rtx op, int letter) break; } case 'p': { - if (code == CONST_INT) + if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) + { + /* Print for RVV mask operand. + If op is reg, print ",v0.t". + Otherwise, don't print anything. */ + if (code == REG) + fprintf (file, ",%s.t", reg_names[REGNO (op)]); + } + else if (code == CONST_INT) { /* Tail && Mask policy. */ bool agnostic_p = UINTVAL (op) & 0x1; @@ -6421,6 +6457,21 @@ riscv_vector_alignment (const_tree type) return wi::umin (min_size, 128).to_uhwi (); } +/* Implement REGMODE_NATURAL_SIZE. */ + +poly_uint64 +riscv_regmode_natural_size (machine_mode mode) +{ + /* The natural size for RVV data modes is one RVV data vector, + and similarly for predicates. We can't independently modify + anything smaller than that. */ + /* ??? For now, only do this for variable-width RVV registers. + Doing it for constant-sized registers breaks lower-subreg.c. 
*/ + if (!riscv_vector_chunks.is_constant () && riscv_v_ext_vector_mode_p (mode)) + return BYTES_PER_RISCV_VECTOR; + return UNITS_PER_WORD; +} + /* Initialize the GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 37363e975e1..9dbc8463591 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -1080,4 +1080,6 @@ extern void riscv_remove_unneeded_save_restore_calls (void); #define REGISTER_TARGET_PRAGMAS() riscv_register_pragmas () +#define REGMODE_NATURAL_SIZE(MODE) riscv_regmode_natural_size (MODE) + #endif /* ! GCC_RISCV_H */ diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index b3654915fde..eb52e0e8b59 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -152,7 +152,14 @@ (const_string "unknown")) ;; Main data type used by the insn -(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,HF,SF,DF,TF" +(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,HF,SF,DF,TF, + VNx1BI,VNx2BI,VNx4BI,VNx8BI,VNx16BI,VNx32BI,VNx64BI, + VNx1QI,VNx2QI,VNx4QI,VNx8QI,VNx16QI,VNx32QI,VNx64QI, + VNx1HI,VNx2HI,VNx4HI,VNx8HI,VNx16HI,VNx32HI, + VNx1SI,VNx2SI,VNx4SI,VNx8SI,VNx16SI, + VNx1DI,VNx2DI,VNx4DI,VNx8DI, + VNx1SF,VNx2SF,VNx4SF,VNx8SF,VNx16SF, + VNx1DF,VNx2DF,VNx4DF,VNx8DF" (const_string "unknown")) ;; True if the main data type is twice the size of a word. diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index 8f67676cad7..7997db3d898 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -63,6 +63,10 @@ riscv-selftests.o: $(srcdir)/config/riscv/riscv-selftests.cc $(COMPILE) $< $(POSTCOMPILE) +riscv-v.o: $(srcdir)/config/riscv/riscv-v.cc + $(COMPILE) $< + $(POSTCOMPILE) + PASSES_EXTRA += $(srcdir)/config/riscv/riscv-passes.def $(common_out_file): $(srcdir)/config/riscv/riscv-cores.def \ diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md new file mode 100644 index 00000000000..1255e33a6f8 --- /dev/null +++ b/gcc/config/riscv/vector-iterators.md @@ -0,0 +1,58 @@ +;; Iterators for RISC-V 'V' Extension for GNU compiler. +;; Copyright (C) 2022-2022 Free Software Foundation, Inc. +;; Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
+ +(define_mode_iterator V [ + VNx1QI VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") + VNx1HI VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") + VNx1SI VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") + VNx1DI VNx2DI VNx4DI (VNx8DI "TARGET_MIN_VLEN > 32") + (VNx1SF "TARGET_VECTOR_ELEN_FP_32") + (VNx2SF "TARGET_VECTOR_ELEN_FP_32") + (VNx4SF "TARGET_VECTOR_ELEN_FP_32") + (VNx8SF "TARGET_VECTOR_ELEN_FP_32") + (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (VNx1DF "TARGET_VECTOR_ELEN_FP_64") + (VNx2DF "TARGET_VECTOR_ELEN_FP_64") + (VNx4DF "TARGET_VECTOR_ELEN_FP_64") + (VNx8DF "TARGET_VECTOR_ELEN_FP_64") +]) + +(define_mode_iterator VB [ + VNx1BI VNx2BI VNx4BI VNx8BI VNx16BI VNx32BI + (VNx64BI "TARGET_MIN_VLEN > 32") +]) + +(define_mode_attr VM [ + (VNx1QI "VNx1BI") (VNx2QI "VNx2BI") (VNx4QI "VNx4BI") (VNx8QI "VNx8BI") (VNx16QI "VNx16BI") (VNx32QI "VNx32BI") (VNx64QI "VNx64BI") + (VNx1HI "VNx1BI") (VNx2HI "VNx2BI") (VNx4HI "VNx4BI") (VNx8HI "VNx8BI") (VNx16HI "VNx16BI") (VNx32HI "VNx32BI") + (VNx1SI "VNx1BI") (VNx2SI "VNx2BI") (VNx4SI "VNx4BI") (VNx8SI "VNx8BI") (VNx16SI "VNx16BI") + (VNx1DI "VNx1BI") (VNx2DI "VNx2BI") (VNx4DI "VNx4BI") (VNx8DI "VNx8BI") + (VNx1SF "VNx1BI") (VNx2SF "VNx2BI") (VNx4SF "VNx4BI") (VNx8SF "VNx8BI") (VNx16SF "VNx16BI") + (VNx1DF "VNx1BI") (VNx2DF "VNx2BI") (VNx4DF "VNx4BI") (VNx8DF "VNx8BI") +]) + +(define_mode_attr sew [ + (VNx1QI "8") (VNx2QI "8") (VNx4QI "8") (VNx8QI "8") (VNx16QI "8") (VNx32QI "8") (VNx64QI "8") + (VNx1HI "16") (VNx2HI "16") (VNx4HI "16") (VNx8HI "16") (VNx16HI "16") (VNx32HI "16") + (VNx1SI "32") (VNx2SI "32") (VNx4SI "32") (VNx8SI "32") (VNx16SI "32") + (VNx1DI "64") (VNx2DI "64") (VNx4DI "64") (VNx8DI "64") + (VNx1SF "32") (VNx2SF "32") (VNx4SF "32") (VNx8SF "32") (VNx16SF "32") + (VNx1DF "64") (VNx2DF "64") (VNx4DF "64") (VNx8DF "64") +]) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 82ce902c0fb..19bb27560f8 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -26,10 +26,71 @@ ;; - Auto-vectorization (TBD) ;; - Combine optimization (TBD) +(include "vector-iterators.md") + (define_c_enum "unspec" [ UNSPEC_VSETVL + UNSPEC_VUNDEF + UNSPEC_VPREDICATE ]) +;; ----------------------------------------------------------------- +;; ---- Miscellaneous Operations +;; ----------------------------------------------------------------- + +(define_insn "vundefined<mode>" + [(set (match_operand:V 0 "register_operand" "=vr") + (unspec:V [(const_int 0)] UNSPEC_VUNDEF))] + "TARGET_VECTOR" + "") + +;; ----------------------------------------------------------------- +;; ---- Moves Operations +;; ----------------------------------------------------------------- + +(define_expand "mov<mode>" + [(set (match_operand:V 0 "reg_or_mem_operand") + (match_operand:V 1 "vector_move_operand"))] + "TARGET_VECTOR" +{ + if (riscv_vector::legitimize_move (operands[0], operands[1], <VM>mode)) + DONE; +}) + +;; This pattern is used for code-gen for whole register load/stores. +;; Also applicable for all register moves. +;; Fractional vector modes load/store are not allowed to match this pattern. +;; Mask modes load/store are not allowed to match this pattern. 
+(define_insn "*mov<mode>" + [(set (match_operand:V 0 "reg_or_mem_operand" "=vr,m,vr") + (match_operand:V 1 "reg_or_mem_operand" "m,vr,vr"))] + "TARGET_VECTOR && ((register_operand (operands[0], <MODE>mode) + && register_operand (operands[1], <MODE>mode)) + || known_ge (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR))" + "@ + vl%m1re<sew>.v\t%0,%1 + vs%m1r.v\t%1,%0 + vmv%m1r.v\t%0,%1" + [(set_attr "type" "vldr,vstr,vmov") + (set_attr "mode" "<MODE>")]) + +(define_expand "mov<mode>" + [(set (match_operand:VB 0 "reg_or_mem_operand") + (match_operand:VB 1 "vector_move_operand"))] + "TARGET_VECTOR" +{ + if (riscv_vector::legitimize_move (operands[0], operands[1], <MODE>mode)) + DONE; +}) + +(define_insn "*mov<mode>" + [(set (match_operand:VB 0 "register_operand" "=vr") + (match_operand:VB 1 "register_operand" "vr"))] + "TARGET_VECTOR" + "vmv1r.v\t%0,%1" + [(set_attr "type" "vmov") + (set_attr "mode" "<MODE>")]) + ;; ----------------------------------------------------------------- ;; ---- 6. Configuration-Setting Instructions ;; ----------------------------------------------------------------- @@ -50,13 +111,98 @@ ;; operands[3]: LMUL ;; operands[4]: Tail policy 0 or 1 (undisturbed/agnostic) ;; operands[5]: Mask policy 0 or 1 (undisturbed/agnostic) + +;; We define 2 types of "vsetvl*" instruction patterns: + +;; - "@vsetvl<mode>" is a parallel format which has side effects. + +;; - "@vsetvl<mode>_no_side_effects" has no side effects. + +;; - "@vsetvl<mode>" is used by "vsetvl" intrinsics and "insert-vsetvl" PASS. + +;; - "@vsetvl<mode>_no_side_effects" is used by GCC standard patterns. + +;; - "@vsetvl<mode>" includes VL/VTYPE global registers status (define set) +;; and each RVV instruction includes VL/VTYPE global registers status (use) +;; so that we can guarantee each RVV instruction can execute with correct +;; VL/VTYPE global registers status after "insert-vsetvl" PASS. + +;; - "@vsetvl<mode>_no_side_effects" has no side effects and excludes VL/VTYPE +;; global registers status (define set). It's only used by GCC standard pattern +;; expansion. For example: "mov<mode>" pattern for fractional vector modes which +;; need to set VL/VTYPE. Then we could manually call this pattern to gain benefits +;; from the optimization of each GCC internal PASS. + +;; 1. void foo (float *in, float *out) +;; { +;; vfloat32mf2_t v = *(vfloat32mf2_t*)in; +;; *(vfloat32mf2_t*)out = v; +;; } +;; We could eliminate the second "vsetvl" by calling "@vsetvl<mode>_no_side_effects". +;; +;; "@vsetvl<mode>": ;; "@vsetvl<mode>_no_side_effects": +;; vsetvli a4,zero,e32,mf2,ta,ma ;; vsetvli a4,zero,e32,mf2,ta,ma +;; vle32.v v24,(a0) ;; vle32.v v24,(a0) +;; vsetvli a4,zero,e32,mf2,ta,ma ;; -- +;; vse32.v v24,(a1) ;; vse32.v v24,(a1) +;; ret ;; ret + +;; 2. void foo (int8_t *in, int8_t *out, int M) +;; { +;; for (int i = 0; i < M; i++){ +;; vint8mf2_t v = *(vint8mf2_t*)(in + i); +;; *(vint8mf2_t*)(out + i) = v; +;; } +;; } +;; +;; Hoist "vsetvl" instruction in LICM: +;; "@vsetvl<mode>": ;; "@vsetvl<mode>_no_side_effects": +;; - ;; vsetvli a4,zero,e32,mf2,ta,ma +;; LOOP: ;; LOOP: +;; vsetvli a4,zero,e32,mf2,ta,ma ;; - +;; vle32.v v24,(a0) ;; vle32.v v24,(a0) +;; vsetvli a4,zero,e32,mf2,ta,ma ;; - +;; vse32.v v24,(a1) ;; vse32.v v24,(a1) + +;; However, it may produce wrong codegen if we exclude VL/VTYPE in "vsevl<mode>". +;; 3. 
void foo (int8_t *in, int8_t *out, int32_t *in2, int32_t *out2, int M) +;; { +;; for (int i = 0; i < M; i++){ +;; vint8mf2_t v = *(vint8mf2_t*)(in + i); +;; vint32mf2_t v2 = *(vint32mf2_t*)(in + i + i); +;; *(vint8mf2_t*)(out + i) = v; +;; *(vint32mf2_t*)(out + i + i) = v2; +;; } +;; } +;; +;; vsetvli a6,zero,e8,mf2,ta,ma +;; vsetvli a2,zero,e32,mf2,ta,ma +;; LOOP: +;; vle8.v v25,(a0) +;; vle32.v v24,(a5) +;; addi a0,a0,1 +;; vse8.v v25,(a1) +;; vse32.v v24,(a3) +;; +;; Both vle8.v and vle32.v are using the wrong VL/VTYPE status. +;; We leave it to "insert-vsetvl" PASS to correct this situation. + +;; The "insert-vsetvl" PASS mechanism: +;; 1. Before "insert-vsetvl" PASS, only RVV instructions are generated +;; by GCC standard pattern expansion has the corresponding "vsetvl". +;; We exploit each GCC internal optimization pass to optimize the "vsetvl". +;; 2. Correct the VL/VTYPE status for each GCC standard pattern RVV instructions. +;; Insert vsetvl for each RVV instructions that has no VL/VTYPE status if necessary. +;; For example: RVV intrinsics. +;; 3. Optimize "vsetvl" instructions. + (define_insn "@vsetvl<mode>" - [(set (match_operand:P 0 "register_operand" "=r,r") - (unspec:P [(match_operand:P 1 "csr_operand" "r,K") - (match_operand 2 "const_int_operand" "i,i") - (match_operand 3 "const_int_operand" "i,i") - (match_operand 4 "const_int_operand" "i,i") - (match_operand 5 "const_int_operand" "i,i")] UNSPEC_VSETVL)) + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P [(match_operand:P 1 "csr_operand" "rK") + (match_operand 2 "const_int_operand" "i") + (match_operand 3 "const_int_operand" "i") + (match_operand 4 "const_int_operand" "i") + (match_operand 5 "const_int_operand" "i")] UNSPEC_VSETVL)) (set (reg:SI VL_REGNUM) (unspec:SI [(match_dup 1) (match_dup 2) @@ -70,3 +216,124 @@ "vset%i1vli\t%0,%1,e%2,%m3,t%p4,m%p5" [(set_attr "type" "vsetvl") (set_attr "mode" "<MODE>")]) + +;; We keep it as no side effects before reload_completed. +;; In this case, we can gain benefits from different GCC +;; internal PASS such as cprop, fwprop, combine,...etc. + +;; Then recover it for "insert-vsetvl" and "sched2" PASS +;; in order to get correct codegen. +(define_insn_and_split "@vsetvl<mode>_no_side_effects" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P [(match_operand:P 1 "csr_operand" "rK") + (match_operand 2 "const_int_operand" "i") + (match_operand 3 "const_int_operand" "i") + (match_operand 4 "const_int_operand" "i") + (match_operand 5 "const_int_operand" "i")] UNSPEC_VSETVL))] + "TARGET_VECTOR" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (unspec:P [(match_dup 1) (match_dup 2) (match_dup 3) + (match_dup 4) (match_dup 5)] UNSPEC_VSETVL)) + (set (reg:SI VL_REGNUM) + (unspec:SI [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_VSETVL)) + (set (reg:SI VTYPE_REGNUM) + (unspec:SI [(match_dup 2) (match_dup 3) (match_dup 4) + (match_dup 5)] UNSPEC_VSETVL))])] + "" + [(set_attr "type" "vsetvl") + (set_attr "mode" "<MODE>")]) + +;; RVV machine description matching format +;; (define_insn "" +;; [(set (match_operand:MODE 0) +;; (if_then_else:MODE +;; (unspec:<MODE:VM> +;; [(match_operand:<VM> 1 "vector_mask_operand") +;; (match_operand N + 4 "vector_length_operand") +;; (match_operand N + 5 "const_int_operand") +;; (match_operand N + 6 "const_int_operand") +;; (reg:SI VL_REGNUM) +;; (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) +;; (instruction operation:MODE +;; (match_operand 3 +;; (match_operand 4 +;; (match_operand 5 +;; ................ 
+;;	     (match_operand N + 3)
+;;	  (match_operand:MODE 2 "vector_reg_or_const0_operand")))]
+;;
+;; (unspec:[........] UNSPEC_VPREDICATE) is a predicate wrapper.
+;; It includes the mask predicate && the length predicate && the vector policies.
+
+;; -------------------------------------------------------------------------------
+;; ---- Predicated Mov
+;; -------------------------------------------------------------------------------
+;; Includes:
+;; - 7.4. Vector Unit-Stride Instructions
+;; - 11.16 Vector Integer Move Instructions
+;; - 13.16 Vector Floating-Point Move Instruction
+;; - 15.1 Vector Mask-Register Logical Instructions
+;; -------------------------------------------------------------------------------
+
+;; vle.v/vse.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f.
+;; For vle.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f, we may need merge and mask operands.
+;; For vse.v, we don't need a merge operand, so it should always match "vu".
+;; Constraint alternatives 0 ~ 1 match vle.v.
+;; Constraint alternative 2 matches vse.v.
+;; Constraint alternative 3 matches vmv.v.v.
+;; Constraint alternative 4 matches vmv.v.i.
+;; For vmv.v.i, we allow the 2 following cases:
+;;    1. (const_vector:VNx1QI repeat [
+;;		(const_int:QI N)]), -15 <= N < 16.
+;;    2. (const_vector:VNx1SF repeat [
+;;		(const_double:SF 0.0 [0x0.0p+0])]).
+(define_insn "@pred_mov<mode>"
+  [(set (match_operand:V 0 "nonimmediate_operand"        "=vd,    vr,     m,    vr,    vr")
+	(if_then_else:V
+	  (unspec:<VM>
+	    [(match_operand:<VM> 1 "vector_mask_operand" " vm,   Wc1, vmWc1, vmWc1,   Wc1")
+	     (match_operand 4 "vector_length_operand"    " rK,    rK,    rK,    rK,    rK")
+	     (match_operand 5 "const_int_operand"        "  i,     i,     i,     i,     i")
+	     (match_operand 6 "const_int_operand"        "  i,     i,     i,     i,     i")
+	     (reg:SI VL_REGNUM)
+	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+	  (match_operand:V 3 "vector_move_operand"       "  m,     m,    vr,    vr, viWc0")
+	  (match_operand:V 2 "vector_merge_operand"      "  0,    vu,     0,   vu0,   vu0")))]
+  "TARGET_VECTOR"
+  "@
+   vle<sew>.v\t%0,%3%p1
+   vle<sew>.v\t%0,%3%p1
+   vse<sew>.v\t%3,%0%p1
+   vmv.v.v\t%0,%3
+   vmv.v.i\t%0,%v3"
+  [(set_attr "type" "vlde,vlde,vste,vimov,vimov")
+   (set_attr "mode" "<MODE>")])
+
+;; vlm.v/vsm.v/vmclr.m/vmset.m.
+;; Constraint alternative 0 matches vlm.v.
+;; Constraint alternative 1 matches vsm.v.
+;; Constraint alternative 2 matches vmclr.m.
+;; Constraint alternative 3 matches vmset.m.
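+;; For illustration only (a rough sketch, not taken verbatim from this patch):
+;; the Wc0/Wc1 alternatives of operand 3 below are selected for constant
+;; all-zeros/all-ones masks, approximately of the form:
+;;    1. (const_vector:VNx1BI repeat [
+;;		(const_int 0)]) matches Wc0 and emits vmclr.m.
+;;    2. (const_vector:VNx1BI repeat [
+;;		(const_int 1)]) matches Wc1 and emits vmset.m.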
+(define_insn "@pred_mov<mode>" + [(set (match_operand:VB 0 "nonimmediate_operand" "=vr, m, vr, vr") + (if_then_else:VB + (unspec:VB + [(match_operand:VB 1 "vector_mask_operand" "Wc1, Wc1, Wc1, Wc1") + (match_operand 4 "vector_length_operand" " rK, rK, rK, rK") + (match_operand 5 "const_int_operand" " i, i, i, i") + (match_operand 6 "const_int_operand" " i, i, i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (match_operand:VB 3 "vector_move_operand" " m, vr, Wc0, Wc1") + (match_operand:VB 2 "vector_merge_operand" " vu, 0, vu, vu")))] + "TARGET_VECTOR" + "@ + vlm.v\t%0,%3 + vsm.v\t%3,%0 + vmclr.m\t%0 + vmset.m\t%0" + [(set_attr "type" "vldm,vstm,vmalu,vmalu") + (set_attr "mode" "<MODE>")]) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-1.c new file mode 100644 index 00000000000..6a235e308f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-1.c @@ -0,0 +1,179 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** mov1: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov1 (int8_t *in, int8_t *out) +{ + vint8mf8_t v = *(vint8mf8_t*)in; + *(vint8mf8_t*)out = v; +} + +/* +** mov2: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au] +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov2 (int8_t *in, int8_t *out) +{ + vint8mf4_t v = *(vint8mf4_t*)in; + *(vint8mf4_t*)out = v; +} + +/* +** mov3: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au] +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov3 (int8_t *in, int8_t *out) +{ + vint8mf2_t v = *(vint8mf2_t*)in; + *(vint8mf2_t*)out = v; +} + +/* +** mov4: +** vl1re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov4 (int8_t *in, int8_t *out) +{ + vint8m1_t v = *(vint8m1_t*)in; + *(vint8m1_t*)out = v; +} + +/* +** mov5: +** vl2re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov5 (int8_t *in, int8_t *out) +{ + vint8m2_t v = *(vint8m2_t*)in; + *(vint8m2_t*)out = v; +} + +/* +** mov6: +** vl4re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov6 (int8_t *in, int8_t *out) +{ + vint8m4_t v = *(vint8m4_t*)in; + *(vint8m4_t*)out = v; +} + +/* +** mov7: +** vl8re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** 
vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov7 (int8_t *in, int8_t *out) +{ + vint8m8_t v = *(vint8m8_t*)in; + *(vint8m8_t*)out = v; +} + +/* +** mov8: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov8 (uint8_t *in, uint8_t *out) +{ + vuint8mf8_t v = *(vuint8mf8_t*)in; + *(vuint8mf8_t*)out = v; +} + +/* +** mov9: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au] +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov9 (uint8_t *in, uint8_t *out) +{ + vuint8mf4_t v = *(vuint8mf4_t*)in; + *(vuint8mf4_t*)out = v; +} + +/* +** mov10: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au] +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov10 (uint8_t *in, uint8_t *out) +{ + vuint8mf2_t v = *(vuint8mf2_t*)in; + *(vuint8mf2_t*)out = v; +} + +/* +** mov11: +** vl1re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov11 (uint8_t *in, uint8_t *out) +{ + vuint8m1_t v = *(vuint8m1_t*)in; + *(vuint8m1_t*)out = v; +} + +/* +** mov12: +** vl2re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov12 (uint8_t *in, uint8_t *out) +{ + vuint8m2_t v = *(vuint8m2_t*)in; + *(vuint8m2_t*)out = v; +} + +/* +** mov13: +** vl4re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov13 (uint8_t *in, uint8_t *out) +{ + vuint8m4_t v = *(vuint8m4_t*)in; + *(vuint8m4_t*)out = v; +} + +/* +** mov14: +** vl8re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov14 (uint8_t *in, uint8_t *out) +{ + vuint8m8_t v = *(vuint8m8_t*)in; + *(vuint8m8_t*)out = v; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-10.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-10.c new file mode 100644 index 00000000000..10aa8297c30 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-10.c @@ -0,0 +1,385 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** mov1: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vle8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... 
+** ret +*/ +void mov1 (int8_t *in, int8_t *out) +{ + register vint8mf8_t v1 asm("v1") = *(vint8mf8_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint8mf8_t v2 asm("v2") = v1; + *(vint8mf8_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov2: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au] +** vle8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov2 (int8_t *in, int8_t *out) +{ + register vint8mf4_t v1 asm("v1") = *(vint8mf4_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint8mf4_t v2 asm("v2") = v1; + *(vint8mf4_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov3: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au] +** vle8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov3 (int8_t *in, int8_t *out) +{ + register vint8mf2_t v1 asm("v1") = *(vint8mf2_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint8mf2_t v2 asm("v2") = v1; + *(vint8mf2_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov4: +** vl1re8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov4 (int8_t *in, int8_t *out) +{ + register vint8m1_t v1 asm("v1") = *(vint8m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint8m1_t v2 asm("v2") = v1; + *(vint8m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov5: +** vl2re8\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov5 (int8_t *in, int8_t *out) +{ + register vint8m2_t v2 asm("v2") = *(vint8m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vint8m2_t v4 asm("v4") = v2; + *(vint8m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov6: +** vl4re8\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov6 (int8_t *in, int8_t *out) +{ + register vint8m4_t v4 asm("v4") = *(vint8m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vint8m4_t v8 asm("v8") = v4; + *(vint8m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov7: +** vl8re8\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... +** ret +*/ +void mov7 (int8_t *in, int8_t *out) +{ + register vint8m8_t v8 asm("v8") = *(vint8m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vint8m8_t v16 asm("v16") = v8; + *(vint8m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} + +/* +** mov8: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vle16\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov8 (int16_t *in, int16_t *out) +{ + register vint16mf4_t v1 asm("v1") = *(vint16mf4_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint16mf4_t v2 asm("v2") = v1; + *(vint16mf4_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov9: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf2,\s*t[au],\s*m[au] +** vle16\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... 
+** ret +*/ +void mov9 (int16_t *in, int16_t *out) +{ + register vint16mf2_t v1 asm("v1") = *(vint16mf2_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint16mf2_t v2 asm("v2") = v1; + *(vint16mf2_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov10: +** vl1re16\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov10 (int16_t *in, int16_t *out) +{ + register vint16m1_t v1 asm("v1") = *(vint16m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint16m1_t v2 asm("v2") = v1; + *(vint16m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov11: +** vl2re16\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov11 (int16_t *in, int16_t *out) +{ + register vint16m2_t v2 asm("v2") = *(vint16m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vint16m2_t v4 asm("v4") = v2; + *(vint16m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov12: +** vl4re16\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov12 (int16_t *in, int16_t *out) +{ + register vint16m4_t v4 asm("v4") = *(vint16m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vint16m4_t v8 asm("v8") = v4; + *(vint16m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov13: +** vl8re16\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... +** ret +*/ +void mov13 (int32_t *in, int32_t *out) +{ + register vint16m8_t v8 asm("v8") = *(vint16m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vint16m8_t v16 asm("v16") = v8; + *(vint16m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} + +/* +** mov14: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vle32\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov14 (int32_t *in, int32_t *out) +{ + register vint32mf2_t v1 asm("v1") = *(vint32mf2_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint32mf2_t v2 asm("v2") = v1; + *(vint32mf2_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov15: +** vl1re32\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov15 (int32_t *in, int32_t *out) +{ + register vint32m1_t v1 asm("v1") = *(vint32m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint32m1_t v2 asm("v2") = v1; + *(vint32m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov16: +** vl2re32\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov16 (int32_t *in, int32_t *out) +{ + register vint32m2_t v2 asm("v2") = *(vint32m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vint32m2_t v4 asm("v4") = v2; + *(vint32m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov17: +** vl4re32\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov17 (int32_t *in, int32_t *out) +{ + register vint32m4_t v4 asm("v4") = *(vint32m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vint32m4_t v8 asm("v8") = v4; + *(vint32m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov18: +** vl8re32\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... 
+** ret +*/ +void mov18 (int32_t *in, int32_t *out) +{ + register vint32m8_t v8 asm("v8") = *(vint32m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vint32m8_t v16 asm("v16") = v8; + *(vint32m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} + +/* +** mov19: +** vl1re64\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov19 (int64_t *in, int64_t *out) +{ + register vint64m1_t v1 asm("v1") = *(vint64m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint64m1_t v2 asm("v2") = v1; + *(vint64m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov20: +** vl2re64\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov20 (int64_t *in, int64_t *out) +{ + register vint64m2_t v2 asm("v2") = *(vint64m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vint64m2_t v4 asm("v4") = v2; + *(vint64m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov21: +** vl4re64\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov21 (int64_t *in, int64_t *out) +{ + register vint64m4_t v4 asm("v4") = *(vint64m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vint64m4_t v8 asm("v8") = v4; + *(vint64m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov22: +** vl8re64\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... +** ret +*/ +void mov22 (int64_t *in, int64_t *out) +{ + register vint64m8_t v8 asm("v8") = *(vint64m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vint64m8_t v16 asm("v16") = v8; + *(vint64m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-11.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-11.c new file mode 100644 index 00000000000..f8da5bb6b93 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-11.c @@ -0,0 +1,385 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** mov1: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vle8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov1 (uint8_t *in, uint8_t *out) +{ + register vuint8mf8_t v1 asm("v1") = *(vuint8mf8_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint8mf8_t v2 asm("v2") = v1; + *(vuint8mf8_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov2: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au] +** vle8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov2 (uint8_t *in, uint8_t *out) +{ + register vuint8mf4_t v1 asm("v1") = *(vuint8mf4_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint8mf4_t v2 asm("v2") = v1; + *(vuint8mf4_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov3: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au] +** vle8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... 
+** ret +*/ +void mov3 (uint8_t *in, uint8_t *out) +{ + register vuint8mf2_t v1 asm("v1") = *(vuint8mf2_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint8mf2_t v2 asm("v2") = v1; + *(vuint8mf2_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov4: +** vl1re8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov4 (uint8_t *in, uint8_t *out) +{ + register vuint8m1_t v1 asm("v1") = *(vuint8m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint8m1_t v2 asm("v2") = v1; + *(vuint8m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov5: +** vl2re8\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov5 (uint8_t *in, uint8_t *out) +{ + register vuint8m2_t v2 asm("v2") = *(vuint8m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vuint8m2_t v4 asm("v4") = v2; + *(vuint8m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov6: +** vl4re8\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov6 (uint8_t *in, uint8_t *out) +{ + register vuint8m4_t v4 asm("v4") = *(vuint8m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vuint8m4_t v8 asm("v8") = v4; + *(vuint8m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov7: +** vl8re8\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... +** ret +*/ +void mov7 (uint8_t *in, uint8_t *out) +{ + register vuint8m8_t v8 asm("v8") = *(vuint8m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vuint8m8_t v16 asm("v16") = v8; + *(vuint8m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} + +/* +** mov8: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vle16\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov8 (uint16_t *in, uint16_t *out) +{ + register vuint16mf4_t v1 asm("v1") = *(vuint16mf4_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint16mf4_t v2 asm("v2") = v1; + *(vuint16mf4_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov9: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf2,\s*t[au],\s*m[au] +** vle16\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov9 (uint16_t *in, uint16_t *out) +{ + register vuint16mf2_t v1 asm("v1") = *(vuint16mf2_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint16mf2_t v2 asm("v2") = v1; + *(vuint16mf2_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov10: +** vl1re16\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov10 (uint16_t *in, uint16_t *out) +{ + register vuint16m1_t v1 asm("v1") = *(vuint16m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint16m1_t v2 asm("v2") = v1; + *(vuint16m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov11: +** vl2re16\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov11 (uint16_t *in, uint16_t *out) +{ + register vuint16m2_t v2 asm("v2") = *(vuint16m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vuint16m2_t v4 asm("v4") = v2; + *(vuint16m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov12: +** vl4re16\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... 
+** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov12 (uint16_t *in, uint16_t *out) +{ + register vuint16m4_t v4 asm("v4") = *(vuint16m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vuint16m4_t v8 asm("v8") = v4; + *(vuint16m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov13: +** vl8re16\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... +** ret +*/ +void mov13 (uint32_t *in, uint32_t *out) +{ + register vuint16m8_t v8 asm("v8") = *(vuint16m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vuint16m8_t v16 asm("v16") = v8; + *(vuint16m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} + +/* +** mov14: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vle32\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov14 (uint32_t *in, uint32_t *out) +{ + register vuint32mf2_t v1 asm("v1") = *(vuint32mf2_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint32mf2_t v2 asm("v2") = v1; + *(vuint32mf2_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov15: +** vl1re32\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov15 (uint32_t *in, uint32_t *out) +{ + register vuint32m1_t v1 asm("v1") = *(vuint32m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint32m1_t v2 asm("v2") = v1; + *(vuint32m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov16: +** vl2re32\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov16 (uint32_t *in, uint32_t *out) +{ + register vuint32m2_t v2 asm("v2") = *(vuint32m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vuint32m2_t v4 asm("v4") = v2; + *(vuint32m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov17: +** vl4re32\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov17 (uint32_t *in, uint32_t *out) +{ + register vuint32m4_t v4 asm("v4") = *(vuint32m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vuint32m4_t v8 asm("v8") = v4; + *(vuint32m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov18: +** vl8re32\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... +** ret +*/ +void mov18 (uint32_t *in, uint32_t *out) +{ + register vuint32m8_t v8 asm("v8") = *(vuint32m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vuint32m8_t v16 asm("v16") = v8; + *(vuint32m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} + +/* +** mov19: +** vl1re64\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov19 (uint64_t *in, uint64_t *out) +{ + register vuint64m1_t v1 asm("v1") = *(vuint64m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint64m1_t v2 asm("v2") = v1; + *(vuint64m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov20: +** vl2re64\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov20 (uint64_t *in, uint64_t *out) +{ + register vuint64m2_t v2 asm("v2") = *(vuint64m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vuint64m2_t v4 asm("v4") = v2; + *(vuint64m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov21: +** vl4re64\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... 
+** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov21 (uint64_t *in, uint64_t *out) +{ + register vuint64m4_t v4 asm("v4") = *(vuint64m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vuint64m4_t v8 asm("v8") = v4; + *(vuint64m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov22: +** vl8re64\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... +** ret +*/ +void mov22 (uint64_t *in, uint64_t *out) +{ + register vuint64m8_t v8 asm("v8") = *(vuint64m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vuint64m8_t v16 asm("v16") = v8; + *(vuint64m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-12.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-12.c new file mode 100644 index 00000000000..5b8ce40b62d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-12.c @@ -0,0 +1,159 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** mov14: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vle32\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov14 (float *in, float *out) +{ + register vfloat32mf2_t v1 asm("v1") = *(vfloat32mf2_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vfloat32mf2_t v2 asm("v2") = v1; + *(vfloat32mf2_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov15: +** vl1re32\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov15 (float *in, float *out) +{ + register vfloat32m1_t v1 asm("v1") = *(vfloat32m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vfloat32m1_t v2 asm("v2") = v1; + *(vfloat32m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov16: +** vl2re32\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov16 (float *in, float *out) +{ + register vfloat32m2_t v2 asm("v2") = *(vfloat32m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vfloat32m2_t v4 asm("v4") = v2; + *(vfloat32m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov17: +** vl4re32\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov17 (float *in, float *out) +{ + register vfloat32m4_t v4 asm("v4") = *(vfloat32m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vfloat32m4_t v8 asm("v8") = v4; + *(vfloat32m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov18: +** vl8re32\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... +** ret +*/ +void mov18 (float *in, float *out) +{ + register vfloat32m8_t v8 asm("v8") = *(vfloat32m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vfloat32m8_t v16 asm("v16") = v8; + *(vfloat32m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} + +/* +** mov19: +** vl1re64\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... 
+** ret +*/ +void mov19 (uint64_t *in, uint64_t *out) +{ + register vfloat64m1_t v1 asm("v1") = *(vfloat64m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vfloat64m1_t v2 asm("v2") = v1; + *(vfloat64m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov20: +** vl2re64\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov20 (uint64_t *in, uint64_t *out) +{ + register vfloat64m2_t v2 asm("v2") = *(vfloat64m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vfloat64m2_t v4 asm("v4") = v2; + *(vfloat64m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov21: +** vl4re64\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov21 (uint64_t *in, uint64_t *out) +{ + register vfloat64m4_t v4 asm("v4") = *(vfloat64m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vfloat64m4_t v8 asm("v8") = v4; + *(vfloat64m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov22: +** vl8re64\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... +** ret +*/ +void mov22 (uint64_t *in, uint64_t *out) +{ + register vfloat64m8_t v8 asm("v8") = *(vfloat64m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vfloat64m8_t v16 asm("v16") = v8; + *(vfloat64m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-13.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-13.c new file mode 100644 index 00000000000..8c630f3bedb --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-13.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ + +#include <riscv_vector.h> + +void mov1 (int8_t *in, int8_t *out) +{ + register vint8mf8_t v1 asm("v1") = *(vint8mf8_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint8mf8_t v2 asm("v2") = v1; + asm volatile ("#":::"v2"); + *(vint8mf8_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-2.c new file mode 100644 index 00000000000..b9bdd515747 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-2.c @@ -0,0 +1,153 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** mov2: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vle16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov2 (int16_t *in, int16_t *out) +{ + vint16mf4_t v = *(vint16mf4_t*)in; + *(vint16mf4_t*)out = v; +} + +/* +** mov3: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf2,\s*t[au],\s*m[au] +** vle16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov3 (int16_t *in, int16_t *out) +{ + vint16mf2_t v = *(vint16mf2_t*)in; + *(vint16mf2_t*)out = v; +} + +/* +** mov4: +** vl1re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov4 
(int16_t *in, int16_t *out) +{ + vint16m1_t v = *(vint16m1_t*)in; + *(vint16m1_t*)out = v; +} + +/* +** mov5: +** vl2re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov5 (int16_t *in, int16_t *out) +{ + vint16m2_t v = *(vint16m2_t*)in; + *(vint16m2_t*)out = v; +} + +/* +** mov6: +** vl4re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov6 (int16_t *in, int16_t *out) +{ + vint16m4_t v = *(vint16m4_t*)in; + *(vint16m4_t*)out = v; +} + +/* +** mov7: +** vl8re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov7 (int16_t *in, int16_t *out) +{ + vint16m8_t v = *(vint16m8_t*)in; + *(vint16m8_t*)out = v; +} + +/* +** mov8: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vle16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov8 (uint16_t *in, uint16_t *out) +{ + vuint16mf4_t v = *(vuint16mf4_t*)in; + *(vuint16mf4_t*)out = v; +} + +/* +** mov9: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf2,\s*t[au],\s*m[au] +** vle16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov9 (uint16_t *in, uint16_t *out) +{ + vuint16mf2_t v = *(vuint16mf2_t*)in; + *(vuint16mf2_t*)out = v; +} + +/* +** mov10: +** vl1re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov10 (uint16_t *in, uint16_t *out) +{ + vuint16m1_t v = *(vuint16m1_t*)in; + *(vuint16m1_t*)out = v; +} + +/* +** mov11: +** vl2re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov11 (uint16_t *in, uint16_t *out) +{ + vuint16m2_t v = *(vuint16m2_t*)in; + *(vuint16m2_t*)out = v; +} + +/* +** mov12: +** vl4re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov12 (uint16_t *in, uint16_t *out) +{ + vuint16m4_t v = *(vuint16m4_t*)in; + *(vuint16m4_t*)out = v; +} + +/* +** mov13: +** vl8re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov13 (uint16_t *in, uint16_t *out) +{ + vuint16m8_t v = *(vuint16m8_t*)in; + *(vuint16m8_t*)out = v; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-3.c new file mode 100644 index 00000000000..a7a89db2735 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-3.c @@ -0,0 +1,127 @@ +/* 
{ dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** mov3: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vle32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov3 (int32_t *in, int32_t *out) +{ + vint32mf2_t v = *(vint32mf2_t*)in; + *(vint32mf2_t*)out = v; +} + +/* +** mov4: +** vl1re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov4 (int32_t *in, int32_t *out) +{ + vint32m1_t v = *(vint32m1_t*)in; + *(vint32m1_t*)out = v; +} + +/* +** mov5: +** vl2re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov5 (int32_t *in, int32_t *out) +{ + vint32m2_t v = *(vint32m2_t*)in; + *(vint32m2_t*)out = v; +} + +/* +** mov6: +** vl4re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov6 (int32_t *in, int32_t *out) +{ + vint32m4_t v = *(vint32m4_t*)in; + *(vint32m4_t*)out = v; +} + +/* +** mov7: +** vl8re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov7 (int32_t *in, int32_t *out) +{ + vint32m8_t v = *(vint32m8_t*)in; + *(vint32m8_t*)out = v; +} + +/* +** mov8: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vle32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov8 (uint32_t *in, uint32_t *out) +{ + vuint32mf2_t v = *(vuint32mf2_t*)in; + *(vuint32mf2_t*)out = v; +} + +/* +** mov9: +** vl1re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov9 (uint32_t *in, uint32_t *out) +{ + vuint32m1_t v = *(vuint32m1_t*)in; + *(vuint32m1_t*)out = v; +} + +/* +** mov10: +** vl2re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov10 (uint32_t *in, uint32_t *out) +{ + vuint32m2_t v = *(vuint32m2_t*)in; + *(vuint32m2_t*)out = v; +} + +/* +** mov11: +** vl4re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov11 (uint32_t *in, uint32_t *out) +{ + vuint32m4_t v = *(vuint32m4_t*)in; + *(vuint32m4_t*)out = v; +} + +/* +** mov12: +** vl8re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** 
vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov12 (uint32_t *in, uint32_t *out) +{ + vuint32m8_t v = *(vuint32m8_t*)in; + *(vuint32m8_t*)out = v; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-4.c new file mode 100644 index 00000000000..e8cfb4b10b4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-4.c @@ -0,0 +1,101 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** mov4: +** vl1re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov4 (int64_t *in, int64_t *out) +{ + vint64m1_t v = *(vint64m1_t*)in; + *(vint64m1_t*)out = v; +} + +/* +** mov5: +** vl2re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov5 (int64_t *in, int64_t *out) +{ + vint64m2_t v = *(vint64m2_t*)in; + *(vint64m2_t*)out = v; +} + +/* +** mov6: +** vl4re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov6 (int64_t *in, int64_t *out) +{ + vint64m4_t v = *(vint64m4_t*)in; + *(vint64m4_t*)out = v; +} + +/* +** mov7: +** vl8re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov7 (int64_t *in, int64_t *out) +{ + vint64m8_t v = *(vint64m8_t*)in; + *(vint64m8_t*)out = v; +} + +/* +** mov8: +** vl1re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov8 (uint64_t *in, uint64_t *out) +{ + vuint64m1_t v = *(vuint64m1_t*)in; + *(vuint64m1_t*)out = v; +} + +/* +** mov9: +** vl2re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov9 (uint64_t *in, uint64_t *out) +{ + vuint64m2_t v = *(vuint64m2_t*)in; + *(vuint64m2_t*)out = v; +} + +/* +** mov10: +** vl4re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov10 (uint64_t *in, uint64_t *out) +{ + vuint64m4_t v = *(vuint64m4_t*)in; + *(vuint64m4_t*)out = v; +} + +/* +** mov11: +** vl8re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov11 (uint64_t *in, uint64_t *out) +{ + vuint64m8_t v = *(vuint64m8_t*)in; + *(vuint64m8_t*)out = v; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-5.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-5.c new file mode 100644 index 00000000000..5ca232ba867 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-5.c @@ -0,0 +1,66 @@ +/* { dg-do 
compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** mov3: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vle32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov3 (float *in, float *out) +{ + vfloat32mf2_t v = *(vfloat32mf2_t*)in; + *(vfloat32mf2_t*)out = v; +} + +/* +** mov4: +** vl1re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov4 (float *in, float *out) +{ + vfloat32m1_t v = *(vfloat32m1_t*)in; + *(vfloat32m1_t*)out = v; +} + +/* +** mov5: +** vl2re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov5 (float *in, float *out) +{ + vfloat32m2_t v = *(vfloat32m2_t*)in; + *(vfloat32m2_t*)out = v; +} + +/* +** mov6: +** vl4re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov6 (float *in, float *out) +{ + vfloat32m4_t v = *(vfloat32m4_t*)in; + *(vfloat32m4_t*)out = v; +} + +/* +** mov7: +** vl8re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov7 (float *in, float *out) +{ + vfloat32m8_t v = *(vfloat32m8_t*)in; + *(vfloat32m8_t*)out = v; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-6.c new file mode 100644 index 00000000000..41fc73bb099 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-6.c @@ -0,0 +1,53 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** mov4: +** vl1re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov4 (double *in, double *out) +{ + vfloat64m1_t v = *(vfloat64m1_t*)in; + *(vfloat64m1_t*)out = v; +} + +/* +** mov5: +** vl2re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov5 (double *in, double *out) +{ + vfloat64m2_t v = *(vfloat64m2_t*)in; + *(vfloat64m2_t*)out = v; +} + +/* +** mov6: +** vl4re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov6 (double *in, double *out) +{ + vfloat64m4_t v = *(vfloat64m4_t*)in; + *(vfloat64m4_t*)out = v; +} + +/* +** mov7: +** vl8re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ 
+void mov7 (double *in, double *out)
+{
+  vfloat64m8_t v = *(vfloat64m8_t*)in;
+  *(vfloat64m8_t*)out = v;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-7.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-7.c
new file mode 100644
index 00000000000..d4636e0adfb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-7.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include <riscv_vector.h>
+
+/* This testcase tests whether RISC-V defines REGMODE_NATURAL_SIZE.  */
+void foo (int8_t *in, int8_t *out)
+{
+  vint8mf2_t v = *(vint8mf2_t*)in;
+  vint32mf2_t v2 = *(vint32mf2_t*)in;
+  *(vint8mf2_t*)out = v;
+  *(vint32mf2_t*)(out + 16) = v2;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-8.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-8.c
new file mode 100644
index 00000000000..9447b05899d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-8.c
@@ -0,0 +1,96 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <riscv_vector.h>
+
+/*
+** mov1:
+**	vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]
+**	vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	ret
+*/
+void mov1 (int8_t *in, int8_t *out)
+{
+  vbool64_t v = *(vbool64_t*)in;
+  *(vbool64_t*)out = v;
+}
+
+/*
+** mov2:
+**	vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au]
+**	vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	ret
+*/
+void mov2 (int8_t *in, int8_t *out)
+{
+  vbool32_t v = *(vbool32_t*)in;
+  *(vbool32_t*)out = v;
+}
+
+/*
+** mov3:
+**	vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au]
+**	vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	ret
+*/
+void mov3 (int8_t *in, int8_t *out)
+{
+  vbool16_t v = *(vbool16_t*)in;
+  *(vbool16_t*)out = v;
+}
+
+/*
+** mov4:
+**	vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*m1,\s*t[au],\s*m[au]
+**	vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	ret
+*/
+void mov4 (int8_t *in, int8_t *out)
+{
+  vbool8_t v = *(vbool8_t*)in;
+  *(vbool8_t*)out = v;
+}
+
+/*
+** mov5:
+**	vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*m2,\s*t[au],\s*m[au]
+**	vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	ret
+*/
+void mov5 (int8_t *in, int8_t *out)
+{
+  vbool4_t v = *(vbool4_t*)in;
+  *(vbool4_t*)out = v;
+}
+
+/*
+** mov6:
+**	vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*m4,\s*t[au],\s*m[au]
+**	vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	ret
+*/
+void mov6 (int8_t *in, int8_t *out)
+{
+  vbool2_t v = *(vbool2_t*)in;
+  *(vbool2_t*)out = v;
+}
+
+/*
+** mov7:
+**	vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*m8,\s*t[au],\s*m[au]
+**	vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	ret
+*/
+void mov7 (int8_t *in, int8_t *out)
+{
+  vbool1_t v = *(vbool1_t*)in;
+  *(vbool1_t*)out = v;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c
new file mode 100644
index 00000000000..6d39e3c0f4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <riscv_vector.h>
+
+/* Test that RVV types with the same LMUL are tieable.  */
+/*
+** mov1:
+**	vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au]
+**	vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),1
+**	vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),2
+**	vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	ret
+*/
+void mov1 (int8_t *in, int8_t *out, int M)
+{
+  vint8mf2_t v1 = *(vint8mf2_t*)(in);
+  vint16mf2_t v2 = *(vint16mf2_t*)(in);
+  vint32mf2_t v3 = *(vint32mf2_t*)(in);
+  *(vint8mf2_t*)(out) = v1;
+  *(vint16mf2_t*)(out + 1) = v2;
+  *(vint32mf2_t*)(out + 2) = v3;
+}
+
+/*
+** mov2:
+**	vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au]
+**	vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),1
+**	vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\)
+**	ret
+*/
+void mov2 (int8_t *in, int8_t *out, int M)
+{
+  vint8mf4_t v1 = *(vint8mf4_t*)(in);
+  vint16mf4_t v2 = *(vint16mf4_t*)(in);
+  *(vint8mf4_t*)(out) = v1;
+  *(vint16mf4_t*)(out + 1) = v2;
+}
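
For completeness, a sketch of the vmv.v.v/vmv.v.i alternatives of "@pred_mov<mode>", which the mov-*.c tests above do not exercise directly. This assumes the RVV intrinsic API's vmv_v_x (e.g. vmv_v_x_i32m1), which is not added by this patch, eventually expands through "@pred_mov<mode>":

#include <riscv_vector.h>

/* Splat a 5-bit immediate; under the assumptions above this is expected
   to select the vmv.v.i alternative, i.e. "vmv.v.i v<n>,5".  */
vint32m1_t splat5 (size_t vl)
{
  return vmv_v_x_i32m1 (5, vl);
}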