@@ -518,7 +518,7 @@ pru-*-*)
riscv*)
cpu_type=riscv
extra_headers="riscv_vector.h"
- extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-vector.o riscv-vector-builtins-functions.o riscv-vector-builtins.o"
+ extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-vector.o riscv-vector-builtins-functions.o riscv-vector-builtins.o riscv-insert-vsetvl.o"
d_target_objs="riscv-d.o"
target_gtfiles="$target_gtfiles \$(srcdir)/config/riscv/riscv-builtins.cc \$(srcdir)/config/riscv/riscv-vector-builtins.cc"
target_gtfiles="$target_gtfiles \$(srcdir)/config/riscv/riscv-vector-builtins-functions.cc"
@@ -97,3 +97,8 @@
(and (match_code "const_poly_int")
(match_test "CONST_POLY_INT_COEFFS (op)[0] == UNITS_PER_V_REG.coeffs[0]
&& CONST_POLY_INT_COEFFS (op)[1] == UNITS_PER_V_REG.coeffs[1]")))
+
+(define_constraint "Ws5"
+ "Signed immediate 5-bit value"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), -16, 15)")))
@@ -246,3 +246,34 @@
(define_predicate "imm5_operand"
(and (match_code "const_int")
(match_test "INTVAL (op) < 5")))
+
+;; Vector Predicates.
+
+(define_special_predicate "p_reg_or_const_csr_operand"
+ (match_code "reg, subreg, const_int")
+{
+ if (CONST_INT_P (op))
+ return satisfies_constraint_K (op);
+ return GET_MODE (op) == Pmode;
+})
+
+(define_predicate "vector_reg_or_const0_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_test "op == const0_rtx && !VECTOR_MODE_P (GET_MODE (op))")))
+
+(define_predicate "vector_move_operand"
+ (ior (match_operand 0 "nonimmediate_operand")
+ (match_code "const_vector")))
+
+(define_predicate "reg_or_mem_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "memory_operand")))
+
+(define_predicate "reg_or_simm5_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_operand 0 "const_int_operand")
+ (match_test "!FLOAT_MODE_P (GET_MODE (op)) && IN_RANGE (INTVAL (op), -16, 15)"))))
+
+(define_predicate "reg_or_const_int_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_code "const_wide_int, const_int")))
\ No newline at end of file
new file mode 100644
@@ -0,0 +1,2312 @@
+/* Insert-vsetvli pass for RISC-V 'V' Extension for GNU compiler.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define IN_TARGET_CODE 1
+#define INCLUDE_ALGORITHM 1
+#define INCLUDE_FUNCTIONAL 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "backend.h"
+#include "regs.h"
+#include "target.h"
+#include "memmodel.h"
+#include "emit-rtl.h"
+#include "df.h"
+#include "rtl-ssa.h"
+#include "predict.h"
+#include "insn-config.h"
+#include "insn-attr.h"
+#include "recog.h"
+#include "cfgrtl.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "gimple-iterator.h"
+#include "gimple-walk.h"
+#include "langhooks.h"
+#include "tree-iterator.h"
+#include "gimplify.h"
+#include "explow.h"
+#include "cfgcleanup.h"
+
+#include <map>
+#include <vector>
+#include <queue>
+#include <deque>
+#include <set>
+#include <tuple>
+
+#include "riscv-protos.h"
+#include "riscv-vector-builtins-functions.h"
+#include "riscv-vector-builtins.h"
+
+using namespace riscv_vector;
+using namespace rtl_ssa;
+
+/* This pass inserts vsetvli instructions for RVV instructions that depend on
+   VL or VTYPE.  Since the Clang/LLVM compiler already has a mature and
+   well-tested vsetvli-insertion pass, the algorithm here follows that pass.
+
+ This pass consists of 3 phases:
+
+ Phase 1 collects how each basic block affects VL/VTYPE.
+
+ Phase 2 uses the information from phase 1 to do a data flow analysis to
+ propagate the VL/VTYPE changes through the function. This gives us the
+ VL/VTYPE at the start of each basic block.
+
+ Phase 3 inserts vsetvli instructions in each basic block. Information from
+ phase 2 is used to prevent inserting a vsetvli before the first vector
+ instruction in the block if possible. */
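+
+/* For example (illustrative only, register names are arbitrary), a VLMAX
+   e32,m1 vector add ends up as something like:
+
+     vsetvli a5,zero,e32,m1
+     vadd.vv v8,v8,v9
+
+   with the vsetvli inserted by this pass.  */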
+
+enum state_enum
+{
+ STATE_UNINITIALIZED,
+ STATE_KNOWN,
+ STATE_UNKNOWN
+};
+
+enum replace_enum
+{
+ REPLACE_VL,
+ REPLACE_VTYPE
+};
+
+enum clobber_pat_enum
+{
+ MOV_CLOBBER_MEM_REG,
+ MOV_CLOBBER_REG_MEM,
+ MOV_CLOBBER_REG_REG,
+ MOV_CLOBBER_REG_CONST,
+ OTHERS
+};
+
+/* Helper functions. */
+
+static unsigned int
+get_policy_offset (rtx_insn *insn)
+{
+ unsigned int offset = 1;
+ if (GET_CODE (PATTERN (insn)) == PARALLEL)
+ {
+ if (get_attr_type (insn) == TYPE_VCMP)
+ offset = 2;
+ }
+ return offset;
+}
+
+static unsigned int
+get_vl_offset (rtx_insn *insn)
+{
+ unsigned int offset = 2;
+ if (GET_CODE (PATTERN (insn)) == PARALLEL)
+ {
+ if (get_attr_type (insn) == TYPE_VCMP)
+ offset = 3;
+ }
+ return offset;
+}
+
+static enum clobber_pat_enum
+recog_clobber_vl_vtype (rtx_insn *insn)
+{
+ /*
+ [(set (match_operand 0 "reg_or_mem_operand" "=vr,m,vr")
+ (match_operand 1 "reg_or_mem_operand" "m,vr,vr"))
+ (clobber (match_scratch:SI 2 "=&r,&r,X"))
+ (clobber (reg:SI VL_REGNUM))
+ (clobber (reg:SI VTYPE_REGNUM))]
+ */
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) != PARALLEL)
+ return OTHERS;
+
+ unsigned int len = XVECLEN (pat, 0);
+ if (len < 3)
+ return OTHERS;
+
+ if (!rtx_equal_p (
+ XVECEXP (pat, 0, len - 1),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, VTYPE_REGNUM))))
+ return OTHERS;
+
+ if (!rtx_equal_p (XVECEXP (pat, 0, len - 2),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, VL_REGNUM))))
+ return OTHERS;
+
+ extract_insn_cached (insn);
+ rtx mov_pat = gen_rtx_SET (recog_data.operand[0], recog_data.operand[1]);
+ if (!rtx_equal_p (XVECEXP (pat, 0, 0), mov_pat))
+ return OTHERS;
+
+ if (MEM_P (recog_data.operand[0]))
+ return MOV_CLOBBER_MEM_REG;
+
+ if (MEM_P (recog_data.operand[1]))
+ return MOV_CLOBBER_REG_MEM;
+
+ if (REG_P (recog_data.operand[1]))
+ return MOV_CLOBBER_REG_REG;
+
+ if (CONST_VECTOR_P (recog_data.operand[1]))
+ return MOV_CLOBBER_REG_CONST;
+
+ return OTHERS;
+}
+
+static bool
+is_vector_config_instr (rtx_insn *insn)
+{
+ return insn && INSN_P (insn) && recog_memoized (insn) >= 0 &&
+ get_attr_type (insn) == TYPE_VSETVL;
+}
+
+/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
+/// VL and only sets VTYPE.
+static bool
+is_vl_preserving_config (rtx_insn *insn)
+{
+ if (is_vector_config_instr (insn))
+ {
+ extract_insn_cached (insn);
+ return recog_data.n_operands == 1;
+ }
+ return false;
+}
+
+static bool
+rvv_insn_p (rtx_insn *insn, rtx *src)
+{
+ *src = NULL_RTX;
+ if (!insn)
+ return false;
+
+ if (!INSN_P (insn))
+ return false;
+
+ if (recog_memoized (insn) < 0)
+ return false;
+
+ if (!rvv_mode_p (rvv_translate_attr_mode (insn)))
+ return false;
+
+ if (recog_clobber_vl_vtype (insn) != OTHERS)
+ {
+ if (reload_completed)
+ {
+ *src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ return true;
+ }
+ else
+ return false;
+ }
+
+ if (GET_CODE (PATTERN (insn)) == PARALLEL)
+ *src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+
+ if (GET_CODE (PATTERN (insn)) == SET)
+ *src = SET_SRC (PATTERN (insn));
+
+ if (!*src)
+ return false;
+
+ if (GET_CODE (*src) != UNSPEC)
+ return false;
+
+ if (XINT (*src, 1) != UNSPEC_RVV)
+ return false;
+
+ return true;
+}
+
+static bool
+use_vl_p (rtx_insn *insn)
+{
+ rtx src = NULL_RTX;
+ if (!rvv_insn_p (insn, &src))
+ return false;
+
+ if (recog_clobber_vl_vtype (insn) != OTHERS)
+ return true;
+
+ if (rtx_equal_p (XVECEXP (src, 0, XVECLEN (src, 0) - 1),
+ gen_rtx_REG (SImode, VL_REGNUM)))
+ return true;
+
+ if (XVECLEN (src, 0) > 1 &&
+ rtx_equal_p (XVECEXP (src, 0, XVECLEN (src, 0) - 2),
+ gen_rtx_REG (SImode, VL_REGNUM)))
+ return true;
+
+ return false;
+}
+
+static bool
+use_vtype_p (rtx_insn *insn)
+{
+ rtx src = NULL_RTX;
+ if (!rvv_insn_p (insn, &src))
+ return false;
+
+ if (recog_clobber_vl_vtype (insn) != OTHERS)
+ return true;
+
+ if (rtx_equal_p (XVECEXP (src, 0, XVECLEN (src, 0) - 1),
+ gen_rtx_REG (SImode, VTYPE_REGNUM)))
+ return true;
+
+ return false;
+}
+
+static bool
+use_vlmax_p (rtx_insn *insn)
+{
+ rtx src = NULL_RTX;
+ unsigned int length = 0;
+
+ if (recog_clobber_vl_vtype (insn) != OTHERS)
+ return true;
+
+ if (rvv_insn_p (insn, &src))
+ length = XVECLEN (src, 0);
+
+ if (length < 2)
+ return false;
+
+ if (rtx_equal_p (XVECEXP (src, 0, length - 1),
+ gen_rtx_REG (SImode, VL_REGNUM)))
+ return rtx_equal_p (XVECEXP (src, 0, length - 2),
+ gen_rtx_REG (Pmode, X0_REGNUM));
+
+ if (length < 3)
+ return false;
+
+ return rtx_equal_p (XVECEXP (src, 0, length - 3),
+ gen_rtx_REG (Pmode, X0_REGNUM));
+}
+
+static bool
+need_vsetvli_p (rtx_insn *insn)
+{
+ rtx src = NULL_RTX;
+ if (!rvv_insn_p (insn, &src))
+ return false;
+ return true;
+}
+
+static void
+replace_op (rtx_insn *insn, rtx x, unsigned int replace)
+{
+ extract_insn_cached (insn);
+ if (replace == REPLACE_VTYPE)
+ validate_change (insn, recog_data.operand_loc[recog_data.n_operands - 1], x, false);
+
+ if (replace == REPLACE_VL && !use_vlmax_p (insn))
+ {
+ unsigned int offset = get_vl_offset (insn);
+ validate_change (insn,
+ recog_data.operand_loc[recog_data.n_operands - offset],
+ x, false);
+ }
+}
+
+static bool
+update_vl_vtype_p (rtx_insn *insn)
+{
+ if (insn && NONDEBUG_INSN_P (insn))
+ {
+ if (recog_memoized (insn) >= 0 &&
+ (get_attr_type (insn) == TYPE_VLEFF))
+ {
+ extract_insn_cached (insn);
+ if (INTVAL (recog_data.operand[recog_data.n_operands - 1]) ==
+ DO_NOT_UPDATE_VL_VTYPE)
+ return false;
+ return true;
+ }
+ if (CALL_P (insn))
+ return true;
+ if (PATTERN (insn) && (GET_CODE (PATTERN (insn)) == ASM_INPUT ||
+ GET_CODE (PATTERN (insn)) == ASM_OPERANDS ||
+ asm_noperands (PATTERN (insn)) >= 0))
+ return true;
+ }
+ return false;
+}
+
+static rtx
+get_avl_source (rtx avl, rtx_insn *rtl)
+{
+ if (!rtl || !avl)
+ return NULL_RTX;
+
+ if (optimize < 2)
+ return NULL_RTX;
+
+ insn_info *next;
+ rtx avl_source = NULL_RTX;
+
+ if (!REG_P (avl))
+ return NULL_RTX;
+
+ for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
+ {
+ next = insn->next_any_insn ();
+ if (insn->rtl () == rtl)
+ {
+ resource_info resource{GET_MODE (avl), REGNO (avl)};
+ def_lookup dl = crtl->ssa->find_def (resource, insn);
+ def_info *def = dl.prev_def (insn);
+
+ if (!def)
+ return NULL_RTX;
+
+ if (!is_a<set_info *> (def))
+ return NULL_RTX;
+
+ insn_info *def_insn = def->insn ();
+
+ if (!def_insn)
+ return NULL_RTX;
+ rtx_insn *def_rtl = def_insn->rtl ();
+
+ if (!def_rtl)
+ return NULL_RTX;
+
+ if (INSN_P (def_rtl) && single_set (def_rtl))
+ {
+ avl_source = SET_SRC (single_set (def_rtl));
+ break;
+ }
+ }
+ }
+
+ return avl_source;
+}
+
+static machine_mode
+vsew_to_int_mode (unsigned vsew)
+{
+ return vsew == 0 ? QImode : vsew == 1 ? HImode : vsew == 2 ? SImode : DImode;
+}
+
+class vinfo
+{
+private:
+ state_enum state;
+ // Fields from VTYPE.
+ uint8_t vma : 1;
+ uint8_t vta : 1;
+ uint8_t vsew : 3;
+ uint8_t vlmul : 3;
+ uint8_t all_maskop_p : 1;
+ uint8_t store_p : 1;
+ uint8_t sew_lmul_ratio_only_p : 1;
+ uint8_t scalar_move_p : 1;
+ rtx avl;
+ rtx avl_source;
+
+public:
+ vinfo ()
+ : state (STATE_UNINITIALIZED), vma (false), vta (false), vsew (0),
+ vlmul (0), all_maskop_p (false), store_p (false), sew_lmul_ratio_only_p (false),
+ scalar_move_p (false), avl (NULL_RTX), avl_source (NULL_RTX)
+ {
+ }
+
+ ~vinfo () {}
+
+ static vinfo
+ get_unknown ()
+ {
+ vinfo info;
+ info.set_unknown ();
+ return info;
+ }
+
+ bool
+ valid_p () const
+ {
+ return state != STATE_UNINITIALIZED;
+ }
+ void
+ set_unknown ()
+ {
+ state = STATE_UNKNOWN;
+ }
+ bool
+ unknown_p () const
+ {
+ return state == STATE_UNKNOWN;
+ }
+
+ bool
+ known_p () const
+ {
+ return state == STATE_KNOWN;
+ }
+
+ void
+ set_avl (rtx op)
+ {
+ avl = op;
+ state = STATE_KNOWN;
+ }
+
+ void
+ set_avl_source (rtx op)
+ {
+ avl_source = op;
+ }
+
+ bool
+ avl_const_p () const
+ {
+ return get_avl () && CONST_SCALAR_INT_P (get_avl ());
+ }
+
+ bool
+ avl_reg_p () const
+ {
+ return get_avl () && REG_P (get_avl ());
+ }
+
+ rtx
+ get_avl () const
+ {
+ gcc_assert (known_p ());
+ return avl;
+ }
+
+ bool
+ has_zero_avl () const
+ {
+ if (!known_p ())
+ return false;
+ if (get_avl () == NULL_RTX)
+ return false;
+ if (avl_const_p ())
+ return INTVAL (get_avl ()) == 0;
+ return false;
+ }
+
+ bool
+ has_nonzero_avl () const
+ {
+ if (!known_p ())
+ return false;
+ if (get_avl () == NULL_RTX)
+ return false;
+ if (avl_const_p ())
+ return INTVAL (get_avl ()) > 0;
+ if (avl_reg_p ())
+ return rtx_equal_p (get_avl (), gen_rtx_REG (Pmode, X0_REGNUM));
+ return false;
+ }
+
+ rtx
+ get_avl_source () const
+ {
+ gcc_assert (known_p ());
+ return avl_source;
+ }
+
+ unsigned int
+ get_vsew () const
+ {
+ return vsew;
+ }
+
+ enum vlmul_field_enum
+ get_vlmul () const
+ {
+ return (enum vlmul_field_enum) vlmul;
+ }
+
+ unsigned int
+ get_vta () const
+ {
+ return vta;
+ }
+
+ unsigned int
+ get_vma () const
+ {
+ return vma;
+ }
+
+ uint8_t
+ get_store_p () const
+ {
+ return store_p;
+ }
+
+ bool
+ compare_vl (const vinfo &info) const
+ {
+    /* Optimize the following case (assuming a fixed vector length of
+       128 bits, so VLMAX for e8,m1 is 16):
+         vsetvli a5, 16, e8, m1
+         ...
+         vsetvli a5, zero, e8, m1   <- redundant, requests VLMAX == 16.  */
+ if (!get_avl () || !info.get_avl ())
+ return false;
+
+ if (REG_P (get_avl ()) && REGNO (get_avl ()) == X0_REGNUM)
+ {
+ unsigned int vsew = info.get_vsew ();
+ machine_mode inner = vsew_to_int_mode (vsew);
+ machine_mode mode = riscv_vector::vector_builtin_mode (
+ as_a<scalar_mode> (inner), info.get_vlmul ());
+ if (CONST_SCALAR_INT_P (info.get_avl ()))
+ {
+ if (GET_MODE_NUNITS (mode).is_constant () &&
+ INTVAL (info.get_avl ()) ==
+ GET_MODE_NUNITS (mode).to_constant ())
+ return true;
+ }
+
+ if (REG_P (info.get_avl ()))
+ {
+ if (info.get_avl_source ())
+ {
+ if (CONST_SCALAR_INT_P (info.get_avl_source ()) &&
+ GET_MODE_NUNITS (mode).is_constant () &&
+ INTVAL (info.get_avl_source ()) ==
+ GET_MODE_NUNITS (mode).to_constant ())
+ return true;
+ if (CONST_POLY_INT_P (info.get_avl_source ()) &&
+ !GET_MODE_NUNITS (mode).is_constant () &&
+ known_eq (rtx_to_poly_int64 (info.get_avl_source ()),
+ GET_MODE_NUNITS (mode)))
+ return true;
+ }
+ }
+ }
+
+ return false;
+ }
+
+ bool
+ avl_equal_p (const vinfo &other) const
+ {
+ gcc_assert (valid_p () && other.valid_p () &&
+ "Can't compare invalid VSETVLI Infos.");
+ gcc_assert (!unknown_p () && !other.unknown_p () &&
+ "Can't compare AVL in unknown state.");
+
+ if (compare_vl (other))
+ return true;
+
+ if (other.compare_vl (*this))
+ return true;
+
+ if (rtx_equal_p (get_avl (), other.get_avl ()))
+ return true;
+
+ if (!get_avl_source () && !other.get_avl_source ())
+ return false;
+
+ if (get_avl_source () && rtx_equal_p (get_avl_source (), other.get_avl ()))
+ return true;
+
+ if (other.get_avl_source () &&
+ rtx_equal_p (other.get_avl_source (), get_avl ()))
+ return true;
+
+ return rtx_equal_p (get_avl_source (), other.get_avl_source ());
+ }
+
+ void
+ set_vma (unsigned int vma)
+ {
+ gcc_assert (valid_p () && !unknown_p () &&
+ "Can't set VTYPE for uninitialized or unknown.");
+    this->vma = vma;
+ }
+
+ void
+ set_vta (unsigned int vta)
+ {
+ gcc_assert (valid_p () && !unknown_p () &&
+ "Can't set VTYPE for uninitialized or unknown.");
+    this->vta = vta;
+ }
+
+ void
+ set_vtype (unsigned int vtype)
+ {
+ gcc_assert (valid_p () && !unknown_p () &&
+ "Can't set VTYPE for uninitialized or unknown.");
+ vma = rvv_parse_vma_field (vtype);
+ vta = rvv_parse_vta_field (vtype);
+ vsew = rvv_parse_vsew_field (vtype);
+ vlmul = rvv_parse_vlmul_field (vtype);
+ }
+
+ void
+ set_vtype (unsigned vl, unsigned vs, bool vt, bool vm, bool m_p,
+ bool st_p, bool is_scalar_move_op)
+ {
+ gcc_assert (valid_p () && !unknown_p () &&
+ "Can't set VTYPE for uninitialized or unknown.");
+ vma = vm;
+ vta = vt;
+ vsew = vs;
+ vlmul = vl;
+ all_maskop_p = m_p;
+ store_p = st_p;
+ scalar_move_p = is_scalar_move_op;
+ }
+
+  // Encode VTYPE into the binary format used by the VSETVLI instruction,
+  // i.e. the layout of the vtype CSR.
+ //
+ // Bits | Name | Description
+ // -----+------------+------------------------------------------------
+ // 7 | vma | Vector mask agnostic
+ // 6 | vta | Vector tail agnostic
+ // 5:3 | vsew[2:0] | Standard element width(SEW) setting
+ // 2:0 | vlmul[2:0] | Vector register group multiplier(LMUL) setting
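+  //
+  // For example (illustrative), SEW=32 (vsew=2) with LMUL=1 (vlmul=0),
+  // ta=1 and ma=1 encodes as (2 << 3) | 0 | 0x40 | 0x80 == 0xd0.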
+ unsigned
+ encode_vtype () const
+ {
+ gcc_assert (valid_p () && !unknown_p () && !sew_lmul_ratio_only_p &&
+ "Can't set VTYPE for uninitialized or unknown.");
+ gcc_assert (vsew >= 0 && vsew <= 7 && "Invalid SEW.");
+ unsigned vtype = (vsew << 3) | (vlmul & 0x7);
+ if (vta)
+ vtype |= 0x40;
+ if (vma)
+ vtype |= 0x80;
+
+ return vtype;
+ }
+
+ bool
+ get_sew_lmul_ratio_only_p () const
+ {
+ return sew_lmul_ratio_only_p;
+ }
+
+ bool
+ sew_equal_p (const vinfo &other) const
+ {
+ gcc_assert (valid_p () && other.valid_p () &&
+ "Can't compare invalid VSETVLI Infos.");
+ gcc_assert (!unknown_p () && !other.unknown_p () &&
+ "Can't compare VTYPE in unknown state.");
+ gcc_assert (!sew_lmul_ratio_only_p && !other.sew_lmul_ratio_only_p &&
+ "Can't compare when only LMUL/SEW ratio is valid.");
+ return vsew == other.vsew;
+ }
+
+ bool
+ vtype_equal_p (const vinfo &other) const
+ {
+ gcc_assert (valid_p () && other.valid_p () &&
+ "Can't compare invalid VSETVLI Infos.");
+ gcc_assert (!unknown_p () && !other.unknown_p () &&
+ "Can't compare VTYPE in unknown state.");
+ gcc_assert (!sew_lmul_ratio_only_p && !other.sew_lmul_ratio_only_p &&
+ "Can't compare when only LMUL/SEW ratio is valid.");
+ return std::tie (vma, vta, vsew, vlmul) ==
+ std::tie (other.vma, other.vta, other.vsew, other.vlmul);
+ }
+
+ bool
+ policy_equal_p (const vinfo &other) const
+ {
+ gcc_assert (valid_p () && other.valid_p () &&
+ "Can't compare invalid VSETVLI Infos.");
+ gcc_assert (!unknown_p () && !other.unknown_p () &&
+ "Can't compare VTYPE in unknown state.");
+
+ return vta == other.vta && vma == other.vma;
+ }
+
+ unsigned
+ calc_sew_lmul_ratio (unsigned int vsew_arg, unsigned int vlmul_arg) const
+ {
+ gcc_assert (valid_p () && !unknown_p () &&
+ "Can't use VTYPE for uninitialized or unknown.");
+
+ unsigned lmul;
+ unsigned sew;
+ bool fractional;
+
+ switch (vsew_arg)
+ {
+ default:
+ gcc_unreachable ();
+ case 0:
+ sew = 8;
+ break;
+ case 1:
+ sew = 16;
+ break;
+ case 2:
+ sew = 32;
+ break;
+ case 3:
+ sew = 64;
+ break;
+ case 4:
+ sew = 128;
+ break;
+ case 5:
+ sew = 256;
+ break;
+ case 6:
+ sew = 512;
+ break;
+ case 7:
+ sew = 1024;
+ break;
+ }
+
+ switch (vlmul_arg)
+ {
+ default:
+ gcc_unreachable ();
+ case 0:
+ lmul = 1;
+ fractional = false;
+ break;
+ case 1:
+ lmul = 2;
+ fractional = false;
+ break;
+ case 2:
+ lmul = 4;
+ fractional = false;
+ break;
+ case 3:
+ lmul = 8;
+ fractional = false;
+ break;
+ case 5:
+ lmul = 8;
+ fractional = true;
+ break;
+ case 6:
+ lmul = 4;
+ fractional = true;
+ break;
+ case 7:
+ lmul = 2;
+ fractional = true;
+ break;
+ }
+
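+    // For example (illustrative), SEW=32 (vsew_arg=2) with LMUL=1/2
+    // (vlmul_arg=7: lmul=2, fractional) gives 32 * 2 == 64, while SEW=32
+    // with LMUL=2 (vlmul_arg=1) gives 32 / 2 == 16.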
+ gcc_assert (sew >= 8 && "Unexpected SEW value.");
+ unsigned int sew_mul_ratio = fractional ? sew * lmul : sew / lmul;
+
+ return sew_mul_ratio;
+ }
+
+ unsigned
+ calc_sew_lmul_ratio () const
+ {
+ return calc_sew_lmul_ratio (vsew, vlmul);
+ }
+
+ // Check if the VTYPE for these two VSETVLI Infos produce the same VLMAX.
+ bool
+ vlmax_equal_p (const vinfo &other) const
+ {
+ gcc_assert (valid_p () && other.valid_p () &&
+ "Can't compare invalid VSETVLI Infos.");
+ gcc_assert (!unknown_p () && !other.unknown_p () &&
+ "Can't compare AVL in unknown state.");
+ return calc_sew_lmul_ratio () == other.calc_sew_lmul_ratio ();
+ }
+
+ bool
+ compatible_vtype_p (const vinfo &info) const
+ {
+ // Simple case, see if full VTYPE matches.
+ if (vtype_equal_p (info))
+ return true;
+
+ // If this is a mask reg operation, it only cares about VLMAX.
+ // FIXME: Mask reg operations are probably ok if "this" VLMAX is larger
+  // than "info".
+ // FIXME: The policy bits can probably be ignored for mask reg operations.
+ if (info.all_maskop_p && vlmax_equal_p (info) && vta == info.vta &&
+ vma == info.vma)
+ return true;
+
+ return false;
+ }
+
+  // Determine whether the vector instruction's requirements represented by
+  // REQUIRE are compatible with the previous vsetvli instruction represented
+  // by this.
+ bool
+ compatible_p (const vinfo &require) const
+ {
+ gcc_assert (valid_p () && require.valid_p () &&
+ "Can't compare invalid VSETVLI Infos.");
+ gcc_assert (!require.sew_lmul_ratio_only_p &&
+ "Expected a valid VTYPE for instruction.");
+
+ // Nothing is compatible with Unknown.
+ if (unknown_p () || require.unknown_p ())
+ return false;
+
+ // If only our VLMAX ratio is valid, then this isn't compatible.
+ if (sew_lmul_ratio_only_p)
+ return false;
+
+ // If the instruction doesn't need an AVLReg and the SEW matches, consider
+ // it compatible.
+ if (require.known_p () && require.avl == NULL_RTX
+ && vsew == require.vsew)
+ return true;
+
+    // For vmv.s.x and vfmv.s.f there are only two behaviors, VL = 0 and
+    // VL > 0, so they are compatible whenever we can prove that both VLs
+    // fall into the same case.
+ if (require.scalar_move_p && require.get_avl () &&
+ CONST_SCALAR_INT_P (require.get_avl ()) &&
+ ((has_nonzero_avl () && require.has_nonzero_avl ()) ||
+ (has_zero_avl () && require.has_zero_avl ())) &&
+ sew_equal_p (require) && policy_equal_p (require))
+ return true;
+
+ // The AVL must match.
+ if (!avl_equal_p (require))
+ return false;
+
+ if (compatible_vtype_p (require))
+ return true;
+
+ // Store instructions don't use the policy fields.
+    // TODO: Move into compatible_vtype_p?
+ if (require.store_p && vlmul == require.vlmul && vsew == require.vsew)
+ return true;
+
+ // Anything else is not compatible.
+ return false;
+ }
+
+ bool
+ load_store_compatible_p (unsigned vsew_arg, const vinfo &info) const
+ {
+ gcc_assert (valid_p () && info.valid_p () &&
+ "Can't compare invalid VSETVLI Infos.");
+ gcc_assert (!info.sew_lmul_ratio_only_p &&
+ "Expected a valid VTYPE for instruction.");
+ gcc_assert (vsew_arg == info.vsew && "Mismatched EEW/SEW for store.");
+
+ if (unknown_p () || get_sew_lmul_ratio_only_p ())
+ return false;
+
+ if (!avl_equal_p (info))
+ return false;
+
+ // Stores can ignore the tail and mask policies.
+ if (!info.store_p && (vta != info.vta || vma != info.vma))
+ return false;
+
+ return calc_sew_lmul_ratio () == calc_sew_lmul_ratio (vsew_arg, info.vlmul);
+ }
+
+ bool
+ operator== (const vinfo &other) const
+ {
+ // Uninitialized is only equal to another Uninitialized.
+ if (!valid_p ())
+ return !other.valid_p ();
+
+ if (!other.valid_p ())
+ return !valid_p ();
+
+ // Unknown is only equal to another Unknown.
+ if (unknown_p ())
+ return other.unknown_p ();
+
+ if (other.unknown_p ())
+ return unknown_p ();
+
+ if (!avl_equal_p (other))
+ return false;
+
+ // If only the VLMAX is valid, check that it is the same.
+ if (sew_lmul_ratio_only_p && other.sew_lmul_ratio_only_p)
+ return vlmax_equal_p (other);
+
+ // If the full VTYPE is valid, check that it is the same.
+ if (!sew_lmul_ratio_only_p && !other.sew_lmul_ratio_only_p)
+ return vtype_equal_p (other);
+
+ // If the sew_lmul_ratio_only bits are different, then they aren't equal.
+ return false;
+ }
+
+ bool
+ operator!= (const vinfo &Other) const
+ {
+ return !(*this == Other);
+ }
+
+ vinfo &
+ operator= (const vinfo &other)
+ {
+ state = other.state;
+ vma = other.vma;
+ vta = other.vta;
+ vsew = other.vsew;
+ vlmul = other.vlmul;
+    all_maskop_p = other.all_maskop_p;
+    store_p = other.store_p;
+    scalar_move_p = other.scalar_move_p;
+    sew_lmul_ratio_only_p = other.sew_lmul_ratio_only_p;
+ avl = other.avl;
+ avl_source = other.avl_source;
+ return *this;
+ }
+
+ // Calculate the vinfo visible to a block assuming this and other are
+ // both predecessors.
+ vinfo
+ intersect (const vinfo &other) const
+ {
+ // If the new value isn't valid, ignore it.
+ if (!other.valid_p ())
+ return *this;
+
+ // If this value isn't valid, this must be the first predecessor, use it.
+ if (!valid_p ())
+ return other;
+
+ // If either is unknown, the result is unknown.
+ if (unknown_p () || other.unknown_p ())
+ return vinfo::get_unknown ();
+
+    // If we have an exact match, return this.
+ if (*this == other)
+ return *this;
+
+ // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
+ // return an SEW/LMUL ratio only value.
+ if (avl_equal_p (other) && vlmax_equal_p (other))
+ {
+ vinfo merge_info = *this;
+ merge_info.sew_lmul_ratio_only_p = true;
+ return merge_info;
+ }
+
+ // otherwise the result is unknown.
+ return vinfo::get_unknown ();
+ }
+
+  // Print debug info into the RTL dump file.
+ void
+ print () const
+ {
+ fprintf (dump_file, "{\n");
+ if (known_p ())
+ fprintf (dump_file, " Known\n");
+ else if (unknown_p ())
+ fprintf (dump_file, " Unknown\n");
+ else
+ fprintf (dump_file, " Uninitialized\n");
+
+ if (known_p () && get_avl ())
+ {
+ fprintf (dump_file, " Avl=");
+ print_rtl_single (dump_file, get_avl ());
+ if (get_avl_source ())
+ {
+ fprintf (dump_file, " Avl Source=");
+ print_rtl_single (dump_file, get_avl_source ());
+ }
+ else
+ fprintf (dump_file, " Avl Source=(nil)\n");
+ }
+ else
+ fprintf (dump_file, " Avl=(nil)\n Avl Source=(nil)\n");
+ fprintf (dump_file, " Vsew=%d\n", (unsigned int)vsew);
+ fprintf (dump_file, " Vlmul=%d\n", (unsigned int)vlmul);
+ fprintf (dump_file, " TailAgnostic=%d\n", (unsigned int)vta);
+ fprintf (dump_file, " MaskAgnostic=%d\n", (unsigned int)vma);
+ fprintf (dump_file, " MaskOp=%d\n", (unsigned int)all_maskop_p);
+ fprintf (dump_file, " Store_p=%d\n", (unsigned int)store_p);
+ fprintf (dump_file, " Scalar_move_p=%d\n", (unsigned int)scalar_move_p);
+ fprintf (dump_file, " Sew_lmul_ratio_only_p=%d\n", (unsigned int)sew_lmul_ratio_only_p);
+ fprintf (dump_file, "}\n");
+ }
+};
+
+struct bb_vinfo
+{
+ // The vinfo that represents the net changes to the VL/VTYPE registers
+ // made by this block. Calculated in Phase 1.
+ vinfo change;
+
+ // The vinfo that represents the VL/VTYPE settings on exit from this
+ // block. Calculated in Phase 2.
+ vinfo exit;
+
+ // The vinfo that represents the VL/VTYPE settings from all predecessor
+ // blocks. Calculated in Phase 2, and used by Phase 3.
+ vinfo pred;
+
+ // Keeps track of whether the block is already in the queue.
+ bool inqueue = false;
+
+ bb_vinfo () {}
+};
+
+static std::map<unsigned int, bb_vinfo> bb_vinfo_map;
+static std::deque<basic_block> bb_queue;
+
+static rtx_insn *
+fetch_def_insn (rtx_insn *rtl, const vinfo info)
+{
+  /* We rely on the RTL-SSA def_info framework for this optimization,
+     which is only available when optimizing at level 2 or higher.  */
+ if (optimize < 2)
+ return NULL;
+
+  // If the AVL of INFO is a register, it might be defined by a VSET(I)VLI.
+  // Walk the RTL-SSA def chain to find that defining instruction so the
+  // caller can inspect it.
+ if (!info.known_p ())
+ return NULL;
+ if (!info.get_avl ())
+ return NULL;
+
+ rtx avl = info.get_avl ();
+
+ if (!REG_P (avl))
+ return NULL;
+
+ insn_info *next;
+ for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
+ {
+ next = insn->next_any_insn ();
+ if (insn->rtl () == rtl)
+ {
+ resource_info resource{GET_MODE (avl), REGNO (avl)};
+ def_lookup dl = crtl->ssa->find_def (resource, insn);
+ def_info *def = dl.prev_def (insn);
+
+ if (!def)
+ return NULL;
+
+ if (!is_a<set_info *> (def))
+ return NULL;
+
+ insn_info *def_insn = def->insn ();
+ rtx_insn *def_rtl = def_insn->rtl ();
+
+ if (!def_rtl)
+ return NULL;
+ if (!INSN_P (def_rtl))
+ return NULL;
+
+ return def_rtl;
+ }
+ }
+
+ return NULL;
+}
+
+static void
+emit_vsetvl_insn (rtx op0, rtx op1, rtx op2, rtx_insn *insn)
+{
+ if (dump_file)
+ {
+ fprintf (dump_file, "insert vsetvli for insn %d\n\n", INSN_UID (insn));
+ print_rtl_single (dump_file, insn);
+ }
+
+ if (rtx_equal_p (op0, gen_rtx_REG (Pmode, X0_REGNUM)) &&
+ rtx_equal_p (op1, gen_rtx_REG (Pmode, X0_REGNUM)))
+ emit_insn_before (gen_vsetvl_zero_zero (op2), insn);
+ else if (rtx_equal_p (op0, gen_rtx_REG (Pmode, X0_REGNUM)))
+ emit_insn_before (gen_vsetvl_zero (Pmode, op1, op2), insn);
+ else
+ emit_insn_before (gen_vsetvl (Pmode, op0, op1, op2), insn);
+}
+
+static vinfo
+compute_info_for_instr (rtx_insn *, vinfo);
+
+// Return a vinfo representing the changes made by this VSETVLI or
+// VSETIVLI instruction.
+static vinfo
+get_info_for_vsetvli (rtx_insn *insn, vinfo curr_info)
+{
+ vinfo new_info;
+ extract_insn_cached (insn);
+
+ if (recog_data.n_operands == 1)
+ {
+ gcc_assert (CONST_INT_P (recog_data.operand[0]) &&
+ "Invalid vtype in vsetvli instruction.");
+ if (curr_info.valid_p () && !curr_info.unknown_p ())
+ {
+ new_info.set_avl (curr_info.get_avl ());
+ new_info.set_avl_source (curr_info.get_avl_source ());
+ new_info.set_vtype (INTVAL (recog_data.operand[0]));
+ /* if this X0, X0 vsetvli is redundant,
+ remove it. */
+ if (curr_info.compatible_vtype_p (new_info))
+ remove_insn (insn);
+ }
+ else
+ {
+ /* vsetvli X0, X0 means that the following instruction
+             uses the same VL as before.  */
+ basic_block bb = BLOCK_FOR_INSN (insn);
+ rtx_insn *next_insn;
+ bool find_vl_p = false;
+          for (next_insn = NEXT_INSN (insn); next_insn != NEXT_INSN (BB_END (bb));
+ next_insn = NEXT_INSN (next_insn))
+ {
+ if (use_vtype_p (next_insn))
+ {
+ vinfo next_info = compute_info_for_instr (next_insn, curr_info);
+ new_info.set_avl (next_info.get_avl ());
+ new_info.set_avl_source (next_info.get_avl_source ());
+ extract_insn_cached (insn);
+ new_info.set_vtype (INTVAL (recog_data.operand[0]));
+
+ if (recog_clobber_vl_vtype (next_insn) != MOV_CLOBBER_REG_REG &&
+ recog_clobber_vl_vtype (next_insn) != OTHERS)
+ new_info = vinfo::get_unknown ();
+
+ find_vl_p = true;
+ break;
+ }
+ }
+ gcc_assert (find_vl_p);
+ }
+ return new_info;
+ }
+ if (recog_data.n_operands == 2)
+ {
+ gcc_assert (CONST_INT_P (recog_data.operand[1]) &&
+ "Invalid vtype in vsetvli instruction.");
+ new_info.set_avl (recog_data.operand[0]);
+ new_info.set_avl_source (get_avl_source (recog_data.operand[0], insn));
+ new_info.set_vtype (INTVAL (recog_data.operand[1]));
+ return new_info;
+ }
+
+ gcc_assert (recog_data.n_operands == 3);
+ rtx vl = recog_data.operand[1];
+ rtx vtype = recog_data.operand[2];
+ gcc_assert (CONST_INT_P (vtype) && "Invalid vtype in vsetvli instruction.");
+ new_info.set_avl (vl);
+ new_info.set_avl_source (get_avl_source (vl, insn));
+ new_info.set_vtype (INTVAL (vtype));
+ return new_info;
+}
+
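+/* Compute the effective tail/mask policies requested by INSN, falling back
+   to CURR_INFO when the instruction leaves a policy unspecified.  The result
+   is encoded as (vma << 1) | vta; instructions that take no explicit policy
+   operand simply return 1 (tail agnostic, mask undisturbed).  */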
+static unsigned int
+analyze_vma_vta (rtx_insn *insn, vinfo curr_info)
+{
+ if (!use_vl_p (insn))
+ return 1;
+
+ if (recog_clobber_vl_vtype (insn) != OTHERS)
+ return 1;
+
+ if (use_vlmax_p (insn))
+ return 1;
+ unsigned int offset = get_policy_offset (insn);
+ extract_insn_cached (insn);
+ vector_policy vma =
+ riscv_vector::get_vma (INTVAL (recog_data.operand[recog_data.n_operands - offset]));
+ vector_policy vta =
+ riscv_vector::get_vta (INTVAL (recog_data.operand[recog_data.n_operands - offset]));
+ unsigned int vma_p = 0;
+ unsigned int vta_p = 0;
+ if (vma == vector_policy::agnostic)
+ vma_p = 1;
+ else if (vma == vector_policy::undisturbed)
+ vma_p = 0;
+ else
+ {
+      /* For an unspecified vma, keep the last vma if it is valid.  */
+ if (curr_info.valid_p () && !curr_info.unknown_p ())
+ vma_p = curr_info.get_vma ();
+ else
+ vma_p = 0;
+ }
+
+ if (vta == vector_policy::agnostic)
+ vta_p = 1;
+ else if (vta == vector_policy::undisturbed)
+ vta_p = 0;
+ else
+ {
+      /* For an unspecified vta, keep the last vta if it is valid.  */
+ if (curr_info.valid_p () && !curr_info.unknown_p ())
+ vta_p = curr_info.get_vta ();
+ else
+ vta_p = 1;
+ }
+ return (vma_p << 1) | vta_p;
+}
+
+static bool
+scalar_move_insn_p (rtx_insn *insn)
+{
+ return insn && INSN_P (insn) && recog_memoized (insn) >= 0 &&
+ (get_attr_type (insn) == TYPE_VMV_S_X ||
+ get_attr_type (insn) == TYPE_VFMV_S_F);
+}
+
+static bool
+store_insn_p (rtx_insn *insn)
+{
+ return insn && INSN_P (insn) && recog_memoized (insn) >= 0 &&
+ (get_attr_type (insn) == TYPE_VSE ||
+ get_attr_type (insn) == TYPE_VSSE);
+}
+
+static bool
+can_skip_load_store_insn_p (rtx_insn *insn)
+{
+ return insn && INSN_P (insn) && recog_memoized (insn) >= 0 &&
+ (get_attr_type (insn) == TYPE_VSE ||
+ get_attr_type (insn) == TYPE_VSSE ||
+ get_attr_type (insn) == TYPE_VLE ||
+ get_attr_type (insn) == TYPE_VLSE);
+}
+
+static vinfo
+compute_info_for_instr (rtx_insn *insn, vinfo curr_info)
+{
+ vinfo info;
+
+ extract_insn_cached (insn);
+
+ if (use_vl_p (insn))
+ {
+ if (recog_clobber_vl_vtype (insn) != OTHERS)
+ info.set_avl (gen_rtx_REG (Pmode, X0_REGNUM));
+ else if (use_vlmax_p (insn))
+ info.set_avl (gen_rtx_REG (Pmode, X0_REGNUM));
+ else
+ {
+ unsigned int offset = get_vl_offset (insn);
+ info.set_avl_source (get_avl_source (
+ recog_data.operand[recog_data.n_operands - offset], insn));
+ info.set_avl (recog_data.operand[recog_data.n_operands - offset]);
+ }
+ }
+ else
+ info.set_avl (NULL_RTX);
+
+ machine_mode mode = rvv_translate_attr_mode (insn);
+ bool st_p = store_insn_p (insn);
+ bool scalar_move_p = scalar_move_insn_p (insn);
+
+ unsigned int vma_vta = analyze_vma_vta (insn, curr_info);
+ unsigned int vta = vma_vta & 0x1;
+ unsigned int vma = (vma_vta >> 1) & 0x1;
+ info.set_vtype (rvv_classify_vlmul_field (mode),
+ rvv_classify_vsew_field (mode),
+ /*TailAgnostic*/ vta, /*MaskAgnostic*/ vma,
+ rvv_mask_mode_p (mode), st_p, scalar_move_p);
+
+ return info;
+}
+
+static bool
+can_skip_vsetvli_for_load_store_p (rtx_insn *insn, const vinfo &new_info, const vinfo &curr_info)
+{
+ gcc_assert (recog_memoized (insn) >= 0);
+ if (!can_skip_load_store_insn_p (insn))
+ return false;
+ machine_mode mode = rvv_translate_attr_mode (insn);
+ unsigned vsew = rvv_classify_vsew_field (mode);
+ gcc_assert (store_insn_p (insn) == new_info.get_store_p ());
+ return curr_info.load_store_compatible_p (vsew, new_info);
+}
+
+static bool
+need_vsetvli (rtx_insn *insn, const vinfo &require, const vinfo &curr_info)
+{
+ if (!need_vsetvli_p (insn))
+ return false;
+
+ if (curr_info.compatible_p (require))
+ return false;
+
+ // We didn't find a compatible value. If our AVL is a virtual register,
+ // it might be defined by a VSET(I)VLI. If it has the same VTYPE we need
+ // and the last VL/VTYPE we observed is the same, we don't need a
+ // VSETVLI here.
+ if (!curr_info.unknown_p () && require.avl_reg_p () &&
+ REGNO (require.get_avl ()) >= FIRST_PSEUDO_REGISTER &&
+ !curr_info.get_sew_lmul_ratio_only_p () &&
+ curr_info.compatible_vtype_p (require))
+ {
+ rtx_insn *def_rtl = fetch_def_insn (insn, require);
+ if (def_rtl != NULL)
+ {
+ if (is_vector_config_instr (def_rtl))
+ {
+ vinfo def_info = get_info_for_vsetvli (def_rtl, curr_info);
+ if (def_info.avl_equal_p (curr_info) &&
+ def_info.vlmax_equal_p (curr_info))
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+static bool
+need_vsetvli_phi (const vinfo &new_info, rtx_insn *rtl)
+{
+ /* Optimize the case as follows:
+ void foo (int8_t *base, int8_t* out, size_t vl, unsigned int m)
+ {
+ vint8mf8_t v0;
+ size_t avl;
+ if (m > 1000)
+ avl = vsetvl_e8mf8 (vl);
+ else
+ avl = vsetvl_e8mf8 (vl << 2);
+ for (int i = 0; i < m; i++)
+ {
+ v0 = vle8_v_i8mf8 (base + i * 32,avl);
+ v0 = vadd_vv_i8mf8 (v0,v0,avl);
+ }
+ *(vint8mf8_t*)out = v0;
+ } */
+
+  /* We rely on RTL-SSA phi nodes for this optimization, which are only
+     available when optimizing at level 2 or higher.  */
+ if (optimize < 2)
+ return true;
+
+ if (!(!new_info.unknown_p () && new_info.get_avl () &&
+ GET_CODE (new_info.get_avl ()) == REG))
+ return true;
+
+ rtx avl = new_info.get_avl ();
+
+ insn_info *next;
+ /* fetch phi_node. */
+ for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
+ {
+ next = insn->next_any_insn ();
+ if (insn->rtl () == rtl)
+ {
+ bb_info *bb = insn->bb ();
+ ebb_info *ebb = bb->ebb ();
+ resource_info resource{GET_MODE (avl), REGNO (avl)};
+ insn_info *phi_insn = ebb->phi_insn ();
+ phi_info *phi;
+ def_lookup dl = crtl->ssa->find_def (resource, phi_insn);
+ def_info *set = dl.prev_def (phi_insn);
+
+ if (!set)
+ return true;
+
+ if (!is_a<phi_info *> (set))
+ return true;
+
+ // There is an existing phi.
+ phi = as_a<phi_info *> (set);
+ for (unsigned int i = 0; i < phi->num_inputs (); i++)
+ {
+ def_info *def = phi->input_value (i);
+ if (!def)
+ return true;
+ insn_info *def_insn = def->insn ();
+ rtx_insn *def_rtl = def_insn->rtl ();
+
+ if (!def_rtl)
+ return true;
+ if (!INSN_P (def_rtl))
+ return true;
+ extract_insn_cached (def_rtl);
+ if (recog_data.n_operands > 0 &&
+ rtx_equal_p (recog_data.operand[0], avl))
+ {
+ if (get_attr_type (def_rtl) &&
+ get_attr_type (def_rtl) == TYPE_VSETVL)
+ {
+ basic_block def_bb = BLOCK_FOR_INSN (def_rtl);
+ bb_vinfo info = bb_vinfo_map.at(def_bb->index);
+ // If the exit from the predecessor has the VTYPE
+ // we are looking for we might be able to avoid a
+ // VSETVLI.
+ if (info.exit.unknown_p () ||
+ !info.exit.vtype_equal_p (new_info))
+ return true;
+ // We found a VSET(I)VLI make sure it matches the
+ // output of the predecessor block.
+ vinfo curr_info;
+ vinfo avl_def_info =
+ get_info_for_vsetvli (def_rtl, curr_info);
+ if (!avl_def_info.vtype_equal_p (info.exit) ||
+ !avl_def_info.avl_equal_p (info.exit))
+ return true;
+ }
+ else
+ return true;
+ }
+ }
+ }
+ }
+
+ // If all the incoming values to the PHI checked out, we don't need
+ // to insert a VSETVLI.
+ return false;
+}
+
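+/* Phase 1 helper: simulate BB and record the net VL/VTYPE change it makes
+   in its bb_vinfo entry.  Return true if BB contains a vsetvli or an RVV
+   instruction that uses VTYPE.  */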
+static bool
+compute_vl_vtype_changes (basic_block bb)
+{
+ bool vector_p = false;
+
+ bb_vinfo &info = bb_vinfo_map[bb->index];
+ info.change = info.pred;
+ rtx_insn *insn = NULL;
+ vinfo curr_info;
+
+ FOR_BB_INSNS (bb, insn)
+ {
+ // If this is an explicit VSETVLI or VSETIVLI, update our state.
+ if (is_vector_config_instr (insn))
+ {
+ vector_p = true;
+ info.change = get_info_for_vsetvli (insn, curr_info);
+ curr_info = info.change;
+ continue;
+ }
+
+      /* According to vector.md, each RVV instruction pattern is a PARALLEL
+         with at least 2 side effects; the last 2 are (use vl) and
+         (use vtype).  */
+ if (use_vtype_p (insn))
+ {
+ vector_p = true;
+
+ vinfo new_info = compute_info_for_instr (insn, curr_info);
+ curr_info = new_info;
+ if (!info.change.valid_p ())
+ info.change = new_info;
+ else
+ {
+ // If this instruction isn't compatible with the previous VL/VTYPE
+ // we need to insert a VSETVLI.
+ // If this is a unit-stride or strided load/store, we may be able
+ // to use the EMUL=(EEW/SEW)*LMUL relationship to avoid changing
+ // vtype. NOTE: We only do this if the vtype we're comparing
+ // against was created in this block. We need the first and third
+ // phase to treat the store the same way.
+ if (!can_skip_vsetvli_for_load_store_p (insn, new_info, info.change) &&
+ need_vsetvli (insn, new_info, info.change))
+ info.change = new_info;
+ }
+ }
+ // If this is something that updates VL/VTYPE that we don't know about, set
+ // the state to unknown.
+ if (update_vl_vtype_p (insn))
+ {
+ curr_info = vinfo::get_unknown ();
+ info.change = vinfo::get_unknown ();
+ }
+ }
+
+ return vector_p;
+}
+
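+/* Phase 2 helper: recompute the VL/VTYPE state on entry to BB as the
+   intersection of its predecessors' exit states.  If BB's exit state
+   changes as a result, queue its successors for another visit.  */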
+static void
+compute_incoming_vl_vtype (const basic_block bb)
+{
+ bb_vinfo &info = bb_vinfo_map[bb->index];
+ info.inqueue = false;
+
+ vinfo in_info;
+ if (EDGE_COUNT (bb->preds) == 0)
+ {
+ // There are no predecessors, so use the default starting status.
+ in_info.set_unknown ();
+ }
+ else
+ {
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ {
+ basic_block ancestor = e->src;
+ in_info = in_info.intersect (bb_vinfo_map.at(ancestor->index).exit);
+ }
+ }
+
+ // If we don't have any valid predecessor value, wait until we do.
+ if (!in_info.valid_p ())
+ return;
+
+ // If no change, no need to rerun block
+ if (in_info == info.pred)
+ return;
+
+ info.pred = in_info;
+ if (dump_file)
+ {
+ fprintf (dump_file, "Entry state of bb %d changed to\n", bb->index);
+ info.pred.print ();
+ }
+
+ // Note: It's tempting to cache the state changes here, but due to the
+ // compatibility checks performed a blocks output state can change based on
+ // the input state. To cache, we'd have to add logic for finding
+ // never-compatible state changes.
+ compute_vl_vtype_changes (bb);
+ vinfo tmpstatus = info.change;
+
+ // If the new exit value matches the old exit value, we don't need to revisit
+ // any blocks.
+ if (info.exit == tmpstatus)
+ return;
+
+ info.exit = tmpstatus;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "Exit state of bb %d changed to\n", bb->index);
+ info.exit.print ();
+ }
+ // Add the successors to the work list so we can propagate the changed exit
+ // status.
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ basic_block succ = e->dest;
+ if (!bb_vinfo_map[succ->index].inqueue)
+ bb_queue.push_back (succ);
+ }
+}
+
+static void
+insert_vsetvl (rtx_insn *insn, const vinfo &curr_info, const vinfo &prev_info)
+{
+ extract_insn_cached (insn);
+ rtx avl = curr_info.get_avl ();
+ rtx vtype = GEN_INT (curr_info.encode_vtype ());
+ rtx zero = gen_rtx_REG (Pmode, X0_REGNUM);
+
+ if (recog_clobber_vl_vtype (insn) == MOV_CLOBBER_REG_MEM
+ || recog_clobber_vl_vtype (insn) == MOV_CLOBBER_MEM_REG)
+ {
+ gcc_assert (
+ reload_completed &&
+ rtx_equal_p (curr_info.get_avl (), gen_rtx_REG (Pmode, X0_REGNUM)));
+ avl = recog_data.operand[2];
+ PUT_MODE (avl, Pmode);
+ emit_vsetvl_insn (avl, gen_rtx_REG (Pmode, X0_REGNUM), vtype, insn);
+ return;
+ }
+
+ // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
+ // VLMAX
+ if (prev_info.valid_p () && !prev_info.unknown_p () &&
+ curr_info.avl_equal_p (prev_info) && curr_info.vlmax_equal_p (prev_info))
+ {
+ emit_vsetvl_insn (zero, zero, vtype, insn);
+ return;
+ }
+
+ if (curr_info.get_avl () == NULL_RTX)
+ {
+ if (prev_info.valid_p () && !prev_info.unknown_p () &&
+ curr_info.vlmax_equal_p (prev_info))
+ {
+ emit_vsetvl_insn (zero, zero, vtype, insn);
+ return;
+ }
+ // Otherwise use an AVL of 0 to avoid depending on previous vl.
+ emit_vsetvl_insn (zero, GEN_INT (0), vtype, insn);
+ return;
+ }
+
+ if (rtx_equal_p (curr_info.get_avl (), gen_rtx_REG (Pmode, X0_REGNUM)))
+ {
+ if (reload_completed)
+ avl = gen_rtx_REG (Pmode, X0_REGNUM);
+ else
+ avl = gen_reg_rtx (Pmode);
+ emit_vsetvl_insn (avl, gen_rtx_REG (Pmode, X0_REGNUM), vtype, insn);
+ return;
+ }
+
+ emit_vsetvl_insn (zero, avl, vtype, insn);
+}
+
+static void
+cleanup_insn_op (rtx_insn *insn)
+{
+ if (!reload_completed)
+ return;
+
+ /* 1.Remove the vl operand for every rvv instruction.
+ 2.Replace every reload register spilling rvv instruction. */
+ rtx pat;
+ extract_insn_cached (insn);
+ machine_mode mode = rvv_translate_attr_mode (insn);
+ if (recog_clobber_vl_vtype (insn) == MOV_CLOBBER_REG_MEM)
+ {
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ pat =
+ gen_vlm (mode, recog_data.operand[0],
+ XEXP (recog_data.operand[1], 0), const0_rtx, const0_rtx);
+ else
+ pat = gen_vle (mode, recog_data.operand[0], const0_rtx, const0_rtx,
+ XEXP (recog_data.operand[1], 0), const0_rtx, const0_rtx);
+
+ validate_change (insn, &PATTERN (insn), pat, false);
+ }
+ else if (recog_clobber_vl_vtype (insn) == MOV_CLOBBER_MEM_REG)
+ {
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ pat = gen_vsm (mode, XEXP (recog_data.operand[0], 0),
+ recog_data.operand[1], const0_rtx, const0_rtx);
+ else
+ pat = gen_vse (mode, const0_rtx, XEXP (recog_data.operand[0], 0),
+ recog_data.operand[1], const0_rtx, const0_rtx);
+
+ validate_change (insn, &PATTERN (insn), pat, false);
+ }
+ else
+ replace_op (insn, const0_rtx, REPLACE_VL);
+}
+
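+/* Phase 3 helper: walk BB and insert a vsetvli before every RVV instruction
+   whose VL/VTYPE requirement is not already satisfied, seeding the state
+   from the predecessor information computed in phase 2.  */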
+static void
+emit_vsetvlis (const basic_block bb)
+{
+ vinfo curr_info;
+ rtx_insn *insn = NULL;
+
+ FOR_BB_INSNS (bb, insn)
+ {
+ // If this is an explicit VSETVLI or VSETIVLI, update our state.
+ if (is_vector_config_instr (insn))
+ {
+ curr_info = get_info_for_vsetvli (insn, curr_info);
+ continue;
+ }
+
+ if (use_vtype_p (insn))
+ {
+ vinfo new_info = compute_info_for_instr (insn, curr_info);
+
+ if (!curr_info.valid_p ())
+ {
+ // We haven't found any vector instructions or VL/VTYPE changes
+ // yet, use the predecessor information.
+ curr_info = bb_vinfo_map[bb->index].pred;
+ gcc_assert (curr_info.valid_p () &&
+ "Expected a valid predecessor state.");
+ if (need_vsetvli (insn, new_info, curr_info))
+ {
+ // If this is the first implicit state change, and the state change
+ // requested can be proven to produce the same register contents, we
+ // can skip emitting the actual state change and continue as if we
+ // had since we know the GPR result of the implicit state change
+ // wouldn't be used and VL/VTYPE registers are correct. Note that
+ // we *do* need to model the state as if it changed as while the
+ // register contents are unchanged, the abstract model can change.
+ if (need_vsetvli_phi (new_info, insn))
+ insert_vsetvl (insn, new_info, curr_info);
+ curr_info = new_info;
+ }
+ }
+ else
+ {
+ // If this instruction isn't compatible with the previous VL/VTYPE
+ // we need to insert a VSETVLI.
+ // If this is a unit-stride or strided load/store, we may be able
+ // to use the EMUL=(EEW/SEW)*LMUL relationship to avoid changing
+ // vtype. NOTE: We can't use predecessor information for the store.
+ // We must treat it the same as the first phase so that we produce
+              // the correct vl/vtype for successor blocks.
+ if (!can_skip_vsetvli_for_load_store_p (insn, new_info,
+ curr_info) &&
+ need_vsetvli (insn, new_info, curr_info))
+ {
+ insert_vsetvl (insn, new_info, curr_info);
+ curr_info = new_info;
+ }
+ }
+ cleanup_insn_op (insn);
+ }
+      // If this is something that updates VL/VTYPE that we don't know about, set
+ // the state to unknown.
+ if (update_vl_vtype_p (insn))
+ curr_info = vinfo::get_unknown ();
+
+ // If we reach the end of the block and our current info doesn't match the
+ // expected info, insert a vsetvli to correct.
+ if (insn == BB_END (bb))
+ {
+ const vinfo exit_info = bb_vinfo_map.at(bb->index).exit;
+ if (curr_info.valid_p () && exit_info.valid_p () &&
+ !exit_info.unknown_p () && curr_info != exit_info)
+ {
+ insert_vsetvl (insn, exit_info, curr_info);
+ curr_info = exit_info;
+ }
+ }
+ }
+}
+
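+/* Block-local pre-pass: rewrite AVL operands where we can prove the
+   resulting VL is unchanged, so that the later phases see fewer VL/VTYPE
+   transitions.  */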
+static void
+dolocalprepass (const basic_block bb)
+{
+ rtx_insn *insn = NULL;
+ vinfo curr_info = vinfo::get_unknown ();
+ FOR_BB_INSNS (bb, insn)
+ {
+ // If this is an explicit VSETVLI or VSETIVLI, update our state.
+ if (is_vector_config_instr (insn))
+ {
+ curr_info = get_info_for_vsetvli (insn, curr_info);
+ continue;
+ }
+
+ if (scalar_move_insn_p (insn))
+ {
+ gcc_assert (use_vtype_p (insn) && use_vl_p (insn));
+ const vinfo new_info = compute_info_for_instr (insn, curr_info);
+
+ // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
+ // VL > 0. We can discard the user requested AVL and just use the last
+ // one if we can prove it equally zero. This removes a vsetvli entirely
+ // if the types match or allows use of cheaper avl preserving variant
+ // if VLMAX doesn't change. If VLMAX might change, we couldn't use
+ // the 'vsetvli x0, x0, vtype" variant, so we avoid the transform to
+          // the 'vsetvli x0, x0, vtype' variant, so we avoid the transform to
+ // TODO: We can probably relax this for immediates.
+ if (((curr_info.has_nonzero_avl () && new_info.has_nonzero_avl ()) ||
+ (curr_info.has_zero_avl () && new_info.has_zero_avl ())) &&
+ new_info.vlmax_equal_p (curr_info))
+ {
+ replace_op (insn, curr_info.get_avl (), REPLACE_VL);
+ curr_info = compute_info_for_instr (insn, curr_info);
+ continue;
+ }
+ }
+
+ if (use_vtype_p (insn))
+ {
+ if (use_vl_p (insn))
+ {
+ const auto require = compute_info_for_instr (insn, curr_info);
+ // If the AVL is the result of a previous vsetvli which has the
+ // same AVL and VLMAX as our current state, we can reuse the AVL
+ // from the current state for the new one. This allows us to
+              // generate 'vsetvli x0, x0, vtype' or possibly skip the transition
+ // entirely.
+ if (!curr_info.unknown_p () && require.get_avl () &&
+ REG_P (require.get_avl ()) &&
+ REGNO (require.get_avl ()) >= FIRST_PSEUDO_REGISTER)
+ {
+ rtx_insn *def_rtl = fetch_def_insn (insn, require);
+
+ if (def_rtl != NULL)
+ {
+ if (is_vector_config_instr (def_rtl))
+ {
+ vinfo def_info = get_info_for_vsetvli (def_rtl, curr_info);
+ if (def_info.avl_equal_p (curr_info) &&
+ def_info.vlmax_equal_p (curr_info))
+ {
+ replace_op (insn, curr_info.get_avl (), REPLACE_VL);
+ curr_info = compute_info_for_instr (insn, curr_info);
+ continue;
+ }
+ }
+ }
+ }
+
+ // If AVL is defined by a vsetvli with the same vtype, we can
+ // replace the AVL operand with the AVL of the defining vsetvli.
+ // We avoid general register AVLs to avoid extending live ranges
+ // without being sure we can kill the original source reg entirely.
+ // TODO: We can ignore policy bits here, we only need VL to be the
+ // same.
+ if (!curr_info.unknown_p () && require.get_avl () &&
+ REG_P (require.get_avl ()) &&
+ REGNO (require.get_avl ()) >= FIRST_PSEUDO_REGISTER)
+ {
+ rtx_insn *def_rtl = fetch_def_insn (insn, require);
+ if (def_rtl != NULL)
+ {
+ if (is_vector_config_instr (def_rtl))
+ {
+ vinfo def_info = get_info_for_vsetvli (def_rtl, curr_info);
+ if (def_info.vtype_equal_p (require) &&
+ (def_info.avl_const_p () ||
+ (def_info.avl_reg_p () &&
+ rtx_equal_p (def_info.get_avl (), gen_rtx_REG (Pmode, X0_REGNUM)))))
+ {
+ replace_op (insn, def_info.get_avl (), REPLACE_VL);
+ curr_info = compute_info_for_instr (insn, curr_info);
+ continue;
+ }
+ }
+ }
+ }
+ }
+ curr_info = compute_info_for_instr (insn, curr_info);
+ continue;
+ }
+
+ // If this is something that updates VL/VTYPE that we don't know about,
+ // set the state to unknown.
+ if (update_vl_vtype_p (insn))
+ curr_info = vinfo::get_unknown ();
+ }
+}
+
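+/* Block-local post-pass: delete vsetvlis whose VL/VTYPE results are
+   overwritten before being used, and fold a following `vsetvli x0, x0,
+   vtype' into the previous vsetvli when only VTYPE is live.  */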
+static void
+dolocalpostpass (const basic_block bb)
+{
+ rtx_insn *prev_insn = nullptr;
+ rtx_insn *insn = nullptr;
+ bool used_vl = false, used_vtype = false;
+ std::vector<rtx_insn *> to_delete;
+ FOR_BB_INSNS (bb, insn)
+ {
+ // Note: Must be *before* vsetvli handling to account for config cases
+ // which only change some subfields.
+ if (update_vl_vtype_p (insn) || use_vl_p (insn))
+ used_vl = true;
+ if (update_vl_vtype_p (insn) || use_vtype_p (insn))
+ used_vtype = true;
+
+ if (!is_vector_config_instr (insn))
+ continue;
+
+ extract_insn_cached (insn);
+ if (prev_insn)
+ {
+ if (!used_vl && !used_vtype)
+ {
+ to_delete.push_back (prev_insn);
+ // fallthrough
+ }
+ else if (!used_vtype && is_vl_preserving_config (insn))
+ {
+ // Note: `vsetvli x0, x0, vtype' is the canonical instruction
+ // for this case. If you find yourself wanting to add other forms
+ // to this "unused VTYPE" case, we're probably missing a
+ // canonicalization earlier.
+ // Note: We don't need to explicitly check vtype compatibility
+ // here because this form is only legal (per ISA) when not
+ // changing VL.
+ rtx new_vtype = recog_data.operand[recog_data.n_operands - 1];
+ replace_op (prev_insn, new_vtype, REPLACE_VTYPE);
+ to_delete.push_back (insn);
+ // Leave prev_insn unchanged
+ continue;
+ }
+ }
+ prev_insn = insn;
+ used_vl = false;
+ used_vtype = false;
+
+ rtx vdef = recog_data.operand[0];
+ if (!rtx_equal_p (vdef, gen_rtx_REG (Pmode, X0_REGNUM)) &&
+ !(REGNO (vdef) >= FIRST_PSEUDO_REGISTER &&
+ (find_reg_note (insn, REG_UNUSED, vdef) ||
+ find_reg_note (insn, REG_DEAD, vdef))))
+ used_vl = true;
+ }
+
+ for (auto *to_remove : to_delete)
+ remove_insn (to_remove);
+}
+
+/// Return true if the VL value configured must be equal to the requested one.
+static bool
+has_fixed_result (const vinfo &info)
+{
+ if (!info.avl_const_p ())
+ // VLMAX is always the same value.
+ // TODO: Could extend to other registers by looking at the associated
+ // vreg def placement.
+ return rtx_equal_p (info.get_avl (), gen_rtx_REG (Pmode, X0_REGNUM));
+
+ if (VLMUL_FIELD_000 != info.get_vlmul ())
+ // TODO: Generalize the code below to account for LMUL
+ return false;
+
+ if (!BYTES_PER_RISCV_VECTOR.is_constant ())
+ return false;
+
+ unsigned int avl = INTVAL (info.get_avl ());
+ unsigned int vsew = info.get_vsew ();
+ machine_mode inner = vsew_to_int_mode (vsew);
+ unsigned int sew = GET_MODE_BITSIZE (as_a<scalar_mode> (inner));
+ unsigned avl_in_bits = avl * sew;
+ machine_mode mode = riscv_vector::vector_builtin_mode (
+ as_a<scalar_mode> (inner), info.get_vlmul ());
+ return GET_MODE_BITSIZE (mode).to_constant () >= avl_in_bits;
+}
+
+/// Perform simple partial redundancy elimination of the VSETVLI instructions
+/// we're about to insert by looking for cases where we can PRE from the
+/// beginning of one block to the end of one of its predecessors. Specifically,
+/// this is geared to catch the common case of a fixed length vsetvl in a single
+/// block loop when it could execute once in the preheader instead.
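+///
+/// For example (illustrative), a `vsetivli zero,4,e32,m1' that would
+/// otherwise be emitted inside a single-block loop is instead inserted at
+/// the end of the loop's unique predecessor with unknown exit state.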
+static void
+dopre (const basic_block bb)
+{
+ if (!bb_vinfo_map[bb->index].pred.unknown_p ())
+ return;
+
+ basic_block unavailable_pred = nullptr;
+ vinfo available_info;
+
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ {
+ basic_block predecessor = e->src;
+ const vinfo &pred_info = bb_vinfo_map[predecessor->index].exit;
+ if (pred_info.unknown_p ())
+ {
+ if (unavailable_pred)
+ return;
+ unavailable_pred = predecessor;
+ }
+ else if (!available_info.valid_p ())
+ available_info = pred_info;
+ else if (available_info != pred_info)
+ return;
+ }
+
+ // unreachable, single pred, or full redundancy. Note that FRE
+ // is handled by phase 3.
+ if (!unavailable_pred || !available_info.valid_p ())
+ return;
+
+ // critical edge - TODO: consider splitting?
+ if (EDGE_COUNT (unavailable_pred->succs) != 1)
+ return;
+
+  // If VL can be less than AVL, then we can't reduce the execution frequency.
+ if (!has_fixed_result (available_info))
+ return;
+
+  // Does it actually let us remove an implicit transition in this block?
+ bool found = false;
+ rtx_insn *insn;
+ vinfo curr_info;
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (is_vector_config_instr (insn))
+ return;
+
+ if (use_vtype_p (insn))
+ {
+ if (available_info != compute_info_for_instr (insn, curr_info))
+ return;
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ return;
+
+ // Finally, update both data flow state and insert the actual vsetvli.
+ // Doing both keeps the code in sync with the dataflow results, which
+ // is critical for correctness of phase 3.
+ auto old_info = bb_vinfo_map[unavailable_pred->index].exit;
+ if (dump_file)
+ {
+ fprintf (dump_file, "PRE VSETVLI from bb %d changed to bb %d\n", bb->index, unavailable_pred->index);
+ available_info.print ();
+ }
+ bb_vinfo_map[unavailable_pred->index].exit = available_info;
+ bb_vinfo_map[bb->index].pred = available_info;
+
+  // Note there's an implicit assumption here that terminators never use
+  // or modify VL or VTYPE.
+ auto insert_pt = BB_END (unavailable_pred);
+ insert_vsetvl (insert_pt, available_info, old_info);
+}
+
+static unsigned int
+rest_of_handle_insert_vsetvl (function *fn)
+{
+ basic_block bb;
+
+ if (n_basic_blocks_for_fn (fn) <= 0)
+ return 0;
+
+ gcc_assert (bb_vinfo_map.empty () && "Expect empty block infos.");
+
+ if (optimize >= 2)
+ {
+ // Initialization.
+ calculate_dominance_info (CDI_DOMINATORS);
+ df_analyze ();
+ crtl->ssa = new rtl_ssa::function_info (cfun);
+ }
+
+ if (dump_file)
+ fprintf (dump_file, "\nEntering InsertVSETVLI for %s\n\n",
+ current_function_name ());
+
+ /* Initialize Basic Block Map */
+ FOR_ALL_BB_FN (bb, fn)
+ {
+ bb_vinfo bb_init;
+      bb_vinfo_map.insert (std::pair<unsigned int, bb_vinfo> (bb->index, bb_init));
+ }
+
+ // Scan the block locally for cases where we can mutate the operands
+ // of the instructions to reduce state transitions. Critically, this
+ // must be done before we start propagating data flow states as these
+ // transforms are allowed to change the contents of VTYPE and VL so
+ // long as the semantics of the program stays the same.
+ FOR_ALL_BB_FN (bb, fn)
+ dolocalprepass (bb);
+
+ bool vector_p = false;
+
+ if (dump_file)
+ fprintf (
+ dump_file,
+      "Phase 1 determine how VL/VTYPE are affected by each block:\n");
+
+  // Phase 1 - determine how VL/VTYPE are affected by each block.
+ FOR_ALL_BB_FN (bb, fn)
+ {
+ vector_p |= compute_vl_vtype_changes (bb);
+ bb_vinfo &info = bb_vinfo_map[bb->index];
+ info.exit = info.change;
+ if (dump_file)
+ {
+ fprintf (dump_file, "Initial exit state of bb %d\n", bb->index);
+ info.exit.print ();
+ }
+ }
+
+ if (!vector_p)
+ {
+ bb_vinfo_map.clear ();
+ bb_queue.clear ();
+ if (optimize >= 2)
+ {
+ // Finalization.
+ free_dominance_info (CDI_DOMINATORS);
+ if (crtl->ssa->perform_pending_updates ())
+ cleanup_cfg (0);
+
+ delete crtl->ssa;
+ crtl->ssa = nullptr;
+ }
+ return 0;
+ }
+
+ if (dump_file)
+ fprintf (dump_file,
+ "Phase 2 determine the exit VL/VTYPE from each block:\n");
+ // Phase 2 - determine the exit VL/VTYPE from each block. We add all
+ // blocks to the list here, but will also add any that need to be
+ // revisited during Phase 2 processing.
+ FOR_ALL_BB_FN (bb, fn)
+ {
+ bb_queue.push_back (bb);
+ bb_vinfo_map[bb->index].inqueue = true;
+ }
+ while (!bb_queue.empty ())
+ {
+ bb = bb_queue.front ();
+ bb_queue.pop_front ();
+ compute_incoming_vl_vtype (bb);
+ }
+
+ // Perform partial redundancy elimination of vsetvli transitions.
+ FOR_ALL_BB_FN (bb, fn)
+ dopre (bb);
+
+ if (dump_file)
+ fprintf (dump_file,
+ "Phase 3 add any vsetvli instructions needed in the block:\n");
+ // Phase 3 - add any vsetvli instructions needed in the block. Use the
+ // Phase 2 information to avoid adding vsetvlis before the first vector
+ // instruction in the block if the VL/VTYPE is satisfied by its
+ // predecessors.
+ FOR_ALL_BB_FN (bb, fn)
+ emit_vsetvlis (bb);
+
+ // Now that all vsetvlis are explicit, go through and do block local
+ // DSE and peephole based demanded fields based transforms. Note that
+ // this *must* be done outside the main dataflow so long as we allow
+ // any cross block analysis within the dataflow. We can't have both
+ // demanded fields based mutation and non-local analysis in the
+ // dataflow at the same time without introducing inconsistencies.
+ FOR_ALL_BB_FN (bb, fn)
+ dolocalpostpass(bb);
+
+ // Once we're fully done rewriting all the instructions, do a final pass
+ // through to check for VSETVLIs which write to an unused destination.
+ // For the non X0, X0 variant, we can replace the destination register
+ // with X0 to reduce register pressure. This is really a generic
+ // optimization which can be applied to any dead def (TODO: generalize).
+ if (!reload_completed)
+ {
+ FOR_ALL_BB_FN (bb, fn)
+ {
+ rtx_insn *insn = NULL;
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (is_vector_config_instr (insn))
+ {
+ extract_insn_cached (insn);
+	      if (recog_data.n_operands == 3
+		  && !rtx_equal_p (recog_data.operand[0],
+				   gen_rtx_REG (Pmode, X0_REGNUM))
+		  && !rtx_equal_p (recog_data.operand[1],
+				   gen_rtx_REG (Pmode, X0_REGNUM))
+		  && (find_reg_note (insn, REG_UNUSED, recog_data.operand[0])
+		      || find_reg_note (insn, REG_DEAD, recog_data.operand[0])))
+ {
+ rtx pat = gen_vsetvl_zero (Pmode, recog_data.operand[1],
+ recog_data.operand[2]);
+ validate_change (insn, &PATTERN (insn), pat, false);
+ }
+ }
+ }
+ }
+ }
+
+ bb_vinfo_map.clear ();
+ bb_queue.clear ();
+
+ if (optimize >= 2)
+ {
+ // Finalization.
+ free_dominance_info (CDI_DOMINATORS);
+ if (crtl->ssa->perform_pending_updates ())
+ cleanup_cfg (0);
+
+ delete crtl->ssa;
+ crtl->ssa = nullptr;
+ }
+
+ return 0;
+}
+
+const pass_data pass_data_insert_vsetvl = {
+ RTL_PASS, /* type */
+ "insert_vsetvl", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_insert_vsetvl : public rtl_opt_pass
+{
+public:
+ pass_insert_vsetvl (gcc::context *ctxt)
+ : rtl_opt_pass (pass_data_insert_vsetvl, ctxt)
+ {
+ }
+
+ /* opt_pass methods: */
+ virtual bool
+ gate (function *)
+ {
+ return TARGET_VECTOR;
+ }
+ virtual unsigned int
+ execute (function *fn)
+ {
+ return rest_of_handle_insert_vsetvl (fn);
+ }
+
+}; // class pass_insert_vsetvl
+
+rtl_opt_pass *
+make_pass_insert_vsetvl (gcc::context *ctxt)
+{
+ return new pass_insert_vsetvl (ctxt);
+}
+
+const pass_data pass_data_insert_vsetvl2 = {
+ RTL_PASS, /* type */
+ "insert_vsetvl2", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_insert_vsetvl2 : public rtl_opt_pass
+{
+public:
+ pass_insert_vsetvl2 (gcc::context *ctxt)
+ : rtl_opt_pass (pass_data_insert_vsetvl2, ctxt)
+ {
+ }
+
+ /* opt_pass methods: */
+ virtual bool
+ gate (function *)
+ {
+ return TARGET_VECTOR;
+ }
+ virtual unsigned int
+ execute (function *fn)
+ {
+ return rest_of_handle_insert_vsetvl (fn);
+ }
+
+}; // class pass_insert_vsetvl2
+
+rtl_opt_pass *
+make_pass_insert_vsetvl2 (gcc::context *ctxt)
+{
+ return new pass_insert_vsetvl2 (ctxt);
+}
\ No newline at end of file
@@ -81,6 +81,18 @@ enum riscv_vector_bits_enum
RVV_4096 = 4096
};
+enum vsew_field_enum
+{
+ VSEW_FIELD_000, /* SEW = 8 */
+ VSEW_FIELD_001, /* SEW = 16 */
+ VSEW_FIELD_010, /* SEW = 32 */
+ VSEW_FIELD_011, /* SEW = 64 */
+ VSEW_FIELD_100, /* SEW = 128 */
+ VSEW_FIELD_101, /* SEW = 256 */
+ VSEW_FIELD_110, /* SEW = 512 */
+ VSEW_FIELD_111 /* SEW = 1024 */
+};
+
enum vlmul_field_enum
{
VLMUL_FIELD_000, /* LMUL = 1 */
@@ -18,3 +18,5 @@
<http://www.gnu.org/licenses/>. */
INSERT_PASS_AFTER (pass_rtl_store_motion, 1, pass_shorten_memrefs);
+INSERT_PASS_AFTER (pass_split_all_insns, 1, pass_insert_vsetvl);
+INSERT_PASS_BEFORE (pass_sched2, 1, pass_insert_vsetvl2);
@@ -96,6 +96,8 @@ extern std::string riscv_arch_str (bool version_p = true);
extern bool riscv_hard_regno_rename_ok (unsigned, unsigned);
rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
+rtl_opt_pass * make_pass_insert_vsetvl (gcc::context *ctxt);
+rtl_opt_pass * make_pass_insert_vsetvl2 (gcc::context *ctxt);
/* Information about one CPU we know about. */
struct riscv_cpu_info {
@@ -112,15 +114,32 @@ struct riscv_cpu_info {
extern const riscv_cpu_info *riscv_find_cpu (const char *);
/* Routines implemented in riscv-vector.cc. */
+extern bool rvv_mask_mode_p (machine_mode);
extern bool rvv_mode_p (machine_mode);
extern bool rvv_legitimate_poly_int_p (rtx);
extern unsigned int rvv_offset_temporaries (bool, poly_int64);
+extern enum vsew_field_enum rvv_classify_vsew_field (machine_mode);
extern enum vlmul_field_enum rvv_classify_vlmul_field (machine_mode);
extern unsigned int rvv_parse_vsew_field (unsigned int);
extern unsigned int rvv_parse_vlmul_field (unsigned int);
extern bool rvv_parse_vta_field (unsigned int);
extern bool rvv_parse_vma_field (unsigned int);
extern int rvv_regsize (machine_mode);
+extern rtx rvv_gen_policy (unsigned int rvv_policy = 0);
+extern opt_machine_mode rvv_get_mask_mode (machine_mode);
+extern machine_mode rvv_translate_attr_mode (rtx_insn *);
+extern void emit_op5 (unsigned int unspec, machine_mode Vmode,
+		      machine_mode VSImode, machine_mode VMSImode,
+		      machine_mode VSUBmode, rtx *operands,
+		      rtx (*gen_vx) (rtx, rtx, rtx, rtx, rtx),
+		      rtx (*gen_vx_32bit) (rtx, rtx, rtx, rtx, rtx),
+		      rtx (*gen_vv) (rtx, rtx, rtx, rtx, rtx),
+		      bool (*imm_p) (rtx), int i, bool reverse);
/* We classify builtin types into two classes:
1. General builtin class which is using the
@@ -7,6 +7,38 @@
#define DEF_RISCV_ARG_MODE_ATTR(A, B, C, D, E)
#endif
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(V, 31)
+DEF_RISCV_ARG_MODE_ATTR(V, 0, VNx2QI, VNx2QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 1, VNx4QI, VNx4QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 2, VNx8QI, VNx8QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 3, VNx16QI, VNx16QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 4, VNx32QI, VNx32QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 5, VNx64QI, VNx64QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 6, VNx128QI, VNx128QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 7, VNx2HI, VNx2HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 8, VNx4HI, VNx4HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 9, VNx8HI, VNx8HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 10, VNx16HI, VNx16HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 11, VNx32HI, VNx32HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 12, VNx64HI, VNx64HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 13, VNx2SI, VNx2SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 14, VNx4SI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 15, VNx8SI, VNx8SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 16, VNx16SI, VNx16SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 17, VNx32SI, VNx32SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 18, VNx2DI, VNx2DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 19, VNx4DI, VNx4DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 20, VNx8DI, VNx8DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 21, VNx16DI, VNx16DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 22, VNx2SF, VNx2SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(V, 23, VNx4SF, VNx4SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(V, 24, VNx8SF, VNx8SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(V, 25, VNx16SF, VNx16SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(V, 26, VNx32SF, VNx32SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(V, 27, VNx2DF, VNx2DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(V, 28, VNx4DF, VNx4DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(V, 29, VNx8DF, VNx8DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(V, 30, VNx16DF, VNx16DF, TARGET_DOUBLE_FLOAT)
DEF_RISCV_ARG_MODE_ATTR_VARIABLE(VI, 22)
DEF_RISCV_ARG_MODE_ATTR(VI, 0, VNx2QI, VNx2QI, TARGET_ANY)
DEF_RISCV_ARG_MODE_ATTR(VI, 1, VNx4QI, VNx4QI, TARGET_ANY)
@@ -30,6 +62,210 @@ DEF_RISCV_ARG_MODE_ATTR(VI, 18, VNx2DI, VNx2DI, TARGET_ANY)
DEF_RISCV_ARG_MODE_ATTR(VI, 19, VNx4DI, VNx4DI, TARGET_ANY)
DEF_RISCV_ARG_MODE_ATTR(VI, 20, VNx8DI, VNx8DI, TARGET_ANY)
DEF_RISCV_ARG_MODE_ATTR(VI, 21, VNx16DI, VNx16DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(VF, 9)
+DEF_RISCV_ARG_MODE_ATTR(VF, 0, VNx2SF, VNx2SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VF, 1, VNx4SF, VNx4SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VF, 2, VNx8SF, VNx8SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VF, 3, VNx16SF, VNx16SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VF, 4, VNx32SF, VNx32SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VF, 5, VNx2DF, VNx2DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VF, 6, VNx4DF, VNx4DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VF, 7, VNx8DF, VNx8DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VF, 8, VNx16DF, VNx16DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(VB, 7)
+DEF_RISCV_ARG_MODE_ATTR(VB, 0, VNx2BI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VB, 1, VNx4BI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VB, 2, VNx8BI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VB, 3, VNx16BI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VB, 4, VNx32BI, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VB, 5, VNx64BI, VNx64BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VB, 6, VNx128BI, VNx128BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(VFULL, 24)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 0, VNx16QI, VNx16QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 1, VNx32QI, VNx32QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 2, VNx64QI, VNx64QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 3, VNx128QI, VNx128QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 4, VNx8HI, VNx8HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 5, VNx16HI, VNx16HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 6, VNx32HI, VNx32HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 7, VNx64HI, VNx64HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 8, VNx4SI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 9, VNx8SI, VNx8SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 10, VNx16SI, VNx16SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 11, VNx32SI, VNx32SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 12, VNx2DI, VNx2DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 13, VNx4DI, VNx4DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 14, VNx8DI, VNx8DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 15, VNx16DI, VNx16DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 16, VNx4SF, VNx4SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 17, VNx8SF, VNx8SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 18, VNx16SF, VNx16SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 19, VNx32SF, VNx32SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 20, VNx2DF, VNx2DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 21, VNx4DF, VNx4DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 22, VNx8DF, VNx8DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 23, VNx16DF, VNx16DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(VPARTIAL, 7)
+DEF_RISCV_ARG_MODE_ATTR(VPARTIAL, 0, VNx2QI, VNx2QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VPARTIAL, 1, VNx4QI, VNx4QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VPARTIAL, 2, VNx8QI, VNx8QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VPARTIAL, 3, VNx2HI, VNx2HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VPARTIAL, 4, VNx4HI, VNx4HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VPARTIAL, 5, VNx2SI, VNx2SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VPARTIAL, 6, VNx2SF, VNx2SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(V64BITI, 4)
+DEF_RISCV_ARG_MODE_ATTR(V64BITI, 0, VNx2DI, VNx2DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V64BITI, 1, VNx4DI, VNx4DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V64BITI, 2, VNx8DI, VNx8DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V64BITI, 3, VNx16DI, VNx16DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(VM, 69)
+DEF_RISCV_ARG_MODE_ATTR(VM, 0, VNx2BI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 1, VNx4BI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 2, VNx8BI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 3, VNx16BI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 4, VNx32BI, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 5, VNx64BI, VNx64BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 6, VNx128BI, VNx128BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 7, VNx2QI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 8, VNx4QI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 9, VNx8QI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 10, VNx16QI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 11, VNx32QI, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 12, VNx64QI, VNx64BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 13, VNx128QI, VNx128BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 14, VNx2HI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 15, VNx4HI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 16, VNx8HI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 17, VNx16HI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 18, VNx32HI, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 19, VNx64HI, VNx64BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 20, VNx2SI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 21, VNx4SI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 22, VNx8SI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 23, VNx16SI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 24, VNx32SI, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 25, VNx2DI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 26, VNx4DI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 27, VNx8DI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 28, VNx16DI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 29, VNx2SF, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 30, VNx4SF, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 31, VNx8SF, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 32, VNx16SF, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 33, VNx32SF, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 34, VNx2DF, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 35, VNx4DF, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 36, VNx8DF, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 37, VNx16DF, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 38, VNx2QI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 39, VNx4QI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 40, VNx8QI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 41, VNx16QI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 42, VNx32QI, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 43, VNx64QI, VNx64BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 44, VNx128QI, VNx128BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 45, VNx2HI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 46, VNx4HI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 47, VNx8HI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 48, VNx16HI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 49, VNx32HI, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 50, VNx64HI, VNx64BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 51, VNx2SI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 52, VNx4SI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 53, VNx8SI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 54, VNx16SI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 55, VNx32SI, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 56, VNx2DI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 57, VNx4DI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 58, VNx8DI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 59, VNx16DI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 60, VNx2SF, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 61, VNx4SF, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 62, VNx8SF, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 63, VNx16SF, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 64, VNx32SF, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 65, VNx2DF, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 66, VNx4DF, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 67, VNx8DF, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 68, VNx16DF, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(VSUB, 31)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 0, VNx2QI, QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 1, VNx4QI, QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 2, VNx8QI, QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 3, VNx16QI, QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 4, VNx32QI, QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 5, VNx64QI, QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 6, VNx128QI, QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 7, VNx2HI, HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 8, VNx4HI, HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 9, VNx8HI, HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 10, VNx16HI, HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 11, VNx32HI, HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 12, VNx64HI, HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 13, VNx2SI, SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 14, VNx4SI, SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 15, VNx8SI, SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 16, VNx16SI, SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 17, VNx32SI, SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 18, VNx2DI, DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 19, VNx4DI, DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 20, VNx8DI, DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 21, VNx16DI, DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 22, VNx2SF, SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 23, VNx4SF, SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 24, VNx8SF, SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 25, VNx16SF, SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 26, VNx32SF, SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 27, VNx2DF, DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 28, VNx4DF, DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 29, VNx8DF, DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 30, VNx16DF, DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(VDI_TO_VSI, 22)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 0, VNx2QI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 1, VNx4QI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 2, VNx8QI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 3, VNx16QI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 4, VNx32QI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 5, VNx64QI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 6, VNx128QI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 7, VNx2HI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 8, VNx4HI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 9, VNx8HI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 10, VNx16HI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 11, VNx32HI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 12, VNx64HI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 13, VNx2SI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 14, VNx4SI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 15, VNx8SI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 16, VNx16SI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 17, VNx32SI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 18, VNx2DI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 19, VNx4DI, VNx8SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 20, VNx8DI, VNx16SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 21, VNx16DI, VNx32SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(VDI_TO_VSI_VM, 22)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 0, VNx2QI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 1, VNx4QI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 2, VNx8QI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 3, VNx16QI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 4, VNx32QI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 5, VNx64QI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 6, VNx128QI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 7, VNx2HI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 8, VNx4HI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 9, VNx8HI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 10, VNx16HI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 11, VNx32HI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 12, VNx64HI, VNx64BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 13, VNx2SI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 14, VNx4SI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 15, VNx8SI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 16, VNx16SI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 17, VNx32SI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 18, VNx2DI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 19, VNx4DI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 20, VNx8DI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 21, VNx16DI, VNx32BI, TARGET_ANY)
#undef DEF_RISCV_ARG_MODE_ATTR_VARIABLE
#undef DEF_RISCV_ARG_MODE_ATTR
@@ -66,6 +66,7 @@
#include "tree-ssa-loop-niter.h"
#include "rtx-vector-builder.h"
#include "riscv-vector.h"
+#include "riscv-vector-builtins.h"
/* This file should be included last. */
#include "target-def.h"
@@ -158,6 +159,38 @@ rvv_offset_temporaries (bool add_p, poly_int64 offset)
return count + rvv_add_offset_1_temporaries (constant);
}
+/* Return the vsew field for a specific machine mode. */
+
+enum vsew_field_enum
+rvv_classify_vsew_field (machine_mode mode)
+{
+ switch (GET_MODE_INNER (mode))
+ {
+ case E_QImode:
+ return VSEW_FIELD_000;
+
+ case E_HImode:
+ return VSEW_FIELD_001;
+
+ case E_SImode:
+ case E_SFmode:
+ return VSEW_FIELD_010;
+
+ case E_DImode:
+ case E_DFmode:
+ return VSEW_FIELD_011;
+
+ case E_TImode:
+ return VSEW_FIELD_100;
+
+ default:
+ break;
+ }
+
+  /* We don't care about VSEW for mask modes.  */
+ return VSEW_FIELD_000;
+}
+
/* Return the vlmul field for a specific machine mode. */
enum vlmul_field_enum
@@ -271,4 +304,339 @@ rvv_get_mask_mode (machine_mode mode)
&& rvv_mask_mode_p (mask_mode))
return mask_mode;
return default_get_mask_mode (mode);
+}
+
+/* Generate the policy operand.  The RVV_POLICY argument is currently
+   ignored; the 'any' policy is always returned.  */
+rtx
+rvv_gen_policy (unsigned int)
+{
+ return riscv_vector::gen_any_policy ();
+}
+
+/* Return machine mode for an insn type. */
+machine_mode
+rvv_translate_attr_mode (rtx_insn *insn)
+{
+ gcc_assert (recog_memoized (insn) >= 0);
+
+ switch (get_attr_mode (insn))
+ {
+#define TRANSLATE_VECTOR_MODE(MODE) \
+ case MODE_VNX##MODE: \
+ return VNx##MODE##mode;
+ TRANSLATE_VECTOR_MODE (8QI)
+ TRANSLATE_VECTOR_MODE (4HI)
+ TRANSLATE_VECTOR_MODE (2SI)
+ TRANSLATE_VECTOR_MODE (2SF)
+ TRANSLATE_VECTOR_MODE (8BI)
+ TRANSLATE_VECTOR_MODE (4QI)
+ TRANSLATE_VECTOR_MODE (2HI)
+ TRANSLATE_VECTOR_MODE (4BI)
+ TRANSLATE_VECTOR_MODE (2QI)
+ TRANSLATE_VECTOR_MODE (2BI)
+ TRANSLATE_VECTOR_MODE (16QI)
+ TRANSLATE_VECTOR_MODE (8HI)
+ TRANSLATE_VECTOR_MODE (4SI)
+ TRANSLATE_VECTOR_MODE (2DI)
+ TRANSLATE_VECTOR_MODE (4SF)
+ TRANSLATE_VECTOR_MODE (2DF)
+ TRANSLATE_VECTOR_MODE (16BI)
+ TRANSLATE_VECTOR_MODE (32QI)
+ TRANSLATE_VECTOR_MODE (16HI)
+ TRANSLATE_VECTOR_MODE (8SI)
+ TRANSLATE_VECTOR_MODE (4DI)
+ TRANSLATE_VECTOR_MODE (8SF)
+ TRANSLATE_VECTOR_MODE (4DF)
+ TRANSLATE_VECTOR_MODE (32BI)
+ TRANSLATE_VECTOR_MODE (64QI)
+ TRANSLATE_VECTOR_MODE (32HI)
+ TRANSLATE_VECTOR_MODE (16SI)
+ TRANSLATE_VECTOR_MODE (8DI)
+ TRANSLATE_VECTOR_MODE (16SF)
+ TRANSLATE_VECTOR_MODE (8DF)
+ TRANSLATE_VECTOR_MODE (64BI)
+ TRANSLATE_VECTOR_MODE (128QI)
+ TRANSLATE_VECTOR_MODE (64HI)
+ TRANSLATE_VECTOR_MODE (32SI)
+ TRANSLATE_VECTOR_MODE (16DI)
+ TRANSLATE_VECTOR_MODE (32SF)
+ TRANSLATE_VECTOR_MODE (16DF)
+ TRANSLATE_VECTOR_MODE (128BI)
+
+ default:
+ break;
+ }
+
+ return VOIDmode;
+}
+
+/* Return the vtype field for a specific machine mode. */
+static unsigned int
+classify_vtype_field (machine_mode mode)
+{
+ unsigned int vlmul = rvv_classify_vlmul_field (mode);
+ unsigned int vsew = rvv_classify_vsew_field (mode);
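+  /* vtype layout: vlmul in bits [2:0], vsew in bits [5:3]; 0x40 sets the
+     vta (tail agnostic) bit and leaves vma (bit 7) clear.  */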
+ unsigned int vtype = (vsew << 3) | (vlmul & 0x7) | 0x40;
+ return vtype;
+}
+
+/* Return LMUL scaled by 8 (lmulx8 = real LMUL * 8) so that the result
+   is always an integer, e.g.
+     1  => LMUL 1/8
+     2  => LMUL 1/4
+     4  => LMUL 1/2
+     8  => LMUL 1
+     16 => LMUL 2
+     32 => LMUL 4
+     64 => LMUL 8  */
+static unsigned int
+get_lmulx8 (machine_mode mode)
+{
+ unsigned int vlmul = rvv_classify_vlmul_field (mode);
+ switch (vlmul)
+ {
+ case VLMUL_FIELD_000:
+ return 8;
+ case VLMUL_FIELD_001:
+ return 16;
+ case VLMUL_FIELD_010:
+ return 32;
+ case VLMUL_FIELD_011:
+ return 64;
+ case VLMUL_FIELD_101:
+ return 1;
+ case VLMUL_FIELD_110:
+ return 2;
+ case VLMUL_FIELD_111:
+ return 4;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Helper functions for handling sew=64 on RV32 system. */
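+/* An AVL constant of 32 or more does not fit in a 5-bit unsigned
+   immediate (constraint K), so force it into a register.  */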
+static rtx
+force_reg_for_over_uimm (rtx vl)
+{
+ if (CONST_SCALAR_INT_P (vl) && INTVAL (vl) >= 32)
+ {
+ return force_reg (Pmode, vl);
+ }
+
+ return vl;
+}
+
+/* Helper functions for handling sew=64 on RV32 system. */
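+/* Compute the VL to use for the SEW=32 operations that emulate a SEW=64
+   splat on RV32: each 64-bit element is written as two 32-bit elements,
+   so the working VL is twice the requested AVL, clamped to the SEW=32
+   VLMAX, or read back with a vsetvli when the AVL is not known.  */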
+static rtx
+gen_vlx2 (rtx avl, machine_mode Vmode, machine_mode VSImode)
+{
+ if (rtx_equal_p (avl, gen_rtx_REG (Pmode, X0_REGNUM)))
+ {
+ return avl;
+ }
+ rtx i32vl = NULL_RTX;
+ if (CONST_SCALAR_INT_P (avl))
+ {
+ unsigned int vlen_max;
+ unsigned int vlen_min;
+ if (riscv_vector_chunks.is_constant ())
+ {
+ vlen_max = riscv_vector_chunks.to_constant () * 64;
+ vlen_min = vlen_max;
+ }
+ else
+ {
+ /* TODO: vlen_max will be supported as 65536 in the future. */
+ vlen_max = RVV_4096;
+ vlen_min = RVV_128;
+ }
+ unsigned int max_vlmax = (vlen_max / GET_MODE_UNIT_BITSIZE (Vmode) * get_lmulx8 (Vmode)) / 8;
+ unsigned int min_vlmax = (vlen_min / GET_MODE_UNIT_BITSIZE (Vmode) * get_lmulx8 (Vmode)) / 8;
+
+ unsigned HOST_WIDE_INT avl_int = INTVAL (avl);
+ if (avl_int <= min_vlmax)
+ {
+ i32vl = gen_int_mode (2 * avl_int, SImode);
+ }
+ else if (avl_int >= 2 * max_vlmax)
+ {
+ // Just set i32vl to VLMAX in this situation
+ i32vl = gen_reg_rtx (Pmode);
+ unsigned int vtype = classify_vtype_field (VSImode);
+ emit_insn (gen_vsetvl (Pmode, i32vl, gen_rtx_REG (Pmode, X0_REGNUM), GEN_INT (vtype)));
+ }
+ else
+ {
+	  // For AVL in (MinVLMAX, 2 * MaxVLMAX), the actual working VL
+	  // depends on the hardware implementation, so let the fallback
+	  // code below read it back with a vsetvli.
+ }
+ }
+ if (!i32vl)
+ {
+      // Use a vsetvli instruction to read the VL actually used, which
+      // depends on the hardware implementation.
+ rtx i64vl = gen_reg_rtx (Pmode);
+ unsigned int vtype = classify_vtype_field (Vmode);
+ emit_insn (gen_vsetvl (Pmode, i64vl, force_reg (Pmode, avl), GEN_INT (vtype)));
+      // Scale by 2 to get the 32-bit element count.
+ i32vl = gen_reg_rtx (Pmode);
+ emit_insn (gen_rtx_SET (i32vl, gen_rtx_ASHIFT (Pmode, i64vl, const1_rtx)));
+ }
+
+ return force_reg_for_over_uimm (i32vl);
+}
+
+/* Helper functions for handling sew=64 on RV32 system. */
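+/* Broadcast the 64-bit scalar S into vector VD on RV32: splat the high
+   32 bits with a SEW=32 vmv.v.x at twice the VL, then vmerge the low
+   32 bits into the even (low-half) elements using a 0101... mask, and
+   reinterpret the result as a SEW=64 vector.  */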
+static void
+emit_int64_to_vector_32bit (machine_mode Vmode, machine_mode VSImode,
+ machine_mode VMSImode, rtx vd, rtx s, rtx vl,
+ rtx tail)
+{
+ if (CONST_SCALAR_INT_P (s))
+ {
+ s = force_reg (DImode, s);
+ }
+
+ rtx hi = gen_highpart (SImode, s);
+ rtx lo = gen_lowpart (SImode, s);
+
+ rtx zero = gen_rtx_REG (SImode, X0_REGNUM);
+
+ /* make a "0101..." mask vector */
+ rtx vm1 = gen_reg_rtx (VNx4SImode);
+ emit_insn (gen_vmv_v_x_internal (VNx4SImode, vm1, const0_rtx,
+ force_reg (SImode, GEN_INT (0x55555555)),
+ zero, rvv_gen_policy ()));
+ rtx vm2 = gen_reg_rtx (VMSImode);
+ emit_insn (gen_rtx_SET (vm2, gen_lowpart (VMSImode, vm1)));
+
+ rtx vlx2 = gen_vlx2 (vl, Vmode, VSImode);
+ rtx v2 = gen_reg_rtx (VSImode);
+ emit_insn (gen_vmv_v_x_internal (VSImode, v2, const0_rtx, hi, vlx2,
+ rvv_gen_policy ()));
+
+ rtx vd_si = gen_reg_rtx (VSImode);
+ emit_insn (gen_vmerge_vxm_internal (VSImode, vd_si, vm2, const0_rtx, v2, lo,
+ vlx2, tail));
+
+ emit_insn (gen_rtx_SET (vd, gen_lowpart (Vmode, vd_si)));
+}
+
+/* Helper functions for handling sew=64 on RV32 system. */
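+/* Return true if A is a constant whose value is the sign-extension of
+   its low 32 bits, i.e. it fits the 32-bit scalar operand of the
+   *_32bit patterns.  */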
+bool
+imm32_p (rtx a)
+{
+ if (!CONST_SCALAR_INT_P (a))
+ return false;
+ unsigned HOST_WIDE_INT val = UINTVAL (a);
+ return val <= 0x7FFFFFFFULL || val >= 0xFFFFFFFF80000000ULL;
+}
+
+typedef bool imm_p (rtx);
+typedef rtx gen_3 (rtx, rtx, rtx);
+typedef rtx gen_4 (rtx, rtx, rtx, rtx);
+typedef rtx gen_5 (rtx, rtx, rtx, rtx, rtx);
+typedef rtx gen_6 (rtx, rtx, rtx, rtx, rtx, rtx);
+typedef rtx gen_7 (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
+enum GEN_CLASS
+{
+ GEN_VX,
+ GEN_VX_32BIT,
+ GEN_VV
+};
+
+/* Helper functions for handling sew=64 on RV32 system. */
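+/* Canonicalize the scalar operand OPERANDS[I] and decide which variant
+   to emit: the normal vector-scalar form (GEN_VX), the RV32 form that
+   takes a 32-bit scalar for SEW=64 (GEN_VX_32BIT), or a vector-vector
+   form after materializing a 64-bit scalar into a vector (GEN_VV).  */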
+enum GEN_CLASS
+modify_operands (machine_mode Vmode, machine_mode VSImode,
+ machine_mode VMSImode, machine_mode VSUBmode, rtx *operands,
+ bool (*imm5_p) (rtx), int i, bool reverse, unsigned int unspec)
+{
+ if (!TARGET_64BIT && VSUBmode == DImode)
+ {
+ if (imm32_p (operands[i]))
+ {
+ if (!imm5_p (operands[i]))
+ operands[i] = force_reg (SImode, operands[i]);
+ return GEN_VX_32BIT;
+ }
+ else
+ {
+ rtx result = gen_reg_rtx (Vmode);
+ rtx zero = gen_rtx_REG (SImode, X0_REGNUM);
+ rtx tail = rvv_gen_policy ();
+
+ emit_int64_to_vector_32bit (Vmode, VSImode, VMSImode, result,
+ operands[i], zero, tail);
+
+ operands[i] = result;
+
+ if (reverse)
+ {
+ rtx b = operands[i - 1];
+ operands[i - 1] = operands[i];
+ operands[i] = b;
+ }
+ return GEN_VV;
+ }
+ }
+ else
+ {
+ if (!imm5_p (operands[i]))
+ operands[i] = force_reg (VSUBmode, operands[i]);
+ return GEN_VX;
+ }
+}
+
+/* Helper functions for handling sew=64 on RV32 system. */
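+/* Special case for vmv.v.x on RV32 when the 64-bit scalar does not fit
+   in 32 bits: build the value directly in the destination (or merge)
+   register and return true; otherwise return false so the generic
+   expansion path is used.  */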
+bool
+emit_op5_vmv_v_x (machine_mode Vmode, machine_mode VSImode,
+ machine_mode VMSImode, machine_mode VSUBmode, rtx *operands,
+ int i)
+{
+ if (!TARGET_64BIT && VSUBmode == DImode)
+ {
+ if (!imm32_p (operands[i]))
+ {
+ rtx vd = operands[1];
+ if (rtx_equal_p (vd, const0_rtx))
+ {
+ vd = operands[0];
+ }
+ emit_int64_to_vector_32bit (Vmode, VSImode, VMSImode, vd, operands[i],
+ operands[3], operands[4]);
+
+ emit_insn (gen_rtx_SET (operands[0], vd));
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Helper functions for handling sew=64 on RV32 system. */
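+/* Expand a five-operand vector-scalar pattern: dispatch to the vx,
+   32-bit vx or vv generator selected by modify_operands and emit the
+   resulting instruction.  */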
+void
+emit_op5 (unsigned int unspec, machine_mode Vmode, machine_mode VSImode,
+ machine_mode VMSImode, machine_mode VSUBmode, rtx *operands,
+ gen_5 *gen_vx, gen_5 *gen_vx_32bit, gen_5 *gen_vv, imm_p *imm5_p,
+ int i, bool reverse)
+{
+ if (unspec == UNSPEC_VMV)
+ {
+ if (emit_op5_vmv_v_x (Vmode, VSImode, VMSImode, VSUBmode, operands, i))
+ {
+ return;
+ }
+ }
+
+ enum GEN_CLASS gen_class = modify_operands (
+ Vmode, VSImode, VMSImode, VSUBmode, operands, imm5_p, i, reverse, unspec);
+
+ gen_5 *gen = gen_class == GEN_VX ? gen_vx
+ : gen_class == GEN_VV ? gen_vv
+ : gen_vx_32bit;
+
+ emit_insn (
+ (*gen) (operands[0], operands[1], operands[2], operands[3], operands[4]));
}
\ No newline at end of file
@@ -20,14 +20,4 @@
#ifndef GCC_RISCV_VECTOR_H
#define GCC_RISCV_VECTOR_H
-bool riscv_vector_mode_p (machine_mode);
-bool rvv_legitimate_poly_int_p (rtx);
-unsigned int rvv_offset_temporaries (bool, poly_int64);
-vlmul_field_enum rvv_classify_vlmul_field (machine_mode);
-extern unsigned int rvv_parse_vsew_field (unsigned int);
-extern unsigned int rvv_parse_vlmul_field (unsigned int);
-extern bool rvv_parse_vta_field (unsigned int);
-extern bool rvv_parse_vma_field (unsigned int);
-int rvv_regsize (machine_mode);
-opt_machine_mode rvv_get_mask_mode (machine_mode);
#endif // GCC_RISCV_VECTOR_H
\ No newline at end of file
@@ -107,6 +107,7 @@
(VL_REGNUM 66)
(VTYPE_REGNUM 67)
(X0_REGNUM 0)
+ (DO_NOT_UPDATE_VL_VTYPE 21)
])
(include "predicates.md")
@@ -138,7 +139,13 @@
(const_string "unknown"))
;; Main data type used by the insn
-(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,SF,DF,TF"
+(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,SF,DF,TF,
+ VNx8QI,VNx4HI,VNx2SI,VNx4HF,VNx2SF,VNx4QI,VNx2HI,VNx2HF,
+ VNx2QI,VNx16QI,VNx8HI,VNx4SI,VNx2DI,VNx8HF,VNx4SF,VNx2DF,
+ VNx32QI,VNx16HI,VNx8SI,VNx4DI,VNx16HF,VNx8SF,VNx4DF,
+ VNx64QI,VNx32HI,VNx16SI,VNx8DI,VNx32HF,VNx16SF,VNx8DF,
+ VNx128QI,VNx64HI,VNx32SI,VNx16DI,VNx64HF,VNx32SF,VNx16DF,
+ VNx2BI,VNx4BI,VNx8BI,VNx16BI,VNx32BI,VNx64BI,VNx128BI"
(const_string "unknown"))
;; True if the main data type is twice the size of a word.
@@ -184,11 +191,67 @@
;; ghost an instruction that produces no real code
;; bitmanip bit manipulation instructions
;; vsetvl vector configuration setting
+;; vload vector whole register load
+;; vstore vector whole register store
+;; vcopy vector whole register copy
+;; vle vector unit-stride load
+;; vse vector unit-stride store
+;; vlse vector strided load
+;; vsse vector strided store
+;; vluxei vector unordered indexed load
+;; vloxei vector ordered indexed load
+;; vsuxei vector unordered indexed store
+;; vsoxei vector ordered indexed store
+;; vleff vector unit-stride fault-only-first load
+;; varith vector single-width integer and floating-point arithmetic instructions
+;; vadc vector single-width add-with-carry instructions with non-mask dest
+;; vmadc vector single-width add-with-carry instructions with mask dest
+;; vwarith vector widening integer and floating-point arithmetic instructions
+;; vlogical vector integer logical instructions
+;; vshift vector integer shift instructions
+;; vcmp vector integer and floating-point compare
+;; vmul vector integer and floating-point multiply
+;; vmulh vector integer highpart multiply
+;; vdiv vector integer and floating-point divide
+;; vwmul vector integer and floating-point widening multiply
+;; vmadd vector single-width integer and floating-point multiply-add/sub
+;; vwmadd vector widening integer and floating-point multiply-add/sub
+;; vmerge vector element data selection
+;; vmove vector register move
+;; vsarith vector saturating single-width arithmetic instructions
+;; vsmul vector saturating single-width multiply instructions
+;; vscaleshift vector scaling single-width shift instructions
+;; vclip vector saturating clip
+;; vfsqrt vector floating point square root
+;; vfsgnj vector floating-point sign-injection
+;; vfclass vector floating-point classify instructions
+;; vfcvt vector floating point convert
+;; vfwcvt vector widening floating point convert
+;; vfncvt vector narrowing floating point convert
+;; vwcvt vector widening only integer convert
+;; vncvt vector narrowing only integer convert
+;; vreduc vector single-width reduction operations
+;; vwreduc vector widening reduction operations
+;; vmask vector mask operations
+;; vcpop vector mask population count vpopc
+;; vmsetbit vector mask bit manipulation
+;; viota vector iota instructions
+;; vid vector element index instruction
+;; vmv_x_s vmv.x.s instruction
+;; vmv_s_x vmv.s.x instruction
+;; vfmv_f_s vfmv.f.s instruction
+;; vfmv_s_f vfmv.s.f instruction
+;; vslide vector slide instructions
+;; vgather vector gather instructions
+;; vcompress vector compress instructions
(define_attr "type"
"unknown,branch,jump,call,load,fpload,store,fpstore,
mtc,mfc,const,arith,logical,shift,slt,imul,idiv,move,fmove,fadd,fmul,
fmadd,fdiv,fcmp,fcvt,fsqrt,multi,auipc,sfb_alu,nop,ghost,bitmanip,rotate,
- vsetvl"
+ vsetvl,vload,vstore,vcopy,vle,vse,vlse,vsse,vluxei,vloxei,vsuxei,vsoxei,vleff,
+ varith,vadc,vmadc,vwarith,vlogical,vshift,vcmp,vmul,vmulh,vdiv,vwmul,vmadd,vwmadd,
+ vmerge,vmove,vsarith,vsmul,vscaleshift,vclip,vfsqrt,vfsgnj,vfclass,vfcvt,vfwcvt,vfncvt,
+ vwcvt,vncvt,vreduc,vwreduc,vmask,vcpop,vmsetbit,viota,vid,vmv_x_s,vmv_s_x,vfmv_f_s,vfmv_s_f,
+ vslide,vgather,vcompress"
(cond [(eq_attr "got" "load") (const_string "load")
;; If a doubleword move uses these expensive instructions,
@@ -63,6 +63,10 @@ riscv-vector-builtins.o: \
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/riscv/riscv-vector-builtins.cc
+riscv-insert-vsetvl.o: $(srcdir)/config/riscv/riscv-insert-vsetvl.cc
+ $(COMPILE) $<
+ $(POSTCOMPILE)
+
PASSES_EXTRA += $(srcdir)/config/riscv/riscv-passes.def
$(common_out_file): $(srcdir)/config/riscv/riscv-cores.def \
@@ -21,11 +21,138 @@
(define_c_enum "unspec" [
;; vsetvli.
UNSPEC_VSETVLI
+ ;; RVV instructions.
+ UNSPEC_RVV
+ ;; vector select
+ UNSPEC_SELECT
+
+ ;; vle/vse
+ UNSPEC_UNIT_STRIDE_LOAD
+ UNSPEC_UNIT_STRIDE_STORE
+
+ ;; unspec merge
+ UNSPEC_MERGE
+
+ UNSPEC_VMV
])
+;; All vector modes supported.
+(define_mode_iterator V [
+ VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI VNx64QI VNx128QI
+ VNx2HI VNx4HI VNx8HI VNx16HI VNx32HI VNx64HI
+ VNx2SI VNx4SI VNx8SI VNx16SI VNx32SI
+ VNx2DI VNx4DI VNx8DI VNx16DI
+ (VNx2SF "TARGET_HARD_FLOAT") (VNx4SF "TARGET_HARD_FLOAT") (VNx8SF "TARGET_HARD_FLOAT")
+ (VNx16SF "TARGET_HARD_FLOAT") (VNx32SF "TARGET_HARD_FLOAT")
+ (VNx2DF "TARGET_DOUBLE_FLOAT") (VNx4DF "TARGET_DOUBLE_FLOAT") (VNx8DF "TARGET_DOUBLE_FLOAT")
+ (VNx16DF "TARGET_DOUBLE_FLOAT")])
+
;; All integer vector modes supported for RVV.
(define_mode_iterator VI [
VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI VNx64QI VNx128QI
VNx2HI VNx4HI VNx8HI VNx16HI VNx32HI VNx64HI
VNx2SI VNx4SI VNx8SI VNx16SI VNx32SI
- VNx2DI VNx4DI VNx8DI VNx16DI])
\ No newline at end of file
+ VNx2DI VNx4DI VNx8DI VNx16DI])
+
+;; All vector modes supported for float load/store/alu.
+(define_mode_iterator VF [
+ (VNx2SF "TARGET_HARD_FLOAT") (VNx4SF "TARGET_HARD_FLOAT") (VNx8SF "TARGET_HARD_FLOAT")
+ (VNx16SF "TARGET_HARD_FLOAT") (VNx32SF "TARGET_HARD_FLOAT")
+ (VNx2DF "TARGET_DOUBLE_FLOAT") (VNx4DF "TARGET_DOUBLE_FLOAT") (VNx8DF "TARGET_DOUBLE_FLOAT")
+ (VNx16DF "TARGET_DOUBLE_FLOAT")])
+
+;; All vector masking modes.
+(define_mode_iterator VB [
+ VNx2BI VNx4BI VNx8BI VNx16BI
+ VNx32BI VNx64BI VNx128BI])
+
+;; Full vector modes supported.
+(define_mode_iterator VFULL [
+ VNx16QI VNx32QI VNx64QI VNx128QI
+ VNx8HI VNx16HI VNx32HI VNx64HI
+ VNx4SI VNx8SI VNx16SI VNx32SI
+ VNx2DI VNx4DI VNx8DI VNx16DI
+ (VNx4SF "TARGET_HARD_FLOAT") (VNx8SF "TARGET_HARD_FLOAT") (VNx16SF "TARGET_HARD_FLOAT") (VNx32SF "TARGET_HARD_FLOAT")
+ (VNx2DF "TARGET_DOUBLE_FLOAT") (VNx4DF "TARGET_DOUBLE_FLOAT") (VNx8DF "TARGET_DOUBLE_FLOAT") (VNx16DF "TARGET_DOUBLE_FLOAT")])
+
+;; Partial vector modes supported.
+(define_mode_iterator VPARTIAL [
+ VNx2QI VNx4QI VNx8QI
+ VNx2HI VNx4HI
+ VNx2SI
+ (VNx2SF "TARGET_HARD_FLOAT")])
+
+;; All vector modes supported for integer sew = 64.
+(define_mode_iterator V64BITI [VNx2DI VNx4DI VNx8DI VNx16DI])
+
+;; Map a vector int or float mode to a vector compare mode.
+(define_mode_attr VM [
+ (VNx2BI "VNx2BI") (VNx4BI "VNx4BI") (VNx8BI "VNx8BI") (VNx16BI "VNx16BI")
+ (VNx32BI "VNx32BI") (VNx64BI "VNx64BI") (VNx128BI "VNx128BI")
+ (VNx2QI "VNx2BI") (VNx4QI "VNx4BI") (VNx8QI "VNx8BI") (VNx16QI "VNx16BI")
+ (VNx32QI "VNx32BI") (VNx64QI "VNx64BI") (VNx128QI "VNx128BI") (VNx2HI "VNx2BI")
+ (VNx4HI "VNx4BI") (VNx8HI "VNx8BI") (VNx16HI "VNx16BI") (VNx32HI "VNx32BI")
+ (VNx64HI "VNx64BI") (VNx2SI "VNx2BI") (VNx4SI "VNx4BI") (VNx8SI "VNx8BI")
+ (VNx16SI "VNx16BI") (VNx32SI "VNx32BI") (VNx2DI "VNx2BI") (VNx4DI "VNx4BI")
+ (VNx8DI "VNx8BI") (VNx16DI "VNx16BI")
+ (VNx2SF "VNx2BI") (VNx4SF "VNx4BI") (VNx8SF "VNx8BI") (VNx16SF "VNx16BI")
+ (VNx32SF "VNx32BI") (VNx2DF "VNx2BI") (VNx4DF "VNx4BI") (VNx8DF "VNx8BI")
+ (VNx16DF "VNx16BI")
+ (VNx2QI "VNx2BI") (VNx4QI "VNx4BI") (VNx8QI "VNx8BI") (VNx16QI "VNx16BI")
+ (VNx32QI "VNx32BI") (VNx64QI "VNx64BI") (VNx128QI "VNx128BI") (VNx2HI "VNx2BI")
+ (VNx4HI "VNx4BI") (VNx8HI "VNx8BI") (VNx16HI "VNx16BI") (VNx32HI "VNx32BI")
+ (VNx64HI "VNx64BI") (VNx2SI "VNx2BI") (VNx4SI "VNx4BI") (VNx8SI "VNx8BI")
+ (VNx16SI "VNx16BI") (VNx32SI "VNx32BI") (VNx2DI "VNx2BI") (VNx4DI "VNx4BI")
+ (VNx8DI "VNx8BI") (VNx16DI "VNx16BI")
+ (VNx2SF "VNx2BI") (VNx4SF "VNx4BI") (VNx8SF "VNx8BI") (VNx16SF "VNx16BI")
+ (VNx32SF "VNx32BI") (VNx2DF "VNx2BI") (VNx4DF "VNx4BI") (VNx8DF "VNx8BI")
+ (VNx16DF "VNx16BI")])
+
+;; Map a vector mode to its element mode.
+(define_mode_attr VSUB [
+ (VNx2QI "QI") (VNx4QI "QI") (VNx8QI "QI") (VNx16QI "QI")
+ (VNx32QI "QI") (VNx64QI "QI") (VNx128QI "QI") (VNx2HI "HI")
+ (VNx4HI "HI") (VNx8HI "HI") (VNx16HI "HI") (VNx32HI "HI")
+ (VNx64HI "HI") (VNx2SI "SI") (VNx4SI "SI") (VNx8SI "SI")
+ (VNx16SI "SI") (VNx32SI "SI") (VNx2DI "DI") (VNx4DI "DI")
+ (VNx8DI "DI") (VNx16DI "DI")
+ (VNx2SF "SF") (VNx4SF "SF") (VNx8SF "SF") (VNx16SF "SF")
+ (VNx32SF "SF") (VNx2DF "DF") (VNx4DF "DF") (VNx8DF "DF")
+ (VNx16DF "DF")])
+
+(define_mode_attr VDI_TO_VSI [
+ (VNx2QI "VNx4SI") (VNx4QI "VNx4SI") (VNx8QI "VNx4SI") (VNx16QI "VNx4SI") (VNx32QI "VNx4SI") (VNx64QI "VNx4SI") (VNx128QI "VNx4SI")
+ (VNx2HI "VNx4SI") (VNx4HI "VNx4SI") (VNx8HI "VNx4SI") (VNx16HI "VNx4SI") (VNx32HI "VNx4SI") (VNx64HI "VNx4SI")
+ (VNx2SI "VNx4SI") (VNx4SI "VNx4SI") (VNx8SI "VNx4SI") (VNx16SI "VNx4SI") (VNx32SI "VNx4SI")
+ (VNx2DI "VNx4SI") (VNx4DI "VNx8SI") (VNx8DI "VNx16SI") (VNx16DI "VNx32SI")])
+
+(define_mode_attr VDI_TO_VSI_VM [
+ (VNx2QI "VNx4BI") (VNx4QI "VNx4BI") (VNx8QI "VNx4BI") (VNx16QI "VNx4BI")
+ (VNx32QI "VNx4BI") (VNx64QI "VNx4BI") (VNx128QI "VNx4BI")
+ (VNx2HI "VNx4BI") (VNx4HI "VNx4BI") (VNx8HI "VNx4BI") (VNx16HI "VNx4BI") (VNx32HI "VNx4BI")
+ (VNx64HI "VNx64BI")
+ (VNx2SI "VNx4BI") (VNx4SI "VNx4BI") (VNx8SI "VNx4BI") (VNx16SI "VNx4BI") (VNx32SI "VNx4BI")
+ (VNx2DI "VNx4BI") (VNx4DI "VNx8BI") (VNx8DI "VNx16BI") (VNx16DI "VNx32BI")
+])
+
+(define_mode_attr vi_to_v64biti [
+ (VNx2QI "vnx2di") (VNx4QI "vnx2di") (VNx8QI "vnx2di") (VNx16QI "vnx2di") (VNx32QI "vnx2di") (VNx64QI "vnx2di") (VNx128QI "vnx2di")
+ (VNx2HI "vnx2di") (VNx4HI "vnx2di") (VNx8HI "vnx2di") (VNx16HI "vnx2di") (VNx32HI "vnx2di") (VNx64HI "vnx2di")
+ (VNx2SI "vnx2di") (VNx4SI "vnx2di") (VNx8SI "vnx2di") (VNx16SI "vnx2di") (VNx32SI "vnx2di")
+ (VNx2DI "vnx2di") (VNx4DI "vnx4di") (VNx8DI "vnx8di") (VNx16DI "vnx16di")])
+
+(define_int_iterator VMVOP [
+ UNSPEC_VMV
+])
+
+(define_int_attr vxoptab [
+ (UNSPEC_VMV "mv")
+])
+
+(define_int_attr VXOPTAB [
+ (UNSPEC_VMV "UNSPEC_VMV")
+])
+
+(define_int_attr immptab [
+ (UNSPEC_VMV "Ws5")
+])
@@ -26,6 +26,43 @@
;; - RVV intrinsic implmentation (Document:https://github.com/riscv/rvv-intrinsic-doc)
(include "vector-iterators.md")
+
+;; =========================================================================
+;; == Vector creation
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Vector Creation
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - Duplicate element to a vector
+;; - Initialize from individual elements
+;; -------------------------------------------------------------------------
+
+;; vector integer modes vec_duplicate.
+(define_expand "@vec_duplicate<mode>"
+ [(match_operand:VI 0 "register_operand")
+ (match_operand:<VSUB> 1 "reg_or_simm5_operand")]
+ "TARGET_VECTOR"
+{
+ emit_insn (gen_v_v_x (UNSPEC_VMV, <MODE>mode,
+ operands[0], const0_rtx, operands[1],
+ gen_rtx_REG (Pmode, X0_REGNUM), rvv_gen_policy ()));
+ DONE;
+})
+
+;; vector floating-point modes vec_duplicate.
+(define_expand "@vec_duplicate<mode>"
+ [(match_operand:VF 0 "register_operand")
+ (match_operand:<VSUB> 1 "register_operand")]
+ "TARGET_VECTOR"
+{
+ emit_insn (gen_vfmv_v_f (<MODE>mode, operands[0], const0_rtx,
+ operands[1], gen_rtx_REG (Pmode, X0_REGNUM),
+ rvv_gen_policy ()));
+ DONE;
+})
+
;; ===============================================================================
;; == Intrinsics
;; ===============================================================================
@@ -137,4 +174,200 @@
return "";
}
[(set_attr "type" "vsetvl")
- (set_attr "mode" "none")])
\ No newline at end of file
+ (set_attr "mode" "none")])
+
+;; -------------------------------------------------------------------------------
+;; ---- 7. Vector Loads and Stores
+;; -------------------------------------------------------------------------------
+;; Includes:
+;; - 7.4. Vector Unit-Stride Instructions
+;; - 7.5. Vector Strided Instructions
+;; - 7.6. Vector Indexed Instructions
+;; - 7.7. Unit-stride Fault-Only-First Instructions
+;; - 7.8. Vector Load/Store Segment Instructions
+;; - 7.8.1. Vector Unit-Stride Segment Loads and Stores
+;; - 7.8.2. Vector Strided Segment Loads and Stores
+;; - 7.8.3. Vector Indexed Segment Loads and Stores
+;; -------------------------------------------------------------------------------
+
+;; Vector Unit-Stride Loads.
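+;; Operand 1 is the mask (J, i.e. const0, means unmasked), operand 2 the
+;; merge value (J means none), operand 3 the base address, operand 4 the
+;; AVL and operand 5 the policy constant.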
+(define_insn "@vle<mode>"
+ [(set (match_operand:V 0 "register_operand" "=vd,vd, vr,vr")
+ (unspec:V
+ [(unspec:V
+ [(match_operand:<VM> 1 "vector_reg_or_const0_operand" "vm,vm, J,J")
+ (unspec:V
+ [(match_operand 3 "pmode_register_operand" "r,r, r,r")
+ (mem:BLK (scratch))] UNSPEC_UNIT_STRIDE_LOAD)
+ (match_operand:V 2 "vector_reg_or_const0_operand" "0,J, 0,J")] UNSPEC_SELECT)
+ (match_operand 4 "p_reg_or_const_csr_operand" "rK,rK, rK,rK")
+ (match_operand 5 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+ "TARGET_VECTOR"
+ "@
+ vle<sew>.v\t%0,(%3),%1.t
+ vle<sew>.v\t%0,(%3),%1.t
+ vle<sew>.v\t%0,(%3)
+ vle<sew>.v\t%0,(%3)"
+ [(set_attr "type" "vle")
+ (set_attr "mode" "<MODE>")])
+
+;; Vector Unit-Stride Stores.
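+;; Operand 0 is the mask (J means unmasked), operand 1 the base address,
+;; operand 2 the data to store, operand 3 the AVL and operand 4 the
+;; policy constant.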
+(define_insn "@vse<mode>"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(unspec:V
+ [(match_operand:<VM> 0 "vector_reg_or_const0_operand" "vm,J")
+ (unspec:BLK
+ [(match_operand 1 "pmode_register_operand" "r,r")
+ (match_operand:V 2 "register_operand" "vr,vr")
+ (mem:BLK (scratch))] UNSPEC_UNIT_STRIDE_STORE)
+ (match_dup 1)] UNSPEC_SELECT)
+ (match_operand 3 "p_reg_or_const_csr_operand" "rK,rK")
+ (match_operand 4 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+ "TARGET_VECTOR"
+ "@
+ vse<sew>.v\t%2,(%1),%0.t
+ vse<sew>.v\t%2,(%1)"
+ [(set_attr "type" "vse")
+ (set_attr "mode" "<MODE>")])
+
+;; Vector Unit-stride mask Loads.
+(define_insn "@vlm<mode>"
+ [(set (match_operand:VB 0 "register_operand" "=vr")
+ (unspec:VB
+ [(unspec:VB
+ [(match_operand 1 "pmode_register_operand" "r")
+ (mem:BLK (scratch))] UNSPEC_UNIT_STRIDE_LOAD)
+ (match_operand 2 "p_reg_or_const_csr_operand" "rK")
+ (match_operand 3 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+ "TARGET_VECTOR"
+ "vlm.v\t%0,(%1)"
+ [(set_attr "type" "vle")
+ (set_attr "mode" "<MODE>")])
+
+;; Vector Unit-stride mask Stores.
+(define_insn "@vsm<mode>"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(unspec:BLK
+ [(match_operand 0 "pmode_register_operand" "r")
+ (match_operand:VB 1 "register_operand" "vr")
+ (mem:BLK (scratch))] UNSPEC_UNIT_STRIDE_STORE)
+ (match_operand 2 "p_reg_or_const_csr_operand" "rK")
+ (match_operand 3 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+ "TARGET_VECTOR"
+ "vsm.v\t%1,(%0)"
+ [(set_attr "type" "vse")
+ (set_attr "mode" "<MODE>")])
+
+;; vmv.v.x
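+;; Expanded through emit_op5 so that a 64-bit scalar on RV32 is routed
+;; either to the 32-bit variant below or to a vector merge sequence.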
+(define_expand "@v<vxoptab><mode>_v_x"
+ [(unspec [
+ (match_operand:VI 0 "register_operand")
+ (match_operand:VI 1 "vector_reg_or_const0_operand")
+ (match_operand:<VSUB> 2 "reg_or_const_int_operand")
+ (match_operand 3 "p_reg_or_const_csr_operand")
+ (match_operand 4 "const_int_operand")
+ ] VMVOP)]
+ "TARGET_VECTOR"
+ {
+ emit_op5 (
+ <VXOPTAB>,
+ <MODE>mode, <VDI_TO_VSI>mode, <VDI_TO_VSI_VM>mode,
+ <VSUB>mode,
+ operands,
+ gen_v<vxoptab><mode>_v_x_internal,
+ gen_v<vxoptab><vi_to_v64biti>_v_x_32bit,
+ NULL,
+ satisfies_constraint_<immptab>,
+ 2, false
+ );
+ DONE;
+ }
+)
+
+;; Vector-Scalar Integer Move.
+(define_insn "@vmv<mode>_v_x_internal"
+ [(set (match_operand:VI 0 "register_operand" "=vr,vr,vr,vr")
+ (unspec:VI
+ [(match_operand:VI 1 "vector_reg_or_const0_operand" "0,0,J,J")
+ (vec_duplicate:VI
+ (match_operand:<VSUB> 2 "reg_or_simm5_operand" "r,Ws5,r,Ws5"))
+ (match_operand 3 "p_reg_or_const_csr_operand" "rK,rK,rK,rK")
+ (match_operand 4 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+ "TARGET_VECTOR"
+ "@
+ vmv.v.x\t%0,%2
+ vmv.v.i\t%0,%2
+ vmv.v.x\t%0,%2
+ vmv.v.i\t%0,%2"
+ [(set_attr "type" "vmove")
+ (set_attr "mode" "<MODE>")])
+
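+;; vmv.v.x for SEW=64 elements on RV32: the 32-bit scalar operand is
+;; sign-extended to the 64-bit element width by the broadcast.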
+(define_insn "@vmv<mode>_v_x_32bit"
+ [(set (match_operand:V64BITI 0 "register_operand" "=vr,vr,vr,vr")
+ (unspec:V64BITI
+ [(match_operand:V64BITI 1 "vector_reg_or_const0_operand" "0,0,J,J")
+ (vec_duplicate:V64BITI
+ (sign_extend:<VSUB> (match_operand:SI 2 "reg_or_simm5_operand" "r,Ws5,r,Ws5")))
+ (match_operand:SI 3 "csr_operand" "rK,rK,rK,rK")
+ (match_operand:SI 4 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+ "TARGET_VECTOR"
+ "@
+ vmv.v.x\t%0,%2
+ vmv.v.i\t%0,%2
+ vmv.v.x\t%0,%2
+ vmv.v.i\t%0,%2"
+ [(set_attr "type" "vmove")
+ (set_attr "mode" "<MODE>")])
+
+;; Vector-Scalar Floating-Point Move.
+(define_insn "@vfmv<mode>_v_f"
+ [(set (match_operand:VF 0 "register_operand" "=vr,vr")
+ (unspec:VF
+ [(match_operand:VF 1 "vector_reg_or_const0_operand" "0,J")
+ (vec_duplicate:VF
+ (match_operand:<VSUB> 2 "register_operand" "f,f"))
+ (match_operand 3 "p_reg_or_const_csr_operand" "rK,rK")
+ (match_operand 4 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+ "TARGET_VECTOR"
+ "vfmv.v.f\t%0,%2"
+ [(set_attr "type" "vmove")
+ (set_attr "mode" "<MODE>")])
+
+;; Vector-Scalar integer merge.
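+;; vmerge.vxm/vim select the scalar operand 4 where the mask (operand 1)
+;; is set and the vector operand 3 elsewhere.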
+(define_insn "@vmerge<mode>_vxm_internal"
+ [(set (match_operand:VI 0 "register_operand" "=vd,vd,vd,vd")
+ (unspec:VI
+ [(match_operand:VI 2 "vector_reg_or_const0_operand" "0,0,J,J")
+ (unspec:VI
+ [(match_operand:<VM> 1 "register_operand" "vm,vm,vm,vm")
+ (match_operand:VI 3 "register_operand" "vr,vr,vr,vr")
+ (vec_duplicate:VI
+ (match_operand:<VSUB> 4 "reg_or_simm5_operand" "r,Ws5,r,Ws5"))] UNSPEC_MERGE)
+ (match_operand 5 "p_reg_or_const_csr_operand" "rK,rK,rK,rK")
+ (match_operand 6 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+ "TARGET_VECTOR"
+ "@
+ vmerge.vxm\t%0,%3,%4,%1
+ vmerge.vim\t%0,%3,%4,%1
+ vmerge.vxm\t%0,%3,%4,%1
+ vmerge.vim\t%0,%3,%4,%1"
+ [(set_attr "type" "vmerge")
+ (set_attr "mode" "<MODE>")])
\ No newline at end of file
From: zhongjuzhe <juzhe.zhong@rivai.ai>

gcc/ChangeLog:

	* config.gcc: Add riscv-insert-vsetvl.o extra_objs for RVV support.
	* config/riscv/constraints.md (Ws5): New constraint.
	* config/riscv/predicates.md (p_reg_or_const_csr_operand): New predicate.
	(vector_reg_or_const0_operand): New predicate.
	(vector_move_operand): New predicate.
	(reg_or_mem_operand): New predicate.
	(reg_or_simm5_operand): New predicate.
	(reg_or_const_int_operand): New predicate.
	* config/riscv/riscv-opts.h (enum vsew_field_enum): New enum.
	* config/riscv/riscv-passes.def (INSERT_PASS_AFTER): Run insert
	vsetvl pass after pass_split_all_insns.
	(INSERT_PASS_BEFORE): Run insert vsetvl pass before pass_sched2.
	* config/riscv/riscv-protos.h (make_pass_insert_vsetvl): New function.
	(make_pass_insert_vsetvl2): New function.
	(rvv_mask_mode_p): New function.
	(rvv_classify_vsew_field): New function.
	(rvv_gen_policy): New function.
	(rvv_get_mask_mode): New function.
	(rvv_translate_attr_mode): New function.
	* config/riscv/riscv-vector-builtins-iterators.def (V): New iterator.
	(VF): New iterator.
	(VB): New iterator.
	(VFULL): New iterator.
	(VPARTIAL): New iterator.
	(V64BITI): New iterator.
	(VM): New iterator.
	(VSUB): New iterator.
	(VDI_TO_VSI): New iterator.
	(VDI_TO_VSI_VM): New iterator.
	* config/riscv/riscv-vector.cc (rvv_classify_vsew_field): New function.
	(rvv_gen_policy): New function.
	(rvv_translate_attr_mode): New function.
	(TRANSLATE_VECTOR_MODE): New macro define.
	(classify_vtype_field): New function.
	(get_lmulx8): New function.
	(force_reg_for_over_uimm): New function.
	(gen_vlx2): New function.
	(emit_int64_to_vector_32bit): New function.
	(imm32_p): New function.
	(imm_p): New typedef.
	(gen_3): New typedef.
	(gen_4): New typedef.
	(gen_5): New typedef.
	(gen_6): New typedef.
	(gen_7): New typedef.
	(enum GEN_CLASS): New enum.
	(modify_operands): New function.
	(emit_op5_vmv_v_x): New function.
	(emit_op5): New function.
	* config/riscv/riscv-vector.h (riscv_vector_mode_p): Remove.
	(rvv_legitimate_poly_int_p): Remove.
	(rvv_offset_temporaries): Remove.
	(rvv_classify_vlmul_field): Remove.
	(rvv_parse_vsew_field): Remove.
	(rvv_parse_vlmul_field): Remove.
	(rvv_parse_vta_field): Remove.
	(rvv_parse_vma_field): Remove.
	(rvv_regsize): Remove.
	(rvv_get_mask_mode): Remove.
	* config/riscv/riscv.md: Add RVV modes.
	* config/riscv/t-riscv: Add riscv-insert-vsetvl.o rule.
	* config/riscv/vector-iterators.md: New iterators and attributes.
	* config/riscv/vector.md (@vec_duplicate<mode>): New pattern.
	(@vle<mode>): New pattern.
	(@vse<mode>): New pattern.
	(@vlm<mode>): New pattern.
	(@vsm<mode>): New pattern.
	(@v<vxoptab><mode>_v_x): New pattern.
	(@vmv<mode>_v_x_internal): New pattern.
	(@vmv<mode>_v_x_32bit): New pattern.
	(@vfmv<mode>_v_f): New pattern.
	(@vmerge<mode>_vxm_internal): New pattern.
	* config/riscv/riscv-insert-vsetvl.cc: New file.
---
 gcc/config.gcc                                     |    2 +-
 gcc/config/riscv/constraints.md                    |    5 +
 gcc/config/riscv/predicates.md                     |   31 +
 gcc/config/riscv/riscv-insert-vsetvl.cc            | 2312 +++++++++++++++++
 gcc/config/riscv/riscv-opts.h                      |   12 +
 gcc/config/riscv/riscv-passes.def                  |    2 +
 gcc/config/riscv/riscv-protos.h                    |   19 +
 .../riscv/riscv-vector-builtins-iterators.def      |  236 ++
 gcc/config/riscv/riscv-vector.cc                   |  368 +++
 gcc/config/riscv/riscv-vector.h                    |   10 -
 gcc/config/riscv/riscv.md                          |   67 +-
 gcc/config/riscv/t-riscv                           |    4 +
 gcc/config/riscv/vector-iterators.md               |  129 +-
 gcc/config/riscv/vector.md                         |  235 +-
 14 files changed, 3417 insertions(+), 15 deletions(-)
 create mode 100644 gcc/config/riscv/riscv-insert-vsetvl.cc