Message ID | 7fea7fd8-2869-47cd-69cf-ccc9bfa05733@arm.com |
---|---|
State | New |
Headers | show |
Series | arm: Fix regressions around MVE predicate codegen | expand |
Looks like the first patch was missing a change I had made to prevent mve_bool_vec_to_const ICEing if called with a non-vector immediate. Now included. On 24/01/2023 13:56, Andre Vieira (lists) via Gcc-patches wrote: > Hi, > > This patch fixes the way we synthesize MVE predicate immediates and > fixes some other inconsistencies around predicates. For instance this > patch fixes the modes used in the vctp intrinsics, to couple them with > predicate modes with the appropriate lane numbers. For this V2QI is > added to represent a predicate created by vctp64q. The reason we use > V2QI and not for instance a V2BI with 8-bit boolean modes is because we > are trying to avoid having two 'INT' modes of the same size. We make > sure we use the V2QI mode instead of HI for any instruction working on > two lanes of 64-bits consuming a predicate. > > Bootstrapped on aarch64-none-linux-gnu and regression tested on > arm-none-eabi and armeb-none-eabi for armv8.1-m.main+mve.fp. > > OK for trunk? > > gcc/ChangeLog: > > PR target/108443 > * config/arm/arm.h (VALID_MVE_PRED_MODE): Add V2QI. > * config/arm/arm.cc (thumb2_legitimate_address_p): Use HImode for > addressing MVE predicate modes. > (mve_bool_vec_to_const): Change to represent correct MVE predicate > format. > (arm_hard_regno_mode_ok): Use VALID_MVE_PRED_MODE instead of > checking modes. > (arm_vector_mode_supported_p): Likewise. > (arm_mode_to_pred_mode): Add V2QI. > * config/arm/arm-builtins.cc (UNOP_PRED_UNONE_QUALIFIERS): New > qualifier. > (UNOP_PRED_PRED_QUALIFIERS): New qualifier > (BINOP_PRED_UNONE_PRED_QUALIFIERS): New qualifier. > (v2qi_UP): New macro. > (v4bi_UP): New macro. > (v8bi_UP): New macro. > (v16bi_UP): New macro. > (arm_expand_builtin_args): Make it able to expand the new predicate > modes. > * config/arm/arm-modes.def (V2QI): New mode. > * config/arm/arm-simd-builtin-types.def (Pred1x16_t, Pred2x8_t > Pred4x4_t): Remove unused predicate builtin types. > * config/arm/arm_mve.h (__arm_vctp16q, __arm_vctp32q, __arm_vctp64q, > __arm_vctp8q, __arm_vpnot, __arm_vctp8q_m, __arm_vctp64q_m, > __arm_vctp32q_m, __arm_vctp16q_m): Use predicate modes. > * config/arm/arm_mve_builtins.def (vctp16q, vctp32q, vctp64q, vctp8q, > vpnot, vctp8q_m, vctp16q_m, vctp32q_m, vctp64q_m): Likewise. > * config/arm/constraints.md (DB): Check for VALID_MVE_PRED_MODE > instead > of MODE_VECTOR_BOOL. > * config/arm/iterators.md (MVE_7, MVE_7_HI): Add V2QI > (MVE_VPRED): Likewise. > (MVE_vpred): Add V2QI and map upper case predicate modes to > lower case. > (MVE_vctp): New mode attribute. > (mode1): Remove. > (VCTPQ): Remove. > (VCTPQ_M): Remove. > * config/arm/mve.md (mve_vctp<mode1>qhi): Rename this... > (mve_vctp<MVE_vctp>q<MVE_vpred>): ... to this. And use new mode > attributes. > (mve_vpnothi): Rename this... > (mve_vpnotv16bi): ... to this. > (mve_vctp<mode1>q_mhi): Rename this... > (mve_vctp<MVE_vctp>q_m<MVE_vpred>):... to this. > (mve_vldrdq_gather_base_z_<supf>v2di, > mve_vldrdq_gather_offset_z_<supf>v2di, > mve_vldrdq_gather_shifted_offset_z_<supf>v2di, > mve_vstrdq_scatter_base_p_<supf>v2di, > mve_vstrdq_scatter_offset_p_<supf>v2di, > mve_vstrdq_scatter_offset_p_<supf>v2di_insn, > mve_vstrdq_scatter_shifted_offset_p_<supf>v2di, > mve_vstrdq_scatter_shifted_offset_p_<supf>v2di_insn, > mve_vstrdq_scatter_base_wb_p_<supf>v2di, > mve_vldrdq_gather_base_wb_z_<supf>v2di, > mve_vldrdq_gather_base_nowb_z_<supf>v2di, > mve_vldrdq_gather_base_wb_z_<supf>v2di_insn): Use V2QI insead of > HI for predicates. > * config/arm/unspecs.md (VCTP8Q, VCTP16Q, VCTP32Q, VCTP64Q): Replace > these... > (VCTP): ... with this. > (VCTP8Q_M, VCTP16Q_M, VCTP32Q_M, VCTP64Q_M): Replace these... > (VCTP_M): ... with this. > * config/arm/vfp.md (*thumb2_movhi_vfp, *thumb2_movhi_fp16): Use > VALID_MVE_PRED_MODE > instead of checking for MODE_VECTOR_BOOL class. > > > gcc/testsuite/ChangeLog: > > * gcc.dg/rtl/arm/mve-vxbi.c: Use new predicate modes. > * gcc.target/arm/mve/pr108443-run.c: New test. > * gcc.target/arm/mve/pr108443.c: New test. diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc index 6c67cec93ff76a4b42f3a0b305f697142e88fcd9..22fc7f10e92b33a861a321e43becee4647738b61 100644 --- a/gcc/config/arm/arm-builtins.cc +++ b/gcc/config/arm/arm-builtins.cc @@ -384,6 +384,19 @@ arm_unop_unone_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define UNOP_UNONE_IMM_QUALIFIERS \ (arm_unop_unone_imm_qualifiers) +static enum arm_type_qualifiers +arm_unop_pred_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_predicate, qualifier_unsigned }; +#define UNOP_PRED_UNONE_QUALIFIERS \ + (arm_unop_pred_unone_qualifiers) + +static enum arm_type_qualifiers +arm_unop_pred_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_predicate, qualifier_predicate }; +#define UNOP_PRED_PRED_QUALIFIERS \ + (arm_unop_pred_pred_qualifiers) + + static enum arm_type_qualifiers arm_binop_none_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_none, qualifier_none }; @@ -426,6 +439,12 @@ arm_binop_pred_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define BINOP_PRED_UNONE_UNONE_QUALIFIERS \ (arm_binop_pred_unone_unone_qualifiers) +static enum arm_type_qualifiers +arm_binop_pred_unone_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_predicate, qualifier_unsigned, qualifier_predicate }; +#define BINOP_PRED_UNONE_PRED_QUALIFIERS \ + (arm_binop_pred_unone_pred_qualifiers) + static enum arm_type_qualifiers arm_binop_unone_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_none, qualifier_immediate }; @@ -851,6 +870,10 @@ arm_set_sat_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_void, qualifier_none }; #define SET_SAT_QUALIFIERS (arm_set_sat_qualifiers) +#define v2qi_UP E_V2QImode +#define v4bi_UP E_V4BImode +#define v8bi_UP E_V8BImode +#define v16bi_UP E_V16BImode #define v8qi_UP E_V8QImode #define v4hi_UP E_V4HImode #define v4hf_UP E_V4HFmode @@ -2989,11 +3012,14 @@ arm_expand_builtin_args (rtx target, machine_mode map_mode, int fcode, op[argc] = convert_memory_address (Pmode, op[argc]); /* MVE uses mve_pred16_t (aka HImode) for vectors of - predicates. */ - if (GET_MODE_CLASS (mode[argc]) == MODE_VECTOR_BOOL) + predicates, but internally we use V16BI/V8BI/V4BI/V2QI for + MVE predicate modes. */ + if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode[argc])) op[argc] = gen_lowpart (mode[argc], op[argc]); - /*gcc_assert (GET_MODE (op[argc]) == mode[argc]); */ + gcc_assert (GET_MODE (op[argc]) == mode[argc] + || (GET_MODE(op[argc]) == E_VOIDmode + && CONSTANT_P (op[argc]))); if (!(*insn_data[icode].operand[opno].predicate) (op[argc], mode[argc])) op[argc] = copy_to_mode_reg (mode[argc], op[argc]); @@ -3198,7 +3224,7 @@ constant_arg: else emit_insn (insn); - if (GET_MODE_CLASS (tmode) == MODE_VECTOR_BOOL) + if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (tmode)) { rtx HItarget = gen_reg_rtx (HImode); emit_move_insn (HItarget, gen_lowpart (HImode, target)); diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def index 364736d17fa5ae871c49b73bccd4695cba76c71e..c7645c480f63ab4d85cad207099c318f8e69a07d 100644 --- a/gcc/config/arm/arm-modes.def +++ b/gcc/config/arm/arm-modes.def @@ -91,6 +91,7 @@ BOOL_MODE (B4I, 4, 1); VECTOR_BOOL_MODE (V16BI, 16, BI, 2); VECTOR_BOOL_MODE (V8BI, 8, B2I, 2); VECTOR_BOOL_MODE (V4BI, 4, B4I, 2); +VECTOR_MODE (INT, QI, 2); /* Fraction and accumulator vector modes. */ VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */ diff --git a/gcc/config/arm/arm-simd-builtin-types.def b/gcc/config/arm/arm-simd-builtin-types.def index 91584c99abb8d0452201f0a32c453a0d4304c9f4..daa4bd95e307973e88a8590e5231fc410c840d4f 100644 --- a/gcc/config/arm/arm-simd-builtin-types.def +++ b/gcc/config/arm/arm-simd-builtin-types.def @@ -51,7 +51,3 @@ ENTRY (Bfloat16x2_t, V2BF, none, 32, bfloat16, 20) ENTRY (Bfloat16x4_t, V4BF, none, 64, bfloat16, 20) ENTRY (Bfloat16x8_t, V8BF, none, 128, bfloat16, 20) - - ENTRY (Pred1x16_t, V16BI, predicate, 16, pred1, 16) - ENTRY (Pred2x8_t, V8BI, predicate, 8, pred1, 15) - ENTRY (Pred4x4_t, V4BI, predicate, 4, pred1, 15) diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 4352c830cb6d2e632a225edea861b5ceb35dd035..6b6214ebb38f7466234b2b263279d956c7b80ad2 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1093,7 +1093,8 @@ extern const int arm_arch_cde_coproc_bits[]; #define VALID_MVE_PRED_MODE(MODE) \ ((MODE) == HImode \ - || (MODE) == V16BImode || (MODE) == V8BImode || (MODE) == V4BImode) + || (MODE) == V16BImode || (MODE) == V8BImode || (MODE) == V4BImode \ + || (MODE) == V2QImode) #define VALID_MVE_SI_MODE(MODE) \ ((MODE) == V2DImode ||(MODE) == V4SImode || (MODE) == V8HImode \ diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 18460ef5280be8c1df85eff424a1bf66d6019c0a..f060e463956cee86890ef42fce70f23b1dbac8ec 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -8643,6 +8643,11 @@ thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p) bool use_ldrd; enum rtx_code code = GET_CODE (x); + /* If we are dealing with a MVE predicate mode, then treat it as a HImode as + can store and load it like any other 16-bit value. */ + if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode)) + mode = HImode; + if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode)) return mve_vector_mem_operand (mode, x, strict_p); @@ -12921,7 +12926,7 @@ simd_valid_immediate (rtx op, machine_mode mode, int inverse, /* Only support 128-bit vectors for MVE. */ if (TARGET_HAVE_MVE && (!vector - || (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) + || VALID_MVE_PRED_MODE (mode) || n_elts * innersize != 16)) return -1; @@ -13294,9 +13299,15 @@ neon_vdup_constant (rtx vals, bool generate) rtx mve_bool_vec_to_const (rtx const_vec) { - int n_elts = GET_MODE_NUNITS ( GET_MODE (const_vec)); - int repeat = 16 / n_elts; - int i; + machine_mode mode = GET_MODE (const_vec); + + if (!VECTOR_MODE_P (mode)) + return const_vec; + + unsigned n_elts = GET_MODE_NUNITS (mode); + unsigned el_prec = GET_MODE_PRECISION (GET_MODE_INNER (mode)); + unsigned shift_c = 16 / n_elts; + unsigned i; int hi_val = 0; for (i = 0; i < n_elts; i++) @@ -13305,12 +13316,16 @@ mve_bool_vec_to_const (rtx const_vec) unsigned HOST_WIDE_INT elpart; gcc_assert (CONST_INT_P (el)); - elpart = INTVAL (el); + elpart = INTVAL (el) & ((1U << el_prec) - 1); + + unsigned index = BYTES_BIG_ENDIAN ? n_elts - i - 1 : i; - for (int j = 0; j < repeat; j++) - hi_val |= elpart << (i * repeat + j); + hi_val |= elpart << (index * shift_c); } - return gen_int_mode (hi_val, HImode); + /* We are using mov immediate to encode this constant which writes 32-bits + so we need to make sure the top 16-bits are all 0, otherwise we can't + guarantee we can actually write this immediate. */ + return gen_int_mode (hi_val, SImode); } /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only @@ -13353,7 +13368,7 @@ neon_make_constant (rtx vals, bool generate) && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL)) /* Load using VMOV. On Cortex-A8 this takes one cycle. */ return const_vec; - else if (TARGET_HAVE_MVE && (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)) + else if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE(mode)) return mve_bool_vec_to_const (const_vec); else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX) /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON @@ -29458,14 +29473,12 @@ arm_vector_mode_supported_p (machine_mode mode) return true; if (TARGET_HAVE_MVE - && (mode == V2DImode || mode == V4SImode || mode == V8HImode - || mode == V16QImode - || mode == V16BImode || mode == V8BImode || mode == V4BImode)) - return true; + && (VALID_MVE_SI_MODE (mode) || VALID_MVE_PRED_MODE (mode))) + return true; if (TARGET_HAVE_MVE_FLOAT && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode)) - return true; + return true; return false; } @@ -31269,6 +31282,7 @@ arm_mode_to_pred_mode (machine_mode mode) case 16: return V16BImode; case 8: return V8BImode; case 4: return V4BImode; + case 2: return V2QImode; } return opt_machine_mode (); } diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 00fb42afecc74c29dccaa1a601700033598100ce..d28843902285763d0a6416825348ee49a71a1efe 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -3528,35 +3528,35 @@ __extension__ extern __inline mve_pred16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vctp16q (uint32_t __a) { - return __builtin_mve_vctp16qhi (__a); + return __builtin_mve_vctp16qv8bi (__a); } __extension__ extern __inline mve_pred16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vctp32q (uint32_t __a) { - return __builtin_mve_vctp32qhi (__a); + return __builtin_mve_vctp32qv4bi (__a); } __extension__ extern __inline mve_pred16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vctp64q (uint32_t __a) { - return __builtin_mve_vctp64qhi (__a); + return __builtin_mve_vctp64qv2qi (__a); } __extension__ extern __inline mve_pred16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vctp8q (uint32_t __a) { - return __builtin_mve_vctp8qhi (__a); + return __builtin_mve_vctp8qv16bi (__a); } __extension__ extern __inline mve_pred16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vpnot (mve_pred16_t __a) { - return __builtin_mve_vpnothi (__a); + return __builtin_mve_vpnotv16bi (__a); } __extension__ extern __inline uint8x16_t @@ -6696,28 +6696,28 @@ __extension__ extern __inline mve_pred16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vctp8q_m (uint32_t __a, mve_pred16_t __p) { - return __builtin_mve_vctp8q_mhi (__a, __p); + return __builtin_mve_vctp8q_mv16bi (__a, __p); } __extension__ extern __inline mve_pred16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vctp64q_m (uint32_t __a, mve_pred16_t __p) { - return __builtin_mve_vctp64q_mhi (__a, __p); + return __builtin_mve_vctp64q_mv2qi (__a, __p); } __extension__ extern __inline mve_pred16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vctp32q_m (uint32_t __a, mve_pred16_t __p) { - return __builtin_mve_vctp32q_mhi (__a, __p); + return __builtin_mve_vctp32q_mv4bi (__a, __p); } __extension__ extern __inline mve_pred16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vctp16q_m (uint32_t __a, mve_pred16_t __p) { - return __builtin_mve_vctp16q_mhi (__a, __p); + return __builtin_mve_vctp16q_mv8bi (__a, __p); } __extension__ extern __inline uint64_t diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index e3a61f81be7814806be4f69e9b27b89663554297..5e5510f6e37f197ffc4c08a1166bd0d80262f65a 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -71,11 +71,11 @@ VAR2 (UNOP_UNONE_NONE, vcvtaq_u, v8hi, v4si) VAR2 (UNOP_UNONE_IMM, vmvnq_n_u, v8hi, v4si) VAR1 (UNOP_UNONE_UNONE, vrev16q_u, v16qi) VAR1 (UNOP_UNONE_UNONE, vaddlvq_u, v4si) -VAR1 (UNOP_UNONE_UNONE, vctp16q, hi) -VAR1 (UNOP_UNONE_UNONE, vctp32q, hi) -VAR1 (UNOP_UNONE_UNONE, vctp64q, hi) -VAR1 (UNOP_UNONE_UNONE, vctp8q, hi) -VAR1 (UNOP_UNONE_UNONE, vpnot, hi) +VAR1 (UNOP_PRED_UNONE, vctp16q, v8bi) +VAR1 (UNOP_PRED_UNONE, vctp32q, v4bi) +VAR1 (UNOP_PRED_UNONE, vctp64q, v2qi) +VAR1 (UNOP_PRED_UNONE, vctp8q, v16bi) +VAR1 (UNOP_PRED_PRED, vpnot, v16bi) VAR2 (BINOP_NONE_NONE_NONE, vsubq_n_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vbrsrq_n_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_IMM, vcvtq_n_to_f_s, v8hf, v4sf) @@ -265,10 +265,10 @@ VAR2 (BINOP_NONE_NONE_IMM, vshllbq_n_s, v16qi, v8hi) VAR2 (BINOP_NONE_NONE_IMM, vorrq_n_s, v8hi, v4si) VAR2 (BINOP_NONE_NONE_IMM, vbicq_n_s, v8hi, v4si) VAR1 (BINOP_UNONE_UNONE_UNONE, vrmlaldavhq_u, v4si) -VAR1 (BINOP_UNONE_UNONE_UNONE, vctp8q_m, hi) -VAR1 (BINOP_UNONE_UNONE_UNONE, vctp64q_m, hi) -VAR1 (BINOP_UNONE_UNONE_UNONE, vctp32q_m, hi) -VAR1 (BINOP_UNONE_UNONE_UNONE, vctp16q_m, hi) +VAR1 (BINOP_PRED_UNONE_PRED, vctp8q_m, v16bi) +VAR1 (BINOP_PRED_UNONE_PRED, vctp64q_m, v2qi) +VAR1 (BINOP_PRED_UNONE_PRED, vctp32q_m, v4bi) +VAR1 (BINOP_PRED_UNONE_PRED, vctp16q_m, v8bi) VAR1 (BINOP_UNONE_UNONE_UNONE, vaddlvaq_u, v4si) VAR1 (BINOP_NONE_NONE_NONE, vrmlsldavhxq_s, v4si) VAR1 (BINOP_NONE_NONE_NONE, vrmlsldavhq_s, v4si) diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md index 0155ab9f7e7750f4359b937263e297da9c196fc5..504cd938b26e2629183a883383a81bb4e775bb92 100644 --- a/gcc/config/arm/constraints.md +++ b/gcc/config/arm/constraints.md @@ -316,7 +316,7 @@ (define_constraint "DB" "@internal In ARM/Thumb-2 state with MVE a constant vector of booleans." (and (match_code "const_vector") - (match_test "TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL"))) + (match_test "TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode)"))) (define_constraint "Da" "@internal diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 04469064f12ca2900b1657ce6e1ccca8c41e4479..39895ad62aa3afd55d3cbc92c55b45bc56710bcb 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -272,8 +272,8 @@ (define_mode_iterator MVE_3 [V16QI V8HI]) (define_mode_iterator MVE_2 [V16QI V8HI V4SI]) (define_mode_iterator MVE_5 [V8HI V4SI]) (define_mode_iterator MVE_6 [V8HI V4SI]) -(define_mode_iterator MVE_7 [V16BI V8BI V4BI]) -(define_mode_iterator MVE_7_HI [HI V16BI V8BI V4BI]) +(define_mode_iterator MVE_7 [V16BI V8BI V4BI V2QI]) +(define_mode_iterator MVE_7_HI [HI V16BI V8BI V4BI V2QI]) ;;---------------------------------------------------------------------------- ;; Code iterators @@ -949,9 +949,12 @@ (define_mode_attr V_extr_elem [(V16QI "u8") (V8HI "u16") (V4SI "32") (define_mode_attr earlyclobber_32 [(V16QI "=w") (V8HI "=w") (V4SI "=&w") (V8HF "=w") (V4SF "=&w")]) (define_mode_attr MVE_VPRED [(V16QI "V16BI") (V8HI "V8BI") (V4SI "V4BI") - (V2DI "HI") (V8HF "V8BI") (V4SF "V4BI")]) + (V8HF "V8BI") (V4SF "V4BI") (V2DI "V2QI")]) (define_mode_attr MVE_vpred [(V16QI "v16bi") (V8HI "v8bi") (V4SI "v4bi") - (V2DI "hi") (V8HF "v8bi") (V4SF "v4bi")]) + (V8HF "v8bi") (V4SF "v4bi") + (V16BI "v16bi") (V8BI "v8bi") (V4BI "v4bi") + (V2QI "v2qi")]) +(define_mode_attr MVE_vctp [(V16BI "8") (V8BI "16") (V4BI "32") (V2QI "64")]) ;;---------------------------------------------------------------------------- ;; Code attributes @@ -1461,11 +1464,6 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s") (VADCIQ_M_S "s") (SQRSHRL_64 "64") (SQRSHRL_48 "48") (UQRSHLL_64 "64") (UQRSHLL_48 "48") (VSHLCQ_M_S "s") (VSHLCQ_M_U "u")]) - -(define_int_attr mode1 [(VCTP8Q "8") (VCTP16Q "16") (VCTP32Q "32") - (VCTP64Q "64") (VCTP8Q_M "8") (VCTP16Q_M "16") - (VCTP32Q_M "32") (VCTP64Q_M "64")]) - ;; Both kinds of return insn. (define_code_iterator RETURNS [return simple_return]) (define_code_attr return_str [(return "") (simple_return "simple_")]) @@ -1557,8 +1555,6 @@ (define_int_iterator VCVTPQ [VCVTPQ_S VCVTPQ_U]) (define_int_iterator VCVTNQ [VCVTNQ_S VCVTNQ_U]) (define_int_iterator VCVTMQ [VCVTMQ_S VCVTMQ_U]) (define_int_iterator VADDLVQ [VADDLVQ_U VADDLVQ_S]) -(define_int_iterator VCTPQ [VCTP8Q VCTP16Q VCTP32Q VCTP64Q]) -(define_int_iterator VCTPQ_M [VCTP8Q_M VCTP16Q_M VCTP32Q_M VCTP64Q_M]) (define_int_iterator VCVTQ_N_TO_F [VCVTQ_N_TO_F_S VCVTQ_N_TO_F_U]) (define_int_iterator VCREATEQ [VCREATEQ_U VCREATEQ_S]) (define_int_iterator VSHRQ_N [VSHRQ_N_S VSHRQ_N_U]) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index f123edc449b8b20bfb4a15c1fb0eccdbfff1339c..cd87e2b58d462895b950254d9d6bc2e2ff065d57 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -643,24 +643,24 @@ (define_insn "mve_vaddlvq_<supf>v4si" ;; ;; [vctp8q vctp16q vctp32q vctp64q]) ;; -(define_insn "mve_vctp<mode1>qhi" +(define_insn "mve_vctp<MVE_vctp>q<MVE_vpred>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:SI 1 "s_register_operand" "r")] - VCTPQ)) + (set (match_operand:MVE_7 0 "vpr_register_operand" "=Up") + (unspec:MVE_7 [(match_operand:SI 1 "s_register_operand" "r")] + VCTP)) ] "TARGET_HAVE_MVE" - "vctp.<mode1> %1" + "vctp.<MVE_vctp> %1" [(set_attr "type" "mve_move") ]) ;; ;; [vpnot]) ;; -(define_insn "mve_vpnothi" +(define_insn "mve_vpnotv16bi" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:HI 1 "vpr_register_operand" "0")] + (set (match_operand:V16BI 0 "vpr_register_operand" "=Up") + (unspec:V16BI [(match_operand:V16BI 1 "vpr_register_operand" "0")] VPNOT)) ] "TARGET_HAVE_MVE" @@ -1959,15 +1959,15 @@ (define_insn "mve_vcmulq<mve_rot><mode>" ;; ;; [vctp8q_m vctp16q_m vctp32q_m vctp64q_m]) ;; -(define_insn "mve_vctp<mode1>q_mhi" +(define_insn "mve_vctp<MVE_vctp>q_m<MVE_vpred>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:SI 1 "s_register_operand" "r") - (match_operand:HI 2 "vpr_register_operand" "Up")] - VCTPQ_M)) + (set (match_operand:MVE_7 0 "vpr_register_operand" "=Up") + (unspec:MVE_7 [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:MVE_7 2 "vpr_register_operand" "Up")] + VCTP_M)) ] "TARGET_HAVE_MVE" - "vpst\;vctpt.<mode1> %1" + "vpst\;vctpt.<MVE_vctp> %1" [(set_attr "type" "mve_move") (set_attr "length""8")]) @@ -7666,7 +7666,7 @@ (define_insn "mve_vldrdq_gather_base_z_<supf>v2di" [(set (match_operand:V2DI 0 "s_register_operand" "=&w") (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V2QI 3 "vpr_register_operand" "Up")] VLDRDGBQ)) ] "TARGET_HAVE_MVE" @@ -7707,7 +7707,7 @@ (define_insn "mve_vldrdq_gather_offset_z_<supf>v2di" [(set (match_operand:V2DI 0 "s_register_operand" "=&w") (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "Us") (match_operand:V2DI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V2QI 3 "vpr_register_operand" "Up")] VLDRDGOQ)) ] "TARGET_HAVE_MVE" @@ -7748,7 +7748,7 @@ (define_insn "mve_vldrdq_gather_shifted_offset_z_<supf>v2di" [(set (match_operand:V2DI 0 "s_register_operand" "=&w") (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "Us") (match_operand:V2DI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V2QI 3 "vpr_register_operand" "Up")] VLDRDGSOQ)) ] "TARGET_HAVE_MVE" @@ -8361,7 +8361,7 @@ (define_insn "mve_vstrdq_scatter_base_p_<supf>v2di" [(match_operand:V2DI 0 "s_register_operand" "w") (match_operand:SI 1 "mve_vldrd_immediate" "Ri") (match_operand:V2DI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V2QI 3 "vpr_register_operand" "Up")] VSTRDSBQ)) ] "TARGET_HAVE_MVE" @@ -8404,7 +8404,7 @@ (define_expand "mve_vstrdq_scatter_offset_p_<supf>v2di" [(match_operand:V2DI 0 "mve_scatter_memory") (match_operand:V2DI 1 "s_register_operand") (match_operand:V2DI 2 "s_register_operand") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:V2QI 3 "vpr_register_operand") (unspec:V4SI [(const_int 0)] VSTRDSOQ)] "TARGET_HAVE_MVE" { @@ -8422,7 +8422,7 @@ (define_insn "mve_vstrdq_scatter_offset_p_<supf>v2di_insn" [(match_operand:SI 0 "register_operand" "r") (match_operand:V2DI 1 "s_register_operand" "w") (match_operand:V2DI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V2QI 3 "vpr_register_operand" "Up")] VSTRDSOQ))] "TARGET_HAVE_MVE" "vpst\;vstrdt.64\t%q2, [%0, %q1]" @@ -8463,7 +8463,7 @@ (define_expand "mve_vstrdq_scatter_shifted_offset_p_<supf>v2di" [(match_operand:V2DI 0 "mve_scatter_memory") (match_operand:V2DI 1 "s_register_operand") (match_operand:V2DI 2 "s_register_operand") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:V2QI 3 "vpr_register_operand") (unspec:V4SI [(const_int 0)] VSTRDSSOQ)] "TARGET_HAVE_MVE" { @@ -8482,7 +8482,7 @@ (define_insn "mve_vstrdq_scatter_shifted_offset_p_<supf>v2di_insn" [(match_operand:SI 0 "register_operand" "r") (match_operand:V2DI 1 "s_register_operand" "w") (match_operand:V2DI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V2QI 3 "vpr_register_operand" "Up")] VSTRDSSOQ))] "TARGET_HAVE_MVE" "vpst\;vstrdt.64\t%q2, [%0, %q1, UXTW #3]" @@ -9454,7 +9454,7 @@ (define_insn "mve_vstrdq_scatter_base_wb_p_<supf>v2di" [(match_operand:V2DI 1 "s_register_operand" "0") (match_operand:SI 2 "mve_vldrd_immediate" "Ri") (match_operand:V2DI 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand")] + (match_operand:V2QI 4 "vpr_register_operand")] VSTRDSBWBQ)) (set (match_operand:V2DI 0 "s_register_operand" "=w") (unspec:V2DI [(match_dup 1) (match_dup 2)] @@ -9745,7 +9745,7 @@ (define_expand "mve_vldrdq_gather_base_wb_z_<supf>v2di" [(match_operand:V2DI 0 "s_register_operand") (match_operand:V2DI 1 "s_register_operand") (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:V2QI 3 "vpr_register_operand") (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] "TARGET_HAVE_MVE" { @@ -9761,7 +9761,7 @@ (define_expand "mve_vldrdq_gather_base_nowb_z_<supf>v2di" [(match_operand:V2DI 0 "s_register_operand") (match_operand:V2DI 1 "s_register_operand") (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:V2QI 3 "vpr_register_operand") (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] "TARGET_HAVE_MVE" { @@ -9795,7 +9795,7 @@ (define_insn "mve_vldrdq_gather_base_wb_z_<supf>v2di_insn" [(set (match_operand:V2DI 0 "s_register_operand" "=&w") (unspec:V2DI [(match_operand:V2DI 2 "s_register_operand" "1") (match_operand:SI 3 "mve_vldrd_immediate" "Ri") - (match_operand:HI 4 "vpr_register_operand" "Up") + (match_operand:V2QI 4 "vpr_register_operand" "Up") (mem:BLK (scratch))] VLDRDGBWBQ)) (set (match_operand:V2DI 1 "s_register_operand" "=&w") diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index 2ef6a0f4c335a767bd41da85ad2a7f806dc31fa1..f9ff78fbe01eb05a73621d3701f9dfe0ae1c6854 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -575,10 +575,8 @@ (define_c_enum "unspec" [ VCVTMQ_S VCVTMQ_U VADDLVQ_U - VCTP8Q - VCTP16Q - VCTP32Q - VCTP64Q + VCTP + VCTP_M VPNOT VCREATEQ_F VCVTQ_N_TO_F_S @@ -702,10 +700,6 @@ (define_c_enum "unspec" [ VADDLVAQ_S VBICQ_N_U VBICQ_N_S - VCTP8Q_M - VCTP16Q_M - VCTP32Q_M - VCTP64Q_M VCVTBQ_F16_F32 VCVTTQ_F16_F32 VMLALDAVQ_U diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 8add1e6bbf56b211cef5fc7f18f2426332cc45f5..f34f35e1185e2b0974df965affb113772ea9282d 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -88,7 +88,7 @@ (define_insn "*thumb2_movhi_vfp" case 2: return "mov%?\t%0, %1\t%@ movhi"; case 1: - if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_BOOL) + if (VALID_MVE_PRED_MODE (<MODE>mode)) operands[1] = mve_bool_vec_to_const (operands[1]); else operands[1] = gen_lowpart (HImode, operands[1]); @@ -192,7 +192,7 @@ (define_insn "*thumb2_movhi_fp16" case 2: return "mov%?\t%0, %1\t%@ movhi"; case 1: - if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_BOOL) + if (VALID_MVE_PRED_MODE (<MODE>mode)) operands[1] = mve_bool_vec_to_const (operands[1]); else operands[1] = gen_lowpart (HImode, operands[1]); diff --git a/gcc/testsuite/gcc.target/arm/mve/pr108443-run.c b/gcc/testsuite/gcc.target/arm/mve/pr108443-run.c new file mode 100644 index 0000000000000000000000000000000000000000..cb4b45bd30563c536a5cdc08147970e077abbf37 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/pr108443-run.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target arm_mve_hw } */ +/* { dg-add-options arm_v8_1m_mve } */ +#include "pr108443.c" + +extern void abort (void); + +void __attribute__ ((noipa)) partial_write_cst (uint32_t *, uint32x4_t); + +void +__attribute__ ((noipa)) partial_write (uint32_t *a, uint32x4_t v, unsigned short p) +{ + vstrwq_p_u32 (a, v, p); +} + +int main (void) +{ + unsigned short p = 0x00CC; + uint32_t a[] = {0, 0, 0, 0}; + uint32_t b[] = {0, 0, 0, 0}; + uint32x4_t v = vdupq_n_u32 (0xFFFFFFFFU); + partial_write_cst (&a[0], v); + partial_write (&b[0], v, p); + if (__builtin_memcmp (&a[0], &b[0], 16) != 0) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/arm/mve/pr108443.c b/gcc/testsuite/gcc.target/arm/mve/pr108443.c new file mode 100644 index 0000000000000000000000000000000000000000..227d4b11f477a43b95ee981c190c289b84f1a486 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/pr108443.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +#include <arm_mve.h> + +void +__attribute__ ((noipa)) partial_write_cst (uint32_t *a, uint32x4_t v) +{ + vstrwq_p_u32 (a, v, 0x00CC); +} + +/* { dg-final { scan-assembler {mov\tr[0-9][0-9]*, #204} } } */ +
Hi Andre, > -----Original Message----- > From: Andre Vieira (lists) <andre.simoesdiasvieira@arm.com> > Sent: Wednesday, January 25, 2023 5:41 PM > To: gcc-patches@gcc.gnu.org > Cc: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; Richard Earnshaw > <Richard.Earnshaw@arm.com> > Subject: Re: [PATCH 3/3] arm: Fix MVE predicates synthesis [PR 108443] > > Looks like the first patch was missing a change I had made to prevent > mve_bool_vec_to_const ICEing if called with a non-vector immediate. Now > included. > > On 24/01/2023 13:56, Andre Vieira (lists) via Gcc-patches wrote: > > Hi, > > > > This patch fixes the way we synthesize MVE predicate immediates and > > fixes some other inconsistencies around predicates. For instance this > > patch fixes the modes used in the vctp intrinsics, to couple them with > > predicate modes with the appropriate lane numbers. For this V2QI is > > added to represent a predicate created by vctp64q. The reason we use > > V2QI and not for instance a V2BI with 8-bit boolean modes is because we > > are trying to avoid having two 'INT' modes of the same size. We make > > sure we use the V2QI mode instead of HI for any instruction working on > > two lanes of 64-bits consuming a predicate. > > > > Bootstrapped on aarch64-none-linux-gnu and regression tested on > > arm-none-eabi and armeb-none-eabi for armv8.1-m.main+mve.fp. > > > > OK for trunk? Looks sensible to me, I'm still reviewing it but.... > > > > gcc/ChangeLog: > > > > PR target/108443 > > * config/arm/arm.h (VALID_MVE_PRED_MODE): Add V2QI. > > * config/arm/arm.cc (thumb2_legitimate_address_p): Use HImode for > > addressing MVE predicate modes. > > (mve_bool_vec_to_const): Change to represent correct MVE predicate > > format. > > (arm_hard_regno_mode_ok): Use VALID_MVE_PRED_MODE instead of > > checking modes. > > (arm_vector_mode_supported_p): Likewise. > > (arm_mode_to_pred_mode): Add V2QI. > > * config/arm/arm-builtins.cc (UNOP_PRED_UNONE_QUALIFIERS): New > > qualifier. > > (UNOP_PRED_PRED_QUALIFIERS): New qualifier > > (BINOP_PRED_UNONE_PRED_QUALIFIERS): New qualifier. > > (v2qi_UP): New macro. > > (v4bi_UP): New macro. > > (v8bi_UP): New macro. > > (v16bi_UP): New macro. > > (arm_expand_builtin_args): Make it able to expand the new predicate > > modes. > > * config/arm/arm-modes.def (V2QI): New mode. > > * config/arm/arm-simd-builtin-types.def (Pred1x16_t, Pred2x8_t > > Pred4x4_t): Remove unused predicate builtin types. > > * config/arm/arm_mve.h (__arm_vctp16q, __arm_vctp32q, > __arm_vctp64q, > > __arm_vctp8q, __arm_vpnot, __arm_vctp8q_m, __arm_vctp64q_m, > > __arm_vctp32q_m, __arm_vctp16q_m): Use predicate modes. > > * config/arm/arm_mve_builtins.def (vctp16q, vctp32q, vctp64q, vctp8q, > > vpnot, vctp8q_m, vctp16q_m, vctp32q_m, vctp64q_m): Likewise. > > * config/arm/constraints.md (DB): Check for VALID_MVE_PRED_MODE > > instead > > of MODE_VECTOR_BOOL. > > * config/arm/iterators.md (MVE_7, MVE_7_HI): Add V2QI > > (MVE_VPRED): Likewise. > > (MVE_vpred): Add V2QI and map upper case predicate modes to > > lower case. > > (MVE_vctp): New mode attribute. > > (mode1): Remove. > > (VCTPQ): Remove. > > (VCTPQ_M): Remove. > > * config/arm/mve.md (mve_vctp<mode1>qhi): Rename this... > > (mve_vctp<MVE_vctp>q<MVE_vpred>): ... to this. And use new mode > > attributes. > > (mve_vpnothi): Rename this... > > (mve_vpnotv16bi): ... to this. > > (mve_vctp<mode1>q_mhi): Rename this... > > (mve_vctp<MVE_vctp>q_m<MVE_vpred>):... to this. > > (mve_vldrdq_gather_base_z_<supf>v2di, > > mve_vldrdq_gather_offset_z_<supf>v2di, > > mve_vldrdq_gather_shifted_offset_z_<supf>v2di, > > mve_vstrdq_scatter_base_p_<supf>v2di, > > mve_vstrdq_scatter_offset_p_<supf>v2di, > > mve_vstrdq_scatter_offset_p_<supf>v2di_insn, > > mve_vstrdq_scatter_shifted_offset_p_<supf>v2di, > > mve_vstrdq_scatter_shifted_offset_p_<supf>v2di_insn, > > mve_vstrdq_scatter_base_wb_p_<supf>v2di, > > mve_vldrdq_gather_base_wb_z_<supf>v2di, > > mve_vldrdq_gather_base_nowb_z_<supf>v2di, > > mve_vldrdq_gather_base_wb_z_<supf>v2di_insn): Use V2QI insead of > > HI for predicates. > > * config/arm/unspecs.md (VCTP8Q, VCTP16Q, VCTP32Q, VCTP64Q): > Replace > > these... > > (VCTP): ... with this. > > (VCTP8Q_M, VCTP16Q_M, VCTP32Q_M, VCTP64Q_M): Replace these... > > (VCTP_M): ... with this. > > * config/arm/vfp.md (*thumb2_movhi_vfp, *thumb2_movhi_fp16): Use > > VALID_MVE_PRED_MODE > > instead of checking for MODE_VECTOR_BOOL class. > > > > > > gcc/testsuite/ChangeLog: > > > > * gcc.dg/rtl/arm/mve-vxbi.c: Use new predicate modes. ... I don't see this in the patch. Is this the correct patch? Or does the CHangeLog need updating? > > * gcc.target/arm/mve/pr108443-run.c: New test. > > * gcc.target/arm/mve/pr108443.c: New test.
Yeah that shouldn't be there, it's from an earlier version of the patch I wrote where I was experimenting changing the existing modes, I'll remove it from the ChangeLog. On 31/01/2023 09:53, Kyrylo Tkachov wrote: >>> >>> gcc/testsuite/ChangeLog: >>> >>> * gcc.dg/rtl/arm/mve-vxbi.c: Use new predicate modes. > > ... I don't see this in the patch. Is this the correct patch? Or does the CHangeLog need updating? > >>> * gcc.target/arm/mve/pr108443-run.c: New test. >>> * gcc.target/arm/mve/pr108443.c: New test.
> -----Original Message----- > From: Andre Vieira (lists) <andre.simoesdiasvieira@arm.com> > Sent: Wednesday, January 25, 2023 5:41 PM > To: gcc-patches@gcc.gnu.org > Cc: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; Richard Earnshaw > <Richard.Earnshaw@arm.com> > Subject: Re: [PATCH 3/3] arm: Fix MVE predicates synthesis [PR 108443] > > Looks like the first patch was missing a change I had made to prevent > mve_bool_vec_to_const ICEing if called with a non-vector immediate. Now > included. > > On 24/01/2023 13:56, Andre Vieira (lists) via Gcc-patches wrote: > > Hi, > > > > This patch fixes the way we synthesize MVE predicate immediates and > > fixes some other inconsistencies around predicates. For instance this > > patch fixes the modes used in the vctp intrinsics, to couple them with > > predicate modes with the appropriate lane numbers. For this V2QI is > > added to represent a predicate created by vctp64q. The reason we use > > V2QI and not for instance a V2BI with 8-bit boolean modes is because we > > are trying to avoid having two 'INT' modes of the same size. We make > > sure we use the V2QI mode instead of HI for any instruction working on > > two lanes of 64-bits consuming a predicate. > > > > Bootstrapped on aarch64-none-linux-gnu and regression tested on > > arm-none-eabi and armeb-none-eabi for armv8.1-m.main+mve.fp. > > > > OK for trunk? > > > > gcc/ChangeLog: > > > > PR target/108443 > > * config/arm/arm.h (VALID_MVE_PRED_MODE): Add V2QI. > > * config/arm/arm.cc (thumb2_legitimate_address_p): Use HImode for > > addressing MVE predicate modes. > > (mve_bool_vec_to_const): Change to represent correct MVE predicate > > format. > > (arm_hard_regno_mode_ok): Use VALID_MVE_PRED_MODE instead of > > checking modes. > > (arm_vector_mode_supported_p): Likewise. > > (arm_mode_to_pred_mode): Add V2QI. > > * config/arm/arm-builtins.cc (UNOP_PRED_UNONE_QUALIFIERS): New > > qualifier. > > (UNOP_PRED_PRED_QUALIFIERS): New qualifier > > (BINOP_PRED_UNONE_PRED_QUALIFIERS): New qualifier. > > (v2qi_UP): New macro. > > (v4bi_UP): New macro. > > (v8bi_UP): New macro. > > (v16bi_UP): New macro. > > (arm_expand_builtin_args): Make it able to expand the new predicate > > modes. > > * config/arm/arm-modes.def (V2QI): New mode. > > * config/arm/arm-simd-builtin-types.def (Pred1x16_t, Pred2x8_t > > Pred4x4_t): Remove unused predicate builtin types. > > * config/arm/arm_mve.h (__arm_vctp16q, __arm_vctp32q, > __arm_vctp64q, > > __arm_vctp8q, __arm_vpnot, __arm_vctp8q_m, __arm_vctp64q_m, > > __arm_vctp32q_m, __arm_vctp16q_m): Use predicate modes. > > * config/arm/arm_mve_builtins.def (vctp16q, vctp32q, vctp64q, vctp8q, > > vpnot, vctp8q_m, vctp16q_m, vctp32q_m, vctp64q_m): Likewise. > > * config/arm/constraints.md (DB): Check for VALID_MVE_PRED_MODE > > instead > > of MODE_VECTOR_BOOL. > > * config/arm/iterators.md (MVE_7, MVE_7_HI): Add V2QI > > (MVE_VPRED): Likewise. > > (MVE_vpred): Add V2QI and map upper case predicate modes to > > lower case. > > (MVE_vctp): New mode attribute. > > (mode1): Remove. > > (VCTPQ): Remove. > > (VCTPQ_M): Remove. > > * config/arm/mve.md (mve_vctp<mode1>qhi): Rename this... > > (mve_vctp<MVE_vctp>q<MVE_vpred>): ... to this. And use new mode > > attributes. > > (mve_vpnothi): Rename this... > > (mve_vpnotv16bi): ... to this. > > (mve_vctp<mode1>q_mhi): Rename this... > > (mve_vctp<MVE_vctp>q_m<MVE_vpred>):... to this. > > (mve_vldrdq_gather_base_z_<supf>v2di, > > mve_vldrdq_gather_offset_z_<supf>v2di, > > mve_vldrdq_gather_shifted_offset_z_<supf>v2di, > > mve_vstrdq_scatter_base_p_<supf>v2di, > > mve_vstrdq_scatter_offset_p_<supf>v2di, > > mve_vstrdq_scatter_offset_p_<supf>v2di_insn, > > mve_vstrdq_scatter_shifted_offset_p_<supf>v2di, > > mve_vstrdq_scatter_shifted_offset_p_<supf>v2di_insn, > > mve_vstrdq_scatter_base_wb_p_<supf>v2di, > > mve_vldrdq_gather_base_wb_z_<supf>v2di, > > mve_vldrdq_gather_base_nowb_z_<supf>v2di, > > mve_vldrdq_gather_base_wb_z_<supf>v2di_insn): Use V2QI insead of > > HI for predicates. > > * config/arm/unspecs.md (VCTP8Q, VCTP16Q, VCTP32Q, VCTP64Q): > Replace > > these... > > (VCTP): ... with this. > > (VCTP8Q_M, VCTP16Q_M, VCTP32Q_M, VCTP64Q_M): Replace these... > > (VCTP_M): ... with this. > > * config/arm/vfp.md (*thumb2_movhi_vfp, *thumb2_movhi_fp16): Use > > VALID_MVE_PRED_MODE > > instead of checking for MODE_VECTOR_BOOL class. > > > > > > gcc/testsuite/ChangeLog: > > > > * gcc.dg/rtl/arm/mve-vxbi.c: Use new predicate modes. > > * gcc.target/arm/mve/pr108443-run.c: New test. > > * gcc.target/arm/mve/pr108443.c: New test. diff --git a/gcc/testsuite/gcc.target/arm/mve/pr108443.c b/gcc/testsuite/gcc.target/arm/mve/pr108443.c new file mode 100644 index 0000000000000000000000000000000000000000..227d4b11f477a43b95ee981c190c289b84f1a486 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/pr108443.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +#include <arm_mve.h> + +void +__attribute__ ((noipa)) partial_write_cst (uint32_t *a, uint32x4_t v) +{ + vstrwq_p_u32 (a, v, 0x00CC); +} + +/* { dg-final { scan-assembler {mov\tr[0-9][0-9]*, #204} } } */ + The register scan can be a shorter "r[0-9]+". Ok with that change. Please also double check the changelog entries to ensure that they match what the committed version of the patch does. Thanks for working on this! Kyrill
diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc index 6c67cec93ff76a4b42f3a0b305f697142e88fcd9..22fc7f10e92b33a861a321e43becee4647738b61 100644 --- a/gcc/config/arm/arm-builtins.cc +++ b/gcc/config/arm/arm-builtins.cc @@ -384,6 +384,19 @@ arm_unop_unone_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define UNOP_UNONE_IMM_QUALIFIERS \ (arm_unop_unone_imm_qualifiers) +static enum arm_type_qualifiers +arm_unop_pred_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_predicate, qualifier_unsigned }; +#define UNOP_PRED_UNONE_QUALIFIERS \ + (arm_unop_pred_unone_qualifiers) + +static enum arm_type_qualifiers +arm_unop_pred_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_predicate, qualifier_predicate }; +#define UNOP_PRED_PRED_QUALIFIERS \ + (arm_unop_pred_pred_qualifiers) + + static enum arm_type_qualifiers arm_binop_none_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_none, qualifier_none }; @@ -426,6 +439,12 @@ arm_binop_pred_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define BINOP_PRED_UNONE_UNONE_QUALIFIERS \ (arm_binop_pred_unone_unone_qualifiers) +static enum arm_type_qualifiers +arm_binop_pred_unone_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_predicate, qualifier_unsigned, qualifier_predicate }; +#define BINOP_PRED_UNONE_PRED_QUALIFIERS \ + (arm_binop_pred_unone_pred_qualifiers) + static enum arm_type_qualifiers arm_binop_unone_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_none, qualifier_immediate }; @@ -851,6 +870,10 @@ arm_set_sat_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_void, qualifier_none }; #define SET_SAT_QUALIFIERS (arm_set_sat_qualifiers) +#define v2qi_UP E_V2QImode +#define v4bi_UP E_V4BImode +#define v8bi_UP E_V8BImode +#define v16bi_UP E_V16BImode #define v8qi_UP E_V8QImode #define v4hi_UP E_V4HImode #define v4hf_UP E_V4HFmode @@ -2989,11 +3012,14 @@ arm_expand_builtin_args (rtx target, machine_mode map_mode, int fcode, op[argc] = convert_memory_address (Pmode, op[argc]); /* MVE uses mve_pred16_t (aka HImode) for vectors of - predicates. */ - if (GET_MODE_CLASS (mode[argc]) == MODE_VECTOR_BOOL) + predicates, but internally we use V16BI/V8BI/V4BI/V2QI for + MVE predicate modes. */ + if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode[argc])) op[argc] = gen_lowpart (mode[argc], op[argc]); - /*gcc_assert (GET_MODE (op[argc]) == mode[argc]); */ + gcc_assert (GET_MODE (op[argc]) == mode[argc] + || (GET_MODE(op[argc]) == E_VOIDmode + && CONSTANT_P (op[argc]))); if (!(*insn_data[icode].operand[opno].predicate) (op[argc], mode[argc])) op[argc] = copy_to_mode_reg (mode[argc], op[argc]); @@ -3198,7 +3224,7 @@ constant_arg: else emit_insn (insn); - if (GET_MODE_CLASS (tmode) == MODE_VECTOR_BOOL) + if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (tmode)) { rtx HItarget = gen_reg_rtx (HImode); emit_move_insn (HItarget, gen_lowpart (HImode, target)); diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def index 364736d17fa5ae871c49b73bccd4695cba76c71e..c7645c480f63ab4d85cad207099c318f8e69a07d 100644 --- a/gcc/config/arm/arm-modes.def +++ b/gcc/config/arm/arm-modes.def @@ -91,6 +91,7 @@ BOOL_MODE (B4I, 4, 1); VECTOR_BOOL_MODE (V16BI, 16, BI, 2); VECTOR_BOOL_MODE (V8BI, 8, B2I, 2); VECTOR_BOOL_MODE (V4BI, 4, B4I, 2); +VECTOR_MODE (INT, QI, 2); /* Fraction and accumulator vector modes. */ VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */ diff --git a/gcc/config/arm/arm-simd-builtin-types.def b/gcc/config/arm/arm-simd-builtin-types.def index 91584c99abb8d0452201f0a32c453a0d4304c9f4..daa4bd95e307973e88a8590e5231fc410c840d4f 100644 --- a/gcc/config/arm/arm-simd-builtin-types.def +++ b/gcc/config/arm/arm-simd-builtin-types.def @@ -51,7 +51,3 @@ ENTRY (Bfloat16x2_t, V2BF, none, 32, bfloat16, 20) ENTRY (Bfloat16x4_t, V4BF, none, 64, bfloat16, 20) ENTRY (Bfloat16x8_t, V8BF, none, 128, bfloat16, 20) - - ENTRY (Pred1x16_t, V16BI, predicate, 16, pred1, 16) - ENTRY (Pred2x8_t, V8BI, predicate, 8, pred1, 15) - ENTRY (Pred4x4_t, V4BI, predicate, 4, pred1, 15) diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 4352c830cb6d2e632a225edea861b5ceb35dd035..6b6214ebb38f7466234b2b263279d956c7b80ad2 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1093,7 +1093,8 @@ extern const int arm_arch_cde_coproc_bits[]; #define VALID_MVE_PRED_MODE(MODE) \ ((MODE) == HImode \ - || (MODE) == V16BImode || (MODE) == V8BImode || (MODE) == V4BImode) + || (MODE) == V16BImode || (MODE) == V8BImode || (MODE) == V4BImode \ + || (MODE) == V2QImode) #define VALID_MVE_SI_MODE(MODE) \ ((MODE) == V2DImode ||(MODE) == V4SImode || (MODE) == V8HImode \ diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 18460ef5280be8c1df85eff424a1bf66d6019c0a..b9cf1e70d135e75563770c53620badcced4e4f5e 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -8643,6 +8643,11 @@ thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p) bool use_ldrd; enum rtx_code code = GET_CODE (x); + /* If we are dealing with a MVE predicate mode, then treat it as a HImode as + can store and load it like any other 16-bit value. */ + if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode)) + mode = HImode; + if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode)) return mve_vector_mem_operand (mode, x, strict_p); @@ -12921,7 +12926,7 @@ simd_valid_immediate (rtx op, machine_mode mode, int inverse, /* Only support 128-bit vectors for MVE. */ if (TARGET_HAVE_MVE && (!vector - || (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) + || VALID_MVE_PRED_MODE (mode) || n_elts * innersize != 16)) return -1; @@ -13294,9 +13299,11 @@ neon_vdup_constant (rtx vals, bool generate) rtx mve_bool_vec_to_const (rtx const_vec) { - int n_elts = GET_MODE_NUNITS ( GET_MODE (const_vec)); - int repeat = 16 / n_elts; - int i; + machine_mode mode = GET_MODE (const_vec); + unsigned n_elts = GET_MODE_NUNITS (mode); + unsigned el_prec = GET_MODE_PRECISION (GET_MODE_INNER (mode)); + unsigned shift_c = 16 / n_elts; + unsigned i; int hi_val = 0; for (i = 0; i < n_elts; i++) @@ -13305,12 +13312,16 @@ mve_bool_vec_to_const (rtx const_vec) unsigned HOST_WIDE_INT elpart; gcc_assert (CONST_INT_P (el)); - elpart = INTVAL (el); + elpart = INTVAL (el) & ((1U << el_prec) - 1); + + unsigned index = BYTES_BIG_ENDIAN ? n_elts - i - 1 : i; - for (int j = 0; j < repeat; j++) - hi_val |= elpart << (i * repeat + j); + hi_val |= elpart << (index * shift_c); } - return gen_int_mode (hi_val, HImode); + /* We are using mov immediate to encode this constant which writes 32-bits + so we need to make sure the top 16-bits are all 0, otherwise we can't + guarantee we can actually write this immediate. */ + return gen_int_mode (hi_val, SImode); } /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only @@ -13353,7 +13364,7 @@ neon_make_constant (rtx vals, bool generate) && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL)) /* Load using VMOV. On Cortex-A8 this takes one cycle. */ return const_vec; - else if (TARGET_HAVE_MVE && (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)) + else if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE(mode)) return mve_bool_vec_to_const (const_vec); else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX) /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON @@ -29458,14 +29469,12 @@ arm_vector_mode_supported_p (machine_mode mode) return true; if (TARGET_HAVE_MVE - && (mode == V2DImode || mode == V4SImode || mode == V8HImode - || mode == V16QImode - || mode == V16BImode || mode == V8BImode || mode == V4BImode)) - return true; + && (VALID_MVE_SI_MODE (mode) || VALID_MVE_PRED_MODE (mode))) + return true; if (TARGET_HAVE_MVE_FLOAT && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode)) - return true; + return true; return false; } @@ -31269,6 +31278,7 @@ arm_mode_to_pred_mode (machine_mode mode) case 16: return V16BImode; case 8: return V8BImode; case 4: return V4BImode; + case 2: return V2QImode; } return opt_machine_mode (); } diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 00fb42afecc74c29dccaa1a601700033598100ce..d28843902285763d0a6416825348ee49a71a1efe 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -3528,35 +3528,35 @@ __extension__ extern __inline mve_pred16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vctp16q (uint32_t __a) { - return __builtin_mve_vctp16qhi (__a); + return __builtin_mve_vctp16qv8bi (__a); } __extension__ extern __inline mve_pred16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vctp32q (uint32_t __a) { - return __builtin_mve_vctp32qhi (__a); + return __builtin_mve_vctp32qv4bi (__a); } __extension__ extern __inline mve_pred16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vctp64q (uint32_t __a) { - return __builtin_mve_vctp64qhi (__a); + return __builtin_mve_vctp64qv2qi (__a); } __extension__ extern __inline mve_pred16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vctp8q (uint32_t __a) { - return __builtin_mve_vctp8qhi (__a); + return __builtin_mve_vctp8qv16bi (__a); } __extension__ extern __inline mve_pred16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vpnot (mve_pred16_t __a) { - return __builtin_mve_vpnothi (__a); + return __builtin_mve_vpnotv16bi (__a); } __extension__ extern __inline uint8x16_t @@ -6696,28 +6696,28 @@ __extension__ extern __inline mve_pred16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vctp8q_m (uint32_t __a, mve_pred16_t __p) { - return __builtin_mve_vctp8q_mhi (__a, __p); + return __builtin_mve_vctp8q_mv16bi (__a, __p); } __extension__ extern __inline mve_pred16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vctp64q_m (uint32_t __a, mve_pred16_t __p) { - return __builtin_mve_vctp64q_mhi (__a, __p); + return __builtin_mve_vctp64q_mv2qi (__a, __p); } __extension__ extern __inline mve_pred16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vctp32q_m (uint32_t __a, mve_pred16_t __p) { - return __builtin_mve_vctp32q_mhi (__a, __p); + return __builtin_mve_vctp32q_mv4bi (__a, __p); } __extension__ extern __inline mve_pred16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vctp16q_m (uint32_t __a, mve_pred16_t __p) { - return __builtin_mve_vctp16q_mhi (__a, __p); + return __builtin_mve_vctp16q_mv8bi (__a, __p); } __extension__ extern __inline uint64_t diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index e3a61f81be7814806be4f69e9b27b89663554297..5e5510f6e37f197ffc4c08a1166bd0d80262f65a 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -71,11 +71,11 @@ VAR2 (UNOP_UNONE_NONE, vcvtaq_u, v8hi, v4si) VAR2 (UNOP_UNONE_IMM, vmvnq_n_u, v8hi, v4si) VAR1 (UNOP_UNONE_UNONE, vrev16q_u, v16qi) VAR1 (UNOP_UNONE_UNONE, vaddlvq_u, v4si) -VAR1 (UNOP_UNONE_UNONE, vctp16q, hi) -VAR1 (UNOP_UNONE_UNONE, vctp32q, hi) -VAR1 (UNOP_UNONE_UNONE, vctp64q, hi) -VAR1 (UNOP_UNONE_UNONE, vctp8q, hi) -VAR1 (UNOP_UNONE_UNONE, vpnot, hi) +VAR1 (UNOP_PRED_UNONE, vctp16q, v8bi) +VAR1 (UNOP_PRED_UNONE, vctp32q, v4bi) +VAR1 (UNOP_PRED_UNONE, vctp64q, v2qi) +VAR1 (UNOP_PRED_UNONE, vctp8q, v16bi) +VAR1 (UNOP_PRED_PRED, vpnot, v16bi) VAR2 (BINOP_NONE_NONE_NONE, vsubq_n_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vbrsrq_n_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_IMM, vcvtq_n_to_f_s, v8hf, v4sf) @@ -265,10 +265,10 @@ VAR2 (BINOP_NONE_NONE_IMM, vshllbq_n_s, v16qi, v8hi) VAR2 (BINOP_NONE_NONE_IMM, vorrq_n_s, v8hi, v4si) VAR2 (BINOP_NONE_NONE_IMM, vbicq_n_s, v8hi, v4si) VAR1 (BINOP_UNONE_UNONE_UNONE, vrmlaldavhq_u, v4si) -VAR1 (BINOP_UNONE_UNONE_UNONE, vctp8q_m, hi) -VAR1 (BINOP_UNONE_UNONE_UNONE, vctp64q_m, hi) -VAR1 (BINOP_UNONE_UNONE_UNONE, vctp32q_m, hi) -VAR1 (BINOP_UNONE_UNONE_UNONE, vctp16q_m, hi) +VAR1 (BINOP_PRED_UNONE_PRED, vctp8q_m, v16bi) +VAR1 (BINOP_PRED_UNONE_PRED, vctp64q_m, v2qi) +VAR1 (BINOP_PRED_UNONE_PRED, vctp32q_m, v4bi) +VAR1 (BINOP_PRED_UNONE_PRED, vctp16q_m, v8bi) VAR1 (BINOP_UNONE_UNONE_UNONE, vaddlvaq_u, v4si) VAR1 (BINOP_NONE_NONE_NONE, vrmlsldavhxq_s, v4si) VAR1 (BINOP_NONE_NONE_NONE, vrmlsldavhq_s, v4si) diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md index 0155ab9f7e7750f4359b937263e297da9c196fc5..504cd938b26e2629183a883383a81bb4e775bb92 100644 --- a/gcc/config/arm/constraints.md +++ b/gcc/config/arm/constraints.md @@ -316,7 +316,7 @@ (define_constraint "DB" "@internal In ARM/Thumb-2 state with MVE a constant vector of booleans." (and (match_code "const_vector") - (match_test "TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL"))) + (match_test "TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode)"))) (define_constraint "Da" "@internal diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 04469064f12ca2900b1657ce6e1ccca8c41e4479..39895ad62aa3afd55d3cbc92c55b45bc56710bcb 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -272,8 +272,8 @@ (define_mode_iterator MVE_3 [V16QI V8HI]) (define_mode_iterator MVE_2 [V16QI V8HI V4SI]) (define_mode_iterator MVE_5 [V8HI V4SI]) (define_mode_iterator MVE_6 [V8HI V4SI]) -(define_mode_iterator MVE_7 [V16BI V8BI V4BI]) -(define_mode_iterator MVE_7_HI [HI V16BI V8BI V4BI]) +(define_mode_iterator MVE_7 [V16BI V8BI V4BI V2QI]) +(define_mode_iterator MVE_7_HI [HI V16BI V8BI V4BI V2QI]) ;;---------------------------------------------------------------------------- ;; Code iterators @@ -949,9 +949,12 @@ (define_mode_attr V_extr_elem [(V16QI "u8") (V8HI "u16") (V4SI "32") (define_mode_attr earlyclobber_32 [(V16QI "=w") (V8HI "=w") (V4SI "=&w") (V8HF "=w") (V4SF "=&w")]) (define_mode_attr MVE_VPRED [(V16QI "V16BI") (V8HI "V8BI") (V4SI "V4BI") - (V2DI "HI") (V8HF "V8BI") (V4SF "V4BI")]) + (V8HF "V8BI") (V4SF "V4BI") (V2DI "V2QI")]) (define_mode_attr MVE_vpred [(V16QI "v16bi") (V8HI "v8bi") (V4SI "v4bi") - (V2DI "hi") (V8HF "v8bi") (V4SF "v4bi")]) + (V8HF "v8bi") (V4SF "v4bi") + (V16BI "v16bi") (V8BI "v8bi") (V4BI "v4bi") + (V2QI "v2qi")]) +(define_mode_attr MVE_vctp [(V16BI "8") (V8BI "16") (V4BI "32") (V2QI "64")]) ;;---------------------------------------------------------------------------- ;; Code attributes @@ -1461,11 +1464,6 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s") (VADCIQ_M_S "s") (SQRSHRL_64 "64") (SQRSHRL_48 "48") (UQRSHLL_64 "64") (UQRSHLL_48 "48") (VSHLCQ_M_S "s") (VSHLCQ_M_U "u")]) - -(define_int_attr mode1 [(VCTP8Q "8") (VCTP16Q "16") (VCTP32Q "32") - (VCTP64Q "64") (VCTP8Q_M "8") (VCTP16Q_M "16") - (VCTP32Q_M "32") (VCTP64Q_M "64")]) - ;; Both kinds of return insn. (define_code_iterator RETURNS [return simple_return]) (define_code_attr return_str [(return "") (simple_return "simple_")]) @@ -1557,8 +1555,6 @@ (define_int_iterator VCVTPQ [VCVTPQ_S VCVTPQ_U]) (define_int_iterator VCVTNQ [VCVTNQ_S VCVTNQ_U]) (define_int_iterator VCVTMQ [VCVTMQ_S VCVTMQ_U]) (define_int_iterator VADDLVQ [VADDLVQ_U VADDLVQ_S]) -(define_int_iterator VCTPQ [VCTP8Q VCTP16Q VCTP32Q VCTP64Q]) -(define_int_iterator VCTPQ_M [VCTP8Q_M VCTP16Q_M VCTP32Q_M VCTP64Q_M]) (define_int_iterator VCVTQ_N_TO_F [VCVTQ_N_TO_F_S VCVTQ_N_TO_F_U]) (define_int_iterator VCREATEQ [VCREATEQ_U VCREATEQ_S]) (define_int_iterator VSHRQ_N [VSHRQ_N_S VSHRQ_N_U]) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index f123edc449b8b20bfb4a15c1fb0eccdbfff1339c..cd87e2b58d462895b950254d9d6bc2e2ff065d57 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -643,24 +643,24 @@ (define_insn "mve_vaddlvq_<supf>v4si" ;; ;; [vctp8q vctp16q vctp32q vctp64q]) ;; -(define_insn "mve_vctp<mode1>qhi" +(define_insn "mve_vctp<MVE_vctp>q<MVE_vpred>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:SI 1 "s_register_operand" "r")] - VCTPQ)) + (set (match_operand:MVE_7 0 "vpr_register_operand" "=Up") + (unspec:MVE_7 [(match_operand:SI 1 "s_register_operand" "r")] + VCTP)) ] "TARGET_HAVE_MVE" - "vctp.<mode1> %1" + "vctp.<MVE_vctp> %1" [(set_attr "type" "mve_move") ]) ;; ;; [vpnot]) ;; -(define_insn "mve_vpnothi" +(define_insn "mve_vpnotv16bi" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:HI 1 "vpr_register_operand" "0")] + (set (match_operand:V16BI 0 "vpr_register_operand" "=Up") + (unspec:V16BI [(match_operand:V16BI 1 "vpr_register_operand" "0")] VPNOT)) ] "TARGET_HAVE_MVE" @@ -1959,15 +1959,15 @@ (define_insn "mve_vcmulq<mve_rot><mode>" ;; ;; [vctp8q_m vctp16q_m vctp32q_m vctp64q_m]) ;; -(define_insn "mve_vctp<mode1>q_mhi" +(define_insn "mve_vctp<MVE_vctp>q_m<MVE_vpred>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:SI 1 "s_register_operand" "r") - (match_operand:HI 2 "vpr_register_operand" "Up")] - VCTPQ_M)) + (set (match_operand:MVE_7 0 "vpr_register_operand" "=Up") + (unspec:MVE_7 [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:MVE_7 2 "vpr_register_operand" "Up")] + VCTP_M)) ] "TARGET_HAVE_MVE" - "vpst\;vctpt.<mode1> %1" + "vpst\;vctpt.<MVE_vctp> %1" [(set_attr "type" "mve_move") (set_attr "length""8")]) @@ -7666,7 +7666,7 @@ (define_insn "mve_vldrdq_gather_base_z_<supf>v2di" [(set (match_operand:V2DI 0 "s_register_operand" "=&w") (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V2QI 3 "vpr_register_operand" "Up")] VLDRDGBQ)) ] "TARGET_HAVE_MVE" @@ -7707,7 +7707,7 @@ (define_insn "mve_vldrdq_gather_offset_z_<supf>v2di" [(set (match_operand:V2DI 0 "s_register_operand" "=&w") (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "Us") (match_operand:V2DI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V2QI 3 "vpr_register_operand" "Up")] VLDRDGOQ)) ] "TARGET_HAVE_MVE" @@ -7748,7 +7748,7 @@ (define_insn "mve_vldrdq_gather_shifted_offset_z_<supf>v2di" [(set (match_operand:V2DI 0 "s_register_operand" "=&w") (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "Us") (match_operand:V2DI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V2QI 3 "vpr_register_operand" "Up")] VLDRDGSOQ)) ] "TARGET_HAVE_MVE" @@ -8361,7 +8361,7 @@ (define_insn "mve_vstrdq_scatter_base_p_<supf>v2di" [(match_operand:V2DI 0 "s_register_operand" "w") (match_operand:SI 1 "mve_vldrd_immediate" "Ri") (match_operand:V2DI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V2QI 3 "vpr_register_operand" "Up")] VSTRDSBQ)) ] "TARGET_HAVE_MVE" @@ -8404,7 +8404,7 @@ (define_expand "mve_vstrdq_scatter_offset_p_<supf>v2di" [(match_operand:V2DI 0 "mve_scatter_memory") (match_operand:V2DI 1 "s_register_operand") (match_operand:V2DI 2 "s_register_operand") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:V2QI 3 "vpr_register_operand") (unspec:V4SI [(const_int 0)] VSTRDSOQ)] "TARGET_HAVE_MVE" { @@ -8422,7 +8422,7 @@ (define_insn "mve_vstrdq_scatter_offset_p_<supf>v2di_insn" [(match_operand:SI 0 "register_operand" "r") (match_operand:V2DI 1 "s_register_operand" "w") (match_operand:V2DI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V2QI 3 "vpr_register_operand" "Up")] VSTRDSOQ))] "TARGET_HAVE_MVE" "vpst\;vstrdt.64\t%q2, [%0, %q1]" @@ -8463,7 +8463,7 @@ (define_expand "mve_vstrdq_scatter_shifted_offset_p_<supf>v2di" [(match_operand:V2DI 0 "mve_scatter_memory") (match_operand:V2DI 1 "s_register_operand") (match_operand:V2DI 2 "s_register_operand") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:V2QI 3 "vpr_register_operand") (unspec:V4SI [(const_int 0)] VSTRDSSOQ)] "TARGET_HAVE_MVE" { @@ -8482,7 +8482,7 @@ (define_insn "mve_vstrdq_scatter_shifted_offset_p_<supf>v2di_insn" [(match_operand:SI 0 "register_operand" "r") (match_operand:V2DI 1 "s_register_operand" "w") (match_operand:V2DI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V2QI 3 "vpr_register_operand" "Up")] VSTRDSSOQ))] "TARGET_HAVE_MVE" "vpst\;vstrdt.64\t%q2, [%0, %q1, UXTW #3]" @@ -9454,7 +9454,7 @@ (define_insn "mve_vstrdq_scatter_base_wb_p_<supf>v2di" [(match_operand:V2DI 1 "s_register_operand" "0") (match_operand:SI 2 "mve_vldrd_immediate" "Ri") (match_operand:V2DI 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand")] + (match_operand:V2QI 4 "vpr_register_operand")] VSTRDSBWBQ)) (set (match_operand:V2DI 0 "s_register_operand" "=w") (unspec:V2DI [(match_dup 1) (match_dup 2)] @@ -9745,7 +9745,7 @@ (define_expand "mve_vldrdq_gather_base_wb_z_<supf>v2di" [(match_operand:V2DI 0 "s_register_operand") (match_operand:V2DI 1 "s_register_operand") (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:V2QI 3 "vpr_register_operand") (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] "TARGET_HAVE_MVE" { @@ -9761,7 +9761,7 @@ (define_expand "mve_vldrdq_gather_base_nowb_z_<supf>v2di" [(match_operand:V2DI 0 "s_register_operand") (match_operand:V2DI 1 "s_register_operand") (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:V2QI 3 "vpr_register_operand") (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] "TARGET_HAVE_MVE" { @@ -9795,7 +9795,7 @@ (define_insn "mve_vldrdq_gather_base_wb_z_<supf>v2di_insn" [(set (match_operand:V2DI 0 "s_register_operand" "=&w") (unspec:V2DI [(match_operand:V2DI 2 "s_register_operand" "1") (match_operand:SI 3 "mve_vldrd_immediate" "Ri") - (match_operand:HI 4 "vpr_register_operand" "Up") + (match_operand:V2QI 4 "vpr_register_operand" "Up") (mem:BLK (scratch))] VLDRDGBWBQ)) (set (match_operand:V2DI 1 "s_register_operand" "=&w") diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index 2ef6a0f4c335a767bd41da85ad2a7f806dc31fa1..f9ff78fbe01eb05a73621d3701f9dfe0ae1c6854 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -575,10 +575,8 @@ (define_c_enum "unspec" [ VCVTMQ_S VCVTMQ_U VADDLVQ_U - VCTP8Q - VCTP16Q - VCTP32Q - VCTP64Q + VCTP + VCTP_M VPNOT VCREATEQ_F VCVTQ_N_TO_F_S @@ -702,10 +700,6 @@ (define_c_enum "unspec" [ VADDLVAQ_S VBICQ_N_U VBICQ_N_S - VCTP8Q_M - VCTP16Q_M - VCTP32Q_M - VCTP64Q_M VCVTBQ_F16_F32 VCVTTQ_F16_F32 VMLALDAVQ_U diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 8add1e6bbf56b211cef5fc7f18f2426332cc45f5..f34f35e1185e2b0974df965affb113772ea9282d 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -88,7 +88,7 @@ (define_insn "*thumb2_movhi_vfp" case 2: return "mov%?\t%0, %1\t%@ movhi"; case 1: - if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_BOOL) + if (VALID_MVE_PRED_MODE (<MODE>mode)) operands[1] = mve_bool_vec_to_const (operands[1]); else operands[1] = gen_lowpart (HImode, operands[1]); @@ -192,7 +192,7 @@ (define_insn "*thumb2_movhi_fp16" case 2: return "mov%?\t%0, %1\t%@ movhi"; case 1: - if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_BOOL) + if (VALID_MVE_PRED_MODE (<MODE>mode)) operands[1] = mve_bool_vec_to_const (operands[1]); else operands[1] = gen_lowpart (HImode, operands[1]); diff --git a/gcc/testsuite/gcc.target/arm/mve/pr108443-run.c b/gcc/testsuite/gcc.target/arm/mve/pr108443-run.c new file mode 100644 index 0000000000000000000000000000000000000000..cb4b45bd30563c536a5cdc08147970e077abbf37 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/pr108443-run.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target arm_mve_hw } */ +/* { dg-add-options arm_v8_1m_mve } */ +#include "pr108443.c" + +extern void abort (void); + +void __attribute__ ((noipa)) partial_write_cst (uint32_t *, uint32x4_t); + +void +__attribute__ ((noipa)) partial_write (uint32_t *a, uint32x4_t v, unsigned short p) +{ + vstrwq_p_u32 (a, v, p); +} + +int main (void) +{ + unsigned short p = 0x00CC; + uint32_t a[] = {0, 0, 0, 0}; + uint32_t b[] = {0, 0, 0, 0}; + uint32x4_t v = vdupq_n_u32 (0xFFFFFFFFU); + partial_write_cst (&a[0], v); + partial_write (&b[0], v, p); + if (__builtin_memcmp (&a[0], &b[0], 16) != 0) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/arm/mve/pr108443.c b/gcc/testsuite/gcc.target/arm/mve/pr108443.c new file mode 100644 index 0000000000000000000000000000000000000000..227d4b11f477a43b95ee981c190c289b84f1a486 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/pr108443.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +#include <arm_mve.h> + +void +__attribute__ ((noipa)) partial_write_cst (uint32_t *a, uint32x4_t v) +{ + vstrwq_p_u32 (a, v, 0x00CC); +} + +/* { dg-final { scan-assembler {mov\tr[0-9][0-9]*, #204} } } */ +