Message ID | 20240710133414.741793-2-vladimir.miloserdov@arm.com |
---|---|
State | New |
Headers | show |
Series | AArch64: LUTI2/LUTI4 ACLE for SVE2 | expand |
Hi Vladimir, > On 10 Jul 2024, at 15:34, vladimir.miloserdov@arm.com wrote: > > External email: Use caution opening links or attachments > > > This patch introduces support for LUTI2/LUTI4 ACLE for SVE2. > > LUTI instructions are used for efficient table lookups with 2-bit > or 4-bit indices. LUTI2 reads indexed 8-bit or 16-bit elements from > the low 128 bits of the table vector using packed 2-bit indices, > while LUTI4 can read from the low 128 or 256 bits of the table > vector or from two table vectors using packed 4-bit indices. > These instructions fill the destination vector by copying elements > indexed by segments of the source vector, selected by the vector > segment index. > > The changes include the addition of a new AArch64 option > extension "lut", __ARM_FEATURE_LUT preprocessor macro, definitions > for the new LUTI instruction shapes, and implementations of the > svluti2 and svluti4 builtins. > > New tests are added as well > --- > gcc/config/aarch64/aarch64-c.cc | 1 + > .../aarch64/aarch64-option-extensions.def | 2 + > .../aarch64/aarch64-sve-builtins-shapes.cc | 41 +++++++++++++++++ > .../aarch64/aarch64-sve-builtins-shapes.h | 2 + > .../aarch64/aarch64-sve-builtins-sve2.cc | 17 +++++++ > .../aarch64/aarch64-sve-builtins-sve2.def | 4 ++ > .../aarch64/aarch64-sve-builtins-sve2.h | 2 + > gcc/config/aarch64/aarch64-sve2.md | 45 +++++++++++++++++++ > gcc/config/aarch64/aarch64.h | 5 +++ > gcc/config/aarch64/iterators.md | 10 +++++ > .../aarch64/sve/acle/asm/test_sve_acle.h | 16 ++++++- > .../aarch64/sve2/acle/asm/luti2_bf16.c | 35 +++++++++++++++ > .../aarch64/sve2/acle/asm/luti2_f16.c | 35 +++++++++++++++ > .../aarch64/sve2/acle/asm/luti2_s16.c | 35 +++++++++++++++ > .../aarch64/sve2/acle/asm/luti2_s8.c | 35 +++++++++++++++ > .../aarch64/sve2/acle/asm/luti2_u16.c | 35 +++++++++++++++ > .../aarch64/sve2/acle/asm/luti2_u8.c | 35 +++++++++++++++ > .../aarch64/sve2/acle/asm/luti4_bf16.c | 35 +++++++++++++++ > .../aarch64/sve2/acle/asm/luti4_bf16_x2.c | 15 +++++++ > .../aarch64/sve2/acle/asm/luti4_f16.c | 35 +++++++++++++++ > .../aarch64/sve2/acle/asm/luti4_f16_x2.c | 15 +++++++ > .../aarch64/sve2/acle/asm/luti4_s16.c | 35 +++++++++++++++ > .../aarch64/sve2/acle/asm/luti4_s16_x2.c | 15 +++++++ > .../aarch64/sve2/acle/asm/luti4_s8.c | 25 +++++++++++ > .../aarch64/sve2/acle/asm/luti4_u16.c | 35 +++++++++++++++ > .../aarch64/sve2/acle/asm/luti4_u16_x2.c | 15 +++++++ > .../aarch64/sve2/acle/asm/luti4_u8.c | 25 +++++++++++ > gcc/testsuite/lib/target-supports.exp | 12 +++++ > 28 files changed, 616 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_bf16.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_f16.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s16.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s8.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u16.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u8.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16_x2.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16_x2.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16_x2.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s8.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16_x2.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u8.c > diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 42ec0eec31e..840f52e08ed 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -232,6 +232,8 @@ AARCH64_OPT_EXTENSION("the", THE, (), (), (), "the") AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs") +AARCH64_OPT_EXTENSION("lut", LUT, (SVE2, SME2), (), (), "lut") + I think the LUT extension doesn’t require SME2, does it? It doesn’t seem to use any SME state. I don’t think +lut should be enabling +sme2 for the user +;; ------------------------------------------------------------------------- +;; ---- Table lookup +;; ------------------------------------------------------------------------- +;; Includes: +;; - LUTI2 +;; - LUTI4 +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>" + [(set (match_operand:SVE_FULL_BS 0 "register_operand" "=w") + (unspec:SVE_FULL_BS + [(match_operand:SVE_FULL_BS 1 "register_operand" "w") + (match_operand:VNx16QI 2 "register_operand" "w") + (match_operand:DI 3 "const_int_operand") + (const_int LUTI_BITS)] + UNSPEC_SVE_LUTI))] + "TARGET_SVE2" + "luti<LUTI_BITS>\t%0.<Vetype>, { %1.<Vetype> }, %2[%3]" +) + +(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>" + [(set (match_operand:<VSINGLE> 0 "register_operand") + (unspec:<VSINGLE> + [(match_operand:SVE_FULL_H 1 "aligned_register_operand" "w") + (match_operand:VNx16QI 2 "register_operand") + (match_operand:DI 3 "const_int_operand") + (const_int LUTI_BITS)] + UNSPEC_SVE_LUTI))] + "TARGET_SVE2" + "luti<LUTI_BITS>\t%0.<Vetype>, { %1.<Vetype> }, %2[%3]" +) Missing constraints on operands 0 and 3? + +(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>" + [(set (match_operand:<VSINGLE> 0 "register_operand") + (unspec:<VSINGLE> + [(match_operand:SVE_FULL_Hx2 1 "aligned_register_operand" "Uw2") + (match_operand:VNx16QI 2 "register_operand") + (match_operand:DI 3 "const_int_operand") + (const_int LUTI_BITS)] + UNSPEC_SVE_LUTI))] + "TARGET_SVE2" + "luti<LUTI_BITS>\t%0.<Vetype>, %1, %2[%3]" +) Likewise. Thanks, Kyrill
> On 11 Jul 2024, at 09:18, Kyrylo Tkachov <ktkachov@nvidia.com> wrote: > > External email: Use caution opening links or attachments > > > Hi Vladimir, > >> On 10 Jul 2024, at 15:34, vladimir.miloserdov@arm.com wrote: >> >> External email: Use caution opening links or attachments >> >> >> This patch introduces support for LUTI2/LUTI4 ACLE for SVE2. >> >> LUTI instructions are used for efficient table lookups with 2-bit >> or 4-bit indices. LUTI2 reads indexed 8-bit or 16-bit elements from >> the low 128 bits of the table vector using packed 2-bit indices, >> while LUTI4 can read from the low 128 or 256 bits of the table >> vector or from two table vectors using packed 4-bit indices. >> These instructions fill the destination vector by copying elements >> indexed by segments of the source vector, selected by the vector >> segment index. >> >> The changes include the addition of a new AArch64 option >> extension "lut", __ARM_FEATURE_LUT preprocessor macro, definitions >> for the new LUTI instruction shapes, and implementations of the >> svluti2 and svluti4 builtins. >> >> New tests are added as well >> --- >> gcc/config/aarch64/aarch64-c.cc | 1 + >> .../aarch64/aarch64-option-extensions.def | 2 + >> .../aarch64/aarch64-sve-builtins-shapes.cc | 41 +++++++++++++++++ >> .../aarch64/aarch64-sve-builtins-shapes.h | 2 + >> .../aarch64/aarch64-sve-builtins-sve2.cc | 17 +++++++ >> .../aarch64/aarch64-sve-builtins-sve2.def | 4 ++ >> .../aarch64/aarch64-sve-builtins-sve2.h | 2 + >> gcc/config/aarch64/aarch64-sve2.md | 45 +++++++++++++++++++ >> gcc/config/aarch64/aarch64.h | 5 +++ >> gcc/config/aarch64/iterators.md | 10 +++++ >> .../aarch64/sve/acle/asm/test_sve_acle.h | 16 ++++++- >> .../aarch64/sve2/acle/asm/luti2_bf16.c | 35 +++++++++++++++ >> .../aarch64/sve2/acle/asm/luti2_f16.c | 35 +++++++++++++++ >> .../aarch64/sve2/acle/asm/luti2_s16.c | 35 +++++++++++++++ >> .../aarch64/sve2/acle/asm/luti2_s8.c | 35 +++++++++++++++ >> .../aarch64/sve2/acle/asm/luti2_u16.c | 35 +++++++++++++++ >> .../aarch64/sve2/acle/asm/luti2_u8.c | 35 +++++++++++++++ >> .../aarch64/sve2/acle/asm/luti4_bf16.c | 35 +++++++++++++++ >> .../aarch64/sve2/acle/asm/luti4_bf16_x2.c | 15 +++++++ >> .../aarch64/sve2/acle/asm/luti4_f16.c | 35 +++++++++++++++ >> .../aarch64/sve2/acle/asm/luti4_f16_x2.c | 15 +++++++ >> .../aarch64/sve2/acle/asm/luti4_s16.c | 35 +++++++++++++++ >> .../aarch64/sve2/acle/asm/luti4_s16_x2.c | 15 +++++++ >> .../aarch64/sve2/acle/asm/luti4_s8.c | 25 +++++++++++ >> .../aarch64/sve2/acle/asm/luti4_u16.c | 35 +++++++++++++++ >> .../aarch64/sve2/acle/asm/luti4_u16_x2.c | 15 +++++++ >> .../aarch64/sve2/acle/asm/luti4_u8.c | 25 +++++++++++ >> gcc/testsuite/lib/target-supports.exp | 12 +++++ >> 28 files changed, 616 insertions(+), 1 deletion(-) >> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_bf16.c >> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_f16.c >> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s16.c >> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s8.c >> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u16.c >> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u8.c >> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16.c >> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16_x2.c >> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16.c >> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16_x2.c >> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16.c >> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16_x2.c >> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s8.c >> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16.c >> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16_x2.c >> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u8.c >> > > diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def > index 42ec0eec31e..840f52e08ed 100644 > --- a/gcc/config/aarch64/aarch64-option-extensions.def > +++ b/gcc/config/aarch64/aarch64-option-extensions.def > @@ -232,6 +232,8 @@ AARCH64_OPT_EXTENSION("the", THE, (), (), (), "the") > > AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs") > > +AARCH64_OPT_EXTENSION("lut", LUT, (SVE2, SME2), (), (), "lut") > + > > I think the LUT extension doesn’t require SME2, does it? It doesn’t seem to use any SME state. I don’t think +lut should be enabling +sme2 for the user > > +;; ------------------------------------------------------------------------- > +;; ---- Table lookup > +;; ------------------------------------------------------------------------- > +;; Includes: > +;; - LUTI2 > +;; - LUTI4 > +;; ------------------------------------------------------------------------- > + > +(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>" > + [(set (match_operand:SVE_FULL_BS 0 "register_operand" "=w") > + (unspec:SVE_FULL_BS > + [(match_operand:SVE_FULL_BS 1 "register_operand" "w") > + (match_operand:VNx16QI 2 "register_operand" "w") > + (match_operand:DI 3 "const_int_operand") > + (const_int LUTI_BITS)] > + UNSPEC_SVE_LUTI))] > + "TARGET_SVE2" > + "luti<LUTI_BITS>\t%0.<Vetype>, { %1.<Vetype> }, %2[%3]" > +) > > > + > +(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>" > + [(set (match_operand:<VSINGLE> 0 "register_operand") > + (unspec:<VSINGLE> > + [(match_operand:SVE_FULL_H 1 "aligned_register_operand" "w") > + (match_operand:VNx16QI 2 "register_operand") > + (match_operand:DI 3 "const_int_operand") > + (const_int LUTI_BITS)] > + UNSPEC_SVE_LUTI))] > + "TARGET_SVE2" > + "luti<LUTI_BITS>\t%0.<Vetype>, { %1.<Vetype> }, %2[%3]" > +) > > Missing constraints on operands 0 and 3? I meant operands 0 and 2, of course. > > + > +(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>" > + [(set (match_operand:<VSINGLE> 0 "register_operand") > + (unspec:<VSINGLE> > + [(match_operand:SVE_FULL_Hx2 1 "aligned_register_operand" "Uw2") > + (match_operand:VNx16QI 2 "register_operand") > + (match_operand:DI 3 "const_int_operand") > + (const_int LUTI_BITS)] > + UNSPEC_SVE_LUTI))] > + "TARGET_SVE2" > + "luti<LUTI_BITS>\t%0.<Vetype>, %1, %2[%3]" > +) > > Likewise. > > Thanks, > Kyrill
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index 6f2111434b3..099d9be8080 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -267,6 +267,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) aarch64_def_or_undef (TARGET_SME_I16I64, "__ARM_FEATURE_SME_I16I64", pfile); aarch64_def_or_undef (TARGET_SME_F64F64, "__ARM_FEATURE_SME_F64F64", pfile); aarch64_def_or_undef (TARGET_SME2, "__ARM_FEATURE_SME2", pfile); + aarch64_def_or_undef (TARGET_LUT, "__ARM_FEATURE_LUT", pfile); /* Not for ACLE, but required to keep "float.h" correct if we switch target between implementations that do or do not support ARMv8.2-A diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 42ec0eec31e..840f52e08ed 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -232,6 +232,8 @@ AARCH64_OPT_EXTENSION("the", THE, (), (), (), "the") AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs") +AARCH64_OPT_EXTENSION("lut", LUT, (SVE2, SME2), (), (), "lut") + #undef AARCH64_OPT_FMV_EXTENSION #undef AARCH64_OPT_EXTENSION #undef AARCH64_FMV_FEATURE diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc index f190770250f..6e9d65e9173 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc @@ -856,6 +856,47 @@ struct load_ext_gather_base : public overloaded_base<1> } }; + +/* sv<v0>_t svlut_<t0>(sv<t0>_t, svuint8_t, uint64_t) + where the final argument is a constant index, the instruction divides + the vector argument in BITS-bit quantities. */ +template<unsigned int BITS> +struct luti_base : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group) const override + { + /* Format: return type, table vector, indices vector, immediate value. */ + build_all (b, "v0,t0,vu8,su64", group, MODE_none); + } + + bool + check (function_checker &c) const override + { + int max_range; + bool byte_mode = c.type_suffix (0).element_bits == 8; + + if (BITS == 2) + max_range = byte_mode ? 3 : 7; + else if (BITS == 4) + max_range = byte_mode ? 1 : 7; + else + /* Unsupported number of indices bits for LUTI. */ + gcc_unreachable (); + + return c.require_immediate_range (2, 0, max_range); + } + +}; + +/* Specializations for 2-bit and 4-bit indices. */ +using luti2_def = luti_base<2>; +SHAPE (luti2) + +using luti4_def = luti_base<4>; +SHAPE (luti4) + + /* sv<t0>x<g>_t svfoo_t0_g(uint64_t, svuint8_t, uint64_t) where the first argument is the ZT register number (currently always 0) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h index ea87240518d..36cfb73e4ab 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h @@ -154,6 +154,8 @@ namespace aarch64_sve extern const function_shape *const load_gather_vs; extern const function_shape *const load_replicate; extern const function_shape *const load_za; + extern const function_shape *const luti2; + extern const function_shape *const luti4; extern const function_shape *const luti2_lane_zt; extern const function_shape *const luti4_lane_zt; extern const function_shape *const mmla; diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc index 4f25cc68028..72bb909f259 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc @@ -518,6 +518,21 @@ public: int m_unspec; }; + +class svluti_lane_impl : public function_base +{ +public: + CONSTEXPR svluti_lane_impl (unsigned int bits) : m_bits (bits) {} + + rtx expand (function_expander &e) const override + { + auto mode = e.tuple_mode (0); + return e.use_exact_insn (code_for_aarch64_sve_luti (m_bits, mode)); + } + + unsigned int m_bits; +}; + } /* end anonymous namespace */ namespace aarch64_sve { @@ -746,5 +761,7 @@ FUNCTION (svwhilegt, while_comparison, (UNSPEC_WHILEGT, UNSPEC_WHILEHI)) FUNCTION (svwhilerw, svwhilerw_svwhilewr_impl, (UNSPEC_WHILERW)) FUNCTION (svwhilewr, svwhilerw_svwhilewr_impl, (UNSPEC_WHILEWR)) FUNCTION (svxar, CODE_FOR_MODE0 (aarch64_sve2_xar),) +FUNCTION (svluti2, svluti_lane_impl, (2)) +FUNCTION (svluti4, svluti_lane_impl, (4)) } /* end namespace aarch64_sve */ diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def index 4366925a971..86aa92dad2e 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def @@ -164,6 +164,10 @@ DEF_SVE_FUNCTION (svwhilegt, compare_scalar, while, none) DEF_SVE_FUNCTION (svwhilerw, compare_ptr, all_data, none) DEF_SVE_FUNCTION (svwhilewr, compare_ptr, all_data, none) DEF_SVE_FUNCTION (svxar, ternary_shift_right_imm, all_integer, none) +DEF_SVE_FUNCTION (svluti2, luti2, bhs_data, none) +DEF_SVE_FUNCTION (svluti4, luti4, bhs_data, none) +DEF_SVE_FUNCTION_GS (svluti4, luti4, bhs_data, x2, none) + #undef REQUIRED_EXTENSIONS #define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h index a612ace9415..897f686aca9 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h @@ -205,6 +205,8 @@ namespace aarch64_sve extern const function_base *const svwhilerw; extern const function_base *const svwhilewr; extern const function_base *const svxar; + extern const function_base *const svluti2; + extern const function_base *const svluti4; } } diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 972b03a4fef..8c54986ca3b 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -114,6 +114,7 @@ ;; ---- Optional AES extensions ;; ---- Optional SHA-3 extensions ;; ---- Optional SM4 extensions +;; ---- Table lookup ;; ========================================================================= ;; == Loads @@ -3543,3 +3544,47 @@ "sm4ekey\t%0.s, %1.s, %2.s" [(set_attr "type" "crypto_sm4")] ) + +;; ------------------------------------------------------------------------- +;; ---- Table lookup +;; ------------------------------------------------------------------------- +;; Includes: +;; - LUTI2 +;; - LUTI4 +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>" + [(set (match_operand:SVE_FULL_BS 0 "register_operand" "=w") + (unspec:SVE_FULL_BS + [(match_operand:SVE_FULL_BS 1 "register_operand" "w") + (match_operand:VNx16QI 2 "register_operand" "w") + (match_operand:DI 3 "const_int_operand") + (const_int LUTI_BITS)] + UNSPEC_SVE_LUTI))] + "TARGET_SVE2" + "luti<LUTI_BITS>\t%0.<Vetype>, { %1.<Vetype> }, %2[%3]" +) + +(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>" + [(set (match_operand:<VSINGLE> 0 "register_operand") + (unspec:<VSINGLE> + [(match_operand:SVE_FULL_H 1 "aligned_register_operand" "w") + (match_operand:VNx16QI 2 "register_operand") + (match_operand:DI 3 "const_int_operand") + (const_int LUTI_BITS)] + UNSPEC_SVE_LUTI))] + "TARGET_SVE2" + "luti<LUTI_BITS>\t%0.<Vetype>, { %1.<Vetype> }, %2[%3]" +) + +(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>" + [(set (match_operand:<VSINGLE> 0 "register_operand") + (unspec:<VSINGLE> + [(match_operand:SVE_FULL_Hx2 1 "aligned_register_operand" "Uw2") + (match_operand:VNx16QI 2 "register_operand") + (match_operand:DI 3 "const_int_operand") + (const_int LUTI_BITS)] + UNSPEC_SVE_LUTI))] + "TARGET_SVE2" + "luti<LUTI_BITS>\t%0.<Vetype>, %1, %2[%3]" +) diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index c33f5da02f4..8542f01ec85 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -285,6 +285,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED #define AARCH64_ISA_D128 (aarch64_isa_flags & AARCH64_FL_D128) #define AARCH64_ISA_THE (aarch64_isa_flags & AARCH64_FL_THE) #define AARCH64_ISA_GCS (aarch64_isa_flags & AARCH64_FL_GCS) +#define AARCH64_ISA_LUT (aarch64_isa_flags & AARCH64_FL_LUT) /* The current function is a normal non-streaming function. */ #define TARGET_NON_STREAMING (AARCH64_ISA_SM_OFF) @@ -515,6 +516,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED && (aarch64_tune_params.extra_tuning_flags \ & AARCH64_EXTRA_TUNE_AVOID_PRED_RMW)) +/* Armv9.2-A/9.5-A Lookup table instructions support with 2-bit and + 4-bit indices: LUTI2 and LUTI4. */ +#define TARGET_LUT (AARCH64_ISA_LUT) + /* Standard register usage. */ /* 31 64-bit general purpose registers R0-R30: diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index f527b2cfeb8..c55c1837a75 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -508,6 +508,15 @@ (define_mode_iterator SVE_FULL_BHS [VNx16QI VNx8HI VNx4SI VNx8BF VNx8HF VNx4SF]) +;; Fully-packed SVE vector byte modes that have 32-bit or smaller elements. +(define_mode_iterator SVE_FULL_BS [VNx16QI VNx4SI VNx4SF]) + +;; Fully-packed half word SVE vector modes +(define_mode_iterator SVE_FULL_H [VNx8HI VNx8HF VNx8BF]) + +;; Pairs of fully-packed SVE vector modes (half word only) +(define_mode_iterator SVE_FULL_Hx2 [VNx16HI VNx16HF VNx16BF]) + ;; Fully-packed SVE vector modes that have 32-bit elements. (define_mode_iterator SVE_FULL_S [VNx4SI VNx4SF]) @@ -1063,6 +1072,7 @@ UNSPEC_SQCVTUN UNSPEC_UQCVT UNSPEC_UQCVTN + UNSPEC_SVE_LUTI ;; All used in aarch64-sme.md UNSPEC_SME_ADD diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h index 367024be863..ea9081420ed 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h @@ -755,5 +755,19 @@ __asm volatile ("" :: "w" (z0_res), "w" (z22_res), \ "w" (z25)); \ } - + #define TEST_1X2_NARROW(NAME, RTYPE, TTYPE, ZTYPE, CODE1, CODE2) \ + PROTO(NAME, void, ()) \ + { \ + register RTYPE z0 __asm ("z0"); \ + register ZTYPE z5 __asm ("z5"); \ + register TTYPE z6 __asm ("z6"); \ + register RTYPE z16 __asm ("z16"); \ + register ZTYPE z22 __asm ("z22"); \ + register TTYPE z29 __asm ("z29"); \ + register RTYPE z0_res __asm ("z0"); \ + __asm volatile ("" : "=w" (z0), "=w" (z5), "=w" (z6), \ + "=w" (z16), "=w" (z22), "=w" (z29)); \ + INVOKE (CODE1, CODE2); \ + __asm volatile ("" :: "w" (z0_res), "w" (z5), "w" (z22)); \ + } #endif diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_bf16.c new file mode 100644 index 00000000000..1764a7abd48 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_bf16.c @@ -0,0 +1,35 @@ +/* { dg-options "-march=armv9.4-a+sve2+lut" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** luti2_test_imm0: +** luti2 z1\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti2_test_imm0, svbfloat16_t, svuint8_t, z1, + svluti2_bf16 (z28, z0, 0), + svluti2_bf16 (z28, z0, 0)) + +/* +** luti2_test_imm1: +** luti2 z1\.h, \{ z28\.h \}, z0\[1\] +** ret +*/ + +TEST_XN_SINGLE (luti2_test_imm1, svbfloat16_t, svuint8_t, z1, + svluti2_bf16 (z28, z0, 1), + svluti2_bf16 (z28, z0, 1)) + +/* +** luti2_test_tied: +** luti2 z28\.h, \{ z28\.h \}, z0\[2\] +** ret +*/ + +TEST_XN_SINGLE (luti2_test_tied, svbfloat16_t, svuint8_t, z28, + svluti2_bf16 (z28, z0, 2), + svluti2_bf16 (z28, z0, 2)) + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_f16.c new file mode 100644 index 00000000000..c1f1d92a469 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_f16.c @@ -0,0 +1,35 @@ +/* { dg-options "-march=armv9.4-a+sve2+lut" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** luti2_test_imm0: +** luti2 z1\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti2_test_imm0, svfloat16_t, svuint8_t, z1, + svluti2_f16 (z28, z0, 0), + svluti2_f16 (z28, z0, 0)) + +/* +** luti2_test_imm1: +** luti2 z1\.h, \{ z28\.h \}, z0\[1\] +** ret +*/ + +TEST_XN_SINGLE (luti2_test_imm1, svfloat16_t, svuint8_t, z1, + svluti2_f16 (z28, z0, 1), + svluti2_f16 (z28, z0, 1)) + +/* +** luti2_test_tied: +** luti2 z28\.h, \{ z28\.h \}, z0\[2\] +** ret +*/ + +TEST_XN_SINGLE (luti2_test_tied, svfloat16_t, svuint8_t, z28, + svluti2_f16 (z28, z0, 2), + svluti2_f16 (z28, z0, 2)) + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s16.c new file mode 100644 index 00000000000..ffc92228fe0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s16.c @@ -0,0 +1,35 @@ +/* { dg-options "-march=armv9.4-a+sve2+lut" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** luti2_test_imm0: +** luti2 z1\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti2_test_imm0, svint16_t, svuint8_t, z1, + svluti2_s16 (z28, z0, 0), + svluti2_s16 (z28, z0, 0)) + +/* +** luti2_test_imm1: +** luti2 z1\.h, \{ z28\.h \}, z0\[1\] +** ret +*/ + +TEST_XN_SINGLE (luti2_test_imm1, svint16_t, svuint8_t, z1, + svluti2_s16 (z28, z0, 1), + svluti2_s16 (z28, z0, 1)) + +/* +** luti2_test_tied: +** luti2 z28\.h, \{ z28\.h \}, z0\[2\] +** ret +*/ + +TEST_XN_SINGLE (luti2_test_tied, svint16_t, svuint8_t, z28, + svluti2_s16 (z28, z0, 2), + svluti2_s16 (z28, z0, 2)) + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s8.c new file mode 100644 index 00000000000..189b5335692 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s8.c @@ -0,0 +1,35 @@ +/* { dg-options "-march=armv9.4-a+sve2+lut" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** luti2_test_imm0: +** luti2 z1\.b, \{ z28\.b \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti2_test_imm0, svint8_t, svuint8_t, z1, + svluti2_s8 (z28, z0, 0), + svluti2_s8 (z28, z0, 0)) + +/* +** luti2_test_imm1: +** luti2 z1\.b, \{ z28\.b \}, z0\[1\] +** ret +*/ + +TEST_XN_SINGLE (luti2_test_imm1, svint8_t, svuint8_t, z1, + svluti2_s8 (z28, z0, 1), + svluti2_s8 (z28, z0, 1)) + +/* +** luti2_test_tied: +** luti2 z28\.b, \{ z28\.b \}, z0\[2\] +** ret +*/ + +TEST_XN_SINGLE (luti2_test_tied, svint8_t, svuint8_t, z28, + svluti2_s8 (z28, z0, 2), + svluti2_s8 (z28, z0, 2)) + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u16.c new file mode 100644 index 00000000000..682d848e4ad --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u16.c @@ -0,0 +1,35 @@ +/* { dg-options "-march=armv9.4-a+sve2+lut" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** luti2_test_imm0: +** luti2 z1\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti2_test_imm0, svuint16_t, svuint8_t, z1, + svluti2_u16 (z28, z0, 0), + svluti2_u16 (z28, z0, 0)) + +/* +** luti2_test_imm1: +** luti2 z1\.h, \{ z28\.h \}, z0\[1\] +** ret +*/ + +TEST_XN_SINGLE (luti2_test_imm1, svuint16_t, svuint8_t, z1, + svluti2_u16 (z28, z0, 1), + svluti2_u16 (z28, z0, 1)) + +/* +** luti2_test_tied: +** luti2 z28\.h, \{ z28\.h \}, z0\[2\] +** ret +*/ + +TEST_XN_SINGLE (luti2_test_tied, svuint16_t, svuint8_t, z28, + svluti2_u16 (z28, z0, 2), + svluti2_u16 (z28, z0, 2)) + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u8.c new file mode 100644 index 00000000000..65de112012c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u8.c @@ -0,0 +1,35 @@ +/* { dg-options "-march=armv9.4-a+sve2+lut" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** luti2_test_imm0: +** luti2 z1\.b, \{ z28\.b \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti2_test_imm0, svuint8_t, svuint8_t, z1, + svluti2_u8 (z28, z0, 0), + svluti2_u8 (z28, z0, 0)) + +/* +** luti2_test_imm1: +** luti2 z1\.b, \{ z28\.b \}, z0\[1\] +** ret +*/ + +TEST_XN_SINGLE (luti2_test_imm1, svuint8_t, svuint8_t, z1, + svluti2_u8 (z28, z0, 1), + svluti2_u8 (z28, z0, 1)) + +/* +** luti2_test_tied: +** luti2 z28\.b, \{ z28\.b \}, z0\[2\] +** ret +*/ + +TEST_XN_SINGLE (luti2_test_tied, svuint8_t, svuint8_t, z28, + svluti2_u8 (z28, z0, 2), + svluti2_u8 (z28, z0, 2)) + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16.c new file mode 100644 index 00000000000..108fb3b7667 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16.c @@ -0,0 +1,35 @@ +/* { dg-options "-march=armv9.4-a+sve2+lut" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** luti4_test_imm0: +** luti4 z1\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti4_test_imm0, svbfloat16_t, svuint8_t, z1, + svluti4_bf16 (z28, z0, 0), + svluti4_bf16 (z28, z0, 0)) + +/* +** luti4_test_imm1: +** luti4 z1\.h, \{ z28\.h \}, z0\[1\] +** ret +*/ + +TEST_XN_SINGLE (luti4_test_imm1, svbfloat16_t, svuint8_t, z1, + svluti4_bf16 (z28, z0, 1), + svluti4_bf16 (z28, z0, 1)) + +/* +** luti4_test_tied: +** luti4 z28\.h, \{ z28\.h \}, z0\[2\] +** ret +*/ + +TEST_XN_SINGLE (luti4_test_tied, svbfloat16_t, svuint8_t, z28, + svluti4_bf16 (z28, z0, 2), + svluti4_bf16 (z28, z0, 2)) + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16_x2.c new file mode 100644 index 00000000000..4d72e8aa21b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16_x2.c @@ -0,0 +1,15 @@ +/* { dg-options "-march=armv9.4-a+sve2+lut" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** luti4_test_bf16_x2: +** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[1\] +** ret +*/ + +TEST_1X2_NARROW(luti4_test_bf16_x2, svbfloat16_t, svbfloat16x2_t, svuint8_t, + z0_res = svluti4_bf16_x2(z6, z5, 1), + z0_res = svluti4_bf16_x2(z6, z5, 1)) + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16.c new file mode 100644 index 00000000000..1af3836b28b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16.c @@ -0,0 +1,35 @@ +/* { dg-options "-march=armv9.4-a+sve2+lut" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** luti4_test_imm0: +** luti4 z1\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti4_test_imm0, svfloat16_t, svuint8_t, z1, + svluti4_f16 (z28, z0, 0), + svluti4_f16 (z28, z0, 0)) + +/* +** luti4_test_imm1: +** luti4 z1\.h, \{ z28\.h \}, z0\[1\] +** ret +*/ + +TEST_XN_SINGLE (luti4_test_imm1, svfloat16_t, svuint8_t, z1, + svluti4_f16 (z28, z0, 1), + svluti4_f16 (z28, z0, 1)) + +/* +** luti4_test_tied: +** luti4 z28\.h, \{ z28\.h \}, z0\[2\] +** ret +*/ + +TEST_XN_SINGLE (luti4_test_tied, svfloat16_t, svuint8_t, z28, + svluti4_f16 (z28, z0, 2), + svluti4_f16 (z28, z0, 2)) + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16_x2.c new file mode 100644 index 00000000000..7e322ebaad8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16_x2.c @@ -0,0 +1,15 @@ +/* { dg-options "-march=armv9.4-a+sve2+lut" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** luti4_test_f16_x2: +** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[1\] +** ret +*/ + +TEST_1X2_NARROW(luti4_test_f16_x2, svfloat16_t, svfloat16x2_t, svuint8_t, + z0_res = svluti4_f16_x2(z6, z5, 1), + z0_res = svluti4_f16_x2(z6, z5, 1)) + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16.c new file mode 100644 index 00000000000..ef5ab5ce1a4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16.c @@ -0,0 +1,35 @@ +/* { dg-options "-march=armv9.4-a+sve2+lut" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** luti4_test_imm0: +** luti4 z1\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti4_test_imm0, svint16_t, svuint8_t, z1, + svluti4_s16 (z28, z0, 0), + svluti4_s16 (z28, z0, 0)) + +/* +** luti4_test_imm1: +** luti4 z1\.h, \{ z28\.h \}, z0\[1\] +** ret +*/ + +TEST_XN_SINGLE (luti4_test_imm1, svint16_t, svuint8_t, z1, + svluti4_s16 (z28, z0, 1), + svluti4_s16 (z28, z0, 1)) + +/* +** luti4_test_tied: +** luti4 z28\.h, \{ z28\.h \}, z0\[2\] +** ret +*/ + +TEST_XN_SINGLE (luti4_test_tied, svint16_t, svuint8_t, z28, + svluti4_s16 (z28, z0, 2), + svluti4_s16 (z28, z0, 2)) + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16_x2.c new file mode 100644 index 00000000000..453db37194b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16_x2.c @@ -0,0 +1,15 @@ +/* { dg-options "-march=armv9.4-a+sve2+lut" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** luti4_test_s16_x2: +** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[1\] +** ret +*/ + +TEST_1X2_NARROW(luti4_test_s16_x2, svint16_t, svint16x2_t, svuint8_t, + z0_res = svluti4_s16_x2(z6, z5, 1), + z0_res = svluti4_s16_x2(z6, z5, 1)) + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s8.c new file mode 100644 index 00000000000..3603c3f0a43 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s8.c @@ -0,0 +1,25 @@ +/* { dg-options "-march=armv9.4-a+sve2+lut" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** luti4_test_imm0: +** luti4 z1\.b, \{ z28\.b \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti4_test_imm0, svint8_t, svuint8_t, z1, + svluti4_s8 (z28, z0, 0), + svluti4_s8 (z28, z0, 0)) + +/* +** luti4_test_imm1: +** luti4 z1\.b, \{ z28\.b \}, z0\[1\] +** ret +*/ + +TEST_XN_SINGLE (luti4_test_imm1, svint8_t, svuint8_t, z1, + svluti4_s8 (z28, z0, 1), + svluti4_s8 (z28, z0, 1)) + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16.c new file mode 100644 index 00000000000..2b97290c2b2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16.c @@ -0,0 +1,35 @@ +/* { dg-options "-march=armv9.4-a+sve2+lut" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** luti4_test_imm0: +** luti4 z1\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti4_test_imm0, svuint16_t, svuint8_t, z1, + svluti4_u16 (z28, z0, 0), + svluti4_u16 (z28, z0, 0)) + +/* +** luti4_test_imm1: +** luti4 z1\.h, \{ z28\.h \}, z0\[1\] +** ret +*/ + +TEST_XN_SINGLE (luti4_test_imm1, svuint16_t, svuint8_t, z1, + svluti4_u16 (z28, z0, 1), + svluti4_u16 (z28, z0, 1)) + +/* +** luti4_test_tied: +** luti4 z28\.h, \{ z28\.h \}, z0\[2\] +** ret +*/ + +TEST_XN_SINGLE (luti4_test_tied, svuint16_t, svuint8_t, z28, + svluti4_u16 (z28, z0, 2), + svluti4_u16 (z28, z0, 2)) + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16_x2.c new file mode 100644 index 00000000000..4444aa91856 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16_x2.c @@ -0,0 +1,15 @@ +/* { dg-options "-march=armv9.4-a+sve2+lut" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** luti4_test_u16_x2: +** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[1\] +** ret +*/ + +TEST_1X2_NARROW(luti4_test_u16_x2, svuint16_t, svuint16x2_t, svuint8_t, + z0_res = svluti4_u16_x2(z6, z5, 1), + z0_res = svluti4_u16_x2(z6, z5, 1)) + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u8.c new file mode 100644 index 00000000000..012d0bb84b5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u8.c @@ -0,0 +1,25 @@ +/* { dg-options "-march=armv9.4-a+sve2+lut" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** luti4_test_imm0: +** luti4 z1\.b, \{ z28\.b \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti4_test_imm0, svuint8_t, svuint8_t, z1, + svluti4_u8 (z28, z0, 0), + svluti4_u8 (z28, z0, 0)) + +/* +** luti4_test_imm1: +** luti4 z1\.b, \{ z28\.b \}, z0\[1\] +** ret +*/ + +TEST_XN_SINGLE (luti4_test_imm1, svuint8_t, svuint8_t, z1, + svluti4_u8 (z28, z0, 1), + svluti4_u8 (z28, z0, 1)) + diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index b7df6150bcb..bd532d56ff5 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -4598,6 +4598,18 @@ proc check_effective_target_aarch64_sve2 { } { }] } +# Return 1 if this is an AArch64 target supporting LUT (Lookup table) +proc check_effective_target_aarch64_lut { } { + if { ![istarget aarch64*-*-*] || ![check_effective_target_aarch64_sve2] } { + return 0 + } + return [check_no_compiler_messages aarch64_lut assembly { + #if !defined (__ARM_FEATURE_LUT) + #error FOO + #endif + }] +} + # Return 1 if this is an AArch64 target only supporting SVE (not SVE2). proc check_effective_target_aarch64_sve1_only { } { return [expr { [check_effective_target_aarch64_sve]