Message ID | c9db158d-b0d5-3f4e-97dd-0f779195ad1d@arm.com |
---|---|
State | New |
Headers | show |
Series | [AArch64] Implement ACLE Data Intrinsics | expand |
"Andre Vieira (lists)" <andre.simoesdiasvieira@arm.com> writes: > Hi, > > This patch adds support for the ACLE Data Intrinsics to the AArch64 port. > > Bootstrapped and regression tested on aarch64-none-linux. > > OK for trunk? Sorry for the slow review. > > gcc/ChangeLog: > > 2022-06-10 Andre Vieira <andre.simoesdiasvieira@arm.com> > > * config/aarch64/aarch64.md (rbit<mode>2): Rename this ... > (@aarch64_rbit<mode>): ... this and change it in... > (ffs<mode>2,ctz<mode>2): ... here. > (@aarch64_rev16<mode>): New. > * config/aarch64/aarch64-builtins.cc: (aarch64_builtins): > Define the following enum AARCH64_REV16, AARCH64_REV16L, > AARCH64_REV16LL, > AARCH64_RBIT, AARCH64_RBITL, AARCH64_RBITLL. > (aarch64_init_data_intrinsics): New. > (handle_arm_acle_h): Add call to aarch64_init_data_intrinsics. > (aarch64_expand_builtin_data_intrinsic): New. > (aarch64_general_expand_builtin): Add call to > aarch64_expand_builtin_data_intrinsic. > * config/aarch64/arm_acle.h (__clz, __clzl, __clzll, __cls, > __clsl, __clsll, __rbit, > __rbitl, __rbitll, __rev, __revl, __revll, __rev16, __rev16l, > __rev16ll, __ror, __rorl, > __rorll, __revsh): New. > > gcc/testsuite/ChangeLog: > > 2022-06-10 Andre Vieira <andre.simoesdiasvieira@arm.com> > > * gcc.target/aarch64/acle/data-intrinsics.c: New test. 
> > diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc > index e0a741ac663188713e21f457affa57217d074783..91a687dee13a27c21f0c50de9ba777aa900d6096 100644 > --- a/gcc/config/aarch64/aarch64-builtins.cc > +++ b/gcc/config/aarch64/aarch64-builtins.cc > @@ -613,6 +613,12 @@ enum aarch64_builtins > AARCH64_LS64_BUILTIN_ST64B, > AARCH64_LS64_BUILTIN_ST64BV, > AARCH64_LS64_BUILTIN_ST64BV0, > + AARCH64_REV16, > + AARCH64_REV16L, > + AARCH64_REV16LL, > + AARCH64_RBIT, > + AARCH64_RBITL, > + AARCH64_RBITLL, > AARCH64_BUILTIN_MAX > }; > > @@ -1664,10 +1670,41 @@ aarch64_init_ls64_builtins (void) > = aarch64_general_add_builtin (data[i].name, data[i].type, data[i].code); > } > > +static void > +aarch64_init_data_intrinsics (void) > +{ > + tree uint32_fntype = build_function_type_list (uint32_type_node, > + uint32_type_node, NULL_TREE); > + tree long_fntype = build_function_type_list (long_unsigned_type_node, > + long_unsigned_type_node, > + NULL_TREE); Very minor, but ulong_fntype might be clearer, since the other two variable names are explicitly unsigned. 
> + tree uint64_fntype = build_function_type_list (uint64_type_node, > + uint64_type_node, NULL_TREE); > + aarch64_builtin_decls[AARCH64_REV16] > + = aarch64_general_add_builtin ("__builtin_aarch64_rev16", uint32_fntype, > + AARCH64_REV16); > + aarch64_builtin_decls[AARCH64_REV16L] > + = aarch64_general_add_builtin ("__builtin_aarch64_rev16l", long_fntype, > + AARCH64_REV16L); > + aarch64_builtin_decls[AARCH64_REV16LL] > + = aarch64_general_add_builtin ("__builtin_aarch64_rev16ll", uint64_fntype, > + AARCH64_REV16LL); > + aarch64_builtin_decls[AARCH64_RBIT] > + = aarch64_general_add_builtin ("__builtin_aarch64_rbit", uint32_fntype, > + AARCH64_RBIT); > + aarch64_builtin_decls[AARCH64_RBITL] > + = aarch64_general_add_builtin ("__builtin_aarch64_rbitl", long_fntype, > + AARCH64_RBITL); > + aarch64_builtin_decls[AARCH64_RBITLL] > + = aarch64_general_add_builtin ("__builtin_aarch64_rbitll", uint64_fntype, > + AARCH64_RBITLL); > +} > + > /* Implement #pragma GCC aarch64 "arm_acle.h". */ > void > handle_arm_acle_h (void) > { > + aarch64_init_data_intrinsics (); > if (TARGET_LS64) > aarch64_init_ls64_builtins (); > } > @@ -2393,6 +2430,32 @@ aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target) > emit_insn (pat); > return target; > } > +/* Function to expand an expression EXP which calls one of the ACLE Data > + Intrinsic builtins FCODE with the result going to TARGET. */ > +static rtx > +aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target) > +{ > + rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); > + machine_mode mode = GET_MODE (op0); > + rtx pat; > + switch (fcode) > + { > + case AARCH64_REV16: > + case AARCH64_REV16L: > + case AARCH64_REV16LL: > + pat = gen_aarch64_rev16 (mode, target, op0); Does this work when op0 is a constant or comes from memory? Same for when target is a memory. E.g. does: void test_rev16 (uint32_t *ptr) { *ptr = __rev16 (*ptr); } work? 
It'd be more robust to use the expand_insn interface instead; see aarch64_expand_builtin_ls64 for an example. > + break; > + case AARCH64_RBIT: > + case AARCH64_RBITL: > + case AARCH64_RBITLL: > + pat = gen_aarch64_rbit (mode, target, op0); > + break; > + default: > + gcc_unreachable (); > + } > + emit_insn (pat); > + return target; > +} > > /* Expand an expression EXP as fpsr or fpcr setter (depending on > UNSPEC) using MODE. */ > @@ -2551,6 +2614,9 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, > if (fcode >= AARCH64_MEMTAG_BUILTIN_START > && fcode <= AARCH64_MEMTAG_BUILTIN_END) > return aarch64_expand_builtin_memtag (fcode, exp, target); > + if (fcode >= AARCH64_REV16 > + && fcode <= AARCH64_RBITLL) > + return aarch64_expand_builtin_data_intrinsic (fcode, exp, target); > > gcc_unreachable (); > } > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index acec8c1146765c0fac73c15351853324b8f03209..ef0aed25c6b26eff61f9f6030dc5921a534e3d19 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -4950,7 +4950,7 @@ (define_expand "ffs<mode>2" > rtx ccreg = aarch64_gen_compare_reg (EQ, operands[1], const0_rtx); > rtx x = gen_rtx_NE (VOIDmode, ccreg, const0_rtx); > > - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); > + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1])); > emit_insn (gen_clz<mode>2 (operands[0], operands[0])); > emit_insn (gen_csinc3<mode>_insn (operands[0], x, operands[0], const0_rtx)); > DONE; > @@ -4996,7 +4996,7 @@ (define_insn "clrsb<mode>2" > [(set_attr "type" "clz")] > ) > > -(define_insn "rbit<mode>2" > +(define_insn "@aarch64_rbit<mode>" > [(set (match_operand:GPI 0 "register_operand" "=r") > (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_RBIT))] > "" > @@ -5017,7 +5017,7 @@ (define_insn_and_split "ctz<mode>2" > "reload_completed" > [(const_int 0)] > " > - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); > + 
emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1])); > emit_insn (gen_clz<mode>2 (operands[0], operands[0])); > DONE; > ") > @@ -6022,6 +6022,13 @@ (define_insn "bswaphi2" > [(set_attr "type" "rev")] > ) > > +(define_insn "@aarch64_rev16<mode>" > + [(set (match_operand:GPI 0 "register_operand" "=r") > + (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_REV))] > + "" > + "rev16\\t%<w>0, %<w>1" > + [(set_attr "type" "rev")]) > + > (define_insn "*aarch64_bfxil<mode>" > [(set (match_operand:GPI 0 "register_operand" "=r,r") > (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "r,0") > diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h > index 9775a48c65825b424d3eb442384f5ab87b734fd7..faddd5d0a780c5d65ba430bd3174c701e848c794 100644 > --- a/gcc/config/aarch64/arm_acle.h > +++ b/gcc/config/aarch64/arm_acle.h > @@ -28,6 +28,7 @@ > #define _GCC_ARM_ACLE_H > > #include <stdint.h> > +#include <stddef.h> > > #pragma GCC aarch64 "arm_acle.h" > > @@ -35,6 +36,54 @@ > extern "C" { > #endif > > +#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) \ > +__extension__ extern __inline TYPE \ > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ > +NAME (TYPE value, uint32_t rotate) \ The names of the parameters and local variables need the same __ uglification as __revl. Same for _GCC_ARM_ACLE_DATA_FN. > +{ \ > + size_t size = sizeof (TYPE) * __CHAR_BIT__; \ > + rotate = rotate % size; \ > + return value >> rotate | value << (size - rotate); \ This runs into UB for rotate == 0. > +} > + > +_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t) > +_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long) > +_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t) Would be good to undef the macro once we're done with it, to reduce noise in things like -dM. 
> + > +#define _GCC_ARM_ACLE_DATA_FN(NAME, BUILTIN, TYPE) \ > +__extension__ extern __inline TYPE \ > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ > +__##NAME (TYPE value) \ > +{ \ > + return __builtin_##BUILTIN (value); \ > +} > + > +_GCC_ARM_ACLE_DATA_FN (clz, clz, uint32_t) > +_GCC_ARM_ACLE_DATA_FN (clzl, clzl, unsigned long) > +_GCC_ARM_ACLE_DATA_FN (clzll, clzll, uint64_t) > +_GCC_ARM_ACLE_DATA_FN (cls, clrsb, uint32_t) > +_GCC_ARM_ACLE_DATA_FN (clsl, clrsbl, unsigned long) > +_GCC_ARM_ACLE_DATA_FN (clsll, clrsbll, uint64_t) The ACLE spec says that these should return unsigned int, so I guess either these functions should have their own macro or the macro above should have a separate parameter for the return type. Thanks, Richard > +_GCC_ARM_ACLE_DATA_FN (rev16, aarch64_rev16, uint32_t) > +_GCC_ARM_ACLE_DATA_FN (rev16l, aarch64_rev16l, unsigned long) > +_GCC_ARM_ACLE_DATA_FN (rev16ll, aarch64_rev16ll, uint64_t) > +_GCC_ARM_ACLE_DATA_FN (rbit, aarch64_rbit, uint32_t) > +_GCC_ARM_ACLE_DATA_FN (rbitl, aarch64_rbitl, unsigned long) > +_GCC_ARM_ACLE_DATA_FN (rbitll, aarch64_rbitll, uint64_t) > +_GCC_ARM_ACLE_DATA_FN (revsh, bswap16, int16_t) > +_GCC_ARM_ACLE_DATA_FN (rev, bswap32, uint32_t) > +_GCC_ARM_ACLE_DATA_FN (revll, bswap64, uint64_t) > + > +__extension__ extern __inline unsigned long > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__revl (unsigned long __value) > +{ > + if (sizeof (unsigned long) == 8) > + return __revll (__value); > + else > + return __rev (__value); > +} > + > #pragma GCC push_options > #pragma GCC target ("arch=armv8.3-a") > __extension__ extern __inline int32_t > diff --git a/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c > new file mode 100644 > index 0000000000000000000000000000000000000000..90813184704dfcdaf2d24d523ff744aa6cbedf1a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c > @@ 
-0,0 +1,215 @@ > +/* Test the ACLE data intrinsics. */ > +/* { dg-do assemble } */ > +/* { dg-additional-options "--save-temps -O1" } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > + > +#include "arm_acle.h" > + > +/* > +** test_clz: > +** clz w0, w0 > +** ret > +*/ > + > +uint32_t test_clz (uint32_t a) > +{ > + return __clz (a); > +} > + > +/* > +** test_clzl: > +** clz [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned long test_clzl (unsigned long a) > +{ > + return __clzl (a); > +} > + > +/* > +** test_clzll: > +** clz x0, x0 > +** ret > +*/ > + > +uint64_t test_clzll (uint64_t a) > +{ > + return __clzll (a); > +} > + > +/* > +** test_cls: > +** cls w0, w0 > +** ret > +*/ > + > +uint32_t test_cls (uint32_t a) > +{ > + return __cls (a); > +} > + > +/* > +** test_clsl: > +** cls [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned long test_clsl (unsigned long a) > +{ > + return __clsl (a); > +} > + > +/* > +** test_clsll: > +** cls x0, x0 > +** ret > +*/ > + > +uint64_t test_clsll (uint64_t a) > +{ > + return __clsll (a); > +} > + > +/* > +** test_rbit: > +** rbit w0, w0 > +** ret > +*/ > + > +uint32_t test_rbit (uint32_t a) > +{ > + return __rbit (a); > +} > + > +/* > +** test_rbitl: > +** rbit [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned long test_rbitl (unsigned long a) > +{ > + return __rbitl (a); > +} > + > +/* > +** test_rbitll: > +** rbit x0, x0 > +** ret > +*/ > + > +uint64_t test_rbitll (uint64_t a) > +{ > + return __rbitll (a); > +} > + > +/* > +** test_rev: > +** rev w0, w0 > +** ret > +*/ > + > +uint32_t test_rev (uint32_t a) > +{ > + return __builtin_bswap32 (a); > +} > + > +/* > +** test_revl: > +** rev [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned long test_revl (unsigned long a) > +{ > + return __revl (a); > +} > + > +/* > +** test_revll: > +** rev x0, x0 > +** ret > +*/ > + > +uint64_t test_revll (uint64_t a) > +{ > + return __revll (a); > +} > + > +/* > +** test_rev16: > +** rev16 w0, w0 > +** ret > +*/ > + > +uint32_t test_rev16 (uint32_t a) 
> +{ > + return __rev16 (a); > +} > + > +/* > +** test_rev16l: > +** rev16 [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned long test_rev16l (unsigned long a) > +{ > + return __rev16l (a); > +} > + > +/* > +** test_rev16ll: > +** rev16 x0, x0 > +** ret > +*/ > + > +uint64_t test_rev16ll (uint64_t a) > +{ > + return __rev16ll (a); > +} > + > +/* > +** test_ror: > +** ror w0, w0, w1 > +** ret > +*/ > + > +uint32_t test_ror (uint32_t a, uint32_t r) > +{ > + return __ror (a, r); > +} > + > +/* > +** test_rorl: > +** ror [wx]0, [wx]0, [wx]1 > +** ret > +*/ > + > +unsigned long test_rorl (unsigned long a, uint32_t r) > +{ > + return __rorl (a, r); > +} > + > +/* > +** test_rorll: > +** ror x0, x0, x1 > +** ret > +*/ > + > +uint64_t test_rorll (uint64_t a, uint32_t r) > +{ > + return __rorll (a, r); > +} > + > +/* > +** test_revsh: > +** rev16 w0, w0 > +** ret > +*/ > + > +int16_t test_revsh (int16_t a) > +{ > + return __revsh (a); > +}
On 17/06/2022 11:54, Richard Sandiford wrote: > "Andre Vieira (lists)" <andre.simoesdiasvieira@arm.com> writes: >> Hi, >> >> This patch adds support for the ACLE Data Intrinsics to the AArch64 port. >> >> Bootstrapped and regression tested on aarch64-none-linux. >> >> OK for trunk? > Sorry for the slow review. No worries :) > >> +{ \ >> + size_t size = sizeof (TYPE) * __CHAR_BIT__; \ >> + rotate = rotate % size; \ >> + return value >> rotate | value << (size - rotate); \ > This runs into UB for rotate == 0. I assume it's because of the value << size no? I added a modulo, I assume it's legal to shift by 0? This OK? diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index e0a741ac663188713e21f457affa57217d074783..69f1fb3604a481fa378d105cf3ee98edec1ba619 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -613,6 +613,12 @@ enum aarch64_builtins AARCH64_LS64_BUILTIN_ST64B, AARCH64_LS64_BUILTIN_ST64BV, AARCH64_LS64_BUILTIN_ST64BV0, + AARCH64_REV16, + AARCH64_REV16L, + AARCH64_REV16LL, + AARCH64_RBIT, + AARCH64_RBITL, + AARCH64_RBITLL, AARCH64_BUILTIN_MAX }; @@ -1664,10 +1670,41 @@ aarch64_init_ls64_builtins (void) = aarch64_general_add_builtin (data[i].name, data[i].type, data[i].code); } +static void +aarch64_init_data_intrinsics (void) +{ + tree uint32_fntype = build_function_type_list (uint32_type_node, + uint32_type_node, NULL_TREE); + tree ulong_fntype = build_function_type_list (long_unsigned_type_node, + long_unsigned_type_node, + NULL_TREE); + tree uint64_fntype = build_function_type_list (uint64_type_node, + uint64_type_node, NULL_TREE); + aarch64_builtin_decls[AARCH64_REV16] + = aarch64_general_add_builtin ("__builtin_aarch64_rev16", uint32_fntype, + AARCH64_REV16); + aarch64_builtin_decls[AARCH64_REV16L] + = aarch64_general_add_builtin ("__builtin_aarch64_rev16l", ulong_fntype, + AARCH64_REV16L); + aarch64_builtin_decls[AARCH64_REV16LL] + = aarch64_general_add_builtin 
("__builtin_aarch64_rev16ll", uint64_fntype, + AARCH64_REV16LL); + aarch64_builtin_decls[AARCH64_RBIT] + = aarch64_general_add_builtin ("__builtin_aarch64_rbit", uint32_fntype, + AARCH64_RBIT); + aarch64_builtin_decls[AARCH64_RBITL] + = aarch64_general_add_builtin ("__builtin_aarch64_rbitl", ulong_fntype, + AARCH64_RBITL); + aarch64_builtin_decls[AARCH64_RBITLL] + = aarch64_general_add_builtin ("__builtin_aarch64_rbitll", uint64_fntype, + AARCH64_RBITLL); +} + /* Implement #pragma GCC aarch64 "arm_acle.h". */ void handle_arm_acle_h (void) { + aarch64_init_data_intrinsics (); if (TARGET_LS64) aarch64_init_ls64_builtins (); } @@ -2393,6 +2430,40 @@ aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target) emit_insn (pat); return target; } +/* Function to expand an expression EXP which calls one of the ACLE Data + Intrinsic builtins FCODE with the result going to TARGET. */ +static rtx +aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target) +{ + expand_operand ops[2]; + machine_mode mode = GET_MODE (target); + create_output_operand (&ops[0], target, mode); + create_input_operand (&ops[1], expand_normal (CALL_EXPR_ARG (exp, 0)), mode); + enum insn_code icode; + switch (fcode) + { + case AARCH64_REV16: + case AARCH64_REV16L: + case AARCH64_REV16LL: + if (mode == SImode) + icode = CODE_FOR_aarch64_rev16si; + else + icode = CODE_FOR_aarch64_rev16di; + break; + case AARCH64_RBIT: + case AARCH64_RBITL: + case AARCH64_RBITLL: + if (mode == SImode) + icode = CODE_FOR_aarch64_rbitsi; + else + icode = CODE_FOR_aarch64_rbitdi; + break; + default: + gcc_unreachable (); + } + expand_insn (icode, 2, ops); + return target; +} /* Expand an expression EXP as fpsr or fpcr setter (depending on UNSPEC) using MODE. 
*/ @@ -2551,6 +2622,9 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, if (fcode >= AARCH64_MEMTAG_BUILTIN_START && fcode <= AARCH64_MEMTAG_BUILTIN_END) return aarch64_expand_builtin_memtag (fcode, exp, target); + if (fcode >= AARCH64_REV16 + && fcode <= AARCH64_RBITLL) + return aarch64_expand_builtin_data_intrinsic (fcode, exp, target); gcc_unreachable (); } diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index acec8c1146765c0fac73c15351853324b8f03209..ef0aed25c6b26eff61f9f6030dc5921a534e3d19 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -4950,7 +4950,7 @@ (define_expand "ffs<mode>2" rtx ccreg = aarch64_gen_compare_reg (EQ, operands[1], const0_rtx); rtx x = gen_rtx_NE (VOIDmode, ccreg, const0_rtx); - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1])); emit_insn (gen_clz<mode>2 (operands[0], operands[0])); emit_insn (gen_csinc3<mode>_insn (operands[0], x, operands[0], const0_rtx)); DONE; @@ -4996,7 +4996,7 @@ (define_insn "clrsb<mode>2" [(set_attr "type" "clz")] ) -(define_insn "rbit<mode>2" +(define_insn "@aarch64_rbit<mode>" [(set (match_operand:GPI 0 "register_operand" "=r") (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_RBIT))] "" @@ -5017,7 +5017,7 @@ (define_insn_and_split "ctz<mode>2" "reload_completed" [(const_int 0)] " - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1])); emit_insn (gen_clz<mode>2 (operands[0], operands[0])); DONE; ") @@ -6022,6 +6022,13 @@ (define_insn "bswaphi2" [(set_attr "type" "rev")] ) +(define_insn "@aarch64_rev16<mode>" + [(set (match_operand:GPI 0 "register_operand" "=r") + (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_REV))] + "" + "rev16\\t%<w>0, %<w>1" + [(set_attr "type" "rev")]) + (define_insn "*aarch64_bfxil<mode>" [(set (match_operand:GPI 0 
"register_operand" "=r,r") (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "r,0") diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h index 9775a48c65825b424d3eb442384f5ab87b734fd7..a044bc74553fcf2a49b71290083f3f072fd5a2ce 100644 --- a/gcc/config/aarch64/arm_acle.h +++ b/gcc/config/aarch64/arm_acle.h @@ -28,6 +28,7 @@ #define _GCC_ARM_ACLE_H #include <stdint.h> +#include <stddef.h> #pragma GCC aarch64 "arm_acle.h" @@ -35,6 +36,58 @@ extern "C" { #endif +#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) \ +__extension__ extern __inline TYPE \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +NAME (TYPE __value, uint32_t __rotate) \ +{ \ + size_t __size = sizeof (TYPE) * __CHAR_BIT__; \ + __rotate = __rotate % __size; \ + return __value >> __rotate | __value << ((__size - __rotate) % __size); \ +} + +_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t) +_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long) +_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t) + +#undef _GCC_ARM_ACLE_ROR_FN + +#define _GCC_ARM_ACLE_DATA_FN(NAME, BUILTIN, ITYPE, RTYPE) \ +__extension__ extern __inline RTYPE \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +__##NAME (ITYPE __value) \ +{ \ + return __builtin_##BUILTIN (__value); \ +} + +_GCC_ARM_ACLE_DATA_FN (clz, clz, uint32_t, unsigned int) +_GCC_ARM_ACLE_DATA_FN (clzl, clzl, unsigned long, unsigned int) +_GCC_ARM_ACLE_DATA_FN (clzll, clzll, uint64_t, unsigned int) +_GCC_ARM_ACLE_DATA_FN (cls, clrsb, uint32_t, unsigned int) +_GCC_ARM_ACLE_DATA_FN (clsl, clrsbl, unsigned long, unsigned int) +_GCC_ARM_ACLE_DATA_FN (clsll, clrsbll, uint64_t, unsigned int) +_GCC_ARM_ACLE_DATA_FN (rev16, aarch64_rev16, uint32_t, uint32_t) +_GCC_ARM_ACLE_DATA_FN (rev16l, aarch64_rev16l, unsigned long, unsigned long) +_GCC_ARM_ACLE_DATA_FN (rev16ll, aarch64_rev16ll, uint64_t, uint64_t) +_GCC_ARM_ACLE_DATA_FN (rbit, aarch64_rbit, uint32_t, uint32_t) +_GCC_ARM_ACLE_DATA_FN (rbitl, aarch64_rbitl, unsigned long, unsigned long) 
+_GCC_ARM_ACLE_DATA_FN (rbitll, aarch64_rbitll, uint64_t, uint64_t) +_GCC_ARM_ACLE_DATA_FN (revsh, bswap16, int16_t, uint16_t) +_GCC_ARM_ACLE_DATA_FN (rev, bswap32, uint32_t, uint32_t) +_GCC_ARM_ACLE_DATA_FN (revll, bswap64, uint64_t, uint64_t) + +#undef _GCC_ARM_ACLE_DATA_FN + +__extension__ extern __inline unsigned long +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__revl (unsigned long __value) +{ + if (sizeof (unsigned long) == 8) + return __revll (__value); + else + return __rev (__value); +} + #pragma GCC push_options #pragma GCC target ("arch=armv8.3-a") __extension__ extern __inline int32_t diff --git a/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c new file mode 100644 index 0000000000000000000000000000000000000000..90813184704dfcdaf2d24d523ff744aa6cbedf1a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c @@ -0,0 +1,215 @@ +/* Test the ACLE data intrinsics. */ +/* { dg-do assemble } */ +/* { dg-additional-options "--save-temps -O1" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include "arm_acle.h" + +/* +** test_clz: +** clz w0, w0 +** ret +*/ + +uint32_t test_clz (uint32_t a) +{ + return __clz (a); +} + +/* +** test_clzl: +** clz [wx]0, [wx]0 +** ret +*/ + +unsigned long test_clzl (unsigned long a) +{ + return __clzl (a); +} + +/* +** test_clzll: +** clz x0, x0 +** ret +*/ + +uint64_t test_clzll (uint64_t a) +{ + return __clzll (a); +} + +/* +** test_cls: +** cls w0, w0 +** ret +*/ + +uint32_t test_cls (uint32_t a) +{ + return __cls (a); +} + +/* +** test_clsl: +** cls [wx]0, [wx]0 +** ret +*/ + +unsigned long test_clsl (unsigned long a) +{ + return __clsl (a); +} + +/* +** test_clsll: +** cls x0, x0 +** ret +*/ + +uint64_t test_clsll (uint64_t a) +{ + return __clsll (a); +} + +/* +** test_rbit: +** rbit w0, w0 +** ret +*/ + +uint32_t test_rbit (uint32_t a) +{ + return __rbit (a); +} + +/* +** test_rbitl: +** rbit [wx]0, 
[wx]0 +** ret +*/ + +unsigned long test_rbitl (unsigned long a) +{ + return __rbitl (a); +} + +/* +** test_rbitll: +** rbit x0, x0 +** ret +*/ + +uint64_t test_rbitll (uint64_t a) +{ + return __rbitll (a); +} + +/* +** test_rev: +** rev w0, w0 +** ret +*/ + +uint32_t test_rev (uint32_t a) +{ + return __builtin_bswap32 (a); +} + +/* +** test_revl: +** rev [wx]0, [wx]0 +** ret +*/ + +unsigned long test_revl (unsigned long a) +{ + return __revl (a); +} + +/* +** test_revll: +** rev x0, x0 +** ret +*/ + +uint64_t test_revll (uint64_t a) +{ + return __revll (a); +} + +/* +** test_rev16: +** rev16 w0, w0 +** ret +*/ + +uint32_t test_rev16 (uint32_t a) +{ + return __rev16 (a); +} + +/* +** test_rev16l: +** rev16 [wx]0, [wx]0 +** ret +*/ + +unsigned long test_rev16l (unsigned long a) +{ + return __rev16l (a); +} + +/* +** test_rev16ll: +** rev16 x0, x0 +** ret +*/ + +uint64_t test_rev16ll (uint64_t a) +{ + return __rev16ll (a); +} + +/* +** test_ror: +** ror w0, w0, w1 +** ret +*/ + +uint32_t test_ror (uint32_t a, uint32_t r) +{ + return __ror (a, r); +} + +/* +** test_rorl: +** ror [wx]0, [wx]0, [wx]1 +** ret +*/ + +unsigned long test_rorl (unsigned long a, uint32_t r) +{ + return __rorl (a, r); +} + +/* +** test_rorll: +** ror x0, x0, x1 +** ret +*/ + +uint64_t test_rorll (uint64_t a, uint32_t r) +{ + return __rorll (a, r); +} + +/* +** test_revsh: +** rev16 w0, w0 +** ret +*/ + +int16_t test_revsh (int16_t a) +{ + return __revsh (a); +}
"Andre Vieira (lists)" <andre.simoesdiasvieira@arm.com> writes: > On 17/06/2022 11:54, Richard Sandiford wrote: >> "Andre Vieira (lists)" <andre.simoesdiasvieira@arm.com> writes: >>> Hi, >>> >>> This patch adds support for the ACLE Data Intrinsics to the AArch64 port. >>> >>> Bootstrapped and regression tested on aarch64-none-linux. >>> >>> OK for trunk? >> Sorry for the slow review. > No worries :) >> >>> +{ \ >>> + size_t size = sizeof (TYPE) * __CHAR_BIT__; \ >>> + rotate = rotate % size; \ >>> + return value >> rotate | value << (size - rotate); \ >> This runs into UB for rotate == 0. > I assume it's because of the value << size no? Yeah. > I added a modulo, I assume it's legal to shift by 0? Thanks, and yeah, shifting by zero is fine. > This OK? > > diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc > index e0a741ac663188713e21f457affa57217d074783..69f1fb3604a481fa378d105cf3ee98edec1ba619 100644 > --- a/gcc/config/aarch64/aarch64-builtins.cc > +++ b/gcc/config/aarch64/aarch64-builtins.cc > @@ -613,6 +613,12 @@ enum aarch64_builtins > AARCH64_LS64_BUILTIN_ST64B, > AARCH64_LS64_BUILTIN_ST64BV, > AARCH64_LS64_BUILTIN_ST64BV0, > + AARCH64_REV16, > + AARCH64_REV16L, > + AARCH64_REV16LL, > + AARCH64_RBIT, > + AARCH64_RBITL, > + AARCH64_RBITLL, > AARCH64_BUILTIN_MAX > }; > > @@ -1664,10 +1670,41 @@ aarch64_init_ls64_builtins (void) > = aarch64_general_add_builtin (data[i].name, data[i].type, data[i].code); > } > > +static void > +aarch64_init_data_intrinsics (void) > +{ > + tree uint32_fntype = build_function_type_list (uint32_type_node, > + uint32_type_node, NULL_TREE); > + tree ulong_fntype = build_function_type_list (long_unsigned_type_node, > + long_unsigned_type_node, > + NULL_TREE); > + tree uint64_fntype = build_function_type_list (uint64_type_node, > + uint64_type_node, NULL_TREE); > + aarch64_builtin_decls[AARCH64_REV16] > + = aarch64_general_add_builtin ("__builtin_aarch64_rev16", uint32_fntype, > + AARCH64_REV16); 
> + aarch64_builtin_decls[AARCH64_REV16L] > + = aarch64_general_add_builtin ("__builtin_aarch64_rev16l", ulong_fntype, > + AARCH64_REV16L); > + aarch64_builtin_decls[AARCH64_REV16LL] > + = aarch64_general_add_builtin ("__builtin_aarch64_rev16ll", uint64_fntype, > + AARCH64_REV16LL); > + aarch64_builtin_decls[AARCH64_RBIT] > + = aarch64_general_add_builtin ("__builtin_aarch64_rbit", uint32_fntype, > + AARCH64_RBIT); > + aarch64_builtin_decls[AARCH64_RBITL] > + = aarch64_general_add_builtin ("__builtin_aarch64_rbitl", ulong_fntype, > + AARCH64_RBITL); > + aarch64_builtin_decls[AARCH64_RBITLL] > + = aarch64_general_add_builtin ("__builtin_aarch64_rbitll", uint64_fntype, > + AARCH64_RBITLL); > +} > + > /* Implement #pragma GCC aarch64 "arm_acle.h". */ > void > handle_arm_acle_h (void) > { > + aarch64_init_data_intrinsics (); > if (TARGET_LS64) > aarch64_init_ls64_builtins (); > } > @@ -2393,6 +2430,40 @@ aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target) > emit_insn (pat); > return target; > } Nit: missing blank line here. > +/* Function to expand an expression EXP which calls one of the ACLE Data > + Intrinsic builtins FCODE with the result going to TARGET. */ > +static rtx > +aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target) > +{ > + expand_operand ops[2]; > + machine_mode mode = GET_MODE (target); > + create_output_operand (&ops[0], target, mode); > + create_input_operand (&ops[1], expand_normal (CALL_EXPR_ARG (exp, 0)), mode); > + enum insn_code icode; > + switch (fcode) > + { > + case AARCH64_REV16: > + case AARCH64_REV16L: > + case AARCH64_REV16LL: > + if (mode == SImode) > + icode = CODE_FOR_aarch64_rev16si; > + else > + icode = CODE_FOR_aarch64_rev16di; You should be able to do: icode = code_for_aarch64_rev16 (mode); instead. Same for the next cases. 
> + break; > + case AARCH64_RBIT: > + case AARCH64_RBITL: > + case AARCH64_RBITLL: > + if (mode == SImode) > + icode = CODE_FOR_aarch64_rbitsi; > + else > + icode = CODE_FOR_aarch64_rbitdi; > + break; > + default: > + gcc_unreachable (); > + } > + expand_insn (icode, 2, ops); > + return target; This needs to return ops[0].value instead, since "target" just suggests a possible location. Could you add tests for a memory source and memory destination, e.g.: void test_clz_mem (uint32_t *a) { *a = __clz (*a); } Without tests like that, these comments probably just sound like a paper exercise, but they should make a difference for memory sources (previous review) and memory destinations (this round). > +} > > /* Expand an expression EXP as fpsr or fpcr setter (depending on > UNSPEC) using MODE. */ > @@ -2551,6 +2622,9 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, > if (fcode >= AARCH64_MEMTAG_BUILTIN_START > && fcode <= AARCH64_MEMTAG_BUILTIN_END) > return aarch64_expand_builtin_memtag (fcode, exp, target); > + if (fcode >= AARCH64_REV16 > + && fcode <= AARCH64_RBITLL) > + return aarch64_expand_builtin_data_intrinsic (fcode, exp, target); > > gcc_unreachable (); > } > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index acec8c1146765c0fac73c15351853324b8f03209..ef0aed25c6b26eff61f9f6030dc5921a534e3d19 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -4950,7 +4950,7 @@ (define_expand "ffs<mode>2" > rtx ccreg = aarch64_gen_compare_reg (EQ, operands[1], const0_rtx); > rtx x = gen_rtx_NE (VOIDmode, ccreg, const0_rtx); > > - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); > + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1])); > emit_insn (gen_clz<mode>2 (operands[0], operands[0])); > emit_insn (gen_csinc3<mode>_insn (operands[0], x, operands[0], const0_rtx)); > DONE; > @@ -4996,7 +4996,7 @@ (define_insn "clrsb<mode>2" > [(set_attr "type" "clz")] > ) > 
> -(define_insn "rbit<mode>2" > +(define_insn "@aarch64_rbit<mode>" > [(set (match_operand:GPI 0 "register_operand" "=r") > (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_RBIT))] > "" > @@ -5017,7 +5017,7 @@ (define_insn_and_split "ctz<mode>2" > "reload_completed" > [(const_int 0)] > " > - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); > + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1])); > emit_insn (gen_clz<mode>2 (operands[0], operands[0])); > DONE; > ") > @@ -6022,6 +6022,13 @@ (define_insn "bswaphi2" > [(set_attr "type" "rev")] > ) > > +(define_insn "@aarch64_rev16<mode>" > + [(set (match_operand:GPI 0 "register_operand" "=r") > + (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_REV))] > + "" > + "rev16\\t%<w>0, %<w>1" > + [(set_attr "type" "rev")]) > + > (define_insn "*aarch64_bfxil<mode>" > [(set (match_operand:GPI 0 "register_operand" "=r,r") > (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "r,0") > diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h > index 9775a48c65825b424d3eb442384f5ab87b734fd7..a044bc74553fcf2a49b71290083f3f072fd5a2ce 100644 > --- a/gcc/config/aarch64/arm_acle.h > +++ b/gcc/config/aarch64/arm_acle.h > @@ -28,6 +28,7 @@ > #define _GCC_ARM_ACLE_H > > #include <stdint.h> > +#include <stddef.h> > > #pragma GCC aarch64 "arm_acle.h" > > @@ -35,6 +36,58 @@ > extern "C" { > #endif > > +#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) \ > +__extension__ extern __inline TYPE \ > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ > +NAME (TYPE __value, uint32_t __rotate) \ > +{ \ > + size_t __size = sizeof (TYPE) * __CHAR_BIT__; \ > + __rotate = __rotate % __size; \ > + return __value >> __rotate | __value << ((__size - __rotate) % __size); \ > +} > + > +_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t) > +_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long) > +_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t) > + > +#undef _GCC_ARM_ACLE_ROR_FN > + > +#define 
_GCC_ARM_ACLE_DATA_FN(NAME, BUILTIN, ITYPE, RTYPE) \ > +__extension__ extern __inline RTYPE \ > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ > +__##NAME (ITYPE __value) \ > +{ \ > + return __builtin_##BUILTIN (__value); \ > +} > + > +_GCC_ARM_ACLE_DATA_FN (clz, clz, uint32_t, unsigned int) > +_GCC_ARM_ACLE_DATA_FN (clzl, clzl, unsigned long, unsigned int) > +_GCC_ARM_ACLE_DATA_FN (clzll, clzll, uint64_t, unsigned int) > +_GCC_ARM_ACLE_DATA_FN (cls, clrsb, uint32_t, unsigned int) > +_GCC_ARM_ACLE_DATA_FN (clsl, clrsbl, unsigned long, unsigned int) > +_GCC_ARM_ACLE_DATA_FN (clsll, clrsbll, uint64_t, unsigned int) > +_GCC_ARM_ACLE_DATA_FN (rev16, aarch64_rev16, uint32_t, uint32_t) > +_GCC_ARM_ACLE_DATA_FN (rev16l, aarch64_rev16l, unsigned long, unsigned long) > +_GCC_ARM_ACLE_DATA_FN (rev16ll, aarch64_rev16ll, uint64_t, uint64_t) > +_GCC_ARM_ACLE_DATA_FN (rbit, aarch64_rbit, uint32_t, uint32_t) > +_GCC_ARM_ACLE_DATA_FN (rbitl, aarch64_rbitl, unsigned long, unsigned long) > +_GCC_ARM_ACLE_DATA_FN (rbitll, aarch64_rbitll, uint64_t, uint64_t) > +_GCC_ARM_ACLE_DATA_FN (revsh, bswap16, int16_t, uint16_t) The return type should be int16_t. The clz and cls tests have the old return types (same as the argument types), but I guess that's a good thing, since it shows that we avoid the redundant zero-extend in clzll and clsll. 
Thanks, Richard > +_GCC_ARM_ACLE_DATA_FN (rev, bswap32, uint32_t, uint32_t) > +_GCC_ARM_ACLE_DATA_FN (revll, bswap64, uint64_t, uint64_t) > + > +#undef _GCC_ARM_ACLE_DATA_FN > + > +__extension__ extern __inline unsigned long > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__revl (unsigned long __value) > +{ > + if (sizeof (unsigned long) == 8) > + return __revll (__value); > + else > + return __rev (__value); > +} > + > #pragma GCC push_options > #pragma GCC target ("arch=armv8.3-a") > __extension__ extern __inline int32_t > diff --git a/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c > new file mode 100644 > index 0000000000000000000000000000000000000000..90813184704dfcdaf2d24d523ff744aa6cbedf1a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c > @@ -0,0 +1,215 @@ > +/* Test the ACLE data intrinsics. */ > +/* { dg-do assemble } */ > +/* { dg-additional-options "--save-temps -O1" } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > + > +#include "arm_acle.h" > + > +/* > +** test_clz: > +** clz w0, w0 > +** ret > +*/ > + > +uint32_t test_clz (uint32_t a) > +{ > + return __clz (a); > +} > + > +/* > +** test_clzl: > +** clz [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned long test_clzl (unsigned long a) > +{ > + return __clzl (a); > +} > + > +/* > +** test_clzll: > +** clz x0, x0 > +** ret > +*/ > + > +uint64_t test_clzll (uint64_t a) > +{ > + return __clzll (a); > +} > + > +/* > +** test_cls: > +** cls w0, w0 > +** ret > +*/ > + > +uint32_t test_cls (uint32_t a) > +{ > + return __cls (a); > +} > + > +/* > +** test_clsl: > +** cls [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned long test_clsl (unsigned long a) > +{ > + return __clsl (a); > +} > + > +/* > +** test_clsll: > +** cls x0, x0 > +** ret > +*/ > + > +uint64_t test_clsll (uint64_t a) > +{ > + return __clsll (a); > +} > + > +/* > +** test_rbit: > +** rbit w0, w0 > +** ret > +*/ > + > 
+uint32_t test_rbit (uint32_t a) > +{ > + return __rbit (a); > +} > + > +/* > +** test_rbitl: > +** rbit [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned long test_rbitl (unsigned long a) > +{ > + return __rbitl (a); > +} > + > +/* > +** test_rbitll: > +** rbit x0, x0 > +** ret > +*/ > + > +uint64_t test_rbitll (uint64_t a) > +{ > + return __rbitll (a); > +} > + > +/* > +** test_rev: > +** rev w0, w0 > +** ret > +*/ > + > +uint32_t test_rev (uint32_t a) > +{ > + return __builtin_bswap32 (a); > +} > + > +/* > +** test_revl: > +** rev [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned long test_revl (unsigned long a) > +{ > + return __revl (a); > +} > + > +/* > +** test_revll: > +** rev x0, x0 > +** ret > +*/ > + > +uint64_t test_revll (uint64_t a) > +{ > + return __revll (a); > +} > + > +/* > +** test_rev16: > +** rev16 w0, w0 > +** ret > +*/ > + > +uint32_t test_rev16 (uint32_t a) > +{ > + return __rev16 (a); > +} > + > +/* > +** test_rev16l: > +** rev16 [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned long test_rev16l (unsigned long a) > +{ > + return __rev16l (a); > +} > + > +/* > +** test_rev16ll: > +** rev16 x0, x0 > +** ret > +*/ > + > +uint64_t test_rev16ll (uint64_t a) > +{ > + return __rev16ll (a); > +} > + > +/* > +** test_ror: > +** ror w0, w0, w1 > +** ret > +*/ > + > +uint32_t test_ror (uint32_t a, uint32_t r) > +{ > + return __ror (a, r); > +} > + > +/* > +** test_rorl: > +** ror [wx]0, [wx]0, [wx]1 > +** ret > +*/ > + > +unsigned long test_rorl (unsigned long a, uint32_t r) > +{ > + return __rorl (a, r); > +} > + > +/* > +** test_rorll: > +** ror x0, x0, x1 > +** ret > +*/ > + > +uint64_t test_rorll (uint64_t a, uint32_t r) > +{ > + return __rorll (a, r); > +} > + > +/* > +** test_revsh: > +** rev16 w0, w0 > +** ret > +*/ > + > +int16_t test_revsh (int16_t a) > +{ > + return __revsh (a); > +}
On 29/06/2022 08:18, Richard Sandiford wrote: >> + break; >> + case AARCH64_RBIT: >> + case AARCH64_RBITL: >> + case AARCH64_RBITLL: >> + if (mode == SImode) >> + icode = CODE_FOR_aarch64_rbitsi; >> + else >> + icode = CODE_FOR_aarch64_rbitdi; >> + break; >> + default: >> + gcc_unreachable (); >> + } >> + expand_insn (icode, 2, ops); >> + return target; > This needs to return ops[0].value instead, since "target" just suggests > a possible location. > > Could you add tests for a memory source and memory destination, e.g.: > > void test_clz_mem (uint32_t *a) > { > *a = __clz (*a); > } > > Without tests like that, these comments probably just sound like a paper > exercise, but they should make a difference for memory sources (previous > review) and memory destinations (this round). I had locally tested it (with rev though because clz doesn't use that code) and strangely it does seem to work for the memory destinations, but that's just a simple test. It could very well go wrong with some more complex codegen, so I'll just take your word and use ops[0].value. 
And yeah I didn't add the tests at the time, don't really know why, I'll chalk it up to laziness :P > >> diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h >> index 9775a48c65825b424d3eb442384f5ab87b734fd7..a044bc74553fcf2a49b71290083f3f072fd5a2ce 100644 >> --- a/gcc/config/aarch64/arm_acle.h >> +++ b/gcc/config/aarch64/arm_acle.h >> @@ -28,6 +28,7 @@ >> #define _GCC_ARM_ACLE_H >> >> #include <stdint.h> >> +#include <stddef.h> >> >> #pragma GCC aarch64 "arm_acle.h" >> >> @@ -35,6 +36,58 @@ >> extern "C" { >> #endif >> >> +#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) \ >> +__extension__ extern __inline TYPE \ >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ >> +NAME (TYPE __value, uint32_t __rotate) \ >> +{ \ >> + size_t __size = sizeof (TYPE) * __CHAR_BIT__; \ >> + __rotate = __rotate % __size; \ >> + return __value >> __rotate | __value << ((__size - __rotate) % __size); \ >> +} >> + >> +_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t) >> +_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long) >> +_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t) >> + >> +#undef _GCC_ARM_ACLE_ROR_FN >> + >> +#define _GCC_ARM_ACLE_DATA_FN(NAME, BUILTIN, ITYPE, RTYPE) \ >> +__extension__ extern __inline RTYPE \ >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ >> +__##NAME (ITYPE __value) \ >> +{ \ >> + return __builtin_##BUILTIN (__value); \ >> +} >> + >> +_GCC_ARM_ACLE_DATA_FN (clz, clz, uint32_t, unsigned int) >> +_GCC_ARM_ACLE_DATA_FN (clzl, clzl, unsigned long, unsigned int) >> +_GCC_ARM_ACLE_DATA_FN (clzll, clzll, uint64_t, unsigned int) >> +_GCC_ARM_ACLE_DATA_FN (cls, clrsb, uint32_t, unsigned int) >> +_GCC_ARM_ACLE_DATA_FN (clsl, clrsbl, unsigned long, unsigned int) >> +_GCC_ARM_ACLE_DATA_FN (clsll, clrsbll, uint64_t, unsigned int) >> +_GCC_ARM_ACLE_DATA_FN (rev16, aarch64_rev16, uint32_t, uint32_t) >> +_GCC_ARM_ACLE_DATA_FN (rev16l, aarch64_rev16l, unsigned long, unsigned long) >> +_GCC_ARM_ACLE_DATA_FN (rev16ll, aarch64_rev16ll, 
uint64_t, uint64_t) >> +_GCC_ARM_ACLE_DATA_FN (rbit, aarch64_rbit, uint32_t, uint32_t) >> +_GCC_ARM_ACLE_DATA_FN (rbitl, aarch64_rbitl, unsigned long, unsigned long) >> +_GCC_ARM_ACLE_DATA_FN (rbitll, aarch64_rbitll, uint64_t, uint64_t) >> +_GCC_ARM_ACLE_DATA_FN (revsh, bswap16, int16_t, uint16_t) > The return type should be int16_t. Nice catch! > The clz and cls tests have the old return types (same as the argument > types), but I guess that's a good thing, since it shows that we avoid > the redundant zero-extend in clzll and clsll. Yeah I noticed that too when I was adding the mem tests, but I did change them though because at the time it just felt like an oversight, though I too was pleasantly surprised GCC was managing to avoid the zero-extending :) I then saw your comment and made me wonder whether I should keep the wrong return types in... I haven't but happy to change them back if you think it's a nice 'test' to have. Regards, Andre diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index e0a741ac663188713e21f457affa57217d074783..bb5d97c8fc6402635270df851a949cabeecaa5e8 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -613,6 +613,12 @@ enum aarch64_builtins AARCH64_LS64_BUILTIN_ST64B, AARCH64_LS64_BUILTIN_ST64BV, AARCH64_LS64_BUILTIN_ST64BV0, + AARCH64_REV16, + AARCH64_REV16L, + AARCH64_REV16LL, + AARCH64_RBIT, + AARCH64_RBITL, + AARCH64_RBITLL, AARCH64_BUILTIN_MAX }; @@ -1664,10 +1670,41 @@ aarch64_init_ls64_builtins (void) = aarch64_general_add_builtin (data[i].name, data[i].type, data[i].code); } +static void +aarch64_init_data_intrinsics (void) +{ + tree uint32_fntype = build_function_type_list (uint32_type_node, + uint32_type_node, NULL_TREE); + tree ulong_fntype = build_function_type_list (long_unsigned_type_node, + long_unsigned_type_node, + NULL_TREE); + tree uint64_fntype = build_function_type_list (uint64_type_node, + uint64_type_node, NULL_TREE); + 
aarch64_builtin_decls[AARCH64_REV16] + = aarch64_general_add_builtin ("__builtin_aarch64_rev16", uint32_fntype, + AARCH64_REV16); + aarch64_builtin_decls[AARCH64_REV16L] + = aarch64_general_add_builtin ("__builtin_aarch64_rev16l", ulong_fntype, + AARCH64_REV16L); + aarch64_builtin_decls[AARCH64_REV16LL] + = aarch64_general_add_builtin ("__builtin_aarch64_rev16ll", uint64_fntype, + AARCH64_REV16LL); + aarch64_builtin_decls[AARCH64_RBIT] + = aarch64_general_add_builtin ("__builtin_aarch64_rbit", uint32_fntype, + AARCH64_RBIT); + aarch64_builtin_decls[AARCH64_RBITL] + = aarch64_general_add_builtin ("__builtin_aarch64_rbitl", ulong_fntype, + AARCH64_RBITL); + aarch64_builtin_decls[AARCH64_RBITLL] + = aarch64_general_add_builtin ("__builtin_aarch64_rbitll", uint64_fntype, + AARCH64_RBITLL); +} + /* Implement #pragma GCC aarch64 "arm_acle.h". */ void handle_arm_acle_h (void) { + aarch64_init_data_intrinsics (); if (TARGET_LS64) aarch64_init_ls64_builtins (); } @@ -2394,6 +2431,37 @@ aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target) return target; } +/* Function to expand an expression EXP which calls one of the ACLE Data + Intrinsic builtins FCODE with the result going to TARGET. */ +static rtx +aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target) +{ + expand_operand ops[2]; + machine_mode mode = GET_MODE (target); + create_output_operand (&ops[0], target, mode); + create_input_operand (&ops[1], expand_normal (CALL_EXPR_ARG (exp, 0)), mode); + enum insn_code icode; + + switch (fcode) + { + case AARCH64_REV16: + case AARCH64_REV16L: + case AARCH64_REV16LL: + icode = code_for_aarch64_rev16 (mode); + break; + case AARCH64_RBIT: + case AARCH64_RBITL: + case AARCH64_RBITLL: + icode = code_for_aarch64_rbit (mode); + break; + default: + gcc_unreachable (); + } + + expand_insn (icode, 2, ops); + return ops[0].value; +} + /* Expand an expression EXP as fpsr or fpcr setter (depending on UNSPEC) using MODE. 
*/ static void @@ -2551,6 +2619,9 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, if (fcode >= AARCH64_MEMTAG_BUILTIN_START && fcode <= AARCH64_MEMTAG_BUILTIN_END) return aarch64_expand_builtin_memtag (fcode, exp, target); + if (fcode >= AARCH64_REV16 + && fcode <= AARCH64_RBITLL) + return aarch64_expand_builtin_data_intrinsic (fcode, exp, target); gcc_unreachable (); } diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index acec8c1146765c0fac73c15351853324b8f03209..ef0aed25c6b26eff61f9f6030dc5921a534e3d19 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -4950,7 +4950,7 @@ (define_expand "ffs<mode>2" rtx ccreg = aarch64_gen_compare_reg (EQ, operands[1], const0_rtx); rtx x = gen_rtx_NE (VOIDmode, ccreg, const0_rtx); - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1])); emit_insn (gen_clz<mode>2 (operands[0], operands[0])); emit_insn (gen_csinc3<mode>_insn (operands[0], x, operands[0], const0_rtx)); DONE; @@ -4996,7 +4996,7 @@ (define_insn "clrsb<mode>2" [(set_attr "type" "clz")] ) -(define_insn "rbit<mode>2" +(define_insn "@aarch64_rbit<mode>" [(set (match_operand:GPI 0 "register_operand" "=r") (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_RBIT))] "" @@ -5017,7 +5017,7 @@ (define_insn_and_split "ctz<mode>2" "reload_completed" [(const_int 0)] " - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1])); emit_insn (gen_clz<mode>2 (operands[0], operands[0])); DONE; ") @@ -6022,6 +6022,13 @@ (define_insn "bswaphi2" [(set_attr "type" "rev")] ) +(define_insn "@aarch64_rev16<mode>" + [(set (match_operand:GPI 0 "register_operand" "=r") + (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_REV))] + "" + "rev16\\t%<w>0, %<w>1" + [(set_attr "type" "rev")]) + (define_insn "*aarch64_bfxil<mode>" [(set 
(match_operand:GPI 0 "register_operand" "=r,r") (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "r,0") diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h index 9775a48c65825b424d3eb442384f5ab87b734fd7..d26e269cb843fe37ba789db09c40d06f53438cda 100644 --- a/gcc/config/aarch64/arm_acle.h +++ b/gcc/config/aarch64/arm_acle.h @@ -28,6 +28,7 @@ #define _GCC_ARM_ACLE_H #include <stdint.h> +#include <stddef.h> #pragma GCC aarch64 "arm_acle.h" @@ -35,6 +36,58 @@ extern "C" { #endif +#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) \ +__extension__ extern __inline TYPE \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +NAME (TYPE __value, uint32_t __rotate) \ +{ \ + size_t __size = sizeof (TYPE) * __CHAR_BIT__; \ + __rotate = __rotate % __size; \ + return __value >> __rotate | __value << ((__size - __rotate) % __size); \ +} + +_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t) +_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long) +_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t) + +#undef _GCC_ARM_ACLE_ROR_FN + +#define _GCC_ARM_ACLE_DATA_FN(NAME, BUILTIN, ITYPE, RTYPE) \ +__extension__ extern __inline RTYPE \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +__##NAME (ITYPE __value) \ +{ \ + return __builtin_##BUILTIN (__value); \ +} + +_GCC_ARM_ACLE_DATA_FN (clz, clz, uint32_t, unsigned int) +_GCC_ARM_ACLE_DATA_FN (clzl, clzl, unsigned long, unsigned int) +_GCC_ARM_ACLE_DATA_FN (clzll, clzll, uint64_t, unsigned int) +_GCC_ARM_ACLE_DATA_FN (cls, clrsb, uint32_t, unsigned int) +_GCC_ARM_ACLE_DATA_FN (clsl, clrsbl, unsigned long, unsigned int) +_GCC_ARM_ACLE_DATA_FN (clsll, clrsbll, uint64_t, unsigned int) +_GCC_ARM_ACLE_DATA_FN (rev16, aarch64_rev16, uint32_t, uint32_t) +_GCC_ARM_ACLE_DATA_FN (rev16l, aarch64_rev16l, unsigned long, unsigned long) +_GCC_ARM_ACLE_DATA_FN (rev16ll, aarch64_rev16ll, uint64_t, uint64_t) +_GCC_ARM_ACLE_DATA_FN (rbit, aarch64_rbit, uint32_t, uint32_t) +_GCC_ARM_ACLE_DATA_FN (rbitl, aarch64_rbitl, unsigned 
long, unsigned long) +_GCC_ARM_ACLE_DATA_FN (rbitll, aarch64_rbitll, uint64_t, uint64_t) +_GCC_ARM_ACLE_DATA_FN (revsh, bswap16, int16_t, int16_t) +_GCC_ARM_ACLE_DATA_FN (rev, bswap32, uint32_t, uint32_t) +_GCC_ARM_ACLE_DATA_FN (revll, bswap64, uint64_t, uint64_t) + +#undef _GCC_ARM_ACLE_DATA_FN + +__extension__ extern __inline unsigned long +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__revl (unsigned long __value) +{ + if (sizeof (unsigned long) == 8) + return __revll (__value); + else + return __rev (__value); +} + #pragma GCC push_options #pragma GCC target ("arch=armv8.3-a") __extension__ extern __inline int32_t diff --git a/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c new file mode 100644 index 0000000000000000000000000000000000000000..e067ef20bbdc8993865b541aa99dccac6b03e6a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c @@ -0,0 +1,468 @@ +/* Test the ACLE data intrinsics. 
*/ +/* { dg-do assemble } */ +/* { dg-additional-options "--save-temps -O1" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include "arm_acle.h" + +/* +** test_clz: +** clz w0, w0 +** ret +*/ + +unsigned int test_clz (uint32_t a) +{ + return __clz (a); +} + +/* +** test_clzl: +** clz [wx]0, [wx]0 +** ret +*/ + +unsigned int test_clzl (unsigned long a) +{ + return __clzl (a); +} + +/* +** test_clzll: +** clz x0, x0 +** ret +*/ + +unsigned int test_clzll (uint64_t a) +{ + return __clzll (a); +} + +/* +** test_cls: +** cls w0, w0 +** ret +*/ + +unsigned int test_cls (uint32_t a) +{ + return __cls (a); +} + +/* +** test_clsl: +** cls [wx]0, [wx]0 +** ret +*/ + +unsigned int test_clsl (unsigned long a) +{ + return __clsl (a); +} + +/* +** test_clsll: +** cls x0, x0 +** ret +*/ + +unsigned int test_clsll (uint64_t a) +{ + return __clsll (a); +} + +/* +** test_rbit: +** rbit w0, w0 +** ret +*/ + +uint32_t test_rbit (uint32_t a) +{ + return __rbit (a); +} + +/* +** test_rbitl: +** rbit [wx]0, [wx]0 +** ret +*/ + +unsigned long test_rbitl (unsigned long a) +{ + return __rbitl (a); +} + +/* +** test_rbitll: +** rbit x0, x0 +** ret +*/ + +uint64_t test_rbitll (uint64_t a) +{ + return __rbitll (a); +} + +/* +** test_rev: +** rev w0, w0 +** ret +*/ + +uint32_t test_rev (uint32_t a) +{ + return __rev (a); +} + +/* +** test_revl: +** rev [wx]0, [wx]0 +** ret +*/ + +unsigned long test_revl (unsigned long a) +{ + return __revl (a); +} + +/* +** test_revll: +** rev x0, x0 +** ret +*/ + +uint64_t test_revll (uint64_t a) +{ + return __revll (a); +} + +/* +** test_rev16: +** rev16 w0, w0 +** ret +*/ + +uint32_t test_rev16 (uint32_t a) +{ + return __rev16 (a); +} + +/* +** test_rev16l: +** rev16 [wx]0, [wx]0 +** ret +*/ + +unsigned long test_rev16l (unsigned long a) +{ + return __rev16l (a); +} + +/* +** test_rev16ll: +** rev16 x0, x0 +** ret +*/ + +uint64_t test_rev16ll (uint64_t a) +{ + return __rev16ll (a); +} + +/* +** test_ror: +** ror w0, w0, w1 +** ret +*/ + 
+uint32_t test_ror (uint32_t a, uint32_t r) +{ + return __ror (a, r); +} + +/* +** test_rorl: +** ror [wx]0, [wx]0, [wx]1 +** ret +*/ + +unsigned long test_rorl (unsigned long a, uint32_t r) +{ + return __rorl (a, r); +} + +/* +** test_rorll: +** ror x0, x0, x1 +** ret +*/ + +uint64_t test_rorll (uint64_t a, uint32_t r) +{ + return __rorll (a, r); +} + +/* +** test_revsh: +** rev16 w0, w0 +** ret +*/ + +int16_t test_revsh (int16_t a) +{ + return __revsh (a); +} + +uint32_t *g32; +unsigned long *gul; +uint64_t *g64; +unsigned int *gui; +int16_t *g16; + +/* +** test_clz_mem: +** ... +** clz w[0-9]+, w[0-9]+ +** ... +** ret +*/ + +void test_clz_mem (uint32_t *a) +{ + *gui = __clz (*a); +} + +/* +** test_clzl_mem: +** ... +** clz [wx][0-9]+, [wx][0-9]+ +** ... +** ret +*/ + +void test_clzl_mem (unsigned long *a) +{ + *gui = __clzl (*a); +} + +/* +** test_clzll_mem: +** ... +** clz x[0-9]+, x[0-9]+ +** ... +** ret +*/ + +void test_clzll_mem (uint64_t *a) +{ + *gui = __clzll (*a); +} + +/* +** test_cls_mem: +** ... +** cls w[0-9]+, w[0-9]+ +** ... +** ret +*/ + +void test_cls_mem (uint32_t *a) +{ + *gui = __cls (*a); +} + +/* +** test_clsl_mem: +** ... +** cls [wx][0-9]+, [wx][0-9]+ +** ... +** ret +*/ + +void test_clsl_mem (unsigned long *a) +{ + *gui = __clsl (*a); +} + +/* +** test_clsll_mem: +** ... +** cls x[0-9]+, x[0-9]+ +** ... +** ret +*/ + +void test_clsll_mem (uint64_t *a) +{ + *gui = __clsll (*a); +} + +/* +** test_rbit_mem: +** ... +** rbit w[0-9]+, w[0-9]+ +** ... +** ret +*/ + +void test_rbit_mem (uint32_t *a) +{ + *g32 = __rbit (*a); +} + +/* +** test_rbitl_mem: +** ... +** rbit [wx][0-9]+, [wx][0-9]+ +** ... +** ret +*/ + +void test_rbitl_mem (unsigned long *a) +{ + *gul = __rbitl (*a); +} + +/* +** test_rbitll_mem: +** ... +** rbit x[0-9]+, x[0-9]+ +** ... +** ret +*/ + +void test_rbitll_mem (uint64_t *a) +{ + *g64 = __rbitll (*a); +} + +/* +** test_rev_mem: +** ... +** rev w[0-9]+, w[0-9]+ +** ... 
+** ret +*/ + +void test_rev_mem (uint32_t *a) +{ + *g32 = __rev (*a); +} + +/* +** test_revl_mem: +** ... +** rev [wx][0-9]+, [wx][0-9]+ +** ... +** ret +*/ + +void test_revl_mem (unsigned long *a) +{ + *gul = __revl (*a); +} + +/* +** test_revll_mem: +** ... +** rev x[0-9]+, x[0-9]+ +** ... +** ret +*/ + +void test_revll_mem (uint64_t *a) +{ + *g64 = __revll (*a); +} + +/* +** test_rev16_mem: +** ... +** rev16 w[0-9]+, w[0-9]+ +** ... +** ret +*/ + +void test_rev16_mem (uint32_t *a) +{ + *g32 = __rev16 (*a); +} + +/* +** test_rev16l_mem: +** ... +** rev16 [wx][0-9]+, [wx][0-9]+ +** ... +** ret +*/ + +void test_rev16l_mem (unsigned long *a) +{ + *gul = __rev16l (*a); +} + +/* +** test_rev16ll_mem: +** ... +** rev16 x[0-9]+, x[0-9]+ +** ... +** ret +*/ + +void test_rev16ll_mem (uint64_t *a) +{ + *g64 = __rev16ll (*a); +} + +/* +** test_ror_mem: +** ... +** ror w[0-9]+, w[0-9]+, w[0-9]+ +** ... +** ret +*/ + +void test_ror_mem (uint32_t *a, uint32_t *r) +{ + *g32 = __ror (*a, *r); +} + +/* +** test_rorl_mem: +** ... +** ror [wx][0-9]+, [wx][0-9]+, [wx][0-9]+ +** ... +** ret +*/ + +void test_rorl_mem (unsigned long *a, uint32_t *r) +{ + *gul = __rorl (*a, *r); +} + +/* +** test_rorll_mem: +** ... +** ror x[0-9]+, x[0-9]+, x[0-9]+ +** ... +** ret +*/ + +void test_rorll_mem (uint64_t *a, uint32_t *r) +{ + *g64 = __rorll (*a, *r); +} + +/* +** test_revsh_mem: +** ... +** rev16 w[0-9]+, w[0-9]+ +** ... +** ret +*/ + +void test_revsh_mem (int16_t *a) +{ + *g16 = __revsh (*a); +}
"Andre Vieira (lists)" <andre.simoesdiasvieira@arm.com> writes: > On 29/06/2022 08:18, Richard Sandiford wrote: >>> + break; >>> + case AARCH64_RBIT: >>> + case AARCH64_RBITL: >>> + case AARCH64_RBITLL: >>> + if (mode == SImode) >>> + icode = CODE_FOR_aarch64_rbitsi; >>> + else >>> + icode = CODE_FOR_aarch64_rbitdi; >>> + break; >>> + default: >>> + gcc_unreachable (); >>> + } >>> + expand_insn (icode, 2, ops); >>> + return target; >> This needs to return ops[0].value instead, since "target" just suggests >> a possible location. >> >> Could you add tests for a memory source and memory destination, e.g.: >> >> void test_clz_mem (uint32_t *a) >> { >> *a = __clz (*a); >> } >> >> Without tests like that, these comments probably just sound like a paper >> exercise, but they should make a difference for memory sources (previous >> review) and memory destinations (this round). > I had locally tested it (with rev though because clz doesn't use that > code) and strangely it does seem to work for the memory destinations, > but that's just a simple test. > It could very well go wrong with some more complex codegen, so I'll just > take your word and use ops[0].value. 
> > And yeah I didn't add the tests at the time, don't really know why, I'll > chuck it down to laziness :P >> >>> diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h >>> index 9775a48c65825b424d3eb442384f5ab87b734fd7..a044bc74553fcf2a49b71290083f3f072fd5a2ce 100644 >>> --- a/gcc/config/aarch64/arm_acle.h >>> +++ b/gcc/config/aarch64/arm_acle.h >>> @@ -28,6 +28,7 @@ >>> #define _GCC_ARM_ACLE_H >>> >>> #include <stdint.h> >>> +#include <stddef.h> >>> >>> #pragma GCC aarch64 "arm_acle.h" >>> >>> @@ -35,6 +36,58 @@ >>> extern "C" { >>> #endif >>> >>> +#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) \ >>> +__extension__ extern __inline TYPE \ >>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ >>> +NAME (TYPE __value, uint32_t __rotate) \ >>> +{ \ >>> + size_t __size = sizeof (TYPE) * __CHAR_BIT__; \ >>> + __rotate = __rotate % __size; \ >>> + return __value >> __rotate | __value << ((__size - __rotate) % __size); \ >>> +} >>> + >>> +_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t) >>> +_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long) >>> +_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t) >>> + >>> +#undef _GCC_ARM_ACLE_ROR_FN >>> + >>> +#define _GCC_ARM_ACLE_DATA_FN(NAME, BUILTIN, ITYPE, RTYPE) \ >>> +__extension__ extern __inline RTYPE \ >>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ >>> +__##NAME (ITYPE __value) \ >>> +{ \ >>> + return __builtin_##BUILTIN (__value); \ >>> +} >>> + >>> +_GCC_ARM_ACLE_DATA_FN (clz, clz, uint32_t, unsigned int) >>> +_GCC_ARM_ACLE_DATA_FN (clzl, clzl, unsigned long, unsigned int) >>> +_GCC_ARM_ACLE_DATA_FN (clzll, clzll, uint64_t, unsigned int) >>> +_GCC_ARM_ACLE_DATA_FN (cls, clrsb, uint32_t, unsigned int) >>> +_GCC_ARM_ACLE_DATA_FN (clsl, clrsbl, unsigned long, unsigned int) >>> +_GCC_ARM_ACLE_DATA_FN (clsll, clrsbll, uint64_t, unsigned int) >>> +_GCC_ARM_ACLE_DATA_FN (rev16, aarch64_rev16, uint32_t, uint32_t) >>> +_GCC_ARM_ACLE_DATA_FN (rev16l, aarch64_rev16l, unsigned long, unsigned long) 
>>> +_GCC_ARM_ACLE_DATA_FN (rev16ll, aarch64_rev16ll, uint64_t, uint64_t) >>> +_GCC_ARM_ACLE_DATA_FN (rbit, aarch64_rbit, uint32_t, uint32_t) >>> +_GCC_ARM_ACLE_DATA_FN (rbitl, aarch64_rbitl, unsigned long, unsigned long) >>> +_GCC_ARM_ACLE_DATA_FN (rbitll, aarch64_rbitll, uint64_t, uint64_t) >>> +_GCC_ARM_ACLE_DATA_FN (revsh, bswap16, int16_t, uint16_t) >> The return type should be int16_t. > Nice catch! >> The clz and cls tests have the old return types (same as the argument >> types), but I guess that's a good thing, since it shows that we avoid >> the redundant zero-extend in clzll and clsll. > Yeah I noticed that too when I was adding the mem tests, but I did > change them though because at the time it just felt like an oversight, > though I too was pleasantly surprised GCC was managing to avoid the > zero-extending :) > I then saw your comment and made me wonder whether I should keep the > wrong return types in... I haven't but happy to change them back if you > think it's a nice 'test' to have. I thought it was OK/useful as it was, but I don't mind either way. BTW, while trying it out locally, I noticed: aarch64_init_data_intrinsics was called from the wrong place. Since it's adding normal __builtin functions, it should be called from aarch64_general_init_builtins instead of handle_arm_acle_h. handle_arm_acle_h is instead for cases where we want to simulate C/C++ definitions of the ACLE intrinsics themselves (i.e. so that the intrinsics themselves are built-in functions, rather than inline wrappers around built-in functions). OK with that change, thanks. 
Thanks, Richard > Regards, > Andre > > diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc > index e0a741ac663188713e21f457affa57217d074783..bb5d97c8fc6402635270df851a949cabeecaa5e8 100644 > --- a/gcc/config/aarch64/aarch64-builtins.cc > +++ b/gcc/config/aarch64/aarch64-builtins.cc > @@ -613,6 +613,12 @@ enum aarch64_builtins > AARCH64_LS64_BUILTIN_ST64B, > AARCH64_LS64_BUILTIN_ST64BV, > AARCH64_LS64_BUILTIN_ST64BV0, > + AARCH64_REV16, > + AARCH64_REV16L, > + AARCH64_REV16LL, > + AARCH64_RBIT, > + AARCH64_RBITL, > + AARCH64_RBITLL, > AARCH64_BUILTIN_MAX > }; > > @@ -1664,10 +1670,41 @@ aarch64_init_ls64_builtins (void) > = aarch64_general_add_builtin (data[i].name, data[i].type, data[i].code); > } > > +static void > +aarch64_init_data_intrinsics (void) > +{ > + tree uint32_fntype = build_function_type_list (uint32_type_node, > + uint32_type_node, NULL_TREE); > + tree ulong_fntype = build_function_type_list (long_unsigned_type_node, > + long_unsigned_type_node, > + NULL_TREE); > + tree uint64_fntype = build_function_type_list (uint64_type_node, > + uint64_type_node, NULL_TREE); > + aarch64_builtin_decls[AARCH64_REV16] > + = aarch64_general_add_builtin ("__builtin_aarch64_rev16", uint32_fntype, > + AARCH64_REV16); > + aarch64_builtin_decls[AARCH64_REV16L] > + = aarch64_general_add_builtin ("__builtin_aarch64_rev16l", ulong_fntype, > + AARCH64_REV16L); > + aarch64_builtin_decls[AARCH64_REV16LL] > + = aarch64_general_add_builtin ("__builtin_aarch64_rev16ll", uint64_fntype, > + AARCH64_REV16LL); > + aarch64_builtin_decls[AARCH64_RBIT] > + = aarch64_general_add_builtin ("__builtin_aarch64_rbit", uint32_fntype, > + AARCH64_RBIT); > + aarch64_builtin_decls[AARCH64_RBITL] > + = aarch64_general_add_builtin ("__builtin_aarch64_rbitl", ulong_fntype, > + AARCH64_RBITL); > + aarch64_builtin_decls[AARCH64_RBITLL] > + = aarch64_general_add_builtin ("__builtin_aarch64_rbitll", uint64_fntype, > + AARCH64_RBITLL); > +} > + > /* Implement 
#pragma GCC aarch64 "arm_acle.h". */ > void > handle_arm_acle_h (void) > { > + aarch64_init_data_intrinsics (); > if (TARGET_LS64) > aarch64_init_ls64_builtins (); > } > @@ -2394,6 +2431,37 @@ aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target) > return target; > } > > +/* Function to expand an expression EXP which calls one of the ACLE Data > + Intrinsic builtins FCODE with the result going to TARGET. */ > +static rtx > +aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target) > +{ > + expand_operand ops[2]; > + machine_mode mode = GET_MODE (target); > + create_output_operand (&ops[0], target, mode); > + create_input_operand (&ops[1], expand_normal (CALL_EXPR_ARG (exp, 0)), mode); > + enum insn_code icode; > + > + switch (fcode) > + { > + case AARCH64_REV16: > + case AARCH64_REV16L: > + case AARCH64_REV16LL: > + icode = code_for_aarch64_rev16 (mode); > + break; > + case AARCH64_RBIT: > + case AARCH64_RBITL: > + case AARCH64_RBITLL: > + icode = code_for_aarch64_rbit (mode); > + break; > + default: > + gcc_unreachable (); > + } > + > + expand_insn (icode, 2, ops); > + return ops[0].value; > +} > + > /* Expand an expression EXP as fpsr or fpcr setter (depending on > UNSPEC) using MODE. 
*/ > static void > @@ -2551,6 +2619,9 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, > if (fcode >= AARCH64_MEMTAG_BUILTIN_START > && fcode <= AARCH64_MEMTAG_BUILTIN_END) > return aarch64_expand_builtin_memtag (fcode, exp, target); > + if (fcode >= AARCH64_REV16 > + && fcode <= AARCH64_RBITLL) > + return aarch64_expand_builtin_data_intrinsic (fcode, exp, target); > > gcc_unreachable (); > } > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index acec8c1146765c0fac73c15351853324b8f03209..ef0aed25c6b26eff61f9f6030dc5921a534e3d19 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -4950,7 +4950,7 @@ (define_expand "ffs<mode>2" > rtx ccreg = aarch64_gen_compare_reg (EQ, operands[1], const0_rtx); > rtx x = gen_rtx_NE (VOIDmode, ccreg, const0_rtx); > > - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); > + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1])); > emit_insn (gen_clz<mode>2 (operands[0], operands[0])); > emit_insn (gen_csinc3<mode>_insn (operands[0], x, operands[0], const0_rtx)); > DONE; > @@ -4996,7 +4996,7 @@ (define_insn "clrsb<mode>2" > [(set_attr "type" "clz")] > ) > > -(define_insn "rbit<mode>2" > +(define_insn "@aarch64_rbit<mode>" > [(set (match_operand:GPI 0 "register_operand" "=r") > (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_RBIT))] > "" > @@ -5017,7 +5017,7 @@ (define_insn_and_split "ctz<mode>2" > "reload_completed" > [(const_int 0)] > " > - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); > + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1])); > emit_insn (gen_clz<mode>2 (operands[0], operands[0])); > DONE; > ") > @@ -6022,6 +6022,13 @@ (define_insn "bswaphi2" > [(set_attr "type" "rev")] > ) > > +(define_insn "@aarch64_rev16<mode>" > + [(set (match_operand:GPI 0 "register_operand" "=r") > + (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_REV))] > + "" > + 
"rev16\\t%<w>0, %<w>1" > + [(set_attr "type" "rev")]) > + > (define_insn "*aarch64_bfxil<mode>" > [(set (match_operand:GPI 0 "register_operand" "=r,r") > (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "r,0") > diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h > index 9775a48c65825b424d3eb442384f5ab87b734fd7..d26e269cb843fe37ba789db09c40d06f53438cda 100644 > --- a/gcc/config/aarch64/arm_acle.h > +++ b/gcc/config/aarch64/arm_acle.h > @@ -28,6 +28,7 @@ > #define _GCC_ARM_ACLE_H > > #include <stdint.h> > +#include <stddef.h> > > #pragma GCC aarch64 "arm_acle.h" > > @@ -35,6 +36,58 @@ > extern "C" { > #endif > > +#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) \ > +__extension__ extern __inline TYPE \ > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ > +NAME (TYPE __value, uint32_t __rotate) \ > +{ \ > + size_t __size = sizeof (TYPE) * __CHAR_BIT__; \ > + __rotate = __rotate % __size; \ > + return __value >> __rotate | __value << ((__size - __rotate) % __size); \ > +} > + > +_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t) > +_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long) > +_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t) > + > +#undef _GCC_ARM_ACLE_ROR_FN > + > +#define _GCC_ARM_ACLE_DATA_FN(NAME, BUILTIN, ITYPE, RTYPE) \ > +__extension__ extern __inline RTYPE \ > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ > +__##NAME (ITYPE __value) \ > +{ \ > + return __builtin_##BUILTIN (__value); \ > +} > + > +_GCC_ARM_ACLE_DATA_FN (clz, clz, uint32_t, unsigned int) > +_GCC_ARM_ACLE_DATA_FN (clzl, clzl, unsigned long, unsigned int) > +_GCC_ARM_ACLE_DATA_FN (clzll, clzll, uint64_t, unsigned int) > +_GCC_ARM_ACLE_DATA_FN (cls, clrsb, uint32_t, unsigned int) > +_GCC_ARM_ACLE_DATA_FN (clsl, clrsbl, unsigned long, unsigned int) > +_GCC_ARM_ACLE_DATA_FN (clsll, clrsbll, uint64_t, unsigned int) > +_GCC_ARM_ACLE_DATA_FN (rev16, aarch64_rev16, uint32_t, uint32_t) > +_GCC_ARM_ACLE_DATA_FN (rev16l, aarch64_rev16l, unsigned long, 
unsigned long) > +_GCC_ARM_ACLE_DATA_FN (rev16ll, aarch64_rev16ll, uint64_t, uint64_t) > +_GCC_ARM_ACLE_DATA_FN (rbit, aarch64_rbit, uint32_t, uint32_t) > +_GCC_ARM_ACLE_DATA_FN (rbitl, aarch64_rbitl, unsigned long, unsigned long) > +_GCC_ARM_ACLE_DATA_FN (rbitll, aarch64_rbitll, uint64_t, uint64_t) > +_GCC_ARM_ACLE_DATA_FN (revsh, bswap16, int16_t, int16_t) > +_GCC_ARM_ACLE_DATA_FN (rev, bswap32, uint32_t, uint32_t) > +_GCC_ARM_ACLE_DATA_FN (revll, bswap64, uint64_t, uint64_t) > + > +#undef _GCC_ARM_ACLE_DATA_FN > + > +__extension__ extern __inline unsigned long > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__revl (unsigned long __value) > +{ > + if (sizeof (unsigned long) == 8) > + return __revll (__value); > + else > + return __rev (__value); > +} > + > #pragma GCC push_options > #pragma GCC target ("arch=armv8.3-a") > __extension__ extern __inline int32_t > diff --git a/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c > new file mode 100644 > index 0000000000000000000000000000000000000000..e067ef20bbdc8993865b541aa99dccac6b03e6a0 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c > @@ -0,0 +1,468 @@ > +/* Test the ACLE data intrinsics. 
*/ > +/* { dg-do assemble } */ > +/* { dg-additional-options "--save-temps -O1" } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > + > +#include "arm_acle.h" > + > +/* > +** test_clz: > +** clz w0, w0 > +** ret > +*/ > + > +unsigned int test_clz (uint32_t a) > +{ > + return __clz (a); > +} > + > +/* > +** test_clzl: > +** clz [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned int test_clzl (unsigned long a) > +{ > + return __clzl (a); > +} > + > +/* > +** test_clzll: > +** clz x0, x0 > +** ret > +*/ > + > +unsigned int test_clzll (uint64_t a) > +{ > + return __clzll (a); > +} > + > +/* > +** test_cls: > +** cls w0, w0 > +** ret > +*/ > + > +unsigned int test_cls (uint32_t a) > +{ > + return __cls (a); > +} > + > +/* > +** test_clsl: > +** cls [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned int test_clsl (unsigned long a) > +{ > + return __clsl (a); > +} > + > +/* > +** test_clsll: > +** cls x0, x0 > +** ret > +*/ > + > +unsigned int test_clsll (uint64_t a) > +{ > + return __clsll (a); > +} > + > +/* > +** test_rbit: > +** rbit w0, w0 > +** ret > +*/ > + > +uint32_t test_rbit (uint32_t a) > +{ > + return __rbit (a); > +} > + > +/* > +** test_rbitl: > +** rbit [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned long test_rbitl (unsigned long a) > +{ > + return __rbitl (a); > +} > + > +/* > +** test_rbitll: > +** rbit x0, x0 > +** ret > +*/ > + > +uint64_t test_rbitll (uint64_t a) > +{ > + return __rbitll (a); > +} > + > +/* > +** test_rev: > +** rev w0, w0 > +** ret > +*/ > + > +uint32_t test_rev (uint32_t a) > +{ > + return __rev (a); > +} > + > +/* > +** test_revl: > +** rev [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned long test_revl (unsigned long a) > +{ > + return __revl (a); > +} > + > +/* > +** test_revll: > +** rev x0, x0 > +** ret > +*/ > + > +uint64_t test_revll (uint64_t a) > +{ > + return __revll (a); > +} > + > +/* > +** test_rev16: > +** rev16 w0, w0 > +** ret > +*/ > + > +uint32_t test_rev16 (uint32_t a) > +{ > + return __rev16 (a); > +} > + > +/* > +** 
test_rev16l: > +** rev16 [wx]0, [wx]0 > +** ret > +*/ > + > +unsigned long test_rev16l (unsigned long a) > +{ > + return __rev16l (a); > +} > + > +/* > +** test_rev16ll: > +** rev16 x0, x0 > +** ret > +*/ > + > +uint64_t test_rev16ll (uint64_t a) > +{ > + return __rev16ll (a); > +} > + > +/* > +** test_ror: > +** ror w0, w0, w1 > +** ret > +*/ > + > +uint32_t test_ror (uint32_t a, uint32_t r) > +{ > + return __ror (a, r); > +} > + > +/* > +** test_rorl: > +** ror [wx]0, [wx]0, [wx]1 > +** ret > +*/ > + > +unsigned long test_rorl (unsigned long a, uint32_t r) > +{ > + return __rorl (a, r); > +} > + > +/* > +** test_rorll: > +** ror x0, x0, x1 > +** ret > +*/ > + > +uint64_t test_rorll (uint64_t a, uint32_t r) > +{ > + return __rorll (a, r); > +} > + > +/* > +** test_revsh: > +** rev16 w0, w0 > +** ret > +*/ > + > +int16_t test_revsh (int16_t a) > +{ > + return __revsh (a); > +} > + > +uint32_t *g32; > +unsigned long *gul; > +uint64_t *g64; > +unsigned int *gui; > +int16_t *g16; > + > +/* > +** test_clz_mem: > +** ... > +** clz w[0-9]+, w[0-9]+ > +** ... > +** ret > +*/ > + > +void test_clz_mem (uint32_t *a) > +{ > + *gui = __clz (*a); > +} > + > +/* > +** test_clzl_mem: > +** ... > +** clz [wx][0-9]+, [wx][0-9]+ > +** ... > +** ret > +*/ > + > +void test_clzl_mem (unsigned long *a) > +{ > + *gui = __clzl (*a); > +} > + > +/* > +** test_clzll_mem: > +** ... > +** clz x[0-9]+, x[0-9]+ > +** ... > +** ret > +*/ > + > +void test_clzll_mem (uint64_t *a) > +{ > + *gui = __clzll (*a); > +} > + > +/* > +** test_cls_mem: > +** ... > +** cls w[0-9]+, w[0-9]+ > +** ... > +** ret > +*/ > + > +void test_cls_mem (uint32_t *a) > +{ > + *gui = __cls (*a); > +} > + > +/* > +** test_clsl_mem: > +** ... > +** cls [wx][0-9]+, [wx][0-9]+ > +** ... > +** ret > +*/ > + > +void test_clsl_mem (unsigned long *a) > +{ > + *gui = __clsl (*a); > +} > + > +/* > +** test_clsll_mem: > +** ... > +** cls x[0-9]+, x[0-9]+ > +** ... 
> +** ret > +*/ > + > +void test_clsll_mem (uint64_t *a) > +{ > + *gui = __clsll (*a); > +} > + > +/* > +** test_rbit_mem: > +** ... > +** rbit w[0-9]+, w[0-9]+ > +** ... > +** ret > +*/ > + > +void test_rbit_mem (uint32_t *a) > +{ > + *g32 = __rbit (*a); > +} > + > +/* > +** test_rbitl_mem: > +** ... > +** rbit [wx][0-9]+, [wx][0-9]+ > +** ... > +** ret > +*/ > + > +void test_rbitl_mem (unsigned long *a) > +{ > + *gul = __rbitl (*a); > +} > + > +/* > +** test_rbitll_mem: > +** ... > +** rbit x[0-9]+, x[0-9]+ > +** ... > +** ret > +*/ > + > +void test_rbitll_mem (uint64_t *a) > +{ > + *g64 = __rbitll (*a); > +} > + > +/* > +** test_rev_mem: > +** ... > +** rev w[0-9]+, w[0-9]+ > +** ... > +** ret > +*/ > + > +void test_rev_mem (uint32_t *a) > +{ > + *g32 = __rev (*a); > +} > + > +/* > +** test_revl_mem: > +** ... > +** rev [wx][0-9]+, [wx][0-9]+ > +** ... > +** ret > +*/ > + > +void test_revl_mem (unsigned long *a) > +{ > + *gul = __revl (*a); > +} > + > +/* > +** test_revll_mem: > +** ... > +** rev x[0-9]+, x[0-9]+ > +** ... > +** ret > +*/ > + > +void test_revll_mem (uint64_t *a) > +{ > + *g64 = __revll (*a); > +} > + > +/* > +** test_rev16_mem: > +** ... > +** rev16 w[0-9]+, w[0-9]+ > +** ... > +** ret > +*/ > + > +void test_rev16_mem (uint32_t *a) > +{ > + *g32 = __rev16 (*a); > +} > + > +/* > +** test_rev16l_mem: > +** ... > +** rev16 [wx][0-9]+, [wx][0-9]+ > +** ... > +** ret > +*/ > + > +void test_rev16l_mem (unsigned long *a) > +{ > + *gul = __rev16l (*a); > +} > + > +/* > +** test_rev16ll_mem: > +** ... > +** rev16 x[0-9]+, x[0-9]+ > +** ... > +** ret > +*/ > + > +void test_rev16ll_mem (uint64_t *a) > +{ > + *g64 = __rev16ll (*a); > +} > + > +/* > +** test_ror_mem: > +** ... > +** ror w[0-9]+, w[0-9]+, w[0-9]+ > +** ... > +** ret > +*/ > + > +void test_ror_mem (uint32_t *a, uint32_t *r) > +{ > + *g32 = __ror (*a, *r); > +} > + > +/* > +** test_rorl_mem: > +** ... > +** ror [wx][0-9]+, [wx][0-9]+, [wx][0-9]+ > +** ... 
> +** ret > +*/ > + > +void test_rorl_mem (unsigned long *a, uint32_t *r) > +{ > + *gul = __rorl (*a, *r); > +} > + > +/* > +** test_rorll_mem: > +** ... > +** ror x[0-9]+, x[0-9]+, x[0-9]+ > +** ... > +** ret > +*/ > + > +void test_rorll_mem (uint64_t *a, uint32_t *r) > +{ > + *g64 = __rorll (*a, *r); > +} > + > +/* > +** test_revsh_mem: > +** ... > +** rev16 w[0-9]+, w[0-9]+ > +** ... > +** ret > +*/ > + > +void test_revsh_mem (int16_t *a) > +{ > + *g16 = __revsh (*a); > +}
OK to backport this to gcc-12? Applies cleanly and did a bootstrap and regression test on aarch64-linux-gnu Regards, Andre On 01/07/2022 12:26, Richard Sandiford wrote: > "Andre Vieira (lists)" <andre.simoesdiasvieira@arm.com> writes: >> On 29/06/2022 08:18, Richard Sandiford wrote: >>>> + break; >>>> + case AARCH64_RBIT: >>>> + case AARCH64_RBITL: >>>> + case AARCH64_RBITLL: >>>> + if (mode == SImode) >>>> + icode = CODE_FOR_aarch64_rbitsi; >>>> + else >>>> + icode = CODE_FOR_aarch64_rbitdi; >>>> + break; >>>> + default: >>>> + gcc_unreachable (); >>>> + } >>>> + expand_insn (icode, 2, ops); >>>> + return target; >>> This needs to return ops[0].value instead, since "target" just suggests >>> a possible location. >>> >>> Could you add tests for a memory source and memory destination, e.g.: >>> >>> void test_clz_mem (uint32_t *a) >>> { >>> *a = __clz (*a); >>> } >>> >>> Without tests like that, these comments probably just sound like a paper >>> exercise, but they should make a difference for memory sources (previous >>> review) and memory destinations (this round). >> I had locally tested it (with rev though because clz doesn't use that >> code) and strangely it does seem to work for the memory destinations, >> but that's just a simple test. >> It could very well go wrong with some more complex codegen, so I'll just >> take your word and use ops[0].value. 
>> >> And yeah I didn't add the tests at the time, don't really know why, I'll >> chuck it down to laziness :P >>>> diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h >>>> index 9775a48c65825b424d3eb442384f5ab87b734fd7..a044bc74553fcf2a49b71290083f3f072fd5a2ce 100644 >>>> --- a/gcc/config/aarch64/arm_acle.h >>>> +++ b/gcc/config/aarch64/arm_acle.h >>>> @@ -28,6 +28,7 @@ >>>> #define _GCC_ARM_ACLE_H >>>> >>>> #include <stdint.h> >>>> +#include <stddef.h> >>>> >>>> #pragma GCC aarch64 "arm_acle.h" >>>> >>>> @@ -35,6 +36,58 @@ >>>> extern "C" { >>>> #endif >>>> >>>> +#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) \ >>>> +__extension__ extern __inline TYPE \ >>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ >>>> +NAME (TYPE __value, uint32_t __rotate) \ >>>> +{ \ >>>> + size_t __size = sizeof (TYPE) * __CHAR_BIT__; \ >>>> + __rotate = __rotate % __size; \ >>>> + return __value >> __rotate | __value << ((__size - __rotate) % __size); \ >>>> +} >>>> + >>>> +_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t) >>>> +_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long) >>>> +_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t) >>>> + >>>> +#undef _GCC_ARM_ACLE_ROR_FN >>>> + >>>> +#define _GCC_ARM_ACLE_DATA_FN(NAME, BUILTIN, ITYPE, RTYPE) \ >>>> +__extension__ extern __inline RTYPE \ >>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ >>>> +__##NAME (ITYPE __value) \ >>>> +{ \ >>>> + return __builtin_##BUILTIN (__value); \ >>>> +} >>>> + >>>> +_GCC_ARM_ACLE_DATA_FN (clz, clz, uint32_t, unsigned int) >>>> +_GCC_ARM_ACLE_DATA_FN (clzl, clzl, unsigned long, unsigned int) >>>> +_GCC_ARM_ACLE_DATA_FN (clzll, clzll, uint64_t, unsigned int) >>>> +_GCC_ARM_ACLE_DATA_FN (cls, clrsb, uint32_t, unsigned int) >>>> +_GCC_ARM_ACLE_DATA_FN (clsl, clrsbl, unsigned long, unsigned int) >>>> +_GCC_ARM_ACLE_DATA_FN (clsll, clrsbll, uint64_t, unsigned int) >>>> +_GCC_ARM_ACLE_DATA_FN (rev16, aarch64_rev16, uint32_t, uint32_t) >>>> +_GCC_ARM_ACLE_DATA_FN 
(rev16l, aarch64_rev16l, unsigned long, unsigned long) >>>> +_GCC_ARM_ACLE_DATA_FN (rev16ll, aarch64_rev16ll, uint64_t, uint64_t) >>>> +_GCC_ARM_ACLE_DATA_FN (rbit, aarch64_rbit, uint32_t, uint32_t) >>>> +_GCC_ARM_ACLE_DATA_FN (rbitl, aarch64_rbitl, unsigned long, unsigned long) >>>> +_GCC_ARM_ACLE_DATA_FN (rbitll, aarch64_rbitll, uint64_t, uint64_t) >>>> +_GCC_ARM_ACLE_DATA_FN (revsh, bswap16, int16_t, uint16_t) >>> The return type should be int16_t. >> Nice catch! >>> The clz and cls tests have the old return types (same as the argument >>> types), but I guess that's a good thing, since it shows that we avoid >>> the redundant zero-extend in clzll and clsll. >> Yeah I noticed that too when I was adding the mem tests, but I did >> change them though because at the time it just felt like an oversight, >> though I too was pleasantly surprised GCC was managing to avoid the >> zero-extending :) >> I then saw your comment and made me wonder whether I should keep the >> wrong return types in... I haven't but happy to change them back if you >> think it's a nice 'test' to have. > I thought it was OK/useful as it was, but I don't mind either way. > > BTW, while trying it out locally, I noticed: > > aarch64_init_data_intrinsics > > was called from the wrong place. Since it's adding normal __builtin > functions, it should be called from aarch64_general_init_builtins > instead of handle_arm_acle_h. > > handle_arm_acle_h is instead for cases where we want to simulate > C/C++ definitions of the ACLE intrinsics themselves (i.e. so that > the intrinsics themselves are built-in functions, rather than > inline wrappers around built-in functions). > > OK with that change, thanks. 
> > Thanks, > Richard > >> Regards, >> Andre >> >> diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc >> index e0a741ac663188713e21f457affa57217d074783..bb5d97c8fc6402635270df851a949cabeecaa5e8 100644 >> --- a/gcc/config/aarch64/aarch64-builtins.cc >> +++ b/gcc/config/aarch64/aarch64-builtins.cc >> @@ -613,6 +613,12 @@ enum aarch64_builtins >> AARCH64_LS64_BUILTIN_ST64B, >> AARCH64_LS64_BUILTIN_ST64BV, >> AARCH64_LS64_BUILTIN_ST64BV0, >> + AARCH64_REV16, >> + AARCH64_REV16L, >> + AARCH64_REV16LL, >> + AARCH64_RBIT, >> + AARCH64_RBITL, >> + AARCH64_RBITLL, >> AARCH64_BUILTIN_MAX >> }; >> >> @@ -1664,10 +1670,41 @@ aarch64_init_ls64_builtins (void) >> = aarch64_general_add_builtin (data[i].name, data[i].type, data[i].code); >> } >> >> +static void >> +aarch64_init_data_intrinsics (void) >> +{ >> + tree uint32_fntype = build_function_type_list (uint32_type_node, >> + uint32_type_node, NULL_TREE); >> + tree ulong_fntype = build_function_type_list (long_unsigned_type_node, >> + long_unsigned_type_node, >> + NULL_TREE); >> + tree uint64_fntype = build_function_type_list (uint64_type_node, >> + uint64_type_node, NULL_TREE); >> + aarch64_builtin_decls[AARCH64_REV16] >> + = aarch64_general_add_builtin ("__builtin_aarch64_rev16", uint32_fntype, >> + AARCH64_REV16); >> + aarch64_builtin_decls[AARCH64_REV16L] >> + = aarch64_general_add_builtin ("__builtin_aarch64_rev16l", ulong_fntype, >> + AARCH64_REV16L); >> + aarch64_builtin_decls[AARCH64_REV16LL] >> + = aarch64_general_add_builtin ("__builtin_aarch64_rev16ll", uint64_fntype, >> + AARCH64_REV16LL); >> + aarch64_builtin_decls[AARCH64_RBIT] >> + = aarch64_general_add_builtin ("__builtin_aarch64_rbit", uint32_fntype, >> + AARCH64_RBIT); >> + aarch64_builtin_decls[AARCH64_RBITL] >> + = aarch64_general_add_builtin ("__builtin_aarch64_rbitl", ulong_fntype, >> + AARCH64_RBITL); >> + aarch64_builtin_decls[AARCH64_RBITLL] >> + = aarch64_general_add_builtin ("__builtin_aarch64_rbitll", 
uint64_fntype, >> + AARCH64_RBITLL); >> +} >> + >> /* Implement #pragma GCC aarch64 "arm_acle.h". */ >> void >> handle_arm_acle_h (void) >> { >> + aarch64_init_data_intrinsics (); >> if (TARGET_LS64) >> aarch64_init_ls64_builtins (); >> } >> @@ -2394,6 +2431,37 @@ aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target) >> return target; >> } >> >> +/* Function to expand an expression EXP which calls one of the ACLE Data >> + Intrinsic builtins FCODE with the result going to TARGET. */ >> +static rtx >> +aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target) >> +{ >> + expand_operand ops[2]; >> + machine_mode mode = GET_MODE (target); >> + create_output_operand (&ops[0], target, mode); >> + create_input_operand (&ops[1], expand_normal (CALL_EXPR_ARG (exp, 0)), mode); >> + enum insn_code icode; >> + >> + switch (fcode) >> + { >> + case AARCH64_REV16: >> + case AARCH64_REV16L: >> + case AARCH64_REV16LL: >> + icode = code_for_aarch64_rev16 (mode); >> + break; >> + case AARCH64_RBIT: >> + case AARCH64_RBITL: >> + case AARCH64_RBITLL: >> + icode = code_for_aarch64_rbit (mode); >> + break; >> + default: >> + gcc_unreachable (); >> + } >> + >> + expand_insn (icode, 2, ops); >> + return ops[0].value; >> +} >> + >> /* Expand an expression EXP as fpsr or fpcr setter (depending on >> UNSPEC) using MODE. 
*/ >> static void >> @@ -2551,6 +2619,9 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, >> if (fcode >= AARCH64_MEMTAG_BUILTIN_START >> && fcode <= AARCH64_MEMTAG_BUILTIN_END) >> return aarch64_expand_builtin_memtag (fcode, exp, target); >> + if (fcode >= AARCH64_REV16 >> + && fcode <= AARCH64_RBITLL) >> + return aarch64_expand_builtin_data_intrinsic (fcode, exp, target); >> >> gcc_unreachable (); >> } >> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md >> index acec8c1146765c0fac73c15351853324b8f03209..ef0aed25c6b26eff61f9f6030dc5921a534e3d19 100644 >> --- a/gcc/config/aarch64/aarch64.md >> +++ b/gcc/config/aarch64/aarch64.md >> @@ -4950,7 +4950,7 @@ (define_expand "ffs<mode>2" >> rtx ccreg = aarch64_gen_compare_reg (EQ, operands[1], const0_rtx); >> rtx x = gen_rtx_NE (VOIDmode, ccreg, const0_rtx); >> >> - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); >> + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1])); >> emit_insn (gen_clz<mode>2 (operands[0], operands[0])); >> emit_insn (gen_csinc3<mode>_insn (operands[0], x, operands[0], const0_rtx)); >> DONE; >> @@ -4996,7 +4996,7 @@ (define_insn "clrsb<mode>2" >> [(set_attr "type" "clz")] >> ) >> >> -(define_insn "rbit<mode>2" >> +(define_insn "@aarch64_rbit<mode>" >> [(set (match_operand:GPI 0 "register_operand" "=r") >> (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_RBIT))] >> "" >> @@ -5017,7 +5017,7 @@ (define_insn_and_split "ctz<mode>2" >> "reload_completed" >> [(const_int 0)] >> " >> - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); >> + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1])); >> emit_insn (gen_clz<mode>2 (operands[0], operands[0])); >> DONE; >> ") >> @@ -6022,6 +6022,13 @@ (define_insn "bswaphi2" >> [(set_attr "type" "rev")] >> ) >> >> +(define_insn "@aarch64_rev16<mode>" >> + [(set (match_operand:GPI 0 "register_operand" "=r") >> + (unspec:GPI [(match_operand:GPI 1 
"register_operand" "r")] UNSPEC_REV))] >> + "" >> + "rev16\\t%<w>0, %<w>1" >> + [(set_attr "type" "rev")]) >> + >> (define_insn "*aarch64_bfxil<mode>" >> [(set (match_operand:GPI 0 "register_operand" "=r,r") >> (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "r,0") >> diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h >> index 9775a48c65825b424d3eb442384f5ab87b734fd7..d26e269cb843fe37ba789db09c40d06f53438cda 100644 >> --- a/gcc/config/aarch64/arm_acle.h >> +++ b/gcc/config/aarch64/arm_acle.h >> @@ -28,6 +28,7 @@ >> #define _GCC_ARM_ACLE_H >> >> #include <stdint.h> >> +#include <stddef.h> >> >> #pragma GCC aarch64 "arm_acle.h" >> >> @@ -35,6 +36,58 @@ >> extern "C" { >> #endif >> >> +#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) \ >> +__extension__ extern __inline TYPE \ >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ >> +NAME (TYPE __value, uint32_t __rotate) \ >> +{ \ >> + size_t __size = sizeof (TYPE) * __CHAR_BIT__; \ >> + __rotate = __rotate % __size; \ >> + return __value >> __rotate | __value << ((__size - __rotate) % __size); \ >> +} >> + >> +_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t) >> +_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long) >> +_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t) >> + >> +#undef _GCC_ARM_ACLE_ROR_FN >> + >> +#define _GCC_ARM_ACLE_DATA_FN(NAME, BUILTIN, ITYPE, RTYPE) \ >> +__extension__ extern __inline RTYPE \ >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ >> +__##NAME (ITYPE __value) \ >> +{ \ >> + return __builtin_##BUILTIN (__value); \ >> +} >> + >> +_GCC_ARM_ACLE_DATA_FN (clz, clz, uint32_t, unsigned int) >> +_GCC_ARM_ACLE_DATA_FN (clzl, clzl, unsigned long, unsigned int) >> +_GCC_ARM_ACLE_DATA_FN (clzll, clzll, uint64_t, unsigned int) >> +_GCC_ARM_ACLE_DATA_FN (cls, clrsb, uint32_t, unsigned int) >> +_GCC_ARM_ACLE_DATA_FN (clsl, clrsbl, unsigned long, unsigned int) >> +_GCC_ARM_ACLE_DATA_FN (clsll, clrsbll, uint64_t, unsigned int) >> +_GCC_ARM_ACLE_DATA_FN (rev16, 
aarch64_rev16, uint32_t, uint32_t) >> +_GCC_ARM_ACLE_DATA_FN (rev16l, aarch64_rev16l, unsigned long, unsigned long) >> +_GCC_ARM_ACLE_DATA_FN (rev16ll, aarch64_rev16ll, uint64_t, uint64_t) >> +_GCC_ARM_ACLE_DATA_FN (rbit, aarch64_rbit, uint32_t, uint32_t) >> +_GCC_ARM_ACLE_DATA_FN (rbitl, aarch64_rbitl, unsigned long, unsigned long) >> +_GCC_ARM_ACLE_DATA_FN (rbitll, aarch64_rbitll, uint64_t, uint64_t) >> +_GCC_ARM_ACLE_DATA_FN (revsh, bswap16, int16_t, int16_t) >> +_GCC_ARM_ACLE_DATA_FN (rev, bswap32, uint32_t, uint32_t) >> +_GCC_ARM_ACLE_DATA_FN (revll, bswap64, uint64_t, uint64_t) >> + >> +#undef _GCC_ARM_ACLE_DATA_FN >> + >> +__extension__ extern __inline unsigned long >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) >> +__revl (unsigned long __value) >> +{ >> + if (sizeof (unsigned long) == 8) >> + return __revll (__value); >> + else >> + return __rev (__value); >> +} >> + >> #pragma GCC push_options >> #pragma GCC target ("arch=armv8.3-a") >> __extension__ extern __inline int32_t >> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c >> new file mode 100644 >> index 0000000000000000000000000000000000000000..e067ef20bbdc8993865b541aa99dccac6b03e6a0 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c >> @@ -0,0 +1,468 @@ >> +/* Test the ACLE data intrinsics. 
*/ >> +/* { dg-do assemble } */ >> +/* { dg-additional-options "--save-temps -O1" } */ >> +/* { dg-final { check-function-bodies "**" "" "" } } */ >> + >> +#include "arm_acle.h" >> + >> +/* >> +** test_clz: >> +** clz w0, w0 >> +** ret >> +*/ >> + >> +unsigned int test_clz (uint32_t a) >> +{ >> + return __clz (a); >> +} >> + >> +/* >> +** test_clzl: >> +** clz [wx]0, [wx]0 >> +** ret >> +*/ >> + >> +unsigned int test_clzl (unsigned long a) >> +{ >> + return __clzl (a); >> +} >> + >> +/* >> +** test_clzll: >> +** clz x0, x0 >> +** ret >> +*/ >> + >> +unsigned int test_clzll (uint64_t a) >> +{ >> + return __clzll (a); >> +} >> + >> +/* >> +** test_cls: >> +** cls w0, w0 >> +** ret >> +*/ >> + >> +unsigned int test_cls (uint32_t a) >> +{ >> + return __cls (a); >> +} >> + >> +/* >> +** test_clsl: >> +** cls [wx]0, [wx]0 >> +** ret >> +*/ >> + >> +unsigned int test_clsl (unsigned long a) >> +{ >> + return __clsl (a); >> +} >> + >> +/* >> +** test_clsll: >> +** cls x0, x0 >> +** ret >> +*/ >> + >> +unsigned int test_clsll (uint64_t a) >> +{ >> + return __clsll (a); >> +} >> + >> +/* >> +** test_rbit: >> +** rbit w0, w0 >> +** ret >> +*/ >> + >> +uint32_t test_rbit (uint32_t a) >> +{ >> + return __rbit (a); >> +} >> + >> +/* >> +** test_rbitl: >> +** rbit [wx]0, [wx]0 >> +** ret >> +*/ >> + >> +unsigned long test_rbitl (unsigned long a) >> +{ >> + return __rbitl (a); >> +} >> + >> +/* >> +** test_rbitll: >> +** rbit x0, x0 >> +** ret >> +*/ >> + >> +uint64_t test_rbitll (uint64_t a) >> +{ >> + return __rbitll (a); >> +} >> + >> +/* >> +** test_rev: >> +** rev w0, w0 >> +** ret >> +*/ >> + >> +uint32_t test_rev (uint32_t a) >> +{ >> + return __rev (a); >> +} >> + >> +/* >> +** test_revl: >> +** rev [wx]0, [wx]0 >> +** ret >> +*/ >> + >> +unsigned long test_revl (unsigned long a) >> +{ >> + return __revl (a); >> +} >> + >> +/* >> +** test_revll: >> +** rev x0, x0 >> +** ret >> +*/ >> + >> +uint64_t test_revll (uint64_t a) >> +{ >> + return __revll (a); >> +} >> + >> +/* >> 
+** test_rev16: >> +** rev16 w0, w0 >> +** ret >> +*/ >> + >> +uint32_t test_rev16 (uint32_t a) >> +{ >> + return __rev16 (a); >> +} >> + >> +/* >> +** test_rev16l: >> +** rev16 [wx]0, [wx]0 >> +** ret >> +*/ >> + >> +unsigned long test_rev16l (unsigned long a) >> +{ >> + return __rev16l (a); >> +} >> + >> +/* >> +** test_rev16ll: >> +** rev16 x0, x0 >> +** ret >> +*/ >> + >> +uint64_t test_rev16ll (uint64_t a) >> +{ >> + return __rev16ll (a); >> +} >> + >> +/* >> +** test_ror: >> +** ror w0, w0, w1 >> +** ret >> +*/ >> + >> +uint32_t test_ror (uint32_t a, uint32_t r) >> +{ >> + return __ror (a, r); >> +} >> + >> +/* >> +** test_rorl: >> +** ror [wx]0, [wx]0, [wx]1 >> +** ret >> +*/ >> + >> +unsigned long test_rorl (unsigned long a, uint32_t r) >> +{ >> + return __rorl (a, r); >> +} >> + >> +/* >> +** test_rorll: >> +** ror x0, x0, x1 >> +** ret >> +*/ >> + >> +uint64_t test_rorll (uint64_t a, uint32_t r) >> +{ >> + return __rorll (a, r); >> +} >> + >> +/* >> +** test_revsh: >> +** rev16 w0, w0 >> +** ret >> +*/ >> + >> +int16_t test_revsh (int16_t a) >> +{ >> + return __revsh (a); >> +} >> + >> +uint32_t *g32; >> +unsigned long *gul; >> +uint64_t *g64; >> +unsigned int *gui; >> +int16_t *g16; >> + >> +/* >> +** test_clz_mem: >> +** ... >> +** clz w[0-9]+, w[0-9]+ >> +** ... >> +** ret >> +*/ >> + >> +void test_clz_mem (uint32_t *a) >> +{ >> + *gui = __clz (*a); >> +} >> + >> +/* >> +** test_clzl_mem: >> +** ... >> +** clz [wx][0-9]+, [wx][0-9]+ >> +** ... >> +** ret >> +*/ >> + >> +void test_clzl_mem (unsigned long *a) >> +{ >> + *gui = __clzl (*a); >> +} >> + >> +/* >> +** test_clzll_mem: >> +** ... >> +** clz x[0-9]+, x[0-9]+ >> +** ... >> +** ret >> +*/ >> + >> +void test_clzll_mem (uint64_t *a) >> +{ >> + *gui = __clzll (*a); >> +} >> + >> +/* >> +** test_cls_mem: >> +** ... >> +** cls w[0-9]+, w[0-9]+ >> +** ... >> +** ret >> +*/ >> + >> +void test_cls_mem (uint32_t *a) >> +{ >> + *gui = __cls (*a); >> +} >> + >> +/* >> +** test_clsl_mem: >> +** ... 
>> +** cls [wx][0-9]+, [wx][0-9]+ >> +** ... >> +** ret >> +*/ >> + >> +void test_clsl_mem (unsigned long *a) >> +{ >> + *gui = __clsl (*a); >> +} >> + >> +/* >> +** test_clsll_mem: >> +** ... >> +** cls x[0-9]+, x[0-9]+ >> +** ... >> +** ret >> +*/ >> + >> +void test_clsll_mem (uint64_t *a) >> +{ >> + *gui = __clsll (*a); >> +} >> + >> +/* >> +** test_rbit_mem: >> +** ... >> +** rbit w[0-9]+, w[0-9]+ >> +** ... >> +** ret >> +*/ >> + >> +void test_rbit_mem (uint32_t *a) >> +{ >> + *g32 = __rbit (*a); >> +} >> + >> +/* >> +** test_rbitl_mem: >> +** ... >> +** rbit [wx][0-9]+, [wx][0-9]+ >> +** ... >> +** ret >> +*/ >> + >> +void test_rbitl_mem (unsigned long *a) >> +{ >> + *gul = __rbitl (*a); >> +} >> + >> +/* >> +** test_rbitll_mem: >> +** ... >> +** rbit x[0-9]+, x[0-9]+ >> +** ... >> +** ret >> +*/ >> + >> +void test_rbitll_mem (uint64_t *a) >> +{ >> + *g64 = __rbitll (*a); >> +} >> + >> +/* >> +** test_rev_mem: >> +** ... >> +** rev w[0-9]+, w[0-9]+ >> +** ... >> +** ret >> +*/ >> + >> +void test_rev_mem (uint32_t *a) >> +{ >> + *g32 = __rev (*a); >> +} >> + >> +/* >> +** test_revl_mem: >> +** ... >> +** rev [wx][0-9]+, [wx][0-9]+ >> +** ... >> +** ret >> +*/ >> + >> +void test_revl_mem (unsigned long *a) >> +{ >> + *gul = __revl (*a); >> +} >> + >> +/* >> +** test_revll_mem: >> +** ... >> +** rev x[0-9]+, x[0-9]+ >> +** ... >> +** ret >> +*/ >> + >> +void test_revll_mem (uint64_t *a) >> +{ >> + *g64 = __revll (*a); >> +} >> + >> +/* >> +** test_rev16_mem: >> +** ... >> +** rev16 w[0-9]+, w[0-9]+ >> +** ... >> +** ret >> +*/ >> + >> +void test_rev16_mem (uint32_t *a) >> +{ >> + *g32 = __rev16 (*a); >> +} >> + >> +/* >> +** test_rev16l_mem: >> +** ... >> +** rev16 [wx][0-9]+, [wx][0-9]+ >> +** ... >> +** ret >> +*/ >> + >> +void test_rev16l_mem (unsigned long *a) >> +{ >> + *gul = __rev16l (*a); >> +} >> + >> +/* >> +** test_rev16ll_mem: >> +** ... >> +** rev16 x[0-9]+, x[0-9]+ >> +** ... 
>> +** ret >> +*/ >> + >> +void test_rev16ll_mem (uint64_t *a) >> +{ >> + *g64 = __rev16ll (*a); >> +} >> + >> +/* >> +** test_ror_mem: >> +** ... >> +** ror w[0-9]+, w[0-9]+, w[0-9]+ >> +** ... >> +** ret >> +*/ >> + >> +void test_ror_mem (uint32_t *a, uint32_t *r) >> +{ >> + *g32 = __ror (*a, *r); >> +} >> + >> +/* >> +** test_rorl_mem: >> +** ... >> +** ror [wx][0-9]+, [wx][0-9]+, [wx][0-9]+ >> +** ... >> +** ret >> +*/ >> + >> +void test_rorl_mem (unsigned long *a, uint32_t *r) >> +{ >> + *gul = __rorl (*a, *r); >> +} >> + >> +/* >> +** test_rorll_mem: >> +** ... >> +** ror x[0-9]+, x[0-9]+, x[0-9]+ >> +** ... >> +** ret >> +*/ >> + >> +void test_rorll_mem (uint64_t *a, uint32_t *r) >> +{ >> + *g64 = __rorll (*a, *r); >> +} >> + >> +/* >> +** test_revsh_mem: >> +** ... >> +** rev16 w[0-9]+, w[0-9]+ >> +** ... >> +** ret >> +*/ >> + >> +void test_revsh_mem (int16_t *a) >> +{ >> + *g16 = __revsh (*a); >> +}
> -----Original Message----- > From: Andre Vieira (lists) <andre.simoesdiasvieira@arm.com> > Sent: Thursday, August 11, 2022 4:11 PM > To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; > Richard Sandiford <Richard.Sandiford@arm.com>; Richard Biener > <rguenther@suse.de> > Subject: Re: [PATCH][AArch64] Implement ACLE Data Intrinsics > > OK to backport this to gcc-12? Applies cleanly and did a bootstrap and > regression test on aarch64-linux-gnu Ok as long as it's before the branch freeze. Thanks, Kyrill > > Regards, > Andre > > On 01/07/2022 12:26, Richard Sandiford wrote: > > "Andre Vieira (lists)" <andre.simoesdiasvieira@arm.com> writes: > >> On 29/06/2022 08:18, Richard Sandiford wrote: > >>>> + break; > >>>> + case AARCH64_RBIT: > >>>> + case AARCH64_RBITL: > >>>> + case AARCH64_RBITLL: > >>>> + if (mode == SImode) > >>>> + icode = CODE_FOR_aarch64_rbitsi; > >>>> + else > >>>> + icode = CODE_FOR_aarch64_rbitdi; > >>>> + break; > >>>> + default: > >>>> + gcc_unreachable (); > >>>> + } > >>>> + expand_insn (icode, 2, ops); > >>>> + return target; > >>> This needs to return ops[0].value instead, since "target" just suggests > >>> a possible location. > >>> > >>> Could you add tests for a memory source and memory destination, e.g.: > >>> > >>> void test_clz_mem (uint32_t *a) > >>> { > >>> *a = __clz (*a); > >>> } > >>> > >>> Without tests like that, these comments probably just sound like a paper > >>> exercise, but they should make a difference for memory sources > (previous > >>> review) and memory destinations (this round). > >> I had locally tested it (with rev though because clz doesn't use that > >> code) and strangely it does seem to work for the memory destinations, > >> but that's just a simple test. > >> It could very well go wrong with some more complex codegen, so I'll just > >> take your word and use ops[0].value. 
> >> > >> And yeah I didn't add the tests at the time, don't really know why, I'll > >> chuck it down to laziness :P > >>>> diff --git a/gcc/config/aarch64/arm_acle.h > b/gcc/config/aarch64/arm_acle.h > >>>> index > 9775a48c65825b424d3eb442384f5ab87b734fd7..a044bc74553fcf2a49b71290 > 083f3f072fd5a2ce 100644 > >>>> --- a/gcc/config/aarch64/arm_acle.h > >>>> +++ b/gcc/config/aarch64/arm_acle.h > >>>> @@ -28,6 +28,7 @@ > >>>> #define _GCC_ARM_ACLE_H > >>>> > >>>> #include <stdint.h> > >>>> +#include <stddef.h> > >>>> > >>>> #pragma GCC aarch64 "arm_acle.h" > >>>> > >>>> @@ -35,6 +36,58 @@ > >>>> extern "C" { > >>>> #endif > >>>> > >>>> +#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) > \ > >>>> +__extension__ extern __inline TYPE > \ > >>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > \ > >>>> +NAME (TYPE __value, uint32_t __rotate) > \ > >>>> +{ \ > >>>> + size_t __size = sizeof (TYPE) * __CHAR_BIT__; > \ > >>>> + __rotate = __rotate % __size; > \ > >>>> + return __value >> __rotate | __value << ((__size - __rotate) % __size); > \ > >>>> +} > >>>> + > >>>> +_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t) > >>>> +_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long) > >>>> +_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t) > >>>> + > >>>> +#undef _GCC_ARM_ACLE_ROR_FN > >>>> + > >>>> +#define _GCC_ARM_ACLE_DATA_FN(NAME, BUILTIN, ITYPE, RTYPE) > \ > >>>> +__extension__ extern __inline RTYPE \ > >>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ > >>>> +__##NAME (ITYPE __value) \ > >>>> +{ \ > >>>> + return __builtin_##BUILTIN (__value); \ > >>>> +} > >>>> + > >>>> +_GCC_ARM_ACLE_DATA_FN (clz, clz, uint32_t, unsigned int) > >>>> +_GCC_ARM_ACLE_DATA_FN (clzl, clzl, unsigned long, unsigned int) > >>>> +_GCC_ARM_ACLE_DATA_FN (clzll, clzll, uint64_t, unsigned int) > >>>> +_GCC_ARM_ACLE_DATA_FN (cls, clrsb, uint32_t, unsigned int) > >>>> +_GCC_ARM_ACLE_DATA_FN (clsl, clrsbl, unsigned long, unsigned int) > >>>> +_GCC_ARM_ACLE_DATA_FN (clsll, clrsbll, uint64_t, 
unsigned int) > >>>> +_GCC_ARM_ACLE_DATA_FN (rev16, aarch64_rev16, uint32_t, > uint32_t) > >>>> +_GCC_ARM_ACLE_DATA_FN (rev16l, aarch64_rev16l, unsigned long, > unsigned long) > >>>> +_GCC_ARM_ACLE_DATA_FN (rev16ll, aarch64_rev16ll, uint64_t, > uint64_t) > >>>> +_GCC_ARM_ACLE_DATA_FN (rbit, aarch64_rbit, uint32_t, uint32_t) > >>>> +_GCC_ARM_ACLE_DATA_FN (rbitl, aarch64_rbitl, unsigned long, > unsigned long) > >>>> +_GCC_ARM_ACLE_DATA_FN (rbitll, aarch64_rbitll, uint64_t, uint64_t) > >>>> +_GCC_ARM_ACLE_DATA_FN (revsh, bswap16, int16_t, uint16_t) > >>> The return type should be int16_t. > >> Nice catch! > >>> The clz and cls tests have the old return types (same as the argument > >>> types), but I guess that's a good thing, since it shows that we avoid > >>> the redundant zero-extend in clzll and clsll. > >> Yeah I noticed that too when I was adding the mem tests, but I did > >> change them though because at the time it just felt like an oversight, > >> though I too was pleasantly surprised GCC was managing to avoid the > >> zero-extending :) > >> I then saw your comment and made me wonder whether I should keep > the > >> wrong return types in... I haven't but happy to change them back if you > >> think it's a nice 'test' to have. > > I thought it was OK/useful as it was, but I don't mind either way. > > > > BTW, while trying it out locally, I noticed: > > > > aarch64_init_data_intrinsics > > > > was called from the wrong place. Since it's adding normal __builtin > > functions, it should be called from aarch64_general_init_builtins > > instead of handle_arm_acle_h. > > > > handle_arm_acle_h is instead for cases where we want to simulate > > C/C++ definitions of the ACLE intrinsics themselves (i.e. so that > > the intrinsics themselves are built-in functions, rather than > > inline wrappers around built-in functions). > > > > OK with that change, thanks. 
> > > > Thanks, > > Richard > > > >> Regards, > >> Andre > >> > >> diff --git a/gcc/config/aarch64/aarch64-builtins.cc > b/gcc/config/aarch64/aarch64-builtins.cc > >> index > e0a741ac663188713e21f457affa57217d074783..bb5d97c8fc6402635270df85 > 1a949cabeecaa5e8 100644 > >> --- a/gcc/config/aarch64/aarch64-builtins.cc > >> +++ b/gcc/config/aarch64/aarch64-builtins.cc > >> @@ -613,6 +613,12 @@ enum aarch64_builtins > >> AARCH64_LS64_BUILTIN_ST64B, > >> AARCH64_LS64_BUILTIN_ST64BV, > >> AARCH64_LS64_BUILTIN_ST64BV0, > >> + AARCH64_REV16, > >> + AARCH64_REV16L, > >> + AARCH64_REV16LL, > >> + AARCH64_RBIT, > >> + AARCH64_RBITL, > >> + AARCH64_RBITLL, > >> AARCH64_BUILTIN_MAX > >> }; > >> > >> @@ -1664,10 +1670,41 @@ aarch64_init_ls64_builtins (void) > >> = aarch64_general_add_builtin (data[i].name, data[i].type, > data[i].code); > >> } > >> > >> +static void > >> +aarch64_init_data_intrinsics (void) > >> +{ > >> + tree uint32_fntype = build_function_type_list (uint32_type_node, > >> + uint32_type_node, > NULL_TREE); > >> + tree ulong_fntype = build_function_type_list > (long_unsigned_type_node, > >> + long_unsigned_type_node, > >> + NULL_TREE); > >> + tree uint64_fntype = build_function_type_list (uint64_type_node, > >> + uint64_type_node, > NULL_TREE); > >> + aarch64_builtin_decls[AARCH64_REV16] > >> + = aarch64_general_add_builtin ("__builtin_aarch64_rev16", > uint32_fntype, > >> + AARCH64_REV16); > >> + aarch64_builtin_decls[AARCH64_REV16L] > >> + = aarch64_general_add_builtin ("__builtin_aarch64_rev16l", > ulong_fntype, > >> + AARCH64_REV16L); > >> + aarch64_builtin_decls[AARCH64_REV16LL] > >> + = aarch64_general_add_builtin ("__builtin_aarch64_rev16ll", > uint64_fntype, > >> + AARCH64_REV16LL); > >> + aarch64_builtin_decls[AARCH64_RBIT] > >> + = aarch64_general_add_builtin ("__builtin_aarch64_rbit", > uint32_fntype, > >> + AARCH64_RBIT); > >> + aarch64_builtin_decls[AARCH64_RBITL] > >> + = aarch64_general_add_builtin ("__builtin_aarch64_rbitl", > ulong_fntype, > >> 
+ AARCH64_RBITL); > >> + aarch64_builtin_decls[AARCH64_RBITLL] > >> + = aarch64_general_add_builtin ("__builtin_aarch64_rbitll", > uint64_fntype, > >> + AARCH64_RBITLL); > >> +} > >> + > >> /* Implement #pragma GCC aarch64 "arm_acle.h". */ > >> void > >> handle_arm_acle_h (void) > >> { > >> + aarch64_init_data_intrinsics (); > >> if (TARGET_LS64) > >> aarch64_init_ls64_builtins (); > >> } > >> @@ -2394,6 +2431,37 @@ aarch64_expand_builtin_memtag (int fcode, > tree exp, rtx target) > >> return target; > >> } > >> > >> +/* Function to expand an expression EXP which calls one of the ACLE > Data > >> + Intrinsic builtins FCODE with the result going to TARGET. */ > >> +static rtx > >> +aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx > target) > >> +{ > >> + expand_operand ops[2]; > >> + machine_mode mode = GET_MODE (target); > >> + create_output_operand (&ops[0], target, mode); > >> + create_input_operand (&ops[1], expand_normal (CALL_EXPR_ARG > (exp, 0)), mode); > >> + enum insn_code icode; > >> + > >> + switch (fcode) > >> + { > >> + case AARCH64_REV16: > >> + case AARCH64_REV16L: > >> + case AARCH64_REV16LL: > >> + icode = code_for_aarch64_rev16 (mode); > >> + break; > >> + case AARCH64_RBIT: > >> + case AARCH64_RBITL: > >> + case AARCH64_RBITLL: > >> + icode = code_for_aarch64_rbit (mode); > >> + break; > >> + default: > >> + gcc_unreachable (); > >> + } > >> + > >> + expand_insn (icode, 2, ops); > >> + return ops[0].value; > >> +} > >> + > >> /* Expand an expression EXP as fpsr or fpcr setter (depending on > >> UNSPEC) using MODE. 
*/ > >> static void > >> @@ -2551,6 +2619,9 @@ aarch64_general_expand_builtin (unsigned int > fcode, tree exp, rtx target, > >> if (fcode >= AARCH64_MEMTAG_BUILTIN_START > >> && fcode <= AARCH64_MEMTAG_BUILTIN_END) > >> return aarch64_expand_builtin_memtag (fcode, exp, target); > >> + if (fcode >= AARCH64_REV16 > >> + && fcode <= AARCH64_RBITLL) > >> + return aarch64_expand_builtin_data_intrinsic (fcode, exp, target); > >> > >> gcc_unreachable (); > >> } > >> diff --git a/gcc/config/aarch64/aarch64.md > b/gcc/config/aarch64/aarch64.md > >> index > acec8c1146765c0fac73c15351853324b8f03209..ef0aed25c6b26eff61f9f6030d > c5921a534e3d19 100644 > >> --- a/gcc/config/aarch64/aarch64.md > >> +++ b/gcc/config/aarch64/aarch64.md > >> @@ -4950,7 +4950,7 @@ (define_expand "ffs<mode>2" > >> rtx ccreg = aarch64_gen_compare_reg (EQ, operands[1], const0_rtx); > >> rtx x = gen_rtx_NE (VOIDmode, ccreg, const0_rtx); > >> > >> - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); > >> + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], > operands[1])); > >> emit_insn (gen_clz<mode>2 (operands[0], operands[0])); > >> emit_insn (gen_csinc3<mode>_insn (operands[0], x, operands[0], > const0_rtx)); > >> DONE; > >> @@ -4996,7 +4996,7 @@ (define_insn "clrsb<mode>2" > >> [(set_attr "type" "clz")] > >> ) > >> > >> -(define_insn "rbit<mode>2" > >> +(define_insn "@aarch64_rbit<mode>" > >> [(set (match_operand:GPI 0 "register_operand" "=r") > >> (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] > UNSPEC_RBIT))] > >> "" > >> @@ -5017,7 +5017,7 @@ (define_insn_and_split "ctz<mode>2" > >> "reload_completed" > >> [(const_int 0)] > >> " > >> - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); > >> + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], > operands[1])); > >> emit_insn (gen_clz<mode>2 (operands[0], operands[0])); > >> DONE; > >> ") > >> @@ -6022,6 +6022,13 @@ (define_insn "bswaphi2" > >> [(set_attr "type" "rev")] > >> ) > >> > >> +(define_insn 
"@aarch64_rev16<mode>" > >> + [(set (match_operand:GPI 0 "register_operand" "=r") > >> + (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] > UNSPEC_REV))] > >> + "" > >> + "rev16\\t%<w>0, %<w>1" > >> + [(set_attr "type" "rev")]) > >> + > >> (define_insn "*aarch64_bfxil<mode>" > >> [(set (match_operand:GPI 0 "register_operand" "=r,r") > >> (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "r,0") > >> diff --git a/gcc/config/aarch64/arm_acle.h > b/gcc/config/aarch64/arm_acle.h > >> index > 9775a48c65825b424d3eb442384f5ab87b734fd7..d26e269cb843fe37ba789db > 09c40d06f53438cda 100644 > >> --- a/gcc/config/aarch64/arm_acle.h > >> +++ b/gcc/config/aarch64/arm_acle.h > >> @@ -28,6 +28,7 @@ > >> #define _GCC_ARM_ACLE_H > >> > >> #include <stdint.h> > >> +#include <stddef.h> > >> > >> #pragma GCC aarch64 "arm_acle.h" > >> > >> @@ -35,6 +36,58 @@ > >> extern "C" { > >> #endif > >> > >> +#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) > \ > >> +__extension__ extern __inline TYPE > \ > >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ > >> +NAME (TYPE __value, uint32_t __rotate) > \ > >> +{ \ > >> + size_t __size = sizeof (TYPE) * __CHAR_BIT__; > \ > >> + __rotate = __rotate % __size; > \ > >> + return __value >> __rotate | __value << ((__size - __rotate) % __size); \ > >> +} > >> + > >> +_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t) > >> +_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long) > >> +_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t) > >> + > >> +#undef _GCC_ARM_ACLE_ROR_FN > >> + > >> +#define _GCC_ARM_ACLE_DATA_FN(NAME, BUILTIN, ITYPE, RTYPE) > \ > >> +__extension__ extern __inline RTYPE \ > >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ > >> +__##NAME (ITYPE __value) \ > >> +{ \ > >> + return __builtin_##BUILTIN (__value); \ > >> +} > >> + > >> +_GCC_ARM_ACLE_DATA_FN (clz, clz, uint32_t, unsigned int) > >> +_GCC_ARM_ACLE_DATA_FN (clzl, clzl, unsigned long, unsigned int) > >> +_GCC_ARM_ACLE_DATA_FN (clzll, clzll, uint64_t, 
unsigned int) > >> +_GCC_ARM_ACLE_DATA_FN (cls, clrsb, uint32_t, unsigned int) > >> +_GCC_ARM_ACLE_DATA_FN (clsl, clrsbl, unsigned long, unsigned int) > >> +_GCC_ARM_ACLE_DATA_FN (clsll, clrsbll, uint64_t, unsigned int) > >> +_GCC_ARM_ACLE_DATA_FN (rev16, aarch64_rev16, uint32_t, uint32_t) > >> +_GCC_ARM_ACLE_DATA_FN (rev16l, aarch64_rev16l, unsigned long, > unsigned long) > >> +_GCC_ARM_ACLE_DATA_FN (rev16ll, aarch64_rev16ll, uint64_t, uint64_t) > >> +_GCC_ARM_ACLE_DATA_FN (rbit, aarch64_rbit, uint32_t, uint32_t) > >> +_GCC_ARM_ACLE_DATA_FN (rbitl, aarch64_rbitl, unsigned long, > unsigned long) > >> +_GCC_ARM_ACLE_DATA_FN (rbitll, aarch64_rbitll, uint64_t, uint64_t) > >> +_GCC_ARM_ACLE_DATA_FN (revsh, bswap16, int16_t, int16_t) > >> +_GCC_ARM_ACLE_DATA_FN (rev, bswap32, uint32_t, uint32_t) > >> +_GCC_ARM_ACLE_DATA_FN (revll, bswap64, uint64_t, uint64_t) > >> + > >> +#undef _GCC_ARM_ACLE_DATA_FN > >> + > >> +__extension__ extern __inline unsigned long > >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > >> +__revl (unsigned long __value) > >> +{ > >> + if (sizeof (unsigned long) == 8) > >> + return __revll (__value); > >> + else > >> + return __rev (__value); > >> +} > >> + > >> #pragma GCC push_options > >> #pragma GCC target ("arch=armv8.3-a") > >> __extension__ extern __inline int32_t > >> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c > b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c > >> new file mode 100644 > >> index > 0000000000000000000000000000000000000000..e067ef20bbdc8993865b541 > aa99dccac6b03e6a0 > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c > >> @@ -0,0 +1,468 @@ > >> +/* Test the ACLE data intrinsics. 
*/ > >> +/* { dg-do assemble } */ > >> +/* { dg-additional-options "--save-temps -O1" } */ > >> +/* { dg-final { check-function-bodies "**" "" "" } } */ > >> + > >> +#include "arm_acle.h" > >> + > >> +/* > >> +** test_clz: > >> +** clz w0, w0 > >> +** ret > >> +*/ > >> + > >> +unsigned int test_clz (uint32_t a) > >> +{ > >> + return __clz (a); > >> +} > >> + > >> +/* > >> +** test_clzl: > >> +** clz [wx]0, [wx]0 > >> +** ret > >> +*/ > >> + > >> +unsigned int test_clzl (unsigned long a) > >> +{ > >> + return __clzl (a); > >> +} > >> + > >> +/* > >> +** test_clzll: > >> +** clz x0, x0 > >> +** ret > >> +*/ > >> + > >> +unsigned int test_clzll (uint64_t a) > >> +{ > >> + return __clzll (a); > >> +} > >> + > >> +/* > >> +** test_cls: > >> +** cls w0, w0 > >> +** ret > >> +*/ > >> + > >> +unsigned int test_cls (uint32_t a) > >> +{ > >> + return __cls (a); > >> +} > >> + > >> +/* > >> +** test_clsl: > >> +** cls [wx]0, [wx]0 > >> +** ret > >> +*/ > >> + > >> +unsigned int test_clsl (unsigned long a) > >> +{ > >> + return __clsl (a); > >> +} > >> + > >> +/* > >> +** test_clsll: > >> +** cls x0, x0 > >> +** ret > >> +*/ > >> + > >> +unsigned int test_clsll (uint64_t a) > >> +{ > >> + return __clsll (a); > >> +} > >> + > >> +/* > >> +** test_rbit: > >> +** rbit w0, w0 > >> +** ret > >> +*/ > >> + > >> +uint32_t test_rbit (uint32_t a) > >> +{ > >> + return __rbit (a); > >> +} > >> + > >> +/* > >> +** test_rbitl: > >> +** rbit [wx]0, [wx]0 > >> +** ret > >> +*/ > >> + > >> +unsigned long test_rbitl (unsigned long a) > >> +{ > >> + return __rbitl (a); > >> +} > >> + > >> +/* > >> +** test_rbitll: > >> +** rbit x0, x0 > >> +** ret > >> +*/ > >> + > >> +uint64_t test_rbitll (uint64_t a) > >> +{ > >> + return __rbitll (a); > >> +} > >> + > >> +/* > >> +** test_rev: > >> +** rev w0, w0 > >> +** ret > >> +*/ > >> + > >> +uint32_t test_rev (uint32_t a) > >> +{ > >> + return __rev (a); > >> +} > >> + > >> +/* > >> +** test_revl: > >> +** rev [wx]0, [wx]0 > >> +** ret > >> +*/ > >> + 
> >> +unsigned long test_revl (unsigned long a) > >> +{ > >> + return __revl (a); > >> +} > >> + > >> +/* > >> +** test_revll: > >> +** rev x0, x0 > >> +** ret > >> +*/ > >> + > >> +uint64_t test_revll (uint64_t a) > >> +{ > >> + return __revll (a); > >> +} > >> + > >> +/* > >> +** test_rev16: > >> +** rev16 w0, w0 > >> +** ret > >> +*/ > >> + > >> +uint32_t test_rev16 (uint32_t a) > >> +{ > >> + return __rev16 (a); > >> +} > >> + > >> +/* > >> +** test_rev16l: > >> +** rev16 [wx]0, [wx]0 > >> +** ret > >> +*/ > >> + > >> +unsigned long test_rev16l (unsigned long a) > >> +{ > >> + return __rev16l (a); > >> +} > >> + > >> +/* > >> +** test_rev16ll: > >> +** rev16 x0, x0 > >> +** ret > >> +*/ > >> + > >> +uint64_t test_rev16ll (uint64_t a) > >> +{ > >> + return __rev16ll (a); > >> +} > >> + > >> +/* > >> +** test_ror: > >> +** ror w0, w0, w1 > >> +** ret > >> +*/ > >> + > >> +uint32_t test_ror (uint32_t a, uint32_t r) > >> +{ > >> + return __ror (a, r); > >> +} > >> + > >> +/* > >> +** test_rorl: > >> +** ror [wx]0, [wx]0, [wx]1 > >> +** ret > >> +*/ > >> + > >> +unsigned long test_rorl (unsigned long a, uint32_t r) > >> +{ > >> + return __rorl (a, r); > >> +} > >> + > >> +/* > >> +** test_rorll: > >> +** ror x0, x0, x1 > >> +** ret > >> +*/ > >> + > >> +uint64_t test_rorll (uint64_t a, uint32_t r) > >> +{ > >> + return __rorll (a, r); > >> +} > >> + > >> +/* > >> +** test_revsh: > >> +** rev16 w0, w0 > >> +** ret > >> +*/ > >> + > >> +int16_t test_revsh (int16_t a) > >> +{ > >> + return __revsh (a); > >> +} > >> + > >> +uint32_t *g32; > >> +unsigned long *gul; > >> +uint64_t *g64; > >> +unsigned int *gui; > >> +int16_t *g16; > >> + > >> +/* > >> +** test_clz_mem: > >> +** ... > >> +** clz w[0-9]+, w[0-9]+ > >> +** ... > >> +** ret > >> +*/ > >> + > >> +void test_clz_mem (uint32_t *a) > >> +{ > >> + *gui = __clz (*a); > >> +} > >> + > >> +/* > >> +** test_clzl_mem: > >> +** ... > >> +** clz [wx][0-9]+, [wx][0-9]+ > >> +** ... 
> >> +** ret > >> +*/ > >> + > >> +void test_clzl_mem (unsigned long *a) > >> +{ > >> + *gui = __clzl (*a); > >> +} > >> + > >> +/* > >> +** test_clzll_mem: > >> +** ... > >> +** clz x[0-9]+, x[0-9]+ > >> +** ... > >> +** ret > >> +*/ > >> + > >> +void test_clzll_mem (uint64_t *a) > >> +{ > >> + *gui = __clzll (*a); > >> +} > >> + > >> +/* > >> +** test_cls_mem: > >> +** ... > >> +** cls w[0-9]+, w[0-9]+ > >> +** ... > >> +** ret > >> +*/ > >> + > >> +void test_cls_mem (uint32_t *a) > >> +{ > >> + *gui = __cls (*a); > >> +} > >> + > >> +/* > >> +** test_clsl_mem: > >> +** ... > >> +** cls [wx][0-9]+, [wx][0-9]+ > >> +** ... > >> +** ret > >> +*/ > >> + > >> +void test_clsl_mem (unsigned long *a) > >> +{ > >> + *gui = __clsl (*a); > >> +} > >> + > >> +/* > >> +** test_clsll_mem: > >> +** ... > >> +** cls x[0-9]+, x[0-9]+ > >> +** ... > >> +** ret > >> +*/ > >> + > >> +void test_clsll_mem (uint64_t *a) > >> +{ > >> + *gui = __clsll (*a); > >> +} > >> + > >> +/* > >> +** test_rbit_mem: > >> +** ... > >> +** rbit w[0-9]+, w[0-9]+ > >> +** ... > >> +** ret > >> +*/ > >> + > >> +void test_rbit_mem (uint32_t *a) > >> +{ > >> + *g32 = __rbit (*a); > >> +} > >> + > >> +/* > >> +** test_rbitl_mem: > >> +** ... > >> +** rbit [wx][0-9]+, [wx][0-9]+ > >> +** ... > >> +** ret > >> +*/ > >> + > >> +void test_rbitl_mem (unsigned long *a) > >> +{ > >> + *gul = __rbitl (*a); > >> +} > >> + > >> +/* > >> +** test_rbitll_mem: > >> +** ... > >> +** rbit x[0-9]+, x[0-9]+ > >> +** ... > >> +** ret > >> +*/ > >> + > >> +void test_rbitll_mem (uint64_t *a) > >> +{ > >> + *g64 = __rbitll (*a); > >> +} > >> + > >> +/* > >> +** test_rev_mem: > >> +** ... > >> +** rev w[0-9]+, w[0-9]+ > >> +** ... > >> +** ret > >> +*/ > >> + > >> +void test_rev_mem (uint32_t *a) > >> +{ > >> + *g32 = __rev (*a); > >> +} > >> + > >> +/* > >> +** test_revl_mem: > >> +** ... > >> +** rev [wx][0-9]+, [wx][0-9]+ > >> +** ... 
> >> +** ret > >> +*/ > >> + > >> +void test_revl_mem (unsigned long *a) > >> +{ > >> + *gul = __revl (*a); > >> +} > >> + > >> +/* > >> +** test_revll_mem: > >> +** ... > >> +** rev x[0-9]+, x[0-9]+ > >> +** ... > >> +** ret > >> +*/ > >> + > >> +void test_revll_mem (uint64_t *a) > >> +{ > >> + *g64 = __revll (*a); > >> +} > >> + > >> +/* > >> +** test_rev16_mem: > >> +** ... > >> +** rev16 w[0-9]+, w[0-9]+ > >> +** ... > >> +** ret > >> +*/ > >> + > >> +void test_rev16_mem (uint32_t *a) > >> +{ > >> + *g32 = __rev16 (*a); > >> +} > >> + > >> +/* > >> +** test_rev16l_mem: > >> +** ... > >> +** rev16 [wx][0-9]+, [wx][0-9]+ > >> +** ... > >> +** ret > >> +*/ > >> + > >> +void test_rev16l_mem (unsigned long *a) > >> +{ > >> + *gul = __rev16l (*a); > >> +} > >> + > >> +/* > >> +** test_rev16ll_mem: > >> +** ... > >> +** rev16 x[0-9]+, x[0-9]+ > >> +** ... > >> +** ret > >> +*/ > >> + > >> +void test_rev16ll_mem (uint64_t *a) > >> +{ > >> + *g64 = __rev16ll (*a); > >> +} > >> + > >> +/* > >> +** test_ror_mem: > >> +** ... > >> +** ror w[0-9]+, w[0-9]+, w[0-9]+ > >> +** ... > >> +** ret > >> +*/ > >> + > >> +void test_ror_mem (uint32_t *a, uint32_t *r) > >> +{ > >> + *g32 = __ror (*a, *r); > >> +} > >> + > >> +/* > >> +** test_rorl_mem: > >> +** ... > >> +** ror [wx][0-9]+, [wx][0-9]+, [wx][0-9]+ > >> +** ... > >> +** ret > >> +*/ > >> + > >> +void test_rorl_mem (unsigned long *a, uint32_t *r) > >> +{ > >> + *gul = __rorl (*a, *r); > >> +} > >> + > >> +/* > >> +** test_rorll_mem: > >> +** ... > >> +** ror x[0-9]+, x[0-9]+, x[0-9]+ > >> +** ... > >> +** ret > >> +*/ > >> + > >> +void test_rorll_mem (uint64_t *a, uint32_t *r) > >> +{ > >> + *g64 = __rorll (*a, *r); > >> +} > >> + > >> +/* > >> +** test_revsh_mem: > >> +** ... > >> +** rev16 w[0-9]+, w[0-9]+ > >> +** ... > >> +** ret > >> +*/ > >> + > >> +void test_revsh_mem (int16_t *a) > >> +{ > >> + *g16 = __revsh (*a); > >> +}
Hi all, Can I backport this to the gcc-11 branch? It also applies cleanly (with the exception of the file extension being different: 'aarch64-builtins.cc' vs 'aarch64-builtins.c'). Bootstrapped and regression tested on aarch64-linux-gnu. Kind regards, Andre Vieira
> -----Original Message----- > From: Andre Vieira (lists) <andre.simoesdiasvieira@arm.com> > Sent: Tuesday, October 4, 2022 11:34 AM > To: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; gcc-patches@gcc.gnu.org; > Richard Sandiford <Richard.Sandiford@arm.com>; Richard Biener > <rguenther@suse.de> > Subject: Re: [PATCH][AArch64] Implement ACLE Data Intrinsics > > Hi all, > > Can I backport this to gcc-11 branch? Also applies cleanly (with the > exception of the file extensions being different: 'aarch64-builtins.cc > vs aarch64-builtins.c'). > Ok by me if testing is clean. Thanks, Kyrill > Bootstrapped and regression tested on aarch64-linux-gnu. > > Kind regards, > Andre Vieira
On Wed, 5 Oct 2022, Kyrylo Tkachov wrote: > > > > -----Original Message----- > > From: Andre Vieira (lists) <andre.simoesdiasvieira@arm.com> > > Sent: Tuesday, October 4, 2022 11:34 AM > > To: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; gcc-patches@gcc.gnu.org; > > Richard Sandiford <Richard.Sandiford@arm.com>; Richard Biener > > <rguenther@suse.de> > > Subject: Re: [PATCH][AArch64] Implement ACLE Data Intrinsics > > > > Hi all, > > > > Can I backport this to gcc-11 branch? Also applies cleanly (with the > > exception of the file extensions being different: 'aarch64-builtins.cc > > vs aarch64-builtins.c'). > > > > Ok by me if testing is clean. Whether to backport target patches like this is really up to the maintainers to decide. Richard.
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index e0a741ac663188713e21f457affa57217d074783..91a687dee13a27c21f0c50de9ba777aa900d6096 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -613,6 +613,12 @@ enum aarch64_builtins AARCH64_LS64_BUILTIN_ST64B, AARCH64_LS64_BUILTIN_ST64BV, AARCH64_LS64_BUILTIN_ST64BV0, + AARCH64_REV16, + AARCH64_REV16L, + AARCH64_REV16LL, + AARCH64_RBIT, + AARCH64_RBITL, + AARCH64_RBITLL, AARCH64_BUILTIN_MAX }; @@ -1664,10 +1670,41 @@ aarch64_init_ls64_builtins (void) = aarch64_general_add_builtin (data[i].name, data[i].type, data[i].code); } +static void +aarch64_init_data_intrinsics (void) +{ + tree uint32_fntype = build_function_type_list (uint32_type_node, + uint32_type_node, NULL_TREE); + tree long_fntype = build_function_type_list (long_unsigned_type_node, + long_unsigned_type_node, + NULL_TREE); + tree uint64_fntype = build_function_type_list (uint64_type_node, + uint64_type_node, NULL_TREE); + aarch64_builtin_decls[AARCH64_REV16] + = aarch64_general_add_builtin ("__builtin_aarch64_rev16", uint32_fntype, + AARCH64_REV16); + aarch64_builtin_decls[AARCH64_REV16L] + = aarch64_general_add_builtin ("__builtin_aarch64_rev16l", long_fntype, + AARCH64_REV16L); + aarch64_builtin_decls[AARCH64_REV16LL] + = aarch64_general_add_builtin ("__builtin_aarch64_rev16ll", uint64_fntype, + AARCH64_REV16LL); + aarch64_builtin_decls[AARCH64_RBIT] + = aarch64_general_add_builtin ("__builtin_aarch64_rbit", uint32_fntype, + AARCH64_RBIT); + aarch64_builtin_decls[AARCH64_RBITL] + = aarch64_general_add_builtin ("__builtin_aarch64_rbitl", long_fntype, + AARCH64_RBITL); + aarch64_builtin_decls[AARCH64_RBITLL] + = aarch64_general_add_builtin ("__builtin_aarch64_rbitll", uint64_fntype, + AARCH64_RBITLL); +} + /* Implement #pragma GCC aarch64 "arm_acle.h". 
*/ void handle_arm_acle_h (void) { + aarch64_init_data_intrinsics (); if (TARGET_LS64) aarch64_init_ls64_builtins (); } @@ -2393,6 +2430,32 @@ aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target) emit_insn (pat); return target; } +/* Function to expand an expression EXP which calls one of the ACLE Data + Intrinsic builtins FCODE with the result going to TARGET. */ +static rtx +aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target) +{ + rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); + machine_mode mode = GET_MODE (op0); + rtx pat; + switch (fcode) + { + case AARCH64_REV16: + case AARCH64_REV16L: + case AARCH64_REV16LL: + pat = gen_aarch64_rev16 (mode, target, op0); + break; + case AARCH64_RBIT: + case AARCH64_RBITL: + case AARCH64_RBITLL: + pat = gen_aarch64_rbit (mode, target, op0); + break; + default: + gcc_unreachable (); + } + emit_insn (pat); + return target; +} /* Expand an expression EXP as fpsr or fpcr setter (depending on UNSPEC) using MODE. 
*/ @@ -2551,6 +2614,9 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, if (fcode >= AARCH64_MEMTAG_BUILTIN_START && fcode <= AARCH64_MEMTAG_BUILTIN_END) return aarch64_expand_builtin_memtag (fcode, exp, target); + if (fcode >= AARCH64_REV16 + && fcode <= AARCH64_RBITLL) + return aarch64_expand_builtin_data_intrinsic (fcode, exp, target); gcc_unreachable (); } diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index acec8c1146765c0fac73c15351853324b8f03209..ef0aed25c6b26eff61f9f6030dc5921a534e3d19 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -4950,7 +4950,7 @@ (define_expand "ffs<mode>2" rtx ccreg = aarch64_gen_compare_reg (EQ, operands[1], const0_rtx); rtx x = gen_rtx_NE (VOIDmode, ccreg, const0_rtx); - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1])); emit_insn (gen_clz<mode>2 (operands[0], operands[0])); emit_insn (gen_csinc3<mode>_insn (operands[0], x, operands[0], const0_rtx)); DONE; @@ -4996,7 +4996,7 @@ (define_insn "clrsb<mode>2" [(set_attr "type" "clz")] ) -(define_insn "rbit<mode>2" +(define_insn "@aarch64_rbit<mode>" [(set (match_operand:GPI 0 "register_operand" "=r") (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_RBIT))] "" @@ -5017,7 +5017,7 @@ (define_insn_and_split "ctz<mode>2" "reload_completed" [(const_int 0)] " - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1])); emit_insn (gen_clz<mode>2 (operands[0], operands[0])); DONE; ") @@ -6022,6 +6022,13 @@ (define_insn "bswaphi2" [(set_attr "type" "rev")] ) +(define_insn "@aarch64_rev16<mode>" + [(set (match_operand:GPI 0 "register_operand" "=r") + (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_REV))] + "" + "rev16\\t%<w>0, %<w>1" + [(set_attr "type" "rev")]) + (define_insn "*aarch64_bfxil<mode>" [(set (match_operand:GPI 0 
"register_operand" "=r,r") (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "r,0") diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h index 9775a48c65825b424d3eb442384f5ab87b734fd7..faddd5d0a780c5d65ba430bd3174c701e848c794 100644 --- a/gcc/config/aarch64/arm_acle.h +++ b/gcc/config/aarch64/arm_acle.h @@ -28,6 +28,7 @@ #define _GCC_ARM_ACLE_H #include <stdint.h> +#include <stddef.h> #pragma GCC aarch64 "arm_acle.h" @@ -35,6 +36,54 @@ extern "C" { #endif +#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) \ +__extension__ extern __inline TYPE \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +NAME (TYPE value, uint32_t rotate) \ +{ \ + size_t size = sizeof (TYPE) * __CHAR_BIT__; \ + rotate = rotate % size; \ + return value >> rotate | value << (size - rotate); \ +} + +_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t) +_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long) +_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t) + +#define _GCC_ARM_ACLE_DATA_FN(NAME, BUILTIN, TYPE) \ +__extension__ extern __inline TYPE \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +__##NAME (TYPE value) \ +{ \ + return __builtin_##BUILTIN (value); \ +} + +_GCC_ARM_ACLE_DATA_FN (clz, clz, uint32_t) +_GCC_ARM_ACLE_DATA_FN (clzl, clzl, unsigned long) +_GCC_ARM_ACLE_DATA_FN (clzll, clzll, uint64_t) +_GCC_ARM_ACLE_DATA_FN (cls, clrsb, uint32_t) +_GCC_ARM_ACLE_DATA_FN (clsl, clrsbl, unsigned long) +_GCC_ARM_ACLE_DATA_FN (clsll, clrsbll, uint64_t) +_GCC_ARM_ACLE_DATA_FN (rev16, aarch64_rev16, uint32_t) +_GCC_ARM_ACLE_DATA_FN (rev16l, aarch64_rev16l, unsigned long) +_GCC_ARM_ACLE_DATA_FN (rev16ll, aarch64_rev16ll, uint64_t) +_GCC_ARM_ACLE_DATA_FN (rbit, aarch64_rbit, uint32_t) +_GCC_ARM_ACLE_DATA_FN (rbitl, aarch64_rbitl, unsigned long) +_GCC_ARM_ACLE_DATA_FN (rbitll, aarch64_rbitll, uint64_t) +_GCC_ARM_ACLE_DATA_FN (revsh, bswap16, int16_t) +_GCC_ARM_ACLE_DATA_FN (rev, bswap32, uint32_t) +_GCC_ARM_ACLE_DATA_FN (revll, bswap64, uint64_t) + +__extension__ 
extern __inline unsigned long +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__revl (unsigned long __value) +{ + if (sizeof (unsigned long) == 8) + return __revll (__value); + else + return __rev (__value); +} + #pragma GCC push_options #pragma GCC target ("arch=armv8.3-a") __extension__ extern __inline int32_t diff --git a/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c new file mode 100644 index 0000000000000000000000000000000000000000..90813184704dfcdaf2d24d523ff744aa6cbedf1a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c @@ -0,0 +1,215 @@ +/* Test the ACLE data intrinsics. */ +/* { dg-do assemble } */ +/* { dg-additional-options "--save-temps -O1" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include "arm_acle.h" + +/* +** test_clz: +** clz w0, w0 +** ret +*/ + +uint32_t test_clz (uint32_t a) +{ + return __clz (a); +} + +/* +** test_clzl: +** clz [wx]0, [wx]0 +** ret +*/ + +unsigned long test_clzl (unsigned long a) +{ + return __clzl (a); +} + +/* +** test_clzll: +** clz x0, x0 +** ret +*/ + +uint64_t test_clzll (uint64_t a) +{ + return __clzll (a); +} + +/* +** test_cls: +** cls w0, w0 +** ret +*/ + +uint32_t test_cls (uint32_t a) +{ + return __cls (a); +} + +/* +** test_clsl: +** cls [wx]0, [wx]0 +** ret +*/ + +unsigned long test_clsl (unsigned long a) +{ + return __clsl (a); +} + +/* +** test_clsll: +** cls x0, x0 +** ret +*/ + +uint64_t test_clsll (uint64_t a) +{ + return __clsll (a); +} + +/* +** test_rbit: +** rbit w0, w0 +** ret +*/ + +uint32_t test_rbit (uint32_t a) +{ + return __rbit (a); +} + +/* +** test_rbitl: +** rbit [wx]0, [wx]0 +** ret +*/ + +unsigned long test_rbitl (unsigned long a) +{ + return __rbitl (a); +} + +/* +** test_rbitll: +** rbit x0, x0 +** ret +*/ + +uint64_t test_rbitll (uint64_t a) +{ + return __rbitll (a); +} + +/* +** test_rev: +** rev w0, w0 +** ret +*/ + +uint32_t test_rev (uint32_t a) +{ 
+ return __builtin_bswap32 (a); +} + +/* +** test_revl: +** rev [wx]0, [wx]0 +** ret +*/ + +unsigned long test_revl (unsigned long a) +{ + return __revl (a); +} + +/* +** test_revll: +** rev x0, x0 +** ret +*/ + +uint64_t test_revll (uint64_t a) +{ + return __revll (a); +} + +/* +** test_rev16: +** rev16 w0, w0 +** ret +*/ + +uint32_t test_rev16 (uint32_t a) +{ + return __rev16 (a); +} + +/* +** test_rev16l: +** rev16 [wx]0, [wx]0 +** ret +*/ + +unsigned long test_rev16l (unsigned long a) +{ + return __rev16l (a); +} + +/* +** test_rev16ll: +** rev16 x0, x0 +** ret +*/ + +uint64_t test_rev16ll (uint64_t a) +{ + return __rev16ll (a); +} + +/* +** test_ror: +** ror w0, w0, w1 +** ret +*/ + +uint32_t test_ror (uint32_t a, uint32_t r) +{ + return __ror (a, r); +} + +/* +** test_rorl: +** ror [wx]0, [wx]0, [wx]1 +** ret +*/ + +unsigned long test_rorl (unsigned long a, uint32_t r) +{ + return __rorl (a, r); +} + +/* +** test_rorll: +** ror x0, x0, x1 +** ret +*/ + +uint64_t test_rorll (uint64_t a, uint32_t r) +{ + return __rorll (a, r); +} + +/* +** test_revsh: +** rev16 w0, w0 +** ret +*/ + +int16_t test_revsh (int16_t a) +{ + return __revsh (a); +}