Message ID | 509f5ca2-3a32-42e1-b653-7777ccd2d6d3@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | rs6000, built-in cleanup patch series | expand |
Hi, on 2024/4/20 05:17, Carl Love wrote: > rs6000, add overloaded vec_sel with int128 arguments > > Extend the vec_sel built-in to take three signed/unsigned int128 arguments > and return a signed/unsigned int128 result. > > Extending the vec_sel built-in makes the existing buit-ins > __builtin_vsx_xxsel_1ti and __builtin_vsx_xxsel_1ti_uns obsolete. The > patch removes these built-ins. > > The patch adds documentation and test cases for the new overloaded vec_sel > built-ins. > > gcc/ChangeLog: > * config/rs6000/rs6000-builtins.def (__builtin_vsx_xxsel_1ti, > __builtin_vsx_xxsel_1ti_uns): Remove built-in definitions. > * config/rs6000/rs6000-overload.def (vec_sel): Add new overloaded > definitions. > * doc/extend.texi: Add documentation for new vec_sel arguments. > > gcc/testsuite/ChangeLog: > * gcc.target/powerpc/vec_sel_runnable-int128.c: New test file. > --- > gcc/config/rs6000/rs6000-builtins.def | 6 -- > gcc/config/rs6000/rs6000-overload.def | 4 + > gcc/doc/extend.texi | 14 ++++ > .../powerpc/vec-sel-runnable-i128.c | 84 +++++++++++++++++++ > 4 files changed, 102 insertions(+), 6 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c > > diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def > index d09e21a9151..46d2ae7b7cb 100644 > --- a/gcc/config/rs6000/rs6000-builtins.def > +++ b/gcc/config/rs6000/rs6000-builtins.def > @@ -1931,12 +1931,6 @@ > const vuc __builtin_vsx_xxsel_16qi_uns (vuc, vuc, vuc); > XXSEL_16QI_UNS vector_select_v16qi_uns {} > > - const vsq __builtin_vsx_xxsel_1ti (vsq, vsq, vsq); > - XXSEL_1TI vector_select_v1ti {} > - > - const vsq __builtin_vsx_xxsel_1ti_uns (vsq, vsq, vsq); > - XXSEL_1TI_UNS vector_select_v1ti_uns {} > - > const vd __builtin_vsx_xxsel_2df (vd, vd, vd); > XXSEL_2DF vector_select_v2df {} > > diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def > index 68501c05289..5912c9452f4 100644 > --- 
a/gcc/config/rs6000/rs6000-overload.def > +++ b/gcc/config/rs6000/rs6000-overload.def > @@ -3274,6 +3274,10 @@ > VSEL_2DF VSEL_2DF_B > vd __builtin_vec_sel (vd, vd, vull); > VSEL_2DF VSEL_2DF_U > + vsq __builtin_vec_sel (vsq, vsq, vsq); > + VSEL_1TI VSEL_1TI_S > + vuq __builtin_vec_sel (vuq, vuq, vuq); > + VSEL_1TI_UNS VSEL_1TI_U > ; The following variants are deprecated. > vsll __builtin_vec_sel (vsll, vsll, vsll); > VSEL_2DI_B VSEL_2DI_S > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi > index 64a43b55e2d..86b8e536dbe 100644 > --- a/gcc/doc/extend.texi > +++ b/gcc/doc/extend.texi > @@ -23358,6 +23358,20 @@ The programmer is responsible for understanding the endianness issues involved > with the first argument and the result. > @findex vec_replace_unaligned > > +Vector select > + > +@smallexample > +vector signed __int128 vec_sel (vector signed __int128, > + vector signed __int128, vector signed __int128); > +vector unsigned __int128 vec_sel (vector unsigned __int128, > + vector unsigned __int128, vector unsigned __int128); > +@end smallexample > + > +The overloaded built-in @code{vec_sel} with vector signed/unsigned __int128 > +arguments and returns a vector selecting bits from the two source vectors based > +on the values of the third input vector. This built-in is an extension of the > +@code{vec_sel} built-in documented in the PVIPR. > + Why did you place this in a section for ISA 3.1 (Power10)? It doesn't really require this support. The used instance VSEL_1TI and VSEL_1TI_UNS are placed in altivec stanza, so it looks that we should put it under the section "PowerPC AltiVec Built-in Functions on ISA 2.05". And since it's an extension of @code{vec_sel} documented in the PVIPR, I prefer to just mention it's "an extension of the @code{vec_sel} built-in documented in the PVIPR" and omitting the description to avoid possible slightly different wording. 
> Vector Shift Left Double Bit Immediate > @smallexample > @exdent vector signed char vec_sldb (vector signed char, vector signed char, > diff --git a/gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c b/gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c > new file mode 100644 > index 00000000000..58eb383e8c3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c > @@ -0,0 +1,84 @@ > +/* { dg-do run { target power10_hw }} */ > +/* { dg-require-effective-target int128 } */ > +/* { dg-require-effective-target power10_hw } */ As mentioned above, this doesn't require power10, you can specify vmx_hw. (btw removing { target power10_hw } on dg-do run line). > +/* { dg-options "-mdejagnu-cpu=power10 -save-temps" } */ s/-mdejagnu-cpu=power10/-maltivec/ s/-save-temps// > + > + > +#include <altivec.h> > + > + > +#define DEBUG 0 > + > +#if DEBUG > +#include <stdio.h> > +void print_i128 (unsigned __int128 val) > +{ > + printf(" 0x%016llx%016llx", > + (unsigned long long)(val >> 64), > + (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF)); > +} > +#endif Did you really test this debugging work as expected? With my experience when making r14-10011-g6e62ede7aaccc6, this debugging doesn't work and the way to initialize a vector int128 variable can easily suffer from endianness issue, so please double check this and test it on BE as well. 
BR, Kewen > + > +extern void abort (void); > + > +int > +main (int argc, char *argv []) > +{ > + vector signed __int128 src_va_s128; > + vector signed __int128 src_vb_s128; > + vector signed __int128 src_vc_s128; > + vector signed __int128 vresult_s128; > + vector signed __int128 expected_vresult_s128; > + > + vector unsigned __int128 src_va_u128; > + vector unsigned __int128 src_vb_u128; > + vector unsigned __int128 src_vc_u128; > + vector unsigned __int128 vresult_u128; > + vector unsigned __int128 expected_vresult_u128; > + > + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; > + src_vb_s128 = (vector signed __int128) {0xFEDCBA9876543210}; > + src_vc_s128 = (vector signed __int128) {0x3333333333333333}; > + expected_vresult_s128 = (vector signed __int128) {0x32147658ba9cfed0}; > + > + /* Signed arguments. */ > + vresult_s128 = vec_sel (src_va_s128, src_vb_s128, src_vc_s128); > + > + if (!vec_all_eq (vresult_s128, expected_vresult_s128)) > +#if DEBUG > + { > + printf ("ERROR, vec_sel (src_va_s128, src_vb_s128, src_vc_s128) result does not match expected output.\n"); > + printf (" Result: "); > + print_i128 ((unsigned __int128) vresult_s128); > + printf ("\n Expected result: "); > + print_i128 ((unsigned __int128) expected_vresult_s128); > + printf ("\n"); > + } > +#else > + abort (); > +#endif > + > + src_va_u128 = (vector unsigned __int128) {0x13579ACE02468BDF}; > + src_vb_u128 = (vector unsigned __int128) {0xA987654FEDCB3210}; > + src_vc_u128 = (vector unsigned __int128) {0x5555555555555555}; > + expected_vresult_u128 = (vector unsigned __int128) {0x32147658ba9cfed0}; > + > + /* Unigned arguments. 
*/ > + vresult_u128 = vec_sel (src_va_u128, src_vb_u128, src_vc_u128); > + > + if (!vec_all_eq (vresult_u128, expected_vresult_u128)) > +#if DEBUG > + { > + printf ("ERROR, vec_sel (src_va_u128, src_vb_u128, src_vc_u128) result does not match expected output.\n"); > + printf (" Result: "); > + print_i128 ((unsigned __int128) vresult_u128); > + printf ("\n Expected result: "); > + print_i128 ((unsigned __int128) expected_vresult_u128); > + printf ("\n"); > + } > +#else > + abort (); > +#endif > + > + return 0; > +}
Kewen: On 5/13/24 19:54, Kewen.Lin wrote: > Hi, > > on 2024/4/20 05:17, Carl Love wrote: >> rs6000, add overloaded vec_sel with int128 arguments >> >> Extend the vec_sel built-in to take three signed/unsigned int128 arguments >> and return a signed/unsigned int128 result. >> >> Extending the vec_sel built-in makes the existing buit-ins >> __builtin_vsx_xxsel_1ti and __builtin_vsx_xxsel_1ti_uns obsolete. The >> patch removes these built-ins. >> >> The patch adds documentation and test cases for the new overloaded vec_sel >> built-ins. >> >> gcc/ChangeLog: >> * config/rs6000/rs6000-builtins.def (__builtin_vsx_xxsel_1ti, >> __builtin_vsx_xxsel_1ti_uns): Remove built-in definitions. >> * config/rs6000/rs6000-overload.def (vec_sel): Add new overloaded >> definitions. >> * doc/extend.texi: Add documentation for new vec_sel arguments. >> >> gcc/testsuite/ChangeLog: >> * gcc.target/powerpc/vec_sel_runnable-int128.c: New test file. >> --- >> gcc/config/rs6000/rs6000-builtins.def | 6 -- >> gcc/config/rs6000/rs6000-overload.def | 4 + >> gcc/doc/extend.texi | 14 ++++ >> .../powerpc/vec-sel-runnable-i128.c | 84 +++++++++++++++++++ >> 4 files changed, 102 insertions(+), 6 deletions(-) >> create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c >> >> diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def >> index d09e21a9151..46d2ae7b7cb 100644 >> --- a/gcc/config/rs6000/rs6000-builtins.def >> +++ b/gcc/config/rs6000/rs6000-builtins.def >> @@ -1931,12 +1931,6 @@ >> const vuc __builtin_vsx_xxsel_16qi_uns (vuc, vuc, vuc); >> XXSEL_16QI_UNS vector_select_v16qi_uns {} >> >> - const vsq __builtin_vsx_xxsel_1ti (vsq, vsq, vsq); >> - XXSEL_1TI vector_select_v1ti {} >> - >> - const vsq __builtin_vsx_xxsel_1ti_uns (vsq, vsq, vsq); >> - XXSEL_1TI_UNS vector_select_v1ti_uns {} >> - >> const vd __builtin_vsx_xxsel_2df (vd, vd, vd); >> XXSEL_2DF vector_select_v2df {} >> >> diff --git a/gcc/config/rs6000/rs6000-overload.def 
b/gcc/config/rs6000/rs6000-overload.def >> index 68501c05289..5912c9452f4 100644 >> --- a/gcc/config/rs6000/rs6000-overload.def >> +++ b/gcc/config/rs6000/rs6000-overload.def >> @@ -3274,6 +3274,10 @@ >> VSEL_2DF VSEL_2DF_B >> vd __builtin_vec_sel (vd, vd, vull); >> VSEL_2DF VSEL_2DF_U >> + vsq __builtin_vec_sel (vsq, vsq, vsq); >> + VSEL_1TI VSEL_1TI_S >> + vuq __builtin_vec_sel (vuq, vuq, vuq); >> + VSEL_1TI_UNS VSEL_1TI_U >> ; The following variants are deprecated. >> vsll __builtin_vec_sel (vsll, vsll, vsll); >> VSEL_2DI_B VSEL_2DI_S >> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi >> index 64a43b55e2d..86b8e536dbe 100644 >> --- a/gcc/doc/extend.texi >> +++ b/gcc/doc/extend.texi >> @@ -23358,6 +23358,20 @@ The programmer is responsible for understanding the endianness issues involved >> with the first argument and the result. >> @findex vec_replace_unaligned >> >> +Vector select >> + >> +@smallexample >> +vector signed __int128 vec_sel (vector signed __int128, >> + vector signed __int128, vector signed __int128); >> +vector unsigned __int128 vec_sel (vector unsigned __int128, >> + vector unsigned __int128, vector unsigned __int128); >> +@end smallexample >> + >> +The overloaded built-in @code{vec_sel} with vector signed/unsigned __int128 >> +arguments and returns a vector selecting bits from the two source vectors based >> +on the values of the third input vector. This built-in is an extension of the >> +@code{vec_sel} built-in documented in the PVIPR. >> + > > Why did you place this in a section for ISA 3.1 (Power10)? It doesn't really > require this support. The used instance VSEL_1TI and VSEL_1TI_UNS are placed > in altivec stanza, so it looks that we should put it under the section > "PowerPC AltiVec Built-in Functions on ISA 2.05". 
And since it's an extension > of @code{vec_sel} documented in the PVIPR, I prefer to just mention it's "an > extension of the @code{vec_sel} built-in documented in the PVIPR" and omitting > the description to avoid possible slightly different wording. Honestly, at this point in time I don't remember why I put it there. It has been too long since I created the patch. That said, the test case requires Power 10 due to the comparison check using built-in vec_all_eq but that is another issue. The built-in generates the xxsel instruction that is an ISA 2.06 instruction. So, I would say it should go into the ISA 2.06 section. I moved it to the ISA 2.06 section. For consistency with the previous patches/feedback, the descriptions are being dropped and replaced with the instance being a new extension of the built-in that is documented in the PVIPR. > >> Vector Shift Left Double Bit Immediate >> @smallexample >> @exdent vector signed char vec_sldb (vector signed char, vector signed char, >> diff --git a/gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c b/gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c >> new file mode 100644 >> index 00000000000..58eb383e8c3 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c >> @@ -0,0 +1,84 @@ >> +/* { dg-do run { target power10_hw }} */ >> +/* { dg-require-effective-target int128 } */ >> +/* { dg-require-effective-target power10_hw } */ > > As mentioned above, this doesn't require power10, you can specify vmx_hw. > (btw removing { target power10_hw } on dg-do run line). > As mentioned, the testcase uses the vec_all_eq which requires Power 10. So, I rewrote the test case to check the result value and expected result value byte by byte so the test will run on Power 7 (ISA 2.06). 
The new version of the test case compiles and runs with the command: gcc -g -mcpu=power7 vec-sel-runnable-i128.c -o vec-sel-runnable-i128 >> +/* { dg-options "-mdejagnu-cpu=power10 -save-temps" } */ > > s/-mdejagnu-cpu=power10/-maltivec/ > s/-save-temps// > Removed the Power 10, using vmx_hw instead. >> + >> + >> +#include <altivec.h> >> + >> + >> +#define DEBUG 0 >> + >> +#if DEBUG >> +#include <stdio.h> >> +void print_i128 (unsigned __int128 val) >> +{ >> + printf(" 0x%016llx%016llx", >> + (unsigned long long)(val >> 64), >> + (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF)); >> +} >> +#endif > > Did you really test this debugging work as expected? > With my experience when making r14-10011-g6e62ede7aaccc6, > this debugging doesn't work and the way to initialize > a vector int128 variable can easily suffer from endianness > issue, so please double check this and test it on BE as well. Yes, It seemed to work with this version of gcc. I retested the patch per your request. I set DEBUG to 1, changed the expected result and ran it on LE: ./vec-sel-runnable-i128 ERROR, vec_sel (src_va_s128, src_vb_s128, src_vc_s128) result does not match expected output. Result: 0x000000000000000032147658ba9cfed0 Expected result: 0x000000000000000032147658ba9cfed1 I compiled the patch series on BE and ran the test there: ./vec-sel-runnable-i128 ERROR, vec_sel (src_va_s128, src_vb_s128, src_vc_s128) result does not match expected output. Result: 0x000000000000000032147658ba9cfed0 Expected result: 0x000000000000000032147658ba9cfed1 I am not sure exactly what issue you are concerned about with the print statement. 
But, we could implement the print statement as follows if you prefer: void print_i128 (unsigned __int128 val) { int i; union convert_u { unsigned __int128 val; char bytes[16]; } convert; convert.val = val; #if __LITTLE_ENDIAN__ for (i = 15; i >= 0; i--) #else for (i = 0; i < 16; i++) #endif printf(" 0x"); printf("%02x", convert.bytes[i]); } which gives the same result on LE: ./vec-sel-runnable-i128 ERROR, vec_sel (src_va_s128, src_vb_s128, src_vc_s128) result does not match expected output. Result: 0x000000000000000032147658ba9cfed0 Expected result: 0x000000000000000032147658ba9cfed1 and on BE: ./vec-sel-runnable-i128 ERROR, vec_sel (src_va_s128, src_vb_s128, src_vc_s128) result does not match expected output. Result: 0x000000000000000032147658ba9cfed0 Expected result: 0x000000000000000032147658ba9cfed1 Sounds like there was some issue that you noticed on r14-10011-g6e62ede7aaccc6. The new version of print_i128 should be functionally equivalent but perhaps is "safer"? Let me know if you would prefer I use the new version of the print_i128 function or if the original is OK? Thanks. Carl > > BR, > Kewen > >> + >> +extern void abort (void); >> + >> +int >> +main (int argc, char *argv []) >> +{ >> + vector signed __int128 src_va_s128; >> + vector signed __int128 src_vb_s128; >> + vector signed __int128 src_vc_s128; >> + vector signed __int128 vresult_s128; >> + vector signed __int128 expected_vresult_s128; >> + >> + vector unsigned __int128 src_va_u128; >> + vector unsigned __int128 src_vb_u128; >> + vector unsigned __int128 src_vc_u128; >> + vector unsigned __int128 vresult_u128; >> + vector unsigned __int128 expected_vresult_u128; >> + >> + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; >> + src_vb_s128 = (vector signed __int128) {0xFEDCBA9876543210}; >> + src_vc_s128 = (vector signed __int128) {0x3333333333333333}; >> + expected_vresult_s128 = (vector signed __int128) {0x32147658ba9cfed0}; >> + >> + /* Signed arguments. 
*/ >> + vresult_s128 = vec_sel (src_va_s128, src_vb_s128, src_vc_s128); >> + >> + if (!vec_all_eq (vresult_s128, expected_vresult_s128)) >> +#if DEBUG >> + { >> + printf ("ERROR, vec_sel (src_va_s128, src_vb_s128, src_vc_s128) result does not match expected output.\n"); >> + printf (" Result: "); >> + print_i128 ((unsigned __int128) vresult_s128); >> + printf ("\n Expected result: "); >> + print_i128 ((unsigned __int128) expected_vresult_s128); >> + printf ("\n"); >> + } >> +#else >> + abort (); >> +#endif >> + >> + src_va_u128 = (vector unsigned __int128) {0x13579ACE02468BDF}; >> + src_vb_u128 = (vector unsigned __int128) {0xA987654FEDCB3210}; >> + src_vc_u128 = (vector unsigned __int128) {0x5555555555555555}; >> + expected_vresult_u128 = (vector unsigned __int128) {0x32147658ba9cfed0}; >> + >> + /* Unigned arguments. */ >> + vresult_u128 = vec_sel (src_va_u128, src_vb_u128, src_vc_u128); >> + >> + if (!vec_all_eq (vresult_u128, expected_vresult_u128)) >> +#if DEBUG >> + { >> + printf ("ERROR, vec_sel (src_va_u128, src_vb_u128, src_vc_u128) result does not match expected output.\n"); >> + printf (" Result: "); >> + print_i128 ((unsigned __int128) vresult_u128); >> + printf ("\n Expected result: "); >> + print_i128 ((unsigned __int128) expected_vresult_u128); >> + printf ("\n"); >> + } >> +#else >> + abort (); >> +#endif >> + >> + return 0; >> +} >
Hi Carl, on 2024/5/22 08:13, Carl Love wrote: > Kewen: > > On 5/13/24 19:54, Kewen.Lin wrote: >> Hi, >> >> on 2024/4/20 05:17, Carl Love wrote: >>> rs6000, add overloaded vec_sel with int128 arguments >>> >>> Extend the vec_sel built-in to take three signed/unsigned int128 arguments >>> and return a signed/unsigned int128 result. >>> >>> Extending the vec_sel built-in makes the existing buit-ins >>> __builtin_vsx_xxsel_1ti and __builtin_vsx_xxsel_1ti_uns obsolete. The >>> patch removes these built-ins. >>> >>> The patch adds documentation and test cases for the new overloaded vec_sel >>> built-ins. >>> >>> gcc/ChangeLog: >>> * config/rs6000/rs6000-builtins.def (__builtin_vsx_xxsel_1ti, >>> __builtin_vsx_xxsel_1ti_uns): Remove built-in definitions. >>> * config/rs6000/rs6000-overload.def (vec_sel): Add new overloaded >>> definitions. >>> * doc/extend.texi: Add documentation for new vec_sel arguments. >>> >>> gcc/testsuite/ChangeLog: >>> * gcc.target/powerpc/vec_sel_runnable-int128.c: New test file. 
>>> --- >>> gcc/config/rs6000/rs6000-builtins.def | 6 -- >>> gcc/config/rs6000/rs6000-overload.def | 4 + >>> gcc/doc/extend.texi | 14 ++++ >>> .../powerpc/vec-sel-runnable-i128.c | 84 +++++++++++++++++++ >>> 4 files changed, 102 insertions(+), 6 deletions(-) >>> create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c >>> >>> diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def >>> index d09e21a9151..46d2ae7b7cb 100644 >>> --- a/gcc/config/rs6000/rs6000-builtins.def >>> +++ b/gcc/config/rs6000/rs6000-builtins.def >>> @@ -1931,12 +1931,6 @@ >>> const vuc __builtin_vsx_xxsel_16qi_uns (vuc, vuc, vuc); >>> XXSEL_16QI_UNS vector_select_v16qi_uns {} >>> >>> - const vsq __builtin_vsx_xxsel_1ti (vsq, vsq, vsq); >>> - XXSEL_1TI vector_select_v1ti {} >>> - >>> - const vsq __builtin_vsx_xxsel_1ti_uns (vsq, vsq, vsq); >>> - XXSEL_1TI_UNS vector_select_v1ti_uns {} >>> - >>> const vd __builtin_vsx_xxsel_2df (vd, vd, vd); >>> XXSEL_2DF vector_select_v2df {} >>> >>> diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def >>> index 68501c05289..5912c9452f4 100644 >>> --- a/gcc/config/rs6000/rs6000-overload.def >>> +++ b/gcc/config/rs6000/rs6000-overload.def >>> @@ -3274,6 +3274,10 @@ >>> VSEL_2DF VSEL_2DF_B >>> vd __builtin_vec_sel (vd, vd, vull); >>> VSEL_2DF VSEL_2DF_U >>> + vsq __builtin_vec_sel (vsq, vsq, vsq); >>> + VSEL_1TI VSEL_1TI_S >>> + vuq __builtin_vec_sel (vuq, vuq, vuq); >>> + VSEL_1TI_UNS VSEL_1TI_U >>> ; The following variants are deprecated. >>> vsll __builtin_vec_sel (vsll, vsll, vsll); >>> VSEL_2DI_B VSEL_2DI_S >>> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi >>> index 64a43b55e2d..86b8e536dbe 100644 >>> --- a/gcc/doc/extend.texi >>> +++ b/gcc/doc/extend.texi >>> @@ -23358,6 +23358,20 @@ The programmer is responsible for understanding the endianness issues involved >>> with the first argument and the result. 
>>> @findex vec_replace_unaligned >>> >>> +Vector select >>> + >>> +@smallexample >>> +vector signed __int128 vec_sel (vector signed __int128, >>> + vector signed __int128, vector signed __int128); >>> +vector unsigned __int128 vec_sel (vector unsigned __int128, >>> + vector unsigned __int128, vector unsigned __int128); >>> +@end smallexample >>> + >>> +The overloaded built-in @code{vec_sel} with vector signed/unsigned __int128 >>> +arguments and returns a vector selecting bits from the two source vectors based >>> +on the values of the third input vector. This built-in is an extension of the >>> +@code{vec_sel} built-in documented in the PVIPR. >>> + >> >> Why did you place this in a section for ISA 3.1 (Power10)? It doesn't really >> require this support. The used instance VSEL_1TI and VSEL_1TI_UNS are placed >> in altivec stanza, so it looks that we should put it under the section >> "PowerPC AltiVec Built-in Functions on ISA 2.05". And since it's an extension >> of @code{vec_sel} documented in the PVIPR, I prefer to just mention it's "an >> extension of the @code{vec_sel} built-in documented in the PVIPR" and omitting >> the description to avoid possible slightly different wording. > > Honestly, at this point in time I don't remember why I put it there. It has been too long since I created the patch. That said, the test case requires Power 10 do to the comparison check using built-in vec_all_eq but that is another issue. > The built-in generates the xxsel instruction that is an ISA 2.06 instruction. So, I would say it should to into the ISA 2.06 section. I moved it to the ISA 2.06 section. But the underlying implementation is: const vsq __builtin_altivec_vsel_1ti (vsq, vsq, vuq); VSEL_1TI vector_select_v1ti {} const vuq __builtin_altivec_vsel_1ti_uns (vuq, vuq, vuq); VSEL_1TI_UNS vector_select_v1ti_uns {} , it's under altivec stanza and can result with insn vsel (so not xxsel), vsel is ISA 2.03, so I think ISA 2.05 better matches the implementation. 
> For consistency with the previous patches/feedback, the descriptions are being dropped and replaced with the instance being a new extension of the built-in that is documented in the PVIPR. >> >>> Vector Shift Left Double Bit Immediate >>> @smallexample >>> @exdent vector signed char vec_sldb (vector signed char, vector signed char, >>> diff --git a/gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c b/gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c >>> new file mode 100644 >>> index 00000000000..58eb383e8c3 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c >>> @@ -0,0 +1,84 @@ >>> +/* { dg-do run { target power10_hw }} */ >>> +/* { dg-require-effective-target int128 } */ >>> +/* { dg-require-effective-target power10_hw } */ >> >> As mentioned above, this doesn't require power10, you can specify vmx_hw. >> (btw removing { target power10_hw } on dg-do run line). >> > > As mentioned, the testcase uses the vec_all_eq which requires Power 10. So, I rewrote the test case > to check the result value and expected result value byte by byte so the test will run on Power 7 (ISA 2.06). Nice, thanks! > The new version of the test case compiles and runs with the command: > > gcc -g -mcpu=power7 vec-sel-runnable-i128.c -o vec-sel-runnable-i128 > > >>> +/* { dg-options "-mdejagnu-cpu=power10 -save-temps" } */ >> >> s/-mdejagnu-cpu=power10/-maltivec/ >> s/-save-temps// >> > > Removed the Power 10, using vmx_hw instead. > >>> + >>> + >>> +#include <altivec.h> >>> + >>> + >>> +#define DEBUG 0 >>> + >>> +#if DEBUG >>> +#include <stdio.h> >>> +void print_i128 (unsigned __int128 val) >>> +{ >>> + printf(" 0x%016llx%016llx", >>> + (unsigned long long)(val >> 64), >>> + (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF)); >>> +} >>> +#endif >> >> Did you really test this debugging work as expected? 
>> With my experience when making r14-10011-g6e62ede7aaccc6, >> this debugging doesn't work and the way to initialize >> a vector int128 variable can easily suffer from endianness >> issue, so please double check this and test it on BE as well. > > Yes, It seemed to work with this version of gcc. I retested the patch per your request. I set DEBUG to 1, changed the expected result and ran it on LE: > > ./vec-sel-runnable-i128 > ERROR, vec_sel (src_va_s128, src_vb_s128, src_vc_s128) result does not match expected output. > Result: 0x000000000000000032147658ba9cfed0 > Expected result: 0x000000000000000032147658ba9cfed1 > > I compiled the patch series on BE and ran the test there: > > ./vec-sel-runnable-i128 > ERROR, vec_sel (src_va_s128, src_vb_s128, src_vc_s128) result does not match expected output. > Result: 0x000000000000000032147658ba9cfed0 > Expected result: 0x000000000000000032147658ba9cfed1 > > I am not sure exactly what issue you are concerned about with the print statement. But, we could implement the print statement as follows if you prefer: > > void print_i128 (unsigned __int128 val) > { > int i; > union convert_u { > unsigned __int128 val; > char bytes[16]; > } convert; > > convert.val = val; > #if __LITTLE_ENDIAN__ > for (i = 15; i >= 0; i--) > #else > for (i = 0; i < 16; i++) > #endif > printf(" 0x"); > > printf("%02x", convert.bytes[i]); > } > > which gives the same result (on LE: > > ./vec-sel-runnable-i128 > ERROR, vec_sel (src_va_s128, src_vb_s128, src_vc_s128) result does not match expected output. > Result: 0x000000000000000032147658ba9cfed0 > Expected result: 0x000000000000000032147658ba9cfed1 > > and on BE: > > ./vec-sel-runnable-i128 > ERROR, vec_sel (src_va_s128, src_vb_s128, src_vc_s128) result does not match expected output. > Result: 0x000000000000000032147658ba9cfed0 > Expected result: 0x000000000000000032147658ba9cfed1 > > Sounds like there was some issue that you noticed on r14-10011-g6e62ede7aaccc6. 
The new version of > print_i128 should be functionally equivalent but perhaps is "safer"? Thanks for checking! Looking into this more closely, I realized you didn't apply the previously adopted way for printing (the way used in gcc.target/powerpc/builtins-6-p9-runnable.c), sorry for the false alarm! So your supposed print_i128 is fine to me. BR, Kewen
Kewen: On 5/21/24 20:05, Kewen.Lin wrote: > Hi Carl, > > on 2024/5/22 08:13, Carl Love wrote: >> Kewen: <snip> >>> Why did you place this in a section for ISA 3.1 (Power10)? It doesn't really >>> require this support. The used instance VSEL_1TI and VSEL_1TI_UNS are placed >>> in altivec stanza, so it looks that we should put it under the section >>> "PowerPC AltiVec Built-in Functions on ISA 2.05". And since it's an extension >>> of @code{vec_sel} documented in the PVIPR, I prefer to just mention it's "an >>> extension of the @code{vec_sel} built-in documented in the PVIPR" and omitting >>> the description to avoid possible slightly different wording. >> >> Honestly, at this point in time I don't remember why I put it there. It has been too long since I created the patch. That said, the test case requires Power 10 do to the comparison check using built-in vec_all_eq but that is another issue. >> The built-in generates the xxsel instruction that is an ISA 2.06 instruction. So, I would say it should to into the ISA 2.06 section. I moved it to the ISA 2.06 section. > > But the underlying implementation is: > > const vsq __builtin_altivec_vsel_1ti (vsq, vsq, vuq); > VSEL_1TI vector_select_v1ti {} > > const vuq __builtin_altivec_vsel_1ti_uns (vuq, vuq, vuq); > VSEL_1TI_UNS vector_select_v1ti_uns {} > > , it's under altivec stanza and can result with insn vsel (so not xxsel), > vsel is ISA 2.03, so I think ISA 2.05 better matches the implementation. OK, moved to ISA 2.05 > <snip> >> >> Sounds like there was some issue that you noticed on r14-10011-g6e62ede7aaccc6. The new version of >> print_i128 should be functionally equivalent but perhaps is "safer"? > > Thanks for checking! Looking into this more closely, I realized you didn't apply the previously > adopted way for printing (the way used in gcc.target/powerpc/builtins-6-p9-runnable.c), sorry for > the false alarm! So your supposed print_i128 is fine to me. OK, no problem. 
Will go with the original print_i128 function. Carl
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index d09e21a9151..46d2ae7b7cb 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -1931,12 +1931,6 @@ const vuc __builtin_vsx_xxsel_16qi_uns (vuc, vuc, vuc); XXSEL_16QI_UNS vector_select_v16qi_uns {} - const vsq __builtin_vsx_xxsel_1ti (vsq, vsq, vsq); - XXSEL_1TI vector_select_v1ti {} - - const vsq __builtin_vsx_xxsel_1ti_uns (vsq, vsq, vsq); - XXSEL_1TI_UNS vector_select_v1ti_uns {} - const vd __builtin_vsx_xxsel_2df (vd, vd, vd); XXSEL_2DF vector_select_v2df {} diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def index 68501c05289..5912c9452f4 100644 --- a/gcc/config/rs6000/rs6000-overload.def +++ b/gcc/config/rs6000/rs6000-overload.def @@ -3274,6 +3274,10 @@ VSEL_2DF VSEL_2DF_B vd __builtin_vec_sel (vd, vd, vull); VSEL_2DF VSEL_2DF_U + vsq __builtin_vec_sel (vsq, vsq, vsq); + VSEL_1TI VSEL_1TI_S + vuq __builtin_vec_sel (vuq, vuq, vuq); + VSEL_1TI_UNS VSEL_1TI_U ; The following variants are deprecated. vsll __builtin_vec_sel (vsll, vsll, vsll); VSEL_2DI_B VSEL_2DI_S diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 64a43b55e2d..86b8e536dbe 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -23358,6 +23358,20 @@ The programmer is responsible for understanding the endianness issues involved with the first argument and the result. @findex vec_replace_unaligned +Vector select + +@smallexample +vector signed __int128 vec_sel (vector signed __int128, + vector signed __int128, vector signed __int128); +vector unsigned __int128 vec_sel (vector unsigned __int128, + vector unsigned __int128, vector unsigned __int128); +@end smallexample + +The overloaded built-in @code{vec_sel} with vector signed/unsigned __int128 +arguments and returns a vector selecting bits from the two source vectors based +on the values of the third input vector. 
This built-in is an extension of the +@code{vec_sel} built-in documented in the PVIPR. + Vector Shift Left Double Bit Immediate @smallexample @exdent vector signed char vec_sldb (vector signed char, vector signed char, diff --git a/gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c b/gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c new file mode 100644 index 00000000000..58eb383e8c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-sel-runnable-i128.c @@ -0,0 +1,84 @@ +/* { dg-do run { target power10_hw }} */ +/* { dg-require-effective-target int128 } */ +/* { dg-require-effective-target power10_hw } */ +/* { dg-options "-mdejagnu-cpu=power10 -save-temps" } */ + + +#include <altivec.h> + + +#define DEBUG 0 + +#if DEBUG +#include <stdio.h> +void print_i128 (unsigned __int128 val) +{ + printf(" 0x%016llx%016llx", + (unsigned long long)(val >> 64), + (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF)); +} +#endif + +extern void abort (void); + +int +main (int argc, char *argv []) +{ + vector signed __int128 src_va_s128; + vector signed __int128 src_vb_s128; + vector signed __int128 src_vc_s128; + vector signed __int128 vresult_s128; + vector signed __int128 expected_vresult_s128; + + vector unsigned __int128 src_va_u128; + vector unsigned __int128 src_vb_u128; + vector unsigned __int128 src_vc_u128; + vector unsigned __int128 vresult_u128; + vector unsigned __int128 expected_vresult_u128; + + src_va_s128 = (vector signed __int128) {0x123456789ABCDEF0}; + src_vb_s128 = (vector signed __int128) {0xFEDCBA9876543210}; + src_vc_s128 = (vector signed __int128) {0x3333333333333333}; + expected_vresult_s128 = (vector signed __int128) {0x32147658ba9cfed0}; + + /* Signed arguments. 
*/ + vresult_s128 = vec_sel (src_va_s128, src_vb_s128, src_vc_s128); + + if (!vec_all_eq (vresult_s128, expected_vresult_s128)) +#if DEBUG + { + printf ("ERROR, vec_sel (src_va_s128, src_vb_s128, src_vc_s128) result does not match expected output.\n"); + printf (" Result: "); + print_i128 ((unsigned __int128) vresult_s128); + printf ("\n Expected result: "); + print_i128 ((unsigned __int128) expected_vresult_s128); + printf ("\n"); + } +#else + abort (); +#endif + + src_va_u128 = (vector unsigned __int128) {0x13579ACE02468BDF}; + src_vb_u128 = (vector unsigned __int128) {0xA987654FEDCB3210}; + src_vc_u128 = (vector unsigned __int128) {0x5555555555555555}; + expected_vresult_u128 = (vector unsigned __int128) {0x32147658ba9cfed0}; + + /* Unigned arguments. */ + vresult_u128 = vec_sel (src_va_u128, src_vb_u128, src_vc_u128); + + if (!vec_all_eq (vresult_u128, expected_vresult_u128)) +#if DEBUG + { + printf ("ERROR, vec_sel (src_va_u128, src_vb_u128, src_vc_u128) result does not match expected output.\n"); + printf (" Result: "); + print_i128 ((unsigned __int128) vresult_u128); + printf ("\n Expected result: "); + print_i128 ((unsigned __int128) expected_vresult_u128); + printf ("\n"); + } +#else + abort (); +#endif + + return 0; +}