Message ID | 20210629180859.1235662-2-pc@us.ibm.com |
---|---|
State | New |
Headers | show |
Series | rs6000: Add SSE4.1 "test" and "blend" intrinsics | expand |
Hi Paul, On 6/29/21 1:08 PM, Paul A. Clarke via Gcc-patches wrote: > 2021-06-29 Paul A. Clarke <pc@us.ibm.com> > > gcc/ChangeLog: > * config/rs6000/smmintrin.h (_mm_testz_si128, _mm_testc_si128, > _mm_testnzc_si128, _mm_test_all_ones, _mm_test_all_zeros, > _mm_test_mix_ones_zeros): New. > --- > gcc/config/rs6000/smmintrin.h | 50 +++++++++++++++++++++++++++++++++++ > 1 file changed, 50 insertions(+) > > diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h > index bdf6eb365d88..1b8cad135ed0 100644 > --- a/gcc/config/rs6000/smmintrin.h > +++ b/gcc/config/rs6000/smmintrin.h > @@ -116,4 +116,54 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask) > return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask); > } > > +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) Line too long, please fix here and below. (Existing cases can be left.) > +_mm_testz_si128 (__m128i __A, __m128i __B) > +{ > + /* Note: This implementation does NOT set "zero" or "carry" flags. */ This is reasonable; thanks for documenting. LGTM; I can't approve, but recommend approval with line lengths fixed. Thanks! Bill > + const __v16qu __zero = {0}; > + return vec_all_eq (vec_and ((__v16qu) __A, (__v16qu) __B), __zero); > +} > + > +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_testc_si128 (__m128i __A, __m128i __B) > +{ > + /* Note: This implementation does NOT set "zero" or "carry" flags. */ > + const __v16qu __zero = {0}; > + const __v16qu __notA = vec_nor ((__v16qu) __A, (__v16qu) __A); > + return vec_all_eq (vec_and ((__v16qu) __notA, (__v16qu) __B), __zero); > +} > + > +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_testnzc_si128 (__m128i __A, __m128i __B) > +{ > + /* Note: This implementation does NOT set "zero" or "carry" flags. 
*/ > + return _mm_testz_si128 (__A, __B) == 0 && _mm_testc_si128 (__A, __B) == 0; > +} > + > +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_test_all_zeros (__m128i __A, __m128i __mask) > +{ > + const __v16qu __zero = {0}; > + return vec_all_eq (vec_and ((__v16qu) __A, (__v16qu) __mask), __zero); > +} > + > +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_test_all_ones (__m128i __A) > +{ > + const __v16qu __ones = vec_splats ((unsigned char) 0xff); > + return vec_all_eq ((__v16qu) __A, __ones); > +} > + > +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_test_mix_ones_zeros (__m128i __A, __m128i __mask) > +{ > + const __v16qu __zero = {0}; > + const __v16qu __Amasked = vec_and ((__v16qu) __A, (__v16qu) __mask); > + const int any_ones = vec_any_ne (__Amasked, __zero); > + const __v16qu __notA = vec_nor ((__v16qu) __A, (__v16qu) __A); > + const __v16qu __notAmasked = vec_and ((__v16qu) __notA, (__v16qu) __mask); > + const int any_zeros = vec_any_ne (__notAmasked, __zero); > + return any_ones * any_zeros; > +} > + > #endif
Hi!

On Sun, Jul 11, 2021 at 10:45:45AM -0500, Bill Schmidt wrote:
> On 6/29/21 1:08 PM, Paul A. Clarke via Gcc-patches wrote:
> >--- a/gcc/config/rs6000/smmintrin.h
> >+++ b/gcc/config/rs6000/smmintrin.h
> >@@ -116,4 +116,54 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i
> >__mask)
> > return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask);
> > }
> >
> >+extern __inline int __attribute__((__gnu_inline__, __always_inline__,
> >__artificial__))
> Line too long, please fix here and below. (Existing cases can be left.)

I wouldn't bother in this case. There is no way to write these attribute lines in a reasonable way, it doesn't overflow 80 char by that much, and there isn't anything interesting at the end of line.

You could put it on a line by itself, which helps for now because it won't get too long until you add another attribute ;-)

There should be a space before (( though, and "extern" on definitions is superfluous. But I do not care much about that either -- this isn't a part of the compiler proper anyway :-)

> LGTM; I can't approve, but recommend approval with line lengths fixed.

It is okay for trunk with whatever changes you want to do. Thanks!

Segher
On Mon, Jul 12, 2021 at 05:24:07PM -0500, Segher Boessenkool wrote: > On Sun, Jul 11, 2021 at 10:45:45AM -0500, Bill Schmidt wrote: > > On 6/29/21 1:08 PM, Paul A. Clarke via Gcc-patches wrote: > > >--- a/gcc/config/rs6000/smmintrin.h > > >+++ b/gcc/config/rs6000/smmintrin.h > > >@@ -116,4 +116,54 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i > > >__mask) > > > return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask); > > > } > > > > > >+extern __inline int __attribute__((__gnu_inline__, __always_inline__, > > >__artificial__)) > > Line too long, please fix here and below. (Existing cases can be left.) > > I wouldn't bother in this case. There is no way to write these > attribute lines in a reasonable way, it doesn't overflow 80 char by that > much, and there isn't anything interesting at the end of line. I bothered. ;-) > You could put it on a line by itself, which helps for now because it > won't get too long until you add another attribute ;-) OK > There should be a space before (( though, and "extern" on definitions is > superfluous. But I do not care much about that either -- this isn't a > part of the compiler proper anyway :-) OK > It is okay for trunk with whatever changes you want to do. Thanks! This is what I committed: 2021-07-13 Paul A. Clarke <pc@us.ibm.com> gcc * config/rs6000/smmintrin.h (_mm_testz_si128, _mm_testc_si128, _mm_testnzc_si128, _mm_test_all_ones, _mm_test_all_zeros, _mm_test_mix_ones_zeros): New. 
--- gcc/config/rs6000/smmintrin.h | 56 +++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h index bdf6eb365d88..16fd34d836ff 100644 --- a/gcc/config/rs6000/smmintrin.h +++ b/gcc/config/rs6000/smmintrin.h @@ -116,4 +116,60 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask) return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask); } +__inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_testz_si128 (__m128i __A, __m128i __B) +{ + /* Note: This implementation does NOT set "zero" or "carry" flags. */ + const __v16qu __zero = {0}; + return vec_all_eq (vec_and ((__v16qu) __A, (__v16qu) __B), __zero); +} + +__inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_testc_si128 (__m128i __A, __m128i __B) +{ + /* Note: This implementation does NOT set "zero" or "carry" flags. */ + const __v16qu __zero = {0}; + const __v16qu __notA = vec_nor ((__v16qu) __A, (__v16qu) __A); + return vec_all_eq (vec_and ((__v16qu) __notA, (__v16qu) __B), __zero); +} + +__inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_testnzc_si128 (__m128i __A, __m128i __B) +{ + /* Note: This implementation does NOT set "zero" or "carry" flags. 
*/ + return _mm_testz_si128 (__A, __B) == 0 && _mm_testc_si128 (__A, __B) == 0; +} + +__inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_test_all_zeros (__m128i __A, __m128i __mask) +{ + const __v16qu __zero = {0}; + return vec_all_eq (vec_and ((__v16qu) __A, (__v16qu) __mask), __zero); +} + +__inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_test_all_ones (__m128i __A) +{ + const __v16qu __ones = vec_splats ((unsigned char) 0xff); + return vec_all_eq ((__v16qu) __A, __ones); +} + +__inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_test_mix_ones_zeros (__m128i __A, __m128i __mask) +{ + const __v16qu __zero = {0}; + const __v16qu __Amasked = vec_and ((__v16qu) __A, (__v16qu) __mask); + const int any_ones = vec_any_ne (__Amasked, __zero); + const __v16qu __notA = vec_nor ((__v16qu) __A, (__v16qu) __A); + const __v16qu __notAmasked = vec_and ((__v16qu) __notA, (__v16qu) __mask); + const int any_zeros = vec_any_ne (__notAmasked, __zero); + return any_ones * any_zeros; +} + #endif
On Tue, Jul 13, 2021 at 02:01:18PM -0500, Paul A. Clarke wrote:
> > > >+extern __inline int __attribute__((__gnu_inline__, __always_inline__,
> > > >__artificial__))
> > > Line too long, please fix here and below. (Existing cases can be left.)
> >
> > I wouldn't bother in this case. There is no way to write these
> > attribute lines in a reasonable way, it doesn't overflow 80 char by that
> > much, and there isn't anything interesting at the end of line.
>
> I bothered. ;-)

Ha :-)

Btw, Bill suggested to me offline making a preprocessor macro for this long attribute line. Which is a fine suggestion! Something for the future, maybe?

Segher
diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h index bdf6eb365d88..1b8cad135ed0 100644 --- a/gcc/config/rs6000/smmintrin.h +++ b/gcc/config/rs6000/smmintrin.h @@ -116,4 +116,54 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask) return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask); } +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_testz_si128 (__m128i __A, __m128i __B) +{ + /* Note: This implementation does NOT set "zero" or "carry" flags. */ + const __v16qu __zero = {0}; + return vec_all_eq (vec_and ((__v16qu) __A, (__v16qu) __B), __zero); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_testc_si128 (__m128i __A, __m128i __B) +{ + /* Note: This implementation does NOT set "zero" or "carry" flags. */ + const __v16qu __zero = {0}; + const __v16qu __notA = vec_nor ((__v16qu) __A, (__v16qu) __A); + return vec_all_eq (vec_and ((__v16qu) __notA, (__v16qu) __B), __zero); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_testnzc_si128 (__m128i __A, __m128i __B) +{ + /* Note: This implementation does NOT set "zero" or "carry" flags. 
*/ + return _mm_testz_si128 (__A, __B) == 0 && _mm_testc_si128 (__A, __B) == 0; +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_test_all_zeros (__m128i __A, __m128i __mask) +{ + const __v16qu __zero = {0}; + return vec_all_eq (vec_and ((__v16qu) __A, (__v16qu) __mask), __zero); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_test_all_ones (__m128i __A) +{ + const __v16qu __ones = vec_splats ((unsigned char) 0xff); + return vec_all_eq ((__v16qu) __A, __ones); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_test_mix_ones_zeros (__m128i __A, __m128i __mask) +{ + const __v16qu __zero = {0}; + const __v16qu __Amasked = vec_and ((__v16qu) __A, (__v16qu) __mask); + const int any_ones = vec_any_ne (__Amasked, __zero); + const __v16qu __notA = vec_nor ((__v16qu) __A, (__v16qu) __A); + const __v16qu __notAmasked = vec_and ((__v16qu) __notA, (__v16qu) __mask); + const int any_zeros = vec_any_ne (__notAmasked, __zero); + return any_ones * any_zeros; +} + #endif