diff mbox series

[1/4] rs6000: Add support for SSE4.1 "test" intrinsics

Message ID 20210629180859.1235662-2-pc@us.ibm.com
State New
Headers show
Series rs6000: Add SSE4.1 "test" and "blend" intrinsics | expand

Commit Message

Paul A. Clarke June 29, 2021, 6:08 p.m. UTC
2021-06-29  Paul A. Clarke  <pc@us.ibm.com>

gcc/ChangeLog:
        * config/rs6000/smmintrin.h (_mm_testz_si128, _mm_testc_si128,
	_mm_testnzc_si128, _mm_test_all_ones, _mm_test_all_zeros,
	_mm_test_mix_ones_zeros): New.
---
 gcc/config/rs6000/smmintrin.h | 50 +++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

Comments

Bill Schmidt via Gcc-patches July 11, 2021, 3:45 p.m. UTC | #1
Hi Paul,

On 6/29/21 1:08 PM, Paul A. Clarke via Gcc-patches wrote:
> 2021-06-29  Paul A. Clarke  <pc@us.ibm.com>
>
> gcc/ChangeLog:
>          * config/rs6000/smmintrin.h (_mm_testz_si128, _mm_testc_si128,
> 	_mm_testnzc_si128, _mm_test_all_ones, _mm_test_all_zeros,
> 	_mm_test_mix_ones_zeros): New.
> ---
>   gcc/config/rs6000/smmintrin.h | 50 +++++++++++++++++++++++++++++++++++
>   1 file changed, 50 insertions(+)
>
> diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
> index bdf6eb365d88..1b8cad135ed0 100644
> --- a/gcc/config/rs6000/smmintrin.h
> +++ b/gcc/config/rs6000/smmintrin.h
> @@ -116,4 +116,54 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
>     return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask);
>   }
>
> +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
Line too long, please fix here and below.  (Existing cases can be left.)
> +_mm_testz_si128 (__m128i __A, __m128i __B)
> +{
> +  /* Note: This implementation does NOT set "zero" or "carry" flags.  */

This is reasonable; thanks for documenting.

LGTM; I can't approve, but recommend approval with line lengths fixed.  
Thanks!
Bill

> +  const __v16qu __zero = {0};
> +  return vec_all_eq (vec_and ((__v16qu) __A, (__v16qu) __B), __zero);
> +}
> +
> +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_testc_si128 (__m128i __A, __m128i __B)
> +{
> +  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
> +  const __v16qu __zero = {0};
> +  const __v16qu __notA = vec_nor ((__v16qu) __A, (__v16qu) __A);
> +  return vec_all_eq (vec_and ((__v16qu) __notA, (__v16qu) __B), __zero);
> +}
> +
> +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_testnzc_si128 (__m128i __A, __m128i __B)
> +{
> +  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
> +  return _mm_testz_si128 (__A, __B) == 0 && _mm_testc_si128 (__A, __B) == 0;
> +}
> +
> +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_test_all_zeros (__m128i __A, __m128i __mask)
> +{
> +  const __v16qu __zero = {0};
> +  return vec_all_eq (vec_and ((__v16qu) __A, (__v16qu) __mask), __zero);
> +}
> +
> +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_test_all_ones (__m128i __A)
> +{
> +  const __v16qu __ones = vec_splats ((unsigned char) 0xff);
> +  return vec_all_eq ((__v16qu) __A, __ones);
> +}
> +
> +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_test_mix_ones_zeros (__m128i __A, __m128i __mask)
> +{
> +  const __v16qu __zero = {0};
> +  const __v16qu __Amasked = vec_and ((__v16qu) __A, (__v16qu) __mask);
> +  const int any_ones = vec_any_ne (__Amasked, __zero);
> +  const __v16qu __notA = vec_nor ((__v16qu) __A, (__v16qu) __A);
> +  const __v16qu __notAmasked = vec_and ((__v16qu) __notA, (__v16qu) __mask);
> +  const int any_zeros = vec_any_ne (__notAmasked, __zero);
> +  return any_ones * any_zeros;
> +}
> +
>   #endif
Segher Boessenkool July 12, 2021, 10:24 p.m. UTC | #2
Hi!

On Sun, Jul 11, 2021 at 10:45:45AM -0500, Bill Schmidt wrote:
> On 6/29/21 1:08 PM, Paul A. Clarke via Gcc-patches wrote:
> >--- a/gcc/config/rs6000/smmintrin.h
> >+++ b/gcc/config/rs6000/smmintrin.h
> >@@ -116,4 +116,54 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i 
> >__mask)
> >    return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask);
> >  }
> >
> >+extern __inline int __attribute__((__gnu_inline__, __always_inline__, 
> >__artificial__))
> Line too long, please fix here and below.  (Existing cases can be left.)

I wouldn't bother in this case.  There is no way to write these
attribute lines in a reasonable way, it doesn't overflow 80 char by that
much, and there isn't anything interesting at the end of line.

You could put it on a line by itself, which helps for now because it
won't get too long until you add another attribute ;-)

There should be a space before (( though, and "extern" on definitions is
superfluous.  But I do not care much about that either -- this isn't a
part of the compiler proper anyway :-)

> LGTM; I can't approve, but recommend approval with line lengths fixed.  

It is okay for trunk with whatever changes you want to do.  Thanks!


Segher
Paul A. Clarke July 13, 2021, 7:01 p.m. UTC | #3
On Mon, Jul 12, 2021 at 05:24:07PM -0500, Segher Boessenkool wrote:
> On Sun, Jul 11, 2021 at 10:45:45AM -0500, Bill Schmidt wrote:
> > On 6/29/21 1:08 PM, Paul A. Clarke via Gcc-patches wrote:
> > >--- a/gcc/config/rs6000/smmintrin.h
> > >+++ b/gcc/config/rs6000/smmintrin.h
> > >@@ -116,4 +116,54 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i 
> > >__mask)
> > >    return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask);
> > >  }
> > >
> > >+extern __inline int __attribute__((__gnu_inline__, __always_inline__, 
> > >__artificial__))
> > Line too long, please fix here and below.  (Existing cases can be left.)
> 
> I wouldn't bother in this case.  There is no way to write these
> attribute lines in a reasonable way, it doesn't overflow 80 char by that
> much, and there isn't anything interesting at the end of line.

I bothered. ;-)

> You could put it on a line by itself, which helps for now because it
> won't get too long until you add another attribute ;-)

OK

> There should be a space before (( though, and "extern" on definitions is
> superfluous.  But I do not care much about that either -- this isn't a
> part of the compiler proper anyway :-)

OK

> It is okay for trunk with whatever changes you want to do.  Thanks!

This is what I committed:

2021-07-13  Paul A. Clarke  <pc@us.ibm.com>

gcc
	* config/rs6000/smmintrin.h (_mm_testz_si128, _mm_testc_si128,
	_mm_testnzc_si128, _mm_test_all_ones, _mm_test_all_zeros,
	_mm_test_mix_ones_zeros): New.
---
 gcc/config/rs6000/smmintrin.h | 56 +++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index bdf6eb365d88..16fd34d836ff 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -116,4 +116,60 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
   return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask);
 }
 
+__inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testz_si128 (__m128i __A, __m128i __B)
+{
+  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
+  const __v16qu __zero = {0};
+  return vec_all_eq (vec_and ((__v16qu) __A, (__v16qu) __B), __zero);
+}
+
+__inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testc_si128 (__m128i __A, __m128i __B)
+{
+  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
+  const __v16qu __zero = {0};
+  const __v16qu __notA = vec_nor ((__v16qu) __A, (__v16qu) __A);
+  return vec_all_eq (vec_and ((__v16qu) __notA, (__v16qu) __B), __zero);
+}
+
+__inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testnzc_si128 (__m128i __A, __m128i __B)
+{
+  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
+  return _mm_testz_si128 (__A, __B) == 0 && _mm_testc_si128 (__A, __B) == 0;
+}
+
+__inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_test_all_zeros (__m128i __A, __m128i __mask)
+{
+  const __v16qu __zero = {0};
+  return vec_all_eq (vec_and ((__v16qu) __A, (__v16qu) __mask), __zero);
+}
+
+__inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_test_all_ones (__m128i __A)
+{
+  const __v16qu __ones = vec_splats ((unsigned char) 0xff);
+  return vec_all_eq ((__v16qu) __A, __ones);
+}
+
+__inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_test_mix_ones_zeros (__m128i __A, __m128i __mask)
+{
+  const __v16qu __zero = {0};
+  const __v16qu __Amasked = vec_and ((__v16qu) __A, (__v16qu) __mask);
+  const int any_ones = vec_any_ne (__Amasked, __zero);
+  const __v16qu __notA = vec_nor ((__v16qu) __A, (__v16qu) __A);
+  const __v16qu __notAmasked = vec_and ((__v16qu) __notA, (__v16qu) __mask);
+  const int any_zeros = vec_any_ne (__notAmasked, __zero);
+  return any_ones * any_zeros;
+}
+
 #endif
Segher Boessenkool July 13, 2021, 11:12 p.m. UTC | #4
On Tue, Jul 13, 2021 at 02:01:18PM -0500, Paul A. Clarke wrote:
> > > >+extern __inline int __attribute__((__gnu_inline__, __always_inline__, 
> > > >__artificial__))
> > > Line too long, please fix here and below.  (Existing cases can be left.)
> > 
> > I wouldn't bother in this case.  There is no way to write these
> > attribute lines in a reasonable way, it doesn't overflow 80 char by that
> > much, and there isn't anything interesting at the end of line.
> 
> I bothered. ;-)

Ha :-)

Btw, Bill suggested to me offline making a preprocessor macro for this
long attribute line.  Which is a fine suggestion!  Something for the
future, maybe?


Segher
diff mbox series

Patch

diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index bdf6eb365d88..1b8cad135ed0 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -116,4 +116,54 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
   return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask);
 }
 
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testz_si128 (__m128i __A, __m128i __B)
+{
+  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
+  const __v16qu __zero = {0};
+  return vec_all_eq (vec_and ((__v16qu) __A, (__v16qu) __B), __zero);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testc_si128 (__m128i __A, __m128i __B)
+{
+  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
+  const __v16qu __zero = {0};
+  const __v16qu __notA = vec_nor ((__v16qu) __A, (__v16qu) __A);
+  return vec_all_eq (vec_and ((__v16qu) __notA, (__v16qu) __B), __zero);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testnzc_si128 (__m128i __A, __m128i __B)
+{
+  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
+  return _mm_testz_si128 (__A, __B) == 0 && _mm_testc_si128 (__A, __B) == 0;
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_test_all_zeros (__m128i __A, __m128i __mask)
+{
+  const __v16qu __zero = {0};
+  return vec_all_eq (vec_and ((__v16qu) __A, (__v16qu) __mask), __zero);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_test_all_ones (__m128i __A)
+{
+  const __v16qu __ones = vec_splats ((unsigned char) 0xff);
+  return vec_all_eq ((__v16qu) __A, __ones);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_test_mix_ones_zeros (__m128i __A, __m128i __mask)
+{
+  const __v16qu __zero = {0};
+  const __v16qu __Amasked = vec_and ((__v16qu) __A, (__v16qu) __mask);
+  const int any_ones = vec_any_ne (__Amasked, __zero);
+  const __v16qu __notA = vec_nor ((__v16qu) __A, (__v16qu) __A);
+  const __v16qu __notAmasked = vec_and ((__v16qu) __notA, (__v16qu) __mask);
+  const int any_zeros = vec_any_ne (__notAmasked, __zero);
+  return any_ones * any_zeros;
+}
+
 #endif