Message ID | 20210608191155.796424-2-pc@us.ibm.com |
---|---|
State | New |
Headers | show |
Series | rs6000: Add support for _mm_minpos_epu16 | expand |
Gentle ping.

I now realize I forgot to include a blurb about "what changed in v2".

v2:
- Slight formatting changes based on Segher's review (simplified condition, single line).

PC

On Tue, Jun 08, 2021 at 02:11:54PM -0500, Paul A. Clarke via Gcc-patches wrote:
> Add a naive implementation of the subject x86 intrinsic to
> ease porting.
>
> 2021-06-08 Paul A. Clarke <pc@us.ibm.com>
>
> gcc/ChangeLog:
> 	* config/rs6000/smmintrin.h (_mm_minpos_epu16): New.
> ---
>  gcc/config/rs6000/smmintrin.h | 25 +++++++++++++++++++++++++
>  1 file changed, 25 insertions(+)
>
> diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
> index bdf6eb365d88..b7de38763f2b 100644
> --- a/gcc/config/rs6000/smmintrin.h
> +++ b/gcc/config/rs6000/smmintrin.h
> @@ -116,4 +116,29 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
>    return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask);
>  }
>
> +/* Return horizontal packed word minimum and its index in bits [15:0]
> +   and bits [18:16] respectively. */
> +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_minpos_epu16 (__m128i __A)
> +{
> +  union __u
> +    {
> +      __m128i __m;
> +      __v8hu __uh;
> +    };
> +  union __u __u = { .__m = __A }, __r = { .__m = {0} };
> +  unsigned short __ridx = 0;
> +  unsigned short __rmin = __u.__uh[__ridx];
> +  for (unsigned long __i = __ridx + 1; __i < 8; __i++)
> +    {
> +      if (__u.__uh[__i] < __rmin)
> +        {
> +          __rmin = __u.__uh[__i];
> +          __ridx = __i;
> +        }
> +    }
> +  __r.__uh[0] = __rmin;
> +  __r.__uh[1] = __ridx;
> +  return __r.__m;
> +}
>  #endif
> --
> 2.27.0
>
Hi Paul,

On 6/8/21 2:11 PM, Paul A. Clarke via Gcc-patches wrote:
> Add a naive implementation of the subject x86 intrinsic to
> ease porting.

"subject" won't be part of eventual commit, so please specify in commit blurb.

>
> 2021-06-08 Paul A. Clarke <pc@us.ibm.com>
>
> gcc/ChangeLog:
> 	* config/rs6000/smmintrin.h (_mm_minpos_epu16): New.
> ---
>  gcc/config/rs6000/smmintrin.h | 25 +++++++++++++++++++++++++
>  1 file changed, 25 insertions(+)
>
> diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
> index bdf6eb365d88..b7de38763f2b 100644
> --- a/gcc/config/rs6000/smmintrin.h
> +++ b/gcc/config/rs6000/smmintrin.h
> @@ -116,4 +116,29 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
>    return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask);
>  }
>
> +/* Return horizontal packed word minimum and its index in bits [15:0]
> +   and bits [18:16] respectively. */
> +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))

Line too long, please break up. (I realize this happens throughout this file already, but...)

> +_mm_minpos_epu16 (__m128i __A)
> +{
> +  union __u
> +    {
> +      __m128i __m;
> +      __v8hu __uh;
> +    };
> +  union __u __u = { .__m = __A }, __r = { .__m = {0} };
> +  unsigned short __ridx = 0;
> +  unsigned short __rmin = __u.__uh[__ridx];
> +  for (unsigned long __i = __ridx + 1; __i < 8; __i++)

"__ridx + 1" can just be "1"

> +    {
> +      if (__u.__uh[__i] < __rmin)
> +        {
> +          __rmin = __u.__uh[__i];
> +          __ridx = __i;
> +        }

Preceding four lines need tabs, not spaces.

> +    }
> +  __r.__uh[0] = __rmin;
> +  __r.__uh[1] = __ridx;
> +  return __r.__m;
> +}
>  #endif

Otherwise LGTM. I can't approve, but recommend approval with those things fixed.

Thanks,
Bill
```diff
diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index bdf6eb365d88..b7de38763f2b 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -116,4 +116,29 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
   return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask);
 }
 
+/* Return horizontal packed word minimum and its index in bits [15:0]
+   and bits [18:16] respectively. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minpos_epu16 (__m128i __A)
+{
+  union __u
+    {
+      __m128i __m;
+      __v8hu __uh;
+    };
+  union __u __u = { .__m = __A }, __r = { .__m = {0} };
+  unsigned short __ridx = 0;
+  unsigned short __rmin = __u.__uh[__ridx];
+  for (unsigned long __i = __ridx + 1; __i < 8; __i++)
+    {
+      if (__u.__uh[__i] < __rmin)
+        {
+          __rmin = __u.__uh[__i];
+          __ridx = __i;
+        }
+    }
+  __r.__uh[0] = __rmin;
+  __r.__uh[1] = __ridx;
+  return __r.__m;
+}
 #endif
```