Message ID | 20210706225057.644872-2-pc@us.ibm.com |
---|---|
State | New |
Headers | show |
Series | Add SSE4.1 "floor" intrinsics | expand |
Hi Paul,

On 7/6/21 5:50 PM, Paul A. Clarke via Gcc-patches wrote:
> 2021-07-06  Paul A. Clarke  <pc@us.ibm.com>
>
> gcc/ChangeLog:
> 	* config/rs6000/smmintrin.h (_mm_floor_pd, _mm_floor_ps,
> 	_mm_floor_sd, _mm_floor_ss): New.
> ---
>  gcc/config/rs6000/smmintrin.h | 28 ++++++++++++++++++++++++++++
>  1 file changed, 28 insertions(+)
>
> diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
> index 0c0b0dd7c1e3..f484a7fd029f 100644
> --- a/gcc/config/rs6000/smmintrin.h
> +++ b/gcc/config/rs6000/smmintrin.h
> @@ -240,4 +240,32 @@ _mm_ceil_ss (__m128 __A, __m128 __B)
>    return r;
>  }
>
> +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))

Usual fuss about line length.  LGTM with that fixed here and below.  I
can't approve, but recommend approval with those changes.

Thanks,
Bill

> +_mm_floor_pd (__m128d __A)
> +{
> +  return (__m128d) vec_floor ((__v2df) __A);
> +}
> +
> +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_floor_ps (__m128 __A)
> +{
> +  return (__m128) vec_floor ((__v4sf) __A);
> +}
> +
> +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_floor_sd (__m128d __A, __m128d __B)
> +{
> +  __v2df r = vec_floor ((__v2df) __B);
> +  r[1] = ((__v2df) __A)[1];
> +  return (__m128d) r;
> +}
> +
> +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_floor_ss (__m128 __A, __m128 __B)
> +{
> +  __v4sf r = (__v4sf) __A;
> +  r[0] = __builtin_floor (((__v4sf) __B)[0]);
> +  return r;
> +}
> +
>  #endif
diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index 0c0b0dd7c1e3..f484a7fd029f 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -240,4 +240,32 @@ _mm_ceil_ss (__m128 __A, __m128 __B)
   return r;
 }
 
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_floor_pd (__m128d __A)
+{
+  return (__m128d) vec_floor ((__v2df) __A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_floor_ps (__m128 __A)
+{
+  return (__m128) vec_floor ((__v4sf) __A);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_floor_sd (__m128d __A, __m128d __B)
+{
+  __v2df r = vec_floor ((__v2df) __B);
+  r[1] = ((__v2df) __A)[1];
+  return (__m128d) r;
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_floor_ss (__m128 __A, __m128 __B)
+{
+  __v4sf r = (__v4sf) __A;
+  r[0] = __builtin_floor (((__v4sf) __B)[0]);
+  return r;
+}
+
 #endif