diff mbox series

[v3,4/6] rs6000: Support SSE4.1 "cvt" intrinsics

Message ID 20210823190310.1679905-5-pc@us.ibm.com
State New
Headers show
Series rs6000: Support more SSE4 intrinsics | expand

Commit Message

Paul A. Clarke Aug. 23, 2021, 7:03 p.m. UTC
Function signatures and decorations match gcc/config/i386/smmintrin.h.

Also, copy tests for:
- _mm_cvtepi8_epi16, _mm_cvtepi8_epi32, _mm_cvtepi8_epi64
- _mm_cvtepi16_epi32, _mm_cvtepi16_epi64
- _mm_cvtepi32_epi64,
- _mm_cvtepu8_epi16, _mm_cvtepu8_epi32, _mm_cvtepu8_epi64
- _mm_cvtepu16_epi32, _mm_cvtepu16_epi64
- _mm_cvtepu32_epi64

from gcc/testsuite/gcc.target/i386.

sse4_1-pmovsxbd.c, sse4_1-pmovsxbq.c, and sse4_1-pmovsxbw.c were
modified from using "char" types to "signed char" types, because
the default is unsigned on powerpc.

2021-08-20  Paul A. Clarke  <pc@us.ibm.com>

gcc
	* config/rs6000/smmintrin.h (_mm_cvtepi8_epi16, _mm_cvtepi8_epi32,
	_mm_cvtepi8_epi64, _mm_cvtepi16_epi32, _mm_cvtepi16_epi64,
	_mm_cvtepi32_epi64, _mm_cvtepu8_epi16, _mm_cvtepu8_epi32,
	_mm_cvtepu8_epi64, _mm_cvtepu16_epi32, _mm_cvtepu16_epi64,
	_mm_cvtepu32_epi64): New.

gcc/testsuite
	* gcc.target/powerpc/sse4_1-pmovsxbd.c: Copy from gcc.target/i386,
	adjust dg directives to suit.
	* gcc.target/powerpc/sse4_1-pmovsxbq.c: Same.
	* gcc.target/powerpc/sse4_1-pmovsxbw.c: Same.
	* gcc.target/powerpc/sse4_1-pmovsxdq.c: Same.
	* gcc.target/powerpc/sse4_1-pmovsxwd.c: Same.
	* gcc.target/powerpc/sse4_1-pmovsxwq.c: Same.
	* gcc.target/powerpc/sse4_1-pmovzxbd.c: Same.
	* gcc.target/powerpc/sse4_1-pmovzxbq.c: Same.
	* gcc.target/powerpc/sse4_1-pmovzxbw.c: Same.
	* gcc.target/powerpc/sse4_1-pmovzxdq.c: Same.
	* gcc.target/powerpc/sse4_1-pmovzxwd.c: Same.
	* gcc.target/powerpc/sse4_1-pmovzxwq.c: Same.
---
v3: No change.
v2:
- Added "extern" to functions to maintain compatible decorations with
  like implementations in gcc/config/i386.
- Removed "-Wno-psabi" from tests as unnecessary, per v1 review.
- Noted testing in patch series cover letter.

 gcc/config/rs6000/smmintrin.h                 | 138 ++++++++++++++++++
 .../gcc.target/powerpc/sse4_1-pmovsxbd.c      |  42 ++++++
 .../gcc.target/powerpc/sse4_1-pmovsxbq.c      |  42 ++++++
 .../gcc.target/powerpc/sse4_1-pmovsxbw.c      |  42 ++++++
 .../gcc.target/powerpc/sse4_1-pmovsxdq.c      |  42 ++++++
 .../gcc.target/powerpc/sse4_1-pmovsxwd.c      |  42 ++++++
 .../gcc.target/powerpc/sse4_1-pmovsxwq.c      |  42 ++++++
 .../gcc.target/powerpc/sse4_1-pmovzxbd.c      |  43 ++++++
 .../gcc.target/powerpc/sse4_1-pmovzxbq.c      |  43 ++++++
 .../gcc.target/powerpc/sse4_1-pmovzxbw.c      |  43 ++++++
 .../gcc.target/powerpc/sse4_1-pmovzxdq.c      |  43 ++++++
 .../gcc.target/powerpc/sse4_1-pmovzxwd.c      |  43 ++++++
 .../gcc.target/powerpc/sse4_1-pmovzxwq.c      |  43 ++++++
 13 files changed, 648 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c

Comments

Li, Pan2 via Gcc-patches Aug. 27, 2021, 1:49 p.m. UTC | #1
This looks fine, recommend approval.

Thanks!
Bill

On 8/23/21 2:03 PM, Paul A. Clarke wrote:
> Function signatures and decorations match gcc/config/i386/smmintrin.h.
>
> Also, copy tests for:
> - _mm_cvtepi8_epi16, _mm_cvtepi8_epi32, _mm_cvtepi8_epi64
> - _mm_cvtepi16_epi32, _mm_cvtepi16_epi64
> - _mm_cvtepi32_epi64,
> - _mm_cvtepu8_epi16, _mm_cvtepu8_epi32, _mm_cvtepu8_epi64
> - _mm_cvtepu16_epi32, _mm_cvtepu16_epi64
> - _mm_cvtepu32_epi64
>
> from gcc/testsuite/gcc.target/i386.
>
> sse4_1-pmovsxbd.c, sse4_1-pmovsxbq.c, and sse4_1-pmovsxbw.c were
> modified from using "char" types to "signed char" types, because
> the default is unsigned on powerpc.
>
> 2021-08-20  Paul A. Clarke  <pc@us.ibm.com>
>
> gcc
> 	* config/rs6000/smmintrin.h (_mm_cvtepi8_epi16, _mm_cvtepi8_epi32,
> 	_mm_cvtepi8_epi64, _mm_cvtepi16_epi32, _mm_cvtepi16_epi64,
> 	_mm_cvtepi32_epi64, _mm_cvtepu8_epi16, _mm_cvtepu8_epi32,
> 	_mm_cvtepu8_epi64, _mm_cvtepu16_epi32, _mm_cvtepu16_epi64,
> 	_mm_cvtepu32_epi64): New.
>
> gcc/testsuite
> 	* gcc.target/powerpc/sse4_1-pmovsxbd.c: Copy from gcc.target/i386,
> 	adjust dg directives to suit.
> 	* gcc.target/powerpc/sse4_1-pmovsxbq.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovsxbw.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovsxdq.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovsxwd.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovsxwq.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovzxbd.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovzxbq.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovzxbw.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovzxdq.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovzxwd.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovzxwq.c: Same.
> ---
> v3: No change.
> v2:
> - Added "extern" to functions to maintain compatible decorations with
>    like implementations in gcc/config/i386.
> - Removed "-Wno-psabi" from tests as unnecessary, per v1 review.
> - Noted testing in patch series cover letter.
>
>   gcc/config/rs6000/smmintrin.h                 | 138 ++++++++++++++++++
>   .../gcc.target/powerpc/sse4_1-pmovsxbd.c      |  42 ++++++
>   .../gcc.target/powerpc/sse4_1-pmovsxbq.c      |  42 ++++++
>   .../gcc.target/powerpc/sse4_1-pmovsxbw.c      |  42 ++++++
>   .../gcc.target/powerpc/sse4_1-pmovsxdq.c      |  42 ++++++
>   .../gcc.target/powerpc/sse4_1-pmovsxwd.c      |  42 ++++++
>   .../gcc.target/powerpc/sse4_1-pmovsxwq.c      |  42 ++++++
>   .../gcc.target/powerpc/sse4_1-pmovzxbd.c      |  43 ++++++
>   .../gcc.target/powerpc/sse4_1-pmovzxbq.c      |  43 ++++++
>   .../gcc.target/powerpc/sse4_1-pmovzxbw.c      |  43 ++++++
>   .../gcc.target/powerpc/sse4_1-pmovzxdq.c      |  43 ++++++
>   .../gcc.target/powerpc/sse4_1-pmovzxwd.c      |  43 ++++++
>   .../gcc.target/powerpc/sse4_1-pmovzxwq.c      |  43 ++++++
>   13 files changed, 648 insertions(+)
>   create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c
>   create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c
>   create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c
>   create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c
>   create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c
>   create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c
>   create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c
>   create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c
>   create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c
>   create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c
>   create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c
>   create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c
>
> diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
> index 363534cb06a2..fdef6674d16c 100644
> --- a/gcc/config/rs6000/smmintrin.h
> +++ b/gcc/config/rs6000/smmintrin.h
> @@ -442,6 +442,144 @@ _mm_max_epu32 (__m128i __X, __m128i __Y)
>     return (__m128i) vec_max ((__v4su)__X, (__v4su)__Y);
>   }
>   
> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtepi8_epi16 (__m128i __A)
> +{
> +  return (__m128i) vec_unpackh ((__v16qi)__A);
> +}
> +
> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtepi8_epi32 (__m128i __A)
> +{
> +  __A = (__m128i) vec_unpackh ((__v16qi)__A);
> +  return (__m128i) vec_unpackh ((__v8hi)__A);
> +}
> +
> +#ifdef _ARCH_PWR8
> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtepi8_epi64 (__m128i __A)
> +{
> +  __A = (__m128i) vec_unpackh ((__v16qi)__A);
> +  __A = (__m128i) vec_unpackh ((__v8hi)__A);
> +  return (__m128i) vec_unpackh ((__v4si)__A);
> +}
> +#endif
> +
> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtepi16_epi32 (__m128i __A)
> +{
> +  return (__m128i) vec_unpackh ((__v8hi)__A);
> +}
> +
> +#ifdef _ARCH_PWR8
> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtepi16_epi64 (__m128i __A)
> +{
> +  __A = (__m128i) vec_unpackh ((__v8hi)__A);
> +  return (__m128i) vec_unpackh ((__v4si)__A);
> +}
> +#endif
> +
> +#ifdef _ARCH_PWR8
> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtepi32_epi64 (__m128i __A)
> +{
> +  return (__m128i) vec_unpackh ((__v4si)__A);
> +}
> +#endif
> +
> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtepu8_epi16 (__m128i __A)
> +{
> +  const __v16qu __zero = {0};
> +#ifdef __LITTLE_ENDIAN__
> +  __A = (__m128i) vec_mergeh ((__v16qu)__A, __zero);
> +#else /* __BIG_ENDIAN__.  */
> +  __A = (__m128i) vec_mergeh (__zero, (__v16qu)__A);
> +#endif /* __BIG_ENDIAN__.  */
> +  return __A;
> +}
> +
> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtepu8_epi32 (__m128i __A)
> +{
> +  const __v16qu __zero = {0};
> +#ifdef __LITTLE_ENDIAN__
> +  __A = (__m128i) vec_mergeh ((__v16qu)__A, __zero);
> +  __A = (__m128i) vec_mergeh ((__v8hu)__A, (__v8hu)__zero);
> +#else /* __BIG_ENDIAN__.  */
> +  __A = (__m128i) vec_mergeh (__zero, (__v16qu)__A);
> +  __A = (__m128i) vec_mergeh ((__v8hu)__zero, (__v8hu)__A);
> +#endif /* __BIG_ENDIAN__.  */
> +  return __A;
> +}
> +
> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtepu8_epi64 (__m128i __A)
> +{
> +  const __v16qu __zero = {0};
> +#ifdef __LITTLE_ENDIAN__
> +  __A = (__m128i) vec_mergeh ((__v16qu)__A, __zero);
> +  __A = (__m128i) vec_mergeh ((__v8hu)__A, (__v8hu)__zero);
> +  __A = (__m128i) vec_mergeh ((__v4su)__A, (__v4su)__zero);
> +#else /* __BIG_ENDIAN__.  */
> +  __A = (__m128i) vec_mergeh (__zero, (__v16qu)__A);
> +  __A = (__m128i) vec_mergeh ((__v8hu)__zero, (__v8hu)__A);
> +  __A = (__m128i) vec_mergeh ((__v4su)__zero, (__v4su)__A);
> +#endif /* __BIG_ENDIAN__.  */
> +  return __A;
> +}
> +
> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtepu16_epi32 (__m128i __A)
> +{
> +  const __v8hu __zero = {0};
> +#ifdef __LITTLE_ENDIAN__
> +  __A = (__m128i) vec_mergeh ((__v8hu)__A, __zero);
> +#else /* __BIG_ENDIAN__.  */
> +  __A = (__m128i) vec_mergeh (__zero, (__v8hu)__A);
> +#endif /* __BIG_ENDIAN__.  */
> +  return __A;
> +}
> +
> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtepu16_epi64 (__m128i __A)
> +{
> +  const __v8hu __zero = {0};
> +#ifdef __LITTLE_ENDIAN__
> +  __A = (__m128i) vec_mergeh ((__v8hu)__A, __zero);
> +  __A = (__m128i) vec_mergeh ((__v4su)__A, (__v4su)__zero);
> +#else /* __BIG_ENDIAN__.  */
> +  __A = (__m128i) vec_mergeh (__zero, (__v8hu)__A);
> +  __A = (__m128i) vec_mergeh ((__v4su)__zero, (__v4su)__A);
> +#endif /* __BIG_ENDIAN__.  */
> +  return __A;
> +}
> +
> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtepu32_epi64 (__m128i __A)
> +{
> +  const __v4su __zero = {0};
> +#ifdef __LITTLE_ENDIAN__
> +  __A = (__m128i) vec_mergeh ((__v4su)__A, __zero);
> +#else /* __BIG_ENDIAN__.  */
> +  __A = (__m128i) vec_mergeh (__zero, (__v4su)__A);
> +#endif /* __BIG_ENDIAN__.  */
> +  return __A;
> +}
> +
>   /* Return horizontal packed word minimum and its index in bits [15:0]
>      and bits [18:16] respectively.  */
>   __inline __m128i
> diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c
> new file mode 100644
> index 000000000000..553c8dd84505
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c
> @@ -0,0 +1,42 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-O2 -mvsx" } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "sse4_1-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST sse4_1_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include <smmintrin.h>
> +
> +#define NUM 128
> +
> +static void
> +TEST (void)
> +{
> +  union
> +    {
> +      __m128i x[NUM / 4];
> +      int i[NUM];
> +      signed char c[NUM * 4];
> +    } dst, src;
> +  int i, sign = 1;
> +
> +  for (i = 0; i < NUM; i++)
> +    {
> +      src.c[(i % 4) + (i / 4) * 16] = i * i * sign;
> +      sign = -sign;
> +    }
> +
> +  for (i = 0; i < NUM; i += 4)
> +    dst.x [i / 4] = _mm_cvtepi8_epi32 (src.x [i / 4]);
> +
> +  for (i = 0; i < NUM; i++)
> +    if (src.c[(i % 4) + (i / 4) * 16] != dst.i[i])
> +      abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c
> new file mode 100644
> index 000000000000..9ec1ab7a4169
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c
> @@ -0,0 +1,42 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target p8vector_hw } */
> +/* { dg-options "-O2 -mpower8-vector" } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "sse4_1-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST sse4_1_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include <smmintrin.h>
> +
> +#define NUM 128
> +
> +static void
> +TEST (void)
> +{
> +  union
> +    {
> +      __m128i x[NUM / 2];
> +      long long ll[NUM];
> +      signed char c[NUM * 8];
> +    } dst, src;
> +  int i, sign = 1;
> +
> +  for (i = 0; i < NUM; i++)
> +    {
> +      src.c[(i % 2) + (i / 2) * 16] = i * i * sign;
> +      sign = -sign;
> +    }
> +
> +  for (i = 0; i < NUM; i += 2)
> +    dst.x [i / 2] = _mm_cvtepi8_epi64 (src.x [i / 2]);
> +
> +  for (i = 0; i < NUM; i++)
> +    if (src.c[(i % 2) + (i / 2) * 16] != dst.ll[i])
> +      abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c
> new file mode 100644
> index 000000000000..be4cf417ca7e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c
> @@ -0,0 +1,42 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-O2 -mvsx" } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "sse4_1-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST sse4_1_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include <smmintrin.h>
> +
> +#define NUM 128
> +
> +static void
> +TEST (void)
> +{
> +  union
> +    {
> +      __m128i x[NUM / 8];
> +      short s[NUM];
> +      signed char c[NUM * 2];
> +    } dst, src;
> +  int i, sign = 1;
> +
> +  for (i = 0; i < NUM; i++)
> +    {
> +      src.c[(i % 8) + (i / 8) * 16] = i * i * sign;
> +      sign = -sign;
> +    }
> +
> +  for (i = 0; i < NUM; i += 8)
> +    dst.x [i / 8] = _mm_cvtepi8_epi16 (src.x [i / 8]);
> +
> +  for (i = 0; i < NUM; i++)
> +    if (src.c[(i % 8) + (i / 8) * 16] != dst.s[i])
> +      abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c
> new file mode 100644
> index 000000000000..1c263782240a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c
> @@ -0,0 +1,42 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target p8vector_hw } */
> +/* { dg-options "-O2 -mpower8-vector" } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "sse4_1-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST sse4_1_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include <smmintrin.h>
> +
> +#define NUM 128
> +
> +static void
> +TEST (void)
> +{
> +  union
> +    {
> +      __m128i x[NUM / 2];
> +      long long ll[NUM];
> +      int i[NUM * 2];
> +    } dst, src;
> +  int i, sign = 1;
> +
> +  for (i = 0; i < NUM; i++)
> +    {
> +      src.i[(i % 2) + (i / 2) * 4] = i * i * sign;
> +      sign = -sign;
> +    }
> +
> +  for (i = 0; i < NUM; i += 2)
> +    dst.x [i / 2] = _mm_cvtepi32_epi64 (src.x [i / 2]);
> +
> +  for (i = 0; i < NUM; i++)
> +    if (src.i[(i % 2) + (i / 2) * 4] != dst.ll[i])
> +      abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c
> new file mode 100644
> index 000000000000..f0f31aba44ba
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c
> @@ -0,0 +1,42 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-O2 -mvsx" } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "sse4_1-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST sse4_1_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include <smmintrin.h>
> +
> +#define NUM 128
> +
> +static void
> +TEST (void)
> +{
> +  union
> +    {
> +      __m128i x[NUM / 4];
> +      int i[NUM];
> +      short s[NUM * 2];
> +    } dst, src;
> +  int i, sign = 1;
> +
> +  for (i = 0; i < NUM; i++)
> +    {
> +      src.s[(i % 4) + (i / 4) * 8] = i * i * sign;
> +      sign = -sign;
> +    }
> +
> +  for (i = 0; i < NUM; i += 4)
> +    dst.x [i / 4] = _mm_cvtepi16_epi32 (src.x [i / 4]);
> +
> +  for (i = 0; i < NUM; i++)
> +    if (src.s[(i % 4) + (i / 4) * 8] != dst.i[i])
> +      abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c
> new file mode 100644
> index 000000000000..67864695a113
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c
> @@ -0,0 +1,42 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target p8vector_hw } */
> +/* { dg-options "-O2 -mpower8-vector" } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "sse4_1-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST sse4_1_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include <smmintrin.h>
> +
> +#define NUM 128
> +
> +static void
> +TEST (void)
> +{
> +  union
> +    {
> +      __m128i x[NUM / 2];
> +      long long ll[NUM];
> +      short s[NUM * 4];
> +    } dst, src;
> +  int i, sign = 1;
> +
> +  for (i = 0; i < NUM; i++)
> +    {
> +      src.s[(i % 2) + (i / 2) * 8] = i * i * sign;
> +      sign = -sign;
> +    }
> +
> +  for (i = 0; i < NUM; i += 2)
> +    dst.x [i / 2] = _mm_cvtepi16_epi64 (src.x [i / 2]);
> +
> +  for (i = 0; i < NUM; i++)
> +    if (src.s[(i % 2) + (i / 2) * 8] != dst.ll[i])
> +      abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c
> new file mode 100644
> index 000000000000..098ef6a49cb0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c
> @@ -0,0 +1,43 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-O2 -mvsx" } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "sse4_1-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST sse4_1_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include <smmintrin.h>
> +
> +#define NUM 128
> +
> +static void
> +TEST (void)
> +{
> +  union
> +    {
> +      __m128i x[NUM / 4];
> +      unsigned int i[NUM];
> +      unsigned char c[NUM * 4];
> +    } dst, src;
> +  int i;
> +
> +  for (i = 0; i < NUM; i++)
> +    {
> +      src.c[(i % 4) + (i / 4) * 16] = i * i;
> +      if ((i % 4))
> +	src.c[(i % 4) + (i / 4) * 16] |= 0x80;
> +    }
> +
> +  for (i = 0; i < NUM; i += 4)
> +    dst.x [i / 4] = _mm_cvtepu8_epi32 (src.x [i / 4]);
> +
> +  for (i = 0; i < NUM; i++)
> +    if (src.c[(i % 4) + (i / 4) * 16] != dst.i[i])
> +      abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c
> new file mode 100644
> index 000000000000..7b862767436e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c
> @@ -0,0 +1,43 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-O2 -mvsx" } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "sse4_1-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST sse4_1_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include <smmintrin.h>
> +
> +#define NUM 128
> +
> +static void
> +TEST (void)
> +{
> +  union
> +    {
> +      __m128i x[NUM / 2];
> +      unsigned long long ll[NUM];
> +      unsigned char c[NUM * 8];
> +    } dst, src;
> +  int i;
> +
> +  for (i = 0; i < NUM; i++)
> +    {
> +      src.c[(i % 2) + (i / 2) * 16] = i * i;
> +      if ((i % 2))
> +	src.c[(i % 2) + (i / 2) * 16] |= 0x80;
> +    }
> +
> +  for (i = 0; i < NUM; i += 2)
> +    dst.x [i / 2] = _mm_cvtepu8_epi64 (src.x [i / 2]);
> +
> +  for (i = 0; i < NUM; i++)
> +    if (src.c[(i % 2) + (i / 2) * 16] != dst.ll[i])
> +      abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c
> new file mode 100644
> index 000000000000..9fdbec342d46
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c
> @@ -0,0 +1,43 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-O2 -mvsx" } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "sse4_1-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST sse4_1_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include <smmintrin.h>
> +
> +#define NUM 128
> +
> +static void
> +TEST (void)
> +{
> +  union
> +    {
> +      __m128i x[NUM / 8];
> +      unsigned short s[NUM];
> +      unsigned char c[NUM * 2];
> +    } dst, src;
> +  int i;
> +
> +  for (i = 0; i < NUM; i++)
> +    {
> +      src.c[(i % 8) + (i / 8) * 16] = i * i;
> +      if ((i % 4))
> +	src.c[(i % 8) + (i / 8) * 16] |= 0x80;
> +    }
> +
> +  for (i = 0; i < NUM; i += 8)
> +    dst.x [i / 8] = _mm_cvtepu8_epi16 (src.x [i / 8]);
> +
> +  for (i = 0; i < NUM; i++)
> +    if (src.c[(i % 8) + (i / 8) * 16] != dst.s[i])
> +      abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c
> new file mode 100644
> index 000000000000..7a5e7688d9f5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c
> @@ -0,0 +1,43 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-O2 -mvsx" } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "sse4_1-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST sse4_1_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include <smmintrin.h>
> +
> +#define NUM 128
> +
> +static void
> +TEST (void)
> +{
> +  union
> +    {
> +      __m128i x[NUM / 2];
> +      unsigned long long ll[NUM];
> +      unsigned int i[NUM * 2];
> +    } dst, src;
> +  int i;
> +
> +  for (i = 0; i < NUM; i++)
> +    {
> +      src.i[(i % 2) + (i / 2) * 4] = i * i;
> +      if ((i % 2))
> +        src.i[(i % 2) + (i / 2) * 4] |= 0x80000000;
> +    }
> +
> +  for (i = 0; i < NUM; i += 2)
> +    dst.x [i / 2] = _mm_cvtepu32_epi64 (src.x [i / 2]);
> +
> +  for (i = 0; i < NUM; i++)
> +    if (src.i[(i % 2) + (i / 2) * 4] != dst.ll[i])
> +      abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c
> new file mode 100644
> index 000000000000..078a5a45d909
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c
> @@ -0,0 +1,43 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-O2 -mvsx" } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "sse4_1-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST sse4_1_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include <smmintrin.h>
> +
> +#define NUM 128
> +
> +static void
> +TEST (void)
> +{
> +  union
> +    {
> +      __m128i x[NUM / 4];
> +      unsigned int i[NUM];
> +      unsigned short s[NUM * 2];
> +    } dst, src;
> +  int i;
> +
> +  for (i = 0; i < NUM; i++)
> +    {
> +      src.s[(i % 4) + (i / 4) * 8] = i * i;
> +      if ((i % 4))
> +	src.s[(i % 4) + (i / 4) * 8] |= 0x8000;
> +    }
> +
> +  for (i = 0; i < NUM; i += 4)
> +    dst.x [i / 4] = _mm_cvtepu16_epi32 (src.x [i / 4]);
> +
> +  for (i = 0; i < NUM; i++)
> +    if (src.s[(i % 4) + (i / 4) * 8] != dst.i[i])
> +      abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c
> new file mode 100644
> index 000000000000..120d00290faa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c
> @@ -0,0 +1,43 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-O2 -mvsx" } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "sse4_1-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST sse4_1_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include <smmintrin.h>
> +
> +#define NUM 128
> +
> +static void
> +TEST (void)
> +{
> +  union
> +    {
> +      __m128i x[NUM / 2];
> +      unsigned long long ll[NUM];
> +      unsigned short s[NUM * 4];
> +    } dst, src;
> +  int i;
> +
> +  for (i = 0; i < NUM; i++)
> +    {
> +      src.s[(i % 2) + (i / 2) * 8] = i * i;
> +      if ((i % 2))
> +	src.s[(i % 2) + (i / 2) * 8] |= 0x8000;
> +    }
> +
> +  for (i = 0; i < NUM; i += 2)
> +    dst.x [i / 2] = _mm_cvtepu16_epi64 (src.x [i / 2]);
> +
> +  for (i = 0; i < NUM; i++)
> +    if (src.s[(i % 2) + (i / 2) * 8] != dst.ll[i])
> +      abort ();
> +}
Segher Boessenkool Oct. 11, 2021, 9:52 p.m. UTC | #2
Hi!

On Mon, Aug 23, 2021 at 02:03:08PM -0500, Paul A. Clarke wrote:
> gcc
> 	* config/rs6000/smmintrin.h (_mm_cvtepi8_epi16, _mm_cvtepi8_epi32,
> 	_mm_cvtepi8_epi64, _mm_cvtepi16_epi32, _mm_cvtepi16_epi64,
> 	_mm_cvtepi32_epi64, _mm_cvtepu8_epi16, _mm_cvtepu8_epi32,
> 	_mm_cvtepu8_epi64, _mm_cvtepu16_epi32, _mm_cvtepu16_epi64,
> 	_mm_cvtepu32_epi64): New.
> 
> gcc/testsuite
> 	* gcc.target/powerpc/sse4_1-pmovsxbd.c: Copy from gcc.target/i386,
> 	adjust dg directives to suit.
> 	* gcc.target/powerpc/sse4_1-pmovsxbq.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovsxbw.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovsxdq.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovsxwd.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovsxwq.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovzxbd.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovzxbq.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovzxbw.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovzxdq.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovzxwd.c: Same.
> 	* gcc.target/powerpc/sse4_1-pmovzxwq.c: Same.

> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtepi8_epi16 (__m128i __A)
> +{
> +  return (__m128i) vec_unpackh ((__v16qi)__A);
> +}

This strange mixture of sometimes writing a cast with a space and
sometimes without one is...  strange :-)

Having up to three unpacks in a row seems suboptimal.  But it certainly
is aesthetically pleasing :-)

> +/* { dg-do run } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-O2 -mvsx" } */

Same as before here too (needs vsx_hw).

Okay for trunk with that fixed.  Thanks!


Segher
Paul A. Clarke Oct. 12, 2021, 1:51 a.m. UTC | #3
On Mon, Oct 11, 2021 at 04:52:44PM -0500, Segher Boessenkool wrote:
> On Mon, Aug 23, 2021 at 02:03:08PM -0500, Paul A. Clarke wrote:
[...]
> > +extern __inline __m128i
> > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm_cvtepi8_epi16 (__m128i __A)
> > +{
> > +  return (__m128i) vec_unpackh ((__v16qi)__A);
> > +}
> 
> This strange mixture of sometimes writing a cast with a space and
> sometimes without one is...  strange :-)
> 
> Having up to three unpacks in a row seems suboptimal.  But it certainly
> is aesthetically pleasing :-)
> 
> > +/* { dg-do run } */
> > +/* { dg-require-effective-target powerpc_vsx_ok } */
> > +/* { dg-options "-O2 -mvsx" } */
> 
> Same as before here too (needs vsx_hw).
> 
> Okay for trunk with that fixed.  Thanks!

This was committed:

rs6000: Support SSE4.1 "cvt" intrinsics

Function signatures and decorations match gcc/config/i386/smmintrin.h.

Also, copy tests for:
- _mm_cvtepi8_epi16, _mm_cvtepi8_epi32, _mm_cvtepi8_epi64
- _mm_cvtepi16_epi32, _mm_cvtepi16_epi64
- _mm_cvtepi32_epi64,
- _mm_cvtepu8_epi16, _mm_cvtepu8_epi32, _mm_cvtepu8_epi64
- _mm_cvtepu16_epi32, _mm_cvtepu16_epi64
- _mm_cvtepu32_epi64

from gcc/testsuite/gcc.target/i386.

sse4_1-pmovsxbd.c, sse4_1-pmovsxbq.c, and sse4_1-pmovsxbw.c were
modified from using "char" types to "signed char" types, because
the default is unsigned on powerpc.

2021-10-11  Paul A. Clarke  <pc@us.ibm.com>

gcc
        * config/rs6000/smmintrin.h (_mm_cvtepi8_epi16, _mm_cvtepi8_epi32,
        _mm_cvtepi8_epi64, _mm_cvtepi16_epi32, _mm_cvtepi16_epi64,
        _mm_cvtepi32_epi64, _mm_cvtepu8_epi16, _mm_cvtepu8_epi32,
        _mm_cvtepu8_epi64, _mm_cvtepu16_epi32, _mm_cvtepu16_epi64,
        _mm_cvtepu32_epi64): New.

gcc/testsuite
        * gcc.target/powerpc/sse4_1-pmovsxbd.c: Copy from gcc.target/i386,
        adjust dg directives to suit.
        * gcc.target/powerpc/sse4_1-pmovsxbq.c: Same.
        * gcc.target/powerpc/sse4_1-pmovsxbw.c: Same.
        * gcc.target/powerpc/sse4_1-pmovsxdq.c: Same.
        * gcc.target/powerpc/sse4_1-pmovsxwd.c: Same.
        * gcc.target/powerpc/sse4_1-pmovsxwq.c: Same.
        * gcc.target/powerpc/sse4_1-pmovzxbd.c: Same.
        * gcc.target/powerpc/sse4_1-pmovzxbq.c: Same.
        * gcc.target/powerpc/sse4_1-pmovzxbw.c: Same.
        * gcc.target/powerpc/sse4_1-pmovzxdq.c: Same.
        * gcc.target/powerpc/sse4_1-pmovzxwd.c: Same.
        * gcc.target/powerpc/sse4_1-pmovzxwq.c: Same.
---
v4: Fix "space after cast" and "vsx_ok" issues, per Segher review.

diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index f935ab060abc..ad6b68e13cce 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -330,6 +330,144 @@ _mm_max_epu32 (__m128i __X, __m128i __Y)
   return (__m128i) vec_max ((__v4su)__X, (__v4su)__Y);
 }
 
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi16 (__m128i __A)
+{
+  return (__m128i) vec_unpackh ((__v16qi) __A);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi32 (__m128i __A)
+{
+  __A = (__m128i) vec_unpackh ((__v16qi) __A);
+  return (__m128i) vec_unpackh ((__v8hi) __A);
+}
+
+#ifdef _ARCH_PWR8
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi64 (__m128i __A)
+{
+  __A = (__m128i) vec_unpackh ((__v16qi) __A);
+  __A = (__m128i) vec_unpackh ((__v8hi) __A);
+  return (__m128i) vec_unpackh ((__v4si) __A);
+}
+#endif
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi16_epi32 (__m128i __A)
+{
+  return (__m128i) vec_unpackh ((__v8hi) __A);
+}
+
+#ifdef _ARCH_PWR8
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi16_epi64 (__m128i __A)
+{
+  __A = (__m128i) vec_unpackh ((__v8hi) __A);
+  return (__m128i) vec_unpackh ((__v4si) __A);
+}
+#endif
+
+#ifdef _ARCH_PWR8
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi32_epi64 (__m128i __A)
+{
+  return (__m128i) vec_unpackh ((__v4si) __A);
+}
+#endif
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu8_epi16 (__m128i __A)
+{
+  const __v16qu __zero = {0};
+#ifdef __LITTLE_ENDIAN__
+  __A = (__m128i) vec_mergeh ((__v16qu) __A, __zero);
+#else /* __BIG_ENDIAN__.  */
+  __A = (__m128i) vec_mergeh (__zero, (__v16qu) __A);
+#endif /* __BIG_ENDIAN__.  */
+  return __A;
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu8_epi32 (__m128i __A)
+{
+  const __v16qu __zero = {0};
+#ifdef __LITTLE_ENDIAN__
+  __A = (__m128i) vec_mergeh ((__v16qu) __A, __zero);
+  __A = (__m128i) vec_mergeh ((__v8hu) __A, (__v8hu) __zero);
+#else /* __BIG_ENDIAN__.  */
+  __A = (__m128i) vec_mergeh (__zero, (__v16qu) __A);
+  __A = (__m128i) vec_mergeh ((__v8hu) __zero, (__v8hu) __A);
+#endif /* __BIG_ENDIAN__.  */
+  return __A;
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu8_epi64 (__m128i __A)
+{
+  const __v16qu __zero = {0};
+#ifdef __LITTLE_ENDIAN__
+  __A = (__m128i) vec_mergeh ((__v16qu) __A, __zero);
+  __A = (__m128i) vec_mergeh ((__v8hu) __A, (__v8hu) __zero);
+  __A = (__m128i) vec_mergeh ((__v4su) __A, (__v4su) __zero);
+#else /* __BIG_ENDIAN__.  */
+  __A = (__m128i) vec_mergeh (__zero, (__v16qu) __A);
+  __A = (__m128i) vec_mergeh ((__v8hu) __zero, (__v8hu) __A);
+  __A = (__m128i) vec_mergeh ((__v4su) __zero, (__v4su) __A);
+#endif /* __BIG_ENDIAN__.  */
+  return __A;
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu16_epi32 (__m128i __A)
+{
+  const __v8hu __zero = {0};
+#ifdef __LITTLE_ENDIAN__
+  __A = (__m128i) vec_mergeh ((__v8hu) __A, __zero);
+#else /* __BIG_ENDIAN__.  */
+  __A = (__m128i) vec_mergeh (__zero, (__v8hu) __A);
+#endif /* __BIG_ENDIAN__.  */
+  return __A;
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu16_epi64 (__m128i __A)
+{
+  const __v8hu __zero = {0};
+#ifdef __LITTLE_ENDIAN__
+  __A = (__m128i) vec_mergeh ((__v8hu) __A, __zero);
+  __A = (__m128i) vec_mergeh ((__v4su) __A, (__v4su) __zero);
+#else /* __BIG_ENDIAN__.  */
+  __A = (__m128i) vec_mergeh (__zero, (__v8hu) __A);
+  __A = (__m128i) vec_mergeh ((__v4su) __zero, (__v4su) __A);
+#endif /* __BIG_ENDIAN__.  */
+  return __A;
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu32_epi64 (__m128i __A)
+{
+  const __v4su __zero = {0};
+#ifdef __LITTLE_ENDIAN__
+  __A = (__m128i) vec_mergeh ((__v4su) __A, __zero);
+#else /* __BIG_ENDIAN__.  */
+  __A = (__m128i) vec_mergeh (__zero, (__v4su) __A);
+#endif /* __BIG_ENDIAN__.  */
+  return __A;
+}
+
 /* Return horizontal packed word minimum and its index in bits [15:0]
    and bits [18:16] respectively.  */
 __inline __m128i
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c
new file mode 100644
index 000000000000..99cca6150ea4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 4];
+      int i[NUM];
+      signed char c[NUM * 4];
+    } dst, src;
+  int i, sign = 1;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.c[(i % 4) + (i / 4) * 16] = i * i * sign;
+      sign = -sign;
+    }
+
+  for (i = 0; i < NUM; i += 4)
+    dst.x [i / 4] = _mm_cvtepi8_epi32 (src.x [i / 4]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.c[(i % 4) + (i / 4) * 16] != dst.i[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c
new file mode 100644
index 000000000000..9ec1ab7a4169
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-options "-O2 -mpower8-vector" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 2];
+      long long ll[NUM];
+      signed char c[NUM * 8];
+    } dst, src;
+  int i, sign = 1;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.c[(i % 2) + (i / 2) * 16] = i * i * sign;
+      sign = -sign;
+    }
+
+  for (i = 0; i < NUM; i += 2)
+    dst.x [i / 2] = _mm_cvtepi8_epi64 (src.x [i / 2]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.c[(i % 2) + (i / 2) * 16] != dst.ll[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c
new file mode 100644
index 000000000000..805897d929b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 8];
+      short s[NUM];
+      signed char c[NUM * 2];
+    } dst, src;
+  int i, sign = 1;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.c[(i % 8) + (i / 8) * 16] = i * i * sign;
+      sign = -sign;
+    }
+
+  for (i = 0; i < NUM; i += 8)
+    dst.x [i / 8] = _mm_cvtepi8_epi16 (src.x [i / 8]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.c[(i % 8) + (i / 8) * 16] != dst.s[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c
new file mode 100644
index 000000000000..1c263782240a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-options "-O2 -mpower8-vector" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 2];
+      long long ll[NUM];
+      int i[NUM * 2];
+    } dst, src;
+  int i, sign = 1;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.i[(i % 2) + (i / 2) * 4] = i * i * sign;
+      sign = -sign;
+    }
+
+  for (i = 0; i < NUM; i += 2)
+    dst.x [i / 2] = _mm_cvtepi32_epi64 (src.x [i / 2]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.i[(i % 2) + (i / 2) * 4] != dst.ll[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c
new file mode 100644
index 000000000000..43f30f024390
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 4];
+      int i[NUM];
+      short s[NUM * 2];
+    } dst, src;
+  int i, sign = 1;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.s[(i % 4) + (i / 4) * 8] = i * i * sign;
+      sign = -sign;
+    }
+
+  for (i = 0; i < NUM; i += 4)
+    dst.x [i / 4] = _mm_cvtepi16_epi32 (src.x [i / 4]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.s[(i % 4) + (i / 4) * 8] != dst.i[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c
new file mode 100644
index 000000000000..67864695a113
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-options "-O2 -mpower8-vector" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 2];
+      long long ll[NUM];
+      short s[NUM * 4];
+    } dst, src;
+  int i, sign = 1;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.s[(i % 2) + (i / 2) * 8] = i * i * sign;
+      sign = -sign;
+    }
+
+  for (i = 0; i < NUM; i += 2)
+    dst.x [i / 2] = _mm_cvtepi16_epi64 (src.x [i / 2]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.s[(i % 2) + (i / 2) * 8] != dst.ll[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c
new file mode 100644
index 000000000000..643a2a6abf3c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 4];
+      unsigned int i[NUM];
+      unsigned char c[NUM * 4];
+    } dst, src;
+  int i;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.c[(i % 4) + (i / 4) * 16] = i * i;
+      if ((i % 4))
+	src.c[(i % 4) + (i / 4) * 16] |= 0x80;
+    }
+
+  for (i = 0; i < NUM; i += 4)
+    dst.x [i / 4] = _mm_cvtepu8_epi32 (src.x [i / 4]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.c[(i % 4) + (i / 4) * 16] != dst.i[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c
new file mode 100644
index 000000000000..871f425c80eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 2];
+      unsigned long long ll[NUM];
+      unsigned char c[NUM * 8];
+    } dst, src;
+  int i;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.c[(i % 2) + (i / 2) * 16] = i * i;
+      if ((i % 2))
+	src.c[(i % 2) + (i / 2) * 16] |= 0x80;
+    }
+
+  for (i = 0; i < NUM; i += 2)
+    dst.x [i / 2] = _mm_cvtepu8_epi64 (src.x [i / 2]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.c[(i % 2) + (i / 2) * 16] != dst.ll[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c
new file mode 100644
index 000000000000..ee89ebc805fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 8];
+      unsigned short s[NUM];
+      unsigned char c[NUM * 2];
+    } dst, src;
+  int i;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.c[(i % 8) + (i / 8) * 16] = i * i;
+      if ((i % 4))
+	src.c[(i % 8) + (i / 8) * 16] |= 0x80;
+    }
+
+  for (i = 0; i < NUM; i += 8)
+    dst.x [i / 8] = _mm_cvtepu8_epi16 (src.x [i / 8]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.c[(i % 8) + (i / 8) * 16] != dst.s[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c
new file mode 100644
index 000000000000..3ec28ab263bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 2];
+      unsigned long long ll[NUM];
+      unsigned int i[NUM * 2];
+    } dst, src;
+  int i;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.i[(i % 2) + (i / 2) * 4] = i * i;
+      if ((i % 2))
+        src.i[(i % 2) + (i / 2) * 4] |= 0x80000000;
+    }
+
+  for (i = 0; i < NUM; i += 2)
+    dst.x [i / 2] = _mm_cvtepu32_epi64 (src.x [i / 2]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.i[(i % 2) + (i / 2) * 4] != dst.ll[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c
new file mode 100644
index 000000000000..decd9ff7f9ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 4];
+      unsigned int i[NUM];
+      unsigned short s[NUM * 2];
+    } dst, src;
+  int i;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.s[(i % 4) + (i / 4) * 8] = i * i;
+      if ((i % 4))
+	src.s[(i % 4) + (i / 4) * 8] |= 0x8000;
+    }
+
+  for (i = 0; i < NUM; i += 4)
+    dst.x [i / 4] = _mm_cvtepu16_epi32 (src.x [i / 4]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.s[(i % 4) + (i / 4) * 8] != dst.i[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c
new file mode 100644
index 000000000000..03830448d173
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 2];
+      unsigned long long ll[NUM];
+      unsigned short s[NUM * 4];
+    } dst, src;
+  int i;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.s[(i % 2) + (i / 2) * 8] = i * i;
+      if ((i % 2))
+	src.s[(i % 2) + (i / 2) * 8] |= 0x8000;
+    }
+
+  for (i = 0; i < NUM; i += 2)
+    dst.x [i / 2] = _mm_cvtepu16_epi64 (src.x [i / 2]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.s[(i % 2) + (i / 2) * 8] != dst.ll[i])
+      abort ();
+}
diff mbox series

Patch

diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index 363534cb06a2..fdef6674d16c 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -442,6 +442,144 @@  _mm_max_epu32 (__m128i __X, __m128i __Y)
   return (__m128i) vec_max ((__v4su)__X, (__v4su)__Y);
 }
 
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi16 (__m128i __A)
+{
+  return (__m128i) vec_unpackh ((__v16qi)__A);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi32 (__m128i __A)
+{
+  __A = (__m128i) vec_unpackh ((__v16qi)__A);
+  return (__m128i) vec_unpackh ((__v8hi)__A);
+}
+
+#ifdef _ARCH_PWR8
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi64 (__m128i __A)
+{
+  __A = (__m128i) vec_unpackh ((__v16qi)__A);
+  __A = (__m128i) vec_unpackh ((__v8hi)__A);
+  return (__m128i) vec_unpackh ((__v4si)__A);
+}
+#endif
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi16_epi32 (__m128i __A)
+{
+  return (__m128i) vec_unpackh ((__v8hi)__A);
+}
+
+#ifdef _ARCH_PWR8
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi16_epi64 (__m128i __A)
+{
+  __A = (__m128i) vec_unpackh ((__v8hi)__A);
+  return (__m128i) vec_unpackh ((__v4si)__A);
+}
+#endif
+
+#ifdef _ARCH_PWR8
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi32_epi64 (__m128i __A)
+{
+  return (__m128i) vec_unpackh ((__v4si)__A);
+}
+#endif
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu8_epi16 (__m128i __A)
+{
+  const __v16qu __zero = {0};
+#ifdef __LITTLE_ENDIAN__
+  __A = (__m128i) vec_mergeh ((__v16qu)__A, __zero);
+#else /* __BIG_ENDIAN__.  */
+  __A = (__m128i) vec_mergeh (__zero, (__v16qu)__A);
+#endif /* __BIG_ENDIAN__.  */
+  return __A;
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu8_epi32 (__m128i __A)
+{
+  const __v16qu __zero = {0};
+#ifdef __LITTLE_ENDIAN__
+  __A = (__m128i) vec_mergeh ((__v16qu)__A, __zero);
+  __A = (__m128i) vec_mergeh ((__v8hu)__A, (__v8hu)__zero);
+#else /* __BIG_ENDIAN__.  */
+  __A = (__m128i) vec_mergeh (__zero, (__v16qu)__A);
+  __A = (__m128i) vec_mergeh ((__v8hu)__zero, (__v8hu)__A);
+#endif /* __BIG_ENDIAN__.  */
+  return __A;
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu8_epi64 (__m128i __A)
+{
+  const __v16qu __zero = {0};
+#ifdef __LITTLE_ENDIAN__
+  __A = (__m128i) vec_mergeh ((__v16qu)__A, __zero);
+  __A = (__m128i) vec_mergeh ((__v8hu)__A, (__v8hu)__zero);
+  __A = (__m128i) vec_mergeh ((__v4su)__A, (__v4su)__zero);
+#else /* __BIG_ENDIAN__.  */
+  __A = (__m128i) vec_mergeh (__zero, (__v16qu)__A);
+  __A = (__m128i) vec_mergeh ((__v8hu)__zero, (__v8hu)__A);
+  __A = (__m128i) vec_mergeh ((__v4su)__zero, (__v4su)__A);
+#endif /* __BIG_ENDIAN__.  */
+  return __A;
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu16_epi32 (__m128i __A)
+{
+  const __v8hu __zero = {0};
+#ifdef __LITTLE_ENDIAN__
+  __A = (__m128i) vec_mergeh ((__v8hu)__A, __zero);
+#else /* __BIG_ENDIAN__.  */
+  __A = (__m128i) vec_mergeh (__zero, (__v8hu)__A);
+#endif /* __BIG_ENDIAN__.  */
+  return __A;
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu16_epi64 (__m128i __A)
+{
+  const __v8hu __zero = {0};
+#ifdef __LITTLE_ENDIAN__
+  __A = (__m128i) vec_mergeh ((__v8hu)__A, __zero);
+  __A = (__m128i) vec_mergeh ((__v4su)__A, (__v4su)__zero);
+#else /* __BIG_ENDIAN__.  */
+  __A = (__m128i) vec_mergeh (__zero, (__v8hu)__A);
+  __A = (__m128i) vec_mergeh ((__v4su)__zero, (__v4su)__A);
+#endif /* __BIG_ENDIAN__.  */
+  return __A;
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu32_epi64 (__m128i __A)
+{
+  const __v4su __zero = {0};
+#ifdef __LITTLE_ENDIAN__
+  __A = (__m128i) vec_mergeh ((__v4su)__A, __zero);
+#else /* __BIG_ENDIAN__.  */
+  __A = (__m128i) vec_mergeh (__zero, (__v4su)__A);
+#endif /* __BIG_ENDIAN__.  */
+  return __A;
+}
+
 /* Return horizontal packed word minimum and its index in bits [15:0]
    and bits [18:16] respectively.  */
 __inline __m128i
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c
new file mode 100644
index 000000000000..553c8dd84505
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c
@@ -0,0 +1,42 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 4];
+      int i[NUM];
+      signed char c[NUM * 4];
+    } dst, src;
+  int i, sign = 1;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.c[(i % 4) + (i / 4) * 16] = i * i * sign;
+      sign = -sign;
+    }
+
+  for (i = 0; i < NUM; i += 4)
+    dst.x [i / 4] = _mm_cvtepi8_epi32 (src.x [i / 4]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.c[(i % 4) + (i / 4) * 16] != dst.i[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c
new file mode 100644
index 000000000000..9ec1ab7a4169
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c
@@ -0,0 +1,42 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-options "-O2 -mpower8-vector" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 2];
+      long long ll[NUM];
+      signed char c[NUM * 8];
+    } dst, src;
+  int i, sign = 1;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.c[(i % 2) + (i / 2) * 16] = i * i * sign;
+      sign = -sign;
+    }
+
+  for (i = 0; i < NUM; i += 2)
+    dst.x [i / 2] = _mm_cvtepi8_epi64 (src.x [i / 2]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.c[(i % 2) + (i / 2) * 16] != dst.ll[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c
new file mode 100644
index 000000000000..be4cf417ca7e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c
@@ -0,0 +1,42 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 8];
+      short s[NUM];
+      signed char c[NUM * 2];
+    } dst, src;
+  int i, sign = 1;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.c[(i % 8) + (i / 8) * 16] = i * i * sign;
+      sign = -sign;
+    }
+
+  for (i = 0; i < NUM; i += 8)
+    dst.x [i / 8] = _mm_cvtepi8_epi16 (src.x [i / 8]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.c[(i % 8) + (i / 8) * 16] != dst.s[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c
new file mode 100644
index 000000000000..1c263782240a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c
@@ -0,0 +1,42 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-options "-O2 -mpower8-vector" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 2];
+      long long ll[NUM];
+      int i[NUM * 2];
+    } dst, src;
+  int i, sign = 1;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.i[(i % 2) + (i / 2) * 4] = i * i * sign;
+      sign = -sign;
+    }
+
+  for (i = 0; i < NUM; i += 2)
+    dst.x [i / 2] = _mm_cvtepi32_epi64 (src.x [i / 2]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.i[(i % 2) + (i / 2) * 4] != dst.ll[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c
new file mode 100644
index 000000000000..f0f31aba44ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c
@@ -0,0 +1,42 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 4];
+      int i[NUM];
+      short s[NUM * 2];
+    } dst, src;
+  int i, sign = 1;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.s[(i % 4) + (i / 4) * 8] = i * i * sign;
+      sign = -sign;
+    }
+
+  for (i = 0; i < NUM; i += 4)
+    dst.x [i / 4] = _mm_cvtepi16_epi32 (src.x [i / 4]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.s[(i % 4) + (i / 4) * 8] != dst.i[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c
new file mode 100644
index 000000000000..67864695a113
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c
@@ -0,0 +1,42 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-options "-O2 -mpower8-vector" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 2];
+      long long ll[NUM];
+      short s[NUM * 4];
+    } dst, src;
+  int i, sign = 1;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.s[(i % 2) + (i / 2) * 8] = i * i * sign;
+      sign = -sign;
+    }
+
+  for (i = 0; i < NUM; i += 2)
+    dst.x [i / 2] = _mm_cvtepi16_epi64 (src.x [i / 2]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.s[(i % 2) + (i / 2) * 8] != dst.ll[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c
new file mode 100644
index 000000000000..098ef6a49cb0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 4];
+      unsigned int i[NUM];
+      unsigned char c[NUM * 4];
+    } dst, src;
+  int i;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.c[(i % 4) + (i / 4) * 16] = i * i;
+      if ((i % 4))
+	src.c[(i % 4) + (i / 4) * 16] |= 0x80;
+    }
+
+  for (i = 0; i < NUM; i += 4)
+    dst.x [i / 4] = _mm_cvtepu8_epi32 (src.x [i / 4]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.c[(i % 4) + (i / 4) * 16] != dst.i[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c
new file mode 100644
index 000000000000..7b862767436e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 2];
+      unsigned long long ll[NUM];
+      unsigned char c[NUM * 8];
+    } dst, src;
+  int i;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.c[(i % 2) + (i / 2) * 16] = i * i;
+      if ((i % 2))
+	src.c[(i % 2) + (i / 2) * 16] |= 0x80;
+    }
+
+  for (i = 0; i < NUM; i += 2)
+    dst.x [i / 2] = _mm_cvtepu8_epi64 (src.x [i / 2]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.c[(i % 2) + (i / 2) * 16] != dst.ll[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c
new file mode 100644
index 000000000000..9fdbec342d46
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 8];
+      unsigned short s[NUM];
+      unsigned char c[NUM * 2];
+    } dst, src;
+  int i;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.c[(i % 8) + (i / 8) * 16] = i * i;
+      if ((i % 4))
+	src.c[(i % 8) + (i / 8) * 16] |= 0x80;
+    }
+
+  for (i = 0; i < NUM; i += 8)
+    dst.x [i / 8] = _mm_cvtepu8_epi16 (src.x [i / 8]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.c[(i % 8) + (i / 8) * 16] != dst.s[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c
new file mode 100644
index 000000000000..7a5e7688d9f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 2];
+      unsigned long long ll[NUM];
+      unsigned int i[NUM * 2];
+    } dst, src;
+  int i;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.i[(i % 2) + (i / 2) * 4] = i * i;
+      if ((i % 2))
+        src.i[(i % 2) + (i / 2) * 4] |= 0x80000000;
+    }
+
+  for (i = 0; i < NUM; i += 2)
+    dst.x [i / 2] = _mm_cvtepu32_epi64 (src.x [i / 2]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.i[(i % 2) + (i / 2) * 4] != dst.ll[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c
new file mode 100644
index 000000000000..078a5a45d909
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 4];
+      unsigned int i[NUM];
+      unsigned short s[NUM * 2];
+    } dst, src;
+  int i;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.s[(i % 4) + (i / 4) * 8] = i * i;
+      if ((i % 4))
+	src.s[(i % 4) + (i / 4) * 8] |= 0x8000;
+    }
+
+  for (i = 0; i < NUM; i += 4)
+    dst.x [i / 4] = _mm_cvtepu16_epi32 (src.x [i / 4]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.s[(i % 4) + (i / 4) * 8] != dst.i[i])
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c
new file mode 100644
index 000000000000..120d00290faa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+
+#define NUM 128
+
+static void
+TEST (void)
+{
+  union
+    {
+      __m128i x[NUM / 2];
+      unsigned long long ll[NUM];
+      unsigned short s[NUM * 4];
+    } dst, src;
+  int i;
+
+  for (i = 0; i < NUM; i++)
+    {
+      src.s[(i % 2) + (i / 2) * 8] = i * i;
+      if ((i % 2))
+	src.s[(i % 2) + (i / 2) * 8] |= 0x8000;
+    }
+
+  for (i = 0; i < NUM; i += 2)
+    dst.x [i / 2] = _mm_cvtepu16_epi64 (src.x [i / 2]);
+
+  for (i = 0; i < NUM; i++)
+    if (src.s[(i % 2) + (i / 2) * 8] != dst.ll[i])
+      abort ();
+}