From patchwork Sun Jan 16 21:04:38 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "H.J. Lu" X-Patchwork-Id: 79102 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id F1AF5B7043 for ; Mon, 17 Jan 2011 08:04:54 +1100 (EST) Received: (qmail 22096 invoked by alias); 16 Jan 2011 21:04:52 -0000 Received: (qmail 22085 invoked by uid 22791); 16 Jan 2011 21:04:49 -0000 X-SWARE-Spam-Status: No, hits=-1.2 required=5.0 tests=AWL, BAYES_00, NO_DNS_FOR_FROM, TW_AV, TW_VP, TW_VX, T_RP_MATCHES_RCVD X-Spam-Check-By: sourceware.org Received: from mga03.intel.com (HELO mga03.intel.com) (143.182.124.21) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Sun, 16 Jan 2011 21:04:41 +0000 Received: from azsmga001.ch.intel.com ([10.2.17.19]) by azsmga101.ch.intel.com with ESMTP; 16 Jan 2011 13:04:39 -0800 X-ExtLoop1: 1 Received: from gnu-6.sc.intel.com ([10.3.194.135]) by azsmga001.ch.intel.com with ESMTP; 16 Jan 2011 13:04:38 -0800 Received: by gnu-6.sc.intel.com (Postfix, from userid 500) id 95478180909; Sun, 16 Jan 2011 13:04:38 -0800 (PST) Date: Sun, 16 Jan 2011 13:04:38 -0800 From: "H.J. Lu" To: gcc-patches@gcc.gnu.org Cc: Uros Bizjak Subject: PATCH: PR target/47318: _mm256_maskstore_pd has wrong prototype Message-ID: <20110116210438.GA4527@intel.com> Reply-To: "H.J. Lu" MIME-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.5.21 (2010-09-15) Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Hi, Mask operand of AVX mask load/store insns have the wrong type. This patch fixes it. OK for trunk/4.5/4.4? Thanks. H.J. --- gcc/ 2011-01-16 H.J. Lu PR target/47318 * config/i386/avxintrin.h (_mm_maskload_pd): Change mask to __m128i. (_mm_maskstore_pd): Likewise. (_mm_maskload_ps): Likewise. (_mm_maskstore_ps): Likewise. (_mm256_maskload_pd): Change mask to __m256i. (_mm256_maskstore_pd): Likewise. (_mm256_maskload_ps): Likewise. (_mm256_maskstore_ps): Likewise. * config/i386/i386-builtin-types.def: Updated. (ix86_expand_special_args_builtin): Likewise. * config/i386/i386.c (bdesc_special_args): Update __builtin_ia32_maskloadpd, __builtin_ia32_maskloadps, __builtin_ia32_maskloadpd256, __builtin_ia32_maskloadps256, __builtin_ia32_maskstorepd, __builtin_ia32_maskstoreps, __builtin_ia32_maskstorepd256 and __builtin_ia32_maskstoreps256. * config/i386/sse.md (avx_maskload): Use on mask register. (avx_maskstore): Likewise. gcc/testsuite/ 2011-01-16 H.J. Lu PR target/47318 * gcc.target/i386/avx-vmaskmovpd-1.c: New. * gcc.target/i386/avx-vmaskmovpd-2.c: Likewise. * gcc.target/i386/avx-vmaskmovps-1.c: Likewise. * gcc.target/i386/avx-vmaskmovps-1.c: Likewise. * gcc.target/i386/avx-vmaskmovpd-256-1.c (avx_test): Load mask as __m256i. * gcc.target/i386/avx-vmaskmovpd-256-2.c (avx_test): Likewise. * gcc.target/i386/avx-vmaskmovps-256-1.c (avx_test): Likewise. * gcc.target/i386/avx-vmaskmovps-256-2.c (avx_test): Likewise. diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h index 26925fd..70bfce1 100644 --- a/gcc/config/i386/avxintrin.h +++ b/gcc/config/i386/avxintrin.h @@ -890,55 +890,55 @@ _mm256_storeu_si256 (__m256i *__P, __m256i __A) } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskload_pd (double const *__P, __m128d __M) +_mm_maskload_pd (double const *__P, __m128i __M) { return (__m128d) __builtin_ia32_maskloadpd ((const __v2df *)__P, - (__v2df)__M); + (__v2di)__M); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskstore_pd (double *__P, __m128d __M, __m128d __A) +_mm_maskstore_pd (double *__P, __m128i __M, __m128d __A) { - __builtin_ia32_maskstorepd ((__v2df *)__P, (__v2df)__M, (__v2df)__A); + __builtin_ia32_maskstorepd ((__v2df *)__P, (__v2di)__M, (__v2df)__A); } extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskload_pd (double const *__P, __m256d __M) +_mm256_maskload_pd (double const *__P, __m256i __M) { return (__m256d) __builtin_ia32_maskloadpd256 ((const __v4df *)__P, - (__v4df)__M); + (__v4di)__M); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskstore_pd (double *__P, __m256d __M, __m256d __A) +_mm256_maskstore_pd (double *__P, __m256i __M, __m256d __A) { - __builtin_ia32_maskstorepd256 ((__v4df *)__P, (__v4df)__M, (__v4df)__A); + __builtin_ia32_maskstorepd256 ((__v4df *)__P, (__v4di)__M, (__v4df)__A); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskload_ps (float const *__P, __m128 __M) +_mm_maskload_ps (float const *__P, __m128i __M) { return (__m128) __builtin_ia32_maskloadps ((const __v4sf *)__P, - (__v4sf)__M); + (__v4si)__M); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskstore_ps (float *__P, __m128 __M, __m128 __A) +_mm_maskstore_ps (float *__P, __m128i __M, __m128 __A) { - __builtin_ia32_maskstoreps ((__v4sf *)__P, (__v4sf)__M, (__v4sf)__A); + __builtin_ia32_maskstoreps ((__v4sf *)__P, (__v4si)__M, (__v4sf)__A); } extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskload_ps (float const *__P, __m256 __M) +_mm256_maskload_ps (float const *__P, __m256i __M) { return (__m256) __builtin_ia32_maskloadps256 ((const __v8sf *)__P, - (__v8sf)__M); + (__v8si)__M); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskstore_ps (float *__P, __m256 __M, __m256 __A) +_mm256_maskstore_ps (float *__P, __m256i __M, __m256 __A) { - __builtin_ia32_maskstoreps256 ((__v8sf *)__P, (__v8sf)__M, (__v8sf)__A); + __builtin_ia32_maskstoreps256 ((__v8sf *)__P, (__v8si)__M, (__v8sf)__A); } extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 079c8ec..05a7f54 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -236,7 +236,7 @@ DEF_FUNCTION_TYPE (V1DI, V1DI, SI) DEF_FUNCTION_TYPE (V1DI, V1DI, V1DI) DEF_FUNCTION_TYPE (V1DI, V2SI, V2SI) DEF_FUNCTION_TYPE (V1DI, V8QI, V8QI) -DEF_FUNCTION_TYPE (V2DF, PCV2DF, V2DF) +DEF_FUNCTION_TYPE (V2DF, PCV2DF, V2DI) DEF_FUNCTION_TYPE (V2DF, V2DF, DI) DEF_FUNCTION_TYPE (V2DF, V2DF, INT) DEF_FUNCTION_TYPE (V2DF, V2DF, PCDOUBLE) @@ -258,7 +258,7 @@ DEF_FUNCTION_TYPE (V2SI, V2SF, V2SF) DEF_FUNCTION_TYPE (V2SI, V2SI, SI) DEF_FUNCTION_TYPE (V2SI, V2SI, V2SI) DEF_FUNCTION_TYPE (V2SI, V4HI, V4HI) -DEF_FUNCTION_TYPE (V4DF, PCV4DF, V4DF) +DEF_FUNCTION_TYPE (V4DF, PCV4DF, V4DI) DEF_FUNCTION_TYPE (V4DF, V4DF, INT) DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF) DEF_FUNCTION_TYPE (V4DF, V4DF, V4DI) @@ -267,7 +267,7 @@ DEF_FUNCTION_TYPE (V4HI, V4HI, INT) DEF_FUNCTION_TYPE (V4HI, V4HI, SI) DEF_FUNCTION_TYPE (V4HI, V4HI, V4HI) DEF_FUNCTION_TYPE (V4HI, V8QI, V8QI) -DEF_FUNCTION_TYPE (V4SF, PCV4SF, V4SF) +DEF_FUNCTION_TYPE (V4SF, PCV4SF, V4SI) DEF_FUNCTION_TYPE (V4SF, V4SF, DI) DEF_FUNCTION_TYPE (V4SF, V4SF, INT) DEF_FUNCTION_TYPE (V4SF, V4SF, PCV2SF) @@ -293,7 +293,7 @@ DEF_FUNCTION_TYPE (V8HI, V8SF, INT) DEF_FUNCTION_TYPE (V8HI, V4SF, INT) DEF_FUNCTION_TYPE (V8QI, V4HI, V4HI) DEF_FUNCTION_TYPE (V8QI, V8QI, V8QI) -DEF_FUNCTION_TYPE (V8SF, PCV8SF, V8SF) +DEF_FUNCTION_TYPE (V8SF, PCV8SF, V8SI) DEF_FUNCTION_TYPE (V8SF, V8SF, INT) DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF) DEF_FUNCTION_TYPE (V8SF, V8SF, V8SI) @@ -352,10 +352,10 @@ DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI, INT) DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, INT) DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, V8SI) DEF_FUNCTION_TYPE (VOID, PCVOID, UNSIGNED, UNSIGNED) -DEF_FUNCTION_TYPE (VOID, PV2DF, V2DF, V2DF) -DEF_FUNCTION_TYPE (VOID, PV4DF, V4DF, V4DF) -DEF_FUNCTION_TYPE (VOID, PV4SF, V4SF, V4SF) -DEF_FUNCTION_TYPE (VOID, PV8SF, V8SF, V8SF) +DEF_FUNCTION_TYPE (VOID, PV2DF, V2DI, V2DF) +DEF_FUNCTION_TYPE (VOID, PV4DF, V4DI, V4DF) +DEF_FUNCTION_TYPE (VOID, PV4SF, V4SI, V4SF) +DEF_FUNCTION_TYPE (VOID, PV8SF, V8SI, V8SF) DEF_FUNCTION_TYPE (VOID, UINT, UINT, UINT) DEF_FUNCTION_TYPE (VOID, UINT64, UINT, UINT) DEF_FUNCTION_TYPE (VOID, V16QI, V16QI, PCHAR) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index a26314b..f5843de 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -24483,14 +24483,14 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF }, { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID }, { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID }, @@ -26821,18 +26821,18 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, klass = load; memory = 1; break; - case V8SF_FTYPE_PCV8SF_V8SF: - case V4DF_FTYPE_PCV4DF_V4DF: - case V4SF_FTYPE_PCV4SF_V4SF: - case V2DF_FTYPE_PCV2DF_V2DF: + case V8SF_FTYPE_PCV8SF_V8SI: + case V4DF_FTYPE_PCV4DF_V4DI: + case V4SF_FTYPE_PCV4SF_V4SI: + case V2DF_FTYPE_PCV2DF_V2DI: nargs = 2; klass = load; memory = 0; break; - case VOID_FTYPE_PV8SF_V8SF_V8SF: - case VOID_FTYPE_PV4DF_V4DF_V4DF: - case VOID_FTYPE_PV4SF_V4SF_V4SF: - case VOID_FTYPE_PV2DF_V2DF_V2DF: + case VOID_FTYPE_PV8SF_V8SI_V8SF: + case VOID_FTYPE_PV4DF_V4DI_V4DF: + case VOID_FTYPE_PV4SF_V4SI_V4SF: + case VOID_FTYPE_PV2DF_V2DI_V2DF: nargs = 2; klass = store; /* Reserve memory operand for target. */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a942a2b..c6636a6 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -11938,7 +11938,7 @@ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") (unspec:AVXMODEF2P [(match_operand:AVXMODEF2P 1 "memory_operand" "m") - (match_operand:AVXMODEF2P 2 "register_operand" "x") + (match_operand: 2 "register_operand" "x") (match_dup 0)] UNSPEC_MASKLOAD))] "TARGET_AVX" @@ -11951,7 +11951,7 @@ (define_insn "avx_maskstore" [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m") (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "register_operand" "x") + [(match_operand: 1 "register_operand" "x") (match_operand:AVXMODEF2P 2 "register_operand" "x") (match_dup 0)] UNSPEC_MASKSTORE))] diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-1.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-1.c new file mode 100644 index 0000000..6204ebd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-1.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O2 -mavx" } */ + +#include "avx-check.h" + +#ifndef MASK +#define MASK 7 +#endif + +#define mask_v(pos) (((MASK & (0x1ULL << (pos))) >> (pos)) << 63) + +void static +avx_test (void) +{ + int i; + long long m[2] = {mask_v(0), mask_v(1)}; + double s[2] = {1.1, 2.2}; + union128d u; + union128i_q mask; + double e[2] = {0.0}; + + mask.x = _mm_loadu_si128 ((__m128i *)m); + u.x = _mm_maskload_pd (s, mask.x); + + for (i = 0 ; i < 2; i++) + e[i] = m[i] ? s[i] : 0; + + if (check_union128d (u, e)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-2.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-2.c new file mode 100644 index 0000000..6bc6207 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-2.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O2 -mavx" } */ + +#include "avx-check.h" + +#ifndef MASK +#define MASK 6 +#endif + +#define mask_v(pos) (((MASK & (0x1ULL << (pos))) >> (pos)) << 63) + +void static +avx_test (void) +{ + int i; + long long m[2] = {mask_v(0), mask_v(1)}; + double s[2] = {1.1, 2.2}; + double e[2] = {0.0}; + double d[2] = {0.0}; + union128d src; + union128i_q mask; + + src.x = _mm_loadu_pd (s); + mask.x = _mm_loadu_si128 ((__m128i *)m); + _mm_maskstore_pd (d, mask.x, src.x); + + for (i = 0 ; i < 2; i++) + e[i] = m[i] ? s[i] : 0; + + if (checkVd (d, e, 2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-256-1.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-256-1.c index f29826b..e591c05 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-256-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-256-1.c @@ -14,12 +14,13 @@ void static avx_test (void) { int i; - long long m[8] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3)}; + long long m[4] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3)}; double s[4] = {1.1, 2.2, 3.3, 4.4}; - union256d u, mask; + union256d u; + union256i_q mask; double e [4] = {0.0}; - mask.x = _mm256_loadu_pd ((double*)m); + mask.x = _mm256_loadu_si256 ((__m256i *)m); u.x = _mm256_maskload_pd (s, mask.x); for (i = 0 ; i < 4; i++) diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-256-2.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-256-2.c index 1e574b6..5df2f94 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-256-2.c +++ b/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-256-2.c @@ -18,10 +18,11 @@ avx_test (void) double s[4] = {1.1, 2.2, 3.3, 4.4}; double e [4] = {0.0}; double d [4] = {0.0}; - union256d src, mask; + union256d src; + union256i_q mask; src.x = _mm256_loadu_pd (s); - mask.x = _mm256_loadu_pd ((double*)m); + mask.x = _mm256_loadu_si256 ((__m256i *)m); _mm256_maskstore_pd (d, mask.x, src.x); for (i = 0 ; i < 4; i++) diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-1.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-1.c new file mode 100644 index 0000000..360a04d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-1.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O2 -mavx" } */ + +#include "avx-check.h" + +#ifndef MASK +#define MASK 134 +#endif + +#define mask_v(pos) (((MASK & (0x1 << (pos))) >> (pos)) << 31) + +void static +avx_test (void) +{ + int i; + int m[4] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3)}; + float s[4] = {1,2,3,4}; + union128 u; + union128i_d mask; + float e[4] = {0.0}; + + mask.x = _mm_loadu_si128 ((__m128i *)m); + u.x = _mm_maskload_ps (s, mask.x); + + for (i = 0 ; i < 4; i++) + e[i] = m[i] ? s[i] : 0; + + if (check_union128 (u, e)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-2.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-2.c new file mode 100644 index 0000000..3dde965 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-2.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O2 -mavx" } */ + +#include "avx-check.h" + +#ifndef MASK +#define MASK 214 +#endif + +#define mask_v(pos) (((MASK & (0x1 << (pos))) >> (pos)) << 31) + +void static +avx_test (void) +{ + int i; + int m[4] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3)}; + float s[4] = {1,2,3,4}; + union128 src; + union128i_d mask; + float e[4] = {0.0}; + float d[4] = {0.0}; + + src.x = _mm_loadu_ps (s); + mask.x = _mm_loadu_si128 ((__m128i *)m); + _mm_maskstore_ps (d, mask.x, src.x); + + for (i = 0 ; i < 4; i++) + e[i] = m[i] ? s[i] : 0; + + if (checkVf (d, e, 4)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-256-1.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-256-1.c index 9e6c7f9..647ce3f 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-256-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-256-1.c @@ -16,10 +16,11 @@ avx_test (void) int i; int m[8] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3), mask_v(4), mask_v(5), mask_v(6), mask_v(7)}; float s[8] = {1,2,3,4,5,6,7,8}; - union256 u, mask; + union256 u; + union256i_d mask; float e [8] = {0.0}; - mask.x = _mm256_loadu_ps ((float*)m); + mask.x = _mm256_loadu_si256 ((__m256i *)m); u.x = _mm256_maskload_ps (s, mask.x); for (i = 0 ; i < 8; i++) diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-256-2.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-256-2.c index 90d91a0..016904d 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-256-2.c +++ b/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-256-2.c @@ -16,12 +16,13 @@ avx_test (void) int i; int m[8] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3), mask_v(4), mask_v(5), mask_v(6), mask_v(7)}; float s[8] = {1,2,3,4,5,6,7,8}; - union256 src, mask; + union256 src; + union256i_d mask; float e [8] = {0.0}; float d [8] = {0.0}; src.x = _mm256_loadu_ps (s); - mask.x = _mm256_loadu_ps ((float *)m); + mask.x = _mm256_loadu_si256 ((__m256i *)m); _mm256_maskstore_ps (d, mask.x, src.x); for (i = 0 ; i < 8; i++)