[2/3,rs6000] Fix x86-compat vector intrinsics testcases for BE, 32bit

Message ID	bb71ef8b-72fe-f7b8-dbb5-07ad97249536@us.ibm.com
State	New
Headers	show Return-Path: <gcc-patches-return-491626-incoming=patchwork.ozlabs.org@gcc.gnu.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:to :from:subject:date:message-id:content-type :content-transfer-encoding:mime-version; q=dns; s=default; b=Ol1 28niAO/VnUm+IoJWHtzAf4+gFWuReZvc5jyxRrlb1bLcCPMcNVWu6EFNjiCy8Hse +BieKd5Eapx58ZDYjFR7Ivrct+yjMcrquOfM9NdoxTxEBDgobKZhbmqkxx9BBCUR yP3w2mIXvj4DIgG5sXye+WLyxNNqxdtQUMgJDcQc= Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org Gateway: Authorized Use Only! Violators will be prosecuted for <gcc-patches@gcc.gnu.org> from <pc@us.ibm.com>; Tue, 4 Dec 2018 14:59:45 -0000 Gateway: Authorized Use Only! Violators will be prosecuted; (version=TLSv1/SSLv3 cipher=AES256-GCM-SHA384 bits=256/256) Tue, 4 Dec 2018 14:59:43 -0000 To: gcc-patches@gcc.gnu.org, Segher Boessenkool <segher@kernel.crashing.org> From: Paul Clarke <pc@us.ibm.com> Subject: [PATCH 2/3][rs6000] Fix x86-compat vector intrinsics testcases for BE, 32bit Date: Tue, 4 Dec 2018 08:59:40 -0600 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.9.1 Message-Id: <bb71ef8b-72fe-f7b8-dbb5-07ad97249536@us.ibm.com> Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: Quoted-printable MIME-Version: 1.0
Series	x86-compat vector intrinsics fixes for BE, 32bit | expand [0/3,rs6000] x86-compat vector intrinsics fixes for BE, 32bit [1/3,rs6000] x86-compat vector intrinsics fixes for BE, 32bit [2/3,rs6000] Fix x86-compat vector intrinsics testcases for BE, 32bit [3/3,rs6000] Enable x86-compat vector intrinsics testing

Index: gcc/config/rs6000/smmintrin.h =================================================================== diff --git a/trunk/gcc/config/rs6000/smmintrin.h b/trunk/gcc/config/rs6000/smmintrin.h new file mode 10644 --- /dev/null (revision 0) +++ b/trunk/gcc/config/rs6000/smmintrin.h (working copy) @@ -0,0 +1,67 @@ +/* Copyright (C) 2018 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 9.0. */ + +#ifndef NO_WARN_X86_INTRINSICS +/* This header is distributed to simplify porting x86_64 code that + makes explicit use of Intel intrinsics to powerpc64le. + It is the user's responsibility to determine if the results are + acceptable and make additional changes as necessary. + Note that much code that uses Intel intrinsics can be rewritten in + standard C or GNU C extensions, which are more portable and better + optimized across multiple targets. 
*/ +#endif + +#ifndef SMMINTRIN_H_ +#define SMMINTRIN_H_ + +#include <altivec.h> +#include <tmmintrin.h> + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_extract_epi8 (__m128i __X, const int __N) +{ + return (unsigned char) ((__v16qi)__X)[__N & 15]; +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_extract_epi32 (__m128i __X, const int __N) +{ + return ((__v4si)__X)[__N & 3]; +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_extract_epi64 (__m128i __X, const int __N) +{ + return ((__v2di)__X)[__N & 1]; +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_extract_ps (__m128 __X, const int __N) +{ + return ((__v4si)__X)[__N & 3]; +} + +#endif Index: gcc/config.gcc =================================================================== diff --git a/trunk/gcc/config.gcc b/trunk/gcc/config.gcc --- a/trunk/gcc/config.gcc (revision 266157) +++ b/trunk/gcc/config.gcc (working copy) @@ -504,7 +504,7 @@ powerpc*-*-*) extra_headers="${extra_headers} bmi2intrin.h bmiintrin.h" extra_headers="${extra_headers} xmmintrin.h mm_malloc.h emmintrin.h" extra_headers="${extra_headers} mmintrin.h x86intrin.h" - extra_headers="${extra_headers} pmmintrin.h tmmintrin.h" + extra_headers="${extra_headers} pmmintrin.h tmmintrin.h smmintrin.h" extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h si2vmx.h" extra_headers="${extra_headers} amo.h" case x$with_cpu in Index: gcc/testsuite/gcc.target/powerpc/mmx-packssdw-1.c =================================================================== diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/mmx-packssdw-1.c b/trunk/gcc/testsuite/gcc.target/powerpc/mmx-packssdw-1.c --- a/trunk/gcc/testsuite/gcc.target/powerpc/mmx-packssdw-1.c (revision 266157) +++ b/trunk/gcc/testsuite/gcc.target/powerpc/mmx-packssdw-1.c (working copy) @@ -22,37 +22,50 @@ test (__m64 
s1, __m64 s2) return _mm_packs_pi32 (s1, s2); } +static short +saturate (int val) +{ + if (val > 32767) + return 32767; + else if (val < -32768) + return -32768; + else + return val; +} + +static inline int +l_mm_extract_pi32 (__m64 b, int imm8) +{ + unsigned int shift = imm8 & 0x1; +#ifdef __BIG_ENDIAN__ + shift = 1 - shift; +#endif + return ((long long)b >> (shift * 32)) & 0xffffffff; +} + static void TEST (void) { __m64_union s1, s2; __m64_union u; __m64_union e; - int i; + int start, end, inc; s1.as_m64 = _mm_set_pi32 (2134, -128); s2.as_m64 = _mm_set_pi32 (41124, 234); u.as_m64 = test (s1.as_m64, s2.as_m64); - for (i = 0; i < 2; i++) - { - if (s1.as_int[i] > 32767) - e.as_short[i] = 32767; - else if (s1.as_int[i] < -32768) - e.as_short[i] = -32768; - else - e.as_short[i] = s1.as_int[i]; - } - - for (i = 0; i < 2; i++) - { - if (s2.as_int[i] > 32767) - e.as_short[i+2] = 32767; - else if (s2.as_int[i] < -32768) - e.as_short[i+2] = -32768; - else - e.as_short[i+2] = s2.as_int[i]; - } +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + e.as_m64 = _mm_set_pi16 (saturate (l_mm_extract_pi32 (s2.as_m64, 1)), + saturate (l_mm_extract_pi32 (s2.as_m64, 0)), + saturate (l_mm_extract_pi32 (s1.as_m64, 1)), + saturate (l_mm_extract_pi32 (s1.as_m64, 0))); +#else + e.as_m64 = _mm_set_pi16 (saturate (l_mm_extract_pi32 (s1.as_m64, 1)), + saturate (l_mm_extract_pi32 (s1.as_m64, 0)), + saturate (l_mm_extract_pi32 (s2.as_m64, 1)), + saturate (l_mm_extract_pi32 (s2.as_m64, 0))); +#endif if (u.as_m64 != e.as_m64) abort (); Index: gcc/testsuite/gcc.target/powerpc/mmx-packsswb-1.c =================================================================== diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/mmx-packsswb-1.c b/trunk/gcc/testsuite/gcc.target/powerpc/mmx-packsswb-1.c --- a/trunk/gcc/testsuite/gcc.target/powerpc/mmx-packsswb-1.c (revision 266157) +++ b/trunk/gcc/testsuite/gcc.target/powerpc/mmx-packsswb-1.c (working copy) @@ -14,6 +14,7 @@ #include CHECK_H #include <mmintrin.h> 
+#include <xmmintrin.h> static __m64 __attribute__((noinline, unused)) @@ -22,6 +23,17 @@ test (__m64 s1, __m64 s2) return _mm_packs_pi16 (s1, s2); } +static signed char +saturate (signed short val) +{ + if (val > 127) + return 127; + else if (val < -128) + return -128; + else + return val; +} + static void TEST (void) { @@ -34,25 +46,25 @@ TEST (void) s2.as_m64 = _mm_set_pi16 (41124, 234, 2344, 2354); u.as_m64 = test (s1.as_m64, s2.as_m64); - for (i = 0; i < 4; i++) - { - if (s1.as_short[i] > 127) - e.as_char[i] = 127; - else if (s1.as_short[i] < -128) - e.as_char[i] = -128; - else - e.as_char[i] = s1.as_short[i]; - } - - for (i = 0; i < 4; i++) - { - if (s2.as_short[i] > 127) - e.as_char[i+4] = 127; - else if (s2.as_short[i] < -128) - e.as_char[i+4] = -128; - else - e.as_char[i+4] = s2.as_short[i]; - } +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + e.as_m64 = _mm_set_pi8 (saturate (_mm_extract_pi16 (s2.as_m64, 3)), + saturate (_mm_extract_pi16 (s2.as_m64, 2)), + saturate (_mm_extract_pi16 (s2.as_m64, 1)), + saturate (_mm_extract_pi16 (s2.as_m64, 0)), + saturate (_mm_extract_pi16 (s1.as_m64, 3)), + saturate (_mm_extract_pi16 (s1.as_m64, 2)), + saturate (_mm_extract_pi16 (s1.as_m64, 1)), + saturate (_mm_extract_pi16 (s1.as_m64, 0))); +#else + e.as_m64 = _mm_set_pi8 (saturate (_mm_extract_pi16 (s1.as_m64, 3)), + saturate (_mm_extract_pi16 (s1.as_m64, 2)), + saturate (_mm_extract_pi16 (s1.as_m64, 1)), + saturate (_mm_extract_pi16 (s1.as_m64, 0)), + saturate (_mm_extract_pi16 (s2.as_m64, 3)), + saturate (_mm_extract_pi16 (s2.as_m64, 2)), + saturate (_mm_extract_pi16 (s2.as_m64, 1)), + saturate (_mm_extract_pi16 (s2.as_m64, 0))); +#endif if (u.as_m64 != e.as_m64) abort (); Index: gcc/testsuite/gcc.target/powerpc/mmx-packuswb-1.c =================================================================== diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/mmx-packuswb-1.c b/trunk/gcc/testsuite/gcc.target/powerpc/mmx-packuswb-1.c --- 
a/trunk/gcc/testsuite/gcc.target/powerpc/mmx-packuswb-1.c (revision 266157) +++ b/trunk/gcc/testsuite/gcc.target/powerpc/mmx-packuswb-1.c (working copy) @@ -15,6 +15,7 @@ #include CHECK_H #include <mmintrin.h> +#include <xmmintrin.h> static __m64 __attribute__((noinline, unused)) @@ -23,6 +24,17 @@ test (__m64 s1, __m64 s2) return _mm_packs_pu16 (s1, s2); } +static unsigned char +saturate (signed short val) +{ + if (val > 255) + return 255; + else if (val < 0) + return 0; + else + return val; +} + static void TEST (void) { @@ -35,17 +47,26 @@ TEST (void) s2.as_m64 = _mm_set_pi16 (-9, -10, -11, -12); u.as_m64 = test (s1.as_m64, s2.as_m64); - for (i=0; i<4; i++) - { - tmp = s1.as_short[i]<0 ? 0 : s1.as_short[i]; - tmp = tmp>255 ? 255 : tmp; - e.as_char[i] = tmp; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + e.as_m64 = _mm_set_pi8 (saturate (_mm_extract_pi16 (s2.as_m64, 3)), + saturate (_mm_extract_pi16 (s2.as_m64, 2)), + saturate (_mm_extract_pi16 (s2.as_m64, 1)), + saturate (_mm_extract_pi16 (s2.as_m64, 0)), + saturate (_mm_extract_pi16 (s1.as_m64, 3)), + saturate (_mm_extract_pi16 (s1.as_m64, 2)), + saturate (_mm_extract_pi16 (s1.as_m64, 1)), + saturate (_mm_extract_pi16 (s1.as_m64, 0))); +#else + e.as_m64 = _mm_set_pi8 (saturate (_mm_extract_pi16 (s1.as_m64, 3)), + saturate (_mm_extract_pi16 (s1.as_m64, 2)), + saturate (_mm_extract_pi16 (s1.as_m64, 1)), + saturate (_mm_extract_pi16 (s1.as_m64, 0)), + saturate (_mm_extract_pi16 (s2.as_m64, 3)), + saturate (_mm_extract_pi16 (s2.as_m64, 2)), + saturate (_mm_extract_pi16 (s2.as_m64, 1)), + saturate (_mm_extract_pi16 (s2.as_m64, 0))); +#endif - tmp = s2.as_short[i]<0 ? 0 : s2.as_short[i]; - tmp = tmp>255 ? 
255 : tmp; - e.as_char[i+4] = tmp; - } - if (u.as_m64 != e.as_m64) abort (); } Index: gcc/testsuite/gcc.target/powerpc/mmx-pmulhw-1.c =================================================================== diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/mmx-pmulhw-1.c b/trunk/gcc/testsuite/gcc.target/powerpc/mmx-pmulhw-1.c --- a/trunk/gcc/testsuite/gcc.target/powerpc/mmx-pmulhw-1.c (revision 266157) +++ b/trunk/gcc/testsuite/gcc.target/powerpc/mmx-pmulhw-1.c (working copy) @@ -33,13 +33,12 @@ TEST (void) s2.as_m64 = _mm_set_pi16 (11, 9834, 7444, -10222); u.as_m64 = test (s1.as_m64, s2.as_m64); - for (i = 0; i < 4; i++) - { - tmp = s1.as_short[i] * s2.as_short[i]; + e.as_m64 = _mm_set_pi16 ( + ((s1.as_short[3] * s2.as_short[3]) & 0xffff0000) >> 16, + ((s1.as_short[2] * s2.as_short[2]) & 0xffff0000) >> 16, + ((s1.as_short[1] * s2.as_short[1]) & 0xffff0000) >> 16, + ((s1.as_short[0] * s2.as_short[0]) & 0xffff0000) >> 16); - e.as_short[i] = (tmp & 0xffff0000)>>16; - } - if (u.as_m64 != e.as_m64) abort (); } Index: gcc/testsuite/gcc.target/powerpc/sse-cvtpi32x2ps-1.c =================================================================== diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/sse-cvtpi32x2ps-1.c b/trunk/gcc/testsuite/gcc.target/powerpc/sse-cvtpi32x2ps-1.c --- a/trunk/gcc/testsuite/gcc.target/powerpc/sse-cvtpi32x2ps-1.c (revision 266157) +++ b/trunk/gcc/testsuite/gcc.target/powerpc/sse-cvtpi32x2ps-1.c (working copy) @@ -27,8 +27,8 @@ static void TEST (void) { __m64_union s1, s2; - union128 u; - float e[4] = {1000.0, -20000.0, 43.0, 546.0}; + union128 u, e; + e.x = _mm_set_ps (546.0, 43.0, -20000.0, 1000.0); /* input signed in {1000, -20000, 43, 546}. 
*/ s1.as_m64 = _mm_setr_pi32 (1000, -20000); @@ -37,6 +37,6 @@ TEST (void) u.x = test (s1.as_m64, s2.as_m64); - if (check_union128 (u, e)) + if (check_union128 (u, e.a)) abort (); } Index: gcc/testsuite/gcc.target/powerpc/sse-cvtpu16ps-1.c =================================================================== diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/sse-cvtpu16ps-1.c b/trunk/gcc/testsuite/gcc.target/powerpc/sse-cvtpu16ps-1.c --- a/trunk/gcc/testsuite/gcc.target/powerpc/sse-cvtpu16ps-1.c (revision 266157) +++ b/trunk/gcc/testsuite/gcc.target/powerpc/sse-cvtpu16ps-1.c (working copy) @@ -27,14 +27,14 @@ static void TEST (void) { __m64_union s1; - union128 u; - float e[4] = {1000.0, 45536.0, 45.0, 65535.0}; + union128 u, e; + e.x = _mm_set_ps (65535.0, 45.0, 45536.0, 1000.0); /* input unsigned short {1000, 45536, 45, 65535}. */ s1.as_m64 = _mm_setr_pi16 (1000, -20000, 45, -1); u.x = test (s1.as_m64); - if (check_union128 (u, e)) + if (check_union128 (u, e.a)) abort (); } Index: gcc/testsuite/gcc.target/powerpc/sse-cvtss2si-1.c =================================================================== diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/sse-cvtss2si-1.c b/trunk/gcc/testsuite/gcc.target/powerpc/sse-cvtss2si-1.c --- a/trunk/gcc/testsuite/gcc.target/powerpc/sse-cvtss2si-1.c (revision 266157) +++ b/trunk/gcc/testsuite/gcc.target/powerpc/sse-cvtss2si-1.c (working copy) @@ -15,6 +15,7 @@ #endif #include <xmmintrin.h> +#include <smmintrin.h> static int __attribute__((noinline, unused)) @@ -29,12 +30,17 @@ TEST (void) { union128 s1; int d; - int e; + union { + float f; + int i; + } e; s1.x = _mm_set_ps (24.43, 68.346, 43.35, 546.46); d = test (s1.x); - e = (int)s1.a[0]; - if (e != d) + e.i = _mm_extract_ps (s1.x, 0); + e.i = e.f; + + if (e.i != d) abort (); } Index: gcc/testsuite/gcc.target/powerpc/sse-cvtss2si-2.c =================================================================== diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/sse-cvtss2si-2.c 
b/trunk/gcc/testsuite/gcc.target/powerpc/sse-cvtss2si-2.c --- a/trunk/gcc/testsuite/gcc.target/powerpc/sse-cvtss2si-2.c (revision 266157) +++ b/trunk/gcc/testsuite/gcc.target/powerpc/sse-cvtss2si-2.c (working copy) @@ -15,6 +15,7 @@ #endif #include <xmmintrin.h> +#include <smmintrin.h> static long long __attribute__((noinline, unused)) @@ -29,11 +30,17 @@ TEST (void) union128 s1; long long d; long long e; + union { + float f; + int i; + } u; s1.x = _mm_set_ps (344.4, 68.346, 43.35, 429496729501.4); d = test (s1.x); - e = (long long)s1.a[0]; + u.i = _mm_extract_ps (s1.x, 0); + e = u.f; + if (e != d) abort (); } Index: gcc/testsuite/gcc.target/powerpc/sse2-pshufhw-1.c =================================================================== diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/sse2-pshufhw-1.c b/trunk/gcc/testsuite/gcc.target/powerpc/sse2-pshufhw-1.c --- a/trunk/gcc/testsuite/gcc.target/powerpc/sse2-pshufhw-1.c (revision 266157) +++ b/trunk/gcc/testsuite/gcc.target/powerpc/sse2-pshufhw-1.c (working copy) @@ -26,24 +26,28 @@ test (__m128i s1) static void TEST (void) { - union128i_q s1; - union128i_w u; + union128i_w s1, u; short e[8] = { 0 }; int i; int m1[4] = { 0x3, 0x3<<2, 0x3<<4, 0x3<<6 }; int m2[4]; - s1.x = _mm_set_epi64x (0xabcde,0xef58a234); + s1.x = _mm_set_epi16 (0, 0, 0xa, 0xbcde, 0, 0, 0xef58, 0xa234); u.x = test (s1.x); for (i = 0; i < 4; i++) - e[i] = (s1.a[0]>>(16 * i)) & 0xffff; + e[i] = s1.a[i]; - for (i = 0; i < 4; i++) - m2[i] = (N & m1[i])>>(2*i); + for (i = 0; i < 4; i++) { + int i2 = i; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + i2 = 3 - i; +#endif + m2[i2] = (N & m1[i2]) >> (2 * i2); + } for (i = 0; i < 4; i++) - e[i+4] = (s1.a[1] >> (16 * m2[i])) & 0xffff; + e[i + 4] = s1.a[m2[i] + 4]; if (check_union128i_w(u, e)) { Index: gcc/testsuite/gcc.target/powerpc/sse2-pshuflw-1.c =================================================================== diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/sse2-pshuflw-1.c 
b/trunk/gcc/testsuite/gcc.target/powerpc/sse2-pshuflw-1.c --- a/trunk/gcc/testsuite/gcc.target/powerpc/sse2-pshuflw-1.c (revision 266157) +++ b/trunk/gcc/testsuite/gcc.target/powerpc/sse2-pshuflw-1.c (working copy) @@ -26,24 +26,28 @@ test (__m128i s1) static void TEST (void) { - union128i_q s1; - union128i_w u; + union128i_w s1, u; short e[8] = { 0 }; int i; int m1[4] = { 0x3, 0x3<<2, 0x3<<4, 0x3<<6 }; int m2[4]; - s1.x = _mm_set_epi64x (0xabcde,0xef58a234); + s1.x = _mm_set_epi16 (0, 0, 0xa, 0xbcde, 0, 0, 0xef58, 0xa234); u.x = test (s1.x); for (i = 0; i < 4; i++) - e[i+4] = (s1.a[1]>>(16 * i)) & 0xffff; + e[i + 4] = s1.a[i + 4]; - for (i = 0; i < 4; i++) - m2[i] = (N & m1[i])>>(2*i); + for (i = 0; i < 4; i++) { + int i2 = i; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + i2 = 3 - i; +#endif + m2[i2] = (N & m1[i2]) >> (2 * i2); + } for (i = 0; i < 4; i++) - e[i] = (s1.a[0] >> (16 * m2[i])) & 0xffff; + e[i] = s1.a[m2[i]]; if (check_union128i_w(u, e)) {

[2/3,rs6000] Fix x86-compat vector intrinsics testcases for BE, 32bit

Commit Message

Comments

Patch