From patchwork Wed Nov 10 23:02:19 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Uros Bizjak X-Patchwork-Id: 70716 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 90969B7122 for ; Thu, 11 Nov 2010 10:02:32 +1100 (EST) Received: (qmail 31865 invoked by alias); 10 Nov 2010 23:02:28 -0000 Received: (qmail 31850 invoked by uid 22791); 10 Nov 2010 23:02:26 -0000 X-SWARE-Spam-Status: No, hits=-1.8 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, FREEMAIL_FROM, RCVD_IN_DNSWL_NONE, TW_ZJ, T_TO_NO_BRKTS_FREEMAIL X-Spam-Check-By: sourceware.org Received: from mail-pz0-f47.google.com (HELO mail-pz0-f47.google.com) (209.85.210.47) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Wed, 10 Nov 2010 23:02:21 +0000 Received: by pzk34 with SMTP id 34so323535pzk.20 for ; Wed, 10 Nov 2010 15:02:20 -0800 (PST) MIME-Version: 1.0 Received: by 10.142.48.6 with SMTP id v6mr8106311wfv.48.1289430139946; Wed, 10 Nov 2010 15:02:19 -0800 (PST) Received: by 10.143.161.2 with HTTP; Wed, 10 Nov 2010 15:02:19 -0800 (PST) Date: Thu, 11 Nov 2010 00:02:19 +0100 Message-ID: Subject: [PATCH, i386]: Fix PR target/46419 From: Uros Bizjak To: gcc-patches@gcc.gnu.org Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Hello! 2010-11-10 Uros Bizjak PR target/46419 * config/i386/xmmintrin.h (_mm_cvtpi16_ps): Swap __hisi and __losi. (_mm_cvtpu16_ps): Ditto. testsuite/ChangeLog: 2010-11-10 Uros Bizjak PR target/46419 * gcc.target/i386/pr46419.c: New test. Patch was tested on x86_64-pc-linux-gnu {,-m32}. 
Patch was committed to mainline and will be committed to 4.4 and 4.5 release branches. Uros. Index: config/i386/xmmintrin.h =================================================================== --- config/i386/xmmintrin.h (revision 166563) +++ config/i386/xmmintrin.h (working copy) @@ -626,13 +626,13 @@ __sign = __builtin_ia32_pcmpgtw ((__v4hi)0LL, (__v4hi)__A); /* Convert the four words to doublewords. */ + __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign); __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, __sign); - __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign); /* Convert the doublewords to floating point two at a time. */ __zero = (__v4sf) _mm_setzero_ps (); - __ra = __builtin_ia32_cvtpi2ps (__zero, __hisi); - __rb = __builtin_ia32_cvtpi2ps (__ra, __losi); + __ra = __builtin_ia32_cvtpi2ps (__zero, __losi); + __rb = __builtin_ia32_cvtpi2ps (__ra, __hisi); return (__m128) __builtin_ia32_movlhps (__ra, __rb); } @@ -645,13 +645,13 @@ __v4sf __zero, __ra, __rb; /* Convert the four words to doublewords. */ + __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL); __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, (__v4hi)0LL); - __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL); /* Convert the doublewords to floating point two at a time. 
*/ __zero = (__v4sf) _mm_setzero_ps (); - __ra = __builtin_ia32_cvtpi2ps (__zero, __hisi); - __rb = __builtin_ia32_cvtpi2ps (__ra, __losi); + __ra = __builtin_ia32_cvtpi2ps (__zero, __losi); + __rb = __builtin_ia32_cvtpi2ps (__ra, __hisi); return (__m128) __builtin_ia32_movlhps (__ra, __rb); } Index: testsuite/gcc.target/i386/pr46419.c =================================================================== --- testsuite/gcc.target/i386/pr46419.c (revision 0) +++ testsuite/gcc.target/i386/pr46419.c (revision 0) @@ -0,0 +1,39 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -msse" } */ +/* { dg-require-effective-target sse } */ + +#include "sse-check.h" + +#include <xmmintrin.h> + +void __attribute__((noinline)) +sse_test (void) +{ + char image[4]; + __m128 image4; + float out[4] __attribute__ ((aligned (16))); + int i; + + for (i = 0; i < 4; i++) + image[i] = i + 1; + + image4 = + _mm_cvtpi8_ps (_mm_setr_pi8 + (image[0], image[1], image[2], image[3], 0, 0, 0, 0)); + _mm_store_ps (out, image4); + _mm_empty (); + + for (i = 0; i < 4; i++) + if (out[i] != (float) (i + 1)) + abort (); + + image4 = + _mm_cvtpu8_ps (_mm_setr_pi8 + (image[0], image[1], image[2], image[3], 0, 0, 0, 0)); + _mm_store_ps (out, image4); + _mm_empty (); + + for (i = 0; i < 4; i++) + if (out[i] != (float) (i + 1)) + abort (); +}