diff mbox

[i386] Enable -mprefer-avx128 by default for Bulldozer

Message ID D4C76825A6780047854A11E93CDE84D005980DC70C@SAUSEXMBP01.amd.com
State New
Headers show

Commit Message

Fang, Changpeng June 23, 2011, 8:41 p.m. UTC
Hi,

This patch enables 128-bit AVX instruction generation in the auto-vectorizer for AMD Bulldozer
machines. This gives an additional ~3% improvement on the Polyhedron 2005 and CPU2006
floating-point programs.

The patch passed bootstrapping on an x86_64-unknown-linux-gnu system with Bulldozer cores.

Is it OK to commit to trunk and backport to 4.6 branch?

Thanks,

Changpeng

Comments

Jakub Jelinek June 23, 2011, 8:47 p.m. UTC | #1
On Thu, Jun 23, 2011 at 03:41:01PM -0500, Fang, Changpeng wrote:
> This patch enables 128-bit avx instruction generation for the auto-vectorizer for AMD bulldozer 
> machines. This enablement gives additional ~3% improvement on polyhedron 2005 and cpu2006
> floating point programs.
> 
> The patch passed bootstrapping on a x86_64-unknown-linux-gnu system with Bulldozer cores.
> 
> Is it OK to commit to trunk and backport to 4.6 branch?

For the 4.6 branch, if it is approved for trunk, please wait until after 4.6.1 is
released.

	Jakub
Jan Hubicka June 23, 2011, 11:20 p.m. UTC | #2
Hi,
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -2128,6 +2128,9 @@ static const unsigned int x86_avx256_split_unaligned_load
>  static const unsigned int x86_avx256_split_unaligned_store
>    = m_COREI7 | m_BDVER1 | m_GENERIC;
>  
> +static const unsigned int x86_prefer_avx128
> +  = m_BDVER1;

What is the reason for stuff like this not to go into initial_ix86_tune_features?
I sort of liked them better when they were individual flags, but having the target
tuning flags spread across multiple places seems unnecessary.

Honza
diff mbox

Patch

From b5015593b0b30b14783866ac68c2c5f2e014d206 Mon Sep 17 00:00:00 2001
From: Changpeng Fang <chfang@huainan.(none)>
Date: Wed, 22 Jun 2011 15:03:05 -0700
Subject: [PATCH] Auto-vectorizer generates 128-bit AVX insns by default for bdver1

	* config/i386/i386.opt (mprefer-avx128): Redefine the flag as a Mask option.

	* config/i386/i386.c (x86_prefer_avx128): New tune option definition.
	(ix86_option_override_internal): Enable the generation of the 128-bit
	instructions when x86_prefer_avx128 is set.
	(ix86_preferred_simd_mode): Use TARGET_PREFER_AVX128.
	(ix86_autovectorize_vector_sizes): Use TARGET_PREFER_AVX128.
---
 gcc/config/i386/i386.c   |   13 ++++++++++---
 gcc/config/i386/i386.opt |    2 +-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 014401b..1f5113f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2128,6 +2128,9 @@  static const unsigned int x86_avx256_split_unaligned_load
 static const unsigned int x86_avx256_split_unaligned_store
   = m_COREI7 | m_BDVER1 | m_GENERIC;
 
+static const unsigned int x86_prefer_avx128
+  = m_BDVER1;
+
 /* In case the average insn count for single function invocation is
    lower than this constant, emit fast (but longer) prologue and
    epilogue code.  */
@@ -2623,6 +2626,7 @@  ix86_target_string (int isa, int flags, const char *arch, const char *tune,
     { "-mvzeroupper",			MASK_VZEROUPPER },
     { "-mavx256-split-unaligned-load",	MASK_AVX256_SPLIT_UNALIGNED_LOAD},
     { "-mavx256-split-unaligned-store",	MASK_AVX256_SPLIT_UNALIGNED_STORE},
+    { "-mprefer-avx128",		MASK_PREFER_AVX128},
   };
 
   const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
@@ -3672,6 +3676,9 @@  ix86_option_override_internal (bool main_args_p)
 	  if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
 	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
 	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
+	  if ((x86_prefer_avx128 & ix86_tune_mask)
+	      && !(target_flags_explicit & MASK_PREFER_AVX128))
+	    target_flags |= MASK_PREFER_AVX128;
 	}
     }
   else 
@@ -34614,7 +34621,7 @@  ix86_preferred_simd_mode (enum machine_mode mode)
       return V2DImode;
 
     case SFmode:
-      if (TARGET_AVX && !flag_prefer_avx128)
+      if (TARGET_AVX && !TARGET_PREFER_AVX128)
 	return V8SFmode;
       else
 	return V4SFmode;
@@ -34622,7 +34629,7 @@  ix86_preferred_simd_mode (enum machine_mode mode)
     case DFmode:
       if (!TARGET_VECTORIZE_DOUBLE)
 	return word_mode;
-      else if (TARGET_AVX && !flag_prefer_avx128)
+      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
 	return V4DFmode;
       else if (TARGET_SSE2)
 	return V2DFmode;
@@ -34639,7 +34646,7 @@  ix86_preferred_simd_mode (enum machine_mode mode)
 static unsigned int
 ix86_autovectorize_vector_sizes (void)
 {
-  return (TARGET_AVX && !flag_prefer_avx128) ? 32 | 16 : 0;
+  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
 }
 
 /* Initialize the GCC target structure.  */
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 21e0def..9886b7b 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -388,7 +388,7 @@  Do dispatch scheduling if processor is bdver1 and Haifa scheduling
 is selected.
 
 mprefer-avx128
-Target Report Var(flag_prefer_avx128) Init(0)
+Target Report Mask(PREFER_AVX128) SAVE
 Use 128-bit AVX instructions instead of 256-bit AVX instructions in the auto-vectorizer.
 
 ;; ISA support
-- 
1.7.0.4