diff mbox

New parameters to control stringop expansion libcall strategy

Message ID CAAkRFZ+muGUjANkKqbp8r4HvddywmRgz+xPbVAUbuU9rE7pC7Q@mail.gmail.com
State New
Headers show

Commit Message

Xinliang David Li Aug. 3, 2013, 4:21 a.m. UTC
On x86_64, when the expected size of memcpy/memset is known (e.g., with
FDO), the libcall strategy is used when the size is > 8192. This value is
hard-coded, which makes performance tuning difficult. This patch
adds two new parameters that make the threshold tunable. Potential usage
includes per-application libcall strategy min-size tuning based on summary
data with FDO (e.g., instruction working set size).

Bootstrapped and tested on x86_64/linux. OK for trunk?

thanks,

David


2013-08-02  Xinliang David Li  <davidxl@google.com>

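        * params.def (PARAM_MEMCPY_LIBCALL_MIN_SIZE,
        PARAM_MEMSET_LIBCALL_MIN_SIZE): New parameters.
        * config/i386/i386.c (i386_cost, i486_cost, pentium_cost,
        pentiumpro_cost, geode_cost, k6_cost, athlon_cost, k8_cost,
        pentium4_cost, nocona_cost, atom_cost, generic64_cost, core_cost,
        generic32_cost): Remove const.
        (ix86_option_override_internal): Override default libcall size
        limit with parameters.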

Comments

Jan Hubicka Aug. 3, 2013, 8:06 a.m. UTC | #1
> On x86_64, when the expected size of memcpy/memset is known (e.g, with
> FDO), libcall strategy is used with the size is > 8192. This value is
> hard coded, which makes it hard to do performance tuning. This patch
> adds two new parameters to do that. Potential usage includes
> per-application libcall strategy min-size tuning based on summary data
> with FDO (e.g, instruction workset size).
> 
> Bootstrap and tested on x86_64/linux. Ok for trunk?
> 
> thanks,
> 
> David
> 
> 
> 2013-08-02  Xinliang David Li  <davidxl@google.com>
> 
>         * params.def: New parameters.
>         * config/i386/i386.c (ix86_option_override_internal):
>         Override default libcall size limit with parameters.

Hi,
The problem with this is that we introduce a generic --param that is used only
by the x86 backend.  I am not really a guru on the command-line options, but I
think this is the first time we have tried to do such a thing.  I wonder whether
1) we want to introduce a target-specific params.def,
2) we want to use the usual -msomething= options, or
3) we want to go this way?

Honza
Xinliang David Li Aug. 3, 2013, 3:40 p.m. UTC | #2
On Sat, Aug 3, 2013 at 1:06 AM, Jan Hubicka <hubicka@ucw.cz> wrote:
>> On x86_64, when the expected size of memcpy/memset is known (e.g, with
>> FDO), libcall strategy is used with the size is > 8192. This value is
>> hard coded, which makes it hard to do performance tuning. This patch
>> adds two new parameters to do that. Potential usage includes
>> per-application libcall strategy min-size tuning based on summary data
>> with FDO (e.g, instruction workset size).
>>
>> Bootstrap and tested on x86_64/linux. Ok for trunk?
>>
>> thanks,
>>
>> David
>>
>>
>> 2013-08-02  Xinliang David Li  <davidxl@google.com>
>>
>>         * params.def: New parameters.
>>         * config/i386/i386.c (ix86_option_override_internal):
>>         Override default libcall size limit with parameters.
>
> Hi,
> problem with this is that we introduce generic --param that is used only
> by x86 backend.  I am not really guru on the command line options, but I think
> this is first time we try to do such thing.  I wonder if
> 1) We want to introduce target specific params.def

We do already have target-specific tuning code for parameters, though --
the backend overrides the default values -- so I think this is essentially
the same as having target-specific params.

> 2) We want to use usual -msomething= options
> 3) We want to go this way?

I don't have a strong opinion either way. To avoid controversy, let me
work on a -mxxx= version of the patch -- hopefully it will also be more
powerful.

thanks,

David

>
> Honza
diff mbox

Patch

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 201458)
+++ config/i386/i386.c	(working copy)
@@ -156,7 +156,7 @@  struct processor_costs ix86_size_cost =
 };
 
 /* Processor costs (relative to an add) */
-static const
+static
 struct processor_costs i386_cost = {	/* 386 specific costs */
   COSTS_N_INSNS (1),			/* cost of an add instruction */
   COSTS_N_INSNS (1),			/* cost of a lea instruction */
@@ -226,7 +226,7 @@  struct processor_costs i386_cost = {	/*
   1,					/* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs i486_cost = {	/* 486 specific costs */
   COSTS_N_INSNS (1),			/* cost of an add instruction */
   COSTS_N_INSNS (1),			/* cost of a lea instruction */
@@ -298,7 +298,7 @@  struct processor_costs i486_cost = {	/*
   1,					/* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs pentium_cost = {
   COSTS_N_INSNS (1),			/* cost of an add instruction */
   COSTS_N_INSNS (1),			/* cost of a lea instruction */
@@ -368,7 +368,7 @@  struct processor_costs pentium_cost = {
   1,					/* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs pentiumpro_cost = {
   COSTS_N_INSNS (1),			/* cost of an add instruction */
   COSTS_N_INSNS (1),			/* cost of a lea instruction */
@@ -447,7 +447,7 @@  struct processor_costs pentiumpro_cost =
   1,					/* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs geode_cost = {
   COSTS_N_INSNS (1),			/* cost of an add instruction */
   COSTS_N_INSNS (1),			/* cost of a lea instruction */
@@ -518,7 +518,7 @@  struct processor_costs geode_cost = {
   1,					/* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs k6_cost = {
   COSTS_N_INSNS (1),			/* cost of an add instruction */
   COSTS_N_INSNS (2),			/* cost of a lea instruction */
@@ -591,7 +591,7 @@  struct processor_costs k6_cost = {
   1,					/* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs athlon_cost = {
   COSTS_N_INSNS (1),			/* cost of an add instruction */
   COSTS_N_INSNS (2),			/* cost of a lea instruction */
@@ -664,7 +664,7 @@  struct processor_costs athlon_cost = {
   1,					/* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs k8_cost = {
   COSTS_N_INSNS (1),			/* cost of an add instruction */
   COSTS_N_INSNS (2),			/* cost of a lea instruction */
@@ -1265,7 +1265,7 @@  struct processor_costs btver2_cost = {
   1,					/* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs pentium4_cost = {
   COSTS_N_INSNS (1),			/* cost of an add instruction */
   COSTS_N_INSNS (3),			/* cost of a lea instruction */
@@ -1336,7 +1336,7 @@  struct processor_costs pentium4_cost = {
   1,					/* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs nocona_cost = {
   COSTS_N_INSNS (1),			/* cost of an add instruction */
   COSTS_N_INSNS (1),			/* cost of a lea instruction */
@@ -1409,7 +1409,7 @@  struct processor_costs nocona_cost = {
   1,					/* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs atom_cost = {
   COSTS_N_INSNS (1),			/* cost of an add instruction */
   COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
@@ -1556,7 +1556,7 @@  struct processor_costs slm_cost = {
 };
 
 /* Generic64 should produce code tuned for Nocona and K8.  */
-static const
+static
 struct processor_costs generic64_cost = {
   COSTS_N_INSNS (1),			/* cost of an add instruction */
   /* On all chips taken into consideration lea is 2 cycles and more.  With
@@ -1635,7 +1635,7 @@  struct processor_costs generic64_cost =
 };
 
 /* core_cost should produce code tuned for Core familly of CPUs.  */
-static const
+static
 struct processor_costs core_cost = {
   COSTS_N_INSNS (1),			/* cost of an add instruction */
   /* On all chips taken into consideration lea is 2 cycles and more.  With
@@ -1717,7 +1717,7 @@  struct processor_costs core_cost = {
 
 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
    Athlon and K8.  */
-static const
+static
 struct processor_costs generic32_cost = {
   COSTS_N_INSNS (1),			/* cost of an add instruction */
   COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
@@ -4021,6 +4021,34 @@  ix86_option_override_internal (bool main
   /* Handle stack protector */
   if (!global_options_set.x_ix86_stack_protector_guard)
     ix86_stack_protector_guard = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
+
+  /* Now override the memcpy/memset inline strategy parameters  */
+  if (PARAM_VALUE (PARAM_MEMCPY_LIBCALL_MIN_SIZE) != -1
+      || PARAM_VALUE (PARAM_MEMSET_LIBCALL_MIN_SIZE) != -1)
+    {
+      const struct stringop_algs *algs[2];
+      int k;
+      int min_sizes[2];
+
+      algs[0] = &ix86_cost->memset[TARGET_64BIT != 0];
+      algs[1] = &ix86_cost->memcpy[TARGET_64BIT != 0];
+
+      min_sizes[0] = PARAM_VALUE (PARAM_MEMSET_LIBCALL_MIN_SIZE);
+      min_sizes[1] = PARAM_VALUE (PARAM_MEMCPY_LIBCALL_MIN_SIZE);
+
+      for (k = 0; k < 2; k++)
+        {
+          if (min_sizes[k] == -1)
+            continue;
+
+          for (i = 0; i < MAX_STRINGOP_ALGS - 1; i++)
+            {
+              if (algs[k]->size[i].max >= min_sizes[k]
+                  || algs[k]->size[i + 1].alg == libcall)
+                *const_cast<int *>(&algs[k]->size[i].max) = min_sizes[k] - 1;
+            }
+        }
+    }
 }
 
 /* Implement the TARGET_OPTION_OVERRIDE hook.  */
Index: params.def
===================================================================
--- params.def	(revision 201458)
+++ params.def	(working copy)
@@ -117,6 +117,18 @@  DEFPARAM (PARAM_COMDAT_SHARING_PROBABILI
 	  "Probability that COMDAT function will be shared with different compilation unit",
 	  20, 0, 0)
 
+/* Use libcall strategy when the expected size is no less than this parameter for memcpy.  */
+DEFPARAM (PARAM_MEMCPY_LIBCALL_MIN_SIZE,
+	  "memcpy-libcall-min-size",
+	  "The minimal expected size to force libcall expansion strategy for memcpy",
+	  -1, 1, 0)
+
+/* Use libcall strategy when the expected size is no less than this parameter for memset.  */
+DEFPARAM (PARAM_MEMSET_LIBCALL_MIN_SIZE,
+	  "memset-libcall-min-size",
+	  "The minimal expected size to force libcall expansion strategy for memset",
+	  -1, 1, 0)
+
 /* Limit on probability of entry BB.  */
 DEFPARAM (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY,
 	  "partial-inlining-entry-probability",