diff mbox

[ARM] Remove movdi_vfp_cortexa8

Message ID AM5PR0802MB2610989BCB172720719E3F69839A0@AM5PR0802MB2610.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Wilco Dijkstra Dec. 14, 2016, 4:37 p.m. UTC
ping


From: Wilco Dijkstra
Sent: 29 November 2016 11:05
To: GCC Patches
Cc: nd
Subject: [PATCH][ARM] Remove movdi_vfp_cortexa8
    
Merge the movdi_vfp_cortexa8 pattern into movdi_vfp and remove it to avoid
unnecessary duplication and repeating bugs like PR78439 due to changes being
applied only to one of the duplicates.

Bootstrap OK for ARM and Thumb-2 gnueabihf targets. OK for commit?

ChangeLog:
2016-11-29  Wilco Dijkstra  <wdijkstr@arm.com>

        * config/arm/vfp.md (movdi_vfp): Merge changes from movdi_vfp_cortexa8.
        * (movdi_vfp_cortexa8): Remove pattern.
--

Comments

Kyrill Tkachov Dec. 14, 2016, 4:43 p.m. UTC | #1
Hi Wilco,

On 14/12/16 16:37, Wilco Dijkstra wrote:
>      
>
> ping
>
>
> From: Wilco Dijkstra
> Sent: 29 November 2016 11:05
> To: GCC Patches
> Cc: nd
> Subject: [PATCH][ARM] Remove movdi_vfp_cortexa8
>      
> Merge the movdi_vfp_cortexa8 pattern into movdi_vfp and remove it to avoid
> unnecessary duplication and repeating bugs like PR78439 due to changes being
> applied only to one of the duplicates.

When this was brought up Ramana requested some more investigations on the codegen impact [1].
Have you done the archaeology on why we had two patterns in the first place and what the codegen
effect is of remove the Cortex-A8-specific one?

Thanks,
Kyrill

[1] https://gcc.gnu.org/ml/gcc-patches/2016-11/msg02248.html

> Bootstrap OK for ARM and Thumb-2 gnueabihf targets. OK for commit?
>
> ChangeLog:
> 2016-11-29  Wilco Dijkstra  <wdijkstr@arm.com>
>
>          * config/arm/vfp.md (movdi_vfp): Merge changes from movdi_vfp_cortexa8.
>          * (movdi_vfp_cortexa8): Remove pattern.
> --
>
> diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
> index 2051f1018f1cbff9c5bf044e71304d78e615458e..a917aa625a7b15f6c9e2b549ab22e5219bb9b99c 100644
> --- a/gcc/config/arm/vfp.md
> +++ b/gcc/config/arm/vfp.md
> @@ -304,9 +304,9 @@
>   ;; DImode moves
>   
>   (define_insn "*movdi_vfp"
> -  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,r,w,w, Uv")
> +  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,!r,w,w, Uv")
>          (match_operand:DI 1 "di_operand"              "r,rDa,Db,Dc,mi,mi,q,r,w,w,Uvi,w"))]
> -  "TARGET_32BIT && TARGET_HARD_FLOAT && arm_tune != TARGET_CPU_cortexa8
> +  "TARGET_32BIT && TARGET_HARD_FLOAT
>      && (   register_operand (operands[0], DImode)
>          || register_operand (operands[1], DImode))
>      && !(TARGET_NEON && CONST_INT_P (operands[1])
> @@ -339,71 +339,25 @@
>       }
>     "
>     [(set_attr "type" "multiple,multiple,multiple,multiple,load2,load2,store2,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored")
> -   (set (attr "length") (cond [(eq_attr "alternative" "1,4,5,6") (const_int 8)
> +   (set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 8)
>                                 (eq_attr "alternative" "2") (const_int 12)
>                                 (eq_attr "alternative" "3") (const_int 16)
> +                             (eq_attr "alternative" "4,5,6")
> +                              (symbol_ref "arm_count_output_move_double_insns (operands) * 4")
>                                 (eq_attr "alternative" "9")
>                                  (if_then_else
>                                    (match_test "TARGET_VFP_SINGLE")
>                                    (const_int 8)
>                                    (const_int 4))]
>                                 (const_int 4)))
> +   (set_attr "predicable"    "yes")
>      (set_attr "arm_pool_range"     "*,*,*,*,1020,4096,*,*,*,*,1020,*")
>      (set_attr "thumb2_pool_range"     "*,*,*,*,1018,4094,*,*,*,*,1018,*")
>      (set_attr "neg_pool_range" "*,*,*,*,1004,0,*,*,*,*,1004,*")
> +   (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
>      (set_attr "arch"           "t2,any,any,any,a,t2,any,any,any,any,any,any")]
>   )
>   
> -(define_insn "*movdi_vfp_cortexa8"
> -  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,!r,w,w, Uv")
> -       (match_operand:DI 1 "di_operand"              "r,rDa,Db,Dc,mi,mi,r,r,w,w,Uvi,w"))]
> -  "TARGET_32BIT && TARGET_HARD_FLOAT && arm_tune == TARGET_CPU_cortexa8
> -    && (   register_operand (operands[0], DImode)
> -        || register_operand (operands[1], DImode))
> -    && !(TARGET_NEON && CONST_INT_P (operands[1])
> -        && neon_immediate_valid_for_move (operands[1], DImode, NULL, NULL))"
> -  "*
> -  switch (which_alternative)
> -    {
> -    case 0:
> -    case 1:
> -    case 2:
> -    case 3:
> -      return \"#\";
> -    case 4:
> -    case 5:
> -    case 6:
> -      return output_move_double (operands, true, NULL);
> -    case 7:
> -      return \"vmov%?\\t%P0, %Q1, %R1\\t%@ int\";
> -    case 8:
> -      return \"vmov%?\\t%Q0, %R0, %P1\\t%@ int\";
> -    case 9:
> -      return \"vmov%?.f64\\t%P0, %P1\\t%@ int\";
> -    case 10: case 11:
> -      return output_move_vfp (operands);
> -    default:
> -      gcc_unreachable ();
> -    }
> -  "
> -  [(set_attr "type" "multiple,multiple,multiple,multiple,load2,load2,store2,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored")
> -   (set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 8)
> -                               (eq_attr "alternative" "2") (const_int 12)
> -                               (eq_attr "alternative" "3") (const_int 16)
> -                               (eq_attr "alternative" "4,5,6")
> -                              (symbol_ref
> -                               "arm_count_output_move_double_insns (operands) \
> -                                 * 4")]
> -                              (const_int 4)))
> -   (set_attr "predicable"    "yes")
> -   (set_attr "arm_pool_range"     "*,*,*,*,1018,4094,*,*,*,*,1018,*")
> -   (set_attr "thumb2_pool_range"     "*,*,*,*,1018,4094,*,*,*,*,1018,*")
> -   (set_attr "neg_pool_range" "*,*,*,*,1004,0,*,*,*,*,1004,*")
> -   (set (attr "ce_count")
> -       (symbol_ref "get_attr_length (insn) / 4"))
> -   (set_attr "arch"           "t2,any,any,any,a,t2,any,any,any,any,any,any")]
> - )
> -
>   ;; HFmode moves
>   
>   (define_insn "*movhf_vfp_fp16"
>
Ramana Radhakrishnan Dec. 19, 2016, 9:43 a.m. UTC | #2
On Wed, Dec 14, 2016 at 5:43 PM, Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
> Kyrill Tkachov wrote:
>> On 14/12/16 16:37, Wilco Dijkstra wrote:
>>
>> > Merge the movdi_vfp_cortexa8 pattern into movdi_vfp and remove it to avoid
>> > unnecessary duplication and repeating bugs like PR78439 due to changes being
>> > applied only to one of the duplicates.
>>
>> When this was brought up Ramana requested some more investigations on the codegen impact [1].
>> Have you done the archaeology on why we had two patterns in the first place and what the codegen
>> effect is of remove the Cortex-A8-specific one?
>
> Yes, the reason to split the pattern was to introduce the '!' to discourage Neon->int moves on Cortex-A8 (https://patches.linaro.org/patch/541/). I am not removing the optimization for Cortex-A8, however I  haven't been able to find an example where it makes a difference, even on high register pressure code.
>

even on crafty  with -mtune=cortex-a8 ?

Ramana
> Wilco
>
>
>> Bootstrap OK for ARM and Thumb-2 gnueabihf targets. OK for commit?
>>
>> ChangeLog:
>> 2016-11-29  Wilco Dijkstra  <wdijkstr@arm.com>
>>
>>          * config/arm/vfp.md (movdi_vfp): Merge changes from movdi_vfp_cortexa8.
>>          * (movdi_vfp_cortexa8): Remove pattern.
>> --
>>
>> diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
>> index 2051f1018f1cbff9c5bf044e71304d78e615458e..a917aa625a7b15f6c9e2b549ab22e5219bb9b99c 100644
>> --- a/gcc/config/arm/vfp.md
>> +++ b/gcc/config/arm/vfp.md
>> @@ -304,9 +304,9 @@
>>   ;; DImode moves
>>
>>   (define_insn "*movdi_vfp"
>> -  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,r,w,w, Uv")
>> +  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,!r,w,w, Uv")
>>          (match_operand:DI 1 "di_operand"              "r,rDa,Db,Dc,mi,mi,q,r,w,w,Uvi,w"))]
>> -  "TARGET_32BIT && TARGET_HARD_FLOAT && arm_tune != TARGET_CPU_cortexa8
>> +  "TARGET_32BIT && TARGET_HARD_FLOAT
>>      && (   register_operand (operands[0], DImode)
>>          || register_operand (operands[1], DImode))
>>      && !(TARGET_NEON && CONST_INT_P (operands[1])
>> @@ -339,71 +339,25 @@
>>       }
>>     "
>>     [(set_attr "type" "multiple,multiple,multiple,multiple,load2,load2,store2,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored")
>> -   (set (attr "length") (cond [(eq_attr "alternative" "1,4,5,6") (const_int 8)
>> +   (set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 8)
>>                                 (eq_attr "alternative" "2") (const_int 12)
>>                                 (eq_attr "alternative" "3") (const_int 16)
>> +                             (eq_attr "alternative" "4,5,6")
>> +                              (symbol_ref "arm_count_output_move_double_insns (operands) * 4")
>>                                 (eq_attr "alternative" "9")
>>                                  (if_then_else
>>                                    (match_test "TARGET_VFP_SINGLE")
>>                                    (const_int 8)
>>                                    (const_int 4))]
>>                                 (const_int 4)))
>> +   (set_attr "predicable"    "yes")
>>      (set_attr "arm_pool_range"     "*,*,*,*,1020,4096,*,*,*,*,1020,*")
>>      (set_attr "thumb2_pool_range"     "*,*,*,*,1018,4094,*,*,*,*,1018,*")
>>      (set_attr "neg_pool_range" "*,*,*,*,1004,0,*,*,*,*,1004,*")
>> +   (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
>>      (set_attr "arch"           "t2,any,any,any,a,t2,any,any,any,any,any,any")]
>>   )
>>
>> -(define_insn "*movdi_vfp_cortexa8"
>> -  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,!r,w,w, Uv")
>> -       (match_operand:DI 1 "di_operand"              "r,rDa,Db,Dc,mi,mi,r,r,w,w,Uvi,w"))]
>> -  "TARGET_32BIT && TARGET_HARD_FLOAT && arm_tune == TARGET_CPU_cortexa8
>> -    && (   register_operand (operands[0], DImode)
>> -        || register_operand (operands[1], DImode))
>> -    && !(TARGET_NEON && CONST_INT_P (operands[1])
>> -        && neon_immediate_valid_for_move (operands[1], DImode, NULL, NULL))"
>> -  "*
>> -  switch (which_alternative)
>> -    {
>> -    case 0:
>> -    case 1:
>> -    case 2:
>> -    case 3:
>> -      return \"#\";
>> -    case 4:
>> -    case 5:
>> -    case 6:
>> -      return output_move_double (operands, true, NULL);
>> -    case 7:
>> -      return \"vmov%?\\t%P0, %Q1, %R1\\t%@ int\";
>> -    case 8:
>> -      return \"vmov%?\\t%Q0, %R0, %P1\\t%@ int\";
>> -    case 9:
>> -      return \"vmov%?.f64\\t%P0, %P1\\t%@ int\";
>> -    case 10: case 11:
>> -      return output_move_vfp (operands);
>> -    default:
>> -      gcc_unreachable ();
>> -    }
>> -  "
>> -  [(set_attr "type" "multiple,multiple,multiple,multiple,load2,load2,store2,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored")
>> -   (set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 8)
>> -                               (eq_attr "alternative" "2") (const_int 12)
>> -                               (eq_attr "alternative" "3") (const_int 16)
>> -                               (eq_attr "alternative" "4,5,6")
>> -                              (symbol_ref
>> -                               "arm_count_output_move_double_insns (operands) \
>> -                                 * 4")]
>> -                              (const_int 4)))
>> -   (set_attr "predicable"    "yes")
>> -   (set_attr "arm_pool_range"     "*,*,*,*,1018,4094,*,*,*,*,1018,*")
>> -   (set_attr "thumb2_pool_range"     "*,*,*,*,1018,4094,*,*,*,*,1018,*")
>> -   (set_attr "neg_pool_range" "*,*,*,*,1004,0,*,*,*,*,1004,*")
>> -   (set (attr "ce_count")
>> -       (symbol_ref "get_attr_length (insn) / 4"))
>> -   (set_attr "arch"           "t2,any,any,any,a,t2,any,any,any,any,any,any")]
>> - )
>> -
>>   ;; HFmode moves
>>
>>   (define_insn "*movhf_vfp_fp16"
>>
>
>
Wilco Dijkstra Dec. 19, 2016, 1:01 p.m. UTC | #3
Ramana Radhakrishnan wrote:
> On Wed, Dec 14, 2016 at 5:43 PM, Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:

> > Yes, the reason to split the pattern was to introduce the '!' to discourage Neon->int moves on Cortex-A8 (https://patches.linaro.org/patch/541/). I am not removing the optimization for Cortex-A8, however  I  haven't been able to find an example where it makes a difference, even on high register pressure code.
>
>
> even on crafty  with -mtune=cortex-a8 ?

Indeed the '!' makes no difference on crafty either with -mcpu=cortex-a8 -mfpu=neon.
Even if it did make a difference in the past, it is either totally ignored by the register
allocator or has no useful effect on deciding whether to use a Neon or integer register.

Wilco
Wilco Dijkstra Jan. 17, 2017, 12:10 p.m. UTC | #4
Wilco Dijkstra wrote:
> Ramana Radhakrishnan wrote:
>> On Wed, Dec 14, 2016 at 5:43 PM, Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
>
> > > Yes, the reason to split the pattern was to introduce the '!' to discourage Neon->int moves on Cortex-A8
> (https://patches.linaro.org/patch/541/). I am not removing the optimization for Cortex-A8, however 
> I  haven't been able to find an example where it makes a difference, even on high register pressure code.
> >
> > even on crafty  with -mtune=cortex-a8 ?
>
> Indeed the '!' makes no difference on crafty either with -mcpu=cortex-a8 -mfpu=neon.
> Even if it did make a difference in the past, it is either totally ignored by the register
> allocator or has no useful effect on deciding whether to use a Neon or integer register.

Any comments?

Wilco
diff mbox

Patch

diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 2051f1018f1cbff9c5bf044e71304d78e615458e..a917aa625a7b15f6c9e2b549ab22e5219bb9b99c 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -304,9 +304,9 @@ 
 ;; DImode moves
 
 (define_insn "*movdi_vfp"
-  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,r,w,w, Uv")
+  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,!r,w,w, Uv")
        (match_operand:DI 1 "di_operand"              "r,rDa,Db,Dc,mi,mi,q,r,w,w,Uvi,w"))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && arm_tune != TARGET_CPU_cortexa8
+  "TARGET_32BIT && TARGET_HARD_FLOAT
    && (   register_operand (operands[0], DImode)
        || register_operand (operands[1], DImode))
    && !(TARGET_NEON && CONST_INT_P (operands[1])
@@ -339,71 +339,25 @@ 
     }
   "
   [(set_attr "type" "multiple,multiple,multiple,multiple,load2,load2,store2,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored")
-   (set (attr "length") (cond [(eq_attr "alternative" "1,4,5,6") (const_int 8)
+   (set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 8)
                               (eq_attr "alternative" "2") (const_int 12)
                               (eq_attr "alternative" "3") (const_int 16)
+                             (eq_attr "alternative" "4,5,6")
+                              (symbol_ref "arm_count_output_move_double_insns (operands) * 4")
                               (eq_attr "alternative" "9")
                                (if_then_else
                                  (match_test "TARGET_VFP_SINGLE")
                                  (const_int 8)
                                  (const_int 4))]
                               (const_int 4)))
+   (set_attr "predicable"    "yes")
    (set_attr "arm_pool_range"     "*,*,*,*,1020,4096,*,*,*,*,1020,*")
    (set_attr "thumb2_pool_range"     "*,*,*,*,1018,4094,*,*,*,*,1018,*")
    (set_attr "neg_pool_range" "*,*,*,*,1004,0,*,*,*,*,1004,*")
+   (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
    (set_attr "arch"           "t2,any,any,any,a,t2,any,any,any,any,any,any")]
 )
 
-(define_insn "*movdi_vfp_cortexa8"
-  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,!r,w,w, Uv")
-       (match_operand:DI 1 "di_operand"              "r,rDa,Db,Dc,mi,mi,r,r,w,w,Uvi,w"))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && arm_tune == TARGET_CPU_cortexa8
-    && (   register_operand (operands[0], DImode)
-        || register_operand (operands[1], DImode))
-    && !(TARGET_NEON && CONST_INT_P (operands[1])
-        && neon_immediate_valid_for_move (operands[1], DImode, NULL, NULL))"
-  "*
-  switch (which_alternative)
-    {
-    case 0: 
-    case 1:
-    case 2:
-    case 3:
-      return \"#\";
-    case 4:
-    case 5:
-    case 6:
-      return output_move_double (operands, true, NULL);
-    case 7:
-      return \"vmov%?\\t%P0, %Q1, %R1\\t%@ int\";
-    case 8:
-      return \"vmov%?\\t%Q0, %R0, %P1\\t%@ int\";
-    case 9:
-      return \"vmov%?.f64\\t%P0, %P1\\t%@ int\";
-    case 10: case 11:
-      return output_move_vfp (operands);
-    default:
-      gcc_unreachable ();
-    }
-  "
-  [(set_attr "type" "multiple,multiple,multiple,multiple,load2,load2,store2,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored")
-   (set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 8)
-                               (eq_attr "alternative" "2") (const_int 12)
-                               (eq_attr "alternative" "3") (const_int 16)
-                               (eq_attr "alternative" "4,5,6") 
-                              (symbol_ref 
-                               "arm_count_output_move_double_insns (operands) \
-                                 * 4")]
-                              (const_int 4)))
-   (set_attr "predicable"    "yes")
-   (set_attr "arm_pool_range"     "*,*,*,*,1018,4094,*,*,*,*,1018,*")
-   (set_attr "thumb2_pool_range"     "*,*,*,*,1018,4094,*,*,*,*,1018,*")
-   (set_attr "neg_pool_range" "*,*,*,*,1004,0,*,*,*,*,1004,*")
-   (set (attr "ce_count") 
-       (symbol_ref "get_attr_length (insn) / 4"))
-   (set_attr "arch"           "t2,any,any,any,a,t2,any,any,any,any,any,any")]
- )
-
 ;; HFmode moves
 
 (define_insn "*movhf_vfp_fp16"