diff mbox

Enable EBX for x86 in 32bits PIC code

Message ID 20140826213712.GA39451@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Ilya Enkovich Aug. 26, 2014, 9:42 p.m. UTC
On 26 Aug 11:25, Vladimir Makarov wrote:
> On 08/26/2014 04:57 AM, Ilya Enkovich wrote:
> > I've looked into one of fails.  There is still a problem with
> > allocation in reload. Here is a piece of code which uses float
> > constant:
> >
> > (insn 1199 1198 1200 96 (set (reg:SI 3 bx)
> >         (reg:SI 1301 [528])) /usr/include/bits/stdlib-float.h:28 90
> > {*movsi_internal}
> >      (nil))
> > (call_insn 1200 1199 1201 96 (set (reg:DF 8 st)
> >         (call (mem:QI (symbol_ref:SI ("strtod") [flags 0x41]
> > <function_decl 0x2b29b8ea8900 strtod>) [0 strtod S1 A8])
> >             (const_int 8 [0x8]))) /usr/include/bits/stdlib-float.h:28
> > 661 {*call_value}
> >      (expr_list:REG_DEAD (reg:SI 3 bx)
> >         (expr_list:REG_CALL_DECL (symbol_ref:SI ("strtod") [flags
> > 0x41]  <function_decl 0x2b29b8ea8900 strtod>)
> >             (expr_list:REG_EH_REGION (const_int 0 [0])
> >                 (nil))))
> >     (expr_list (use (reg:SI 3 bx))
> >         (expr_list:SI (use (reg:SI 3 bx))
> >             (expr_list:SI (use (mem/f:SI (reg/f:SI 7 sp) [0  S4 A32]))
> >                 (expr_list:SI (use (mem/f:SI (plus:SI (reg/f:SI 7 sp)
> >                                 (const_int 4 [0x4])) [0  S4 A32]))
> >                     (nil))))))
> > (insn 1201 1200 1202 96 (set (reg:DF 321 [ D.7817 ])
> >         (reg:DF 8 st)) /usr/include/bits/stdlib-float.h:28 128 {*movdf_internal}
> >      (expr_list:REG_DEAD (reg:DF 8 st)
> >         (nil)))
> > (insn 1202 1201 1203 96 (set (reg:SF 322 [ D.7804 ])
> >         (float_truncate:SF (reg:DF 321 [ D.7817 ]))) read_arch.c:700
> > 157 {*truncdfsf_fast_sse}
> >      (expr_list:REG_DEAD (reg:DF 321 [ D.7817 ])
> >         (nil)))
> > (insn 1203 1202 1204 96 (set (mem:SF (reg/f:SI 198 [ D.7812 ]) [4
> > _130->frequency+0 S4 A32])
> >         (reg:SF 322 [ D.7804 ])) read_arch.c:700 129 {*movsf_internal}
> >      (nil))
> > (insn 1204 1203 1205 96 (set (reg:SF 1209)
> >         (mem/u/c:SF (plus:SI (reg:SI 1301 [528])
> >                 (const:SI (unspec:SI [
> >                             (symbol_ref/u:SI ("*.LC12") [flags 0x2])
> >                         ] UNSPEC_GOTOFF))) [4  S4 A32]))
> > read_arch.c:701 129 {*movsf_internal}
> >      (expr_list:REG_EQUAL (const_double:SF 0.0 [0x0.0p+0])
> >         (nil)))
> > (note 1205 1204 1206 96 NOTE_INSN_DELETED)
> > (note 1206 1205 1207 96 NOTE_INSN_DELETED)
> > (insn 1207 1206 1208 96 (set (reg:CCFP 17 flags)
> >         (compare:CCFP (reg:SF 1209)
> >             (reg:SF 322 [ D.7804 ]))) read_arch.c:701 53 {*cmpisf_sse}
> >      (nil))
> > (jump_insn 1208 1207 3075 96 (set (pc)
> >         (if_then_else (ge (reg:CCFP 17 flags)
> >                 (const_int 0 [0]))
> >             (label_ref:SI 3114)
> >             (pc))) read_arch.c:701 606 {*jcc_1}
> >      (expr_list:REG_DEAD (reg:CCFP 17 flags)
> >         (int_list:REG_BR_PROB 2 (nil)))
> >  -> 3114)
> > (note 3075 1208 1209 97 [bb 97] NOTE_INSN_BASIC_BLOCK)
> > (insn 1209 3075 1210 97 (set (reg:SF 1208)
> >         (mem/u/c:SF (plus:SI (reg:SI 1301 [528])
> >                 (const:SI (unspec:SI [
> >                             (symbol_ref/u:SI ("*.LC11") [flags 0x2])
> >                         ] UNSPEC_GOTOFF))) [4  S4 A32]))
> > read_arch.c:701 129 {*movsf_internal}
> >      (expr_list:REG_EQUIV (const_double:SF 1.0e+0 [0x0.8p+1])
> >         (nil)))
> > (note 1210 1209 1211 97 NOTE_INSN_DELETED)
> > (note 1211 1210 1212 97 NOTE_INSN_DELETED)
> > (insn 1212 1211 1213 97 (set (reg:CCFP 17 flags)
> >         (compare:CCFP (reg:SF 322 [ D.7804 ])
> >             (reg:SF 1208))) read_arch.c:701 53 {*cmpisf_sse}
> >      (nil))
> >
> > We have PIC register r1301 (former r528) used for constant load (insn
> > 1209).  This register was actually loaded to bx (insn 1199) and this
> > hard reg may be used by insn 1209.  During reload we have insn 1209
> > removed and a new one created instead:
> >
> > (insn 3864 1211 1212 104 (set (reg:SI 0 ax [1468])
> >         (plus:SI (reg:SI 6 bp [528])
> >             (const:SI (unspec:SI [
> >                         (symbol_ref/u:SI ("*.LC11") [flags 0x2])
> >                     ] UNSPEC_GOTOFF)))) read_arch.c:701 213 {*leasi}
> >      (expr_list:REG_EQUAL (symbol_ref/u:SI ("*.LC11") [flags 0x2])
> >         (nil)))
> > (insn 1212 3864 1213 104 (set (reg:CCFP 17 flags)
> >         (compare:CCFP (reg:SF 21 xmm0 [orig:322 D.7804 ] [322])
> >             (mem/u/c:SF (reg:SI 0 ax [1468]) [4  S4 A32])))
> > read_arch.c:701 53 {*cmpisf_sse}
> >      (nil))
> >
> > In this new instruction bp is used which is wrong. We actually have
> > required value in bx. In debugger I also checked that bp doesn't have
> > required value.  I suppose I enabled flag correctly because found this
> > in the log: "Spill r1301 after risky transformations".  Is it possible
> > we are still not allowed to use the original PIC register (r528) and
> > should use a reg copy created for particular region (in this case
> > r1301)?
> >
> It is hard for me to say without the full patch and the test.  I can
> only guess that 1301 gets a wrong class and is therefore assigned to
> the wrong hard reg.
> 
> Could you send me the patch and the test.  I'll look at this and inform
> you what is going on.
> 
> 
> 

Hi,

Here is a patch I tried.  I apply it over revision 214215.  Unfortunately I do not have a small reproducer, but the problem can be easily reproduced on the SPEC2000 benchmark 175.vpr.  The problem is in read_arch.c:701, where a float value is compared with the float constant 1.0.  It is inlined into the read_arch function and can be easily found in the RTL dump of function read_arch as a float comparison with 1.0 after the first call to the strtod function.

Here is a compilation string I use:

gcc -m32 -mno-movbe -g3 -fdump-rtl-all-details -O2 -ffast-math -mfpmath=sse -m32  -march=slm -fPIE -pie -c -o read_arch.o       -DSPEC_CPU2000        read_arch.c

In my final assembly, the comparison with 1.0 looks like:

comiss  .LC11@GOTOFF(%ebp), %xmm0       # 1101  *cmpisf_sse     [length = 7]

and %ebp here doesn't have a proper value.

I'll try to make a smaller reproducer if these instructions don't help.

Thank you for your help!
Ilya
--

Comments

Jeff Law Aug. 27, 2014, 9:39 p.m. UTC | #1
On 08/26/14 15:42, Ilya Enkovich wrote:
> diff --git a/gcc/calls.c b/gcc/calls.c
> index 4285ec1..85dae6b 100644
> --- a/gcc/calls.c
> +++ b/gcc/calls.c
> @@ -1122,6 +1122,14 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED,
>       call_expr_arg_iterator iter;
>       tree arg;
>
> +    if (targetm.calls.implicit_pic_arg (fndecl ? fndecl : fntype))
> +      {
> +	gcc_assert (pic_offset_table_rtx);
> +	args[j].tree_value = make_tree (ptr_type_node,
> +					pic_offset_table_rtx);
> +	j--;
> +      }
> +
>       if (struct_value_addr_value)
>         {
>   	args[j].tree_value = struct_value_addr_value;
So why do you need this?  Can't this be handled in the call/call_value 
expanders or what about attaching the use to CALL_INSN_FUNCTION_USAGE 
from inside ix86_expand_call?  Basically I'm not seeing the need for 
another target hook here.  I think that would significantly simplify the
patch as well.


Jeff
Ilya Enkovich Aug. 28, 2014, 8:37 a.m. UTC | #2
2014-08-28 1:39 GMT+04:00 Jeff Law <law@redhat.com>:
> On 08/26/14 15:42, Ilya Enkovich wrote:
>>
>> diff --git a/gcc/calls.c b/gcc/calls.c
>> index 4285ec1..85dae6b 100644
>> --- a/gcc/calls.c
>> +++ b/gcc/calls.c
>> @@ -1122,6 +1122,14 @@ initialize_argument_information (int num_actuals
>> ATTRIBUTE_UNUSED,
>>       call_expr_arg_iterator iter;
>>       tree arg;
>>
>> +    if (targetm.calls.implicit_pic_arg (fndecl ? fndecl : fntype))
>> +      {
>> +       gcc_assert (pic_offset_table_rtx);
>> +       args[j].tree_value = make_tree (ptr_type_node,
>> +                                       pic_offset_table_rtx);
>> +       j--;
>> +      }
>> +
>>       if (struct_value_addr_value)
>>         {
>>         args[j].tree_value = struct_value_addr_value;
>
> So why do you need this?  Can't this be handled in the call/call_value
> expanders or what about attaching the use to CALL_INSN_FUNCTION_USAGE from
> inside ix86_expand_call?  Basically I'm not seeing the need for another
> target hook here.  I think that would significantly simply the patch as
> well.

The GOT base address becomes an additional implicit arg with EBX relaxed,
and I handled it like all other args. I can move EBX initialization into
ix86_expand_call. I would still need some hint from the target to init
pic_offset_table_rtx with the proper value at the beginning of function
expand.

Thanks,
Ilya

>
>
> Jeff
Uros Bizjak Aug. 28, 2014, 12:42 p.m. UTC | #3
On Thu, Aug 28, 2014 at 10:37 AM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
> 2014-08-28 1:39 GMT+04:00 Jeff Law <law@redhat.com>:
>> On 08/26/14 15:42, Ilya Enkovich wrote:
>>>
>>> diff --git a/gcc/calls.c b/gcc/calls.c
>>> index 4285ec1..85dae6b 100644
>>> --- a/gcc/calls.c
>>> +++ b/gcc/calls.c
>>> @@ -1122,6 +1122,14 @@ initialize_argument_information (int num_actuals
>>> ATTRIBUTE_UNUSED,
>>>       call_expr_arg_iterator iter;
>>>       tree arg;
>>>
>>> +    if (targetm.calls.implicit_pic_arg (fndecl ? fndecl : fntype))
>>> +      {
>>> +       gcc_assert (pic_offset_table_rtx);
>>> +       args[j].tree_value = make_tree (ptr_type_node,
>>> +                                       pic_offset_table_rtx);
>>> +       j--;
>>> +      }
>>> +
>>>       if (struct_value_addr_value)
>>>         {
>>>         args[j].tree_value = struct_value_addr_value;
>>
>> So why do you need this?  Can't this be handled in the call/call_value
>> expanders or what about attaching the use to CALL_INSN_FUNCTION_USAGE from
>> inside ix86_expand_call?  Basically I'm not seeing the need for another
>> target hook here.  I think that would significantly simply the patch as
>> well.
>
> GOT base address become an additional implicit arg with EBX relaxed
> and I handled it as all other args. I can move EBX initialization into
> ix86_expand_call. Would still need some hint from target to init
> pic_offset_table_rtx with proper value in the beginning of function
> expand.

Maybe you can use get_hard_reg_initial_val for this?

Uros.
Ilya Enkovich Aug. 28, 2014, 12:54 p.m. UTC | #4
2014-08-28 16:42 GMT+04:00 Uros Bizjak <ubizjak@gmail.com>:
> On Thu, Aug 28, 2014 at 10:37 AM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>> 2014-08-28 1:39 GMT+04:00 Jeff Law <law@redhat.com>:
>>> On 08/26/14 15:42, Ilya Enkovich wrote:
>>>>
>>>> diff --git a/gcc/calls.c b/gcc/calls.c
>>>> index 4285ec1..85dae6b 100644
>>>> --- a/gcc/calls.c
>>>> +++ b/gcc/calls.c
>>>> @@ -1122,6 +1122,14 @@ initialize_argument_information (int num_actuals
>>>> ATTRIBUTE_UNUSED,
>>>>       call_expr_arg_iterator iter;
>>>>       tree arg;
>>>>
>>>> +    if (targetm.calls.implicit_pic_arg (fndecl ? fndecl : fntype))
>>>> +      {
>>>> +       gcc_assert (pic_offset_table_rtx);
>>>> +       args[j].tree_value = make_tree (ptr_type_node,
>>>> +                                       pic_offset_table_rtx);
>>>> +       j--;
>>>> +      }
>>>> +
>>>>       if (struct_value_addr_value)
>>>>         {
>>>>         args[j].tree_value = struct_value_addr_value;
>>>
>>> So why do you need this?  Can't this be handled in the call/call_value
>>> expanders or what about attaching the use to CALL_INSN_FUNCTION_USAGE from
>>> inside ix86_expand_call?  Basically I'm not seeing the need for another
>>> target hook here.  I think that would significantly simply the patch as
>>> well.
>>
>> GOT base address become an additional implicit arg with EBX relaxed
>> and I handled it as all other args. I can move EBX initialization into
>> ix86_expand_call. Would still need some hint from target to init
>> pic_offset_table_rtx with proper value in the beginning of function
>> expand.
>
> Maybe you can you use get_hard_reg_initial_val for this?

Actually there is no input hard reg holding the GOT address.  Currently I
initialize it from ebx, with the ebx initialization itself following in
the prolog_epilog pass.  But this is a temporary workaround.  It is
inefficient because it always uses a callee-saved reg to get the GOT
address.  I suppose we should generate a pseudo reg for
pic_offset_table_rtx and also emit set_got with this register as the
destination in the expand pass.  After register allocation set_got may be
transformed into a get_pc_thunk call with the proper hard reg.  But some
target hook has to be used for this.

Ilya

>
> Uros.
Uros Bizjak Aug. 28, 2014, 1:08 p.m. UTC | #5
On Thu, Aug 28, 2014 at 2:54 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:

>>>>> diff --git a/gcc/calls.c b/gcc/calls.c
>>>>> index 4285ec1..85dae6b 100644
>>>>> --- a/gcc/calls.c
>>>>> +++ b/gcc/calls.c
>>>>> @@ -1122,6 +1122,14 @@ initialize_argument_information (int num_actuals
>>>>> ATTRIBUTE_UNUSED,
>>>>>       call_expr_arg_iterator iter;
>>>>>       tree arg;
>>>>>
>>>>> +    if (targetm.calls.implicit_pic_arg (fndecl ? fndecl : fntype))
>>>>> +      {
>>>>> +       gcc_assert (pic_offset_table_rtx);
>>>>> +       args[j].tree_value = make_tree (ptr_type_node,
>>>>> +                                       pic_offset_table_rtx);
>>>>> +       j--;
>>>>> +      }
>>>>> +
>>>>>       if (struct_value_addr_value)
>>>>>         {
>>>>>         args[j].tree_value = struct_value_addr_value;
>>>>
>>>> So why do you need this?  Can't this be handled in the call/call_value
>>>> expanders or what about attaching the use to CALL_INSN_FUNCTION_USAGE from
>>>> inside ix86_expand_call?  Basically I'm not seeing the need for another
>>>> target hook here.  I think that would significantly simply the patch as
>>>> well.
>>>
>>> GOT base address become an additional implicit arg with EBX relaxed
>>> and I handled it as all other args. I can move EBX initialization into
>>> ix86_expand_call. Would still need some hint from target to init
>>> pic_offset_table_rtx with proper value in the beginning of function
>>> expand.
>>
>> Maybe you can you use get_hard_reg_initial_val for this?
>
> Actually there is no input hard reg holding GOT address.  Currently I
> use initialization with ebx with following ebx initialization in
> prolog_epilog pass.  But this is a temporary workaround.  It is
> inefficient because always uses callee save reg to get GOT address.  I
> suppose we should generate pseudo reg for pic_offset_table_rtx and
> also set_got with this register as a destination in expand pass.
> After register allocation set_got may be transformed into get_pc_thunk
> call with proper hard reg.  But some target hook has to be used for
> this.

Let me expand my idea a bit. IIRC, get_hard_reg_initial_val and
friends will automatically emit initialization of a pseudo from
pic_offset_table_rtx hard reg. After reload, real initialization of
pic_offset_table_rtx hard reg is emitted in pro_and_epilogue pass. I
don't know if this works with current implementation of dynamic
pic_offset_table_rtx selection, though.

Uros.
Ilya Enkovich Aug. 28, 2014, 1:29 p.m. UTC | #6
2014-08-28 17:08 GMT+04:00 Uros Bizjak <ubizjak@gmail.com>:
> On Thu, Aug 28, 2014 at 2:54 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>
>>>>>> diff --git a/gcc/calls.c b/gcc/calls.c
>>>>>> index 4285ec1..85dae6b 100644
>>>>>> --- a/gcc/calls.c
>>>>>> +++ b/gcc/calls.c
>>>>>> @@ -1122,6 +1122,14 @@ initialize_argument_information (int num_actuals
>>>>>> ATTRIBUTE_UNUSED,
>>>>>>       call_expr_arg_iterator iter;
>>>>>>       tree arg;
>>>>>>
>>>>>> +    if (targetm.calls.implicit_pic_arg (fndecl ? fndecl : fntype))
>>>>>> +      {
>>>>>> +       gcc_assert (pic_offset_table_rtx);
>>>>>> +       args[j].tree_value = make_tree (ptr_type_node,
>>>>>> +                                       pic_offset_table_rtx);
>>>>>> +       j--;
>>>>>> +      }
>>>>>> +
>>>>>>       if (struct_value_addr_value)
>>>>>>         {
>>>>>>         args[j].tree_value = struct_value_addr_value;
>>>>>
>>>>> So why do you need this?  Can't this be handled in the call/call_value
>>>>> expanders or what about attaching the use to CALL_INSN_FUNCTION_USAGE from
>>>>> inside ix86_expand_call?  Basically I'm not seeing the need for another
>>>>> target hook here.  I think that would significantly simply the patch as
>>>>> well.
>>>>
>>>> GOT base address become an additional implicit arg with EBX relaxed
>>>> and I handled it as all other args. I can move EBX initialization into
>>>> ix86_expand_call. Would still need some hint from target to init
>>>> pic_offset_table_rtx with proper value in the beginning of function
>>>> expand.
>>>
>>> Maybe you can you use get_hard_reg_initial_val for this?
>>
>> Actually there is no input hard reg holding GOT address.  Currently I
>> use initialization with ebx with following ebx initialization in
>> prolog_epilog pass.  But this is a temporary workaround.  It is
>> inefficient because always uses callee save reg to get GOT address.  I
>> suppose we should generate pseudo reg for pic_offset_table_rtx and
>> also set_got with this register as a destination in expand pass.
>> After register allocation set_got may be transformed into get_pc_thunk
>> call with proper hard reg.  But some target hook has to be used for
>> this.
>
> Let me expand my idea a bit. IIRC, get_hard_reg_initial_val and
> friends will automatically emit intialization of a pseudo from
> pic_offset_table_rtx hard reg. After reload, real initialization of
> pic_offset_table_rtx hard reg is emitted in pro_and_epilogue pass. I
> don't know if this works with current implementation of dynamic
> pic_offset_table_rtx selection, though.

That means you should choose some hard reg early, before register
allocation, to be used for PIC reg initialization.  I do not like that we
have to do this; I want to just generate set_got with a pseudo reg and
not involve any additional hard reg. That would look like

(insn/f 168 167 169 2 (parallel [
            (set (reg:SI 127)
                (unspec:SI [
                        (const_int 0 [0])
                    ] UNSPEC_SET_GOT))
            (clobber (reg:CC 17 flags))
        ]) test.cc:42 -1
     (expr_list:REG_CFA_FLUSH_QUEUE (nil)
        (nil)))

after expand pass.  r127 is pic_offset_table_rtx here. And after
reload it would become:

(insn/f 168 167 169 2 (parallel [
            (set (reg:SI 3 bx)
                (unspec:SI [
                        (const_int 0 [0])
                    ] UNSPEC_SET_GOT))
            (clobber (reg:CC 17 flags))
        ]) test.cc:42 -1
     (expr_list:REG_CFA_FLUSH_QUEUE (nil)
        (nil)))

And no additional actions are required in pro_and_epilogue.  It also
simplifies the analysis of whether we should generate set_got at all.
Currently we check whether the hard reg is ever live, which is wrong when
ebx is not fixed, because a use of the hard reg that is used to init the
GOT doesn't necessarily mean GOT usage.  And with my proposed scheme an
unused GOT would mean DCE just removes the useless set_got.

Ilya

>
> Uros.
Uros Bizjak Aug. 28, 2014, 4:25 p.m. UTC | #7
On Thu, Aug 28, 2014 at 3:29 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:

>>>>>>> diff --git a/gcc/calls.c b/gcc/calls.c
>>>>>>> index 4285ec1..85dae6b 100644
>>>>>>> --- a/gcc/calls.c
>>>>>>> +++ b/gcc/calls.c
>>>>>>> @@ -1122,6 +1122,14 @@ initialize_argument_information (int num_actuals
>>>>>>> ATTRIBUTE_UNUSED,
>>>>>>>       call_expr_arg_iterator iter;
>>>>>>>       tree arg;
>>>>>>>
>>>>>>> +    if (targetm.calls.implicit_pic_arg (fndecl ? fndecl : fntype))
>>>>>>> +      {
>>>>>>> +       gcc_assert (pic_offset_table_rtx);
>>>>>>> +       args[j].tree_value = make_tree (ptr_type_node,
>>>>>>> +                                       pic_offset_table_rtx);
>>>>>>> +       j--;
>>>>>>> +      }
>>>>>>> +
>>>>>>>       if (struct_value_addr_value)
>>>>>>>         {
>>>>>>>         args[j].tree_value = struct_value_addr_value;
>>>>>>
>>>>>> So why do you need this?  Can't this be handled in the call/call_value
>>>>>> expanders or what about attaching the use to CALL_INSN_FUNCTION_USAGE from
>>>>>> inside ix86_expand_call?  Basically I'm not seeing the need for another
>>>>>> target hook here.  I think that would significantly simply the patch as
>>>>>> well.
>>>>>
>>>>> GOT base address become an additional implicit arg with EBX relaxed
>>>>> and I handled it as all other args. I can move EBX initialization into
>>>>> ix86_expand_call. Would still need some hint from target to init
>>>>> pic_offset_table_rtx with proper value in the beginning of function
>>>>> expand.
>>>>
>>>> Maybe you can you use get_hard_reg_initial_val for this?
>>>
>>> Actually there is no input hard reg holding GOT address.  Currently I
>>> use initialization with ebx with following ebx initialization in
>>> prolog_epilog pass.  But this is a temporary workaround.  It is
>>> inefficient because always uses callee save reg to get GOT address.  I
>>> suppose we should generate pseudo reg for pic_offset_table_rtx and
>>> also set_got with this register as a destination in expand pass.
>>> After register allocation set_got may be transformed into get_pc_thunk
>>> call with proper hard reg.  But some target hook has to be used for
>>> this.
>>
>> Let me expand my idea a bit. IIRC, get_hard_reg_initial_val and
>> friends will automatically emit intialization of a pseudo from
>> pic_offset_table_rtx hard reg. After reload, real initialization of
>> pic_offset_table_rtx hard reg is emitted in pro_and_epilogue pass. I
>> don't know if this works with current implementation of dynamic
>> pic_offset_table_rtx selection, though.
>
> That means you should choose some hard reg early before register
> allocation to be used for PIC reg initialization.  I do not like we
> have to do this and want to just generate set_got with pseudo reg and
> do not involve any additional hard reg. That would look like
>
> (insn/f 168 167 169 2 (parallel [
>             (set (reg:SI 127)
>                 (unspec:SI [
>                         (const_int 0 [0])
>                     ] UNSPEC_SET_GOT))
>             (clobber (reg:CC 17 flags))
>         ]) test.cc:42 -1
>      (expr_list:REG_CFA_FLUSH_QUEUE (nil)
>         (nil)))
>
> after expand pass.  r127 is pic_offset_table_rtx here. And after
> reload it would become:
>
> (insn/f 168 167 169 2 (parallel [
>             (set (reg:SI 3 bx)
>                 (unspec:SI [
>                         (const_int 0 [0])
>                     ] UNSPEC_SET_GOT))
>             (clobber (reg:CC 17 flags))
>         ]) test.cc:42 -1
>      (expr_list:REG_CFA_FLUSH_QUEUE (nil)
>         (nil)))
>
> And no additional actions are required on pro_and_epilogue.  Also it
> simplifies analysis whether we should generate set_got at all.
> Current we check hard reg is ever live which is wrong with not fixed
> ebx because any usage of hard reg used to init GOT doesn't mean GOT
> usage.  And with my proposed scheme unused GOT would mean DCE just
> removes useless set_got.

Yes, this is better. I was under the impression you wanted to retain the
current initialization insertion in expand_prologue.

Uros.
Jeff Law Aug. 29, 2014, 6:56 p.m. UTC | #8
On 08/28/14 02:37, Ilya Enkovich wrote:
> 2014-08-28 1:39 GMT+04:00 Jeff Law <law@redhat.com>:
>> On 08/26/14 15:42, Ilya Enkovich wrote:
>>>
>>> diff --git a/gcc/calls.c b/gcc/calls.c
>>> index 4285ec1..85dae6b 100644
>>> --- a/gcc/calls.c
>>> +++ b/gcc/calls.c
>>> @@ -1122,6 +1122,14 @@ initialize_argument_information (int num_actuals
>>> ATTRIBUTE_UNUSED,
>>>        call_expr_arg_iterator iter;
>>>        tree arg;
>>>
>>> +    if (targetm.calls.implicit_pic_arg (fndecl ? fndecl : fntype))
>>> +      {
>>> +       gcc_assert (pic_offset_table_rtx);
>>> +       args[j].tree_value = make_tree (ptr_type_node,
>>> +                                       pic_offset_table_rtx);
>>> +       j--;
>>> +      }
>>> +
>>>        if (struct_value_addr_value)
>>>          {
>>>          args[j].tree_value = struct_value_addr_value;
>>
>> So why do you need this?  Can't this be handled in the call/call_value
>> expanders or what about attaching the use to CALL_INSN_FUNCTION_USAGE from
>> inside ix86_expand_call?  Basically I'm not seeing the need for another
>> target hook here.  I think that would significantly simply the patch as
>> well.
>
> GOT base address become an additional implicit arg with EBX relaxed
> and I handled it as all other args. I can move EBX initialization into
> ix86_expand_call. Would still need some hint from target to init
> pic_offset_table_rtx with proper value in the beginning of function
> expand.
It doesn't really need to be an argument in the traditional sense and 
adding it just complicates things with a target implementation detail as 
far as I can see.

I think you'll find that if you have the call pattern emit a copy from
pic_offset_table_rtx into EBX and attach a use of EBX to the call then
most of the code you've written to add the implicit argument just
disappears.

jeff
diff mbox

Patch

diff --git a/gcc/calls.c b/gcc/calls.c
index 4285ec1..85dae6b 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -1122,6 +1122,14 @@  initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED,
     call_expr_arg_iterator iter;
     tree arg;
 
+    if (targetm.calls.implicit_pic_arg (fndecl ? fndecl : fntype))
+      {
+	gcc_assert (pic_offset_table_rtx);
+	args[j].tree_value = make_tree (ptr_type_node,
+					pic_offset_table_rtx);
+	j--;
+      }
+
     if (struct_value_addr_value)
       {
 	args[j].tree_value = struct_value_addr_value;
@@ -2520,6 +2528,10 @@  expand_call (tree exp, rtx target, int ignore)
     /* Treat all args as named.  */
     n_named_args = num_actuals;
 
+  /* Add implicit PIC arg.  */
+  if (targetm.calls.implicit_pic_arg (fndecl ? fndecl : fntype))
+    num_actuals++;
+
   /* Make a vector to hold all the information about each arg.  */
   args = XALLOCAVEC (struct arg_data, num_actuals);
   memset (args, 0, num_actuals * sizeof (struct arg_data));
@@ -3133,6 +3145,8 @@  expand_call (tree exp, rtx target, int ignore)
 	{
 	  int arg_nr = return_flags & ERF_RETURN_ARG_MASK;
 	  arg_nr = num_actuals - arg_nr - 1;
+	  if (targetm.calls.implicit_pic_arg (fndecl ? fndecl : fntype))
+	    arg_nr--;
 	  if (arg_nr >= 0
 	      && arg_nr < num_actuals
 	      && args[arg_nr].reg
@@ -3700,8 +3714,8 @@  emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
      of the full argument passing conventions to limit complexity here since
      library functions shouldn't have many args.  */
 
-  argvec = XALLOCAVEC (struct arg, nargs + 1);
-  memset (argvec, 0, (nargs + 1) * sizeof (struct arg));
+  argvec = XALLOCAVEC (struct arg, nargs + 2);
+  memset (argvec, 0, (nargs + 2) * sizeof (struct arg));
 
 #ifdef INIT_CUMULATIVE_LIBCALL_ARGS
   INIT_CUMULATIVE_LIBCALL_ARGS (args_so_far_v, outmode, fun);
@@ -3717,6 +3731,23 @@  emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
 
   push_temp_slots ();
 
+  if (targetm.calls.implicit_pic_arg (fndecl ? fndecl : fntype))
+    {
+      gcc_assert (pic_offset_table_rtx);
+
+      argvec[count].value = pic_offset_table_rtx;
+      argvec[count].mode = Pmode;
+      argvec[count].partial = 0;
+
+      argvec[count].reg = targetm.calls.function_arg (args_so_far,
+						      Pmode, NULL_TREE, true);
+
+      targetm.calls.function_arg_advance (args_so_far, Pmode, NULL_TREE, true);
+
+      count++;
+      nargs++;
+    }
+
   /* If there's a structure value address to be passed,
      either pass it in the special place, or pass it as an extra argument.  */
   if (mem_value && struct_value == 0 && ! pcc_struct_value)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index cc4b0c7..cfafcdd 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -6133,6 +6133,21 @@  ix86_maybe_switch_abi (void)
     reinit_regs ();
 }
 
+/* Return reg in which implicit PIC base address
+   arg is passed.  */
+static rtx
+ix86_implicit_pic_arg (const_tree fntype_or_decl ATTRIBUTE_UNUSED)
+{
+  if ((TARGET_64BIT
+       && (ix86_cmodel == CM_SMALL_PIC
+	   || TARGET_PECOFF))
+      || !flag_pic
+      || !X86_TUNE_RELAX_PIC_REG)
+    return NULL_RTX;
+
+  return gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
+}
+
 /* Initialize a variable CUM of type CUMULATIVE_ARGS
    for a call to a function whose data type is FNTYPE.
    For a library call, FNTYPE is 0.  */
@@ -6198,6 +6213,11 @@  init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
 		      ? (!prototype_p (fntype) || stdarg_p (fntype))
 		      : !libname);
 
+  if (caller)
+    cum->implicit_pic_arg = ix86_implicit_pic_arg (fndecl ? fndecl : fntype);
+  else
+    cum->implicit_pic_arg = NULL_RTX;
+
   if (!TARGET_64BIT)
     {
       /* If there are variable arguments, then we won't pass anything
@@ -7291,7 +7311,9 @@  ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
   if (type)
     mode = type_natural_mode (type, NULL, false);
 
-  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
+  if (cum->implicit_pic_arg)
+    cum->implicit_pic_arg = NULL_RTX;
+  else if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
     function_arg_advance_ms_64 (cum, bytes, words);
   else if (TARGET_64BIT)
     function_arg_advance_64 (cum, mode, type, words, named);
@@ -7542,7 +7564,9 @@  ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
   if (type && TREE_CODE (type) == VECTOR_TYPE)
     mode = type_natural_mode (type, cum, false);
 
-  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
+  if (cum->implicit_pic_arg)
+    arg = cum->implicit_pic_arg;
+  else if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
     arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
   else if (TARGET_64BIT)
     arg = function_arg_64 (cum, mode, omode, type, named);
@@ -9373,6 +9397,9 @@  gen_pop (rtx arg)
 static unsigned int
 ix86_select_alt_pic_regnum (void)
 {
+  if (ix86_implicit_pic_arg (NULL))
+    return INVALID_REGNUM;
+
   if (crtl->is_leaf
       && !crtl->profile
       && !ix86_current_function_calls_tls_descriptor)
@@ -11236,7 +11263,8 @@  ix86_expand_prologue (void)
 	}
       else
 	{
-          insn = emit_insn (gen_set_got (pic_offset_table_rtx));
+	  rtx reg = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
+          insn = emit_insn (gen_set_got (reg));
 	  RTX_FRAME_RELATED_P (insn) = 1;
 	  add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
 	}
@@ -11789,7 +11817,8 @@  ix86_expand_epilogue (int style)
 static void
 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
 {
-  if (pic_offset_table_rtx)
+  if (pic_offset_table_rtx
+      && REGNO (pic_offset_table_rtx) < FIRST_PSEUDO_REGISTER)
     SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
 #if TARGET_MACHO
   /* Mach-O doesn't support labels at the end of objects, so if
@@ -13107,6 +13136,15 @@  ix86_GOT_alias_set (void)
   return set;
 }
 
+/* Set regs_ever_live for PIC base address register
+   to true if required.  */
+static void
+set_pic_reg_ever_alive ()
+{
+  if (reload_in_progress)
+    df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
+}
+
 /* Return a legitimate reference for ORIG (an address) using the
    register REG.  If REG is 0, a new pseudo is generated.
 
@@ -13157,8 +13195,7 @@  legitimize_pic_address (rtx orig, rtx reg)
       /* This symbol may be referenced via a displacement from the PIC
 	 base address (@GOTOFF).  */
 
-      if (reload_in_progress)
-	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+      set_pic_reg_ever_alive ();
       if (GET_CODE (addr) == CONST)
 	addr = XEXP (addr, 0);
       if (GET_CODE (addr) == PLUS)
@@ -13190,8 +13227,7 @@  legitimize_pic_address (rtx orig, rtx reg)
       /* This symbol may be referenced via a displacement from the PIC
 	 base address (@GOTOFF).  */
 
-      if (reload_in_progress)
-	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+      set_pic_reg_ever_alive ();
       if (GET_CODE (addr) == CONST)
 	addr = XEXP (addr, 0);
       if (GET_CODE (addr) == PLUS)
@@ -13252,8 +13288,7 @@  legitimize_pic_address (rtx orig, rtx reg)
 	  /* This symbol must be referenced via a load from the
 	     Global Offset Table (@GOT).  */
 
-	  if (reload_in_progress)
-	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+	  set_pic_reg_ever_alive ();
 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
 	  if (TARGET_64BIT)
@@ -13305,8 +13340,7 @@  legitimize_pic_address (rtx orig, rtx reg)
 	    {
 	      if (!TARGET_64BIT)
 		{
-		  if (reload_in_progress)
-		    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+		  set_pic_reg_ever_alive ();
 		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
 					    UNSPEC_GOTOFF);
 		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
@@ -13601,8 +13635,7 @@  legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
 	}
       else if (flag_pic)
 	{
-	  if (reload_in_progress)
-	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+	  set_pic_reg_ever_alive ();
 	  pic = pic_offset_table_rtx;
 	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
 	}
@@ -14233,6 +14266,8 @@  ix86_pic_register_p (rtx x)
   if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
     return (pic_offset_table_rtx
 	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
+  else if (pic_offset_table_rtx)
+    return REG_P (x) && REGNO (x) == REGNO (pic_offset_table_rtx);
   else
     return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
 }
@@ -14408,7 +14443,9 @@  ix86_delegitimize_address (rtx x)
 	 ...
 	 movl foo@GOTOFF(%ecx), %edx
 	 in which case we return (%ecx - %ebx) + foo.  */
-      if (pic_offset_table_rtx)
+      if (pic_offset_table_rtx
+	  && (!reload_completed
+	      || REGNO (pic_offset_table_rtx) < FIRST_PSEUDO_REGISTER))
         result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
 						     pic_offset_table_rtx),
 			       result);
@@ -24915,7 +24952,7 @@  ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
 		  && DEFAULT_ABI != MS_ABI))
 	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
 	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
-	use_reg (&use, pic_offset_table_rtx);
+	use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
     }
 
   if (TARGET_64BIT && INTVAL (callarg2) >= 0)
@@ -47228,6 +47265,8 @@  ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
 #undef TARGET_FUNCTION_ARG
 #define TARGET_FUNCTION_ARG ix86_function_arg
+#undef TARGET_IMPLICIT_PIC_ARG
+#define TARGET_IMPLICIT_PIC_ARG ix86_implicit_pic_arg
 #undef TARGET_FUNCTION_ARG_BOUNDARY
 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
 #undef TARGET_PASS_BY_REFERENCE
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 2c64162..d5fa250 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1243,11 +1243,13 @@  extern const char *host_detect_local_cpu (int argc, const char **argv);
 
 #define REAL_PIC_OFFSET_TABLE_REGNUM  BX_REG
 
-#define PIC_OFFSET_TABLE_REGNUM				\
-  ((TARGET_64BIT && (ix86_cmodel == CM_SMALL_PIC	\
-                     || TARGET_PECOFF))		\
-   || !flag_pic ? INVALID_REGNUM			\
-   : reload_completed ? REGNO (pic_offset_table_rtx)	\
+#define PIC_OFFSET_TABLE_REGNUM						\
+  ((TARGET_64BIT && (ix86_cmodel == CM_SMALL_PIC			\
+                     || TARGET_PECOFF))					\
+   || !flag_pic ? INVALID_REGNUM					\
+   : ix86_tune_features[X86_TUNE_RELAX_PIC_REG]				\
+     ? (pic_offset_table_rtx ? INVALID_REGNUM : REAL_PIC_OFFSET_TABLE_REGNUM) \
+   : reload_completed ? REGNO (pic_offset_table_rtx)			\
   : REAL_PIC_OFFSET_TABLE_REGNUM)
 
 #define GOT_SYMBOL_NAME "_GLOBAL_OFFSET_TABLE_"
@@ -1652,6 +1654,7 @@  typedef struct ix86_args {
   int float_in_sse;		/* Set to 1 or 2 for 32bit targets if
 				   SFmode/DFmode arguments should be passed
 				   in SSE registers.  Otherwise 0.  */
+  rtx implicit_pic_arg;         /* Implicit PIC base address arg if passed.  */
   enum calling_abi call_abi;	/* Set to SYSV_ABI for sysv abi. Otherwise
  				   MS_ABI for ms abi.  */
 } CUMULATIVE_ARGS;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8e74eab..27028ba 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2725,7 +2725,7 @@ 
 
 (define_insn "*pushtf"
   [(set (match_operand:TF 0 "push_operand" "=<,<")
-	(match_operand:TF 1 "general_no_elim_operand" "x,*roF"))]
+	(match_operand:TF 1 "nonimmediate_no_elim_operand" "x,*roF"))]
   "TARGET_64BIT || TARGET_SSE"
 {
   /* This insn should be already split before reg-stack.  */
@@ -2750,7 +2750,7 @@ 
 
 (define_insn "*pushxf"
   [(set (match_operand:XF 0 "push_operand" "=<,<")
-	(match_operand:XF 1 "general_no_elim_operand" "f,Yx*roF"))]
+	(match_operand:XF 1 "nonimmediate_no_elim_operand" "f,Yx*roF"))]
   ""
 {
   /* This insn should be already split before reg-stack.  */
@@ -2781,7 +2781,7 @@ 
 
 (define_insn "*pushdf"
   [(set (match_operand:DF 0 "push_operand" "=<,<,<,<")
-	(match_operand:DF 1 "general_no_elim_operand" "f,Yd*roF,rmF,x"))]
+	(match_operand:DF 1 "nonimmediate_no_elim_operand" "f,Yd*roF,rmF,x"))]
   ""
 {
   /* This insn should be already split before reg-stack.  */
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 62970be..56eca24 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -580,6 +580,12 @@ 
     (match_operand 0 "register_no_elim_operand")
     (match_operand 0 "general_operand")))
 
+;; Return false if this is any eliminable register.  Otherwise nonimmediate_operand.
+(define_predicate "nonimmediate_no_elim_operand"
+  (if_then_else (match_code "reg,subreg")
+    (match_operand 0 "register_no_elim_operand")
+    (match_operand 0 "nonimmediate_operand")))
+
 ;; Return false if this is any eliminable register.  Otherwise
 ;; register_operand or a constant.
 (define_predicate "nonmemory_no_elim_operand"
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 215c63c..ffb7a2d 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -537,3 +537,6 @@  DEF_TUNE (X86_TUNE_PROMOTE_QI_REGS, "promote_qi_regs", 0)
    unrolling small loop less important. For, such architectures we adjust
    the unroll factor so that the unrolled loop fits the loop buffer.  */
 DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4)
+
+/* X86_TUNE_RELAX_PIC_REG: Do not fix hard register for GOT base usage.  */
+DEF_TUNE (X86_TUNE_RELAX_PIC_REG, "relax_pic_reg", ~0)
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 9dd8d68..33b36be 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -3967,6 +3967,12 @@  If @code{TARGET_FUNCTION_INCOMING_ARG} is not defined,
 @code{TARGET_FUNCTION_ARG} serves both purposes.
 @end deftypefn
 
+@deftypefn {Target Hook} rtx TARGET_IMPLICIT_PIC_ARG (const_tree @var{fntype_or_decl})
+This hook returns register holding PIC base address for functions
+which do not fix hard register but handle it similar to function arg
+assigning a virtual reg for it.
+@end deftypefn
+
 @deftypefn {Target Hook} int TARGET_ARG_PARTIAL_BYTES (cumulative_args_t @var{cum}, enum machine_mode @var{mode}, tree @var{type}, bool @var{named})
 This target hook returns the number of bytes at the beginning of an
 argument that must be put in registers.  The value must be zero for
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index dd72b98..3e6da2f 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -3413,6 +3413,8 @@  the stack.
 
 @hook TARGET_FUNCTION_INCOMING_ARG
 
+@hook TARGET_IMPLICIT_PIC_ARG
+
 @hook TARGET_ARG_PARTIAL_BYTES
 
 @hook TARGET_PASS_BY_REFERENCE
diff --git a/gcc/function.c b/gcc/function.c
index 8156766..3a85c16 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -3456,6 +3456,15 @@  assign_parms (tree fndecl)
 
   fnargs.release ();
 
+  /* Handle implicit PIC arg if any.  */
+  if (targetm.calls.implicit_pic_arg (fndecl))
+    {
+      rtx old_reg = targetm.calls.implicit_pic_arg (fndecl);
+      rtx new_reg = gen_reg_rtx (GET_MODE (old_reg));
+      emit_move_insn (new_reg, old_reg);
+      pic_offset_table_rtx = new_reg;
+    }
+
   /* Output all parameter conversion instructions (possibly including calls)
      now that all parameters have been copied out of hard registers.  */
   emit_insn (all.first_conversion_insn);
diff --git a/gcc/hooks.c b/gcc/hooks.c
index 5c06562..47784e2 100644
--- a/gcc/hooks.c
+++ b/gcc/hooks.c
@@ -352,6 +352,13 @@  hook_rtx_rtx_null (rtx x ATTRIBUTE_UNUSED)
   return NULL;
 }
 
+/* Generic hook that ignores its const_tree argument and returns NULL_RTX.  */
+rtx
+hook_rtx_const_tree_null (const_tree t ATTRIBUTE_UNUSED)
+{
+  return NULL_RTX;
+}
+
 /* Generic hook that takes a tree and an int and returns NULL_RTX.  */
 rtx
 hook_rtx_tree_int_null (tree a ATTRIBUTE_UNUSED, int b ATTRIBUTE_UNUSED)
diff --git a/gcc/hooks.h b/gcc/hooks.h
index ba42b6c..cf830ef 100644
--- a/gcc/hooks.h
+++ b/gcc/hooks.h
@@ -100,6 +100,7 @@  extern bool default_can_output_mi_thunk_no_vcall (const_tree, HOST_WIDE_INT,
 
 extern rtx hook_rtx_rtx_identity (rtx);
 extern rtx hook_rtx_rtx_null (rtx);
+extern rtx hook_rtx_const_tree_null (const_tree);
 extern rtx hook_rtx_tree_int_null (tree, int);
 
 extern const char *hook_constcharptr_void_null (void);
diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index a43f8dc..253934b 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -4017,7 +4017,11 @@  lra_constraints (bool first_p)
       ("Maximum number of LRA constraint passes is achieved (%d)\n",
        LRA_MAX_CONSTRAINT_ITERATION_NUMBER);
   changed_p = false;
-  lra_risky_transformations_p = false;
+  if (pic_offset_table_rtx
+      && REGNO (pic_offset_table_rtx) >= FIRST_PSEUDO_REGISTER)
+    lra_risky_transformations_p = true;
+  else
+    lra_risky_transformations_p = false;
   new_insn_uid_start = get_max_uid ();
   new_regno_start = first_p ? lra_constraint_new_regno_start : max_reg_num ();
   /* Mark used hard regs for target stack size calulations.  */
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index bc16437..1cd7ea3 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -110,7 +110,8 @@  rtx_unstable_p (const_rtx x)
       /* ??? When call-clobbered, the value is stable modulo the restore
 	 that must happen after a call.  This currently screws up local-alloc
 	 into believing that the restore is not needed.  */
-      if (!PIC_OFFSET_TABLE_REG_CALL_CLOBBERED && x == pic_offset_table_rtx)
+      if (!PIC_OFFSET_TABLE_REG_CALL_CLOBBERED && x == pic_offset_table_rtx
+	  && REGNO (pic_offset_table_rtx) < FIRST_PSEUDO_REGISTER)
 	return 0;
       return 1;
 
@@ -185,7 +186,9 @@  rtx_varies_p (const_rtx x, bool for_alias)
 	     that must happen after a call.  This currently screws up
 	     local-alloc into believing that the restore is not needed, so we
 	     must return 0 only if we are called from alias analysis.  */
-	  && (!PIC_OFFSET_TABLE_REG_CALL_CLOBBERED || for_alias))
+	  && ((!PIC_OFFSET_TABLE_REG_CALL_CLOBBERED
+	       && REGNO (pic_offset_table_rtx) < FIRST_PSEUDO_REGISTER)
+	      || for_alias))
 	return 0;
       return 1;
 
diff --git a/gcc/shrink-wrap.c b/gcc/shrink-wrap.c
index 5c34fee..50de8d5 100644
--- a/gcc/shrink-wrap.c
+++ b/gcc/shrink-wrap.c
@@ -448,7 +448,7 @@  try_shrink_wrapping (edge *entry_edge, edge orig_entry_edge,
     {
       HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
       struct hard_reg_set_container set_up_by_prologue;
-      rtx p_insn;
+      rtx p_insn, reg;
       vec<basic_block> vec;
       basic_block bb;
       bitmap_head bb_antic_flags;
@@ -494,9 +494,13 @@  try_shrink_wrapping (edge *entry_edge, edge orig_entry_edge,
       if (frame_pointer_needed)
 	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
 			     HARD_FRAME_POINTER_REGNUM);
-      if (pic_offset_table_rtx)
+      if (pic_offset_table_rtx
+	  && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
 	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
 			     PIC_OFFSET_TABLE_REGNUM);
+      if ((reg = targetm.calls.implicit_pic_arg (current_function_decl)))
+	add_to_hard_reg_set (&set_up_by_prologue.set,
+			     Pmode, REGNO (reg));
       if (crtl->drap_reg)
 	add_to_hard_reg_set (&set_up_by_prologue.set,
 			     GET_MODE (crtl->drap_reg),
diff --git a/gcc/target.def b/gcc/target.def
index 3a41db1..5c221b6 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -3976,6 +3976,14 @@  If @code{TARGET_FUNCTION_INCOMING_ARG} is not defined,\n\
  default_function_incoming_arg)
 
 DEFHOOK
+(implicit_pic_arg,
+ "This hook returns register holding PIC base address for functions\n\
+which do not fix hard register but handle it similar to function arg\n\
+assigning a virtual reg for it.",
+ rtx, (const_tree fntype_or_decl),
+ hook_rtx_const_tree_null)
+
+DEFHOOK
 (function_arg_boundary,
  "This hook returns the alignment boundary, in bits, of an argument\n\
 with the specified mode and type.  The default hook returns\n\