diff mbox

Enable EBX for x86 in 32bits PIC code

Message ID 20140923135437.GD35262@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Ilya Enkovich Sept. 23, 2014, 1:54 p.m. UTC
On 03 Sep 16:19, Vladimir Makarov wrote:
> On 2014-08-29 2:47 AM, Ilya Enkovich wrote:
> >Seems your patch doesn't cover all cases.  Attached is a modified
> >patch (with your changes included) and a test where double constant is
> >wrongly rematerialized.  I also see in ira dump that there is still a
> >copy of PIC reg created:
> >
> >Initialization of original PIC reg:
> >(insn 23 22 24 2 (set (reg:SI 127)
> >         (reg:SI 3 bx)) test.cc:42 90 {*movsi_internal}
> >      (expr_list:REG_DEAD (reg:SI 3 bx)
> >         (nil)))
> >...
> >Copy is created:
> >(insn 135 37 25 3 (set (reg:SI 138 [127])
> >         (reg:SI 127)) 90 {*movsi_internal}
> >      (expr_list:REG_DEAD (reg:SI 127)
> >         (nil)))
> >...
> >Copy is used:
> >(insn 119 25 122 3 (set (reg:DF 134)
> >         (mem/u/c:DF (plus:SI (reg:SI 138 [127])
> >                 (const:SI (unspec:SI [
> >                             (symbol_ref/u:SI ("*.LC0") [flags 0x2])
> >                         ] UNSPEC_GOTOFF))) [5  S8 A64])) 128 {*movdf_internal}
> >      (expr_list:REG_EQUIV (const_double:DF
> >2.9999999999999997371893933895137251965934410691261292e-4
> >[0x0.9d495182a99308p-11])
> >         (nil)))
> >
> 
> The copy is created by a newer IRA optimization for function prologues.
> 
> The patch in the attachment should solve the problem.  I also added
> the code to prevent spilling the pic pseudo in LRA which could
> happen before theoretically.
> 
> 
> >After reload we have new usage of r127 which is allocated to ecx which
> >actually does not have any definition in this function at all.
> >
> >(insn 151 42 44 4 (set (reg:SI 0 ax [147])
> >         (plus:SI (reg:SI 2 cx [127])
> >             (const:SI (unspec:SI [
> >                         (symbol_ref/u:SI ("*.LC0") [flags 0x2])
> >                     ] UNSPEC_GOTOFF)))) test.cc:44 213 {*leasi}
> >      (expr_list:REG_EQUAL (symbol_ref/u:SI ("*.LC0") [flags 0x2])
> >         (nil)))
> >(insn 44 151 45 4 (set (reg:DF 21 xmm0 [orig:129 D.2450 ] [129])
> >         (mult:DF (reg:DF 21 xmm0 [orig:128 D.2450 ] [128])
> >             (mem/u/c:DF (reg:SI 0 ax [147]) [5  S8 A64]))) test.cc:44
> >790 {*fop_df_comm_sse}
> >      (expr_list:REG_EQUAL (mult:DF (reg:DF 21 xmm0 [orig:128 D.2450 ] [128])
> >             (const_double:DF
> >2.9999999999999997371893933895137251965934410691261292e-4
> >[0x0.9d495182a99308p-11]))
> >         (nil)))
> >
> >Compilation string: g++ -m32 -O2 -mfpmath=sse -fPIE -S test.cc
> 

> Index: ira.c
> ===================================================================
> --- ira.c	(revision 214576)
> +++ ira.c	(working copy)
> @@ -4887,7 +4887,7 @@ split_live_ranges_for_shrink_wrap (void)
>    FOR_BB_INSNS (first, insn)
>      {
>        rtx dest = interesting_dest_for_shprep (insn, call_dom);
> -      if (!dest)
> +      if (!dest || dest == pic_offset_table_rtx)
>  	continue;
>  
>        rtx newreg = NULL_RTX;
> Index: lra-assigns.c
> ===================================================================
> --- lra-assigns.c	(revision 214576)
> +++ lra-assigns.c	(working copy)
> @@ -879,11 +879,13 @@ spill_for (int regno, bitmap spilled_pse
>  	}
>        /* Spill pseudos.	 */
>        EXECUTE_IF_SET_IN_BITMAP (&spill_pseudos_bitmap, 0, spill_regno, bi)
> -	if ((int) spill_regno >= lra_constraint_new_regno_start
> -	    && ! bitmap_bit_p (&lra_inheritance_pseudos, spill_regno)
> -	    && ! bitmap_bit_p (&lra_split_regs, spill_regno)
> -	    && ! bitmap_bit_p (&lra_subreg_reload_pseudos, spill_regno)
> -	    && ! bitmap_bit_p (&lra_optional_reload_pseudos, spill_regno))
> +	if ((pic_offset_table_rtx != NULL
> +	     && spill_regno == REGNO (pic_offset_table_rtx))
> +	    || ((int) spill_regno >= lra_constraint_new_regno_start
> +		&& ! bitmap_bit_p (&lra_inheritance_pseudos, spill_regno)
> +		&& ! bitmap_bit_p (&lra_split_regs, spill_regno)
> +		&& ! bitmap_bit_p (&lra_subreg_reload_pseudos, spill_regno)
> +		&& ! bitmap_bit_p (&lra_optional_reload_pseudos, spill_regno)))
>  	  goto fail;
>        insn_pseudos_num = 0;
>        if (lra_dump_file != NULL)
> @@ -1053,7 +1055,9 @@ setup_live_pseudos_and_spill_after_risky
>        return;
>      }
>    for (n = 0, i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
> -    if (reg_renumber[i] >= 0 && lra_reg_info[i].nrefs > 0)
> +    if ((pic_offset_table_rtx == NULL_RTX
> +	 || i != (int) REGNO (pic_offset_table_rtx))
> +	&& reg_renumber[i] >= 0 && lra_reg_info[i].nrefs > 0)
>        sorted_pseudos[n++] = i;
>    qsort (sorted_pseudos, n, sizeof (int), pseudo_compare_func);
>    for (i = n - 1; i >= 0; i--)
> @@ -1360,6 +1364,8 @@ assign_by_spills (void)
>  	}
>        EXECUTE_IF_SET_IN_SPARSESET (live_range_hard_reg_pseudos, conflict_regno)
>  	{
> +	  gcc_assert (pic_offset_table_rtx == NULL
> +		      || conflict_regno != REGNO (pic_offset_table_rtx));
>  	  if ((int) conflict_regno >= lra_constraint_new_regno_start)
>  	    sorted_pseudos[nfails++] = conflict_regno;
>  	  if (lra_dump_file != NULL)

Hi,

Here is a patch which combines results of my and Vladimir's work on EBX enabling.

It works OK for SPEC2000 and SPEC2006 with -Ofast + LTO.  It passes bootstrap, but there are a few new failures in make check.

gcc.target/i386/pic-1.c fails because it doesn't expect we can use EBX in 32bit PIC mode
gcc.target/i386/pr55458.c fails due to the same reason
gcc.target/i386/pr23098.c fails because compiler fails to use float constant as an immediate and loads it from GOT instead

Do we have a final decision about having a compiler flag to control enabling of the pseudo PIC register?  I think we should have the possibility to use fixed EBX at least until we make sure pseudo PIC doesn't harm debug info generation. If we have such an option then gcc.target/i386/pic-1.c and gcc.target/i386/pr55458.c should be modified, otherwise these tests should be removed.

@Vladimir: I didn't want to speculate about your changes and just put '??' for them in the ChangeLog description.  Could you please fill in proper comments?  Or maybe you would like to split this patch into two parts and commit the ira changes separately?

Thanks,
Ilya
--
2014-09-23  Ilya Enkovich  <ilya.enkovich@intel.com>

	* config/i386/i386.c (ix86_use_pseudo_pic_reg): New.
	(ix86_init_pic_reg): New.
	(ix86_select_alt_pic_regnum): Support pseudo PIC register.
	(ix86_save_reg): Likewise.
	(ix86_output_function_epilogue): Likewise.
	(ix86_expand_prologue): Remove PIC register initialization
	now performed in ix86_init_pic_reg.
	(set_pic_reg_ever_alive): New.
	(legitimize_pic_address): Use set_pic_reg_ever_alive.
	(ix86_pic_register_p): Support pseudo PIC register.
	(ix86_delegitimize_address): Likewise.
	(ix86_expand_call): Fill REAL_PIC_OFFSET_TABLE_REGNUM
	with GOT address if required.
	(TARGET_INIT_PIC_REG): New.
	(TARGET_USE_PSEUDO_PIC_REG): New.
	(PIC_OFFSET_TABLE_REGNUM): Return INVALID_REGNUM if
	pic_offset_table_rtx exists.
	* doc/tm.texi.in (TARGET_USE_PSEUDO_PIC_REG): New.
	(TARGET_INIT_PIC_REG): New.
	* doc/tm.texi: Regenerated.
	* function.c (assign_parms): Create pseudo PIC register
	if required.
	* init-regs.c (initialize_uninitialized_regs): Don't
	initialize PIC register.
	* ira-color.c (color_pass): ??
	* ira-emit.c (change_loop): ??
	* ira.c (split_live_ranges_for_shrink_wrap): ??
	(ira): Call target hook to initialize PIC register.
	(do_reload): Avoid transformation of pic_offset_table_rtx
	into hard register.
	* lra-assigns.c (spill_for): ??
	(setup_live_pseudos_and_spill_after_risky_transforms): ??
	* lra-constraints.c (contains_symbol_ref_p): New.
	(lra_constraints): Pseudo PIC register means we make risky
	transformations.
	* shrink-wrap.c (try_shrink_wrapping): Support pseudo PIC
	register.
	* target.def (use_pseudo_pic_reg): New.
	(init_pic_reg): New.

Comments

Uros Bizjak Sept. 23, 2014, 2:23 p.m. UTC | #1
On Tue, Sep 23, 2014 at 3:54 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:

> Here is a patch which combines results of my and Vladimir's work on EBX enabling.
>
> It works OK for SPEC2000 and SPEC2006 on -Ofast + LTO.  It passes bootstrap but there are few new failures in make check.
>
> gcc.target/i386/pic-1.c fails because it doesn't expect we can use EBX in 32bit PIC mode
> gcc.target/i386/pr55458.c fails due to the same reason
> gcc.target/i386/pr23098.c fails because compiler fails to use float constant as an immediate and loads it from GOT instead
>
> Do we have the final decision about having a sompiler flag to control enabling of pseudo PIC register?  I think we should have a possibility to use fixed EBX at least until we make sure pseudo PIC doesn't harm debug info generation. If we have such option then gcc.target/i386/pic-1.c and gcc.target/i386/pr55458.c should be modified, otherwise these tests should be removed.

I think having this flag would be dangerous. In effect, this flag
would be a hidden -ffixed-bx, with unwanted consequences on asm code
that handles ebx. As an example, please see config/i386/cpuid.h - ATM,
we handle ebx in a special way when __PIC__ is defined. With your
patch, we will have to handle it in a special way when new flag is in
effect, which is impossible, unless another compiler-generated define
is emitted.

So, I vote to change PIC reg to a pseudo unconditionally and adjust
testsuite for all (expected) fall-out.

Uros.
Jakub Jelinek Sept. 23, 2014, 2:34 p.m. UTC | #2
On Tue, Sep 23, 2014 at 05:54:37PM +0400, Ilya Enkovich wrote:
> use fixed EBX at least until we make sure pseudo PIC doesn't harm debug
> info generation.  If we have such option then gcc.target/i386/pic-1.c and

For debug info, it seems you are already handling this in
delegitimize_address target hook, I'd suggest just building some very large
shared library at -O2 -g -fpic on i?86 and either look at the
sizes of .debug_info/.debug_loc sections with/without the patch,
or use the locstat utility from elfutils (talk to Petr Machata if needed).

	Jakub
Jeff Law Sept. 23, 2014, 3:59 p.m. UTC | #3
On 09/23/14 08:23, Uros Bizjak wrote:
> On Tue, Sep 23, 2014 at 3:54 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>
>> Here is a patch which combines results of my and Vladimir's work on EBX enabling.
>>
>> It works OK for SPEC2000 and SPEC2006 on -Ofast + LTO.  It passes bootstrap but there are few new failures in make check.
>>
>> gcc.target/i386/pic-1.c fails because it doesn't expect we can use EBX in 32bit PIC mode
>> gcc.target/i386/pr55458.c fails due to the same reason
>> gcc.target/i386/pr23098.c fails because compiler fails to use float constant as an immediate and loads it from GOT instead
>>
>> Do we have the final decision about having a sompiler flag to control enabling of pseudo PIC register?  I think we should have a possibility to use fixed EBX at least until we make sure pseudo PIC doesn't harm debug info generation. If we have such option then gcc.target/i386/pic-1.c and gcc.target/i386/pr55458.c should be modified, otherwise these tests should be removed.
>
> I think having this flag would be dangerous. In effect, this flag
> would be a hidden -ffixed-bx, with unwanted consequences on asm code
> that handles ebx. As an example, please see config/i386/cpuid.h - ATM,
> we handle ebx in a special way when __PIC__ is defined. With your
> patch, we will have to handle it in a special way when new flag is in
> effect, which is impossible, unless another compiler-generated define
> is emitted.
>
> So, I vote to change PIC reg to a pseudo unconditionally and adjust
> testsuite for all (expected) fall-out.
Agreed.  Continuing to support both modes just seems like a maintenance 
nightmare and asking for problems at some point.  If there's performance 
regressions, we just tackle them :-)

I suspect any performance regressions we find are going to point us at 
issues in IRA/LRA that we would want to look at anyway.

jeff
Petr Machata Sept. 23, 2014, 3:59 p.m. UTC | #4
Jakub Jelinek <jakub@redhat.com> writes:

> look at the sizes of .debug_info/.debug_loc sections with/without the
> patch, or use the locstat utility from elfutils

Not actually part of elfutils, but available either here:
        https://github.com/pmachata/dwlocstat

... or packaged in Fedora.

Thanks,
PM
Jeff Law Sept. 23, 2014, 4 p.m. UTC | #5
On 09/23/14 08:34, Jakub Jelinek wrote:
> On Tue, Sep 23, 2014 at 05:54:37PM +0400, Ilya Enkovich wrote:
>> use fixed EBX at least until we make sure pseudo PIC doesn't harm debug
>> info generation.  If we have such option then gcc.target/i386/pic-1.c and
>
> For debug info, it seems you are already handling this in
> delegitimize_address target hook, I'd suggest just building some very large
> shared library at -O2 -g -fpic on i?86 and either look at the
> sizes of .debug_info/.debug_loc sections with/without the patch,
> or use the locstat utility from elfutils (talk to Petr Machata if needed).
Can't hurt, but I really don't see how changing from a fixed to an 
allocatable register is going to muck up debug info in any significant 
way.

jeff
Jakub Jelinek Sept. 23, 2014, 4:03 p.m. UTC | #6
On Tue, Sep 23, 2014 at 10:00:00AM -0600, Jeff Law wrote:
> On 09/23/14 08:34, Jakub Jelinek wrote:
> >On Tue, Sep 23, 2014 at 05:54:37PM +0400, Ilya Enkovich wrote:
> >>use fixed EBX at least until we make sure pseudo PIC doesn't harm debug
> >>info generation.  If we have such option then gcc.target/i386/pic-1.c and
> >
> >For debug info, it seems you are already handling this in
> >delegitimize_address target hook, I'd suggest just building some very large
> >shared library at -O2 -g -fpic on i?86 and either look at the
> >sizes of .debug_info/.debug_loc sections with/without the patch,
> >or use the locstat utility from elfutils (talk to Petr Machata if needed).
> Can't hurt, but I really don't see how changing from a fixed to an
> allocatable register is going to muck up debug info in any significant way.

What matters is if the delegitimize_address target hook is as efficient in
delegitimization as before.  E.g. if it previously matched only when seeing
%ebx + gotoff or similar, and wouldn't match anything now, some vars could
have debug locations including UNSPEC and be dropped on the floor.

	Jakub
Jeff Law Sept. 23, 2014, 4:10 p.m. UTC | #7
On 09/23/14 10:03, Jakub Jelinek wrote:
> On Tue, Sep 23, 2014 at 10:00:00AM -0600, Jeff Law wrote:
>> On 09/23/14 08:34, Jakub Jelinek wrote:
>>> On Tue, Sep 23, 2014 at 05:54:37PM +0400, Ilya Enkovich wrote:
>>>> use fixed EBX at least until we make sure pseudo PIC doesn't harm debug
>>>> info generation.  If we have such option then gcc.target/i386/pic-1.c and
>>>
>>> For debug info, it seems you are already handling this in
>>> delegitimize_address target hook, I'd suggest just building some very large
>>> shared library at -O2 -g -fpic on i?86 and either look at the
>>> sizes of .debug_info/.debug_loc sections with/without the patch,
>>> or use the locstat utility from elfutils (talk to Petr Machata if needed).
>> Can't hurt, but I really don't see how changing from a fixed to an
>> allocatable register is going to muck up debug info in any significant way.
>
> What matters is if the delegitimize_address target hook is as efficient in
> delegitimization as before.  E.g. if it previously matched only when seeing
> %ebx + gotoff or similar, and wouldn't match anything now, some vars could
> have debug locations including UNSPEC and be dropped on the floor.
Ah, yea, that makes sense.

jeff
Ilya Enkovich Sept. 24, 2014, 6:56 a.m. UTC | #8
2014-09-23 20:10 GMT+04:00 Jeff Law <law@redhat.com>:
> On 09/23/14 10:03, Jakub Jelinek wrote:
>>
>> On Tue, Sep 23, 2014 at 10:00:00AM -0600, Jeff Law wrote:
>>>
>>> On 09/23/14 08:34, Jakub Jelinek wrote:
>>>>
>>>> On Tue, Sep 23, 2014 at 05:54:37PM +0400, Ilya Enkovich wrote:
>>>>>
>>>>> use fixed EBX at least until we make sure pseudo PIC doesn't harm debug
>>>>> info generation.  If we have such option then gcc.target/i386/pic-1.c
>>>>> and
>>>>
>>>>
>>>> For debug info, it seems you are already handling this in
>>>> delegitimize_address target hook, I'd suggest just building some very
>>>> large
>>>> shared library at -O2 -g -fpic on i?86 and either look at the
>>>> sizes of .debug_info/.debug_loc sections with/without the patch,
>>>> or use the locstat utility from elfutils (talk to Petr Machata if
>>>> needed).
>>>
>>> Can't hurt, but I really don't see how changing from a fixed to an
>>> allocatable register is going to muck up debug info in any significant
>>> way.
>>
>>
>> What matters is if the delegitimize_address target hook is as efficient in
>> delegitimization as before.  E.g. if it previously matched only when
>> seeing
>> %ebx + gotoff or similar, and wouldn't match anything now, some vars could
>> have debug locations including UNSPEC and be dropped on the floor.
>
> Ah, yea, that makes sense.
>
> jeff


After register allocation we have no idea where GOT address is and
therefore delegitimize_address target hook becomes less efficient and
cannot remove UNSPECs. That's what I see now when I build GCC with the
patch applied:

../../../../gcc/libgfortran/generated/sum_r4.c: In function 'msum_r4':
../../../../gcc/libgfortran/generated/sum_r4.c:195:1: note:
non-delegitimized UNSPEC UNSPEC_GOTOFF (1) found in variable location
 msum_r4 (gfc_array_r4 * const restrict retarray,
 ^
../../../../gcc/libgfortran/generated/sum_r4.c:195:1: note:
non-delegitimized UNSPEC UNSPEC_GOTOFF (1) found in variable location
../../../../gcc/libgfortran/generated/sum_r4.c:195:1: note:
non-delegitimized UNSPEC UNSPEC_GOTOFF (1) found in variable location
../../../../gcc/libgfortran/generated/sum_r4.c:195:1: note:
non-delegitimized UNSPEC UNSPEC_GOTOFF (1) found in variable location
../../../../gcc/libgfortran/generated/sum_r8.c: In function 'msum_r8':
../../../../gcc/libgfortran/generated/sum_r8.c:195:1: note:
non-delegitimized UNSPEC UNSPEC_GOTOFF (1) found in variable location
 msum_r8 (gfc_array_r8 * const restrict retarray,
 ^
../../../../gcc/libgfortran/generated/sum_r8.c:195:1: note:
non-delegitimized UNSPEC UNSPEC_GOTOFF (1) found in variable location
../../../../gcc/libgfortran/generated/sum_r8.c:195:1: note:
non-delegitimized UNSPEC UNSPEC_GOTOFF (1) found in variable location
../../../../gcc/libgfortran/generated/sum_r8.c:195:1: note:
non-delegitimized UNSPEC UNSPEC_GOTOFF (1) found in variable location


Ilya
Jeff Law Sept. 24, 2014, 3:27 p.m. UTC | #9
On 09/24/14 00:56, Ilya Enkovich wrote:
> 2014-09-23 20:10 GMT+04:00 Jeff Law <law@redhat.com>:
>> On 09/23/14 10:03, Jakub Jelinek wrote:
>>>
>>> On Tue, Sep 23, 2014 at 10:00:00AM -0600, Jeff Law wrote:
>>>>
>>>> On 09/23/14 08:34, Jakub Jelinek wrote:
>>>>>
>>>>> On Tue, Sep 23, 2014 at 05:54:37PM +0400, Ilya Enkovich wrote:
>>>>>>
>>>>>> use fixed EBX at least until we make sure pseudo PIC doesn't harm debug
>>>>>> info generation.  If we have such option then gcc.target/i386/pic-1.c
>>>>>> and
>>>>>
>>>>>
>>>>> For debug info, it seems you are already handling this in
>>>>> delegitimize_address target hook, I'd suggest just building some very
>>>>> large
>>>>> shared library at -O2 -g -fpic on i?86 and either look at the
>>>>> sizes of .debug_info/.debug_loc sections with/without the patch,
>>>>> or use the locstat utility from elfutils (talk to Petr Machata if
>>>>> needed).
>>>>
>>>> Can't hurt, but I really don't see how changing from a fixed to an
>>>> allocatable register is going to muck up debug info in any significant
>>>> way.
>>>
>>>
>>> What matters is if the delegitimize_address target hook is as efficient in
>>> delegitimization as before.  E.g. if it previously matched only when
>>> seeing
>>> %ebx + gotoff or similar, and wouldn't match anything now, some vars could
>>> have debug locations including UNSPEC and be dropped on the floor.
>>
>> Ah, yea, that makes sense.
>>
>> jeff
>
>
> After register allocation we have no idea where GOT address is and
> therefore delegitimize_address target hook becomes less efficient and
> cannot remove UNSPECs. That's what I see now when build GCC with patch
> applied:
In theory this shouldn't be too hard to fix.

I haven't looked at the code, but it might be something looking 
explicitly for ebx by register #, or something similar.  Which case 
within delegitimize_address isn't firing as it should after your changes?

jeff
Ilya Enkovich Sept. 24, 2014, 8:32 p.m. UTC | #10
2014-09-24 19:27 GMT+04:00 Jeff Law <law@redhat.com>:
> On 09/24/14 00:56, Ilya Enkovich wrote:
>>
>> 2014-09-23 20:10 GMT+04:00 Jeff Law <law@redhat.com>:
>>>
>>> On 09/23/14 10:03, Jakub Jelinek wrote:
>>>>
>>>>
>>>> On Tue, Sep 23, 2014 at 10:00:00AM -0600, Jeff Law wrote:
>>>>>
>>>>>
>>>>> On 09/23/14 08:34, Jakub Jelinek wrote:
>>>>>>
>>>>>>
>>>>>> On Tue, Sep 23, 2014 at 05:54:37PM +0400, Ilya Enkovich wrote:
>>>>>>>
>>>>>>>
>>>>>>> use fixed EBX at least until we make sure pseudo PIC doesn't harm
>>>>>>> debug
>>>>>>> info generation.  If we have such option then gcc.target/i386/pic-1.c
>>>>>>> and
>>>>>>
>>>>>>
>>>>>>
>>>>>> For debug info, it seems you are already handling this in
>>>>>> delegitimize_address target hook, I'd suggest just building some very
>>>>>> large
>>>>>> shared library at -O2 -g -fpic on i?86 and either look at the
>>>>>> sizes of .debug_info/.debug_loc sections with/without the patch,
>>>>>> or use the locstat utility from elfutils (talk to Petr Machata if
>>>>>> needed).
>>>>>
>>>>>
>>>>> Can't hurt, but I really don't see how changing from a fixed to an
>>>>> allocatable register is going to muck up debug info in any significant
>>>>> way.
>>>>
>>>>
>>>>
>>>> What matters is if the delegitimize_address target hook is as efficient
>>>> in
>>>> delegitimization as before.  E.g. if it previously matched only when
>>>> seeing
>>>> %ebx + gotoff or similar, and wouldn't match anything now, some vars
>>>> could
>>>> have debug locations including UNSPEC and be dropped on the floor.
>>>
>>>
>>> Ah, yea, that makes sense.
>>>
>>> jeff
>>
>>
>>
>> After register allocation we have no idea where GOT address is and
>> therefore delegitimize_address target hook becomes less efficient and
>> cannot remove UNSPECs. That's what I see now when build GCC with patch
>> applied:
>
> In theory this shouldn't be too hard to fix.
>
> I haven't looked at the code, but it might be something looking explicitly
> for ebx by register #, or something similar.  Which case within
> delegitimize_address isn't firing as it should after your changes?

It is the case I had to fix:

@@ -14415,7 +14433,8 @@ ix86_delegitimize_address (rtx x)
         ...
         movl foo@GOTOFF(%ecx), %edx
         in which case we return (%ecx - %ebx) + foo.  */
-      if (pic_offset_table_rtx)
+      if (pic_offset_table_rtx
+         && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
         result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
                                                     pic_offset_table_rtx),
                               result);

Originally if there is an UNSPEC_GOTOFF but no EBX usage then we
just remove this UNSPEC and subtract the EBX value.  With a pseudo PIC reg
we should use the PIC register instead of EBX but it is unclear what to
use after register allocation.

Ilya

>
> jeff
>
Jeff Law Sept. 24, 2014, 9:20 p.m. UTC | #11
On 09/24/14 14:32, Ilya Enkovich wrote:
> 2014-09-24 19:27 GMT+04:00 Jeff Law <law@redhat.com>:
>> On 09/24/14 00:56, Ilya Enkovich wrote:

>>>
>>> After register allocation we have no idea where GOT address is and
>>> therefore delegitimize_address target hook becomes less efficient and
>>> cannot remove UNSPECs. That's what I see now when build GCC with patch
>>> applied:
>>
>> In theory this shouldn't be too hard to fix.
>>
>> I haven't looked at the code, but it might be something looking explicitly
>> for ebx by register #, or something similar.  Which case within
>> delegitimize_address isn't firing as it should after your changes?
>
> It is the case I had to fix:
>
> @@ -14415,7 +14433,8 @@ ix86_delegitimize_address (rtx x)
>           ...
>           movl foo@GOTOFF(%ecx), %edx
>           in which case we return (%ecx - %ebx) + foo.  */
> -      if (pic_offset_table_rtx)
> +      if (pic_offset_table_rtx
> +         && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
>           result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
>                                                       pic_offset_table_rtx),
>                                 result);
>
> Originally if there is a UNSPEC_GOTOFFSET but no EBX usage then we
> just remove this UNSPEC and substract EBX value.  With pseudo PIC reg
> we should use PIC register instead of EBX but it is unclear what to
> use after register allocation.
What's the RTL before & after allocation?  Feel free to just pass along 
the dump files for sum_r4 that you referenced in a prior message.

jeff
Jakub Jelinek Sept. 29, 2014, 11:08 a.m. UTC | #12
On Wed, Sep 24, 2014 at 03:20:44PM -0600, Jeff Law wrote:
> On 09/24/14 14:32, Ilya Enkovich wrote:
> >2014-09-24 19:27 GMT+04:00 Jeff Law <law@redhat.com>:
> >>On 09/24/14 00:56, Ilya Enkovich wrote:
> 
> >>>
> >>>After register allocation we have no idea where GOT address is and
> >>>therefore delegitimize_address target hook becomes less efficient and
> >>>cannot remove UNSPECs. That's what I see now when build GCC with patch
> >>>applied:
> >>
> >>In theory this shouldn't be too hard to fix.
> >>
> >>I haven't looked at the code, but it might be something looking explicitly
> >>for ebx by register #, or something similar.  Which case within
> >>delegitimize_address isn't firing as it should after your changes?
> >
> >It is the case I had to fix:
> >
> >@@ -14415,7 +14433,8 @@ ix86_delegitimize_address (rtx x)
> >          ...
> >          movl foo@GOTOFF(%ecx), %edx
> >          in which case we return (%ecx - %ebx) + foo.  */
> >-      if (pic_offset_table_rtx)
> >+      if (pic_offset_table_rtx
> >+         && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
> >          result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
> >                                                      pic_offset_table_rtx),
> >                                result);
> >
> >Originally if there is a UNSPEC_GOTOFFSET but no EBX usage then we
> >just remove this UNSPEC and substract EBX value.  With pseudo PIC reg
> >we should use PIC register instead of EBX but it is unclear what to
> >use after register allocation.
> What's the RTL before & after allocation?  Feel free to just pass along the
> dump files for sum_r4 that you referenced in a prior message.

I wonder if during/after reload we just couldn't look at
ORIGINAL_REGNO of hard regs if ix86_use_pseudo_pic_reg.  Or is that
the other case, where you don't have any PIC register replacement around,
and want to subtract something?  Perhaps in that case we could just
subtract the value of _GLOBAL_OFFSET_TABLE_ symbol if we have nothing better
around.

	Jakub
diff mbox

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6337aa5..a21ae25 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -6134,6 +6134,68 @@  ix86_maybe_switch_abi (void)
     reinit_regs ();
 }
 
+/* Return true if a pseudo register should be created and used to hold
+   the GOT address for PIC code.  */
+static bool
+ix86_use_pseudo_pic_reg (void)
+{
+  if ((TARGET_64BIT
+       && (ix86_cmodel == CM_SMALL_PIC
+	   || TARGET_PECOFF))
+      || !flag_pic)
+    return false;
+  return true;
+}
+
+/* Create and initialize PIC register if required.  */
+static void
+ix86_init_pic_reg (void)
+{
+  edge entry_edge;
+  rtx_insn *seq;
+
+  if (!ix86_use_pseudo_pic_reg ())
+    return;
+
+  start_sequence ();
+
+  if (TARGET_64BIT)
+    {
+      if (ix86_cmodel == CM_LARGE_PIC)
+	{
+	  rtx_code_label *label;
+	  rtx tmp_reg;
+
+	  gcc_assert (Pmode == DImode);
+	  label = gen_label_rtx ();
+	  emit_label (label);
+	  LABEL_PRESERVE_P (label) = 1;
+	  tmp_reg = gen_rtx_REG (Pmode, R11_REG);
+	  gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
+	  emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
+					label));
+	  emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
+	  emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
+				    pic_offset_table_rtx, tmp_reg));
+	}
+      else
+	emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
+    }
+  else
+    {
+      rtx insn = emit_insn (gen_set_got (pic_offset_table_rtx));
+      RTX_FRAME_RELATED_P (insn) = 1;
+      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
+    }
+
+  seq = get_insns ();
+  end_sequence ();
+
+  entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+  insert_insn_on_edge (seq, entry_edge);
+  commit_one_edge_insertion (entry_edge);
+}
+
 /* Initialize a variable CUM of type CUMULATIVE_ARGS
    for a call to a function whose data type is FNTYPE.
    For a library call, FNTYPE is 0.  */
@@ -9376,6 +9438,9 @@  gen_pop (rtx arg)
 static unsigned int
 ix86_select_alt_pic_regnum (void)
 {
+  if (ix86_use_pseudo_pic_reg ())
+    return INVALID_REGNUM;
+
   if (crtl->is_leaf
       && !crtl->profile
       && !ix86_current_function_calls_tls_descriptor)
@@ -9400,6 +9465,7 @@  static bool
 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
 {
   if (pic_offset_table_rtx
+      && !ix86_use_pseudo_pic_reg ()
       && regno == REAL_PIC_OFFSET_TABLE_REGNUM
       && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
 	  || crtl->profile
@@ -10752,7 +10818,6 @@  ix86_expand_prologue (void)
 {
   struct machine_function *m = cfun->machine;
   rtx insn, t;
-  bool pic_reg_used;
   struct ix86_frame frame;
   HOST_WIDE_INT allocate;
   bool int_registers_saved;
@@ -11199,60 +11264,6 @@  ix86_expand_prologue (void)
   if (!sse_registers_saved)
     ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
 
-  pic_reg_used = false;
-  /* We don't use pic-register for pe-coff target.  */
-  if (pic_offset_table_rtx
-      && !TARGET_PECOFF
-      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
-	  || crtl->profile))
-    {
-      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
-
-      if (alt_pic_reg_used != INVALID_REGNUM)
-	SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
-
-      pic_reg_used = true;
-    }
-
-  if (pic_reg_used)
-    {
-      if (TARGET_64BIT)
-	{
-	  if (ix86_cmodel == CM_LARGE_PIC)
-	    {
-	      rtx_code_label *label;
-	      rtx tmp_reg;
-
-	      gcc_assert (Pmode == DImode);
-	      label = gen_label_rtx ();
-	      emit_label (label);
-	      LABEL_PRESERVE_P (label) = 1;
-	      tmp_reg = gen_rtx_REG (Pmode, R11_REG);
-	      gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
-	      insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
-						   label));
-	      insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
-	      insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
-					       pic_offset_table_rtx, tmp_reg));
-	    }
-	  else
-            insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
-	}
-      else
-	{
-          insn = emit_insn (gen_set_got (pic_offset_table_rtx));
-	  RTX_FRAME_RELATED_P (insn) = 1;
-	  add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
-	}
-    }
-
-  /* In the pic_reg_used case, make sure that the got load isn't deleted
-     when mcount needs it.  Blockage to avoid call movement across mcount
-     call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
-     note.  */
-  if (crtl->profile && !flag_fentry && pic_reg_used)
-    emit_insn (gen_prologue_use (pic_offset_table_rtx));
-
   if (crtl->drap_reg && !crtl->stack_realign_needed)
     {
       /* vDRAP is setup but after reload it turns out stack realign
@@ -11793,7 +11804,8 @@  ix86_expand_epilogue (int style)
 static void
 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
 {
-  if (pic_offset_table_rtx)
+  if (pic_offset_table_rtx
+      && !ix86_use_pseudo_pic_reg ())
     SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
 #if TARGET_MACHO
   /* Mach-O doesn't support labels at the end of objects, so if
@@ -13113,6 +13125,15 @@  ix86_GOT_alias_set (void)
   return set;
 }
 
+/* If reload is in progress, mark the register that currently holds
+   the PIC base address (pic_offset_table_rtx) as ever live.  */
+static void
+set_pic_reg_ever_alive ()
+{
+  if (reload_in_progress)
+    df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
+}
+
 /* Return a legitimate reference for ORIG (an address) using the
    register REG.  If REG is 0, a new pseudo is generated.
 
@@ -13163,8 +13184,7 @@  legitimize_pic_address (rtx orig, rtx reg)
       /* This symbol may be referenced via a displacement from the PIC
 	 base address (@GOTOFF).  */
 
-      if (reload_in_progress)
-	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+      set_pic_reg_ever_alive ();
       if (GET_CODE (addr) == CONST)
 	addr = XEXP (addr, 0);
       if (GET_CODE (addr) == PLUS)
@@ -13196,8 +13216,7 @@  legitimize_pic_address (rtx orig, rtx reg)
       /* This symbol may be referenced via a displacement from the PIC
 	 base address (@GOTOFF).  */
 
-      if (reload_in_progress)
-	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+      set_pic_reg_ever_alive ();
       if (GET_CODE (addr) == CONST)
 	addr = XEXP (addr, 0);
       if (GET_CODE (addr) == PLUS)
@@ -13258,8 +13277,7 @@  legitimize_pic_address (rtx orig, rtx reg)
 	  /* This symbol must be referenced via a load from the
 	     Global Offset Table (@GOT).  */
 
-	  if (reload_in_progress)
-	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+	  set_pic_reg_ever_alive ();
 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
 	  if (TARGET_64BIT)
@@ -13311,8 +13329,7 @@  legitimize_pic_address (rtx orig, rtx reg)
 	    {
 	      if (!TARGET_64BIT)
 		{
-		  if (reload_in_progress)
-		    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+		  set_pic_reg_ever_alive ();
 		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
 					    UNSPEC_GOTOFF);
 		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
@@ -13608,8 +13625,7 @@  legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
 	}
       else if (flag_pic)
 	{
-	  if (reload_in_progress)
-	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+	  set_pic_reg_ever_alive ();
 	  pic = pic_offset_table_rtx;
 	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
 	}
@@ -14240,6 +14256,8 @@  ix86_pic_register_p (rtx x)
   if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
     return (pic_offset_table_rtx
 	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
+  else if (pic_offset_table_rtx)
+    return REG_P (x) && REGNO (x) == REGNO (pic_offset_table_rtx);
   else
     return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
 }
@@ -14415,7 +14433,8 @@  ix86_delegitimize_address (rtx x)
 	 ...
 	 movl foo@GOTOFF(%ecx), %edx
 	 in which case we return (%ecx - %ebx) + foo.  */
-      if (pic_offset_table_rtx)
+      if (pic_offset_table_rtx
+	  && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
         result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
 						     pic_offset_table_rtx),
 			       result);
@@ -24891,7 +24910,12 @@  ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
 		  && DEFAULT_ABI != MS_ABI))
 	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
 	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
-	use_reg (&use, pic_offset_table_rtx);
+	{
+	  use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
+	  if (ix86_use_pseudo_pic_reg ())
+	    emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
+			    pic_offset_table_rtx);
+	}
     }
 
   if (TARGET_64BIT && INTVAL (callarg2) >= 0)
@@ -47300,6 +47324,10 @@  ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
 #undef TARGET_FUNCTION_ARG
 #define TARGET_FUNCTION_ARG ix86_function_arg
+#undef TARGET_INIT_PIC_REG
+#define TARGET_INIT_PIC_REG ix86_init_pic_reg
+#undef TARGET_USE_PSEUDO_PIC_REG
+#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
 #undef TARGET_FUNCTION_ARG_BOUNDARY
 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
 #undef TARGET_PASS_BY_REFERENCE
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 2c64162..a1be45e 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1243,11 +1243,11 @@  extern const char *host_detect_local_cpu (int argc, const char **argv);
 
 #define REAL_PIC_OFFSET_TABLE_REGNUM  BX_REG
 
-#define PIC_OFFSET_TABLE_REGNUM				\
-  ((TARGET_64BIT && (ix86_cmodel == CM_SMALL_PIC	\
-                     || TARGET_PECOFF))		\
-   || !flag_pic ? INVALID_REGNUM			\
-   : reload_completed ? REGNO (pic_offset_table_rtx)	\
+#define PIC_OFFSET_TABLE_REGNUM						\
+  ((TARGET_64BIT && (ix86_cmodel == CM_SMALL_PIC			\
+                     || TARGET_PECOFF))					\
+   || !flag_pic ? INVALID_REGNUM					\
+   : pic_offset_table_rtx ? INVALID_REGNUM				\
    : REAL_PIC_OFFSET_TABLE_REGNUM)
 
 #define GOT_SYMBOL_NAME "_GLOBAL_OFFSET_TABLE_"
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 396909f..0dd9b79 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -3909,6 +3909,16 @@  If @code{TARGET_FUNCTION_INCOMING_ARG} is not defined,
 @code{TARGET_FUNCTION_ARG} serves both purposes.
 @end deftypefn
 
+@deftypefn {Target Hook} bool TARGET_USE_PSEUDO_PIC_REG (void)
+This hook should return 1 in case pseudo register should be created
+for pic_offset_table_rtx during function expand.
+@end deftypefn
+
+@deftypefn {Target Hook} void TARGET_INIT_PIC_REG (void)
+Perform a target dependent initialization of pic_offset_table_rtx.
+This hook is called at the start of register allocation.
+@end deftypefn
+
 @deftypefn {Target Hook} int TARGET_ARG_PARTIAL_BYTES (cumulative_args_t @var{cum}, enum machine_mode @var{mode}, tree @var{type}, bool @var{named})
 This target hook returns the number of bytes at the beginning of an
 argument that must be put in registers.  The value must be zero for
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 798c1aa..d6ee52a 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -3355,6 +3355,10 @@  the stack.
 
 @hook TARGET_FUNCTION_INCOMING_ARG
 
+@hook TARGET_USE_PSEUDO_PIC_REG
+
+@hook TARGET_INIT_PIC_REG
+
 @hook TARGET_ARG_PARTIAL_BYTES
 
 @hook TARGET_PASS_BY_REFERENCE
diff --git a/gcc/function.c b/gcc/function.c
index ac50f4a..cd7e42e 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -3459,6 +3459,11 @@  assign_parms (tree fndecl)
 
   fnargs.release ();
 
+  /* Initialize pic_offset_table_rtx with a pseudo register
+     if required.  */
+  if (targetm.use_pseudo_pic_reg ())
+    pic_offset_table_rtx = gen_reg_rtx (Pmode);
+
   /* Output all parameter conversion instructions (possibly including calls)
      now that all parameters have been copied out of hard registers.  */
   emit_insn (all.first_conversion_insn);
diff --git a/gcc/init-regs.c b/gcc/init-regs.c
index 91b123d..bf83e51 100644
--- a/gcc/init-regs.c
+++ b/gcc/init-regs.c
@@ -80,6 +80,11 @@  initialize_uninitialized_regs (void)
 	      if (regno < FIRST_PSEUDO_REGISTER)
 		continue;
 
+	      /* Ignore pseudo PIC register.  */
+	      if (pic_offset_table_rtx
+		  && regno == REGNO (pic_offset_table_rtx))
+		continue;
+
 	      /* Do not generate multiple moves for the same regno.
 		 This is common for sequences of subreg operations.
 		 They would be deleted during combine but there is no
diff --git a/gcc/ira-color.c b/gcc/ira-color.c
index 6846567..26b8ffe 100644
--- a/gcc/ira-color.c
+++ b/gcc/ira-color.c
@@ -3239,9 +3239,11 @@  color_pass (ira_loop_tree_node_t loop_tree_node)
 	  ira_assert (ALLOCNO_CLASS (subloop_allocno) == rclass);
 	  ira_assert (bitmap_bit_p (subloop_node->all_allocnos,
 				    ALLOCNO_NUM (subloop_allocno)));
-	  if ((flag_ira_region == IRA_REGION_MIXED)
-	      && (loop_tree_node->reg_pressure[pclass]
-		  <= ira_class_hard_regs_num[pclass]))
+	  if ((flag_ira_region == IRA_REGION_MIXED
+	       && (loop_tree_node->reg_pressure[pclass]
+		   <= ira_class_hard_regs_num[pclass]))
+	      || (pic_offset_table_rtx != NULL
+		  && regno == (int) REGNO (pic_offset_table_rtx)))
 	    {
 	      if (! ALLOCNO_ASSIGNED_P (subloop_allocno))
 		{
diff --git a/gcc/ira-emit.c b/gcc/ira-emit.c
index a3bf41e..676ee1a 100644
--- a/gcc/ira-emit.c
+++ b/gcc/ira-emit.c
@@ -620,7 +620,10 @@  change_loop (ira_loop_tree_node_t node)
 		  /* don't create copies because reload can spill an
 		     allocno set by copy although the allocno will not
 		     get memory slot.  */
-		  || ira_equiv_no_lvalue_p (regno)))
+		  || ira_equiv_no_lvalue_p (regno)
+		  || (pic_offset_table_rtx != NULL
+		      && (ALLOCNO_REGNO (allocno)
+			  == (int) REGNO (pic_offset_table_rtx)))))
 	    continue;
 	  original_reg = allocno_emit_reg (allocno);
 	  if (parent_allocno == NULL
diff --git a/gcc/ira.c b/gcc/ira.c
index f377f7d..ae83aa5 100644
--- a/gcc/ira.c
+++ b/gcc/ira.c
@@ -4887,7 +4887,7 @@  split_live_ranges_for_shrink_wrap (void)
   FOR_BB_INSNS (first, insn)
     {
       rtx dest = interesting_dest_for_shprep (insn, call_dom);
-      if (!dest)
+      if (!dest || dest == pic_offset_table_rtx)
 	continue;
 
       rtx newreg = NULL_RTX;
@@ -5039,6 +5039,9 @@  ira (FILE *f)
   bool saved_flag_caller_saves = flag_caller_saves;
   enum ira_region saved_flag_ira_region = flag_ira_region;
 
+  /* Perform target specific PIC register initialization.  */
+  targetm.init_pic_reg ();
+
   ira_conflicts_p = optimize > 0;
 
   ira_use_lra_p = targetm.lra_p ();
@@ -5290,10 +5293,18 @@  do_reload (void)
 {
   basic_block bb;
   bool need_dce;
+  unsigned pic_offset_table_regno = INVALID_REGNUM;
 
   if (flag_ira_verbose < 10)
     ira_dump_file = dump_file;
 
+  /* If pic_offset_table_rtx is a pseudo register, remember its number
+     so that it can be made a pseudo again after reload; this avoids
+     wrong uses of the hard register assigned to it.  */
+  if (pic_offset_table_rtx
+      && REGNO (pic_offset_table_rtx) >= FIRST_PSEUDO_REGISTER)
+    pic_offset_table_regno = REGNO (pic_offset_table_rtx);
+
   timevar_push (TV_RELOAD);
   if (ira_use_lra_p)
     {
@@ -5398,6 +5409,9 @@  do_reload (void)
       inform (DECL_SOURCE_LOCATION (decl), "for %qD", decl);
     }
 
+  if (pic_offset_table_regno != INVALID_REGNUM)
+    pic_offset_table_rtx = gen_rtx_REG (Pmode, pic_offset_table_regno);
+
   timevar_pop (TV_IRA);
 }
 
diff --git a/gcc/lra-assigns.c b/gcc/lra-assigns.c
index c7164cd..99ae00d 100644
--- a/gcc/lra-assigns.c
+++ b/gcc/lra-assigns.c
@@ -879,11 +879,13 @@  spill_for (int regno, bitmap spilled_pseudo_bitmap, bool first_p)
 	}
       /* Spill pseudos.	 */
       EXECUTE_IF_SET_IN_BITMAP (&spill_pseudos_bitmap, 0, spill_regno, bi)
-	if ((int) spill_regno >= lra_constraint_new_regno_start
-	    && ! bitmap_bit_p (&lra_inheritance_pseudos, spill_regno)
-	    && ! bitmap_bit_p (&lra_split_regs, spill_regno)
-	    && ! bitmap_bit_p (&lra_subreg_reload_pseudos, spill_regno)
-	    && ! bitmap_bit_p (&lra_optional_reload_pseudos, spill_regno))
+	if ((pic_offset_table_rtx != NULL
+	     && spill_regno == REGNO (pic_offset_table_rtx))
+	    || ((int) spill_regno >= lra_constraint_new_regno_start
+		&& ! bitmap_bit_p (&lra_inheritance_pseudos, spill_regno)
+		&& ! bitmap_bit_p (&lra_split_regs, spill_regno)
+		&& ! bitmap_bit_p (&lra_subreg_reload_pseudos, spill_regno)
+		&& ! bitmap_bit_p (&lra_optional_reload_pseudos, spill_regno)))
 	  goto fail;
       insn_pseudos_num = 0;
       if (lra_dump_file != NULL)
@@ -1053,9 +1055,15 @@  setup_live_pseudos_and_spill_after_risky_transforms (bitmap
       return;
     }
   for (n = 0, i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
-    if (reg_renumber[i] >= 0 && lra_reg_info[i].nrefs > 0)
+    if ((pic_offset_table_rtx == NULL_RTX
+	 || i != (int) REGNO (pic_offset_table_rtx))
+	&& reg_renumber[i] >= 0 && lra_reg_info[i].nrefs > 0)
       sorted_pseudos[n++] = i;
   qsort (sorted_pseudos, n, sizeof (int), pseudo_compare_func);
+  if (pic_offset_table_rtx != NULL_RTX
+      && (regno = REGNO (pic_offset_table_rtx)) >= FIRST_PSEUDO_REGISTER
+      && reg_renumber[regno] >= 0 && lra_reg_info[regno].nrefs > 0)
+    sorted_pseudos[n++] = regno;
   for (i = n - 1; i >= 0; i--)
     {
       regno = sorted_pseudos[i];
diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index 5f68399..977e1db 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -3798,6 +3798,35 @@  contains_reg_p (rtx x, bool hard_reg_p, bool spilled_p)
   return false;
 }
 
+/* Return true if X is, or contains in its operands, a SYMBOL_REF.  */
+static bool
+contains_symbol_ref_p (rtx x)
+{
+  int i, j;
+  const char *fmt;
+  enum rtx_code code;
+
+  code = GET_CODE (x);
+  if (code == SYMBOL_REF)
+    return true;
+  fmt = GET_RTX_FORMAT (code);
+  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+    {
+      if (fmt[i] == 'e')
+	{
+	  if (contains_symbol_ref_p (XEXP (x, i)))
+	    return true;
+	}
+      else if (fmt[i] == 'E')
+	{
+	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+	    if (contains_symbol_ref_p (XVECEXP (x, i, j)))
+	      return true;
+	}
+    }
+  return false;
+}
+
 /* Process all regs in location *LOC and change them on equivalent
    substitution.  Return true if any change was done.  */
 static bool
@@ -4020,7 +4049,11 @@  lra_constraints (bool first_p)
       ("Maximum number of LRA constraint passes is achieved (%d)\n",
        LRA_MAX_CONSTRAINT_ITERATION_NUMBER);
   changed_p = false;
-  lra_risky_transformations_p = false;
+  if (pic_offset_table_rtx
+      && REGNO (pic_offset_table_rtx) >= FIRST_PSEUDO_REGISTER)
+    lra_risky_transformations_p = true;
+  else
+    lra_risky_transformations_p = false;
   new_insn_uid_start = get_max_uid ();
   new_regno_start = first_p ? lra_constraint_new_regno_start : max_reg_num ();
   /* Mark used hard regs for target stack size calulations.  */
@@ -4088,7 +4121,12 @@  lra_constraints (bool first_p)
 		   paradoxical subregs.  */
 		|| (MEM_P (x)
 		    && (GET_MODE_SIZE (lra_reg_info[i].biggest_mode)
-			> GET_MODE_SIZE (GET_MODE (x)))))
+			> GET_MODE_SIZE (GET_MODE (x))))
+		|| (pic_offset_table_rtx
+		    && ((CONST_POOL_OK_P (PSEUDO_REGNO_MODE (i), x)
+			 && (targetm.preferred_reload_class
+			     (x, lra_get_allocno_class (i)) == NO_REGS))
+			|| contains_symbol_ref_p (x))))
 	      ira_reg_equiv[i].defined_p = false;
 	    if (contains_reg_p (x, false, true))
 	      ira_reg_equiv[i].profitable_p = false;
diff --git a/gcc/shrink-wrap.c b/gcc/shrink-wrap.c
index fd24135..e1ecff7 100644
--- a/gcc/shrink-wrap.c
+++ b/gcc/shrink-wrap.c
@@ -495,7 +495,8 @@  try_shrink_wrapping (edge *entry_edge, edge orig_entry_edge,
       if (frame_pointer_needed)
 	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
 			     HARD_FRAME_POINTER_REGNUM);
-      if (pic_offset_table_rtx)
+      if (pic_offset_table_rtx
+	  && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
 	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
 			     PIC_OFFSET_TABLE_REGNUM);
       if (crtl->drap_reg)
diff --git a/gcc/target.def b/gcc/target.def
index ce11eae..4d90fc2 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4274,6 +4274,20 @@  DEFHOOK
 
 HOOK_VECTOR_END (calls)
 
+DEFHOOK
+(use_pseudo_pic_reg,
+ "This hook should return 1 in case pseudo register should be created\n\
+for pic_offset_table_rtx during function expand.",
+ bool, (void),
+ hook_bool_void_false)
+
+DEFHOOK
+(init_pic_reg,
+ "Perform a target dependent initialization of pic_offset_table_rtx.\n\
+This hook is called at the start of register allocation.",
+ void, (void),
+ hook_void_void)
+
 /* Return the diagnostic message string if conversion from FROMTYPE
    to TOTYPE is not allowed, NULL otherwise.  */
 DEFHOOK