diff mbox

PATCH: PR target/47715: [x32] Use SImode for thread pointer

Message ID CAMe9rOo0KGfje5FvAkVF0ZSOT=AS8scTSyPBZ7-QvJbXGKJaqQ@mail.gmail.com
State New
Headers show

Commit Message

H.J. Lu July 28, 2011, 2:47 p.m. UTC
On Thu, Jul 28, 2011 at 6:40 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Jul 28, 2011 at 3:24 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>
>>>>> In x32, thread pointer is 32bit and choice of segment register for the
>>>>> thread base ptr load should be based on TARGET_64BIT.  This patch
>>>>> implements it.  OK for trunk?
>>>>
>>>> -ENOTESTCASE.
>>>>
>>>
>>> There is no standalone testcase.  The symptom is in glibc build, I
>>> got
>>>
>>> CPP='/export/build/gnu/gcc-x32/release/usr/gcc-4.7.0-x32/bin/gcc -mx32
>>>  -E -x c-header'
>>> /export/build/gnu/glibc-x32/build-x86_64-linux/elf/ld-linux-x32.so.2
>>> --library-path /export/build/gnu/glibc-x32/build-x86_64-linux:/export/build/gnu/glibc-x32/build-x86_64-linux/math:/export/build/gnu/glibc-x32/build-x86_64-linux/elf:/export/build/gnu/glibc-x32/build-x86_64-linux/dlfcn:/export/build/gnu/glibc-x32/build-x86_64-linux/nss:/export/build/gnu/glibc-x32/build-x86_64-linux/nis:/export/build/gnu/glibc-x32/build-x86_64-linux/rt:/export/build/gnu/glibc-x32/build-x86_64-linux/resolv:/export/build/gnu/glibc-x32/build-x86_64-linux/crypt:/export/build/gnu/glibc-x32/build-x86_64-linux/nptl
>>> /export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/rpcgen -Y
>>> ../scripts -h rpcsvc/yppasswd.x -o
>>> /export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/rpcsvc/yppasswd.T
>>> make[5]: *** [/export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/xbootparam_prot.stmp]
>>> Segmentation fault
>>> make[5]: *** Waiting for unfinished jobs....
>>> make[5]: *** [/export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/xrstat.stmp]
>>> Segmentation fault
>>> make[5]: *** [/export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/xyppasswd.stmp]
>>> Segmentation fault
>>>
>>> since thread pointer is 32bit in x32.
>>>
>>
>> If we load thread pointer (fs segment register) in x32 with 64bit
>> load, the upper 32bits are garbage.
>> We must load 32bit
>
> So, instead of huge complications with new mode iterator, just
> introduce two new patterns that will shadow existing ones for
> TARGET_X32.
>
> Like in attached (untested) patch.
>

I tried the following patch with typos fixed.  It almost worked,
except for this failure in glibc testsuite:

gen-locale.sh: line 27: 14755 Aborted                 (core dumped)
I18NPATH=. GCONV_PATH=${common_objpfx}iconvdata ${localedef} --quiet
-c -f $charmap -i $input ${common_objpfx}localedata/$out
Charmap: "ISO-8859-1" Inputfile: "nb_NO" Outputdir: "nb_NO.ISO-8859-1" failed
make[4]: *** [/export/build/gnu/glibc-x32/build-x86_64-linux/localedata/nb_NO.ISO-8859-1/LC_CTYPE]
Error 1

I will add:

Comments

Uros Bizjak July 28, 2011, 2:59 p.m. UTC | #1
On Thu, Jul 28, 2011 at 4:47 PM, H.J. Lu <hjl.tools@gmail.com> wrote:

>>>>>> In x32, thread pointer is 32bit and choice of segment register for the
>>>>>> thread base ptr load should be based on TARGET_64BIT.  This patch
>>>>>> implements it.  OK for trunk?
>>>>>
>>>>> -ENOTESTCASE.
>>>>>
>>>>
>>>> There is no standalone testcase.  The symptom is in glibc build, I
>>>> got
>>>>
>>>> CPP='/export/build/gnu/gcc-x32/release/usr/gcc-4.7.0-x32/bin/gcc -mx32
>>>>  -E -x c-header'
>>>> /export/build/gnu/glibc-x32/build-x86_64-linux/elf/ld-linux-x32.so.2
>>>> --library-path /export/build/gnu/glibc-x32/build-x86_64-linux:/export/build/gnu/glibc-x32/build-x86_64-linux/math:/export/build/gnu/glibc-x32/build-x86_64-linux/elf:/export/build/gnu/glibc-x32/build-x86_64-linux/dlfcn:/export/build/gnu/glibc-x32/build-x86_64-linux/nss:/export/build/gnu/glibc-x32/build-x86_64-linux/nis:/export/build/gnu/glibc-x32/build-x86_64-linux/rt:/export/build/gnu/glibc-x32/build-x86_64-linux/resolv:/export/build/gnu/glibc-x32/build-x86_64-linux/crypt:/export/build/gnu/glibc-x32/build-x86_64-linux/nptl
>>>> /export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/rpcgen -Y
>>>> ../scripts -h rpcsvc/yppasswd.x -o
>>>> /export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/rpcsvc/yppasswd.T
>>>> make[5]: *** [/export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/xbootparam_prot.stmp]
>>>> Segmentation fault
>>>> make[5]: *** Waiting for unfinished jobs....
>>>> make[5]: *** [/export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/xrstat.stmp]
>>>> Segmentation fault
>>>> make[5]: *** [/export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/xyppasswd.stmp]
>>>> Segmentation fault
>>>>
>>>> since thread pointer is 32bit in x32.
>>>>
>>>
>>> If we load thread pointer (fs segment register) in x32 with 64bit
>>> load, the upper 32bits are garbage.
>>> We must load 32bit
>>
>> So, instead of huge complications with new mode iterator, just
>> introduce two new patterns that will shadow existing ones for
>> TARGET_X32.
>>
>> Like in attached (untested) patch.
>>
>
> I tried the following patch with typos fixed.  It almost worked,
> except for this failure in glibc testsuite:
>
> gen-locale.sh: line 27: 14755 Aborted                 (core dumped)
> I18NPATH=. GCONV_PATH=${common_objpfx}iconvdata ${localedef} --quiet
> -c -f $charmap -i $input ${common_objpfx}localedata/$out
> Charmap: "ISO-8859-1" Inputfile: "nb_NO" Outputdir: "nb_NO.ISO-8859-1" failed
> make[4]: *** [/export/build/gnu/glibc-x32/build-x86_64-linux/localedata/nb_NO.ISO-8859-1/LC_CTYPE]
> Error 1
>
> I will add:
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 8723dc5..d32d64d 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -12120,7 +12120,9 @@ get_thread_pointer (bool to_reg)
>  {
>   rtx tp, reg, insn;
>
> -  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
> +  tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
> +  if (ptr_mode != Pmode)
> +    tp = convert_to_mode (Pmode, tp, 1);
>   if (!to_reg)
>     return tp;
>
> since TP must be 32bit.

No, this won't have the desired effect. It will change the UNSPEC, so
it won't match patterns in i386.md.

Can you debug the failure a bit more? With my patterns, add{l} and
mov{l} should clear top 32bits.

I'd also argue that there is something wrong with glibc. It should
initialize %fs with a zero-extended value, so original 64bit
load_tp/add_tp patterns could be used.

Uros.
H.J. Lu July 28, 2011, 3:59 p.m. UTC | #2
On Thu, Jul 28, 2011 at 7:59 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Jul 28, 2011 at 4:47 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>
>>>>>>> In x32, thread pointer is 32bit and choice of segment register for the
>>>>>>> thread base ptr load should be based on TARGET_64BIT.  This patch
>>>>>>> implements it.  OK for trunk?
>>>>>>
>>>>>> -ENOTESTCASE.
>>>>>>
>>>>>
>>>>> There is no standalone testcase.  The symptom is in glibc build, I
>>>>> got
>>>>>
>>>>> CPP='/export/build/gnu/gcc-x32/release/usr/gcc-4.7.0-x32/bin/gcc -mx32
>>>>>  -E -x c-header'
>>>>> /export/build/gnu/glibc-x32/build-x86_64-linux/elf/ld-linux-x32.so.2
>>>>> --library-path /export/build/gnu/glibc-x32/build-x86_64-linux:/export/build/gnu/glibc-x32/build-x86_64-linux/math:/export/build/gnu/glibc-x32/build-x86_64-linux/elf:/export/build/gnu/glibc-x32/build-x86_64-linux/dlfcn:/export/build/gnu/glibc-x32/build-x86_64-linux/nss:/export/build/gnu/glibc-x32/build-x86_64-linux/nis:/export/build/gnu/glibc-x32/build-x86_64-linux/rt:/export/build/gnu/glibc-x32/build-x86_64-linux/resolv:/export/build/gnu/glibc-x32/build-x86_64-linux/crypt:/export/build/gnu/glibc-x32/build-x86_64-linux/nptl
>>>>> /export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/rpcgen -Y
>>>>> ../scripts -h rpcsvc/yppasswd.x -o
>>>>> /export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/rpcsvc/yppasswd.T
>>>>> make[5]: *** [/export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/xbootparam_prot.stmp]
>>>>> Segmentation fault
>>>>> make[5]: *** Waiting for unfinished jobs....
>>>>> make[5]: *** [/export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/xrstat.stmp]
>>>>> Segmentation fault
>>>>> make[5]: *** [/export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/xyppasswd.stmp]
>>>>> Segmentation fault
>>>>>
>>>>> since thread pointer is 32bit in x32.
>>>>>
>>>>
>>>> If we load thread pointer (fs segment register) in x32 with 64bit
>>>> load, the upper 32bits are garbage.
>>>> We must load 32bit
>>>
>>> So, instead of huge complications with new mode iterator, just
>>> introduce two new patterns that will shadow existing ones for
>>> TARGET_X32.
>>>
>>> Like in attached (untested) patch.
>>>
>>
>> I tried the following patch with typos fixed.  It almost worked,
>> except for this failure in glibc testsuite:
>>
>> gen-locale.sh: line 27: 14755 Aborted                 (core dumped)
>> I18NPATH=. GCONV_PATH=${common_objpfx}iconvdata ${localedef} --quiet
>> -c -f $charmap -i $input ${common_objpfx}localedata/$out
>> Charmap: "ISO-8859-1" Inputfile: "nb_NO" Outputdir: "nb_NO.ISO-8859-1" failed
>> make[4]: *** [/export/build/gnu/glibc-x32/build-x86_64-linux/localedata/nb_NO.ISO-8859-1/LC_CTYPE]
>> Error 1
>>
>> I will add:
>>
>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> index 8723dc5..d32d64d 100644
>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -12120,7 +12120,9 @@ get_thread_pointer (bool to_reg)
>>  {
>>   rtx tp, reg, insn;
>>
>> -  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
>> +  tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
>> +  if (ptr_mode != Pmode)
>> +    tp = convert_to_mode (Pmode, tp, 1);
>>   if (!to_reg)
>>     return tp;
>>
>> since TP must be 32bit.
>
> No, this won't have the desired effect. It will change the UNSPEC, so
> it won't match patterns in i386.md.
>
> Can you debug the failure a bit more? With my patterns, add{l} and
> mov{l} should clear top 32bits.
>

TP is 32bit in x32  For load_tp_x32, we load SImode value and
zero-extend to DImode. For add_tp_x32, we are adding SImode
value.  We can't pretend TP is 64bit.  load_tp_x32 and add_tp_x32
must take SImode TP.
H.J. Lu July 28, 2011, 4 p.m. UTC | #3
On Thu, Jul 28, 2011 at 8:59 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Thu, Jul 28, 2011 at 7:59 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Thu, Jul 28, 2011 at 4:47 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>
>>>>>>>> In x32, thread pointer is 32bit and choice of segment register for the
>>>>>>>> thread base ptr load should be based on TARGET_64BIT.  This patch
>>>>>>>> implements it.  OK for trunk?
>>>>>>>
>>>>>>> -ENOTESTCASE.
>>>>>>>
>>>>>>
>>>>>> There is no standalone testcase.  The symptom is in glibc build, I
>>>>>> got
>>>>>>
>>>>>> CPP='/export/build/gnu/gcc-x32/release/usr/gcc-4.7.0-x32/bin/gcc -mx32
>>>>>>  -E -x c-header'
>>>>>> /export/build/gnu/glibc-x32/build-x86_64-linux/elf/ld-linux-x32.so.2
>>>>>> --library-path /export/build/gnu/glibc-x32/build-x86_64-linux:/export/build/gnu/glibc-x32/build-x86_64-linux/math:/export/build/gnu/glibc-x32/build-x86_64-linux/elf:/export/build/gnu/glibc-x32/build-x86_64-linux/dlfcn:/export/build/gnu/glibc-x32/build-x86_64-linux/nss:/export/build/gnu/glibc-x32/build-x86_64-linux/nis:/export/build/gnu/glibc-x32/build-x86_64-linux/rt:/export/build/gnu/glibc-x32/build-x86_64-linux/resolv:/export/build/gnu/glibc-x32/build-x86_64-linux/crypt:/export/build/gnu/glibc-x32/build-x86_64-linux/nptl
>>>>>> /export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/rpcgen -Y
>>>>>> ../scripts -h rpcsvc/yppasswd.x -o
>>>>>> /export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/rpcsvc/yppasswd.T
>>>>>> make[5]: *** [/export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/xbootparam_prot.stmp]
>>>>>> Segmentation fault
>>>>>> make[5]: *** Waiting for unfinished jobs....
>>>>>> make[5]: *** [/export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/xrstat.stmp]
>>>>>> Segmentation fault
>>>>>> make[5]: *** [/export/build/gnu/glibc-x32/build-x86_64-linux/sunrpc/xyppasswd.stmp]
>>>>>> Segmentation fault
>>>>>>
>>>>>> since thread pointer is 32bit in x32.
>>>>>>
>>>>>
>>>>> If we load thread pointer (fs segment register) in x32 with 64bit
>>>>> load, the upper 32bits are garbage.
>>>>> We must load 32bit
>>>>
>>>> So, instead of huge complications with new mode iterator, just
>>>> introduce two new patterns that will shadow existing ones for
>>>> TARGET_X32.
>>>>
>>>> Like in attached (untested) patch.
>>>>
>>>
>>> I tried the following patch with typos fixed.  It almost worked,
>>> except for this failure in glibc testsuite:
>>>
>>> gen-locale.sh: line 27: 14755 Aborted                 (core dumped)
>>> I18NPATH=. GCONV_PATH=${common_objpfx}iconvdata ${localedef} --quiet
>>> -c -f $charmap -i $input ${common_objpfx}localedata/$out
>>> Charmap: "ISO-8859-1" Inputfile: "nb_NO" Outputdir: "nb_NO.ISO-8859-1" failed
>>> make[4]: *** [/export/build/gnu/glibc-x32/build-x86_64-linux/localedata/nb_NO.ISO-8859-1/LC_CTYPE]
>>> Error 1
>>>
>>> I will add:
>>>
>>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>>> index 8723dc5..d32d64d 100644
>>> --- a/gcc/config/i386/i386.c
>>> +++ b/gcc/config/i386/i386.c
>>> @@ -12120,7 +12120,9 @@ get_thread_pointer (bool to_reg)
>>>  {
>>>   rtx tp, reg, insn;
>>>
>>> -  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
>>> +  tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
>>> +  if (ptr_mode != Pmode)
>>> +    tp = convert_to_mode (Pmode, tp, 1);
>>>   if (!to_reg)
>>>     return tp;
>>>
>>> since TP must be 32bit.
>>
>> No, this won't have the desired effect. It will change the UNSPEC, so
>> it won't match patterns in i386.md.
>>
>> Can you debug the failure a bit more? With my patterns, add{l} and
>> mov{l} should clear top 32bits.
>>
>
> TP is 32bit in x32  For load_tp_x32, we load SImode value and
> zero-extend to DImode. For add_tp_x32, we are adding SImode
> value.  We can't pretend TP is 64bit.  load_tp_x32 and add_tp_x32
> must take SImode TP.
>

I will see what I can do.
Uros Bizjak July 28, 2011, 6:21 p.m. UTC | #4
On Thu, Jul 28, 2011 at 8:03 PM, H.J. Lu <hjl.tools@gmail.com> wrote:

>>>>>> So, instead of huge complications with new mode iterator, just
>>>>>> introduce two new patterns that will shadow existing ones for
>>>>>> TARGET_X32.
>>>>>>
>>>>>> Like in attached (untested) patch.
>>>>>>
>>>>>
>>>>> I tried the following patch with typos fixed.  It almost worked,
>>>>> except for this failure in glibc testsuite:
>>>>>
>>>>> gen-locale.sh: line 27: 14755 Aborted                 (core dumped)
>>>>> I18NPATH=. GCONV_PATH=${common_objpfx}iconvdata ${localedef} --quiet
>>>>> -c -f $charmap -i $input ${common_objpfx}localedata/$out
>>>>> Charmap: "ISO-8859-1" Inputfile: "nb_NO" Outputdir: "nb_NO.ISO-8859-1" failed
>>>>> make[4]: *** [/export/build/gnu/glibc-x32/build-x86_64-linux/localedata/nb_NO.ISO-8859-1/LC_CTYPE]
>>>>> Error 1
>>>>>
>>>>> I will add:
>>>>>
>>>>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>>>>> index 8723dc5..d32d64d 100644
>>>>> --- a/gcc/config/i386/i386.c
>>>>> +++ b/gcc/config/i386/i386.c
>>>>> @@ -12120,7 +12120,9 @@ get_thread_pointer (bool to_reg)
>>>>>  {
>>>>>   rtx tp, reg, insn;
>>>>>
>>>>> -  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
>>>>> +  tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
>>>>> +  if (ptr_mode != Pmode)
>>>>> +    tp = convert_to_mode (Pmode, tp, 1);
>>>>>   if (!to_reg)
>>>>>     return tp;
>>>>>
>>>>> since TP must be 32bit.
>>>>
>>>> No, this won't have the desired effect. It will change the UNSPEC, so
>>>> it won't match patterns in i386.md.
>>>>
>>>> Can you debug the failure a bit more? With my patterns, add{l} and
>>>> mov{l} should clear top 32bits.
>>>>
>>>
>>> TP is 32bit in x32  For load_tp_x32, we load SImode value and
>>> zero-extend to DImode. For add_tp_x32, we are adding SImode
>>> value.  We can't pretend TP is 64bit.  load_tp_x32 and add_tp_x32
>>> must take SImode TP.
>>>
>>
>> I will see what I can do.
>>
>
> Here is the updated patch to use 32bit TP for 32.

Why??

This part makes no sense:

-  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
+  tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
+  if (ptr_mode != Pmode)
+    tp = convert_to_mode (Pmode, tp, 1);

You will create zero_extend (unspec ...), that won't be matched by any pattern.

Can you please explain, how is this pattern different than DImode
pattern, proposed in my patch?

+(define_insn "*load_tp_x32"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(const_int 0)] UNSPEC_TP))]
+  "TARGET_X32"
+  "mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0")
+   (set_attr "length" "7")
+   (set_attr "memory" "load")
+   (set_attr "imm_disp" "false")])

vs:

+(define_insn "*load_tp_x32"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(const_int 0)] UNSPEC_TP))]
+  "TARGET_X32"
+  "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0")
+   (set_attr "length" "7")
+   (set_attr "memory" "load")
+   (set_attr "imm_disp" "false")])

Uros.
H.J. Lu July 28, 2011, 6:30 p.m. UTC | #5
On Thu, Jul 28, 2011 at 11:21 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Jul 28, 2011 at 8:03 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>
>>>>>>> So, instead of huge complications with new mode iterator, just
>>>>>>> introduce two new patterns that will shadow existing ones for
>>>>>>> TARGET_X32.
>>>>>>>
>>>>>>> Like in attached (untested) patch.
>>>>>>>
>>>>>>
>>>>>> I tried the following patch with typos fixed.  It almost worked,
>>>>>> except for this failure in glibc testsuite:
>>>>>>
>>>>>> gen-locale.sh: line 27: 14755 Aborted                 (core dumped)
>>>>>> I18NPATH=. GCONV_PATH=${common_objpfx}iconvdata ${localedef} --quiet
>>>>>> -c -f $charmap -i $input ${common_objpfx}localedata/$out
>>>>>> Charmap: "ISO-8859-1" Inputfile: "nb_NO" Outputdir: "nb_NO.ISO-8859-1" failed
>>>>>> make[4]: *** [/export/build/gnu/glibc-x32/build-x86_64-linux/localedata/nb_NO.ISO-8859-1/LC_CTYPE]
>>>>>> Error 1
>>>>>>
>>>>>> I will add:
>>>>>>
>>>>>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>>>>>> index 8723dc5..d32d64d 100644
>>>>>> --- a/gcc/config/i386/i386.c
>>>>>> +++ b/gcc/config/i386/i386.c
>>>>>> @@ -12120,7 +12120,9 @@ get_thread_pointer (bool to_reg)
>>>>>>  {
>>>>>>   rtx tp, reg, insn;
>>>>>>
>>>>>> -  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
>>>>>> +  tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
>>>>>> +  if (ptr_mode != Pmode)
>>>>>> +    tp = convert_to_mode (Pmode, tp, 1);
>>>>>>   if (!to_reg)
>>>>>>     return tp;
>>>>>>
>>>>>> since TP must be 32bit.
>>>>>
>>>>> No, this won't have the desired effect. It will change the UNSPEC, so
>>>>> it won't match patterns in i386.md.
>>>>>
>>>>> Can you debug the failure a bit more? With my patterns, add{l} and
>>>>> mov{l} should clear top 32bits.
>>>>>
>>>>
>>>> TP is 32bit in x32  For load_tp_x32, we load SImode value and
>>>> zero-extend to DImode. For add_tp_x32, we are adding SImode
>>>> value.  We can't pretend TP is 64bit.  load_tp_x32 and add_tp_x32
>>>> must take SImode TP.
>>>>
>>>
>>> I will see what I can do.
>>>
>>
>> Here is the updated patch to use 32bit TP for 32.
>
> Why??
>
> This part makes no sense:
>
> -  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
> +  tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
> +  if (ptr_mode != Pmode)
> +    tp = convert_to_mode (Pmode, tp, 1);
>
> You will create zero_extend (unspec ...), that won't be matched by any pattern.

No.  I created  zero_exten from (reg:SI) to (reg: DI).

> Can you please explain, how is this pattern different than DImode
> pattern, proposed in my patch?
>
> +(define_insn "*load_tp_x32"
> +  [(set (match_operand:SI 0 "register_operand" "=r")
> +       (unspec:SI [(const_int 0)] UNSPEC_TP))]
> +  "TARGET_X32"
> +  "mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}"
> +  [(set_attr "type" "imov")
> +   (set_attr "modrm" "0")
> +   (set_attr "length" "7")
> +   (set_attr "memory" "load")
> +   (set_attr "imm_disp" "false")])
>
> vs:
>
> +(define_insn "*load_tp_x32"
> +  [(set (match_operand:DI 0 "register_operand" "=r")
> +       (unspec:DI [(const_int 0)] UNSPEC_TP))]

That is wrong since source (TP)  is 32bit.  This pattern tells compiler
source is 64bit.

> +  "TARGET_X32"
> +  "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}"
> +  [(set_attr "type" "imov")
> +   (set_attr "modrm" "0")
> +   (set_attr "length" "7")
> +   (set_attr "memory" "load")
> +   (set_attr "imm_disp" "false")])
>

I will try zero_extend to see if it works.

Thanks.
Uros Bizjak July 28, 2011, 6:31 p.m. UTC | #6
On Thu, Jul 28, 2011 at 8:30 PM, H.J. Lu <hjl.tools@gmail.com> wrote:

>>>>> TP is 32bit in x32  For load_tp_x32, we load SImode value and
>>>>> zero-extend to DImode. For add_tp_x32, we are adding SImode
>>>>> value.  We can't pretend TP is 64bit.  load_tp_x32 and add_tp_x32
>>>>> must take SImode TP.
>>>>>
>>>>
>>>> I will see what I can do.
>>>>
>>>
>>> Here is the updated patch to use 32bit TP for 32.
>>
>> Why??
>>
>> This part makes no sense:
>>
>> -  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
>> +  tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
>> +  if (ptr_mode != Pmode)
>> +    tp = convert_to_mode (Pmode, tp, 1);
>>
>> You will create zero_extend (unspec ...), that won't be matched by any pattern.
>
> No.  I created  zero_exten from (reg:SI) to (reg: DI).
>
>> Can you please explain, how is this pattern different than DImode
>> pattern, proposed in my patch?
>>
>> +(define_insn "*load_tp_x32"
>> +  [(set (match_operand:SI 0 "register_operand" "=r")
>> +       (unspec:SI [(const_int 0)] UNSPEC_TP))]
>> +  "TARGET_X32"
>> +  "mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}"
>> +  [(set_attr "type" "imov")
>> +   (set_attr "modrm" "0")
>> +   (set_attr "length" "7")
>> +   (set_attr "memory" "load")
>> +   (set_attr "imm_disp" "false")])
>>
>> vs:
>>
>> +(define_insn "*load_tp_x32"
>> +  [(set (match_operand:DI 0 "register_operand" "=r")
>> +       (unspec:DI [(const_int 0)] UNSPEC_TP))]
>
> That is wrong since source (TP)  is 32bit.  This pattern tells compiler
> source is 64bit.

Where?

Uros.
Uros Bizjak July 28, 2011, 10:03 p.m. UTC | #7
On Thu, Jul 28, 2011 at 10:15 PM, H.J. Lu <hjl.tools@gmail.com> wrote:

>>>>>>> TP is 32bit in x32  For load_tp_x32, we load SImode value and
>>>>>>> zero-extend to DImode. For add_tp_x32, we are adding SImode
>>>>>>> value.  We can't pretend TP is 64bit.  load_tp_x32 and add_tp_x32
>>>>>>> must take SImode TP.

> Here is the revised patch.  The difference is I changed *add_tp_x32 to SImode.
> For
>
> ---
> extern __thread int __libc_errno __attribute__ ((tls_model ("initial-exec")));
>
> int *
> __errno_location (void)
> {
>  return &__libc_errno;
> }
> ---
>
> compiled with -mx32 -O2 -fPIC  DImode *add_tp_x32 generates:
>
>        movq    __libc_errno@gottpoff(%rip), %rax
>        addl    %fs:0, %eax
>        mov     %eax, %eax
>        ret
>
> SImode *add_tp_x32 generates:
>
>        movl    %fs:0, %eax
>        addl    __libc_errno@gottpoff(%rip), %eax
>        ret

This happens because combine can't combine DImode load and SImode plus
RTXes. These RTXes have to be in Pmode, see the intention in
legitimize_tls_address, also for TARGET_GNU2_TLS.

Can you please debug what goes wrong with tp_add_x32 in DImode?

Uros.
H.J. Lu July 28, 2011, 10:28 p.m. UTC | #8
On Thu, Jul 28, 2011 at 3:03 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Jul 28, 2011 at 10:15 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>
>>>>>>>> TP is 32bit in x32  For load_tp_x32, we load SImode value and
>>>>>>>> zero-extend to DImode. For add_tp_x32, we are adding SImode
>>>>>>>> value.  We can't pretend TP is 64bit.  load_tp_x32 and add_tp_x32
>>>>>>>> must take SImode TP.
>
>> Here is the revised patch.  The difference is I changed *add_tp_x32 to SImode.
>> For
>>
>> ---
>> extern __thread int __libc_errno __attribute__ ((tls_model ("initial-exec")));
>>
>> int *
>> __errno_location (void)
>> {
>>  return &__libc_errno;
>> }
>> ---
>>
>> compiled with -mx32 -O2 -fPIC  DImode *add_tp_x32 generates:
>>
>>        movq    __libc_errno@gottpoff(%rip), %rax
>>        addl    %fs:0, %eax
>>        mov     %eax, %eax
>>        ret
>>
>> SImode *add_tp_x32 generates:
>>
>>        movl    %fs:0, %eax
>>        addl    __libc_errno@gottpoff(%rip), %eax
>>        ret
>
> This happens because combine can't combine DImode load and SImode plus
> RTXes. These RTXes have to be in Pmode, see the intention in
> legitimize_tls_address, also for TARGET_GNU2_TLS.
>
> Can you please debug what goes wrong with tp_add_x32 in DImode?
>

We start with

insn 5 2 6 2 (set (reg:DI 63)
        (unspec:DI [
                (const_int 0 [0])
            ] UNSPEC_TP)) err.i:6 716 {*load_tp_x32}
     (nil))

(insn 6 5 7 2 (set (reg:DI 64)
        (mem/u/c:DI (const:DI (unspec:DI [
                        (symbol_ref:SI ("__libc_errno") [flags 0x60]  <var_decl
0x7ffff085b140 __libc_errno>)
                    ] UNSPEC_GOTNTPOFF)) [0 S8 A8])) err.i:6 62 {*movdi_internal
_rex64}
     (nil))

(insn 7 6 8 2 (parallel [
            (set (reg:DI 65)
                (plus:DI (reg:DI 63)
                    (reg:DI 64)))
            (clobber (reg:CC 17 flags))
        ]) err.i:6 251 {*adddi_1}
     (expr_list:REG_DEAD (reg:DI 64)
        (expr_list:REG_DEAD (reg:DI 63)
            (expr_list:REG_UNUSED (reg:CC 17 flags)
                (nil)))))

With DImode *add_tp_x32, combine matches

(parallel [
        (set (reg:DI 65)
            (plus:DI (unspec:DI [
                        (const_int 0 [0])
                    ] UNSPEC_TP)
                (reg:DI 64)))
        (clobber (reg:CC 17 flags))
    ])

first.  It turns out that it isn't a very good choice since
it prevents combine to try

(parallel [
        (set (reg:DI 65)
            (plus:DI (reg:DI 63)
                (mem/u/c:DI (const:DI (unspec:DI [
                                (symbol_ref:SI ("__libc_errno") [flags
0x60]  <var_decl 0x7
ffff085b140 __libc_errno>)
                            ] UNSPEC_GOTNTPOFF)) [0 S8 A8])))
        (clobber (reg:CC 17 flags))
    ])

I am not sure how useful *add_tp_XXX pattern is.
Uros Bizjak July 28, 2011, 10:40 p.m. UTC | #9
On Fri, Jul 29, 2011 at 12:28 AM, H.J. Lu <hjl.tools@gmail.com> wrote:

>>>>>>>>> TP is 32bit in x32  For load_tp_x32, we load SImode value and
>>>>>>>>> zero-extend to DImode. For add_tp_x32, we are adding SImode
>>>>>>>>> value.  We can't pretend TP is 64bit.  load_tp_x32 and add_tp_x32
>>>>>>>>> must take SImode TP.
>>
>>> Here is the revised patch.  The difference is I changed *add_tp_x32 to SImode.
>>> For
>>>
>>> ---
>>> extern __thread int __libc_errno __attribute__ ((tls_model ("initial-exec")));
>>>
>>> int *
>>> __errno_location (void)
>>> {
>>>  return &__libc_errno;
>>> }
>>> ---
>>>
>>> compiled with -mx32 -O2 -fPIC  DImode *add_tp_x32 generates:
>>>
>>>        movq    __libc_errno@gottpoff(%rip), %rax
>>>        addl    %fs:0, %eax
>>>        mov     %eax, %eax
>>>        ret
>>>
>>> SImode *add_tp_x32 generates:
>>>
>>>        movl    %fs:0, %eax
>>>        addl    __libc_errno@gottpoff(%rip), %eax
>>>        ret
>>
>> This happens because combine can't combine DImode load and SImode plus
>> RTXes. These RTXes have to be in Pmode, see the intention in
>> legitimize_tls_address, also for TARGET_GNU2_TLS.
>>
>> Can you please debug what goes wrong with tp_add_x32 in DImode?
>>
>
> We start with

Uh, we didn't understand each other... can you please debug what goes
wrong with glibc runtime test?

Thanks,
Uros.
H.J. Lu July 28, 2011, 10:46 p.m. UTC | #10
On Thu, Jul 28, 2011 at 3:40 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Fri, Jul 29, 2011 at 12:28 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>
>>>>>>>>>> TP is 32bit in x32  For load_tp_x32, we load SImode value and
>>>>>>>>>> zero-extend to DImode. For add_tp_x32, we are adding SImode
>>>>>>>>>> value.  We can't pretend TP is 64bit.  load_tp_x32 and add_tp_x32
>>>>>>>>>> must take SImode TP.
>>>
>>>> Here is the revised patch.  The difference is I changed *add_tp_x32 to SImode.
>>>> For
>>>>
>>>> ---
>>>> extern __thread int __libc_errno __attribute__ ((tls_model ("initial-exec")));
>>>>
>>>> int *
>>>> __errno_location (void)
>>>> {
>>>>  return &__libc_errno;
>>>> }
>>>> ---
>>>>
>>>> compiled with -mx32 -O2 -fPIC  DImode *add_tp_x32 generates:
>>>>
>>>>        movq    __libc_errno@gottpoff(%rip), %rax
>>>>        addl    %fs:0, %eax
>>>>        mov     %eax, %eax
>>>>        ret
>>>>
>>>> SImode *add_tp_x32 generates:
>>>>
>>>>        movl    %fs:0, %eax
>>>>        addl    __libc_errno@gottpoff(%rip), %eax
>>>>        ret
>>>
>>> This happens because combine can't combine DImode load and SImode plus
>>> RTXes. These RTXes have to be in Pmode, see the intention in
>>> legitimize_tls_address, also for TARGET_GNU2_TLS.
>>>
>>> Can you please debug what goes wrong with tp_add_x32 in DImode?
>>>
>>
>> We start with
>
> Uh, we didn't understand each other... can you please debug what goes
> wrong with glibc runtime test?
>

I haven't be able to reproduce it.  I may have a typo when I fixed the
pattern.  I will try again.
H.J. Lu July 28, 2011, 11:25 p.m. UTC | #11
On Thu, Jul 28, 2011 at 3:46 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Thu, Jul 28, 2011 at 3:40 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Fri, Jul 29, 2011 at 12:28 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>
>>>>>>>>>>> TP is 32bit in x32  For load_tp_x32, we load SImode value and
>>>>>>>>>>> zero-extend to DImode. For add_tp_x32, we are adding SImode
>>>>>>>>>>> value.  We can't pretend TP is 64bit.  load_tp_x32 and add_tp_x32
>>>>>>>>>>> must take SImode TP.
>>>>
>>>>> Here is the revised patch.  The difference is I changed *add_tp_x32 to SImode.
>>>>> For
>>>>>
>>>>> ---
>>>>> extern __thread int __libc_errno __attribute__ ((tls_model ("initial-exec")));
>>>>>
>>>>> int *
>>>>> __errno_location (void)
>>>>> {
>>>>>  return &__libc_errno;
>>>>> }
>>>>> ---
>>>>>
>>>>> compiled with -mx32 -O2 -fPIC  DImode *add_tp_x32 generates:
>>>>>
>>>>>        movq    __libc_errno@gottpoff(%rip), %rax
>>>>>        addl    %fs:0, %eax
>>>>>        mov     %eax, %eax
>>>>>        ret
>>>>>
>>>>> SImode *add_tp_x32 generates:
>>>>>
>>>>>        movl    %fs:0, %eax
>>>>>        addl    __libc_errno@gottpoff(%rip), %eax
>>>>>        ret
>>>>
>>>> This happens because combine can't combine DImode load and SImode plus
>>>> RTXes. These RTXes have to be in Pmode, see the intention in
>>>> legitimize_tls_address, also for TARGET_GNU2_TLS.
>>>>
>>>> Can you please debug what goes wrong with tp_add_x32 in DImode?
>>>>
>>>
>>> We start with
>>
>> Uh, we didn't understand each other... can you please debug what goes
>> wrong with glibc runtime test?
>>
>
> I haven't be able to reproduce it.  I may have a typo when I fixed the
> pattern.  I will try again.
>

I can't reproduce it any more.
H.J. Lu July 29, 2011, 12:57 a.m. UTC | #12
On Thu, Jul 28, 2011 at 4:25 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Thu, Jul 28, 2011 at 3:46 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Thu, Jul 28, 2011 at 3:40 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>> On Fri, Jul 29, 2011 at 12:28 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>
>>>>>>>>>>>> TP is 32bit in x32  For load_tp_x32, we load SImode value and
>>>>>>>>>>>> zero-extend to DImode. For add_tp_x32, we are adding SImode
>>>>>>>>>>>> value.  We can't pretend TP is 64bit.  load_tp_x32 and add_tp_x32
>>>>>>>>>>>> must take SImode TP.
>>>>>
>>>>>> Here is the revised patch.  The difference is I changed *add_tp_x32 to SImode.
>>>>>> For
>>>>>>
>>>>>> ---
>>>>>> extern __thread int __libc_errno __attribute__ ((tls_model ("initial-exec")));
>>>>>>
>>>>>> int *
>>>>>> __errno_location (void)
>>>>>> {
>>>>>>  return &__libc_errno;
>>>>>> }
>>>>>> ---
>>>>>>
>>>>>> compiled with -mx32 -O2 -fPIC  DImode *add_tp_x32 generates:
>>>>>>
>>>>>>        movq    __libc_errno@gottpoff(%rip), %rax
>>>>>>        addl    %fs:0, %eax
>>>>>>        mov     %eax, %eax
>>>>>>        ret
>>>>>>
>>>>>> SImode *add_tp_x32 generates:
>>>>>>
>>>>>>        movl    %fs:0, %eax
>>>>>>        addl    __libc_errno@gottpoff(%rip), %eax
>>>>>>        ret
>>>>>
>>>>> This happens because combine can't combine DImode load and SImode plus
>>>>> RTXes. These RTXes have to be in Pmode, see the intention in
>>>>> legitimize_tls_address, also for TARGET_GNU2_TLS.
>>>>>
>>>>> Can you please debug what goes wrong with tp_add_x32 in DImode?
>>>>>
>>>>
>>>> We start with
>>>
>>> Uh, we didn't understand each other... can you please debug what goes
>>> wrong with glibc runtime test?
>>>
>>
>> I haven't be able to reproduce it.  I may have a typo when I fixed the
>> pattern.  I will try again.
>>
>
> I can't reproduce it any more.
>

I know why. I also have

	(*load_tp_<mode>): Disabled for TARGET_X32.
	(*add_tp_<mode>): Likewise.

since the SImode version is wrong for x32 due to

(define_mode_attr tp_seg [(SI "gs") (DI "fs")])

x32 uses fs even if it is 32bit.
diff mbox

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 8723dc5..d32d64d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12120,7 +12120,9 @@  get_thread_pointer (bool to_reg)
 {
   rtx tp, reg, insn;

-  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
+  tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
+  if (ptr_mode != Pmode)
+    tp = convert_to_mode (Pmode, tp, 1);
   if (!to_reg)
     return tp;

since TP must be 32bit.

Thanks.

-- 
H.J.
---
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index aaaf53a..a194ffb 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -12452,6 +12452,17 @@ 
 (define_mode_attr tp_seg [(SI "gs") (DI "fs")])

 ;; Load and add the thread base pointer from %<tp_seg>:0.
+(define_insn "*load_tp_x32"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(const_int 0)] UNSPEC_TP))]
+  "TARGET_X32"
+  "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0")
+   (set_attr "length" "7")
+   (set_attr "memory" "load")
+   (set_attr "imm_disp" "false")])
+
 (define_insn "*load_tp_<mode>"
   [(set (match_operand:P 0 "register_operand" "=r")
 	(unspec:P [(const_int 0)] UNSPEC_TP))]
@@ -12463,6 +12474,19 @@ 
    (set_attr "memory" "load")
    (set_attr "imm_disp" "false")])

+(define_insn "*add_tp_x32"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(plus:DI (unspec:DI [(const_int 0)] UNSPEC_TP)
+		 (match_operand:DI 1 "register_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_X32"
+  "add{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}"
+  [(set_attr "type" "alu")
+   (set_attr "modrm" "0")
+   (set_attr "length" "7")
+   (set_attr "memory" "load")
+   (set_attr "imm_disp" "false")])
+
 (define_insn "*add_tp_<mode>"
   [(set (match_operand:P 0 "register_operand" "=r")
 	(plus:P (unspec:P [(const_int 0)] UNSPEC_TP)