Message ID | 20240303014837.808610-1-ibuclaw@gdcproject.org |
---|---|
State | New |
Headers | show |
Series | [committed] d: Fix gdc -O2 -mavx generates misaligned vmovdqa instruction [PR114171] | expand |
> Am 03.03.2024 um 02:51 schrieb Iain Buclaw <ibuclaw@gdcproject.org>: > > Hi, > > This patch fixes a wrong code issue in the D front-end where lowered > struct comparisons would reinterpret fields with a different (usually > bigger) alignment than the original. Use `build_aligned_type' to > preserve the alignment when casting away for such comparisons. > > Bootstrapped and regression tested on x86_64-linux-gnu/-m32, committed > to mainline, and backported to releases/gcc-13, releases/gcc-12, and > releases/gcc-11. LGTM. You might want to experiment with not doing the premature optimization but instead use __builtin_memcmp_eq (assuming that’s a general good fit). The middle-end should better turn that into target optimal code. Richard > Regards, > Iain. > --- > PR d/114171 > > gcc/d/ChangeLog: > > * d-codegen.cc (lower_struct_comparison): Keep alignment of original > type in reinterpret cast for comparison. > > gcc/testsuite/ChangeLog: > > * gdc.dg/torture/pr114171.d: New test. > --- > gcc/d/d-codegen.cc | 1 + > gcc/testsuite/gdc.dg/torture/pr114171.d | 29 +++++++++++++++++++++++++ > 2 files changed, 30 insertions(+) > create mode 100644 gcc/testsuite/gdc.dg/torture/pr114171.d > > diff --git a/gcc/d/d-codegen.cc b/gcc/d/d-codegen.cc > index 5bc233928aa..43d7739f8fc 100644 > --- a/gcc/d/d-codegen.cc > +++ b/gcc/d/d-codegen.cc > @@ -1006,6 +1006,7 @@ lower_struct_comparison (tree_code code, StructDeclaration *sd, > if (tmode == NULL_TREE) > tmode = make_unsigned_type (GET_MODE_BITSIZE (mode.require ())); > > + tmode = build_aligned_type (tmode, TYPE_ALIGN (stype)); > t1ref = build_vconvert (tmode, t1ref); > t2ref = build_vconvert (tmode, t2ref); > > diff --git a/gcc/testsuite/gdc.dg/torture/pr114171.d b/gcc/testsuite/gdc.dg/torture/pr114171.d > new file mode 100644 > index 00000000000..0f9ffcab916 > --- /dev/null > +++ b/gcc/testsuite/gdc.dg/torture/pr114171.d > @@ -0,0 +1,29 @@ > +// { dg-do run } > +// { dg-additional-options "-mavx" { target avx_runtime } } > +// { dg-skip-if "needs gcc/config.d" { ! d_runtime } } > +import gcc.builtins; > + > +struct S1 > +{ > + string label; > +} > + > +struct S2 > +{ > + ulong pad; > + S1 label; > +} > + > +pragma(inline, false) > +auto newitem() > +{ > + void *p = __builtin_malloc(S2.sizeof); > + __builtin_memset(p, 0, S2.sizeof); > + return cast(S2*) p; > +} > + > +int main() > +{ > + auto bn = newitem(); > + return bn.label is S1.init ? 0 : 1; > +} > -- > 2.40.1 >
Excerpts from Richard Biener's message of März 3, 2024 11:41 am: > > >> Am 03.03.2024 um 02:51 schrieb Iain Buclaw <ibuclaw@gdcproject.org>: >> >> Hi, >> >> This patch fixes a wrong code issue in the D front-end where lowered >> struct comparisons would reinterpret fields with a different (usually >> bigger) alignment than the original. Use `build_aligned_type' to >> preserve the alignment when casting away for such comparisons. >> >> Bootstrapped and regression tested on x86_64-linux-gnu/-m32, committed >> to mainline, and backported to releases/gcc-13, releases/gcc-12, and >> releases/gcc-11. > > LGTM. You might want to experiment with not doing the premature optimization but instead use __builtin_memcmp_eq (assuming that’s a general good fit). The middle-end should better turn that into target optimal code. > Indeed, just looking at the history, it was introduced well over ten years ago so I can't comment on the original context for it (it doesn't directly fix any old issues). Small comparison between this optimization and memcmp_eq --- _5 = newitem (); # DEBUG bn => _5 _1 = MEM[(ucent *)_5 + 8B]; _2 = _1 != 0; _6 = (int) _2; return _6; --- call _D8pr1141717newitemFNbNiZPSQz2S2 .LVL29: vmovdqu 8(%rax), %xmm0 xorl %eax, %eax .LVL30: vptest %xmm0, %xmm0 setne %al addq $8, %rsp ret --- _6 = newitem (); # DEBUG bn => _6 D.2335.length = 0; D.2335.ptr = 0B; _1 = &_6->label.label; _2 = __builtin_memcmp_eq (_1, &D.2335, 16); _3 = _2 != 0; _9 = (int) _3; return _9; --- call _D8pr1141717newitemFNbNiZPSQz2S2 .LVL29: movq $0, (%rsp) movq $0, 8(%rsp) vmovdqu 8(%rax), %xmm0 xorl %eax, %eax .LVL30: vpxor (%rsp), %xmm0, %xmm0 vptest %xmm0, %xmm0 setne %al addq $24, %rsp ret
On Sat, Mar 2, 2024 at 5:51 PM Iain Buclaw <ibuclaw@gdcproject.org> wrote: > > Hi, > > This patch fixes a wrong code issue in the D front-end where lowered > struct comparisons would reinterpret fields with a different (usually > bigger) alignment than the original. Use `build_aligned_type' to > preserve the alignment when casting away for such comparisons. > > Bootstrapped and regression tested on x86_64-linux-gnu/-m32, committed > to mainline, and backported to releases/gcc-13, releases/gcc-12, and > releases/gcc-11. > > Regards, > Iain. > --- > PR d/114171 > > gcc/d/ChangeLog: > > * d-codegen.cc (lower_struct_comparison): Keep alignment of original > type in reinterpret cast for comparison. > > gcc/testsuite/ChangeLog: > > * gdc.dg/torture/pr114171.d: New test. > --- > gcc/d/d-codegen.cc | 1 + > gcc/testsuite/gdc.dg/torture/pr114171.d | 29 +++++++++++++++++++++++++ > 2 files changed, 30 insertions(+) > create mode 100644 gcc/testsuite/gdc.dg/torture/pr114171.d > > diff --git a/gcc/d/d-codegen.cc b/gcc/d/d-codegen.cc > index 5bc233928aa..43d7739f8fc 100644 > --- a/gcc/d/d-codegen.cc > +++ b/gcc/d/d-codegen.cc > @@ -1006,6 +1006,7 @@ lower_struct_comparison (tree_code code, StructDeclaration *sd, > if (tmode == NULL_TREE) > tmode = make_unsigned_type (GET_MODE_BITSIZE (mode.require ())); > > + tmode = build_aligned_type (tmode, TYPE_ALIGN (stype)); You might also need to build a may_alias variant too. Or make sure the access is using the correct aliasing set. I have not checked if the D front-end does anything special for aliasing sets so I am not sure if that is needed or not but I suspect it is. Thanks, Andrew Pinski > t1ref = build_vconvert (tmode, t1ref); > t2ref = build_vconvert (tmode, t2ref); > > diff --git a/gcc/testsuite/gdc.dg/torture/pr114171.d b/gcc/testsuite/gdc.dg/torture/pr114171.d > new file mode 100644 > index 00000000000..0f9ffcab916 > --- /dev/null > +++ b/gcc/testsuite/gdc.dg/torture/pr114171.d > @@ -0,0 +1,29 @@ > +// { dg-do run } > +// { dg-additional-options "-mavx" { target avx_runtime } } > +// { dg-skip-if "needs gcc/config.d" { ! d_runtime } } > +import gcc.builtins; > + > +struct S1 > +{ > + string label; > +} > + > +struct S2 > +{ > + ulong pad; > + S1 label; > +} > + > +pragma(inline, false) > +auto newitem() > +{ > + void *p = __builtin_malloc(S2.sizeof); > + __builtin_memset(p, 0, S2.sizeof); > + return cast(S2*) p; > +} > + > +int main() > +{ > + auto bn = newitem(); > + return bn.label is S1.init ? 0 : 1; > +} > -- > 2.40.1 >
Excerpts from Andrew Pinski's message of März 3, 2024 11:49 pm: > On Sat, Mar 2, 2024 at 5:51 PM Iain Buclaw <ibuclaw@gdcproject.org> wrote: >> >> Hi, >> >> This patch fixes a wrong code issue in the D front-end where lowered >> struct comparisons would reinterpret fields with a different (usually >> bigger) alignment than the original. Use `build_aligned_type' to >> preserve the alignment when casting away for such comparisons. >> >> Bootstrapped and regression tested on x86_64-linux-gnu/-m32, committed >> to mainline, and backported to releases/gcc-13, releases/gcc-12, and >> releases/gcc-11. >> >> Regards, >> Iain. >> --- >> PR d/114171 >> >> gcc/d/ChangeLog: >> >> * d-codegen.cc (lower_struct_comparison): Keep alignment of original >> type in reinterpret cast for comparison. >> >> gcc/testsuite/ChangeLog: >> >> * gdc.dg/torture/pr114171.d: New test. >> --- >> gcc/d/d-codegen.cc | 1 + >> gcc/testsuite/gdc.dg/torture/pr114171.d | 29 +++++++++++++++++++++++++ >> 2 files changed, 30 insertions(+) >> create mode 100644 gcc/testsuite/gdc.dg/torture/pr114171.d >> >> diff --git a/gcc/d/d-codegen.cc b/gcc/d/d-codegen.cc >> index 5bc233928aa..43d7739f8fc 100644 >> --- a/gcc/d/d-codegen.cc >> +++ b/gcc/d/d-codegen.cc >> @@ -1006,6 +1006,7 @@ lower_struct_comparison (tree_code code, StructDeclaration *sd, >> if (tmode == NULL_TREE) >> tmode = make_unsigned_type (GET_MODE_BITSIZE (mode.require ())); >> >> + tmode = build_aligned_type (tmode, TYPE_ALIGN (stype)); > > You might also need to build a may_alias variant too. Or make sure the > access is using the correct aliasing set. > I have not checked if the D front-end does anything special for > aliasing sets so I am not sure if that is needed or not but I suspect > it is. > There are no alias sets defined in the D front-end - the reference compiler doesn't enforce it, which has allowed enough code out there to expect modifying bits (eg: of a float) through a reinterpret cast (such as an int*) to just work. Thanks for the reminder though. Iain.
diff --git a/gcc/d/d-codegen.cc b/gcc/d/d-codegen.cc index 5bc233928aa..43d7739f8fc 100644 --- a/gcc/d/d-codegen.cc +++ b/gcc/d/d-codegen.cc @@ -1006,6 +1006,7 @@ lower_struct_comparison (tree_code code, StructDeclaration *sd, if (tmode == NULL_TREE) tmode = make_unsigned_type (GET_MODE_BITSIZE (mode.require ())); + tmode = build_aligned_type (tmode, TYPE_ALIGN (stype)); t1ref = build_vconvert (tmode, t1ref); t2ref = build_vconvert (tmode, t2ref); diff --git a/gcc/testsuite/gdc.dg/torture/pr114171.d b/gcc/testsuite/gdc.dg/torture/pr114171.d new file mode 100644 index 00000000000..0f9ffcab916 --- /dev/null +++ b/gcc/testsuite/gdc.dg/torture/pr114171.d @@ -0,0 +1,29 @@ +// { dg-do run } +// { dg-additional-options "-mavx" { target avx_runtime } } +// { dg-skip-if "needs gcc/config.d" { ! d_runtime } } +import gcc.builtins; + +struct S1 +{ + string label; +} + +struct S2 +{ + ulong pad; + S1 label; +} + +pragma(inline, false) +auto newitem() +{ + void *p = __builtin_malloc(S2.sizeof); + __builtin_memset(p, 0, S2.sizeof); + return cast(S2*) p; +} + +int main() +{ + auto bn = newitem(); + return bn.label is S1.init ? 0 : 1; +}