Message ID | 20230213051843.2615021-1-guojiufu@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [V2] rs6000: Enhance lowpart/highpart DI->SF by mtvsrws/mtvsrd | expand |
Hi, I would like to ping this patch for stage1: https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612168.html BR, Jeff (Jiufu) Jiufu Guo <guojiufu@linux.ibm.com> writes: > Hi, > > Compared with the previous version: > https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609654.html > This patch does not use UNSPEC for insn mtvsrws anymore. And to handle > the subreg better on BE and LE, predicate "lowpart_subreg_operator" > is introduced. To help the combine pass match the pattern on the high 32 > bits of DI, shiftrt is still used. > > As mentioned in PR108338, on p9, we could use mtvsrws to implement > the conversion from SI#0 to SF (or lowpart DI to SF). > > For example: > *(long long*)buff = di; > float f = *(float*)(buff); > We currently generate "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" instead of > "mtvsrws 1,3 ; xscvspdpn 1,1". > > This patch updates this, and also enhances the bitcast from highpart > DI to SF. > > Bootstrap and regtests pass on ppc64{,le}. > Is this ok for trunk? > > BR, > Jeff (Jiufu) > > PR target/108338 > > gcc/ChangeLog: > > * config/rs6000/predicates.md (lowpart_subreg_operator): New > define_predicate. > * config/rs6000/rs6000.md (any_rshift): New code_iterator. > (movsf_from_si2): Rename to... > (movsf_from_si2_<code>): ... this. > (si2sf_mtvsrws): New define_insn. > > gcc/testsuite/ChangeLog: > > * gcc.target/powerpc/pr108338.c: New test. 
> > --- > gcc/config/rs6000/predicates.md | 5 +++ > gcc/config/rs6000/rs6000.md | 35 ++++++++++++----- > gcc/testsuite/gcc.target/powerpc/pr108338.c | 42 +++++++++++++++++++++ > 3 files changed, 73 insertions(+), 9 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108338.c > > diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md > index 52c65534e51..e57c9d99c6b 100644 > --- a/gcc/config/rs6000/predicates.md > +++ b/gcc/config/rs6000/predicates.md > @@ -2064,3 +2064,8 @@ (define_predicate "macho_pic_address" > else > return false; > }) > + > +(define_predicate "lowpart_subreg_operator" > + (and (match_code "subreg") > + (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op))) > + == SUBREG_BYTE (op)"))) > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index 4a7812fa592..5b4a7f8d801 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -7539,6 +7539,14 @@ (define_split > UNSPEC_MOVSI_GOT))] > "") > > +(define_insn "si2sf_mtvsrws" > + [(set (match_operand:SF 0 "gpc_reg_operand" "=wa") > + (subreg:SF (match_operand:SI 1 "gpc_reg_operand" "r") 0))] > + "TARGET_P9_VECTOR && TARGET_XSCVSPDPN" > + "mtvsrws %x0,%1\n\txscvspdpn %x0,%x0" > + [(set_attr "type" "mfvsr") > + (set_attr "length" "8")]) > + > ;; MR LA > ;; LWZ LFIWZX LXSIWZX > ;; STW STFIWX STXSIWX > @@ -8203,10 +8211,18 @@ (define_insn_and_split "movsf_from_si" > rtx op2 = operands[2]; > rtx op1_di = gen_rtx_REG (DImode, REGNO (op1)); > > - /* Move SF value to upper 32-bits for xscvspdpn. */ > - emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); > - emit_insn (gen_p8_mtvsrd_sf (op0, op2)); > - emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); > + if (TARGET_P9_VECTOR) > + { > + emit_insn (gen_si2sf_mtvsrws (op0, gen_lowpart (SImode, op1_di))); > + } > + else > + { > + /* Move SF value to upper 32-bits for xscvspdpn. 
*/ > + emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); > + emit_insn (gen_p8_mtvsrd_sf (op0, op2)); > + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); > + } > + > DONE; > } > [(set_attr "length" > @@ -8219,18 +8235,19 @@ (define_insn_and_split "movsf_from_si" > "*, *, p9v, p8v, *, *, > p8v, p8v, p8v, *")]) > > +(define_code_iterator any_rshift [ashiftrt lshiftrt]) > + > ;; For extracting high part element from DImode register like: > ;; {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;} > ;; split it before reload with "and mask" to avoid generating shift right > ;; 32 bit then shift left 32 bit. > -(define_insn_and_split "movsf_from_si2" > +(define_insn_and_split "movsf_from_si2_<code>" > [(set (match_operand:SF 0 "gpc_reg_operand" "=wa") > (unspec:SF > - [(subreg:SI > - (ashiftrt:DI > + [(match_operator:SI 3 "lowpart_subreg_operator" > + [(any_rshift:DI > (match_operand:DI 1 "input_operand" "r") > - (const_int 32)) > - 0)] > + (const_int 32))])] > UNSPEC_SF_FROM_SI)) > (clobber (match_scratch:DI 2 "=r"))] > "TARGET_NO_SF_SUBREG" > diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c > new file mode 100644 > index 00000000000..2438dc13f41 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c > @@ -0,0 +1,42 @@ > +// { dg-do run } > +// { dg-options "-O2 -save-temps" } > + > +float __attribute__ ((noipa)) sf_from_di_off0 (long long l) > +{ > + char buff[16]; > + *(long long*)buff = l; > + float f = *(float*)(buff); > + return f; > +} > + > +float __attribute__ ((noipa)) sf_from_di_off4 (long long l) > +{ > + char buff[16]; > + *(long long*)buff = l; > + float f = *(float*)(buff + 4); > + return f; > +} > + > +/* Under lp64, 'l' is in one DI reg, then check sub DI to SF. 
*/ > +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */ > +/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */ > + > +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */ > +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */ > +/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */ > + > +union di_sf_sf > +{ > + struct {float f1; float f2;}; > + long long l; > +}; > + > +int main() > +{ > + union di_sf_sf v; > + v.f1 = 1.0f; > + v.f2 = 2.0f; > + if (sf_from_di_off0 (v.l) != 1.0f || sf_from_di_off4 (v.l) != 2.0f ) > + __builtin_abort (); > + return 0; > +}
Gentle ping... Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > Hi > > I would like to ping this patch for stage1: > https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612168.html > > BR, > Jeff (Jiufu) > > Jiufu Guo <guojiufu@linux.ibm.com> writes: > >> Hi, >> >> Compared with the previous version: >> https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609654.html >> This patch does not use UNSPEC for insn mtvsrws anymore. And to handle >> the subreg better on BE and LE, predicate "lowpart_subreg_operator" >> is introduced. To help the combine pass match the pattern on the high 32 >> bits of DI, shiftrt is still used. >> >> As mentioned in PR108338, on p9, we could use mtvsrws to implement >> the conversion from SI#0 to SF (or lowpart DI to SF). >> >> For example: >> *(long long*)buff = di; >> float f = *(float*)(buff); >> We currently generate "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" instead of >> "mtvsrws 1,3 ; xscvspdpn 1,1". >> >> This patch updates this, and also enhances the bitcast from highpart >> DI to SF. >> >> Bootstrap and regtests pass on ppc64{,le}. >> Is this ok for trunk? >> >> BR, >> Jeff (Jiufu) >> >> PR target/108338 >> >> gcc/ChangeLog: >> >> * config/rs6000/predicates.md (lowpart_subreg_operator): New >> define_predicate. >> * config/rs6000/rs6000.md (any_rshift): New code_iterator. >> (movsf_from_si2): Rename to... >> (movsf_from_si2_<code>): ... this. >> (si2sf_mtvsrws): New define_insn. >> >> gcc/testsuite/ChangeLog: >> >> * gcc.target/powerpc/pr108338.c: New test. 
>> >> --- >> gcc/config/rs6000/predicates.md | 5 +++ >> gcc/config/rs6000/rs6000.md | 35 ++++++++++++----- >> gcc/testsuite/gcc.target/powerpc/pr108338.c | 42 +++++++++++++++++++++ >> 3 files changed, 73 insertions(+), 9 deletions(-) >> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108338.c >> >> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md >> index 52c65534e51..e57c9d99c6b 100644 >> --- a/gcc/config/rs6000/predicates.md >> +++ b/gcc/config/rs6000/predicates.md >> @@ -2064,3 +2064,8 @@ (define_predicate "macho_pic_address" >> else >> return false; >> }) >> + >> +(define_predicate "lowpart_subreg_operator" >> + (and (match_code "subreg") >> + (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op))) >> + == SUBREG_BYTE (op)"))) >> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md >> index 4a7812fa592..5b4a7f8d801 100644 >> --- a/gcc/config/rs6000/rs6000.md >> +++ b/gcc/config/rs6000/rs6000.md >> @@ -7539,6 +7539,14 @@ (define_split >> UNSPEC_MOVSI_GOT))] >> "") >> >> +(define_insn "si2sf_mtvsrws" >> + [(set (match_operand:SF 0 "gpc_reg_operand" "=wa") >> + (subreg:SF (match_operand:SI 1 "gpc_reg_operand" "r") 0))] >> + "TARGET_P9_VECTOR && TARGET_XSCVSPDPN" >> + "mtvsrws %x0,%1\n\txscvspdpn %x0,%x0" >> + [(set_attr "type" "mfvsr") >> + (set_attr "length" "8")]) >> + >> ;; MR LA >> ;; LWZ LFIWZX LXSIWZX >> ;; STW STFIWX STXSIWX >> @@ -8203,10 +8211,18 @@ (define_insn_and_split "movsf_from_si" >> rtx op2 = operands[2]; >> rtx op1_di = gen_rtx_REG (DImode, REGNO (op1)); >> >> - /* Move SF value to upper 32-bits for xscvspdpn. */ >> - emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); >> - emit_insn (gen_p8_mtvsrd_sf (op0, op2)); >> - emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); >> + if (TARGET_P9_VECTOR) >> + { >> + emit_insn (gen_si2sf_mtvsrws (op0, gen_lowpart (SImode, op1_di))); >> + } >> + else >> + { >> + /* Move SF value to upper 32-bits for xscvspdpn. 
*/ >> + emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); >> + emit_insn (gen_p8_mtvsrd_sf (op0, op2)); >> + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); >> + } >> + >> DONE; >> } >> [(set_attr "length" >> @@ -8219,18 +8235,19 @@ (define_insn_and_split "movsf_from_si" >> "*, *, p9v, p8v, *, *, >> p8v, p8v, p8v, *")]) >> >> +(define_code_iterator any_rshift [ashiftrt lshiftrt]) >> + >> ;; For extracting high part element from DImode register like: >> ;; {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;} >> ;; split it before reload with "and mask" to avoid generating shift right >> ;; 32 bit then shift left 32 bit. >> -(define_insn_and_split "movsf_from_si2" >> +(define_insn_and_split "movsf_from_si2_<code>" >> [(set (match_operand:SF 0 "gpc_reg_operand" "=wa") >> (unspec:SF >> - [(subreg:SI >> - (ashiftrt:DI >> + [(match_operator:SI 3 "lowpart_subreg_operator" >> + [(any_rshift:DI >> (match_operand:DI 1 "input_operand" "r") >> - (const_int 32)) >> - 0)] >> + (const_int 32))])] >> UNSPEC_SF_FROM_SI)) >> (clobber (match_scratch:DI 2 "=r"))] >> "TARGET_NO_SF_SUBREG" >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c >> new file mode 100644 >> index 00000000000..2438dc13f41 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c >> @@ -0,0 +1,42 @@ >> +// { dg-do run } >> +// { dg-options "-O2 -save-temps" } >> + >> +float __attribute__ ((noipa)) sf_from_di_off0 (long long l) >> +{ >> + char buff[16]; >> + *(long long*)buff = l; >> + float f = *(float*)(buff); >> + return f; >> +} >> + >> +float __attribute__ ((noipa)) sf_from_di_off4 (long long l) >> +{ >> + char buff[16]; >> + *(long long*)buff = l; >> + float f = *(float*)(buff + 4); >> + return f; >> +} >> + >> +/* Under lp64, 'l' is in one DI reg, then check sub DI to SF. 
*/ >> +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */ >> +/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */ >> + >> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */ >> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */ >> +/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */ >> + >> +union di_sf_sf >> +{ >> + struct {float f1; float f2;}; >> + long long l; >> +}; >> + >> +int main() >> +{ >> + union di_sf_sf v; >> + v.f1 = 1.0f; >> + v.f2 = 2.0f; >> + if (sf_from_di_off0 (v.l) != 1.0f || sf_from_di_off4 (v.l) != 2.0f ) >> + __builtin_abort (); >> + return 0; >> +}
Gentle ping... Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > Gentle ping... > > Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > >> Hi >> >> I would like to ping this patch for stage1: >> https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612168.html >> >> BR, >> Jeff (Jiufu) >> >> Jiufu Guo <guojiufu@linux.ibm.com> writes: >> >>> Hi, >>> >>> Compared with the previous version: >>> https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609654.html >>> This patch does not use UNSPEC for insn mtvsrws anymore. And to handle >>> the subreg better on BE and LE, predicate "lowpart_subreg_operator" >>> is introduced. To help the combine pass match the pattern on the high 32 >>> bits of DI, shiftrt is still used. >>> >>> As mentioned in PR108338, on p9, we could use mtvsrws to implement >>> the conversion from SI#0 to SF (or lowpart DI to SF). >>> >>> For example: >>> *(long long*)buff = di; >>> float f = *(float*)(buff); >>> We currently generate "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" instead of >>> "mtvsrws 1,3 ; xscvspdpn 1,1". >>> >>> This patch updates this, and also enhances the bitcast from highpart >>> DI to SF. >>> >>> Bootstrap and regtests pass on ppc64{,le}. >>> Is this ok for trunk? >>> >>> BR, >>> Jeff (Jiufu) >>> >>> PR target/108338 >>> >>> gcc/ChangeLog: >>> >>> * config/rs6000/predicates.md (lowpart_subreg_operator): New >>> define_predicate. >>> * config/rs6000/rs6000.md (any_rshift): New code_iterator. >>> (movsf_from_si2): Rename to... >>> (movsf_from_si2_<code>): ... this. >>> (si2sf_mtvsrws): New define_insn. >>> >>> gcc/testsuite/ChangeLog: >>> >>> * gcc.target/powerpc/pr108338.c: New test. 
>>> >>> --- >>> gcc/config/rs6000/predicates.md | 5 +++ >>> gcc/config/rs6000/rs6000.md | 35 ++++++++++++----- >>> gcc/testsuite/gcc.target/powerpc/pr108338.c | 42 +++++++++++++++++++++ >>> 3 files changed, 73 insertions(+), 9 deletions(-) >>> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108338.c >>> >>> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md >>> index 52c65534e51..e57c9d99c6b 100644 >>> --- a/gcc/config/rs6000/predicates.md >>> +++ b/gcc/config/rs6000/predicates.md >>> @@ -2064,3 +2064,8 @@ (define_predicate "macho_pic_address" >>> else >>> return false; >>> }) >>> + >>> +(define_predicate "lowpart_subreg_operator" >>> + (and (match_code "subreg") >>> + (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op))) >>> + == SUBREG_BYTE (op)"))) >>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md >>> index 4a7812fa592..5b4a7f8d801 100644 >>> --- a/gcc/config/rs6000/rs6000.md >>> +++ b/gcc/config/rs6000/rs6000.md >>> @@ -7539,6 +7539,14 @@ (define_split >>> UNSPEC_MOVSI_GOT))] >>> "") >>> >>> +(define_insn "si2sf_mtvsrws" >>> + [(set (match_operand:SF 0 "gpc_reg_operand" "=wa") >>> + (subreg:SF (match_operand:SI 1 "gpc_reg_operand" "r") 0))] >>> + "TARGET_P9_VECTOR && TARGET_XSCVSPDPN" >>> + "mtvsrws %x0,%1\n\txscvspdpn %x0,%x0" >>> + [(set_attr "type" "mfvsr") >>> + (set_attr "length" "8")]) >>> + >>> ;; MR LA >>> ;; LWZ LFIWZX LXSIWZX >>> ;; STW STFIWX STXSIWX >>> @@ -8203,10 +8211,18 @@ (define_insn_and_split "movsf_from_si" >>> rtx op2 = operands[2]; >>> rtx op1_di = gen_rtx_REG (DImode, REGNO (op1)); >>> >>> - /* Move SF value to upper 32-bits for xscvspdpn. 
*/ >>> - emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); >>> - emit_insn (gen_p8_mtvsrd_sf (op0, op2)); >>> - emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); >>> + if (TARGET_P9_VECTOR) >>> + { >>> + emit_insn (gen_si2sf_mtvsrws (op0, gen_lowpart (SImode, op1_di))); >>> + } >>> + else >>> + { >>> + /* Move SF value to upper 32-bits for xscvspdpn. */ >>> + emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); >>> + emit_insn (gen_p8_mtvsrd_sf (op0, op2)); >>> + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); >>> + } >>> + >>> DONE; >>> } >>> [(set_attr "length" >>> @@ -8219,18 +8235,19 @@ (define_insn_and_split "movsf_from_si" >>> "*, *, p9v, p8v, *, *, >>> p8v, p8v, p8v, *")]) >>> >>> +(define_code_iterator any_rshift [ashiftrt lshiftrt]) >>> + >>> ;; For extracting high part element from DImode register like: >>> ;; {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;} >>> ;; split it before reload with "and mask" to avoid generating shift right >>> ;; 32 bit then shift left 32 bit. 
>>> -(define_insn_and_split "movsf_from_si2" >>> +(define_insn_and_split "movsf_from_si2_<code>" >>> [(set (match_operand:SF 0 "gpc_reg_operand" "=wa") >>> (unspec:SF >>> - [(subreg:SI >>> - (ashiftrt:DI >>> + [(match_operator:SI 3 "lowpart_subreg_operator" >>> + [(any_rshift:DI >>> (match_operand:DI 1 "input_operand" "r") >>> - (const_int 32)) >>> - 0)] >>> + (const_int 32))])] >>> UNSPEC_SF_FROM_SI)) >>> (clobber (match_scratch:DI 2 "=r"))] >>> "TARGET_NO_SF_SUBREG" >>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c >>> new file mode 100644 >>> index 00000000000..2438dc13f41 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c >>> @@ -0,0 +1,42 @@ >>> +// { dg-do run } >>> +// { dg-options "-O2 -save-temps" } >>> + >>> +float __attribute__ ((noipa)) sf_from_di_off0 (long long l) >>> +{ >>> + char buff[16]; >>> + *(long long*)buff = l; >>> + float f = *(float*)(buff); >>> + return f; >>> +} >>> + >>> +float __attribute__ ((noipa)) sf_from_di_off4 (long long l) >>> +{ >>> + char buff[16]; >>> + *(long long*)buff = l; >>> + float f = *(float*)(buff + 4); >>> + return f; >>> +} >>> + >>> +/* Under lp64, 'l' is in one DI reg, then check sub DI to SF. */ >>> +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */ >>> +/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */ >>> + >>> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! 
has_arch_pwr9 } } } } } } */ >>> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */ >>> +/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */ >>> + >>> +union di_sf_sf >>> +{ >>> + struct {float f1; float f2;}; >>> + long long l; >>> +}; >>> + >>> +int main() >>> +{ >>> + union di_sf_sf v; >>> + v.f1 = 1.0f; >>> + v.f2 = 2.0f; >>> + if (sf_from_di_off0 (v.l) != 1.0f || sf_from_di_off4 (v.l) != 2.0f ) >>> + __builtin_abort (); >>> + return 0; >>> +}
Hi, Gentle ping ... Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > Gentle ping... > > Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > >> Gentle ping... >> >> Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes: >> >>> Hi >>> >>> I would like to ping this patch for stage1: >>> https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612168.html >>> >>> BR, >>> Jeff (Jiufu) >>> >>> Jiufu Guo <guojiufu@linux.ibm.com> writes: >>> >>>> Hi, >>>> >>>> Compared with the previous version: >>>> https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609654.html >>>> This patch does not use UNSPEC for insn mtvsrws anymore. And to handle >>>> the subreg better on BE and LE, predicate "lowpart_subreg_operator" >>>> is introduced. To help the combine pass match the pattern on the high 32 >>>> bits of DI, shiftrt is still used. >>>> >>>> As mentioned in PR108338, on p9, we could use mtvsrws to implement >>>> the conversion from SI#0 to SF (or lowpart DI to SF). >>>> >>>> For example: >>>> *(long long*)buff = di; >>>> float f = *(float*)(buff); >>>> We currently generate "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" instead of >>>> "mtvsrws 1,3 ; xscvspdpn 1,1". >>>> >>>> This patch updates this, and also enhances the bitcast from highpart >>>> DI to SF. >>>> >>>> Bootstrap and regtests pass on ppc64{,le}. >>>> Is this ok for trunk? >>>> >>>> BR, >>>> Jeff (Jiufu) >>>> >>>> PR target/108338 >>>> >>>> gcc/ChangeLog: >>>> >>>> * config/rs6000/predicates.md (lowpart_subreg_operator): New >>>> define_predicate. >>>> * config/rs6000/rs6000.md (any_rshift): New code_iterator. >>>> (movsf_from_si2): Rename to... >>>> (movsf_from_si2_<code>): ... this. >>>> (si2sf_mtvsrws): New define_insn. >>>> >>>> gcc/testsuite/ChangeLog: >>>> >>>> * gcc.target/powerpc/pr108338.c: New test. 
>>>> >>>> --- >>>> gcc/config/rs6000/predicates.md | 5 +++ >>>> gcc/config/rs6000/rs6000.md | 35 ++++++++++++----- >>>> gcc/testsuite/gcc.target/powerpc/pr108338.c | 42 +++++++++++++++++++++ >>>> 3 files changed, 73 insertions(+), 9 deletions(-) >>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108338.c >>>> >>>> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md >>>> index 52c65534e51..e57c9d99c6b 100644 >>>> --- a/gcc/config/rs6000/predicates.md >>>> +++ b/gcc/config/rs6000/predicates.md >>>> @@ -2064,3 +2064,8 @@ (define_predicate "macho_pic_address" >>>> else >>>> return false; >>>> }) >>>> + >>>> +(define_predicate "lowpart_subreg_operator" >>>> + (and (match_code "subreg") >>>> + (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op))) >>>> + == SUBREG_BYTE (op)"))) >>>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md >>>> index 4a7812fa592..5b4a7f8d801 100644 >>>> --- a/gcc/config/rs6000/rs6000.md >>>> +++ b/gcc/config/rs6000/rs6000.md >>>> @@ -7539,6 +7539,14 @@ (define_split >>>> UNSPEC_MOVSI_GOT))] >>>> "") >>>> >>>> +(define_insn "si2sf_mtvsrws" >>>> + [(set (match_operand:SF 0 "gpc_reg_operand" "=wa") >>>> + (subreg:SF (match_operand:SI 1 "gpc_reg_operand" "r") 0))] >>>> + "TARGET_P9_VECTOR && TARGET_XSCVSPDPN" >>>> + "mtvsrws %x0,%1\n\txscvspdpn %x0,%x0" >>>> + [(set_attr "type" "mfvsr") >>>> + (set_attr "length" "8")]) >>>> + >>>> ;; MR LA >>>> ;; LWZ LFIWZX LXSIWZX >>>> ;; STW STFIWX STXSIWX >>>> @@ -8203,10 +8211,18 @@ (define_insn_and_split "movsf_from_si" >>>> rtx op2 = operands[2]; >>>> rtx op1_di = gen_rtx_REG (DImode, REGNO (op1)); >>>> >>>> - /* Move SF value to upper 32-bits for xscvspdpn. 
*/ >>>> - emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); >>>> - emit_insn (gen_p8_mtvsrd_sf (op0, op2)); >>>> - emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); >>>> + if (TARGET_P9_VECTOR) >>>> + { >>>> + emit_insn (gen_si2sf_mtvsrws (op0, gen_lowpart (SImode, op1_di))); >>>> + } >>>> + else >>>> + { >>>> + /* Move SF value to upper 32-bits for xscvspdpn. */ >>>> + emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); >>>> + emit_insn (gen_p8_mtvsrd_sf (op0, op2)); >>>> + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); >>>> + } >>>> + >>>> DONE; >>>> } >>>> [(set_attr "length" >>>> @@ -8219,18 +8235,19 @@ (define_insn_and_split "movsf_from_si" >>>> "*, *, p9v, p8v, *, *, >>>> p8v, p8v, p8v, *")]) >>>> >>>> +(define_code_iterator any_rshift [ashiftrt lshiftrt]) >>>> + >>>> ;; For extracting high part element from DImode register like: >>>> ;; {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;} >>>> ;; split it before reload with "and mask" to avoid generating shift right >>>> ;; 32 bit then shift left 32 bit. 
>>>> -(define_insn_and_split "movsf_from_si2" >>>> +(define_insn_and_split "movsf_from_si2_<code>" >>>> [(set (match_operand:SF 0 "gpc_reg_operand" "=wa") >>>> (unspec:SF >>>> - [(subreg:SI >>>> - (ashiftrt:DI >>>> + [(match_operator:SI 3 "lowpart_subreg_operator" >>>> + [(any_rshift:DI >>>> (match_operand:DI 1 "input_operand" "r") >>>> - (const_int 32)) >>>> - 0)] >>>> + (const_int 32))])] >>>> UNSPEC_SF_FROM_SI)) >>>> (clobber (match_scratch:DI 2 "=r"))] >>>> "TARGET_NO_SF_SUBREG" >>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c >>>> new file mode 100644 >>>> index 00000000000..2438dc13f41 >>>> --- /dev/null >>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c >>>> @@ -0,0 +1,42 @@ >>>> +// { dg-do run } >>>> +// { dg-options "-O2 -save-temps" } >>>> + >>>> +float __attribute__ ((noipa)) sf_from_di_off0 (long long l) >>>> +{ >>>> + char buff[16]; >>>> + *(long long*)buff = l; >>>> + float f = *(float*)(buff); >>>> + return f; >>>> +} >>>> + >>>> +float __attribute__ ((noipa)) sf_from_di_off4 (long long l) >>>> +{ >>>> + char buff[16]; >>>> + *(long long*)buff = l; >>>> + float f = *(float*)(buff + 4); >>>> + return f; >>>> +} >>>> + >>>> +/* Under lp64, 'l' is in one DI reg, then check sub DI to SF. */ >>>> +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */ >>>> +/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */ >>>> + >>>> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! 
has_arch_pwr9 } } } } } } */ >>>> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */ >>>> +/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */ >>>> + >>>> +union di_sf_sf >>>> +{ >>>> + struct {float f1; float f2;}; >>>> + long long l; >>>> +}; >>>> + >>>> +int main() >>>> +{ >>>> + union di_sf_sf v; >>>> + v.f1 = 1.0f; >>>> + v.f2 = 2.0f; >>>> + if (sf_from_di_off0 (v.l) != 1.0f || sf_from_di_off4 (v.l) != 2.0f ) >>>> + __builtin_abort (); >>>> + return 0; >>>> +}
Hi, I just submitted a new version: https://gcc.gnu.org/pipermail/gcc-patches/2023-July/623533.html So, we can ignore this ping and check the new version instead. BR, Jeff (Jiufu Guo) Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > Hi, > > Gentle ping ... > > Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > >> Gentle ping... >> >> Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes: >> >>> Gentle ping... >>> >>> Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes: >>> >>>> Hi >>>> >>>> I would like to ping this patch for stage1: >>>> https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612168.html >>>> >>>> BR, >>>> Jeff (Jiufu) >>>> >>>> Jiufu Guo <guojiufu@linux.ibm.com> writes: >>>> >>>>> Hi, >>>>> >>>>> Compared with the previous version: >>>>> https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609654.html >>>>> This patch does not use UNSPEC for insn mtvsrws anymore. And to handle >>>>> the subreg better on BE and LE, predicate "lowpart_subreg_operator" >>>>> is introduced. To help the combine pass match the pattern on the high 32 >>>>> bits of DI, shiftrt is still used. >>>>> >>>>> As mentioned in PR108338, on p9, we could use mtvsrws to implement >>>>> the conversion from SI#0 to SF (or lowpart DI to SF). >>>>> >>>>> For example: >>>>> *(long long*)buff = di; >>>>> float f = *(float*)(buff); >>>>> We currently generate "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" instead of >>>>> "mtvsrws 1,3 ; xscvspdpn 1,1". >>>>> >>>>> This patch updates this, and also enhances the bitcast from highpart >>>>> DI to SF. >>>>> >>>>> Bootstrap and regtests pass on ppc64{,le}. >>>>> Is this ok for trunk? >>>>> >>>>> BR, >>>>> Jeff (Jiufu) >>>>> >>>>> PR target/108338 >>>>> >>>>> gcc/ChangeLog: >>>>> >>>>> * config/rs6000/predicates.md (lowpart_subreg_operator): New >>>>> define_predicate. >>>>> * config/rs6000/rs6000.md (any_rshift): New code_iterator. >>>>> (movsf_from_si2): Rename to... >>>>> (movsf_from_si2_<code>): ... this. 
>>>>> (si2sf_mtvsrws): New define_insn. >>>>> >>>>> gcc/testsuite/ChangeLog: >>>>> >>>>> * gcc.target/powerpc/pr108338.c: New test. >>>>> >>>>> --- >>>>> gcc/config/rs6000/predicates.md | 5 +++ >>>>> gcc/config/rs6000/rs6000.md | 35 ++++++++++++----- >>>>> gcc/testsuite/gcc.target/powerpc/pr108338.c | 42 +++++++++++++++++++++ >>>>> 3 files changed, 73 insertions(+), 9 deletions(-) >>>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108338.c >>>>> >>>>> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md >>>>> index 52c65534e51..e57c9d99c6b 100644 >>>>> --- a/gcc/config/rs6000/predicates.md >>>>> +++ b/gcc/config/rs6000/predicates.md >>>>> @@ -2064,3 +2064,8 @@ (define_predicate "macho_pic_address" >>>>> else >>>>> return false; >>>>> }) >>>>> + >>>>> +(define_predicate "lowpart_subreg_operator" >>>>> + (and (match_code "subreg") >>>>> + (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op))) >>>>> + == SUBREG_BYTE (op)"))) >>>>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md >>>>> index 4a7812fa592..5b4a7f8d801 100644 >>>>> --- a/gcc/config/rs6000/rs6000.md >>>>> +++ b/gcc/config/rs6000/rs6000.md >>>>> @@ -7539,6 +7539,14 @@ (define_split >>>>> UNSPEC_MOVSI_GOT))] >>>>> "") >>>>> >>>>> +(define_insn "si2sf_mtvsrws" >>>>> + [(set (match_operand:SF 0 "gpc_reg_operand" "=wa") >>>>> + (subreg:SF (match_operand:SI 1 "gpc_reg_operand" "r") 0))] >>>>> + "TARGET_P9_VECTOR && TARGET_XSCVSPDPN" >>>>> + "mtvsrws %x0,%1\n\txscvspdpn %x0,%x0" >>>>> + [(set_attr "type" "mfvsr") >>>>> + (set_attr "length" "8")]) >>>>> + >>>>> ;; MR LA >>>>> ;; LWZ LFIWZX LXSIWZX >>>>> ;; STW STFIWX STXSIWX >>>>> @@ -8203,10 +8211,18 @@ (define_insn_and_split "movsf_from_si" >>>>> rtx op2 = operands[2]; >>>>> rtx op1_di = gen_rtx_REG (DImode, REGNO (op1)); >>>>> >>>>> - /* Move SF value to upper 32-bits for xscvspdpn. 
*/ >>>>> - emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); >>>>> - emit_insn (gen_p8_mtvsrd_sf (op0, op2)); >>>>> - emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); >>>>> + if (TARGET_P9_VECTOR) >>>>> + { >>>>> + emit_insn (gen_si2sf_mtvsrws (op0, gen_lowpart (SImode, op1_di))); >>>>> + } >>>>> + else >>>>> + { >>>>> + /* Move SF value to upper 32-bits for xscvspdpn. */ >>>>> + emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); >>>>> + emit_insn (gen_p8_mtvsrd_sf (op0, op2)); >>>>> + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); >>>>> + } >>>>> + >>>>> DONE; >>>>> } >>>>> [(set_attr "length" >>>>> @@ -8219,18 +8235,19 @@ (define_insn_and_split "movsf_from_si" >>>>> "*, *, p9v, p8v, *, *, >>>>> p8v, p8v, p8v, *")]) >>>>> >>>>> +(define_code_iterator any_rshift [ashiftrt lshiftrt]) >>>>> + >>>>> ;; For extracting high part element from DImode register like: >>>>> ;; {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;} >>>>> ;; split it before reload with "and mask" to avoid generating shift right >>>>> ;; 32 bit then shift left 32 bit. 
>>>>> -(define_insn_and_split "movsf_from_si2" >>>>> +(define_insn_and_split "movsf_from_si2_<code>" >>>>> [(set (match_operand:SF 0 "gpc_reg_operand" "=wa") >>>>> (unspec:SF >>>>> - [(subreg:SI >>>>> - (ashiftrt:DI >>>>> + [(match_operator:SI 3 "lowpart_subreg_operator" >>>>> + [(any_rshift:DI >>>>> (match_operand:DI 1 "input_operand" "r") >>>>> - (const_int 32)) >>>>> - 0)] >>>>> + (const_int 32))])] >>>>> UNSPEC_SF_FROM_SI)) >>>>> (clobber (match_scratch:DI 2 "=r"))] >>>>> "TARGET_NO_SF_SUBREG" >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c >>>>> new file mode 100644 >>>>> index 00000000000..2438dc13f41 >>>>> --- /dev/null >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c >>>>> @@ -0,0 +1,42 @@ >>>>> +// { dg-do run } >>>>> +// { dg-options "-O2 -save-temps" } >>>>> + >>>>> +float __attribute__ ((noipa)) sf_from_di_off0 (long long l) >>>>> +{ >>>>> + char buff[16]; >>>>> + *(long long*)buff = l; >>>>> + float f = *(float*)(buff); >>>>> + return f; >>>>> +} >>>>> + >>>>> +float __attribute__ ((noipa)) sf_from_di_off4 (long long l) >>>>> +{ >>>>> + char buff[16]; >>>>> + *(long long*)buff = l; >>>>> + float f = *(float*)(buff + 4); >>>>> + return f; >>>>> +} >>>>> + >>>>> +/* Under lp64, 'l' is in one DI reg, then check sub DI to SF. */ >>>>> +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */ >>>>> +/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */ >>>>> + >>>>> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! 
has_arch_pwr9 } } } } } } */ >>>>> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */ >>>>> +/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */ >>>>> + >>>>> +union di_sf_sf >>>>> +{ >>>>> + struct {float f1; float f2;}; >>>>> + long long l; >>>>> +}; >>>>> + >>>>> +int main() >>>>> +{ >>>>> + union di_sf_sf v; >>>>> + v.f1 = 1.0f; >>>>> + v.f2 = 2.0f; >>>>> + if (sf_from_di_off0 (v.l) != 1.0f || sf_from_di_off4 (v.l) != 2.0f ) >>>>> + __builtin_abort (); >>>>> + return 0; >>>>> +}
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 52c65534e51..e57c9d99c6b 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -2064,3 +2064,8 @@ (define_predicate "macho_pic_address" else return false; }) + +(define_predicate "lowpart_subreg_operator" + (and (match_code "subreg") + (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op))) + == SUBREG_BYTE (op)"))) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 4a7812fa592..5b4a7f8d801 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -7539,6 +7539,14 @@ (define_split UNSPEC_MOVSI_GOT))] "") +(define_insn "si2sf_mtvsrws" + [(set (match_operand:SF 0 "gpc_reg_operand" "=wa") + (subreg:SF (match_operand:SI 1 "gpc_reg_operand" "r") 0))] + "TARGET_P9_VECTOR && TARGET_XSCVSPDPN" + "mtvsrws %x0,%1\n\txscvspdpn %x0,%x0" + [(set_attr "type" "mfvsr") + (set_attr "length" "8")]) + ;; MR LA ;; LWZ LFIWZX LXSIWZX ;; STW STFIWX STXSIWX @@ -8203,10 +8211,18 @@ (define_insn_and_split "movsf_from_si" rtx op2 = operands[2]; rtx op1_di = gen_rtx_REG (DImode, REGNO (op1)); - /* Move SF value to upper 32-bits for xscvspdpn. */ - emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); - emit_insn (gen_p8_mtvsrd_sf (op0, op2)); - emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); + if (TARGET_P9_VECTOR) + { + emit_insn (gen_si2sf_mtvsrws (op0, gen_lowpart (SImode, op1_di))); + } + else + { + /* Move SF value to upper 32-bits for xscvspdpn. 
*/ + emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); + emit_insn (gen_p8_mtvsrd_sf (op0, op2)); + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); + } + DONE; } [(set_attr "length" @@ -8219,18 +8235,19 @@ (define_insn_and_split "movsf_from_si" "*, *, p9v, p8v, *, *, p8v, p8v, p8v, *")]) +(define_code_iterator any_rshift [ashiftrt lshiftrt]) + ;; For extracting high part element from DImode register like: ;; {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;} ;; split it before reload with "and mask" to avoid generating shift right ;; 32 bit then shift left 32 bit. -(define_insn_and_split "movsf_from_si2" +(define_insn_and_split "movsf_from_si2_<code>" [(set (match_operand:SF 0 "gpc_reg_operand" "=wa") (unspec:SF - [(subreg:SI - (ashiftrt:DI + [(match_operator:SI 3 "lowpart_subreg_operator" + [(any_rshift:DI (match_operand:DI 1 "input_operand" "r") - (const_int 32)) - 0)] + (const_int 32))])] UNSPEC_SF_FROM_SI)) (clobber (match_scratch:DI 2 "=r"))] "TARGET_NO_SF_SUBREG" diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c new file mode 100644 index 00000000000..2438dc13f41 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c @@ -0,0 +1,42 @@ +// { dg-do run } +// { dg-options "-O2 -save-temps" } + +float __attribute__ ((noipa)) sf_from_di_off0 (long long l) +{ + char buff[16]; + *(long long*)buff = l; + float f = *(float*)(buff); + return f; +} + +float __attribute__ ((noipa)) sf_from_di_off4 (long long l) +{ + char buff[16]; + *(long long*)buff = l; + float f = *(float*)(buff + 4); + return f; +} + +/* Under lp64, 'l' is in one DI reg, then check sub DI to SF. */ +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */ +/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */ + +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! 
has_arch_pwr9 } } } } } } */ +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */ +/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */ + +union di_sf_sf +{ + struct {float f1; float f2;}; + long long l; +}; + +int main() +{ + union di_sf_sf v; + v.f1 = 1.0f; + v.f2 = 2.0f; + if (sf_from_di_off0 (v.l) != 1.0f || sf_from_di_off4 (v.l) != 2.0f ) + __builtin_abort (); + return 0; +}