Message ID | 20231208021655.1595917-1-hongtao.liu@intel.com |
---|---|
State | New |
Headers | show |
Series | Don't assume it's AVX_U128_CLEAN after call_insn whose abi.mode_clobber(V4DImode) deosn't contains all SSE_REGS. | expand |
On Fri, Dec 8, 2023 at 10:17 AM liuhongt <hongtao.liu@intel.com> wrote: > > If the function desn't clobber any sse registers or only clobber > 128-bit part, then vzeroupper isn't issued before the function exit. > the status not CLEAN but ANY after the function. > > Also for sibling_call, it's safe to issue an vzeroupper. Also there > could be missing vzeroupper since there's no mode_exit for > sibling_call_p. > > Compared to the patch in the PR, this patch add sibling_call part. > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > Ok for trunk and backport? Part of this has been approved in the PR, and for the sibling_call part, i think it should be reasonable. So i'm going to commit the patch. > > gcc/ChangeLog: > > PR target/112891 > * config/i386/i386.cc (ix86_avx_u128_mode_after): Return > AVX_U128_ANY if callee_abi doesn't clobber all_sse_regs to > align with ix86_avx_u128_mode_needed. > (ix86_avx_u128_mode_needed): Return AVX_U128_ClEAN for > sibling_call. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr112891.c: New test. > * gcc.target/i386/pr112891-2.c: New test. > --- > gcc/config/i386/i386.cc | 22 +++++++++++++--- > gcc/testsuite/gcc.target/i386/pr112891-2.c | 30 ++++++++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr112891.c | 29 +++++++++++++++++++++ > 3 files changed, 78 insertions(+), 3 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr112891-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr112891.c > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index 7c5cab4e2c6..fe259cdb789 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -15038,8 +15038,12 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) > vzeroupper if all SSE registers are clobbered. */ > const function_abi &abi = insn_callee_abi (insn); > if (vzeroupper_pattern (PATTERN (insn), VOIDmode) > - || !hard_reg_set_subset_p (reg_class_contents[SSE_REGS], > - abi.mode_clobbers (V4DImode))) > + /* Should be safe to issue an vzeroupper before sibling_call_p. > + Also there not mode_exit for sibling_call, so there could be > + missing vzeroupper for that. */ > + || !(SIBLING_CALL_P (insn) > + || hard_reg_set_subset_p (reg_class_contents[SSE_REGS], > + abi.mode_clobbers (V4DImode)))) > return AVX_U128_ANY; > > return AVX_U128_CLEAN; > @@ -15177,7 +15181,19 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn) > bool avx_upper_reg_found = false; > note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found); > > - return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN; > + if (avx_upper_reg_found) > + return AVX_U128_DIRTY; > + > + /* If the function desn't clobber any sse registers or only clobber > + 128-bit part, Then vzeroupper isn't issued before the function exit. > + the status not CLEAN but ANY after the function. */ > + const function_abi &abi = insn_callee_abi (insn); > + if (!(SIBLING_CALL_P (insn) > + || hard_reg_set_subset_p (reg_class_contents[SSE_REGS], > + abi.mode_clobbers (V4DImode)))) > + return AVX_U128_ANY; > + > + return AVX_U128_CLEAN; > } > > /* Otherwise, return current mode. Remember that if insn > diff --git a/gcc/testsuite/gcc.target/i386/pr112891-2.c b/gcc/testsuite/gcc.target/i386/pr112891-2.c > new file mode 100644 > index 00000000000..164c3985d50 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr112891-2.c > @@ -0,0 +1,30 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx2 -O3" } */ > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ > + > +void > +__attribute__((noinline)) > +bar (double* a) > +{ > + a[0] = 1.0; > + a[1] = 2.0; > +} > + > +double > +__attribute__((noinline)) > +foo (double* __restrict a, double* b) > +{ > + a[0] += b[0]; > + a[1] += b[1]; > + a[2] += b[2]; > + a[3] += b[3]; > + bar (b); > + return a[5] + b[5]; > +} > + > +double > +foo1 (double* __restrict a, double* b) > +{ > + double c = foo (a, b); > + return __builtin_exp (c); > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr112891.c b/gcc/testsuite/gcc.target/i386/pr112891.c > new file mode 100644 > index 00000000000..dbf6c67948a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr112891.c > @@ -0,0 +1,29 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx2 -O3" } */ > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ > + > +void > +__attribute__((noinline)) > +bar (double* a) > +{ > + a[0] = 1.0; > + a[1] = 2.0; > +} > + > +void > +__attribute__((noinline)) > +foo (double* __restrict a, double* b) > +{ > + a[0] += b[0]; > + a[1] += b[1]; > + a[2] += b[2]; > + a[3] += b[3]; > + bar (b); > +} > + > +double > +foo1 (double* __restrict a, double* b) > +{ > + foo (a, b); > + return __builtin_exp (b[1]); > +} > -- > 2.31.1 >
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 7c5cab4e2c6..fe259cdb789 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -15038,8 +15038,12 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) vzeroupper if all SSE registers are clobbered. */ const function_abi &abi = insn_callee_abi (insn); if (vzeroupper_pattern (PATTERN (insn), VOIDmode) - || !hard_reg_set_subset_p (reg_class_contents[SSE_REGS], - abi.mode_clobbers (V4DImode))) + /* Should be safe to issue an vzeroupper before sibling_call_p. + Also there not mode_exit for sibling_call, so there could be + missing vzeroupper for that. */ + || !(SIBLING_CALL_P (insn) + || hard_reg_set_subset_p (reg_class_contents[SSE_REGS], + abi.mode_clobbers (V4DImode)))) return AVX_U128_ANY; return AVX_U128_CLEAN; @@ -15177,7 +15181,19 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn) bool avx_upper_reg_found = false; note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found); - return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN; + if (avx_upper_reg_found) + return AVX_U128_DIRTY; + + /* If the function desn't clobber any sse registers or only clobber + 128-bit part, Then vzeroupper isn't issued before the function exit. + the status not CLEAN but ANY after the function. */ + const function_abi &abi = insn_callee_abi (insn); + if (!(SIBLING_CALL_P (insn) + || hard_reg_set_subset_p (reg_class_contents[SSE_REGS], + abi.mode_clobbers (V4DImode)))) + return AVX_U128_ANY; + + return AVX_U128_CLEAN; } /* Otherwise, return current mode. Remember that if insn diff --git a/gcc/testsuite/gcc.target/i386/pr112891-2.c b/gcc/testsuite/gcc.target/i386/pr112891-2.c new file mode 100644 index 00000000000..164c3985d50 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112891-2.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O3" } */ +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ + +void +__attribute__((noinline)) +bar (double* a) +{ + a[0] = 1.0; + a[1] = 2.0; +} + +double +__attribute__((noinline)) +foo (double* __restrict a, double* b) +{ + a[0] += b[0]; + a[1] += b[1]; + a[2] += b[2]; + a[3] += b[3]; + bar (b); + return a[5] + b[5]; +} + +double +foo1 (double* __restrict a, double* b) +{ + double c = foo (a, b); + return __builtin_exp (c); +} diff --git a/gcc/testsuite/gcc.target/i386/pr112891.c b/gcc/testsuite/gcc.target/i386/pr112891.c new file mode 100644 index 00000000000..dbf6c67948a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112891.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O3" } */ +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ + +void +__attribute__((noinline)) +bar (double* a) +{ + a[0] = 1.0; + a[1] = 2.0; +} + +void +__attribute__((noinline)) +foo (double* __restrict a, double* b) +{ + a[0] += b[0]; + a[1] += b[1]; + a[2] += b[2]; + a[3] += b[3]; + bar (b); +} + +double +foo1 (double* __restrict a, double* b) +{ + foo (a, b); + return __builtin_exp (b[1]); +}