Message ID | CA+=Sn1mTJPVOke20gDmH=RrjEA-qvdg=b+CgSJReoi3rvzM25g@mail.gmail.com |
---|---|
State | New |
Headers | show |
On 08/05/2016 12:18 AM, Andrew Pinski wrote:
> This patch disables the forming of the load/store pairs for SImode if
> we are tuning for ThunderX. I used the tuning flags route so it can
> be overridden if needed later on or if someone else wants to use the
> same method for their core.

+  if (mode == SImode
+      && AARCH64_EXTRA_TUNE_SLOW_LDPW
+      && !optimize_size)
+    return false;

AARCH64_EXTRA_TUNE_SLOW_LDPW is a non-zero bit-mask. That will always be true. This is present in two places in the patch. You need something more like

      && (aarch64_tune_params.extra_tuning_flags & AARCH64_EXTRA_TUNE_SLOW_LDPW)

You should verify that the patch disables the optimization for ThunderX but does not disable it for other targets.

Jim
Index: gcc/config/aarch64/aarch64-tuning-flags.def
===================================================================
--- gcc/config/aarch64/aarch64-tuning-flags.def (revision 239150)
+++ gcc/config/aarch64/aarch64-tuning-flags.def (working copy)
@@ -29,3 +29,4 @@
      AARCH64_TUNE_ to give an enum name. */
 
 AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
+AARCH64_EXTRA_TUNING_OPTION ("slow_ldpw", SLOW_LDPW)
Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c (revision 239150)
+++ gcc/config/aarch64/aarch64.c (working copy)
@@ -712,7 +712,7 @@
   0, /* max_case_values. */
   0, /* cache_line_size. */
   tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
-  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+  (AARCH64_EXTRA_TUNE_SLOW_LDPW) /* tune_flags. */
 };
 
 static const struct tune_params xgene1_tunings =
@@ -13574,6 +13574,11 @@
   enum reg_class rclass_1, rclass_2;
   rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
 
+  if (mode == SImode
+      && AARCH64_EXTRA_TUNE_SLOW_LDPW
+      && !optimize_size)
+    return false;
+
   if (load)
     {
       mem_1 = operands[1];
@@ -13673,6 +13678,11 @@
   rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
   rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
 
+  if (mode == SImode
+      && AARCH64_EXTRA_TUNE_SLOW_LDPW
+      && !optimize_size)
+    return false;
+
   if (load)
     {
       reg_1 = operands[0];