Message ID | 000901d122f5$d6c67fc0$84537f40$@arm.com |
---|---|
State | New |
Headers | show |
Hi Wilco, > Enable instruction fusion of AES instructions on ARM for Cortex-A53 and > Cortex-A57. I have a question regarding Cortex-A35: I don't see the same documentation for it on the ARM website as we have for the other cores yet, but is AES fusion not beneficial for it, or is it planned to add it later? BTW, the patch looks good to me (but I can't approve it). Cheers, Yvan > OK for commit? > > ChangeLog: > 2015-11-20 Wilco Dijkstra <wdijkstr@arm.com> > > * gcc/config/arm/arm.c (arm_cortex_a53_tune): Add AES fusion. > (arm_cortex_a57_tune): Likewise. > (aarch_macro_fusion_pair_p): Add support for AES fusion. > * gcc/config/arm/arm-protos.h (fuse_ops): Add FUSE_AES_AESMC. > > --- > gcc/config/arm/arm-protos.h | 5 +++-- > gcc/config/arm/arm.c | 9 +++++++-- > 2 files changed, 10 insertions(+), 4 deletions(-) > > diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h > index f9b1276..4801bb8 100644 > --- a/gcc/config/arm/arm-protos.h > +++ b/gcc/config/arm/arm-protos.h > @@ -302,8 +302,9 @@ struct tune_params > enum fuse_ops > { > FUSE_NOTHING = 0, > - FUSE_MOVW_MOVT = 1 << 0 > - } fusible_ops: 1; > + FUSE_MOVW_MOVT = 1 << 0, > + FUSE_AES_AESMC = 1 << 1 > + } fusible_ops: 2; > /* Depth of scheduling queue to check for L2 autoprefetcher. 
*/ > enum {SCHED_AUTOPREF_OFF, SCHED_AUTOPREF_RANK, SCHED_AUTOPREF_FULL} > sched_autopref: 2; > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index 02f5dc3..7077199 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -1969,7 +1969,7 @@ const struct tune_params arm_cortex_a53_tune = > tune_params::DISPARAGE_FLAGS_NEITHER, > tune_params::PREF_NEON_64_FALSE, > tune_params::PREF_NEON_STRINGOPS_TRUE, > - FUSE_OPS (tune_params::FUSE_MOVW_MOVT), > + FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC), > tune_params::SCHED_AUTOPREF_OFF > }; > > @@ -1992,7 +1992,7 @@ const struct tune_params arm_cortex_a57_tune = > tune_params::DISPARAGE_FLAGS_ALL, > tune_params::PREF_NEON_64_FALSE, > tune_params::PREF_NEON_STRINGOPS_TRUE, > - FUSE_OPS (tune_params::FUSE_MOVW_MOVT), > + FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC), > tune_params::SCHED_AUTOPREF_FULL > }; > > @@ -29668,6 +29668,11 @@ aarch_macro_fusion_pair_p (rtx_insn* prev, > rtx_insn* curr) > && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST > (prev_set))) > return true; > } > + > + if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC > + && aarch_crypto_can_dual_issue (prev, curr)) > + return true; > + > return false; > } > > -- > 1.9.1 > > >
Yvan Roux wrote: > I've a question regarding Cortex-A35, I don't see the same > documentation for it on ARM website as we have for the other cores > yet, but is AES fusion not beneficial for it or is it planned to do it > later ? It's early days for Cortex-A35; GCC 6 just has initial support. When the optimization manual is released, further tuning might be feasible, but for now treating it as a single-issue Cortex-A53 is a reasonable approximation. Wilco
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index f9b1276..4801bb8 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -302,8 +302,9 @@ struct tune_params enum fuse_ops { FUSE_NOTHING = 0, - FUSE_MOVW_MOVT = 1 << 0 - } fusible_ops: 1; + FUSE_MOVW_MOVT = 1 << 0, + FUSE_AES_AESMC = 1 << 1 + } fusible_ops: 2; /* Depth of scheduling queue to check for L2 autoprefetcher. */ enum {SCHED_AUTOPREF_OFF, SCHED_AUTOPREF_RANK, SCHED_AUTOPREF_FULL} sched_autopref: 2; diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 02f5dc3..7077199 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -1969,7 +1969,7 @@ const struct tune_params arm_cortex_a53_tune = tune_params::DISPARAGE_FLAGS_NEITHER, tune_params::PREF_NEON_64_FALSE, tune_params::PREF_NEON_STRINGOPS_TRUE, - FUSE_OPS (tune_params::FUSE_MOVW_MOVT), + FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC), tune_params::SCHED_AUTOPREF_OFF }; @@ -1992,7 +1992,7 @@ const struct tune_params arm_cortex_a57_tune = tune_params::DISPARAGE_FLAGS_ALL, tune_params::PREF_NEON_64_FALSE, tune_params::PREF_NEON_STRINGOPS_TRUE, - FUSE_OPS (tune_params::FUSE_MOVW_MOVT), + FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC), tune_params::SCHED_AUTOPREF_FULL }; @@ -29668,6 +29668,11 @@ aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr) && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set))) return true; } + + if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC + && aarch_crypto_can_dual_issue (prev, curr)) + return true; + return false;