========================================================================================================================
length=256, char=0: 1.82 (-87.23%) 26.67 ( 87.51%) 31.86 (123.99%) 24.50 ( 72.23%) 14.23
length=257, char=0: 1.82 (-87.27%) 26.82 ( 87.83%) 32.02 (124.27%) 24.31 ( 70.27%) 14.28
length=258, char=0: 1.82 (-87.36%) 26.13 ( 81.75%) 32.00 (122.52%) 24.23 ( 68.50%) 14.38
length=259, char=0: 1.82 (-87.39%) 25.17 ( 74.45%) 32.06 (122.23%) 24.43 ( 69.36%) 14.43
length=260, char=0: 1.82 (-87.40%) 25.38 ( 76.04%) 31.91 (121.27%) 24.47 ( 69.70%) 14.42
length=261, char=0: 1.82 (-87.42%) 25.86 ( 78.74%) 32.22 (122.72%) 24.60 ( 70.01%) 14.47
length=262, char=0: 1.82 (-87.51%) 25.79 ( 76.97%) 32.31 (121.76%) 24.21 ( 66.17%) 14.57
length=263, char=0: 1.82 (-87.54%) 25.56 ( 74.92%) 32.35 (121.37%) 24.26 ( 66.00%) 14.61
length=264, char=0: 1.82 (-87.54%) 25.34 ( 73.35%) 32.16 (120.05%) 24.51 ( 67.68%) 14.62
length=265, char=0: 1.82 (-87.57%) 25.03 ( 70.62%) 32.49 (121.49%) 24.60 ( 67.68%) 14.67
length=266, char=0: 1.82 (-87.61%) 24.69 ( 67.82%) 32.55 (121.28%) 24.67 ( 67.72%) 14.71
length=267, char=0: 1.82 (-87.70%) 24.76 ( 67.09%) 32.33 (118.20%) 24.57 ( 65.84%) 14.82
length=268, char=0: 1.82 (-87.75%) 24.10 ( 62.09%) 32.58 (119.14%) 24.81 ( 66.87%) 14.87
length=269, char=0: 1.82 (-87.72%) 23.72 ( 59.63%) 32.45 (118.36%) 24.88 ( 67.41%) 14.86
length=270, char=0: 1.83 (-87.76%) 23.44 ( 57.22%) 32.72 (119.47%) 24.96 ( 67.44%) 14.91
length=271, char=0: 1.83 (-87.75%) 23.26 ( 56.06%) 32.80 (120.04%) 25.02 ( 67.90%) 14.90
length=512, char=0: 1.90 (-92.54%) 29.15 ( 14.39%) 42.50 ( 66.77%) 40.75 ( 59.92%) 25.48
length=513, char=0: 1.90 (-92.59%) 29.21 ( 13.75%) 42.18 ( 64.27%) 40.61 ( 58.13%) 25.68
length=514, char=0: 1.90 (-92.56%) 28.78 ( 12.50%) 42.56 ( 66.34%) 40.71 ( 59.12%) 25.59
length=515, char=0: 1.90 (-92.61%) 28.59 ( 11.01%) 42.20 ( 63.85%) 40.74 ( 58.18%) 25.76
length=516, char=0: 1.90 (-92.59%) 28.42 ( 10.75%) 42.60 ( 66.00%) 40.38 ( 57.34%) 25.66
length=517, char=0: 1.90 (-92.64%) 28.32 ( 9.49%) 42.32 ( 63.59%) 40.49 ( 56.54%) 25.87
length=518, char=0: 1.90 (-92.61%) 28.17 ( 9.43%) 42.73 ( 65.98%) 40.55 ( 57.49%) 25.75
length=519, char=0: 1.90 (-92.66%) 28.04 ( 8.16%) 42.37 ( 63.39%) 40.58 ( 56.51%) 25.93
length=520, char=0: 1.90 (-92.63%) 27.58 ( 6.71%) 42.80 ( 65.61%) 40.62 ( 57.19%) 25.84
length=521, char=0: 1.90 (-92.68%) 27.33 ( 5.01%) 42.39 ( 62.86%) 40.67 ( 56.26%) 26.03
length=522, char=0: 1.90 (-92.66%) 27.28 ( 5.28%) 42.83 ( 65.27%) 40.76 ( 57.27%) 25.91
length=523, char=0: 1.90 (-92.70%) 27.37 ( 4.87%) 42.47 ( 62.72%) 40.77 ( 56.22%) 26.10
length=524, char=0: 1.90 (-92.68%) 27.25 ( 4.82%) 42.88 ( 64.92%) 40.81 ( 56.98%) 26.00
length=525, char=0: 1.91 (-92.72%) 27.12 ( 3.58%) 42.52 ( 62.41%) 40.82 ( 55.93%) 26.18
length=526, char=0: 1.90 (-92.70%) 27.00 ( 3.54%) 42.93 ( 64.65%) 40.89 ( 56.81%) 26.08
length=527, char=0: 1.91 (-92.74%) 26.88 ( 2.39%) 42.93 ( 63.53%) 40.90 ( 55.81%) 26.25
length=1024, char=0: 1.95 (-95.35%) 30.48 (-27.21%) 51.02 ( 21.86%) 49.74 ( 18.79%) 41.87
length=1025, char=0: 1.95 (-95.31%) 30.52 (-26.49%) 51.05 ( 22.97%) 49.75 ( 19.85%) 41.51
length=1026, char=0: 1.95 (-95.35%) 30.24 (-27.85%) 51.05 ( 21.81%) 49.28 ( 17.59%) 41.91
length=1027, char=0: 1.95 (-95.35%) 30.16 (-28.08%) 50.61 ( 20.67%) 49.77 ( 18.67%) 41.94
length=1028, char=0: 1.95 (-95.36%) 29.63 (-29.42%) 51.06 ( 21.63%) 49.79 ( 18.59%) 41.98
length=1029, char=0: 1.95 (-95.36%) 29.58 (-29.61%) 51.06 ( 21.51%) 49.82 ( 18.55%) 42.02
length=1030, char=0: 1.95 (-95.33%) 29.71 (-28.78%) 51.07 ( 22.41%) 49.79 ( 19.36%) 41.72
length=1031, char=0: 1.95 (-95.37%) 29.58 (-29.70%) 51.09 ( 21.42%) 49.38 ( 17.36%) 42.07
length=1032, char=0: 1.95 (-95.37%) 29.55 (-29.83%) 50.62 ( 20.19%) 49.82 ( 18.31%) 42.11
length=1033, char=0: 1.95 (-95.37%) 29.26 (-30.56%) 51.09 ( 21.26%) 49.85 ( 18.32%) 42.14
length=1034, char=0: 1.95 (-95.38%) 29.21 (-30.77%) 51.13 ( 21.19%) 49.86 ( 18.17%) 42.19
length=1035, char=0: 1.95 (-95.38%) 29.27 (-30.66%) 51.14 ( 21.15%) 49.86 ( 18.10%) 42.22
length=1036, char=0: 1.95 (-95.35%) 29.25 (-30.22%) 51.13 ( 21.99%) 49.88 ( 19.00%) 41.92
length=1037, char=0: 1.95 (-95.39%) 29.11 (-31.12%) 51.16 ( 21.06%) 49.43 ( 16.96%) 42.26
length=1038, char=0: 1.95 (-95.39%) 29.21 (-30.96%) 50.67 ( 19.76%) 49.89 ( 17.92%) 42.31
length=1039, char=0: 1.95 (-95.39%) 28.98 (-31.54%) 51.16 ( 20.85%) 49.90 ( 17.87%) 42.33
* sysdeps/aarch64/memset.S (do_zva_64): Set 64 bytes each in the
prologue and the epilogue instead of 128 bytes each.
---
sysdeps/aarch64/memset.S | 16 +++++-----------
1 file changed, 5 insertions(+), 11 deletions(-)
@@ -62,20 +62,14 @@
str q0, [dst, 16]
stp q0, q0, [dst, 32]
bic dst, dst, 63
- stp q0, q0, [dst, 64]
- stp q0, q0, [dst, 96]
- sub count, dstend, dst /* Count is now 128 too large. */
- sub count, count, 128+64+64 /* Adjust count and bias for loop. */
- add dst, dst, 128
- nop
+ add dst, dst, 64
+ sub dstend, dstend, 64
1: dc zva, dst
add dst, dst, 64
- subs count, count, 64
+ cmp dstend, dst
b.hi 1b
- stp q0, q0, [dst, 0]
- stp q0, q0, [dst, 32]
- stp q0, q0, [dstend, -64]
- stp q0, q0, [dstend, -32]
+ stp q0, q0, [dstend]
+ stp q0, q0, [dstend, 32]
ret
.endm