Message ID | PAWPR08MB8982471B55A3A03B78C1DF8083B02@PAWPR08MB8982.eurprd08.prod.outlook.com |
---|---|
State | New |
Headers | show |
Series | benchtests: Add random strlen benchmark | expand |
On Tue, Jul 30, 2024 at 10:03 PM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote: > > Add a new randomized strlen test similar to bench-random-memcpy. Instead of repeating > the same call to strlen over and over again, it times a large number of different > strings. The distribution of the string length and alignment is based on SPEC2017. > > --- > > diff --git a/benchtests/Makefile b/benchtests/Makefile > index d228e9e68af3d73129591f3d875d6be545182eac..6d746ad2d4de78470b1f7936c34affbd6edb254a 100644 > --- a/benchtests/Makefile > +++ b/benchtests/Makefile > @@ -171,6 +171,7 @@ string-benchset := \ > strcpy_chk \ > strcspn \ > strlen \ > + strlen-random \ > strncasecmp \ > strncat \ > strncmp \ > diff --git a/benchtests/bench-strlen-random.c b/benchtests/bench-strlen-random.c > new file mode 100644 > index 0000000000000000000000000000000000000000..becd09a2caff40262127b732aadd7d0e4374e9aa > --- /dev/null > +++ b/benchtests/bench-strlen-random.c > @@ -0,0 +1,194 @@ > +/* Measure strlen performance. > + Copyright (C) 2024 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > +#define TEST_MAIN > +#define TEST_NAME "strlen" > + > +#define NUM_TESTS 65536 > +#define MAX_ALIGN 32 > +#define MAX_STRLEN 128 > +#define MIN_PAGE_SIZE (2 * getpagesize()) > + > +#include "bench-string.h" > +#include <assert.h> > +#include "json-lib.h" > + > +typedef size_t (*proto_t) (const CHAR *); > + > +size_t memchr_strlen (const CHAR *); > + > +IMPL (memchr_strlen, 0) > + > +size_t > +memchr_strlen (const CHAR *p) > +{ > + return (const CHAR *)MEMCHR (p, 0, PTRDIFF_MAX) - p; > +} > + > +IMPL (STRLEN, 1) > + > +static uint32_t strlen_tests[NUM_TESTS]; > + > +typedef struct { uint16_t size; uint16_t freq; } freq_data_t; > +typedef struct { uint16_t align; uint16_t freq; } align_data_t; > + > +#define SIZE_NUM 65536 > +#define SIZE_MASK (SIZE_NUM-1) > +static uint8_t strlen_len_arr[SIZE_NUM]; > + > +/* Frequency data for strlen sizes up to 256 based on SPEC2017. */ Why the 256 cutoff? > +static freq_data_t strlen_len_freq[] = > +{ > + { 12,22671}, { 18,12834}, { 13, 9555}, { 6, 6348}, { 17, 6095}, { 11, 2115}, > + { 10, 1335}, { 7, 814}, { 2, 646}, { 9, 483}, { 8, 471}, { 16, 418}, > + { 4, 390}, { 1, 388}, { 5, 233}, { 3, 204}, { 0, 79}, { 14, 79}, > + { 15, 69}, { 26, 36}, { 22, 35}, { 31, 24}, { 32, 24}, { 19, 21}, > + { 25, 17}, { 28, 15}, { 21, 14}, { 33, 14}, { 20, 13}, { 24, 9}, > + { 29, 9}, { 30, 9}, { 23, 7}, { 34, 7}, { 27, 6}, { 44, 5}, > + { 42, 4}, { 45, 3}, { 47, 3}, { 40, 2}, { 41, 2}, { 43, 2}, > + { 58, 2}, { 78, 2}, { 36, 2}, { 48, 1}, { 52, 1}, { 60, 1}, > + { 64, 1}, { 56, 1}, { 76, 1}, { 68, 1}, { 80, 1}, { 84, 1}, > + { 72, 1}, { 86, 1}, { 35, 1}, { 39, 1}, { 50, 1}, { 38, 1}, > + { 37, 1}, { 46, 1}, { 98, 1}, {102, 1}, {128, 1}, { 51, 1}, > + {107, 1}, { 0, 0} > +}; > + > +#define ALIGN_NUM 1024 > +#define ALIGN_MASK (ALIGN_NUM-1) > +static uint8_t strlen_align_arr[ALIGN_NUM]; > + > +/* Alignment data for strlen based on SPEC2017. */ Here (and in memset) can you comment what the alignment is relative to? Page alignment? 
Cache alignment? etc... > +static align_data_t string_align_freq[] = > +{ > + {8, 470}, {32, 427}, {16, 99}, {1, 19}, {2, 6}, {4, 3}, {0, 0} > +}; > + > +static void > +init_strlen_distribution (void) > +{ > + int i, j, freq, size, n; > + > + for (n = i = 0; (freq = strlen_len_freq[i].freq) != 0; i++) > + for (j = 0, size = strlen_len_freq[i].size; j < freq; j++) > + strlen_len_arr[n++] = size; > + assert (n == SIZE_NUM); > + > + for (n = i = 0; (freq = string_align_freq[i].freq) != 0; i++) > + for (j = 0, size = string_align_freq[i].align; j < freq; j++) > + strlen_align_arr[n++] = size; > + assert (n == ALIGN_NUM); > +} > + > +static volatile size_t maskv = 0; > + > +static void > +do_one_test (json_ctx_t *json_ctx, impl_t *impl, size_t iters, > + uint32_t *input, size_t n) > +{ > + timing_t start, stop, cur; > + size_t res = 0; > + size_t mask = maskv; > + > + /* Avoid 'cold start' performance penalty. */ > + for (int i = 0; i < 10; i++) > + for (int j = 0; j < n; j++) > + CALL (impl, (const char*)buf1 + input[j]); > + > + TIMING_NOW (start); > + for (int i = 0; i < iters; ++i) > + for (int j = 0; j < n; j++) > + res = CALL (impl, (const char*)buf1 + input[j] + (res & mask)); > + TIMING_NOW (stop); > + TIMING_DIFF (cur, start, stop); > + json_element_double (json_ctx, (double) cur / (double) iters); > +} > + > +static void > +do_test (json_ctx_t *json_ctx) > +{ > + size_t n; > + uint8_t *a = buf1; > + uint16_t index[MAX_ALIGN]; > + > + memset (a, 'x', MIN_PAGE_SIZE); > + > + /* Create indices for strings at all alignments. */ > + for (int i = 0; i < MAX_ALIGN; i++) > + { > + index[i] = i * (MAX_STRLEN + 1); > + a[index[i] + MAX_STRLEN] = 0; > + } > + > + /* Create a random set of strlen input strings using the string length > + and alignment distributions. 
*/ > + for (n = 0; n < NUM_TESTS; n++) > + { > + int align = strlen_align_arr[rand () & ALIGN_MASK]; > + int exp_len = strlen_len_arr[rand () & SIZE_MASK]; > + > + strlen_tests[n] = > + index[(align + exp_len) & (MAX_ALIGN - 1)] + MAX_STRLEN - exp_len; > + assert ((strlen_tests[n] & (align - 1)) == 0); > + assert (strlen ((char*) a + strlen_tests[n]) == exp_len); > + } > + > + json_element_object_begin (json_ctx); > + json_array_begin (json_ctx, "timings"); > + > + FOR_EACH_IMPL (impl, 0) > + do_one_test (json_ctx, impl, INNER_LOOP_ITERS_MEDIUM, strlen_tests, n); > + > + json_array_end (json_ctx); > + json_element_object_end (json_ctx); > +} > + > +int > +test_main (void) > +{ > + > + json_ctx_t json_ctx; > + > + test_init (); > + init_strlen_distribution (); > + > + json_init (&json_ctx, 0, stdout); > + > + json_document_begin (&json_ctx); > + json_attr_string (&json_ctx, "timing_type", TIMING_TYPE); > + > + json_attr_object_begin (&json_ctx, "functions"); > + json_attr_object_begin (&json_ctx, TEST_NAME); > + json_attr_string (&json_ctx, "bench-variant", "random"); > + > + json_array_begin (&json_ctx, "ifuncs"); > + FOR_EACH_IMPL (impl, 0) > + json_element_string (&json_ctx, impl->name); > + json_array_end (&json_ctx); > + > + json_array_begin (&json_ctx, "results"); > + do_test (&json_ctx); > + > + json_array_end (&json_ctx); > + json_attr_object_end (&json_ctx); > + json_attr_object_end (&json_ctx); > + json_document_end (&json_ctx); > + > + return ret; > +} > + > +#include <support/test-driver.c> > > >
Hi Noah, > +/* Frequency data for strlen sizes up to 256 based on SPEC2017. */ > Why the 256 cutoff? Large strings are rare and when you collect this data you have to have a cutoff somewhere. As it happens the longest string was 128 bytes. > +/* Alignment data for strlen based on SPEC2017. */ > Here (and in memset) can you comment what the alignment is relative to? > Page alignment? Cache alignment? etc... Not sure what you mean - alignment is not relative to anything. It's simply the alignment of a pointer. Cheers, Wilco
On Tue, Jul 30, 2024, 22:35 Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote: > Hi Noah, > > > +/* Frequency data for strlen sizes up to 256 based on SPEC2017. */ > > > Why the 256 cutoff? > > Large strings are rare and when you collect this data you have to have a > cutoff > somewhere. As it happens the longest string was 128 bytes. > > > +/* Alignment data for strlen based on SPEC2017. */ > > > Here (and in memset) can you comment what the alignment is relative to? > > Page alignment? Cache alignment? etc... > > Not sure what you mean - alignment is not relative to anything. It's > simply the > alignment of a pointer. > So this is just based on trailing zeros of all the pointer arguments? Seems highly unlikely that the max would be aligned(32)... > > Cheers, > Wilco
Hi Noah,
> So this is just based on trailing zeros of all the pointer arguments? Seems highly unlikely that the max would be aligned(32)...
I limited the maximum alignment to a cache line, IIRC. We only simulate 2^16
strings, so anything with a low probability will occur zero times. There isn't
any point in considering larger alignments since string implementations won't
benefit from them - what is important is that some 97% of strings are at least
8-byte aligned.
Cheers,
Wilco
On Wed, Jul 31, 2024 at 5:54 AM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote: > > Hi Noah, > > > So this is just based on trailing zeros of all the pointer arguments? Seems highly unlikely that the max would be aligned(32)... > > I limited max to a cacheline IIRC. We only simulate 2^16 strings, so anything > with a low probability will occur zero times. There isn't any point considering > larger alignments since string implementations won't benefit - what is > important is that some 97% of strings are at least 8-byte aligned. I would prefer page offsets (here and in memset), in x86 at least many of our string/memory functions have behavior that changes with page offset and have alignment preferences up to 256 byte. > > Cheers, > Wilco
Hi Noah, >> I limited max to a cacheline IIRC. We only simulate 2^16 strings, so anything >> with a low probability will occur zero times. There isn't any point considering >> larger alignments since string implementations won't benefit - what is >> important is that some 97% of strings are at least 8-byte aligned. > > I would prefer page offsets (here and in memset), in x86 at least many of our > string/memory functions have behavior that changes with page offset and have > alignment preferences up to 256 byte. I'm still not sure what you mean. In all of the random benchmarks we're choosing random offsets in a dataset that is far larger than a page. Can you show what you would do differently via a patch or code sample? Cheers, Wilco
On Wed, Jul 31, 2024 at 8:29 PM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote: > > Hi Noah, > > >> I limited max to a cacheline IIRC. We only simulate 2^16 strings, so anything > >> with a low probability will occur zero times. There isn't any point considering > >> larger alignments since string implementations won't benefit - what is > >> important is that some 97% of strings are at least 8-byte aligned. > > > > I would prefer page offsets (here and in memset), in x86 at least many of our > > string/memory functions have behavior that changes with page offset and have > > alignment preferences up to 256 byte. > > I'm still not sure what you mean. In all of the random benchmarks we're choosing > random offsets in a dataset that is far larger than a page. > > Can you show what you would do differently via a patch or code sample? For alignment I assume you did something like: ``` hist[__builtin_ctzll(<addr>)] += 1; ``` then combined all entries for index 5 and above. What I would prefer is: ``` hist[<addr> & PAGE_MASK] += 1; ``` and not combining any entries. Although I do realize this will complicate the code a bit. > > Cheers, > Wilco
Hi Noah, >> Can you show what you would do differently via a patch or code sample? > >For alignment I assume you did something like: > > ``` > hist[__builtin_ctzll(<addr>)] += 1; > ``` > > then combined all entries for index 5 and above. Indeed. > What I would prefer is: > ``` > hist[<addr> & PAGE_MASK] += 1; > ``` > and not combining any entries. > > Although I do realize this will complicate the code a bit. So data alignment is due to strings being allocated via malloc, being in constdata or on the stack etc. Page offsets vary a lot due to unrelated things, so the key question is, if we had such data (and I don't), would it be useful? The very small average string size means it's also very rare to cross a page boundary for example. Cheers, Wilco
On Thu, Aug 1, 2024 at 2:02 AM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote: > > Hi Noah, > > >> Can you show what you would do differently via a patch or code sample? > > > >For alignment I assume you did something like: > > > > ``` > > hist[__builtin_ctzll(<addr>)] += 1; > > ``` > > > > then combined all entries for index 5 and above. > > Indeed. > > > What I would prefer is: > > ``` > > hist[<addr> & PAGE_MASK] += 1; > > ``` > > and not combining any entries. > > > > Although I do realize this will complicate the code a bit. > > So data alignment is due to strings being allocated via malloc, being in constdata > or on the stack etc. Page offsets vary a lot due to unrelated things, so the key question > is, if we had such data (and I don't), would it be useful? The very small average string > size means it's also very rare to cross a page boundary for example. I grabbed the data, and it seems page crosses are less common than random (although not sure why). Attached is an aggregate histogram of strlen/memset sizes (up to 4096) and page offsets running SPEC. Also attached is tar with the histograms for each of the runs. 
> > Cheers, > Wilco STRLEN_LEN:CNT 12:41407180 11:21980235 13:17802359 10:14204290 6:11716461 18:5241632 8:1120567 16:1025380 9:870642 7:728851 3:706465 4:589952 26:556440 5:439687 22:399450 17:327356 15:302614 25:256239 31:234423 2:224244 32:216307 14:161513 21:149088 33:146712 19:126314 20:105440 24:90575 30:88310 27:75972 0:73943 29:71542 34:69909 42:57879 28:53630 44:51135 45:50000 41:39883 43:38346 23:34185 47:26972 1:22482 40:16606 39:12926 50:9875 35:8313 78:7120 36:6730 86:2254 38:1572 46:1500 37:960 143:789 144:623 94:510 93:250 104:240 142:215 145:161 146:153 116:138 148:123 149:119 124:112 123:107 126:101 127:81 48:63 139:55 65:51 138:49 51:47 53:44 49:37 77:33 140:33 57:31 55:29 141:29 52:28 136:27 71:25 137:25 91:22 81:21 147:21 60:20 747:20 80:18 95:16 748:16 125:15 67:15 68:15 115:14 64:14 128:13 89:12 59:12 62:12 4096+:12 66:11 155:11 990:10 995:10 69:8 70:8 56:8 58:8 61:8 63:8 90:8 122:8 152:8 204:8 221:8 290:8 306:8 749:8 129:7 54:7 76:7 73:6 108:6 109:6 134:6 219:6 756:6 75:5 82:4 156:4 346:4 103:4 1492:4 85:4 92:4 97:4 113:4 114:4 131:4 180:4 182:4 185:4 192:4 209:4 245:4 282:4 318:4 514:4 549:4 600:4 615:4 772:4 917:4 1137:4 1180:4 1427:4 1678:4 1843:4 1881:4 1940:4 2041:4 2102:4 2132:4 2286:4 2347:4 2618:4 135:3 121:3 345:3 494:2 602:2 79:2 120:2 72:2 83:2 88:2 106:2 112:2 117:2 130:2 153:2 154:2 157:2 161:2 166:2 168:2 169:2 173:2 179:2 181:2 183:2 186:2 190:2 194:2 199:2 208:2 213:2 222:2 236:2 252:2 260:2 261:2 267:2 277:2 298:2 311:2 381:2 409:2 533:2 1268:2 133:1 STRLEN_PG_OFF:CNT 3108:40768739 2500:16183943 2226:10870377 1919:1206059 2352:753918 2432:748304 2576:468257 3248:427564 1632:426533 3888:420569 384:394575 1776:294608 1712:279967 112:275836 1696:273073 96:265263 1168:262914 1760:261111 560:260643 848:259100 3856:250659 3648:243776 2304:241155 912:236161 2288:225731 16:225089 2832:224342 2608:217345 3232:216094 3760:214755 1968:212807 1840:212631 240:212587 3696:212197 1520:212120 1504:211579 496:210786 1680:210680 3664:208786 
1040:206600 656:206594 3120:206282 272:206250 1904:205979 2064:205694 3344:205668 3952:204966 3328:204827 3440:204058 3792:203493 2032:203390 2640:203036 2768:202059 2896:201774 3088:201738 3184:201354 2448:201292 1936:200960 2080:199578 1296:199539 2512:199062 304:198711 896:198431 4016:198364 2912:197783 2672:197616 0:197280 1008:197126 3920:196973 816:196866 1808:196815 1616:195916 2320:195723 368:195100 3536:194493 4000:194019 3776:193582 336:193562 3152:193396 592:193181 2384:192748 2096:191722 3568:191576 528:191565 880:191478 1648:191200 1872:191123 1552:190963 400:190816 352:190765 1744:190478 1104:190131 1328:190020 80:189794 3056:189414 3824:189207 1264:189098 2224:188863 3472:188806 1360:187829 2624:187701 2416:187548 3936:186918 2272:186830 2256:186717 1136:186281 2128:185937 1072:185853 2928:185658 48:185418 3728:185362 208:185021 784:184997 1456:184721 464:184650 3408:184377 2480:184330 3600:183328 688:183326 192:183293 1856:183278 1536:183101 2000:183080 2784:182890 2160:182860 2704:182850 3632:182828 2112:182781 944:182350 3504:182286 2960:182247 3984:182229 1184:182029 1824:181788 2864:181586 624:181539 1392:180994 2544:180575 3680:180470 4048:179923 176:179792 720:179676 2736:179660 1888:179381 2336:179179 2992:179095 1584:178956 3872:178854 3280:178801 2192:178731 4080:178170 3216:178167 3376:177563 1200:177174 3312:176580 752:176552 3024:176347 432:175720 1984:175695 144:175624 2800:175240 976:174781 1488:174781 3488:174723 2400:174638 1248:174624 3744:174433 2592:174329 640:173987 1424:173642 576:172651 1232:172648 2560:171526 448:171117 2368:170917 3552:170293 3040:170021 992:169693 3456:169481 3840:169037 320:169033 2016:168582 288:168561 512:168554 1728:168011 3424:167093 1472:166301 2048:166101 3072:165919 704:165333 1920:165319 2496:165078 3712:164862 416:164422 928:164307 864:163935 2656:163705 2848:163539 3968:162077 3616:161945 3296:161540 672:161476 3104:161184 2816:160776 1664:160629 3904:160628 2528:160431 224:160320 1408:160093 
1056:159109 2976:159003 1312:158946 3264:158905 3136:158844 1024:158295 1120:158066 1216:157957 3200:157918 608:157770 1952:157627 544:157607 3808:157502 3168:157437 1792:157359 4032:157339 480:157271 3584:157022 1088:156748 2752:156559 2464:156443 2688:156429 128:156044 160:155853 3392:155796 2880:155728 2720:155672 1600:155241 2144:155180 2240:154779 960:154691 1568:154346 832:154296 3360:154099 1280:154006 736:153768 768:153741 3008:153635 1376:153501 800:153364 256:153068 2176:152617 4064:152369 64:152304 1440:152200 32:152098 1152:152075 3520:151874 2208:151152 1344:150050 2944:149475 2618:129402 3498:73163 1512:55358 2294:54812 1367:47566 1391:42476 2386:39757 410:39403 321:39402 3606:39393 1640:39372 1480:35907 3003:35450 381:34581 3614:34455 1733:34339 2991:34339 1063:34296 867:34270 714:34265 1547:34262 2017:34259 618:34226 3656:34219 2350:34217 1036:34204 4027:34203 3265:34202 2140:34201 1174:34201 696:26968 712:25660 1890:22681 1736:22549 1432:22343 1720:21002 2248:19816 3551:19751 1700:19714 1137:19702 2514:19698 2232:19698 1448:19579 1977:18610 2239:17125 340:17124 2581:17108 4011:17105 2410:17104 3527:17102 595:17100 894:17100 3080:16917 2936:16027 3976:16015 2920:15324 2328:15151 3960:15143 3768:15039 2331:14841 1796:14827 3752:14778 1877:14590 1802:14266 2371:13079 2475:12012 2425:11386 2252:10805 2025:10707 2332:10249 2367:10182 2776:9992 2308:9897 2762:9517 1938:9344 2419:9197 2244:9182 413:9163 3416:8699 2440:8603 296:8562 280:8558 2236:8532 2257:8230 2262:8129 2267:8103 2281:7967 2295:7838 3432:7710 2343:7565 2337:7429 2507:7401 324:7363 428:7330 2393:7325 2359:7313 2348:7296 331:7133 3085:7120 2378:7105 2385:7041 2772:6995 2403:6821 2431:6764 2572:6735 1895:6661 3784:6589 1844:6582 2392:6503 2787:6462 1850:6301 3518:6266 2600:6196 2584:6175 2457:6152 2467:6018 2495:5889 2882:5888 2399:5826 2484:5826 888:5704 2516:5634 2526:5568 1656:5552 2583:5550 255:5506 2538:5449 2796:5442 3455:5430 3557:5385 2575:5380 2568:5319 1635:5318 2553:5318 2798:5251 
2638:5215 3173:5212 2630:5202 3460:5200 2802:5199 785:5140 2407:5011 3352:5002 1928:4966 2593:4937 2605:4930 3400:4928 3336:4926 2859:4916 1721:4912 3105:4847 2490:4842 2615:4812 2646:4810 2414:4785 1827:4749 2809:4748 2622:4737 3490:4510 2657:4493 1857:4489 2933:4483 1311:4455 2666:4448 2819:4374 3427:4369 1483:4364 2706:4339 1323:4336 2159:4331 2679:4297 2266:4236 2686:4224 1385:4217 2714:4169 2693:4160 2725:4142 2698:4119 2824:4101 1458:4088 1490:4069 1231:4031 1366:3962 2757:3928 3000:3902 2749:3894 1400:3884 2734:3843 2978:3799 2741:3779 2086:3766 2786:3692 1893:3644 1410:3630 2829:3628 3177:3586 1291:3581 2009:3581 2842:3529 2810:3529 3175:3521 1498:3482 2821:3459 1505:3450 1305:3446 2435:3396 1423:3396 1961:3392 1317:3386 3124:3315 1333:3309 2851:3271 1338:3268 1348:3231 1516:3226 648:3221 1343:3201 1357:3197 1353:3159 2869:3136 1374:3104 1362:3101 1754:3088 1079:3083 2529:3073 1406:3065 1245:3026 2029:3026 2536:3016 1646:3012 1626:3012 2889:3011 2844:3001 1470:2969 1395:2968 1235:2958 2892:2947 1427:2934 1662:2924 2451:2919 1695:2916 1705:2914 2959:2910 1677:2909 1915:2909 1762:2892 2907:2885 2760:2875 2166:2861 2946:2861 2922:2854 2712:2846 2877:2819 2286:2810 1500:2802 2739:2797 2856:2795 1805:2786 1444:2784 2186:2775 1789:2774 1817:2769 2938:2759 1521:2739 1847:2726 2459:2723 1484:2708 2543:2705 1837:2703 1491:2687 1865:2654 1873:2641 2958:2640 3209:2632 1237:2631 1511:2629 1924:2621 1886:2620 1561:2605 1896:2604 2235:2596 1044:2579 1535:2577 2732:2568 3074:2563 1172:2557 1539:2550 1944:2541 1992:2541 1551:2538 1176:2537 1934:2536 1545:2526 2726:2520 3496:2519 1953:2505 2968:2501 2468:2496 1832:2493 1969:2487 2867:2482 1618:2474 1689:2472 1571:2469 1982:2467 2040:2432 1586:2430 3711:2424 2055:2413 2082:2395 1601:2394 3288:2388 1526:2384 3361:2381 2068:2363 3083:2355 1112:2354 1660:2347 2994:2344 2098:2338 2985:2325 2113:2320 1651:2318 3090:2301 2121:2299 2136:2285 1671:2265 2148:2264 3098:2240 1682:2238 1784:2236 2206:2230 1799:2225 1708:2202 3114:2197 
2225:2195 3004:2177 1702:2134 2299:2129 2279:2126 360:2122 3011:2114 3618:2110 2324:2109 1739:2107 2616:2090 1714:2090 2498:2086 1898:2079 2408:2077 1868:2063 1845:2061 2334:2057 2357:2043 1542:2032 1731:2030 2394:2028 2380:2025 3131:2017 3078:2016 3269:2008 2458:2004 2878:2003 1750:1987 2422:1973 3139:1962 1637:1961 3033:1957 1761:1952 2745:1948 2578:1935 3146:1935 1772:1926 72:1922 2694:1917 2729:1916 3097:1902 1550:1896 3610:1894 2505:1889 3286:1888 1105:1887 2520:1875 3272:1868 3068:1858 3153:1853 1810:1849 3160:1843 2559:1836 610:1829 66:1828 1111:1820 3096:1817 3084:1807 2597:1807 1960:1800 1116:1786 2806:1784 56:1782 3167:1781 3848:1775 1851:1774 1882:1773 1126:1771 2636:1768 2818:1767 2888:1764 2663:1758 1528:1757 77:1754 1140:1747 1559:1746 2677:1739 2904:1737 3064:1723 1875:1696 1645:1680 87:1672 727:1671 101:1662 314:1648 1462:1642 2780:1633 2897:1630 1566:1626 3197:1624 133:1621 1404:1616 2871:1615 302:1604 113:1601 2915:1601 2680:1598 1575:1591 3799:1584 1495:1580 2485:1575 2519:1575 3203:1568 2860:1566 1018:1564 2845:1564 2937:1552 2926:1544 3099:1538 1186:1523 1948:1521 2957:1520 1906:1513 2893:1508 2564:1507 2665:1505 3512:1502 2563:1493 147:1476 3544:1473 3142:1470 1914:1463 1256:1460 3215:1459 3221:1454 1194:1452 3464:1446 1752:1445 4072:1441 3029:1434 1926:1432 1955:1428 1912:1416 2996:1416 2948:1414 3116:1409 1115:1405 1657:1403 2987:1397 1324:1380 2970:1377 3015:1377 2077:1367 1201:1353 155:1345 3126:1344 488:1341 1964:1328 162:1324 1217:1320 2175:1320 3672:1310 3005:1309 3073:1305 3528:1289 1576:1287 178:1286 1987:1284 2997:1276 1278:1264 1273:1259 2523:1259 3037:1258 2984:1232 3227:1232 1252:1229 2183:1226 4040:1224 2503:1195 1998:1193 213:1187 2021:1186 3243:1183 3235:1181 2914:1162 3180:1161 2060:1160 239:1155 234:1152 2664:1140 2497:1136 1396:1127 1213:1126 3149:1125 2050:1122 3166:1122 1666:1120 3194:1116 3225:1110 3154:1106 2908:1103 2035:1102 246:1101 1285:1089 3681:1078 3278:1077 3253:1070 3176:1065 1298:1064 3240:1059 1544:1042 
3254:1040 3261:1040 3169:1037 2967:1026 259:1026 1588:1024 2065:1016 2024:1012 3234:1012 3277:997 1384:992 2754:988 1198:986 3362:985 1340:980 287:973 3293:971 3188:968 3522:968 284:963 3183:962 3316:960 3201:942 3339:942 3690:942 301:930 1356:926 1997:909 2388:907 2090:904 3447:902 3391:901 317:899 2212:897 3422:888 1599:882 327:870 3478:869 920:869 2106:857 1435:856 3640:849 2547:845 1675:840 2097:836 2506:836 1624:835 3217:835 345:834 3491:828 3559:827 164:823 3506:817 1768:810 2731:806 3223:805 365:801 2956:799 3700:792 2648:789 3525:787 2109:785 376:782 1457:772 1415:769 3541:767 2117:756 2124:750 418:748 3242:742 600:737 4002:736 282:735 3577:735 2509:726 168:725 2270:722 3599:717 437:716 136:713 396:707 2132:706 3611:703 232:698 470:694 184:684 3627:684 562:670 1476:662 2303:657 415:656 2141:654 2155:648 2351:645 2492:643 1508:640 3560:626 3128:614 3112:612 1519:605 1497:595 458:581 334:579 3249:576 2194:574 1052:560 1683:560 3718:556 3297:550 3914:549 469:548 748:546 3908:544 1836:533 1925:526 2205:526 1534:523 2872:521 1555:518 2376:515 3260:513 959:498 1016:495 1842:495 3947:494 1032:493 392:481 495:480 2216:479 2428:476 2184:466 516:465 536:465 2687:460 2684:458 2748:457 900:439 1976:429 2289:422 2405:420 2265:420 3065:419 2316:417 2733:416 882:402 3989:401 3725:397 3305:392 2260:389 3300:387 913:382 344:382 2275:378 2296:374 1983:373 3944:373 1434:371 2833:370 308:369 2255:367 40:365 2411:364 514:360 892:360 1688:357 2233:356 2840:352 2345:351 3928:350 874:349 2012:349 1841:348 2483:348 883:347 2588:344 2356:343 3763:343 2610:342 1452:342 2580:342 4007:341 2360:340 388:338 2298:338 3736:336 1774:336 1909:336 2602:335 1814:334 1889:334 1821:333 1831:333 1879:333 2322:330 3318:329 2401:328 2476:327 2424:322 1988:322 2775:321 3311:321 4089:320 2596:315 2433:313 854:313 2504:313 760:312 2722:312 2696:311 2676:311 2456:310 1378:310 2891:309 3230:309 3320:307 3047:303 2644:298 3738:297 2465:293 2613:293 3107:291 2660:291 2292:290 2652:290 2850:288 2825:288 
2515:286 906:284 2988:283 862:280 3332:277 3745:277 1153:274 1145:274 1268:273 2668:272 1864:269 1320:269 2534:268 1416:267 1748:267 2001:267 2546:266 2524:266 1117:263 2311:263 2264:261 2695:260 1197:260 2280:260 915:253 616:252 2561:250 2218:249 1240:244 104:244 347:243 1543:243 968:240 2841:240 159:239 2591:237 1138:236 1220:235 1411:235 2601:233 2623:232 2721:232 684:228 1426:224 2740:224 3774:222 2756:219 3368:218 1005:217 2674:216 1093:216 2274:216 2654:215 2770:214 3133:214 987:212 1887:212 1171:212 2977:212 1160:210 809:210 1205:210 1903:210 803:209 1181:209 1259:209 1309:209 1229:208 1236:208 475:207 679:207 978:207 3156:207 3513:206 1464:205 3162:205 2701:202 3195:202 3208:200 1554:200 1033:200 2767:200 3834:196 3081:196 2789:195 2935:194 3053:194 2781:192 3839:191 996:190 1028:190 124:188 1421:188 2793:187 547:184 2801:184 1468:183 2742:183 457:182 387:181 706:180 1999:179 2794:179 2152:179 3896:179 1854:178 3032:178 1531:177 2765:176 1496:175 3117:175 3669:175 3172:172 2659:172 3340:172 2805:171 2847:171 2853:170 2817:169 703:169 2813:169 2586:168 3086:168 1038:167 601:165 2868:165 3526:165 1901:164 2008:164 1598:164 2716:163 2811:163 2004:162 2552:161 3534:160 2200:158 1439:158 2570:158 2954:158 3780:158 1447:157 3897:156 3035:156 2620:154 3135:154 3576:153 106:153 1015:151 1533:150 2876:150 3141:149 1382:148 3089:148 3092:148 3204:148 3245:148 3256:147 777:147 675:146 180:146 3165:145 682:144 694:144 3001:144 3363:144 199:143 471:143 1430:143 1612:142 728:142 2966:142 4050:141 652:141 4008:140 930:140 493:140 811:140 1099:140 1471:140 1364:139 699:139 1515:139 2246:139 2900:139 1100:138 2870:138 643:137 1524:136 2885:135 1390:135 2941:135 3835:135 1473:134 3945:134 893:133 934:133 2930:133 3012:132 1581:132 3054:132 312:131 2344:130 985:130 4074:130 977:129 534:128 613:128 1401:128 3905:128 456:127 657:127 2986:127 1755:126 2766:126 2790:126 3002:126 1326:124 2759:124 2898:124 3006:124 3612:124 3935:124 1486:123 2803:122 942:122 3977:122 406:121 
2746:121 3341:121 3470:121 1130:120 2502:120 1967:119 2909:119 4033:117 3314:117 3:116 143:116 398:116 1517:116 2846:116 2964:116 3870:116 19:115 375:115 567:115 1142:115 1479:115 2808:115 2831:115 3847:115 3879:114 3864:113 2594:113 3675:113 3862:113 3996:112 677:112 721:112 1425:112 3892:112 1608:111 1724:111 2852:111 2903:111 3438:111 1557:110 570:110 2815:110 2751:109 2993:109 3921:109 3950:109 225:108 627:108 1113:108 1564:108 61:107 371:107 667:107 2587:107 3964:107 4014:106 691:106 745:106 1087:106 2228:106 1573:105 2990:105 3773:105 3789:105 1048:104 554:104 642:104 711:104 1594:104 2865:104 2133:103 2761:103 3151:103 1634:102 685:102 1704:101 884:101 4065:101 1316:100 2773:100 2942:100 2953:100 2969:100 3019:100 3237:100 2785:99 34:99 598:99 1659:99 1691:99 734:98 1644:98 1804:98 2857:98 2755:97 1812:97 1963:97 1560:96 689:96 1674:96 3026:96 2242:95 807:95 3041:95 26:94 499:94 289:93 2952:93 3076:92 43:92 279:92 2491:92 2947:92 2998:91 3013:91 52:91 2981:91 520:90 98:90 41:90 1769:90 2962:90 3191:90 3421:90 338:89 94:89 3113:89 3755:89 3147:88 1794:88 519:88 1042:88 84:88 463:88 3059:88 120:87 559:87 1096:86 200:86 389:86 1337:86 3213:86 3020:85 3155:85 2879:84 1775:84 2631:84 2982:84 3143:84 1370:83 158:83 359:83 746:83 1579:83 1781:83 1787:83 1900:83 2198:83 3010:82 173:82 655:82 1567:82 1979:82 2220:82 2934:82 188:81 1823:81 2590:81 3014:81 3103:81 257:80 737:80 1918:80 3193:80 2251:79 205:79 197:79 545:79 591:79 723:79 3042:79 2466:78 831:78 993:78 1301:78 1834:78 1930:78 2837:78 3268:78 23:77 148:77 403:77 1222:77 1352:77 1941:77 2901:77 3023:77 671:76 801:76 1091:76 1284:76 3250:76 3494:76 1596:75 587:75 218:75 806:75 1058:75 3122:75 3174:75 3310:75 581:74 763:74 450:74 582:74 753:74 788:74 1013:74 1310:74 1883:74 1409:73 191:73 267:73 910:73 924:73 1405:73 3137:73 3691:73 821:72 902:72 1175:72 1469:72 3070:72 3461:72 1224:71 842:71 1281:71 1431:71 1641:71 3374:71 1330:70 3100:70 204:70 295:70 399:70 527:70 2473:70 3500:70 3846:70 1114:69 3236:69 
3186:69 278:69 661:69 755:69 858:69 961:69 1286:69 1306:69 1540:69 1693:69 3570:69 730:68 322:68 491:68 792:68 876:68 945:68 2182:68 3163:68 3629:68 461:67 1299:67 664:67 851:67 852:67 861:67 922:67 1165:67 1211:67 1336:67 1971:67 2123:67 3798:67 1729:67 30:66 632:66 361:66 669:66 765:66 829:66 955:66 972:66 1029:66 1129:66 1233:66 1442:66 1450:66 3130:66 3507:66 451:65 2902:65 370:65 481:65 501:65 617:65 866:65 886:65 916:65 929:65 950:65 965:65 1049:65 1066:65 1076:65 1354:65 2139:65 2249:65 3134:65 3170:65 2482:64 92:64 483:64 506:64 687:64 819:64 841:64 908:64 970:64 1017:64 1039:64 1043:64 1178:64 1185:64 1204:64 3123:64 3214:64 3238:64 994:63 297:63 487:63 980:63 2285:63 3259:62 2011:62 1698:62 235:62 715:62 999:62 1966:62 2202:62 3144:62 3161:62 3224:62 3367:62 258:61 1602:61 6:61 478:61 479:61 538:61 1625:61 3182:61 3517:61 60:60 244:60 507:60 1050:60 1413:60 1589:60 2195:60 2302:60 3550:60 3676:60 411:59 422:59 1060:59 3219:59 153:58 369:58 584:58 1059:58 1074:58 1341:58 1437:58 3043:58 530:57 39:57 55:57 549:57 635:57 1446:57 1478:57 1929:57 2541:57 1880:56 226:56 50:56 131:56 262:56 462:56 937:56 1068:56 1134:56 1147:56 1241:56 1389:56 1403:56 1532:56 1917:56 2354:56 3655:56 984:55 78:55 1086:55 1155:55 2028:55 2033:55 2037:55 2041:55 2259:55 3060:55 82:54 552:54 3276:54 65:54 116:54 1159:54 1460:54 1609:54 1892:54 3205:54 3258:54 3663:54 1482:53 114:53 1591:53 1081:53 1668:53 1758:53 3247:53 3284:53 3301:53 2421:52 154:52 523:52 535:52 2119:52 2886:52 3279:52 3887:52 1800:51 76:51 212:51 509:51 2921:51 4005:51 3724:50 81:50 95:50 209:50 637:50 1209:50 2187:50 3025:50 3389:50 604:49 2138:49 579:49 1228:49 1477:49 2071:49 2094:49 3493:49 1380:48 566:48 619:48 1461:48 1974:48 3062:48 3313:48 3322:48 3326:48 1247:47 109:47 1260:47 1710:47 3403:47 409:46 1258:46 589:46 622:46 630:46 663:46 710:46 1125:46 1453:46 2108:46 2906:46 2995:46 3342:46 3378:46 3786:46 3502:45 1830:45 274:45 99:45 107:45 614:45 1265:45 1289:45 2199:45 2207:45 3196:45 1351:44 441:44 
3048:44 111:44 2204:44 2214:44 3353:44 2323:43 319:43 3901:43 770:43 653:43 654:43 1302:43 2625:43 3409:43 3523:43 3723:42 3441:42 172:42 318:42 2163:42 3706:42 4069:42 568:41 1522:41 126:41 166:41 329:41 662:41 1398:41 1622:41 2346:41 2916:41 3372:41 3425:41 4076:41 2434:40 1510:40 169:40 1412:40 1489:40 2049:40 2150:40 2153:40 2238:40 3451:40 3877:40 71:39 1935:39 2170:39 3666:39 134:39 1501:39 1606:39 2230:39 2397:38 2446:38 2792:38 189:38 323:38 435:38 665:38 670:38 1487:38 2341:38 2366:38 3231:38 4081:38 299:37 2488:37 3157:37 186:37 203:37 217:37 221:37 339:37 797:37 904:37 1503:37 1541:37 1623:37 1685:37 2258:37 2499:37 3679:37 3754:37 29:36 2072:36 264:36 3688:36 183:36 211:36 404:36 571:36 713:36 1518:36 1633:36 2135:36 3467:36 3589:36 3714:36 4024:36 2387:35 2415:35 3138:35 3330:35 170:35 366:35 747:35 1144:35 1663:35 2151:35 2254:35 2273:35 3643:35 3646:35 3807:35 3867:35 4013:35 2312:35 3492:34 431:34 2070:34 1314:34 119:34 276:34 349:34 674:34 1655:34 1676:34 2471:34 2923:34 3433:34 3481:34 3510:34 45:34 1565:33 3907:33 434:33 252:33 298:33 510:33 607:33 724:33 769:33 873:33 1027:33 1070:33 1558:33 1652:33 1658:33 1667:33 2161:33 2179:33 2363:33 2573:33 3244:33 3274:33 3598:33 3875:33 4093:33 680:32 2373:32 1:32 7:32 219:32 444:32 623:32 756:32 780:32 1327:32 1572:32 1649:32 2084:32 2115:32 2190:32 2342:32 2599:32 2963:32 3532:32 3756:32 3574:31 3127:31 3578:31 2197:31 4056:31 149:31 163:31 358:31 473:31 1678:31 2277:31 3585:31 3591:31 3595:31 3685:31 3880:31 4009:31 270:30 2310:30 9:30 139:30 193:30 335:30 373:30 606:30 815:30 827:30 1643:30 1699:30 3189:30 3538:30 3581:30 3638:30 3771:30 3895:30 2455:29 4045:29 401:29 2168:29 3720:29 3746:29 194:29 90:29 138:29 485:29 2943:29 3308:29 3509:29 3803:29 3845:29 4028:29 3794:28 820:28 3508:28 58:28 261:28 269:28 426:28 839:28 1218:28 1630:28 1650:28 1737:28 1742:28 3298:28 3567:28 3701:28 3815:28 3938:28 3949:28 4067:28 2379:28 3730:27 531:27 564:27 644:27 701:27 1347:27 1358:27 1722:27 1745:27 1779:27 
3179:27 3257:27 3292:27 3304:27 3476:27 3542:27 3580:27 3608:27 3820:27 3874:27 3930:27 3998:27 216:26 1684:26 3602:26 17:26 51:26 67:26 117:26 220:26 227:26 325:26 508:26 1605:26 1673:26 1692:26 2571:26 3533:26 3547:26 3566:26 3617:26 3623:26 3836:26 3913:26 3971:26 3983:26 4026:26 4042:26 4044:26 2449:25 1538:25 2442:25 3266:25 1778:25 179:25 196:25 548:25 574:25 844:25 856:25 875:25 1707:25 1785:25 3621:25 3726:25 2429:24 2447:24 2427:24 2418:24 1250:24 2178:24 424:24 553:24 732:24 836:24 849:24 868:24 1619:24 1756:24 2227:24 3613:24 3735:24 4047:24 3515:23 18:23 129:23 2531:23 3594:23 1368:23 13:23 121:23 1743:23 1921:23 2278:23 2333:23 2374:23 2621:23 3590:23 3686:22 2486:22 489:22 1149:22 2099:22 2728:22 3556:22 3698:22 145:22 243:22 542:22 1730:22 2300:22 2518:22 3290:22 3667:22 3737:22 3953:22 3982:22 4058:22 2469:21 455:21 2738:21 1746:21 1102:21 477:21 911:21 933:21 1765:21 1801:21 2413:21 2611:21 2750:21 3349:21 3637:21 3670:21 3963:21 4:20 2477:20 2438:20 2658:20 49:20 890:20 899:20 931:20 1345:20 1381:20 1790:20 1943:20 2085:20 2617:20 2633:20 3067:20 3468:20 3592:20 3687:20 2858:19 3543:19 2675:19 3031:19 332:19 419:19 609:19 919:19 1123:19 1445:19 1807:19 2110:19 2317:19 2335:19 2710:19 2961:19 2965:19 2971:19 2999:19 3052:19 3321:19 3338:19 3692:19 4021:19 2165:19 3480:19 2330:18 2383:18 326:18 1234:18 247:18 447:18 575:18 588:18 947:18 949:18 1620:18 1786:18 2282:18 2911:18 3271:18 3346:18 3430:18 3579:18 3631:18 3702:18 3703:18 3985:18 313:17 1537:17 2472:17 2681:17 3767:17 2430:17 468:17 1819:17 2104:17 2769:17 504:17 3202:17 1170:17 46:17 62:17 214:17 356:17 476:17 505:17 518:17 569:17 594:17 708:17 729:17 971:17 1738:17 1937:17 2241:17 2554:17 2683:17 2797:17 2814:17 2843:17 3017:17 3082:17 3291:17 3331:17 3402:17 3539:17 3604:17 3657:17 3668:17 3713:17 3734:17 3765:17 3855:17 3876:17 3918:17 3926:17 4004:17 4053:17 4090:17 1249:17 2461:16 3486:16 151:16 4018:16 490:16 1025:16 1494:16 1611:16 1828:16 2181:16 3665:16 3796:16 3806:16 4083:16 
4091:16 2059:16 2609:16 990:15 2450:15 2478:15 817:15 290:15 3351:15 3650:15 245:15 417:15 558:15 636:15 1773:15 2223:15 2325:15 2474:15 3395:15 3575:15 4094:15 2209:15 2395:15 2217:14 2747:14 3586:14 3823:14 995:14 1073:14 705:14 1208:14 2530:14 22:14 175:14 281:14 1681:14 1690:14 1766:14 2005:14 2365:14 2577:14 2626:14 2807:14 3018:14 3355:14 3453:14 3708:14 3729:14 2339:14 2391:14 3335:14 2101:14 2126:14 2167:14 2219:14 2355:14 2375:14 2444:13 1965:13 2692:13 2932:13 626:13 1959:13 2647:13 3061:13 3704:13 260:13 285:13 309:13 555:13 1523:13 1843:13 2046:13 2691:13 3050:13 3111:13 3252:13 3384:13 3482:13 44:12 59:12 2250:12 3882:12 3954:12 3396:12 3282:12 2162:12 3397:12 31:12 36:12 692:12 897:12 1139:12 1788:12 1867:12 1902:12 2283:12 2549:12 2551:12 2607:12 2974:12 3315:12 3382:12 3511:12 2445:11 402:11 764:11 1613:11 2051:11 2662:11 3865:11 1167:11 2237:11 2628:11 583:11 3747:11 1950:11 2875:11 3436:11 1031:11 354:11 367:11 511:11 1051:11 1107:11 1379:11 1638:11 1670:11 1881:11 1975:11 2508:11 2604:11 2713:11 3220:11 2018:11 3871:11 1946:10 2558:10 2699:10 3405:10 3529:10 3549:10 3561:10 3739:10 3753:10 3809:10 3828:10 3893:10 4055:10 3531:10 3956:10 3973:10 2951:10 3970:10 1874:10 223:10 573:10 651:10 798:10 853:10 1199:10 1207:10 1307:10 1342:10 1481:10 1556:10 1574:10 1822:10 1829:10 1931:10 1940:10 2007:10 2836:10 3044:10 3206:10 3477:10 3795:10 805:10 1991:10 2614:10 3185:10 3419:10 3587:10 2326:9 2210:9 420:9 2437:9 1090:9 4019:9 744:9 3748:9 2955:9 962:9 3605:9 4082:9 42:9 103:9 263:9 342:9 346:9 605:9 650:9 697:9 1019:9 1065:9 1221:9 1270:9 1276:9 1372:9 1394:9 1995:9 3095:9 3228:9 3295:9 3386:9 3393:9 3775:9 895:9 1402:9 1939:9 2042:9 2201:9 3063:9 3145:9 4001:9 2453:8 2470:8 2533:8 1397:8 2489:8 3211:8 2439:8 1826:8 4063:8 328:8 739:8 1463:8 3483:8 498:8 3158:8 1346:8 210:8 1858:8 3682:8 2370:8 3922:8 429:8 835:8 1035:8 1150:8 1226:8 1292:8 1359:8 1459:8 1617:8 1907:8 1945:8 2063:8 2705:8 2735:8 2743:8 3075:8 3140:8 3212:8 3289:8 3303:8 3554:8 3569:8 
3741:8 3758:8 187:8 265:8 762:8 891:8 2358:8 2565:8 2703:8 3603:8 3883:8 2309:7 1855:7 2045:7 3444:7 3924:7 1529:7 4003:7 749:7 1816:7 2632:7 3475:7 3962:7 615:7 1122:7 2642:7 754:7 37:7 118:7 229:7 343:7 379:7 645:7 707:7 738:7 773:7 818:7 1011:7 1891:7 1905:7 2038:7 2069:7 2145:7 2169:7 3057:7 3404:7 3829:7 3941:7 3957:7 3993:7 4034:7 1436:7 1451:7 2143:7 2261:7 2887:7 2905:7 3069:7 3671:7 3843:7 2454:6 2460:6 593:6 2532:6 35:6 53:6 603:6 735:6 1355:6 2917:6 3429:6 3912:6 4092:6 1764:6 668:6 776:6 3733:6 4035:6 4088:6 1859:6 3109:6 1154:6 658:6 2402:6 283:6 408:6 526:6 602:6 758:6 1083:6 1141:6 1813:6 2019:6 2074:6 2083:6 2111:6 2180:6 2511:6 2527:6 2537:6 2715:6 2835:6 3192:6 3348:6 3407:6 3503:6 3582:6 3911:6 4087:6 293:6 577:6 778:6 855:6 1054:6 1082:6 1143:6 1422:6 1475:6 1686:6 2185:6 2539:6 2667:6 2719:6 3051:6 3323:6 3505:6 3639:6 3859:6 3946:6 3961:6 3980:6 2107:6 2931:6 3365:5 2443:5 439:5 3094:5 3459:5 3485:5 3909:5 2441:5 1833:5 2436:5 3929:5 440:5 472:5 580:5 1592:5 2122:5 2975:5 3150:5 3489:5 3800:5 337:5 3285:5 3906:5 130:5 975:5 3858:5 3890:5 722:5 3826:5 15:5 254:5 446:5 563:5 781:5 787:5 812:5 823:5 863:5 885:5 1034:5 1109:5 1148:5 1180:5 1687:5 1701:5 1767:5 1884:5 1947:5 1954:5 2002:5 2003:5 2061:5 2127:5 2245:5 2463:5 2585:5 2595:5 2603:5 2669:5 2671:5 2682:5 2828:5 2838:5 3066:5 3101:5 3190:5 3241:5 3262:5 3306:5 3379:5 3448:5 3583:5 3609:5 3619:5 3853:5 3933:5 3978:5 185:5 351:5 425:5 465:5 741:5 782:5 997:5 1007:5 1295:5 2118:5 2120:5 2154:5 2191:5 2522:5 2535:5 2562:5 2718:5 2919:5 3423:5 3571:5 3749:5 3772:5 3783:5 4059:5 4085:5 2149:5 2177:5 3727:5 2291:4 2329:4 2426:4 2417:4 2894:4 3333:4 414:4 631:4 974:4 1046:4 1196:4 1546:4 1569:4 2056:4 2067:4 2089:4 2462:4 3115:4 3412:4 3418:4 3742:4 3814:4 3866:4 3988:4 231:4 2827:4 3045:4 195:4 1642:4 2189:4 2555:4 3091:4 3118:4 8:4 24:4 152:4 306:4 513:4 533:4 1064:4 1304:4 1835:4 1933:4 1989:4 3469:4 3722:4 3790:4 3778:4 946:4 2778:4 2834:4 3058:4 3426:4 3442:4 3458:4 2:4 898:4 2315:4 5:4 198:4 
215:4 233:4 292:4 300:4 363:4 482:4 521:4 539:4 767:4 771:4 775:4 837:4 938:4 953:4 1026:4 1047:4 1166:4 1283:4 1335:4 1419:4 1443:4 1583:4 1709:4 1735:4 1793:4 1849:4 1899:4 1942:4 2134:4 2481:4 2598:4 2690:4 2862:4 2972:4 2983:4 3121:4 3345:4 3457:4 3487:4 3902:4 4023:4 4052:4 4073:4 4084:4 21:4 100:4 171:4 271:4 330:4 436:4 529:4 621:4 638:4 659:4 757:4 878:4 905:4 1119:4 1124:4 1262:4 1290:4 1365:4 1393:4 1562:4 1577:4 1628:4 2031:4 2093:4 2229:4 2321:4 2404:4 2494:4 2548:4 2550:4 2582:4 2635:4 2651:4 2685:4 2753:4 2804:4 3129:4 3178:4 3181:4 3294:4 3573:4 3597:4 3624:4 3630:4 3751:4 3838:4 3850:4 3854:4 4010:4 4041:4 4071:4 881:4 2103:4 2510:4 2579:4 3659:4 3939:4 2423:3 2398:3 2420:3 3414:3 2062:3 2081:3 2521:3 2689:3 2711:3 3884:3 3898:3 135:3 273:3 2574:3 453:3 494:3 717:3 750:3 824:3 840:3 935:3 952:3 1272:3 1672:3 2629:3 2890:3 3077:3 649:3 1506:3 57:3 91:3 241:3 307:3 315:3 386:3 405:3 430:3 541:3 556:3 786:3 789:3 825:3 828:3 845:3 869:3 871:3 925:3 932:3 941:3 957:3 963:3 1030:3 1089:3 1127:3 1132:3 1151:3 1189:3 1190:3 1192:3 1210:3 1223:3 1253:3 1315:3 1332:3 1349:3 1399:3 1420:3 1530:3 1563:3 1725:3 1751:3 1770:3 1838:3 1870:3 1932:3 1973:3 1986:3 1993:3 2013:3 2057:3 2088:3 2129:3 2306:3 2362:3 2556:3 2567:3 2650:3 2670:3 2697:3 2709:3 2777:3 2795:3 2812:3 2863:3 2873:3 2910:3 2913:3 2925:3 2929:3 2939:3 2940:3 3102:3 3370:3 3443:3 3462:3 3535:3 3553:3 3564:3 3601:3 3660:3 3990:3 4051:3 167:3 207:3 250:3 357:3 380:3 433:3 515:3 565:3 599:3 681:3 700:3 702:3 791:3 795:3 808:3 834:3 850:3 887:3 901:3 917:3 954:3 981:3 986:3 1006:3 1061:3 1098:3 1118:3 1169:3 1188:3 1279:3 1339:3 1386:3 1388:3 1465:3 1514:3 1582:3 1610:3 1716:3 1719:3 1734:3 1763:3 1803:3 1958:3 1981:3 2271:3 2287:3 2347:3 2369:3 2501:3 2545:3 2606:3 2655:3 2727:3 2826:3 2855:3 2899:3 3034:3 3222:3 3229:3 3283:3 3307:3 3377:3 3390:3 3437:3 3450:3 3710:3 3779:3 3788:3 3802:3 3804:3 3831:3 3842:3 3863:3 3927:3 3965:3 3966:3 4060:3 174:3 412:3 629:3 751:3 804:3 1002:3 1010:3 1022:3 
1077:3 1128:3 1187:3 1243:3 1639:3 1923:3 2945:3 3299:3 3319:3 3381:3 3413:3 3699:3 3787:3 3857:3 3899:3 3925:3 3967:3 3981:3 2452:2 2327:2 551:2 2409:2 3287:2 2406:2 2412:2 275:2 467:2 612:2 286:2 442:2 731:2 872:2 1000:2 1474:2 2390:2 3022:2 3049:2 3497:2 3816:2 1261:2 1970:2 1202:2 14:2 108:2 242:2 266:2 355:2 474:2 572:2 625:2 683:2 774:2 796:2 921:2 927:2 936:2 939:2 951:2 1003:2 1004:2 1094:2 1097:2 1157:2 1183:2 1212:2 1293:2 1325:2 1363:2 1373:2 1414:2 1441:2 1525:2 1570:2 1647:2 1679:2 1718:2 1740:2 1747:2 1771:2 1861:2 1866:2 1871:2 1897:2 1922:2 1951:2 1978:2 2010:2 2022:2 2026:2 2034:2 2054:2 2147:2 2171:2 2215:2 2381:2 2619:2 2707:2 2763:2 2774:2 3016:2 3030:2 3119:2 3199:2 3210:2 3325:2 3327:2 3356:2 3388:2 3394:2 3435:2 3521:2 3524:2 3615:2 3634:2 3644:2 3645:2 3674:2 3677:2 3782:2 3810:2 3811:2 3919:2 3943:2 4017:2 4043:2 4049:2 10:2 11:2 110:2 140:2 190:2 201:2 251:2 310:2 333:2 341:2 350:2 378:2 393:2 397:2 407:2 532:2 585:2 586:2 597:2 690:2 726:2 814:2 847:2 877:2 903:2 914:2 973:2 988:2 1067:2 1095:2 1133:2 1162:2 1191:2 1193:2 1206:2 1242:2 1246:2 1274:2 1288:2 1308:2 1375:2 1454:2 1527:2 1548:2 1595:2 1615:2 1715:2 1791:2 1839:2 1860:2 1911:2 1927:2 1980:2 2006:2 2076:2 2091:2 2174:2 2253:2 2301:2 2313:2 2353:2 2377:2 2479:2 2487:2 2540:2 2649:2 2661:2 2700:2 2702:2 2723:2 2881:2 3009:2 3164:2 3239:2 3246:2 3495:2 3499:2 3572:2 3620:2 3628:2 3695:2 3731:2 3740:2 3801:2 3825:2 3837:2 3885:2 3886:2 3934:2 3937:2 3955:2 3975:2 3979:2 3992:2 3995:2 4006:2 4022:2 4054:2 27:2 93:2 268:2 421:2 686:2 761:2 793:2 833:2 1121:2 1239:2 1257:2 1269:2 1578:2 1580:2 1636:2 1665:2 1732:2 1809:2 2027:2 2157:2 2269:2 2307:2 2338:2 2758:2 2980:2 3171:2 3251:2 3275:2 3324:2 3364:2 3841:2 3851:2 3891:2 4030:2 2319:1 2297:1 230:1 466:1 497:1 502:1 1161:1 1972:1 2114:1 2744:1 3827:1 3844:1 3218:1 3762:1 70:1 102:1 150:1 165:1 206:1 248:1 427:1 492:1 503:1 517:1 524:1 525:1 596:1 646:1 693:1 733:1 740:1 779:1 802:1 813:1 830:1 857:1 909:1 926:1 943:1 958:1 967:1 
1020:1 1041:1 1075:1 1078:1 1085:1 1103:1 1158:1 1177:1 1227:1 1238:1 1244:1 1263:1 1275:1 1277:1 1387:1 1407:1 1429:1 1433:1 1455:1 1466:1 1492:1 1493:1 1509:1 1585:1 1607:1 1629:1 1669:1 1697:1 1711:1 1727:1 1741:1 1753:1 1783:1 1795:1 1811:1 1878:1 1910:1 1985:1 1994:1 2023:1 2052:1 2078:1 2105:1 2116:1 2130:1 2137:1 2146:1 2156:1 2164:1 2193:1 2234:1 2542:1 2639:1 2643:1 2645:1 2783:1 2820:1 2849:1 2924:1 3036:1 3039:1 3358:1 3366:1 3406:1 3417:1 3516:1 3540:1 3545:1 3596:1 3622:1 3633:1 3642:1 3651:1 3652:1 3654:1 3715:1 3743:1 3791:1 3793:1 3833:1 3860:1 3931:1 3958:1 3994:1 3997:1 4037:1 4066:1 12:1 33:1 63:1 75:1 79:1 97:1 115:1 146:1 249:1 277:1 303:1 364:1 383:1 460:1 540:1 561:1 628:1 647:1 716:1 719:1 725:1 1057:1 1069:1 1146:1 1156:1 1214:1 1319:1 1350:1 1369:1 1449:1 1654:1 1661:1 1694:1 1723:1 1757:1 1777:1 1806:1 1820:1 1825:1 1869:1 1916:1 1949:1 1962:1 2020:1 2030:1 2043:1 2058:1 2073:1 2095:1 2222:1 2231:1 2276:1 2290:1 2293:1 2314:1 2340:1 2349:1 2364:1 2557:1 2678:1 2717:1 2764:1 2866:1 2883:1 2884:1 3007:1 3079:1 3106:1 3159:1 3198:1 3263:1 3317:1 3474:1 3519:1 3548:1 3562:1 3658:1 3709:1 3719:1 3757:1 3812:1 3849:1 3852:1 3873:1 3878:1 3900:1 3903:1 3923:1 3932:1 3987:1 3991:1 4012:1 4039:1 4057:1 4075:1 25:1 88:1 137:1 228:1 294:1 305:1 353:1 385:1 449:1 452:1 634:1 759:1 766:1 794:1 907:1 966:1 989:1 991:1 1009:1 1084:1 1106:1 1131:1 1182:1 1215:1 1230:1 1297:1 1371:1 1485:1 1502:1 1513:1 1604:1 1627:1 1759:1 1852:1 1876:1 1885:1 1913:1 1957:1 2036:1 2053:1 2092:1 2125:1 2196:1 2203:1 2211:1 2243:1 2247:1 2305:1 2361:1 2517:1 2525:1 2569:1 2641:1 2673:1 2708:1 2730:1 2895:1 2927:1 2950:1 3027:1 3093:1 3110:1 3125:1 3270:1 3309:1 3334:1 3343:1 3411:1 3420:1 3428:1 3452:1 3466:1 3558:1 3588:1 3661:1 3683:1 3770:1 3781:1 3818:1 3868:1 3889:1 3894:1 3916:1 3948:1 MEMSET_LEN:CNT 16:65585916 48:10541188 32:7729506 40:3223549 64:1980480 1472:1310438 24:899211 72:875986 56:447112 12:112544 8:106123 1060:50107 4096+:19670 192:19382 648:19306 3:15900 
512:10314 2:7830 15:7050 104:6694 14:6518 4:6375 1312:6014 17:5936 216:5904 13:5336 18:4546 80:4192 11:3352 128:3100 19:2858 96:2783 548:2254 1:2242 256:2128 10:2094 344:2048 2616:2004 136:1563 9:1382 160:1334 112:1284 168:1198 20:1170 200:1144 240:1102 288:1038 416:916 496:876 592:792 712:716 856:628 1024:550 7:504 21:484 3072:466 1232:394 6:342 88:315 704:290 1296:284 3264:270 1480:248 5:218 296:187 22:136 2024:132 536:119 1048:74 816:70 1168:62 1776:56 120:54 2056:52 2048:48 144:46 2128:42 2552:40 3064:40 3680:40 1640:34 1936:30 152:30 1400:30 208:26 384:24 608:24 232:22 184:22 176:20 23:18 360:18 272:14 248:14 27:12 560:12 76:10 124:10 224:10 1224:10 119:8 28:8 320:8 328:8 336:8 400:8 1032:8 37:6 352:6 656:6 25:6 26:6 29:6 480:6 304:6 424:6 1040:5 30:4 464:4 1056:4 392:4 440:4 472:4 672:4 680:4 688:4 728:4 776:4 1136:4 1160:4 984:2 50:2 928:2 1026:2 1887:2 3708:2 4000:2 888:2 936:2 280:2 312:2 408:2 432:2 456:2 528:2 576:2 664:2 848:2 872:2 880:2 960:2 1200:2 1744:2 2088:2 2512:2 568:2 632:2 1288:2 1384:2 1656:2 1816:2 1976:2 2176:2 944:1 2320:1 MEMSET_PG_OFFSET:CNT 1904:2836079 3088:1930028 1952:1794655 2368:1481687 3168:1340034 1888:1291742 3536:1258047 3104:1216888 3360:1198677 3504:1168306 3200:1129211 2128:1102306 3312:1067297 3344:1040996 3472:1034851 2896:1027457 3296:1013114 3488:978729 3072:927314 2288:925692 2576:910999 2688:903392 2880:901774 2304:899793 2248:888310 3328:874560 3280:851583 2672:835242 3720:814187 3552:808646 3520:805500 3056:792177 2864:789943 2320:766209 3456:754603 1072:744337 2336:733514 3184:726754 3264:718677 3040:716874 2848:707874 3152:706834 1616:688951 3824:687483 2656:678554 2160:668467 3376:663826 3248:659225 2352:658050 3136:655463 2640:649332 1984:642495 3440:640381 3024:630634 2832:624178 896:620449 2144:610461 464:602465 2944:601042 752:600994 2624:596366 3648:594958 3216:590911 3696:587756 2608:574383 2928:573092 3424:571287 3392:570260 3232:569283 2816:564760 2112:560753 1808:545256 3120:544031 2800:537038 
2272:534677 1792:531846 2704:525228 2976:523659 3408:522574 2592:494591 2256:493492 2384:488990 2008:487018 2784:479866 2992:477563 3008:476848 2752:457614 1968:455035 2096:421712 2960:412295 1776:389696 2768:383314 1656:377387 2560:362931 1936:361734 1536:361036 2080:324262 4000:308911 2208:282590 1920:280346 2904:276421 2240:276333 1640:274325 3832:272741 824:270461 3600:270127 2392:265409 3576:260568 1760:260055 3464:259358 2544:254705 568:248766 2728:239189 2176:227197 1440:219379 1056:218014 1856:215566 1456:201516 1472:200966 1512:186114 1680:180327 3936:163993 3888:163913 2224:162119 1504:158333 1552:153814 1648:152017 1408:144930 1216:143274 1744:140678 800:138343 2048:136713 1632:127054 1568:126050 632:124813 2064:123917 1696:120977 1712:120353 616:119580 1024:118193 664:118140 1520:114602 1664:111391 1280:109137 1248:108510 1328:107847 3568:107330 1296:106863 1824:106743 1088:106670 1376:105619 2496:104933 2032:104644 1184:100183 1152:99930 2000:99013 1840:98568 1488:96766 1584:96679 2720:96405 1360:95555 2016:94344 1040:94318 1432:94069 1312:93488 1728:93189 2400:92927 304:92895 992:92097 2448:91860 1200:90975 1008:90619 3616:90618 832:89461 704:88768 1104:88544 880:88231 2416:87549 1232:87382 976:87293 1344:87064 3584:86731 1136:86586 1600:85978 2912:85268 1264:85222 96:85155 768:84774 2192:84333 2736:84298 1872:84088 1168:83402 736:82218 1392:82170 720:82132 928:81700 0:81695 784:81205 640:80882 944:80600 816:80440 672:79184 528:78925 1120:77833 544:77338 2480:75776 592:75649 496:75208 336:75137 656:75095 3904:74656 560:74484 608:74213 416:73675 288:72932 912:72384 224:72302 624:72260 2512:71959 2432:71334 320:70573 688:70490 2528:70378 576:70136 480:70042 128:69200 176:68949 432:67648 512:67625 3776:67548 400:67447 448:67425 368:66615 864:66525 960:66451 192:66302 160:65222 3808:65215 32:64600 848:64253 2464:64213 1032:64121 4032:63995 64:63452 1424:63017 352:62584 144:62263 3968:61298 80:61100 4064:60827 384:60768 3712:60512 112:60350 3840:60320 
208:60231 3760:60119 3872:60116 1144:59652 272:59577 3728:58822 256:58697 3952:58368 3680:58100 3856:57949 240:57821 3744:57745 3632:57597 4016:56410 48:56321 3920:56291 3984:56241 4048:55385 16:55270 3664:55119 3792:54133 4080:53634 1960:47451 3688:45596 1768:16104 3176:14577 2835:12498 3512:11817 1720:11383 1112:7382 1336:6737 3896:5560 968:5215 2088:5215 936:5208 2312:4783 2120:4522 3416:4096 296:3896 3752:3873 3224:3715 776:3310 2750:3227 3902:3227 2296:3204 2536:3074 1896:3018 216:2929 3240:2753 72:2748 3080:2639 696:2610 4072:2545 1480:2399 280:2398 1496:2344 488:2315 1096:2274 840:2243 920:2237 472:2218 536:2153 952:2103 3640:2058 3304:2033 3736:2023 3032:2018 1160:2010 4056:2002 4088:2002 3016:1999 3192:1964 1272:1962 3528:1918 1816:1883 904:1877 1912:1849 1528:1840 856:1803 680:1755 2056:1747 2152:1747 1416:1739 1608:1717 1736:1701 2440:1697 3656:1682 40:1678 1576:1668 3992:1665 3112:1663 1240:1655 3272:1646 1256:1638 3976:1638 3560:1629 200:1625 3336:1621 3256:1617 2616:1608 360:1603 2200:1599 1368:1595 3400:1595 152:1587 120:1586 2376:1568 2952:1565 2072:1560 600:1559 264:1547 1624:1540 1976:1525 520:1523 3352:1519 2344:1513 1384:1506 2792:1503 2232:1499 1192:1497 424:1494 3944:1494 3784:1492 744:1491 104:1490 1176:1489 2856:1486 3496:1484 3544:1482 1224:1478 2712:1475 2216:1473 4040:1473 2136:1471 1128:1461 2040:1449 2760:1447 3848:1446 3912:1440 2936:1434 3096:1433 440:1431 2472:1429 3432:1427 3880:1420 1848:1419 2184:1414 2632:1411 456:1410 2456:1404 376:1401 3592:1401 136:1395 2776:1395 1016:1392 1672:1392 1064:1386 760:1380 1352:1379 2584:1374 2600:1367 504:1366 56:1365 1864:1363 1288:1361 648:1358 2872:1357 2744:1351 3672:1351 1544:1350 3160:1350 1752:1346 2696:1346 1704:1345 1000:1344 3928:1343 3368:1338 2424:1337 3144:1337 872:1332 3816:1331 4024:1329 1944:1324 312:1324 888:1322 1832:1322 2552:1320 3208:1318 392:1318 232:1317 2104:1315 1992:1314 1320:1314 1464:1314 2024:1311 2680:1311 168:1310 2504:1300 24:1299 3384:1298 184:1297 3768:1295 
4008:1295 1208:1292 3704:1287 8:1281 328:1280 984:1278 3800:1278 2168:1276 1784:1273 3608:1271 1592:1270 88:1269 1880:1268 2824:1263 3448:1262 3864:1259 1304:1256 792:1252 344:1243 3064:1242 1400:1238 1080:1238 2664:1237 3624:1234 2280:1230 808:1227 2888:1226 712:1226 3320:1225 2648:1213 2984:1211 2840:1208 408:1205 1688:1204 1560:1200 2568:1196 3000:1193 3960:1190 3288:1186 3480:1183 552:1181 584:1181 3048:1172 3128:1171 2520:1160 1048:1159 1800:1159 2488:1155 1928:1153 728:1152 2264:1139 2920:1127 756:1127 2692:1125 2408:1120 2360:1113 248:1102 2328:1098 2808:1090 1448:1080 2968:1059 398:327 2062:327 3443:316 34:124 35:110 33:106 422:72 36:50 37:50 49:50 369:48 38:30 1985:24 689:18 3825:18 2241:18 225:14 385:14 3745:14 3457:14 51:12 4071:12 433:10 2913:10 1988:9 84:8 260:8 548:8 836:8 900:8 1124:8 1412:8 1700:8 2052:8 2164:8 2276:8 2452:8 2740:8 2916:8 3028:8 3316:8 3604:8 3892:8 4068:8 273:8 2129:8 2801:8 3713:8 54:8 1905:7 148:6 436:6 612:6 724:6 1012:6 1188:6 1300:6 1476:6 1588:6 1764:6 1876:6 2340:6 2628:6 2804:6 3092:6 3204:6 3380:6 3492:6 3620:6 3668:6 3780:6 3956:6 753:6 1265:6 2049:6 2497:6 2577:6 2689:6 3025:6 3137:6 3473:6 2353:6 3041:6 3265:6 3377:6 3489:6 3601:6 3937:6 707:6 1011:6 453:4 2676:4 3828:4 17:4 129:4 241:4 321:4 353:4 545:4 657:4 769:4 801:4 913:4 1025:4 1601:4 1713:4 1825:4 1937:4 2161:4 2273:4 2721:4 2833:4 2849:4 3249:4 3361:4 3857:4 3921:4 4081:4 65:4 177:4 305:4 2017:4 3153:4 4049:4 532:4 2465:4 39:2 465:2 497:2 577:2 770:2 773:2 1297:2 1329:2 1409:2 1537:2 1730:2 1745:2 1873:2 2001:2 2209:2 2385:2 2609:2 2641:2 2737:2 2945:2 2961:2 3073:2 3185:2 3297:2 3345:2 3409:2 3521:2 3585:2 3697:2 3809:2 3969:2 68:2 81:2 193:2 204:2 289:2 401:2 417:2 513:2 529:2 641:2 977:2 1073:2 1121:2 1153:2 1233:2 1345:2 1377:2 1441:2 1457:2 1489:2 1569:2 1617:2 1681:2 1793:2 1844:2 2401:2 2769:2 3059:2 3553:2 3665:2 3793:2 3842:2 3845:2 3905:2 3985:2 4065:2 52:2 97:2 209:2 284:2 446:2 716:2 735:2 742:2 782:2 804:2 821:2 844:2 881:2 911:2 932:2 941:2 993:2 
1007:2 1015:2 1090:2 1105:2 1137:2 1217:2 1633:2 1665:2 1697:2 1777:2 2194:2 2196:2 2215:2 2306:2 2321:2 2380:2 2404:2 2417:2 2529:2 2564:2 2652:2 2657:2 2706:2 2709:2 2785:2 2929:2 3100:2 3652:2 3908:2 4033:2 2180:1 2181:1 2305:1 2517:1 1780:1 1781:1 2117:1 2244:1 2245:1 2357:1 2581:1 980:1 981:1 1093:1 1317:1 324:1 2492:1
On 01/08/24 07:05, Noah Goldstein wrote: > On Thu, Aug 1, 2024 at 2:02 AM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote: >> >> Hi Noah, >> >>>> Can you show what you would do differently via a patch or code sample? >>> >>> For alignment I assume you did something like: >>> >>> ``` >>> hist[__builtin_ctzll(<addr>)] += 1; >>> ``` >>> >>> then combined all entries for index 5 and above. >> >> Indeed. >> >>> What I would prefer is: >>> ``` >>> hist[<addr> & PAGE_MASK] += 1; >>> ``` >>> and not combining any entries. >>> >>> Although I do realize this will complicate the code a bit. >> >> So data alignment is due to strings being allocated via malloc, being in constdata >> or on the stack etc. Page offsets vary a lot due to unrelated things, so the key question >> is, if we had such data (and I don't), would it be useful? The very small average string >> size means it's also very rare to cross a page boundary for example. > > I grabbed the data, and it seems page crosses are less common than > random (although > not sure why). > > Attached is an aggregate histogram of strlen/memset sizes (up to 4096) > and page offsets > running SPEC. > > Also attached is tar with the histograms for each of the runs. > Noah, do you think the patch is good as-is or does it need some further change? It is not clear from this last comment.
On Tue, Aug 6, 2024, 22:02 Adhemerval Zanella Netto < adhemerval.zanella@linaro.org> wrote: > > > On 01/08/24 07:05, Noah Goldstein wrote: > > On Thu, Aug 1, 2024 at 2:02 AM Wilco Dijkstra <Wilco.Dijkstra@arm.com> > wrote: > >> > >> Hi Noah, > >> > >>>> Can you show what you would do differently via a patch or code sample? > >>> > >>> For alignment I assume you did something like: > >>> > >>> ``` > >>> hist[__builtin_ctzll(<addr>)] += 1; > >>> ``` > >>> > >>> then combined all entries for index 5 and above. > >> > >> Indeed. > >> > >>> What I would prefer is: > >>> ``` > >>> hist[<addr> & PAGE_MASK] += 1; > >>> ``` > >>> and not combining any entries. > >>> > >>> Although I do realize this will complicate the code a bit. > >> > >> So data alignment is due to strings being allocated via malloc, being > in constdata > >> or on the stack etc. Page offsets vary a lot due to unrelated things, > so the key question > >> is, if we had such data (and I don't), would it be useful? The very > small average string > >> size means it's also very rare to cross a page boundary for example. > > > > I grabbed the data, and it seems page crosses are less common than > > random (although > > not sure why). > > > > Attached is an aggregate histogram of strlen/memset sizes (up to 4096) > > and page offsets > > running SPEC. > > > > Also attached is tar with the histograms for each of the runs. > > > > Noah, do you think the patch is good as-is or does it need some further > change? > > It is not clear form this last comment. > I'm okay with this and memset patches going in if they add a comment mentioning the truncation point of the alignment values. My preference for this and the memset benchmarks would be that they more authentically represent the addresses as they are relevant to performance. In both cases my biggest gripe is that they randomize page offsets as opposed to just getting them directly from the profile. 
But that doesn't seem like a good enough reason to hold these up indefinitely, I'll hopefully have time to revisit them later. >
Hi Noah, > I grabbed the data, and it seems page crosses are less common than > random (although > not sure why). Short strings and high average alignment make it less likely. If you are worried about hitting worst-case scenarios (repeatedly hitting page crosses or mispredictions due to alternating), you can always do the first load aligned without much of a penalty - strings are very short, so you're most likely to get the full string in the first load anyway. It simplifies the implementation as well. > I'm okay with this and memset patches going in if they add > a comment mentioning the truncation point of the alignment values. I'll add something in the commit log even though it's made very clear at the start: +#define MAX_ALIGN 32 +#define MAX_STRLEN 128 > My preference for this and the memset benchmarks would be > that they more authentically represent the addresses as they > are relevant to performance. In both cases my biggest gripe > is that they randomize page offsets as opposed to just > getting them directly from the profile. > But that doesn't seem like a good enough reason to hold these up > indefinitely, I'll hopefully have time to revisit them later. I don't believe the page offsets are useful - it would be quite complex to try to use page offsets with nul-terminated strings. And I think you end up with something that is less general - for example your data shows that over 55% of strlen calls use just 3 different page offsets, and offsets are extremely skewed towards the end of a page. Different options, compiler or target will give wildly different results. Cheers, Wilco
On Thu, Aug 8, 2024 at 1:50 AM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote: > > Hi Noah, > > > I grabbed the data, and it seems page crosses are less common than > > random (although > > not sure why). > > Short strings and high average alignment makes it less likely. If you are worried > about hitting worst-case scenarios (repeatedly hitting pagecrosses or mispredictions > due to alternating), you can always do the first load aligned without much of a > penalty - strings are very short, so you're most likely to get the full string in the > first load anyway. It simplifies the implementation as well. > > > I'm okay with this and memset patches going in if they add > > a comment mentioning the truncation point of the alignment values. > > I'll add something in the commit log eventhough it's made very clear at the start: > > +#define MAX_ALIGN 32 > +#define MAX_STRLEN 128 > > > My preference for this and the memset benchmarks would be > > that they more authentically represent the addresses as they > > are relevant to performance. In both cases my biggest gripe > > is that they randomize page offsets as opposed to just > > getting them directly from the profile. > > But that doesn't seem like a good enough reason to hold these up > > indefinitely, I'll hopefully have time to revisit them later. > > I don't believe the page offsets are useful - it would be quite complex to try to use > page offsets with nul-terminated strings. And I think you end up with something that > is less general - for example your data shows that over 55% of strlen calls use just 3 > different page offsets, and offsets are extremely skewed towards the end of a page. > Different options, compiler or target will give wildly different results. > agreed it would make the implementation more complex. Likewise I can get behind getting profiles from multiple compilers (although you could probably make that case about lower alignments as well). 
I don't, however, agree that we should just pretend it's random when that may not be the case. > Cheers, > Wilco
Hi Noah, >> I don't believe the page offsets are useful - it would be quite complex to try to use >> page offsets with nul-terminated strings. And I think you end up with something that >> is less general - for example your data shows that over 55% of strlen calls use just 3 >> different page offsets, and offsets are extremely skewed towards the end of a page. >> Different options, compiler or target will give wildly different results. >> > agreed it would make the implementation more complex. Likewise I can get behind > getting profiles from multiple compilers (although you could probably make that > case about lower alignments as well). I don't, however, agree that we > should just pretend it's random when that may not be the case. My point is that all data will be aligned to a minimum alignment (which can't change). This directly affects performance because of unaligned accesses. On the other hand the offset within a page is random and doesn't affect performance - even if you consider implementations that are affected by page crossing (since the chance of page crossing is so low that it hardly matters in reality). If ASLR was implemented correctly, we'd do an alloca (rand() & 4095) and malloc (rand() & 4095) at startup to randomize the stack and heap offsets within a page. Then multiple runs of the same binary would show completely different offsets each run while the distribution of alignment would remain essentially the same. Hence I just don't get why you see any significance in page offsets. Cheers, Wilco
On Mon, Aug 12, 2024 at 10:04 PM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote: > > Hi Noah, > > >> I don't believe the page offsets are useful - it would be quite complex to try to use > >> page offsets with nul-terminated strings. And I think you end up with something that > >> is less general - for example your data shows that over 55% of strlen calls use just 3 > >> different page offsets, and offsets are extremely skewed towards the end of a page. > >> Different options, compiler or target will give wildly different results. > >> > > agreed it would make the implementation more complex. Likewise I can get behind > > getting profiles from multiple compilers (although you could probably make that > > case about lower alignments as well). I don't, however, agree that we > > should just pretend it's random when that may not be the case. > > My point is that all data will be aligned to a minimum alignment (which can't change). > This directly affects performance because of unaligned accesses. On the other hand the > offset within a page is random and doesn't affect performance - even if you consider > implementations that are affected by page crossing (since the chance of page crossing > is so low that it hardly matters in reality). > > If ASLR was implemented correctly, we'd do a alloca (rand() & 4095) and > malloc (rand() & 4095) at startup to randomize the stack and heap offsets within a page. But we don't... and it's definitely not ubiquitous among malloc implementations. These benchmarks are meant to be representative of how the world is, not how it should be. > Then multiple runs of the same binary would show completely different offsets each run > while the distribution of alignment would remain essentially the same. > > Hence I just don't get why you see any significance in page offsets. > I can see your argument that page offsets will vary depending on environment in ways a single profile doesn't capture.
Likewise that we really need page-cross rate (which is probably much lower). But either way, alignment will be captured by page offset, so I don't really see how it could be worse than what we have with this patch. What about instead of generating based on these pared-down histograms we just grab N random samples from an actual run? > Cheers, > Wilco
Hi Noah, >> If ASLR was implemented correctly, we'd do a alloca (rand() & 4095) and >> malloc (rand() & 4095) at startup to randomize the stack and heap offsets within a page. > > But we dont... any it's definitely not ubiquitous among malloc implementations. > These benchmarks are meant to be representative of how the world is, not how it > should be. And I consider that a security risk. Exploits use the fact that data structures/offsets rarely change across multiple GCC/GLIBC releases. IIRC there is a nice fixed difference from initial SP to TLS data... >> Then multiple runs of the same binary would show completely different offsets each run >> while the distribution of alignment would remain essentially the same. >> >> Hence I just don't get why you see any significance in page offsets. >> > I can see your argument that page offsets will vary depending on environment > in ways a single profile doesn't capture. > Likewise that we really need page-cross rate (which is probably much lower). Measuring page-cross rate in SPEC and checking it ends up similar in benchmarks sounds fine. But in general it is low enough it doesn't really affect performance. > But either way, alignment will be captured by page offset, so I don't really see > how it could be worse than what we have with this patch. Sure it is not worse, but it is not better either. And you have to solve the issue of how to deal with nul-terminated strings somehow. > What about instead of generating based on these paired down histograms we > just grab N random samples from an actual run? Yes, for memcpy/memmove/memset that would be a good idea - the only issue is how to take a representative subset of the full trace. For other string functions you have the difficulty in how to replay them without strings overlapping each other. Cheers, Wilco
On Tue, Aug 13, 2024 at 9:23 PM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote: > > Hi Noah, > > >> If ASLR was implemented correctly, we'd do an alloca (rand() & 4095) and > >> malloc (rand() & 4095) at startup to randomize the stack and heap offsets within a page. > > > > But we don't... and it's definitely not ubiquitous among malloc implementations. > > These benchmarks are meant to be representative of how the world is, not how it > > should be. > > And I consider that a security risk. Exploits use the fact that data structures/offsets rarely > change across multiple GCC/GLIBC releases. IIRC there is a nice fixed difference from > initial SP to TLS data... > > >> Then multiple runs of the same binary would show completely different offsets each run > >> while the distribution of alignment would remain essentially the same. > >> > >> Hence I just don't get why you see any significance in page offsets. > >> > > I can see your argument that page offsets will vary depending on environment > > in ways a single profile doesn't capture. > > Likewise that we really need page-cross rate (which is probably much lower). > > Measuring page-cross rate in SPEC and checking it ends up similar in benchmarks > sounds fine. But in general it is low enough it doesn't really affect performance. > > > But either way, alignment will be captured by page offset, so I don't really see > > how it could be worse than what we have with this patch. > > Sure it is not worse, but it is not better either. And you have to solve the issue of how to > deal with nul-terminated strings somehow. See below discussion based on just taking samples. > > > What about instead of generating based on these pared-down histograms we > > just grab N random samples from an actual run? > > Yes, for memcpy/memmove/memset that would be a good idea - the only issue is > how to take a representative subset of the full trace. 
For other string functions you > have the difficulty in how to replay them without strings overlapping each other. I figured I would just grab all of the calls for the function in question then randomly select N of the calls. Or we could take a histogram of the sizes and select calls proportionately from each bucket. Or if you have any ideas? For memset/memcpy I don't think it would be too hard. Unless we want to simulate properly different addresses for the sake of cache behavior, we can just re-use the same dest/source. For strnlen I imagine a greedy algorithm would be fine here. We create buckets based on page offsets then pop sizes to try to fill pages. memcmp would be harder. I'll think about it. > > Cheers, > Wilco
ping Add a new randomized strlen test similar to bench-random-memcpy. Instead of repeating the same call to strlen over and over again, it times a large number of different strings. The distribution of the string length and alignment is based on SPEC2017. --- diff --git a/benchtests/Makefile b/benchtests/Makefile index d228e9e68af3d73129591f3d875d6be545182eac..6d746ad2d4de78470b1f7936c34affbd6edb254a 100644 --- a/benchtests/Makefile +++ b/benchtests/Makefile @@ -171,6 +171,7 @@ string-benchset := \ strcpy_chk \ strcspn \ strlen \ + strlen-random \ strncasecmp \ strncat \ strncmp \ diff --git a/benchtests/bench-strlen-random.c b/benchtests/bench-strlen-random.c new file mode 100644 index 0000000000000000000000000000000000000000..becd09a2caff40262127b732aadd7d0e4374e9aa --- /dev/null +++ b/benchtests/bench-strlen-random.c @@ -0,0 +1,194 @@ +/* Measure strlen performance. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#define TEST_MAIN +#define TEST_NAME "strlen" + +#define NUM_TESTS 65536 +#define MAX_ALIGN 32 +#define MAX_STRLEN 128 +#define MIN_PAGE_SIZE (2 * getpagesize()) + +#include "bench-string.h" +#include <assert.h> +#include "json-lib.h" + +typedef size_t (*proto_t) (const CHAR *); + +size_t memchr_strlen (const CHAR *); + +IMPL (memchr_strlen, 0) + +size_t +memchr_strlen (const CHAR *p) +{ + return (const CHAR *)MEMCHR (p, 0, PTRDIFF_MAX) - p; +} + +IMPL (STRLEN, 1) + +static uint32_t strlen_tests[NUM_TESTS]; + +typedef struct { uint16_t size; uint16_t freq; } freq_data_t; +typedef struct { uint16_t align; uint16_t freq; } align_data_t; + +#define SIZE_NUM 65536 +#define SIZE_MASK (SIZE_NUM-1) +static uint8_t strlen_len_arr[SIZE_NUM]; + +/* Frequency data for strlen sizes up to 256 based on SPEC2017. */ +static freq_data_t strlen_len_freq[] = +{ + { 12,22671}, { 18,12834}, { 13, 9555}, { 6, 6348}, { 17, 6095}, { 11, 2115}, + { 10, 1335}, { 7, 814}, { 2, 646}, { 9, 483}, { 8, 471}, { 16, 418}, + { 4, 390}, { 1, 388}, { 5, 233}, { 3, 204}, { 0, 79}, { 14, 79}, + { 15, 69}, { 26, 36}, { 22, 35}, { 31, 24}, { 32, 24}, { 19, 21}, + { 25, 17}, { 28, 15}, { 21, 14}, { 33, 14}, { 20, 13}, { 24, 9}, + { 29, 9}, { 30, 9}, { 23, 7}, { 34, 7}, { 27, 6}, { 44, 5}, + { 42, 4}, { 45, 3}, { 47, 3}, { 40, 2}, { 41, 2}, { 43, 2}, + { 58, 2}, { 78, 2}, { 36, 2}, { 48, 1}, { 52, 1}, { 60, 1}, + { 64, 1}, { 56, 1}, { 76, 1}, { 68, 1}, { 80, 1}, { 84, 1}, + { 72, 1}, { 86, 1}, { 35, 1}, { 39, 1}, { 50, 1}, { 38, 1}, + { 37, 1}, { 46, 1}, { 98, 1}, {102, 1}, {128, 1}, { 51, 1}, + {107, 1}, { 0, 0} +}; + +#define ALIGN_NUM 1024 +#define ALIGN_MASK (ALIGN_NUM-1) +static uint8_t strlen_align_arr[ALIGN_NUM]; + +/* Alignment data for strlen based on SPEC2017. 
*/ +static align_data_t string_align_freq[] = +{ + {8, 470}, {32, 427}, {16, 99}, {1, 19}, {2, 6}, {4, 3}, {0, 0} +}; + +static void +init_strlen_distribution (void) +{ + int i, j, freq, size, n; + + for (n = i = 0; (freq = strlen_len_freq[i].freq) != 0; i++) + for (j = 0, size = strlen_len_freq[i].size; j < freq; j++) + strlen_len_arr[n++] = size; + assert (n == SIZE_NUM); + + for (n = i = 0; (freq = string_align_freq[i].freq) != 0; i++) + for (j = 0, size = string_align_freq[i].align; j < freq; j++) + strlen_align_arr[n++] = size; + assert (n == ALIGN_NUM); +} + +static volatile size_t maskv = 0; + +static void +do_one_test (json_ctx_t *json_ctx, impl_t *impl, size_t iters, + uint32_t *input, size_t n) +{ + timing_t start, stop, cur; + size_t res = 0; + size_t mask = maskv; + + /* Avoid 'cold start' performance penalty. */ + for (int i = 0; i < 10; i++) + for (int j = 0; j < n; j++) + CALL (impl, (const char*)buf1 + input[j]); + + TIMING_NOW (start); + for (int i = 0; i < iters; ++i) + for (int j = 0; j < n; j++) + res = CALL (impl, (const char*)buf1 + input[j] + (res & mask)); + TIMING_NOW (stop); + TIMING_DIFF (cur, start, stop); + json_element_double (json_ctx, (double) cur / (double) iters); +} + +static void +do_test (json_ctx_t *json_ctx) +{ + size_t n; + uint8_t *a = buf1; + uint16_t index[MAX_ALIGN]; + + memset (a, 'x', MIN_PAGE_SIZE); + + /* Create indices for strings at all alignments. */ + for (int i = 0; i < MAX_ALIGN; i++) + { + index[i] = i * (MAX_STRLEN + 1); + a[index[i] + MAX_STRLEN] = 0; + } + + /* Create a random set of strlen input strings using the string length + and alignment distributions. 
*/ + for (n = 0; n < NUM_TESTS; n++) + { + int align = strlen_align_arr[rand () & ALIGN_MASK]; + int exp_len = strlen_len_arr[rand () & SIZE_MASK]; + + strlen_tests[n] = + index[(align + exp_len) & (MAX_ALIGN - 1)] + MAX_STRLEN - exp_len; + assert ((strlen_tests[n] & (align - 1)) == 0); + assert (strlen ((char*) a + strlen_tests[n]) == exp_len); + } + + json_element_object_begin (json_ctx); + json_array_begin (json_ctx, "timings"); + + FOR_EACH_IMPL (impl, 0) + do_one_test (json_ctx, impl, INNER_LOOP_ITERS_MEDIUM, strlen_tests, n); + + json_array_end (json_ctx); + json_element_object_end (json_ctx); +} + +int +test_main (void) +{ + + json_ctx_t json_ctx; + + test_init (); + init_strlen_distribution (); + + json_init (&json_ctx, 0, stdout); + + json_document_begin (&json_ctx); + json_attr_string (&json_ctx, "timing_type", TIMING_TYPE); + + json_attr_object_begin (&json_ctx, "functions"); + json_attr_object_begin (&json_ctx, TEST_NAME); + json_attr_string (&json_ctx, "bench-variant", "random"); + + json_array_begin (&json_ctx, "ifuncs"); + FOR_EACH_IMPL (impl, 0) + json_element_string (&json_ctx, impl->name); + json_array_end (&json_ctx); + + json_array_begin (&json_ctx, "results"); + do_test (&json_ctx); + + json_array_end (&json_ctx); + json_attr_object_end (&json_ctx); + json_attr_object_end (&json_ctx); + json_document_end (&json_ctx); + + return ret; +} + +#include <support/test-driver.c>
diff --git a/benchtests/Makefile b/benchtests/Makefile index d228e9e68af3d73129591f3d875d6be545182eac..6d746ad2d4de78470b1f7936c34affbd6edb254a 100644 --- a/benchtests/Makefile +++ b/benchtests/Makefile @@ -171,6 +171,7 @@ string-benchset := \ strcpy_chk \ strcspn \ strlen \ + strlen-random \ strncasecmp \ strncat \ strncmp \ diff --git a/benchtests/bench-strlen-random.c b/benchtests/bench-strlen-random.c new file mode 100644 index 0000000000000000000000000000000000000000..becd09a2caff40262127b732aadd7d0e4374e9aa --- /dev/null +++ b/benchtests/bench-strlen-random.c @@ -0,0 +1,194 @@ +/* Measure strlen performance. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#define TEST_MAIN +#define TEST_NAME "strlen" + +#define NUM_TESTS 65536 +#define MAX_ALIGN 32 +#define MAX_STRLEN 128 +#define MIN_PAGE_SIZE (2 * getpagesize()) + +#include "bench-string.h" +#include <assert.h> +#include "json-lib.h" + +typedef size_t (*proto_t) (const CHAR *); + +size_t memchr_strlen (const CHAR *); + +IMPL (memchr_strlen, 0) + +size_t +memchr_strlen (const CHAR *p) +{ + return (const CHAR *)MEMCHR (p, 0, PTRDIFF_MAX) - p; +} + +IMPL (STRLEN, 1) + +static uint32_t strlen_tests[NUM_TESTS]; + +typedef struct { uint16_t size; uint16_t freq; } freq_data_t; +typedef struct { uint16_t align; uint16_t freq; } align_data_t; + +#define SIZE_NUM 65536 +#define SIZE_MASK (SIZE_NUM-1) +static uint8_t strlen_len_arr[SIZE_NUM]; + +/* Frequency data for strlen sizes up to 256 based on SPEC2017. */ +static freq_data_t strlen_len_freq[] = +{ + { 12,22671}, { 18,12834}, { 13, 9555}, { 6, 6348}, { 17, 6095}, { 11, 2115}, + { 10, 1335}, { 7, 814}, { 2, 646}, { 9, 483}, { 8, 471}, { 16, 418}, + { 4, 390}, { 1, 388}, { 5, 233}, { 3, 204}, { 0, 79}, { 14, 79}, + { 15, 69}, { 26, 36}, { 22, 35}, { 31, 24}, { 32, 24}, { 19, 21}, + { 25, 17}, { 28, 15}, { 21, 14}, { 33, 14}, { 20, 13}, { 24, 9}, + { 29, 9}, { 30, 9}, { 23, 7}, { 34, 7}, { 27, 6}, { 44, 5}, + { 42, 4}, { 45, 3}, { 47, 3}, { 40, 2}, { 41, 2}, { 43, 2}, + { 58, 2}, { 78, 2}, { 36, 2}, { 48, 1}, { 52, 1}, { 60, 1}, + { 64, 1}, { 56, 1}, { 76, 1}, { 68, 1}, { 80, 1}, { 84, 1}, + { 72, 1}, { 86, 1}, { 35, 1}, { 39, 1}, { 50, 1}, { 38, 1}, + { 37, 1}, { 46, 1}, { 98, 1}, {102, 1}, {128, 1}, { 51, 1}, + {107, 1}, { 0, 0} +}; + +#define ALIGN_NUM 1024 +#define ALIGN_MASK (ALIGN_NUM-1) +static uint8_t strlen_align_arr[ALIGN_NUM]; + +/* Alignment data for strlen based on SPEC2017. 
*/ +static align_data_t string_align_freq[] = +{ + {8, 470}, {32, 427}, {16, 99}, {1, 19}, {2, 6}, {4, 3}, {0, 0} +}; + +static void +init_strlen_distribution (void) +{ + int i, j, freq, size, n; + + for (n = i = 0; (freq = strlen_len_freq[i].freq) != 0; i++) + for (j = 0, size = strlen_len_freq[i].size; j < freq; j++) + strlen_len_arr[n++] = size; + assert (n == SIZE_NUM); + + for (n = i = 0; (freq = string_align_freq[i].freq) != 0; i++) + for (j = 0, size = string_align_freq[i].align; j < freq; j++) + strlen_align_arr[n++] = size; + assert (n == ALIGN_NUM); +} + +static volatile size_t maskv = 0; + +static void +do_one_test (json_ctx_t *json_ctx, impl_t *impl, size_t iters, + uint32_t *input, size_t n) +{ + timing_t start, stop, cur; + size_t res = 0; + size_t mask = maskv; + + /* Avoid 'cold start' performance penalty. */ + for (int i = 0; i < 10; i++) + for (int j = 0; j < n; j++) + CALL (impl, (const char*)buf1 + input[j]); + + TIMING_NOW (start); + for (int i = 0; i < iters; ++i) + for (int j = 0; j < n; j++) + res = CALL (impl, (const char*)buf1 + input[j] + (res & mask)); + TIMING_NOW (stop); + TIMING_DIFF (cur, start, stop); + json_element_double (json_ctx, (double) cur / (double) iters); +} + +static void +do_test (json_ctx_t *json_ctx) +{ + size_t n; + uint8_t *a = buf1; + uint16_t index[MAX_ALIGN]; + + memset (a, 'x', MIN_PAGE_SIZE); + + /* Create indices for strings at all alignments. */ + for (int i = 0; i < MAX_ALIGN; i++) + { + index[i] = i * (MAX_STRLEN + 1); + a[index[i] + MAX_STRLEN] = 0; + } + + /* Create a random set of strlen input strings using the string length + and alignment distributions. 
*/ + for (n = 0; n < NUM_TESTS; n++) + { + int align = strlen_align_arr[rand () & ALIGN_MASK]; + int exp_len = strlen_len_arr[rand () & SIZE_MASK]; + + strlen_tests[n] = + index[(align + exp_len) & (MAX_ALIGN - 1)] + MAX_STRLEN - exp_len; + assert ((strlen_tests[n] & (align - 1)) == 0); + assert (strlen ((char*) a + strlen_tests[n]) == exp_len); + } + + json_element_object_begin (json_ctx); + json_array_begin (json_ctx, "timings"); + + FOR_EACH_IMPL (impl, 0) + do_one_test (json_ctx, impl, INNER_LOOP_ITERS_MEDIUM, strlen_tests, n); + + json_array_end (json_ctx); + json_element_object_end (json_ctx); +} + +int +test_main (void) +{ + + json_ctx_t json_ctx; + + test_init (); + init_strlen_distribution (); + + json_init (&json_ctx, 0, stdout); + + json_document_begin (&json_ctx); + json_attr_string (&json_ctx, "timing_type", TIMING_TYPE); + + json_attr_object_begin (&json_ctx, "functions"); + json_attr_object_begin (&json_ctx, TEST_NAME); + json_attr_string (&json_ctx, "bench-variant", "random"); + + json_array_begin (&json_ctx, "ifuncs"); + FOR_EACH_IMPL (impl, 0) + json_element_string (&json_ctx, impl->name); + json_array_end (&json_ctx); + + json_array_begin (&json_ctx, "results"); + do_test (&json_ctx); + + json_array_end (&json_ctx); + json_attr_object_end (&json_ctx); + json_attr_object_end (&json_ctx); + json_document_end (&json_ctx); + + return ret; +} + +#include <support/test-driver.c>