Message ID | 20240806113044.90204-1-cooper.qu@linux.alibaba.com |
---|---|
State | New |
Headers | show |
Series | genoutput: Accelerate the place_operands function. | expand |
Xianmiao Qu <cooper.qu@linux.alibaba.com> writes: > With the increase in the number of modes and patterns for some > backend architectures, the place_operands function becomes a > bottleneck in the speed of genoutput, and may even become a > bottleneck in the overall speed of building the GCC project. > This patch aims to accelerate the place_operands function, > the optimizations it includes are: > 1. Use a hash table to store operand information, > improving the lookup time for the first operand. > 2. Move mode comparison to the beginning to avoid the scenarios of most strcmp. > > I tested the speed improvements for the following backends, > Improvement Ratio > x86_64 197.9% > aarch64 954.5% > riscv 2578.6% > If the build machine is slow, then this improvement can save a lot of time. > > I tested the genoutput output for x86_64/aarch64/riscv backends, > and there was no difference compared to before the optimization, > so this shouldn't introduce any functional issues. Looks like a nice speed-up thanks. A couple of general points: * Could you try using the more type-safe hash-table.h, instead of hashtab.h? Similarly inchash.h for the hashing. * Although this wasn't always the style in older code, the preference now is to put new functions before their first use where possible, to avoid forward declarations. A couple of very minor comments below. > --- > gcc/genoutput.cc | 101 ++++++++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 95 insertions(+), 6 deletions(-) > > diff --git a/gcc/genoutput.cc b/gcc/genoutput.cc > index efd81766bb5b..456d96112cfb 100644 > --- a/gcc/genoutput.cc > +++ b/gcc/genoutput.cc > @@ -112,6 +112,8 @@ static int next_operand_number = 1; > struct operand_data > { > struct operand_data *next; > + /* Point to the next member with the same hash value in the hash table. */ > + struct operand_data *eq_next; > int index; > const char *predicate; > const char *constraint; > @@ -127,11 +129,12 @@ struct operand_data > > static struct operand_data null_operand = > { > - 0, 0, "", "", E_VOIDmode, 0, 0, 0, 0, 0 > + 0, 0, 0, "", "", E_VOIDmode, 0, 0, 0, 0, 0 > }; > > static struct operand_data *odata = &null_operand; > static struct operand_data **odata_end = &null_operand.next; > +static htab_t operand_data_table; > > /* Must match the constants in recog.h. */ > > @@ -180,6 +183,11 @@ static void place_operands (class data *); > static void process_template (class data *, const char *); > static void validate_insn_alternatives (class data *); > static void validate_insn_operands (class data *); > +static hashval_t hash_struct_operand_data (const void *); > +static int eq_struct_operand_data (const void *, const void *); > +static void insert_operand_data (struct operand_data *); > +static struct operand_data *lookup_operand_data (struct operand_data *); > +static void init_operand_data_table (void); > > class constraint_data > { > @@ -532,6 +540,13 @@ compare_operands (struct operand_data *d0, struct operand_data *d1) > { > const char *p0, *p1; > > + /* On one hand, comparing strings for predicate and constraint > + is time-consuming, and on the other hand, the probability of > + different modes is relatively high. Therefore, checking the mode > + first can speed up the execution of the program. */ > + if (d0->mode != d1->mode) > + return 0; > + > p0 = d0->predicate; > if (!p0) > p0 = ""; > @@ -550,9 +565,6 @@ compare_operands (struct operand_data *d0, struct operand_data *d1) > if (strcmp (p0, p1) != 0) > return 0; > > - if (d0->mode != d1->mode) > - return 0; > - > if (d0->strict_low != d1->strict_low) > return 0; > > @@ -577,9 +589,9 @@ place_operands (class data *d) > return; > } > > + od = lookup_operand_data (&d->operand[0]); > /* Brute force substring search. */ > - for (od = odata, i = 0; od; od = od->next, i = 0) > - if (compare_operands (od, &d->operand[0])) > + for (i = 0; od; od = od->eq_next, i = 0) I think we should move the i = 0 to after the loop, for the "no match" case. As it stands, each iteration immediate sets i to 1. The loop body should be moved 2 columns to the left, to account for the removed if condition. Richard > { > od2 = od->next; > i = 1; > @@ -605,6 +617,7 @@ place_operands (class data *d) > *odata_end = od2; > odata_end = &od2->next; > od2->index = next_operand_number++; > + insert_operand_data (od2); > } > *odata_end = NULL; > return; > @@ -1049,6 +1062,7 @@ main (int argc, const char **argv) > progname = "genoutput"; > > init_insn_for_nothing (); > + init_operand_data_table (); > > if (!init_rtx_reader_args (argc, argv)) > return (FATAL_EXIT_CODE); > @@ -1224,3 +1238,78 @@ mdep_constraint_len (const char *s, file_location loc, int opno) > message_at (loc, "note: in operand %d", opno); > return 1; /* safe */ > } > + > +/* Helper to Hash a struct operand_data. */ > + > +static hashval_t > +hash_struct_operand_data (const void *ptr) > +{ > + const struct operand_data *d = (const struct operand_data *) ptr; > + const char *pred, *cons; > + hashval_t hash; > + > + pred = d->predicate; > + if (!pred) > + pred = ""; > + hash = htab_hash_string (pred); > + > + cons = d->constraint; > + if (!cons) > + cons = ""; > + hash = iterative_hash (cons, strlen (cons), hash); > + > + hash = iterative_hash_object (d->mode, hash); > + hash = iterative_hash_object (d->strict_low, hash); > + hash = iterative_hash_object (d->eliminable, hash); > + return hash; > +} > + > +/* Equality function of the operand_data hash table. */ > + > +static int > +eq_struct_operand_data (const void *p1, const void *p2) > +{ > + const struct operand_data *d1 = (const struct operand_data *) p1; > + const struct operand_data *d2 = (const struct operand_data *) p2; > + > + return compare_operands (const_cast<operand_data *>(d1), > + const_cast<operand_data *>(d2)); > +} > + > +/* Insert the operand_data variable D into the hash table. > + If an variable with the same hash value already exists in the hash table, > + insert the element at the end of the linked list connected > + through the eq_next member. */ > + > +static void > +insert_operand_data (struct operand_data *d) > +{ > + void **slot = htab_find_slot (operand_data_table, d, INSERT); > + if (*slot) > + { > + struct operand_data *last = (struct operand_data *) *slot; > + while (last->eq_next) > + last = last->eq_next; > + last->eq_next = d; > + } > + else > + *slot = d; > +} > + > +/* Look up the operand_data D in the hash table. */ > + > +static struct operand_data * > +lookup_operand_data (struct operand_data *d) > +{ > + return (struct operand_data *) htab_find (operand_data_table, d); > +} > + > +/* Initializes the operand_data hash table. */ > + > +static void > +init_operand_data_table (void) > +{ > + operand_data_table = htab_create_alloc (64, hash_struct_operand_data, > + eq_struct_operand_data, 0, > + xcalloc, free); > +}
On Wed, Aug 07, 2024 at 03:17:24PM +0100, Richard Sandiford wrote: > Looks like a nice speed-up thanks. > > A couple of general points: > > * Could you try using the more type-safe hash-table.h, instead of hashtab.h? > Similarly inchash.h for the hashing. > > * Although this wasn't always the style in older code, the preference now > is to put new functions before their first use where possible, to avoid > forward declarations. > > A couple of very minor comments below. > Thanks, I will submit a new patch to address these issues. However, there is one more question. > > @@ -532,6 +540,13 @@ compare_operands (struct operand_data *d0, struct operand_data *d1) > > { > > const char *p0, *p1; > > > > + /* On one hand, comparing strings for predicate and constraint > > + is time-consuming, and on the other hand, the probability of > > + different modes is relatively high. Therefore, checking the mode > > + first can speed up the execution of the program. */ > > + if (d0->mode != d1->mode) > > + return 0; > > + > > p0 = d0->predicate; > > if (!p0) > > p0 = ""; > > @@ -550,9 +565,6 @@ compare_operands (struct operand_data *d0, struct operand_data *d1) > > if (strcmp (p0, p1) != 0) > > return 0; > > > > - if (d0->mode != d1->mode) > > - return 0; > > - > > if (d0->strict_low != d1->strict_low) > > return 0; > > > > @@ -577,9 +589,9 @@ place_operands (class data *d) > > return; > > } > > > > + od = lookup_operand_data (&d->operand[0]); > > /* Brute force substring search. */ > > - for (od = odata, i = 0; od; od = od->next, i = 0) > > - if (compare_operands (od, &d->operand[0])) > > + for (i = 0; od; od = od->eq_next, i = 0) > > I think we should move the i = 0 to after the loop, for the "no match" case. > As it stands, each iteration immediate sets i to 1. > As the method for finding matching operands changes to hash table lookup, a "no match" scenario will correspond to 'lookup_operand_data (&d->operand[0])' returning NULL and assigning it to 'od'. This means that 'i' will remain 0 and indicate "no match" in the subsequent code. Therefore, I think we still need to iterate starting from 'i = 0', but reassigning i to 0 on each iteration is redundant. We can modify it to 'for (i = 0; od; od = od->eq_next)'. BR, Xianmiao
Xianmiao Qu <cooper.qu@linux.alibaba.com> writes: > On Wed, Aug 07, 2024 at 03:17:24PM +0100, Richard Sandiford wrote: >> Looks like a nice speed-up thanks. >> >> A couple of general points: >> >> * Could you try using the more type-safe hash-table.h, instead of hashtab.h? >> Similarly inchash.h for the hashing. >> >> * Although this wasn't always the style in older code, the preference now >> is to put new functions before their first use where possible, to avoid >> forward declarations. >> >> A couple of very minor comments below. >> > > Thanks, I will submit a new patch to address these issues. > > However, there is one more question. > >> > @@ -532,6 +540,13 @@ compare_operands (struct operand_data *d0, struct operand_data *d1) >> > { >> > const char *p0, *p1; >> > >> > + /* On one hand, comparing strings for predicate and constraint >> > + is time-consuming, and on the other hand, the probability of >> > + different modes is relatively high. Therefore, checking the mode >> > + first can speed up the execution of the program. */ >> > + if (d0->mode != d1->mode) >> > + return 0; >> > + >> > p0 = d0->predicate; >> > if (!p0) >> > p0 = ""; >> > @@ -550,9 +565,6 @@ compare_operands (struct operand_data *d0, struct operand_data *d1) >> > if (strcmp (p0, p1) != 0) >> > return 0; >> > >> > - if (d0->mode != d1->mode) >> > - return 0; >> > - >> > if (d0->strict_low != d1->strict_low) >> > return 0; >> > >> > @@ -577,9 +589,9 @@ place_operands (class data *d) >> > return; >> > } >> > >> > + od = lookup_operand_data (&d->operand[0]); >> > /* Brute force substring search. */ >> > - for (od = odata, i = 0; od; od = od->next, i = 0) >> > - if (compare_operands (od, &d->operand[0])) >> > + for (i = 0; od; od = od->eq_next, i = 0) >> >> I think we should move the i = 0 to after the loop, for the "no match" case. >> As it stands, each iteration immediate sets i to 1. >> > > As the method for finding matching operands changes to hash table lookup, > a "no match" scenario will correspond to 'lookup_operand_data (&d->operand[0])' > returning NULL and assigning it to 'od'. This means that 'i' will remain 0 > and indicate "no match" in the subsequent code. > Therefore, I think we still need to iterate starting from 'i = 0', > but reassigning i to 0 on each iteration is redundant. > We can modify it to > 'for (i = 0; od; od = od->eq_next)'. But what I mean is: the code could just be: od = lookup_operand_data (&d->operand[0]); /* Brute force substring search. */ for (; od; od = od->eq_next) { i = 1; while (1) { if (i == d->n_operands) goto full_match; if (od2 == NULL) goto partial_match; if (! compare_operands (od2, &d->operand[i])) break; ++i, od2 = od2->next; } } i = 0; /* Either partial match at the end of the list, or no match. In either case, we tack on what operands are remaining to the end of the list. */ partial_match: ... full_match: ... Each partial and full match starts at index 1, because the hash table guarantees a match for index 0. partial_match is entered with i == 0 iff this is an entirely new entry. Thanks, Richard > > > > BR, > Xianmiao
On Fri, Aug 09, 2024 at 04:35:32PM +0100, Richard Sandiford wrote: > > But what I mean is: the code could just be: > > od = lookup_operand_data (&d->operand[0]); > /* Brute force substring search. */ > for (; od; od = od->eq_next) > { > i = 1; > while (1) > { > if (i == d->n_operands) > goto full_match; > if (od2 == NULL) > goto partial_match; > if (! compare_operands (od2, &d->operand[i])) > break; > ++i, od2 = od2->next; > } > } > i = 0; > > /* Either partial match at the end of the list, or no match. In either > case, we tack on what operands are remaining to the end of the list. */ > partial_match: > ... > > full_match: > ... > > Each partial and full match starts at index 1, because the hash > table guarantees a match for index 0. partial_match is entered > with i == 0 iff this is an entirely new entry. > > Thanks, > Richard > Thank you for your detailed explanation. BR, Xianmiao
diff --git a/gcc/genoutput.cc b/gcc/genoutput.cc index efd81766bb5b..456d96112cfb 100644 --- a/gcc/genoutput.cc +++ b/gcc/genoutput.cc @@ -112,6 +112,8 @@ static int next_operand_number = 1; struct operand_data { struct operand_data *next; + /* Point to the next member with the same hash value in the hash table. */ + struct operand_data *eq_next; int index; const char *predicate; const char *constraint; @@ -127,11 +129,12 @@ struct operand_data static struct operand_data null_operand = { - 0, 0, "", "", E_VOIDmode, 0, 0, 0, 0, 0 + 0, 0, 0, "", "", E_VOIDmode, 0, 0, 0, 0, 0 }; static struct operand_data *odata = &null_operand; static struct operand_data **odata_end = &null_operand.next; +static htab_t operand_data_table; /* Must match the constants in recog.h. */ @@ -180,6 +183,11 @@ static void place_operands (class data *); static void process_template (class data *, const char *); static void validate_insn_alternatives (class data *); static void validate_insn_operands (class data *); +static hashval_t hash_struct_operand_data (const void *); +static int eq_struct_operand_data (const void *, const void *); +static void insert_operand_data (struct operand_data *); +static struct operand_data *lookup_operand_data (struct operand_data *); +static void init_operand_data_table (void); class constraint_data { @@ -532,6 +540,13 @@ compare_operands (struct operand_data *d0, struct operand_data *d1) { const char *p0, *p1; + /* On one hand, comparing strings for predicate and constraint + is time-consuming, and on the other hand, the probability of + different modes is relatively high. Therefore, checking the mode + first can speed up the execution of the program. */ + if (d0->mode != d1->mode) + return 0; + p0 = d0->predicate; if (!p0) p0 = ""; @@ -550,9 +565,6 @@ compare_operands (struct operand_data *d0, struct operand_data *d1) if (strcmp (p0, p1) != 0) return 0; - if (d0->mode != d1->mode) - return 0; - if (d0->strict_low != d1->strict_low) return 0; @@ -577,9 +589,9 @@ place_operands (class data *d) return; } + od = lookup_operand_data (&d->operand[0]); /* Brute force substring search. */ - for (od = odata, i = 0; od; od = od->next, i = 0) - if (compare_operands (od, &d->operand[0])) + for (i = 0; od; od = od->eq_next, i = 0) { od2 = od->next; i = 1; @@ -605,6 +617,7 @@ place_operands (class data *d) *odata_end = od2; odata_end = &od2->next; od2->index = next_operand_number++; + insert_operand_data (od2); } *odata_end = NULL; return; @@ -1049,6 +1062,7 @@ main (int argc, const char **argv) progname = "genoutput"; init_insn_for_nothing (); + init_operand_data_table (); if (!init_rtx_reader_args (argc, argv)) return (FATAL_EXIT_CODE); @@ -1224,3 +1238,78 @@ mdep_constraint_len (const char *s, file_location loc, int opno) message_at (loc, "note: in operand %d", opno); return 1; /* safe */ } + +/* Helper to Hash a struct operand_data. */ + +static hashval_t +hash_struct_operand_data (const void *ptr) +{ + const struct operand_data *d = (const struct operand_data *) ptr; + const char *pred, *cons; + hashval_t hash; + + pred = d->predicate; + if (!pred) + pred = ""; + hash = htab_hash_string (pred); + + cons = d->constraint; + if (!cons) + cons = ""; + hash = iterative_hash (cons, strlen (cons), hash); + + hash = iterative_hash_object (d->mode, hash); + hash = iterative_hash_object (d->strict_low, hash); + hash = iterative_hash_object (d->eliminable, hash); + return hash; +} + +/* Equality function of the operand_data hash table. */ + +static int +eq_struct_operand_data (const void *p1, const void *p2) +{ + const struct operand_data *d1 = (const struct operand_data *) p1; + const struct operand_data *d2 = (const struct operand_data *) p2; + + return compare_operands (const_cast<operand_data *>(d1), + const_cast<operand_data *>(d2)); +} + +/* Insert the operand_data variable D into the hash table. + If an variable with the same hash value already exists in the hash table, + insert the element at the end of the linked list connected + through the eq_next member. */ + +static void +insert_operand_data (struct operand_data *d) +{ + void **slot = htab_find_slot (operand_data_table, d, INSERT); + if (*slot) + { + struct operand_data *last = (struct operand_data *) *slot; + while (last->eq_next) + last = last->eq_next; + last->eq_next = d; + } + else + *slot = d; +} + +/* Look up the operand_data D in the hash table. */ + +static struct operand_data * +lookup_operand_data (struct operand_data *d) +{ + return (struct operand_data *) htab_find (operand_data_table, d); +} + +/* Initializes the operand_data hash table. */ + +static void +init_operand_data_table (void) +{ + operand_data_table = htab_create_alloc (64, hash_struct_operand_data, + eq_struct_operand_data, 0, + xcalloc, free); +}