Message ID | 3c66efd4-eb5e-f2bb-6138-4126b5909c9c@codesourcery.com |
---|---|
State | New |
Headers | show |
Series | [v2] contrib/mklog.py: Improve PR handling (was: Re: git gcc-commit-mklog doesn't extract PR number to ChangeLog) | expand |
Hi. I see the following warnings: $ pytest test_mklog.py FAILED test_mklog.py::TestMklog::test_sorting - AssertionError: assert '\n\tPR 50209...New test.\n\n' == 'gcc/ChangeLo...New test.\n\n' $ flake8 mklog.py mklog.py:187:23: Q000 Remove bad quotes and ... > contrib/mklog.py: Improve PR handling > > Co-authored-by: Martin Sebor <msebor@redhat.com> > > contrib/ChangeLog: > > * mklog.py (bugzilla_url): Fetch also component. > (pr_filename_regex): New. > (get_pr_titles): Update PR string with correct format and component. > (generate_changelog): Take additional PRs; extract PR from the > filename. > (__main__): Add -b/--pr-numbers argument. > > contrib/mklog.py | 41 ++++++++++++++++++++++++++++++++--------- > 1 file changed, 32 insertions(+), 9 deletions(-) > > diff --git a/contrib/mklog.py b/contrib/mklog.py > index 1f59055e723..bba6c1a0e1a 100755 > --- a/contrib/mklog.py > +++ b/contrib/mklog.py > @@ -42,6 +42,7 @@ pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)') > prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)') > dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)') > dg_regex = re.compile(r'{\s+dg-(error|warning)') > +pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})') > identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)') > comment_regex = re.compile(r'^\/\*') > struct_regex = re.compile(r'^(class|struct|union|enum)\s+' > @@ -52,7 +53,7 @@ fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]') > template_and_param_regex = re.compile(r'<[^<>]*>') > md_def_regex = re.compile(r'\(define.*\s+"(.*)"') > bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \ > - 'include_fields=summary' > + 'include_fields=summary,component' > > function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'} > > @@ -118,20 +119,23 @@ def sort_changelog_files(changed_file): > > > def get_pr_titles(prs): > - output = '' > - for pr in prs: > + output = [] > + for idx, pr in enumerate(prs): > pr_id = pr.split('/')[-1] > r = requests.get(bugzilla_url % pr_id) > bugs = r.json()['bugs'] > if len(bugs) == 1: > - output += '%s - %s\n' % (pr, bugs[0]['summary']) > - print(output) > + prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id) > + out = '%s - %s\n' % (prs[idx], bugs[0]['summary']) > + if out not in output: > + output.append(out) > if output: > - output += '\n' > - return output > + output.append('') > + return '\n'.join(output) > > > -def generate_changelog(data, no_functions=False, fill_pr_titles=False): > +def generate_changelog(data, no_functions=False, fill_pr_titles=False, > + additional_prs=None): > changelogs = {} > changelog_list = [] > prs = [] > @@ -139,6 +143,8 @@ def generate_changelog(data, no_functions=False, fill_pr_titles=False): > diff = PatchSet(data) > global firstpr > > + if additional_prs: > + prs = [pr for pr in additional_prs if pr not in prs] > for file in diff: > # skip files that can't be parsed > if file.path == '/dev/null': > @@ -154,21 +160,33 @@ def generate_changelog(data, no_functions=False, fill_pr_titles=False): > # Only search first ten lines as later lines may > # contains commented code which a note that it > # has not been tested due to a certain PR or DR. > + this_file_prs = [] > for line in list(file)[0][0:10]: > m = pr_regex.search(line.value) > if m: > pr = m.group('pr') > if pr not in prs: > prs.append(pr) > + this_file_prs.append(pr.split('/')[-1]) > else: > m = dr_regex.search(line.value) > if m: > dr = m.group('dr') > if dr not in prs: > prs.append(dr) > + this_file_prs.append(dr.split('/')[-1]) > elif dg_regex.search(line.value): > # Found dg-warning/dg-error line > break > + # PR number in the file name > + fname = os.path.basename(file.path) This is a dead code. > + fname = os.path.splitext(fname)[0] > + m = pr_filename_regex.search(fname) > + if m: > + pr = m.group('pr') > + pr2 = "PR " + pr Bad quotes here. > + if pr not in this_file_prs and pr2 not in prs: > + prs.append(pr2) > > if prs: > firstpr = prs[0] > @@ -286,6 +304,8 @@ if __name__ == '__main__': > parser = argparse.ArgumentParser(description=help_message) > parser.add_argument('input', nargs='?', > help='Patch file (or missing, read standard input)') > + parser.add_argument('-b', '--pr-numbers', action='append', > + help='Add the specified PRs (comma separated)') Do we really want to support '-b 1 -b 2' and also -b '1,2' formats? Seems to me quite complicated. Cheers, Martin > parser.add_argument('-s', '--no-functions', action='store_true', > help='Do not generate function names in ChangeLogs') > parser.add_argument('-p', '--fill-up-bug-titles', action='store_true', > @@ -308,8 +328,11 @@ if __name__ == '__main__': > if args.update_copyright: > update_copyright(data) > else: > + pr_numbers = args.pr_numbers > + if pr_numbers: > + pr_numbers = [b for i in args.pr_numbers for b in i.split(',')] > output = generate_changelog(data, args.no_functions, > - args.fill_up_bug_titles) > + args.fill_up_bug_titles, pr_numbers) > if args.changelog: > lines = open(args.changelog).read().split('\n') > start = list(takewhile(lambda l: not l.startswith('#'), lines)) On 6/21/21 9:54 AM, Tobias Burnus wrote: > On 17.06.21 02:17, Martin Sebor via Gcc wrote: >> @@ -147,6 +152,12 @@ def generate_changelog(data, no_functions=False, fill_pr_titles=False): >> >> # Extract PR entries from newly added tests >> if 'testsuite' in file.path and file.is_added_file: >> + name = os.path.basename(file.path) >> + name = os.path.splitext(name)[0] >> + if name.startswith("pr"): >> + name = name[2:] >> + name = "PR " + name >> + prs.append(name) > > I think you need a regular expression to extract the PR – as it will both match > too much and to little. We have file names such as: > * libstdc++-pr91488.C (other prefix) > * PR37039.f90 (capitalized PR) > * pr98218-1.C (suffix with '-') > * pr40724_1.f (suffix with '_') > * pr101023a.C (suffix with a letter) > > But otherwise, I like that idea. > > * * * > > Changes in my patch compared to v1: > - (From Martin's patch:) Extract the PR from new-files file > name (using pattern matching), but only take the PR if the > PR wasn't found in the file as PR comment. > (The latter happens, e.g., with b376b1ef389.) > - Avoid printing the same PR multiple times as summary line > (duplicates occur due to 'PR 134' vs. 'PR comp/123' vs. > 'PR othercomp/123') — This does not avoid all issues but at least > some. If this becomes a real world issue, we can try harder. > > OK to commit this one? — Comments? > > * * * > > I did leave out other changes as they seem to be less clear cut, > and which can be still be handled as follow up. Like: > - Adding 'Resolves:' (as in some cases it only resolves part of > the PR) > - ... other changes/patches I missed. (This thread has too many > emails.) In particular, if > ^PR <comp>/<pr> - .... > is accepted by gcc-commit/, then there is no need to list the > PRs individually later on. But currently, it is still required. > > * * * > > Cross ref: > * v1 of my patch was at > https://gcc.gnu.org/pipermail/gcc/2021-June/236498.html > * Discussion of the -b option is at > https://gcc.gnu.org/pipermail/gcc/2021-June/236519.html > * Martin S's patch (partially quoted above) is at > https://gcc.gnu.org/pipermail/gcc/2021-June/236460.html > > Tobias > > ----------------- > Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank Thürauf
On 21.06.21 10:09, Martin Liška wrote: > $ pytest test_mklog.py > FAILED test_mklog.py::TestMklog::test_sorting - AssertionError: assert > '\n\tPR 50209...New test.\n\n' == 'gcc/ChangeLo...New test.\n\n' Aha, missed that there is indeed a testsuite - nice! > $ flake8 mklog.py > mklog.py:187:23: Q000 Remove bad quotes I have now filled: https://bugs.launchpad.net/ubuntu/+source/python-pytest-flake8/+bug/1933075 >> + # PR number in the file name >> + fname = os.path.basename(file.path) > > This is a dead code. > >> + fname = os.path.splitext(fname)[0] >> + m = pr_filename_regex.search(fname) It does not look like dead code to me. >> + parser.add_argument('-b', '--pr-numbers', action='append', >> + help='Add the specified PRs (comma separated)') > > Do we really want to support '-b 1 -b 2' and also -b '1,2' formats? > Seems to me quite > complicated. I don't have a strong opinion. I started with '-b 123,245', believing that the syntax is fine. But then I realized that without '-p' specifying multiple '-b' looks better by having multiple '-b' if 'PR <component>/' (needed for -p as the string is than taken as is). Thus, I ended up supporting either variant. But I also happily drop the ',' support. Change: One quote change, one test_mklog update. Tobias ----------------- Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank Thürauf
On 6/21/21 10:37 AM, Tobias Burnus wrote: > On 21.06.21 10:09, Martin Liška wrote: > >> $ pytest test_mklog.py >> FAILED test_mklog.py::TestMklog::test_sorting - AssertionError: assert >> '\n\tPR 50209...New test.\n\n' == 'gcc/ChangeLo...New test.\n\n' > Aha, missed that there is indeed a testsuite - nice! >> $ flake8 mklog.py >> mklog.py:187:23: Q000 Remove bad quotes > I have now filled: > https://bugs.launchpad.net/ubuntu/+source/python-pytest-flake8/+bug/1933075 > >>> + # PR number in the file name >>> + fname = os.path.basename(file.path) >> >> This is a dead code. >> >>> + fname = os.path.splitext(fname)[0] >>> + m = pr_filename_regex.search(fname) > It does not look like dead code to me. Hello. The code is weird as os.path.basename returns: In [5]: os.path.basename('/tmp/a/b/c.txt') Out[5]: 'c.txt' why do you need os.path.splitext(fname) call? >>> + parser.add_argument('-b', '--pr-numbers', action='append', >>> + help='Add the specified PRs (comma separated)') >> >> Do we really want to support '-b 1 -b 2' and also -b '1,2' formats? >> Seems to me quite >> complicated. > > I don't have a strong opinion. I started with '-b 123,245', believing > that the syntax is fine. But then I realized that without '-p' > specifying multiple '-b' looks better by having multiple '-b' if 'PR > <component>/' (needed for -p as the string is than taken as is). Thus, > I ended up supporting either variant. I would start with -b 1,2,3,4 syntax. It will be likely easier for git alias integration. Martin > > But I also happily drop the ',' support. > > Change: One quote change, one test_mklog update. > > Tobias > > ----------------- > Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank Thürauf
Now committed as r12-1700-gedf0c3ffb59d75c11e05bc722432dc554e275c72 / as attached. (We had some follow-up discussion on IRC.) On 21.06.21 14:53, Martin Liška wrote: >>>> + # PR number in the file name >>>> + fname = os.path.basename(file.path) >>> >>> This is a dead code. >>> >>>> + fname = os.path.splitext(fname)[0] >>>> + m = pr_filename_regex.search(fname) (Meant was the 'splitext' line - it is dead code as the re.search globs all digits after 'pr' and then stops, ignoring the rest, including file extensions. – I first thought it referred to the basename line, which confused me.) >>>> + parser.add_argument('-b', '--pr-numbers', action='append', >>>> + help='Add the specified PRs (comma >>>> separated)') >>> >>> Do we really want to support '-b 1 -b 2' and also -b '1,2' formats? >>> Seems to me quite >>> complicated. > [...] > I would start with -b 1,2,3,4 syntax. It will be likely easier for git > alias integration. Done so. I note that argparse permits '-d dir1 -d dir2 -b bug1 -b bug2' which then only keeps the dir2 as directory and bug2 as bug without printing an error (or warning) for ignoring dir1 and bug1. Tobias ----------------- Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank Thürauf
contrib/mklog.py: Improve PR handling Co-authored-by: Martin Sebor <msebor@redhat.com> contrib/ChangeLog: * mklog.py (bugzilla_url): Fetch also component. (pr_filename_regex): New. (get_pr_titles): Update PR string with correct format and component. (generate_changelog): Take additional PRs; extract PR from the filename. (__main__): Add -b/--pr-numbers argument. contrib/mklog.py | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/contrib/mklog.py b/contrib/mklog.py index 1f59055e723..bba6c1a0e1a 100755 --- a/contrib/mklog.py +++ b/contrib/mklog.py @@ -42,6 +42,7 @@ pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)') prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)') dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)') dg_regex = re.compile(r'{\s+dg-(error|warning)') +pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})') identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)') comment_regex = re.compile(r'^\/\*') struct_regex = re.compile(r'^(class|struct|union|enum)\s+' @@ -52,7 +53,7 @@ fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]') template_and_param_regex = re.compile(r'<[^<>]*>') md_def_regex = re.compile(r'\(define.*\s+"(.*)"') bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \ - 'include_fields=summary' + 'include_fields=summary,component' function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'} @@ -118,20 +119,23 @@ def sort_changelog_files(changed_file): def get_pr_titles(prs): - output = '' - for pr in prs: + output = [] + for idx, pr in enumerate(prs): pr_id = pr.split('/')[-1] r = requests.get(bugzilla_url % pr_id) bugs = r.json()['bugs'] if len(bugs) == 1: - output += '%s - %s\n' % (pr, bugs[0]['summary']) - print(output) + prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id) + out = '%s - %s\n' % (prs[idx], bugs[0]['summary']) + if out not in output: + output.append(out) if output: - output += '\n' - return output + output.append('') + return '\n'.join(output) -def generate_changelog(data, no_functions=False, fill_pr_titles=False): +def generate_changelog(data, no_functions=False, fill_pr_titles=False, + additional_prs=None): changelogs = {} changelog_list = [] prs = [] @@ -139,6 +143,8 @@ def generate_changelog(data, no_functions=False, fill_pr_titles=False): diff = PatchSet(data) global firstpr + if additional_prs: + prs = [pr for pr in additional_prs if pr not in prs] for file in diff: # skip files that can't be parsed if file.path == '/dev/null': @@ -154,21 +160,33 @@ def generate_changelog(data, no_functions=False, fill_pr_titles=False): # Only search first ten lines as later lines may # contains commented code which a note that it # has not been tested due to a certain PR or DR. + this_file_prs = [] for line in list(file)[0][0:10]: m = pr_regex.search(line.value) if m: pr = m.group('pr') if pr not in prs: prs.append(pr) + this_file_prs.append(pr.split('/')[-1]) else: m = dr_regex.search(line.value) if m: dr = m.group('dr') if dr not in prs: prs.append(dr) + this_file_prs.append(dr.split('/')[-1]) elif dg_regex.search(line.value): # Found dg-warning/dg-error line break + # PR number in the file name + fname = os.path.basename(file.path) + fname = os.path.splitext(fname)[0] + m = pr_filename_regex.search(fname) + if m: + pr = m.group('pr') + pr2 = "PR " + pr + if pr not in this_file_prs and pr2 not in prs: + prs.append(pr2) if prs: firstpr = prs[0] @@ -286,6 +304,8 @@ if __name__ == '__main__': parser = argparse.ArgumentParser(description=help_message) parser.add_argument('input', nargs='?', help='Patch file (or missing, read standard input)') + parser.add_argument('-b', '--pr-numbers', action='append', + help='Add the specified PRs (comma separated)') parser.add_argument('-s', '--no-functions', action='store_true', help='Do not generate function names in ChangeLogs') parser.add_argument('-p', '--fill-up-bug-titles', action='store_true', @@ -308,8 +328,11 @@ if __name__ == '__main__': if args.update_copyright: update_copyright(data) else: + pr_numbers = args.pr_numbers + if pr_numbers: + pr_numbers = [b for i in args.pr_numbers for b in i.split(',')] output = generate_changelog(data, args.no_functions, - args.fill_up_bug_titles) + args.fill_up_bug_titles, pr_numbers) if args.changelog: lines = open(args.changelog).read().split('\n') start = list(takewhile(lambda l: not l.startswith('#'), lines))