diff mbox series

[v4,3/7] parser: Parse "Depends-on" tags in emails

Message ID 20250130195643.757559-4-ahassick@iol.unh.edu
State Accepted
Headers show
Series Add support for series dependencies | expand

Commit Message

Adam Hassick Jan. 30, 2025, 7:56 p.m. UTC
Add a new function to parse "Depends-on" tags to the parser. The value
may either be the message ID of a patch or cover letter email already
received by Patchwork, or the web URL of a patch or series. When this
tag is found, the parser will add the series (or the series the patch
belongs to) as a dependency to the series it is creating.

This parser feature is only active when the feature flag is enabled on
the project related to the patch or cover letter.

Signed-off-by: Adam Hassick <ahassick@iol.unh.edu>
---
 patchwork/parser.py | 92 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 91 insertions(+), 1 deletion(-)

Comments

Aaron Conole Feb. 7, 2025, 2:58 p.m. UTC | #1
Adam Hassick <ahassick@iol.unh.edu> writes:

> Add a new function to parse "Depends-on" tags to the parser. The value
> may either be the message ID of a patch or cover letter email already
> received by Patchwork, or the web URL of a patch or series. When this
> tag is found, the parser will add the series (or the series the patch
> belongs to) as a dependency to the series it is creating.
>
> This parser feature is only active when the feature flag is enabled on
> the project related to the patch or cover letter.
>
> Signed-off-by: Adam Hassick <ahassick@iol.unh.edu>
> ---

Acked-by: Aaron Conole <aconole@redhat.com>

>  patchwork/parser.py | 92 ++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 91 insertions(+), 1 deletion(-)
>
> diff --git a/patchwork/parser.py b/patchwork/parser.py
> index 09a53a0..4be6dcc 100644
> --- a/patchwork/parser.py
> +++ b/patchwork/parser.py
> @@ -14,11 +14,13 @@ from email.errors import HeaderParseError
>  from fnmatch import fnmatch
>  import logging
>  import re
> +from urllib.parse import urlparse, parse_qs
>  
>  from django.contrib.auth.models import User
>  from django.db.utils import IntegrityError
>  from django.db import transaction
>  from django.utils import timezone as tz_utils
> +from django.urls import resolve, Resolver404
>  
>  from patchwork.models import Cover
>  from patchwork.models import CoverComment
> @@ -32,7 +34,6 @@ from patchwork.models import Series
>  from patchwork.models import SeriesReference
>  from patchwork.models import State
>  
> -
>  _msgid_re = re.compile(r'<[^>]+>')
>  _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
>  _filename_re = re.compile(r'^(---|\+\+\+) (\S+)')
> @@ -1054,6 +1055,85 @@ def parse_pull_request(content):
>      return None
>  
>  
> +def find_series_from_url(url):
> +    """
> +    Get a series from either a series or patch URL.
> +    """
> +    parse_result = urlparse(url)
> +
> +    # Resolve the URL path to see if this is a patch or series detail URL.
> +    try:
> +        result = resolve(parse_result.path)
> +    except Resolver404:
> +        logging.warning('Failed to resolve series or patch URL: %s', url)
> +        return None
> +
> +    # TODO: Use the series detail view here.
> +    if result.view_name == 'patch-list' and parse_result.query:
> +        # Parse the query string.
> +        # This can be replaced with something much friendlier once the
> +        # series detail view is implemented.
> +        series_query_param = parse_qs(parse_result.query)
> +
> +        if series_id := series_query_param.get('series'):
> +            try:
> +                series_id_num = int(series_id[0])
> +            except ValueError:
> +                logging.warning(
> +                    'Series URL with an invalid series query parameter was given: %s',
> +                    url,
> +                )
> +                return None
> +            # This will return None if there are no matches.
> +            return Series.objects.filter(id=series_id_num).first()
> +
> +        logging.warning(
> +            'Series URL did not have a series query parameter: %s', url
> +        )
> +        return None
> +    elif result.view_name == 'patch-detail':
> +        msgid = Patch.decode_msgid(result.kwargs['msgid'])
> +        if patch := Patch.objects.filter(msgid=msgid).first():
> +            return patch.series
> +
> +
> +def find_series_from_msgid(msgid):
> +    """
> +    Get a series from
> +    """
> +    if patch := Patch.objects.filter(msgid=msgid).first():
> +        return patch.series
> +
> +    if cover := Cover.objects.filter(msgid=msgid).first():
> +        return cover.series
> +
> +
> +def parse_depends_on(content):
> +    """Parses any dependency hints in the patch or series content."""
> +    dependencies = []
> +
> +    # Discover dependencies given as URLs.
> +    for url in re.findall(
> +        r'^Depends-on: (http[s]?:\/\/[\w\d\-.\/=&@:%?_\+()]+)\s*$',
> +        content,
> +        flags=re.MULTILINE | re.IGNORECASE,
> +    ):
> +        if series := find_series_from_url(url):
> +            dependencies.append(series)
> +
> +    # Discover dependencies given as message IDs.
> +    for msgid in re.findall(
> +        r'^Depends-on: (<[^>]+>)\s*$',
> +        content,
> +        flags=re.MULTILINE | re.IGNORECASE,
> +    ):
> +        if series := find_series_from_msgid(msgid):
> +            dependencies.append(series)
> +
> +    # Return list of series objects to depend on.
> +    return dependencies
> +
> +
>  def find_state(mail):
>      """Return the state with the given name or the default."""
>      state_name = clean_header(mail.get('X-Patchwork-State', ''))
> @@ -1308,6 +1388,10 @@ def parse_mail(mail, list_id=None):
>              # always have a series
>              series.add_patch(patch, x)
>  
> +        # Only parse patch dependencies if it is enabled for the project.
> +        if project.parse_dependencies:
> +            series.add_dependencies(parse_depends_on(message))
> +
>          return patch
>      elif x == 0:  # (potential) cover letters
>          # if refs are empty, it's implicitly a cover letter. If not,
> @@ -1375,6 +1459,12 @@ def parse_mail(mail, list_id=None):
>  
>              series.add_cover_letter(cover_letter)
>  
> +            # Cover letters are permitted to specify dependencies for the
> +            # entire patch series. Only parse patch dependencies if it is
> +            # enabled for the project.
> +            if project.parse_dependencies:
> +                series.add_dependencies(parse_depends_on(message))
> +
>              return cover_letter
>  
>      # comments
Stephen Finucane March 8, 2025, 10:54 a.m. UTC | #2
On Thu, 2025-01-30 at 14:56 -0500, Adam Hassick wrote:
> Add a new function to parse "Depends-on" tags to the parser. The value
> may either be the message ID of a patch or cover letter email already
> received by Patchwork, or the web URL of a patch or series. When this
> tag is found, the parser will add the series (or the series the patch
> belongs to) as a dependency to the series it is creating.
> 
> This parser feature is only active when the feature flag is enabled on
> the project related to the patch or cover letter.
> 
> Signed-off-by: Adam Hassick <ahassick@iol.unh.edu>

With two notes below.

Reviewed-by: Stephen Finucane <stephen@that.guru>

> ---
>  patchwork/parser.py | 92 ++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 91 insertions(+), 1 deletion(-)
> 
> diff --git a/patchwork/parser.py b/patchwork/parser.py
> index 09a53a0..4be6dcc 100644
> --- a/patchwork/parser.py
> +++ b/patchwork/parser.py
> @@ -14,11 +14,13 @@ from email.errors import HeaderParseError
>  from fnmatch import fnmatch
>  import logging
>  import re
> +from urllib.parse import urlparse, parse_qs
>  
>  from django.contrib.auth.models import User
>  from django.db.utils import IntegrityError
>  from django.db import transaction
>  from django.utils import timezone as tz_utils
> +from django.urls import resolve, Resolver404
>  
>  from patchwork.models import Cover
>  from patchwork.models import CoverComment
> @@ -32,7 +34,6 @@ from patchwork.models import Series
>  from patchwork.models import SeriesReference
>  from patchwork.models import State
>  
> -
>  _msgid_re = re.compile(r'<[^>]+>')
>  _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
>  _filename_re = re.compile(r'^(---|\+\+\+) (\S+)')
> @@ -1054,6 +1055,85 @@ def parse_pull_request(content):
>      return None
>  
>  
> +def find_series_from_url(url):
> +    """
> +    Get a series from either a series or patch URL.
> +    """
> +    parse_result = urlparse(url)
> +
> +    # Resolve the URL path to see if this is a patch or series detail URL.
> +    try:
> +        result = resolve(parse_result.path)
> +    except Resolver404:
> +        logging.warning('Failed to resolve series or patch URL: %s', url)
> +        return None
> +
> +    # TODO: Use the series detail view here.
> +    if result.view_name == 'patch-list' and parse_result.query:
> +        # Parse the query string.
> +        # This can be replaced with something much friendlier once the
> +        # series detail view is implemented.
> +        series_query_param = parse_qs(parse_result.query)
> +
> +        if series_id := series_query_param.get('series'):
> +            try:
> +                series_id_num = int(series_id[0])
> +            except ValueError:
> +                logging.warning(
> +                    'Series URL with an invalid series query parameter was given: %s',
> +                    url,
> +                )
> +                return None
> +            # This will return None if there are no matches.
> +            return Series.objects.filter(id=series_id_num).first()
> +
> +        logging.warning(
> +            'Series URL did not have a series query parameter: %s', url
> +        )
> +        return None
> +    elif result.view_name == 'patch-detail':
> +        msgid = Patch.decode_msgid(result.kwargs['msgid'])
> +        if patch := Patch.objects.filter(msgid=msgid).first():
> +            return patch.series
> +
> +
> +def find_series_from_msgid(msgid):
> +    """
> +    Get a series from
> +    """
> +    if patch := Patch.objects.filter(msgid=msgid).first():
> +        return patch.series
> +
> +    if cover := Cover.objects.filter(msgid=msgid).first():
> +        return cover.series
> +
> +
> +def parse_depends_on(content):
> +    """Parses any dependency hints in the patch or series content."""
> +    dependencies = []
> +
> +    # Discover dependencies given as URLs.
> +    for url in re.findall(
> +        r'^Depends-on: (http[s]?:\/\/[\w\d\-.\/=&@:%?_\+()]+)\s*$',
> +        content,
> +        flags=re.MULTILINE | re.IGNORECASE,
> +    ):
> +        if series := find_series_from_url(url):
> +            dependencies.append(series)
> +
> +    # Discover dependencies given as message IDs.
> +    for msgid in re.findall(
> +        r'^Depends-on: (<[^>]+>)\s*$',
> +        content,
> +        flags=re.MULTILINE | re.IGNORECASE,
> +    ):
> +        if series := find_series_from_msgid(msgid):
> +            dependencies.append(series)
> +
> +    # Return list of series objects to depend on.
> +    return dependencies
> +
> +
>  def find_state(mail):
>      """Return the state with the given name or the default."""
>      state_name = clean_header(mail.get('X-Patchwork-State', ''))
> @@ -1308,6 +1388,10 @@ def parse_mail(mail, list_id=None):
>              # always have a series
>              series.add_patch(patch, x)
>  
> +        # Only parse patch dependencies if it is enabled for the project.
> +        if project.parse_dependencies:
> +            series.add_dependencies(parse_depends_on(message))
> +

I'm going to drop this conditional and enforce it elsewhere.

>          return patch
>      elif x == 0:  # (potential) cover letters
>          # if refs are empty, it's implicitly a cover letter. If not,
> @@ -1375,6 +1459,12 @@ def parse_mail(mail, list_id=None):
>  
>              series.add_cover_letter(cover_letter)
>  
> +            # Cover letters are permitted to specify dependencies for the
> +            # entire patch series. Only parse patch dependencies if it is
> +            # enabled for the project.
> +            if project.parse_dependencies:
> +                series.add_dependencies(parse_depends_on(message))

Ditto.

> +
>              return cover_letter
>  
>      # comments
diff mbox series

Patch

diff --git a/patchwork/parser.py b/patchwork/parser.py
index 09a53a0..4be6dcc 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -14,11 +14,13 @@  from email.errors import HeaderParseError
 from fnmatch import fnmatch
 import logging
 import re
+from urllib.parse import urlparse, parse_qs
 
 from django.contrib.auth.models import User
 from django.db.utils import IntegrityError
 from django.db import transaction
 from django.utils import timezone as tz_utils
+from django.urls import resolve, Resolver404
 
 from patchwork.models import Cover
 from patchwork.models import CoverComment
@@ -32,7 +34,6 @@  from patchwork.models import Series
 from patchwork.models import SeriesReference
 from patchwork.models import State
 
-
 _msgid_re = re.compile(r'<[^>]+>')
 _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
 _filename_re = re.compile(r'^(---|\+\+\+) (\S+)')
@@ -1054,6 +1055,85 @@  def parse_pull_request(content):
     return None
 
 
+def find_series_from_url(url):
+    """
+    Get a series from either a series or patch URL.
+    """
+    parse_result = urlparse(url)
+
+    # Resolve the URL path to see if this is a patch or series detail URL.
+    try:
+        result = resolve(parse_result.path)
+    except Resolver404:
+        logging.warning('Failed to resolve series or patch URL: %s', url)
+        return None
+
+    # TODO: Use the series detail view here.
+    if result.view_name == 'patch-list' and parse_result.query:
+        # Parse the query string.
+        # This can be replaced with something much friendlier once the
+        # series detail view is implemented.
+        series_query_param = parse_qs(parse_result.query)
+
+        if series_id := series_query_param.get('series'):
+            try:
+                series_id_num = int(series_id[0])
+            except ValueError:
+                logging.warning(
+                    'Series URL with an invalid series query parameter was given: %s',
+                    url,
+                )
+                return None
+            # This will return None if there are no matches.
+            return Series.objects.filter(id=series_id_num).first()
+
+        logging.warning(
+            'Series URL did not have a series query parameter: %s', url
+        )
+        return None
+    elif result.view_name == 'patch-detail':
+        msgid = Patch.decode_msgid(result.kwargs['msgid'])
+        if patch := Patch.objects.filter(msgid=msgid).first():
+            return patch.series
+
+
+def find_series_from_msgid(msgid):
+    """
+    Get a series from
+    """
+    if patch := Patch.objects.filter(msgid=msgid).first():
+        return patch.series
+
+    if cover := Cover.objects.filter(msgid=msgid).first():
+        return cover.series
+
+
+def parse_depends_on(content):
+    """Parses any dependency hints in the patch or series content."""
+    dependencies = []
+
+    # Discover dependencies given as URLs.
+    for url in re.findall(
+        r'^Depends-on: (http[s]?:\/\/[\w\d\-.\/=&@:%?_\+()]+)\s*$',
+        content,
+        flags=re.MULTILINE | re.IGNORECASE,
+    ):
+        if series := find_series_from_url(url):
+            dependencies.append(series)
+
+    # Discover dependencies given as message IDs.
+    for msgid in re.findall(
+        r'^Depends-on: (<[^>]+>)\s*$',
+        content,
+        flags=re.MULTILINE | re.IGNORECASE,
+    ):
+        if series := find_series_from_msgid(msgid):
+            dependencies.append(series)
+
+    # Return list of series objects to depend on.
+    return dependencies
+
+
 def find_state(mail):
     """Return the state with the given name or the default."""
     state_name = clean_header(mail.get('X-Patchwork-State', ''))
@@ -1308,6 +1388,10 @@  def parse_mail(mail, list_id=None):
             # always have a series
             series.add_patch(patch, x)
 
+        # Only parse patch dependencies if it is enabled for the project.
+        if project.parse_dependencies:
+            series.add_dependencies(parse_depends_on(message))
+
         return patch
     elif x == 0:  # (potential) cover letters
         # if refs are empty, it's implicitly a cover letter. If not,
@@ -1375,6 +1459,12 @@  def parse_mail(mail, list_id=None):
 
             series.add_cover_letter(cover_letter)
 
+            # Cover letters are permitted to specify dependencies for the
+            # entire patch series. Only parse patch dependencies if it is
+            # enabled for the project.
+            if project.parse_dependencies:
+                series.add_dependencies(parse_depends_on(message))
+
             return cover_letter
 
     # comments