diff mbox series

[v3,08/24] tests/functional: add a module for handling asset download & caching

Message ID 20240730170347.4103919-9-berrange@redhat.com
State New
Headers show
Series Convert avocado tests to normal Python unittests | expand

Commit Message

Daniel P. Berrangé July 30, 2024, 5:03 p.m. UTC
The 'Asset' class is a simple module that declares a downloadable
asset that can be cached locally. Downloads are stored in the user's
home dir at ~/.cache/qemu/download, using a sha256 sum of the URL.

Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 tests/functional/qemu_test/__init__.py |  1 +
 tests/functional/qemu_test/asset.py    | 96 ++++++++++++++++++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 tests/functional/qemu_test/asset.py

Comments

Philippe Mathieu-Daudé Aug. 1, 2024, 4:20 p.m. UTC | #1
On 30/7/24 19:03, Daniel P. Berrangé wrote:
> The 'Asset' class is a simple module that declares a downloadable
> asset that can be cached locally. Downloads are stored in the user's
> home dir at ~/.cache/qemu/download, using a sha256 sum of the URL.
> 
> Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
> ---
>   tests/functional/qemu_test/__init__.py |  1 +
>   tests/functional/qemu_test/asset.py    | 96 ++++++++++++++++++++++++++
>   2 files changed, 97 insertions(+)
>   create mode 100644 tests/functional/qemu_test/asset.py
> 
> diff --git a/tests/functional/qemu_test/__init__.py b/tests/functional/qemu_test/__init__.py
> index 2f1e0bc70d..db05c8f412 100644
> --- a/tests/functional/qemu_test/__init__.py
> +++ b/tests/functional/qemu_test/__init__.py
> @@ -6,6 +6,7 @@
>   # later.  See the COPYING file in the top-level directory.
>   
>   
> +from .asset import Asset

For next patch?

>   from .config import BUILD_DIR
>   from .cmd import has_cmd, has_cmds, run_cmd, is_readable_executable_file, \
>       interrupt_interactive_console_until_pattern, wait_for_console_pattern, \
> diff --git a/tests/functional/qemu_test/asset.py b/tests/functional/qemu_test/asset.py
> new file mode 100644
> index 0000000000..6432da2e0b
> --- /dev/null
> +++ b/tests/functional/qemu_test/asset.py
> @@ -0,0 +1,96 @@
> +# Test utilities for fetching & caching assets
> +#
> +# Copyright 2024 Red Hat, Inc.
> +#
> +# This work is licensed under the terms of the GNU GPL, version 2 or
> +# later.  See the COPYING file in the top-level directory.
> +
> +import hashlib
> +import logging
> +import os
> +import subprocess
> +import urllib.request
> +from pathlib import Path
> +from shutil import copyfileobj
> +
> +
> +# Instances of this class must be declared as class level variables
> +# starting with a name "ASSET_". This enables the pre-caching logic
> +# to easily find all referenced assets and download them prior to
> +# execution of the tests.
> +class Asset:
> +
> +    def __init__(self, url, hash):
> +        self.url = url
> +        self.hash = hash
> +        self.cache_dir = Path(Path("~").expanduser(),
> +                              ".cache", "qemu", "download")
> +        self.cache_file = Path(self.cache_dir,
> +                               hashlib.sha256(url.encode("utf-8")).hexdigest())
> +        self.log = logging.getLogger('qemu-test')
> +
> +    def __repr__(self):
> +        return "Asset: url=%s hash=%s cache=%s" % (
> +            self.url, self.hash, self.cache_file)
> +
> +    def _check(self, cache_file):
> +        if self.hash is None:
> +            return True
> +        if len(self.hash) == 40:
> +            sum_prog = 'sha1sum'
> +        elif len(self.hash) == 64:
> +            sum_prog = 'sha256sum'

Do we want to support these? Should we declare them deprecated
and emit a warning?

> +        elif len(self.hash) == 128:
> +            sum_prog = 'sha512sum'
> +        else:
> +            raise Exception("unknown hash type")

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>

Very nice :)
Daniel P. Berrangé Aug. 1, 2024, 5 p.m. UTC | #2
On Thu, Aug 01, 2024 at 06:20:58PM +0200, Philippe Mathieu-Daudé wrote:
> On 30/7/24 19:03, Daniel P. Berrangé wrote:
> > The 'Asset' class is a simple module that declares a downloadable
> > asset that can be cached locally. Downloads are stored in the user's
> > home dir at ~/.cache/qemu/download, using a sha256 sum of the URL.
> > 
> > Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
> > ---
> >   tests/functional/qemu_test/__init__.py |  1 +
> >   tests/functional/qemu_test/asset.py    | 96 ++++++++++++++++++++++++++
> >   2 files changed, 97 insertions(+)
> >   create mode 100644 tests/functional/qemu_test/asset.py
> > 
> > diff --git a/tests/functional/qemu_test/__init__.py b/tests/functional/qemu_test/__init__.py
> > index 2f1e0bc70d..db05c8f412 100644
> > --- a/tests/functional/qemu_test/__init__.py
> > +++ b/tests/functional/qemu_test/__init__.py
> > @@ -6,6 +6,7 @@
> >   # later.  See the COPYING file in the top-level directory.
> > +from .asset import Asset
> 
> For next patch?
> 
> >   from .config import BUILD_DIR
> >   from .cmd import has_cmd, has_cmds, run_cmd, is_readable_executable_file, \
> >       interrupt_interactive_console_until_pattern, wait_for_console_pattern, \
> > diff --git a/tests/functional/qemu_test/asset.py b/tests/functional/qemu_test/asset.py
> > new file mode 100644
> > index 0000000000..6432da2e0b
> > --- /dev/null
> > +++ b/tests/functional/qemu_test/asset.py
> > @@ -0,0 +1,96 @@
> > +# Test utilities for fetching & caching assets
> > +#
> > +# Copyright 2024 Red Hat, Inc.
> > +#
> > +# This work is licensed under the terms of the GNU GPL, version 2 or
> > +# later.  See the COPYING file in the top-level directory.
> > +
> > +import hashlib
> > +import logging
> > +import os
> > +import subprocess
> > +import urllib.request
> > +from pathlib import Path
> > +from shutil import copyfileobj
> > +
> > +
> > +# Instances of this class must be declared as class level variables
> > +# starting with a name "ASSET_". This enables the pre-caching logic
> > +# to easily find all referenced assets and download them prior to
> > +# execution of the tests.
> > +class Asset:
> > +
> > +    def __init__(self, url, hash):
> > +        self.url = url
> > +        self.hash = hash
> > +        self.cache_dir = Path(Path("~").expanduser(),
> > +                              ".cache", "qemu", "download")
> > +        self.cache_file = Path(self.cache_dir,
> > +                               hashlib.sha256(url.encode("utf-8")).hexdigest())
> > +        self.log = logging.getLogger('qemu-test')
> > +
> > +    def __repr__(self):
> > +        return "Asset: url=%s hash=%s cache=%s" % (
> > +            self.url, self.hash, self.cache_file)
> > +
> > +    def _check(self, cache_file):
> > +        if self.hash is None:
> > +            return True
> > +        if len(self.hash) == 40:
> > +            sum_prog = 'sha1sum'
> > +        elif len(self.hash) == 64:
> > +            sum_prog = 'sha256sum'
> 
> Do we want to support these? Should we declare them deprecated
> and emit a warning?

Oh thanks for the reminder. I wanted to standardize on sha256 ,since
sha1 is broken and sha512 is overkill.

Since I've run the tests once I've got everything download and can
now trivially generate the sha256 for everything we need.

> 
> > +        elif len(self.hash) == 128:
> > +            sum_prog = 'sha512sum'
> > +        else:
> > +            raise Exception("unknown hash type")
> 
> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> 
> Very nice :)
> 

With regards,
Daniel
Daniel P. Berrangé Aug. 1, 2024, 5:02 p.m. UTC | #3
On Thu, Aug 01, 2024 at 06:20:58PM +0200, Philippe Mathieu-Daudé wrote:
> On 30/7/24 19:03, Daniel P. Berrangé wrote:
> > The 'Asset' class is a simple module that declares a downloadable
> > asset that can be cached locally. Downloads are stored in the user's
> > home dir at ~/.cache/qemu/download, using a sha256 sum of the URL.
> > 
> > Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
> > ---
> >   tests/functional/qemu_test/__init__.py |  1 +
> >   tests/functional/qemu_test/asset.py    | 96 ++++++++++++++++++++++++++
> >   2 files changed, 97 insertions(+)
> >   create mode 100644 tests/functional/qemu_test/asset.py
> > 
> > diff --git a/tests/functional/qemu_test/__init__.py b/tests/functional/qemu_test/__init__.py
> > index 2f1e0bc70d..db05c8f412 100644
> > --- a/tests/functional/qemu_test/__init__.py
> > +++ b/tests/functional/qemu_test/__init__.py
> > @@ -6,6 +6,7 @@
> >   # later.  See the COPYING file in the top-level directory.
> > +from .asset import Asset
> 
> For next patch?

No, the idea is that a test class can just do 'from qemu_test import Asset',
so they don't need to be aware of the fact that we've split up the code into
separate files beneath the qemu_test/ directory.

> 
> >   from .config import BUILD_DIR
> >   from .cmd import has_cmd, has_cmds, run_cmd, is_readable_executable_file, \
> >       interrupt_interactive_console_until_pattern, wait_for_console_pattern, \

With regards,
Daniel
Richard Henderson Aug. 1, 2024, 9:51 p.m. UTC | #4
On 7/31/24 03:03, Daniel P. Berrangé wrote:
> +    def __init__(self, url, hash):
> +        self.url = url
> +        self.hash = hash
> +        self.cache_dir = Path(Path("~").expanduser(),
> +                              ".cache", "qemu", "download")
> +        self.cache_file = Path(self.cache_dir,
> +                               hashlib.sha256(url.encode("utf-8")).hexdigest())

Out of curiosity, why are we hashing the url from which we downloaded?
Why not use the known hash of the contents?

If a url goes stale, and we adjust the url to a new host, we shouldn't need to re-download 
just because the url changed.


r~
Daniel P. Berrangé Aug. 2, 2024, 8:32 a.m. UTC | #5
On Fri, Aug 02, 2024 at 07:51:14AM +1000, Richard Henderson wrote:
> On 7/31/24 03:03, Daniel P. Berrangé wrote:
> > +    def __init__(self, url, hash):
> > +        self.url = url
> > +        self.hash = hash
> > +        self.cache_dir = Path(Path("~").expanduser(),
> > +                              ".cache", "qemu", "download")
> > +        self.cache_file = Path(self.cache_dir,
> > +                               hashlib.sha256(url.encode("utf-8")).hexdigest())
> 
> Out of curiosity, why are we hashing the url from which we downloaded?
> Why not use the known hash of the contents?
> 
> If a url goes stale, and we adjust the url to a new host, we shouldn't need
> to re-download just because the url changed.

Hmmm, good point, will change it. 

With regards,
Daniel
Philippe Mathieu-Daudé Aug. 2, 2024, 1:24 p.m. UTC | #6
On 1/8/24 19:02, Daniel P. Berrangé wrote:
> On Thu, Aug 01, 2024 at 06:20:58PM +0200, Philippe Mathieu-Daudé wrote:
>> On 30/7/24 19:03, Daniel P. Berrangé wrote:
>>> The 'Asset' class is a simple module that declares a downloadable
>>> asset that can be cached locally. Downloads are stored in the user's
>>> home dir at ~/.cache/qemu/download, using a sha256 sum of the URL.
>>>
>>> Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
>>> ---
>>>    tests/functional/qemu_test/__init__.py |  1 +
>>>    tests/functional/qemu_test/asset.py    | 96 ++++++++++++++++++++++++++
>>>    2 files changed, 97 insertions(+)
>>>    create mode 100644 tests/functional/qemu_test/asset.py
>>>
>>> diff --git a/tests/functional/qemu_test/__init__.py b/tests/functional/qemu_test/__init__.py
>>> index 2f1e0bc70d..db05c8f412 100644
>>> --- a/tests/functional/qemu_test/__init__.py
>>> +++ b/tests/functional/qemu_test/__init__.py
>>> @@ -6,6 +6,7 @@
>>>    # later.  See the COPYING file in the top-level directory.
>>> +from .asset import Asset
>>
>> For next patch?
> 
> No, the idea is that a test class can just do 'from qemu_test import Asset',
> so they don't need to be aware of the fact that we've split up the code into
> separate files beneath the qemu_test/ directory.

Oh indeed I missed that, thanks for clarifying.
diff mbox series

Patch

diff --git a/tests/functional/qemu_test/__init__.py b/tests/functional/qemu_test/__init__.py
index 2f1e0bc70d..db05c8f412 100644
--- a/tests/functional/qemu_test/__init__.py
+++ b/tests/functional/qemu_test/__init__.py
@@ -6,6 +6,7 @@ 
 # later.  See the COPYING file in the top-level directory.
 
 
+from .asset import Asset
 from .config import BUILD_DIR
 from .cmd import has_cmd, has_cmds, run_cmd, is_readable_executable_file, \
     interrupt_interactive_console_until_pattern, wait_for_console_pattern, \
diff --git a/tests/functional/qemu_test/asset.py b/tests/functional/qemu_test/asset.py
new file mode 100644
index 0000000000..6432da2e0b
--- /dev/null
+++ b/tests/functional/qemu_test/asset.py
@@ -0,0 +1,96 @@ 
+# Test utilities for fetching & caching assets
+#
+# Copyright 2024 Red Hat, Inc.
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+import hashlib
+import logging
+import os
+import subprocess
+import urllib.request
+from pathlib import Path
+from shutil import copyfileobj
+
+
+# Instances of this class must be declared as class level variables
+# starting with a name "ASSET_". This enables the pre-caching logic
+# to easily find all referenced assets and download them prior to
+# execution of the tests.
+class Asset:
+
+    def __init__(self, url, hash):
+        self.url = url
+        self.hash = hash
+        self.cache_dir = Path(Path("~").expanduser(),
+                              ".cache", "qemu", "download")
+        self.cache_file = Path(self.cache_dir,
+                               hashlib.sha256(url.encode("utf-8")).hexdigest())
+        self.log = logging.getLogger('qemu-test')
+
+    def __repr__(self):
+        return "Asset: url=%s hash=%s cache=%s" % (
+            self.url, self.hash, self.cache_file)
+
+    def _check(self, cache_file):
+        if self.hash is None:
+            return True
+        if len(self.hash) == 40:
+            sum_prog = 'sha1sum'
+        elif len(self.hash) == 64:
+            sum_prog = 'sha256sum'
+        elif len(self.hash) == 128:
+            sum_prog = 'sha512sum'
+        else:
+            raise Exception("unknown hash type")
+
+        checksum = subprocess.check_output(
+            [sum_prog, str(cache_file)]).split()[0]
+        return self.hash == checksum.decode("utf-8")
+
+    def valid(self):
+        return self.cache_file.exists() and self._check(self.cache_file)
+
+    def fetch(self):
+        if not self.cache_dir.exists():
+            self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+        if self.valid():
+            self.log.debug("Using cached asset %s for %s",
+                           self.cache_file, self.url)
+            return str(self.cache_file)
+
+        self.log.info("Downloading %s to %s...", self.url, self.cache_file)
+        tmp_cache_file = self.cache_file.with_suffix(".download")
+
+        try:
+            resp = urllib.request.urlopen(self.url)
+        except Exception as e:
+            self.log.error("Unable to download %s: %s", self.url, e)
+            raise
+
+        try:
+            with tmp_cache_file.open("wb+") as dst:
+                copyfileobj(resp, dst)
+        except:
+            tmp_cache_file.unlink()
+            raise
+        try:
+            # Set these just for informational purposes
+            os.setxattr(str(tmp_cache_file), "user.qemu-asset-url",
+                        self.url.encode('utf8'))
+            os.setxattr(str(tmp_cache_file), "user.qemu-asset-hash",
+                        self.hash.encode('utf8'))
+        except Exception as e:
+            self.log.info("Unable to set xattr on %s: %s", tmp_cache_file, e)
+            pass
+
+        if not self._check(tmp_cache_file):
+            tmp_cache_file.unlink()
+            raise Exception("Hash of %s does not match %s" %
+                            (self.url, self.hash))
+        tmp_cache_file.replace(self.cache_file)
+
+        self.log.info("Cached %s at %s" % (self.url, self.cache_file))
+        return str(self.cache_file)