From 3bb0fd4434efa7770554e0e559efc09692dd0f62 Mon Sep 17 00:00:00 2001 From: Marco Esters Date: Tue, 30 Jul 2024 11:51:03 -0700 Subject: [PATCH] Add option to output file with installer hashes (#816) * Output hash for installer files * Add news file * Add missing _output_dir * Fix tests to check for two hashes * Remove line break from test files to achieve reproducibility between Unix and Windows * Update docs * Output one hash file per installer * Replace os.path with pathlib for all outputs * Remove EOL from JSON outputs * Revert "Remove EOL from JSON outputs" This reverts commit 5f65d3470ddbe2cd5982dc50dd99351799843444. * Revert "Replace os.path with pathlib for all outputs" This reverts commit b0f4f19d852d2f86dd8ab4df0ac02dfe126e8d0a. * Allow for algorithm list * Update constructor/build_outputs.py --------- Co-authored-by: jaimergp --- CONSTRUCT.md | 3 +++ constructor/build_outputs.py | 37 +++++++++++++++++++++++++++---- constructor/construct.py | 3 +++ constructor/main.py | 12 ++++++++++ docs/source/construct-yaml.md | 3 +++ news/816-output-installer-hashes | 19 ++++++++++++++++ tests/test_outputs.py | 38 ++++++++++++++++++++++++++++++++ 7 files changed, 111 insertions(+), 4 deletions(-) create mode 100644 news/816-output-installer-hashes create mode 100644 tests/test_outputs.py diff --git a/CONSTRUCT.md b/CONSTRUCT.md index 0fdf19f73..20a516c89 100644 --- a/CONSTRUCT.md +++ b/CONSTRUCT.md @@ -825,6 +825,9 @@ _type:_ list
Additional artifacts to be produced after building the installer. It expects either a list of strings or single-key dictionaries: Allowed keys are: +- `hash`: The hash of the installer files. + - `algorithm` (str or list): The hash algorithm. Must be among `hashlib`'s available algorithms: + https://docs.python.org/3/library/hashlib.html#hashlib.algorithms_available - `info.json`: The internal `info` object, serialized to JSON. Takes no options. - `pkgs_list`: The list of packages contained in a given environment. Options: - `env` (optional, default=`base`): Name of an environment in `extra_envs` to export. diff --git a/constructor/build_outputs.py b/constructor/build_outputs.py index 2cef1c8c3..94c065b34 100644 --- a/constructor/build_outputs.py +++ b/constructor/build_outputs.py @@ -3,6 +3,7 @@ Update documentation in `construct.py` if any changes are made. """ +import hashlib import json import logging import os @@ -33,14 +34,41 @@ def process_build_outputs(info): f"Available keys: {tuple(OUTPUT_HANDLERS.keys())}" ) outpath = handler(info, **config) - logger.info("build_outputs: '%s' created '%s'.", name, os.path.abspath(outpath)) + logger.info("build_outputs: '%s' created '%s'.", name, outpath) + + +def dump_hash(info, algorithm=None): + algorithm = algorithm or [] + if isinstance(algorithm, str): + algorithm = [algorithm] + algorithms = set(algorithm) + if any(algo not in hashlib.algorithms_available for algo in algorithms): + invalid = algorithms.difference(set(hashlib.algorithms_available)) + raise ValueError(f"Invalid algorithm: {', '.join(invalid)}") + BUFFER_SIZE = 65536 + if isinstance(info["_outpath"], str): + installers = [Path(info["_outpath"])] + else: + installers = [Path(outpath) for outpath in info["_outpath"]] + outpaths = [] + for installer in installers: + filehashes = {algo: hashlib.new(algo) for algo in algorithms} + with open(installer, "rb") as f: + while buffer := f.read(BUFFER_SIZE): + for algo in algorithms: + 
filehashes[algo].update(buffer) + for algo, filehash in filehashes.items(): + outpath = Path(f"{installer}.{algo}") + outpath.write_text(f"{filehash.hexdigest()} {installer.name}\n") + outpaths.append(str(outpath.absolute())) + return ", ".join(outpaths) def dump_info(info): outpath = os.path.join(info["_output_dir"], "info.json") with open(outpath, "w") as f: json.dump(info, f, indent=2, default=repr) - return outpath + return os.path.abspath(outpath) def dump_packages_list(info, env="base"): @@ -55,7 +83,7 @@ def dump_packages_list(info, env="base"): with open(outpath, 'w') as fo: fo.write(f"# {info['name']} {info['version']}, env={env}\n") fo.write("\n".join(dists)) - return outpath + return os.path.abspath(outpath) def dump_licenses(info, include_text=False, text_errors=None): @@ -105,10 +133,11 @@ def dump_licenses(info, include_text=False, text_errors=None): outpath = os.path.join(info["_output_dir"], "licenses.json") with open(outpath, "w") as f: json.dump(licenses, f, indent=2, default=repr) - return outpath + return os.path.abspath(outpath) OUTPUT_HANDLERS = { + "hash": dump_hash, "info.json": dump_info, "pkgs_list": dump_packages_list, "licenses": dump_licenses, diff --git a/constructor/construct.py b/constructor/construct.py index 1585fb9ca..8ec213813 100644 --- a/constructor/construct.py +++ b/constructor/construct.py @@ -606,6 +606,9 @@ Additional artifacts to be produced after building the installer. It expects either a list of strings or single-key dictionaries: Allowed keys are: +- `hash`: The hash of the installer files. + - `algorithm` (str or list): The hash algorithm. Must be among `hashlib`'s available algorithms: + https://docs.python.org/3/library/hashlib.html#hashlib.algorithms_available - `info.json`: The internal `info` object, serialized to JSON. Takes no options. - `pkgs_list`: The list of packages contained in a given environment. Options: - `env` (optional, default=`base`): Name of an environment in `extra_envs` to export. 
diff --git a/constructor/main.py b/constructor/main.py index 13bac0630..ade9f66db 100644 --- a/constructor/main.py +++ b/constructor/main.py @@ -229,6 +229,7 @@ def main_build(dir_path, output_dir='.', platform=cc_platform, # '_dists': List[Dist] # '_urls': List[Tuple[url, md5]] + info_dicts = [] for itype in itypes: if itype == 'sh': from .shar import create as shar_create @@ -242,8 +243,19 @@ def main_build(dir_path, output_dir='.', platform=cc_platform, info['installer_type'] = itype info['_outpath'] = abspath(join(output_dir, get_output_filename(info))) create(info, verbose=verbose) + if len(itypes) > 1: + info_dicts.append(info.copy()) logger.info("Successfully created '%(_outpath)s'.", info) + # Merge info files for each installer type + if len(itypes) > 1: + keys = set() + for info_dict in info_dicts: + keys.update(info_dict.keys()) + for key in keys: + if any(info_dict.get(key) != info.get(key) for info_dict in info_dicts): + info[key] = [info_dict.get(key, "") for info_dict in info_dicts] + process_build_outputs(info) diff --git a/docs/source/construct-yaml.md b/docs/source/construct-yaml.md index 0fdf19f73..20a516c89 100644 --- a/docs/source/construct-yaml.md +++ b/docs/source/construct-yaml.md @@ -825,6 +825,9 @@ _type:_ list
Additional artifacts to be produced after building the installer. It expects either a list of strings or single-key dictionaries: Allowed keys are: +- `hash`: The hash of the installer files. + - `algorithm` (str or list): The hash algorithm. Must be among `hashlib`'s available algorithms: + https://docs.python.org/3/library/hashlib.html#hashlib.algorithms_available - `info.json`: The internal `info` object, serialized to JSON. Takes no options. - `pkgs_list`: The list of packages contained in a given environment. Options: - `env` (optional, default=`base`): Name of an environment in `extra_envs` to export. diff --git a/news/816-output-installer-hashes b/news/816-output-installer-hashes new file mode 100644 index 000000000..f3a04b2dc --- /dev/null +++ b/news/816-output-installer-hashes @@ -0,0 +1,19 @@ +### Enhancements + +* Add option to output hashes of installer files. (#816) + +### Bug fixes + +* + +### Deprecations + +* + +### Docs + +* + +### Other + +* diff --git a/tests/test_outputs.py b/tests/test_outputs.py new file mode 100644 index 000000000..ac0586c3a --- /dev/null +++ b/tests/test_outputs.py @@ -0,0 +1,38 @@ +from pathlib import Path + +import pytest + +from constructor.build_outputs import dump_hash + + +def test_hash_dump(tmp_path): + testfile = tmp_path / "test.txt" + testfile.write_text("test string") + testfile = tmp_path / "test2.txt" + testfile.write_text("another test") + expected = { + "sha256": ( + "d5579c46dfcc7f18207013e65b44e4cb4e2c2298f4ac457ba8f82743f31e930b", + "64320dd12e5c2caeac673b91454dac750c08ba333639d129671c2f58cb5d0ad1", + ), + "md5": ( + "6f8db599de986fab7a21625b7916589c", + "5e8862cd73694287ff341e75c95e3c6a", + ), + } + info = { + "_outpath": [ + str(tmp_path / "test.txt"), + str(tmp_path / "test2.txt"), + ] + } + with pytest.raises(ValueError): + dump_hash(info, algorithm="bad_algorithm") + dump_hash(info, algorithm=["sha256", "md5"]) + for f, file in enumerate(info["_outpath"]): + for algorithm in expected: + hashfile = 
Path(f"{file}.{algorithm}") + assert hashfile.exists() + filehash, filename = hashfile.read_text().strip().split() + assert filehash == expected[algorithm][f] + assert filename == Path(file).name