Skip to content

Commit

Permalink
Add option to output file with installer hashes (#816)
Browse files Browse the repository at this point in the history
* Output hash for installer files

* Add news file

* Add missing _output_dir

* Fix tests to check for two hashes

* Remove line break from test files to achieve reproducibility between Unix and Windows

* Update docs

* Output one hash file per installer

* Replace os.path with pathlib for all outputs

* Remove EOL from JSON outputs

* Revert "Remove EOL from JSON outputs"

This reverts commit 5f65d34.

* Revert "Replace os.path with pathlib for all outputs"

This reverts commit b0f4f19.

* Allow for algorithm list

* Update constructor/build_outputs.py

---------

Co-authored-by: jaimergp <jaimergp@users.noreply.github.com>
  • Loading branch information
marcoesters and jaimergp authored Jul 30, 2024
1 parent 9ff6bc7 commit 3bb0fd4
Show file tree
Hide file tree
Showing 7 changed files with 111 additions and 4 deletions.
3 changes: 3 additions & 0 deletions CONSTRUCT.md
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,9 @@ _type:_ list<br/>
Additional artifacts to be produced after building the installer.
It expects either a list of strings or single-key dictionaries:
Allowed keys are:
- `hash`: The hash of the installer files.
- `algorithm` (str or list): The hash algorithm. Must be among `hashlib`'s available algorithms:
https://docs.python.org/3/library/hashlib.html#hashlib.algorithms_available
- `info.json`: The internal `info` object, serialized to JSON. Takes no options.
- `pkgs_list`: The list of packages contained in a given environment. Options:
- `env` (optional, default=`base`): Name of an environment in `extra_envs` to export.
Expand Down
37 changes: 33 additions & 4 deletions constructor/build_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
Update documentation in `construct.py` if any changes are made.
"""
import hashlib
import json
import logging
import os
Expand Down Expand Up @@ -33,14 +34,41 @@ def process_build_outputs(info):
f"Available keys: {tuple(OUTPUT_HANDLERS.keys())}"
)
outpath = handler(info, **config)
logger.info("build_outputs: '%s' created '%s'.", name, os.path.abspath(outpath))
logger.info("build_outputs: '%s' created '%s'.", name, outpath)


def dump_hash(info, algorithm=None):
    """Write one checksum file per installer and hash algorithm.

    For every installer path found in ``info["_outpath"]`` and every requested
    algorithm, a file named ``<installer>.<algorithm>`` is created next to the
    installer. It contains a single line: the hex digest, one space, the
    installer's file name and a trailing ``\\n``.

    Parameters
    ----------
    info : dict
        Must contain ``"_outpath"``: either a single path (``str`` or
        ``os.PathLike``) or an iterable of paths.
    algorithm : str or iterable of str, optional
        Hash algorithm name(s). Each must be listed in
        ``hashlib.algorithms_available`` and produce a fixed-length digest.
        Duplicates are ignored. If omitted or empty, no file is written.

    Returns
    -------
    str
        Absolute paths of the written hash files joined with ``", "``,
        ordered by installer and then by algorithm in the order requested.

    Raises
    ------
    ValueError
        If an algorithm is unknown to ``hashlib`` or needs an explicit digest
        length (e.g. ``shake_128``). Raised before any file is read or written.
    """
    if not algorithm:
        algorithm = []
    elif isinstance(algorithm, str):
        algorithm = [algorithm]
    # dict.fromkeys() removes duplicates but, unlike set(), keeps the requested
    # order, so the returned paths and error messages are deterministic.
    algorithms = list(dict.fromkeys(algorithm))

    invalid = [algo for algo in algorithms if algo not in hashlib.algorithms_available]
    if invalid:
        raise ValueError(f"Invalid algorithm: {', '.join(map(str, invalid))}")
    # Variable-length digests report digest_size == 0 and their hexdigest()
    # requires a length argument; reject them up front instead of failing with
    # a TypeError after the installer has already been read.
    variable_length = [algo for algo in algorithms if hashlib.new(algo).digest_size == 0]
    if variable_length:
        raise ValueError(
            "Algorithms with variable digest length are not supported: "
            f"{', '.join(variable_length)}"
        )

    buffer_size = 65536
    raw_outpath = info["_outpath"]
    if isinstance(raw_outpath, (str, os.PathLike)):
        installers = [Path(raw_outpath)]
    else:
        installers = [Path(outpath) for outpath in raw_outpath]

    outpaths = []
    for installer in installers:
        filehashes = {algo: hashlib.new(algo) for algo in algorithms}
        # Read each installer once, in chunks, and feed all hashers together.
        with open(installer, "rb") as f:
            while buffer := f.read(buffer_size):
                for filehash in filehashes.values():
                    filehash.update(buffer)
        for algo, filehash in filehashes.items():
            outpath = Path(f"{installer}.{algo}")
            # newline="\n" disables newline translation: on Windows a CRLF
            # would leave a stray "\r" glued to the file name in the checksum line.
            with open(outpath, "w", newline="\n") as hash_file:
                hash_file.write(f"{filehash.hexdigest()} {installer.name}\n")
            outpaths.append(str(outpath.absolute()))
    return ", ".join(outpaths)


def dump_info(info):
    """Serialize the ``info`` object to ``info.json`` in the output directory.

    The file is written to ``info["_output_dir"]``. Values that ``json``
    cannot encode natively are stored as their ``repr()``.

    Returns the absolute path of the written file, so callers can log it
    without normalising it themselves.
    """
    outpath = os.path.join(info["_output_dir"], "info.json")
    with open(outpath, "w") as f:
        json.dump(info, f, indent=2, default=repr)
    # Single exit point: the earlier bare `return outpath` made this
    # absolute-path return unreachable.
    return os.path.abspath(outpath)


def dump_packages_list(info, env="base"):
Expand All @@ -55,7 +83,7 @@ def dump_packages_list(info, env="base"):
with open(outpath, 'w') as fo:
fo.write(f"# {info['name']} {info['version']}, env={env}\n")
fo.write("\n".join(dists))
return outpath
return os.path.abspath(outpath)


def dump_licenses(info, include_text=False, text_errors=None):
Expand Down Expand Up @@ -105,10 +133,11 @@ def dump_licenses(info, include_text=False, text_errors=None):
outpath = os.path.join(info["_output_dir"], "licenses.json")
with open(outpath, "w") as f:
json.dump(licenses, f, indent=2, default=repr)
return outpath
return os.path.abspath(outpath)


OUTPUT_HANDLERS = {
"hash": dump_hash,
"info.json": dump_info,
"pkgs_list": dump_packages_list,
"licenses": dump_licenses,
Expand Down
3 changes: 3 additions & 0 deletions constructor/construct.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,9 @@
Additional artifacts to be produced after building the installer.
It expects either a list of strings or single-key dictionaries:
Allowed keys are:
- `hash`: The hash of the installer files.
- `algorithm` (str or list): The hash algorithm. Must be among `hashlib`'s available algorithms:
https://docs.python.org/3/library/hashlib.html#hashlib.algorithms_available
- `info.json`: The internal `info` object, serialized to JSON. Takes no options.
- `pkgs_list`: The list of packages contained in a given environment. Options:
- `env` (optional, default=`base`): Name of an environment in `extra_envs` to export.
Expand Down
12 changes: 12 additions & 0 deletions constructor/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ def main_build(dir_path, output_dir='.', platform=cc_platform,
# '_dists': List[Dist]
# '_urls': List[Tuple[url, md5]]

info_dicts = []
for itype in itypes:
if itype == 'sh':
from .shar import create as shar_create
Expand All @@ -242,8 +243,19 @@ def main_build(dir_path, output_dir='.', platform=cc_platform,
info['installer_type'] = itype
info['_outpath'] = abspath(join(output_dir, get_output_filename(info)))
create(info, verbose=verbose)
if len(itypes) > 1:
info_dicts.append(info.copy())
logger.info("Successfully created '%(_outpath)s'.", info)

# Merge info files for each installer type
if len(itypes) > 1:
keys = set()
for info_dict in info_dicts:
keys.update(info_dict.keys())
for key in keys:
if any(info_dict.get(key) != info.get(key) for info_dict in info_dicts):
info[key] = [info_dict.get(key, "") for info_dict in info_dicts]

process_build_outputs(info)


Expand Down
3 changes: 3 additions & 0 deletions docs/source/construct-yaml.md
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,9 @@ _type:_ list<br/>
Additional artifacts to be produced after building the installer.
It expects either a list of strings or single-key dictionaries:
Allowed keys are:
- `hash`: The hash of the installer files.
- `algorithm` (str or list): The hash algorithm. Must be among `hashlib`'s available algorithms:
https://docs.python.org/3/library/hashlib.html#hashlib.algorithms_available
- `info.json`: The internal `info` object, serialized to JSON. Takes no options.
- `pkgs_list`: The list of packages contained in a given environment. Options:
- `env` (optional, default=`base`): Name of an environment in `extra_envs` to export.
Expand Down
19 changes: 19 additions & 0 deletions news/816-output-installer-hashes
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
### Enhancements

* Add option to output hashes of installer files. (#816)

### Bug fixes

* <news item>

### Deprecations

* <news item>

### Docs

* <news item>

### Other

* <news item>
38 changes: 38 additions & 0 deletions tests/test_outputs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from pathlib import Path

import pytest

from constructor.build_outputs import dump_hash


def test_hash_dump(tmp_path):
    """Check that ``dump_hash`` rejects an unknown algorithm and writes one
    hash file per installer and algorithm with the expected content."""
    first_file = tmp_path / "test.txt"
    first_file.write_text("test string")
    second_file = tmp_path / "test2.txt"
    second_file.write_text("another test")

    # Digests are listed in the same order as the installer paths below.
    expected = {
        "sha256": (
            "d5579c46dfcc7f18207013e65b44e4cb4e2c2298f4ac457ba8f82743f31e930b",
            "64320dd12e5c2caeac673b91454dac750c08ba333639d129671c2f58cb5d0ad1",
        ),
        "md5": (
            "6f8db599de986fab7a21625b7916589c",
            "5e8862cd73694287ff341e75c95e3c6a",
        ),
    }
    info = {"_outpath": [str(first_file), str(second_file)]}

    with pytest.raises(ValueError):
        dump_hash(info, algorithm="bad_algorithm")

    dump_hash(info, algorithm=["sha256", "md5"])
    for index, installer in enumerate(info["_outpath"]):
        for algorithm, digests in expected.items():
            hashfile = Path(f"{installer}.{algorithm}")
            assert hashfile.exists()
            # Each hash file holds "<hex digest> <installer file name>".
            filehash, filename = hashfile.read_text().strip().split()
            assert filehash == digests[index]
            assert filename == Path(installer).name

0 comments on commit 3bb0fd4

Please sign in to comment.