diff --git a/docs/conf.py b/docs/conf.py index 1968a82a..5d93ffe3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -144,7 +144,8 @@ # On Read the Docs and CI, download the database and build a minimal HDF5 version if (ON_RTD or ON_GHA): - from fiasco.util import check_database, get_test_file_list + from fiasco.tests import get_test_file_list + from fiasco.util import check_database from fiasco.util.setup_db import CHIANTI_URL, LATEST_VERSION from fiasco.util.util import FIASCO_HOME, FIASCO_RC FIASCO_HOME.mkdir(exist_ok=True, parents=True) diff --git a/fiasco/conftest.py b/fiasco/conftest.py index 1ee4b5d6..505515f4 100644 --- a/fiasco/conftest.py +++ b/fiasco/conftest.py @@ -4,7 +4,8 @@ from packaging.version import Version -from fiasco.util import check_database, get_test_file_list, read_chianti_version +from fiasco.tests import get_test_file_list +from fiasco.util import check_database, read_chianti_version # Force MPL to use non-gui backends for testing. try: diff --git a/fiasco/tests/__init__.py b/fiasco/tests/__init__.py index 838b4573..bd1b65c9 100644 --- a/fiasco/tests/__init__.py +++ b/fiasco/tests/__init__.py @@ -2,3 +2,17 @@ """ This module contains package tests. 
""" +import json +import pathlib + +from astropy.utils.data import get_pkg_data_path + +__all__ = ['get_test_file_list'] + + +def get_test_file_list(): + data_dir = pathlib.Path(get_pkg_data_path('data', package='fiasco.tests')) + file_path = data_dir / 'test_file_list.json' + with open(file_path) as f: + hash_table = json.load(f) + return hash_table['test_files'] diff --git a/fiasco/util/data/file_hashes_v8.0.7.json b/fiasco/tests/data/file_hashes_v8.0.7.json similarity index 100% rename from fiasco/util/data/file_hashes_v8.0.7.json rename to fiasco/tests/data/file_hashes_v8.0.7.json diff --git a/fiasco/util/data/file_hashes_v9.0.1.json b/fiasco/tests/data/file_hashes_v9.0.1.json similarity index 100% rename from fiasco/util/data/file_hashes_v9.0.1.json rename to fiasco/tests/data/file_hashes_v9.0.1.json diff --git a/fiasco/util/data/test_file_list.json b/fiasco/tests/data/test_file_list.json similarity index 100% rename from fiasco/util/data/test_file_list.json rename to fiasco/tests/data/test_file_list.json diff --git a/fiasco/util/setup_db.py b/fiasco/util/setup_db.py index ca8e7589..058afa0e 100644 --- a/fiasco/util/setup_db.py +++ b/fiasco/util/setup_db.py @@ -28,7 +28,7 @@ ] LATEST_VERSION = SUPPORTED_VERSIONS[-1] -__all__ = ['check_database', 'check_database_version', 'download_dbase', 'md5hash', 'get_test_file_list', 'build_hdf5_dbase'] +__all__ = ['check_database', 'download_dbase', 'build_hdf5_dbase'] def check_database(hdf5_dbase_root, **kwargs): @@ -85,19 +85,12 @@ def check_database(hdf5_dbase_root, **kwargs): # NOTE: this check is only meant to be bypassed when testing new # versions. 
Hence, this kwarg is not documented if kwargs.get('check_chianti_version', True): - check_database_version(ascii_dbase_root) + _check_database_version(ascii_dbase_root) # If we made it this far, build the HDF5 database files = kwargs.get('files') build_hdf5_dbase(ascii_dbase_root, hdf5_dbase_root, files=files, check_hash=kwargs.get('check_hash', False)) -def check_database_version(ascii_dbase_root): - version = read_chianti_version(ascii_dbase_root) - if str(version) not in SUPPORTED_VERSIONS: - raise UnsupportedVersionError( - f'CHIANTI {version} is not in the list of supported versions {SUPPORTED_VERSIONS}.') - - def download_dbase(ascii_dbase_url, ascii_dbase_root): """ Download the CHIANTI database in ASCII format @@ -113,39 +106,7 @@ def download_dbase(ascii_dbase_url, ascii_dbase_root): tar.extractall(path=ascii_dbase_root) -def md5hash(path): - # Use the md5 utility to generate this - path = pathlib.Path(path) - with path.open('rb') as f: - return hashlib.md5(f.read()).hexdigest() - - -def _get_hash_table(version): - data_dir = pathlib.Path(get_pkg_data_path('data', package='fiasco.util')) - file_path = data_dir / f'file_hashes_v{version}.json' - with open(file_path) as f: - hash_table = json.load(f) - return hash_table - - -def get_test_file_list(): - data_dir = pathlib.Path(get_pkg_data_path('data', package='fiasco.util')) - file_path = data_dir / 'test_file_list.json' - with open(file_path) as f: - hash_table = json.load(f) - return hash_table['test_files'] - - -def _check_hash(parser, hash_table): - actual = md5hash(parser.full_path) - key = '_'.join(parser.full_path.relative_to(parser.ascii_dbase_root).parts) - if hash_table[key] != actual: - raise RuntimeError( - f'Hash of {parser.full_path} ({actual}) did not match expected hash ({hash_table[key]})' - ) - - -def build_hdf5_dbase(ascii_dbase_root, hdf5_dbase_root, files=None, check_hash=False): +def build_hdf5_dbase(ascii_dbase_root, hdf5_dbase_root, files=None, check_hash=False, overwrite=False): """ 
Assemble HDF5 file from raw ASCII CHIANTI database. @@ -161,6 +122,9 @@ def build_hdf5_dbase(ascii_dbase_root, hdf5_dbase_root, files=None, check_hash=F check_hash: `bool`, optional If True, check the file hash before adding it to the database. Building the database will fail if any of the hashes is not as expected. + overwrite: `bool`, optional + If True, overwrite an existing database file. Defaults to False, in which + case an exception is raised if the database file already exists. """ # Import the logger here to avoid circular imports from fiasco import log @@ -176,8 +140,9 @@ def build_hdf5_dbase(ascii_dbase_root, hdf5_dbase_root, files=None, check_hash=F hash_table = _get_hash_table(version) log.debug(f'Checking hashes for version {version}') log.debug(f'Building HDF5 database in {hdf5_dbase_root}') + mode = 'w' if overwrite else 'x' with ProgressBar(len(files)) as progress: - with h5py.File(hdf5_dbase_root, 'a') as hf: + with h5py.File(hdf5_dbase_root, mode=mode) as hf: for f in files: parser = fiasco.io.Parser(f, ascii_dbase_root=ascii_dbase_root) try: @@ -201,3 +166,34 @@ def build_hdf5_dbase(ascii_dbase_root, hdf5_dbase_root, files=None, check_hash=F ion_list = list_ions(hdf5_dbase_root) ds = hf.create_dataset('ion_index', data=np.array(ion_list).astype(np.bytes_)) ds.attrs['unit'] = 'SKIP' + + +def _check_database_version(ascii_dbase_root): + version = read_chianti_version(ascii_dbase_root) + if str(version) not in SUPPORTED_VERSIONS: + raise UnsupportedVersionError( + f'CHIANTI {version} is not in the list of supported versions {SUPPORTED_VERSIONS}.') + + +def _md5hash(path): + # Use the md5 utility to generate this + path = pathlib.Path(path) + with path.open('rb') as f: + return hashlib.md5(f.read()).hexdigest() + + +def _get_hash_table(version): + data_dir = pathlib.Path(get_pkg_data_path('data', package='fiasco.tests')) + file_path = data_dir / f'file_hashes_v{version}.json' + with open(file_path) as f: + hash_table = json.load(f) + return 
hash_table + + +def _check_hash(parser, hash_table): + actual = _md5hash(parser.full_path) + key = '_'.join(parser.full_path.relative_to(parser.ascii_dbase_root).parts) + if hash_table[key] != actual: + raise RuntimeError( + f'Hash of {parser.full_path} ({actual}) did not match expected hash ({hash_table[key]})' + ) diff --git a/tools/generate_hash_table.py b/tools/generate_hash_table.py index 778c86ef..150fcb18 100644 --- a/tools/generate_hash_table.py +++ b/tools/generate_hash_table.py @@ -8,7 +8,7 @@ from astropy.utils.data import get_pkg_data_path from itertools import chain -from fiasco.util.setup_db import md5hash +from fiasco.util.setup_db import _md5hash from fiasco.util.util import get_chianti_catalog, read_chianti_version @@ -29,7 +29,7 @@ def build_hash_table(dbase_root): map(lambda x: pathlib.Path('dem') / x, catalogue['dem_files']), ) filepaths = map(lambda x: dbase_root / x, filepaths) - return {'_'.join(f.relative_to(dbase_root).parts): md5hash(f) for f in filepaths} + return {'_'.join(f.relative_to(dbase_root).parts): _md5hash(f) for f in filepaths} @click.command() diff --git a/tools/generate_test_file_list.py b/tools/generate_test_file_list.py index a2664ded..4e49dd6f 100644 --- a/tools/generate_test_file_list.py +++ b/tools/generate_test_file_list.py @@ -52,7 +52,7 @@ def sort_func(x): if __name__ == '__main__': # An example of how you might use this function to update the test file list - from fiasco.util import get_test_file_list + from fiasco.tests import get_test_file_list test_files = get_test_file_list() # Read current files test_files += ... # Add new files here