From 2ec3993c4d4a14ba3f6a623a0d6e80f3f6364845 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 2 May 2023 18:46:33 +0200 Subject: [PATCH 1/9] [docs] remove "maxdepth" setting in .readthedocs.yaml --- .readthedocs.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index ee770abcd..40e04811b 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -20,7 +20,6 @@ build: --private --module-first --force - --maxdepth 10 --output-dir docs/source/api src/pygama src/pygama/_version.py From e08808842493ce4966bd1dffb369601e202dfb94 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 3 May 2023 15:05:36 +0200 Subject: [PATCH 2/9] [lgdo] expand_path now accepts a mapping of variable substitutions --- src/pygama/lgdo/lgdo_utils.py | 27 +++++++++++++++++++++------ src/pygama/lgdo/lh5_store.py | 4 ++-- tests/lgdo/test_lgdo_utils.py | 17 ++++++++++++++++- 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/src/pygama/lgdo/lgdo_utils.py b/src/pygama/lgdo/lgdo_utils.py index dc14d22df..7d5aec9bc 100644 --- a/src/pygama/lgdo/lgdo_utils.py +++ b/src/pygama/lgdo/lgdo_utils.py @@ -1,11 +1,10 @@ -""" -Implements utilities for LEGEND Data Objects. -""" +"""Implements utilities for LEGEND Data Objects.""" from __future__ import annotations import glob import logging import os +import string import numpy as np @@ -122,8 +121,10 @@ def parse_datatype(datatype: str) -> tuple[str, tuple[int, ...], str | list[str] return datatype, None, element_description.split(",") -def expand_path(path: str, list: bool = False) -> str | list: - """Expand environment variables and wildcards to return absolute path +def expand_path( + path: str, substitute: dict[str, str] = None, list: bool = False +) -> str | list: + """Expand (environment) variables and wildcards to return absolute paths. Parameters ---------- @@ -132,6 +133,9 @@ def expand_path(path: str, list: bool = False) -> str | list: list if ``True``, return a list. If ``False``, return a string; if ``False`` and a unique file is not found, raise an exception. + substitute + use this dictionary to substitute variables. Environment variables take + precedence. Returns ------- @@ -139,7 +143,18 @@ def expand_path(path: str, list: bool = False) -> str | list: Unique absolute path, or list of all absolute paths """ - paths = glob.glob(os.path.expanduser(os.path.expandvars(path))) + if substitute is None: + substitute = {} + + # expand env variables first + _path = os.path.expandvars(path) + + # then try using provided mapping + _path = string.Template(_path).safe_substitute(substitute) + + # then expand wildcards + paths = glob.glob(os.path.expanduser(_path)) + if not list: if len(paths) == 0: raise FileNotFoundError(f"could not find path matching {path}") diff --git a/src/pygama/lgdo/lh5_store.py b/src/pygama/lgdo/lh5_store.py index 1220a5301..c417ff0bc 100644 --- a/src/pygama/lgdo/lh5_store.py +++ b/src/pygama/lgdo/lh5_store.py @@ -1456,7 +1456,7 @@ def __init__( # List of files, with wildcards and env vars expanded if isinstance(lh5_files, str): - lh5_files = expand_path(lh5_files, True) + lh5_files = expand_path(lh5_files, list=True) elif not isinstance(lh5_files, list): raise ValueError("lh5_files must be a string or list of strings") @@ -1465,7 +1465,7 @@ def __init__( "entry_list and entry_mask arguments are mutually exclusive" ) - self.lh5_files = [f for f_wc in lh5_files for f in expand_path(f_wc, True)] + self.lh5_files = [f for f_wc in lh5_files for f in expand_path(f_wc, list=True)] # Map to last row in each file self.file_map = np.array( diff --git a/tests/lgdo/test_lgdo_utils.py b/tests/lgdo/test_lgdo_utils.py index 841cf3cef..db6a7dee7 100644 --- a/tests/lgdo/test_lgdo_utils.py +++ b/tests/lgdo/test_lgdo_utils.py @@ -68,4 +68,19 @@ def test_expand_path(lgnd_test_data): lgdo_utils.expand_path(f"{base_dir}/*.lh5") # Check if it finds a list of files correctly - assert sorted(lgdo_utils.expand_path(f"{base_dir}/*.lh5", True)) == sorted(files) + assert sorted(lgdo_utils.expand_path(f"{base_dir}/*.lh5", list=True)) == sorted( + files + ) + + # Check env variable expansion + os.environ["PYGAMATESTBASEDIR"] = base_dir + assert lgdo_utils.expand_path("$PYGAMATESTBASEDIR/*20220716T104550Z*") == files[0] + + # Check user variable expansion + assert ( + lgdo_utils.expand_path( + "$PYGAMATESTBASEDIR2/*20220716T104550Z*", + substitute={"PYGAMATESTBASEDIR2": base_dir}, + ) + == files[0] + ) From 41502a9412c947cee011f0f49569137db63b5ad1 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 3 May 2023 15:30:32 +0200 Subject: [PATCH 3/9] [lgdo] add LGDO util to just expand variables in a string --- src/pygama/lgdo/lgdo_utils.py | 29 +++++++++++++++++++++-------- tests/lgdo/test_lgdo_utils.py | 28 +++++++++++++++------------- 2 files changed, 36 insertions(+), 21 deletions(-) diff --git a/src/pygama/lgdo/lgdo_utils.py b/src/pygama/lgdo/lgdo_utils.py index 7d5aec9bc..958d892e5 100644 --- a/src/pygama/lgdo/lgdo_utils.py +++ b/src/pygama/lgdo/lgdo_utils.py @@ -121,6 +121,26 @@ def parse_datatype(datatype: str) -> tuple[str, tuple[int, ...], str | list[str] return datatype, None, element_description.split(",") +def expand_vars(expr: str, substitute: dict[str, str] = None) -> str: + """Expand (environment) variables. + + Parameters + ---------- + expr + string expression, which may include (environment) variables prefixed by + ``$``. + substitute + use this dictionary to substitute variables. Environment variables take + precedence. + """ + if substitute is None: + substitute = {} + + # expand env variables first + # then try using provided mapping + return string.Template(os.path.expandvars(expr)).safe_substitute(substitute) + + def expand_path( path: str, substitute: dict[str, str] = None, list: bool = False ) -> str | list: @@ -143,14 +163,7 @@ def expand_path( Unique absolute path, or list of all absolute paths """ - if substitute is None: - substitute = {} - - # expand env variables first - _path = os.path.expandvars(path) - - # then try using provided mapping - _path = string.Template(_path).safe_substitute(substitute) + _path = expand_vars(path, substitute) # then expand wildcards paths = glob.glob(os.path.expanduser(_path)) diff --git a/tests/lgdo/test_lgdo_utils.py b/tests/lgdo/test_lgdo_utils.py index db6a7dee7..5b1f35b14 100644 --- a/tests/lgdo/test_lgdo_utils.py +++ b/tests/lgdo/test_lgdo_utils.py @@ -46,6 +46,21 @@ def test_parse_datatype(): assert pd_dt_tuple == dt_tuple +def test_expand_vars(): + # Check env variable expansion + os.environ["PYGAMATESTBASEDIR"] = "a_random_string" + assert lgdo_utils.expand_vars("$PYGAMATESTBASEDIR/blah") == "a_random_string/blah" + + # Check user variable expansion + assert ( + lgdo_utils.expand_vars( + "$PYGAMATESTBASEDIR2/blah", + substitute={"PYGAMATESTBASEDIR2": "a_random_string"}, + ) + == "a_random_string/blah" + ) + + def test_expand_path(lgnd_test_data): files = [ lgnd_test_data.get_path( @@ -71,16 +86,3 @@ def test_expand_path(lgnd_test_data): assert sorted(lgdo_utils.expand_path(f"{base_dir}/*.lh5", list=True)) == sorted( files ) - - # Check env variable expansion - os.environ["PYGAMATESTBASEDIR"] = base_dir - assert lgdo_utils.expand_path("$PYGAMATESTBASEDIR/*20220716T104550Z*") == files[0] - - # Check user variable expansion - assert ( - lgdo_utils.expand_path( - "$PYGAMATESTBASEDIR2/*20220716T104550Z*", - substitute={"PYGAMATESTBASEDIR2": base_dir}, - ) - == files[0] - ) From 60d88618cf7f491bc1194a18f1e62fe7ecc319a6 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 3 May 2023 15:31:15 +0200 Subject: [PATCH 4/9] [flow] FileDB can now properly expand variables in config --- src/pygama/flow/file_db.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/pygama/flow/file_db.py b/src/pygama/flow/file_db.py index 95df246a0..6f57b92c3 100644 --- a/src/pygama/flow/file_db.py +++ b/src/pygama/flow/file_db.py @@ -177,17 +177,26 @@ def set_config(self, config: dict, config_path: str = None) -> None: self.config = config self.tiers = list(self.config["tier_dirs"].keys()) self.file_format = self.config["file_format"] - self.tier_dirs = self.config["tier_dirs"] self.table_format = self.config["table_format"] - self.sortby = self.config.get("sortby", "timestamp") - # Handle environment variables - data_dir = os.path.expandvars(self.config["data_dir"]) + # expand/substitute variables in data_dir and tier_dirs + # $_ expands to the location of the config file + config_dir = os.path.dirname(str(config_path)) + + data_dir = lgdo.lgdo_utils.expand_path( + self.config["data_dir"], substitute={"_", config_dir} + ) + + tier_dirs = self.config["tier_dirs"] + for k, val in tier_dirs.items(): + tier_dirs[k] = lgdo.lgdo_utils.expand_vars( + val, substitute={"_", config_dir} + ) + self.tier_dirs = tier_dirs - # Relative paths are interpreted relative to the configuration file + # relative paths are also interpreted relative to the configuration file if not data_dir.startswith("/"): - config_dir = os.path.dirname(config_path) data_dir = os.path.join(config_dir, data_dir.lstrip("/")) data_dir = os.path.abspath(data_dir) self.data_dir = data_dir From ccc89ac2946b611f3e5ad1558b9d9867fdafa45a Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 3 May 2023 15:35:16 +0200 Subject: [PATCH 5/9] [flow] bugfix --- src/pygama/flow/file_db.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pygama/flow/file_db.py b/src/pygama/flow/file_db.py index 6f57b92c3..357a61b8a 100644 --- a/src/pygama/flow/file_db.py +++ b/src/pygama/flow/file_db.py @@ -182,7 +182,9 @@ def set_config(self, config: dict, config_path: str = None) -> None: # expand/substitute variables in data_dir and tier_dirs # $_ expands to the location of the config file - config_dir = os.path.dirname(str(config_path)) + config_dir = None + if config_path is not None: + config_dir = os.path.dirname(str(config_path)) data_dir = lgdo.lgdo_utils.expand_path( self.config["data_dir"], substitute={"_", config_dir} From 96062b891c3916f8530fc6b4022a821a04d1931c Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 3 May 2023 15:39:22 +0200 Subject: [PATCH 6/9] [flow] use lgdo.utils.expand_vars in DataLoader --- src/pygama/flow/data_loader.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pygama/flow/data_loader.py b/src/pygama/flow/data_loader.py index b0fa91500..cd2b18567 100644 --- a/src/pygama/flow/data_loader.py +++ b/src/pygama/flow/data_loader.py @@ -15,7 +15,7 @@ import pandas as pd from tqdm import tqdm -from pygama.lgdo import Array, LH5Store, Struct, Table +from pygama.lgdo import Array, LH5Store, Struct, Table, lgdo_utils from pygama.lgdo.vectorofvectors import build_cl, explode_arrays, explode_cl from . import utils @@ -193,7 +193,9 @@ def set_config(self, config: dict | str) -> None: # look for info in configuration if FileDB is not set if self.filedb is None: # expand $_ variables - value = string.Template(config["filedb"]).substitute({"_": config_dir}) + value = lgdo_utils.expand_vars( + config["filedb"], substitute={"_": config_dir} + ) self.filedb = FileDB(value) if not os.path.isdir(self.filedb.data_dir): From c5839c5c706ede4703c59918ff46c266df0a71e5 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 3 May 2023 18:27:26 +0200 Subject: [PATCH 7/9] [lgdo] bug fix --- src/pygama/lgdo/lh5_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pygama/lgdo/lh5_store.py b/src/pygama/lgdo/lh5_store.py index 91717d96a..4f4848701 100644 --- a/src/pygama/lgdo/lh5_store.py +++ b/src/pygama/lgdo/lh5_store.py @@ -1462,7 +1462,7 @@ def __init__( # List of files, with wildcards and env vars expanded if isinstance(lh5_files, str): lh5_files = [lh5_files] - if isinstance(groups, list=True): + if isinstance(groups, list): lh5_files *= len(groups) elif not isinstance(lh5_files, list): raise ValueError("lh5_files must be a string or list of strings") From 4037a4c48e36b8f7b52cd99f2c3a21eec08cd61b Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 3 May 2023 19:30:14 +0200 Subject: [PATCH 8/9] [flow] better error message. force user to use $_ in relative data_paths --- src/pygama/flow/file_db.py | 15 +++++---------- src/pygama/lgdo/lgdo_utils.py | 6 +++++- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/pygama/flow/file_db.py b/src/pygama/flow/file_db.py index 357a61b8a..221d452a0 100644 --- a/src/pygama/flow/file_db.py +++ b/src/pygama/flow/file_db.py @@ -182,27 +182,22 @@ def set_config(self, config: dict, config_path: str = None) -> None: # expand/substitute variables in data_dir and tier_dirs # $_ expands to the location of the config file - config_dir = None + subst_vars = {} if config_path is not None: - config_dir = os.path.dirname(str(config_path)) + subst_vars["_"] = os.path.dirname(str(config_path)) data_dir = lgdo.lgdo_utils.expand_path( - self.config["data_dir"], substitute={"_", config_dir} + self.config["data_dir"], substitute=subst_vars ) + self.data_dir = data_dir tier_dirs = self.config["tier_dirs"] for k, val in tier_dirs.items(): tier_dirs[k] = lgdo.lgdo_utils.expand_vars( - val, substitute={"_", config_dir} + val, substitute=subst_vars ) self.tier_dirs = tier_dirs - # relative paths are also interpreted relative to the configuration file - if not data_dir.startswith("/"): - data_dir = os.path.join(config_dir, data_dir.lstrip("/")) - data_dir = os.path.abspath(data_dir) - self.data_dir = data_dir - def scan_files(self, dirs: list[str] = None) -> None: """Scan the directory containing files from the lowest tier and fill the dataframe. diff --git a/src/pygama/lgdo/lgdo_utils.py b/src/pygama/lgdo/lgdo_utils.py index 95b25d18b..694d02d6f 100644 --- a/src/pygama/lgdo/lgdo_utils.py +++ b/src/pygama/lgdo/lgdo_utils.py @@ -124,6 +124,11 @@ def parse_datatype(datatype: str) -> tuple[str, tuple[int, ...], str | list[str] def expand_vars(expr: str, substitute: dict[str, str] = None) -> str: """Expand (environment) variables. + Note + ---- + Malformed variable names and references to non-existing variables are left + unchanged. + Parameters ---------- expr @@ -167,7 +172,6 @@ def expand_path( path or list of paths Unique absolute path, or list of all absolute paths """ - if base_path is not None and base_path != "": base_path = os.path.expanduser(os.path.expandvars(base_path)) path = os.path.join(base_path, path) From 49c88f90731aadb49989a22bc528d9244e082656 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 3 May 2023 17:33:17 +0000 Subject: [PATCH 9/9] style: pre-commit fixes --- src/pygama/flow/file_db.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/pygama/flow/file_db.py b/src/pygama/flow/file_db.py index 221d452a0..61e54c7f2 100644 --- a/src/pygama/flow/file_db.py +++ b/src/pygama/flow/file_db.py @@ -193,9 +193,7 @@ def set_config(self, config: dict, config_path: str = None) -> None: tier_dirs = self.config["tier_dirs"] for k, val in tier_dirs.items(): - tier_dirs[k] = lgdo.lgdo_utils.expand_vars( - val, substitute=subst_vars - ) + tier_dirs[k] = lgdo.lgdo_utils.expand_vars(val, substitute=subst_vars) self.tier_dirs = tier_dirs def scan_files(self, dirs: list[str] = None) -> None: