Skip to content

Commit

Permalink
Merge pull request #486 from gipert/flow
Browse files Browse the repository at this point in the history
Properly substitute variables in `FileDB` and `DataLoader` configuration
  • Loading branch information
gipert authored May 3, 2023
2 parents f22d9f6 + 49c88f9 commit 491df43
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 21 deletions.
1 change: 0 additions & 1 deletion .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ build:
--private
--module-first
--force
--maxdepth 10
--output-dir docs/source/api
src/pygama
src/pygama/_version.py
Expand Down
6 changes: 4 additions & 2 deletions src/pygama/flow/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import pandas as pd
from tqdm import tqdm

from pygama.lgdo import Array, LH5Iterator, LH5Store, Struct, Table
from pygama.lgdo import Array, LH5Iterator, LH5Store, Struct, Table, lgdo_utils
from pygama.lgdo.vectorofvectors import build_cl, explode_arrays, explode_cl
from pygama.vis import WaveformBrowser

Expand Down Expand Up @@ -194,7 +194,9 @@ def set_config(self, config: dict | str) -> None:
# look for info in configuration if FileDB is not set
if self.filedb is None:
# expand $_ variables
value = string.Template(config["filedb"]).substitute({"_": config_dir})
value = lgdo_utils.expand_vars(
config["filedb"], substitute={"_": config_dir}
)
self.filedb = FileDB(value)

if not os.path.isdir(self.filedb.data_dir):
Expand Down
22 changes: 13 additions & 9 deletions src/pygama/flow/file_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,21 +177,25 @@ def set_config(self, config: dict, config_path: str = None) -> None:
self.config = config
self.tiers = list(self.config["tier_dirs"].keys())
self.file_format = self.config["file_format"]
self.tier_dirs = self.config["tier_dirs"]
self.table_format = self.config["table_format"]

self.sortby = self.config.get("sortby", "timestamp")

# Handle environment variables
data_dir = os.path.expandvars(self.config["data_dir"])
# expand/substitute variables in data_dir and tier_dirs
# $_ expands to the location of the config file
subst_vars = {}
if config_path is not None:
subst_vars["_"] = os.path.dirname(str(config_path))

# Relative paths are interpreted relative to the configuration file
if not data_dir.startswith("/"):
config_dir = os.path.dirname(config_path)
data_dir = os.path.join(config_dir, data_dir.lstrip("/"))
data_dir = os.path.abspath(data_dir)
data_dir = lgdo.lgdo_utils.expand_path(
self.config["data_dir"], substitute=subst_vars
)
self.data_dir = data_dir

tier_dirs = self.config["tier_dirs"]
for k, val in tier_dirs.items():
tier_dirs[k] = lgdo.lgdo_utils.expand_vars(val, substitute=subst_vars)
self.tier_dirs = tier_dirs

def scan_files(self, dirs: list[str] = None) -> None:
"""Scan the directory containing files from the lowest tier and fill the dataframe.
Expand Down
51 changes: 44 additions & 7 deletions src/pygama/lgdo/lgdo_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
"""
Implements utilities for LEGEND Data Objects.
"""
"""Implements utilities for LEGEND Data Objects."""
from __future__ import annotations

import glob
import logging
import os
import string

import numpy as np

Expand Down Expand Up @@ -122,8 +121,38 @@ def parse_datatype(datatype: str) -> tuple[str, tuple[int, ...], str | list[str]
return datatype, None, element_description.split(",")


def expand_path(path: str, list: bool = False, base_path: str = None) -> str | list:
"""Expand environment variables and wildcards to return absolute path
def expand_vars(expr: str, substitute: dict[str, str] = None) -> str:
    """Expand (environment) variables.

    Variables listed in `substitute` are replaced first; any ``$`` reference
    still present afterwards is looked up in the process environment.

    Note
    ----
    Malformed variable names and references to non-existing variables are left
    unchanged.

    Parameters
    ----------
    expr
        string expression, which may include (environment) variables prefixed by
        ``$``.
    substitute
        use this dictionary to substitute variables. Takes precedence over
        environment variables, so that reserved placeholders (e.g. ``$_``)
        cannot be overridden by whatever the environment happens to contain.

    Returns
    -------
    expanded
        `expr` with every resolvable variable replaced.
    """
    if substitute is None:
        substitute = {}

    # The user mapping must be applied first: shells such as bash export a
    # variable literally named "_" to child processes, so expanding the
    # environment first would turn "$_" into an unrelated path before the
    # mapping ever gets to see it.
    # safe_substitute() leaves unknown or malformed placeholders in place
    # instead of raising, so they are still available for the next step.
    user_expanded = string.Template(expr).safe_substitute(substitute)

    # then fall back to the environment for whatever is left
    return os.path.expandvars(user_expanded)


def expand_path(
path: str,
substitute: dict[str, str] = None,
list: bool = False,
base_path: str = None,
) -> str | list:
"""Expand (environment) variables and wildcards to return absolute paths.
Parameters
----------
Expand All @@ -132,6 +161,9 @@ def expand_path(path: str, list: bool = False, base_path: str = None) -> str | l
list
if ``True``, return a list. If ``False``, return a string; if ``False``
and a unique file is not found, raise an exception.
substitute
use this dictionary to substitute variables. Environment variables take
precedence.
base_path
name of base path. Returned paths will be relative to base.
Expand All @@ -140,11 +172,16 @@ def expand_path(path: str, list: bool = False, base_path: str = None) -> str | l
path or list of paths
Unique absolute path, or list of all absolute paths
"""

if base_path is not None and base_path != "":
base_path = os.path.expanduser(os.path.expandvars(base_path))
path = os.path.join(base_path, path)
paths = glob.glob(os.path.expanduser(os.path.expandvars(path)))

# first expand variables
_path = expand_vars(path, substitute)

# then expand wildcards
paths = glob.glob(os.path.expanduser(_path))

if base_path is not None and base_path != "":
paths = [os.path.relpath(p, base_path) for p in paths]

Expand Down
2 changes: 1 addition & 1 deletion src/pygama/lgdo/lh5_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -1478,7 +1478,7 @@ def __init__(
self.lh5_files = []
self.groups = []
for f, g in zip(lh5_files, groups):
f_exp = expand_path(f, True, base_path)
f_exp = expand_path(f, list=True, base_path=base_path)
self.lh5_files += f_exp
self.groups += [g] * len(f_exp)

Expand Down
19 changes: 18 additions & 1 deletion tests/lgdo/test_lgdo_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,21 @@ def test_parse_datatype():
assert pd_dt_tuple == dt_tuple


def test_expand_vars():
    """Check expansion from the environment and from a user-supplied mapping."""
    expected = "a_random_string/blah"

    # a variable defined in the process environment is expanded
    os.environ["PYGAMATESTBASEDIR"] = "a_random_string"
    from_env = lgdo_utils.expand_vars("$PYGAMATESTBASEDIR/blah")
    assert from_env == expected

    # a variable supplied through the `substitute` mapping is expanded too
    user_vars = {"PYGAMATESTBASEDIR2": "a_random_string"}
    from_mapping = lgdo_utils.expand_vars(
        "$PYGAMATESTBASEDIR2/blah", substitute=user_vars
    )
    assert from_mapping == expected


def test_expand_path(lgnd_test_data):
files = [
lgnd_test_data.get_path(
Expand All @@ -68,4 +83,6 @@ def test_expand_path(lgnd_test_data):
lgdo_utils.expand_path(f"{base_dir}/*.lh5")

# Check if it finds a list of files correctly
assert sorted(lgdo_utils.expand_path(f"{base_dir}/*.lh5", True)) == sorted(files)
assert sorted(lgdo_utils.expand_path(f"{base_dir}/*.lh5", list=True)) == sorted(
files
)

0 comments on commit 491df43

Please sign in to comment.