Skip to content

Commit

Permalink
Merge pull request #522 from MoritzNeuberger/issue_3_lgdo
Browse files Browse the repository at this point in the history
Support the latest legend-pydataobj and dspeed versions
  • Loading branch information
gipert authored Jan 2, 2024
2 parents 10798dc + 646cf1a commit 0adfdc2
Show file tree
Hide file tree
Showing 18 changed files with 189 additions and 160 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
python -m pip install --upgrade .[test]
- name: Run unit tests
run: |
pytest
python -m pytest
test-coverage:
name: Calculate and upload test coverage
Expand Down
6 changes: 3 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ project_urls =
packages = find:
install_requires =
colorlog
dspeed>=1.1
dspeed@git+https://github.com/legend-exp/dspeed@main
h5py>=3.2
iminuit
legend-daq2lh5>=1.1.0
legend-pydataobj>=1.3
legend-daq2lh5@git+https://github.com/legend-exp/legend-daq2lh5@main
legend-pydataobj>=1.5.0a1
matplotlib
numba!=0.53.*,!=0.54.*,!=0.57
numpy>=1.21
Expand Down
4 changes: 2 additions & 2 deletions src/pygama/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def pygama_cli():


def add_lh5ls_parser(subparsers):
"""Configure :func:`.lgdo.lh5_store.show` command line interface."""
"""Configure :func:`.lgdo.lh5.show` command line interface."""

parser_lh5ls = subparsers.add_parser(
"lh5ls", description="""Inspect LEGEND HDF5 (LH5) file contents"""
Expand All @@ -99,7 +99,7 @@ def add_lh5ls_parser(subparsers):


def lh5_show_cli(args):
"""Passes command line arguments to :func:`.lgdo.lh5_store.show`."""
"""Passes command line arguments to :func:`.lgdo.lh5.show`."""

show(args.lh5_file, args.lh5_group, attrs=args.attributes)

Expand Down
6 changes: 3 additions & 3 deletions src/pygama/evt/build_tcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def build_tcm(
out_name
name for the TCM table in the output file.
wo_mode
mode to send to :meth:`~.lgdo.lh5_store.LH5Store.write_object`.
mode to send to :meth:`~.lgdo.lh5.LH5Store.write`.
See Also
--------
Expand Down Expand Up @@ -79,7 +79,7 @@ def build_tcm(
else:
array_id = len(all_tables) - 1
table = table + "/" + coin_col
coin_data.append(store.read_object(table, filename)[0].nda)
coin_data.append(store.read(table, filename)[0].nda)
array_ids.append(array_id)

tcm_cols = ptcm.generate_tcm_cols(
Expand All @@ -94,6 +94,6 @@ def build_tcm(
)

if out_file is not None:
store.write_object(tcm, out_name, out_file, wo_mode=wo_mode)
store.write(tcm, out_name, out_file, wo_mode=wo_mode)

return tcm
38 changes: 17 additions & 21 deletions src/pygama/flow/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
import numpy as np
import pandas as pd
from dspeed.vis import WaveformBrowser
from lgdo import Array, LH5Iterator, LH5Store, Struct, Table, lgdo_utils
from lgdo.lh5 import LH5Iterator, LH5Store
from lgdo.lh5.utils import expand_vars
from lgdo.types import Array, Struct, Table
from lgdo.types.vectorofvectors import build_cl, explode_arrays, explode_cl
from tqdm.auto import tqdm

Expand Down Expand Up @@ -193,9 +195,7 @@ def set_config(self, config: dict | str) -> None:
# look for info in configuration if FileDB is not set
if self.filedb is None:
# expand $_ variables
value = lgdo_utils.expand_vars(
config["filedb"], substitute={"_": config_dir}
)
value = expand_vars(config["filedb"], substitute={"_": config_dir})
self.filedb = FileDB(value)

if not os.path.isdir(self.filedb.data_dir):
Expand Down Expand Up @@ -584,7 +584,7 @@ def build_entry_list(

tcm_table_name = self.filedb.get_table_name(tcm_tier, tcm_tb)
try:
tcm_lgdo, _ = sto.read_object(tcm_table_name, tcm_path)
tcm_lgdo, _ = sto.read(tcm_table_name, tcm_path)
except KeyError:
log.warning(f"Cannot find table {tcm_table_name} in file {tcm_path}")
continue
Expand Down Expand Up @@ -649,7 +649,7 @@ def build_entry_list(
if tb in col_tiers[file]["tables"][tier]:
table_name = self.filedb.get_table_name(tier, tb)
try:
tier_table, _ = sto.read_object(
tier_table, _ = sto.read(
table_name,
tier_path,
field_mask=cut_cols[level],
Expand Down Expand Up @@ -708,11 +708,9 @@ def build_entry_list(
f_dict = f_entries.to_dict("list")
f_struct = Struct(f_dict)
if self.merge_files:
sto.write_object(f_struct, "entries", output_file, wo_mode="a")
sto.write(f_struct, "entries", output_file, wo_mode="a")
else:
sto.write_object(
f_struct, f"entries/{file}", output_file, wo_mode="a"
)
sto.write(f_struct, f"entries/{file}", output_file, wo_mode="a")

if log.getEffectiveLevel() >= logging.INFO:
progress_bar.close()
Expand Down Expand Up @@ -862,7 +860,7 @@ def build_hit_entries(
# load the data from the tier file, just the columns needed for the cut
table_name = self.filedb.get_table_name(tier, tb)
try:
tier_tb, _ = sto.read_object(
tier_tb, _ = sto.read(
table_name, tier_path, field_mask=cut_cols
)
except KeyError:
Expand Down Expand Up @@ -902,11 +900,9 @@ def build_hit_entries(
f_dict = f_entries.to_dict("list")
f_struct = Struct(f_dict)
if self.merge_files:
sto.write_object(f_struct, "entries", output_file, wo_mode="a")
sto.write(f_struct, "entries", output_file, wo_mode="a")
else:
sto.write_object(
f_struct, f"entries/{file}", output_file, wo_mode="a"
)
sto.write(f_struct, f"entries/{file}", output_file, wo_mode="a")

if log.getEffectiveLevel() >= logging.INFO:
progress_bar.close()
Expand Down Expand Up @@ -1117,7 +1113,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table):
for file in files
]

tier_table, _ = sto.read_object(
tier_table, _ = sto.read(
name=tb_name,
lh5_file=tier_paths,
idx=idx_mask,
Expand All @@ -1143,7 +1139,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table):
f_table = utils.dict_to_table(col_dict=col_dict, attr_dict=attr_dict)

if output_file:
sto.write_object(f_table, "merged_data", output_file, wo_mode="o")
sto.write(f_table, "merged_data", output_file, wo_mode="o")
if in_memory:
if self.output_format == "lgdo.Table":
return f_table
Expand Down Expand Up @@ -1220,7 +1216,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table):
raise FileNotFoundError(tier_path)

table_name = self.filedb.get_table_name(tier, tb)
tier_table, _ = sto.read_object(
tier_table, _ = sto.read(
table_name,
tier_path,
idx=idx_mask,
Expand All @@ -1246,7 +1242,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table):
if in_memory:
load_out.add_field(name=file, obj=f_table)
if output_file:
sto.write_object(f_table, f"{file}", output_file, wo_mode="o")
sto.write(f_table, f"{file}", output_file, wo_mode="o")
# end file loop

if log.getEffectiveLevel() >= logging.INFO:
Expand Down Expand Up @@ -1318,7 +1314,7 @@ def load_evts(
)
if os.path.exists(tier_path):
table_name = self.filedb.get_table_name(tier, tb)
tier_table, _ = sto.read_object(
tier_table, _ = sto.read(
table_name,
tier_path,
idx=idx_mask,
Expand All @@ -1332,7 +1328,7 @@ def load_evts(
if in_memory:
load_out[file] = f_table
if output_file:
sto.write_object(f_table, f"file{file}", output_file, wo_mode="o")
sto.write(f_table, f"file{file}", output_file, wo_mode="o")
# end file loop

if in_memory:
Expand Down
38 changes: 17 additions & 21 deletions src/pygama/flow/file_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
import warnings

import h5py
import lgdo
import numpy as np
import pandas as pd
from lgdo import Array, Scalar, VectorOfVectors
from lgdo import lh5_store as lh5
from lgdo.lh5.store import LH5Store, ls
from lgdo.lh5.utils import expand_path, expand_vars
from lgdo.types import Array, Scalar, VectorOfVectors
from parse import parse

from . import utils
Expand Down Expand Up @@ -185,14 +185,12 @@ def set_config(self, config: dict, config_path: str = None) -> None:
if config_path is not None:
subst_vars["_"] = os.path.dirname(str(config_path))

data_dir = lgdo.lgdo_utils.expand_path(
self.config["data_dir"], substitute=subst_vars
)
data_dir = expand_path(self.config["data_dir"], substitute=subst_vars)
self.data_dir = data_dir

tier_dirs = self.config["tier_dirs"]
for k, val in tier_dirs.items():
tier_dirs[k] = lgdo.lgdo_utils.expand_vars(val, substitute=subst_vars)
tier_dirs[k] = expand_vars(val, substitute=subst_vars)
self.tier_dirs = tier_dirs

def scan_files(self, dirs: list[str] = None) -> None:
Expand Down Expand Up @@ -407,7 +405,7 @@ def update_tables_cols(row, tier: str, utc_cache: dict = None) -> pd.Series:
)

# TODO this call here is really expensive!
groups = lh5.ls(f, wildcard)
groups = ls(f, wildcard)
if len(groups) > 0 and parse(template, groups[0]) is None:
log.warning(f"groups in {fpath} don't match template")
else:
Expand All @@ -431,7 +429,7 @@ def update_tables_cols(row, tier: str, utc_cache: dict = None) -> pd.Series:
table_name = template

try:
col = lh5.ls(f[table_name])
col = ls(f[table_name])
except KeyError:
log.warning(f"cannot find '{table_name}' in {fpath}")
continue
Expand Down Expand Up @@ -477,8 +475,8 @@ def update_tables_cols(row, tier: str, utc_cache: dict = None) -> pd.Series:
columns_vov = VectorOfVectors(
flattened_data=flattened, cumulative_length=length
)
sto = lh5.LH5Store()
sto.write_object(columns_vov, "unique_columns", to_file)
sto = LH5Store()
sto.write(columns_vov, "unique_columns", to_file)

return self.columns

Expand All @@ -501,12 +499,12 @@ def from_disk(self, path: str | list[str]) -> None:
# expand wildcards
paths = []
for p in path:
paths += lgdo.lgdo_utils.expand_path(p, list=True)
paths += expand_path(p, list=True)

if not paths:
raise FileNotFoundError(path)

sto = lh5.LH5Store()
sto = LH5Store()
# objects/accumulators that will be used to configure the FileDB at the end
_cfg = None
_df = None
Expand All @@ -528,7 +526,7 @@ def _replace_idx(row, trans, tier):

# loop over the files
for p in paths:
cfg, _ = sto.read_object("config", p)
cfg, _ = sto.read("config", p)
cfg = json.loads(cfg.value.decode())

# make sure configurations are all the same
Expand All @@ -540,7 +538,7 @@ def _replace_idx(row, trans, tier):
)

# read in unique columns
vov, _ = sto.read_object("columns", p)
vov, _ = sto.read("columns", p)
# Convert back from VoV of UTF-8 bytestrings to a list of lists of strings
columns = [[v.decode("utf-8") for v in ov] for ov in list(vov)]

Expand Down Expand Up @@ -599,14 +597,12 @@ def to_disk(self, filename: str, wo_mode="write_safe") -> None:
filename
output LH5 file name.
wo_mode
passed to :meth:`~.lgdo.lh5_store.write_object`.
passed to :meth:`~.lgdo.lh5.write`.
"""
log.debug(f"writing database to {filename}")

sto = lh5.LH5Store()
sto.write_object(
Scalar(json.dumps(self.config)), "config", filename, wo_mode=wo_mode
)
sto = LH5Store()
sto.write(Scalar(json.dumps(self.config)), "config", filename, wo_mode=wo_mode)

if wo_mode in ["write_safe", "w", "overwrite_file", "of"]:
wo_mode = "a"
Expand All @@ -623,7 +619,7 @@ def to_disk(self, filename: str, wo_mode="write_safe") -> None:
flattened_data=Array(nda=np.array(flat).astype("S")),
cumulative_length=Array(nda=np.array(cum_l)),
)
sto.write_object(col_vov, "columns", filename, wo_mode=wo_mode)
sto.write(col_vov, "columns", filename, wo_mode=wo_mode)

# FIXME: to_hdf() throws this:
#
Expand Down
4 changes: 2 additions & 2 deletions src/pygama/hit/build_hit.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def build_hit(
n_max
maximum number of rows to process
wo_mode
forwarded to :meth:`~.lgdo.lh5_store.write_object`.
forwarded to :meth:`~.lgdo.lh5.write`.
"""
store = LH5Store()

Expand Down Expand Up @@ -168,7 +168,7 @@ def build_hit(
if col not in cfg["outputs"]:
outtbl_obj.remove_column(col, delete=True)

store.write_object(
store.write(
obj=outtbl_obj,
name=tbl.replace("/dsp", "/hit"),
lh5_file=outfile,
Expand Down
2 changes: 1 addition & 1 deletion src/pygama/pargen/AoE_cal.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import matplotlib as mpl

mpl.use("agg")
import lgdo.lh5_store as lh5
import lgdo.lh5 as lh5
import matplotlib.cm as cmx
import matplotlib.colors as mcolors
import matplotlib.dates as mdates
Expand Down
2 changes: 1 addition & 1 deletion src/pygama/pargen/cuts.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import logging
import os

import lgdo.lh5_store as lh5
import lgdo.lh5 as lh5
import numpy as np
import pandas as pd
from scipy import stats
Expand Down
2 changes: 1 addition & 1 deletion src/pygama/pargen/ecal_th.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from scipy.stats import binned_statistic

mpl.use("agg")
import lgdo.lh5_store as lh5
import lgdo.lh5 as lh5
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
Expand Down
Loading

0 comments on commit 0adfdc2

Please sign in to comment.