Skip to content

Commit

Permalink
Merge pull request #522 from MoritzNeuberger/issue_3_lgdo
Browse files Browse the repository at this point in the history
Support the latest legend-pydataobj and dspeed versions
  • Loading branch information
gipert authored Jan 2, 2024
2 parents 10798dc + 646cf1a commit 0adfdc2
Show file tree
Hide file tree
Showing 18 changed files with 189 additions and 160 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
python -m pip install --upgrade .[test]
- name: Run unit tests
run: |
pytest
python -m pytest
test-coverage:
name: Calculate and upload test coverage
Expand Down
6 changes: 3 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ project_urls =
packages = find:
install_requires =
colorlog
dspeed>=1.1
dspeed@git+https://github.com/legend-exp/dspeed@main
h5py>=3.2
iminuit
legend-daq2lh5>=1.1.0
legend-pydataobj>=1.3
legend-daq2lh5@git+https://github.com/legend-exp/legend-daq2lh5@main
legend-pydataobj>=1.5.0a1
matplotlib
numba!=0.53.*,!=0.54.*,!=0.57
numpy>=1.21
Expand Down
4 changes: 2 additions & 2 deletions src/pygama/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def pygama_cli():


def add_lh5ls_parser(subparsers):
"""Configure :func:`.lgdo.lh5_store.show` command line interface."""
"""Configure :func:`.lgdo.lh5.show` command line interface."""

parser_lh5ls = subparsers.add_parser(
"lh5ls", description="""Inspect LEGEND HDF5 (LH5) file contents"""
Expand All @@ -99,7 +99,7 @@ def add_lh5ls_parser(subparsers):


def lh5_show_cli(args):
"""Passes command line arguments to :func:`.lgdo.lh5_store.show`."""
"""Passes command line arguments to :func:`.lgdo.lh5.show`."""

show(args.lh5_file, args.lh5_group, attrs=args.attributes)

Expand Down
6 changes: 3 additions & 3 deletions src/pygama/evt/build_tcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def build_tcm(
out_name
name for the TCM table in the output file.
wo_mode
mode to send to :meth:`~.lgdo.lh5_store.LH5Store.write_object`.
mode to send to :meth:`~.lgdo.lh5.LH5Store.write`.
See Also
--------
Expand Down Expand Up @@ -79,7 +79,7 @@ def build_tcm(
else:
array_id = len(all_tables) - 1
table = table + "/" + coin_col
coin_data.append(store.read_object(table, filename)[0].nda)
coin_data.append(store.read(table, filename)[0].nda)
array_ids.append(array_id)

tcm_cols = ptcm.generate_tcm_cols(
Expand All @@ -94,6 +94,6 @@ def build_tcm(
)

if out_file is not None:
store.write_object(tcm, out_name, out_file, wo_mode=wo_mode)
store.write(tcm, out_name, out_file, wo_mode=wo_mode)

return tcm
38 changes: 17 additions & 21 deletions src/pygama/flow/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
import numpy as np
import pandas as pd
from dspeed.vis import WaveformBrowser
from lgdo import Array, LH5Iterator, LH5Store, Struct, Table, lgdo_utils
from lgdo.lh5 import LH5Iterator, LH5Store
from lgdo.lh5.utils import expand_vars
from lgdo.types import Array, Struct, Table
from lgdo.types.vectorofvectors import build_cl, explode_arrays, explode_cl
from tqdm.auto import tqdm

Expand Down Expand Up @@ -193,9 +195,7 @@ def set_config(self, config: dict | str) -> None:
# look for info in configuration if FileDB is not set
if self.filedb is None:
# expand $_ variables
value = lgdo_utils.expand_vars(
config["filedb"], substitute={"_": config_dir}
)
value = expand_vars(config["filedb"], substitute={"_": config_dir})
self.filedb = FileDB(value)

if not os.path.isdir(self.filedb.data_dir):
Expand Down Expand Up @@ -584,7 +584,7 @@ def build_entry_list(

tcm_table_name = self.filedb.get_table_name(tcm_tier, tcm_tb)
try:
tcm_lgdo, _ = sto.read_object(tcm_table_name, tcm_path)
tcm_lgdo, _ = sto.read(tcm_table_name, tcm_path)
except KeyError:
log.warning(f"Cannot find table {tcm_table_name} in file {tcm_path}")
continue
Expand Down Expand Up @@ -649,7 +649,7 @@ def build_entry_list(
if tb in col_tiers[file]["tables"][tier]:
table_name = self.filedb.get_table_name(tier, tb)
try:
tier_table, _ = sto.read_object(
tier_table, _ = sto.read(
table_name,
tier_path,
field_mask=cut_cols[level],
Expand Down Expand Up @@ -708,11 +708,9 @@ def build_entry_list(
f_dict = f_entries.to_dict("list")
f_struct = Struct(f_dict)
if self.merge_files:
sto.write_object(f_struct, "entries", output_file, wo_mode="a")
sto.write(f_struct, "entries", output_file, wo_mode="a")
else:
sto.write_object(
f_struct, f"entries/{file}", output_file, wo_mode="a"
)
sto.write(f_struct, f"entries/{file}", output_file, wo_mode="a")

if log.getEffectiveLevel() >= logging.INFO:
progress_bar.close()
Expand Down Expand Up @@ -862,7 +860,7 @@ def build_hit_entries(
# load the data from the tier file, just the columns needed for the cut
table_name = self.filedb.get_table_name(tier, tb)
try:
tier_tb, _ = sto.read_object(
tier_tb, _ = sto.read(
table_name, tier_path, field_mask=cut_cols
)
except KeyError:
Expand Down Expand Up @@ -902,11 +900,9 @@ def build_hit_entries(
f_dict = f_entries.to_dict("list")
f_struct = Struct(f_dict)
if self.merge_files:
sto.write_object(f_struct, "entries", output_file, wo_mode="a")
sto.write(f_struct, "entries", output_file, wo_mode="a")
else:
sto.write_object(
f_struct, f"entries/{file}", output_file, wo_mode="a"
)
sto.write(f_struct, f"entries/{file}", output_file, wo_mode="a")

if log.getEffectiveLevel() >= logging.INFO:
progress_bar.close()
Expand Down Expand Up @@ -1117,7 +1113,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table):
for file in files
]

tier_table, _ = sto.read_object(
tier_table, _ = sto.read(
name=tb_name,
lh5_file=tier_paths,
idx=idx_mask,
Expand All @@ -1143,7 +1139,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table):
f_table = utils.dict_to_table(col_dict=col_dict, attr_dict=attr_dict)

if output_file:
sto.write_object(f_table, "merged_data", output_file, wo_mode="o")
sto.write(f_table, "merged_data", output_file, wo_mode="o")
if in_memory:
if self.output_format == "lgdo.Table":
return f_table
Expand Down Expand Up @@ -1220,7 +1216,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table):
raise FileNotFoundError(tier_path)

table_name = self.filedb.get_table_name(tier, tb)
tier_table, _ = sto.read_object(
tier_table, _ = sto.read(
table_name,
tier_path,
idx=idx_mask,
Expand All @@ -1246,7 +1242,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table):
if in_memory:
load_out.add_field(name=file, obj=f_table)
if output_file:
sto.write_object(f_table, f"{file}", output_file, wo_mode="o")
sto.write(f_table, f"{file}", output_file, wo_mode="o")
# end file loop

if log.getEffectiveLevel() >= logging.INFO:
Expand Down Expand Up @@ -1318,7 +1314,7 @@ def load_evts(
)
if os.path.exists(tier_path):
table_name = self.filedb.get_table_name(tier, tb)
tier_table, _ = sto.read_object(
tier_table, _ = sto.read(
table_name,
tier_path,
idx=idx_mask,
Expand All @@ -1332,7 +1328,7 @@ def load_evts(
if in_memory:
load_out[file] = f_table
if output_file:
sto.write_object(f_table, f"file{file}", output_file, wo_mode="o")
sto.write(f_table, f"file{file}", output_file, wo_mode="o")
# end file loop

if in_memory:
Expand Down
38 changes: 17 additions & 21 deletions src/pygama/flow/file_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
import warnings

import h5py
import lgdo
import numpy as np
import pandas as pd
from lgdo import Array, Scalar, VectorOfVectors
from lgdo import lh5_store as lh5
from lgdo.lh5.store import LH5Store, ls
from lgdo.lh5.utils import expand_path, expand_vars
from lgdo.types import Array, Scalar, VectorOfVectors
from parse import parse

from . import utils
Expand Down Expand Up @@ -185,14 +185,12 @@ def set_config(self, config: dict, config_path: str = None) -> None:
if config_path is not None:
subst_vars["_"] = os.path.dirname(str(config_path))

data_dir = lgdo.lgdo_utils.expand_path(
self.config["data_dir"], substitute=subst_vars
)
data_dir = expand_path(self.config["data_dir"], substitute=subst_vars)
self.data_dir = data_dir

tier_dirs = self.config["tier_dirs"]
for k, val in tier_dirs.items():
tier_dirs[k] = lgdo.lgdo_utils.expand_vars(val, substitute=subst_vars)
tier_dirs[k] = expand_vars(val, substitute=subst_vars)
self.tier_dirs = tier_dirs

def scan_files(self, dirs: list[str] = None) -> None:
Expand Down Expand Up @@ -407,7 +405,7 @@ def update_tables_cols(row, tier: str, utc_cache: dict = None) -> pd.Series:
)

# TODO this call here is really expensive!
groups = lh5.ls(f, wildcard)
groups = ls(f, wildcard)
if len(groups) > 0 and parse(template, groups[0]) is None:
log.warning(f"groups in {fpath} don't match template")
else:
Expand All @@ -431,7 +429,7 @@ def update_tables_cols(row, tier: str, utc_cache: dict = None) -> pd.Series:
table_name = template

try:
col = lh5.ls(f[table_name])
col = ls(f[table_name])
except KeyError:
log.warning(f"cannot find '{table_name}' in {fpath}")
continue
Expand Down Expand Up @@ -477,8 +475,8 @@ def update_tables_cols(row, tier: str, utc_cache: dict = None) -> pd.Series:
columns_vov = VectorOfVectors(
flattened_data=flattened, cumulative_length=length
)
sto = lh5.LH5Store()
sto.write_object(columns_vov, "unique_columns", to_file)
sto = LH5Store()
sto.write(columns_vov, "unique_columns", to_file)

return self.columns

Expand All @@ -501,12 +499,12 @@ def from_disk(self, path: str | list[str]) -> None:
# expand wildcards
paths = []
for p in path:
paths += lgdo.lgdo_utils.expand_path(p, list=True)
paths += expand_path(p, list=True)

if not paths:
raise FileNotFoundError(path)

sto = lh5.LH5Store()
sto = LH5Store()
# objects/accumulators that will be used to configure the FileDB at the end
_cfg = None
_df = None
Expand All @@ -528,7 +526,7 @@ def _replace_idx(row, trans, tier):

# loop over the files
for p in paths:
cfg, _ = sto.read_object("config", p)
cfg, _ = sto.read("config", p)
cfg = json.loads(cfg.value.decode())

# make sure configurations are all the same
Expand All @@ -540,7 +538,7 @@ def _replace_idx(row, trans, tier):
)

# read in unique columns
vov, _ = sto.read_object("columns", p)
vov, _ = sto.read("columns", p)
# Convert back from VoV of UTF-8 bytestrings to a list of lists of strings
columns = [[v.decode("utf-8") for v in ov] for ov in list(vov)]

Expand Down Expand Up @@ -599,14 +597,12 @@ def to_disk(self, filename: str, wo_mode="write_safe") -> None:
filename
output LH5 file name.
wo_mode
passed to :meth:`~.lgdo.lh5_store.write_object`.
passed to :meth:`~.lgdo.lh5.write`.
"""
log.debug(f"writing database to {filename}")

sto = lh5.LH5Store()
sto.write_object(
Scalar(json.dumps(self.config)), "config", filename, wo_mode=wo_mode
)
sto = LH5Store()
sto.write(Scalar(json.dumps(self.config)), "config", filename, wo_mode=wo_mode)

if wo_mode in ["write_safe", "w", "overwrite_file", "of"]:
wo_mode = "a"
Expand All @@ -623,7 +619,7 @@ def to_disk(self, filename: str, wo_mode="write_safe") -> None:
flattened_data=Array(nda=np.array(flat).astype("S")),
cumulative_length=Array(nda=np.array(cum_l)),
)
sto.write_object(col_vov, "columns", filename, wo_mode=wo_mode)
sto.write(col_vov, "columns", filename, wo_mode=wo_mode)

# FIXME: to_hdf() throws this:
#
Expand Down
4 changes: 2 additions & 2 deletions src/pygama/hit/build_hit.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def build_hit(
n_max
maximum number of rows to process
wo_mode
forwarded to :meth:`~.lgdo.lh5_store.write_object`.
forwarded to :meth:`~.lgdo.lh5.write`.
"""
store = LH5Store()

Expand Down Expand Up @@ -168,7 +168,7 @@ def build_hit(
if col not in cfg["outputs"]:
outtbl_obj.remove_column(col, delete=True)

store.write_object(
store.write(
obj=outtbl_obj,
name=tbl.replace("/dsp", "/hit"),
lh5_file=outfile,
Expand Down
2 changes: 1 addition & 1 deletion src/pygama/pargen/AoE_cal.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import matplotlib as mpl

mpl.use("agg")
import lgdo.lh5_store as lh5
import lgdo.lh5 as lh5
import matplotlib.cm as cmx
import matplotlib.colors as mcolors
import matplotlib.dates as mdates
Expand Down
2 changes: 1 addition & 1 deletion src/pygama/pargen/cuts.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import logging
import os

import lgdo.lh5_store as lh5
import lgdo.lh5 as lh5
import numpy as np
import pandas as pd
from scipy import stats
Expand Down
2 changes: 1 addition & 1 deletion src/pygama/pargen/ecal_th.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from scipy.stats import binned_statistic

mpl.use("agg")
import lgdo.lh5_store as lh5
import lgdo.lh5 as lh5
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
Expand Down
Loading

0 comments on commit 0adfdc2

Please sign in to comment.