Skip to content

Commit

Permalink
Pre-compute col_tiers in DataLoader.build_entry_list() for speedup (#523)
Browse files (browse the repository at this point in the history)

pre-compute col_tiers for speedup
  • Loading branch information
jasondet authored Nov 3, 2023
1 parent 29cce25 commit 62f8d3b
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion src/pygama/flow/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,8 @@ def build_entry_list(

# Find out which columns are needed for any cuts
cut_cols = {}
+ # ... and pre-load which tiers need to be loaded to make the cuts
+ col_tiers_dict = {}

for level in [child, parent]:
cut_cols[level] = []
Expand All @@ -527,6 +529,9 @@ def build_entry_list(
and save_output_columns
):
for_output.append(term)
+ col_tiers_dict[level] = self.get_tiers_for_col(
+ cut_cols[level], merge_files=False
+ )

if save_output_columns:
entry_cols += for_output
Expand Down Expand Up @@ -611,7 +616,7 @@ def build_entry_list(
if level in self.cuts.keys():
cut = self.cuts[level]

- col_tiers = self.get_tiers_for_col(cut_cols[level], merge_files=False)
+ col_tiers = col_tiers_dict[level]

# Tables in first tier of event should be the same for all tiers in one level
tables = self.filedb.df.loc[file, f"{self.tiers[level][0]}_tables"]
Expand Down

0 comments on commit 62f8d3b

Please sign in to comment.