From 62f8d3b87c46f65607e553f85dce6023655cdacc Mon Sep 17 00:00:00 2001 From: Jason Detwiler Date: Fri, 3 Nov 2023 02:53:33 -0700 Subject: [PATCH] Pre-compute `col_tiers` in `DataLoader.build_entry_list()` for speedup (#523) pre-compute col_tiers for speedup --- src/pygama/flow/data_loader.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/pygama/flow/data_loader.py b/src/pygama/flow/data_loader.py index ab4ed5150..7490271b0 100644 --- a/src/pygama/flow/data_loader.py +++ b/src/pygama/flow/data_loader.py @@ -505,6 +505,8 @@ def build_entry_list( # Find out which columns are needed for any cuts cut_cols = {} + # ... and pre-compute which tiers need to be loaded to make the cuts + col_tiers_dict = {} for level in [child, parent]: cut_cols[level] = [] @@ -527,6 +529,9 @@ def build_entry_list( and save_output_columns ): for_output.append(term) + col_tiers_dict[level] = self.get_tiers_for_col( + cut_cols[level], merge_files=False + ) if save_output_columns: entry_cols += for_output @@ -611,7 +616,7 @@ def build_entry_list( if level in self.cuts.keys(): cut = self.cuts[level] - col_tiers = self.get_tiers_for_col(cut_cols[level], merge_files=False) + col_tiers = col_tiers_dict[level] # Tables in first tier of event should be the same for all tiers in one level tables = self.filedb.df.loc[file, f"{self.tiers[level][0]}_tables"]