From a4ea61aee9a5acb361364ef99c22d45dcf7b94bf Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Thu, 28 Sep 2023 19:03:02 +0200 Subject: [PATCH 01/73] first idea for evt tier --- src/pygama/evt/build_evt.py | 309 ++++++++++++++++++++++++++++++++++ src/pygama/evt/modules/spm.py | 44 +++++ 2 files changed, 353 insertions(+) create mode 100644 src/pygama/evt/build_evt.py create mode 100644 src/pygama/evt/modules/spm.py diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py new file mode 100644 index 000000000..5765e1372 --- /dev/null +++ b/src/pygama/evt/build_evt.py @@ -0,0 +1,309 @@ +""" +This module implements routines to build the evt tier. + +TODO: +- make me faster! Currently 37.70 ms/evt +- write tests +- get feedback +- write everything smart +""" +from __future__ import annotations +from importlib import import_module +import itertools +import json +from legendmeta import LegendMetadata +import logging +import numpy as np +import pygama.lgdo.lh5_store as store +from pygama.lgdo import Array +import re +import os + +log = logging.getLogger(__name__) + +def num_and_pars(value: str,par_dic: dict): + # function tries to convert a string to a int, float, bool + # or returns the value if value is a key in par_dic + if value in par_dic.keys(): return par_dic[value] + try: + value = int(value) + except ValueError: + try: + value = float(value) + except ValueError: + try: + value = bool(value) + except ValueError: + pass + return value + +def evaluate_expression(f_evt:str,f_hit:str, f_dsp: str, chns: list, mode: str, expr: str, para: dict = None, defv = np.nan, getch: bool = False) -> np.ndarray: + """ + Evaluates the expression defined by the user across all channels according to the mode + Parameters + ---------- + f_evt + Path to event tier file + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + List of channel names across which expression gets evaluated (form: "ch") + mode + The mode determines how the event entry is 
calculated across channels. Options are: + - "first": The value of the channel in an event triggering first in time (according to tp_0_est) is returned. It is possible to add a condition (e.g. "first>10"). Only channels fullfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, nan is returned for this event. + - "last": The value of the channel in an event triggering last in time (according to tp_0_est) is returned. It is possible to add a condition (e.g. "last>10"). Only channels fullfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, nan is returned for this event. + - "tot": The sum of all channels across an event. It is possible to add a condition (e.g. "tot>10"). Only channels fullfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, zero is returned for this event. Booleans are treated as integers 0/1. + - "any": Logical or between all channels. Non boolean values are True for values != 0 and False for values == 0. + - "all": Logical and between all channels. Non boolean values are True for values != 0 and False for values == 0. + - ch_field: A previously generated channel_id field (i.e. from the get_ch flage) can be given here, and the value of this specific channels is used. + - "single": !!!NOT IMPLEMENTED!!!. Channels are not combined, but result saved for each channel. field name gets channel id as suffix. + expr + The expression. That can be any mathematical equation/comparison. If mode == func, the expression needs to be a special processing function defined in modules (e.g. "modules.spm.get_energy). In the expression parameters from either hit, dsp, evt tier (from operations performed before this one! --> JSON operaions order matters), or from the "parameters" field can be used. + para + Dictionary of parameters defined in the "parameters" field in the configuration JSON file. 
+ getch + Only affects "first", "last" modes. In that cases the rawid of the resulting values channel is returned as well. + """ + #define dimension of output array + out = np.zeros(store.LH5Store().read_n_rows(chns[0]+"/dsp/",f_dsp)) + out[:] = defv + out_chs = np.zeros(len(out),dtype=int) + + if mode == "func": + exprl = re.findall(r"[a-zA-Z_$][\w$]*",expr) + var = {} + if os.path.exists(f_evt):var = store.load_nda(f_evt,[e.split('/')[-1] for e in store.ls(f_evt) if e.split('/')[-1] in exprl]) + if para: var = var | para + + # evaluate expression + func, params = expr.split('(') + params = params[:-1].split(',') + params = [f_hit,f_dsp,chns]+[num_and_pars(e,var) for e in params] + + # load function dynamically + p,m = func.rsplit('.',1) + mod = import_module(p) + met = getattr(mod,m) + + return met(*params) + + else: + for ch in chns: + #find all potential variables + exprl = re.findall(r"[a-zA-Z_$][\w$]*",expr) + + # find fields in either dsp, hit, evt or parameters, prepare evaluation + evt_dic = {} + if os.path.exists(f_evt): evt_dic = store.load_nda(f_evt,[e.split('/')[-1] for e in store.ls(f_evt) if e.split('/')[-1] in exprl]) + hit_dic = store.load_nda(f_hit,[e.split('/')[-1] for e in store.ls(f_hit,ch+"/hit/") if e.split('/')[-1] in exprl],ch+"/hit/") + dsp_dic = store.load_nda(f_dsp,[e.split('/')[-1] for e in store.ls(f_dsp,ch+"/dsp/") if e.split('/')[-1] in exprl],ch+"/dsp/") + + var= hit_dic | dsp_dic | evt_dic + if para: var = var | para + + # evaluate expression + res = eval(expr,var) + if not isinstance(res, np.ndarray): + res = np.full(len(out),res,dtype=type(res)) + + # append to out according to mode + ops = re.findall(r'([<>]=?|==)', mode) + if len(ops)>0: + op = ops[0] + lim = float(mode.split(op)[-1]) + limarr = eval("res"+op+"lim",{"res":res,"lim":lim}) + else: + + limarr = np.ones(len(res)).astype(bool) + if "first" in mode: + outt = np.zeros(len(out)) + outt[:] = np.inf + t0 = store.load_nda(f_dsp,["tp_0_est"],ch+"/dsp/")["tp_0_est"] + out = 
np.where((t0outt) & (limarr),res,out) + out_chs = np.where((t0outt) & (limarr),t0,outt) + elif "tot" in mode: + if ch == chns[0]: out[:] = 0 + if res.dtype == bool: res = res.astype(int) + out += np.where(limarr,res,out) + elif mode == "any": + if ch == chns[0]: + out = out.astype(bool) + if res.dtype != bool: res = res.astype(bool) + out = out | res + elif mode == "all": + if ch == chns[0]: + out = out.astype(bool) + if res.dtype != bool: res = res.astype(bool) + out = out & res + elif mode in store.ls(f_evt): + ch_comp = store.load_nda(f_evt,[mode])[mode] + out = np.where(int(ch[2:]) == ch_comp,res,out) + else: + raise ValueError(mode + " not a valid mode") + + if getch: return out, out_chs + else: return out + +def build_evt( + f_dsp: str, + f_hit: str, + f_evt: str, + meta_path: str = None, + evt_config: str | dict = None, + wo_mode: str = "write_safe" +) -> None: + """ + Transform data from the hit and dsp levels which a channel sorted + to a event sorted data format + + Parameters + ---------- + f_dsp + input LH5 file of the dsp level + f_hit + input LH5 file of the hit level + f_evt + name of the output file + evt_config + dictionary or name of JSON file defining evt fields. Channel lists can be defined by the user or by using the keyword "meta" followed by the system (geds/spms) and the usability (on,no_psd,ac,off) seperated by underscores (e.g. "meta_geds_on") in the "channels" dictonary. The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "mode" defines how the channels should be combined (see evaluate_expression). For first/last modes a "get_ch" flag can be defined, if true an additional field with the sufix "_id" is returned containing the rawid of the respective value in the field without the suffix. 
"expression" defnies the mathematical/special function to apply (see evaluate_expression), "parameters" defines any other parameter used in expression For example: + + .. code-block::json + + { + "channels": { + "geds_on": "meta_geds_on", + "geds_no_psd": "meta_geds_no_psd", + "geds_ac": "meta_geds_ac", + "spms_on": "meta_spms_on", + "pulser": "PULS01", + "baseline": "BSLN01", + "muon": "MUON01", + "ts_master":"S060" + }, + "operations": { + "energy":{ + "channels": ["geds_on","geds_no_psd","geds_ac"], + "mode": "first>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal" + }, + "aoe":{ + "channels": ["geds_on"], + "mode": "energy_id", + "expression": "AoE_Classifier" + }, + "is_muon_tagged":{ + "channels": "muon", + "mode": "any", + "expression": "wf_max>a", + "parameters": {"a":15100} + }, + "multiplicity":{ + "channels": ["geds_on","geds_no_psd","geds_ac"], + "mode": "tot", + "expression": "cuspEmax_ctc_cal > a", + "parameters": {"a":25} + }, + "lar_energy":{ + "channels": "spms_on", + "mode": "func", + "expression": "modules.spm.get_energy(0.5,t0,48000,1000,5000)" + } + } + } + """ + lstore = store.LH5Store() + tbl_cfg = evt_config + if isinstance(tbl_cfg,str): + with open(tbl_cfg) as f: + tbl_cfg = json.load(f) + + # create channel list according to config + # This can be either read from the meta data + # or a list of channel names + log.debug("Creating channel dictionary") + if meta_path: lmeta = LegendMetadata(path=meta_path) + else: lmeta = LegendMetadata() + chmap = lmeta.channelmap(re.search("\d{8}T\d{6}Z",f_dsp).group(0)) + chns = {} + for k, v in tbl_cfg['channels'].items(): + if isinstance(v,str): + if "meta" in v: + m,sys,usa = v.split("_",2) + tmp = [f"ch{e}" for e in chmap.map("daq.rawid") if chmap.map("daq.rawid")[e]['system'] == sys] + chns[k] = [e for e in tmp if chmap.map("daq.rawid")[int(e[2:])]['analysis']['usability'] == usa] + else: + chns[k] = [f"ch{chmap.map('name')[v]['daq']['rawid']}"] + elif isinstance(v,list): + chns[k] = 
[f"ch{chmap.map('name')[e]['daq']['rawid']}" for e in v] + + + # do operations + first_iter = True + log.info(f"Applying'{len(tbl_cfg['operations'].keys())} operations' to dsp file {f_dsp} and hit file {f_hit} to create evt file {f_evt}") + for k, v in tbl_cfg['operations'].items(): + log.debug("Processing field" + k) + + # if channels not defined in operation, it can only be an operation on the evt level. + if 'channels' not in v.keys(): + exprl = re.findall(r"[a-zA-Z_$][\w$]*",v["expression"]) + var = {} + if os.path.exists(f_evt):var = store.load_nda(f_evt,[e.split('/')[-1] for e in store.ls(f_evt) if e.split('/')[-1] in exprl]) + if "parameters" in v.keys(): var = var | v['parameters'] + res = Array(eval(v["expression"],var)) + lstore.write_object( + obj=res, + name= k, + lh5_file=f_evt, + wo_mode=wo_mode #if first_iter else "append" + ) + continue + + if isinstance(v['channels'],str): chns_e = chns[v['channels']] + elif isinstance(v['channels'],list): chns_e = list(itertools.chain.from_iterable( [chns[e] for e in v['channels']])) + + pars = None + defaultv = np.nan + if "parameters" in v.keys(): pars = v['parameters'] + if "initial" in v.keys() and not v['initial'] == "np.nan" : defaultv = v['initial'] + + if "get_ch" in v.keys(): + if "first" in v['mode'] or "last" in v['mode']: + res, chs = evaluate_expression(f_evt,f_hit,f_dsp,chns_e,v['mode'],v['expression'],pars,defaultv, v["get_ch"]) + lstore.write_object( + obj=Array(res), + name= k, + lh5_file=f_evt, + wo_mode=wo_mode #if first_iter else "append" + ) + lstore.write_object( + obj=Array(chs), + name= k+"_id", + lh5_file=f_evt, + wo_mode=wo_mode #if first_iter else "append" + ) + + else: + raise ValueError("get_ch can be only applied to first and last modes") + + else: + res = Array(evaluate_expression(f_evt,f_hit,f_dsp,chns_e,v['mode'],v['expression'],pars,defaultv)) + + lstore.write_object( + obj=res, + name= k, + lh5_file=f_evt, + wo_mode=wo_mode #if first_iter else "append" + ) + if first_iter: 
first_iter = False + + log.info("Done") \ No newline at end of file diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py new file mode 100644 index 000000000..4d4604452 --- /dev/null +++ b/src/pygama/evt/modules/spm.py @@ -0,0 +1,44 @@ +import numpy as np +import pygama.lgdo.lh5_store as store + +""" +Module for special event level routines for SiPMs + +functions must take as the first 3 args in order: +- path to the hit file +- path to the dsp file +- list of channels processed +additional parameters are free to the user and need to be defined in the JSON +""" +#get LAr energy per event over all channels +def get_energy(f_hit,f_dsp,chs,lim,trgr,tdefault,tmin,tmax): + trig = np.where(np.isnan(trgr),tdefault,trgr) + tmi = trig - tmin + tma = trig + tmax + sum = np.zeros(len(trig)) + for ch in chs: + df =store.load_nda(f_hit, ["energy_in_pe","is_valid_hit",'trigger_pos'],ch+"/hit/") + mask = (df["trigger_pos"]tmi[:,None]/16) & (df["energy_in_pe"] > lim) + pes=df["energy_in_pe"] + pes= np.where(np.isnan(pes), 0, pes) + pes= np.where(mask,pes,0) + chsum= np.nansum(pes, axis=1) + sum = sum + chsum + return sum + +#get LAr majority per event over all channels +def get_majority(f_hit,f_dsp,chs,lim,trgr,tdefault,tmin,tmax): + trig = np.where(np.isnan(trgr),tdefault,trgr) + tmi = trig - tmin + tma = trig + tmax + maj = np.zeros(len(trig)) + for ch in chs: + df =store.load_nda(f_hit, ["energy_in_pe","is_valid_hit",'trigger_pos'],ch+"/hit/") + mask = (df["trigger_pos"]tmi[:,None]/16) & (df["energy_in_pe"] > lim) + pes=df["energy_in_pe"] + pes= np.where(np.isnan(pes), 0, pes) + pes= np.where(mask,pes,0) + chsum= np.nansum(pes, axis=1) + chmaj = np.where(chsum>lim,1,0) + maj = maj + chmaj + return maj \ No newline at end of file From 1a966de2186ab238cea2873a25fe3d9ad72664fc Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 29 Sep 2023 14:05:36 +0200 Subject: [PATCH 02/73] 3x speedup and added LAr classifier module --- src/pygama/evt/build_evt.py | 
133 +++++++++++++--------------------- src/pygama/evt/modules/spm.py | 41 ++++++++++- 2 files changed, 91 insertions(+), 83 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 5765e1372..3340c58fc 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -37,7 +37,7 @@ def num_and_pars(value: str,par_dic: dict): pass return value -def evaluate_expression(f_evt:str,f_hit:str, f_dsp: str, chns: list, mode: str, expr: str, para: dict = None, defv = np.nan, getch: bool = False) -> np.ndarray: +def evaluate_expression(f_evt:str,f_hit:str, f_dsp: str, chns: list, mode: str, expr: str, para: dict = None, defv = np.nan, nrows: int = None) -> np.ndarray: """ Evaluates the expression defined by the user across all channels according to the mode Parameters @@ -67,59 +67,47 @@ def evaluate_expression(f_evt:str,f_hit:str, f_dsp: str, chns: list, mode: str, Only affects "first", "last" modes. In that cases the rawid of the resulting values channel is returned as well. 
""" #define dimension of output array - out = np.zeros(store.LH5Store().read_n_rows(chns[0]+"/dsp/",f_dsp)) - out[:] = defv + n = nrows if nrows is not None else store.LH5Store().read_n_rows(chns[0]+"/dsp/",f_dsp) + out = np.full(n,defv,dtype=type(defv)) out_chs = np.zeros(len(out),dtype=int) + # find parameters in evt file or in parameters + exprl = re.findall(r"[a-zA-Z_$][\w$]*",expr) + var_ph = {} + if os.path.exists(f_evt):var_ph = store.load_nda(f_evt,[e.split('/')[-1] for e in store.ls(f_evt) if e.split('/')[-1] in exprl]) + if para: var_ph = var_ph | para + if mode == "func": - exprl = re.findall(r"[a-zA-Z_$][\w$]*",expr) - var = {} - if os.path.exists(f_evt):var = store.load_nda(f_evt,[e.split('/')[-1] for e in store.ls(f_evt) if e.split('/')[-1] in exprl]) - if para: var = var | para - # evaluate expression func, params = expr.split('(') - params = params[:-1].split(',') - params = [f_hit,f_dsp,chns]+[num_and_pars(e,var) for e in params] + params = [f_hit,f_dsp,chns]+[num_and_pars(e,var_ph) for e in params[:-1].split(',')] # load function dynamically p,m = func.rsplit('.',1) - mod = import_module(p) - met = getattr(mod,m) - - return met(*params) + met = getattr(import_module(p),m) + out = met(*params) else: + # evaluate operator in mode + ops = re.findall(r'([<>]=?|==)', mode) + ch_comp = None + if os.path.exists(f_evt) and mode in store.ls(f_evt): + ch_comp = store.load_nda(f_evt,[mode])[mode] for ch in chns: - #find all potential variables - exprl = re.findall(r"[a-zA-Z_$][\w$]*",expr) - - # find fields in either dsp, hit, evt or parameters, prepare evaluation - evt_dic = {} - if os.path.exists(f_evt): evt_dic = store.load_nda(f_evt,[e.split('/')[-1] for e in store.ls(f_evt) if e.split('/')[-1] in exprl]) - hit_dic = store.load_nda(f_hit,[e.split('/')[-1] for e in store.ls(f_hit,ch+"/hit/") if e.split('/')[-1] in exprl],ch+"/hit/") + # find fields in either dsp, hit + var = store.load_nda(f_hit,[e.split('/')[-1] for e in store.ls(f_hit,ch+"/hit/") if 
e.split('/')[-1] in exprl],ch+"/hit/") dsp_dic = store.load_nda(f_dsp,[e.split('/')[-1] for e in store.ls(f_dsp,ch+"/dsp/") if e.split('/')[-1] in exprl],ch+"/dsp/") - - var= hit_dic | dsp_dic | evt_dic - if para: var = var | para - + var= var |dsp_dic | var_ph + # evaluate expression res = eval(expr,var) - if not isinstance(res, np.ndarray): - res = np.full(len(out),res,dtype=type(res)) + if not isinstance(res, np.ndarray): res = np.full(len(out),res,dtype=type(res)) # append to out according to mode - ops = re.findall(r'([<>]=?|==)', mode) - if len(ops)>0: - op = ops[0] - lim = float(mode.split(op)[-1]) - limarr = eval("res"+op+"lim",{"res":res,"lim":lim}) - else: - - limarr = np.ones(len(res)).astype(bool) + if len(ops)>0: limarr = eval("".join(["res",ops[0],"lim"]),{"res":res,"lim":float(mode.split(ops[0])[-1])}) + else: limarr = np.ones(len(out)).astype(bool) if "first" in mode: - outt = np.zeros(len(out)) - outt[:] = np.inf + outt = np.full(len(out),np.inf) t0 = store.load_nda(f_dsp,["tp_0_est"],ch+"/dsp/")["tp_0_est"] out = np.where((t0outt) & (limarr),res,out) - out_chs = np.where((t0outt) & (limarr),int(ch[2:]),out_chs) outt = np.where((t0>outt) & (limarr),t0,outt) elif "tot" in mode: - if ch == chns[0]: out[:] = 0 if res.dtype == bool: res = res.astype(int) out += np.where(limarr,res,out) elif mode == "any": - if ch == chns[0]: - out = out.astype(bool) if res.dtype != bool: res = res.astype(bool) out = out | res elif mode == "all": - if ch == chns[0]: - out = out.astype(bool) if res.dtype != bool: res = res.astype(bool) out = out & res - elif mode in store.ls(f_evt): - ch_comp = store.load_nda(f_evt,[mode])[mode] + elif ch_comp is not None: out = np.where(int(ch[2:]) == ch_comp,res,out) else: raise ValueError(mode + " not a valid mode") - if getch: return out, out_chs - else: return out + return out, out_chs def build_evt( f_dsp: str, @@ -248,7 +229,7 @@ def build_evt( # do operations - first_iter = True + first_iter,nrows = True,None 
log.info(f"Applying'{len(tbl_cfg['operations'].keys())} operations' to dsp file {f_dsp} and hit file {f_hit} to create evt file {f_evt}") for k, v in tbl_cfg['operations'].items(): log.debug("Processing field" + k) @@ -266,44 +247,32 @@ def build_evt( lh5_file=f_evt, wo_mode=wo_mode #if first_iter else "append" ) - continue - - if isinstance(v['channels'],str): chns_e = chns[v['channels']] - elif isinstance(v['channels'],list): chns_e = list(itertools.chain.from_iterable( [chns[e] for e in v['channels']])) - - pars = None - defaultv = np.nan - if "parameters" in v.keys(): pars = v['parameters'] - if "initial" in v.keys() and not v['initial'] == "np.nan" : defaultv = v['initial'] - - if "get_ch" in v.keys(): - if "first" in v['mode'] or "last" in v['mode']: - res, chs = evaluate_expression(f_evt,f_hit,f_dsp,chns_e,v['mode'],v['expression'],pars,defaultv, v["get_ch"]) - lstore.write_object( - obj=Array(res), - name= k, - lh5_file=f_evt, - wo_mode=wo_mode #if first_iter else "append" - ) - lstore.write_object( - obj=Array(chs), - name= k+"_id", - lh5_file=f_evt, - wo_mode=wo_mode #if first_iter else "append" - ) - - else: - raise ValueError("get_ch can be only applied to first and last modes") - else: - res = Array(evaluate_expression(f_evt,f_hit,f_dsp,chns_e,v['mode'],v['expression'],pars,defaultv)) + if isinstance(v['channels'],str): chns_e = chns[v['channels']] + elif isinstance(v['channels'],list): chns_e = list(itertools.chain.from_iterable( [chns[e] for e in v['channels']])) + + pars, defaultv = None , np.nan + if "parameters" in v.keys(): pars = v['parameters'] + if "initial" in v.keys() and not v['initial'] == "np.nan" : defaultv = v['initial'] + res,chs = evaluate_expression(f_evt,f_hit,f_dsp,chns_e,v['mode'],v['expression'],pars,defaultv,nrows) lstore.write_object( - obj=res, - name= k, - lh5_file=f_evt, - wo_mode=wo_mode #if first_iter else "append" + obj=Array(res), + name= k, + lh5_file=f_evt, + wo_mode=wo_mode ) + + # if get_ch true flag in a first/last 
mode operation also obtain channel field + if "get_ch" in v.keys() and ("first" in v['mode'] or "last" in v['mode']) and v["get_ch"]: + lstore.write_object( + obj=Array(chs), + name= k+"_id", + lh5_file=f_evt, + wo_mode=wo_mode + ) + if first_iter: first_iter = False + if not nrows: nrows = len(res) log.info("Done") \ No newline at end of file diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 4d4604452..04956bec6 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -41,4 +41,43 @@ def get_majority(f_hit,f_dsp,chs,lim,trgr,tdefault,tmin,tmax): chsum= np.nansum(pes, axis=1) chmaj = np.where(chsum>lim,1,0) maj = maj + chmaj - return maj \ No newline at end of file + return maj + +def get_etc(f_hit,f_dsp,chs,lim,trgr,tdefault,tmin,tmax,swin,trail): + predf = store.load_nda(f_hit, ["energy_in_pe",'timestamp'],chs[0]+"/hit/") + + peshape = (predf["energy_in_pe"]).shape + # 1D = channel, 2D = event num, 3D = array per event + pes=np.zeros([len(chs),peshape[0],peshape[1]]) + times = np.zeros([len(chs),peshape[0],peshape[1]]) + + tge = np.where(np.isnan(trgr),tdefault,trgr) + tmi = tge - tmin + tma = tge + tmax + for i in range(len(chs)): + df =store.load_nda(f_hit, ["energy_in_pe",'trigger_pos','timestamp'],chs[i]+"/hit/") + mask = (df["trigger_pos"]tmi[:,None]/16) & (df["energy_in_pe"] > lim) + pe=df["energy_in_pe"] + time = df["trigger_pos"]*16 + + pe= np.where(mask,pe,np.nan) + time= np.where(mask,time,np.nan) + + pes[i] = pe + times[i] = time + + outi = None + if trail >0: + t1d = np.nanmin(times,axis=(0,2)) + if trail == 2: t1d[t1d>tge] = tge[t1d>tge] + tt = t1d[:,None] + outi = np.where(np.nansum(np.where((times >= tt),pes,0),axis=(0,2)) > 0, + np.nansum(np.where((times >= tt) & (times < tt+swin),pes,0),axis=(0,2))/np.nansum(np.where((times >= tt),pes,0),axis=(0,2)), + np.nansum(np.where((times >= tt),pes,0),axis=(0,2))) + return outi + + else: + outi = np.where(np.nansum(pes,axis=(0,2)) > 0, + 
np.nansum(np.where((times >= tge[:,None]) & (times <= tge[:,None]+swin),pes,0),axis=(0,2))/np.nansum(np.where((times >= tge[:,None]),pes,0),axis=(0,2)), + np.nansum(pes,axis=(0,2))) + return outi \ No newline at end of file From 4d782302a718f3c04e9d644f3a41f7a58c441cbd Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 29 Sep 2023 17:28:37 +0200 Subject: [PATCH 03/73] small changes and additional lar modules --- src/pygama/evt/build_evt.py | 384 +++++++++++++++++++++------------- src/pygama/evt/modules/spm.py | 225 ++++++++++++++------ 2 files changed, 401 insertions(+), 208 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 3340c58fc..92af205ca 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -1,30 +1,29 @@ """ This module implements routines to build the evt tier. - -TODO: -- make me faster! Currently 37.70 ms/evt -- write tests -- get feedback -- write everything smart """ from __future__ import annotations -from importlib import import_module + import itertools import json -from legendmeta import LegendMetadata import logging +import os +import re +from importlib import import_module + import numpy as np +from legendmeta import LegendMetadata + import pygama.lgdo.lh5_store as store from pygama.lgdo import Array -import re -import os log = logging.getLogger(__name__) -def num_and_pars(value: str,par_dic: dict): + +def num_and_pars(value: str, par_dic: dict): # function tries to convert a string to a int, float, bool # or returns the value if value is a key in par_dic - if value in par_dic.keys(): return par_dic[value] + if value in par_dic.keys(): + return par_dic[value] try: value = int(value) except ValueError: @@ -37,110 +36,160 @@ def num_and_pars(value: str,par_dic: dict): pass return value -def evaluate_expression(f_evt:str,f_hit:str, f_dsp: str, chns: list, mode: str, expr: str, para: dict = None, defv = np.nan, nrows: int = None) -> np.ndarray: - """ - Evaluates the expression 
defined by the user across all channels according to the mode - Parameters - ---------- - f_evt - Path to event tier file - f_hit - Path to hit tier file - f_dsp - Path to dsp tier file - chns - List of channel names across which expression gets evaluated (form: "ch") - mode - The mode determines how the event entry is calculated across channels. Options are: - - "first": The value of the channel in an event triggering first in time (according to tp_0_est) is returned. It is possible to add a condition (e.g. "first>10"). Only channels fullfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, nan is returned for this event. - - "last": The value of the channel in an event triggering last in time (according to tp_0_est) is returned. It is possible to add a condition (e.g. "last>10"). Only channels fullfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, nan is returned for this event. - - "tot": The sum of all channels across an event. It is possible to add a condition (e.g. "tot>10"). Only channels fullfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, zero is returned for this event. Booleans are treated as integers 0/1. - - "any": Logical or between all channels. Non boolean values are True for values != 0 and False for values == 0. - - "all": Logical and between all channels. Non boolean values are True for values != 0 and False for values == 0. - - ch_field: A previously generated channel_id field (i.e. from the get_ch flage) can be given here, and the value of this specific channels is used. - - "single": !!!NOT IMPLEMENTED!!!. Channels are not combined, but result saved for each channel. field name gets channel id as suffix. - expr - The expression. That can be any mathematical equation/comparison. If mode == func, the expression needs to be a special processing function defined in modules (e.g. 
"modules.spm.get_energy). In the expression parameters from either hit, dsp, evt tier (from operations performed before this one! --> JSON operaions order matters), or from the "parameters" field can be used. - para - Dictionary of parameters defined in the "parameters" field in the configuration JSON file. - getch - Only affects "first", "last" modes. In that cases the rawid of the resulting values channel is returned as well. - """ - #define dimension of output array - n = nrows if nrows is not None else store.LH5Store().read_n_rows(chns[0]+"/dsp/",f_dsp) - out = np.full(n,defv,dtype=type(defv)) - out_chs = np.zeros(len(out),dtype=int) - - # find parameters in evt file or in parameters - exprl = re.findall(r"[a-zA-Z_$][\w$]*",expr) - var_ph = {} - if os.path.exists(f_evt):var_ph = store.load_nda(f_evt,[e.split('/')[-1] for e in store.ls(f_evt) if e.split('/')[-1] in exprl]) - if para: var_ph = var_ph | para - - if mode == "func": + +def evaluate_expression( + f_evt: str, + f_hit: str, + f_dsp: str, + chns: list, + mode: str, + expr: str, + para: dict = None, + defv=np.nan, + nrows: int = None, +) -> np.ndarray: + """ + Evaluates the expression defined by the user across all channels according to the mode + Parameters + ---------- + f_evt + Path to event tier file + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + List of channel names across which expression gets evaluated (form: "ch") + mode + The mode determines how the event entry is calculated across channels. Options are: + - "first": The value of the channel in an event triggering first in time (according to tp_0_est) is returned. It is possible to add a condition (e.g. "first>10"). Only channels fulfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, nan is returned for this event. + - "last": The value of the channel in an event triggering last in time (according to tp_0_est) is returned. It is possible to add a condition (e.g. 
"last>10"). Only channels fulfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, nan is returned for this event. + - "tot": The sum of all channels across an event. It is possible to add a condition (e.g. "tot>10"). Only channels fulfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, zero is returned for this event. Booleans are treated as integers 0/1. + - "any": Logical or between all channels. Non boolean values are True for values != 0 and False for values == 0. + - "all": Logical and between all channels. Non boolean values are True for values != 0 and False for values == 0. + - ch_field: A previously generated channel_id field (i.e. from the get_ch flag) can be given here, and the value of this specific channels is used. + - "single": !!!NOT IMPLEMENTED!!!. Channels are not combined, but result saved for each channel. field name gets channel id as suffix. + expr + The expression. That can be any mathematical equation/comparison. If mode == func, the expression needs to be a special processing function defined in modules (e.g. "modules.spm.get_energy). In the expression parameters from either hit, dsp, evt tier (from operations performed before this one! --> JSON operations order matters), or from the "parameters" field can be used. + para + Dictionary of parameters defined in the "parameters" field in the configuration JSON file. + getch + Only affects "first", "last" modes. In that cases the rawid of the resulting values channel is returned as well. 
+ """ + # define dimension of output array + n = ( + nrows + if nrows is not None + else store.LH5Store().read_n_rows(chns[0] + "/dsp/", f_dsp) + ) + out = np.full(n, defv, dtype=type(defv)) + out_chs = np.zeros(len(out), dtype=int) + outt = np.zeros(len(out)) + + # find parameters in evt file or in parameters + exprl = re.findall(r"[a-zA-Z_$][\w$]*", expr) + var_ph = {} + if os.path.exists(f_evt): + var_ph = store.load_nda( + f_evt, + [e.split("/")[-1] for e in store.ls(f_evt) if e.split("/")[-1] in exprl], + ) + if para: + var_ph = var_ph | para + + if mode == "func": # evaluate expression - func, params = expr.split('(') - params = [f_hit,f_dsp,chns]+[num_and_pars(e,var_ph) for e in params[:-1].split(',')] + func, params = expr.split("(") + params = [f_hit, f_dsp, chns] + [ + num_and_pars(e, var_ph) for e in params[:-1].split(",") + ] # load function dynamically - p,m = func.rsplit('.',1) - met = getattr(import_module(p),m) + p, m = func.rsplit(".", 1) + met = getattr(import_module(p), m) out = met(*params) - else: + else: # evaluate operator in mode - ops = re.findall(r'([<>]=?|==)', mode) + ops = re.findall(r"([<>]=?|==)", mode) ch_comp = None if os.path.exists(f_evt) and mode in store.ls(f_evt): - ch_comp = store.load_nda(f_evt,[mode])[mode] + ch_comp = store.load_nda(f_evt, [mode])[mode] + for ch in chns: # find fields in either dsp, hit - var = store.load_nda(f_hit,[e.split('/')[-1] for e in store.ls(f_hit,ch+"/hit/") if e.split('/')[-1] in exprl],ch+"/hit/") - dsp_dic = store.load_nda(f_dsp,[e.split('/')[-1] for e in store.ls(f_dsp,ch+"/dsp/") if e.split('/')[-1] in exprl],ch+"/dsp/") - var= var |dsp_dic | var_ph - + var = store.load_nda( + f_hit, + [ + e.split("/")[-1] + for e in store.ls(f_hit, ch + "/hit/") + if e.split("/")[-1] in exprl + ], + ch + "/hit/", + ) + dsp_dic = store.load_nda( + f_dsp, + [ + e.split("/")[-1] + for e in store.ls(f_dsp, ch + "/dsp/") + if e.split("/")[-1] in exprl + ], + ch + "/dsp/", + ) + var = dsp_dic | var_ph | var + # 
evaluate expression - res = eval(expr,var) - if not isinstance(res, np.ndarray): res = np.full(len(out),res,dtype=type(res)) + res = eval(expr, var) + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) # append to out according to mode - if len(ops)>0: limarr = eval("".join(["res",ops[0],"lim"]),{"res":res,"lim":float(mode.split(ops[0])[-1])}) - else: limarr = np.ones(len(out)).astype(bool) + if len(ops) > 0: + limarr = eval( + "".join(["res", ops[0], "lim"]), + {"res": res, "lim": float(mode.split(ops[0])[-1])}, + ) + else: + limarr = np.ones(len(out)).astype(bool) if "first" in mode: - outt = np.full(len(out),np.inf) - t0 = store.load_nda(f_dsp,["tp_0_est"],ch+"/dsp/")["tp_0_est"] - out = np.where((t0outt) & (limarr),res,out) - out_chs = np.where((t0>outt) & (limarr),int(ch[2:]),out_chs) - outt = np.where((t0>outt) & (limarr),t0,outt) + t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/")["tp_0_est"] + out = np.where((t0 > outt) & (limarr), res, out) + out_chs = np.where((t0 > outt) & (limarr), int(ch[2:]), out_chs) + outt = np.where((t0 > outt) & (limarr), t0, outt) elif "tot" in mode: - if res.dtype == bool: res = res.astype(int) - out += np.where(limarr,res,out) + if res.dtype == bool: + res = res.astype(int) + out += np.where(limarr, res, out) elif mode == "any": - if res.dtype != bool: res = res.astype(bool) - out = out | res + if res.dtype != bool: + res = res.astype(bool) + out = out | res elif mode == "all": - if res.dtype != bool: res = res.astype(bool) - out = out & res + if res.dtype != bool: + res = res.astype(bool) + out = out & res elif ch_comp is not None: - out = np.where(int(ch[2:]) == ch_comp,res,out) + out = np.where(int(ch[2:]) == ch_comp, res, out) else: raise ValueError(mode + " not a valid mode") - return out, out_chs - + return out, out_chs + + def build_evt( - f_dsp: str, - f_hit: str, - f_evt: str, - meta_path: str = None, - evt_config: str | dict = None, - wo_mode: str = "write_safe" + f_dsp: str, + 
f_hit: str, + f_evt: str, + meta_path: str = None, + evt_config: str | dict = None, + wo_mode: str = "write_safe", ) -> None: """ Transform data from the hit and dsp levels which a channel sorted @@ -155,8 +204,8 @@ def build_evt( f_evt name of the output file evt_config - dictionary or name of JSON file defining evt fields. Channel lists can be defined by the user or by using the keyword "meta" followed by the system (geds/spms) and the usability (on,no_psd,ac,off) seperated by underscores (e.g. "meta_geds_on") in the "channels" dictonary. The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "mode" defines how the channels should be combined (see evaluate_expression). For first/last modes a "get_ch" flag can be defined, if true an additional field with the sufix "_id" is returned containing the rawid of the respective value in the field without the suffix. "expression" defnies the mathematical/special function to apply (see evaluate_expression), "parameters" defines any other parameter used in expression For example: - + dictionary or name of JSON file defining evt fields. Channel lists can be defined by the user or by using the keyword "meta" followed by the system (geds/spms) and the usability (on,no_psd,ac,off) separated by underscores (e.g. "meta_geds_on") in the "channels" dictionary. The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "mode" defines how the channels should be combined (see evaluate_expression). For first/last modes a "get_ch" flag can be defined, if true an additional field with the sufix "_id" is returned containing the rawid of the respective value in the field without the suffix. 
"expression" defnies the mathematical/special function to apply (see evaluate_expression), "parameters" defines any other parameter used in expression For example: + .. code-block::json { @@ -175,24 +224,28 @@ def build_evt( "channels": ["geds_on","geds_no_psd","geds_ac"], "mode": "first>25", "get_ch": true, - "expression": "cuspEmax_ctc_cal" + "expression": "cuspEmax_ctc_cal", + "initial": "np.nan" }, "aoe":{ "channels": ["geds_on"], "mode": "energy_id", - "expression": "AoE_Classifier" + "expression": "AoE_Classifier", + "initial": "np.nan" }, "is_muon_tagged":{ "channels": "muon", "mode": "any", "expression": "wf_max>a", - "parameters": {"a":15100} + "parameters": {"a":15100}, + "initial": false }, "multiplicity":{ "channels": ["geds_on","geds_no_psd","geds_ac"], "mode": "tot", "expression": "cuspEmax_ctc_cal > a", - "parameters": {"a":25} + "parameters": {"a":25}, + "initial": 0 }, "lar_energy":{ "channels": "spms_on", @@ -204,75 +257,110 @@ def build_evt( """ lstore = store.LH5Store() tbl_cfg = evt_config - if isinstance(tbl_cfg,str): + if isinstance(tbl_cfg, str): with open(tbl_cfg) as f: - tbl_cfg = json.load(f) - + tbl_cfg = json.load(f) + # create channel list according to config - # This can be either read from the meta data + # This can be either read from the meta data # or a list of channel names log.debug("Creating channel dictionary") - if meta_path: lmeta = LegendMetadata(path=meta_path) - else: lmeta = LegendMetadata() - chmap = lmeta.channelmap(re.search("\d{8}T\d{6}Z",f_dsp).group(0)) + if meta_path: + lmeta = LegendMetadata(path=meta_path) + else: + lmeta = LegendMetadata() + chmap = lmeta.channelmap(re.search(r"\d{8}T\d{6}Z", f_dsp).group(0)) chns = {} - for k, v in tbl_cfg['channels'].items(): - if isinstance(v,str): + for k, v in tbl_cfg["channels"].items(): + if isinstance(v, str): if "meta" in v: - m,sys,usa = v.split("_",2) - tmp = [f"ch{e}" for e in chmap.map("daq.rawid") if chmap.map("daq.rawid")[e]['system'] == sys] - chns[k] = [e for e 
in tmp if chmap.map("daq.rawid")[int(e[2:])]['analysis']['usability'] == usa] + m, sys, usa = v.split("_", 2) + tmp = [ + f"ch{e}" + for e in chmap.map("daq.rawid") + if chmap.map("daq.rawid")[e]["system"] == sys + ] + chns[k] = [ + e + for e in tmp + if chmap.map("daq.rawid")[int(e[2:])]["analysis"]["usability"] + == usa + ] else: - chns[k] = [f"ch{chmap.map('name')[v]['daq']['rawid']}"] - elif isinstance(v,list): - chns[k] = [f"ch{chmap.map('name')[e]['daq']['rawid']}" for e in v] - + chns[k] = [f"ch{chmap.map('name')[v]['daq']['rawid']}"] + elif isinstance(v, list): + chns[k] = [f"ch{chmap.map('name')[e]['daq']['rawid']}" for e in v] # do operations - first_iter,nrows = True,None - log.info(f"Applying'{len(tbl_cfg['operations'].keys())} operations' to dsp file {f_dsp} and hit file {f_hit} to create evt file {f_evt}") - for k, v in tbl_cfg['operations'].items(): - log.debug("Processing field" + k) + first_iter, nrows = True, None + log.info( + f"Applying'{len(tbl_cfg['operations'].keys())} operations' to dsp file {f_dsp} and hit file {f_hit} to create evt file {f_evt}" + ) + for k, v in tbl_cfg["operations"].items(): + log.debug("Processing field" + k) - # if channels not defined in operation, it can only be an operation on the evt level. - if 'channels' not in v.keys(): - exprl = re.findall(r"[a-zA-Z_$][\w$]*",v["expression"]) + # if channels not defined in operation, it can only be an operation on the evt level. 
+ if "channels" not in v.keys(): + exprl = re.findall(r"[a-zA-Z_$][\w$]*", v["expression"]) var = {} - if os.path.exists(f_evt):var = store.load_nda(f_evt,[e.split('/')[-1] for e in store.ls(f_evt) if e.split('/')[-1] in exprl]) - if "parameters" in v.keys(): var = var | v['parameters'] - res = Array(eval(v["expression"],var)) + if os.path.exists(f_evt): + var = store.load_nda( + f_evt, + [ + e.split("/")[-1] + for e in store.ls(f_evt) + if e.split("/")[-1] in exprl + ], + ) + if "parameters" in v.keys(): + var = var | v["parameters"] + res = Array(eval(v["expression"], var)) lstore.write_object( obj=res, - name= k, + name=k, lh5_file=f_evt, - wo_mode=wo_mode #if first_iter else "append" + wo_mode=wo_mode, # if first_iter else "append" ) - else: - if isinstance(v['channels'],str): chns_e = chns[v['channels']] - elif isinstance(v['channels'],list): chns_e = list(itertools.chain.from_iterable( [chns[e] for e in v['channels']])) + else: + if isinstance(v["channels"], str): + chns_e = chns[v["channels"]] + elif isinstance(v["channels"], list): + chns_e = list( + itertools.chain.from_iterable([chns[e] for e in v["channels"]]) + ) - pars, defaultv = None , np.nan - if "parameters" in v.keys(): pars = v['parameters'] - if "initial" in v.keys() and not v['initial'] == "np.nan" : defaultv = v['initial'] + pars, defaultv = None, np.nan + if "parameters" in v.keys(): + pars = v["parameters"] + if "initial" in v.keys() and not v["initial"] == "np.nan": + defaultv = v["initial"] - res,chs = evaluate_expression(f_evt,f_hit,f_dsp,chns_e,v['mode'],v['expression'],pars,defaultv,nrows) - lstore.write_object( - obj=Array(res), - name= k, - lh5_file=f_evt, - wo_mode=wo_mode + res, chs = evaluate_expression( + f_evt, + f_hit, + f_dsp, + chns_e, + v["mode"], + v["expression"], + pars, + defaultv, + nrows, ) + lstore.write_object(obj=Array(res), name=k, lh5_file=f_evt, wo_mode=wo_mode) - # if get_ch true flag in a first/last mode operation also obtain channel field - if "get_ch" in 
v.keys() and ("first" in v['mode'] or "last" in v['mode']) and v["get_ch"]: + # if get_ch true flag in a first/last mode operation also obtain channel field + if ( + "get_ch" in v.keys() + and ("first" in v["mode"] or "last" in v["mode"]) + and v["get_ch"] + ): lstore.write_object( - obj=Array(chs), - name= k+"_id", - lh5_file=f_evt, - wo_mode=wo_mode + obj=Array(chs), name=k + "_id", lh5_file=f_evt, wo_mode=wo_mode ) - if first_iter: first_iter = False - if not nrows: nrows = len(res) + if first_iter: + first_iter = False + if not nrows: + nrows = len(res) - log.info("Done") \ No newline at end of file + log.info("Done") diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 04956bec6..4722ded87 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -1,6 +1,3 @@ -import numpy as np -import pygama.lgdo.lh5_store as store - """ Module for special event level routines for SiPMs @@ -10,74 +7,182 @@ - list of channels processed additional parameters are free to the user and need to be defined in the JSON """ -#get LAr energy per event over all channels -def get_energy(f_hit,f_dsp,chs,lim,trgr,tdefault,tmin,tmax): - trig = np.where(np.isnan(trgr),tdefault,trgr) + +import numpy as np + +import pygama.lgdo.lh5_store as store + + +# get LAr energy per event over all channels +def get_energy(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): + trig = np.where(np.isnan(trgr), tdefault, trgr) tmi = trig - tmin tma = trig + tmax sum = np.zeros(len(trig)) for ch in chs: - df =store.load_nda(f_hit, ["energy_in_pe","is_valid_hit",'trigger_pos'],ch+"/hit/") - mask = (df["trigger_pos"]tmi[:,None]/16) & (df["energy_in_pe"] > lim) - pes=df["energy_in_pe"] - pes= np.where(np.isnan(pes), 0, pes) - pes= np.where(mask,pes,0) - chsum= np.nansum(pes, axis=1) + df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/") + mask = ( + (df["trigger_pos"] < tma[:, None] / 16) + & (df["trigger_pos"] > tmi[:, None] / 16) + & 
(df["energy_in_pe"] > lim) + ) + pes = df["energy_in_pe"] + pes = np.where(np.isnan(pes), 0, pes) + pes = np.where(mask, pes, 0) + chsum = np.nansum(pes, axis=1) sum = sum + chsum return sum -#get LAr majority per event over all channels -def get_majority(f_hit,f_dsp,chs,lim,trgr,tdefault,tmin,tmax): - trig = np.where(np.isnan(trgr),tdefault,trgr) + +# get LAr majority per event over all channels +def get_majority(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): + trig = np.where(np.isnan(trgr), tdefault, trgr) tmi = trig - tmin tma = trig + tmax maj = np.zeros(len(trig)) for ch in chs: - df =store.load_nda(f_hit, ["energy_in_pe","is_valid_hit",'trigger_pos'],ch+"/hit/") - mask = (df["trigger_pos"]tmi[:,None]/16) & (df["energy_in_pe"] > lim) - pes=df["energy_in_pe"] - pes= np.where(np.isnan(pes), 0, pes) - pes= np.where(mask,pes,0) - chsum= np.nansum(pes, axis=1) - chmaj = np.where(chsum>lim,1,0) + df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/") + mask = ( + (df["trigger_pos"] < tma[:, None] / 16) + & (df["trigger_pos"] > tmi[:, None] / 16) + & (df["energy_in_pe"] > lim) + ) + pes = df["energy_in_pe"] + pes = np.where(np.isnan(pes), 0, pes) + pes = np.where(mask, pes, 0) + chsum = np.nansum(pes, axis=1) + chmaj = np.where(chsum > lim, 1, 0) maj = maj + chmaj return maj -def get_etc(f_hit,f_dsp,chs,lim,trgr,tdefault,tmin,tmax,swin,trail): - predf = store.load_nda(f_hit, ["energy_in_pe",'timestamp'],chs[0]+"/hit/") - - peshape = (predf["energy_in_pe"]).shape - # 1D = channel, 2D = event num, 3D = array per event - pes=np.zeros([len(chs),peshape[0],peshape[1]]) - times = np.zeros([len(chs),peshape[0],peshape[1]]) - - tge = np.where(np.isnan(trgr),tdefault,trgr) - tmi = tge - tmin - tma = tge + tmax - for i in range(len(chs)): - df =store.load_nda(f_hit, ["energy_in_pe",'trigger_pos','timestamp'],chs[i]+"/hit/") - mask = (df["trigger_pos"]tmi[:,None]/16) & (df["energy_in_pe"] > lim) - pe=df["energy_in_pe"] - time = df["trigger_pos"]*16 - - 
pe= np.where(mask,pe,np.nan) - time= np.where(mask,time,np.nan) - - pes[i] = pe - times[i] = time - - outi = None - if trail >0: - t1d = np.nanmin(times,axis=(0,2)) - if trail == 2: t1d[t1d>tge] = tge[t1d>tge] - tt = t1d[:,None] - outi = np.where(np.nansum(np.where((times >= tt),pes,0),axis=(0,2)) > 0, - np.nansum(np.where((times >= tt) & (times < tt+swin),pes,0),axis=(0,2))/np.nansum(np.where((times >= tt),pes,0),axis=(0,2)), - np.nansum(np.where((times >= tt),pes,0),axis=(0,2))) - return outi - - else: - outi = np.where(np.nansum(pes,axis=(0,2)) > 0, - np.nansum(np.where((times >= tge[:,None]) & (times <= tge[:,None]+swin),pes,0),axis=(0,2))/np.nansum(np.where((times >= tge[:,None]),pes,0),axis=(0,2)), - np.nansum(pes,axis=(0,2))) - return outi \ No newline at end of file + +# get LAr energy per event over all channels +def get_energy_dplms(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): + trig = np.where(np.isnan(trgr), tdefault, trgr) + tmi = trig - tmin + tma = trig + tmax + sum = np.zeros(len(trig)) + for ch in chs: + df = store.load_nda( + f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/" + ) + mask = ( + (df["trigger_pos_dplms"] < tma[:, None] / 16) + & (df["trigger_pos_dplms"] > tmi[:, None] / 16) + & (df["energy_in_pe_dplms"] > lim) + ) + pes = df["energy_in_pe_dplms"] + pes = np.where(np.isnan(pes), 0, pes) + pes = np.where(mask, pes, 0) + chsum = np.nansum(pes, axis=1) + sum = sum + chsum + return sum + + +# get LAr majority per event over all channels +def get_majority_dplms(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): + trig = np.where(np.isnan(trgr), tdefault, trgr) + tmi = trig - tmin + tma = trig + tmax + maj = np.zeros(len(trig)) + for ch in chs: + df = store.load_nda( + f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/" + ) + mask = ( + (df["trigger_pos_dplms"] < tma[:, None] / 16) + & (df["trigger_pos_dplms"] > tmi[:, None] / 16) + & (df["energy_in_pe_dplms"] > lim) + ) + pes = df["energy_in_pe_dplms"] + 
pes = np.where(np.isnan(pes), 0, pes) + pes = np.where(mask, pes, 0) + chsum = np.nansum(pes, axis=1) + chmaj = np.where(chsum > lim, 1, 0) + maj = maj + chmaj + return maj + + +def get_etc(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax, swin, trail): + predf = store.load_nda(f_hit, ["energy_in_pe", "timestamp"], chs[0] + "/hit/") + + peshape = (predf["energy_in_pe"]).shape + # 1D = channel, 2D = event num, 3D = array per event + pes = np.zeros([len(chs), peshape[0], peshape[1]]) + times = np.zeros([len(chs), peshape[0], peshape[1]]) + + tge = np.where(np.isnan(trgr), tdefault, trgr) + tmi = tge - tmin + tma = tge + tmax + for i in range(len(chs)): + df = store.load_nda( + f_hit, ["energy_in_pe", "trigger_pos", "timestamp"], chs[i] + "/hit/" + ) + mask = ( + (df["trigger_pos"] < tma[:, None] / 16) + & (df["trigger_pos"] > tmi[:, None] / 16) + & (df["energy_in_pe"] > lim) + ) + pe = df["energy_in_pe"] + time = df["trigger_pos"] * 16 + + pe = np.where(mask, pe, np.nan) + time = np.where(mask, time, np.nan) + + pes[i] = pe + times[i] = time + + outi = None + if trail > 0: + t1d = np.nanmin(times, axis=(0, 2)) + if trail == 2: + t1d[t1d > tge] = tge[t1d > tge] + tt = t1d[:, None] + outi = np.where( + np.nansum(np.where((times >= tt), pes, 0), axis=(0, 2)) > 0, + np.nansum( + np.where((times >= tt) & (times < tt + swin), pes, 0), axis=(0, 2) + ) + / np.nansum(np.where((times >= tt), pes, 0), axis=(0, 2)), + np.nansum(np.where((times >= tt), pes, 0), axis=(0, 2)), + ) + return outi + + else: + outi = np.where( + np.nansum(pes, axis=(0, 2)) > 0, + np.nansum( + np.where( + (times >= tge[:, None]) & (times <= tge[:, None] + swin), pes, 0 + ), + axis=(0, 2), + ) + / np.nansum(np.where((times >= tge[:, None]), pes, 0), axis=(0, 2)), + np.nansum(pes, axis=(0, 2)), + ) + return outi + + +def get_time_shift(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): + predf = store.load_nda(f_hit, ["energy_in_pe"], chs[0] + "/hit/") + peshape = (predf["energy_in_pe"]).shape + times 
= np.zeros([len(chs), peshape[0], peshape[1]]) + + tge = np.where(np.isnan(trgr), tdefault, trgr) + tmi = tge - tmin + tma = tge + tmax + for i in range(len(chs)): + df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], chs[i] + "/hit/") + mask = ( + (df["trigger_pos"] < tma[:, None] / 16) + & (df["trigger_pos"] > tmi[:, None] / 16) + & (df["energy_in_pe"] > lim) + ) + + time = df["trigger_pos"] * 16 + time = np.where(mask, time, np.nan) + times[i] = time + + t1d = np.nanmin(times, axis=(0, 2)) + + return t1d - tge From 90d3516e986738f92bb59644447926a9e937ff57 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 9 Oct 2023 17:04:43 +0200 Subject: [PATCH 04/73] Add TCM event building --- src/pygama/evt/build_evt.py | 74 +++++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 28 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 92af205ca..2caa7c022 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -38,20 +38,23 @@ def num_and_pars(value: str, par_dic: dict): def evaluate_expression( + f_tcm: str, f_evt: str, f_hit: str, f_dsp: str, chns: list, mode: str, expr: str, + nrows: int, para: dict = None, defv=np.nan, - nrows: int = None, ) -> np.ndarray: """ Evaluates the expression defined by the user across all channels according to the mode Parameters ---------- + f_tcm + Path to tcm tier file f_evt Path to event tier file f_hit @@ -77,12 +80,7 @@ def evaluate_expression( Only affects "first", "last" modes. In that cases the rawid of the resulting values channel is returned as well. 
""" # define dimension of output array - n = ( - nrows - if nrows is not None - else store.LH5Store().read_n_rows(chns[0] + "/dsp/", f_dsp) - ) - out = np.full(n, defv, dtype=type(defv)) + out = np.full(nrows, defv, dtype=type(defv)) out_chs = np.zeros(len(out), dtype=int) outt = np.zeros(len(out)) @@ -115,8 +113,17 @@ def evaluate_expression( ch_comp = None if os.path.exists(f_evt) and mode in store.ls(f_evt): ch_comp = store.load_nda(f_evt, [mode])[mode] + + # load TCM data to define an event + nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') + ids =nda['array_id'] + idx =nda['array_idx'] + # cl = nda['cumulative_length'] for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids==int(ch[2:])] + # find fields in either dsp, hit var = store.load_nda( f_hit, @@ -126,6 +133,7 @@ def evaluate_expression( if e.split("/")[-1] in exprl ], ch + "/hit/", + idx_ch ) dsp_dic = store.load_nda( f_dsp, @@ -135,48 +143,54 @@ def evaluate_expression( if e.split("/")[-1] in exprl ], ch + "/dsp/", + idx_ch ) var = dsp_dic | var_ph | var # evaluate expression res = eval(expr, var) + + # if it is not a nparray it could be a single value + # expand accordingly if not isinstance(res, np.ndarray): res = np.full(len(out), res, dtype=type(res)) - # append to out according to mode + # get unification condition if present in mode if len(ops) > 0: limarr = eval( "".join(["res", ops[0], "lim"]), {"res": res, "lim": float(mode.split(ops[0])[-1])}, ) else: - limarr = np.ones(len(out)).astype(bool) + limarr = np.ones(len(res)).astype(bool) + + # append to out according to mode if "first" in mode: if ch == chns[0]: outt[:] = np.inf - t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/")["tp_0_est"] - out = np.where((t0 < outt) & (limarr), res, out) - out_chs = np.where((t0 < outt) & (limarr), int(ch[2:]), out_chs) - outt = np.where((t0 < outt) & (limarr), t0, outt) + t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/",idx_ch)["tp_0_est"] + 
out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) + out_chs[idx_ch] = np.where((t0 < outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) + outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) elif "last" in mode: - t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/")["tp_0_est"] - out = np.where((t0 > outt) & (limarr), res, out) - out_chs = np.where((t0 > outt) & (limarr), int(ch[2:]), out_chs) - outt = np.where((t0 > outt) & (limarr), t0, outt) + t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/",idx_ch)["tp_0_est"] + out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) + out_chs[idx_ch] = np.where((t0 > outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) + outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) elif "tot" in mode: if res.dtype == bool: res = res.astype(int) - out += np.where(limarr, res, out) + out[idx_ch] = np.where(limarr, res+out[idx_ch], out[idx_ch]) elif mode == "any": if res.dtype != bool: res = res.astype(bool) - out = out | res + out[idx_ch] = out[idx_ch] | res elif mode == "all": if res.dtype != bool: res = res.astype(bool) - out = out & res + out[idx_ch] = out[idx_ch] & res elif ch_comp is not None: - out = np.where(int(ch[2:]) == ch_comp, res, out) + out[idx_ch] = np.where(int(ch[2:]) == ch_comp, res, out[idx_ch]) else: raise ValueError(mode + " not a valid mode") @@ -184,6 +198,7 @@ def evaluate_expression( def build_evt( + f_tcm: str, f_dsp: str, f_hit: str, f_evt: str, @@ -197,6 +212,8 @@ def build_evt( Parameters ---------- + f_tcm + input LH5 file of the tcm level f_dsp input LH5 file of the dsp level f_hit @@ -292,9 +309,12 @@ def build_evt( chns[k] = [f"ch{chmap.map('name')[e]['daq']['rawid']}" for e in v] # do operations - first_iter, nrows = True, None + first_iter = True + + # get number of rows from TCM file + nrows = len(store.load_nda(f_tcm,['cumulative_length'],'hardware_tcm_1/')['cumulative_length']) log.info( - f"Applying'{len(tbl_cfg['operations'].keys())} operations' to 
dsp file {f_dsp} and hit file {f_hit} to create evt file {f_evt}" + f"Applying {len(tbl_cfg['operations'].keys())} operations to key {f_tcm.split('-')[-2]}" ) for k, v in tbl_cfg["operations"].items(): log.debug("Processing field" + k) @@ -336,15 +356,16 @@ def build_evt( defaultv = v["initial"] res, chs = evaluate_expression( + f_tcm, f_evt, f_hit, f_dsp, chns_e, v["mode"], v["expression"], - pars, - defaultv, nrows, + pars, + defaultv ) lstore.write_object(obj=Array(res), name=k, lh5_file=f_evt, wo_mode=wo_mode) @@ -360,7 +381,4 @@ def build_evt( if first_iter: first_iter = False - if not nrows: - nrows = len(res) - log.info("Done") From 21cfba12c13ded819689903b84eb4a3eb679cb0e Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 9 Oct 2023 18:04:45 +0200 Subject: [PATCH 05/73] adapted spm module to tcm based event building --- src/pygama/evt/build_evt.py | 2 +- src/pygama/evt/modules/spm.py | 77 ++++++++++++++++++++++++++--------- 2 files changed, 59 insertions(+), 20 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 2caa7c022..114d8c14a 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -98,7 +98,7 @@ def evaluate_expression( if mode == "func": # evaluate expression func, params = expr.split("(") - params = [f_hit, f_dsp, chns] + [ + params = [f_hit, f_dsp, f_tcm, chns] + [ num_and_pars(e, var_ph) for e in params[:-1].split(",") ] diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 4722ded87..5f6ba4e05 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -4,6 +4,7 @@ functions must take as the first 3 args in order: - path to the hit file - path to the dsp file +- path to the tcm file - list of channels processed additional parameters are free to the user and need to be defined in the JSON """ @@ -14,13 +15,19 @@ # get LAr energy per event over all channels -def get_energy(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): +def 
get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): trig = np.where(np.isnan(trgr), tdefault, trgr) tmi = trig - tmin tma = trig + tmax sum = np.zeros(len(trig)) + # load TCM data to define an event + nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') + ids =nda['array_id'] + idx =nda['array_idx'] for ch in chs: - df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/") + # get index list for this channel to be loaded + idx_ch = idx[ids==int(ch[2:])] + df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/",idx_ch) mask = ( (df["trigger_pos"] < tma[:, None] / 16) & (df["trigger_pos"] > tmi[:, None] / 16) @@ -30,18 +37,24 @@ def get_energy(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): pes = np.where(np.isnan(pes), 0, pes) pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) - sum = sum + chsum + sum[idx_ch] = sum[idx_ch] + chsum return sum # get LAr majority per event over all channels -def get_majority(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): +def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): trig = np.where(np.isnan(trgr), tdefault, trgr) tmi = trig - tmin tma = trig + tmax maj = np.zeros(len(trig)) + # load TCM data to define an event + nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') + ids =nda['array_id'] + idx =nda['array_idx'] for ch in chs: - df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/") + # get index list for this channel to be loaded + idx_ch = idx[ids==int(ch[2:])] + df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/",idx_ch) mask = ( (df["trigger_pos"] < tma[:, None] / 16) & (df["trigger_pos"] > tmi[:, None] / 16) @@ -52,19 +65,25 @@ def get_majority(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) chmaj = np.where(chsum > lim, 1, 0) - maj = maj + chmaj + maj[idx_ch] = maj[idx_ch] + chmaj return 
maj # get LAr energy per event over all channels -def get_energy_dplms(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): +def get_energy_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): trig = np.where(np.isnan(trgr), tdefault, trgr) tmi = trig - tmin tma = trig + tmax sum = np.zeros(len(trig)) + # load TCM data to define an event + nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') + ids =nda['array_id'] + idx =nda['array_idx'] for ch in chs: + # get index list for this channel to be loaded + idx_ch = idx[ids==int(ch[2:])] df = store.load_nda( - f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/" + f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/", idx_ch ) mask = ( (df["trigger_pos_dplms"] < tma[:, None] / 16) @@ -75,19 +94,25 @@ def get_energy_dplms(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): pes = np.where(np.isnan(pes), 0, pes) pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) - sum = sum + chsum + sum[idx_ch] = sum[idx_ch] + chsum return sum # get LAr majority per event over all channels -def get_majority_dplms(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): +def get_majority_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): trig = np.where(np.isnan(trgr), tdefault, trgr) tmi = trig - tmin tma = trig + tmax maj = np.zeros(len(trig)) + # load TCM data to define an event + nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') + ids =nda['array_id'] + idx =nda['array_idx'] for ch in chs: + # get index list for this channel to be loaded + idx_ch = idx[ids==int(ch[2:])] df = store.load_nda( - f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/" + f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/",idx_ch ) mask = ( (df["trigger_pos_dplms"] < tma[:, None] / 16) @@ -99,11 +124,11 @@ def get_majority_dplms(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) 
chmaj = np.where(chsum > lim, 1, 0) - maj = maj + chmaj + maj[idx_ch] = maj[idx_ch] + chmaj return maj -def get_etc(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax, swin, trail): +def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, trail): predf = store.load_nda(f_hit, ["energy_in_pe", "timestamp"], chs[0] + "/hit/") peshape = (predf["energy_in_pe"]).shape @@ -114,9 +139,16 @@ def get_etc(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax, swin, trail): tge = np.where(np.isnan(trgr), tdefault, trgr) tmi = tge - tmin tma = tge + tmax + + # load TCM data to define an event + nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') + ids =nda['array_id'] + idx =nda['array_idx'] for i in range(len(chs)): + # get index list for this channel to be loaded + idx_ch = idx[ids==int(chs[i][2:])] df = store.load_nda( - f_hit, ["energy_in_pe", "trigger_pos", "timestamp"], chs[i] + "/hit/" + f_hit, ["energy_in_pe", "trigger_pos", "timestamp"], chs[i] + "/hit/", idx_ch ) mask = ( (df["trigger_pos"] < tma[:, None] / 16) @@ -129,8 +161,8 @@ def get_etc(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax, swin, trail): pe = np.where(mask, pe, np.nan) time = np.where(mask, time, np.nan) - pes[i] = pe - times[i] = time + pes[i][idx_ch] = pe + times[i][idx_ch] = time outi = None if trail > 0: @@ -163,7 +195,7 @@ def get_etc(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax, swin, trail): return outi -def get_time_shift(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): +def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): predf = store.load_nda(f_hit, ["energy_in_pe"], chs[0] + "/hit/") peshape = (predf["energy_in_pe"]).shape times = np.zeros([len(chs), peshape[0], peshape[1]]) @@ -171,8 +203,15 @@ def get_time_shift(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): tge = np.where(np.isnan(trgr), tdefault, trgr) tmi = tge - tmin tma = tge + tmax + + # load TCM data to define an event + nda = 
store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') + ids =nda['array_id'] + idx =nda['array_idx'] for i in range(len(chs)): - df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], chs[i] + "/hit/") + # get index list for this channel to be loaded + idx_ch = idx[ids==int(chs[i][2:])] + df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], chs[i] + "/hit/",idx_ch) mask = ( (df["trigger_pos"] < tma[:, None] / 16) & (df["trigger_pos"] > tmi[:, None] / 16) @@ -181,7 +220,7 @@ def get_time_shift(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): time = df["trigger_pos"] * 16 time = np.where(mask, time, np.nan) - times[i] = time + times[i][idx_ch] = time t1d = np.nanmin(times, axis=(0, 2)) From 7ff9151006b8fbc7a58e7cbff08f288671a192e5 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 11 Oct 2023 14:30:12 +0200 Subject: [PATCH 06/73] add lh5 group parameter --- src/pygama/evt/build_evt.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 114d8c14a..df922c9f0 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -46,6 +46,7 @@ def evaluate_expression( mode: str, expr: str, nrows: int, + group: str, para: dict = None, defv=np.nan, ) -> np.ndarray: @@ -74,10 +75,16 @@ def evaluate_expression( - "single": !!!NOT IMPLEMENTED!!!. Channels are not combined, but result saved for each channel. field name gets channel id as suffix. expr The expression. That can be any mathematical equation/comparison. If mode == func, the expression needs to be a special processing function defined in modules (e.g. "modules.spm.get_energy). In the expression parameters from either hit, dsp, evt tier (from operations performed before this one! --> JSON operations order matters), or from the "parameters" field can be used. + nrows + Number of rows to be processed. 
+ group + lh5 root group name para Dictionary of parameters defined in the "parameters" field in the configuration JSON file. getch Only affects "first", "last" modes. In that cases the rawid of the resulting values channel is returned as well. + defv + default value of evaluation """ # define dimension of output array out = np.full(nrows, defv, dtype=type(defv)) @@ -90,7 +97,8 @@ def evaluate_expression( if os.path.exists(f_evt): var_ph = store.load_nda( f_evt, - [e.split("/")[-1] for e in store.ls(f_evt) if e.split("/")[-1] in exprl], + [e.split("/")[-1] for e in store.ls(f_evt,group) if e.split("/")[-1] in exprl], + group ) if para: var_ph = var_ph | para @@ -111,8 +119,8 @@ def evaluate_expression( # evaluate operator in mode ops = re.findall(r"([<>]=?|==)", mode) ch_comp = None - if os.path.exists(f_evt) and mode in store.ls(f_evt): - ch_comp = store.load_nda(f_evt, [mode])[mode] + if os.path.exists(f_evt) and mode in [e.split("/")[-1] for e in store.ls(f_evt,group)]: + ch_comp = store.load_nda(f_evt, [mode],group)[mode] # load TCM data to define an event nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') @@ -205,6 +213,7 @@ def build_evt( meta_path: str = None, evt_config: str | dict = None, wo_mode: str = "write_safe", + group: str = "/evt/", ) -> None: """ Transform data from the hit and dsp levels which a channel sorted @@ -271,6 +280,10 @@ def build_evt( } } } + wo_mode + writing mode + group + lh5 root group name """ lstore = store.LH5Store() tbl_cfg = evt_config @@ -328,16 +341,17 @@ def build_evt( f_evt, [ e.split("/")[-1] - for e in store.ls(f_evt) + for e in store.ls(f_evt,group) if e.split("/")[-1] in exprl ], + group ) if "parameters" in v.keys(): var = var | v["parameters"] res = Array(eval(v["expression"], var)) lstore.write_object( obj=res, - name=k, + name=group+k, lh5_file=f_evt, wo_mode=wo_mode, # if first_iter else "append" ) @@ -364,10 +378,11 @@ def build_evt( v["mode"], v["expression"], nrows, + group, pars, defaultv ) 
- lstore.write_object(obj=Array(res), name=k, lh5_file=f_evt, wo_mode=wo_mode) + lstore.write_object(obj=Array(res), name=group+k, lh5_file=f_evt, wo_mode=wo_mode) # if get_ch true flag in a first/last mode operation also obtain channel field if ( @@ -376,7 +391,7 @@ def build_evt( and v["get_ch"] ): lstore.write_object( - obj=Array(chs), name=k + "_id", lh5_file=f_evt, wo_mode=wo_mode + obj=Array(chs), name=group+k + "_id", lh5_file=f_evt, wo_mode=wo_mode ) if first_iter: From 099baeb78a5c3120cf2d00c8a90ec84bbda2a7e2 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 11 Oct 2023 14:37:43 +0200 Subject: [PATCH 07/73] ignoring useless numpy warnings in sipm module --- src/pygama/evt/build_evt.py | 71 +++++++++++++++++++++++------------ src/pygama/evt/modules/spm.py | 6 ++- 2 files changed, 52 insertions(+), 25 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index df922c9f0..5908c0a9a 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -97,8 +97,12 @@ def evaluate_expression( if os.path.exists(f_evt): var_ph = store.load_nda( f_evt, - [e.split("/")[-1] for e in store.ls(f_evt,group) if e.split("/")[-1] in exprl], - group + [ + e.split("/")[-1] + for e in store.ls(f_evt, group) + if e.split("/")[-1] in exprl + ], + group, ) if para: var_ph = var_ph | para @@ -119,18 +123,20 @@ def evaluate_expression( # evaluate operator in mode ops = re.findall(r"([<>]=?|==)", mode) ch_comp = None - if os.path.exists(f_evt) and mode in [e.split("/")[-1] for e in store.ls(f_evt,group)]: - ch_comp = store.load_nda(f_evt, [mode],group)[mode] - + if os.path.exists(f_evt) and mode in [ + e.split("/")[-1] for e in store.ls(f_evt, group) + ]: + ch_comp = store.load_nda(f_evt, [mode], group)[mode] + # load TCM data to define an event - nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') - ids =nda['array_id'] - idx =nda['array_idx'] + nda = store.load_nda(f_tcm, ["array_id", "array_idx"], 
"hardware_tcm_1/") + ids = nda["array_id"] + idx = nda["array_idx"] # cl = nda['cumulative_length'] for ch in chns: # get index list for this channel to be loaded - idx_ch = idx[ids==int(ch[2:])] + idx_ch = idx[ids == int(ch[2:])] # find fields in either dsp, hit var = store.load_nda( @@ -141,7 +147,7 @@ def evaluate_expression( if e.split("/")[-1] in exprl ], ch + "/hit/", - idx_ch + idx_ch, ) dsp_dic = store.load_nda( f_dsp, @@ -151,7 +157,7 @@ def evaluate_expression( if e.split("/")[-1] in exprl ], ch + "/dsp/", - idx_ch + idx_ch, ) var = dsp_dic | var_ph | var @@ -171,24 +177,32 @@ def evaluate_expression( ) else: limarr = np.ones(len(res)).astype(bool) - + # append to out according to mode if "first" in mode: if ch == chns[0]: outt[:] = np.inf - t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/",idx_ch)["tp_0_est"] + t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/", idx_ch)[ + "tp_0_est" + ] out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) - out_chs[idx_ch] = np.where((t0 < outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) + out_chs[idx_ch] = np.where( + (t0 < outt) & (limarr), int(ch[2:]), out_chs[idx_ch] + ) outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) elif "last" in mode: - t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/",idx_ch)["tp_0_est"] + t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/", idx_ch)[ + "tp_0_est" + ] out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) - out_chs[idx_ch] = np.where((t0 > outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) + out_chs[idx_ch] = np.where( + (t0 > outt) & (limarr), int(ch[2:]), out_chs[idx_ch] + ) outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) elif "tot" in mode: if res.dtype == bool: res = res.astype(int) - out[idx_ch] = np.where(limarr, res+out[idx_ch], out[idx_ch]) + out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) elif mode == "any": if res.dtype != bool: res = res.astype(bool) @@ -325,7 +339,11 @@ def build_evt( 
first_iter = True # get number of rows from TCM file - nrows = len(store.load_nda(f_tcm,['cumulative_length'],'hardware_tcm_1/')['cumulative_length']) + nrows = len( + store.load_nda(f_tcm, ["cumulative_length"], "hardware_tcm_1/")[ + "cumulative_length" + ] + ) log.info( f"Applying {len(tbl_cfg['operations'].keys())} operations to key {f_tcm.split('-')[-2]}" ) @@ -341,17 +359,17 @@ def build_evt( f_evt, [ e.split("/")[-1] - for e in store.ls(f_evt,group) + for e in store.ls(f_evt, group) if e.split("/")[-1] in exprl ], - group + group, ) if "parameters" in v.keys(): var = var | v["parameters"] res = Array(eval(v["expression"], var)) lstore.write_object( obj=res, - name=group+k, + name=group + k, lh5_file=f_evt, wo_mode=wo_mode, # if first_iter else "append" ) @@ -380,9 +398,11 @@ def build_evt( nrows, group, pars, - defaultv + defaultv, + ) + lstore.write_object( + obj=Array(res), name=group + k, lh5_file=f_evt, wo_mode=wo_mode ) - lstore.write_object(obj=Array(res), name=group+k, lh5_file=f_evt, wo_mode=wo_mode) # if get_ch true flag in a first/last mode operation also obtain channel field if ( @@ -391,7 +411,10 @@ def build_evt( and v["get_ch"] ): lstore.write_object( - obj=Array(chs), name=group+k + "_id", lh5_file=f_evt, wo_mode=wo_mode + obj=Array(chs), + name=group + k + "_id", + lh5_file=f_evt, + wo_mode=wo_mode, ) if first_iter: diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 5f6ba4e05..14debb043 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -10,7 +10,7 @@ """ import numpy as np - +import warnings import pygama.lgdo.lh5_store as store @@ -129,6 +129,10 @@ def get_majority_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, trail): + # ignore stupid numpy warnings + warnings.filterwarnings('ignore', r'All-NaN slice encountered') + warnings.filterwarnings('ignore', r'invalid value encountered in 
true_divide') + predf = store.load_nda(f_hit, ["energy_in_pe", "timestamp"], chs[0] + "/hit/") peshape = (predf["energy_in_pe"]).shape From 598e33bb5db641e1fc2b1116205a76cf202c6088 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 25 Oct 2023 11:19:57 +0200 Subject: [PATCH 08/73] relocated modes into own functions --- src/pygama/evt/build_evt.py | 475 +++++++++++++++++++++++++++------- src/pygama/evt/modules/spm.py | 77 +++--- 2 files changed, 428 insertions(+), 124 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 5908c0a9a..e2eb8df60 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -49,7 +49,7 @@ def evaluate_expression( group: str, para: dict = None, defv=np.nan, -) -> np.ndarray: +) -> dict: """ Evaluates the expression defined by the user across all channels according to the mode Parameters @@ -86,11 +86,6 @@ def evaluate_expression( defv default value of evaluation """ - # define dimension of output array - out = np.full(nrows, defv, dtype=type(defv)) - out_chs = np.zeros(len(out), dtype=int) - outt = np.zeros(len(out)) - # find parameters in evt file or in parameters exprl = re.findall(r"[a-zA-Z_$][\w$]*", expr) var_ph = {} @@ -118,105 +113,397 @@ def evaluate_expression( p, m = func.rsplit(".", 1) met = getattr(import_module(p), m) out = met(*params) + return {"values": out} else: - # evaluate operator in mode + # evaluate possible operator in mode ops = re.findall(r"([<>]=?|==)", mode) - ch_comp = None - if os.path.exists(f_evt) and mode in [ - e.split("/")[-1] for e in store.ls(f_evt, group) - ]: - ch_comp = store.load_nda(f_evt, [mode], group)[mode] + op, mode_lim = None, None + if len(ops) == 1: + op = ops[0] + mode_lim = float(mode.split(op)[-1]) + elif len(ops) > 1: + raise ValueError(mode + " contains invalid operator") # load TCM data to define an event nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") ids = nda["array_id"] idx = nda["array_idx"] - # 
cl = nda['cumulative_length'] - for ch in chns: - # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] - - # find fields in either dsp, hit - var = store.load_nda( + # switch through modes + if "first" in mode: + return evaluate_to_first( + idx, + ids, + f_hit, + f_dsp, + chns, + expr, + exprl, + nrows, + mode_lim, + op, + var_ph, + defv, + ) + elif "last" in mode: + return evaluate_to_last( + idx, + ids, f_hit, - [ - e.split("/")[-1] - for e in store.ls(f_hit, ch + "/hit/") - if e.split("/")[-1] in exprl - ], - ch + "/hit/", - idx_ch, + f_dsp, + chns, + expr, + exprl, + nrows, + mode_lim, + op, + var_ph, + defv, ) - dsp_dic = store.load_nda( + elif "tot" in mode: + return evaluate_to_tot( + idx, + ids, + f_hit, f_dsp, - [ - e.split("/")[-1] - for e in store.ls(f_dsp, ch + "/dsp/") - if e.split("/")[-1] in exprl - ], - ch + "/dsp/", - idx_ch, + chns, + expr, + exprl, + nrows, + mode_lim, + op, + var_ph, + defv, + ) + elif "any" == mode: + return evaluate_to_any( + idx, ids, f_hit, f_dsp, chns, expr, exprl, nrows, var_ph, defv + ) + elif "all" == mode: + return evaluate_to_all( + idx, ids, f_hit, f_dsp, chns, expr, exprl, nrows, var_ph, defv + ) + elif os.path.exists(f_evt) and mode in [ + e.split("/")[-1] for e in store.ls(f_evt, group) + ]: + ch_comp = store.load_nda(f_evt, [mode], group)[mode] + return evaluate_at_channel( + idx, ids, f_hit, f_dsp, chns, expr, exprl, nrows, ch_comp, var_ph, defv ) - var = dsp_dic | var_ph | var - # evaluate expression - res = eval(expr, var) + else: + raise ValueError(mode + " not a valid mode") + + +def find_parameters( + f_hit: str, f_dsp: str, ch: str, idx_ch: np.ndarray, exprl: list +) -> dict: + # find fields in either dsp, hit + var = store.load_nda( + f_hit, + [ + e.split("/")[-1] + for e in store.ls(f_hit, ch + "/hit/") + if e.split("/")[-1] in exprl + ], + ch + "/hit/", + idx_ch, + ) + dsp_dic = store.load_nda( + f_dsp, + [ + e.split("/")[-1] + for e in store.ls(f_dsp, ch + "/dsp/") + if 
e.split("/")[-1] in exprl + ], + ch + "/dsp/", + idx_ch, + ) + return dsp_dic | var - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - # get unification condition if present in mode - if len(ops) > 0: - limarr = eval( - "".join(["res", ops[0], "lim"]), - {"res": res, "lim": float(mode.split(ops[0])[-1])}, - ) - else: - limarr = np.ones(len(res)).astype(bool) - - # append to out according to mode - if "first" in mode: - if ch == chns[0]: - outt[:] = np.inf - t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/", idx_ch)[ - "tp_0_est" - ] - out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) - out_chs[idx_ch] = np.where( - (t0 < outt) & (limarr), int(ch[2:]), out_chs[idx_ch] - ) - outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) - elif "last" in mode: - t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/", idx_ch)[ - "tp_0_est" - ] - out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) - out_chs[idx_ch] = np.where( - (t0 > outt) & (limarr), int(ch[2:]), out_chs[idx_ch] - ) - outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) - elif "tot" in mode: - if res.dtype == bool: - res = res.astype(int) - out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) - elif mode == "any": - if res.dtype != bool: - res = res.astype(bool) - out[idx_ch] = out[idx_ch] | res - elif mode == "all": - if res.dtype != bool: - res = res.astype(bool) - out[idx_ch] = out[idx_ch] & res - elif ch_comp is not None: - out[idx_ch] = np.where(int(ch[2:]) == ch_comp, res, out[idx_ch]) - else: - raise ValueError(mode + " not a valid mode") +def evaluate_to_first( + idx: np.ndarray, + ids: np.ndarray, + f_hit: str, + f_dsp: str, + chns: list, + expr: str, + exprl: list, + nrows: int, + mode_lim: int | float, + op: str = None, + var_ph: dict = None, + defv=np.nan, +) -> dict: + # define dimension of output array + out = 
np.full(nrows, defv, dtype=type(defv)) + out_chs = np.zeros(len(out), dtype=int) + outt = np.zeros(len(out)) + + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == int(ch[2:])] + + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval(expr, var) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # get unification condition if present in mode + if op is not None: + limarr = eval( + "".join(["res", op, "lim"]), + {"res": res, "lim": mode_lim}, + ) + else: + limarr = np.ones(len(res)).astype(bool) - return out, out_chs + # append to out according to mode == first + if ch == chns[0]: + outt[:] = np.inf + t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/", idx_ch)["tp_0_est"] + out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) + out_chs[idx_ch] = np.where((t0 < outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) + outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) + + return {"values": out, "channels": out_chs} + + +def evaluate_to_last( + idx: np.ndarray, + ids: np.ndarray, + f_hit: str, + f_dsp: str, + chns: list, + expr: str, + exprl: list, + nrows: int, + mode_lim: int | float, + op: str = None, + var_ph: dict = None, + defv=np.nan, +) -> dict: + # define dimension of output array + out = np.full(nrows, defv, dtype=type(defv)) + out_chs = np.zeros(len(out), dtype=int) + outt = np.zeros(len(out)) + + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == int(ch[2:])] + + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval(expr, var) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # get unification 
condition if present in mode + if op is not None: + limarr = eval( + "".join(["res", op, "lim"]), + {"res": res, "lim": mode_lim}, + ) + else: + limarr = np.ones(len(res)).astype(bool) + + # append to out according to mode == last + t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/", idx_ch)["tp_0_est"] + out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) + out_chs[idx_ch] = np.where((t0 > outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) + outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) + + return {"values": out, "channels": out_chs} + + +def evaluate_to_tot( + idx: np.ndarray, + ids: np.ndarray, + f_hit: str, + f_dsp: str, + chns: list, + expr: str, + exprl: list, + nrows: int, + mode_lim: int | float, + op: str = None, + var_ph: dict = None, + defv=np.nan, +) -> dict: + # define dimension of output array + out = np.full(nrows, defv, dtype=type(defv)) + + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == int(ch[2:])] + + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval(expr, var) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # get unification condition if present in mode + if op is not None: + limarr = eval( + "".join(["res", op, "lim"]), + {"res": res, "lim": mode_lim}, + ) + else: + limarr = np.ones(len(res)).astype(bool) + + # append to out according to mode == tot + if res.dtype == bool: + res = res.astype(int) + out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) + + return {"values": out} + + +def evaluate_to_any( + idx: np.ndarray, + ids: np.ndarray, + f_hit: str, + f_dsp: str, + chns: list, + expr: str, + exprl: list, + nrows: int, + var_ph: dict = None, + defv=np.nan, +) -> dict: + # define dimension of output array + out = np.full(nrows, defv, dtype=type(defv)) + + 
for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == int(ch[2:])] + + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval(expr, var) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # append to out according to mode == any + if res.dtype != bool: + res = res.astype(bool) + out[idx_ch] = out[idx_ch] | res + + return {"values": out} + + +def evaluate_to_all( + idx: np.ndarray, + ids: np.ndarray, + f_hit: str, + f_dsp: str, + chns: list, + expr: str, + exprl: list, + nrows: int, + var_ph: dict = None, + defv=np.nan, +) -> dict: + # define dimension of output array + out = np.full(nrows, defv, dtype=type(defv)) + + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == int(ch[2:])] + + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval(expr, var) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # append to out according to mode == all + if res.dtype != bool: + res = res.astype(bool) + out[idx_ch] = out[idx_ch] & res + + return {"values": out} + + +def evaluate_at_channel( + idx: np.ndarray, + ids: np.ndarray, + f_hit: str, + f_dsp: str, + chns: list, + expr: str, + exprl: list, + nrows: int, + ch_comp: np.ndarray, + var_ph: dict = None, + defv=np.nan, +) -> dict: + # define dimension of output array + out = np.full(nrows, defv, dtype=type(defv)) + + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == int(ch[2:])] + + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval(expr, var) + 
+ # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # append to out according to mode == any + out[idx_ch] = np.where(int(ch[2:]) == ch_comp, res, out[idx_ch]) + + return {"values": out} + + +def evaluate_to_vector( + f_tcm: str, + f_evt: str, + f_hit: str, + f_dsp: str, + chns: list, + mode: str, + expr: str, + nrows: int, + group: str, + para: dict = None, + defv=np.nan, +) -> dict: + raise NotImplementedError def build_evt( @@ -373,6 +660,8 @@ def build_evt( lh5_file=f_evt, wo_mode=wo_mode, # if first_iter else "append" ) + + # Else we build the event entry else: if isinstance(v["channels"], str): chns_e = chns[v["channels"]] @@ -387,7 +676,7 @@ def build_evt( if "initial" in v.keys() and not v["initial"] == "np.nan": defaultv = v["initial"] - res, chs = evaluate_expression( + result = evaluate_expression( f_tcm, f_evt, f_hit, @@ -401,7 +690,10 @@ def build_evt( defaultv, ) lstore.write_object( - obj=Array(res), name=group + k, lh5_file=f_evt, wo_mode=wo_mode + obj=Array(result["values"]), + name=group + k, + lh5_file=f_evt, + wo_mode=wo_mode, ) # if get_ch true flag in a first/last mode operation also obtain channel field @@ -409,9 +701,10 @@ def build_evt( "get_ch" in v.keys() and ("first" in v["mode"] or "last" in v["mode"]) and v["get_ch"] + and "channels" in result.keys() ): lstore.write_object( - obj=Array(chs), + obj=Array(result["channels"]), name=group + k + "_id", lh5_file=f_evt, wo_mode=wo_mode, diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 14debb043..8ef727381 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -9,9 +9,11 @@ additional parameters are free to the user and need to be defined in the JSON """ -import numpy as np import warnings -import pygama.lgdo.lh5_store as store + +import numpy as np + +import lgdo.lh5_store as store # get LAr energy per event over all 
channels @@ -21,13 +23,15 @@ def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): tma = trig + tmax sum = np.zeros(len(trig)) # load TCM data to define an event - nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') - ids =nda['array_id'] - idx =nda['array_idx'] + nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") + ids = nda["array_id"] + idx = nda["array_idx"] for ch in chs: # get index list for this channel to be loaded - idx_ch = idx[ids==int(ch[2:])] - df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/",idx_ch) + idx_ch = idx[ids == int(ch[2:])] + df = store.load_nda( + f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/", idx_ch + ) mask = ( (df["trigger_pos"] < tma[:, None] / 16) & (df["trigger_pos"] > tmi[:, None] / 16) @@ -48,13 +52,15 @@ def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): tma = trig + tmax maj = np.zeros(len(trig)) # load TCM data to define an event - nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') - ids =nda['array_id'] - idx =nda['array_idx'] + nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") + ids = nda["array_id"] + idx = nda["array_idx"] for ch in chs: # get index list for this channel to be loaded - idx_ch = idx[ids==int(ch[2:])] - df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/",idx_ch) + idx_ch = idx[ids == int(ch[2:])] + df = store.load_nda( + f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/", idx_ch + ) mask = ( (df["trigger_pos"] < tma[:, None] / 16) & (df["trigger_pos"] > tmi[:, None] / 16) @@ -76,12 +82,12 @@ def get_energy_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): tma = trig + tmax sum = np.zeros(len(trig)) # load TCM data to define an event - nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') - ids =nda['array_id'] - idx =nda['array_idx'] + nda = store.load_nda(f_tcm, ["array_id", "array_idx"], 
"hardware_tcm_1/") + ids = nda["array_id"] + idx = nda["array_idx"] for ch in chs: # get index list for this channel to be loaded - idx_ch = idx[ids==int(ch[2:])] + idx_ch = idx[ids == int(ch[2:])] df = store.load_nda( f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/", idx_ch ) @@ -105,14 +111,14 @@ def get_majority_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax tma = trig + tmax maj = np.zeros(len(trig)) # load TCM data to define an event - nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') - ids =nda['array_id'] - idx =nda['array_idx'] + nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") + ids = nda["array_id"] + idx = nda["array_idx"] for ch in chs: # get index list for this channel to be loaded - idx_ch = idx[ids==int(ch[2:])] + idx_ch = idx[ids == int(ch[2:])] df = store.load_nda( - f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/",idx_ch + f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/", idx_ch ) mask = ( (df["trigger_pos_dplms"] < tma[:, None] / 16) @@ -130,8 +136,8 @@ def get_majority_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, trail): # ignore stupid numpy warnings - warnings.filterwarnings('ignore', r'All-NaN slice encountered') - warnings.filterwarnings('ignore', r'invalid value encountered in true_divide') + warnings.filterwarnings("ignore", r"All-NaN slice encountered") + warnings.filterwarnings("ignore", r"invalid value encountered in true_divide") predf = store.load_nda(f_hit, ["energy_in_pe", "timestamp"], chs[0] + "/hit/") @@ -145,14 +151,17 @@ def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, tra tma = tge + tmax # load TCM data to define an event - nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') - ids =nda['array_id'] - idx =nda['array_idx'] + nda = store.load_nda(f_tcm, ["array_id", "array_idx"], 
"hardware_tcm_1/") + ids = nda["array_id"] + idx = nda["array_idx"] for i in range(len(chs)): # get index list for this channel to be loaded - idx_ch = idx[ids==int(chs[i][2:])] + idx_ch = idx[ids == int(chs[i][2:])] df = store.load_nda( - f_hit, ["energy_in_pe", "trigger_pos", "timestamp"], chs[i] + "/hit/", idx_ch + f_hit, + ["energy_in_pe", "trigger_pos", "timestamp"], + chs[i] + "/hit/", + idx_ch, ) mask = ( (df["trigger_pos"] < tma[:, None] / 16) @@ -209,13 +218,15 @@ def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): tma = tge + tmax # load TCM data to define an event - nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') - ids =nda['array_id'] - idx =nda['array_idx'] + nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") + ids = nda["array_id"] + idx = nda["array_idx"] for i in range(len(chs)): # get index list for this channel to be loaded - idx_ch = idx[ids==int(chs[i][2:])] - df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], chs[i] + "/hit/",idx_ch) + idx_ch = idx[ids == int(chs[i][2:])] + df = store.load_nda( + f_hit, ["energy_in_pe", "trigger_pos"], chs[i] + "/hit/", idx_ch + ) mask = ( (df["trigger_pos"] < tma[:, None] / 16) & (df["trigger_pos"] > tmi[:, None] / 16) From ad8f2f810d74f54da4e56e24030a4a6edaf1954b Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 27 Oct 2023 13:30:07 +0200 Subject: [PATCH 09/73] add vov output option --- src/pygama/evt/build_evt.py | 190 +++++++++++++++++++++++++++------- src/pygama/evt/modules/spm.py | 3 +- 2 files changed, 154 insertions(+), 39 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index e2eb8df60..e67694e4e 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -10,11 +10,10 @@ import re from importlib import import_module +import lgdo.lh5_store as store import numpy as np from legendmeta import LegendMetadata - -import pygama.lgdo.lh5_store as store -from pygama.lgdo 
import Array +from lgdo import Array, VectorOfVectors log = logging.getLogger(__name__) @@ -71,8 +70,8 @@ def evaluate_expression( - "tot": The sum of all channels across an event. It is possible to add a condition (e.g. "tot>10"). Only channels fulfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, zero is returned for this event. Booleans are treated as integers 0/1. - "any": Logical or between all channels. Non boolean values are True for values != 0 and False for values == 0. - "all": Logical and between all channels. Non boolean values are True for values != 0 and False for values == 0. - - ch_field: A previously generated channel_id field (i.e. from the get_ch flag) can be given here, and the value of this specific channels is used. - - "single": !!!NOT IMPLEMENTED!!!. Channels are not combined, but result saved for each channel. field name gets channel id as suffix. + - ch_field: A previously generated channel_id field (i.e. from the get_ch flag) can be given here, and the value of this specific channels is used. if ch_field is a VectorOfVectors, the channel list is ignored. If ch_field is an Array, the intersection of the passed channels list and the Array is formed. If a channel is not in the Array, the default is used. + - "vov": Channels are not combined, but result saved as VectorOfVectors. Use of getch is recommended. It is possible (and recommended) to add a condition (e.g. "vov>10"). Only channels fulfilling this condition are saved. expr The expression. That can be any mathematical equation/comparison. If mode == func, the expression needs to be a special processing function defined in modules (e.g. "modules.spm.get_energy). In the expression parameters from either hit, dsp, evt tier (from operations performed before this one! --> JSON operations order matters), or from the "parameters" field can be used. 
nrows @@ -81,8 +80,6 @@ def evaluate_expression( lh5 root group name para Dictionary of parameters defined in the "parameters" field in the configuration JSON file. - getch - Only affects "first", "last" modes. In that cases the rawid of the resulting values channel is returned as well. defv default value of evaluation """ @@ -176,6 +173,10 @@ def evaluate_expression( var_ph, defv, ) + elif "vov" in mode: + return evaluate_to_vector( + idx, ids, f_hit, f_dsp, chns, expr, exprl, nrows, mode_lim, op, var_ph + ) elif "any" == mode: return evaluate_to_any( idx, ids, f_hit, f_dsp, chns, expr, exprl, nrows, var_ph, defv @@ -187,10 +188,21 @@ def evaluate_expression( elif os.path.exists(f_evt) and mode in [ e.split("/")[-1] for e in store.ls(f_evt, group) ]: - ch_comp = store.load_nda(f_evt, [mode], group)[mode] - return evaluate_at_channel( - idx, ids, f_hit, f_dsp, chns, expr, exprl, nrows, ch_comp, var_ph, defv - ) + lstore = store.LH5Store() + ch_comp, _ = lstore.read_object(group + mode, f_evt) + if isinstance(ch_comp, Array): + return evaluate_at_channel( + idx, ids, f_hit, f_dsp, chns, expr, exprl, ch_comp, var_ph, defv + ) + elif isinstance(ch_comp, VectorOfVectors): + return evaluate_at_channel_vov( + idx, ids, f_hit, f_dsp, expr, exprl, ch_comp, var_ph + ) + else: + raise NotImplementedError( + type(ch_comp) + + " not supported (only Array and VectorOfVectors are supported)" + ) else: raise ValueError(mode + " not a valid mode") @@ -461,13 +473,11 @@ def evaluate_at_channel( chns: list, expr: str, exprl: list, - nrows: int, - ch_comp: np.ndarray, + ch_comp: Array, var_ph: dict = None, defv=np.nan, ) -> dict: - # define dimension of output array - out = np.full(nrows, defv, dtype=type(defv)) + out = np.full(len(ch_comp), defv, dtype=type(defv)) for ch in chns: # get index list for this channel to be loaded @@ -484,26 +494,120 @@ def evaluate_at_channel( if not isinstance(res, np.ndarray): res = np.full(len(out), res, dtype=type(res)) - # append to out according 
to mode == any - out[idx_ch] = np.where(int(ch[2:]) == ch_comp, res, out[idx_ch]) + out[idx_ch] = np.where(int(ch[2:]) == ch_comp.nda, res, out[idx_ch]) return {"values": out} +def evaluate_at_channel_vov( + idx: np.ndarray, + ids: np.ndarray, + f_hit: str, + f_dsp: str, + expr: str, + exprl: list, + ch_comp: VectorOfVectors, + var_ph: dict = None, +) -> dict: + # blow up vov to aoesa + out = ch_comp.to_aoesa().nda + + chns = np.unique(out[~np.isnan(out)]).astype(int) + + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == ch] + + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, f"ch{ch}", idx_ch, exprl) | var_ph + + # evaluate expression + res = eval(expr, var) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # see in which events the current channel is present + mask = (out == ch).any(axis=1) + out[out == ch] = res[mask] + + # ok now implode the table again + out = VectorOfVectors( + flattened_data=out.flatten()[~np.isnan(out.flatten())].astype(res.dtype), + cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)), + ) + return {"values": out, "channels": ch_comp} + + def evaluate_to_vector( - f_tcm: str, - f_evt: str, + idx: np.ndarray, + ids: np.ndarray, f_hit: str, f_dsp: str, chns: list, - mode: str, expr: str, + exprl: list, nrows: int, - group: str, - para: dict = None, - defv=np.nan, + mode_lim: int | float, + op: str = None, + var_ph: dict = None, ) -> dict: - raise NotImplementedError + """ + Allows the evaluation as a vector of vectors. 
+ Returns a dictionary of values: VoV of requested values + and channels: VoV of same dimensions with requested channel_id + """ + # raise NotImplementedError + + # define dimension of output array + out = np.full((nrows, len(chns)), np.nan) + out_chs = np.full((nrows, len(chns)), np.nan) + + i = 0 + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == int(ch[2:])] + + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval(expr, var) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # get unification condition if present in mode + if op is not None: + limarr = eval( + "".join(["res", op, "lim"]), + {"res": res, "lim": mode_lim}, + ) + else: + limarr = np.ones(len(res)).astype(bool) + + # append to out according to mode == vov + out[:, i][limarr] = res[limarr] + out_chs[:, i][limarr] = int(ch[2:]) + + i += 1 + + # This can be smarter + # shorten to vov (FUTURE: replace with awkward) + out = VectorOfVectors( + flattened_data=out.flatten()[~np.isnan(out.flatten())], + cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)), + ) + out_chs = VectorOfVectors( + flattened_data=out_chs.flatten()[~np.isnan(out_chs.flatten())].astype(int), + cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out_chs), axis=1)), + ) + + return {"values": out, "channels": out_chs} def build_evt( @@ -531,7 +635,7 @@ def build_evt( f_evt name of the output file evt_config - dictionary or name of JSON file defining evt fields. Channel lists can be defined by the user or by using the keyword "meta" followed by the system (geds/spms) and the usability (on,no_psd,ac,off) separated by underscores (e.g. "meta_geds_on") in the "channels" dictionary. 
The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "mode" defines how the channels should be combined (see evaluate_expression). For first/last modes a "get_ch" flag can be defined, if true an additional field with the sufix "_id" is returned containing the rawid of the respective value in the field without the suffix. "expression" defnies the mathematical/special function to apply (see evaluate_expression), "parameters" defines any other parameter used in expression For example: + name of JSON file defining evt fields. Channel lists can be defined by the user or by using the keyword "meta" followed by the system (geds/spms) and the usability (on,no_psd,ac,off) separated by underscores (e.g. "meta_geds_on") in the "channels" dictionary. The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "mode" defines how the channels should be combined (see evaluate_expression). For first/last modes a "get_ch" flag can be defined, if true an additional field with the sufix "_id" is returned containing the rawid of the respective value in the field without the suffix. "expression" defnies the mathematical/special function to apply (see evaluate_expression), "parameters" defines any other parameter used in expression. For example: .. code-block::json @@ -554,6 +658,12 @@ def build_evt( "expression": "cuspEmax_ctc_cal", "initial": "np.nan" }, + "energy_on":{ + "channels": ["geds_on"], + "mode": "vov>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal" + }, "aoe":{ "channels": ["geds_on"], "mode": "energy_id", @@ -637,8 +747,9 @@ def build_evt( for k, v in tbl_cfg["operations"].items(): log.debug("Processing field" + k) - # if channels not defined in operation, it can only be an operation on the evt level. 
- if "channels" not in v.keys(): + # if mode not defined in operation, it can only be an operation on the evt level. + # TODO need to adapt to handle VoVs + if "mode" not in v.keys(): exprl = re.findall(r"[a-zA-Z_$][\w$]*", v["expression"]) var = {} if os.path.exists(f_evt): @@ -663,7 +774,9 @@ def build_evt( # Else we build the event entry else: - if isinstance(v["channels"], str): + if "channels" not in v.keys(): + chns_e = [] + elif isinstance(v["channels"], str): chns_e = chns[v["channels"]] elif isinstance(v["channels"], list): chns_e = list( @@ -689,22 +802,25 @@ def build_evt( pars, defaultv, ) + + obj = result["values"] + if isinstance(obj, np.ndarray): + obj = Array(result["values"]) lstore.write_object( - obj=Array(result["values"]), + obj=obj, name=group + k, lh5_file=f_evt, wo_mode=wo_mode, ) - # if get_ch true flag in a first/last mode operation also obtain channel field - if ( - "get_ch" in v.keys() - and ("first" in v["mode"] or "last" in v["mode"]) - and v["get_ch"] - and "channels" in result.keys() - ): + # if get_ch flag is true and exists and result dic contains channels entry + # write also channels information + if "get_ch" in v.keys() and v["get_ch"] and "channels" in result.keys(): + obj = result["channels"] + if isinstance(obj, np.ndarray): + obj = Array(result["channels"]) lstore.write_object( - obj=Array(result["channels"]), + obj=obj, name=group + k + "_id", lh5_file=f_evt, wo_mode=wo_mode, diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 8ef727381..5e7584e04 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -11,9 +11,8 @@ import warnings -import numpy as np - import lgdo.lh5_store as store +import numpy as np # get LAr energy per event over all channels From a301e37ef6d4c5876a53c4852000e393abc0bb33 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Sat, 28 Oct 2023 18:47:45 +0200 Subject: [PATCH 10/73] Add tests --- src/pygama/evt/__init__.py | 3 +- 
src/pygama/evt/build_evt.py | 18 +- src/pygama/evt/modules/__init__.py | 21 +++ src/pygama/evt/modules/spm.py | 1 + tests/evt/configs/basic-evt-config.json | 53 ++++++ tests/evt/configs/module-test-evt-config.json | 39 ++++ tests/evt/configs/vov-test-evt-config.json | 24 +++ tests/evt/test_build_evt.py | 166 ++++++++++++++++++ 8 files changed, 320 insertions(+), 5 deletions(-) create mode 100644 src/pygama/evt/modules/__init__.py create mode 100644 tests/evt/configs/basic-evt-config.json create mode 100644 tests/evt/configs/module-test-evt-config.json create mode 100644 tests/evt/configs/vov-test-evt-config.json create mode 100644 tests/evt/test_build_evt.py diff --git a/src/pygama/evt/__init__.py b/src/pygama/evt/__init__.py index 8257a98e3..80b544455 100644 --- a/src/pygama/evt/__init__.py +++ b/src/pygama/evt/__init__.py @@ -2,7 +2,8 @@ Utilities for grouping hit data into events. """ +from .build_evt import build_evt from .build_tcm import build_tcm from .tcm import generate_tcm_cols -__all__ = ["build_tcm", "generate_tcm_cols"] +__all__ = ["build_tcm", "generate_tcm_cols", "build_evt"] diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index e67694e4e..0f2de86d5 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -108,7 +108,7 @@ def evaluate_expression( # load function dynamically p, m = func.rsplit(".", 1) - met = getattr(import_module(p), m) + met = getattr(import_module(p, package=__package__), m) out = met(*params) return {"values": out} @@ -615,8 +615,8 @@ def build_evt( f_dsp: str, f_hit: str, f_evt: str, + evt_config: str | dict, meta_path: str = None, - evt_config: str | dict = None, wo_mode: str = "write_safe", group: str = "/evt/", ) -> None: @@ -635,7 +635,7 @@ def build_evt( f_evt name of the output file evt_config - name of JSON file defining evt fields. 
Channel lists can be defined by the user or by using the keyword "meta" followed by the system (geds/spms) and the usability (on,no_psd,ac,off) separated by underscores (e.g. "meta_geds_on") in the "channels" dictionary. The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "mode" defines how the channels should be combined (see evaluate_expression). For first/last modes a "get_ch" flag can be defined, if true an additional field with the sufix "_id" is returned containing the rawid of the respective value in the field without the suffix. "expression" defnies the mathematical/special function to apply (see evaluate_expression), "parameters" defines any other parameter used in expression. For example: + name of JSON file or dict defining evt fields. Channel lists can be defined by the user or by using the keyword "meta" followed by the system (geds/spms) and the usability (on,no_psd,ac,off) separated by underscores (e.g. "meta_geds_on") in the "channels" dictionary. The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "mode" defines how the channels should be combined (see evaluate_expression). For first/last modes a "get_ch" flag can be defined, if true an additional field with the sufix "_id" is returned containing the rawid of the respective value in the field without the suffix. "expression" defnies the mathematical/special function to apply (see evaluate_expression), "parameters" defines any other parameter used in expression. For example: .. 
code-block::json @@ -698,10 +698,17 @@ def build_evt( """ lstore = store.LH5Store() tbl_cfg = evt_config + if not isinstance(tbl_cfg, (str, dict)): + raise TypeError() if isinstance(tbl_cfg, str): with open(tbl_cfg) as f: tbl_cfg = json.load(f) + if "channels" not in tbl_cfg.keys(): + raise ValueError("channel field needs to be specified in the config") + if "operations" not in tbl_cfg.keys(): + raise ValueError("operations field needs to be specified in the config") + # create channel list according to config # This can be either read from the meta data # or a list of channel names @@ -712,6 +719,7 @@ def build_evt( lmeta = LegendMetadata() chmap = lmeta.channelmap(re.search(r"\d{8}T\d{6}Z", f_dsp).group(0)) chns = {} + for k, v in tbl_cfg["channels"].items(): if isinstance(v, str): if "meta" in v: @@ -736,6 +744,8 @@ def build_evt( first_iter = True # get number of rows from TCM file + if "hardware_tcm_1" not in store.ls(f_tcm): + raise ValueError(f"TCM {f_tcm} doesn't contain hardware_tcm_1 field.") nrows = len( store.load_nda(f_tcm, ["cumulative_length"], "hardware_tcm_1/")[ "cumulative_length" @@ -769,7 +779,7 @@ def build_evt( obj=res, name=group + k, lh5_file=f_evt, - wo_mode=wo_mode, # if first_iter else "append" + wo_mode=wo_mode, ) # Else we build the event entry diff --git a/src/pygama/evt/modules/__init__.py b/src/pygama/evt/modules/__init__.py new file mode 100644 index 000000000..bd80462f8 --- /dev/null +++ b/src/pygama/evt/modules/__init__.py @@ -0,0 +1,21 @@ +""" +Contains submodules for evt processing +""" + +from .spm import ( + get_energy, + get_energy_dplms, + get_etc, + get_majority, + get_majority_dplms, + get_time_shift, +) + +__all__ = [ + "get_energy", + "get_majority", + "get_energy_dplms", + "get_majority_dplms", + "get_etc", + "get_time_shift", +] diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 5e7584e04..b43bf134d 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -137,6 
+137,7 @@ def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, tra # ignore stupid numpy warnings warnings.filterwarnings("ignore", r"All-NaN slice encountered") warnings.filterwarnings("ignore", r"invalid value encountered in true_divide") + warnings.filterwarnings("ignore", r"invalid value encountered in divide") predf = store.load_nda(f_hit, ["energy_in_pe", "timestamp"], chs[0] + "/hit/") diff --git a/tests/evt/configs/basic-evt-config.json b/tests/evt/configs/basic-evt-config.json new file mode 100644 index 000000000..8c41913e1 --- /dev/null +++ b/tests/evt/configs/basic-evt-config.json @@ -0,0 +1,53 @@ +{ + "channels": { + "geds_on": ["V00048A", "V01240A", "V00048B"] + }, + "operations": { + "multiplicity": { + "channels": "geds_on", + "mode": "tot", + "expression": "cuspEmax_ctc_cal > a", + "parameters": { "a": 25 }, + "initial": 0 + }, + "energy": { + "channels": "geds_on", + "mode": "first>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal", + "initial": "np.nan" + }, + "energy_aux": { + "channels": "geds_on", + "mode": "last>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal", + "initial": "np.nan" + }, + "energy_sum": { + "channels": "geds_on", + "mode": "tot>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal", + "initial": 0.0 + }, + "is_usable_aoe": { + "channels": "geds_on", + "mode": "energy_id", + "expression": "True", + "initial": false + }, + "aoe": { + "channels": "geds_on", + "mode": "energy_id", + "expression": "AoE_Classifier", + "initial": "np.nan" + }, + "is_aoe_rejected": { + "channels": "geds_on", + "mode": "energy_id", + "expression": "~(AoE_Double_Sided_Cut)", + "initial": false + } + } +} diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json new file mode 100644 index 000000000..e5500c9cb --- /dev/null +++ b/tests/evt/configs/module-test-evt-config.json @@ -0,0 +1,39 @@ +{ + "channels": { + "spms_on": ["S024", "S036", "S012"], + "geds_on": 
["V00048A", "V01240A", "V00048B"] + }, + "operations": { + "energy_first": { + "channels": ["geds_on"], + "mode": "first>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal", + "initial": "np.nan" + }, + "t0": { + "channels": ["geds_on"], + "mode": "energy_first_id", + "expression": "tp_0_est", + "initial": 0.0 + }, + "lar_energy": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_energy(0.5,t0,48000,1000,5000)" + }, + "lar_multiplicity": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_majority(0.5,t0,48000,1000,5000)" + }, + "is_lar_rejected": { + "expression": "(lar_energy >4) | (lar_multiplicity > 4) " + }, + "lar_classifier": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_etc(0.5,t0,48000,100,6000,80,1)" + } + } +} diff --git a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json new file mode 100644 index 000000000..24b195ded --- /dev/null +++ b/tests/evt/configs/vov-test-evt-config.json @@ -0,0 +1,24 @@ +{ + "channels": { + "geds_on": ["V00048A", "V01240A", "V00048B"] + }, + "operations": { + "energy": { + "channels": "geds_on", + "mode": "vov>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal" + }, + "aoe": { + "mode": "energy_id", + "expression": "AoE_Classifier" + }, + "multiplicity": { + "channels": "geds_on", + "mode": "tot", + "expression": "cuspEmax_ctc_cal > a", + "parameters": { "a": 25 }, + "initial": 0 + } + } +} diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py new file mode 100644 index 000000000..3a182d8c0 --- /dev/null +++ b/tests/evt/test_build_evt.py @@ -0,0 +1,166 @@ +import os +from pathlib import Path + +import lgdo.lh5_store as store +import numpy as np +import pytest +from lgdo import Array, VectorOfVectors, load_nda, ls + +from pygama.evt import build_evt + +config_dir = Path(__file__).parent / "configs" + + +def test_basics(lgnd_test_data, tmptestdir): + outfile = 
f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + build_evt( + f_tcm=lgnd_test_data.get_path(tcm_path), + f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + f_evt=outfile, + meta_path=lgnd_test_data.get_path("legend/metadata"), + evt_config=f"{config_dir}/basic-evt-config.json", + wo_mode="o", + group="/evt/", + ) + + assert os.path.exists(outfile) + assert ( + len(ls(outfile, "/evt/")) == 9 + ) # 7 operations of which 2 are requesting channel field + nda = load_nda( + outfile, ["energy", "energy_aux", "energy_sum", "multiplicity"], "/evt/" + ) + assert ( + nda["energy"][nda["multiplicity"] == 1] + == nda["energy_aux"][nda["multiplicity"] == 1] + ).all() + assert ( + nda["energy"][nda["multiplicity"] == 1] + == nda["energy_sum"][nda["multiplicity"] == 1] + ).all() + assert ( + nda["energy_aux"][nda["multiplicity"] == 1] + == nda["energy_sum"][nda["multiplicity"] == 1] + ).all() + + +def test_lar_module(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + build_evt( + f_tcm=lgnd_test_data.get_path(tcm_path), + f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + f_evt=outfile, + meta_path=lgnd_test_data.get_path("legend/metadata"), + evt_config=f"{config_dir}/module-test-evt-config.json", + wo_mode="o", + group="/evt/", + ) + + assert os.path.exists(outfile) + assert len(ls(outfile, "/evt/")) == 7 + assert ( + np.max(load_nda(outfile, ["lar_multiplicity"], "/evt/")["lar_multiplicity"]) + <= 3 + ) + + +def 
test_vov(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + build_evt( + f_tcm=lgnd_test_data.get_path(tcm_path), + f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + f_evt=outfile, + meta_path=lgnd_test_data.get_path("legend/metadata"), + evt_config=f"{config_dir}/vov-test-evt-config.json", + wo_mode="o", + group="/evt/", + ) + + assert os.path.exists(outfile) + assert len(ls(outfile, "/evt/")) == 4 + lstore = store.LH5Store() + vov_ene, _ = lstore.read_object("/evt/energy", outfile) + vov_aoe, _ = lstore.read_object("/evt/aoe", outfile) + arr_ac, _ = lstore.read_object("/evt/multiplicity", outfile) + assert isinstance(vov_ene, VectorOfVectors) + assert isinstance(vov_aoe, VectorOfVectors) + assert isinstance(arr_ac, Array) + assert (np.diff(vov_ene.cumulative_length.nda, prepend=[0]) == arr_ac.nda).all() + + +def test_graceful_crashing(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + f_tcm = lgnd_test_data.get_path(tcm_path) + f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")) + f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")) + meta_path = lgnd_test_data.get_path("legend/metadata") + f_config = f"{config_dir}/basic-evt-config.json" + + with pytest.raises(ValueError): + build_evt(f_dsp, f_tcm, f_hit, outfile, f_config, meta_path) + + with pytest.raises(NameError): + build_evt(f_tcm, f_hit, f_dsp, outfile, f_config, meta_path) + + with pytest.raises(TypeError): + build_evt(f_tcm, f_dsp, f_hit, outfile, None, meta_path) + + 
conf = {"operations": {}} + with pytest.raises(ValueError): + build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) + + conf = {"channels": {"geds_on": ["V00048A", "V01240A", "V00048B"]}} + with pytest.raises(ValueError): + build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) + + conf = { + "channels": {"geds_on": ["V00048A", "V01240A", "V00048B"]}, + "operations": {}, + } + build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) + assert not os.path.exists(outfile) + + conf = { + "channels": {"geds_on": ["V00048A", "V01240A", "V00048B"]}, + "operations": { + "energy": { + "channels": "geds_on", + "mode": "first>pineapple", + "get_ch": True, + "expression": "cuspEmax_ctc_cal", + "initial": "np.nan", + } + }, + } + with pytest.raises(ValueError): + build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) + + conf = { + "channels": {"geds_on": ["V00048A", "V01240A", "V00048B"]}, + "operations": { + "energy": { + "channels": "geds_on", + "mode": "first>25", + "get_ch": True, + "expression": "cuspEmax_ctc_cal$cuspEmax_ctc_cal", + "initial": "np.nan", + } + }, + } + with pytest.raises(SyntaxError): + build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) From 00d89304f9e1b5a9f6fdd301c1cad0db21d52f34 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Sat, 28 Oct 2023 20:23:59 +0200 Subject: [PATCH 11/73] automatically load legendmeta on meta keyword in config --- src/pygama/evt/build_evt.py | 128 +++++++++--------- tests/evt/configs/basic-evt-config.json | 2 +- tests/evt/configs/module-test-evt-config.json | 4 +- tests/evt/configs/vov-test-evt-config.json | 2 +- tests/evt/test_build_evt.py | 16 +-- 5 files changed, 76 insertions(+), 76 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 0f2de86d5..8cd07da5f 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -12,7 +12,6 @@ import lgdo.lh5_store as store import numpy as np -from legendmeta import LegendMetadata from lgdo import Array, 
VectorOfVectors log = logging.getLogger(__name__) @@ -51,6 +50,7 @@ def evaluate_expression( ) -> dict: """ Evaluates the expression defined by the user across all channels according to the mode + Parameters ---------- f_tcm @@ -77,12 +77,13 @@ def evaluate_expression( nrows Number of rows to be processed. group - lh5 root group name + lh5 root group name para Dictionary of parameters defined in the "parameters" field in the configuration JSON file. defv - default value of evaluation + default value of evaluation """ + # find parameters in evt file or in parameters exprl = re.findall(r"[a-zA-Z_$][\w$]*", expr) var_ph = {} @@ -639,63 +640,65 @@ def build_evt( .. code-block::json - { - "channels": { - "geds_on": "meta_geds_on", - "geds_no_psd": "meta_geds_no_psd", - "geds_ac": "meta_geds_ac", - "spms_on": "meta_spms_on", - "pulser": "PULS01", - "baseline": "BSLN01", - "muon": "MUON01", - "ts_master":"S060" - }, - "operations": { - "energy":{ - "channels": ["geds_on","geds_no_psd","geds_ac"], - "mode": "first>25", - "get_ch": true, - "expression": "cuspEmax_ctc_cal", - "initial": "np.nan" - }, - "energy_on":{ - "channels": ["geds_on"], - "mode": "vov>25", - "get_ch": true, - "expression": "cuspEmax_ctc_cal" - }, - "aoe":{ - "channels": ["geds_on"], - "mode": "energy_id", - "expression": "AoE_Classifier", - "initial": "np.nan" - }, - "is_muon_tagged":{ - "channels": "muon", - "mode": "any", - "expression": "wf_max>a", - "parameters": {"a":15100}, - "initial": false + { + "channels": { + "geds_on": "meta_geds_on", + "geds_no_psd": "meta_geds_no_psd", + "geds_ac": "meta_geds_ac", + "spms_on": "meta_spms_on", + "pulser": "PULS01", + "baseline": "BSLN01", + "muon": "MUON01", + "ts_master":"S060" }, - "multiplicity":{ - "channels": ["geds_on","geds_no_psd","geds_ac"], - "mode": "tot", - "expression": "cuspEmax_ctc_cal > a", - "parameters": {"a":25}, - "initial": 0 - }, - "lar_energy":{ - "channels": "spms_on", - "mode": "func", - "expression": 
"modules.spm.get_energy(0.5,t0,48000,1000,5000)" + "operations": { + "energy":{ + "channels": ["geds_on","geds_no_psd","geds_ac"], + "mode": "first>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal", + "initial": "np.nan" + }, + "energy_on":{ + "channels": ["geds_on"], + "mode": "vov>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal" + }, + "aoe":{ + "channels": ["geds_on"], + "mode": "energy_id", + "expression": "AoE_Classifier", + "initial": "np.nan" + }, + "is_muon_tagged":{ + "channels": "muon", + "mode": "any", + "expression": "wf_max>a", + "parameters": {"a":15100}, + "initial": false + }, + "multiplicity":{ + "channels": ["geds_on","geds_no_psd","geds_ac"], + "mode": "tot", + "expression": "cuspEmax_ctc_cal > a", + "parameters": {"a":25}, + "initial": 0 + }, + "lar_energy":{ + "channels": "spms_on", + "mode": "func", + "expression": "modules.spm.get_energy(0.5,t0,48000,1000,5000)" + } } } - } + wo_mode writing mode group lh5 root group name """ + lstore = store.LH5Store() tbl_cfg = evt_config if not isinstance(tbl_cfg, (str, dict)): @@ -713,16 +716,18 @@ def build_evt( # This can be either read from the meta data # or a list of channel names log.debug("Creating channel dictionary") - if meta_path: - lmeta = LegendMetadata(path=meta_path) - else: - lmeta = LegendMetadata() - chmap = lmeta.channelmap(re.search(r"\d{8}T\d{6}Z", f_dsp).group(0)) + chns = {} for k, v in tbl_cfg["channels"].items(): if isinstance(v, str): + # only import legend meta data when needed. 
+ # LEGEND collaborators can use the meta keyword + # Why for users w/o access to the LEGEND meta data this is still working if "meta" in v: + lm = import_module("legendmeta") + lmeta = lm.LegendMetadata(path=meta_path) + chmap = lmeta.channelmap(re.search(r"\d{8}T\d{6}Z", f_dsp).group(0)) m, sys, usa = v.split("_", 2) tmp = [ f"ch{e}" @@ -736,12 +741,9 @@ def build_evt( == usa ] else: - chns[k] = [f"ch{chmap.map('name')[v]['daq']['rawid']}"] + chns[k] = [v] elif isinstance(v, list): - chns[k] = [f"ch{chmap.map('name')[e]['daq']['rawid']}" for e in v] - - # do operations - first_iter = True + chns[k] = [e for e in v] # get number of rows from TCM file if "hardware_tcm_1" not in store.ls(f_tcm): @@ -836,6 +838,4 @@ def build_evt( wo_mode=wo_mode, ) - if first_iter: - first_iter = False log.info("Done") diff --git a/tests/evt/configs/basic-evt-config.json b/tests/evt/configs/basic-evt-config.json index 8c41913e1..5e0b2e662 100644 --- a/tests/evt/configs/basic-evt-config.json +++ b/tests/evt/configs/basic-evt-config.json @@ -1,6 +1,6 @@ { "channels": { - "geds_on": ["V00048A", "V01240A", "V00048B"] + "geds_on": ["ch1084803", "ch1084804", "ch1121600"] }, "operations": { "multiplicity": { diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index e5500c9cb..7c60f3d80 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -1,7 +1,7 @@ { "channels": { - "spms_on": ["S024", "S036", "S012"], - "geds_on": ["V00048A", "V01240A", "V00048B"] + "spms_on": ["ch1057600", "ch1059201", "ch1062405"], + "geds_on": ["ch1084803", "ch1084804", "ch1121600"] }, "operations": { "energy_first": { diff --git a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json index 24b195ded..a02c7da7c 100644 --- a/tests/evt/configs/vov-test-evt-config.json +++ b/tests/evt/configs/vov-test-evt-config.json @@ -1,6 +1,6 @@ { "channels": { - "geds_on": ["V00048A", 
"V01240A", "V00048B"] + "geds_on": ["ch1084803", "ch1084804", "ch1121600"] }, "operations": { "energy": { diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 3a182d8c0..65ddb9996 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -21,7 +21,7 @@ def test_basics(lgnd_test_data, tmptestdir): f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), f_evt=outfile, - meta_path=lgnd_test_data.get_path("legend/metadata"), + meta_path=None, evt_config=f"{config_dir}/basic-evt-config.json", wo_mode="o", group="/evt/", @@ -58,7 +58,7 @@ def test_lar_module(lgnd_test_data, tmptestdir): f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), f_evt=outfile, - meta_path=lgnd_test_data.get_path("legend/metadata"), + meta_path=None, evt_config=f"{config_dir}/module-test-evt-config.json", wo_mode="o", group="/evt/", @@ -82,7 +82,7 @@ def test_vov(lgnd_test_data, tmptestdir): f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), f_evt=outfile, - meta_path=lgnd_test_data.get_path("legend/metadata"), + meta_path=None, evt_config=f"{config_dir}/vov-test-evt-config.json", wo_mode="o", group="/evt/", @@ -108,7 +108,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): f_tcm = lgnd_test_data.get_path(tcm_path) f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")) f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")) - meta_path = lgnd_test_data.get_path("legend/metadata") + meta_path = None f_config = f"{config_dir}/basic-evt-config.json" with pytest.raises(ValueError): @@ -124,19 +124,19 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): with pytest.raises(ValueError): build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) - conf = {"channels": {"geds_on": ["V00048A", "V01240A", "V00048B"]}} + 
conf = {"channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}} with pytest.raises(ValueError): build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) conf = { - "channels": {"geds_on": ["V00048A", "V01240A", "V00048B"]}, + "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, "operations": {}, } build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) assert not os.path.exists(outfile) conf = { - "channels": {"geds_on": ["V00048A", "V01240A", "V00048B"]}, + "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, "operations": { "energy": { "channels": "geds_on", @@ -151,7 +151,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) conf = { - "channels": {"geds_on": ["V00048A", "V01240A", "V00048B"]}, + "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, "operations": { "energy": { "channels": "geds_on", From 648cb8f5f56af7a7e077e050be801f1815208062 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Sat, 28 Oct 2023 21:48:23 +0200 Subject: [PATCH 12/73] parametrized dsp and hit lh5 root group --- src/pygama/evt/build_evt.py | 169 +++++++++++++++++++++++++++++++----- tests/evt/test_build_evt.py | 5 +- 2 files changed, 149 insertions(+), 25 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 8cd07da5f..d466515ea 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -45,6 +45,8 @@ def evaluate_expression( expr: str, nrows: int, group: str, + dsp_group: str, + hit_group: str, para: dict = None, defv=np.nan, ) -> dict: @@ -78,6 +80,10 @@ def evaluate_expression( Number of rows to be processed. group lh5 root group name + dsp_group + lh5 root group in dsp file + hit_group + lh5 root group in hit file para Dictionary of parameters defined in the "parameters" field in the configuration JSON file. 
defv @@ -134,7 +140,9 @@ def evaluate_expression( idx, ids, f_hit, + hit_group, f_dsp, + dsp_group, chns, expr, exprl, @@ -149,7 +157,9 @@ def evaluate_expression( idx, ids, f_hit, + hit_group, f_dsp, + dsp_group, chns, expr, exprl, @@ -164,7 +174,9 @@ def evaluate_expression( idx, ids, f_hit, + hit_group, f_dsp, + dsp_group, chns, expr, exprl, @@ -176,15 +188,49 @@ def evaluate_expression( ) elif "vov" in mode: return evaluate_to_vector( - idx, ids, f_hit, f_dsp, chns, expr, exprl, nrows, mode_lim, op, var_ph + idx, + ids, + f_hit, + hit_group, + f_dsp, + dsp_group, + chns, + expr, + exprl, + nrows, + mode_lim, + op, + var_ph, ) elif "any" == mode: return evaluate_to_any( - idx, ids, f_hit, f_dsp, chns, expr, exprl, nrows, var_ph, defv + idx, + ids, + f_hit, + hit_group, + f_dsp, + dsp_group, + chns, + expr, + exprl, + nrows, + var_ph, + defv, ) elif "all" == mode: return evaluate_to_all( - idx, ids, f_hit, f_dsp, chns, expr, exprl, nrows, var_ph, defv + idx, + ids, + f_hit, + hit_group, + f_dsp, + dsp_group, + chns, + expr, + exprl, + nrows, + var_ph, + defv, ) elif os.path.exists(f_evt) and mode in [ e.split("/")[-1] for e in store.ls(f_evt, group) @@ -193,11 +239,31 @@ def evaluate_expression( ch_comp, _ = lstore.read_object(group + mode, f_evt) if isinstance(ch_comp, Array): return evaluate_at_channel( - idx, ids, f_hit, f_dsp, chns, expr, exprl, ch_comp, var_ph, defv + idx, + ids, + f_hit, + hit_group, + f_dsp, + dsp_group, + chns, + expr, + exprl, + ch_comp, + var_ph, + defv, ) elif isinstance(ch_comp, VectorOfVectors): return evaluate_at_channel_vov( - idx, ids, f_hit, f_dsp, expr, exprl, ch_comp, var_ph + idx, + ids, + f_hit, + hit_group, + f_dsp, + dsp_group, + expr, + exprl, + ch_comp, + var_ph, ) else: raise NotImplementedError( @@ -210,27 +276,33 @@ def evaluate_expression( def find_parameters( - f_hit: str, f_dsp: str, ch: str, idx_ch: np.ndarray, exprl: list + f_hit: str, + f_dsp: str, + ch: str, + idx_ch: np.ndarray, + exprl: list, + dsp_group: str, 
+ hit_group: str, ) -> dict: # find fields in either dsp, hit var = store.load_nda( f_hit, [ e.split("/")[-1] - for e in store.ls(f_hit, ch + "/hit/") + for e in store.ls(f_hit, ch + hit_group) if e.split("/")[-1] in exprl ], - ch + "/hit/", + ch + hit_group, idx_ch, ) dsp_dic = store.load_nda( f_dsp, [ e.split("/")[-1] - for e in store.ls(f_dsp, ch + "/dsp/") + for e in store.ls(f_dsp, ch + dsp_group) if e.split("/")[-1] in exprl ], - ch + "/dsp/", + ch + dsp_group, idx_ch, ) return dsp_dic | var @@ -240,7 +312,9 @@ def evaluate_to_first( idx: np.ndarray, ids: np.ndarray, f_hit: str, + hit_group: str, f_dsp: str, + dsp_group: str, chns: list, expr: str, exprl: list, @@ -259,7 +333,10 @@ def evaluate_to_first( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + var = ( + find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) + | var_ph + ) # evaluate expression res = eval(expr, var) @@ -293,7 +370,9 @@ def evaluate_to_last( idx: np.ndarray, ids: np.ndarray, f_hit: str, + hit_group: str, f_dsp: str, + dsp_group: str, chns: list, expr: str, exprl: list, @@ -313,7 +392,10 @@ def evaluate_to_last( idx_ch = idx[ids == int(ch[2:])] # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + var = ( + find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) + | var_ph + ) # evaluate expression res = eval(expr, var) @@ -345,7 +427,9 @@ def evaluate_to_tot( idx: np.ndarray, ids: np.ndarray, f_hit: str, + hit_group: str, f_dsp: str, + dsp_group: str, chns: list, expr: str, exprl: list, @@ -363,7 +447,10 @@ def evaluate_to_tot( idx_ch = idx[ids == int(ch[2:])] # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + var = ( + find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) + | var_ph + ) # evaluate expression res = eval(expr, var) @@ -394,7 
+481,9 @@ def evaluate_to_any( idx: np.ndarray, ids: np.ndarray, f_hit: str, + hit_group: str, f_dsp: str, + dsp_group: str, chns: list, expr: str, exprl: list, @@ -410,7 +499,10 @@ def evaluate_to_any( idx_ch = idx[ids == int(ch[2:])] # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + var = ( + find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) + | var_ph + ) # evaluate expression res = eval(expr, var) @@ -432,7 +524,9 @@ def evaluate_to_all( idx: np.ndarray, ids: np.ndarray, f_hit: str, + hit_group: str, f_dsp: str, + dsp_group: str, chns: list, expr: str, exprl: list, @@ -448,7 +542,10 @@ def evaluate_to_all( idx_ch = idx[ids == int(ch[2:])] # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + var = ( + find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) + | var_ph + ) # evaluate expression res = eval(expr, var) @@ -470,7 +567,9 @@ def evaluate_at_channel( idx: np.ndarray, ids: np.ndarray, f_hit: str, + hit_group: str, f_dsp: str, + dsp_group: str, chns: list, expr: str, exprl: list, @@ -485,7 +584,10 @@ def evaluate_at_channel( idx_ch = idx[ids == int(ch[2:])] # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + var = ( + find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) + | var_ph + ) # evaluate expression res = eval(expr, var) @@ -504,7 +606,9 @@ def evaluate_at_channel_vov( idx: np.ndarray, ids: np.ndarray, f_hit: str, + hit_group: str, f_dsp: str, + dsp_group: str, expr: str, exprl: list, ch_comp: VectorOfVectors, @@ -520,7 +624,12 @@ def evaluate_at_channel_vov( idx_ch = idx[ids == ch] # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, f"ch{ch}", idx_ch, exprl) | var_ph + var = ( + find_parameters( + f_hit, f_dsp, f"ch{ch}", idx_ch, exprl, dsp_group, hit_group + ) + | var_ph + ) # evaluate expression res = eval(expr, var) @@ -546,7 
+655,9 @@ def evaluate_to_vector( idx: np.ndarray, ids: np.ndarray, f_hit: str, + hit_group: str, f_dsp: str, + dsp_group: str, chns: list, expr: str, exprl: list, @@ -572,7 +683,10 @@ def evaluate_to_vector( idx_ch = idx[ids == int(ch[2:])] # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + var = ( + find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) + | var_ph + ) # evaluate expression res = eval(expr, var) @@ -620,6 +734,9 @@ def build_evt( meta_path: str = None, wo_mode: str = "write_safe", group: str = "/evt/", + tcm_group: str = "/hardware_tcm_1/", + dsp_group: str = "/dsp/", + hit_group: str = "/hit/", ) -> None: """ Transform data from the hit and dsp levels which a channel sorted @@ -633,6 +750,7 @@ def build_evt( input LH5 file of the dsp level f_hit input LH5 file of the hit level + f_evt name of the output file evt_config @@ -697,6 +815,12 @@ def build_evt( writing mode group lh5 root group name + tcm_group + lh5 root group in tcm file + dsp_group + lh5 root group in dsp file + hit_group + lh5 root group in hit file """ lstore = store.LH5Store() @@ -745,13 +869,8 @@ def build_evt( elif isinstance(v, list): chns[k] = [e for e in v] - # get number of rows from TCM file - if "hardware_tcm_1" not in store.ls(f_tcm): - raise ValueError(f"TCM {f_tcm} doesn't contain hardware_tcm_1 field.") nrows = len( - store.load_nda(f_tcm, ["cumulative_length"], "hardware_tcm_1/")[ - "cumulative_length" - ] + store.load_nda(f_tcm, ["cumulative_length"], tcm_group)["cumulative_length"] ) log.info( f"Applying {len(tbl_cfg['operations'].keys())} operations to key {f_tcm.split('-')[-2]}" @@ -811,6 +930,8 @@ def build_evt( v["expression"], nrows, group, + dsp_group, + hit_group, pars, defaultv, ) diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 65ddb9996..12599f57e 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -25,6 +25,9 @@ def 
test_basics(lgnd_test_data, tmptestdir): evt_config=f"{config_dir}/basic-evt-config.json", wo_mode="o", group="/evt/", + tcm_group="hardware_tcm_1", + dsp_group="/dsp/", + hit_group="/hit/", ) assert os.path.exists(outfile) @@ -111,7 +114,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): meta_path = None f_config = f"{config_dir}/basic-evt-config.json" - with pytest.raises(ValueError): + with pytest.raises(RuntimeError): build_evt(f_dsp, f_tcm, f_hit, outfile, f_config, meta_path) with pytest.raises(NameError): From 086ddc72ca839146e64c572e9a5f7b64d0348ab8 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Sat, 28 Oct 2023 22:27:33 +0200 Subject: [PATCH 13/73] generalized first and last sorter --- src/pygama/evt/build_evt.py | 110 +++++++++++------- tests/evt/configs/basic-evt-config.json | 18 ++- tests/evt/configs/module-test-evt-config.json | 17 ++- tests/evt/test_build_evt.py | 33 ++++-- 4 files changed, 125 insertions(+), 53 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index d466515ea..e26012475 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -41,7 +41,7 @@ def evaluate_expression( f_hit: str, f_dsp: str, chns: list, - mode: str, + mod: str | list, expr: str, nrows: int, group: str, @@ -90,6 +90,12 @@ def evaluate_expression( default value of evaluation """ + # set modus variables + mode, sorter = mod, None + if isinstance(mod, list): + mode = mod[0] + sorter = mod[1] + # find parameters in evt file or in parameters exprl = re.findall(r"[a-zA-Z_$][\w$]*", expr) var_ph = {} @@ -135,7 +141,45 @@ def evaluate_expression( idx = nda["array_idx"] # switch through modes - if "first" in mode: + if os.path.exists(f_evt) and mode in [ + e.split("/")[-1] for e in store.ls(f_evt, group) + ]: + lstore = store.LH5Store() + ch_comp, _ = lstore.read_object(group + mode, f_evt) + if isinstance(ch_comp, Array): + return evaluate_at_channel( + idx, + ids, + f_hit, + hit_group, + f_dsp, + dsp_group, + 
chns, + expr, + exprl, + ch_comp, + var_ph, + defv, + ) + elif isinstance(ch_comp, VectorOfVectors): + return evaluate_at_channel_vov( + idx, + ids, + f_hit, + hit_group, + f_dsp, + dsp_group, + expr, + exprl, + ch_comp, + var_ph, + ) + else: + raise NotImplementedError( + type(ch_comp) + + " not supported (only Array and VectorOfVectors are supported)" + ) + elif "first" in mode: return evaluate_to_first( idx, ids, @@ -148,6 +192,7 @@ def evaluate_expression( exprl, nrows, mode_lim, + sorter, op, var_ph, defv, @@ -165,6 +210,7 @@ def evaluate_expression( exprl, nrows, mode_lim, + sorter, op, var_ph, defv, @@ -232,45 +278,6 @@ def evaluate_expression( var_ph, defv, ) - elif os.path.exists(f_evt) and mode in [ - e.split("/")[-1] for e in store.ls(f_evt, group) - ]: - lstore = store.LH5Store() - ch_comp, _ = lstore.read_object(group + mode, f_evt) - if isinstance(ch_comp, Array): - return evaluate_at_channel( - idx, - ids, - f_hit, - hit_group, - f_dsp, - dsp_group, - chns, - expr, - exprl, - ch_comp, - var_ph, - defv, - ) - elif isinstance(ch_comp, VectorOfVectors): - return evaluate_at_channel_vov( - idx, - ids, - f_hit, - hit_group, - f_dsp, - dsp_group, - expr, - exprl, - ch_comp, - var_ph, - ) - else: - raise NotImplementedError( - type(ch_comp) - + " not supported (only Array and VectorOfVectors are supported)" - ) - else: raise ValueError(mode + " not a valid mode") @@ -320,6 +327,7 @@ def evaluate_to_first( exprl: list, nrows: int, mode_lim: int | float, + sorter: str, op: str = None, var_ph: dict = None, defv=np.nan, @@ -358,7 +366,15 @@ def evaluate_to_first( # append to out according to mode == first if ch == chns[0]: outt[:] = np.inf - t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/", idx_ch)["tp_0_est"] + + # find if sorter is in hit or dsp + if sorter in [e.split("/")[-1] for e in store.ls(f_dsp, ch + dsp_group)]: + t0 = store.load_nda(f_dsp, [sorter], ch + dsp_group, idx_ch)[sorter] + elif sorter in [e.split("/")[-1] for e in store.ls(f_hit, ch + 
hit_group)]: + t0 = store.load_nda(f_hit, [sorter], ch + hit_group, idx_ch)[sorter] + else: + raise ValueError(f"Couldn't find sorter {sorter}") + out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) out_chs[idx_ch] = np.where((t0 < outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) @@ -378,6 +394,7 @@ def evaluate_to_last( exprl: list, nrows: int, mode_lim: int | float, + sorter: str, op: str = None, var_ph: dict = None, defv=np.nan, @@ -415,7 +432,14 @@ def evaluate_to_last( limarr = np.ones(len(res)).astype(bool) # append to out according to mode == last - t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/", idx_ch)["tp_0_est"] + # find if sorter is in hit or dsp + if sorter in [e.split("/")[-1] for e in store.ls(f_dsp, ch + dsp_group)]: + t0 = store.load_nda(f_dsp, [sorter], ch + dsp_group, idx_ch)[sorter] + elif sorter in [e.split("/")[-1] for e in store.ls(f_hit, ch + hit_group)]: + t0 = store.load_nda(f_hit, [sorter], ch + hit_group, idx_ch)[sorter] + else: + raise ValueError(f"Couldn't find sorter {sorter}") + out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) out_chs[idx_ch] = np.where((t0 > outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) diff --git a/tests/evt/configs/basic-evt-config.json b/tests/evt/configs/basic-evt-config.json index 5e0b2e662..aa0b68456 100644 --- a/tests/evt/configs/basic-evt-config.json +++ b/tests/evt/configs/basic-evt-config.json @@ -12,14 +12,28 @@ }, "energy": { "channels": "geds_on", - "mode": "first>25", + "mode": ["first>25", "tp_0_est"], "get_ch": true, "expression": "cuspEmax_ctc_cal", "initial": "np.nan" }, + "energy_any_above1MeV": { + "channels": "geds_on", + "mode": "any", + "get_ch": true, + "expression": "cuspEmax_ctc_cal>1000", + "initial": false + }, + "energy_all_above1MeV": { + "channels": "geds_on", + "mode": "all", + "get_ch": true, + "expression": 
"cuspEmax_ctc_cal>1000", + "initial": false + }, "energy_aux": { "channels": "geds_on", - "mode": "last>25", + "mode": ["last>25", "tp_0_est"], "get_ch": true, "expression": "cuspEmax_ctc_cal", "initial": "np.nan" diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index 7c60f3d80..8f084034a 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -6,7 +6,7 @@ "operations": { "energy_first": { "channels": ["geds_on"], - "mode": "first>25", + "mode": ["first>25", "tp_0_est"], "get_ch": true, "expression": "cuspEmax_ctc_cal", "initial": "np.nan" @@ -34,6 +34,21 @@ "channels": "spms_on", "mode": "func", "expression": ".modules.spm.get_etc(0.5,t0,48000,100,6000,80,1)" + }, + "lar_energy_dplms": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_energy_dplms(0.5,t0,48000,1000,5000)" + }, + "lar_multiplicity_dplms": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_majority_dplms(0.5,t0,48000,1000,5000)" + }, + "lar_time_shift": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_time_shift(0.5,t0,48000,1000,5000)" } } } diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 12599f57e..0c6c1bde0 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -32,7 +32,7 @@ def test_basics(lgnd_test_data, tmptestdir): assert os.path.exists(outfile) assert ( - len(ls(outfile, "/evt/")) == 9 + len(ls(outfile, "/evt/")) == 11 ) # 7 operations of which 2 are requesting channel field nda = load_nda( outfile, ["energy", "energy_aux", "energy_sum", "multiplicity"], "/evt/" @@ -68,11 +68,15 @@ def test_lar_module(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(ls(outfile, "/evt/")) == 7 - assert ( - np.max(load_nda(outfile, ["lar_multiplicity"], "/evt/")["lar_multiplicity"]) - <= 3 + assert len(ls(outfile, "/evt/")) == 10 + 
nda = load_nda( + outfile, + ["lar_multiplicity", "lar_multiplicity_dplms", "t0", "lar_time_shift"], + "/evt/", ) + assert np.max(nda["lar_multiplicity"]) <= 3 + assert np.max(nda["lar_multiplicity_dplms"]) <= 3 + assert ((nda["lar_time_shift"] + nda["t0"]) >= 0).all() def test_vov(lgnd_test_data, tmptestdir): @@ -143,7 +147,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): "operations": { "energy": { "channels": "geds_on", - "mode": "first>pineapple", + "mode": ["first>pineapple", "tp_0_est"], "get_ch": True, "expression": "cuspEmax_ctc_cal", "initial": "np.nan", @@ -158,7 +162,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): "operations": { "energy": { "channels": "geds_on", - "mode": "first>25", + "mode": ["first>25", "tp_0_est"], "get_ch": True, "expression": "cuspEmax_ctc_cal$cuspEmax_ctc_cal", "initial": "np.nan", @@ -167,3 +171,18 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): } with pytest.raises(SyntaxError): build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) + + conf = { + "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, + "operations": { + "energy": { + "channels": "geds_on", + "mode": ["first>25", "coconut"], + "get_ch": True, + "expression": "cuspEmax_ctc_cal", + "initial": "np.nan", + } + }, + } + with pytest.raises(ValueError): + build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) From ad9c74e3455a0d1ce70f24a7b42011a2331e41e6 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Sun, 29 Oct 2023 10:53:30 +0100 Subject: [PATCH 14/73] allow mix of VoV and array evaluation at evt level --- src/pygama/evt/build_evt.py | 50 +++++++++++++++++----- tests/evt/configs/vov-test-evt-config.json | 9 ++++ tests/evt/test_build_evt.py | 8 +++- 3 files changed, 55 insertions(+), 12 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index e26012475..f0bd4c161 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -903,23 +903,51 @@ def build_evt( 
log.debug("Processing field" + k) # if mode not defined in operation, it can only be an operation on the evt level. - # TODO need to adapt to handle VoVs if "mode" not in v.keys(): exprl = re.findall(r"[a-zA-Z_$][\w$]*", v["expression"]) var = {} if os.path.exists(f_evt): - var = store.load_nda( - f_evt, - [ - e.split("/")[-1] - for e in store.ls(f_evt, group) - if e.split("/")[-1] in exprl - ], - group, - ) + flds = [ + e.split("/")[-1] + for e in store.ls(f_evt, group) + if e.split("/")[-1] in exprl + ] + var = {e: lstore.read_object(group + e, f_evt)[0] for e in flds} + + # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) + arr_keys = [] + for key, value in var.items(): + if isinstance(value, VectorOfVectors): + var[key] = value.to_aoesa().nda + elif isinstance(value, Array): + var[key] = value.nda + arr_keys.append(key) + + # now we also need to set dimensions if we have an expression + # consisting of a mix of VoV and Arrays + if len(arr_keys) > 0 and not set(arr_keys) == set(var.keys()): + for key in arr_keys: + var[key] = var[key][:, None] + if "parameters" in v.keys(): var = var | v["parameters"] - res = Array(eval(v["expression"], var)) + res = eval(v["expression"], var) + + # now check what dimension we have after the evaluation + if len(res.shape) == 1: + res = Array(res) + elif len(res.shape) == 2: + res = VectorOfVectors( + flattened_data=res.flatten()[~np.isnan(res.flatten())], + cumulative_length=np.cumsum( + np.count_nonzero(~np.isnan(res), axis=1) + ), + ) + else: + raise NotImplementedError( + f"Currently only 2d formats are supported, the evaluated array has the dimension {res.shape}" + ) + lstore.write_object( obj=res, name=group + k, diff --git a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json index a02c7da7c..d1bfc4120 100644 --- a/tests/evt/configs/vov-test-evt-config.json +++ b/tests/evt/configs/vov-test-evt-config.json @@ -19,6 +19,15 @@ 
"expression": "cuspEmax_ctc_cal > a", "parameters": { "a": 25 }, "initial": 0 + }, + "energy_times_aoe": { + "expression": "energy*aoe" + }, + "energy_times_multiplicity": { + "expression": "energy*multiplicity" + }, + "multiplicity_squared": { + "expression": "multiplicity*multiplicity" } } } diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 0c6c1bde0..b5a405323 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -96,14 +96,20 @@ def test_vov(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(ls(outfile, "/evt/")) == 4 + assert len(ls(outfile, "/evt/")) == 7 lstore = store.LH5Store() vov_ene, _ = lstore.read_object("/evt/energy", outfile) vov_aoe, _ = lstore.read_object("/evt/aoe", outfile) arr_ac, _ = lstore.read_object("/evt/multiplicity", outfile) + vov_aoeene, _ = lstore.read_object("/evt/energy_times_aoe", outfile) + vov_eneac, _ = lstore.read_object("/evt/energy_times_multiplicity", outfile) + arr_ac2, _ = lstore.read_object("/evt/multiplicity_squared", outfile) assert isinstance(vov_ene, VectorOfVectors) assert isinstance(vov_aoe, VectorOfVectors) assert isinstance(arr_ac, Array) + assert isinstance(vov_aoeene, VectorOfVectors) + assert isinstance(vov_eneac, VectorOfVectors) + assert isinstance(arr_ac2, Array) assert (np.diff(vov_ene.cumulative_length.nda, prepend=[0]) == arr_ac.nda).all() From db0495a43aca905c24810af2866d4c7fafe8e0f2 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Sun, 29 Oct 2023 12:19:15 +0100 Subject: [PATCH 15/73] add event skimming function --- src/pygama/evt/__init__.py | 4 +- src/pygama/evt/build_evt.py | 90 +++++++++++++++++++++++++++++++++++++ tests/evt/test_build_evt.py | 27 ++++++++++- 3 files changed, 118 insertions(+), 3 deletions(-) diff --git a/src/pygama/evt/__init__.py b/src/pygama/evt/__init__.py index 80b544455..8bc8bf058 100644 --- a/src/pygama/evt/__init__.py +++ b/src/pygama/evt/__init__.py @@ -2,8 +2,8 @@ Utilities for grouping hit data 
into events. """ -from .build_evt import build_evt +from .build_evt import build_evt, skim_evt from .build_tcm import build_tcm from .tcm import generate_tcm_cols -__all__ = ["build_tcm", "generate_tcm_cols", "build_evt"] +__all__ = ["build_tcm", "generate_tcm_cols", "build_evt", "skim_evt"] diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index f0bd4c161..8febc5f55 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -1012,3 +1012,93 @@ def build_evt( ) log.info("Done") + + +def skim_evt( + f_evt: str, + expression: str, + params: dict = None, + f_out: str = None, + wo_mode="n", + evt_group="/evt/", +) -> None: + """ + Skimms events from a evt file which are fullfling the expression, discards all other events. + + Parameters + ---------- + f_evt + input LH5 file of the evt level + expression + skimming expression. Can contain variabels from event file or from the params dictionary. + f_out + output LH5 file. Can be None if wo_mode is set to overwrite f_evt. + wo_mode + Write mode: "o"/"overwrite" overwrites f_evt. "n"/"new" writes to a new file specified in f_out. + evt_group + lh5 root group of the evt file + """ + + if wo_mode not in ["o", "overwrite", "n", "new"]: + raise ValueError( + wo_mode + + " is a invalid writing mode. 
Valid options are: 'o', 'overwrite','n','new'" + ) + lstore = store.LH5Store() + fields = store.ls(f_evt, evt_group) + nrows = lstore.read_n_rows(fields[0], f_evt) + # load fields in expression + exprl = re.findall(r"[a-zA-Z_$][\w$]*", expression) + var = {} + + flds = [ + e.split("/")[-1] + for e in store.ls(f_evt, evt_group) + if e.split("/")[-1] in exprl + ] + var = {e: lstore.read_object(evt_group + e, f_evt)[0] for e in flds} + + # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) + arr_keys = [] + for key, value in var.items(): + if isinstance(value, VectorOfVectors): + var[key] = value.to_aoesa().nda + elif isinstance(value, Array): + var[key] = value.nda + arr_keys.append(key) + + # now we also need to set dimensions if we have an expression + # consisting of a mix of VoV and Arrays + if len(arr_keys) > 0 and not set(arr_keys) == set(var.keys()): + for key in arr_keys: + var[key] = var[key][:, None] + + if params is not None: + var = var | params + res = eval(expression, var) + + if res.shape != (nrows,): + raise ValueError( + f"The expression must result to 1D with length = event number. 
Current shape is {res.shape}" + ) + + res = res.astype(bool) + idx_list = np.arange(nrows, dtype=int)[res] + + of = f_out + if wo_mode in ["o", "overwrite"]: + of = f_evt + of_tmp = of.replace(of.split("/")[-1], ".tmp_" + of.split("/")[-1]) + + for fld in fields: + ob, _ = lstore.read_object(fld, f_evt, idx=idx_list) + lstore.write_object( + obj=ob, + name=fld, + lh5_file=of_tmp, + wo_mode="o", + ) + + if os.path.exists(of): + os.remove(of) + os.rename(of_tmp, of) diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index b5a405323..a08848934 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -6,7 +6,7 @@ import pytest from lgdo import Array, VectorOfVectors, load_nda, ls -from pygama.evt import build_evt +from pygama.evt import build_evt, skim_evt config_dir = Path(__file__).parent / "configs" @@ -192,3 +192,28 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): } with pytest.raises(ValueError): build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) + + +def test_skimming(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + f_tcm = lgnd_test_data.get_path(tcm_path) + f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")) + f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")) + meta_path = None + f_config = f"{config_dir}/vov-test-evt-config.json" + build_evt(f_tcm, f_dsp, f_hit, outfile, f_config, meta_path) + + lstore = store.LH5Store() + ac = lstore.read_object("/evt/multiplicity", outfile)[0].nda + ac = len(ac[ac == 3]) + + outfile_skm = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" + + skim_evt(outfile, "multiplicity == 3", None, outfile_skm, "n") + assert ac == len(lstore.read_object("/evt/energy", outfile_skm)[0].to_aoesa().nda) + + skim_evt(outfile, 
"multiplicity == 3", None, None, "o") + assert ac == len(lstore.read_object("/evt/energy", outfile)[0].to_aoesa().nda) From 6e4264a207c7bf7749951a37d4d27921a17f5852 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 30 Oct 2023 16:44:07 +0100 Subject: [PATCH 16/73] allow VoV t0 in spm module --- src/pygama/evt/build_evt.py | 84 ++++++++---------- src/pygama/evt/modules/spm.py | 87 +++++++++++++++++-- .../module-test-t0-vov-evt-config.json | 53 +++++++++++ tests/evt/test_build_evt.py | 27 ++++++ 4 files changed, 195 insertions(+), 56 deletions(-) create mode 100644 tests/evt/configs/module-test-t0-vov-evt-config.json diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 8febc5f55..f606e3774 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -100,15 +100,7 @@ def evaluate_expression( exprl = re.findall(r"[a-zA-Z_$][\w$]*", expr) var_ph = {} if os.path.exists(f_evt): - var_ph = store.load_nda( - f_evt, - [ - e.split("/")[-1] - for e in store.ls(f_evt, group) - if e.split("/")[-1] in exprl - ], - group, - ) + var_ph = load_vars_to_nda(f_evt, group, exprl) if para: var_ph = var_ph | para @@ -292,29 +284,41 @@ def find_parameters( hit_group: str, ) -> dict: # find fields in either dsp, hit - var = store.load_nda( - f_hit, - [ - e.split("/")[-1] - for e in store.ls(f_hit, ch + hit_group) - if e.split("/")[-1] in exprl - ], - ch + hit_group, - idx_ch, - ) - dsp_dic = store.load_nda( - f_dsp, - [ - e.split("/")[-1] - for e in store.ls(f_dsp, ch + dsp_group) - if e.split("/")[-1] in exprl - ], - ch + dsp_group, - idx_ch, - ) + var = load_vars_to_nda(f_hit, ch + hit_group, exprl) + dsp_dic = load_vars_to_nda(f_dsp, ch + dsp_group, exprl) + return dsp_dic | var +def load_vars_to_nda(f_evt: str, group: str, exprl: list) -> dict: + lstore = store.LH5Store() + flds = [ + e.split("/")[-1] for e in store.ls(f_evt, group) if e.split("/")[-1] in exprl + ] + var = {e: lstore.read_object(group + e, f_evt)[0] for e in flds} + + # 
to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) + arr_keys = [] + for key, value in var.items(): + if isinstance(value, VectorOfVectors): + var[key] = value.to_aoesa().nda + elif isinstance(value, Array): + var[key] = value.nda + if var[key].ndim > 2: + raise ValueError("Dim > 2 not supported") + if var[key].ndim == 1: + arr_keys.append(key) + else: + raise ValueError(f"{type(value)} not supported") + + # now we also need to set dimensions if we have an expression + # consisting of a mix of VoV and Arrays + if len(arr_keys) > 0 and not set(arr_keys) == set(var.keys()): + for key in arr_keys: + var[key] = var[key][:, None] + return var + + def evaluate_to_first( idx: np.ndarray, ids: np.ndarray, @@ -907,27 +911,7 @@ def build_evt( exprl = re.findall(r"[a-zA-Z_$][\w$]*", v["expression"]) var = {} if os.path.exists(f_evt): - flds = [ - e.split("/")[-1] - for e in store.ls(f_evt, group) - if e.split("/")[-1] in exprl - ] - var = {e: lstore.read_object(group + e, f_evt)[0] for e in flds} - - # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) - arr_keys = [] - for key, value in var.items(): - if isinstance(value, VectorOfVectors): - var[key] = value.to_aoesa().nda - elif isinstance(value, Array): - var[key] = value.nda - arr_keys.append(key) - - # now we also need to set dimensions if we have an expression - # consisting of a mix of VoV and Arrays - if len(arr_keys) > 0 and not set(arr_keys) == set(var.keys()): - for key in arr_keys: - var[key] = var[key][:, None] + var = load_vars_to_nda(f_evt, group, exprl) if "parameters" in v.keys(): var = var | v["parameters"] diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index b43bf134d..7bd530531 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -13,11 +13,24 @@ import lgdo.lh5_store as store import numpy as np +from lgdo import Array, VectorOfVectors # get LAr 
energy per event over all channels def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): - trig = np.where(np.isnan(trgr), tdefault, trgr) + trig = trgr + if isinstance(trgr, VectorOfVectors): + trig = trig.to_aoesa().nda + elif isinstance(trgr, Array): + trig = trig.nda + if isinstance(trig, np.ndarray) and trig.ndim == 2: + trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig) + trig = np.nanmin(trig, axis=1) + + elif isinstance(trig, np.ndarray) and trig.ndim == 1: + trig = np.where(np.isnan(trig), tdefault, trig) + else: + raise ValueError(f"Can't deal with t0 of type {type(trgr)}") tmi = trig - tmin tma = trig + tmax sum = np.zeros(len(trig)) @@ -46,7 +59,19 @@ def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): # get LAr majority per event over all channels def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): - trig = np.where(np.isnan(trgr), tdefault, trgr) + trig = trgr + if isinstance(trgr, VectorOfVectors): + trig = trig.to_aoesa().nda + elif isinstance(trgr, Array): + trig = trig.nda + if isinstance(trig, np.ndarray) and trig.ndim == 2: + trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig) + trig = np.nanmin(trig, axis=1) + + elif isinstance(trig, np.ndarray) and trig.ndim == 1: + trig = np.where(np.isnan(trig), tdefault, trig) + else: + raise ValueError(f"Can't deal with t0 of type {type(trgr)}") tmi = trig - tmin tma = trig + tmax maj = np.zeros(len(trig)) @@ -76,7 +101,19 @@ def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): # get LAr energy per event over all channels def get_energy_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): - trig = np.where(np.isnan(trgr), tdefault, trgr) + trig = trgr + if isinstance(trgr, VectorOfVectors): + trig = trig.to_aoesa().nda + elif isinstance(trgr, Array): + trig = trig.nda + if isinstance(trig, np.ndarray) and trig.ndim == 2: + trig = np.where(np.isnan(trig).all(axis=1)[:, 
None], tdefault, trig) + trig = np.nanmin(trig, axis=1) + + elif isinstance(trig, np.ndarray) and trig.ndim == 1: + trig = np.where(np.isnan(trig), tdefault, trig) + else: + raise ValueError(f"Can't deal with t0 of type {type(trgr)}") tmi = trig - tmin tma = trig + tmax sum = np.zeros(len(trig)) @@ -105,7 +142,19 @@ def get_energy_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): # get LAr majority per event over all channels def get_majority_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): - trig = np.where(np.isnan(trgr), tdefault, trgr) + trig = trgr + if isinstance(trgr, VectorOfVectors): + trig = trig.to_aoesa().nda + elif isinstance(trgr, Array): + trig = trig.nda + if isinstance(trig, np.ndarray) and trig.ndim == 2: + trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig) + trig = np.nanmin(trig, axis=1) + + elif isinstance(trig, np.ndarray) and trig.ndim == 1: + trig = np.where(np.isnan(trig), tdefault, trig) + else: + raise ValueError(f"Can't deal with t0 of type {type(trgr)}") tmi = trig - tmin tma = trig + tmax maj = np.zeros(len(trig)) @@ -146,7 +195,20 @@ def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, tra pes = np.zeros([len(chs), peshape[0], peshape[1]]) times = np.zeros([len(chs), peshape[0], peshape[1]]) - tge = np.where(np.isnan(trgr), tdefault, trgr) + tge = trgr + if isinstance(trgr, VectorOfVectors): + tge = tge.to_aoesa().nda + elif isinstance(trgr, Array): + tge = tge.nda + if isinstance(tge, np.ndarray) and tge.ndim == 2: + tge = np.where(np.isnan(tge).all(axis=1)[:, None], tdefault, tge) + tge = np.nanmin(tge, axis=1) + + elif isinstance(tge, np.ndarray) and tge.ndim == 1: + tge = np.where(np.isnan(tge), tdefault, tge) + else: + raise ValueError(f"Can't deal with t0 of type {type(trgr)}") + tmi = tge - tmin tma = tge + tmax @@ -213,7 +275,20 @@ def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): peshape = (predf["energy_in_pe"]).shape 
times = np.zeros([len(chs), peshape[0], peshape[1]]) - tge = np.where(np.isnan(trgr), tdefault, trgr) + tge = trgr + if isinstance(trgr, VectorOfVectors): + tge = tge.to_aoesa().nda + elif isinstance(trgr, Array): + tge = tge.nda + if isinstance(tge, np.ndarray) and tge.ndim == 2: + tge = np.where(np.isnan(tge).all(axis=1)[:, None], tdefault, tge) + tge = np.nanmin(tge, axis=1) + + elif isinstance(tge, np.ndarray) and tge.ndim == 1: + tge = np.where(np.isnan(tge), tdefault, tge) + else: + raise ValueError(f"Can't deal with t0 of type {type(trgr)}") + tmi = tge - tmin tma = tge + tmax diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json new file mode 100644 index 000000000..436332409 --- /dev/null +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -0,0 +1,53 @@ +{ + "channels": { + "spms_on": ["ch1057600", "ch1059201", "ch1062405"], + "geds_on": ["ch1084803", "ch1084804", "ch1121600"] + }, + "operations": { + "energy": { + "channels": "geds_on", + "mode": "vov>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal" + }, + "t0": { + "channels": ["geds_on"], + "mode": "energy_id", + "expression": "tp_0_est", + "initial": 0.0 + }, + "lar_energy": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_energy(0.5,t0,48000,1000,5000)" + }, + "lar_multiplicity": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_majority(0.5,t0,48000,1000,5000)" + }, + "is_lar_rejected": { + "expression": "(lar_energy >4) | (lar_multiplicity > 4) " + }, + "lar_classifier": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_etc(0.5,t0,48000,100,6000,80,1)" + }, + "lar_energy_dplms": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_energy_dplms(0.5,t0,48000,1000,5000)" + }, + "lar_multiplicity_dplms": { + "channels": "spms_on", + "mode": "func", + "expression": 
".modules.spm.get_majority_dplms(0.5,t0,48000,1000,5000)" + }, + "lar_time_shift": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_time_shift(0.5,t0,48000,1000,5000)" + } + } +} diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index a08848934..128833e5b 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -79,6 +79,33 @@ def test_lar_module(lgnd_test_data, tmptestdir): assert ((nda["lar_time_shift"] + nda["t0"]) >= 0).all() +def test_lar_t0_vov_module(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + build_evt( + f_tcm=lgnd_test_data.get_path(tcm_path), + f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + f_evt=outfile, + meta_path=None, + evt_config=f"{config_dir}/module-test-t0-vov-evt-config.json", + wo_mode="o", + group="/evt/", + ) + + assert os.path.exists(outfile) + assert len(ls(outfile, "/evt/")) == 10 + nda = load_nda( + outfile, + ["lar_multiplicity", "lar_multiplicity_dplms", "lar_time_shift"], + "/evt/", + ) + assert np.max(nda["lar_multiplicity"]) <= 3 + assert np.max(nda["lar_multiplicity_dplms"]) <= 3 + + def test_vov(lgnd_test_data, tmptestdir): outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" From f700e34a374904d7c1bf2ab2cc63e2a787d398bd Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 28 Nov 2023 15:54:45 +0100 Subject: [PATCH 17/73] implemented suggestions of luigi: retrieve ids in seperate config block, explicit tier naming, field naming changes --- src/pygama/evt/build_evt.py | 566 ++++++++++-------- 
tests/evt/configs/basic-evt-config.json | 49 +- tests/evt/configs/module-test-evt-config.json | 45 +- .../module-test-t0-vov-evt-config.json | 42 +- tests/evt/configs/query-test-evt-config.json | 88 +++ tests/evt/configs/vov-test-evt-config.json | 26 +- tests/evt/test_build_evt.py | 73 +-- 7 files changed, 531 insertions(+), 358 deletions(-) create mode 100644 tests/evt/configs/query-test-evt-config.json diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index f606e3774..cc1e237e5 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -45,9 +45,8 @@ def evaluate_expression( expr: str, nrows: int, group: str, - dsp_group: str, - hit_group: str, para: dict = None, + qry: str = None, defv=np.nan, ) -> dict: """ @@ -74,6 +73,8 @@ def evaluate_expression( - "all": Logical and between all channels. Non boolean values are True for values != 0 and False for values == 0. - ch_field: A previously generated channel_id field (i.e. from the get_ch flag) can be given here, and the value of this specific channels is used. if ch_field is a VectorOfVectors, the channel list is ignored. If ch_field is an Array, the intersection of the passed channels list and the Array is formed. If a channel is not in the Array, the default is used. - "vov": Channels are not combined, but result saved as VectorOfVectors. Use of getch is recommended. It is possible (and recommended) to add a condition (e.g. "vov>10"). Only channels fulfilling this condition are saved. + qry + A query that can set a condition on mode. Can be any tier (i.e. a channelxevents shaped boolean matrix for tiers below event or an events long boolean array at the evt level) expr The expression. That can be any mathematical equation/comparison. If mode == func, the expression needs to be a special processing function defined in modules (e.g. "modules.spm.get_energy). In the expression parameters from either hit, dsp, evt tier (from operations performed before this one! 
--> JSON operations order matters), or from the "parameters" field can be used. nrows @@ -94,21 +95,21 @@ def evaluate_expression( mode, sorter = mod, None if isinstance(mod, list): mode = mod[0] - sorter = mod[1] + sorter = mod[1].split(".") # find parameters in evt file or in parameters - exprl = re.findall(r"[a-zA-Z_$][\w$]*", expr) + exprl = re.findall(r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", expr) var_ph = {} if os.path.exists(f_evt): - var_ph = load_vars_to_nda(f_evt, group, exprl) + var_ph = load_vars_to_nda(f_evt, "", exprl) if para: var_ph = var_ph | para - if mode == "func": + if mode == "function": # evaluate expression func, params = expr.split("(") params = [f_hit, f_dsp, f_tcm, chns] + [ - num_and_pars(e, var_ph) for e in params[:-1].split(",") + num_and_pars(e.replace(".", "_"), var_ph) for e in params[:-1].split(",") ] # load function dynamically @@ -118,14 +119,18 @@ def evaluate_expression( return {"values": out} else: - # evaluate possible operator in mode - ops = re.findall(r"([<>]=?|==)", mode) - op, mode_lim = None, None - if len(ops) == 1: - op = ops[0] - mode_lim = float(mode.split(op)[-1]) - elif len(ops) > 1: - raise ValueError(mode + " contains invalid operator") + # check if query is either on channel basis or evt basis (and not a mix) + qry_mask = qry + if qry is not None: + if "evt." in qry and ("hit." in qry or "dsp." in qry): + raise ValueError("Query can't be a mix of evt tier and lower tiers.") + + # if it is an evt query we can evaluate it directly here + if os.path.exists(f_evt) and "evt." 
in qry: + var_qry = load_vars_to_nda( + f_evt, "", re.findall(r"(evt).([a-zA-Z_$][\w$]*)", qry) + ) + qry_mask = eval(qry.replace("evt.", "evt_"), var_qry) # load TCM data to define an event nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") @@ -133,19 +138,20 @@ def evaluate_expression( idx = nda["array_idx"] # switch through modes - if os.path.exists(f_evt) and mode in [ - e.split("/")[-1] for e in store.ls(f_evt, group) - ]: + if ( + os.path.exists(f_evt) + and "evt." == mode[:4] + and mode.split(".")[-1] + in [e.split("/")[-1] for e in store.ls(f_evt, "/evt/")] + ): lstore = store.LH5Store() - ch_comp, _ = lstore.read_object(group + mode, f_evt) + ch_comp, _ = lstore.read_object(mode.replace(".", "/"), f_evt) if isinstance(ch_comp, Array): return evaluate_at_channel( idx, ids, f_hit, - hit_group, f_dsp, - dsp_group, chns, expr, exprl, @@ -158,9 +164,7 @@ def evaluate_expression( idx, ids, f_hit, - hit_group, f_dsp, - dsp_group, expr, exprl, ch_comp, @@ -171,73 +175,62 @@ def evaluate_expression( type(ch_comp) + " not supported (only Array and VectorOfVectors are supported)" ) - elif "first" in mode: + + elif "first" == mode: return evaluate_to_first( idx, ids, f_hit, - hit_group, f_dsp, - dsp_group, chns, expr, exprl, + qry_mask, nrows, - mode_lim, sorter, - op, var_ph, defv, ) - elif "last" in mode: + elif "last" == mode: return evaluate_to_last( idx, ids, f_hit, - hit_group, f_dsp, - dsp_group, chns, expr, exprl, + qry_mask, nrows, - mode_lim, sorter, - op, var_ph, defv, ) - elif "tot" in mode: + elif "sum" == mode: return evaluate_to_tot( idx, ids, f_hit, - hit_group, f_dsp, - dsp_group, chns, expr, exprl, + qry_mask, nrows, - mode_lim, - op, var_ph, defv, ) - elif "vov" in mode: + elif "vov" == mode: return evaluate_to_vector( idx, ids, f_hit, - hit_group, f_dsp, - dsp_group, chns, expr, exprl, + qry_mask, nrows, - mode_lim, - op, var_ph, ) elif "any" == mode: @@ -245,12 +238,11 @@ def evaluate_expression( idx, ids, f_hit, - 
hit_group, f_dsp, - dsp_group, chns, expr, exprl, + qry_mask, nrows, var_ph, defv, @@ -260,12 +252,11 @@ def evaluate_expression( idx, ids, f_hit, - hit_group, f_dsp, - dsp_group, chns, expr, exprl, + qry_mask, nrows, var_ph, defv, @@ -280,22 +271,31 @@ def find_parameters( ch: str, idx_ch: np.ndarray, exprl: list, - dsp_group: str, - hit_group: str, ) -> dict: # find fields in either dsp, hit - var = load_vars_to_nda(f_hit, ch + hit_group, exprl) - dsp_dic = load_vars_to_nda(f_dsp, ch + dsp_group, exprl) + var = load_vars_to_nda(f_hit, ch, exprl, idx_ch) + dsp_dic = load_vars_to_nda(f_dsp, ch, exprl, idx_ch) return dsp_dic | var -def load_vars_to_nda(f_evt: str, group: str, exprl: list) -> dict: +def load_vars_to_nda( + f_evt: str, group: str, exprl: list, idx: np.ndarray = None +) -> dict: lstore = store.LH5Store() - flds = [ - e.split("/")[-1] for e in store.ls(f_evt, group) if e.split("/")[-1] in exprl - ] - var = {e: lstore.read_object(group + e, f_evt)[0] for e in flds} + var = { + f"{e[0]}_{e[1]}": lstore.read_object( + f"{group.replace('/','')}/{e[0]}/{e[1]}", + f_evt, + idx=idx, + )[0] + for e in exprl + if e[1] + in [ + x.split("/")[-1] + for x in store.ls(f_evt, f"{group.replace('/','')}/{e[0]}/") + ] + } # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) arr_keys = [] @@ -316,6 +316,8 @@ def load_vars_to_nda(f_evt: str, group: str, exprl: list) -> dict: if len(arr_keys) > 0 and not set(arr_keys) == set(var.keys()): for key in arr_keys: var[key] = var[key][:, None] + + log.debug(f"Found parameters {var.keys()}") return var @@ -323,16 +325,13 @@ def evaluate_to_first( idx: np.ndarray, ids: np.ndarray, f_hit: str, - hit_group: str, f_dsp: str, - dsp_group: str, chns: list, expr: str, exprl: list, + qry: str | np.ndarray, nrows: int, - mode_lim: int | float, - sorter: str, - op: str = None, + sorter: list, var_ph: dict = None, defv=np.nan, ) -> dict: @@ -345,39 +344,53 @@ def evaluate_to_first( # get 
index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - var = ( - find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) - | var_ph - ) - - # evaluate expression - res = eval(expr, var) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get unification condition if present in mode - if op is not None: - limarr = eval( - "".join(["res", op, "lim"]), - {"res": res, "lim": mode_lim}, + if "tcm.array_id" == expr: + res = np.full(len(out), int(ch[2:]), dtype=int) + else: + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + # move tier+dots in expression to underscores (e.g. evt.foo -> evt_foo) + res = eval( + expr.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_"), + var, ) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # get sub evt based query condition if needed + if isinstance(qry, str): + qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) + qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) + limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) + + # or forward the array + elif isinstance(qry, np.ndarray): + limarr = qry + + # if no condition, it must be true else: limarr = np.ones(len(res)).astype(bool) + if limarr.dtype != bool: + limarr = limarr.astype(bool) + # append to out according to mode == first if ch == chns[0]: outt[:] = np.inf # find if sorter is in hit or dsp - if sorter in [e.split("/")[-1] for e in store.ls(f_dsp, ch + dsp_group)]: - t0 = store.load_nda(f_dsp, [sorter], ch + dsp_group, idx_ch)[sorter] - elif sorter in [e.split("/")[-1] for e in store.ls(f_hit, ch + hit_group)]: - t0 = store.load_nda(f_hit, [sorter], ch + hit_group, idx_ch)[sorter] - else: - raise 
ValueError(f"Couldn't find sorter {sorter}") + t0 = store.load_nda( + f_hit if "hit" == sorter[0] else f_dsp, + [sorter[1]], + f"{ch}/{sorter[0]}/", + idx_ch, + )[sorter[1]] out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) out_chs[idx_ch] = np.where((t0 < outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) @@ -390,16 +403,13 @@ def evaluate_to_last( idx: np.ndarray, ids: np.ndarray, f_hit: str, - hit_group: str, f_dsp: str, - dsp_group: str, chns: list, expr: str, exprl: list, + qry: str | np.ndarray, nrows: int, - mode_lim: int | float, - sorter: str, - op: str = None, + sorter: list, var_ph: dict = None, defv=np.nan, ) -> dict: @@ -411,38 +421,49 @@ def evaluate_to_last( for ch in chns: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] + if "tcm.array_id" == expr: + res = np.full(len(out), int(ch[2:]), dtype=int) + else: + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + # move tier+dots in expression to underscores (e.g. 
evt.foo -> evt_foo) + res = eval( + expr.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_"), + var, + ) - # find fields in either dsp, hit - var = ( - find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) - | var_ph - ) + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) - # evaluate expression - res = eval(expr, var) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get unification condition if present in mode - if op is not None: - limarr = eval( - "".join(["res", op, "lim"]), - {"res": res, "lim": mode_lim}, - ) + # get sub evt based query condition if needed + if isinstance(qry, str): + qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) + qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) + limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) + + # or forward the array + elif isinstance(qry, np.ndarray): + limarr = qry + + # if no condition, it must be true else: limarr = np.ones(len(res)).astype(bool) - + if limarr.dtype != bool: + limarr = limarr.astype(bool) # append to out according to mode == last # find if sorter is in hit or dsp - if sorter in [e.split("/")[-1] for e in store.ls(f_dsp, ch + dsp_group)]: - t0 = store.load_nda(f_dsp, [sorter], ch + dsp_group, idx_ch)[sorter] - elif sorter in [e.split("/")[-1] for e in store.ls(f_hit, ch + hit_group)]: - t0 = store.load_nda(f_hit, [sorter], ch + hit_group, idx_ch)[sorter] - else: - raise ValueError(f"Couldn't find sorter {sorter}") + t0 = store.load_nda( + f_hit if "hit" == sorter[0] else f_dsp, + [sorter[1]], + f"{ch}/{sorter[0]}/", + idx_ch, + )[sorter[1]] out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) out_chs[idx_ch] = np.where((t0 > outt) & (limarr), int(ch[2:]), 
out_chs[idx_ch]) @@ -455,15 +476,12 @@ def evaluate_to_tot( idx: np.ndarray, ids: np.ndarray, f_hit: str, - hit_group: str, f_dsp: str, - dsp_group: str, chns: list, expr: str, exprl: list, + qry: str | np.ndarray, nrows: int, - mode_lim: int | float, - op: str = None, var_ph: dict = None, defv=np.nan, ) -> dict: @@ -474,32 +492,45 @@ def evaluate_to_tot( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - # find fields in either dsp, hit - var = ( - find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) - | var_ph - ) - - # evaluate expression - res = eval(expr, var) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get unification condition if present in mode - if op is not None: - limarr = eval( - "".join(["res", op, "lim"]), - {"res": res, "lim": mode_lim}, + if "tcm.array_id" == expr: + res = np.full(len(out), int(ch[2:]), dtype=int) + else: + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + # move tier+dots in expression to underscores (e.g. 
evt.foo -> evt_foo) + res = eval( + expr.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_"), + var, ) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # get sub evt based query condition if needed + if isinstance(qry, str): + qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) + qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) + limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) + + # or forward the array + elif isinstance(qry, np.ndarray): + limarr = qry + + # if no condition, it must be true else: limarr = np.ones(len(res)).astype(bool) # append to out according to mode == tot if res.dtype == bool: res = res.astype(int) + if limarr.dtype != bool: + limarr = limarr.astype(bool) out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) return {"values": out} @@ -509,12 +540,11 @@ def evaluate_to_any( idx: np.ndarray, ids: np.ndarray, f_hit: str, - hit_group: str, f_dsp: str, - dsp_group: str, chns: list, expr: str, exprl: list, + qry: str | np.ndarray, nrows: int, var_ph: dict = None, defv=np.nan, @@ -526,24 +556,46 @@ def evaluate_to_any( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - # find fields in either dsp, hit - var = ( - find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) - | var_ph - ) + if "tcm.array_id" == expr: + res = np.full(len(out), int(ch[2:]), dtype=int) + else: + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + # move tier+dots in expression to underscores (e.g. 
evt.foo -> evt_foo) + res = eval( + expr.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_"), + var, + ) - # evaluate expression - res = eval(expr, var) + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # get sub evt based query condition if needed + if isinstance(qry, str): + qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) + qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) + limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) + # or forward the array + elif isinstance(qry, np.ndarray): + limarr = qry + + # if no condition, it must be true + else: + limarr = np.ones(len(res)).astype(bool) # append to out according to mode == any if res.dtype != bool: res = res.astype(bool) - out[idx_ch] = out[idx_ch] | res + if limarr.dtype != bool: + limarr = limarr.astype(bool) + out[idx_ch] = out[idx_ch] | (res & limarr) return {"values": out} @@ -552,12 +604,11 @@ def evaluate_to_all( idx: np.ndarray, ids: np.ndarray, f_hit: str, - hit_group: str, f_dsp: str, - dsp_group: str, chns: list, expr: str, exprl: list, + qry: str | np.ndarray, nrows: int, var_ph: dict = None, defv=np.nan, @@ -569,24 +620,45 @@ def evaluate_to_all( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - # find fields in either dsp, hit - var = ( - find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) - | var_ph - ) + if "tcm.array_id" == expr: + res = np.full(len(out), int(ch[2:]), dtype=int) + else: + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval( + expr.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", 
"evt_"), + var, + ) - # evaluate expression - res = eval(expr, var) + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # get sub evt based query condition if needed + if isinstance(qry, str): + qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) + qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) + limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) + + # or forward the array + elif isinstance(qry, np.ndarray): + limarr = qry - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) + # if no condition, it must be true + else: + limarr = np.ones(len(res)).astype(bool) # append to out according to mode == all if res.dtype != bool: res = res.astype(bool) - out[idx_ch] = out[idx_ch] & res + if limarr.dtype != bool: + limarr = limarr.astype(bool) + out[idx_ch] = out[idx_ch] & res & limarr return {"values": out} @@ -595,9 +667,7 @@ def evaluate_at_channel( idx: np.ndarray, ids: np.ndarray, f_hit: str, - hit_group: str, f_dsp: str, - dsp_group: str, chns: list, expr: str, exprl: list, @@ -611,19 +681,24 @@ def evaluate_at_channel( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - # find fields in either dsp, hit - var = ( - find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) - | var_ph - ) - - # evaluate expression - res = eval(expr, var) + if "tcm.array_id" == expr: + res = np.full(len(out), int(ch[2:]), dtype=int) + else: + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval( + expr.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_"), + var, + ) - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, 
np.ndarray): - res = np.full(len(out), res, dtype=type(res)) + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) out[idx_ch] = np.where(int(ch[2:]) == ch_comp.nda, res, out[idx_ch]) @@ -634,9 +709,7 @@ def evaluate_at_channel_vov( idx: np.ndarray, ids: np.ndarray, f_hit: str, - hit_group: str, f_dsp: str, - dsp_group: str, expr: str, exprl: list, ch_comp: VectorOfVectors, @@ -651,21 +724,24 @@ def evaluate_at_channel_vov( # get index list for this channel to be loaded idx_ch = idx[ids == ch] - # find fields in either dsp, hit - var = ( - find_parameters( - f_hit, f_dsp, f"ch{ch}", idx_ch, exprl, dsp_group, hit_group + if "tcm.array_id" == expr: + res = np.full(len(out), int(ch[2:]), dtype=int) + else: + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, f"ch{ch}", idx_ch, exprl) | var_ph + + # evaluate expression + res = eval( + expr.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_"), + var, ) - | var_ph - ) - - # evaluate expression - res = eval(expr, var) - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) # see in which events the current channel is present mask = (out == ch).any(axis=1) @@ -683,15 +759,12 @@ def evaluate_to_vector( idx: np.ndarray, ids: np.ndarray, f_hit: str, - hit_group: str, f_dsp: str, - dsp_group: str, chns: list, expr: str, exprl: list, + qry: str | np.ndarray, nrows: int, - mode_lim: int | float, - op: str = None, var_ph: dict = None, ) -> dict: """ @@ -710,29 +783,41 @@ def evaluate_to_vector( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - # find fields in either dsp, hit - var 
= ( - find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) - | var_ph - ) - - # evaluate expression - res = eval(expr, var) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get unification condition if present in mode - if op is not None: - limarr = eval( - "".join(["res", op, "lim"]), - {"res": res, "lim": mode_lim}, + if "tcm.array_id" == expr: + res = np.full(len(out), int(ch[2:]), dtype=int) + else: + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval( + expr.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_"), + var, ) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # get sub evt based query condition if needed + if isinstance(qry, str): + qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) + qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) + limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) + + # or forward the array + elif isinstance(qry, np.ndarray): + limarr = qry + + # if no condition, it must be true else: limarr = np.ones(len(res)).astype(bool) + if limarr.dtype != bool: + limarr = limarr.astype(bool) # append to out according to mode == vov out[:, i][limarr] = res[limarr] out_chs[:, i][limarr] = int(ch[2:]) @@ -763,8 +848,6 @@ def build_evt( wo_mode: str = "write_safe", group: str = "/evt/", tcm_group: str = "/hardware_tcm_1/", - dsp_group: str = "/dsp/", - hit_group: str = "/hit/", ) -> None: """ Transform data from the hit and dsp levels which a channel sorted @@ -845,10 +928,6 @@ def build_evt( lh5 root group name tcm_group lh5 root group in tcm file - dsp_group - lh5 root group in dsp file - hit_group - lh5 root group in hit file """ 
lstore = store.LH5Store() @@ -907,15 +986,15 @@ def build_evt( log.debug("Processing field" + k) # if mode not defined in operation, it can only be an operation on the evt level. - if "mode" not in v.keys(): - exprl = re.findall(r"[a-zA-Z_$][\w$]*", v["expression"]) + if "aggregation_mode" not in v.keys(): + exprl = re.findall(r"(evt).([a-zA-Z_$][\w$]*)", v["expression"]) var = {} if os.path.exists(f_evt): - var = load_vars_to_nda(f_evt, group, exprl) + var = load_vars_to_nda(f_evt, "", exprl) if "parameters" in v.keys(): var = var | v["parameters"] - res = eval(v["expression"], var) + res = eval(v["expression"].replace("evt.", "evt_"), var) # now check what dimension we have after the evaluation if len(res.shape) == 1: @@ -950,9 +1029,11 @@ def build_evt( itertools.chain.from_iterable([chns[e] for e in v["channels"]]) ) - pars, defaultv = None, np.nan + pars, qry, defaultv = None, None, np.nan if "parameters" in v.keys(): pars = v["parameters"] + if "query" in v.keys(): + qry = v["query"] if "initial" in v.keys() and not v["initial"] == "np.nan": defaultv = v["initial"] @@ -962,13 +1043,12 @@ def build_evt( f_hit, f_dsp, chns_e, - v["mode"], + v["aggregation_mode"], v["expression"], nrows, group, - dsp_group, - hit_group, pars, + qry, defaultv, ) @@ -984,16 +1064,16 @@ def build_evt( # if get_ch flag is true and exists and result dic contains channels entry # write also channels information - if "get_ch" in v.keys() and v["get_ch"] and "channels" in result.keys(): - obj = result["channels"] - if isinstance(obj, np.ndarray): - obj = Array(result["channels"]) - lstore.write_object( - obj=obj, - name=group + k + "_id", - lh5_file=f_evt, - wo_mode=wo_mode, - ) + # if "get_ch" in v.keys() and v["get_ch"] and "channels" in result.keys(): + # obj = result["channels"] + # if isinstance(obj, np.ndarray): + # obj = Array(result["channels"]) + # lstore.write_object( + # obj=obj, + # name=group + k + "_id", + # lh5_file=f_evt, + # wo_mode=wo_mode, + # ) log.info("Done") diff 
--git a/tests/evt/configs/basic-evt-config.json b/tests/evt/configs/basic-evt-config.json index aa0b68456..c573c89fb 100644 --- a/tests/evt/configs/basic-evt-config.json +++ b/tests/evt/configs/basic-evt-config.json @@ -5,62 +5,67 @@ "operations": { "multiplicity": { "channels": "geds_on", - "mode": "tot", - "expression": "cuspEmax_ctc_cal > a", + "aggregation_mode": "sum", + "expression": "hit.cuspEmax_ctc_cal > a", "parameters": { "a": 25 }, "initial": 0 }, "energy": { "channels": "geds_on", - "mode": ["first>25", "tp_0_est"], - "get_ch": true, - "expression": "cuspEmax_ctc_cal", + "aggregation_mode": ["first", "dsp.tp_0_est"], + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "hit.cuspEmax_ctc_cal", "initial": "np.nan" }, + "energy_id": { + "channels": "geds_on", + "aggregation_mode": ["first", "dsp.tp_0_est"], + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "tcm.array_id", + "initial": 0 + }, "energy_any_above1MeV": { "channels": "geds_on", - "mode": "any", - "get_ch": true, - "expression": "cuspEmax_ctc_cal>1000", + "aggregation_mode": "any", + "expression": "hit.cuspEmax_ctc_cal>1000", "initial": false }, "energy_all_above1MeV": { "channels": "geds_on", - "mode": "all", - "get_ch": true, - "expression": "cuspEmax_ctc_cal>1000", + "aggregation_mode": "all", + "expression": "hit.cuspEmax_ctc_cal>1000", "initial": false }, "energy_aux": { "channels": "geds_on", - "mode": ["last>25", "tp_0_est"], - "get_ch": true, - "expression": "cuspEmax_ctc_cal", + "aggregation_mode": ["last", "dsp.tp_0_est"], + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "hit.cuspEmax_ctc_cal", "initial": "np.nan" }, "energy_sum": { "channels": "geds_on", - "mode": "tot>25", - "get_ch": true, - "expression": "cuspEmax_ctc_cal", + "aggregation_mode": "sum", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "hit.cuspEmax_ctc_cal", "initial": 0.0 }, "is_usable_aoe": { "channels": "geds_on", - "mode": "energy_id", + "aggregation_mode": "evt.energy_id", "expression": "True", 
"initial": false }, "aoe": { "channels": "geds_on", - "mode": "energy_id", - "expression": "AoE_Classifier", + "aggregation_mode": "evt.energy_id", + "expression": "hit.AoE_Classifier", "initial": "np.nan" }, "is_aoe_rejected": { "channels": "geds_on", - "mode": "energy_id", - "expression": "~(AoE_Double_Sided_Cut)", + "aggregation_mode": "evt.energy_id", + "expression": "~(hit.AoE_Double_Sided_Cut)", "initial": false } } diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index 8f084034a..4810b91e0 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -5,50 +5,57 @@ }, "operations": { "energy_first": { - "channels": ["geds_on"], - "mode": ["first>25", "tp_0_est"], - "get_ch": true, - "expression": "cuspEmax_ctc_cal", + "channels": "geds_on", + "aggregation_mode": ["first", "dsp.tp_0_est"], + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "hit.cuspEmax_ctc_cal", "initial": "np.nan" }, + "energy_first_id": { + "channels": "geds_on", + "aggregation_mode": ["first", "dsp.tp_0_est"], + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "tcm.array_id", + "initial": 0 + }, "t0": { "channels": ["geds_on"], - "mode": "energy_first_id", - "expression": "tp_0_est", + "aggregation_mode": "evt.energy_first_id", + "expression": "dsp.tp_0_est", "initial": 0.0 }, "lar_energy": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_energy(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_energy(0.5,evt.t0,48000,1000,5000)" }, "lar_multiplicity": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_majority(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_majority(0.5,evt.t0,48000,1000,5000)" }, "is_lar_rejected": { - "expression": "(lar_energy >4) | (lar_multiplicity > 4) " + "expression": "(evt.lar_energy >4) | 
(evt.lar_multiplicity > 4) " }, "lar_classifier": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_etc(0.5,t0,48000,100,6000,80,1)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1)" }, "lar_energy_dplms": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_energy_dplms(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_energy_dplms(0.5,evt.t0,48000,1000,5000)" }, "lar_multiplicity_dplms": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_majority_dplms(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_majority_dplms(0.5,evt.t0,48000,1000,5000)" }, "lar_time_shift": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_time_shift(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_time_shift(0.5,evt.t0,48000,1000,5000)" } } } diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index 436332409..06918a421 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -6,48 +6,54 @@ "operations": { "energy": { "channels": "geds_on", - "mode": "vov>25", - "get_ch": true, - "expression": "cuspEmax_ctc_cal" + "aggregation_mode": "vov", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "hit.cuspEmax_ctc_cal" + }, + "energy_id": { + "channels": "geds_on", + "aggregation_mode": "vov", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "tcm.array_id" }, "t0": { "channels": ["geds_on"], - "mode": "energy_id", - "expression": "tp_0_est", + "aggregation_mode": "evt.energy_id", + "expression": "dsp.tp_0_est", "initial": 0.0 }, "lar_energy": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_energy(0.5,t0,48000,1000,5000)" + 
"aggregation_mode": "function", + "expression": ".modules.spm.get_energy(0.5,evt.t0,48000,1000,5000)" }, "lar_multiplicity": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_majority(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_majority(0.5,evt.t0,48000,1000,5000)" }, "is_lar_rejected": { - "expression": "(lar_energy >4) | (lar_multiplicity > 4) " + "expression": "(evt.lar_energy >4) | (evt.lar_multiplicity > 4) " }, "lar_classifier": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_etc(0.5,t0,48000,100,6000,80,1)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1)" }, "lar_energy_dplms": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_energy_dplms(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_energy_dplms(0.5,evt.t0,48000,1000,5000)" }, "lar_multiplicity_dplms": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_majority_dplms(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_majority_dplms(0.5,evt.t0,48000,1000,5000)" }, "lar_time_shift": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_time_shift(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_time_shift(0.5,evt.t0,48000,1000,5000)" } } } diff --git a/tests/evt/configs/query-test-evt-config.json b/tests/evt/configs/query-test-evt-config.json new file mode 100644 index 000000000..abbaa8da4 --- /dev/null +++ b/tests/evt/configs/query-test-evt-config.json @@ -0,0 +1,88 @@ +{ + "channels": { + "geds_on": ["ch1084803", "ch1084804", "ch1121600"] + }, + "operations":{ + "multiplicity": { + "channels": "geds_on", + "aggregation_mode": "sum", + "expression": "hit.cuspEmax_ctc_cal > a", + "parameters": { "a": 25 }, + "initial": 0 + }, + "test_sum": { + 
"channels": "geds_on", + "aggregation_mode": "sum", + "query":"evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_first": { + "channels": "geds_on", + "aggregation_mode": ["first", "dsp.tp_0_est"], + "query":"evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_first2": { + "channels": "geds_on", + "aggregation_mode": ["first", "dsp.tp_0_est"], + "expression": "True", + "initial": false + }, + "test_last": { + "channels": "geds_on", + "aggregation_mode": ["last", "dsp.tp_0_est"], + "query":"evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_last2": { + "channels": "geds_on", + "aggregation_mode": ["last", "dsp.tp_0_est"], + "expression": "True", + "initial": false + }, + "test_any": { + "channels": "geds_on", + "aggregation_mode": "any", + "query":"evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_any2": { + "channels": "geds_on", + "aggregation_mode": "any", + "query":"hit.cuspEmax_ctc_cal >25", + "expression": "True", + "initial": false + }, + "test_all": { + "channels": "geds_on", + "aggregation_mode": "all", + "query":"evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_all2": { + "channels": "geds_on", + "aggregation_mode": "all", + "query":"hit.cuspEmax_ctc_cal >25", + "expression": "True", + "initial": false + }, + "test_vov": { + "channels": "geds_on", + "aggregation_mode": "vov", + "query":"evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_vov2": { + "channels": "geds_on", + "aggregation_mode": "vov", + "expression": "True", + "initial": false + } + } +} \ No newline at end of file diff --git a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json index d1bfc4120..6f057d18c 100644 --- a/tests/evt/configs/vov-test-evt-config.json +++ b/tests/evt/configs/vov-test-evt-config.json @@ -5,29 +5,35 @@ "operations": { "energy": { "channels": "geds_on", - 
"mode": "vov>25", - "get_ch": true, - "expression": "cuspEmax_ctc_cal" + "aggregation_mode": "vov", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "hit.cuspEmax_ctc_cal" + }, + "energy_id": { + "channels": "geds_on", + "aggregation_mode": "vov", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "tcm.array_id" }, "aoe": { - "mode": "energy_id", - "expression": "AoE_Classifier" + "aggregation_mode": "evt.energy_id", + "expression": "hit.AoE_Classifier" }, "multiplicity": { "channels": "geds_on", - "mode": "tot", - "expression": "cuspEmax_ctc_cal > a", + "aggregation_mode": "sum", + "expression": "hit.cuspEmax_ctc_cal > a", "parameters": { "a": 25 }, "initial": 0 }, "energy_times_aoe": { - "expression": "energy*aoe" + "expression": "evt.energy*evt.aoe" }, "energy_times_multiplicity": { - "expression": "energy*multiplicity" + "expression": "evt.energy*evt.multiplicity" }, "multiplicity_squared": { - "expression": "multiplicity*multiplicity" + "expression": "evt.multiplicity*evt.multiplicity" } } } diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 128833e5b..146fe5150 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -26,14 +26,10 @@ def test_basics(lgnd_test_data, tmptestdir): wo_mode="o", group="/evt/", tcm_group="hardware_tcm_1", - dsp_group="/dsp/", - hit_group="/hit/", ) assert os.path.exists(outfile) - assert ( - len(ls(outfile, "/evt/")) == 11 - ) # 7 operations of which 2 are requesting channel field + assert len(ls(outfile, "/evt/")) == 10 nda = load_nda( outfile, ["energy", "energy_aux", "energy_sum", "multiplicity"], "/evt/" ) @@ -154,7 +150,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): with pytest.raises(RuntimeError): build_evt(f_dsp, f_tcm, f_hit, outfile, f_config, meta_path) - with pytest.raises(NameError): + with pytest.raises(RuntimeError): build_evt(f_tcm, f_hit, f_dsp, outfile, f_config, meta_path) with pytest.raises(TypeError): @@ -168,57 +164,39 @@ def 
test_graceful_crashing(lgnd_test_data, tmptestdir): with pytest.raises(ValueError): build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) - conf = { - "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, - "operations": {}, - } - build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) - assert not os.path.exists(outfile) - conf = { "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, "operations": { - "energy": { + "foo": { "channels": "geds_on", - "mode": ["first>pineapple", "tp_0_est"], - "get_ch": True, - "expression": "cuspEmax_ctc_cal", - "initial": "np.nan", + "aggregation_mode": "banana", + "expression": "hit.cuspEmax_ctc_cal > a", + "parameters": {"a": 25}, + "initial": 0, } }, } with pytest.raises(ValueError): build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) - conf = { - "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, - "operations": { - "energy": { - "channels": "geds_on", - "mode": ["first>25", "tp_0_est"], - "get_ch": True, - "expression": "cuspEmax_ctc_cal$cuspEmax_ctc_cal", - "initial": "np.nan", - } - }, - } - with pytest.raises(SyntaxError): - build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) - conf = { - "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, - "operations": { - "energy": { - "channels": "geds_on", - "mode": ["first>25", "coconut"], - "get_ch": True, - "expression": "cuspEmax_ctc_cal", - "initial": "np.nan", - } - }, - } - with pytest.raises(ValueError): - build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) +def test_query(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + build_evt( + f_tcm=lgnd_test_data.get_path(tcm_path), + f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + 
f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + f_evt=outfile, + meta_path=None, + evt_config=f"{config_dir}/query-test-evt-config.json", + wo_mode="o", + group="/evt/", + tcm_group="hardware_tcm_1", + ) + assert len(ls(outfile, "/evt/")) == 12 def test_skimming(lgnd_test_data, tmptestdir): @@ -244,3 +222,6 @@ def test_skimming(lgnd_test_data, tmptestdir): skim_evt(outfile, "multiplicity == 3", None, None, "o") assert ac == len(lstore.read_object("/evt/energy", outfile)[0].to_aoesa().nda) + + with pytest.raises(ValueError): + skim_evt(outfile, "multiplicity == 3", None, None, "bla") From 5821aaf5095ee7047497a4ebfa92a5524d1cc246 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 28 Nov 2023 18:04:00 +0100 Subject: [PATCH 18/73] Moved channel obtaining by meta data to its own module --- src/pygama/evt/build_evt.py | 68 ++++--- src/pygama/evt/modules/legend_meta.py | 27 +++ tests/evt/configs/module-test-evt-config.json | 2 +- tests/evt/configs/query-test-evt-config.json | 170 +++++++++--------- tests/evt/test_build_evt.py | 21 +-- 5 files changed, 151 insertions(+), 137 deletions(-) create mode 100644 src/pygama/evt/modules/legend_meta.py diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index cc1e237e5..0288015a0 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -108,8 +108,13 @@ def evaluate_expression( if mode == "function": # evaluate expression func, params = expr.split("(") + params = ( + params.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_") + ) params = [f_hit, f_dsp, f_tcm, chns] + [ - num_and_pars(e.replace(".", "_"), var_ph) for e in params[:-1].split(",") + num_and_pars(e, var_ph) for e in params[:-1].split(",") ] # load function dynamically @@ -844,7 +849,6 @@ def build_evt( f_hit: str, f_evt: str, evt_config: str | dict, - meta_path: str = None, wo_mode: str = "write_safe", group: str = "/evt/", tcm_group: str = "/hardware_tcm_1/", @@ -951,28 +955,31 @@ 
def build_evt( chns = {} for k, v in tbl_cfg["channels"].items(): - if isinstance(v, str): - # only import legend meta data when needed. - # LEGEND collaborators can use the meta keyword - # Why for users w/o access to the LEGEND meta data this is still working - if "meta" in v: - lm = import_module("legendmeta") - lmeta = lm.LegendMetadata(path=meta_path) - chmap = lmeta.channelmap(re.search(r"\d{8}T\d{6}Z", f_dsp).group(0)) - m, sys, usa = v.split("_", 2) - tmp = [ - f"ch{e}" - for e in chmap.map("daq.rawid") - if chmap.map("daq.rawid")[e]["system"] == sys - ] - chns[k] = [ - e - for e in tmp - if chmap.map("daq.rawid")[int(e[2:])]["analysis"]["usability"] - == usa - ] - else: - chns[k] = [v] + if isinstance(v, dict): + # it is a meta module. module_name must exist + if "module" not in v.keys(): + raise ValueError( + "Need module_name to load channel via a meta data module" + ) + + attr = {} + # the time_key argument is set to the time key of the DSP file + # in case it is not provided by the config + if "time_key" not in v.keys(): + attr["time_key"] = re.search(r"\d{8}T\d{6}Z", f_dsp).group(0) + + # if "None" do None + elif "None" == v["time_key"]: + attr["time_key"] = None + + # load module + p, m = v["module"].rsplit(".", 1) + met = getattr(import_module(p, package=__package__), m) + chns[k] = met(v | attr) + + elif isinstance(v, str): + chns[k] = [v] + elif isinstance(v, list): chns[k] = [e for e in v] @@ -1062,19 +1069,6 @@ def build_evt( wo_mode=wo_mode, ) - # if get_ch flag is true and exists and result dic contains channels entry - # write also channels information - # if "get_ch" in v.keys() and v["get_ch"] and "channels" in result.keys(): - # obj = result["channels"] - # if isinstance(obj, np.ndarray): - # obj = Array(result["channels"]) - # lstore.write_object( - # obj=obj, - # name=group + k + "_id", - # lh5_file=f_evt, - # wo_mode=wo_mode, - # ) - log.info("Done") diff --git a/src/pygama/evt/modules/legend_meta.py 
b/src/pygama/evt/modules/legend_meta.py new file mode 100644 index 000000000..d188c2a14 --- /dev/null +++ b/src/pygama/evt/modules/legend_meta.py @@ -0,0 +1,27 @@ +""" +Module for importing channel lists from LEGEND meta data +""" +from importlib import import_module + + +def legend_meta(params: dict) -> list: + # only import legend meta data when needed. + # LEGEND collaborators can use the meta keyword + # While for users w/o access to the LEGEND meta data this is still working + lm = import_module("legendmeta") + lmeta = lm.LegendMetadata(path=params["meta_path"]) + chmap = lmeta.channelmap(params["time_key"]) + tmp = [ + f"ch{e}" + for e in chmap.map("daq.rawid") + if chmap.map("daq.rawid")[e]["system"] == params["system"] + ] + if "usability" not in params.keys(): + return tmp + else: + return [ + e + for e in tmp + if chmap.map("daq.rawid")[int(e[2:])]["analysis"]["usability"] + == params["usability"] + ] diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index 4810b91e0..d4d6c1148 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -27,7 +27,7 @@ "lar_energy": { "channels": "spms_on", "aggregation_mode": "function", - "expression": ".modules.spm.get_energy(0.5,evt.t0,48000,1000,5000)" + "expression": "pygama.evt.modules.spm.get_energy(0.5,evt.t0,48000,1000,5000)" }, "lar_multiplicity": { "channels": "spms_on", diff --git a/tests/evt/configs/query-test-evt-config.json b/tests/evt/configs/query-test-evt-config.json index abbaa8da4..0bf7fe4f9 100644 --- a/tests/evt/configs/query-test-evt-config.json +++ b/tests/evt/configs/query-test-evt-config.json @@ -1,88 +1,88 @@ { - "channels": { - "geds_on": ["ch1084803", "ch1084804", "ch1121600"] + "channels": { + "geds_on": ["ch1084803", "ch1084804", "ch1121600"] + }, + "operations": { + "multiplicity": { + "channels": "geds_on", + "aggregation_mode": "sum", + "expression": "hit.cuspEmax_ctc_cal > a", + 
"parameters": { "a": 25 }, + "initial": 0 }, - "operations":{ - "multiplicity": { - "channels": "geds_on", - "aggregation_mode": "sum", - "expression": "hit.cuspEmax_ctc_cal > a", - "parameters": { "a": 25 }, - "initial": 0 - }, - "test_sum": { - "channels": "geds_on", - "aggregation_mode": "sum", - "query":"evt.multiplicity == 1", - "expression": "True", - "initial": false - }, - "test_first": { - "channels": "geds_on", - "aggregation_mode": ["first", "dsp.tp_0_est"], - "query":"evt.multiplicity == 1", - "expression": "True", - "initial": false - }, - "test_first2": { - "channels": "geds_on", - "aggregation_mode": ["first", "dsp.tp_0_est"], - "expression": "True", - "initial": false - }, - "test_last": { - "channels": "geds_on", - "aggregation_mode": ["last", "dsp.tp_0_est"], - "query":"evt.multiplicity == 1", - "expression": "True", - "initial": false - }, - "test_last2": { - "channels": "geds_on", - "aggregation_mode": ["last", "dsp.tp_0_est"], - "expression": "True", - "initial": false - }, - "test_any": { - "channels": "geds_on", - "aggregation_mode": "any", - "query":"evt.multiplicity == 1", - "expression": "True", - "initial": false - }, - "test_any2": { - "channels": "geds_on", - "aggregation_mode": "any", - "query":"hit.cuspEmax_ctc_cal >25", - "expression": "True", - "initial": false - }, - "test_all": { - "channels": "geds_on", - "aggregation_mode": "all", - "query":"evt.multiplicity == 1", - "expression": "True", - "initial": false - }, - "test_all2": { - "channels": "geds_on", - "aggregation_mode": "all", - "query":"hit.cuspEmax_ctc_cal >25", - "expression": "True", - "initial": false - }, - "test_vov": { - "channels": "geds_on", - "aggregation_mode": "vov", - "query":"evt.multiplicity == 1", - "expression": "True", - "initial": false - }, - "test_vov2": { - "channels": "geds_on", - "aggregation_mode": "vov", - "expression": "True", - "initial": false - } + "test_sum": { + "channels": "geds_on", + "aggregation_mode": "sum", + "query": "evt.multiplicity 
== 1", + "expression": "True", + "initial": false + }, + "test_first": { + "channels": "geds_on", + "aggregation_mode": ["first", "dsp.tp_0_est"], + "query": "evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_first2": { + "channels": "geds_on", + "aggregation_mode": ["first", "dsp.tp_0_est"], + "expression": "True", + "initial": false + }, + "test_last": { + "channels": "geds_on", + "aggregation_mode": ["last", "dsp.tp_0_est"], + "query": "evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_last2": { + "channels": "geds_on", + "aggregation_mode": ["last", "dsp.tp_0_est"], + "expression": "True", + "initial": false + }, + "test_any": { + "channels": "geds_on", + "aggregation_mode": "any", + "query": "evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_any2": { + "channels": "geds_on", + "aggregation_mode": "any", + "query": "hit.cuspEmax_ctc_cal >25", + "expression": "True", + "initial": false + }, + "test_all": { + "channels": "geds_on", + "aggregation_mode": "all", + "query": "evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_all2": { + "channels": "geds_on", + "aggregation_mode": "all", + "query": "hit.cuspEmax_ctc_cal >25", + "expression": "True", + "initial": false + }, + "test_vov": { + "channels": "geds_on", + "aggregation_mode": "vov", + "query": "evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_vov2": { + "channels": "geds_on", + "aggregation_mode": "vov", + "expression": "True", + "initial": false } -} \ No newline at end of file + } +} diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 146fe5150..e7cae4e4d 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -21,7 +21,6 @@ def test_basics(lgnd_test_data, tmptestdir): f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), f_evt=outfile, - 
meta_path=None, evt_config=f"{config_dir}/basic-evt-config.json", wo_mode="o", group="/evt/", @@ -57,7 +56,6 @@ def test_lar_module(lgnd_test_data, tmptestdir): f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), f_evt=outfile, - meta_path=None, evt_config=f"{config_dir}/module-test-evt-config.json", wo_mode="o", group="/evt/", @@ -85,7 +83,6 @@ def test_lar_t0_vov_module(lgnd_test_data, tmptestdir): f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), f_evt=outfile, - meta_path=None, evt_config=f"{config_dir}/module-test-t0-vov-evt-config.json", wo_mode="o", group="/evt/", @@ -112,7 +109,6 @@ def test_vov(lgnd_test_data, tmptestdir): f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), f_evt=outfile, - meta_path=None, evt_config=f"{config_dir}/vov-test-evt-config.json", wo_mode="o", group="/evt/", @@ -144,25 +140,24 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): f_tcm = lgnd_test_data.get_path(tcm_path) f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")) f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")) - meta_path = None f_config = f"{config_dir}/basic-evt-config.json" with pytest.raises(RuntimeError): - build_evt(f_dsp, f_tcm, f_hit, outfile, f_config, meta_path) + build_evt(f_dsp, f_tcm, f_hit, outfile, f_config) with pytest.raises(RuntimeError): - build_evt(f_tcm, f_hit, f_dsp, outfile, f_config, meta_path) + build_evt(f_tcm, f_hit, f_dsp, outfile, f_config) with pytest.raises(TypeError): - build_evt(f_tcm, f_dsp, f_hit, outfile, None, meta_path) + build_evt(f_tcm, f_dsp, f_hit, outfile, None) conf = {"operations": {}} with pytest.raises(ValueError): - build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) + build_evt(f_tcm, f_dsp, f_hit, outfile, conf) conf = {"channels": {"geds_on": ["ch1084803", 
"ch1084804", "ch1121600"]}} with pytest.raises(ValueError): - build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) + build_evt(f_tcm, f_dsp, f_hit, outfile, conf) conf = { "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, @@ -177,7 +172,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): }, } with pytest.raises(ValueError): - build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) + build_evt(f_tcm, f_dsp, f_hit, outfile, conf) def test_query(lgnd_test_data, tmptestdir): @@ -190,7 +185,6 @@ def test_query(lgnd_test_data, tmptestdir): f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), f_evt=outfile, - meta_path=None, evt_config=f"{config_dir}/query-test-evt-config.json", wo_mode="o", group="/evt/", @@ -207,9 +201,8 @@ def test_skimming(lgnd_test_data, tmptestdir): f_tcm = lgnd_test_data.get_path(tcm_path) f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")) f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")) - meta_path = None f_config = f"{config_dir}/vov-test-evt-config.json" - build_evt(f_tcm, f_dsp, f_hit, outfile, f_config, meta_path) + build_evt(f_tcm, f_dsp, f_hit, outfile, f_config) lstore = store.LH5Store() ac = lstore.read_object("/evt/multiplicity", outfile)[0].nda From 8e2bab13825f9dd17cf0223445afbb2ab68570ce Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 29 Nov 2023 19:29:39 +0100 Subject: [PATCH 19/73] many things --- src/pygama/evt/build_evt.py | 918 ++++++++++++------ src/pygama/evt/modules/legend_meta.py | 13 +- tests/evt/configs/basic-evt-config.json | 12 +- tests/evt/configs/module-test-evt-config.json | 8 +- .../module-test-t0-vov-evt-config.json | 8 +- tests/evt/configs/query-test-evt-config.json | 12 +- tests/evt/configs/vov-test-evt-config.json | 6 +- 7 files changed, 633 insertions(+), 344 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 0288015a0..249d08c2f 100644 
--- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -1,6 +1,7 @@ """ This module implements routines to build the evt tier. """ + from __future__ import annotations import itertools @@ -41,10 +42,10 @@ def evaluate_expression( f_hit: str, f_dsp: str, chns: list, - mod: str | list, + chns_rm: list, + mode: str, expr: str, nrows: int, - group: str, para: dict = None, qry: str = None, defv=np.nan, @@ -64,39 +65,29 @@ def evaluate_expression( Path to dsp tier file chns List of channel names across which expression gets evaluated (form: "ch") + chns_rm + List of channels which get set to default value during evaluation. In function mode they are removed entirely (form: "ch") mode The mode determines how the event entry is calculated across channels. Options are: - - "first": The value of the channel in an event triggering first in time (according to tp_0_est) is returned. It is possible to add a condition (e.g. "first>10"). Only channels fulfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, nan is returned for this event. - - "last": The value of the channel in an event triggering last in time (according to tp_0_est) is returned. It is possible to add a condition (e.g. "last>10"). Only channels fulfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, nan is returned for this event. - - "tot": The sum of all channels across an event. It is possible to add a condition (e.g. "tot>10"). Only channels fulfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, zero is returned for this event. Booleans are treated as integers 0/1. - - "any": Logical or between all channels. Non boolean values are True for values != 0 and False for values == 0. - - "all": Logical and between all channels. Non boolean values are True for values != 0 and False for values == 0. - - ch_field: A previously generated channel_id field (i.e. 
from the get_ch flag) can be given here, and the value of this specific channels is used. if ch_field is a VectorOfVectors, the channel list is ignored. If ch_field is an Array, the intersection of the passed channels list and the Array is formed. If a channel is not in the Array, the default is used. - - "vov": Channels are not combined, but result saved as VectorOfVectors. Use of getch is recommended. It is possible (and recommended) to add a condition (e.g. "vov>10"). Only channels fulfilling this condition are saved. + - "first_at:sorter": aggregates across channels by returning the expression of the channel with smallest value of sorter. + - "last_at": aggregates across channels by returning the expression of the channel with largest value of sorter. + - "sum": aggregates by summation. + - "any": aggregates by logical or. + - "all": aggregates by logical and. + - "keep_at:ch_field": aggregates according to passed ch_field + - "vectorize": Channels are not combined, but result saved as VectorOfVectors. qry - A query that can set a condition on mode. Can be any tier (i.e. a channelxevents shaped boolean matrix for tiers below event or an events long boolean array at the evt level) + A query that can mask the aggregation. expr - The expression. That can be any mathematical equation/comparison. If mode == func, the expression needs to be a special processing function defined in modules (e.g. "modules.spm.get_energy). In the expression parameters from either hit, dsp, evt tier (from operations performed before this one! --> JSON operations order matters), or from the "parameters" field can be used. + The expression. That can be any mathematical equation/comparison. If mode == function, the expression needs to be a special processing function defined in modules (e.g. "modules.spm.get_energy). In the expression parameters from either hit, dsp, evt tier (from operations performed before this one! 
--> JSON operations order matters), or from the "parameters" field can be used. nrows Number of rows to be processed. - group - lh5 root group name - dsp_group - lh5 root group in dsp file - hit_group - lh5 root group in hit file para Dictionary of parameters defined in the "parameters" field in the configuration JSON file. defv default value of evaluation """ - # set modus variables - mode, sorter = mod, None - if isinstance(mod, list): - mode = mod[0] - sorter = mod[1].split(".") - # find parameters in evt file or in parameters exprl = re.findall(r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", expr) var_ph = {} @@ -113,7 +104,7 @@ def evaluate_expression( .replace("hit.", "hit_") .replace("evt.", "evt_") ) - params = [f_hit, f_dsp, f_tcm, chns] + [ + params = [f_hit, f_dsp, f_tcm, [x for x in chns if x not in chns_rm]] + [ num_and_pars(e, var_ph) for e in params[:-1].split(",") ] @@ -145,12 +136,13 @@ def evaluate_expression( # switch through modes if ( os.path.exists(f_evt) - and "evt." == mode[:4] - and mode.split(".")[-1] + and "keep_at:" == mode[:8] + and "evt." 
== mode[8:][:4] + and mode[8:].split(".")[-1] in [e.split("/")[-1] for e in store.ls(f_evt, "/evt/")] ): lstore = store.LH5Store() - ch_comp, _ = lstore.read_object(mode.replace(".", "/"), f_evt) + ch_comp, _ = lstore.read_object(mode[8:].replace(".", "/"), f_evt) if isinstance(ch_comp, Array): return evaluate_at_channel( idx, @@ -158,6 +150,7 @@ def evaluate_expression( f_hit, f_dsp, chns, + chns_rm, expr, exprl, ch_comp, @@ -173,7 +166,9 @@ def evaluate_expression( expr, exprl, ch_comp, + chns_rm, var_ph, + defv, ) else: raise NotImplementedError( @@ -181,13 +176,19 @@ def evaluate_expression( + " not supported (only Array and VectorOfVectors are supported)" ) - elif "first" == mode: + elif "first_at:" in mode: + sorter = tuple( + re.findall( + r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", mode.split("first_at:")[-1] + )[0] + ) return evaluate_to_first( idx, ids, f_hit, f_dsp, chns, + chns_rm, expr, exprl, qry_mask, @@ -196,13 +197,19 @@ def evaluate_expression( var_ph, defv, ) - elif "last" == mode: + elif "last_at:" in mode: + sorter = tuple( + re.findall( + r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", mode.split("last_at:")[-1] + )[0] + ) return evaluate_to_last( idx, ids, f_hit, f_dsp, chns, + chns_rm, expr, exprl, qry_mask, @@ -218,6 +225,7 @@ def evaluate_expression( f_hit, f_dsp, chns, + chns_rm, expr, exprl, qry_mask, @@ -225,18 +233,20 @@ def evaluate_expression( var_ph, defv, ) - elif "vov" == mode: + elif "vectorize" == mode: return evaluate_to_vector( idx, ids, f_hit, f_dsp, chns, + chns_rm, expr, exprl, qry_mask, nrows, var_ph, + defv, ) elif "any" == mode: return evaluate_to_any( @@ -245,6 +255,7 @@ def evaluate_expression( f_hit, f_dsp, chns, + chns_rm, expr, exprl, qry_mask, @@ -259,6 +270,7 @@ def evaluate_expression( f_hit, f_dsp, chns, + chns_rm, expr, exprl, qry_mask, @@ -277,6 +289,23 @@ def find_parameters( idx_ch: np.ndarray, exprl: list, ) -> dict: + """ + Wraps :func:`load_vars_to_nda` to return parameters from hit and dsp tiers. 
+ + Parameters + ---------- + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + ch + rawid in the tiers + idx_ch + index array of entries to be read from files + exprl + list of tuples (tier, field) to be found in the hit/dsp tiers + """ + # find fields in either dsp, hit var = load_vars_to_nda(f_hit, ch, exprl, idx_ch) dsp_dic = load_vars_to_nda(f_dsp, ch, exprl, idx_ch) @@ -284,22 +313,33 @@ def find_parameters( return dsp_dic | var -def load_vars_to_nda( - f_evt: str, group: str, exprl: list, idx: np.ndarray = None -) -> dict: +def load_vars_to_nda(f: str, group: str, exprl: list, idx: np.ndarray = None) -> dict: + """ + Maps parameter expressions to parameters if found in f. + Blows up VectorOfVectors to ArrayOfEqualSizedArrays. + + Parameters + ---------- + f + Path to a LGDO file + group + additional group in f + idx + index array of entries to be read from files + exprl + list of parameter-tuples (root_group, field) to be found in f + """ + lstore = store.LH5Store() var = { f"{e[0]}_{e[1]}": lstore.read_object( f"{group.replace('/','')}/{e[0]}/{e[1]}", - f_evt, + f, idx=idx, )[0] for e in exprl if e[1] - in [ - x.split("/")[-1] - for x in store.ls(f_evt, f"{group.replace('/','')}/{e[0]}/") - ] + in [x.split("/")[-1] for x in store.ls(f, f"{group.replace('/','')}/{e[0]}/")] } # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) @@ -326,20 +366,164 @@ def load_vars_to_nda( return var +def get_data_at_channel( + ch: str, + idx_ch: np.ndarray, + expr: str, + exprl: list, + var_ph: dict, + is_evaluated: bool, + f_hit: str, + f_dsp: str, + outsize: int, + defv, +) -> np.ndarray: + """ + Evaluates an expression and returns the result + + Parameters + ---------- + ch + rawid of channel to be evaluated + idx_ch + array of indices to be evaluated + expr + expression to be evaluated + exprl + list of parameter-tuples (root_group, field) found in the expression + var_ph + dict of additional parameters 
that are not channel dependent + is_evaluated + if false, the expression does not get evaluated but an array of default values is returned + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + outsize + size of the return array + defv + default value + """ + + if not is_evaluated: + res = np.full(outsize, defv, dtype=type(defv)) + elif "tcm.array_id" == expr: + res = np.full(outsize, int(ch[2:]), dtype=int) + else: + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + # move tier+dots in expression to underscores (e.g. evt.foo -> evt_foo) + res = eval( + expr.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_"), + var, + ) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(outsize, res, dtype=type(res)) + + return res + + +def get_mask_from_query( + qry: str | np.ndarray, + length: int, + ch: str, + idx_ch: np.ndarray, + f_hit: str, + f_dsp: str, +) -> np.ndarray: + """ + Evaluates an query expression and returns a mask accordingly + + Parameters + ---------- + qry + query expression + length + length of the return mask + ch + rawid of channel to be evaluated + idx_ch + array of indices to be evaluated + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + """ + + # get sub evt based query condition if needed + if isinstance(qry, str): + qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) + qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) + limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) + + # or forward the array + elif isinstance(qry, np.ndarray): + limarr = qry + + # if no condition, it must be true + else: + limarr = np.ones(length).astype(bool) + + if limarr.dtype != bool: + limarr = limarr.astype(bool) + + return limarr + + def evaluate_to_first( idx: np.ndarray, ids: np.ndarray, f_hit: str, f_dsp: str, chns: list, + chns_rm: list, expr: str, 
exprl: list, qry: str | np.ndarray, nrows: int, - sorter: list, + sorter: tuple, var_ph: dict = None, defv=np.nan, ) -> dict: + """ + aggregates across channels by returning the expression of the channel with smallest value of sorter. + + Parameters + ---------- + idx + tcm index array + ids + tcm id array + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + list of channels to be aggregated + chns_rm + list of channels to be skipped from evaluation and set to default value + expr + expression string to be evaluated + exprl + list of dsp/hit/evt parameter tuples in expression (tier,field) + qry + query expression to mask aggregation + nrows + length of output array + sorter + tuple of field in hit/dsp/evt tier to evaluate (tier,field) + var_ph + dictionary of evt and additional parameters and their values + defv + default value + """ + # define dimension of output array out = np.full(nrows, defv, dtype=type(defv)) out_chs = np.zeros(len(out), dtype=int) @@ -349,41 +533,22 @@ def evaluate_to_first( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - if "tcm.array_id" == expr: - res = np.full(len(out), int(ch[2:]), dtype=int) - else: - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph - - # evaluate expression - # move tier+dots in expression to underscores (e.g. 
evt.foo -> evt_foo) - res = eval( - expr.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_"), - var, - ) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get sub evt based query condition if needed - if isinstance(qry, str): - qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) - qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) - limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) - - # or forward the array - elif isinstance(qry, np.ndarray): - limarr = qry - - # if no condition, it must be true - else: - limarr = np.ones(len(res)).astype(bool) + # evaluate at channel + res = get_data_at_channel( + ch, + idx_ch, + expr, + exprl, + var_ph, + ch not in chns_rm, + f_hit, + f_dsp, + len(out), + defv, + ) - if limarr.dtype != bool: - limarr = limarr.astype(bool) + # get mask from query + limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) # append to out according to mode == first if ch == chns[0]: @@ -410,14 +575,48 @@ def evaluate_to_last( f_hit: str, f_dsp: str, chns: list, + chns_rm: list, expr: str, exprl: list, qry: str | np.ndarray, nrows: int, - sorter: list, + sorter: tuple, var_ph: dict = None, defv=np.nan, ) -> dict: + """ + aggregates across channels by returning the expression of the channel with largest value of sorter. 
+ + Parameters + ---------- + idx + tcm index array + ids + tcm id array + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + list of channels to be aggregated + chns_rm + list of channels to be skipped from evaluation and set to default value + expr + expression string to be evaluated + exprl + list of dsp/hit/evt parameter tuples in expression (tier,field) + qry + query expression to mask aggregation + nrows + length of output array + sorter + tuple of field in hit/dsp/evt tier to evaluate (tier,field) + var_ph + dictionary of evt and additional parameters and their values + defv + default value + """ + # define dimension of output array out = np.full(nrows, defv, dtype=type(defv)) out_chs = np.zeros(len(out), dtype=int) @@ -426,41 +625,24 @@ def evaluate_to_last( for ch in chns: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - if "tcm.array_id" == expr: - res = np.full(len(out), int(ch[2:]), dtype=int) - else: - # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph - - # evaluate expression - # move tier+dots in expression to underscores (e.g. 
evt.foo -> evt_foo) - res = eval( - expr.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_"), - var, - ) - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get sub evt based query condition if needed - if isinstance(qry, str): - qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) - qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) - limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) + # evaluate at channel + res = get_data_at_channel( + ch, + idx_ch, + expr, + exprl, + var_ph, + ch not in chns_rm, + f_hit, + f_dsp, + len(out), + defv, + ) - # or forward the array - elif isinstance(qry, np.ndarray): - limarr = qry + # get mask from query + limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) - # if no condition, it must be true - else: - limarr = np.ones(len(res)).astype(bool) - if limarr.dtype != bool: - limarr = limarr.astype(bool) # append to out according to mode == last # find if sorter is in hit or dsp t0 = store.load_nda( @@ -483,6 +665,7 @@ def evaluate_to_tot( f_hit: str, f_dsp: str, chns: list, + chns_rm: list, expr: str, exprl: list, qry: str | np.ndarray, @@ -490,6 +673,37 @@ def evaluate_to_tot( var_ph: dict = None, defv=np.nan, ) -> dict: + """ + aggregates by summation across channels. 
+ + Parameters + ---------- + idx + tcm index array + ids + tcm id array + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + list of channels to be aggregated + chns_rm + list of channels to be skipped from evaluation and set to default value + expr + expression string to be evaluated + exprl + list of dsp/hit/evt parameter tuples in expression (tier,field) + qry + query expression to mask aggregation + nrows + length of output array + var_ph + dictionary of evt and additional parameters and their values + defv + default value + """ + # define dimension of output array out = np.full(nrows, defv, dtype=type(defv)) @@ -497,45 +711,26 @@ def evaluate_to_tot( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - if "tcm.array_id" == expr: - res = np.full(len(out), int(ch[2:]), dtype=int) - else: - # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph - - # evaluate expression - # move tier+dots in expression to underscores (e.g. 
evt.foo -> evt_foo) - res = eval( - expr.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_"), - var, - ) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get sub evt based query condition if needed - if isinstance(qry, str): - qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) - qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) - limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) - - # or forward the array - elif isinstance(qry, np.ndarray): - limarr = qry + res = get_data_at_channel( + ch, + idx_ch, + expr, + exprl, + var_ph, + ch not in chns_rm, + f_hit, + f_dsp, + len(out), + defv, + ) - # if no condition, it must be true - else: - limarr = np.ones(len(res)).astype(bool) + # get mask from query + limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) # append to out according to mode == tot if res.dtype == bool: res = res.astype(int) - if limarr.dtype != bool: - limarr = limarr.astype(bool) + out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) return {"values": out} @@ -547,6 +742,7 @@ def evaluate_to_any( f_hit: str, f_dsp: str, chns: list, + chns_rm: list, expr: str, exprl: list, qry: str | np.ndarray, @@ -554,6 +750,37 @@ def evaluate_to_any( var_ph: dict = None, defv=np.nan, ) -> dict: + """ + aggregates by logical or operation across channels. If the expression evaluates to a non boolean value it is casted to bool. 
+ + Parameters + ---------- + idx + tcm index array + ids + tcm id array + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + list of channels to be aggregated + chns_rm + list of channels to be skipped from evaluation and set to default value + expr + expression string to be evaluated + exprl + list of dsp/hit/evt parameter tuples in expression (tier,field) + qry + query expression to mask aggregation + nrows + length of output array + var_ph + dictionary of evt and additional parameters and their values + defv + default value + """ + # define dimension of output array out = np.full(nrows, defv, dtype=type(defv)) @@ -561,45 +788,26 @@ def evaluate_to_any( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - if "tcm.array_id" == expr: - res = np.full(len(out), int(ch[2:]), dtype=int) - else: - # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph - - # evaluate expression - # move tier+dots in expression to underscores (e.g. 
evt.foo -> evt_foo) - res = eval( - expr.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_"), - var, - ) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get sub evt based query condition if needed - if isinstance(qry, str): - qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) - qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) - limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) - - # or forward the array - elif isinstance(qry, np.ndarray): - limarr = qry + res = get_data_at_channel( + ch, + idx_ch, + expr, + exprl, + var_ph, + ch not in chns_rm, + f_hit, + f_dsp, + len(out), + defv, + ) - # if no condition, it must be true - else: - limarr = np.ones(len(res)).astype(bool) + # get mask from query + limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) # append to out according to mode == any if res.dtype != bool: res = res.astype(bool) - if limarr.dtype != bool: - limarr = limarr.astype(bool) + out[idx_ch] = out[idx_ch] | (res & limarr) return {"values": out} @@ -611,6 +819,7 @@ def evaluate_to_all( f_hit: str, f_dsp: str, chns: list, + chns_rm: list, expr: str, exprl: list, qry: str | np.ndarray, @@ -618,6 +827,37 @@ def evaluate_to_all( var_ph: dict = None, defv=np.nan, ) -> dict: + """ + aggregates by logical and operation across channels. If the expression evaluates to a non boolean value it is casted to bool. 
+ + Parameters + ---------- + idx + tcm index array + ids + tcm id array + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + list of channels to be aggregated + chns_rm + list of channels to be skipped from evaluation and set to default value + expr + expression string to be evaluated + exprl + list of dsp/hit/evt parameter tuples in expression (tier,field) + qry + query expression to mask aggregation + nrows + length of output array + var_ph + dictionary of evt and additional parameters and their values + defv + default value + """ + # define dimension of output array out = np.full(nrows, defv, dtype=type(defv)) @@ -625,44 +865,26 @@ def evaluate_to_all( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - if "tcm.array_id" == expr: - res = np.full(len(out), int(ch[2:]), dtype=int) - else: - # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph - - # evaluate expression - res = eval( - expr.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_"), - var, - ) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get sub evt based query condition if needed - if isinstance(qry, str): - qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) - qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) - limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) - - # or forward the array - elif isinstance(qry, np.ndarray): - limarr = qry + res = get_data_at_channel( + ch, + idx_ch, + expr, + exprl, + var_ph, + ch not in chns_rm, + f_hit, + f_dsp, + len(out), + defv, + ) - # if no condition, it must be true - else: - limarr = np.ones(len(res)).astype(bool) + # get mask from query + limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) # append to out according to mode == all if res.dtype != bool: res = 
res.astype(bool) - if limarr.dtype != bool: - limarr = limarr.astype(bool) + out[idx_ch] = out[idx_ch] & res & limarr return {"values": out} @@ -674,36 +896,60 @@ def evaluate_at_channel( f_hit: str, f_dsp: str, chns: list, + chns_rm: list, expr: str, exprl: list, ch_comp: Array, var_ph: dict = None, defv=np.nan, ) -> dict: + """ + aggregates by evaluating the expression at a given channel + + Parameters + ---------- + idx + tcm index array + ids + tcm id array + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + list of channels to be aggregated + chns_rm + list of channels to be skipped from evaluation and set to default value + expr + expression string to be evaluated + exprl + list of dsp/hit/evt parameter tuples in expression (tier,field) + ch_comp + array of rawids at which the expression is evaluated + var_ph + dictionary of evt and additional parameters and their values + defv + default value + """ + out = np.full(len(ch_comp), defv, dtype=type(defv)) for ch in chns: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - if "tcm.array_id" == expr: - res = np.full(len(out), int(ch[2:]), dtype=int) - else: - # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph - - # evaluate expression - res = eval( - expr.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_"), - var, - ) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) + res = get_data_at_channel( + ch, + idx_ch, + expr, + exprl, + var_ph, + ch not in chns_rm, + f_hit, + f_dsp, + len(out), + defv, + ) out[idx_ch] = np.where(int(ch[2:]) == ch_comp.nda, res, out[idx_ch]) @@ -718,43 +964,68 @@ def evaluate_at_channel_vov( expr: str, exprl: list, ch_comp: VectorOfVectors, + chns_rm: list, var_ph: dict = None, + defv=np.nan, ) -> dict: + """ + same as :func:`evaluate_at_channel` but 
evaluates expression at non flat channels VectorOfVectors. + + Parameters + ---------- + idx + tcm index array + ids + tcm id array + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + expr + expression string to be evaluated + exprl + list of dsp/hit/evt parameter tuples in expression (tier,field) + ch_comp + array of rawids at which the expression is evaluated + chns_rm + list of channels to be skipped from evaluation and set to default value + var_ph + dictionary of evt and additional parameters and their values + defv + default value + """ + # blow up vov to aoesa out = ch_comp.to_aoesa().nda chns = np.unique(out[~np.isnan(out)]).astype(int) - + type_name = None for ch in chns: # get index list for this channel to be loaded idx_ch = idx[ids == ch] - - if "tcm.array_id" == expr: - res = np.full(len(out), int(ch[2:]), dtype=int) - else: - # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, f"ch{ch}", idx_ch, exprl) | var_ph - - # evaluate expression - res = eval( - expr.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_"), - var, - ) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) + res = get_data_at_channel( + f"ch{ch}", + idx_ch, + expr, + exprl, + var_ph, + f"ch{ch}" not in chns_rm, + f_hit, + f_dsp, + len(out), + defv, + ) # see in which events the current channel is present mask = (out == ch).any(axis=1) out[out == ch] = res[mask] + if ch == chns[0]: + type_name = res.dtype + # ok now implode the table again out = VectorOfVectors( - flattened_data=out.flatten()[~np.isnan(out.flatten())].astype(res.dtype), + flattened_data=out.flatten()[~np.isnan(out.flatten())].astype(type_name), cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)), ) return {"values": out, "channels": ch_comp} @@ -766,18 +1037,46 @@ def evaluate_to_vector( f_hit: str, f_dsp: str, chns: list, + chns_rm: 
list, expr: str, exprl: list, qry: str | np.ndarray, nrows: int, var_ph: dict = None, + defv=np.nan, ) -> dict: """ - Allows the evaluation as a vector of vectors. - Returns a dictionary of values: VoV of requested values - and channels: VoV of same dimensions with requested channel_id + Aggregates by returning a VectorOfVector of evaluated expressions of channels that fulfill a query expression. + + Parameters + ---------- + idx + tcm index array + ids + tcm id array + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + list of channels to be aggregated + chns_rm + list of channels to be skipped from evaluation and set to default value + expr + expression string to be evaluated + exprl + list of dsp/hit/evt parameter tuples in expression (tier,field) + qry + query expression to mask aggregation + nrows + length of output VectorOfVectors + ch_comp + array of rawids at which the expression is evaluated + var_ph + dictionary of evt and additional parameters and their values + defv + default value """ - # raise NotImplementedError # define dimension of output array out = np.full((nrows, len(chns)), np.nan) @@ -788,41 +1087,22 @@ def evaluate_to_vector( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - if "tcm.array_id" == expr: - res = np.full(len(out), int(ch[2:]), dtype=int) - else: - # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph - - # evaluate expression - res = eval( - expr.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_"), - var, - ) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get sub evt based query condition if needed - if isinstance(qry, str): - qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) - qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) - limarr = eval(qry.replace("dsp.", 
"dsp_").replace("hit.", "hit_"), qry_var) - - # or forward the array - elif isinstance(qry, np.ndarray): - limarr = qry + res = get_data_at_channel( + ch, + idx_ch, + expr, + exprl, + var_ph, + ch not in chns_rm, + f_hit, + f_dsp, + len(out), + defv, + ) - # if no condition, it must be true - else: - limarr = np.ones(len(res)).astype(bool) + # get mask from query + limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) - if limarr.dtype != bool: - limarr = limarr.astype(bool) # append to out according to mode == vov out[:, i][limarr] = res[limarr] out_chs[:, i][limarr] = int(ch[2:]) @@ -869,60 +1149,54 @@ def build_evt( f_evt name of the output file evt_config - name of JSON file or dict defining evt fields. Channel lists can be defined by the user or by using the keyword "meta" followed by the system (geds/spms) and the usability (on,no_psd,ac,off) separated by underscores (e.g. "meta_geds_on") in the "channels" dictionary. The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "mode" defines how the channels should be combined (see evaluate_expression). For first/last modes a "get_ch" flag can be defined, if true an additional field with the sufix "_id" is returned containing the rawid of the respective value in the field without the suffix. "expression" defnies the mathematical/special function to apply (see evaluate_expression), "parameters" defines any other parameter used in expression. For example: + name of JSON file or dict defining evt fields. Channel lists can be defined by importing a meta module. The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "aggregation_mode" defines how the channels should be combined (see evaluate_expression). 
"expression" defnies the mathematical/special function to apply (see evaluate_expression), + "query" defines an expression to mask the aggregation. + "parameters" defines any other parameter used in expression. For example: .. code-block::json { "channels": { - "geds_on": "meta_geds_on", - "geds_no_psd": "meta_geds_no_psd", - "geds_ac": "meta_geds_ac", - "spms_on": "meta_spms_on", - "pulser": "PULS01", - "baseline": "BSLN01", - "muon": "MUON01", - "ts_master":"S060" + "geds_on": ["ch1084803", "ch1084804", "ch1121600"], + "spms_on": ["ch1057600", "ch1059201", "ch1062405"], + "muon": "ch1027202", }, "operations": { "energy":{ - "channels": ["geds_on","geds_no_psd","geds_ac"], - "mode": "first>25", - "get_ch": true, - "expression": "cuspEmax_ctc_cal", - "initial": "np.nan" - }, - "energy_on":{ - "channels": ["geds_on"], - "mode": "vov>25", - "get_ch": true, - "expression": "cuspEmax_ctc_cal" + "channels": "geds_on", + "aggregation_mode": "vectorize", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "hit.cuspEmax_ctc_cal" }, - "aoe":{ - "channels": ["geds_on"], - "mode": "energy_id", - "expression": "AoE_Classifier", - "initial": "np.nan" + "energy_id":{ + "channels": "geds_on", + "aggregation_mode": "vectorize", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "tcm.array_id" }, - "is_muon_tagged":{ + "is_muon_rejected":{ "channels": "muon", - "mode": "any", - "expression": "wf_max>a", + "aggregation_mode": "any", + "expression": "dsp.wf_max>a", "parameters": {"a":15100}, "initial": false }, "multiplicity":{ "channels": ["geds_on","geds_no_psd","geds_ac"], - "mode": "tot", - "expression": "cuspEmax_ctc_cal > a", + "aggregation_mode": "sum", + "expression": "hit.cuspEmax_ctc_cal > a", "parameters": {"a":25}, "initial": 0 }, + "t0":{ + "aggregation_mode": "keep_at:evt.energy_id", + "expression": "dsp.tp_0_est" + }, "lar_energy":{ "channels": "spms_on", - "mode": "func", - "expression": "modules.spm.get_energy(0.5,t0,48000,1000,5000)" - } + "aggregation_mode": 
"function", + "expression": ".modules.spm.get_energy(0.5,evt.t0,48000,1000,5000)" + }, } } @@ -1035,6 +1309,16 @@ def build_evt( chns_e = list( itertools.chain.from_iterable([chns[e] for e in v["channels"]]) ) + chns_rm = [] + if "exclude_channels" in v.keys(): + if isinstance(v["exclude_channels"], str): + chns_rm = chns[v["exclude_channels"]] + elif isinstance(v["exclude_channels"], list): + chns_rm = list( + itertools.chain.from_iterable( + [chns[e] for e in v["exclude_channels"]] + ) + ) pars, qry, defaultv = None, None, np.nan if "parameters" in v.keys(): @@ -1050,10 +1334,10 @@ def build_evt( f_hit, f_dsp, chns_e, + chns_rm, v["aggregation_mode"], v["expression"], nrows, - group, pars, qry, defaultv, diff --git a/src/pygama/evt/modules/legend_meta.py b/src/pygama/evt/modules/legend_meta.py index d188c2a14..8e98f6385 100644 --- a/src/pygama/evt/modules/legend_meta.py +++ b/src/pygama/evt/modules/legend_meta.py @@ -16,12 +16,17 @@ def legend_meta(params: dict) -> list: for e in chmap.map("daq.rawid") if chmap.map("daq.rawid")[e]["system"] == params["system"] ] - if "usability" not in params.keys(): - return tmp - else: - return [ + if "usability" in params.keys(): + tmp = [ e for e in tmp if chmap.map("daq.rawid")[int(e[2:])]["analysis"]["usability"] == params["usability"] ] + if "geds" == params["system"] and "type" in params.keys(): + tmp = [ + e + for e in tmp + if chmap.map("daq.rawid")[int(e[2:])]["type"] == params["type"] + ] + return tmp diff --git a/tests/evt/configs/basic-evt-config.json b/tests/evt/configs/basic-evt-config.json index c573c89fb..1fd0527cb 100644 --- a/tests/evt/configs/basic-evt-config.json +++ b/tests/evt/configs/basic-evt-config.json @@ -12,14 +12,14 @@ }, "energy": { "channels": "geds_on", - "aggregation_mode": ["first", "dsp.tp_0_est"], + "aggregation_mode": "first_at:dsp.tp_0_est", "query": "hit.cuspEmax_ctc_cal>25", "expression": "hit.cuspEmax_ctc_cal", "initial": "np.nan" }, "energy_id": { "channels": "geds_on", - 
"aggregation_mode": ["first", "dsp.tp_0_est"], + "aggregation_mode": "first_at:dsp.tp_0_est", "query": "hit.cuspEmax_ctc_cal>25", "expression": "tcm.array_id", "initial": 0 @@ -38,7 +38,7 @@ }, "energy_aux": { "channels": "geds_on", - "aggregation_mode": ["last", "dsp.tp_0_est"], + "aggregation_mode": "last_at:dsp.tp_0_est", "query": "hit.cuspEmax_ctc_cal>25", "expression": "hit.cuspEmax_ctc_cal", "initial": "np.nan" @@ -52,19 +52,19 @@ }, "is_usable_aoe": { "channels": "geds_on", - "aggregation_mode": "evt.energy_id", + "aggregation_mode": "keep_at:evt.energy_id", "expression": "True", "initial": false }, "aoe": { "channels": "geds_on", - "aggregation_mode": "evt.energy_id", + "aggregation_mode": "keep_at:evt.energy_id", "expression": "hit.AoE_Classifier", "initial": "np.nan" }, "is_aoe_rejected": { "channels": "geds_on", - "aggregation_mode": "evt.energy_id", + "aggregation_mode": "keep_at:evt.energy_id", "expression": "~(hit.AoE_Double_Sided_Cut)", "initial": false } diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index d4d6c1148..07262cc3d 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -6,21 +6,21 @@ "operations": { "energy_first": { "channels": "geds_on", - "aggregation_mode": ["first", "dsp.tp_0_est"], + "aggregation_mode": "first_at:dsp.tp_0_est", "query": "hit.cuspEmax_ctc_cal>25", "expression": "hit.cuspEmax_ctc_cal", "initial": "np.nan" }, "energy_first_id": { "channels": "geds_on", - "aggregation_mode": ["first", "dsp.tp_0_est"], + "aggregation_mode": "first_at:dsp.tp_0_est", "query": "hit.cuspEmax_ctc_cal>25", "expression": "tcm.array_id", "initial": 0 }, "t0": { - "channels": ["geds_on"], - "aggregation_mode": "evt.energy_first_id", + "channels": "geds_on", + "aggregation_mode": "keep_at:evt.energy_first_id", "expression": "dsp.tp_0_est", "initial": 0.0 }, diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json 
b/tests/evt/configs/module-test-t0-vov-evt-config.json index 06918a421..61782b01e 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -6,19 +6,19 @@ "operations": { "energy": { "channels": "geds_on", - "aggregation_mode": "vov", + "aggregation_mode": "vectorize", "query": "hit.cuspEmax_ctc_cal>25", "expression": "hit.cuspEmax_ctc_cal" }, "energy_id": { "channels": "geds_on", - "aggregation_mode": "vov", + "aggregation_mode": "vectorize", "query": "hit.cuspEmax_ctc_cal>25", "expression": "tcm.array_id" }, "t0": { - "channels": ["geds_on"], - "aggregation_mode": "evt.energy_id", + "channels": "geds_on", + "aggregation_mode": "keep_at:evt.energy_id", "expression": "dsp.tp_0_est", "initial": 0.0 }, diff --git a/tests/evt/configs/query-test-evt-config.json b/tests/evt/configs/query-test-evt-config.json index 0bf7fe4f9..7998a496c 100644 --- a/tests/evt/configs/query-test-evt-config.json +++ b/tests/evt/configs/query-test-evt-config.json @@ -19,27 +19,27 @@ }, "test_first": { "channels": "geds_on", - "aggregation_mode": ["first", "dsp.tp_0_est"], + "aggregation_mode": "first_at:dsp.tp_0_est", "query": "evt.multiplicity == 1", "expression": "True", "initial": false }, "test_first2": { "channels": "geds_on", - "aggregation_mode": ["first", "dsp.tp_0_est"], + "aggregation_mode": "first_at:dsp.tp_0_est", "expression": "True", "initial": false }, "test_last": { "channels": "geds_on", - "aggregation_mode": ["last", "dsp.tp_0_est"], + "aggregation_mode": "last_at:dsp.tp_0_est", "query": "evt.multiplicity == 1", "expression": "True", "initial": false }, "test_last2": { "channels": "geds_on", - "aggregation_mode": ["last", "dsp.tp_0_est"], + "aggregation_mode": "last_at:dsp.tp_0_est", "expression": "True", "initial": false }, @@ -73,14 +73,14 @@ }, "test_vov": { "channels": "geds_on", - "aggregation_mode": "vov", + "aggregation_mode": "vectorize", "query": "evt.multiplicity == 1", "expression": "True", 
"initial": false }, "test_vov2": { "channels": "geds_on", - "aggregation_mode": "vov", + "aggregation_mode": "vectorize", "expression": "True", "initial": false } diff --git a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json index 6f057d18c..f5b3679bb 100644 --- a/tests/evt/configs/vov-test-evt-config.json +++ b/tests/evt/configs/vov-test-evt-config.json @@ -5,18 +5,18 @@ "operations": { "energy": { "channels": "geds_on", - "aggregation_mode": "vov", + "aggregation_mode": "vectorize", "query": "hit.cuspEmax_ctc_cal>25", "expression": "hit.cuspEmax_ctc_cal" }, "energy_id": { "channels": "geds_on", - "aggregation_mode": "vov", + "aggregation_mode": "vectorize", "query": "hit.cuspEmax_ctc_cal>25", "expression": "tcm.array_id" }, "aoe": { - "aggregation_mode": "evt.energy_id", + "aggregation_mode": "keep_at:evt.energy_id", "expression": "hit.AoE_Classifier" }, "multiplicity": { From d069968897cf433d04cba5f24481a1c25c42f770 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 1 Dec 2023 23:40:37 +0100 Subject: [PATCH 20/73] add sorter to vector evaluation --- src/pygama/evt/build_evt.py | 129 ++++++++++++++++++++++++++++++++---- tests/evt/test_build_evt.py | 49 ++++++++++++++ 2 files changed, 164 insertions(+), 14 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 249d08c2f..5cb25c711 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -49,6 +49,7 @@ def evaluate_expression( para: dict = None, qry: str = None, defv=np.nan, + sorter: str = None, ) -> dict: """ Evaluates the expression defined by the user across all channels according to the mode @@ -86,6 +87,8 @@ def evaluate_expression( Dictionary of parameters defined in the "parameters" field in the configuration JSON file. 
defv default value of evaluation + sorter + can be used to sort vector outputs according to sorter expression (see :func:`evaluate_to_vector`) """ # find parameters in evt file or in parameters @@ -247,6 +250,7 @@ def evaluate_expression( nrows, var_ph, defv, + sorter, ) elif "any" == mode: return evaluate_to_any( @@ -410,7 +414,10 @@ def get_data_at_channel( elif "tcm.array_id" == expr: res = np.full(outsize, int(ch[2:]), dtype=int) else: - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) + + if var_ph is not None: + var = var | var_ph # evaluate expression # move tier+dots in expression to underscores (e.g. evt.foo -> evt_foo) @@ -1031,7 +1038,7 @@ def evaluate_at_channel_vov( return {"values": out, "channels": ch_comp} -def evaluate_to_vector( +def evaluate_to_aoesa( idx: np.ndarray, ids: np.ndarray, f_hit: str, @@ -1044,9 +1051,10 @@ def evaluate_to_vector( nrows: int, var_ph: dict = None, defv=np.nan, -) -> dict: + missv=np.nan, +) -> np.ndarray: """ - Aggregates by returning a VectorOfVector of evaluated expressions of channels that fulfill a query expression. + Aggregates by returning a ArrayOfEqualSizedArrays of evaluated expressions of channels that fulfill a query expression. 
Parameters ---------- @@ -1076,11 +1084,13 @@ def evaluate_to_vector( dictionary of evt and additional parameters and their values defv default value + missv + missing value + sorter + sorts the entries in the vector according to sorter expression """ - # define dimension of output array - out = np.full((nrows, len(chns)), np.nan) - out_chs = np.full((nrows, len(chns)), np.nan) + out = np.full((nrows, len(chns)), missv) i = 0 for ch in chns: @@ -1105,22 +1115,110 @@ def evaluate_to_vector( # append to out according to mode == vov out[:, i][limarr] = res[limarr] - out_chs[:, i][limarr] = int(ch[2:]) i += 1 + return out + + +def evaluate_to_vector( + idx: np.ndarray, + ids: np.ndarray, + f_hit: str, + f_dsp: str, + chns: list, + chns_rm: list, + expr: str, + exprl: list, + qry: str | np.ndarray, + nrows: int, + var_ph: dict = None, + defv=np.nan, + sorter: str = None, +) -> dict: + """ + Aggregates by returning a VectorOfVector of evaluated expressions of channels that fulfill a query expression. + + Parameters + ---------- + idx + tcm index array + ids + tcm id array + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + list of channels to be aggregated + chns_rm + list of channels to be skipped from evaluation and set to default value + expr + expression string to be evaluated + exprl + list of dsp/hit/evt parameter tuples in expression (tier,field) + qry + query expression to mask aggregation + nrows + length of output VectorOfVectors + ch_comp + array of rawids at which the expression is evaluated + var_ph + dictionary of evt and additional parameters and their values + defv + default value + sorter + sorts the entries in the vector according to sorter expression. 
acend_by: results in an vector ordered ascending, decend_by: sorts descending + """ + out = evaluate_to_aoesa( + idx, + ids, + f_hit, + f_dsp, + chns, + chns_rm, + expr, + exprl, + qry, + nrows, + var_ph, + defv, + np.nan, + ) + + # if a sorter is given sort accordingly + if sorter is not None: + md, fld = sorter.split(":") + s_val = evaluate_to_aoesa( + idx, + ids, + f_hit, + f_dsp, + chns, + chns_rm, + fld, + [tuple(fld.split("."))], + None, + nrows, + ) + if "ascend_by" == md: + out[np.arange(len(out))[:, None], np.argsort(s_val)] + + elif "descend_by" == md: + out[np.arange(len(out))[:, None], np.argsort(-s_val)] + else: + raise ValueError( + "sorter values can only have 'ascend_by' or 'descend_by' prefixes" + ) + # This can be smarter # shorten to vov (FUTURE: replace with awkward) out = VectorOfVectors( flattened_data=out.flatten()[~np.isnan(out.flatten())], cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)), ) - out_chs = VectorOfVectors( - flattened_data=out_chs.flatten()[~np.isnan(out_chs.flatten())].astype(int), - cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out_chs), axis=1)), - ) - return {"values": out, "channels": out_chs} + return {"values": out} def build_evt( @@ -1320,13 +1418,15 @@ def build_evt( ) ) - pars, qry, defaultv = None, None, np.nan + pars, qry, defaultv, srter = None, None, np.nan, None if "parameters" in v.keys(): pars = v["parameters"] if "query" in v.keys(): qry = v["query"] if "initial" in v.keys() and not v["initial"] == "np.nan": defaultv = v["initial"] + if "sort" in v.keys(): + srter = v["sort"] result = evaluate_expression( f_tcm, @@ -1341,6 +1441,7 @@ def build_evt( pars, qry, defaultv, + srter, ) obj = result["values"] diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index e7cae4e4d..e9a0b4e01 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -193,6 +193,55 @@ def test_query(lgnd_test_data, tmptestdir): assert len(ls(outfile, "/evt/")) == 12 +def 
test_vector_sort(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + f_tcm = lgnd_test_data.get_path(tcm_path) + f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")) + f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")) + + conf = { + "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, + "operations": { + "acend_id": { + "channels": "geds_on", + "aggregation_mode": "vectorize", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "tcm.array_id", + "sort": "ascend_by:dsp.tp_0_est", + }, + "t0_acend": { + "aggregation_mode": "keep_at:evt.acend_id", + "expression": "dsp.tp_0_est", + }, + "decend_id": { + "channels": "geds_on", + "aggregation_mode": "vectorize", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "tcm.array_id", + "sort": "descend_by:dsp.tp_0_est", + }, + "t0_decend": { + "aggregation_mode": "keep_at:evt.acend_id", + "expression": "dsp.tp_0_est", + }, + }, + } + build_evt(f_tcm, f_dsp, f_hit, outfile, conf) + + assert os.path.exists(outfile) + assert len(ls(outfile, "/evt/")) == 4 + lstore = store.LH5Store() + vov_t0, _ = lstore.read_object("/evt/t0_acend", outfile) + nda_t0 = vov_t0.to_aoesa().nda + assert ((np.diff(nda_t0) >= 0) | (np.isnan(np.diff(nda_t0)))).all() + vov_t0, _ = lstore.read_object("/evt/t0_decend", outfile) + nda_t0 = vov_t0.to_aoesa().nda + assert ((np.diff(nda_t0) <= 0) | (np.isnan(np.diff(nda_t0)))).all() + + def test_skimming(lgnd_test_data, tmptestdir): outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" From 2247ccb878c4bbebeeaa9f63a4ba19b434ebfa83 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 1 Dec 2023 23:52:35 +0100 Subject: 
[PATCH 21/73] change aggregation mode vectorize to gather --- src/pygama/evt/build_evt.py | 19 +++++++++---------- .../module-test-t0-vov-evt-config.json | 4 ++-- tests/evt/configs/query-test-evt-config.json | 4 ++-- tests/evt/configs/vov-test-evt-config.json | 4 ++-- tests/evt/test_build_evt.py | 4 ++-- 5 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 5cb25c711..fb2a73ed7 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -76,7 +76,7 @@ def evaluate_expression( - "any": aggregates by logical or. - "all": aggregates by logical and. - "keep_at:ch_field": aggregates according to passed ch_field - - "vectorize": Channels are not combined, but result saved as VectorOfVectors. + - "gather": Channels are not combined, but result saved as VectorOfVectors. qry A query that can mask the aggregation. expr @@ -236,7 +236,7 @@ def evaluate_expression( var_ph, defv, ) - elif "vectorize" == mode: + elif "gather" == mode: return evaluate_to_vector( idx, ids, @@ -1260,18 +1260,17 @@ def build_evt( "muon": "ch1027202", }, "operations": { - "energy":{ - "channels": "geds_on", - "aggregation_mode": "vectorize", - "query": "hit.cuspEmax_ctc_cal>25", - "expression": "hit.cuspEmax_ctc_cal" - }, "energy_id":{ "channels": "geds_on", - "aggregation_mode": "vectorize", + "aggregation_mode": "gather", "query": "hit.cuspEmax_ctc_cal>25", - "expression": "tcm.array_id" + "expression": "tcm.array_id", + "sort": "ascend_by:dsp.tp_0_est" }, + "energy":{ + "aggregation_mode": "keep_at:evt.energy_id", + "expression": "hit.cuspEmax_ctc_cal>25" + } "is_muon_rejected":{ "channels": "muon", "aggregation_mode": "any", diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index 61782b01e..6479d4587 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -6,13 
+6,13 @@ "operations": { "energy": { "channels": "geds_on", - "aggregation_mode": "vectorize", + "aggregation_mode": "gather", "query": "hit.cuspEmax_ctc_cal>25", "expression": "hit.cuspEmax_ctc_cal" }, "energy_id": { "channels": "geds_on", - "aggregation_mode": "vectorize", + "aggregation_mode": "gather", "query": "hit.cuspEmax_ctc_cal>25", "expression": "tcm.array_id" }, diff --git a/tests/evt/configs/query-test-evt-config.json b/tests/evt/configs/query-test-evt-config.json index 7998a496c..ff59e2a0b 100644 --- a/tests/evt/configs/query-test-evt-config.json +++ b/tests/evt/configs/query-test-evt-config.json @@ -73,14 +73,14 @@ }, "test_vov": { "channels": "geds_on", - "aggregation_mode": "vectorize", + "aggregation_mode": "gather", "query": "evt.multiplicity == 1", "expression": "True", "initial": false }, "test_vov2": { "channels": "geds_on", - "aggregation_mode": "vectorize", + "aggregation_mode": "gather", "expression": "True", "initial": false } diff --git a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json index f5b3679bb..9b0b37078 100644 --- a/tests/evt/configs/vov-test-evt-config.json +++ b/tests/evt/configs/vov-test-evt-config.json @@ -5,13 +5,13 @@ "operations": { "energy": { "channels": "geds_on", - "aggregation_mode": "vectorize", + "aggregation_mode": "gather", "query": "hit.cuspEmax_ctc_cal>25", "expression": "hit.cuspEmax_ctc_cal" }, "energy_id": { "channels": "geds_on", - "aggregation_mode": "vectorize", + "aggregation_mode": "gather", "query": "hit.cuspEmax_ctc_cal>25", "expression": "tcm.array_id" }, diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index e9a0b4e01..43105a31d 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -207,7 +207,7 @@ def test_vector_sort(lgnd_test_data, tmptestdir): "operations": { "acend_id": { "channels": "geds_on", - "aggregation_mode": "vectorize", + "aggregation_mode": "gather", "query": "hit.cuspEmax_ctc_cal>25", "expression": 
"tcm.array_id", "sort": "ascend_by:dsp.tp_0_est", @@ -218,7 +218,7 @@ def test_vector_sort(lgnd_test_data, tmptestdir): }, "decend_id": { "channels": "geds_on", - "aggregation_mode": "vectorize", + "aggregation_mode": "gather", "query": "hit.cuspEmax_ctc_cal>25", "expression": "tcm.array_id", "sort": "descend_by:dsp.tp_0_est", From 8ac2ebdf9c1d8192329adde6997210e9f8736dea Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Sat, 2 Dec 2023 00:28:50 +0100 Subject: [PATCH 22/73] renaming of legend meta module --- src/pygama/evt/modules/{legend_meta.py => legend.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/pygama/evt/modules/{legend_meta.py => legend.py} (94%) diff --git a/src/pygama/evt/modules/legend_meta.py b/src/pygama/evt/modules/legend.py similarity index 94% rename from src/pygama/evt/modules/legend_meta.py rename to src/pygama/evt/modules/legend.py index 8e98f6385..0bfe59d63 100644 --- a/src/pygama/evt/modules/legend_meta.py +++ b/src/pygama/evt/modules/legend.py @@ -1,5 +1,5 @@ """ -Module for importing channel lists from LEGEND meta data +Module provides LEGEND internal functions """ from importlib import import_module From b6ff699a93b79229d0e7cd0915aac3b059a38ddf Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Sat, 2 Dec 2023 13:43:17 +0100 Subject: [PATCH 23/73] Add skm tier --- src/pygama/skm/__init__.py | 7 + src/pygama/skm/build_skm.py | 316 +++++++++++++++++++++ tests/evt/configs/vov-test-evt-config.json | 13 +- tests/evt/test_build_evt.py | 2 +- tests/skm/configs/basic-skm-config.json | 35 +++ tests/skm/test_build_skm.py | 50 ++++ 6 files changed, 421 insertions(+), 2 deletions(-) create mode 100644 src/pygama/skm/__init__.py create mode 100644 src/pygama/skm/build_skm.py create mode 100644 tests/skm/configs/basic-skm-config.json create mode 100644 tests/skm/test_build_skm.py diff --git a/src/pygama/skm/__init__.py b/src/pygama/skm/__init__.py new file mode 100644 index 000000000..7b9ae88d2 --- /dev/null +++ 
b/src/pygama/skm/__init__.py @@ -0,0 +1,7 @@ +""" +Utilities for grouping hit data into events. +""" + +from .build_skm import build_skm + +__all__ = ["build_skm"] diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py new file mode 100644 index 000000000..aace36501 --- /dev/null +++ b/src/pygama/skm/build_skm.py @@ -0,0 +1,316 @@ +""" +This module implements routines to build the evt tier. +""" + +from __future__ import annotations + +import json +import logging +import os + +import awkward as ak +import h5py +import lgdo.lh5_store as store +import numpy as np +import pandas as pd +from lgdo import Array, ArrayOfEqualSizedArrays, VectorOfVectors + +log = logging.getLogger(__name__) + + +def vov_to_ak(vov: VectorOfVectors) -> ak.Array: + """ + Temporary function to convert VectorOfVectors to awkward arrays. This function will be removed soon. + + Parameters + ---------- + vov + VectorOfVectors to be converted. + """ + flattened_data = vov.flattened_data + cumulative_length = vov.cumulative_length + if isinstance(flattened_data, Array): + flattened_data = flattened_data.nda + if isinstance(cumulative_length, Array): + cumulative_length = cumulative_length.nda + + offsets = np.empty(len(cumulative_length) + 1, dtype=cumulative_length.dtype) + offsets[1:] = cumulative_length + offsets[0] = 0 + + layout = ak.contents.ListOffsetArray( + offsets=ak.index.Index(offsets), content=ak.contents.NumpyArray(flattened_data) + ) + return ak.Array(layout) + + +def vov_to_aoesa( + vov: VectorOfVectors, missing_value=np.nan, length: int = None +) -> ArrayOfEqualSizedArrays: + """ + Temporary function to convert VectorOfVectors to ArrayOfEqualSizedArrays. This function will be removed soon. + + Parameters + ---------- + vov + VectorOfVectors to be converted. + missing_value + missing value to be inserted. Determines the datatype of the output ArrayOfEqualSizedArrays + length + length of each row in the ArrayOfEqualSizedArrays. 
If the row in VectorOfVectors is shorter than length, the row gets padded with missing_value. If the row in VectorOfVectors is longer than length, the row gets clipped. + """ + arr = vov_to_ak(vov) + if length is not None: + max_len = length + else: + max_len = int(ak.max(ak.count(arr, axis=-1))) + return ArrayOfEqualSizedArrays( + nda=ak.fill_none(ak.pad_none(arr, max_len, clip=True), missing_value) + .to_numpy(allow_missing=False) + .astype(type(missing_value)), + attrs=vov.getattrs(), + ) + + +def build_skm( + f_evt: str | list, + f_skm: str, + skm_conf: dict | str, + wo_mode="w", + group: str = "/evt/", + skim_format: str = "parquet", +): + """ + Builds a skimmed file from a (set) of evt tier file(s). + + Parameters + ---------- + f_evt + list/path of evt file(s) + f_skm + name of the skm output file + skm_conf + name of JSON file or dict defining skm fields. multiplicity defines upto which row length VectorOfVector fields should be kept. Skimmed fields are forwarded from the evt tier and clipped/padded according to missing_value if needed. Global fields define an operation to reduce the dimension of VectorOfVector event fields. + For example: + + .. code-block::json + + { + "multiplicity": 2, + "index_field": "timestamp", + "skimmed_fields": { + "timestamp":{ + "evt_field": "timestamp" + }, + "is_muon_rejected":{ + "evt_field": "is_muon_rejected" + }, + "multiplicity":{ + "evt_field": "multiplicity" + }, + "energy":{ + "evt_field": "energy", + "missing_value": "np.nan" + }, + "energy_id":{ + "evt_field": "energy_id", + "missing_value": 0 + }, + "global_fields":{ + "energy_sum":{ + "aggregation_mode": "sum", + "evt_field": "energy" + }, + "is_all_physical":{ + "aggregation_mode": "all", + "evt_field": "is_physical" + }, + } + } + } + + wo_mode + writing mode. + - ``write_safe`` or ``w``: only proceed with writing if the file does not already exis. + - ``append`` or ``a``: append to file. + - ``overwrite`` or ``o``: replaces existing file. 
+ group + lh5 root group name of the evt tier + skim_format + data format of the skimmed output (hdf or parquet) + """ + + log = logging.getLogger(__name__) + log.info("Starting skimming") + log.debug(f"I am skimning {len(f_evt) if isinstance(f_evt,list) else 1} files") + tbl_cfg = skm_conf + if not isinstance(tbl_cfg, (str, dict)): + raise TypeError() + if isinstance(tbl_cfg, str): + with open(tbl_cfg) as f: + tbl_cfg = json.load(f) + + flds, flds_vov, flds_arr, multi = None, None, None, None + if "skimmed_fields" in tbl_cfg.keys(): + flds = tbl_cfg["skimmed_fields"].keys() + evt_flds = [(e, tbl_cfg["skimmed_fields"][e]["evt_field"]) for e in flds] + f = h5py.File(f_evt[0] if isinstance(f_evt, list) else f_evt, "r") + flds_vov = [ + x + for x in evt_flds + if x[1] + in [ + e.split("/")[-1] + for e in store.ls(f_evt[0] if isinstance(f_evt, list) else f_evt, group) + if "array<1>{array<1>{" in f[e].attrs.get("datatype") + ] + ] + flds_arr = [ + x + for x in evt_flds + if x not in flds_vov + and x[1] + in [ + e.split("/")[-1] + for e in store.ls(f_evt[0] if isinstance(f_evt, list) else f_evt, group) + ] + ] + + gflds = None + if "global_fields" in tbl_cfg.keys(): + gflds = list(tbl_cfg["global_fields"].keys()) + + if flds is None and gflds is None: + return + + # Check if multiplicity is given, if vector like fields are skimmed + if ( + isinstance(flds_vov, list) + and len(flds_vov) > 0 + and "multiplicity" not in tbl_cfg.keys() + ): + raise ValueError("If skiime fields are passed, multiplicity must be given") + + elif "multiplicity" in tbl_cfg.keys(): + multi = tbl_cfg["multiplicity"] + + # init pandas df + df = pd.DataFrame() + + # add array like fields + if isinstance(flds_arr, list): + log.debug("Crunching array-like fields") + df = df.join( + store.load_dfs(f_evt, [x[1] for x in flds_arr], group).rename( + columns={y: x for x, y in flds_arr} + ), + how="outer", + ) + + # take care of vector like fields + if isinstance(flds_vov, list): + log.debug("Processing 
VoV-like fields") + lstore = store.LH5Store() + for fld in flds_vov: + if "missing_value" not in tbl_cfg["skimmed_fields"][fld[0]].keys(): + raise ValueError( + f"({fld[0]}) is a VectorOfVector field and no missing_value is specified" + ) + vls, _ = lstore.read_object(group + fld[1], f_evt) + mv = tbl_cfg["skimmed_fields"][fld[0]]["missing_value"] + if mv in ["np.inf", "-np.inf", "np.nan"]: + mv = eval(mv) + out = vov_to_aoesa(vls, missing_value=mv, length=multi).nda + nms = [fld[0] + f"_{e}" for e in range(multi)] + df = df.join(pd.DataFrame(data=out, columns=nms), how="outer") + + # ok now build global fields if requested + if isinstance(gflds, list): + log.debug("Defining global fields") + for k in gflds: + if "aggregation_mode" not in tbl_cfg["global_fields"][k].keys(): + raise ValueError(f"global {k} operation needs aggregation mode") + if "evt_field" not in tbl_cfg["global_fields"][k].keys(): + raise ValueError(f"global {k} operation needs evt_field") + mode = tbl_cfg["global_fields"][k]["aggregation_mode"] + fld = tbl_cfg["global_fields"][k]["evt_field"] + + obj, _ = lstore.read_object(group + fld, f_evt) + if not isinstance(obj, VectorOfVectors): + raise ValueError( + f"global {k} operation not possible, since {fld} is not an VectorOfVectors" + ) + + obj_ak = vov_to_ak(obj) + if mode in [ + "sum", + "prod", + "nansum", + "nanprod", + "any", + "all", + "mean", + "std", + "var", + ]: + df = df.join( + pd.DataFrame( + data=getattr(ak, mode)(obj_ak, axis=-1).to_numpy( + allow_missing=False + ), + columns=[k], + ) + ) + + elif mode in ["min", "max"]: + val = getattr(ak, mode)(obj_ak, axis=-1, mask_identity=True) + if "missing_value" not in tbl_cfg["global_fields"][k].keys(): + raise ValueError( + f"global {k} {mode} operation needs a missing value assigned" + ) + mv = tbl_cfg["global_fields"][k]["missing_value"] + if mv in ["np.inf", "-np.inf"]: + mv = eval(mv) + val = ak.fill_none(val, mv) + df = df.join( + pd.DataFrame(data=val.to_numpy(allow_missing=False), 
columns=[k]) + ) + else: + raise ValueError("aggregation mode not supported") + + # Set an index column if specified + if "index_field" in tbl_cfg.keys(): + log.debug("Setting index") + if tbl_cfg["index_field"] in df.keys(): + df = df.set_index(tbl_cfg["index_field"]) + else: + raise ValueError( + "index field not found. Needs to be a previously defined skm field" + ) + + # last thing missing is writing it out + log.debug("saving skm file") + if skim_format not in ["parquet", "hdf"]: + raise ValueError("Not supported skim data format. Operations are hdf, parquet") + if wo_mode in ["w", "write_safe"]: + if os.path.exists(f_skm): + raise FileExistsError(f"Write_safe mode: {f_skm} exists.") + else: + if "hdf" == skim_format: + df.to_hdf(f_skm, key="df", mode="w") + elif "parquet" == skim_format: + df.to_parquet(f_skm) + elif wo_mode in ["o", "overwrite"]: + if "hdf" == skim_format: + df.to_hdf(f_skm, key="df", mode="w") + elif "parquet" == skim_format: + df.to_parquet(f_skm) + elif wo_mode in ["a", "append"]: + if "hdf" == skim_format: + df.to_hdf(f_skm, key="df", mode="a") + elif "parquet" == skim_format: + df.to_parquet(f_skm, append=True) + else: + raise ValueError(f"wo_mode {wo_mode} not valid.") + + log.info("done") diff --git a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json index 9b0b37078..cc0d129ce 100644 --- a/tests/evt/configs/vov-test-evt-config.json +++ b/tests/evt/configs/vov-test-evt-config.json @@ -1,8 +1,15 @@ { "channels": { - "geds_on": ["ch1084803", "ch1084804", "ch1121600"] + "geds_on": ["ch1084803", "ch1084804", "ch1121600"], + "ts_master": "ch1084803" }, "operations": { + "timestamp": { + "channels": "ts_master", + "aggregation_mode": "sum", + "expression": "dsp.timestamp", + "initial": 0.0 + }, "energy": { "channels": "geds_on", "aggregation_mode": "gather", @@ -26,6 +33,10 @@ "parameters": { "a": 25 }, "initial": 0 }, + "is_saturated": { + "aggregation_mode": "keep_at:evt.energy_id", + "expression": 
"hit.is_saturated" + }, "energy_times_aoe": { "expression": "evt.energy*evt.aoe" }, diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 43105a31d..2cac630d8 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -115,7 +115,7 @@ def test_vov(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(ls(outfile, "/evt/")) == 7 + assert len(ls(outfile, "/evt/")) == 9 lstore = store.LH5Store() vov_ene, _ = lstore.read_object("/evt/energy", outfile) vov_aoe, _ = lstore.read_object("/evt/aoe", outfile) diff --git a/tests/skm/configs/basic-skm-config.json b/tests/skm/configs/basic-skm-config.json new file mode 100644 index 000000000..b1844ecb0 --- /dev/null +++ b/tests/skm/configs/basic-skm-config.json @@ -0,0 +1,35 @@ +{ + "multiplicity": 3, + "index_field": "timestamp", + "skimmed_fields": { + "timestamp": { + "evt_field": "timestamp" + }, + "multiplicity": { + "evt_field": "multiplicity" + }, + "energy": { + "evt_field": "energy", + "missing_value": "np.nan" + }, + "energy_id": { + "evt_field": "energy_id", + "missing_value": 0 + } + }, + "global_fields": { + "energy_sum": { + "aggregation_mode": "nansum", + "evt_field": "energy" + }, + "is_any_saturated": { + "aggregation_mode": "any", + "evt_field": "is_saturated" + }, + "max_energy": { + "aggregation_mode": "max", + "evt_field": "energy", + "missing_value": "np.inf" + } + } +} diff --git a/tests/skm/test_build_skm.py b/tests/skm/test_build_skm.py new file mode 100644 index 000000000..984be7936 --- /dev/null +++ b/tests/skm/test_build_skm.py @@ -0,0 +1,50 @@ +import os +from pathlib import Path + +import numpy as np +import pandas as pd + +from pygama.evt import build_evt +from pygama.skm import build_skm + +config_dir = Path(__file__).parent / "configs" +evt_config_dir = Path(__file__).parent.parent / "evt" / "configs" + + +def test_basics(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + 
tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + build_evt( + f_tcm=lgnd_test_data.get_path(tcm_path), + f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + f_evt=outfile, + evt_config=f"{evt_config_dir}/vov-test-evt-config.json", + wo_mode="o", + group="/evt/", + tcm_group="hardware_tcm_1", + ) + + skm_conf = f"{config_dir}/basic-skm-config.json" + skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.parquet" + build_skm(outfile, skm_out, skm_conf, wo_mode="o") + + assert os.path.exists(skm_out) + df = pd.read_parquet(skm_out) + assert df.index.name == "timestamp" + assert "energy_0" in df.keys() + assert "energy_1" in df.keys() + assert "energy_2" in df.keys() + assert "multiplicity" in df.keys() + assert (df.multiplicity.to_numpy() <= 3).all() + assert ( + np.nan_to_num(df.energy_0.to_numpy()) + + np.nan_to_num(df.energy_1.to_numpy()) + + np.nan_to_num(df.energy_2.to_numpy()) + == df.energy_sum.to_numpy() + ).all() + assert (np.nan_to_num(df.energy_0.to_numpy()) <= df.max_energy.to_numpy()).all() + assert (np.nan_to_num(df.energy_1.to_numpy()) <= df.max_energy.to_numpy()).all() + assert (np.nan_to_num(df.energy_2.to_numpy()) <= df.max_energy.to_numpy()).all() From 985b35b12ccaf30950270bc91113510e951c3702 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 6 Dec 2023 13:28:51 +0100 Subject: [PATCH 24/73] generalized legend meta module --- src/pygama/evt/modules/legend.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/pygama/evt/modules/legend.py b/src/pygama/evt/modules/legend.py index 0bfe59d63..f12b81e6e 100644 --- a/src/pygama/evt/modules/legend.py +++ b/src/pygama/evt/modules/legend.py @@ -11,22 +11,23 @@ def legend_meta(params: dict) -> list: lm = import_module("legendmeta") lmeta = 
lm.LegendMetadata(path=params["meta_path"]) chmap = lmeta.channelmap(params["time_key"]) + tmp = [ f"ch{e}" for e in chmap.map("daq.rawid") if chmap.map("daq.rawid")[e]["system"] == params["system"] ] - if "usability" in params.keys(): - tmp = [ - e - for e in tmp - if chmap.map("daq.rawid")[int(e[2:])]["analysis"]["usability"] - == params["usability"] - ] - if "geds" == params["system"] and "type" in params.keys(): - tmp = [ - e - for e in tmp - if chmap.map("daq.rawid")[int(e[2:])]["type"] == params["type"] - ] + + if "selectors" in params.keys(): + for k in params["selectors"].keys(): + s = "" + for e in k.split("."): + s += f"['{e}']" + + tmp = [ + e + for e in tmp + if eval("dotter" + s, {"dotter": chmap.map("daq.rawid")[int(e[2:])]}) + == params["selectors"][k] + ] return tmp From ceac4db59c714cb516db5a09323347ae600e3d51 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 6 Dec 2023 15:26:54 +0100 Subject: [PATCH 25/73] small changes --- src/pygama/evt/build_evt.py | 19 +++++++++---------- tests/evt/configs/basic-evt-config.json | 3 --- tests/evt/configs/module-test-evt-config.json | 1 - .../module-test-t0-vov-evt-config.json | 1 - 4 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index fb2a73ed7..e43a713b8 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -152,7 +152,6 @@ def evaluate_expression( ids, f_hit, f_dsp, - chns, chns_rm, expr, exprl, @@ -902,7 +901,6 @@ def evaluate_at_channel( ids: np.ndarray, f_hit: str, f_dsp: str, - chns: list, chns_rm: list, expr: str, exprl: list, @@ -923,8 +921,6 @@ def evaluate_at_channel( Path to hit tier file f_dsp Path to dsp tier file - chns - list of channels to be aggregated chns_rm list of channels to be skipped from evaluation and set to default value expr @@ -939,26 +935,29 @@ def evaluate_at_channel( default value """ - out = np.full(len(ch_comp), defv, dtype=type(defv)) + out = np.full(len(ch_comp.nda), defv, 
dtype=type(defv)) - for ch in chns: + for ch in np.unique(ch_comp.nda.astype(int)): + # skip default value + if f"ch{ch}" not in store.ls(f_hit): + continue # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] + idx_ch = idx[ids == ch] res = get_data_at_channel( - ch, + f"ch{ch}", idx_ch, expr, exprl, var_ph, - ch not in chns_rm, + f"ch{ch}" not in chns_rm, f_hit, f_dsp, len(out), defv, ) - out[idx_ch] = np.where(int(ch[2:]) == ch_comp.nda, res, out[idx_ch]) + out = np.where(ch == ch_comp.nda, res, out) return {"values": out} diff --git a/tests/evt/configs/basic-evt-config.json b/tests/evt/configs/basic-evt-config.json index 1fd0527cb..c68ea7cd6 100644 --- a/tests/evt/configs/basic-evt-config.json +++ b/tests/evt/configs/basic-evt-config.json @@ -51,19 +51,16 @@ "initial": 0.0 }, "is_usable_aoe": { - "channels": "geds_on", "aggregation_mode": "keep_at:evt.energy_id", "expression": "True", "initial": false }, "aoe": { - "channels": "geds_on", "aggregation_mode": "keep_at:evt.energy_id", "expression": "hit.AoE_Classifier", "initial": "np.nan" }, "is_aoe_rejected": { - "channels": "geds_on", "aggregation_mode": "keep_at:evt.energy_id", "expression": "~(hit.AoE_Double_Sided_Cut)", "initial": false diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index 07262cc3d..6117ddd8d 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -19,7 +19,6 @@ "initial": 0 }, "t0": { - "channels": "geds_on", "aggregation_mode": "keep_at:evt.energy_first_id", "expression": "dsp.tp_0_est", "initial": 0.0 diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index 6479d4587..0b64d0e43 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -17,7 +17,6 @@ "expression": "tcm.array_id" }, "t0": { - "channels": 
"geds_on", "aggregation_mode": "keep_at:evt.energy_id", "expression": "dsp.tp_0_est", "initial": 0.0 From b8e87171548ecc5b444b60b82d8edbcfde19acfc Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 29 Dec 2023 14:35:50 +0100 Subject: [PATCH 26/73] add outputs field in configuration --- src/pygama/evt/build_evt.py | 30 ++++++++++++++++--- tests/evt/configs/basic-evt-config.json | 12 ++++++++ tests/evt/configs/module-test-evt-config.json | 12 ++++++++ .../module-test-t0-vov-evt-config.json | 12 ++++++++ tests/evt/configs/query-test-evt-config.json | 14 +++++++++ tests/evt/configs/vov-test-evt-config.json | 11 +++++++ tests/evt/test_build_evt.py | 2 ++ 7 files changed, 89 insertions(+), 4 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index e43a713b8..47e1f0b16 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -8,6 +8,7 @@ import json import logging import os +import random import re from importlib import import_module @@ -1359,6 +1360,10 @@ def build_evt( log.info( f"Applying {len(tbl_cfg['operations'].keys())} operations to key {f_tcm.split('-')[-2]}" ) + + # Define temporary file + f_evt_tmp = f"{os.path.dirname(f_evt)}/{os.path.basename(f_evt).split('.')[0]}_tmp{random.randrange(9999):04d}.lh5" + for k, v in tbl_cfg["operations"].items(): log.debug("Processing field" + k) @@ -1366,8 +1371,8 @@ def build_evt( if "aggregation_mode" not in v.keys(): exprl = re.findall(r"(evt).([a-zA-Z_$][\w$]*)", v["expression"]) var = {} - if os.path.exists(f_evt): - var = load_vars_to_nda(f_evt, "", exprl) + if os.path.exists(f_evt_tmp): + var = load_vars_to_nda(f_evt_tmp, "", exprl) if "parameters" in v.keys(): var = var | v["parameters"] @@ -1391,7 +1396,7 @@ def build_evt( lstore.write_object( obj=res, name=group + k, - lh5_file=f_evt, + lh5_file=f_evt_tmp, wo_mode=wo_mode, ) @@ -1428,7 +1433,7 @@ def build_evt( result = evaluate_expression( f_tcm, - f_evt, + f_evt_tmp, f_hit, f_dsp, chns_e, @@ -1448,9 
+1453,26 @@ def build_evt( lstore.write_object( obj=obj, name=group + k, + lh5_file=f_evt_tmp, + wo_mode=wo_mode, + ) + + # write output fields into f_evt and delete temporary file + if "outputs" in tbl_cfg.keys(): + if len(tbl_cfg["outputs"]) < 1: + log.warning("No output fields specified, no file will be written.") + for fld in tbl_cfg["outputs"]: + obj, _ = lstore.read_object(group + fld, f_evt_tmp) + lstore.write_object( + obj=obj, + name=group + fld, lh5_file=f_evt, wo_mode=wo_mode, ) + else: + log.warning("No output fields specified, no file will be written.") + + os.remove(f_evt_tmp) log.info("Done") diff --git a/tests/evt/configs/basic-evt-config.json b/tests/evt/configs/basic-evt-config.json index c68ea7cd6..8eb23adf2 100644 --- a/tests/evt/configs/basic-evt-config.json +++ b/tests/evt/configs/basic-evt-config.json @@ -2,6 +2,18 @@ "channels": { "geds_on": ["ch1084803", "ch1084804", "ch1121600"] }, + "outputs": [ + "multiplicity", + "energy", + "energy_id", + "energy_any_above1MeV", + "energy_all_above1MeV", + "energy_aux", + "energy_sum", + "is_usable_aoe", + "aoe", + "is_aoe_rejected" + ], "operations": { "multiplicity": { "channels": "geds_on", diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index 6117ddd8d..595999d60 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -3,6 +3,18 @@ "spms_on": ["ch1057600", "ch1059201", "ch1062405"], "geds_on": ["ch1084803", "ch1084804", "ch1121600"] }, + "outputs": [ + "energy_first", + "energy_first_id", + "t0", + "lar_energy", + "lar_multiplicity", + "is_lar_rejected", + "lar_classifier", + "lar_energy_dplms", + "lar_multiplicity_dplms", + "lar_time_shift" + ], "operations": { "energy_first": { "channels": "geds_on", diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index 0b64d0e43..f1bf09a8e 100644 --- 
a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -3,6 +3,18 @@ "spms_on": ["ch1057600", "ch1059201", "ch1062405"], "geds_on": ["ch1084803", "ch1084804", "ch1121600"] }, + "outputs": [ + "energy", + "energy_id", + "t0", + "lar_energy", + "lar_multiplicity", + "is_lar_rejected", + "lar_classifier", + "lar_energy_dplms", + "lar_multiplicity_dplms", + "lar_time_shift" + ], "operations": { "energy": { "channels": "geds_on", diff --git a/tests/evt/configs/query-test-evt-config.json b/tests/evt/configs/query-test-evt-config.json index ff59e2a0b..901d2d6c1 100644 --- a/tests/evt/configs/query-test-evt-config.json +++ b/tests/evt/configs/query-test-evt-config.json @@ -2,6 +2,20 @@ "channels": { "geds_on": ["ch1084803", "ch1084804", "ch1121600"] }, + "outputs": [ + "multiplicity", + "test_sum", + "test_first", + "test_first2", + "test_last", + "test_last2", + "test_any", + "test_any2", + "test_all", + "test_all2", + "test_vov", + "test_vov2" + ], "operations": { "multiplicity": { "channels": "geds_on", diff --git a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json index cc0d129ce..ffdce3b31 100644 --- a/tests/evt/configs/vov-test-evt-config.json +++ b/tests/evt/configs/vov-test-evt-config.json @@ -3,6 +3,17 @@ "geds_on": ["ch1084803", "ch1084804", "ch1121600"], "ts_master": "ch1084803" }, + "outputs": [ + "timestamp", + "energy", + "energy_id", + "aoe", + "multiplicity", + "is_saturated", + "energy_times_aoe", + "energy_times_multiplicity", + "multiplicity_squared" + ], "operations": { "timestamp": { "channels": "ts_master", diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 2cac630d8..838f76b38 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -161,6 +161,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): conf = { "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, + "outputs": ["foo"], 
"operations": { "foo": { "channels": "geds_on", @@ -204,6 +205,7 @@ def test_vector_sort(lgnd_test_data, tmptestdir): conf = { "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, + "outputs": ["acend_id", "t0_acend", "decend_id", "t0_decend"], "operations": { "acend_id": { "channels": "geds_on", From c70ec82e05d0788faba08c477b47ec9aae916292 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 2 Jan 2024 18:34:09 +0100 Subject: [PATCH 27/73] LH5Store function renaming fixes --- src/pygama/evt/build_evt.py | 20 +++++------ src/pygama/skm/build_skm.py | 67 ++++--------------------------------- tests/evt/test_build_evt.py | 22 ++++++------ tests/skm/test_build_skm.py | 4 +-- 4 files changed, 29 insertions(+), 84 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 47e1f0b16..49c621cd0 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -146,7 +146,7 @@ def evaluate_expression( in [e.split("/")[-1] for e in store.ls(f_evt, "/evt/")] ): lstore = store.LH5Store() - ch_comp, _ = lstore.read_object(mode[8:].replace(".", "/"), f_evt) + ch_comp, _ = lstore.read(mode[8:].replace(".", "/"), f_evt) if isinstance(ch_comp, Array): return evaluate_at_channel( idx, @@ -336,7 +336,7 @@ def load_vars_to_nda(f: str, group: str, exprl: list, idx: np.ndarray = None) -> lstore = store.LH5Store() var = { - f"{e[0]}_{e[1]}": lstore.read_object( + f"{e[0]}_{e[1]}": lstore.read( f"{group.replace('/','')}/{e[0]}/{e[1]}", f, idx=idx, @@ -1393,7 +1393,7 @@ def build_evt( f"Currently only 2d formats are supported, the evaluated array has the dimension {res.shape}" ) - lstore.write_object( + lstore.write( obj=res, name=group + k, lh5_file=f_evt_tmp, @@ -1450,7 +1450,7 @@ def build_evt( obj = result["values"] if isinstance(obj, np.ndarray): obj = Array(result["values"]) - lstore.write_object( + lstore.write( obj=obj, name=group + k, lh5_file=f_evt_tmp, @@ -1462,8 +1462,8 @@ def build_evt( if len(tbl_cfg["outputs"]) < 
1: log.warning("No output fields specified, no file will be written.") for fld in tbl_cfg["outputs"]: - obj, _ = lstore.read_object(group + fld, f_evt_tmp) - lstore.write_object( + obj, _ = lstore.read(group + fld, f_evt_tmp) + lstore.write( obj=obj, name=group + fld, lh5_file=f_evt, @@ -1493,7 +1493,7 @@ def skim_evt( f_evt input LH5 file of the evt level expression - skimming expression. Can contain variabels from event file or from the params dictionary. + skimming expression. Can contain variables from event file or from the params dictionary. f_out output LH5 file. Can be None if wo_mode is set to overwrite f_evt. wo_mode @@ -1519,7 +1519,7 @@ def skim_evt( for e in store.ls(f_evt, evt_group) if e.split("/")[-1] in exprl ] - var = {e: lstore.read_object(evt_group + e, f_evt)[0] for e in flds} + var = {e: lstore.read(evt_group + e, f_evt)[0] for e in flds} # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) arr_keys = [] @@ -1554,8 +1554,8 @@ def skim_evt( of_tmp = of.replace(of.split("/")[-1], ".tmp_" + of.split("/")[-1]) for fld in fields: - ob, _ = lstore.read_object(fld, f_evt, idx=idx_list) - lstore.write_object( + ob, _ = lstore.read(fld, f_evt, idx=idx_list) + lstore.write( obj=ob, name=fld, lh5_file=of_tmp, diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index aace36501..51f025638 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -11,67 +11,12 @@ import awkward as ak import h5py import lgdo.lh5_store as store -import numpy as np import pandas as pd -from lgdo import Array, ArrayOfEqualSizedArrays, VectorOfVectors +from lgdo import VectorOfVectors log = logging.getLogger(__name__) -def vov_to_ak(vov: VectorOfVectors) -> ak.Array: - """ - Temporary function to convert VectorOfVectors to awkward arrays. This function will be removed soon. - - Parameters - ---------- - vov - VectorOfVectors to be converted. 
- """ - flattened_data = vov.flattened_data - cumulative_length = vov.cumulative_length - if isinstance(flattened_data, Array): - flattened_data = flattened_data.nda - if isinstance(cumulative_length, Array): - cumulative_length = cumulative_length.nda - - offsets = np.empty(len(cumulative_length) + 1, dtype=cumulative_length.dtype) - offsets[1:] = cumulative_length - offsets[0] = 0 - - layout = ak.contents.ListOffsetArray( - offsets=ak.index.Index(offsets), content=ak.contents.NumpyArray(flattened_data) - ) - return ak.Array(layout) - - -def vov_to_aoesa( - vov: VectorOfVectors, missing_value=np.nan, length: int = None -) -> ArrayOfEqualSizedArrays: - """ - Temporary function to convert VectorOfVectors to ArrayOfEqualSizedArrays. This function will be removed soon. - - Parameters - ---------- - vov - VectorOfVectors to be converted. - missing_value - missing value to be inserted. Determines the datatype of the output ArrayOfEqualSizedArrays - length - length of each row in the ArrayOfEqualSizedArrays. If the row in VectorOfVectors is shorter than length, the row gets padded with missing_value. If the row in VectorOfVectors is longer than length, the row gets clipped. - """ - arr = vov_to_ak(vov) - if length is not None: - max_len = length - else: - max_len = int(ak.max(ak.count(arr, axis=-1))) - return ArrayOfEqualSizedArrays( - nda=ak.fill_none(ak.pad_none(arr, max_len, clip=True), missing_value) - .to_numpy(allow_missing=False) - .astype(type(missing_value)), - attrs=vov.getattrs(), - ) - - def build_skm( f_evt: str | list, f_skm: str, @@ -131,7 +76,7 @@ def build_skm( wo_mode writing mode. - - ``write_safe`` or ``w``: only proceed with writing if the file does not already exis. + - ``write_safe`` or ``w``: only proceed with writing if the file does not already exists. - ``append`` or ``a``: append to file. - ``overwrite`` or ``o``: replaces existing file. 
group @@ -216,11 +161,11 @@ def build_skm( raise ValueError( f"({fld[0]}) is a VectorOfVector field and no missing_value is specified" ) - vls, _ = lstore.read_object(group + fld[1], f_evt) + vls, _ = lstore.read(group + fld[1], f_evt) mv = tbl_cfg["skimmed_fields"][fld[0]]["missing_value"] if mv in ["np.inf", "-np.inf", "np.nan"]: mv = eval(mv) - out = vov_to_aoesa(vls, missing_value=mv, length=multi).nda + out = vls.vov_to_aoesa(max_len=multi, fill_val=mv).nda nms = [fld[0] + f"_{e}" for e in range(multi)] df = df.join(pd.DataFrame(data=out, columns=nms), how="outer") @@ -235,13 +180,13 @@ def build_skm( mode = tbl_cfg["global_fields"][k]["aggregation_mode"] fld = tbl_cfg["global_fields"][k]["evt_field"] - obj, _ = lstore.read_object(group + fld, f_evt) + obj, _ = lstore.read(group + fld, f_evt) if not isinstance(obj, VectorOfVectors): raise ValueError( f"global {k} operation not possible, since {fld} is not an VectorOfVectors" ) - obj_ak = vov_to_ak(obj) + obj_ak = obj.view_as("ak") if mode in [ "sum", "prod", diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 838f76b38..f771579c5 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -117,12 +117,12 @@ def test_vov(lgnd_test_data, tmptestdir): assert os.path.exists(outfile) assert len(ls(outfile, "/evt/")) == 9 lstore = store.LH5Store() - vov_ene, _ = lstore.read_object("/evt/energy", outfile) - vov_aoe, _ = lstore.read_object("/evt/aoe", outfile) - arr_ac, _ = lstore.read_object("/evt/multiplicity", outfile) - vov_aoeene, _ = lstore.read_object("/evt/energy_times_aoe", outfile) - vov_eneac, _ = lstore.read_object("/evt/energy_times_multiplicity", outfile) - arr_ac2, _ = lstore.read_object("/evt/multiplicity_squared", outfile) + vov_ene, _ = lstore.read("/evt/energy", outfile) + vov_aoe, _ = lstore.read("/evt/aoe", outfile) + arr_ac, _ = lstore.read("/evt/multiplicity", outfile) + vov_aoeene, _ = lstore.read("/evt/energy_times_aoe", outfile) + vov_eneac, _ = 
lstore.read("/evt/energy_times_multiplicity", outfile) + arr_ac2, _ = lstore.read("/evt/multiplicity_squared", outfile) assert isinstance(vov_ene, VectorOfVectors) assert isinstance(vov_aoe, VectorOfVectors) assert isinstance(arr_ac, Array) @@ -236,10 +236,10 @@ def test_vector_sort(lgnd_test_data, tmptestdir): assert os.path.exists(outfile) assert len(ls(outfile, "/evt/")) == 4 lstore = store.LH5Store() - vov_t0, _ = lstore.read_object("/evt/t0_acend", outfile) + vov_t0, _ = lstore.read("/evt/t0_acend", outfile) nda_t0 = vov_t0.to_aoesa().nda assert ((np.diff(nda_t0) >= 0) | (np.isnan(np.diff(nda_t0)))).all() - vov_t0, _ = lstore.read_object("/evt/t0_decend", outfile) + vov_t0, _ = lstore.read("/evt/t0_decend", outfile) nda_t0 = vov_t0.to_aoesa().nda assert ((np.diff(nda_t0) <= 0) | (np.isnan(np.diff(nda_t0)))).all() @@ -256,16 +256,16 @@ def test_skimming(lgnd_test_data, tmptestdir): build_evt(f_tcm, f_dsp, f_hit, outfile, f_config) lstore = store.LH5Store() - ac = lstore.read_object("/evt/multiplicity", outfile)[0].nda + ac = lstore.read("/evt/multiplicity", outfile)[0].nda ac = len(ac[ac == 3]) outfile_skm = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" skim_evt(outfile, "multiplicity == 3", None, outfile_skm, "n") - assert ac == len(lstore.read_object("/evt/energy", outfile_skm)[0].to_aoesa().nda) + assert ac == len(lstore.read("/evt/energy", outfile_skm)[0].to_aoesa().nda) skim_evt(outfile, "multiplicity == 3", None, None, "o") - assert ac == len(lstore.read_object("/evt/energy", outfile)[0].to_aoesa().nda) + assert ac == len(lstore.read("/evt/energy", outfile)[0].to_aoesa().nda) with pytest.raises(ValueError): skim_evt(outfile, "multiplicity == 3", None, None, "bla") diff --git a/tests/skm/test_build_skm.py b/tests/skm/test_build_skm.py index 984be7936..0b2beebe4 100644 --- a/tests/skm/test_build_skm.py +++ b/tests/skm/test_build_skm.py @@ -29,10 +29,10 @@ def test_basics(lgnd_test_data, tmptestdir): skm_conf = 
f"{config_dir}/basic-skm-config.json" skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.parquet" - build_skm(outfile, skm_out, skm_conf, wo_mode="o") + build_skm(outfile, skm_out, skm_conf, wo_mode="o", skim_format="hdf") assert os.path.exists(skm_out) - df = pd.read_parquet(skm_out) + df = pd.read_hdf(skm_out) assert df.index.name == "timestamp" assert "energy_0" in df.keys() assert "energy_1" in df.keys() From c636d75cc9b0ac02afb98667dfff22069f2383bf Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 2 Jan 2024 19:25:47 +0100 Subject: [PATCH 28/73] explicit numpy import --- src/pygama/skm/build_skm.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index 51f025638..b0be9d620 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -11,6 +11,7 @@ import awkward as ak import h5py import lgdo.lh5_store as store +import numpy as np import pandas as pd from lgdo import VectorOfVectors @@ -165,7 +166,7 @@ def build_skm( mv = tbl_cfg["skimmed_fields"][fld[0]]["missing_value"] if mv in ["np.inf", "-np.inf", "np.nan"]: mv = eval(mv) - out = vls.vov_to_aoesa(max_len=multi, fill_val=mv).nda + out = vls.to_aoesa(max_len=multi, fill_val=mv).nda nms = [fld[0] + f"_{e}" for e in range(multi)] df = df.join(pd.DataFrame(data=out, columns=nms), how="outer") @@ -214,8 +215,10 @@ def build_skm( f"global {k} {mode} operation needs a missing value assigned" ) mv = tbl_cfg["global_fields"][k]["missing_value"] - if mv in ["np.inf", "-np.inf"]: - mv = eval(mv) + if mv == "np.inf": + mv = np.inf + elif mv == "-np.inf": + mv = -1 * np.inf val = ak.fill_none(val, mv) df = df.join( pd.DataFrame(data=val.to_numpy(allow_missing=False), columns=[k]) From 218330af55772348c33f82a02605e0e4d88dd403 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 4 Jan 2024 11:31:00 +0100 Subject: [PATCH 29/73] [ci] disable tqdm, should fix the random MacOS CI failures --- 
.github/workflows/main.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 95dba9133..e5e817249 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -12,6 +12,9 @@ concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true +env: + TQDM_MININTERVAL: 10 + jobs: build-and-test: From 4a73c935ca93b9ea2fdec7e4010a33edf3fcd0ca Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 11 Jan 2024 14:48:27 +0100 Subject: [PATCH 30/73] Update to latest pydataobj version --- src/pygama/evt/build_evt.py | 75 +++++++++-------- src/pygama/evt/modules/spm.py | 154 +++++++++++++++++++--------------- src/pygama/skm/build_skm.py | 31 ++++--- 3 files changed, 143 insertions(+), 117 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 49c621cd0..5c9f291d4 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -12,9 +12,9 @@ import re from importlib import import_module -import lgdo.lh5_store as store import numpy as np -from lgdo import Array, VectorOfVectors +from lgdo import Array, VectorOfVectors, lh5 +from lgdo.lh5 import LH5Store log = logging.getLogger(__name__) @@ -92,6 +92,8 @@ def evaluate_expression( can be used to sort vector outputs according to sorter expression (see :func:`evaluate_to_vector`) """ + store = LH5Store() + # find parameters in evt file or in parameters exprl = re.findall(r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", expr) var_ph = {} @@ -133,9 +135,8 @@ def evaluate_expression( qry_mask = eval(qry.replace("evt.", "evt_"), var_qry) # load TCM data to define an event - nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") - ids = nda["array_id"] - idx = nda["array_idx"] + ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") + idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") # switch through modes if ( @@ -143,10 +144,9 @@ def 
evaluate_expression( and "keep_at:" == mode[:8] and "evt." == mode[8:][:4] and mode[8:].split(".")[-1] - in [e.split("/")[-1] for e in store.ls(f_evt, "/evt/")] + in [e.split("/")[-1] for e in lh5.ls(f_evt, "/evt/")] ): - lstore = store.LH5Store() - ch_comp, _ = lstore.read(mode[8:].replace(".", "/"), f_evt) + ch_comp, _ = store.read(mode[8:].replace(".", "/"), f_evt) if isinstance(ch_comp, Array): return evaluate_at_channel( idx, @@ -334,16 +334,16 @@ def load_vars_to_nda(f: str, group: str, exprl: list, idx: np.ndarray = None) -> list of parameter-tuples (root_group, field) to be found in f """ - lstore = store.LH5Store() + store = LH5Store() var = { - f"{e[0]}_{e[1]}": lstore.read( + f"{e[0]}_{e[1]}": store.read( f"{group.replace('/','')}/{e[0]}/{e[1]}", f, idx=idx, )[0] for e in exprl if e[1] - in [x.split("/")[-1] for x in store.ls(f, f"{group.replace('/','')}/{e[0]}/")] + in [x.split("/")[-1] for x in lh5.ls(f, f"{group.replace('/','')}/{e[0]}/")] } # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) @@ -536,6 +536,8 @@ def evaluate_to_first( out_chs = np.zeros(len(out), dtype=int) outt = np.zeros(len(out)) + store = LH5Store() + for ch in chns: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] @@ -562,12 +564,11 @@ def evaluate_to_first( outt[:] = np.inf # find if sorter is in hit or dsp - t0 = store.load_nda( + t0 = store.read( + f"{ch}/{sorter[0]}/{sorter[1]}", f_hit if "hit" == sorter[0] else f_dsp, - [sorter[1]], - f"{ch}/{sorter[0]}/", - idx_ch, - )[sorter[1]] + idx=idx_ch, + )[0].view_as("np") out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) out_chs[idx_ch] = np.where((t0 < outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) @@ -629,6 +630,8 @@ def evaluate_to_last( out_chs = np.zeros(len(out), dtype=int) outt = np.zeros(len(out)) + store = LH5Store() + for ch in chns: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] @@ -652,12 
+655,11 @@ def evaluate_to_last( # append to out according to mode == last # find if sorter is in hit or dsp - t0 = store.load_nda( + t0 = store.read( + f"{ch}/{sorter[0]}/{sorter[1]}", f_hit if "hit" == sorter[0] else f_dsp, - [sorter[1]], - f"{ch}/{sorter[0]}/", - idx_ch, - )[sorter[1]] + idx=idx_ch, + )[0].view_as("np") out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) out_chs[idx_ch] = np.where((t0 > outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) @@ -940,7 +942,7 @@ def evaluate_at_channel( for ch in np.unique(ch_comp.nda.astype(int)): # skip default value - if f"ch{ch}" not in store.ls(f_hit): + if f"ch{ch}" not in lh5.ls(f_hit): continue # get index list for this channel to be loaded idx_ch = idx[ids == ch] @@ -1305,7 +1307,7 @@ def build_evt( lh5 root group in tcm file """ - lstore = store.LH5Store() + store = LH5Store() tbl_cfg = evt_config if not isinstance(tbl_cfg, (str, dict)): raise TypeError() @@ -1355,8 +1357,9 @@ def build_evt( chns[k] = [e for e in v] nrows = len( - store.load_nda(f_tcm, ["cumulative_length"], tcm_group)["cumulative_length"] + lh5.load_nda(f_tcm, ["cumulative_length"], tcm_group)["cumulative_length"] ) + # nrows = store.read_n_rows(f"{tcm_group}/cumulative_length", f_tcm) log.info( f"Applying {len(tbl_cfg['operations'].keys())} operations to key {f_tcm.split('-')[-2]}" ) @@ -1393,7 +1396,7 @@ def build_evt( f"Currently only 2d formats are supported, the evaluated array has the dimension {res.shape}" ) - lstore.write( + store.write( obj=res, name=group + k, lh5_file=f_evt_tmp, @@ -1450,7 +1453,7 @@ def build_evt( obj = result["values"] if isinstance(obj, np.ndarray): obj = Array(result["values"]) - lstore.write( + store.write( obj=obj, name=group + k, lh5_file=f_evt_tmp, @@ -1462,8 +1465,8 @@ def build_evt( if len(tbl_cfg["outputs"]) < 1: log.warning("No output fields specified, no file will be written.") for fld in tbl_cfg["outputs"]: - obj, _ = lstore.read(group + fld, f_evt_tmp) - lstore.write( + obj, _ = 
store.read(group + fld, f_evt_tmp) + store.write( obj=obj, name=group + fld, lh5_file=f_evt, @@ -1507,19 +1510,17 @@ def skim_evt( wo_mode + " is a invalid writing mode. Valid options are: 'o', 'overwrite','n','new'" ) - lstore = store.LH5Store() - fields = store.ls(f_evt, evt_group) - nrows = lstore.read_n_rows(fields[0], f_evt) + store = LH5Store() + fields = lh5.ls(f_evt, evt_group) + nrows = store.read_n_rows(fields[0], f_evt) # load fields in expression exprl = re.findall(r"[a-zA-Z_$][\w$]*", expression) var = {} flds = [ - e.split("/")[-1] - for e in store.ls(f_evt, evt_group) - if e.split("/")[-1] in exprl + e.split("/")[-1] for e in lh5.ls(f_evt, evt_group) if e.split("/")[-1] in exprl ] - var = {e: lstore.read(evt_group + e, f_evt)[0] for e in flds} + var = {e: store.read(evt_group + e, f_evt)[0] for e in flds} # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) arr_keys = [] @@ -1554,8 +1555,8 @@ def skim_evt( of_tmp = of.replace(of.split("/")[-1], ".tmp_" + of.split("/")[-1]) for fld in fields: - ob, _ = lstore.read(fld, f_evt, idx=idx_list) - lstore.write( + ob, _ = store.read(fld, f_evt, idx=idx_list) + store.write( obj=ob, name=fld, lh5_file=of_tmp, diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 7bd530531..0907b3a13 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -11,9 +11,9 @@ import warnings -import lgdo.lh5_store as store import numpy as np from lgdo import Array, VectorOfVectors +from lgdo.lh5 import LH5Store # get LAr energy per event over all channels @@ -35,21 +35,25 @@ def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): tma = trig + tmax sum = np.zeros(len(trig)) # load TCM data to define an event - nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") - ids = nda["array_id"] - idx = nda["array_idx"] + store = LH5Store() + ids = store.read("hardware_tcm_1/array_id", 
f_tcm)[0].view_as("np") + idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + for ch in chs: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - df = store.load_nda( - f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/", idx_ch + energy_in_pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") + trigger_pos = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( + "np" ) mask = ( - (df["trigger_pos"] < tma[:, None] / 16) - & (df["trigger_pos"] > tmi[:, None] / 16) - & (df["energy_in_pe"] > lim) + (trigger_pos < tma[:, None] / 16) + & (trigger_pos > tmi[:, None] / 16) + & (energy_in_pe > lim) ) - pes = df["energy_in_pe"] + pes = energy_in_pe pes = np.where(np.isnan(pes), 0, pes) pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) @@ -76,21 +80,24 @@ def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): tma = trig + tmax maj = np.zeros(len(trig)) # load TCM data to define an event - nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") - ids = nda["array_id"] - idx = nda["array_idx"] + store = LH5Store() + ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") + idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") for ch in chs: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - df = store.load_nda( - f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/", idx_ch + energy_in_pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") + trigger_pos = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( + "np" ) mask = ( - (df["trigger_pos"] < tma[:, None] / 16) - & (df["trigger_pos"] > tmi[:, None] / 16) - & (df["energy_in_pe"] > lim) + (trigger_pos < tma[:, None] / 16) + & (trigger_pos > tmi[:, None] / 16) + & (energy_in_pe > lim) ) - pes = df["energy_in_pe"] + pes = energy_in_pe pes = np.where(np.isnan(pes), 0, pes) pes = 
np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) @@ -118,21 +125,24 @@ def get_energy_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): tma = trig + tmax sum = np.zeros(len(trig)) # load TCM data to define an event - nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") - ids = nda["array_id"] - idx = nda["array_idx"] + store = LH5Store() + ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") + idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") for ch in chs: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - df = store.load_nda( - f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/", idx_ch - ) + energy_in_pe_dplms = store.read( + f"{ch}/hit/energy_in_pe_dplms", f_hit, idx=idx_ch + )[0].view_as("np") + trigger_pos_dplms = store.read( + f"{ch}/hit/trigger_pos_dplms", f_hit, idx=idx_ch + )[0].view_as("np") mask = ( - (df["trigger_pos_dplms"] < tma[:, None] / 16) - & (df["trigger_pos_dplms"] > tmi[:, None] / 16) - & (df["energy_in_pe_dplms"] > lim) + (trigger_pos_dplms < tma[:, None] / 16) + & (trigger_pos_dplms > tmi[:, None] / 16) + & (energy_in_pe_dplms > lim) ) - pes = df["energy_in_pe_dplms"] + pes = energy_in_pe_dplms pes = np.where(np.isnan(pes), 0, pes) pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) @@ -159,21 +169,24 @@ def get_majority_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax tma = trig + tmax maj = np.zeros(len(trig)) # load TCM data to define an event - nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") - ids = nda["array_id"] - idx = nda["array_idx"] + store = LH5Store() + ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") + idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") for ch in chs: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - df = store.load_nda( - f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch 
+ "/hit/", idx_ch - ) + energy_in_pe_dplms = store.read( + f"{ch}/hit/energy_in_pe_dplms", f_hit, idx=idx_ch + )[0].view_as("np") + trigger_pos_dplms = store.read( + f"{ch}/hit/trigger_pos_dplms", f_hit, idx=idx_ch + )[0].view_as("np") mask = ( - (df["trigger_pos_dplms"] < tma[:, None] / 16) - & (df["trigger_pos_dplms"] > tmi[:, None] / 16) - & (df["energy_in_pe_dplms"] > lim) + (trigger_pos_dplms < tma[:, None] / 16) + & (trigger_pos_dplms > tmi[:, None] / 16) + & (energy_in_pe_dplms > lim) ) - pes = df["energy_in_pe_dplms"] + pes = energy_in_pe_dplms pes = np.where(np.isnan(pes), 0, pes) pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) @@ -188,9 +201,10 @@ def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, tra warnings.filterwarnings("ignore", r"invalid value encountered in true_divide") warnings.filterwarnings("ignore", r"invalid value encountered in divide") - predf = store.load_nda(f_hit, ["energy_in_pe", "timestamp"], chs[0] + "/hit/") + store = LH5Store() + energy_in_pe, _ = store.read(f"{chs[0]}/hit/energy_in_pe", f_hit) - peshape = (predf["energy_in_pe"]).shape + peshape = energy_in_pe.view_as("np").shape # 1D = channel, 2D = event num, 3D = array per event pes = np.zeros([len(chs), peshape[0], peshape[1]]) times = np.zeros([len(chs), peshape[0], peshape[1]]) @@ -213,25 +227,25 @@ def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, tra tma = tge + tmax # load TCM data to define an event - nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") - ids = nda["array_id"] - idx = nda["array_idx"] + store = LH5Store() + ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") + idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") for i in range(len(chs)): # get index list for this channel to be loaded idx_ch = idx[ids == int(chs[i][2:])] - df = store.load_nda( - f_hit, - ["energy_in_pe", "trigger_pos", "timestamp"], - chs[i] + "/hit/", - idx_ch, - ) + 
energy_in_pe = store.read(f"{chs[i]}/hit/energy_in_pe", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") + trigger_pos = store.read(f"{chs[i]}/hit/trigger_pos", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") mask = ( - (df["trigger_pos"] < tma[:, None] / 16) - & (df["trigger_pos"] > tmi[:, None] / 16) - & (df["energy_in_pe"] > lim) + (trigger_pos < tma[:, None] / 16) + & (trigger_pos > tmi[:, None] / 16) + & (energy_in_pe > lim) ) - pe = df["energy_in_pe"] - time = df["trigger_pos"] * 16 + pe = energy_in_pe + time = trigger_pos * 16 pe = np.where(mask, pe, np.nan) time = np.where(mask, time, np.nan) @@ -271,8 +285,12 @@ def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, tra def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): - predf = store.load_nda(f_hit, ["energy_in_pe"], chs[0] + "/hit/") - peshape = (predf["energy_in_pe"]).shape + store = LH5Store() + energy_in_pe, _ = store.read( + f"{chs[0]}/hit/energy_in_pe", + f_hit, + ) + peshape = energy_in_pe.view_as("np").shape times = np.zeros([len(chs), peshape[0], peshape[1]]) tge = trgr @@ -293,22 +311,24 @@ def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): tma = tge + tmax # load TCM data to define an event - nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") - ids = nda["array_id"] - idx = nda["array_idx"] + ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") + idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") for i in range(len(chs)): # get index list for this channel to be loaded idx_ch = idx[ids == int(chs[i][2:])] - df = store.load_nda( - f_hit, ["energy_in_pe", "trigger_pos"], chs[i] + "/hit/", idx_ch - ) + energy_in_pe = store.read(f"{chs[i]}/hit/energy_in_pe", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") + trigger_pos = store.read(f"{chs[i]}/hit/trigger_pos", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") mask = ( - (df["trigger_pos"] < tma[:, None] / 16) - & (df["trigger_pos"] > tmi[:, 
None] / 16) - & (df["energy_in_pe"] > lim) + (trigger_pos < tma[:, None] / 16) + & (trigger_pos > tmi[:, None] / 16) + & (energy_in_pe > lim) ) - time = df["trigger_pos"] * 16 + time = trigger_pos * 16 time = np.where(mask, time, np.nan) times[i][idx_ch] = time diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index b0be9d620..7f5d2de3b 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -10,10 +10,10 @@ import awkward as ak import h5py -import lgdo.lh5_store as store import numpy as np import pandas as pd -from lgdo import VectorOfVectors +from lgdo import VectorOfVectors, lh5 +from lgdo.lh5 import LH5Store log = logging.getLogger(__name__) @@ -107,7 +107,7 @@ def build_skm( if x[1] in [ e.split("/")[-1] - for e in store.ls(f_evt[0] if isinstance(f_evt, list) else f_evt, group) + for e in lh5.ls(f_evt[0] if isinstance(f_evt, list) else f_evt, group) if "array<1>{array<1>{" in f[e].attrs.get("datatype") ] ] @@ -118,7 +118,7 @@ def build_skm( and x[1] in [ e.split("/")[-1] - for e in store.ls(f_evt[0] if isinstance(f_evt, list) else f_evt, group) + for e in lh5.ls(f_evt[0] if isinstance(f_evt, list) else f_evt, group) ] ] @@ -142,27 +142,32 @@ def build_skm( # init pandas df df = pd.DataFrame() + store = LH5Store() # add array like fields if isinstance(flds_arr, list): log.debug("Crunching array-like fields") - df = df.join( - store.load_dfs(f_evt, [x[1] for x in flds_arr], group).rename( - columns={y: x for x, y in flds_arr} - ), - how="outer", - ) + + _df = store.read( + group, + f_evt, + field_mask=[x[1] for x in flds_arr], + )[ + 0 + ].view_as("pd") + + _df = _df.rename(columns={y: x for x, y in flds_arr}) + df = df.join(_df, how="outer") # take care of vector like fields if isinstance(flds_vov, list): log.debug("Processing VoV-like fields") - lstore = store.LH5Store() for fld in flds_vov: if "missing_value" not in tbl_cfg["skimmed_fields"][fld[0]].keys(): raise ValueError( f"({fld[0]}) is a VectorOfVector field and 
no missing_value is specified" ) - vls, _ = lstore.read(group + fld[1], f_evt) + vls, _ = store.read(group + fld[1], f_evt) mv = tbl_cfg["skimmed_fields"][fld[0]]["missing_value"] if mv in ["np.inf", "-np.inf", "np.nan"]: mv = eval(mv) @@ -181,7 +186,7 @@ def build_skm( mode = tbl_cfg["global_fields"][k]["aggregation_mode"] fld = tbl_cfg["global_fields"][k]["evt_field"] - obj, _ = lstore.read(group + fld, f_evt) + obj, _ = store.read(group + fld, f_evt) if not isinstance(obj, VectorOfVectors): raise ValueError( f"global {k} operation not possible, since {fld} is not an VectorOfVectors" From a0cda34a29aa93ec54220d0bacceefa3381acde5 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 11 Jan 2024 15:29:18 +0100 Subject: [PATCH 31/73] Bug fix in build_evt tests --- src/pygama/evt/build_evt.py | 24 ++++++++++++------------ tests/evt/test_build_evt.py | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 5c9f291d4..faea920cc 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -1397,9 +1397,9 @@ def build_evt( ) store.write( - obj=res, - name=group + k, - lh5_file=f_evt_tmp, + res, + group + k, + f_evt_tmp, wo_mode=wo_mode, ) @@ -1454,9 +1454,9 @@ def build_evt( if isinstance(obj, np.ndarray): obj = Array(result["values"]) store.write( - obj=obj, - name=group + k, - lh5_file=f_evt_tmp, + obj, + group + k, + f_evt_tmp, wo_mode=wo_mode, ) @@ -1467,9 +1467,9 @@ def build_evt( for fld in tbl_cfg["outputs"]: obj, _ = store.read(group + fld, f_evt_tmp) store.write( - obj=obj, - name=group + fld, - lh5_file=f_evt, + obj, + group + fld, + f_evt, wo_mode=wo_mode, ) else: @@ -1557,9 +1557,9 @@ def skim_evt( for fld in fields: ob, _ = store.read(fld, f_evt, idx=idx_list) store.write( - obj=ob, - name=fld, - lh5_file=of_tmp, + ob, + fld, + of_tmp, wo_mode="o", ) diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index f771579c5..94e4dfb58 100644 --- 
a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -145,7 +145,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): with pytest.raises(RuntimeError): build_evt(f_dsp, f_tcm, f_hit, outfile, f_config) - with pytest.raises(RuntimeError): + with pytest.raises(KeyError): build_evt(f_tcm, f_hit, f_dsp, outfile, f_config) with pytest.raises(TypeError): From ebdcd9c05e2e7d29e005496078d141459c650407 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 11 Jan 2024 16:52:08 +0100 Subject: [PATCH 32/73] Docstring cosmetics --- src/pygama/evt/build_evt.py | 520 +++++++++++++++++++----------------- src/pygama/hit/build_hit.py | 16 +- src/pygama/skm/build_skm.py | 95 ++++--- 3 files changed, 333 insertions(+), 298 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index faea920cc..d36255be8 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -1,5 +1,5 @@ """ -This module implements routines to build the evt tier. +This module implements routines to build the `evt` tier. """ from __future__ import annotations @@ -52,44 +52,59 @@ def evaluate_expression( defv=np.nan, sorter: str = None, ) -> dict: - """ - Evaluates the expression defined by the user across all channels according to the mode + """Evaluates the expression defined by the user across all channels + according to the mode. Parameters ---------- f_tcm - Path to tcm tier file + path to `tcm` tier file. f_evt - Path to event tier file + path to `evt` tier file. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. chns - List of channel names across which expression gets evaluated (form: "ch") + list of channel names across which expression gets evaluated (form: + ``ch``). chns_rm - List of channels which get set to default value during evaluation. In function mode they are removed entirely (form: "ch") + list of channels which get set to default value during evaluation. 
In + function mode they are removed entirely (form: ``ch``) mode - The mode determines how the event entry is calculated across channels. Options are: - - "first_at:sorter": aggregates across channels by returning the expression of the channel with smallest value of sorter. - - "last_at": aggregates across channels by returning the expression of the channel with largest value of sorter. - - "sum": aggregates by summation. - - "any": aggregates by logical or. - - "all": aggregates by logical and. - - "keep_at:ch_field": aggregates according to passed ch_field - - "gather": Channels are not combined, but result saved as VectorOfVectors. + The mode determines how the event entry is calculated across channels. + Options are: + + - ``first_at:sorter``: aggregates across channels by returning the + expression of the channel with smallest value of sorter. + - ``last_at``: aggregates across channels by returning the expression of + the channel with largest value of sorter. + - ``sum``: aggregates by summation. + - ``any``: aggregates by logical or. + - ``all``: aggregates by logical and. + - ``keep_at:ch_field``: aggregates according to passed ch_field + - ``gather``: Channels are not combined, but result saved as + :class:`.VectorOfVectors`. + qry - A query that can mask the aggregation. + a query that can mask the aggregation. expr - The expression. That can be any mathematical equation/comparison. If mode == function, the expression needs to be a special processing function defined in modules (e.g. "modules.spm.get_energy). In the expression parameters from either hit, dsp, evt tier (from operations performed before this one! --> JSON operations order matters), or from the "parameters" field can be used. + the expression. That can be any mathematical equation/comparison. If + `mode` is ``function``, the expression needs to be a special processing + function defined in modules (e.g. :func:`.modules.spm.get_energy`). 
In + the expression parameters from either hit, dsp, evt tier (from + operations performed before this one! Dictionary operations order + matters), or from the ``parameters`` field can be used. nrows - Number of rows to be processed. + number of rows to be processed. para - Dictionary of parameters defined in the "parameters" field in the configuration JSON file. + dictionary of parameters defined in the ``parameters`` field in the + configuration dictionary. defv - default value of evaluation + default value of evaluation. sorter - can be used to sort vector outputs according to sorter expression (see :func:`evaluate_to_vector`) + can be used to sort vector outputs according to sorter expression (see + :func:`evaluate_to_vector`). """ store = LH5Store() @@ -293,21 +308,21 @@ def find_parameters( idx_ch: np.ndarray, exprl: list, ) -> dict: - """ - Wraps :func:`load_vars_to_nda` to return parameters from hit and dsp tiers. + """Wraps :func:`load_vars_to_nda` to return parameters from `hit` and `dsp` + tiers. Parameters ---------- f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. ch - rawid in the tiers + "rawid" in the tiers. idx_ch - index array of entries to be read from files + index array of entries to be read from files. exprl - list of tuples (tier, field) to be found in the hit/dsp tiers + list of tuples ``(tier, field)`` to be found in the `hit/dsp` tiers. """ # find fields in either dsp, hit @@ -318,20 +333,19 @@ def find_parameters( def load_vars_to_nda(f: str, group: str, exprl: list, idx: np.ndarray = None) -> dict: - """ - Maps parameter expressions to parameters if found in f. - Blows up VectorOfVectors to ArrayOfEqualSizedArrays. + """Maps parameter expressions to parameters if found in `f`. + Blows up :class:`.VectorOfVectors` to :class:`.ArrayOfEqualSizedArrays`. Parameters ---------- f - Path to a LGDO file + path to a LGDO file. group - additional group in f + additional group in `f`. 
idx - index array of entries to be read from files + index array of entries to be read from files. exprl - list of parameter-tuples (root_group, field) to be found in f + list of parameter-tuples ``(root_group, field)`` to be found in `f`. """ store = LH5Store() @@ -382,31 +396,31 @@ def get_data_at_channel( outsize: int, defv, ) -> np.ndarray: - """ - Evaluates an expression and returns the result + """Evaluates an expression and returns the result. Parameters ---------- ch - rawid of channel to be evaluated + "rawid" of channel to be evaluated. idx_ch - array of indices to be evaluated + array of indices to be evaluated. expr - expression to be evaluated + expression to be evaluated. exprl - list of parameter-tuples (root_group, field) found in the expression + list of parameter-tuples ``(root_group, field)`` found in the expression. var_ph - dict of additional parameters that are not channel dependent + dict of additional parameters that are not channel dependent. is_evaluated - if false, the expression does not get evaluated but an array of default values is returned + if false, the expression does not get evaluated but an array of default + values is returned. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. outsize - size of the return array + size of the return array. defv - default value + default value. """ if not is_evaluated: @@ -444,23 +458,22 @@ def get_mask_from_query( f_hit: str, f_dsp: str, ) -> np.ndarray: - """ - Evaluates an query expression and returns a mask accordingly + """Evaluates a query expression and returns a mask accordingly. Parameters ---------- qry - query expression + query expression. length - length of the return mask + length of the return mask. ch - rawid of channel to be evaluated + "rawid" of channel to be evaluated. idx_ch - array of indices to be evaluated + array of indices to be evaluated. f_hit - Path to hit tier file + path to `hit` tier file. 
f_dsp - Path to dsp tier file + path to `dsp` tier file. """ # get sub evt based query condition if needed @@ -498,37 +511,37 @@ def evaluate_to_first( var_ph: dict = None, defv=np.nan, ) -> dict: - """ - aggregates across channels by returning the expression of the channel with smallest value of sorter. + """Aggregates across channels by returning the expression of the channel + with smallest value of `sorter`. Parameters ---------- idx - tcm index array + `tcm` index array. ids - tcm id array + `tcm` id array. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. chns - list of channels to be aggregated + list of channels to be aggregated. chns_rm - list of channels to be skipped from evaluation and set to default value + list of channels to be skipped from evaluation and set to default value. expr - expression string to be evaluated + expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier,field) + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. qry - query expression to mask aggregation + query expression to mask aggregation. nrows - length of output array + length of output array. sorter - tuple of field in hit/dsp/evt tier to evaluate (tier,field) + tuple of field in `hit/dsp/evt` tier to evaluate ``(tier, field)``. var_ph - dictionary of evt and additional parameters and their values + dictionary of `evt` and additional parameters and their values. defv - default value + default value. """ # define dimension of output array @@ -592,37 +605,37 @@ def evaluate_to_last( var_ph: dict = None, defv=np.nan, ) -> dict: - """ - aggregates across channels by returning the expression of the channel with largest value of sorter. + """Aggregates across channels by returning the expression of the channel + with largest value of `sorter`. Parameters ---------- idx - tcm index array + `tcm` index array. ids - tcm id array + `tcm` id array. 
f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. chns - list of channels to be aggregated + list of channels to be aggregated. chns_rm - list of channels to be skipped from evaluation and set to default value + list of channels to be skipped from evaluation and set to default value. expr - expression string to be evaluated + expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier,field) + list of dsp/hit/evt parameter tuples in expression ``(tier, field)``. qry - query expression to mask aggregation + query expression to mask aggregation. nrows - length of output array + length of output array. sorter - tuple of field in hit/dsp/evt tier to evaluate (tier,field) + tuple of field in `hit/dsp/evt` tier to evaluate ``(tier, field)``. var_ph - dictionary of evt and additional parameters and their values + dictionary of `evt` and additional parameters and their values. defv - default value + default value. """ # define dimension of output array @@ -682,35 +695,34 @@ def evaluate_to_tot( var_ph: dict = None, defv=np.nan, ) -> dict: - """ - aggregates by summation across channels. + """Aggregates by summation across channels. Parameters ---------- idx - tcm index array + tcm index array. ids - tcm id array + tcm id array. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. chns - list of channels to be aggregated + list of channels to be aggregated. chns_rm - list of channels to be skipped from evaluation and set to default value + list of channels to be skipped from evaluation and set to default value. expr - expression string to be evaluated + expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier,field) + list of dsp/hit/evt parameter tuples in expression (tier, field). qry - query expression to mask aggregation + query expression to mask aggregation. 
nrows length of output array var_ph - dictionary of evt and additional parameters and their values + dictionary of evt and additional parameters and their values. defv - default value + default value. """ # define dimension of output array @@ -759,35 +771,35 @@ def evaluate_to_any( var_ph: dict = None, defv=np.nan, ) -> dict: - """ - aggregates by logical or operation across channels. If the expression evaluates to a non boolean value it is casted to bool. + """Aggregates by logical or operation across channels. If the expression + evaluates to a non boolean value it is casted to boolean. Parameters ---------- idx - tcm index array + `tcm` index array. ids - tcm id array + `tcm` id array. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. chns - list of channels to be aggregated + list of channels to be aggregated. chns_rm - list of channels to be skipped from evaluation and set to default value + list of channels to be skipped from evaluation and set to default value. expr - expression string to be evaluated + expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier,field) + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. qry - query expression to mask aggregation + query expression to mask aggregation. nrows - length of output array + length of output array. var_ph - dictionary of evt and additional parameters and their values + dictionary of `evt` and additional parameters and their values. defv - default value + default value. """ # define dimension of output array @@ -836,35 +848,35 @@ def evaluate_to_all( var_ph: dict = None, defv=np.nan, ) -> dict: - """ - aggregates by logical and operation across channels. If the expression evaluates to a non boolean value it is casted to bool. + """Aggregates by logical and operation across channels. If the expression + evaluates to a non boolean value it is casted to boolean. 
Parameters ---------- idx - tcm index array + `tcm` index array. ids - tcm id array + `tcm` id array. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. chns - list of channels to be aggregated + list of channels to be aggregated. chns_rm - list of channels to be skipped from evaluation and set to default value + list of channels to be skipped from evaluation and set to default value. expr - expression string to be evaluated + expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier,field) + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. qry - query expression to mask aggregation + query expression to mask aggregation. nrows - length of output array + length of output array. var_ph - dictionary of evt and additional parameters and their values + dictionary of evt and additional parameters and their values. defv - default value + default value. """ # define dimension of output array @@ -911,31 +923,30 @@ def evaluate_at_channel( var_ph: dict = None, defv=np.nan, ) -> dict: - """ - aggregates by evaluating the expression at a given channel + """Aggregates by evaluating the expression at a given channel. Parameters ---------- idx - tcm index array + `tcm` index array. ids - tcm id array + `tcm` id array. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. chns_rm - list of channels to be skipped from evaluation and set to default value + list of channels to be skipped from evaluation and set to default value. expr - expression string to be evaluated + expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier,field) + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. ch_comp - array of rawids at which the expression is evaluated + array of rawids at which the expression is evaluated. 
var_ph - dictionary of evt and additional parameters and their values + dictionary of `evt` and additional parameters and their values. defv - default value + default value. """ out = np.full(len(ch_comp.nda), defv, dtype=type(defv)) @@ -977,31 +988,31 @@ def evaluate_at_channel_vov( var_ph: dict = None, defv=np.nan, ) -> dict: - """ - same as :func:`evaluate_at_channel` but evaluates expression at non flat channels VectorOfVectors. + """Same as :func:`evaluate_at_channel` but evaluates expression at non + flat channels :class:`.VectorOfVectors`. Parameters ---------- idx - tcm index array + `tcm` index array. ids - tcm id array + `tcm` id array. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. expr - expression string to be evaluated + expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier,field) + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. ch_comp - array of rawids at which the expression is evaluated + array of "rawid"s at which the expression is evaluated. chns_rm - list of channels to be skipped from evaluation and set to default value + list of channels to be skipped from evaluation and set to default value. var_ph - dictionary of evt and additional parameters and their values + dictionary of `evt` and additional parameters and their values. defv - default value + default value. """ # blow up vov to aoesa @@ -1055,41 +1066,41 @@ def evaluate_to_aoesa( defv=np.nan, missv=np.nan, ) -> np.ndarray: - """ - Aggregates by returning a ArrayOfEqualSizedArrays of evaluated expressions of channels that fulfill a query expression. + """Aggregates by returning an :class:`.ArrayOfEqualSizedArrays` of evaluated + expressions of channels that fulfill a query expression. Parameters ---------- idx - tcm index array + `tcm` index array. ids - tcm id array + `tcm` id array. f_hit - Path to hit tier file + path to `hit` tier file. 
f_dsp - Path to dsp tier file + path to `dsp` tier file. chns - list of channels to be aggregated + list of channels to be aggregated. chns_rm - list of channels to be skipped from evaluation and set to default value + list of channels to be skipped from evaluation and set to default value. expr - expression string to be evaluated + expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier,field) + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. qry - query expression to mask aggregation + query expression to mask aggregation. nrows - length of output VectorOfVectors + length of output :class:`.VectorOfVectors`. ch_comp - array of rawids at which the expression is evaluated + array of "rawid"s at which the expression is evaluated. var_ph - dictionary of evt and additional parameters and their values + dictionary of `evt` and additional parameters and their values. defv - default value + default value. missv - missing value + missing value. sorter - sorts the entries in the vector according to sorter expression + sorts the entries in the vector according to sorter expression. """ # define dimension of output array out = np.full((nrows, len(chns)), missv) @@ -1138,39 +1149,41 @@ def evaluate_to_vector( defv=np.nan, sorter: str = None, ) -> dict: - """ - Aggregates by returning a VectorOfVector of evaluated expressions of channels that fulfill a query expression. + """Aggregates by returning a :class:`.VectorOfVector` of evaluated + expressions of channels that fulfill a query expression. Parameters ---------- idx - tcm index array + `tcm` index array. ids - tcm id array + `tcm` id array. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. chns - list of channels to be aggregated + list of channels to be aggregated. 
chns_rm - list of channels to be skipped from evaluation and set to default value + list of channels to be skipped from evaluation and set to default value. expr - expression string to be evaluated + expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier,field) + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. qry - query expression to mask aggregation + query expression to mask aggregation. nrows - length of output VectorOfVectors + length of output :class:`.VectorOfVectors`. ch_comp - array of rawids at which the expression is evaluated + array of "rawids" at which the expression is evaluated. var_ph - dictionary of evt and additional parameters and their values + dictionary of `evt` and additional parameters and their values. defv - default value + default value. sorter - sorts the entries in the vector according to sorter expression. acend_by: results in an vector ordered ascending, decend_by: sorts descending + sorts the entries in the vector according to sorter expression. + ``ascend_by:`` results in an vector ordered ascending, + ``decend_by:`` sorts descending. """ out = evaluate_to_aoesa( idx, @@ -1233,80 +1246,88 @@ def build_evt( group: str = "/evt/", tcm_group: str = "/hardware_tcm_1/", ) -> None: - """ - Transform data from the hit and dsp levels which a channel sorted - to a event sorted data format + """Transform data from the `hit` and `dsp` levels which a channel sorted to a + event sorted data format. Parameters ---------- f_tcm - input LH5 file of the tcm level + input LH5 file of the tcm level. f_dsp - input LH5 file of the dsp level + input LH5 file of the dsp level. f_hit - input LH5 file of the hit level - + input LH5 file of the hit level. f_evt - name of the output file + name of the output file. evt_config - name of JSON file or dict defining evt fields. Channel lists can be defined by importing a meta module. 
The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "aggregation_mode" defines how the channels should be combined (see evaluate_expression). "expression" defnies the mathematical/special function to apply (see evaluate_expression), - "query" defines an expression to mask the aggregation. - "parameters" defines any other parameter used in expression. For example: + name of configuration file or dictionary defining event fields. Channel + lists can be defined by importing a metadata module. + + - ``operations`` defines the fields ``name=key``, where ``channels`` + specifies the channels used to for this field (either a string or a + list of strings), + - ``aggregation_mode`` defines how the channels should be combined (see + :func:`evaluate_expression`). + - ``expression`` defnies the mathematical/special function to apply + (see :func:`evaluate_expression`), + - ``query`` defines an expression to mask the aggregation. + - ``parameters`` defines any other parameter used in expression. - .. code-block::json + For example: + + .. 
code-block:: json { - "channels": { - "geds_on": ["ch1084803", "ch1084804", "ch1121600"], - "spms_on": ["ch1057600", "ch1059201", "ch1062405"], - "muon": "ch1027202", + "channels": { + "geds_on": ["ch1084803", "ch1084804", "ch1121600"], + "spms_on": ["ch1057600", "ch1059201", "ch1062405"], + "muon": "ch1027202", + }, + "operations": { + "energy_id":{ + "channels": "geds_on", + "aggregation_mode": "gather", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "tcm.array_id", + "sort": "ascend_by:dsp.tp_0_est" }, - "operations": { - "energy_id":{ - "channels": "geds_on", - "aggregation_mode": "gather", - "query": "hit.cuspEmax_ctc_cal>25", - "expression": "tcm.array_id", - "sort": "ascend_by:dsp.tp_0_est" - }, - "energy":{ - "aggregation_mode": "keep_at:evt.energy_id", - "expression": "hit.cuspEmax_ctc_cal>25" - } - "is_muon_rejected":{ - "channels": "muon", - "aggregation_mode": "any", - "expression": "dsp.wf_max>a", - "parameters": {"a":15100}, - "initial": false - }, - "multiplicity":{ - "channels": ["geds_on","geds_no_psd","geds_ac"], - "aggregation_mode": "sum", - "expression": "hit.cuspEmax_ctc_cal > a", - "parameters": {"a":25}, - "initial": 0 - }, - "t0":{ - "aggregation_mode": "keep_at:evt.energy_id", - "expression": "dsp.tp_0_est" - }, - "lar_energy":{ - "channels": "spms_on", - "aggregation_mode": "function", - "expression": ".modules.spm.get_energy(0.5,evt.t0,48000,1000,5000)" - }, + "energy":{ + "aggregation_mode": "keep_at:evt.energy_id", + "expression": "hit.cuspEmax_ctc_cal>25" } + "is_muon_rejected":{ + "channels": "muon", + "aggregation_mode": "any", + "expression": "dsp.wf_max>a", + "parameters": {"a":15100}, + "initial": false + }, + "multiplicity":{ + "channels": ["geds_on","geds_no_psd","geds_ac"], + "aggregation_mode": "sum", + "expression": "hit.cuspEmax_ctc_cal > a", + "parameters": {"a":25}, + "initial": 0 + }, + "t0":{ + "aggregation_mode": "keep_at:evt.energy_id", + "expression": "dsp.tp_0_est" + }, + "lar_energy":{ + "channels": 
"spms_on", + "aggregation_mode": "function", + "expression": ".modules.spm.get_energy(0.5,evt.t0,48000,1000,5000)" + }, + } } wo_mode - writing mode + writing mode. group - lh5 root group name + LH5 root group name. tcm_group - lh5 root group in tcm file + LH5 root group in tcm file. """ - store = LH5Store() tbl_cfg = evt_config if not isinstance(tbl_cfg, (str, dict)): @@ -1488,21 +1509,23 @@ def skim_evt( wo_mode="n", evt_group="/evt/", ) -> None: - """ - Skimms events from a evt file which are fullfling the expression, discards all other events. + """Skims events from an `evt` file which are fulfilling the expression, + discards all other events. Parameters ---------- f_evt - input LH5 file of the evt level + input LH5 file of the `evt` level. expression - skimming expression. Can contain variables from event file or from the params dictionary. + skimming expression. Can contain variables from event file or from the + `params` dictionary. f_out - output LH5 file. Can be None if wo_mode is set to overwrite f_evt. + output LH5 file. Can be ``None`` if `wo_mode` is set to overwrite `f_evt`. wo_mode - Write mode: "o"/"overwrite" overwrites f_evt. "n"/"new" writes to a new file specified in f_out. + Write mode: ``o``/``overwrite`` overwrites f_evt. ``n``/``new`` writes + to a new file specified in `f_out`. evt_group - lh5 root group of the evt file + LH5 root group of the `evt` file. """ if wo_mode not in ["o", "overwrite", "n", "new"]: @@ -1543,7 +1566,8 @@ def skim_evt( if res.shape != (nrows,): raise ValueError( - f"The expression must result to 1D with length = event number. Current shape is {res.shape}" + "The expression must result to 1D with length = event number. " + f"Current shape is {res.shape}" ) res = res.astype(bool) diff --git a/src/pygama/hit/build_hit.py b/src/pygama/hit/build_hit.py index 2b3e8ef5f..2a6d6a066 100644 --- a/src/pygama/hit/build_hit.py +++ b/src/pygama/hit/build_hit.py @@ -48,14 +48,14 @@ def build_hit( .. 
code-block:: json { - "outputs": ["calE", "AoE"], - "operations": { - "calE": { - "expression": "sqrt(a + b * trapEmax**2)", - "parameters": {"a": "1.23", "b": "42.69"}, - }, - "AoE": {"expression": "A_max/calE"}, - } + "outputs": ["calE", "AoE"], + "operations": { + "calE": { + "expression": "sqrt(a + b * trapEmax**2)", + "parameters": {"a": "1.23", "b": "42.69"}, + }, + "AoE": {"expression": "A_max/calE"}, + } } The ``outputs`` array lists columns that will be effectively written in diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index 7f5d2de3b..8bad1050e 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -1,5 +1,6 @@ """ -This module implements routines to build the evt tier. +This module implements routines to build the `skm` tier, consisting of skimmed +data from the `evt` tier. """ from __future__ import annotations @@ -25,67 +26,77 @@ def build_skm( wo_mode="w", group: str = "/evt/", skim_format: str = "parquet", -): - """ - Builds a skimmed file from a (set) of evt tier file(s). +) -> None: + """Builds a skimmed file from a (set) of evt tier file(s). Parameters ---------- f_evt - list/path of evt file(s) + list/path of `evt` file(s). f_skm - name of the skm output file + name of the `skm` output file. skm_conf - name of JSON file or dict defining skm fields. multiplicity defines upto which row length VectorOfVector fields should be kept. Skimmed fields are forwarded from the evt tier and clipped/padded according to missing_value if needed. Global fields define an operation to reduce the dimension of VectorOfVector event fields. + name of configuration file or dictionary defining `skm` fields. + + - ``multiplicity`` defines up to which row length + :class:`.VectorOfVector` fields should be kept. + - ``index_field`` + - ``skimmed_fields`` are forwarded from the evt tier and clipped/padded + according to ``missing_value`` if needed. 
+ - ``global_fields`` defines an operation to reduce the dimension of + :class:`.VectorOfVector` event fields. + For example: - .. code-block::json + .. code-block:: json { - "multiplicity": 2, - "index_field": "timestamp", - "skimmed_fields": { - "timestamp":{ - "evt_field": "timestamp" - }, - "is_muon_rejected":{ - "evt_field": "is_muon_rejected" - }, - "multiplicity":{ - "evt_field": "multiplicity" - }, - "energy":{ - "evt_field": "energy", - "missing_value": "np.nan" - }, - "energy_id":{ - "evt_field": "energy_id", - "missing_value": 0 - }, - "global_fields":{ - "energy_sum":{ - "aggregation_mode": "sum", - "evt_field": "energy" - }, - "is_all_physical":{ - "aggregation_mode": "all", - "evt_field": "is_physical" - }, - } + "multiplicity": 2, + "index_field": "timestamp", + "skimmed_fields": { + "timestamp":{ + "evt_field": "timestamp" + }, + "is_muon_rejected":{ + "evt_field": "is_muon_rejected" + }, + "multiplicity":{ + "evt_field": "multiplicity" + }, + "energy":{ + "evt_field": "energy", + "missing_value": "np.nan" + }, + "energy_id":{ + "evt_field": "energy_id", + "missing_value": 0 + }, + "global_fields":{ + "energy_sum":{ + "aggregation_mode": "sum", + "evt_field": "energy" + }, + "is_all_physical":{ + "aggregation_mode": "all", + "evt_field": "is_physical" + }, } + } } wo_mode writing mode. - - ``write_safe`` or ``w``: only proceed with writing if the file does not already exists. + + - ``write_safe`` or ``w``: only proceed with writing if the file does + not already exists. - ``append`` or ``a``: append to file. - ``overwrite`` or ``o``: replaces existing file. + group - lh5 root group name of the evt tier + LH5 root group name of the evt tier. skim_format - data format of the skimmed output (hdf or parquet) + data format of the skimmed output (``hdf`` or ``parquet``). 
""" - log = logging.getLogger(__name__) log.info("Starting skimming") log.debug(f"I am skimning {len(f_evt) if isinstance(f_evt,list) else 1} files") From b4a422b5506cd286fc7642d762bbf9f71211798e Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 11 Jan 2024 16:59:05 +0100 Subject: [PATCH 33/73] Type hints cosmetics --- src/pygama/evt/build_evt.py | 89 +++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 44 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index d36255be8..b42a88ef5 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -15,6 +15,7 @@ import numpy as np from lgdo import Array, VectorOfVectors, lh5 from lgdo.lh5 import LH5Store +from numpy.typing import NDArray log = logging.getLogger(__name__) @@ -49,7 +50,7 @@ def evaluate_expression( nrows: int, para: dict = None, qry: str = None, - defv=np.nan, + defv: bool | int | float = np.nan, sorter: str = None, ) -> dict: """Evaluates the expression defined by the user across all channels @@ -305,7 +306,7 @@ def find_parameters( f_hit: str, f_dsp: str, ch: str, - idx_ch: np.ndarray, + idx_ch: NDArray, exprl: list, ) -> dict: """Wraps :func:`load_vars_to_nda` to return parameters from `hit` and `dsp` @@ -332,7 +333,7 @@ def find_parameters( return dsp_dic | var -def load_vars_to_nda(f: str, group: str, exprl: list, idx: np.ndarray = None) -> dict: +def load_vars_to_nda(f: str, group: str, exprl: list, idx: NDArray = None) -> dict: """Maps parameter expressions to parameters if found in `f`. Blows up :class:`.VectorOfVectors` to :class:`.ArrayOfEqualSizedArrays`. 
@@ -386,7 +387,7 @@ def load_vars_to_nda(f: str, group: str, exprl: list, idx: np.ndarray = None) -> def get_data_at_channel( ch: str, - idx_ch: np.ndarray, + idx_ch: NDArray, expr: str, exprl: list, var_ph: dict, @@ -451,10 +452,10 @@ def get_data_at_channel( def get_mask_from_query( - qry: str | np.ndarray, + qry: str | NDArray, length: int, ch: str, - idx_ch: np.ndarray, + idx_ch: NDArray, f_hit: str, f_dsp: str, ) -> np.ndarray: @@ -497,19 +498,19 @@ def get_mask_from_query( def evaluate_to_first( - idx: np.ndarray, - ids: np.ndarray, + idx: NDArray, + ids: NDArray, f_hit: str, f_dsp: str, chns: list, chns_rm: list, expr: str, exprl: list, - qry: str | np.ndarray, + qry: str | NDArray, nrows: int, sorter: tuple, var_ph: dict = None, - defv=np.nan, + defv: bool | int | float = np.nan, ) -> dict: """Aggregates across channels by returning the expression of the channel with smallest value of `sorter`. @@ -591,19 +592,19 @@ def evaluate_to_first( def evaluate_to_last( - idx: np.ndarray, - ids: np.ndarray, + idx: NDArray, + ids: NDArray, f_hit: str, f_dsp: str, chns: list, chns_rm: list, expr: str, exprl: list, - qry: str | np.ndarray, + qry: str | NDArray, nrows: int, sorter: tuple, var_ph: dict = None, - defv=np.nan, + defv: bool | int | float = np.nan, ) -> dict: """Aggregates across channels by returning the expression of the channel with largest value of `sorter`. @@ -682,18 +683,18 @@ def evaluate_to_last( def evaluate_to_tot( - idx: np.ndarray, - ids: np.ndarray, + idx: NDArray, + ids: NDArray, f_hit: str, f_dsp: str, chns: list, chns_rm: list, expr: str, exprl: list, - qry: str | np.ndarray, + qry: str | NDArray, nrows: int, var_ph: dict = None, - defv=np.nan, + defv: bool | int | float = np.nan, ) -> dict: """Aggregates by summation across channels. 
@@ -758,18 +759,18 @@ def evaluate_to_tot( def evaluate_to_any( - idx: np.ndarray, - ids: np.ndarray, + idx: NDArray, + ids: NDArray, f_hit: str, f_dsp: str, chns: list, chns_rm: list, expr: str, exprl: list, - qry: str | np.ndarray, + qry: str | NDArray, nrows: int, var_ph: dict = None, - defv=np.nan, + defv: bool | int | float = np.nan, ) -> dict: """Aggregates by logical or operation across channels. If the expression evaluates to a non boolean value it is casted to boolean. @@ -835,18 +836,18 @@ def evaluate_to_any( def evaluate_to_all( - idx: np.ndarray, - ids: np.ndarray, + idx: NDArray, + ids: NDArray, f_hit: str, f_dsp: str, chns: list, chns_rm: list, expr: str, exprl: list, - qry: str | np.ndarray, + qry: str | NDArray, nrows: int, var_ph: dict = None, - defv=np.nan, + defv: bool | int | float = np.nan, ) -> dict: """Aggregates by logical and operation across channels. If the expression evaluates to a non boolean value it is casted to boolean. @@ -912,8 +913,8 @@ def evaluate_to_all( def evaluate_at_channel( - idx: np.ndarray, - ids: np.ndarray, + idx: NDArray, + ids: NDArray, f_hit: str, f_dsp: str, chns_rm: list, @@ -921,7 +922,7 @@ def evaluate_at_channel( exprl: list, ch_comp: Array, var_ph: dict = None, - defv=np.nan, + defv: bool | int | float = np.nan, ) -> dict: """Aggregates by evaluating the expression at a given channel. @@ -977,8 +978,8 @@ def evaluate_at_channel( def evaluate_at_channel_vov( - idx: np.ndarray, - ids: np.ndarray, + idx: NDArray, + ids: NDArray, f_hit: str, f_dsp: str, expr: str, @@ -986,7 +987,7 @@ def evaluate_at_channel_vov( ch_comp: VectorOfVectors, chns_rm: list, var_ph: dict = None, - defv=np.nan, + defv: bool | int | float = np.nan, ) -> dict: """Same as :func:`evaluate_at_channel` but evaluates expression at non flat channels :class:`.VectorOfVectors`. 
@@ -1052,18 +1053,18 @@ def evaluate_at_channel_vov( def evaluate_to_aoesa( - idx: np.ndarray, - ids: np.ndarray, + idx: NDArray, + ids: NDArray, f_hit: str, f_dsp: str, chns: list, chns_rm: list, expr: str, exprl: list, - qry: str | np.ndarray, + qry: str | NDArray, nrows: int, var_ph: dict = None, - defv=np.nan, + defv: bool | int | float = np.nan, missv=np.nan, ) -> np.ndarray: """Aggregates by returning an :class:`.ArrayOfEqualSizedArrays` of evaluated @@ -1135,18 +1136,18 @@ def evaluate_to_aoesa( def evaluate_to_vector( - idx: np.ndarray, - ids: np.ndarray, + idx: NDArray, + ids: NDArray, f_hit: str, f_dsp: str, chns: list, chns_rm: list, expr: str, exprl: list, - qry: str | np.ndarray, + qry: str | NDArray, nrows: int, var_ph: dict = None, - defv=np.nan, + defv: bool | int | float = np.nan, sorter: str = None, ) -> dict: """Aggregates by returning a :class:`.VectorOfVector` of evaluated @@ -1287,13 +1288,13 @@ def build_evt( "energy_id":{ "channels": "geds_on", "aggregation_mode": "gather", - "query": "hit.cuspEmax_ctc_cal>25", + "query": "hit.cuspEmax_ctc_cal > 25", "expression": "tcm.array_id", "sort": "ascend_by:dsp.tp_0_est" }, "energy":{ "aggregation_mode": "keep_at:evt.energy_id", - "expression": "hit.cuspEmax_ctc_cal>25" + "expression": "hit.cuspEmax_ctc_cal > 25" } "is_muon_rejected":{ "channels": "muon", @@ -1303,7 +1304,7 @@ def build_evt( "initial": false }, "multiplicity":{ - "channels": ["geds_on","geds_no_psd","geds_ac"], + "channels": ["geds_on", "geds_no_psd", "geds_ac"], "aggregation_mode": "sum", "expression": "hit.cuspEmax_ctc_cal > a", "parameters": {"a":25}, @@ -1316,7 +1317,7 @@ def build_evt( "lar_energy":{ "channels": "spms_on", "aggregation_mode": "function", - "expression": ".modules.spm.get_energy(0.5,evt.t0,48000,1000,5000)" + "expression": ".modules.spm.get_energy(0.5, evt.t0, 48000, 1000, 5000)" }, } } From c4184f8ebe39d9771e4ddd1383f452856e7ee460 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 11 Jan 2024 17:22:18 
+0100 Subject: [PATCH 34/73] Replace another deprecated call to load_nda --- src/pygama/evt/build_evt.py | 10 +--------- src/pygama/skm/build_skm.py | 4 +--- tests/evt/test_build_evt.py | 2 +- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index b42a88ef5..37e360bb0 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -1378,13 +1378,7 @@ def build_evt( elif isinstance(v, list): chns[k] = [e for e in v] - nrows = len( - lh5.load_nda(f_tcm, ["cumulative_length"], tcm_group)["cumulative_length"] - ) - # nrows = store.read_n_rows(f"{tcm_group}/cumulative_length", f_tcm) - log.info( - f"Applying {len(tbl_cfg['operations'].keys())} operations to key {f_tcm.split('-')[-2]}" - ) + nrows = store.read_n_rows(f"{tcm_group}/cumulative_length", f_tcm) # Define temporary file f_evt_tmp = f"{os.path.dirname(f_evt)}/{os.path.basename(f_evt).split('.')[0]}_tmp{random.randrange(9999):04d}.lh5" @@ -1499,8 +1493,6 @@ def build_evt( os.remove(f_evt_tmp) - log.info("Done") - def skim_evt( f_evt: str, diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index 8bad1050e..98d02a033 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -98,8 +98,8 @@ def build_skm( data format of the skimmed output (``hdf`` or ``parquet``). 
""" log = logging.getLogger(__name__) - log.info("Starting skimming") log.debug(f"I am skimning {len(f_evt) if isinstance(f_evt,list) else 1} files") + tbl_cfg = skm_conf if not isinstance(tbl_cfg, (str, dict)): raise TypeError() @@ -276,5 +276,3 @@ def build_skm( df.to_parquet(f_skm, append=True) else: raise ValueError(f"wo_mode {wo_mode} not valid.") - - log.info("done") diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 94e4dfb58..e3e378ab6 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -142,7 +142,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")) f_config = f"{config_dir}/basic-evt-config.json" - with pytest.raises(RuntimeError): + with pytest.raises(KeyError): build_evt(f_dsp, f_tcm, f_hit, outfile, f_config) with pytest.raises(KeyError): From ad906ffc8d0799863a567e3abebd2bbee709cb65 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 11 Jan 2024 17:30:56 +0100 Subject: [PATCH 35/73] Rename evt.modules.legend.legend_meta to evt.modules.legend.metadata --- src/pygama/evt/modules/legend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pygama/evt/modules/legend.py b/src/pygama/evt/modules/legend.py index f12b81e6e..f2f8137ef 100644 --- a/src/pygama/evt/modules/legend.py +++ b/src/pygama/evt/modules/legend.py @@ -4,7 +4,7 @@ from importlib import import_module -def legend_meta(params: dict) -> list: +def metadata(params: dict) -> list: # only import legend meta data when needed. 
# LEGEND collaborators can use the meta keyword # While for users w/o access to the LEGEND meta data this is still working From 7fd3a93bca8b760c2a4885633b5ef7e559dc4eb5 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 15 Jan 2024 18:06:55 +0100 Subject: [PATCH 36/73] keep evt tier in memory instead of io after each operation --- src/pygama/evt/build_evt.py | 90 ++++++++++--------------------------- 1 file changed, 24 insertions(+), 66 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 37e360bb0..1f845cb5b 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -8,12 +8,11 @@ import json import logging import os -import random import re from importlib import import_module import numpy as np -from lgdo import Array, VectorOfVectors, lh5 +from lgdo import Array, Table, VectorOfVectors, lh5 from lgdo.lh5 import LH5Store from numpy.typing import NDArray @@ -40,7 +39,6 @@ def num_and_pars(value: str, par_dic: dict): def evaluate_expression( f_tcm: str, - f_evt: str, f_hit: str, f_dsp: str, chns: list, @@ -48,6 +46,7 @@ def evaluate_expression( mode: str, expr: str, nrows: int, + table: Table = None, para: dict = None, qry: str = None, defv: bool | int | float = np.nan, @@ -60,8 +59,6 @@ def evaluate_expression( ---------- f_tcm path to `tcm` tier file. - f_evt - path to `evt` tier file. f_hit path to `hit` tier file. f_dsp @@ -98,6 +95,8 @@ def evaluate_expression( matters), or from the ``parameters`` field can be used. nrows number of rows to be processed. + table + table of 'evt' tier data. para dictionary of parameters defined in the ``parameters`` field in the configuration dictionary. 
@@ -113,8 +112,8 @@ def evaluate_expression( # find parameters in evt file or in parameters exprl = re.findall(r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", expr) var_ph = {} - if os.path.exists(f_evt): - var_ph = load_vars_to_nda(f_evt, "", exprl) + if table: + var_ph = var_ph | table if para: var_ph = var_ph | para @@ -122,9 +121,7 @@ def evaluate_expression( # evaluate expression func, params = expr.split("(") params = ( - params.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_") + params.replace("dsp.", "dsp_").replace("hit.", "hit_").replace("evt.", "") ) params = [f_hit, f_dsp, f_tcm, [x for x in chns if x not in chns_rm]] + [ num_and_pars(e, var_ph) for e in params[:-1].split(",") @@ -144,11 +141,8 @@ def evaluate_expression( raise ValueError("Query can't be a mix of evt tier and lower tiers.") # if it is an evt query we can evaluate it directly here - if os.path.exists(f_evt) and "evt." in qry: - var_qry = load_vars_to_nda( - f_evt, "", re.findall(r"(evt).([a-zA-Z_$][\w$]*)", qry) - ) - qry_mask = eval(qry.replace("evt.", "evt_"), var_qry) + if table and "evt." in qry: + qry_mask = eval(qry.replace("evt.", ""), table) # load TCM data to define an event ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") @@ -156,13 +150,12 @@ def evaluate_expression( # switch through modes if ( - os.path.exists(f_evt) + table and "keep_at:" == mode[:8] and "evt." 
== mode[8:][:4] - and mode[8:].split(".")[-1] - in [e.split("/")[-1] for e in lh5.ls(f_evt, "/evt/")] + and mode[8:].split(".")[-1] in table.keys() ): - ch_comp, _ = store.read(mode[8:].replace(".", "/"), f_evt) + ch_comp = table[mode[8:].replace("evt.", "")] if isinstance(ch_comp, Array): return evaluate_at_channel( idx, @@ -1380,44 +1373,18 @@ def build_evt( nrows = store.read_n_rows(f"{tcm_group}/cumulative_length", f_tcm) - # Define temporary file - f_evt_tmp = f"{os.path.dirname(f_evt)}/{os.path.basename(f_evt).split('.')[0]}_tmp{random.randrange(9999):04d}.lh5" + table = Table(size=nrows) for k, v in tbl_cfg["operations"].items(): log.debug("Processing field" + k) # if mode not defined in operation, it can only be an operation on the evt level. if "aggregation_mode" not in v.keys(): - exprl = re.findall(r"(evt).([a-zA-Z_$][\w$]*)", v["expression"]) var = {} - if os.path.exists(f_evt_tmp): - var = load_vars_to_nda(f_evt_tmp, "", exprl) - if "parameters" in v.keys(): var = var | v["parameters"] - res = eval(v["expression"].replace("evt.", "evt_"), var) - - # now check what dimension we have after the evaluation - if len(res.shape) == 1: - res = Array(res) - elif len(res.shape) == 2: - res = VectorOfVectors( - flattened_data=res.flatten()[~np.isnan(res.flatten())], - cumulative_length=np.cumsum( - np.count_nonzero(~np.isnan(res), axis=1) - ), - ) - else: - raise NotImplementedError( - f"Currently only 2d formats are supported, the evaluated array has the dimension {res.shape}" - ) - - store.write( - res, - group + k, - f_evt_tmp, - wo_mode=wo_mode, - ) + res = table.eval(v["expression"].replace("evt.", ""), var) + table.add_field(k, res) # Else we build the event entry else: @@ -1452,7 +1419,6 @@ def build_evt( result = evaluate_expression( f_tcm, - f_evt_tmp, f_hit, f_dsp, chns_e, @@ -1460,6 +1426,7 @@ def build_evt( v["aggregation_mode"], v["expression"], nrows, + table, pars, qry, defaultv, @@ -1469,30 +1436,21 @@ def build_evt( obj = result["values"] if 
isinstance(obj, np.ndarray): obj = Array(result["values"]) - store.write( - obj, - group + k, - f_evt_tmp, - wo_mode=wo_mode, - ) - # write output fields into f_evt and delete temporary file + table.add_field(k, obj) + + # write output fields into f_evt if "outputs" in tbl_cfg.keys(): if len(tbl_cfg["outputs"]) < 1: log.warning("No output fields specified, no file will be written.") - for fld in tbl_cfg["outputs"]: - obj, _ = store.read(group + fld, f_evt_tmp) - store.write( - obj, - group + fld, - f_evt, - wo_mode=wo_mode, - ) + else: + clms_to_remove = [e for e in table.keys() if e not in tbl_cfg["outputs"]] + for fld in clms_to_remove: + table.remove_field(fld, True) + store.write(obj=table, name=group, lh5_file=f_evt, wo_mode=wo_mode) else: log.warning("No output fields specified, no file will be written.") - os.remove(f_evt_tmp) - def skim_evt( f_evt: str, From 17e0fb227d866e2b4bb30f150b41d4ef2576ff7e Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 15 Jan 2024 18:36:31 +0100 Subject: [PATCH 37/73] fix deprecated syntax --- tests/evt/test_build_evt.py | 71 ++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index e3e378ab6..939b1263d 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -1,14 +1,15 @@ import os from pathlib import Path -import lgdo.lh5_store as store import numpy as np import pytest -from lgdo import Array, VectorOfVectors, load_nda, ls +from lgdo import Array, VectorOfVectors, lh5 +from lgdo.lh5 import LH5Store from pygama.evt import build_evt, skim_evt config_dir = Path(__file__).parent / "configs" +store = LH5Store() def test_basics(lgnd_test_data, tmptestdir): @@ -28,10 +29,11 @@ def test_basics(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(ls(outfile, "/evt/")) == 10 - nda = load_nda( - outfile, ["energy", "energy_aux", "energy_sum", "multiplicity"], "/evt/" - ) + assert 
len(lh5.ls(outfile, "/evt/")) == 10 + nda = { + e: store.read(f"/evt/{e}", outfile)[0].view_as("np") + for e in ["energy", "energy_aux", "energy_sum", "multiplicity"] + } assert ( nda["energy"][nda["multiplicity"] == 1] == nda["energy_aux"][nda["multiplicity"] == 1] @@ -62,12 +64,11 @@ def test_lar_module(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(ls(outfile, "/evt/")) == 10 - nda = load_nda( - outfile, - ["lar_multiplicity", "lar_multiplicity_dplms", "t0", "lar_time_shift"], - "/evt/", - ) + assert len(lh5.ls(outfile, "/evt/")) == 10 + nda = { + e: store.read(f"/evt/{e}", outfile)[0].view_as("np") + for e in ["lar_multiplicity", "lar_multiplicity_dplms", "t0", "lar_time_shift"] + } assert np.max(nda["lar_multiplicity"]) <= 3 assert np.max(nda["lar_multiplicity_dplms"]) <= 3 assert ((nda["lar_time_shift"] + nda["t0"]) >= 0).all() @@ -89,12 +90,11 @@ def test_lar_t0_vov_module(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(ls(outfile, "/evt/")) == 10 - nda = load_nda( - outfile, - ["lar_multiplicity", "lar_multiplicity_dplms", "lar_time_shift"], - "/evt/", - ) + assert len(lh5.ls(outfile, "/evt/")) == 10 + nda = { + e: store.read(f"/evt/{e}", outfile)[0].view_as("np") + for e in ["lar_multiplicity", "lar_multiplicity_dplms", "lar_time_shift"] + } assert np.max(nda["lar_multiplicity"]) <= 3 assert np.max(nda["lar_multiplicity_dplms"]) <= 3 @@ -115,14 +115,13 @@ def test_vov(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(ls(outfile, "/evt/")) == 9 - lstore = store.LH5Store() - vov_ene, _ = lstore.read("/evt/energy", outfile) - vov_aoe, _ = lstore.read("/evt/aoe", outfile) - arr_ac, _ = lstore.read("/evt/multiplicity", outfile) - vov_aoeene, _ = lstore.read("/evt/energy_times_aoe", outfile) - vov_eneac, _ = lstore.read("/evt/energy_times_multiplicity", outfile) - arr_ac2, _ = lstore.read("/evt/multiplicity_squared", outfile) + assert len(lh5.ls(outfile, "/evt/")) == 9 + vov_ene, _ = 
store.read("/evt/energy", outfile) + vov_aoe, _ = store.read("/evt/aoe", outfile) + arr_ac, _ = store.read("/evt/multiplicity", outfile) + vov_aoeene, _ = store.read("/evt/energy_times_aoe", outfile) + vov_eneac, _ = store.read("/evt/energy_times_multiplicity", outfile) + arr_ac2, _ = store.read("/evt/multiplicity_squared", outfile) assert isinstance(vov_ene, VectorOfVectors) assert isinstance(vov_aoe, VectorOfVectors) assert isinstance(arr_ac, Array) @@ -191,7 +190,7 @@ def test_query(lgnd_test_data, tmptestdir): group="/evt/", tcm_group="hardware_tcm_1", ) - assert len(ls(outfile, "/evt/")) == 12 + assert len(lh5.ls(outfile, "/evt/")) == 12 def test_vector_sort(lgnd_test_data, tmptestdir): @@ -234,13 +233,12 @@ def test_vector_sort(lgnd_test_data, tmptestdir): build_evt(f_tcm, f_dsp, f_hit, outfile, conf) assert os.path.exists(outfile) - assert len(ls(outfile, "/evt/")) == 4 - lstore = store.LH5Store() - vov_t0, _ = lstore.read("/evt/t0_acend", outfile) - nda_t0 = vov_t0.to_aoesa().nda + assert len(lh5.ls(outfile, "/evt/")) == 4 + vov_t0, _ = store.read("/evt/t0_acend", outfile) + nda_t0 = vov_t0.to_aoesa().view_as("np") assert ((np.diff(nda_t0) >= 0) | (np.isnan(np.diff(nda_t0)))).all() - vov_t0, _ = lstore.read("/evt/t0_decend", outfile) - nda_t0 = vov_t0.to_aoesa().nda + vov_t0, _ = store.read("/evt/t0_decend", outfile) + nda_t0 = vov_t0.to_aoesa().view_as("np") assert ((np.diff(nda_t0) <= 0) | (np.isnan(np.diff(nda_t0)))).all() @@ -255,17 +253,16 @@ def test_skimming(lgnd_test_data, tmptestdir): f_config = f"{config_dir}/vov-test-evt-config.json" build_evt(f_tcm, f_dsp, f_hit, outfile, f_config) - lstore = store.LH5Store() - ac = lstore.read("/evt/multiplicity", outfile)[0].nda + ac = store.read("/evt/multiplicity", outfile)[0].view_as("np") ac = len(ac[ac == 3]) outfile_skm = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" skim_evt(outfile, "multiplicity == 3", None, outfile_skm, "n") - assert ac == len(lstore.read("/evt/energy", 
outfile_skm)[0].to_aoesa().nda) + assert ac == len(store.read("/evt/energy", outfile_skm)[0].to_aoesa().view_as("np")) skim_evt(outfile, "multiplicity == 3", None, None, "o") - assert ac == len(lstore.read("/evt/energy", outfile)[0].to_aoesa().nda) + assert ac == len(store.read("/evt/energy", outfile)[0].to_aoesa().view_as("np")) with pytest.raises(ValueError): skim_evt(outfile, "multiplicity == 3", None, None, "bla") From 67a34930642a9f4bf078bd98dd2c4d63791b513a Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 15 Jan 2024 23:59:27 +0100 Subject: [PATCH 38/73] add awkward evaluation of VoVs --- src/pygama/evt/build_evt.py | 550 ++++++++-------------------------- src/pygama/evt/modules/spm.py | 32 +- 2 files changed, 150 insertions(+), 432 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 1f845cb5b..0e30c175c 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -11,8 +11,9 @@ import re from importlib import import_module +import awkward as ak import numpy as np -from lgdo import Array, Table, VectorOfVectors, lh5 +from lgdo import Array, ArrayOfEqualSizedArrays, Table, VectorOfVectors, lh5 from lgdo.lh5 import LH5Store from numpy.typing import NDArray @@ -51,7 +52,7 @@ def evaluate_expression( qry: str = None, defv: bool | int | float = np.nan, sorter: str = None, -) -> dict: +) -> Array | ArrayOfEqualSizedArrays | VectorOfVectors: """Evaluates the expression defined by the user across all channels according to the mode. 
@@ -130,8 +131,7 @@ def evaluate_expression( # load function dynamically p, m = func.rsplit(".", 1) met = getattr(import_module(p, package=__package__), m) - out = met(*params) - return {"values": out} + return met(*params) else: # check if query is either on channel basis or evt basis (and not a mix) @@ -187,35 +187,13 @@ def evaluate_expression( type(ch_comp) + " not supported (only Array and VectorOfVectors are supported)" ) - - elif "first_at:" in mode: + elif "first_at:" in mode or "last_at:" in mode: sorter = tuple( re.findall( r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", mode.split("first_at:")[-1] )[0] ) - return evaluate_to_first( - idx, - ids, - f_hit, - f_dsp, - chns, - chns_rm, - expr, - exprl, - qry_mask, - nrows, - sorter, - var_ph, - defv, - ) - elif "last_at:" in mode: - sorter = tuple( - re.findall( - r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", mode.split("last_at:")[-1] - )[0] - ) - return evaluate_to_last( + return evaluate_to_first_or_last( idx, ids, f_hit, @@ -229,9 +207,11 @@ def evaluate_expression( sorter, var_ph, defv, + is_first=True if "first_at:" in mode else False, ) - elif "sum" == mode: - return evaluate_to_tot( + elif mode in ["sum", "any", "all"]: + return evaluate_to_scalar( + mode, idx, ids, f_hit, @@ -261,36 +241,6 @@ def evaluate_expression( defv, sorter, ) - elif "any" == mode: - return evaluate_to_any( - idx, - ids, - f_hit, - f_dsp, - chns, - chns_rm, - expr, - exprl, - qry_mask, - nrows, - var_ph, - defv, - ) - elif "all" == mode: - return evaluate_to_all( - idx, - ids, - f_hit, - f_dsp, - chns, - chns_rm, - expr, - exprl, - qry_mask, - nrows, - var_ph, - defv, - ) else: raise ValueError(mode + " not a valid mode") @@ -320,67 +270,29 @@ def find_parameters( """ # find fields in either dsp, hit - var = load_vars_to_nda(f_hit, ch, exprl, idx_ch) - dsp_dic = load_vars_to_nda(f_dsp, ch, exprl, idx_ch) - - return dsp_dic | var - - -def load_vars_to_nda(f: str, group: str, exprl: list, idx: NDArray = None) -> dict: - """Maps parameter 
expressions to parameters if found in `f`. - Blows up :class:`.VectorOfVectors` to :class:`.ArrayOfEqualSizedArrays`. - - Parameters - ---------- - f - path to a LGDO file. - group - additional group in `f`. - idx - index array of entries to be read from files. - exprl - list of parameter-tuples ``(root_group, field)`` to be found in `f`. - """ + dsp_flds = [e[1] for e in exprl if e[0] == "dsp"] + hit_flds = [e[1] for e in exprl if e[0] == "hit"] store = LH5Store() - var = { - f"{e[0]}_{e[1]}": store.read( - f"{group.replace('/','')}/{e[0]}/{e[1]}", - f, - idx=idx, - )[0] - for e in exprl - if e[1] - in [x.split("/")[-1] for x in lh5.ls(f, f"{group.replace('/','')}/{e[0]}/")] - } + hit_dict, dsp_dict = {}, {} + if len(hit_flds) > 0: + hit_ak = store.read( + f"{ch.replace('/','')}/hit/", f_hit, field_mask=hit_flds, idx=idx_ch + )[0].view_as("ak") + hit_dict = dict(zip(["hit_" + e for e in ak.fields(hit_ak)], ak.unzip(hit_ak))) + if len(dsp_flds) > 0: + dsp_ak = store.read( + f"{ch.replace('/','')}/dsp/", f_dsp, field_mask=dsp_flds, idx=idx_ch + )[0].view_as("ak") + dsp_dict = dict(zip(["dsp_" + e for e in ak.fields(dsp_ak)], ak.unzip(dsp_ak))) - # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) - arr_keys = [] - for key, value in var.items(): - if isinstance(value, VectorOfVectors): - var[key] = value.to_aoesa().nda - elif isinstance(value, Array): - var[key] = value.nda - if var[key].ndim > 2: - raise ValueError("Dim > 2 not supported") - if var[key].ndim == 1: - arr_keys.append(key) - else: - raise ValueError(f"{type(value)} not supported") - - # now we also need to set dimensions if we have an expression - # consisting of a mix of VoV and Arrays - if len(arr_keys) > 0 and not set(arr_keys) == set(var.keys()): - for key in arr_keys: - var[key] = var[key][:, None] - - log.debug(f"Found parameters {var.keys()}") - return var + return hit_dict | dsp_dict def get_data_at_channel( ch: str, - idx_ch: NDArray, + 
ids: NDArray, + idx: NDArray, expr: str, exprl: list, var_ph: dict, @@ -396,8 +308,10 @@ def get_data_at_channel( ---------- ch "rawid" of channel to be evaluated. - idx_ch - array of indices to be evaluated. + idx + `tcm` index array. + ids + `tcm` id array. expr expression to be evaluated. exprl @@ -417,10 +331,15 @@ def get_data_at_channel( default value. """ + # get index list for this channel to be loaded + idx_ch = idx[ids == int(ch[2:])] + if not is_evaluated: res = np.full(outsize, defv, dtype=type(defv)) elif "tcm.array_id" == expr: res = np.full(outsize, int(ch[2:]), dtype=int) + elif "tcm.index" == expr: + res = np.where(ids == int(ch[2:]))[0] else: var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) @@ -436,10 +355,20 @@ def get_data_at_channel( var, ) - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(outsize, res, dtype=type(res)) + # in case the expression evaluates to a single value blow it up + if (not hasattr(res, "__len__")) or (isinstance(res, str)): + return np.full(outsize, res) + + # the resulting arrays need to be 1D from the operation, + # this can only change once we support larger than two dimensional LGDOs + # ak.to_numpy() raises error if array not regular + res = ak.to_numpy(res, allow_missing=False) + + # in this method only 1D values are allowed + if res.ndim > 1: + raise ValueError( + f"expression '{expr}' must return 1D array. If you are using VectorOfVectors or ArrayOfEqualSizedArrays, use awkward reduction functions to reduce the dimension" + ) return res @@ -448,7 +377,8 @@ def get_mask_from_query( qry: str | NDArray, length: int, ch: str, - idx_ch: NDArray, + ids: NDArray, + idx: NDArray, f_hit: str, f_dsp: str, ) -> np.ndarray: @@ -462,13 +392,17 @@ def get_mask_from_query( length of the return mask. ch "rawid" of channel to be evaluated. - idx_ch - array of indices to be evaluated. + idx + `tcm` index array. + ids + `tcm` id array. 
f_hit path to `hit` tier file. f_dsp path to `dsp` tier file. """ + # get index list for this channel to be loaded + idx_ch = idx[ids == int(ch[2:])] # get sub evt based query condition if needed if isinstance(qry, str): @@ -476,6 +410,16 @@ def get_mask_from_query( qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) + # in case the expression evaluates to a single value blow it up + if (not hasattr(limarr, "__len__")) or (isinstance(limarr, str)): + return np.full(len(idx_ch), limarr) + + limarr = ak.to_numpy(limarr, allow_missing=False) + if limarr.ndim > 1: + raise ValueError( + f"query '{qry}' must return 1D array. If you are using VectorOfVectors or ArrayOfEqualSizedArrays, use awkward reduction functions to reduce the dimension" + ) + # or forward the array elif isinstance(qry, np.ndarray): limarr = qry @@ -484,13 +428,14 @@ def get_mask_from_query( else: limarr = np.ones(length).astype(bool) + # explicit cast to bool if limarr.dtype != bool: limarr = limarr.astype(bool) return limarr -def evaluate_to_first( +def evaluate_to_first_or_last( idx: NDArray, ids: NDArray, f_hit: str, @@ -504,9 +449,10 @@ def evaluate_to_first( sorter: tuple, var_ph: dict = None, defv: bool | int | float = np.nan, -) -> dict: + is_first: bool = True, +) -> Array: """Aggregates across channels by returning the expression of the channel - with smallest value of `sorter`. + with value of `sorter`. Parameters ---------- @@ -536,11 +482,12 @@ def evaluate_to_first( dictionary of `evt` and additional parameters and their values. defv default value. 
+ is_first + defines if sorted by smallest or largest value of `sorter` """ # define dimension of output array out = np.full(nrows, defv, dtype=type(defv)) - out_chs = np.zeros(len(out), dtype=int) outt = np.zeros(len(out)) store = LH5Store() @@ -552,7 +499,8 @@ def evaluate_to_first( # evaluate at channel res = get_data_at_channel( ch, - idx_ch, + ids, + idx, expr, exprl, var_ph, @@ -564,11 +512,7 @@ def evaluate_to_first( ) # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) - - # append to out according to mode == first - if ch == chns[0]: - outt[:] = np.inf + limarr = get_mask_from_query(qry, len(res), ch, ids, idx, f_hit, f_dsp) # find if sorter is in hit or dsp t0 = store.read( @@ -577,105 +521,25 @@ def evaluate_to_first( idx=idx_ch, )[0].view_as("np") - out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) - out_chs[idx_ch] = np.where((t0 < outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) - outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) - - return {"values": out, "channels": out_chs} - - -def evaluate_to_last( - idx: NDArray, - ids: NDArray, - f_hit: str, - f_dsp: str, - chns: list, - chns_rm: list, - expr: str, - exprl: list, - qry: str | NDArray, - nrows: int, - sorter: tuple, - var_ph: dict = None, - defv: bool | int | float = np.nan, -) -> dict: - """Aggregates across channels by returning the expression of the channel - with largest value of `sorter`. - - Parameters - ---------- - idx - `tcm` index array. - ids - `tcm` id array. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - chns - list of channels to be aggregated. - chns_rm - list of channels to be skipped from evaluation and set to default value. - expr - expression string to be evaluated. - exprl - list of dsp/hit/evt parameter tuples in expression ``(tier, field)``. - qry - query expression to mask aggregation. - nrows - length of output array. 
- sorter - tuple of field in `hit/dsp/evt` tier to evaluate ``(tier, field)``. - var_ph - dictionary of `evt` and additional parameters and their values. - defv - default value. - """ - - # define dimension of output array - out = np.full(nrows, defv, dtype=type(defv)) - out_chs = np.zeros(len(out), dtype=int) - outt = np.zeros(len(out)) - - store = LH5Store() - - for ch in chns: - # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] - - # evaluate at channel - res = get_data_at_channel( - ch, - idx_ch, - expr, - exprl, - var_ph, - ch not in chns_rm, - f_hit, - f_dsp, - len(out), - defv, - ) + if t0.ndim > 1: + raise ValueError(f"sorter '{sorter[0]}/{sorter[1]}' must be a 1D array") - # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) + if is_first: + if ch == chns[0]: + outt[:] = np.inf - # append to out according to mode == last - # find if sorter is in hit or dsp - t0 = store.read( - f"{ch}/{sorter[0]}/{sorter[1]}", - f_hit if "hit" == sorter[0] else f_dsp, - idx=idx_ch, - )[0].view_as("np") + out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) + outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) - out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) - out_chs[idx_ch] = np.where((t0 > outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) - outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) + else: + out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) + outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) - return {"values": out, "channels": out_chs} + return Array(nda=out) -def evaluate_to_tot( +def evaluate_to_scalar( + mode: str, idx: NDArray, ids: NDArray, f_hit: str, @@ -688,11 +552,13 @@ def evaluate_to_tot( nrows: int, var_ph: dict = None, defv: bool | int | float = np.nan, -) -> dict: +) -> Array: """Aggregates by summation across channels. Parameters ---------- + mode + aggregation mode. idx tcm index array. 
ids @@ -728,161 +594,8 @@ def evaluate_to_tot( res = get_data_at_channel( ch, - idx_ch, - expr, - exprl, - var_ph, - ch not in chns_rm, - f_hit, - f_dsp, - len(out), - defv, - ) - - # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) - - # append to out according to mode == tot - if res.dtype == bool: - res = res.astype(int) - - out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) - - return {"values": out} - - -def evaluate_to_any( - idx: NDArray, - ids: NDArray, - f_hit: str, - f_dsp: str, - chns: list, - chns_rm: list, - expr: str, - exprl: list, - qry: str | NDArray, - nrows: int, - var_ph: dict = None, - defv: bool | int | float = np.nan, -) -> dict: - """Aggregates by logical or operation across channels. If the expression - evaluates to a non boolean value it is casted to boolean. - - Parameters - ---------- - idx - `tcm` index array. - ids - `tcm` id array. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - chns - list of channels to be aggregated. - chns_rm - list of channels to be skipped from evaluation and set to default value. - expr - expression string to be evaluated. - exprl - list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. - qry - query expression to mask aggregation. - nrows - length of output array. - var_ph - dictionary of `evt` and additional parameters and their values. - defv - default value. 
- """ - - # define dimension of output array - out = np.full(nrows, defv, dtype=type(defv)) - - for ch in chns: - # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] - - res = get_data_at_channel( - ch, - idx_ch, - expr, - exprl, - var_ph, - ch not in chns_rm, - f_hit, - f_dsp, - len(out), - defv, - ) - - # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) - - # append to out according to mode == any - if res.dtype != bool: - res = res.astype(bool) - - out[idx_ch] = out[idx_ch] | (res & limarr) - - return {"values": out} - - -def evaluate_to_all( - idx: NDArray, - ids: NDArray, - f_hit: str, - f_dsp: str, - chns: list, - chns_rm: list, - expr: str, - exprl: list, - qry: str | NDArray, - nrows: int, - var_ph: dict = None, - defv: bool | int | float = np.nan, -) -> dict: - """Aggregates by logical and operation across channels. If the expression - evaluates to a non boolean value it is casted to boolean. - - Parameters - ---------- - idx - `tcm` index array. - ids - `tcm` id array. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - chns - list of channels to be aggregated. - chns_rm - list of channels to be skipped from evaluation and set to default value. - expr - expression string to be evaluated. - exprl - list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. - qry - query expression to mask aggregation. - nrows - length of output array. - var_ph - dictionary of evt and additional parameters and their values. - defv - default value. 
- """ - - # define dimension of output array - out = np.full(nrows, defv, dtype=type(defv)) - - for ch in chns: - # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] - - res = get_data_at_channel( - ch, - idx_ch, + ids, + idx, expr, exprl, var_ph, @@ -894,15 +607,23 @@ def evaluate_to_all( ) # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) - - # append to out according to mode == all - if res.dtype != bool: - res = res.astype(bool) - - out[idx_ch] = out[idx_ch] & res & limarr + limarr = get_mask_from_query(qry, len(res), ch, ids, idx, f_hit, f_dsp) - return {"values": out} + # switch through modes + if "sum" == mode: + if res.dtype == bool: + res = res.astype(int) + out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) + if "any" == mode: + if res.dtype != bool: + res = res.astype(bool) + out[idx_ch] = out[idx_ch] | (res & limarr) + if "all" == mode: + if res.dtype != bool: + res = res.astype(bool) + out[idx_ch] = out[idx_ch] & res & limarr + + return Array(nda=out) def evaluate_at_channel( @@ -916,7 +637,7 @@ def evaluate_at_channel( ch_comp: Array, var_ph: dict = None, defv: bool | int | float = np.nan, -) -> dict: +) -> Array: """Aggregates by evaluating the expression at a given channel. 
Parameters @@ -949,12 +670,11 @@ def evaluate_at_channel( # skip default value if f"ch{ch}" not in lh5.ls(f_hit): continue - # get index list for this channel to be loaded - idx_ch = idx[ids == ch] res = get_data_at_channel( f"ch{ch}", - idx_ch, + ids, + idx, expr, exprl, var_ph, @@ -967,7 +687,7 @@ def evaluate_at_channel( out = np.where(ch == ch_comp.nda, res, out) - return {"values": out} + return Array(nda=out) def evaluate_at_channel_vov( @@ -981,7 +701,7 @@ def evaluate_at_channel_vov( chns_rm: list, var_ph: dict = None, defv: bool | int | float = np.nan, -) -> dict: +) -> VectorOfVectors: """Same as :func:`evaluate_at_channel` but evaluates expression at non flat channels :class:`.VectorOfVectors`. @@ -1010,16 +730,16 @@ def evaluate_at_channel_vov( """ # blow up vov to aoesa - out = ch_comp.to_aoesa().nda + out = ch_comp.to_aoesa().view_as("np") chns = np.unique(out[~np.isnan(out)]).astype(int) + type_name = None for ch in chns: - # get index list for this channel to be loaded - idx_ch = idx[ids == ch] res = get_data_at_channel( f"ch{ch}", - idx_ch, + ids, + idx, expr, exprl, var_ph, @@ -1042,7 +762,7 @@ def evaluate_at_channel_vov( flattened_data=out.flatten()[~np.isnan(out.flatten())].astype(type_name), cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)), ) - return {"values": out, "channels": ch_comp} + return out def evaluate_to_aoesa( @@ -1059,7 +779,7 @@ def evaluate_to_aoesa( var_ph: dict = None, defv: bool | int | float = np.nan, missv=np.nan, -) -> np.ndarray: +) -> ArrayOfEqualSizedArrays: """Aggregates by returning an :class:`.ArrayOfEqualSizedArrays` of evaluated expressions of channels that fulfill a query expression. 
@@ -1101,12 +821,10 @@ def evaluate_to_aoesa( i = 0 for ch in chns: - # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] - res = get_data_at_channel( ch, - idx_ch, + ids, + idx, expr, exprl, var_ph, @@ -1118,14 +836,14 @@ def evaluate_to_aoesa( ) # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) + limarr = get_mask_from_query(qry, len(res), ch, ids, idx, f_hit, f_dsp) # append to out according to mode == vov out[:, i][limarr] = res[limarr] i += 1 - return out + return ArrayOfEqualSizedArrays(nda=out) def evaluate_to_vector( @@ -1142,7 +860,7 @@ def evaluate_to_vector( var_ph: dict = None, defv: bool | int | float = np.nan, sorter: str = None, -) -> dict: +) -> VectorOfVectors: """Aggregates by returning a :class:`.VectorOfVector` of evaluated expressions of channels that fulfill a query expression. @@ -1193,7 +911,7 @@ def evaluate_to_vector( var_ph, defv, np.nan, - ) + ).view_as("np") # if a sorter is given sort accordingly if sorter is not None: @@ -1209,7 +927,7 @@ def evaluate_to_vector( [tuple(fld.split("."))], None, nrows, - ) + ).view_as("np") if "ascend_by" == md: out[np.arange(len(out))[:, None], np.argsort(s_val)] @@ -1220,14 +938,12 @@ def evaluate_to_vector( "sorter values can only have 'ascend_by' or 'descend_by' prefixes" ) - # This can be smarter - # shorten to vov (FUTURE: replace with awkward) out = VectorOfVectors( flattened_data=out.flatten()[~np.isnan(out.flatten())], cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)), ) - return {"values": out} + return out def build_evt( @@ -1417,7 +1133,7 @@ def build_evt( if "sort" in v.keys(): srter = v["sort"] - result = evaluate_expression( + obj = evaluate_expression( f_tcm, f_hit, f_dsp, @@ -1433,10 +1149,6 @@ def build_evt( srter, ) - obj = result["values"] - if isinstance(obj, np.ndarray): - obj = Array(result["values"]) - table.add_field(k, obj) # write output fields into f_evt diff --git 
a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 0907b3a13..a96c134c9 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -17,7 +17,7 @@ # get LAr energy per event over all channels -def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): +def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: trig = trgr if isinstance(trgr, VectorOfVectors): trig = trig.to_aoesa().nda @@ -58,11 +58,11 @@ def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) sum[idx_ch] = sum[idx_ch] + chsum - return sum + return Array(nda=sum) # get LAr majority per event over all channels -def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): +def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: trig = trgr if isinstance(trgr, VectorOfVectors): trig = trig.to_aoesa().nda @@ -103,11 +103,13 @@ def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): chsum = np.nansum(pes, axis=1) chmaj = np.where(chsum > lim, 1, 0) maj[idx_ch] = maj[idx_ch] + chmaj - return maj + return Array(nda=maj) # get LAr energy per event over all channels -def get_energy_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): +def get_energy_dplms( + f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax +) -> Array: trig = trgr if isinstance(trgr, VectorOfVectors): trig = trig.to_aoesa().nda @@ -147,11 +149,13 @@ def get_energy_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) sum[idx_ch] = sum[idx_ch] + chsum - return sum + return Array(nda=sum) # get LAr majority per event over all channels -def get_majority_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): +def get_majority_dplms( + f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax +) -> Array: trig = trgr 
if isinstance(trgr, VectorOfVectors): trig = trig.to_aoesa().nda @@ -192,10 +196,12 @@ def get_majority_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax chsum = np.nansum(pes, axis=1) chmaj = np.where(chsum > lim, 1, 0) maj[idx_ch] = maj[idx_ch] + chmaj - return maj + return Array(nda=maj) -def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, trail): +def get_etc( + f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, trail +) -> Array: # ignore stupid numpy warnings warnings.filterwarnings("ignore", r"All-NaN slice encountered") warnings.filterwarnings("ignore", r"invalid value encountered in true_divide") @@ -267,7 +273,7 @@ def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, tra / np.nansum(np.where((times >= tt), pes, 0), axis=(0, 2)), np.nansum(np.where((times >= tt), pes, 0), axis=(0, 2)), ) - return outi + return Array(nda=outi) else: outi = np.where( @@ -281,10 +287,10 @@ def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, tra / np.nansum(np.where((times >= tge[:, None]), pes, 0), axis=(0, 2)), np.nansum(pes, axis=(0, 2)), ) - return outi + return Array(nda=outi) -def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): +def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: store = LH5Store() energy_in_pe, _ = store.read( f"{chs[0]}/hit/energy_in_pe", @@ -334,4 +340,4 @@ def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): t1d = np.nanmin(times, axis=(0, 2)) - return t1d - tge + return Array(t1d - tge) From 333e90227b2f039c47b20e69acf42b65c2e9ce63 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 16 Jan 2024 11:40:42 +0100 Subject: [PATCH 39/73] allow for tcm index aggeregation and LAr pulse index --- src/pygama/evt/modules/spm.py | 216 ++++++++---------- .../module-test-t0-vov-evt-config.json | 14 +- tests/evt/test_build_evt.py | 8 +- 3 files changed, 110 insertions(+), 
128 deletions(-) diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index a96c134c9..deb2f6ca8 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -11,18 +11,19 @@ import warnings +import awkward as ak import numpy as np from lgdo import Array, VectorOfVectors from lgdo.lh5 import LH5Store -# get LAr energy per event over all channels -def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: +# get SiPM coincidence window mask +def get_spm_mask(lim, trgr, tdefault, tmin, tmax, pe, times) -> np.ndarray: trig = trgr if isinstance(trgr, VectorOfVectors): - trig = trig.to_aoesa().nda + trig = trig.to_aoesa().view_as("np") elif isinstance(trgr, Array): - trig = trig.nda + trig = trig.view_as("np") if isinstance(trig, np.ndarray) and trig.ndim == 2: trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig) trig = np.nanmin(trig, axis=1) @@ -31,14 +32,70 @@ def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Arr trig = np.where(np.isnan(trig), tdefault, trig) else: raise ValueError(f"Can't deal with t0 of type {type(trgr)}") + tmi = trig - tmin tma = trig + tmax - sum = np.zeros(len(trig)) + + mask = (times < tma[:, None] / 16) & (times > tmi[:, None] / 16) & (pe > lim) + return mask, trig + + +# get LAr indices according to mask per event over all channels +def get_masked_tcm_idx( + f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, get_pls_idx=False +) -> VectorOfVectors: # load TCM data to define an event store = LH5Store() ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + arr_lst = [] + for ch in chs: + idx_ch = idx[ids == int(ch[2:])] + energy_in_pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") + trigger_pos = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( + "np" + ) + mask, _ = get_spm_mask( + lim, 
trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos + ) + + if get_pls_idx: + out_idx = np.repeat( + np.arange(len(mask[0]))[:, None], repeats=len(mask), axis=1 + ).T + out_idx = np.where(mask, out_idx, np.nan) + out_idx = VectorOfVectors( + flattened_data=out_idx.flatten()[~np.isnan(out_idx.flatten())], + cumulative_length=np.cumsum( + np.count_nonzero(~np.isnan(out_idx), axis=1) + ), + dtype=int, + ).view_as("ak") + else: + out_idx = np.where(mask, np.where(ids == int(ch[2:]))[0][:, None], np.nan) + out_idx = VectorOfVectors( + flattened_data=out_idx.flatten()[~np.isnan(out_idx.flatten())], + cumulative_length=np.cumsum( + np.count_nonzero(~np.isnan(out_idx), axis=1) + ), + dtype=int, + ).view_as("ak") + + arr_lst.append(out_idx) + + return VectorOfVectors(array=ak.concatenate(arr_lst, axis=-1)) + + +# get LAr energy per event over all channels +def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: + # load TCM data to define an event + store = LH5Store() + ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") + idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + sum = np.zeros(np.max(idx) + 1) for ch in chs: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] @@ -48,10 +105,8 @@ def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Arr trigger_pos = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( "np" ) - mask = ( - (trigger_pos < tma[:, None] / 16) - & (trigger_pos > tmi[:, None] / 16) - & (energy_in_pe > lim) + mask, _ = get_spm_mask( + lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos ) pes = energy_in_pe pes = np.where(np.isnan(pes), 0, pes) @@ -63,26 +118,11 @@ def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Arr # get LAr majority per event over all channels def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: - trig = trgr - if isinstance(trgr, 
VectorOfVectors): - trig = trig.to_aoesa().nda - elif isinstance(trgr, Array): - trig = trig.nda - if isinstance(trig, np.ndarray) and trig.ndim == 2: - trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig) - trig = np.nanmin(trig, axis=1) - - elif isinstance(trig, np.ndarray) and trig.ndim == 1: - trig = np.where(np.isnan(trig), tdefault, trig) - else: - raise ValueError(f"Can't deal with t0 of type {type(trgr)}") - tmi = trig - tmin - tma = trig + tmax - maj = np.zeros(len(trig)) # load TCM data to define an event store = LH5Store() ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + maj = np.zeros(np.max(idx) + 1) for ch in chs: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] @@ -92,11 +132,10 @@ def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> A trigger_pos = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( "np" ) - mask = ( - (trigger_pos < tma[:, None] / 16) - & (trigger_pos > tmi[:, None] / 16) - & (energy_in_pe > lim) + mask, _ = get_spm_mask( + lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos ) + pes = energy_in_pe pes = np.where(np.isnan(pes), 0, pes) pes = np.where(mask, pes, 0) @@ -110,26 +149,11 @@ def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> A def get_energy_dplms( f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax ) -> Array: - trig = trgr - if isinstance(trgr, VectorOfVectors): - trig = trig.to_aoesa().nda - elif isinstance(trgr, Array): - trig = trig.nda - if isinstance(trig, np.ndarray) and trig.ndim == 2: - trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig) - trig = np.nanmin(trig, axis=1) - - elif isinstance(trig, np.ndarray) and trig.ndim == 1: - trig = np.where(np.isnan(trig), tdefault, trig) - else: - raise ValueError(f"Can't deal with t0 of type {type(trgr)}") - tmi = trig - tmin - tma = trig + 
tmax - sum = np.zeros(len(trig)) # load TCM data to define an event store = LH5Store() ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + sum = np.zeros(np.max(idx) + 1) for ch in chs: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] @@ -139,10 +163,8 @@ def get_energy_dplms( trigger_pos_dplms = store.read( f"{ch}/hit/trigger_pos_dplms", f_hit, idx=idx_ch )[0].view_as("np") - mask = ( - (trigger_pos_dplms < tma[:, None] / 16) - & (trigger_pos_dplms > tmi[:, None] / 16) - & (energy_in_pe_dplms > lim) + mask, _ = get_spm_mask( + lim, trgr, tdefault, tmin, tmax, energy_in_pe_dplms, trigger_pos_dplms ) pes = energy_in_pe_dplms pes = np.where(np.isnan(pes), 0, pes) @@ -156,26 +178,11 @@ def get_energy_dplms( def get_majority_dplms( f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax ) -> Array: - trig = trgr - if isinstance(trgr, VectorOfVectors): - trig = trig.to_aoesa().nda - elif isinstance(trgr, Array): - trig = trig.nda - if isinstance(trig, np.ndarray) and trig.ndim == 2: - trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig) - trig = np.nanmin(trig, axis=1) - - elif isinstance(trig, np.ndarray) and trig.ndim == 1: - trig = np.where(np.isnan(trig), tdefault, trig) - else: - raise ValueError(f"Can't deal with t0 of type {type(trgr)}") - tmi = trig - tmin - tma = trig + tmax - maj = np.zeros(len(trig)) # load TCM data to define an event store = LH5Store() ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + maj = np.zeros(np.max(idx) + 1) for ch in chs: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] @@ -185,10 +192,8 @@ def get_majority_dplms( trigger_pos_dplms = store.read( f"{ch}/hit/trigger_pos_dplms", f_hit, idx=idx_ch )[0].view_as("np") - mask = ( - (trigger_pos_dplms < tma[:, None] / 16) - & 
(trigger_pos_dplms > tmi[:, None] / 16) - & (energy_in_pe_dplms > lim) + mask, _ = get_spm_mask( + lim, trgr, tdefault, tmin, tmax, energy_in_pe_dplms, trigger_pos_dplms ) pes = energy_in_pe_dplms pes = np.where(np.isnan(pes), 0, pes) @@ -215,23 +220,6 @@ def get_etc( pes = np.zeros([len(chs), peshape[0], peshape[1]]) times = np.zeros([len(chs), peshape[0], peshape[1]]) - tge = trgr - if isinstance(trgr, VectorOfVectors): - tge = tge.to_aoesa().nda - elif isinstance(trgr, Array): - tge = tge.nda - if isinstance(tge, np.ndarray) and tge.ndim == 2: - tge = np.where(np.isnan(tge).all(axis=1)[:, None], tdefault, tge) - tge = np.nanmin(tge, axis=1) - - elif isinstance(tge, np.ndarray) and tge.ndim == 1: - tge = np.where(np.isnan(tge), tdefault, tge) - else: - raise ValueError(f"Can't deal with t0 of type {type(trgr)}") - - tmi = tge - tmin - tma = tge + tmax - # load TCM data to define an event store = LH5Store() ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") @@ -245,10 +233,8 @@ def get_etc( trigger_pos = store.read(f"{chs[i]}/hit/trigger_pos", f_hit, idx=idx_ch)[ 0 ].view_as("np") - mask = ( - (trigger_pos < tma[:, None] / 16) - & (trigger_pos > tmi[:, None] / 16) - & (energy_in_pe > lim) + mask, tge = get_spm_mask( + lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos ) pe = energy_in_pe time = trigger_pos * 16 @@ -292,52 +278,30 @@ def get_etc( def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: store = LH5Store() - energy_in_pe, _ = store.read( - f"{chs[0]}/hit/energy_in_pe", - f_hit, - ) - peshape = energy_in_pe.view_as("np").shape - times = np.zeros([len(chs), peshape[0], peshape[1]]) - - tge = trgr - if isinstance(trgr, VectorOfVectors): - tge = tge.to_aoesa().nda - elif isinstance(trgr, Array): - tge = tge.nda - if isinstance(tge, np.ndarray) and tge.ndim == 2: - tge = np.where(np.isnan(tge).all(axis=1)[:, None], tdefault, tge) - tge = np.nanmin(tge, axis=1) - - elif isinstance(tge, np.ndarray) and 
tge.ndim == 1: - tge = np.where(np.isnan(tge), tdefault, tge) - else: - raise ValueError(f"Can't deal with t0 of type {type(trgr)}") - - tmi = tge - tmin - tma = tge + tmax - # load TCM data to define an event ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + spm_tmin = np.full(np.max(idx), np.inf) for i in range(len(chs)): # get index list for this channel to be loaded idx_ch = idx[ids == int(chs[i][2:])] energy_in_pe = store.read(f"{chs[i]}/hit/energy_in_pe", f_hit, idx=idx_ch)[ 0 - ].view_as("np") + ].view_as("ak") trigger_pos = store.read(f"{chs[i]}/hit/trigger_pos", f_hit, idx=idx_ch)[ 0 - ].view_as("np") - mask = ( - (trigger_pos < tma[:, None] / 16) - & (trigger_pos > tmi[:, None] / 16) - & (energy_in_pe > lim) + ].view_as("ak") + mask, tge = get_spm_mask( + lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos ) time = trigger_pos * 16 - time = np.where(mask, time, np.nan) - times[i][idx_ch] = time - - t1d = np.nanmin(times, axis=(0, 2)) - - return Array(t1d - tge) + time = ak.min(ak.nan_to_none(time[mask]), axis=-1) + if not time: + return Array(nda=np.zeros(len(spm_tmin))) + time = ak.fill_none(time, tdefault) + if not time: + time = ak.to_numpy(time, allow_missing=False) + spm_tmin = np.where(time < spm_tmin, time, spm_tmin) + + return Array(spm_tmin - tge) diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index f1bf09a8e..ff16ea628 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -13,7 +13,9 @@ "lar_classifier", "lar_energy_dplms", "lar_multiplicity_dplms", - "lar_time_shift" + "lar_time_shift", + "lar_tcm_index", + "lar_pulse_index" ], "operations": { "energy": { @@ -65,6 +67,16 @@ "channels": "spms_on", "aggregation_mode": "function", "expression": ".modules.spm.get_time_shift(0.5,evt.t0,48000,1000,5000)" + }, + 
"lar_tcm_index": { + "channels": "spms_on", + "aggregation_mode": "function", + "expression": ".modules.spm.get_masked_tcm_idx(0.5,evt.t0,48000,1000,5000,False)" + }, + "lar_pulse_index": { + "channels": "spms_on", + "aggregation_mode": "function", + "expression": ".modules.spm.get_masked_tcm_idx(0.5,evt.t0,48000,1000,5000,True)" } } } diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 939b1263d..cfd3b92cb 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -1,6 +1,7 @@ import os from pathlib import Path +import awkward as ak import numpy as np import pytest from lgdo import Array, VectorOfVectors, lh5 @@ -90,7 +91,7 @@ def test_lar_t0_vov_module(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(lh5.ls(outfile, "/evt/")) == 10 + assert len(lh5.ls(outfile, "/evt/")) == 12 nda = { e: store.read(f"/evt/{e}", outfile)[0].view_as("np") for e in ["lar_multiplicity", "lar_multiplicity_dplms", "lar_time_shift"] @@ -98,6 +99,11 @@ def test_lar_t0_vov_module(lgnd_test_data, tmptestdir): assert np.max(nda["lar_multiplicity"]) <= 3 assert np.max(nda["lar_multiplicity_dplms"]) <= 3 + ch_idx = store.read("/evt/lar_tcm_index", outfile)[0].view_as("ak") + pls_idx = store.read("/evt/lar_pulse_index", outfile)[0].view_as("ak") + assert ak.count(ch_idx) == ak.count(pls_idx) + assert ak.all(ak.count(ch_idx, axis=-1) == ak.count(pls_idx, axis=-1)) + def test_vov(lgnd_test_data, tmptestdir): outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" From bad7d461648b2d69b050279793e748a4b5a8085d Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 16 Jan 2024 17:56:49 +0100 Subject: [PATCH 40/73] small bug fixes --- src/pygama/evt/build_evt.py | 10 ++++++---- src/pygama/evt/modules/spm.py | 13 ++++++++++++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 0e30c175c..cd64d6c57 100644 --- a/src/pygama/evt/build_evt.py 
+++ b/src/pygama/evt/build_evt.py @@ -114,7 +114,11 @@ def evaluate_expression( exprl = re.findall(r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", expr) var_ph = {} if table: - var_ph = var_ph | table + var_ph = var_ph | { + e: table[e].view_as("ak") + for e in table.keys() + if isinstance(table[e], (Array, ArrayOfEqualSizedArrays, VectorOfVectors)) + } if para: var_ph = var_ph | para @@ -349,9 +353,7 @@ def get_data_at_channel( # evaluate expression # move tier+dots in expression to underscores (e.g. evt.foo -> evt_foo) res = eval( - expr.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_"), + expr.replace("dsp.", "dsp_").replace("hit.", "hit_").replace("evt.", ""), var, ) diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index deb2f6ca8..8e2a8f3e7 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -24,6 +24,17 @@ def get_spm_mask(lim, trgr, tdefault, tmin, tmax, pe, times) -> np.ndarray: trig = trig.to_aoesa().view_as("np") elif isinstance(trgr, Array): trig = trig.view_as("np") + elif isinstance(trgr, ak.Array): + if trgr.ndim == 1: + trig = ak.to_numpy(trig) + else: + trig = ak.to_numpy( + ak.fill_none( + ak.pad_none(trig, target=ak.max(ak.count(trig, axis=-1)), axis=-1), + np.nan, + ), + allow_missing=False, + ) if isinstance(trig, np.ndarray) and trig.ndim == 2: trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig) trig = np.nanmin(trig, axis=1) @@ -281,7 +292,7 @@ def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> # load TCM data to define an event ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") - spm_tmin = np.full(np.max(idx), np.inf) + spm_tmin = np.full(np.max(idx) + 1, np.inf) for i in range(len(chs)): # get index list for this channel to be loaded idx_ch = idx[ids == int(chs[i][2:])] From 983663bfe86b9a0ec8ccc75169a8fd03288abcb2 Mon Sep 17 00:00:00 
2001 From: Patrick Krause Date: Tue, 16 Jan 2024 18:43:26 +0100 Subject: [PATCH 41/73] a bit of verbosity --- src/pygama/evt/build_evt.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index cd64d6c57..5fdc9310e 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -1165,6 +1165,11 @@ def build_evt( else: log.warning("No output fields specified, no file will be written.") + key = re.search(r"\d{8}T\d{6}Z", f_hit).group(0) + log.info( + f"Applied {len(tbl_cfg['operations'])} operations to key {key} and saved {len(tbl_cfg['outputs'])} evt fields across {len(chns)} channel groups" + ) + def skim_evt( f_evt: str, From e5bf02486e3465779685b86169f59436aeb9836f Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 16 Jan 2024 22:19:55 +0100 Subject: [PATCH 42/73] spm module enhancement --- src/pygama/evt/modules/spm.py | 34 ++++++++++++++++--- .../module-test-t0-vov-evt-config.json | 4 +-- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 8e2a8f3e7..aa0e41a71 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -52,8 +52,12 @@ def get_spm_mask(lim, trgr, tdefault, tmin, tmax, pe, times) -> np.ndarray: # get LAr indices according to mask per event over all channels +# mode 0 -> return pulse indices +# mode 1 -> return tcm indices +# mode 2 -> return rawids +# mode 3 -> return tcm_idx def get_masked_tcm_idx( - f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, get_pls_idx=False + f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, mode=0 ) -> VectorOfVectors: # load TCM data to define an event store = LH5Store() @@ -73,7 +77,7 @@ def get_masked_tcm_idx( lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos ) - if get_pls_idx: + if mode == 0: out_idx = np.repeat( np.arange(len(mask[0]))[:, None], repeats=len(mask), axis=1 ).T @@ -84,8 +88,8 @@ def 
get_masked_tcm_idx( np.count_nonzero(~np.isnan(out_idx), axis=1) ), dtype=int, - ).view_as("ak") - else: + ).view_as("ak", preserve_dtype=True) + elif mode == 1: out_idx = np.where(mask, np.where(ids == int(ch[2:]))[0][:, None], np.nan) out_idx = VectorOfVectors( flattened_data=out_idx.flatten()[~np.isnan(out_idx.flatten())], @@ -93,7 +97,27 @@ def get_masked_tcm_idx( np.count_nonzero(~np.isnan(out_idx), axis=1) ), dtype=int, - ).view_as("ak") + ).view_as("ak", preserve_dtype=True) + elif mode == 2: + out_idx = np.where(mask, int(ch[2:]), np.nan) + out_idx = VectorOfVectors( + flattened_data=out_idx.flatten()[~np.isnan(out_idx.flatten())], + cumulative_length=np.cumsum( + np.count_nonzero(~np.isnan(out_idx), axis=1) + ), + dtype=int, + ).view_as("ak", preserve_dtype=True) + elif mode == 3: + out_idx = np.where(mask, idx_ch[:, None], np.nan) + out_idx = VectorOfVectors( + flattened_data=out_idx.flatten()[~np.isnan(out_idx.flatten())], + cumulative_length=np.cumsum( + np.count_nonzero(~np.isnan(out_idx), axis=1) + ), + dtype=int, + ).view_as("ak", preserve_dtype=True) + else: + raise ValueError("Unknown mode") arr_lst.append(out_idx) diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index ff16ea628..64d3dd0e3 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -71,12 +71,12 @@ "lar_tcm_index": { "channels": "spms_on", "aggregation_mode": "function", - "expression": ".modules.spm.get_masked_tcm_idx(0.5,evt.t0,48000,1000,5000,False)" + "expression": ".modules.spm.get_masked_tcm_idx(0.5,evt.t0,48000,1000,5000,1)" }, "lar_pulse_index": { "channels": "spms_on", "aggregation_mode": "function", - "expression": ".modules.spm.get_masked_tcm_idx(0.5,evt.t0,48000,1000,5000,True)" + "expression": ".modules.spm.get_masked_tcm_idx(0.5,evt.t0,48000,1000,5000,0)" } } } From ccf73092ab5615c366bd529e882fbc7a27adebb9 Mon Sep 17 00:00:00 2001 
From: Patrick Krause Date: Wed, 17 Jan 2024 17:58:15 +0100 Subject: [PATCH 43/73] skm tier refactor and cleanup works --- src/pygama/evt/__init__.py | 4 +- src/pygama/evt/build_evt.py | 100 +------ src/pygama/skm/build_skm.py | 292 +++++++++------------ tests/evt/configs/vov-test-evt-config.json | 21 +- tests/evt/test_build_evt.py | 42 +-- tests/skm/configs/basic-skm-config.json | 34 +-- tests/skm/test_build_skm.py | 34 ++- 7 files changed, 204 insertions(+), 323 deletions(-) diff --git a/src/pygama/evt/__init__.py b/src/pygama/evt/__init__.py index 8bc8bf058..80b544455 100644 --- a/src/pygama/evt/__init__.py +++ b/src/pygama/evt/__init__.py @@ -2,8 +2,8 @@ Utilities for grouping hit data into events. """ -from .build_evt import build_evt, skim_evt +from .build_evt import build_evt from .build_tcm import build_tcm from .tcm import generate_tcm_cols -__all__ = ["build_tcm", "generate_tcm_cols", "build_evt", "skim_evt"] +__all__ = ["build_tcm", "generate_tcm_cols", "build_evt"] diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 5fdc9310e..a0cf1b5dc 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -7,7 +7,6 @@ import itertools import json import logging -import os import re from importlib import import_module @@ -941,7 +940,7 @@ def evaluate_to_vector( ) out = VectorOfVectors( - flattened_data=out.flatten()[~np.isnan(out.flatten())], + flattened_data=out.flatten()[~np.isnan(out.flatten())].astype(type(defv)), cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)), ) @@ -1130,8 +1129,12 @@ def build_evt( pars = v["parameters"] if "query" in v.keys(): qry = v["query"] - if "initial" in v.keys() and not v["initial"] == "np.nan": + if "initial" in v.keys(): defaultv = v["initial"] + if isinstance(defaultv, str) and ( + defaultv in ["np.nan", "np.inf", "-np.inf"] + ): + defaultv = eval(defaultv) if "sort" in v.keys(): srter = v["sort"] @@ -1169,94 +1172,3 @@ def build_evt( log.info( f"Applied 
{len(tbl_cfg['operations'])} operations to key {key} and saved {len(tbl_cfg['outputs'])} evt fields across {len(chns)} channel groups" ) - - -def skim_evt( - f_evt: str, - expression: str, - params: dict = None, - f_out: str = None, - wo_mode="n", - evt_group="/evt/", -) -> None: - """Skims events from an `evt` file which are fulfilling the expression, - discards all other events. - - Parameters - ---------- - f_evt - input LH5 file of the `evt` level. - expression - skimming expression. Can contain variables from event file or from the - `params` dictionary. - f_out - output LH5 file. Can be ``None`` if `wo_mode` is set to overwrite `f_evt`. - wo_mode - Write mode: ``o``/``overwrite`` overwrites f_evt. ``n``/``new`` writes - to a new file specified in `f_out`. - evt_group - LH5 root group of the `evt` file. - """ - - if wo_mode not in ["o", "overwrite", "n", "new"]: - raise ValueError( - wo_mode - + " is a invalid writing mode. Valid options are: 'o', 'overwrite','n','new'" - ) - store = LH5Store() - fields = lh5.ls(f_evt, evt_group) - nrows = store.read_n_rows(fields[0], f_evt) - # load fields in expression - exprl = re.findall(r"[a-zA-Z_$][\w$]*", expression) - var = {} - - flds = [ - e.split("/")[-1] for e in lh5.ls(f_evt, evt_group) if e.split("/")[-1] in exprl - ] - var = {e: store.read(evt_group + e, f_evt)[0] for e in flds} - - # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) - arr_keys = [] - for key, value in var.items(): - if isinstance(value, VectorOfVectors): - var[key] = value.to_aoesa().nda - elif isinstance(value, Array): - var[key] = value.nda - arr_keys.append(key) - - # now we also need to set dimensions if we have an expression - # consisting of a mix of VoV and Arrays - if len(arr_keys) > 0 and not set(arr_keys) == set(var.keys()): - for key in arr_keys: - var[key] = var[key][:, None] - - if params is not None: - var = var | params - res = eval(expression, var) - - if res.shape != 
(nrows,): - raise ValueError( - "The expression must result to 1D with length = event number. " - f"Current shape is {res.shape}" - ) - - res = res.astype(bool) - idx_list = np.arange(nrows, dtype=int)[res] - - of = f_out - if wo_mode in ["o", "overwrite"]: - of = f_evt - of_tmp = of.replace(of.split("/")[-1], ".tmp_" + of.split("/")[-1]) - - for fld in fields: - ob, _ = store.read(fld, f_evt, idx=idx_list) - store.write( - ob, - fld, - of_tmp, - wo_mode="o", - ) - - if os.path.exists(of): - os.remove(of) - os.rename(of_tmp, of) diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index 98d02a033..28e07bbee 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -1,6 +1,6 @@ """ This module implements routines to build the `skm` tier, consisting of skimmed -data from the `evt` tier. +data from lower tiers. """ from __future__ import annotations @@ -10,29 +10,36 @@ import os import awkward as ak -import h5py import numpy as np import pandas as pd -from lgdo import VectorOfVectors, lh5 +from lgdo import Array from lgdo.lh5 import LH5Store log = logging.getLogger(__name__) def build_skm( - f_evt: str | list, + f_evt: str, + f_hit: str, + f_dsp: str, + f_tcm: str, f_skm: str, skm_conf: dict | str, wo_mode="w", - group: str = "/evt/", skim_format: str = "parquet", ) -> None: - """Builds a skimmed file from a (set) of evt tier file(s). + """Builds a skimmed file from a (set) of evt/hit/dsp tier file(s). Parameters ---------- f_evt - list/path of `evt` file(s). + path of `evt` file. + f_hit + path of `hit` file. + f_dsp + path of `dsp` file. + f_tcm + path of `tcm` file. f_skm name of the `skm` output file. skm_conf @@ -40,11 +47,12 @@ def build_skm( - ``multiplicity`` defines up to which row length :class:`.VectorOfVector` fields should be kept. - - ``index_field`` - - ``skimmed_fields`` are forwarded from the evt tier and clipped/padded - according to ``missing_value`` if needed. 
- - ``global_fields`` defines an operation to reduce the dimension of - :class:`.VectorOfVector` event fields. + - ``index_field`` sets the index of the output table. If not given + the index are set es increasing integers. + - ``operations`` are forwarded from lower tiers and clipped/padded + according to ``missing_value`` if needed. If the forwarded field + is not an evt tier, ``tcm_idx`` must be passed that specifies the + value to pick across channels. For example: @@ -53,35 +61,24 @@ def build_skm( { "multiplicity": 2, "index_field": "timestamp", - "skimmed_fields": { - "timestamp":{ - "evt_field": "timestamp" - }, - "is_muon_rejected":{ - "evt_field": "is_muon_rejected" - }, - "multiplicity":{ - "evt_field": "multiplicity" - }, - "energy":{ - "evt_field": "energy", - "missing_value": "np.nan" - }, - "energy_id":{ - "evt_field": "energy_id", - "missing_value": 0 - }, - "global_fields":{ - "energy_sum":{ - "aggregation_mode": "sum", - "evt_field": "energy" - }, - "is_all_physical":{ - "aggregation_mode": "all", - "evt_field": "is_physical" - }, + "operations": { + "timestamp":{ + "forward_field": "evt.timestamp" + }, + "multiplicity":{ + "forward_field": "evt.multiplicity" + }, + "energy":{ + "forward_field": "hit.cuspEmax_ctc_cal", + "missing_value": "np.nan", + "tcm_idx": "evt.energy_idx" + }, + "energy_id":{ + "forward_field": "tcm.array_id", + "missing_value": 0, + "tcm_idx": "evt.energy_idx" + } } - } } wo_mode @@ -92,11 +89,10 @@ def build_skm( - ``append`` or ``a``: append to file. - ``overwrite`` or ``o``: replaces existing file. - group - LH5 root group name of the evt tier. skim_format data format of the skimmed output (``hdf`` or ``parquet``). 
""" + f_dict = {"evt": f_evt, "hit": f_hit, "dsp": f_dsp, "tcm": f_tcm} log = logging.getLogger(__name__) log.debug(f"I am skimning {len(f_evt) if isinstance(f_evt,list) else 1} files") @@ -107,140 +103,92 @@ def build_skm( with open(tbl_cfg) as f: tbl_cfg = json.load(f) - flds, flds_vov, flds_arr, multi = None, None, None, None - if "skimmed_fields" in tbl_cfg.keys(): - flds = tbl_cfg["skimmed_fields"].keys() - evt_flds = [(e, tbl_cfg["skimmed_fields"][e]["evt_field"]) for e in flds] - f = h5py.File(f_evt[0] if isinstance(f_evt, list) else f_evt, "r") - flds_vov = [ - x - for x in evt_flds - if x[1] - in [ - e.split("/")[-1] - for e in lh5.ls(f_evt[0] if isinstance(f_evt, list) else f_evt, group) - if "array<1>{array<1>{" in f[e].attrs.get("datatype") - ] - ] - flds_arr = [ - x - for x in evt_flds - if x not in flds_vov - and x[1] - in [ - e.split("/")[-1] - for e in lh5.ls(f_evt[0] if isinstance(f_evt, list) else f_evt, group) - ] - ] - - gflds = None - if "global_fields" in tbl_cfg.keys(): - gflds = list(tbl_cfg["global_fields"].keys()) - - if flds is None and gflds is None: - return - - # Check if multiplicity is given, if vector like fields are skimmed - if ( - isinstance(flds_vov, list) - and len(flds_vov) > 0 - and "multiplicity" not in tbl_cfg.keys() - ): - raise ValueError("If skiime fields are passed, multiplicity must be given") - - elif "multiplicity" in tbl_cfg.keys(): - multi = tbl_cfg["multiplicity"] - - # init pandas df - df = pd.DataFrame() - store = LH5Store() + # Check if multiplicity is given + if "multiplicity" not in tbl_cfg.keys(): + raise ValueError("multiplicity field missing") - # add array like fields - if isinstance(flds_arr, list): - log.debug("Crunching array-like fields") - - _df = store.read( - group, - f_evt, - field_mask=[x[1] for x in flds_arr], - )[ - 0 - ].view_as("pd") - - _df = _df.rename(columns={y: x for x, y in flds_arr}) - df = df.join(_df, how="outer") - - # take care of vector like fields - if isinstance(flds_vov, list): 
- log.debug("Processing VoV-like fields") - for fld in flds_vov: - if "missing_value" not in tbl_cfg["skimmed_fields"][fld[0]].keys(): - raise ValueError( - f"({fld[0]}) is a VectorOfVector field and no missing_value is specified" - ) - vls, _ = store.read(group + fld[1], f_evt) - mv = tbl_cfg["skimmed_fields"][fld[0]]["missing_value"] - if mv in ["np.inf", "-np.inf", "np.nan"]: - mv = eval(mv) - out = vls.to_aoesa(max_len=multi, fill_val=mv).nda - nms = [fld[0] + f"_{e}" for e in range(multi)] - df = df.join(pd.DataFrame(data=out, columns=nms), how="outer") - - # ok now build global fields if requested - if isinstance(gflds, list): - log.debug("Defining global fields") - for k in gflds: - if "aggregation_mode" not in tbl_cfg["global_fields"][k].keys(): - raise ValueError(f"global {k} operation needs aggregation mode") - if "evt_field" not in tbl_cfg["global_fields"][k].keys(): - raise ValueError(f"global {k} operation needs evt_field") - mode = tbl_cfg["global_fields"][k]["aggregation_mode"] - fld = tbl_cfg["global_fields"][k]["evt_field"] - - obj, _ = store.read(group + fld, f_evt) - if not isinstance(obj, VectorOfVectors): - raise ValueError( - f"global {k} operation not possible, since {fld} is not an VectorOfVectors" - ) - - obj_ak = obj.view_as("ak") - if mode in [ - "sum", - "prod", - "nansum", - "nanprod", - "any", - "all", - "mean", - "std", - "var", - ]: - df = df.join( - pd.DataFrame( - data=getattr(ak, mode)(obj_ak, axis=-1).to_numpy( - allow_missing=False - ), - columns=[k], - ) - ) + multi = int(tbl_cfg["multiplicity"]) + store = LH5Store() + df = pd.DataFrame() - elif mode in ["min", "max"]: - val = getattr(ak, mode)(obj_ak, axis=-1, mask_identity=True) - if "missing_value" not in tbl_cfg["global_fields"][k].keys(): + if "operations" in tbl_cfg.keys(): + for op in tbl_cfg["operations"].keys(): + miss_val = np.nan + if "missing_value" in tbl_cfg["operations"][op].keys(): + miss_val = tbl_cfg["operations"][op]["missing_value"] + if isinstance(miss_val, 
str) and ( + miss_val in ["np.nan", "np.inf", "-np.inf"] + ): + miss_val = eval(miss_val) + + fw_fld = tbl_cfg["operations"][op]["forward_field"].split(".") + if fw_fld[0] not in ["evt", "hit", "dsp", "tcm"]: + raise ValueError(f"{fw_fld[0]} is not a valid tier") + + # load object if from evt tier + if fw_fld[0] == "evt": + obj = store.read(f"/{fw_fld[0]}/{fw_fld[1]}", f_dict[fw_fld[0]])[ + 0 + ].view_as("ak") + + # else collect data from lower tier via tcm_idx + else: + if "tcm_idx" not in tbl_cfg["operations"][op].keys(): raise ValueError( - f"global {k} {mode} operation needs a missing value assigned" + f"{op} is an sub evt level operation. tcm_idx field must be specified" ) - mv = tbl_cfg["global_fields"][k]["missing_value"] - if mv == "np.inf": - mv = np.inf - elif mv == "-np.inf": - mv = -1 * np.inf - val = ak.fill_none(val, mv) - df = df.join( - pd.DataFrame(data=val.to_numpy(allow_missing=False), columns=[k]) - ) - else: - raise ValueError("aggregation mode not supported") + tcm_idx_fld = tbl_cfg["operations"][op]["tcm_idx"].split(".") + tcm_idx = store.read( + f"/{tcm_idx_fld[0]}/{tcm_idx_fld[1]}", f_dict[tcm_idx_fld[0]] + )[0].view_as("ak")[:, :multi] + + obj = ak.Array([[] for x in range(len(tcm_idx))]) + + # load TCM data to define an event + ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("ak") + ids = ak.unflatten(ids[ak.flatten(tcm_idx)], ak.count(tcm_idx, axis=-1)) + + idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("ak") + idx = ak.unflatten(idx[ak.flatten(tcm_idx)], ak.count(tcm_idx, axis=-1)) + + if "tcm.array_id" == tbl_cfg["operations"][op]["forward_field"]: + obj = ids + elif "tcm.array_idx" == tbl_cfg["operations"][op]["forward_field"]: + obj = idx + + else: + chns = np.unique( + ak.to_numpy(ak.flatten(ids), allow_missing=False) + ).astype(int) + + # Get the data + for ch in chns: + ch_idx = idx[ids == ch] + ct_idx = ak.count(ch_idx, axis=-1) + fl_idx = ak.to_numpy(ak.flatten(ch_idx), allow_missing=False) + och, _ 
= store.read( + f"ch{ch}/{fw_fld[0]}/{fw_fld[1]}", + f_dict[fw_fld[0]], + idx=fl_idx, + ) + if not isinstance(och, Array): + raise ValueError( + f"{type(och)} not supported. Forward only Array fields" + ) + och = och.view_as("ak") + och = ak.unflatten(och, ct_idx) + obj = ak.concatenate((obj, och), axis=-1) + + # Pad, clip and numpyfy + if obj.ndim > 1: + obj = ak.pad_none(obj, multi, clip=True) + obj = ak.to_numpy(ak.fill_none(obj, miss_val)) + + nms = [op] + if obj.ndim > 1: + nms = [f"{op}_{x}" for x in range(multi)] + + df = df.join(pd.DataFrame(data=obj, columns=nms), how="outer") # Set an index column if specified if "index_field" in tbl_cfg.keys(): diff --git a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json index ffdce3b31..32c2c0b59 100644 --- a/tests/evt/configs/vov-test-evt-config.json +++ b/tests/evt/configs/vov-test-evt-config.json @@ -6,7 +6,9 @@ "outputs": [ "timestamp", "energy", + "energy_sum", "energy_id", + "energy_idx", "aoe", "multiplicity", "is_saturated", @@ -27,11 +29,28 @@ "query": "hit.cuspEmax_ctc_cal>25", "expression": "hit.cuspEmax_ctc_cal" }, + "energy_sum": { + "channels": "geds_on", + "aggregation_mode": "sum", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "hit.cuspEmax_ctc_cal", + "initial": 0.0 + }, + "energy_idx": { + "channels": "geds_on", + "aggregation_mode": "gather", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "tcm.index", + "sort": "ascend_by:dsp.tp_0_est", + "initial": 0 + }, "energy_id": { "channels": "geds_on", "aggregation_mode": "gather", "query": "hit.cuspEmax_ctc_cal>25", - "expression": "tcm.array_id" + "expression": "tcm.array_id", + "sort": "ascend_by:dsp.tp_0_est", + "initial": 0 }, "aoe": { "aggregation_mode": "keep_at:evt.energy_id", diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index cfd3b92cb..89cc24386 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -7,7 +7,7 @@ from lgdo import Array, 
VectorOfVectors, lh5 from lgdo.lh5 import LH5Store -from pygama.evt import build_evt, skim_evt +from pygama.evt import build_evt config_dir = Path(__file__).parent / "configs" store = LH5Store() @@ -121,7 +121,7 @@ def test_vov(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(lh5.ls(outfile, "/evt/")) == 9 + assert len(lh5.ls(outfile, "/evt/")) == 11 vov_ene, _ = store.read("/evt/energy", outfile) vov_aoe, _ = store.read("/evt/aoe", outfile) arr_ac, _ = store.read("/evt/multiplicity", outfile) @@ -136,6 +136,18 @@ def test_vov(lgnd_test_data, tmptestdir): assert isinstance(arr_ac2, Array) assert (np.diff(vov_ene.cumulative_length.nda, prepend=[0]) == arr_ac.nda).all() + vov_eid = store.read("/evt/energy_id", outfile)[0].view_as("ak") + vov_eidx = store.read("/evt/energy_idx", outfile)[0].view_as("ak") + + ids = store.read("hardware_tcm_1/array_id", lgnd_test_data.get_path(tcm_path))[ + 0 + ].view_as("ak") + ids = ak.unflatten(ids[ak.flatten(vov_eidx)], ak.count(vov_eidx, axis=-1)) + assert ak.all(ids == vov_eid) + + arr_ene = store.read("/evt/energy_sum", outfile)[0].view_as("ak") + assert ak.all(arr_ene == ak.nansum(vov_ene.view_as("ak"), axis=-1)) + def test_graceful_crashing(lgnd_test_data, tmptestdir): outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" @@ -246,29 +258,3 @@ def test_vector_sort(lgnd_test_data, tmptestdir): vov_t0, _ = store.read("/evt/t0_decend", outfile) nda_t0 = vov_t0.to_aoesa().view_as("np") assert ((np.diff(nda_t0) <= 0) | (np.isnan(np.diff(nda_t0)))).all() - - -def test_skimming(lgnd_test_data, tmptestdir): - outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" - tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" - if os.path.exists(outfile): - os.remove(outfile) - f_tcm = lgnd_test_data.get_path(tcm_path) - f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")) - f_hit = 
lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")) - f_config = f"{config_dir}/vov-test-evt-config.json" - build_evt(f_tcm, f_dsp, f_hit, outfile, f_config) - - ac = store.read("/evt/multiplicity", outfile)[0].view_as("np") - ac = len(ac[ac == 3]) - - outfile_skm = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" - - skim_evt(outfile, "multiplicity == 3", None, outfile_skm, "n") - assert ac == len(store.read("/evt/energy", outfile_skm)[0].to_aoesa().view_as("np")) - - skim_evt(outfile, "multiplicity == 3", None, None, "o") - assert ac == len(store.read("/evt/energy", outfile)[0].to_aoesa().view_as("np")) - - with pytest.raises(ValueError): - skim_evt(outfile, "multiplicity == 3", None, None, "bla") diff --git a/tests/skm/configs/basic-skm-config.json b/tests/skm/configs/basic-skm-config.json index b1844ecb0..faf5e56cb 100644 --- a/tests/skm/configs/basic-skm-config.json +++ b/tests/skm/configs/basic-skm-config.json @@ -1,35 +1,25 @@ { "multiplicity": 3, "index_field": "timestamp", - "skimmed_fields": { + "operations": { "timestamp": { - "evt_field": "timestamp" + "forward_field": "evt.timestamp" + }, + "energy_sum": { + "forward_field": "evt.energy_sum" }, "multiplicity": { - "evt_field": "multiplicity" + "forward_field": "evt.multiplicity" }, "energy": { - "evt_field": "energy", - "missing_value": "np.nan" + "forward_field": "hit.cuspEmax_ctc_cal", + "missing_value": "np.nan", + "tcm_idx": "evt.energy_idx" }, "energy_id": { - "evt_field": "energy_id", - "missing_value": 0 - } - }, - "global_fields": { - "energy_sum": { - "aggregation_mode": "nansum", - "evt_field": "energy" - }, - "is_any_saturated": { - "aggregation_mode": "any", - "evt_field": "is_saturated" - }, - "max_energy": { - "aggregation_mode": "max", - "evt_field": "energy", - "missing_value": "np.inf" + "forward_field": "tcm.array_id", + "missing_value": 0, + "tcm_idx": "evt.energy_idx" } } } diff --git a/tests/skm/test_build_skm.py b/tests/skm/test_build_skm.py index 
0b2beebe4..678fe2c41 100644 --- a/tests/skm/test_build_skm.py +++ b/tests/skm/test_build_skm.py @@ -1,14 +1,17 @@ import os from pathlib import Path +import awkward as ak import numpy as np import pandas as pd +from lgdo.lh5 import LH5Store from pygama.evt import build_evt from pygama.skm import build_skm config_dir = Path(__file__).parent / "configs" evt_config_dir = Path(__file__).parent.parent / "evt" / "configs" +store = LH5Store() def test_basics(lgnd_test_data, tmptestdir): @@ -29,7 +32,16 @@ def test_basics(lgnd_test_data, tmptestdir): skm_conf = f"{config_dir}/basic-skm-config.json" skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.parquet" - build_skm(outfile, skm_out, skm_conf, wo_mode="o", skim_format="hdf") + build_skm( + outfile, + lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + lgnd_test_data.get_path(tcm_path), + skm_out, + skm_conf, + wo_mode="o", + skim_format="hdf", + ) assert os.path.exists(skm_out) df = pd.read_hdf(skm_out) @@ -37,7 +49,11 @@ def test_basics(lgnd_test_data, tmptestdir): assert "energy_0" in df.keys() assert "energy_1" in df.keys() assert "energy_2" in df.keys() + assert "energy_id_0" in df.keys() + assert "energy_id_1" in df.keys() + assert "energy_id_2" in df.keys() assert "multiplicity" in df.keys() + assert "energy_sum" in df.keys() assert (df.multiplicity.to_numpy() <= 3).all() assert ( np.nan_to_num(df.energy_0.to_numpy()) @@ -45,6 +61,16 @@ def test_basics(lgnd_test_data, tmptestdir): + np.nan_to_num(df.energy_2.to_numpy()) == df.energy_sum.to_numpy() ).all() - assert (np.nan_to_num(df.energy_0.to_numpy()) <= df.max_energy.to_numpy()).all() - assert (np.nan_to_num(df.energy_1.to_numpy()) <= df.max_energy.to_numpy()).all() - assert (np.nan_to_num(df.energy_2.to_numpy()) <= df.max_energy.to_numpy()).all() + + vov_eid = ak.to_numpy( + ak.fill_none( + ak.pad_none( + store.read("/evt/energy_id", outfile)[0].view_as("ak"), 3, clip=True + ), 
+ 0, + ), + allow_missing=False, + ) + assert (vov_eid[:, 0] == df.energy_id_0.to_numpy()).all() + assert (vov_eid[:, 1] == df.energy_id_1.to_numpy()).all() + assert (vov_eid[:, 2] == df.energy_id_2.to_numpy()).all() From f262b020e1d57f4498e997dc20d3364d471efa57 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 17 Jan 2024 19:25:55 +0100 Subject: [PATCH 44/73] skm tier feature addition --- src/pygama/skm/build_skm.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index 28e07bbee..aed71e1eb 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -12,7 +12,7 @@ import awkward as ak import numpy as np import pandas as pd -from lgdo import Array +from lgdo import Array, lh5 from lgdo.lh5 import LH5Store log = logging.getLogger(__name__) @@ -47,6 +47,9 @@ def build_skm( - ``multiplicity`` defines up to which row length :class:`.VectorOfVector` fields should be kept. + - ``postfixes`` list of postfixes must be list of + ``len(multiplicity)``. If not given, numbers from 0 to + ``multiplicity -1`` are used - ``index_field`` sets the index of the output table. If not given the index are set es increasing integers. 
- ``operations`` are forwarded from lower tiers and clipped/padded @@ -60,6 +63,7 @@ def build_skm( { "multiplicity": 2, + "postfixes":["","aux"], "index_field": "timestamp", "operations": { "timestamp":{ @@ -166,11 +170,17 @@ def build_skm( ch_idx = idx[ids == ch] ct_idx = ak.count(ch_idx, axis=-1) fl_idx = ak.to_numpy(ak.flatten(ch_idx), allow_missing=False) - och, _ = store.read( - f"ch{ch}/{fw_fld[0]}/{fw_fld[1]}", - f_dict[fw_fld[0]], - idx=fl_idx, - ) + + if f"ch{ch}/{fw_fld[0]}/{fw_fld[1]}" not in lh5.ls( + f_dict[fw_fld[0]], f"ch{ch}/{fw_fld[0]}/" + ): + och = Array(nda=np.full(len(fl_idx), miss_val)) + else: + och, _ = store.read( + f"ch{ch}/{fw_fld[0]}/{fw_fld[1]}", + f_dict[fw_fld[0]], + idx=fl_idx, + ) if not isinstance(och, Array): raise ValueError( f"{type(och)} not supported. Forward only Array fields" @@ -186,7 +196,10 @@ def build_skm( nms = [op] if obj.ndim > 1: - nms = [f"{op}_{x}" for x in range(multi)] + if "postfixes" in tbl_cfg.keys(): + nms = [f"{op}{x}" for x in tbl_cfg["postfixes"]] + else: + nms = [f"{op}_{x}" for x in range(multi)] df = df.join(pd.DataFrame(data=obj, columns=nms), how="outer") From c087649819b2d983635026c85ba6f475a38ccf7d Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 17 Jan 2024 21:51:01 +0100 Subject: [PATCH 45/73] added option to keep at index --- src/pygama/evt/build_evt.py | 35 +++++++++++++------ tests/evt/configs/basic-evt-config.json | 14 ++++++-- tests/evt/configs/module-test-evt-config.json | 2 +- .../module-test-t0-vov-evt-config.json | 2 +- tests/evt/configs/vov-test-evt-config.json | 9 +++-- tests/evt/test_build_evt.py | 20 ++++++++--- 6 files changed, 61 insertions(+), 21 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index a0cf1b5dc..b37f5a3a5 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -80,7 +80,9 @@ def evaluate_expression( - ``sum``: aggregates by summation. - ``any``: aggregates by logical or. 
- ``all``: aggregates by logical and. - - ``keep_at:ch_field``: aggregates according to passed ch_field + - ``keep_at_ch:ch_field``: aggregates according to passed ch_field. + - ``keep_at_idx:tcm_idx_field``: aggregates according to passed tcm + index field. - ``gather``: Channels are not combined, but result saved as :class:`.VectorOfVectors`. @@ -152,13 +154,26 @@ def evaluate_expression( idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") # switch through modes - if ( - table - and "keep_at:" == mode[:8] - and "evt." == mode[8:][:4] - and mode[8:].split(".")[-1] in table.keys() - ): - ch_comp = table[mode[8:].replace("evt.", "")] + if table and (("keep_at_ch:" == mode[:11]) or ("keep_at_idx:" == mode[:12])): + if "keep_at_ch:" == mode[:11]: + ch_comp = table[mode[11:].replace("evt.", "")] + else: + ch_comp = table[mode[12:].replace("evt.", "")] + if isinstance(ch_comp, Array): + ch_comp = Array(nda=ids[ch_comp.view_as("np")]) + elif isinstance(ch_comp, VectorOfVectors): + ch_comp = ch_comp.view_as("ak") + ch_comp = VectorOfVectors( + array=ak.unflatten( + ids[ak.flatten(ch_comp)], ak.count(ch_comp, axis=-1) + ) + ) + else: + raise NotImplementedError( + type(ch_comp) + + " not supported (only Array and VectorOfVectors are supported)" + ) + if isinstance(ch_comp, Array): return evaluate_at_channel( idx, @@ -1003,7 +1018,7 @@ def build_evt( "sort": "ascend_by:dsp.tp_0_est" }, "energy":{ - "aggregation_mode": "keep_at:evt.energy_id", + "aggregation_mode": "keep_at_ch:evt.energy_id", "expression": "hit.cuspEmax_ctc_cal > 25" } "is_muon_rejected":{ @@ -1021,7 +1036,7 @@ def build_evt( "initial": 0 }, "t0":{ - "aggregation_mode": "keep_at:evt.energy_id", + "aggregation_mode": "keep_at_ch:evt.energy_id", "expression": "dsp.tp_0_est" }, "lar_energy":{ diff --git a/tests/evt/configs/basic-evt-config.json b/tests/evt/configs/basic-evt-config.json index 8eb23adf2..0c82c673f 100644 --- a/tests/evt/configs/basic-evt-config.json +++ 
b/tests/evt/configs/basic-evt-config.json @@ -6,6 +6,7 @@ "multiplicity", "energy", "energy_id", + "energy_idx", "energy_any_above1MeV", "energy_all_above1MeV", "energy_aux", @@ -36,6 +37,13 @@ "expression": "tcm.array_id", "initial": 0 }, + "energy_idx": { + "channels": "geds_on", + "aggregation_mode": "first_at:dsp.tp_0_est", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "tcm.index", + "initial": 999999999999 + }, "energy_any_above1MeV": { "channels": "geds_on", "aggregation_mode": "any", @@ -63,17 +71,17 @@ "initial": 0.0 }, "is_usable_aoe": { - "aggregation_mode": "keep_at:evt.energy_id", + "aggregation_mode": "keep_at_ch:evt.energy_id", "expression": "True", "initial": false }, "aoe": { - "aggregation_mode": "keep_at:evt.energy_id", + "aggregation_mode": "keep_at_ch:evt.energy_id", "expression": "hit.AoE_Classifier", "initial": "np.nan" }, "is_aoe_rejected": { - "aggregation_mode": "keep_at:evt.energy_id", + "aggregation_mode": "keep_at_ch:evt.energy_id", "expression": "~(hit.AoE_Double_Sided_Cut)", "initial": false } diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index 595999d60..d0ea1bc68 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -31,7 +31,7 @@ "initial": 0 }, "t0": { - "aggregation_mode": "keep_at:evt.energy_first_id", + "aggregation_mode": "keep_at_ch:evt.energy_first_id", "expression": "dsp.tp_0_est", "initial": 0.0 }, diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index 64d3dd0e3..d31e9717a 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -31,7 +31,7 @@ "expression": "tcm.array_id" }, "t0": { - "aggregation_mode": "keep_at:evt.energy_id", + "aggregation_mode": "keep_at_ch:evt.energy_id", "expression": "dsp.tp_0_est", "initial": 0.0 }, diff --git 
a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json index 32c2c0b59..31334101e 100644 --- a/tests/evt/configs/vov-test-evt-config.json +++ b/tests/evt/configs/vov-test-evt-config.json @@ -10,6 +10,7 @@ "energy_id", "energy_idx", "aoe", + "aoe_idx", "multiplicity", "is_saturated", "energy_times_aoe", @@ -53,7 +54,11 @@ "initial": 0 }, "aoe": { - "aggregation_mode": "keep_at:evt.energy_id", + "aggregation_mode": "keep_at_ch:evt.energy_id", + "expression": "hit.AoE_Classifier" + }, + "aoe_idx": { + "aggregation_mode": "keep_at_idx:evt.energy_idx", "expression": "hit.AoE_Classifier" }, "multiplicity": { @@ -64,7 +69,7 @@ "initial": 0 }, "is_saturated": { - "aggregation_mode": "keep_at:evt.energy_id", + "aggregation_mode": "keep_at_ch:evt.energy_id", "expression": "hit.is_saturated" }, "energy_times_aoe": { diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 89cc24386..64ad133ed 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -30,7 +30,7 @@ def test_basics(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(lh5.ls(outfile, "/evt/")) == 10 + assert len(lh5.ls(outfile, "/evt/")) == 11 nda = { e: store.read(f"/evt/{e}", outfile)[0].view_as("np") for e in ["energy", "energy_aux", "energy_sum", "multiplicity"] @@ -48,6 +48,16 @@ def test_basics(lgnd_test_data, tmptestdir): == nda["energy_sum"][nda["multiplicity"] == 1] ).all() + eid = store.read("/evt/energy_id", outfile)[0].view_as("np") + eidx = store.read("/evt/energy_idx", outfile)[0].view_as("np") + eidx = eidx[eidx != 999999999999] + + ids = store.read("hardware_tcm_1/array_id", lgnd_test_data.get_path(tcm_path))[ + 0 + ].view_as("np") + ids = ids[eidx] + assert ak.all(ids == eid[eid != 0]) + def test_lar_module(lgnd_test_data, tmptestdir): outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" @@ -121,7 +131,7 @@ def test_vov(lgnd_test_data, tmptestdir): ) assert 
os.path.exists(outfile) - assert len(lh5.ls(outfile, "/evt/")) == 11 + assert len(lh5.ls(outfile, "/evt/")) == 12 vov_ene, _ = store.read("/evt/energy", outfile) vov_aoe, _ = store.read("/evt/aoe", outfile) arr_ac, _ = store.read("/evt/multiplicity", outfile) @@ -138,6 +148,7 @@ def test_vov(lgnd_test_data, tmptestdir): vov_eid = store.read("/evt/energy_id", outfile)[0].view_as("ak") vov_eidx = store.read("/evt/energy_idx", outfile)[0].view_as("ak") + vov_aoe_idx = store.read("/evt/aoe_idx", outfile)[0].view_as("ak") ids = store.read("hardware_tcm_1/array_id", lgnd_test_data.get_path(tcm_path))[ 0 @@ -147,6 +158,7 @@ def test_vov(lgnd_test_data, tmptestdir): arr_ene = store.read("/evt/energy_sum", outfile)[0].view_as("ak") assert ak.all(arr_ene == ak.nansum(vov_ene.view_as("ak"), axis=-1)) + assert ak.all(vov_aoe.view_as("ak") == vov_aoe_idx) def test_graceful_crashing(lgnd_test_data, tmptestdir): @@ -232,7 +244,7 @@ def test_vector_sort(lgnd_test_data, tmptestdir): "sort": "ascend_by:dsp.tp_0_est", }, "t0_acend": { - "aggregation_mode": "keep_at:evt.acend_id", + "aggregation_mode": "keep_at_ch:evt.acend_id", "expression": "dsp.tp_0_est", }, "decend_id": { @@ -243,7 +255,7 @@ def test_vector_sort(lgnd_test_data, tmptestdir): "sort": "descend_by:dsp.tp_0_est", }, "t0_decend": { - "aggregation_mode": "keep_at:evt.acend_id", + "aggregation_mode": "keep_at_ch:evt.acend_id", "expression": "dsp.tp_0_est", }, }, From 7af7e16a975b5e2c764c68d2fc78b2a1fe16c93f Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Thu, 18 Jan 2024 18:24:42 +0100 Subject: [PATCH 46/73] Spm module awkward refactor --- src/pygama/evt/modules/spm.py | 473 +++++++++--------- tests/evt/configs/module-test-evt-config.json | 2 +- .../module-test-t0-vov-evt-config.json | 2 +- 3 files changed, 244 insertions(+), 233 deletions(-) diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index aa0e41a71..cd1cfb812 100644 --- a/src/pygama/evt/modules/spm.py +++ 
b/src/pygama/evt/modules/spm.py @@ -1,7 +1,7 @@ """ Module for special event level routines for SiPMs -functions must take as the first 3 args in order: +functions must take as the first 4 args in order: - path to the hit file - path to the dsp file - path to the tcm file @@ -17,38 +17,52 @@ from lgdo.lh5 import LH5Store -# get SiPM coincidence window mask -def get_spm_mask(lim, trgr, tdefault, tmin, tmax, pe, times) -> np.ndarray: - trig = trgr - if isinstance(trgr, VectorOfVectors): - trig = trig.to_aoesa().view_as("np") - elif isinstance(trgr, Array): - trig = trig.view_as("np") +# get an 1D akward array from 0 to 2D array +# casted by minimum of a 2D array +def cast_trigger( + trgr: int | float | Array | VectorOfVectors | ak.Array, + tdefault: float, + length: int = None, +) -> ak.Array: + if isinstance(trgr, Array): + return ak.fill_none(ak.nan_to_none(trgr.view_as("ak")), tdefault) + + elif isinstance(trgr, (VectorOfVectors)): + return ak.fill_none( + ak.min(ak.fill_none(trgr.view_as("ak"), tdefault), axis=-1), tdefault + ) + elif isinstance(trgr, ak.Array): if trgr.ndim == 1: - trig = ak.to_numpy(trig) + return ak.fill_none(trgr, tdefault) + elif trgr.ndim == 2: + return ak.fill_none(ak.min(ak.fill_none(trgr, tdefault), axis=-1), tdefault) else: - trig = ak.to_numpy( - ak.fill_none( - ak.pad_none(trig, target=ak.max(ak.count(trig, axis=-1)), axis=-1), - np.nan, - ), - allow_missing=False, - ) - if isinstance(trig, np.ndarray) and trig.ndim == 2: - trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig) - trig = np.nanmin(trig, axis=1) - - elif isinstance(trig, np.ndarray) and trig.ndim == 1: - trig = np.where(np.isnan(trig), tdefault, trig) + raise ValueError(f"Too many dimensions: {trgr.ndim}") + elif isinstance(trgr, (float, int)) and isinstance(length, int): + return ak.Array([trgr] * length) else: raise ValueError(f"Can't deal with t0 of type {type(trgr)}") - tmi = trig - tmin - tma = trig + tmax - mask = (times < tma[:, None] / 16) & (times 
> tmi[:, None] / 16) & (pe > lim) - return mask, trig +# get SiPM coincidence window mask +def get_spm_mask( + lim: float, trgr: ak.Array, tmin: float, tmax: float, pe: ak.Array, times: ak.Array +) -> ak.Array: + if trgr.ndim != 1: + raise ValueError("trigger array muse be 1 dimensional!") + if (len(trgr) != len(pe)) or (len(trgr) != len(times)): + raise ValueError( + f"All arrays must have same dimension across first axis len(pe)={len(pe)}, len(times)={len(times)}, len(trgr)={len(trgr)}" + ) + + tmi = trgr - tmin + tma = trgr + tmax + + mask = ( + ((times * 16.0) < tma[:, None]) & ((times * 16.0) > tmi[:, None]) & (pe > lim) + ) + return mask # get LAr indices according to mask per event over all channels @@ -65,57 +79,45 @@ def get_masked_tcm_idx( idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") arr_lst = [] + + if isinstance(trgr, (float | int)): + tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1) + else: + tge = cast_trigger(trgr, tdefault, length=None) + for ch in chs: idx_ch = idx[ids == int(ch[2:])] - energy_in_pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[ - 0 - ].view_as("np") - trigger_pos = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( - "np" + + pe = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("ak") + ) ) - mask, _ = get_spm_mask( - lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos + + # times are in sample units + times = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("ak") + ) ) + mask = get_spm_mask(lim, tge, tmin, tmax, pe, times) + if mode == 0: - out_idx = np.repeat( - np.arange(len(mask[0]))[:, None], repeats=len(mask), axis=1 - ).T - out_idx = np.where(mask, out_idx, np.nan) - out_idx = VectorOfVectors( - flattened_data=out_idx.flatten()[~np.isnan(out_idx.flatten())], - cumulative_length=np.cumsum( - np.count_nonzero(~np.isnan(out_idx), axis=1) - ), - dtype=int, - 
).view_as("ak", preserve_dtype=True) + out_idx = ak.local_index(mask)[mask] + elif mode == 1: - out_idx = np.where(mask, np.where(ids == int(ch[2:]))[0][:, None], np.nan) - out_idx = VectorOfVectors( - flattened_data=out_idx.flatten()[~np.isnan(out_idx.flatten())], - cumulative_length=np.cumsum( - np.count_nonzero(~np.isnan(out_idx), axis=1) - ), - dtype=int, - ).view_as("ak", preserve_dtype=True) + out_idx = ak.Array(np.where(ids == int(ch[2:]))[0]) + out_idx = out_idx[:, None][mask[mask] - 1] + elif mode == 2: - out_idx = np.where(mask, int(ch[2:]), np.nan) - out_idx = VectorOfVectors( - flattened_data=out_idx.flatten()[~np.isnan(out_idx.flatten())], - cumulative_length=np.cumsum( - np.count_nonzero(~np.isnan(out_idx), axis=1) - ), - dtype=int, - ).view_as("ak", preserve_dtype=True) + out_idx = ak.Array([int(ch[2:])] * len(mask)) + out_idx = out_idx[:, None][mask[mask] - 1] + elif mode == 3: - out_idx = np.where(mask, idx_ch[:, None], np.nan) - out_idx = VectorOfVectors( - flattened_data=out_idx.flatten()[~np.isnan(out_idx.flatten())], - cumulative_length=np.cumsum( - np.count_nonzero(~np.isnan(out_idx), axis=1) - ), - dtype=int, - ).view_as("ak", preserve_dtype=True) + out_idx = ak.Array(idx_ch) + out_idx = out_idx[:, None][mask[mask] - 1] + else: raise ValueError("Unknown mode") @@ -124,219 +126,228 @@ def get_masked_tcm_idx( return VectorOfVectors(array=ak.concatenate(arr_lst, axis=-1)) -# get LAr energy per event over all channels -def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: +def get_spm_ene_or_maj(f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, mode): + if mode not in ["energy_hc", "energy_dplms", "majority_hc", "majority_dplms"]: + raise ValueError("Unknown mode") + # load TCM data to define an event store = LH5Store() ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") - sum = np.zeros(np.max(idx) + 1) + out = 
np.zeros(np.max(idx) + 1) + + if isinstance(trgr, (float | int)): + tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1) + else: + tge = cast_trigger(trgr, tdefault, length=None) + for ch in chs: - # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - energy_in_pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[ - 0 - ].view_as("np") - trigger_pos = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( - "np" - ) - mask, _ = get_spm_mask( - lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos - ) - pes = energy_in_pe - pes = np.where(np.isnan(pes), 0, pes) - pes = np.where(mask, pes, 0) - chsum = np.nansum(pes, axis=1) - sum[idx_ch] = sum[idx_ch] + chsum - return Array(nda=sum) + + if mode in ["energy_dplms", "majority_dplms"]: + pe = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/energy_in_pe_dplms", f_hit, idx=idx_ch)[ + 0 + ].view_as("ak") + ) + ) + + # times are in sample units + times = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/trigger_pos_dplms", f_hit, idx=idx_ch)[ + 0 + ].view_as("ak") + ) + ) + + else: + pe = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as( + "ak" + ) + ) + ) + + # times are in sample units + times = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( + "ak" + ) + ) + ) + + mask = get_spm_mask(lim, tge, tmin, tmax, pe, times) + pe = pe[mask] + + if mode in ["energy_hc", "energy_dplms"]: + out[idx_ch] = out[idx_ch] + ak.to_numpy(ak.nansum(pe, axis=-1)) + + else: + out[idx_ch] = out[idx_ch] + ak.to_numpy( + ak.where(ak.nansum(pe, axis=-1) > lim, 1, 0) + ) + + return Array(nda=out) + + +# get LAr energy per event over all channels +def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: + return get_spm_ene_or_maj( + f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, "energy_hc" + ) # get LAr majority per event over all channels 
def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: - # load TCM data to define an event - store = LH5Store() - ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") - idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") - maj = np.zeros(np.max(idx) + 1) - for ch in chs: - # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] - energy_in_pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[ - 0 - ].view_as("np") - trigger_pos = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( - "np" - ) - mask, _ = get_spm_mask( - lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos - ) - - pes = energy_in_pe - pes = np.where(np.isnan(pes), 0, pes) - pes = np.where(mask, pes, 0) - chsum = np.nansum(pes, axis=1) - chmaj = np.where(chsum > lim, 1, 0) - maj[idx_ch] = maj[idx_ch] + chmaj - return Array(nda=maj) + return get_spm_ene_or_maj( + f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, "majority_hc" + ) # get LAr energy per event over all channels def get_energy_dplms( f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax ) -> Array: - # load TCM data to define an event - store = LH5Store() - ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") - idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") - sum = np.zeros(np.max(idx) + 1) - for ch in chs: - # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] - energy_in_pe_dplms = store.read( - f"{ch}/hit/energy_in_pe_dplms", f_hit, idx=idx_ch - )[0].view_as("np") - trigger_pos_dplms = store.read( - f"{ch}/hit/trigger_pos_dplms", f_hit, idx=idx_ch - )[0].view_as("np") - mask, _ = get_spm_mask( - lim, trgr, tdefault, tmin, tmax, energy_in_pe_dplms, trigger_pos_dplms - ) - pes = energy_in_pe_dplms - pes = np.where(np.isnan(pes), 0, pes) - pes = np.where(mask, pes, 0) - chsum = np.nansum(pes, axis=1) - sum[idx_ch] = sum[idx_ch] + chsum - return 
Array(nda=sum) + return get_spm_ene_or_maj( + f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, "energy_dplms" + ) # get LAr majority per event over all channels def get_majority_dplms( f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax ) -> Array: - # load TCM data to define an event - store = LH5Store() - ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") - idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") - maj = np.zeros(np.max(idx) + 1) - for ch in chs: - # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] - energy_in_pe_dplms = store.read( - f"{ch}/hit/energy_in_pe_dplms", f_hit, idx=idx_ch - )[0].view_as("np") - trigger_pos_dplms = store.read( - f"{ch}/hit/trigger_pos_dplms", f_hit, idx=idx_ch - )[0].view_as("np") - mask, _ = get_spm_mask( - lim, trgr, tdefault, tmin, tmax, energy_in_pe_dplms, trigger_pos_dplms - ) - pes = energy_in_pe_dplms - pes = np.where(np.isnan(pes), 0, pes) - pes = np.where(mask, pes, 0) - chsum = np.nansum(pes, axis=1) - chmaj = np.where(chsum > lim, 1, 0) - maj[idx_ch] = maj[idx_ch] + chmaj - return Array(nda=maj) + return get_spm_ene_or_maj( + f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, "majority_dplms" + ) +# Calculate the ETC in different trailing modes: +# trail = 0: Singlet window = [tge,tge+swin] +# trail = 1: Singlet window = [t_first_lar_pulse, t_first_lar_pulse+ swin] +# trail = 2: Like trail = 1, but t_first_lar_pulse <= tge is ensured +# min_first_pls_ene sets the minimum energy of the first pulse (only used in trail > 0) def get_etc( - f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, trail + f_hit, + f_dsp, + f_tcm, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, + swin, + trail, + min_first_pls_ene, ) -> Array: # ignore stupid numpy warnings warnings.filterwarnings("ignore", r"All-NaN slice encountered") warnings.filterwarnings("ignore", r"invalid value encountered in true_divide") warnings.filterwarnings("ignore", 
r"invalid value encountered in divide") - store = LH5Store() - energy_in_pe, _ = store.read(f"{chs[0]}/hit/energy_in_pe", f_hit) - - peshape = energy_in_pe.view_as("np").shape - # 1D = channel, 2D = event num, 3D = array per event - pes = np.zeros([len(chs), peshape[0], peshape[1]]) - times = np.zeros([len(chs), peshape[0], peshape[1]]) - # load TCM data to define an event store = LH5Store() ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") - for i in range(len(chs)): - # get index list for this channel to be loaded - idx_ch = idx[ids == int(chs[i][2:])] - energy_in_pe = store.read(f"{chs[i]}/hit/energy_in_pe", f_hit, idx=idx_ch)[ - 0 - ].view_as("np") - trigger_pos = store.read(f"{chs[i]}/hit/trigger_pos", f_hit, idx=idx_ch)[ - 0 - ].view_as("np") - mask, tge = get_spm_mask( - lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos - ) - pe = energy_in_pe - time = trigger_pos * 16 + pe_lst = [] + time_lst = [] - pe = np.where(mask, pe, np.nan) - time = np.where(mask, time, np.nan) + if isinstance(trgr, (float | int)): + tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1) + else: + tge = cast_trigger(trgr, tdefault, length=None) - pes[i][idx_ch] = pe - times[i][idx_ch] = time + for ch in chs: + idx_ch = idx[ids == int(ch[2:])] - outi = None - if trail > 0: - t1d = np.nanmin(times, axis=(0, 2)) - if trail == 2: - t1d[t1d > tge] = tge[t1d > tge] - tt = t1d[:, None] - outi = np.where( - np.nansum(np.where((times >= tt), pes, 0), axis=(0, 2)) > 0, - np.nansum( - np.where((times >= tt) & (times < tt + swin), pes, 0), axis=(0, 2) + pe = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("ak") ) - / np.nansum(np.where((times >= tt), pes, 0), axis=(0, 2)), - np.nansum(np.where((times >= tt), pes, 0), axis=(0, 2)), ) - return Array(nda=outi) - else: - outi = np.where( - np.nansum(pes, axis=(0, 2)) > 0, - np.nansum( - np.where( - 
(times >= tge[:, None]) & (times <= tge[:, None] + swin), pes, 0 - ), - axis=(0, 2), + # times are in sample units + times = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("ak") ) - / np.nansum(np.where((times >= tge[:, None]), pes, 0), axis=(0, 2)), - np.nansum(pes, axis=(0, 2)), ) - return Array(nda=outi) + + mask = get_spm_mask(lim, tge, tmin, tmax, pe, times) + + pe = pe[mask] + pe_lst.append(pe) + + times = times[mask] * 16 + time_lst.append(times) + + pe_all = ak.concatenate(pe_lst, axis=-1) + time_all = ak.concatenate(time_lst, axis=-1) + + if trail > 0: + t1d = ak.min(time_all[pe_all > min_first_pls_ene], axis=-1) + + if trail == 2: + t1d = ak.where(t1d > tge, tge, t1d) + + mask_total = time_all > t1d + mask_singlet = (time_all > t1d) & (time_all < t1d + swin) + + else: + mask_total = time_all > tge + mask_singlet = (time_all > tge) & (time_all < tge + swin) + + pe_singlet = ak.nansum(pe_all[mask_singlet], axis=-1) + pe_total = ak.nansum(pe_all[mask_total], axis=-1) + etc = ak.where(pe_total > 0, pe_singlet / pe_total, np.nan) + + return Array(nda=ak.to_numpy(ak.fill_none(etc, np.nan), allow_missing=False)) +# returns relative time shift of the first LAr pulse relative to the Ge trigger def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: store = LH5Store() # load TCM data to define an event ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") - spm_tmin = np.full(np.max(idx) + 1, np.inf) - for i in range(len(chs)): - # get index list for this channel to be loaded - idx_ch = idx[ids == int(chs[i][2:])] - energy_in_pe = store.read(f"{chs[i]}/hit/energy_in_pe", f_hit, idx=idx_ch)[ - 0 - ].view_as("ak") - trigger_pos = store.read(f"{chs[i]}/hit/trigger_pos", f_hit, idx=idx_ch)[ - 0 - ].view_as("ak") - mask, tge = get_spm_mask( - lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos + 
time_lst = [] + + if isinstance(trgr, (float | int)): + tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1) + else: + tge = cast_trigger(trgr, tdefault, length=None) + + for ch in chs: + idx_ch = idx[ids == int(ch[2:])] + + pe = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("ak") + ) ) - time = trigger_pos * 16 - time = ak.min(ak.nan_to_none(time[mask]), axis=-1) - if not time: - return Array(nda=np.zeros(len(spm_tmin))) - time = ak.fill_none(time, tdefault) - if not time: - time = ak.to_numpy(time, allow_missing=False) - spm_tmin = np.where(time < spm_tmin, time, spm_tmin) + # times are in sample units + times = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("ak") + ) + ) + + mask = get_spm_mask(lim, tge, tmin, tmax, pe, times) + + # apply mask and convert sample units to ns + time_lst.append(times[mask] * 16) + + time_all = ak.concatenate(time_lst, axis=-1) + out = ak.min(time_all, axis=-1) + + # Convert to 1D numpy array + out = ak.to_numpy(ak.fill_none(out, np.inf), allow_missing=False) + tge = ak.to_numpy(tge, allow_missing=False) - return Array(spm_tmin - tge) + return Array(out - tge) diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index d0ea1bc68..6aba3bf75 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -51,7 +51,7 @@ "lar_classifier": { "channels": "spms_on", "aggregation_mode": "function", - "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1)" + "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1,0)" }, "lar_energy_dplms": { "channels": "spms_on", diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index d31e9717a..5d1c6f256 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ 
b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -51,7 +51,7 @@ "lar_classifier": { "channels": "spms_on", "aggregation_mode": "function", - "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1)" + "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1,0)" }, "lar_energy_dplms": { "channels": "spms_on", From d35976985c37bd44aae769b5f7057b2d91da1fc5 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Thu, 18 Jan 2024 18:52:15 +0100 Subject: [PATCH 47/73] add possibility to add atributes to evt LGDO --- src/pygama/evt/build_evt.py | 9 +++++++++ tests/evt/configs/basic-evt-config.json | 3 ++- tests/evt/test_build_evt.py | 6 +++++- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index b37f5a3a5..c39ddffb4 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -1116,6 +1116,11 @@ def build_evt( if "parameters" in v.keys(): var = var | v["parameters"] res = table.eval(v["expression"].replace("evt.", ""), var) + + # add attribute if present + if "lgdo_attrs" in v.keys(): + res.attrs |= v["lgdo_attrs"] + table.add_field(k, res) # Else we build the event entry @@ -1169,6 +1174,10 @@ def build_evt( srter, ) + # add attribute if present + if "lgdo_attrs" in v.keys(): + obj.attrs |= v["lgdo_attrs"] + table.add_field(k, obj) # write output fields into f_evt diff --git a/tests/evt/configs/basic-evt-config.json b/tests/evt/configs/basic-evt-config.json index 0c82c673f..3a8c62753 100644 --- a/tests/evt/configs/basic-evt-config.json +++ b/tests/evt/configs/basic-evt-config.json @@ -21,7 +21,8 @@ "aggregation_mode": "sum", "expression": "hit.cuspEmax_ctc_cal > a", "parameters": { "a": 25 }, - "initial": 0 + "initial": 0, + "lgdo_attrs": { "statement": "0bb decay is real" } }, "energy": { "channels": "geds_on", diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 64ad133ed..2a7269e9d 100644 --- a/tests/evt/test_build_evt.py +++ 
b/tests/evt/test_build_evt.py @@ -28,7 +28,11 @@ def test_basics(lgnd_test_data, tmptestdir): group="/evt/", tcm_group="hardware_tcm_1", ) - + assert "statement" in store.read("/evt/multiplicity", outfile)[0].getattrs().keys() + assert ( + store.read("/evt/multiplicity", outfile)[0].getattrs()["statement"] + == "0bb decay is real" + ) assert os.path.exists(outfile) assert len(lh5.ls(outfile, "/evt/")) == 11 nda = { From c3324478288117b3b440f0ce8e0dfa32c50a5d82 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Thu, 18 Jan 2024 18:58:13 +0100 Subject: [PATCH 48/73] removed explicit typing in a spm module function to be compatible with Python 3.9 --- src/pygama/evt/modules/spm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index cd1cfb812..96a1098b6 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -20,7 +20,7 @@ # get an 1D akward array from 0 to 2D array # casted by minimum of a 2D array def cast_trigger( - trgr: int | float | Array | VectorOfVectors | ak.Array, + trgr, tdefault: float, length: int = None, ) -> ak.Array: @@ -80,7 +80,7 @@ def get_masked_tcm_idx( arr_lst = [] - if isinstance(trgr, (float | int)): + if isinstance(trgr, (float, int)): tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1) else: tge = cast_trigger(trgr, tdefault, length=None) @@ -136,7 +136,7 @@ def get_spm_ene_or_maj(f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, mode) idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") out = np.zeros(np.max(idx) + 1) - if isinstance(trgr, (float | int)): + if isinstance(trgr, (float, int)): tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1) else: tge = cast_trigger(trgr, tdefault, length=None) @@ -257,7 +257,7 @@ def get_etc( pe_lst = [] time_lst = [] - if isinstance(trgr, (float | int)): + if isinstance(trgr, (float, int)): tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1) else: 
tge = cast_trigger(trgr, tdefault, length=None) @@ -317,7 +317,7 @@ def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") time_lst = [] - if isinstance(trgr, (float | int)): + if isinstance(trgr, (float, int)): tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1) else: tge = cast_trigger(trgr, tdefault, length=None) From 978836ef5757813c1addeb19e1938330dc71ebbd Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 13:01:23 +0100 Subject: [PATCH 49/73] routine to create dplms dictionary for Ge processing --- src/pygama/pargen/dplms_ge_dict.py | 732 +++++++++++++++++++++++ src/pygama/pargen/energy_optimisation.py | 57 +- 2 files changed, 760 insertions(+), 29 deletions(-) create mode 100644 src/pygama/pargen/dplms_ge_dict.py diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py new file mode 100644 index 000000000..8651ddc2f --- /dev/null +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -0,0 +1,732 @@ +""" +This module is for creating dplms dictionary for ge processing +""" + +from __future__ import annotations + +import itertools +import json +import logging +import os +import pathlib +import pickle +import time +from collections import OrderedDict + +import lgdo +import lgdo.lh5_store as lh5 +import matplotlib.pyplot as plt +import numpy as np +from lgdo import Array +from scipy.signal import convolve, convolve2d + +from pygama.math.histogram import get_hist +from pygama.math.peak_fitting import ( + extended_gauss_step_pdf, + extended_radford_pdf, + gauss_step_pdf, + radford_pdf, +) +from pygama.pargen.cuts import find_pulser_properties, generate_cuts, get_cut_indexes +from pygama.pargen.dsp_optimize import run_one_dsp +from pygama.pargen.energy_cal import hpge_find_E_peaks +from pygama.pargen.energy_optimisation import ( + event_selection, + fom_FWHM, + fom_FWHM_with_dt_corr_fit, + index_data, +) +from 
pygama.pargen.noise_optimization import calculate_spread + +log = logging.getLogger(__name__) +sto = lh5.LH5Store() + + +def dplms_ge_dict( + lh5_path: str, + fft_files: list[str], + cal_files: list[str], + dsp_config: dict, + par_dsp: dict, + par_dsp_lh5: str, + dplms_dict: dict, + decay_const: float = 0, + ene_par: str = "dplmsEmax", + display: int = 0, +) -> dict: + """ + This function calculates the dplms dictionary for HPGe detectors. + + Parameters + ---------- + lh5_path: str + Name of channel to process, should be name of lh5 group in raw files + fft_files : list[str] + raw files with fft data + cal_files : list[str] + raw files with cal data + dsp_config: dict + dsp config file + par_dsp: dict + Dictionary with db parameters for dsp processing + par_dsp_lh5: str + Path for saving dplms coefficients + dplms_dict: dict + Dictionary with various parameters + + Returns + ------- + out_dict : dict + """ + + t0 = time.time() + log.info(f"\nSelecting baselines") + raw_bls = load_data( + fft_files, + lh5_path, + "bls", + n_events=dplms_dict["n_baselines"], + raw_wf_field=dplms_dict["raw_wf_field"], + ) + + dsp_bls = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) + cut_dict = generate_cuts(dsp_bls, parameters=dplms_dict["bls_cut_pars"]) + idxs = get_cut_indexes(dsp_bls, cut_dict) + bl_field = dplms_dict["bl_field"] + log.info(f"... {len(dsp_bls[bl_field].values.nda[idxs,:])} baselines after cuts") + + bls = dsp_bls[bl_field].values.nda[idxs, : dplms_dict["bsize"]] + bls_par = {} + bls_cut_pars = [par for par in dplms_dict["bls_cut_pars"].keys()] + for par in bls_cut_pars: + bls_par[par] = dsp_bls[par].nda + t1 = time.time() + log.info( + f"total events {len(raw_bls)}, {len(bls)} baseline selected in {(t1-t0):.2f} s" + ) + + log.info( + "\nCalculating noise matrix of length", + dplms_dict["length"], + "n. 
events", + bls.shape[0], + "size", + bls.shape[1], + ) + nmat = noise_matrix(bls, dplms_dict["length"]) + t2 = time.time() + log.info(f"Time to calculate noise matrix {(t2-t1):.2f} s") + + log.info("\nSelecting signals") + peaks_keV = np.array(dplms_dict["peaks_keV"]) + wsize = dplms_dict["wsize"] + wf_field = dplms_dict["wf_field"] + kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] + + raw_cal, idx_list = event_selection( + cal_files, + f"{lh5_path}/raw", + dsp_config, + par_dsp[lh5_path], + peaks_keV, + np.arange(0, len(peaks_keV), 1).tolist(), + kev_widths, + cut_parameters=dplms_dict["wfs_cut_pars"], + n_events=dplms_dict["n_signals"], + ) + t3 = time.time() + log.info( + f"Time to run event selection {(t3-t2):.2f} s, total events {len(raw_cal)}" + ) + + raw_cal = index_data(raw_cal, idx_list[-1]) + log.info(f"Produce dsp data for {len(raw_cal)} events") + dsp_cal = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path]) + t4 = time.time() + log.info(f"Time to run dsp production {(t4-t3):.2f} s") + + # minimal processing chain + with open(dsp_config) as r: + dsp_config = json.load(r) + dsp_config["outputs"] = [ene_par, "dt_eff"] + + # dictionary for peak fitting + peak_dict = { + "peak": peaks_keV[-1], + "kev_width": kev_widths[-1], + "parameter": ene_par, + "func": extended_gauss_step_pdf, + "gof_func": gauss_step_pdf, + } + + if display > 0: + plot_dict = {} + plot_dict["dplms"] = {} + fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") + + # penalized coefficients + dp_coeffs = dplms_dict["dp_coeffs"] + if lh5_path in dplms_dict["noisy_bl"]: + log.info("Setting explicit zero area condition") + za_coeff = dp_coeffs["za"] + else: + za_coeff = dplms_dict["dp_def"]["za"] + dp_coeffs.pop("za") + coeff_keys = [key for key in dp_coeffs.keys()] + lists = [dp_coeffs[key] for key in dp_coeffs.keys()] + + prod = list(itertools.product(*lists)) + grid_dict = {} + min_fom = float("inf") + min_idx = None + + for i, values in 
enumerate(prod): + coeff_values = dict(zip(coeff_keys, values)) + + log.info( + "\nCase", + i, + "->", + ", ".join(f"{key} = {value}" for key, value in coeff_values.items()), + ) + grid_dict[i] = coeff_values + + sel_dict = signal_selection(dsp_cal, dplms_dict, coeff_values) + wfs = dsp_cal[wf_field].nda[sel_dict["idxs"], :] + log.info(f"... {len(wfs)} signals after signal selection") + + ref, rmat, pmat, fmat = signal_matrices(wfs, dplms_dict["length"], decay_const) + + t_tmp = time.time() + nm_coeff = coeff_values["nm"] + ft_coeff = coeff_values["ft"] + x, y, refy = filter_synthesis( + ref, + nm_coeff * nmat, + rmat, + za_coeff, + pmat, + ft_coeff * fmat, + dplms_dict["length"], + wsize, + ) + par_dsp[lh5_path]["dplms"] = {} + par_dsp[lh5_path]["dplms"]["length"] = dplms_dict["length"] + par_dsp[lh5_path]["dplms"]["coefficients"] = x.tolist() + log.info( + f"Filter synthesis in {time.time()-t_tmp:.1f} s, filter area", np.sum(x) + ) + + t_tmp = time.time() + dsp_opt = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) + energies = dsp_opt[ene_par].nda + enc_results = calculate_spread(energies, 10, 90, 1000) + enc, enc_err = enc_results["fom"], enc_results["fom_err"] + log.info( + f"ENC: mean = {energies.mean():.2f} ADC, FOM = {enc:.2f} ± {enc_err:.2f} ADC, evaluated in {time.time()-t_tmp:.1f} s" + ) + grid_dict[i]["enc"] = enc + grid_dict[i]["enc_err"] = enc_err + + if display > 0: + hist, bins, var = get_hist(energies, range=(-20, 20), dx=0.1) + bc = (bins[:-1] + bins[1:]) / 2.0 + ax.plot( + bc, + hist, + ds="steps", + label=f"{ene_par} - ENC = {enc:.3f} ± {enc_err:.3f} ADC", + ) + ax.set_xlabel("energy (ADC)") + ax.set_ylabel("counts") + ax.legend(loc="upper right") + + t_tmp = time.time() + dsp_opt = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path]) + + try: + res = fom_FWHM_with_dt_corr_fit( + dsp_opt, + peak_dict, + "QDrift", + idxs=np.where(~np.isnan(dsp_opt["dt_eff"].nda))[0], + ) + except: + log.debug("FWHM not calculated") + continue + 
+ fwhm, fwhm_err, alpha, chisquare = ( + res["fwhm"], + res["fwhm_err"], + res["alpha"], + res["chisquare"], + ) + log.info( + f"FWHM = {fwhm:.2f} ± {fwhm_err:.2f} keV, evaluated in {time.time()-t_tmp:.1f} s" + ) + + grid_dict[i]["fwhm"] = fwhm + grid_dict[i]["fwhm_err"] = fwhm_err + grid_dict[i]["alpha"] = alpha + + if ( + fwhm < dplms_dict["fwhm_limit"] + and fwhm_err < dplms_dict["err_limit"] + and chisquare < dplms_dict["chi_limit"] + ): + if fwhm < min_fom: + min_idx, min_fom = i, fwhm + + if min_idx is not None: + min_result = grid_dict[min_idx] + best_case_values = {key: min_result[key] for key in min_result.keys()} + + enc = best_case_values.get("enc", None) + enc_err = best_case_values.get("enc_err", 0) + fwhm = best_case_values.get("fwhm", None) + fwhm_err = best_case_values.get("fwhm_err", 0) + alpha = best_case_values.get("alpha", 0) + nm_coeff = best_case_values.get("nm", dplms_dict["dp_def"]["nm"]) + ft_coeff = best_case_values.get("ft", dplms_dict["dp_def"]["nm"]) + rt_coeff = best_case_values.get("rt", dplms_dict["dp_def"]["rt"]) + pt_coeff = best_case_values.get("pt", dplms_dict["dp_def"]["pt"]) + + if all( + v is not None + for v in [ + enc, + enc_err, + fwhm, + fwhm_err, + alpha, + nm_coeff, + ft_coeff, + rt_coeff, + pt_coeff, + ] + ): + log.info( + f"\nBest case: FWHM = {fwhm:.2f} ± {fwhm_err:.2f} keV, ctc {alpha}" + ) + else: + log.error("Some values are missing in the best case results") + else: + log.error("Filter synthesis failed") + nm_coeff = dplms_dict["dp_def"]["nm"] + ft_coeff = dplms_dict["dp_def"]["ft"] + rt_coeff = dplms_dict["dp_def"]["rt"] + pt_coeff = dplms_dict["dp_def"]["pt"] + + # filter synthesis + sel_dict = signal_selection(dsp_cal, dplms_dict, best_case_values) + idxs = sel_dict["idxs"] + wfs = dsp_cal[wf_field].nda[idxs, :] + ref, rmat, pmat, fmat = signal_matrices(wfs, dplms_dict["length"], decay_const) + + x, y, refy = filter_synthesis( + ref, + nm_coeff * nmat, + rmat, + za_coeff, + pmat, + ft_coeff * fmat, + 
dplms_dict["length"], + wsize, + ) + + sto.write_object( + Array(x), + name="dplms", + lh5_file=par_dsp_lh5, + wo_mode="overwrite", + group=lh5_path, + ) + + out_dict = { + "dplms": { + "length": dplms_dict["length"], + "coefficients": f"loadlh5('{par_dsp_lh5}', '{lh5_path}/dplms')", + "dp_coeffs": { + "nm": nm_coeff, + "za": za_coeff, + "ft": ft_coeff, + "rt": rt_coeff, + "pt": pt_coeff, + }, + } + } + out_alpha_dict = { + f"{ene_par}_ctc": { + "expression": f"{ene_par}*(1+dt_eff*a)", + "parameters": {"a": round(alpha, 9)}, + } + } + out_dict.update({"ctc_params": out_alpha_dict}) + + log.info(f"Time to complete DPLMS filter synthesis {time.time()-t0:.1f}") + + if display > 0: + plot_dict["dplms"]["enc_hist"] = fig + plot_dict["dplms"]["enc"] = enc + plot_dict["dplms"]["enc_err"] = enc_err + plot_dict["dplms"]["ref"] = ref + plot_dict["dplms"]["coefficients"] = x + + bl_idxs = np.random.choice(len(bls), dplms_dict["n_plot"]) + bls = bls[bl_idxs] + fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") + for ii, wf in enumerate(bls): + if ii < 10: + ax.plot(wf, label=f"mean = {wf.mean():.1f}") + else: + ax.plot(wf) + ax.legend(title=f"{lh5_path}", loc="upper right") + plot_dict["dplms"]["bls"] = fig + fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(16, 9), facecolor="white") + for ii, par in enumerate(bls_cut_pars): + mean = cut_dict[par]["Mean Value"] + llo, lup = cut_dict[par]["Lower Boundary"], cut_dict[par]["Upper Boundary"] + plo, pup = mean - 2 * (mean - llo), mean + 2 * (lup - mean) + hh, bb = np.histogram(bls_par[par], bins=np.linspace(plo, pup, 200)) + ax.flat[ii].plot(bb[1:], hh, ds="steps", label=f"cut on {par}") + ax.flat[ii].axvline(lup, color="k", linestyle=":", label="selection") + ax.flat[ii].axvline(llo, color="k", linestyle=":") + ax.flat[ii].set_xlabel(par) + ax.flat[ii].set_yscale("log") + ax.flat[ii].legend(title=f"{lh5_path}", loc="upper right") + plot_dict["dplms"]["bl_sel"] = fig + + wf_idxs = np.random.choice(len(wfs), 
dplms_dict["n_plot"]) + wfs = wfs[wf_idxs] + peak_pos = dsp_cal["peak_pos"].nda + peak_pos_neg = dsp_cal["peak_pos_neg"].nda + centroid = dsp_cal["centroid"].nda + risetime = dsp_cal["tp_90"].nda - dsp_cal["tp_10"].nda + rt_low = dplms_dict["rt_low"] + rt_high = dplms_dict["rt_high"] + peak_lim = dplms_dict["peak_lim"] + cal_par = {} + wfs_cut_pars = [par for par in dplms_dict["wfs_cut_pars"].keys()] + for par in wfs_cut_pars: + cal_par[par] = dsp_cal[par].nda + fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") + for ii, wf in enumerate(wfs): + if ii < 10: + ax.plot(wf, label=f"centr = {centroid[ii]}") + else: + ax.plot(wf) + ax.legend(title=f"{lh5_path}", loc="upper right") + axin = ax.inset_axes([0.1, 0.15, 0.35, 0.5]) + for wf in wfs: + axin.plot(wf) + axin.set_xlim(wsize / 2 - dplms_dict["zoom"], wsize / 2 + dplms_dict["zoom"]) + axin.set_yticklabels("") + plot_dict["dplms"]["wfs"] = fig + fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(16, 9), facecolor="white") + wfs_cut_pars.append("centroid") + wfs_cut_pars.append("peak_pos") + wfs_cut_pars.append("risetime") + for ii, par in enumerate(wfs_cut_pars): + pspace = np.linspace( + wsize / 2 - peak_lim, wsize / 2 + peak_lim, 2 * peak_lim + ) + if par == "centroid": + llo, lup = sel_dict["ct_ll"], sel_dict["ct_hh"] + hh, bb = np.histogram(centroid, bins=pspace) + elif par == "peak_pos": + llo, lup = sel_dict["pp_ll"], sel_dict["pp_hh"] + hh, bb = np.histogram(peak_pos, bins=pspace) + elif par == "risetime": + llo, lup = sel_dict["rt_ll"], sel_dict["rt_hh"] + rt_bins = int((rt_high - rt_low) / dplms_dict["period"]) + rt_space = np.linspace(rt_low, rt_high, rt_bins) + hh, bb = np.histogram(risetime, bins=rt_space) + else: + llo, lup = np.min(cal_par[par]), np.max(cal_par[par]) + hh, bb = np.histogram(cal_par[par], bins=np.linspace(llo, lup, 200)) + ax.flat[ii + 1].plot(bb[1:], hh, ds="steps", label=f"cut on {par}") + ax.flat[ii + 1].axvline( + llo, color="k", linestyle=":", label=f"sel. 
{llo:.1f} {lup:.1f}" + ) + if par != "centroid": + ax.flat[ii + 1].axvline(lup, color="k", linestyle=":") + ax.flat[ii + 1].set_xlabel(par) + ax.flat[ii + 1].set_yscale("log") + ax.flat[ii + 1].legend(title=f"{lh5_path}", loc="upper right") + roughenergy = dsp_cal["trapTmax"].nda + roughenergy_sel = roughenergy[idxs] + ell, ehh = roughenergy.min(), roughenergy.max() + he, be = np.histogram(roughenergy, bins=np.linspace(ell, ehh, 1000)) + hs, be = np.histogram(roughenergy_sel, bins=np.linspace(ell, ehh, 1000)) + ax.flat[0].plot(be[1:], he, c="b", ds="steps", label="initial") + ax.flat[0].plot(be[1:], hs, c="r", ds="steps", label="selected") + ax.flat[0].set_xlabel("rough energy (ADC)") + ax.flat[0].set_yscale("log") + ax.flat[0].legend(loc="upper right", title=f"{lh5_path}") + plot_dict["dplms"]["wf_sel"] = fig + + fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") + ax.plot(np.flip(x), "r-", label=f"filter") + ax.axhline(0, color="black", linestyle=":") + ax.legend(loc="upper right", title=f"{lh5_path}") + axin = ax.inset_axes([0.6, 0.1, 0.35, 0.33]) + axin.plot(np.flip(x), "r-") + axin.set_xlim( + dplms_dict["length"] / 2 - dplms_dict["zoom"], + dplms_dict["length"] / 2 + dplms_dict["zoom"], + ) + axin.set_yticklabels("") + ax.indicate_inset_zoom(axin) + + return out_dict, plot_dict + else: + return out_dict + + +def load_data( + raw_file: list[str], + lh5_path: str, + sel_type: str, + peaks: np.array = [], + n_events: int = 5000, + e_lower_lim: float = 1200, + e_upper_lim: float = 2700, + raw_wf_field: str = "waveform", +) -> lgdo.Table: + sto = lh5.LH5Store() + df = lh5.load_dfs(raw_file, ["daqenergy", "timestamp"], f"{lh5_path}/raw") + + if sel_type == "bls": + cuts = np.where(df.daqenergy.values == 0)[0] + idx_list = [] + waveforms = sto.read_object( + f"{lh5_path}/raw/{raw_wf_field}", raw_file, n_rows=n_events, idx=cuts + )[0] + daqenergy = sto.read_object( + f"{lh5_path}/raw/daqenergy", raw_file, n_rows=n_events, idx=cuts + )[0] + tb_data = 
lh5.Table(col_dict={"waveform": waveforms, "daqenergy": daqenergy}) + return tb_data + else: + pulser_props = find_pulser_properties(df, energy="daqenergy") + if len(pulser_props) > 0: + final_mask = None + for entry in pulser_props: + pulser_e, pulser_err = entry[0], entry[1] + if pulser_err < 10: + pulser_err = 10 + e_cut = (df.daqenergy.values < pulser_e + pulser_err) & ( + df.daqenergy.values > pulser_e - pulser_err + ) + if final_mask is None: + final_mask = e_cut + else: + final_mask = final_mask | e_cut + ids = final_mask + log.debug(f"pulser found: {pulser_props}") + else: + log.debug("no pulser") + ids = np.zeros(len(df.daqenergy.values), dtype=bool) + if sel_type == "pul": + cuts = np.where(ids == True)[0] + log.debug(f"{len(cuts)} events found for pulser") + waveforms = sto.read_object( + f"{lh5_path}/raw/waveform", raw_file, n_rows=n_events, idx=cuts + )[0] + daqenergy = sto.read_object( + f"{lh5_path}/raw/daqenergy", raw_file, n_rows=n_events, idx=cuts + )[0] + tb_data = lh5.Table( + col_dict={"waveform": waveforms, "daqenergy": daqenergy} + ) + return tb_data + else: + # Get events around peak using raw file values + initial_mask = (df.daqenergy.values > 0) & (~ids) + rough_energy = df.daqenergy.values[initial_mask] + initial_idxs = np.where(initial_mask)[0] + + guess_keV = 2620 / np.nanpercentile(rough_energy, 99) + Euc_min = 0 # threshold / guess_keV * 0.6 + Euc_max = 2620 / guess_keV * 1.1 + dEuc = 1 # / guess_keV + hist, bins, var = get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) + detected_peaks_locs, detected_peaks_keV, roughpars = hpge_find_E_peaks( + hist, bins, var, peaks + ) + log.debug( + f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}" + ) + e_lower_lim = (e_lower_lim - roughpars[1]) / roughpars[0] + e_upper_lim = (e_upper_lim - roughpars[1]) / roughpars[0] + log.debug(f"lower_lim: {e_lower_lim}, upper_lim: {e_upper_lim}") + mask = (rough_energy > e_lower_lim) & (rough_energy < e_upper_lim) + cuts = 
initial_idxs[mask][:] + log.debug(f"{len(cuts)} events found in energy range") + rough_energy = rough_energy[mask] + rough_energy = rough_energy[:n_events] + rough_energy = rough_energy * roughpars[0] + roughpars[1] + waveforms = sto.read_object( + f"{lh5_path}/raw/waveform", raw_file, n_rows=n_events, idx=cuts + )[0] + daqenergy = sto.read_object( + f"{lh5_path}/raw/daqenergy", raw_file, n_rows=n_events, idx=cuts + )[0] + tb_data = lh5.Table( + col_dict={"waveform": waveforms, "daqenergy": daqenergy} + ) + return tb_data, rough_energy + + +def is_valid_centroid( + centroid: np.array, lim: int, size: int, full_size: int +) -> list[bool]: + llim = size / 2 - lim + hlim = full_size - size / 2 + idxs = (centroid > llim) & (centroid < hlim) + return idxs, llim, hlim + + +def is_not_pile_up( + peak_pos: np.array, peak_pos_neg: np.array, thr: int, lim: int, size: int +) -> list[bool]: + bin_edges = np.linspace(size / 2 - lim, size / 2 + lim, 2 * lim) + hist, bin_edges = np.histogram(peak_pos, bins=bin_edges) + + thr = thr * hist.max() / 100 + low_thr_idxs = np.where(hist[: hist.argmax()] < thr)[0] + upp_thr_idxs = np.where(hist[hist.argmax() :] < thr)[0] + + idx_low = low_thr_idxs[-1] if low_thr_idxs.size > 0 else 0 + idx_upp = ( + upp_thr_idxs[0] + hist.argmax() if upp_thr_idxs.size > 0 else len(hist) - 1 + ) + + llow, lupp = bin_edges[idx_low], bin_edges[idx_upp] + + idxs = [] + for n, nn in zip(peak_pos, peak_pos_neg): + condition1 = np.count_nonzero(n > 0) == 1 + condition2 = ( + np.count_nonzero((n > 0) & ((n < llow) | (n > lupp) & (n < size))) == 0 + ) + condition3 = np.count_nonzero(nn > 0) == 0 + idxs.append(condition1 and condition2 and condition3) + return idxs, llow, lupp + + +def is_valid_risetime(risetime: np.array, llim: int, perc: float): + hlim = np.percentile(risetime[~np.isnan(risetime)], perc) + idxs = (risetime >= llim) & (risetime <= hlim) + return idxs, llim, hlim + + +def signal_selection(dsp_cal, dplms_dict, coeff_values): + peak_pos = 
dsp_cal["peak_pos"].nda + peak_pos_neg = dsp_cal["peak_pos_neg"].nda + centroid = dsp_cal["centroid"].nda + risetime = dsp_cal["tp_90"].nda - dsp_cal["tp_10"].nda + + rt_low = dplms_dict["rt_low"] + rt_high = dplms_dict["rt_high"] + peak_lim = dplms_dict["peak_lim"] + wsize = dplms_dict["wsize"] + bsize = dplms_dict["bsize"] + + centroid_lim = dplms_dict["centroid_lim"] + if "rt" in coeff_values: + perc = coeff_values["rt"] + else: + perc = dplms_dict["dp_def"]["rt"] + if "pt" in coeff_values: + thr = coeff_values["pt"] + else: + thr = dplms_dict["dp_def"]["rt"] + + idxs_ct, ct_ll, ct_hh = is_valid_centroid(centroid, centroid_lim, wsize, bsize) + log.info(f"... {len(peak_pos[idxs_ct,:])} signals after alignment") + + idxs_pp, pp_ll, pp_hh = is_not_pile_up(peak_pos, peak_pos_neg, thr, peak_lim, wsize) + log.info(f"... {len(peak_pos[idxs_pp,:])} signals after pile-up cut") + + idxs_rt, rt_ll, rt_hh = is_valid_risetime(risetime, rt_low, perc) + log.info(f"... {len(peak_pos[idxs_rt,:])} signals after risetime cut") + + idxs = idxs_ct & idxs_pp & idxs_rt + sel_dict = { + "idxs": idxs, + "ct_ll": ct_ll, + "ct_hh": ct_hh, + "pp_ll": pp_ll, + "pp_hh": pp_hh, + "rt_ll": rt_ll, + "rt_hh": rt_hh, + } + return sel_dict + + +def noise_matrix(bls: np.array, length: int) -> np.array: + nev, size = bls.shape + ref = np.mean(bls, axis=0) + offset = np.mean(ref) + bls = bls - offset + nmat = np.matmul(bls.T, bls, dtype=float) / nev + kernel = np.identity(size - length + 1) + nmat = convolve2d(nmat, kernel, boundary="symm", mode="valid") / (size - length + 1) + return nmat + + +def signal_matrices( + wfs: np.array, length: int, decay_const: float, ff: int = 2 +) -> np.array: + nev, size = wfs.shape + lo = size // 2 - 100 + flo = size // 2 - length // 2 + fhi = size // 2 + length // 2 + offsets = np.mean(wfs[:, :lo], axis=1) + wfs = wfs - offsets[:, np.newaxis] + + # Reference signal + ref = np.sum(wfs, axis=0) + ref /= np.max(ref) + rmat = np.outer(ref[flo:fhi], ref[flo:fhi]) + + # 
Pile-up matrix + if decay_const > 0: + decay = np.exp(-np.arange(length) / decay_const) + else: + decay = np.zeros(length) + pmat = np.outer(decay, decay) + + # Flat top matrix + flo -= ff // 2 + fhi += ff // 2 + wfs = wfs[:, flo:fhi] + fmat = np.matmul(wfs.T, wfs, dtype=float) / nev + m1 = ((1, -1), (-1, 1)) + fmat = convolve2d(fmat, m1, boundary="symm", mode="valid") + if ff > 0: + fmat = convolve2d(fmat, np.identity(ff), boundary="symm", mode="valid") / ff + return ref, rmat, pmat, fmat + + +def filter_synthesis( + ref: np.array, + nmat: np.array, + rmat: np.array, + za: int, + pmat: np.array, + fmat: np.array, + length: int, + size: int, +) -> np.array: + mat = nmat + rmat + za * np.ones([length, length]) + pmat + fmat + flo = (size // 2) - (length // 2) + fhi = (size // 2) + (length // 2) + x = np.linalg.solve(mat, ref[flo:fhi]) + y = convolve(ref, np.flip(x), mode="valid") + maxy = np.max(y) + x /= maxy + y /= maxy + refy = ref[(size // 2) - (len(y) // 2) : (size // 2) + (len(y) // 2)] + return x, y, refy diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index 1c34901d9..5da39c84f 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -857,6 +857,7 @@ def fom_FWHM_fit(tb_in, kwarg_dict): csqr, n_sig, n_sig_err, + _, ) = get_peak_fwhm_with_dt_corr( Energies, alpha, dt, func, gof_func, peak=peak, kev_width=kev_width, kev=True ) @@ -938,6 +939,7 @@ def event_selection( else: final_mask = final_mask | e_cut ids = final_mask + print(f"pulser found: {pulser_props}") log.debug(f"pulser found: {pulser_props}") else: log.debug("no_pulser") @@ -950,18 +952,14 @@ def event_selection( initial_idxs = np.where(initial_mask)[0] guess_keV = 2620 / np.nanpercentile(rough_energy, 99) - Euc_min = threshold / guess_keV + Euc_min = 0 # threshold / guess_keV Euc_max = 2620 / guess_keV * 1.1 - dEuc = 5 / guess_keV + dEuc = 1 / guess_keV hist, bins, var = pgh.get_hist(rough_energy, 
range=(Euc_min, Euc_max), dx=dEuc) detected_peaks_locs, detected_peaks_keV, roughpars = pgc.hpge_find_E_peaks( - hist, - bins, - var, - np.array( - [238.632, 583.191, 727.330, 860.564, 1592.5, 1620.5, 2103.53, 2614.553] - ), + hist, bins, var, peaks_keV, n_sigma=3 ) + print(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") log.debug(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") masks = [] @@ -1075,11 +1073,11 @@ def event_selection( return out_events, idx_list -def fwhm_slope(x, m0, m1, m2): +def fwhm_slope(x, m0, m1): """ Fit the energy resolution curve """ - return np.sqrt(m0 + m1 * x + m2 * (x**2)) + return np.sqrt(m0 + m1 * x) def interpolate_energy(peak_energies, points, err_points, energy): @@ -1087,7 +1085,7 @@ def interpolate_energy(peak_energies, points, err_points, energy): if len(points[~nan_mask]) < 3: return np.nan, np.nan, np.nan else: - param_guess = [2, 0.001, 0.000001] # + param_guess = [2, 0.001] # param_bounds = (0, [10., 1. ])# try: fit_pars, fit_covs = curve_fit( @@ -1137,6 +1135,11 @@ def fom_FWHM(tb_in, kwarg_dict, ctc_parameter, alpha, idxs=None, display=0): dt = np.subtract(tb_in["tp_99"].nda, tb_in["tp_0_est"].nda, dtype="float64") elif ctc_parameter == "rt": dt = np.subtract(tb_in["tp_99"].nda, tb_in["tp_01"].nda, dtype="float64") + + if idxs is not None: + Energies = Energies[idxs] + dt = dt[idxs] + if np.isnan(Energies).any() or np.isnan(dt).any(): if np.isnan(Energies).any(): log.debug(f"nan energy values for peak {peak}") @@ -1151,10 +1154,6 @@ def fom_FWHM(tb_in, kwarg_dict, ctc_parameter, alpha, idxs=None, display=0): "n_sig_err": np.nan, } - if idxs is not None: - Energies = Energies[idxs] - dt = dt[idxs] - # Return fwhm of optimal alpha in kev with error try: ( @@ -1207,39 +1206,37 @@ def single_peak_fom(data, kwarg_dict): return out_dict -def new_fom(data, kwarg_dict): +def new_fom(data, kwarg_dict, alpha=None): peaks = kwarg_dict["peaks_keV"] idx_list = kwarg_dict["idx_list"] ctc_param = 
kwarg_dict["ctc_param"] peak_dicts = kwarg_dict["peak_dicts"] - out_dict = fom_FWHM_with_dt_corr_fit( - data, peak_dicts[-1], ctc_param, idxs=idx_list[-1], display=0 - ) - alpha = out_dict["alpha"] + if alpha is None: + out_dict = fom_FWHM_with_dt_corr_fit( + data, peak_dicts[-1], ctc_param, idxs=idx_list[-1], display=0 + ) + alpha = out_dict["alpha"] + log.info(alpha) fwhms = [] fwhm_errs = [] n_sig = [] n_sig_err = [] - for i, peak in enumerate(peaks[:-1]): + chisquares = [] + for i, peak in enumerate(peaks): out_peak_dict = fom_FWHM( data, peak_dicts[i], ctc_param, alpha, idxs=idx_list[i], display=0 ) - # n_sig_minimum = peak_dicts[i]["n_sig_minimum"] - # if peak_dict["n_sig"] Date: Tue, 28 Nov 2023 15:41:39 +0100 Subject: [PATCH 50/73] removed dependency on nopt routine --- src/pygama/pargen/dplms_ge_dict.py | 32 ------------------------------ 1 file changed, 32 deletions(-) diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py index 8651ddc2f..71311f4ce 100644 --- a/src/pygama/pargen/dplms_ge_dict.py +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -36,7 +36,6 @@ fom_FWHM_with_dt_corr_fit, index_data, ) -from pygama.pargen.noise_optimization import calculate_spread log = logging.getLogger(__name__) sto = lh5.LH5Store() @@ -217,30 +216,6 @@ def dplms_ge_dict( f"Filter synthesis in {time.time()-t_tmp:.1f} s, filter area", np.sum(x) ) - t_tmp = time.time() - dsp_opt = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) - energies = dsp_opt[ene_par].nda - enc_results = calculate_spread(energies, 10, 90, 1000) - enc, enc_err = enc_results["fom"], enc_results["fom_err"] - log.info( - f"ENC: mean = {energies.mean():.2f} ADC, FOM = {enc:.2f} ± {enc_err:.2f} ADC, evaluated in {time.time()-t_tmp:.1f} s" - ) - grid_dict[i]["enc"] = enc - grid_dict[i]["enc_err"] = enc_err - - if display > 0: - hist, bins, var = get_hist(energies, range=(-20, 20), dx=0.1) - bc = (bins[:-1] + bins[1:]) / 2.0 - ax.plot( - bc, - hist, - ds="steps", - 
label=f"{ene_par} - ENC = {enc:.3f} ± {enc_err:.3f} ADC", - ) - ax.set_xlabel("energy (ADC)") - ax.set_ylabel("counts") - ax.legend(loc="upper right") - t_tmp = time.time() dsp_opt = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path]) @@ -281,8 +256,6 @@ def dplms_ge_dict( min_result = grid_dict[min_idx] best_case_values = {key: min_result[key] for key in min_result.keys()} - enc = best_case_values.get("enc", None) - enc_err = best_case_values.get("enc_err", 0) fwhm = best_case_values.get("fwhm", None) fwhm_err = best_case_values.get("fwhm_err", 0) alpha = best_case_values.get("alpha", 0) @@ -294,8 +267,6 @@ def dplms_ge_dict( if all( v is not None for v in [ - enc, - enc_err, fwhm, fwhm_err, alpha, @@ -366,9 +337,6 @@ def dplms_ge_dict( log.info(f"Time to complete DPLMS filter synthesis {time.time()-t0:.1f}") if display > 0: - plot_dict["dplms"]["enc_hist"] = fig - plot_dict["dplms"]["enc"] = enc - plot_dict["dplms"]["enc_err"] = enc_err plot_dict["dplms"]["ref"] = ref plot_dict["dplms"]["coefficients"] = x From e800f393a87b478fbeeda37053c467091b0d216b Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 16:19:52 +0100 Subject: [PATCH 51/73] loading full raw table --- src/pygama/pargen/dplms_ge_dict.py | 98 +++++++++++------------------- 1 file changed, 35 insertions(+), 63 deletions(-) diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py index 71311f4ce..3ca56cb66 100644 --- a/src/pygama/pargen/dplms_ge_dict.py +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -80,13 +80,7 @@ def dplms_ge_dict( t0 = time.time() log.info(f"\nSelecting baselines") - raw_bls = load_data( - fft_files, - lh5_path, - "bls", - n_events=dplms_dict["n_baselines"], - raw_wf_field=dplms_dict["raw_wf_field"], - ) + raw_bls = load_data(fft_files, lh5_path, "bls", n_events=dplms_dict["n_baselines"]) dsp_bls = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) cut_dict = generate_cuts(dsp_bls, parameters=dplms_dict["bls_cut_pars"]) @@ -216,6 
+210,10 @@ def dplms_ge_dict( f"Filter synthesis in {time.time()-t_tmp:.1f} s, filter area", np.sum(x) ) + t_tmp = time.time() + dsp_opt = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) + energies = dsp_opt[ene_par].nda + t_tmp = time.time() dsp_opt = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path]) @@ -459,7 +457,6 @@ def load_data( n_events: int = 5000, e_lower_lim: float = 1200, e_upper_lim: float = 2700, - raw_wf_field: str = "waveform", ) -> lgdo.Table: sto = lh5.LH5Store() df = lh5.load_dfs(raw_file, ["daqenergy", "timestamp"], f"{lh5_path}/raw") @@ -467,13 +464,9 @@ def load_data( if sel_type == "bls": cuts = np.where(df.daqenergy.values == 0)[0] idx_list = [] - waveforms = sto.read_object( - f"{lh5_path}/raw/{raw_wf_field}", raw_file, n_rows=n_events, idx=cuts + tb_data = sto.read_object( + f"{lh5_path}/raw", raw_file, n_rows=n_events, idx=cuts )[0] - daqenergy = sto.read_object( - f"{lh5_path}/raw/daqenergy", raw_file, n_rows=n_events, idx=cuts - )[0] - tb_data = lh5.Table(col_dict={"waveform": waveforms, "daqenergy": daqenergy}) return tb_data else: pulser_props = find_pulser_properties(df, energy="daqenergy") @@ -495,55 +488,34 @@ def load_data( else: log.debug("no pulser") ids = np.zeros(len(df.daqenergy.values), dtype=bool) - if sel_type == "pul": - cuts = np.where(ids == True)[0] - log.debug(f"{len(cuts)} events found for pulser") - waveforms = sto.read_object( - f"{lh5_path}/raw/waveform", raw_file, n_rows=n_events, idx=cuts - )[0] - daqenergy = sto.read_object( - f"{lh5_path}/raw/daqenergy", raw_file, n_rows=n_events, idx=cuts - )[0] - tb_data = lh5.Table( - col_dict={"waveform": waveforms, "daqenergy": daqenergy} - ) - return tb_data - else: - # Get events around peak using raw file values - initial_mask = (df.daqenergy.values > 0) & (~ids) - rough_energy = df.daqenergy.values[initial_mask] - initial_idxs = np.where(initial_mask)[0] - - guess_keV = 2620 / np.nanpercentile(rough_energy, 99) - Euc_min = 0 # threshold / 
guess_keV * 0.6 - Euc_max = 2620 / guess_keV * 1.1 - dEuc = 1 # / guess_keV - hist, bins, var = get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) - detected_peaks_locs, detected_peaks_keV, roughpars = hpge_find_E_peaks( - hist, bins, var, peaks - ) - log.debug( - f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}" - ) - e_lower_lim = (e_lower_lim - roughpars[1]) / roughpars[0] - e_upper_lim = (e_upper_lim - roughpars[1]) / roughpars[0] - log.debug(f"lower_lim: {e_lower_lim}, upper_lim: {e_upper_lim}") - mask = (rough_energy > e_lower_lim) & (rough_energy < e_upper_lim) - cuts = initial_idxs[mask][:] - log.debug(f"{len(cuts)} events found in energy range") - rough_energy = rough_energy[mask] - rough_energy = rough_energy[:n_events] - rough_energy = rough_energy * roughpars[0] + roughpars[1] - waveforms = sto.read_object( - f"{lh5_path}/raw/waveform", raw_file, n_rows=n_events, idx=cuts - )[0] - daqenergy = sto.read_object( - f"{lh5_path}/raw/daqenergy", raw_file, n_rows=n_events, idx=cuts - )[0] - tb_data = lh5.Table( - col_dict={"waveform": waveforms, "daqenergy": daqenergy} - ) - return tb_data, rough_energy + + # Get events around peak using raw file values + initial_mask = (df.daqenergy.values > 0) & (~ids) + rough_energy = df.daqenergy.values[initial_mask] + initial_idxs = np.where(initial_mask)[0] + + guess_keV = 2620 / np.nanpercentile(rough_energy, 99) + Euc_min = 0 # threshold / guess_keV * 0.6 + Euc_max = 2620 / guess_keV * 1.1 + dEuc = 1 # / guess_keV + hist, bins, var = get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) + detected_peaks_locs, detected_peaks_keV, roughpars = hpge_find_E_peaks( + hist, bins, var, peaks + ) + log.debug(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") + e_lower_lim = (e_lower_lim - roughpars[1]) / roughpars[0] + e_upper_lim = (e_upper_lim - roughpars[1]) / roughpars[0] + log.debug(f"lower_lim: {e_lower_lim}, upper_lim: {e_upper_lim}") + mask = (rough_energy > e_lower_lim) & 
(rough_energy < e_upper_lim) + cuts = initial_idxs[mask][:] + log.debug(f"{len(cuts)} events found in energy range") + rough_energy = rough_energy[mask] + rough_energy = rough_energy[:n_events] + rough_energy = rough_energy * roughpars[0] + roughpars[1] + tb_data = sto.read_object( + f"{lh5_path}/raw", raw_file, n_rows=n_events, idx=cuts + )[0] + return tb_data, rough_energy def is_valid_centroid( From ffa4617e71f2a97ec09c5219296955922a726db4 Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 16:45:53 +0100 Subject: [PATCH 52/73] smale change on loading data --- src/pygama/pargen/dplms_ge_dict.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py index 3ca56cb66..75ee98e04 100644 --- a/src/pygama/pargen/dplms_ge_dict.py +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -459,10 +459,11 @@ def load_data( e_upper_lim: float = 2700, ) -> lgdo.Table: sto = lh5.LH5Store() - df = lh5.load_dfs(raw_file, ["daqenergy", "timestamp"], f"{lh5_path}/raw") + + daqenergy = sto.read_object(f"{lh5_path}/raw/daqenergy", raw_file)[0].nda if sel_type == "bls": - cuts = np.where(df.daqenergy.values == 0)[0] + cuts = np.where(daqenergy == 0)[0] idx_list = [] tb_data = sto.read_object( f"{lh5_path}/raw", raw_file, n_rows=n_events, idx=cuts @@ -476,8 +477,8 @@ def load_data( pulser_e, pulser_err = entry[0], entry[1] if pulser_err < 10: pulser_err = 10 - e_cut = (df.daqenergy.values < pulser_e + pulser_err) & ( - df.daqenergy.values > pulser_e - pulser_err + e_cut = (daqenergy < pulser_e + pulser_err) & ( + daqenergy > pulser_e - pulser_err ) if final_mask is None: final_mask = e_cut @@ -487,11 +488,11 @@ def load_data( log.debug(f"pulser found: {pulser_props}") else: log.debug("no pulser") - ids = np.zeros(len(df.daqenergy.values), dtype=bool) + ids = np.zeros(len(daqenergy), dtype=bool) # Get events around peak using raw file values - initial_mask = (df.daqenergy.values > 0) & 
(~ids) - rough_energy = df.daqenergy.values[initial_mask] + initial_mask = (daqenergy > 0) & (~ids) + rough_energy = daqenergy[initial_mask] initial_idxs = np.where(initial_mask)[0] guess_keV = 2620 / np.nanpercentile(rough_energy, 99) From e4f7d6822e6e9f611a668bd6c3c0f0bfd62f25f3 Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 18:48:11 +0100 Subject: [PATCH 53/73] moved load data out of pargen routine --- src/pygama/pargen/dplms_ge_dict.py | 130 +++-------------------- src/pygama/pargen/energy_optimisation.py | 13 +-- 2 files changed, 23 insertions(+), 120 deletions(-) diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py index 75ee98e04..67caf4ced 100644 --- a/src/pygama/pargen/dplms_ge_dict.py +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -34,7 +34,6 @@ event_selection, fom_FWHM, fom_FWHM_with_dt_corr_fit, - index_data, ) log = logging.getLogger(__name__) @@ -43,8 +42,8 @@ def dplms_ge_dict( lh5_path: str, - fft_files: list[str], - cal_files: list[str], + raw_fft: lgdo.Table, + raw_cal: lgdo.Table, dsp_config: dict, par_dsp: dict, par_dsp_lh5: str, @@ -60,10 +59,10 @@ def dplms_ge_dict( ---------- lh5_path: str Name of channel to process, should be name of lh5 group in raw files - fft_files : list[str] - raw files with fft data - cal_files : list[str] - raw files with cal data + fft_files : lgdo.Table + table with fft data + raw_cal : lgdo.Table + table with cal data dsp_config: dict dsp config file par_dsp: dict @@ -80,22 +79,20 @@ def dplms_ge_dict( t0 = time.time() log.info(f"\nSelecting baselines") - raw_bls = load_data(fft_files, lh5_path, "bls", n_events=dplms_dict["n_baselines"]) - - dsp_bls = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) - cut_dict = generate_cuts(dsp_bls, parameters=dplms_dict["bls_cut_pars"]) - idxs = get_cut_indexes(dsp_bls, cut_dict) + dsp_fft = run_one_dsp(raw_fft, dsp_config, db_dict=par_dsp[lh5_path]) + cut_dict = generate_cuts(dsp_fft, 
parameters=dplms_dict["bls_cut_pars"]) + idxs = get_cut_indexes(dsp_fft, cut_dict) bl_field = dplms_dict["bl_field"] - log.info(f"... {len(dsp_bls[bl_field].values.nda[idxs,:])} baselines after cuts") + log.info(f"... {len(dsp_fft[bl_field].values.nda[idxs,:])} baselines after cuts") - bls = dsp_bls[bl_field].values.nda[idxs, : dplms_dict["bsize"]] + bls = dsp_fft[bl_field].values.nda[idxs, : dplms_dict["bsize"]] bls_par = {} bls_cut_pars = [par for par in dplms_dict["bls_cut_pars"].keys()] for par in bls_cut_pars: - bls_par[par] = dsp_bls[par].nda + bls_par[par] = dsp_fft[par].nda t1 = time.time() log.info( - f"total events {len(raw_bls)}, {len(bls)} baseline selected in {(t1-t0):.2f} s" + f"total events {len(raw_fft)}, {len(bls)} baseline selected in {(t1-t0):.2f} s" ) log.info( @@ -111,36 +108,16 @@ def dplms_ge_dict( log.info(f"Time to calculate noise matrix {(t2-t1):.2f} s") log.info("\nSelecting signals") - peaks_keV = np.array(dplms_dict["peaks_keV"]) wsize = dplms_dict["wsize"] wf_field = dplms_dict["wf_field"] + peaks_keV = np.array(dplms_dict["peaks_keV"]) kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] - raw_cal, idx_list = event_selection( - cal_files, - f"{lh5_path}/raw", - dsp_config, - par_dsp[lh5_path], - peaks_keV, - np.arange(0, len(peaks_keV), 1).tolist(), - kev_widths, - cut_parameters=dplms_dict["wfs_cut_pars"], - n_events=dplms_dict["n_signals"], - ) - t3 = time.time() - log.info( - f"Time to run event selection {(t3-t2):.2f} s, total events {len(raw_cal)}" - ) - - raw_cal = index_data(raw_cal, idx_list[-1]) log.info(f"Produce dsp data for {len(raw_cal)} events") dsp_cal = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path]) - t4 = time.time() - log.info(f"Time to run dsp production {(t4-t3):.2f} s") + t3 = time.time() + log.info(f"Time to run dsp production {(t3-t2):.2f} s") - # minimal processing chain - with open(dsp_config) as r: - dsp_config = json.load(r) dsp_config["outputs"] = [ene_par, "dt_eff"] # 
dictionary for peak fitting @@ -155,7 +132,6 @@ def dplms_ge_dict( if display > 0: plot_dict = {} plot_dict["dplms"] = {} - fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") # penalized coefficients dp_coeffs = dplms_dict["dp_coeffs"] @@ -210,10 +186,6 @@ def dplms_ge_dict( f"Filter synthesis in {time.time()-t_tmp:.1f} s, filter area", np.sum(x) ) - t_tmp = time.time() - dsp_opt = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) - energies = dsp_opt[ene_par].nda - t_tmp = time.time() dsp_opt = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path]) @@ -449,76 +421,6 @@ def dplms_ge_dict( return out_dict -def load_data( - raw_file: list[str], - lh5_path: str, - sel_type: str, - peaks: np.array = [], - n_events: int = 5000, - e_lower_lim: float = 1200, - e_upper_lim: float = 2700, -) -> lgdo.Table: - sto = lh5.LH5Store() - - daqenergy = sto.read_object(f"{lh5_path}/raw/daqenergy", raw_file)[0].nda - - if sel_type == "bls": - cuts = np.where(daqenergy == 0)[0] - idx_list = [] - tb_data = sto.read_object( - f"{lh5_path}/raw", raw_file, n_rows=n_events, idx=cuts - )[0] - return tb_data - else: - pulser_props = find_pulser_properties(df, energy="daqenergy") - if len(pulser_props) > 0: - final_mask = None - for entry in pulser_props: - pulser_e, pulser_err = entry[0], entry[1] - if pulser_err < 10: - pulser_err = 10 - e_cut = (daqenergy < pulser_e + pulser_err) & ( - daqenergy > pulser_e - pulser_err - ) - if final_mask is None: - final_mask = e_cut - else: - final_mask = final_mask | e_cut - ids = final_mask - log.debug(f"pulser found: {pulser_props}") - else: - log.debug("no pulser") - ids = np.zeros(len(daqenergy), dtype=bool) - - # Get events around peak using raw file values - initial_mask = (daqenergy > 0) & (~ids) - rough_energy = daqenergy[initial_mask] - initial_idxs = np.where(initial_mask)[0] - - guess_keV = 2620 / np.nanpercentile(rough_energy, 99) - Euc_min = 0 # threshold / guess_keV * 0.6 - Euc_max = 2620 / guess_keV * 1.1 - dEuc 
= 1 # / guess_keV - hist, bins, var = get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) - detected_peaks_locs, detected_peaks_keV, roughpars = hpge_find_E_peaks( - hist, bins, var, peaks - ) - log.debug(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") - e_lower_lim = (e_lower_lim - roughpars[1]) / roughpars[0] - e_upper_lim = (e_upper_lim - roughpars[1]) / roughpars[0] - log.debug(f"lower_lim: {e_lower_lim}, upper_lim: {e_upper_lim}") - mask = (rough_energy > e_lower_lim) & (rough_energy < e_upper_lim) - cuts = initial_idxs[mask][:] - log.debug(f"{len(cuts)} events found in energy range") - rough_energy = rough_energy[mask] - rough_energy = rough_energy[:n_events] - rough_energy = rough_energy * roughpars[0] + roughpars[1] - tb_data = sto.read_object( - f"{lh5_path}/raw", raw_file, n_rows=n_events, idx=cuts - )[0] - return tb_data, rough_energy - - def is_valid_centroid( centroid: np.array, lim: int, size: int, full_size: int ) -> list[bool]: diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index 5da39c84f..0fc12de87 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -939,7 +939,6 @@ def event_selection( else: final_mask = final_mask | e_cut ids = final_mask - print(f"pulser found: {pulser_props}") log.debug(f"pulser found: {pulser_props}") else: log.debug("no_pulser") @@ -952,14 +951,13 @@ def event_selection( initial_idxs = np.where(initial_mask)[0] guess_keV = 2620 / np.nanpercentile(rough_energy, 99) - Euc_min = 0 # threshold / guess_keV + Euc_min = threshold / guess_keV * 0.6 Euc_max = 2620 / guess_keV * 1.1 - dEuc = 1 / guess_keV + dEuc = 1 # / guess_keV hist, bins, var = pgh.get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) detected_peaks_locs, detected_peaks_keV, roughpars = pgc.hpge_find_E_peaks( hist, bins, var, peaks_keV, n_sigma=3 ) - print(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") 
log.debug(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") masks = [] @@ -1073,11 +1071,14 @@ def event_selection( return out_events, idx_list -def fwhm_slope(x, m0, m1): +def fwhm_slope(x, m0, m1, m2=None): """ Fit the energy resolution curve """ - return np.sqrt(m0 + m1 * x) + if m2 is None: + return np.sqrt(m0 + m1 * x) + else: + return np.sqrt(m0 + m1 * x + m2 * (x**2)) def interpolate_energy(peak_energies, points, err_points, energy): From 8811839bba907ea3244d267a34d53dc2d8da904f Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 18:53:40 +0100 Subject: [PATCH 54/73] small changes --- src/pygama/pargen/energy_optimisation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index 0fc12de87..fb990dc9e 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -956,7 +956,10 @@ def event_selection( dEuc = 1 # / guess_keV hist, bins, var = pgh.get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) detected_peaks_locs, detected_peaks_keV, roughpars = pgc.hpge_find_E_peaks( - hist, bins, var, peaks_keV, n_sigma=3 + hist, + bins, + var, + np.array([238.632, 583.191, 727.330, 860.564, 1620.5, 2103.53, 2614.553]), ) log.debug(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") From d2b162d0fdb04513113fcf2005ac4688ebe08826 Mon Sep 17 00:00:00 2001 From: valerioda Date: Fri, 12 Jan 2024 10:37:51 +0100 Subject: [PATCH 55/73] update the LH5 file writes/reads to match the new LH5Store syntax according suggestions --- src/pygama/pargen/dplms_ge_dict.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py index 67caf4ced..0c1f9fcbc 100644 --- a/src/pygama/pargen/dplms_ge_dict.py +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -13,11 +13,9 @@ import time from collections 
import OrderedDict -import lgdo -import lgdo.lh5_store as lh5 import matplotlib.pyplot as plt import numpy as np -from lgdo import Array +from lgdo import Array, Table, lh5 from scipy.signal import convolve, convolve2d from pygama.math.histogram import get_hist @@ -42,8 +40,8 @@ def dplms_ge_dict( lh5_path: str, - raw_fft: lgdo.Table, - raw_cal: lgdo.Table, + raw_fft: Table, + raw_cal: Table, dsp_config: dict, par_dsp: dict, par_dsp_lh5: str, @@ -57,19 +55,19 @@ def dplms_ge_dict( Parameters ---------- - lh5_path: str + lh5_path Name of channel to process, should be name of lh5 group in raw files - fft_files : lgdo.Table + fft_files table with fft data - raw_cal : lgdo.Table + raw_cal table with cal data - dsp_config: dict + dsp_config dsp config file - par_dsp: dict + par_dsp Dictionary with db parameters for dsp processing - par_dsp_lh5: str + par_dsp_lh5 Path for saving dplms coefficients - dplms_dict: dict + dplms_dict Dictionary with various parameters Returns @@ -275,7 +273,7 @@ def dplms_ge_dict( wsize, ) - sto.write_object( + sto.write( Array(x), name="dplms", lh5_file=par_dsp_lh5, From 2aa52e2380206cd4da3c3cb28ebf64ee7da99f9f Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 16 Jan 2024 14:44:56 +0100 Subject: [PATCH 56/73] modification for dsp processing --- src/pygama/pargen/dplms_ge_dict.py | 37 +++++++++--------------- src/pygama/pargen/energy_optimisation.py | 10 +++---- 2 files changed, 19 insertions(+), 28 deletions(-) diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py index 0c1f9fcbc..6a155d239 100644 --- a/src/pygama/pargen/dplms_ge_dict.py +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -8,10 +8,7 @@ import json import logging import os -import pathlib -import pickle import time -from collections import OrderedDict import matplotlib.pyplot as plt import numpy as np @@ -25,14 +22,9 @@ gauss_step_pdf, radford_pdf, ) -from pygama.pargen.cuts import find_pulser_properties, generate_cuts, get_cut_indexes +from 
pygama.pargen.cuts import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp -from pygama.pargen.energy_cal import hpge_find_E_peaks -from pygama.pargen.energy_optimisation import ( - event_selection, - fom_FWHM, - fom_FWHM_with_dt_corr_fit, -) +from pygama.pargen.energy_optimisation import fom_FWHM_with_dt_corr_fit log = logging.getLogger(__name__) sto = lh5.LH5Store() @@ -72,11 +64,12 @@ def dplms_ge_dict( Returns ------- - out_dict : dict + out_dict """ t0 = time.time() log.info(f"\nSelecting baselines") + dsp_fft = run_one_dsp(raw_fft, dsp_config, db_dict=par_dsp[lh5_path]) cut_dict = generate_cuts(dsp_fft, parameters=dplms_dict["bls_cut_pars"]) idxs = get_cut_indexes(dsp_fft, cut_dict) @@ -133,11 +126,7 @@ def dplms_ge_dict( # penalized coefficients dp_coeffs = dplms_dict["dp_coeffs"] - if lh5_path in dplms_dict["noisy_bl"]: - log.info("Setting explicit zero area condition") - za_coeff = dp_coeffs["za"] - else: - za_coeff = dplms_dict["dp_def"]["za"] + za_coeff = dplms_dict["dp_def"]["za"] dp_coeffs.pop("za") coeff_keys = [key for key in dp_coeffs.keys()] lists = [dp_coeffs[key] for key in dp_coeffs.keys()] @@ -177,9 +166,7 @@ def dplms_ge_dict( dplms_dict["length"], wsize, ) - par_dsp[lh5_path]["dplms"] = {} - par_dsp[lh5_path]["dplms"]["length"] = dplms_dict["length"] - par_dsp[lh5_path]["dplms"]["coefficients"] = x.tolist() + par_dsp[lh5_path]["dplms"] = {"length": dplms_dict["length"], "coefficients": x} log.info( f"Filter synthesis in {time.time()-t_tmp:.1f} s, filter area", np.sum(x) ) @@ -402,11 +389,11 @@ def dplms_ge_dict( plot_dict["dplms"]["wf_sel"] = fig fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") - ax.plot(np.flip(x), "r-", label=f"filter") + ax.plot(x, "r-", label=f"filter") ax.axhline(0, color="black", linestyle=":") ax.legend(loc="upper right", title=f"{lh5_path}") axin = ax.inset_axes([0.6, 0.1, 0.35, 0.33]) - axin.plot(np.flip(x), "r-") + axin.plot(x, "r-") axin.set_xlim( dplms_dict["length"] / 
2 - dplms_dict["zoom"], dplms_dict["length"] / 2 + dplms_dict["zoom"], @@ -560,14 +547,18 @@ def filter_synthesis( fmat: np.array, length: int, size: int, + flip: bool = True, ) -> np.array: mat = nmat + rmat + za * np.ones([length, length]) + pmat + fmat flo = (size // 2) - (length // 2) fhi = (size // 2) + (length // 2) - x = np.linalg.solve(mat, ref[flo:fhi]) + x = np.linalg.solve(mat, ref[flo:fhi]).astype(np.float32) y = convolve(ref, np.flip(x), mode="valid") maxy = np.max(y) x /= maxy y /= maxy refy = ref[(size // 2) - (len(y) // 2) : (size // 2) + (len(y) // 2)] - return x, y, refy + if flip: + return np.flip(x), y, refy + else: + return x, y, refy diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index fb990dc9e..e84c93f36 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -13,12 +13,12 @@ import sys from collections import namedtuple -import lgdo.lh5 as lh5 import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np import pandas as pd from iminuit import Minuit, cost, util +from lgdo import Array, Table, WaveformTable, lh5 from matplotlib.backends.backend_pdf import PdfPages from matplotlib.colors import LogNorm from scipy.optimize import curve_fit, minimize @@ -892,14 +892,14 @@ def get_wf_indexes(sorted_indexs, n_events): def index_data(data, indexes, wf_field="waveform"): - new_baselines = lh5.Array(data["baseline"].nda[indexes]) + new_baselines = Array(data["baseline"].nda[indexes]) new_waveform_values = data[wf_field]["values"].nda[indexes] new_waveform_dts = data[wf_field]["dt"].nda[indexes] new_waveform_t0 = data[wf_field]["t0"].nda[indexes] - new_waveform = lh5.WaveformTable( + new_waveform = WaveformTable( None, new_waveform_t0, "ns", new_waveform_dts, "ns", new_waveform_values ) - new_data = lh5.Table(col_dict={wf_field: new_waveform, "baseline": new_baselines}) + new_data = Table(col_dict={wf_field: new_waveform, "baseline": 
new_baselines}) return new_data @@ -1068,7 +1068,7 @@ def event_selection( log.warning("Less than half number of specified events found") elif len(peak_ids[final_mask]) < 0.1 * n_events: log.error("Less than 10% number of specified events found") - out_events = np.unique(np.array(out_events).flatten()) + out_events = np.unique(np.concatenate(out_events)) sort_index = np.argsort(np.concatenate(final_events)) idx_list = get_wf_indexes(sort_index, [len(mask) for mask in final_events]) return out_events, idx_list From e255e1ec327d3f1231386840f92d28c64b244963 Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 16 Jan 2024 15:24:55 +0100 Subject: [PATCH 57/73] revert modification on ene_opt --- src/pygama/pargen/energy_optimisation.py | 57 +++++++++++------------- 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index e84c93f36..ecad4bbd7 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -13,12 +13,12 @@ import sys from collections import namedtuple +import lgdo.lh5 as lh5 import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np import pandas as pd from iminuit import Minuit, cost, util -from lgdo import Array, Table, WaveformTable, lh5 from matplotlib.backends.backend_pdf import PdfPages from matplotlib.colors import LogNorm from scipy.optimize import curve_fit, minimize @@ -857,7 +857,6 @@ def fom_FWHM_fit(tb_in, kwarg_dict): csqr, n_sig, n_sig_err, - _, ) = get_peak_fwhm_with_dt_corr( Energies, alpha, dt, func, gof_func, peak=peak, kev_width=kev_width, kev=True ) @@ -892,14 +891,14 @@ def get_wf_indexes(sorted_indexs, n_events): def index_data(data, indexes, wf_field="waveform"): - new_baselines = Array(data["baseline"].nda[indexes]) + new_baselines = lh5.Array(data["baseline"].nda[indexes]) new_waveform_values = data[wf_field]["values"].nda[indexes] new_waveform_dts = 
data[wf_field]["dt"].nda[indexes] new_waveform_t0 = data[wf_field]["t0"].nda[indexes] - new_waveform = WaveformTable( + new_waveform = lh5.WaveformTable( None, new_waveform_t0, "ns", new_waveform_dts, "ns", new_waveform_values ) - new_data = Table(col_dict={wf_field: new_waveform, "baseline": new_baselines}) + new_data = lh5.Table(col_dict={wf_field: new_waveform, "baseline": new_baselines}) return new_data @@ -1068,20 +1067,17 @@ def event_selection( log.warning("Less than half number of specified events found") elif len(peak_ids[final_mask]) < 0.1 * n_events: log.error("Less than 10% number of specified events found") - out_events = np.unique(np.concatenate(out_events)) + out_events = np.unique(np.array(out_events).flatten()) sort_index = np.argsort(np.concatenate(final_events)) idx_list = get_wf_indexes(sort_index, [len(mask) for mask in final_events]) return out_events, idx_list -def fwhm_slope(x, m0, m1, m2=None): +def fwhm_slope(x, m0, m1, m2): """ Fit the energy resolution curve """ - if m2 is None: - return np.sqrt(m0 + m1 * x) - else: - return np.sqrt(m0 + m1 * x + m2 * (x**2)) + return np.sqrt(m0 + m1 * x + m2 * (x**2)) def interpolate_energy(peak_energies, points, err_points, energy): @@ -1089,7 +1085,7 @@ def interpolate_energy(peak_energies, points, err_points, energy): if len(points[~nan_mask]) < 3: return np.nan, np.nan, np.nan else: - param_guess = [2, 0.001] + param_guess = [2, 0.001, 0.000001] # # param_bounds = (0, [10., 1. 
])# try: fit_pars, fit_covs = curve_fit( @@ -1139,11 +1135,6 @@ def fom_FWHM(tb_in, kwarg_dict, ctc_parameter, alpha, idxs=None, display=0): dt = np.subtract(tb_in["tp_99"].nda, tb_in["tp_0_est"].nda, dtype="float64") elif ctc_parameter == "rt": dt = np.subtract(tb_in["tp_99"].nda, tb_in["tp_01"].nda, dtype="float64") - - if idxs is not None: - Energies = Energies[idxs] - dt = dt[idxs] - if np.isnan(Energies).any() or np.isnan(dt).any(): if np.isnan(Energies).any(): log.debug(f"nan energy values for peak {peak}") @@ -1158,6 +1149,10 @@ def fom_FWHM(tb_in, kwarg_dict, ctc_parameter, alpha, idxs=None, display=0): "n_sig_err": np.nan, } + if idxs is not None: + Energies = Energies[idxs] + dt = dt[idxs] + # Return fwhm of optimal alpha in kev with error try: ( @@ -1210,37 +1205,39 @@ def single_peak_fom(data, kwarg_dict): return out_dict -def new_fom(data, kwarg_dict, alpha=None): +def new_fom(data, kwarg_dict): peaks = kwarg_dict["peaks_keV"] idx_list = kwarg_dict["idx_list"] ctc_param = kwarg_dict["ctc_param"] peak_dicts = kwarg_dict["peak_dicts"] - if alpha is None: - out_dict = fom_FWHM_with_dt_corr_fit( - data, peak_dicts[-1], ctc_param, idxs=idx_list[-1], display=0 - ) - alpha = out_dict["alpha"] - + out_dict = fom_FWHM_with_dt_corr_fit( + data, peak_dicts[-1], ctc_param, idxs=idx_list[-1], display=0 + ) + alpha = out_dict["alpha"] log.info(alpha) fwhms = [] fwhm_errs = [] n_sig = [] n_sig_err = [] - chisquares = [] - for i, peak in enumerate(peaks): + for i, peak in enumerate(peaks[:-1]): out_peak_dict = fom_FWHM( data, peak_dicts[i], ctc_param, alpha, idxs=idx_list[i], display=0 ) + # n_sig_minimum = peak_dicts[i]["n_sig_minimum"] + # if peak_dict["n_sig"] Date: Tue, 16 Jan 2024 17:15:59 +0100 Subject: [PATCH 58/73] change load data --- src/pygama/pargen/utils.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index e58785e4e..5c8f8c101 100644 --- 
a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -3,10 +3,10 @@ import logging from types import FunctionType -import lgdo.lh5_store as lh5 import numpy as np import pandas as pd from iminuit import Minuit, cost, util +from lgdo import Table, lh5 log = logging.getLogger(__name__) @@ -70,15 +70,20 @@ def load_data( masks = np.array([], dtype=bool) for tstamp, tfiles in files.items(): table = sto.read(lh5_path, tfiles)[0] + file_df = pd.DataFrame(columns=params) if tstamp in cal_dict: - file_df = table.eval(cal_dict[tstamp]).get_dataframe() + cal_dict_ts = cal_dict[tstamp] else: - file_df = table.eval(cal_dict).get_dataframe() + cal_dict_ts = cal_dict + for param in params: + if param in cal_dict_ts: + expression = cal_dict_ts[param]["expression"] + parameters = cal_dict_ts[param].get("parameters", None) + file_df[param] = table.eval(expression, parameters) + else: + file_df[param] = table[param] file_df["run_timestamp"] = np.full(len(file_df), tstamp, dtype=object) params.append("run_timestamp") - for param in params: - if param not in file_df: - file_df[param] = lh5.load_nda(tfiles, [param], lh5_path)[param] if threshold is not None: mask = file_df[cal_energy_param] > threshold file_df.drop(np.where(~mask)[0], inplace=True) @@ -96,10 +101,14 @@ def load_data( params = get_params(keys + list(cal_dict.keys()), params) table = sto.read(lh5_path, files)[0] - df = table.eval(cal_dict).get_dataframe() + df = pd.DataFrame(columns=params) for param in params: - if param not in df: - df[param] = lh5.load_nda(files, [param], lh5_path)[param] + if param in cal_dict: + expression = cal_dict[param]["expression"] + parameters = cal_dict[param].get("parameters", None) + df[param] = table.eval(expression, parameters) + else: + df[param] = table[param] if threshold is not None: masks = df[cal_energy_param] > threshold df.drop(np.where(~masks)[0], inplace=True) From 5523ac72107b01e74734f088e87bf67724c5dc38 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 19 
Jan 2024 02:45:36 +0100 Subject: [PATCH 59/73] added lh5 output format for skm tier --- src/pygama/skm/build_skm.py | 87 +++++++++++++++---------- tests/skm/configs/basic-skm-config.json | 3 +- tests/skm/test_build_skm.py | 87 +++++++++++++++++++++++++ 3 files changed, 140 insertions(+), 37 deletions(-) diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index aed71e1eb..5ed4166f7 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -12,12 +12,11 @@ import awkward as ak import numpy as np import pandas as pd -from lgdo import Array, lh5 +from lgdo import Array, lh5, Table from lgdo.lh5 import LH5Store log = logging.getLogger(__name__) - def build_skm( f_evt: str, f_hit: str, @@ -27,6 +26,7 @@ def build_skm( skm_conf: dict | str, wo_mode="w", skim_format: str = "parquet", + group: str = "/skm/", ) -> None: """Builds a skimmed file from a (set) of evt/hit/dsp tier file(s). @@ -94,7 +94,9 @@ def build_skm( - ``overwrite`` or ``o``: replaces existing file. skim_format - data format of the skimmed output (``hdf`` or ``parquet``). + data format of the skimmed output (``hdf``, ``lh5`` or ``parquet``). + group + LH5 root group name (only used if ``skim_format`` is ``lh5``). 
""" f_dict = {"evt": f_evt, "hit": f_hit, "dsp": f_dsp, "tcm": f_tcm} log = logging.getLogger(__name__) @@ -113,8 +115,8 @@ def build_skm( multi = int(tbl_cfg["multiplicity"]) store = LH5Store() - df = pd.DataFrame() - + # df = pd.DataFrame() + table = Table() if "operations" in tbl_cfg.keys(): for op in tbl_cfg["operations"].keys(): miss_val = np.nan @@ -194,46 +196,59 @@ def build_skm( obj = ak.pad_none(obj, multi, clip=True) obj = ak.to_numpy(ak.fill_none(obj, miss_val)) - nms = [op] if obj.ndim > 1: if "postfixes" in tbl_cfg.keys(): nms = [f"{op}{x}" for x in tbl_cfg["postfixes"]] else: nms = [f"{op}_{x}" for x in range(multi)] - - df = df.join(pd.DataFrame(data=obj, columns=nms), how="outer") - - # Set an index column if specified - if "index_field" in tbl_cfg.keys(): - log.debug("Setting index") - if tbl_cfg["index_field"] in df.keys(): - df = df.set_index(tbl_cfg["index_field"]) - else: - raise ValueError( - "index field not found. Needs to be a previously defined skm field" - ) + + for i in range(len(nms)): + # add attribute if present + ob = Array(nda=obj[:,i]) + if "lgdo_attrs" in tbl_cfg["operations"][op].keys(): + ob.attrs |= tbl_cfg["operations"][op]["lgdo_attrs"] + table.add_field(nms[i], ob,True) + else: + obj = Array(nda=obj) + if "lgdo_attrs" in tbl_cfg["operations"][op].keys(): + obj.attrs |= tbl_cfg["operations"][op]["lgdo_attrs"] + table.add_field(op, obj,True) # last thing missing is writing it out log.debug("saving skm file") - if skim_format not in ["parquet", "hdf"]: - raise ValueError("Not supported skim data format. 
Operations are hdf, parquet") - if wo_mode in ["w", "write_safe"]: - if os.path.exists(f_skm): - raise FileExistsError(f"Write_safe mode: {f_skm} exists.") - else: - if "hdf" == skim_format: - df.to_hdf(f_skm, key="df", mode="w") - elif "parquet" == skim_format: - df.to_parquet(f_skm) - elif wo_mode in ["o", "overwrite"]: - if "hdf" == skim_format: - df.to_hdf(f_skm, key="df", mode="w") - elif "parquet" == skim_format: - df.to_parquet(f_skm) - elif wo_mode in ["a", "append"]: + if skim_format not in ["parquet", "hdf","lh5"]: + raise ValueError("Not supported skim data format. Operations are hdf, lh5, parquet") + + if (wo_mode in ["w", "write_safe"]) and os.path.exists(f_skm): + raise FileExistsError(f"Write_safe mode: {f_skm} exists.") + + if skim_format in ["hdf","parquet"]: + df = table.view_as("pd") + # Set an index column if specified + if "index_field" in tbl_cfg.keys(): + log.debug("Setting index") + if tbl_cfg["index_field"] in df.keys(): + df = df.set_index(tbl_cfg["index_field"]) + else: + raise ValueError( + "index field not found. 
Needs to be a previously defined skm field" + ) + if "hdf" == skim_format: - df.to_hdf(f_skm, key="df", mode="a") + if wo_mode in ["w", "write_safe","o", "overwrite"]: + df.to_hdf(f_skm, key="df", mode="w") + elif wo_mode in ["a", "append"]: + df.to_hdf(f_skm, key="df", mode="a") + elif "parquet" == skim_format: - df.to_parquet(f_skm, append=True) + if wo_mode in ["w", "write_safe","o", "overwrite"]: + df.to_parquet(f_skm) + elif wo_mode in ["a", "append"]: + df.to_parquet(f_skm, append=True) + + elif "lh5" == skim_format: + wo = wo_mode if wo_mode not in ["o", "overwrite"] else "of" + store.write(obj=table, name=group, lh5_file=f_skm, wo_mode=wo) + else: raise ValueError(f"wo_mode {wo_mode} not valid.") diff --git a/tests/skm/configs/basic-skm-config.json b/tests/skm/configs/basic-skm-config.json index faf5e56cb..8e57660cd 100644 --- a/tests/skm/configs/basic-skm-config.json +++ b/tests/skm/configs/basic-skm-config.json @@ -3,7 +3,8 @@ "index_field": "timestamp", "operations": { "timestamp": { - "forward_field": "evt.timestamp" + "forward_field": "evt.timestamp", + "lgdo_attrs": {"info":"pk was here"} }, "energy_sum": { "forward_field": "evt.energy_sum" diff --git a/tests/skm/test_build_skm.py b/tests/skm/test_build_skm.py index 678fe2c41..45eaad4d9 100644 --- a/tests/skm/test_build_skm.py +++ b/tests/skm/test_build_skm.py @@ -74,3 +74,90 @@ def test_basics(lgnd_test_data, tmptestdir): assert (vov_eid[:, 0] == df.energy_id_0.to_numpy()).all() assert (vov_eid[:, 1] == df.energy_id_1.to_numpy()).all() assert (vov_eid[:, 2] == df.energy_id_2.to_numpy()).all() + +def test_df_to_table_conversion(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + build_evt( + f_tcm=lgnd_test_data.get_path(tcm_path), + f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", 
"dsp")), + f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + f_evt=outfile, + evt_config=f"{evt_config_dir}/vov-test-evt-config.json", + wo_mode="o", + group="/evt/", + tcm_group="hardware_tcm_1", + ) + + skm_conf = f"{config_dir}/basic-skm-config.json" + skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.parquet" + skm_out2 = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" + build_skm( + outfile, + lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + lgnd_test_data.get_path(tcm_path), + skm_out, + skm_conf, + wo_mode="o", + skim_format="hdf", + ) + build_skm( + outfile, + lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + lgnd_test_data.get_path(tcm_path), + skm_out2, + skm_conf, + wo_mode="o", + skim_format="lh5", + ) + + assert os.path.exists(skm_out) + assert os.path.exists(skm_out2) + df = pd.read_hdf(skm_out) + tbl = store.read("/skm/",skm_out2)[0].view_as("pd") + assert isinstance(tbl,pd.DataFrame) + assert df.reset_index().equals(tbl) + +def test_attribute_passing(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + build_evt( + f_tcm=lgnd_test_data.get_path(tcm_path), + f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + f_evt=outfile, + evt_config=f"{evt_config_dir}/vov-test-evt-config.json", + wo_mode="o", + group="/evt/", + tcm_group="hardware_tcm_1", + ) + + skm_conf = f"{config_dir}/basic-skm-config.json" + + skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" + + build_skm( + outfile, + lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + 
lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + lgnd_test_data.get_path(tcm_path), + skm_out, + skm_conf, + wo_mode="o", + skim_format="lh5", + ) + + assert os.path.exists(skm_out) + assert "info" in store.read("/skm/timestamp", skm_out)[0].getattrs().keys() + assert ( + store.read("/skm/timestamp", skm_out)[0].getattrs()["info"] + == "pk was here" + ) + + From b6bd4e60c80c70283baad8108798a604277c3200 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 19 Jan 2024 12:41:51 +0100 Subject: [PATCH 60/73] full sparse mode compatibility --- src/pygama/evt/build_evt.py | 65 +++++++++------------ src/pygama/evt/modules/spm.py | 75 ++++++++++++------------- src/pygama/skm/build_skm.py | 34 +++++------ tests/skm/configs/basic-skm-config.json | 2 +- tests/skm/test_build_skm.py | 15 ++--- 5 files changed, 88 insertions(+), 103 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index c39ddffb4..6db40d5d8 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -317,7 +317,6 @@ def get_data_at_channel( is_evaluated: bool, f_hit: str, f_dsp: str, - outsize: int, defv, ) -> np.ndarray: """Evaluates an expression and returns the result. @@ -343,14 +342,13 @@ def get_data_at_channel( path to `hit` tier file. f_dsp path to `dsp` tier file. - outsize - size of the return array. defv default value. """ # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] + outsize = len(idx_ch) if not is_evaluated: res = np.full(outsize, defv, dtype=type(defv)) @@ -393,8 +391,7 @@ def get_mask_from_query( qry: str | NDArray, length: int, ch: str, - ids: NDArray, - idx: NDArray, + idx_ch: NDArray, f_hit: str, f_dsp: str, ) -> np.ndarray: @@ -408,17 +405,13 @@ def get_mask_from_query( length of the return mask. ch "rawid" of channel to be evaluated. - idx - `tcm` index array. - ids - `tcm` id array. + idx_ch + channel indices to be read. f_hit path to `hit` tier file. f_dsp path to `dsp` tier file. 
""" - # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] # get sub evt based query condition if needed if isinstance(qry, str): @@ -523,12 +516,11 @@ def evaluate_to_first_or_last( ch not in chns_rm, f_hit, f_dsp, - len(out), defv, ) # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, ids, idx, f_hit, f_dsp) + limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) # find if sorter is in hit or dsp t0 = store.read( @@ -618,12 +610,11 @@ def evaluate_to_scalar( ch not in chns_rm, f_hit, f_dsp, - len(out), defv, ) # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, ids, idx, f_hit, f_dsp) + limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) # switch through modes if "sum" == mode: @@ -686,7 +677,7 @@ def evaluate_at_channel( # skip default value if f"ch{ch}" not in lh5.ls(f_hit): continue - + idx_ch = idx[ids == ch] res = get_data_at_channel( f"ch{ch}", ids, @@ -697,11 +688,10 @@ def evaluate_at_channel( f"ch{ch}" not in chns_rm, f_hit, f_dsp, - len(out), defv, ) - out = np.where(ch == ch_comp.nda, res, out) + out[idx_ch] = np.where(ch == ch_comp.nda[idx_ch], res, out[idx_ch]) return Array(nda=out) @@ -746,12 +736,14 @@ def evaluate_at_channel_vov( """ # blow up vov to aoesa - out = ch_comp.to_aoesa().view_as("np") + out = ak.Array([[] for x in range(len(ch_comp))]) - chns = np.unique(out[~np.isnan(out)]).astype(int) + chns = np.unique(ch_comp.flattened_data.nda).astype(int) + ch_comp = ch_comp.view_as("ak") type_name = None for ch in chns: + idx_ch = idx[ids == ch] res = get_data_at_channel( f"ch{ch}", ids, @@ -762,23 +754,22 @@ def evaluate_at_channel_vov( f"ch{ch}" not in chns_rm, f_hit, f_dsp, - len(out), defv, ) # see in which events the current channel is present - mask = (out == ch).any(axis=1) - out[out == ch] = res[mask] + mask = ak.to_numpy(ak.any(ch_comp == ch, axis=-1), allow_missing=False) + cv = np.full(len(ch_comp), np.nan) + cv[idx_ch] = res + 
cv[~mask] = np.nan + cv = ak.drop_none(ak.nan_to_none(ak.Array(cv)[:, None])) + + out = ak.concatenate((out, cv), axis=-1) if ch == chns[0]: type_name = res.dtype - # ok now implode the table again - out = VectorOfVectors( - flattened_data=out.flatten()[~np.isnan(out.flatten())].astype(type_name), - cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)), - ) - return out + return VectorOfVectors(ak.values_astype(out, type_name)) def evaluate_to_aoesa( @@ -837,6 +828,7 @@ def evaluate_to_aoesa( i = 0 for ch in chns: + idx_ch = idx[ids == int(ch[2:])] res = get_data_at_channel( ch, ids, @@ -847,15 +839,13 @@ def evaluate_to_aoesa( ch not in chns_rm, f_hit, f_dsp, - len(out), defv, ) # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, ids, idx, f_hit, f_dsp) + limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) - # append to out according to mode == vov - out[:, i][limarr] = res[limarr] + out[idx_ch, i] = np.where(limarr, res, out[idx_ch, i]) i += 1 @@ -954,13 +944,10 @@ def evaluate_to_vector( "sorter values can only have 'ascend_by' or 'descend_by' prefixes" ) - out = VectorOfVectors( - flattened_data=out.flatten()[~np.isnan(out.flatten())].astype(type(defv)), - cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)), + return VectorOfVectors( + ak.values_astype(ak.drop_none(ak.nan_to_none(ak.Array(out))), type(defv)) ) - return out - def build_evt( f_tcm: str, @@ -1108,7 +1095,7 @@ def build_evt( table = Table(size=nrows) for k, v in tbl_cfg["operations"].items(): - log.debug("Processing field" + k) + log.debug("Processing field " + k) # if mode not defined in operation, it can only be an operation on the evt level. 
if "aggregation_mode" not in v.keys(): diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 96a1098b6..a4020548c 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -88,18 +88,16 @@ def get_masked_tcm_idx( for ch in chs: idx_ch = idx[ids == int(ch[2:])] - pe = ak.drop_none( - ak.nan_to_none( - store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("ak") - ) - ) + pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("np") + tmp = np.full((np.max(idx) + 1, len(pe[0])), np.nan) + tmp[idx_ch] = pe + pe = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) # times are in sample units - times = ak.drop_none( - ak.nan_to_none( - store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("ak") - ) - ) + times = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("np") + tmp = np.full((np.max(idx) + 1, len(times[0])), np.nan) + tmp[idx_ch] = times + times = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) mask = get_spm_mask(lim, tge, tmin, tmax, pe, times) @@ -107,16 +105,20 @@ def get_masked_tcm_idx( out_idx = ak.local_index(mask)[mask] elif mode == 1: - out_idx = ak.Array(np.where(ids == int(ch[2:]))[0]) - out_idx = out_idx[:, None][mask[mask] - 1] + out_idx = np.full((np.max(idx) + 1), np.nan) + out_idx[idx_ch] = np.where(ids == int(ch[2:]))[0] + out_idx = ak.drop_none(ak.nan_to_none(ak.Array(out_idx)[:, None])) + out_idx = out_idx[mask[mask] - 1] elif mode == 2: out_idx = ak.Array([int(ch[2:])] * len(mask)) out_idx = out_idx[:, None][mask[mask] - 1] elif mode == 3: - out_idx = ak.Array(idx_ch) - out_idx = out_idx[:, None][mask[mask] - 1] + out_idx = np.full((np.max(idx) + 1), np.nan) + out_idx[idx_ch] = idx_ch + out_idx = ak.drop_none(ak.nan_to_none(ak.Array(out_idx)[:, None])) + out_idx = out_idx[mask[mask] - 1] else: raise ValueError("Unknown mode") @@ -180,7 +182,7 @@ def get_spm_ene_or_maj(f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, mode) ) ) - mask = 
get_spm_mask(lim, tge, tmin, tmax, pe, times) + mask = get_spm_mask(lim, tge[idx_ch], tmin, tmax, pe, times) pe = pe[mask] if mode in ["energy_hc", "energy_dplms"]: @@ -265,18 +267,16 @@ def get_etc( for ch in chs: idx_ch = idx[ids == int(ch[2:])] - pe = ak.drop_none( - ak.nan_to_none( - store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("ak") - ) - ) + pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("np") + tmp = np.full((np.max(idx) + 1, len(pe[0])), np.nan) + tmp[idx_ch] = pe + pe = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) # times are in sample units - times = ak.drop_none( - ak.nan_to_none( - store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("ak") - ) - ) + times = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("np") + tmp = np.full((np.max(idx) + 1, len(times[0])), np.nan) + tmp[idx_ch] = times + times = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) mask = get_spm_mask(lim, tge, tmin, tmax, pe, times) @@ -315,7 +315,7 @@ def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> # load TCM data to define an event ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") - time_lst = [] + time_all = ak.Array([[] for x in range(np.max(idx) + 1)]) if isinstance(trgr, (float, int)): tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1) @@ -325,25 +325,24 @@ def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> for ch in chs: idx_ch = idx[ids == int(ch[2:])] - pe = ak.drop_none( - ak.nan_to_none( - store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("ak") - ) - ) + pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("np") + tmp = np.full((np.max(idx) + 1, len(pe[0])), np.nan) + tmp[idx_ch] = pe + pe = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) # times are in sample units - times = ak.drop_none( - ak.nan_to_none( - 
store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("ak") - ) - ) + times = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("np") + tmp = np.full((np.max(idx) + 1, len(times[0])), np.nan) + tmp[idx_ch] = times + times = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) mask = get_spm_mask(lim, tge, tmin, tmax, pe, times) # apply mask and convert sample units to ns - time_lst.append(times[mask] * 16) + times = times[mask] * 16 + + time_all = ak.concatenate((time_all, times), axis=-1) - time_all = ak.concatenate(time_lst, axis=-1) out = ak.min(time_all, axis=-1) # Convert to 1D numpy array diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index 5ed4166f7..0a2965493 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -11,12 +11,12 @@ import awkward as ak import numpy as np -import pandas as pd -from lgdo import Array, lh5, Table +from lgdo import Array, Table, lh5 from lgdo.lh5 import LH5Store log = logging.getLogger(__name__) + def build_skm( f_evt: str, f_hit: str, @@ -201,28 +201,30 @@ def build_skm( nms = [f"{op}{x}" for x in tbl_cfg["postfixes"]] else: nms = [f"{op}_{x}" for x in range(multi)] - + for i in range(len(nms)): # add attribute if present - ob = Array(nda=obj[:,i]) + ob = Array(nda=obj[:, i]) if "lgdo_attrs" in tbl_cfg["operations"][op].keys(): ob.attrs |= tbl_cfg["operations"][op]["lgdo_attrs"] - table.add_field(nms[i], ob,True) + table.add_field(nms[i], ob, True) else: obj = Array(nda=obj) if "lgdo_attrs" in tbl_cfg["operations"][op].keys(): obj.attrs |= tbl_cfg["operations"][op]["lgdo_attrs"] - table.add_field(op, obj,True) + table.add_field(op, obj, True) # last thing missing is writing it out log.debug("saving skm file") - if skim_format not in ["parquet", "hdf","lh5"]: - raise ValueError("Not supported skim data format. Operations are hdf, lh5, parquet") - + if skim_format not in ["parquet", "hdf", "lh5"]: + raise ValueError( + "Not supported skim data format. 
Operations are hdf, lh5, parquet" + ) + if (wo_mode in ["w", "write_safe"]) and os.path.exists(f_skm): raise FileExistsError(f"Write_safe mode: {f_skm} exists.") - - if skim_format in ["hdf","parquet"]: + + if skim_format in ["hdf", "parquet"]: df = table.view_as("pd") # Set an index column if specified if "index_field" in tbl_cfg.keys(): @@ -233,22 +235,22 @@ def build_skm( raise ValueError( "index field not found. Needs to be a previously defined skm field" ) - + if "hdf" == skim_format: - if wo_mode in ["w", "write_safe","o", "overwrite"]: + if wo_mode in ["w", "write_safe", "o", "overwrite"]: df.to_hdf(f_skm, key="df", mode="w") elif wo_mode in ["a", "append"]: df.to_hdf(f_skm, key="df", mode="a") elif "parquet" == skim_format: - if wo_mode in ["w", "write_safe","o", "overwrite"]: + if wo_mode in ["w", "write_safe", "o", "overwrite"]: df.to_parquet(f_skm) elif wo_mode in ["a", "append"]: df.to_parquet(f_skm, append=True) - + elif "lh5" == skim_format: wo = wo_mode if wo_mode not in ["o", "overwrite"] else "of" store.write(obj=table, name=group, lh5_file=f_skm, wo_mode=wo) - + else: raise ValueError(f"wo_mode {wo_mode} not valid.") diff --git a/tests/skm/configs/basic-skm-config.json b/tests/skm/configs/basic-skm-config.json index 8e57660cd..feb29bc17 100644 --- a/tests/skm/configs/basic-skm-config.json +++ b/tests/skm/configs/basic-skm-config.json @@ -4,7 +4,7 @@ "operations": { "timestamp": { "forward_field": "evt.timestamp", - "lgdo_attrs": {"info":"pk was here"} + "lgdo_attrs": { "info": "pk was here" } }, "energy_sum": { "forward_field": "evt.energy_sum" diff --git a/tests/skm/test_build_skm.py b/tests/skm/test_build_skm.py index 45eaad4d9..56499ba94 100644 --- a/tests/skm/test_build_skm.py +++ b/tests/skm/test_build_skm.py @@ -75,6 +75,7 @@ def test_basics(lgnd_test_data, tmptestdir): assert (vov_eid[:, 1] == df.energy_id_1.to_numpy()).all() assert (vov_eid[:, 2] == df.energy_id_2.to_numpy()).all() + def test_df_to_table_conversion(lgnd_test_data, 
tmptestdir): outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" @@ -118,10 +119,11 @@ def test_df_to_table_conversion(lgnd_test_data, tmptestdir): assert os.path.exists(skm_out) assert os.path.exists(skm_out2) df = pd.read_hdf(skm_out) - tbl = store.read("/skm/",skm_out2)[0].view_as("pd") - assert isinstance(tbl,pd.DataFrame) + tbl = store.read("/skm/", skm_out2)[0].view_as("pd") + assert isinstance(tbl, pd.DataFrame) assert df.reset_index().equals(tbl) + def test_attribute_passing(lgnd_test_data, tmptestdir): outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" @@ -141,7 +143,7 @@ def test_attribute_passing(lgnd_test_data, tmptestdir): skm_conf = f"{config_dir}/basic-skm-config.json" skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" - + build_skm( outfile, lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), @@ -155,9 +157,4 @@ def test_attribute_passing(lgnd_test_data, tmptestdir): assert os.path.exists(skm_out) assert "info" in store.read("/skm/timestamp", skm_out)[0].getattrs().keys() - assert ( - store.read("/skm/timestamp", skm_out)[0].getattrs()["info"] - == "pk was here" - ) - - + assert store.read("/skm/timestamp", skm_out)[0].getattrs()["info"] == "pk was here" From b4672eea161973ddeff70bbbf92d8c3131b20133 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 19 Jan 2024 19:33:12 +0100 Subject: [PATCH 61/73] Friday evening changes --- src/pygama/evt/build_evt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 6db40d5d8..039cc756f 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -935,10 +935,10 @@ def evaluate_to_vector( nrows, ).view_as("np") if 
"ascend_by" == md: - out[np.arange(len(out))[:, None], np.argsort(s_val)] + out = out[np.arange(len(out))[:, None], np.argsort(s_val)] elif "descend_by" == md: - out[np.arange(len(out))[:, None], np.argsort(-s_val)] + out = out[np.arange(len(out))[:, None], np.argsort(-s_val)] else: raise ValueError( "sorter values can only have 'ascend_by' or 'descend_by' prefixes" From b5240f4a558b8642575aeb0b53478d94b0aa3303 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 22 Jan 2024 18:39:13 +0100 Subject: [PATCH 62/73] allow passing of env vars in legend meta module --- src/pygama/evt/modules/legend.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pygama/evt/modules/legend.py b/src/pygama/evt/modules/legend.py index f2f8137ef..2ee2d7e8e 100644 --- a/src/pygama/evt/modules/legend.py +++ b/src/pygama/evt/modules/legend.py @@ -3,13 +3,15 @@ """ from importlib import import_module +from lgdo.lh5 import utils + def metadata(params: dict) -> list: # only import legend meta data when needed. 
# LEGEND collaborators can use the meta keyword # While for users w/o access to the LEGEND meta data this is still working lm = import_module("legendmeta") - lmeta = lm.LegendMetadata(path=params["meta_path"]) + lmeta = lm.LegendMetadata(path=utils.expand_path(params["meta_path"])) chmap = lmeta.channelmap(params["time_key"]) tmp = [ From a7856e5867e48678063cd5c82d1ee16929854375 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 23 Jan 2024 11:40:21 +0100 Subject: [PATCH 63/73] add pyarrow dependence --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index 6582215a1..74c036924 100644 --- a/setup.cfg +++ b/setup.cfg @@ -42,6 +42,7 @@ install_requires = numpy>=1.21 pandas>=1.4.4 pint + pyarrow scikit-learn scipy>=1.0.1 tables From ccc1b71b31269bf3464430ef9c6329c65c9de06f Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 23 Jan 2024 12:17:09 +0100 Subject: [PATCH 64/73] error parameter deprecation in pandas 2.2 to_numeric function --- src/pygama/flow/file_db.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/pygama/flow/file_db.py b/src/pygama/flow/file_db.py index 4047f8c97..66545c419 100644 --- a/src/pygama/flow/file_db.py +++ b/src/pygama/flow/file_db.py @@ -272,7 +272,10 @@ def scan_files(self, dirs: list[str] = None) -> None: # convert cols to numeric dtypes where possible for col in self.df.columns: - self.df[col] = pd.to_numeric(self.df[col], errors="ignore") + try: + self.df[col] = pd.to_numeric(self.df[col]) + except ValueError: + continue # sort rows according to timestamps utils.inplace_sort(self.df, self.sortby) @@ -669,7 +672,10 @@ def scan_daq_files(self, daq_dir: str, daq_template: str) -> None: # convert cols to numeric dtypes where possible for col in self.df.columns: - self.df[col] = pd.to_numeric(self.df[col], errors="ignore") + try: + self.df[col] = pd.to_numeric(self.df[col]) + except ValueError: + continue def get_table_name(self, tier: str, tb: str) -> str: """Get 
the table name for a tier given its table identifier. From 56ec1d4089ac1428f1baff0d9078973b76ce2196 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 23 Jan 2024 14:41:30 +0100 Subject: [PATCH 65/73] removed other output format options than lh5 from build_skm --- src/pygama/skm/build_skm.py | 47 +++----------------- tests/skm/configs/basic-skm-config.json | 1 - tests/skm/test_build_skm.py | 57 ++----------------------- 3 files changed, 8 insertions(+), 97 deletions(-) diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index 0a2965493..049012985 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -25,7 +25,6 @@ def build_skm( f_skm: str, skm_conf: dict | str, wo_mode="w", - skim_format: str = "parquet", group: str = "/skm/", ) -> None: """Builds a skimmed file from a (set) of evt/hit/dsp tier file(s). @@ -50,8 +49,6 @@ def build_skm( - ``postfixes`` list of postfixes must be list of ``len(multiplicity)``. If not given, numbers from 0 to ``multiplicity -1`` are used - - ``index_field`` sets the index of the output table. If not given - the index are set es increasing integers. - ``operations`` are forwarded from lower tiers and clipped/padded according to ``missing_value`` if needed. If the forwarded field is not an evt tier, ``tcm_idx`` must be passed that specifies the @@ -64,7 +61,6 @@ def build_skm( { "multiplicity": 2, "postfixes":["","aux"], - "index_field": "timestamp", "operations": { "timestamp":{ "forward_field": "evt.timestamp" @@ -93,14 +89,12 @@ def build_skm( - ``append`` or ``a``: append to file. - ``overwrite`` or ``o``: replaces existing file. - skim_format - data format of the skimmed output (``hdf``, ``lh5`` or ``parquet``). group LH5 root group name (only used if ``skim_format`` is ``lh5``). 
""" f_dict = {"evt": f_evt, "hit": f_hit, "dsp": f_dsp, "tcm": f_tcm} log = logging.getLogger(__name__) - log.debug(f"I am skimning {len(f_evt) if isinstance(f_evt,list) else 1} files") + log.debug(f"I am skimming {len(f_evt) if isinstance(f_evt,list) else 1} files") tbl_cfg = skm_conf if not isinstance(tbl_cfg, (str, dict)): @@ -215,42 +209,11 @@ def build_skm( table.add_field(op, obj, True) # last thing missing is writing it out + if wo_mode not in ["w", "write_safe", "o", "overwrite", "a", "append"]: + raise ValueError(f"wo_mode {wo_mode} not valid.") log.debug("saving skm file") - if skim_format not in ["parquet", "hdf", "lh5"]: - raise ValueError( - "Not supported skim data format. Operations are hdf, lh5, parquet" - ) - if (wo_mode in ["w", "write_safe"]) and os.path.exists(f_skm): raise FileExistsError(f"Write_safe mode: {f_skm} exists.") - if skim_format in ["hdf", "parquet"]: - df = table.view_as("pd") - # Set an index column if specified - if "index_field" in tbl_cfg.keys(): - log.debug("Setting index") - if tbl_cfg["index_field"] in df.keys(): - df = df.set_index(tbl_cfg["index_field"]) - else: - raise ValueError( - "index field not found. 
Needs to be a previously defined skm field" - ) - - if "hdf" == skim_format: - if wo_mode in ["w", "write_safe", "o", "overwrite"]: - df.to_hdf(f_skm, key="df", mode="w") - elif wo_mode in ["a", "append"]: - df.to_hdf(f_skm, key="df", mode="a") - - elif "parquet" == skim_format: - if wo_mode in ["w", "write_safe", "o", "overwrite"]: - df.to_parquet(f_skm) - elif wo_mode in ["a", "append"]: - df.to_parquet(f_skm, append=True) - - elif "lh5" == skim_format: - wo = wo_mode if wo_mode not in ["o", "overwrite"] else "of" - store.write(obj=table, name=group, lh5_file=f_skm, wo_mode=wo) - - else: - raise ValueError(f"wo_mode {wo_mode} not valid.") + wo = wo_mode if wo_mode not in ["o", "overwrite"] else "of" + store.write(obj=table, name=group, lh5_file=f_skm, wo_mode=wo) diff --git a/tests/skm/configs/basic-skm-config.json b/tests/skm/configs/basic-skm-config.json index feb29bc17..8037b21bf 100644 --- a/tests/skm/configs/basic-skm-config.json +++ b/tests/skm/configs/basic-skm-config.json @@ -1,6 +1,5 @@ { "multiplicity": 3, - "index_field": "timestamp", "operations": { "timestamp": { "forward_field": "evt.timestamp", diff --git a/tests/skm/test_build_skm.py b/tests/skm/test_build_skm.py index 56499ba94..6957e3333 100644 --- a/tests/skm/test_build_skm.py +++ b/tests/skm/test_build_skm.py @@ -3,7 +3,6 @@ import awkward as ak import numpy as np -import pandas as pd from lgdo.lh5 import LH5Store from pygama.evt import build_evt @@ -31,7 +30,7 @@ def test_basics(lgnd_test_data, tmptestdir): ) skm_conf = f"{config_dir}/basic-skm-config.json" - skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.parquet" + skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" build_skm( outfile, lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), @@ -40,12 +39,11 @@ def test_basics(lgnd_test_data, tmptestdir): skm_out, skm_conf, wo_mode="o", - skim_format="hdf", ) assert os.path.exists(skm_out) - df = pd.read_hdf(skm_out) - assert df.index.name == 
"timestamp" + df = store.read("/skm/", skm_out)[0].view_as("pd") + assert "timestamp" in df.keys() assert "energy_0" in df.keys() assert "energy_1" in df.keys() assert "energy_2" in df.keys() @@ -76,54 +74,6 @@ def test_basics(lgnd_test_data, tmptestdir): assert (vov_eid[:, 2] == df.energy_id_2.to_numpy()).all() -def test_df_to_table_conversion(lgnd_test_data, tmptestdir): - outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" - tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" - if os.path.exists(outfile): - os.remove(outfile) - build_evt( - f_tcm=lgnd_test_data.get_path(tcm_path), - f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), - f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), - f_evt=outfile, - evt_config=f"{evt_config_dir}/vov-test-evt-config.json", - wo_mode="o", - group="/evt/", - tcm_group="hardware_tcm_1", - ) - - skm_conf = f"{config_dir}/basic-skm-config.json" - skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.parquet" - skm_out2 = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" - build_skm( - outfile, - lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), - lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), - lgnd_test_data.get_path(tcm_path), - skm_out, - skm_conf, - wo_mode="o", - skim_format="hdf", - ) - build_skm( - outfile, - lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), - lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), - lgnd_test_data.get_path(tcm_path), - skm_out2, - skm_conf, - wo_mode="o", - skim_format="lh5", - ) - - assert os.path.exists(skm_out) - assert os.path.exists(skm_out2) - df = pd.read_hdf(skm_out) - tbl = store.read("/skm/", skm_out2)[0].view_as("pd") - assert isinstance(tbl, pd.DataFrame) - assert df.reset_index().equals(tbl) - - def test_attribute_passing(lgnd_test_data, tmptestdir): outfile = 
f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" @@ -152,7 +102,6 @@ def test_attribute_passing(lgnd_test_data, tmptestdir): skm_out, skm_conf, wo_mode="o", - skim_format="lh5", ) assert os.path.exists(skm_out) From a8c8393e921ecdb8a3fe0b516b7173e91723fbde Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 23 Jan 2024 16:58:26 +0100 Subject: [PATCH 66/73] spm module cleanup --- src/pygama/evt/modules/spm.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index a4020548c..c0df03470 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -9,7 +9,6 @@ additional parameters are free to the user and need to be defined in the JSON """ -import warnings import awkward as ak import numpy as np @@ -247,11 +246,6 @@ def get_etc( trail, min_first_pls_ene, ) -> Array: - # ignore stupid numpy warnings - warnings.filterwarnings("ignore", r"All-NaN slice encountered") - warnings.filterwarnings("ignore", r"invalid value encountered in true_divide") - warnings.filterwarnings("ignore", r"invalid value encountered in divide") - # load TCM data to define an event store = LH5Store() ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") From 18df7b96201266a631639efbe69796de5eadfcc2 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 23 Jan 2024 18:54:36 +0100 Subject: [PATCH 67/73] making channel table names agnostic --- src/pygama/evt/build_evt.py | 111 ++++++++++++++++++++++++++++++++---- tests/evt/test_build_evt.py | 24 ++++++++ 2 files changed, 124 insertions(+), 11 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 039cc756f..d13fb49af 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -19,6 +19,23 @@ log = logging.getLogger(__name__) +def 
get_tcm_id_by_pattern(tcm_id_table_pattern: str, ch: str) -> int: + pre = tcm_id_table_pattern.split("{")[0] + post = tcm_id_table_pattern.split("}")[1] + return int(ch.strip(pre).strip(post)) + + +def get_table_name_by_pattern(tcm_id_table_pattern: str, ch_id: int) -> str: + # check tcm_id_table_pattern validity + pattern_check = re.findall(r"{([^}]*?)}", tcm_id_table_pattern)[0] + if pattern_check == "" or ":" == pattern_check[0]: + return tcm_id_table_pattern.format(ch_id) + else: + raise NotImplementedError( + "Only empty placeholders with format specifications are currently implemented" + ) + + def num_and_pars(value: str, par_dic: dict): # function tries to convert a string to a int, float, bool # or returns the value if value is a key in par_dic @@ -51,6 +68,7 @@ def evaluate_expression( qry: str = None, defv: bool | int | float = np.nan, sorter: str = None, + tcm_id_table_pattern: str = "ch{}", ) -> Array | ArrayOfEqualSizedArrays | VectorOfVectors: """Evaluates the expression defined by the user across all channels according to the mode. @@ -107,6 +125,9 @@ def evaluate_expression( sorter can be used to sort vector outputs according to sorter expression (see :func:`evaluate_to_vector`). + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. 
""" store = LH5Store() @@ -186,6 +207,7 @@ def evaluate_expression( ch_comp, var_ph, defv, + tcm_id_table_pattern, ) elif isinstance(ch_comp, VectorOfVectors): return evaluate_at_channel_vov( @@ -199,6 +221,7 @@ def evaluate_expression( chns_rm, var_ph, defv, + tcm_id_table_pattern, ) else: raise NotImplementedError( @@ -226,6 +249,7 @@ def evaluate_expression( var_ph, defv, is_first=True if "first_at:" in mode else False, + tcm_id_table_pattern=tcm_id_table_pattern, ) elif mode in ["sum", "any", "all"]: return evaluate_to_scalar( @@ -242,6 +266,7 @@ def evaluate_expression( nrows, var_ph, defv, + tcm_id_table_pattern, ) elif "gather" == mode: return evaluate_to_vector( @@ -258,6 +283,7 @@ def evaluate_expression( var_ph, defv, sorter, + tcm_id_table_pattern, ) else: raise ValueError(mode + " not a valid mode") @@ -318,6 +344,7 @@ def get_data_at_channel( f_hit: str, f_dsp: str, defv, + tcm_id_table_pattern: str = "ch{}", ) -> np.ndarray: """Evaluates an expression and returns the result. @@ -344,18 +371,23 @@ def get_data_at_channel( path to `dsp` tier file. defv default value. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. 
""" # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] + idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] outsize = len(idx_ch) if not is_evaluated: res = np.full(outsize, defv, dtype=type(defv)) elif "tcm.array_id" == expr: - res = np.full(outsize, int(ch[2:]), dtype=int) + res = np.full( + outsize, get_tcm_id_by_pattern(tcm_id_table_pattern, ch), dtype=int + ) elif "tcm.index" == expr: - res = np.where(ids == int(ch[2:]))[0] + res = np.where(ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch))[0] else: var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) @@ -459,6 +491,7 @@ def evaluate_to_first_or_last( var_ph: dict = None, defv: bool | int | float = np.nan, is_first: bool = True, + tcm_id_table_pattern: str = "ch{}", ) -> Array: """Aggregates across channels by returning the expression of the channel with value of `sorter`. @@ -493,6 +526,9 @@ def evaluate_to_first_or_last( default value. is_first defines if sorted by smallest or largest value of `sorter` + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. """ # define dimension of output array @@ -503,7 +539,7 @@ def evaluate_to_first_or_last( for ch in chns: # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] + idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] # evaluate at channel res = get_data_at_channel( @@ -517,6 +553,7 @@ def evaluate_to_first_or_last( f_hit, f_dsp, defv, + tcm_id_table_pattern, ) # get mask from query @@ -560,6 +597,7 @@ def evaluate_to_scalar( nrows: int, var_ph: dict = None, defv: bool | int | float = np.nan, + tcm_id_table_pattern: str = "ch{}", ) -> Array: """Aggregates by summation across channels. @@ -591,6 +629,9 @@ def evaluate_to_scalar( dictionary of evt and additional parameters and their values. defv default value. 
+ tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. """ # define dimension of output array @@ -598,7 +639,7 @@ def evaluate_to_scalar( for ch in chns: # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] + idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] res = get_data_at_channel( ch, @@ -611,6 +652,7 @@ def evaluate_to_scalar( f_hit, f_dsp, defv, + tcm_id_table_pattern, ) # get mask from query @@ -644,6 +686,7 @@ def evaluate_at_channel( ch_comp: Array, var_ph: dict = None, defv: bool | int | float = np.nan, + tcm_id_table_pattern: str = "ch{}", ) -> Array: """Aggregates by evaluating the expression at a given channel. @@ -669,26 +712,30 @@ def evaluate_at_channel( dictionary of `evt` and additional parameters and their values. defv default value. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. """ out = np.full(len(ch_comp.nda), defv, dtype=type(defv)) for ch in np.unique(ch_comp.nda.astype(int)): # skip default value - if f"ch{ch}" not in lh5.ls(f_hit): + if get_table_name_by_pattern(tcm_id_table_pattern, ch) not in lh5.ls(f_hit): continue idx_ch = idx[ids == ch] res = get_data_at_channel( - f"ch{ch}", + get_table_name_by_pattern(tcm_id_table_pattern, ch), ids, idx, expr, exprl, var_ph, - f"ch{ch}" not in chns_rm, + get_table_name_by_pattern(tcm_id_table_pattern, ch) not in chns_rm, f_hit, f_dsp, defv, + tcm_id_table_pattern, ) out[idx_ch] = np.where(ch == ch_comp.nda[idx_ch], res, out[idx_ch]) @@ -707,6 +754,7 @@ def evaluate_at_channel_vov( chns_rm: list, var_ph: dict = None, defv: bool | int | float = np.nan, + tcm_id_table_pattern: str = "ch{}", ) -> VectorOfVectors: """Same as :func:`evaluate_at_channel` but evaluates expression at non flat channels :class:`.VectorOfVectors`. 
@@ -733,6 +781,9 @@ def evaluate_at_channel_vov( dictionary of `evt` and additional parameters and their values. defv default value. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. """ # blow up vov to aoesa @@ -745,16 +796,17 @@ def evaluate_at_channel_vov( for ch in chns: idx_ch = idx[ids == ch] res = get_data_at_channel( - f"ch{ch}", + get_table_name_by_pattern(tcm_id_table_pattern, ch), ids, idx, expr, exprl, var_ph, - f"ch{ch}" not in chns_rm, + get_table_name_by_pattern(tcm_id_table_pattern, ch) not in chns_rm, f_hit, f_dsp, defv, + tcm_id_table_pattern, ) # see in which events the current channel is present @@ -786,6 +838,7 @@ def evaluate_to_aoesa( var_ph: dict = None, defv: bool | int | float = np.nan, missv=np.nan, + tcm_id_table_pattern: str = "ch{}", ) -> ArrayOfEqualSizedArrays: """Aggregates by returning an :class:`.ArrayOfEqualSizedArrays` of evaluated expressions of channels that fulfill a query expression. @@ -822,13 +875,16 @@ def evaluate_to_aoesa( missing value. sorter sorts the entries in the vector according to sorter expression. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. """ # define dimension of output array out = np.full((nrows, len(chns)), missv) i = 0 for ch in chns: - idx_ch = idx[ids == int(ch[2:])] + idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] res = get_data_at_channel( ch, ids, @@ -840,6 +896,7 @@ def evaluate_to_aoesa( f_hit, f_dsp, defv, + tcm_id_table_pattern, ) # get mask from query @@ -866,6 +923,7 @@ def evaluate_to_vector( var_ph: dict = None, defv: bool | int | float = np.nan, sorter: str = None, + tcm_id_table_pattern: str = "ch{}", ) -> VectorOfVectors: """Aggregates by returning a :class:`.VectorOfVector` of evaluated expressions of channels that fulfill a query expression. 
@@ -902,6 +960,9 @@ def evaluate_to_vector( sorts the entries in the vector according to sorter expression. ``ascend_by:`` results in an vector ordered ascending, ``decend_by:`` sorts descending. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. """ out = evaluate_to_aoesa( idx, @@ -917,6 +978,7 @@ def evaluate_to_vector( var_ph, defv, np.nan, + tcm_id_table_pattern, ).view_as("np") # if a sorter is given sort accordingly @@ -933,6 +995,7 @@ def evaluate_to_vector( [tuple(fld.split("."))], None, nrows, + tcm_id_table_pattern=tcm_id_table_pattern, ).view_as("np") if "ascend_by" == md: out = out[np.arange(len(out))[:, None], np.argsort(s_val)] @@ -958,6 +1021,7 @@ def build_evt( wo_mode: str = "write_safe", group: str = "/evt/", tcm_group: str = "/hardware_tcm_1/", + tcm_id_table_pattern: str = "ch{}", ) -> None: """Transform data from the `hit` and `dsp` levels which a channel sorted to a event sorted data format. @@ -1040,6 +1104,9 @@ def build_evt( LH5 root group name. tcm_group LH5 root group in tcm file. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. """ store = LH5Store() tbl_cfg = evt_config @@ -1054,6 +1121,28 @@ def build_evt( if "operations" not in tbl_cfg.keys(): raise ValueError("operations field needs to be specified in the config") + # check tcm_id_table_pattern validity + pattern_check = re.findall(r"{([^}]*?)}", tcm_id_table_pattern) + if len(pattern_check) != 1: + raise ValueError( + f"tcm_id_table_pattern must have exactly one placeholder. {tcm_id_table_pattern} is invalid." + ) + elif "{" in pattern_check[0] or "}" in pattern_check[0]: + raise ValueError( + f"tcm_id_table_pattern {tcm_id_table_pattern} has an invalid placeholder." 
+ ) + + if ( + get_table_name_by_pattern( + tcm_id_table_pattern, + get_tcm_id_by_pattern(tcm_id_table_pattern, lh5.ls(f_hit)[0]), + ) + != lh5.ls(f_hit)[0] + ): + raise ValueError( + f"tcm_id_table_pattern {tcm_id_table_pattern} does not match keys in data!" + ) + # create channel list according to config # This can be either read from the meta data # or a list of channel names diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 2a7269e9d..ae7570f9f 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -274,3 +274,27 @@ def test_vector_sort(lgnd_test_data, tmptestdir): vov_t0, _ = store.read("/evt/t0_decend", outfile) nda_t0 = vov_t0.to_aoesa().view_as("np") assert ((np.diff(nda_t0) <= 0) | (np.isnan(np.diff(nda_t0)))).all() + + +def test_tcm_id_table_pattern(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + f_tcm = lgnd_test_data.get_path(tcm_path) + f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")) + f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")) + f_config = f"{config_dir}/basic-evt-config.json" + + with pytest.raises(ValueError): + build_evt(f_tcm, f_dsp, f_hit, outfile, f_config, tcm_id_table_pattern="ch{{}}") + with pytest.raises(ValueError): + build_evt(f_tcm, f_dsp, f_hit, outfile, f_config, tcm_id_table_pattern="ch{}{}") + with pytest.raises(NotImplementedError): + build_evt( + f_tcm, f_dsp, f_hit, outfile, f_config, tcm_id_table_pattern="ch{tcm_id}" + ) + with pytest.raises(ValueError): + build_evt( + f_tcm, f_dsp, f_hit, outfile, f_config, tcm_id_table_pattern="apple{}banana" + ) From 513871190526ff65b6efa83c2425b861b2c1d028 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 24 Jan 2024 01:55:22 +0100 Subject: [PATCH 68/73] handle divide by 0 warnings 
correctly --- src/pygama/evt/modules/spm.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index c0df03470..90033209e 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -296,11 +296,17 @@ def get_etc( mask_total = time_all > tge mask_singlet = (time_all > tge) & (time_all < tge + swin) - pe_singlet = ak.nansum(pe_all[mask_singlet], axis=-1) - pe_total = ak.nansum(pe_all[mask_total], axis=-1) - etc = ak.where(pe_total > 0, pe_singlet / pe_total, np.nan) + pe_singlet = ak.to_numpy( + ak.fill_none(ak.nansum(pe_all[mask_singlet], axis=-1), 0), allow_missing=False + ) + pe_total = ak.to_numpy( + ak.fill_none(ak.nansum(pe_all[mask_total], axis=-1), 0), allow_missing=False + ) + etc = np.divide( + pe_singlet, pe_total, out=np.full_like(pe_total, np.nan), where=pe_total != 0 + ) - return Array(nda=ak.to_numpy(ak.fill_none(etc, np.nan), allow_missing=False)) + return Array(nda=etc) # returns relative time shift of the first LAr pulse relative to the Ge trigger From 8fcd1fc2d722e1f71d6ceab211de318fe4dbd8a8 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 24 Jan 2024 14:22:19 +0100 Subject: [PATCH 69/73] become agnostic to group naming --- src/pygama/evt/build_evt.py | 591 +++++++++++++++++++++++----------- src/pygama/evt/modules/spm.py | 207 +++++++++--- tests/evt/test_build_evt.py | 23 +- tests/skm/test_build_skm.py | 8 +- 4 files changed, 594 insertions(+), 235 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index d13fb49af..2cb54b2fe 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -69,6 +69,10 @@ def evaluate_expression( defv: bool | int | float = np.nan, sorter: str = None, tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", + tcm_group: str = "tcm", ) -> Array | ArrayOfEqualSizedArrays | VectorOfVectors:
"""Evaluates the expression defined by the user across all channels according to the mode. @@ -128,12 +132,22 @@ def evaluate_expression( tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. + evt group + LH5 root group name of evt tier. + tcm_group + LH5 root group in tcm file. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. """ store = LH5Store() # find parameters in evt file or in parameters - exprl = re.findall(r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", expr) + exprl = re.findall( + rf"({evt_group}|{hit_group}|{dsp_group}).([a-zA-Z_$][\w$]*)", expr + ) var_ph = {} if table: var_ph = var_ph | { @@ -148,11 +162,19 @@ def evaluate_expression( # evaluate expression func, params = expr.split("(") params = ( - params.replace("dsp.", "dsp_").replace("hit.", "hit_").replace("evt.", "") + params.replace(f"{dsp_group}.", f"{dsp_group}_") + .replace(f"{hit_group}.", f"{hit_group}_") + .replace(f"{evt_group}.", "") ) - params = [f_hit, f_dsp, f_tcm, [x for x in chns if x not in chns_rm]] + [ - num_and_pars(e, var_ph) for e in params[:-1].split(",") - ] + params = [ + f_hit, + f_dsp, + f_tcm, + hit_group, + dsp_group, + tcm_group, + [x for x in chns if x not in chns_rm], + ] + [num_and_pars(e, var_ph) for e in params[:-1].split(",")] # load function dynamically p, m = func.rsplit(".", 1) @@ -163,23 +185,27 @@ def evaluate_expression( # check if query is either on channel basis or evt basis (and not a mix) qry_mask = qry if qry is not None: - if "evt." in qry and ("hit." in qry or "dsp." in qry): - raise ValueError("Query can't be a mix of evt tier and lower tiers.") + if f"{evt_group}." in qry and ( + f"{hit_group}." in qry or f"{dsp_group}." in qry + ): + raise ValueError( + f"Query can't be a mix of {evt_group} tier and lower tiers." + ) # if it is an evt query we can evaluate it directly here - if table and "evt." 
in qry: - qry_mask = eval(qry.replace("evt.", ""), table) + if table and f"{evt_group}." in qry: + qry_mask = eval(qry.replace(f"{evt_group}.", ""), table) # load TCM data to define an event - ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") - idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np") + idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np") # switch through modes if table and (("keep_at_ch:" == mode[:11]) or ("keep_at_idx:" == mode[:12])): if "keep_at_ch:" == mode[:11]: - ch_comp = table[mode[11:].replace("evt.", "")] + ch_comp = table[mode[11:].replace(f"{evt_group}.", "")] else: - ch_comp = table[mode[12:].replace("evt.", "")] + ch_comp = table[mode[12:].replace(f"{evt_group}.", "")] if isinstance(ch_comp, Array): ch_comp = Array(nda=ids[ch_comp.view_as("np")]) elif isinstance(ch_comp, VectorOfVectors): @@ -197,31 +223,37 @@ def evaluate_expression( if isinstance(ch_comp, Array): return evaluate_at_channel( - idx, - ids, - f_hit, - f_dsp, - chns_rm, - expr, - exprl, - ch_comp, - var_ph, - defv, - tcm_id_table_pattern, + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns_rm=chns_rm, + expr=expr, + exprl=exprl, + ch_comp=ch_comp, + var_ph=var_ph, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) elif isinstance(ch_comp, VectorOfVectors): return evaluate_at_channel_vov( - idx, - ids, - f_hit, - f_dsp, - expr, - exprl, - ch_comp, - chns_rm, - var_ph, - defv, - tcm_id_table_pattern, + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + expr=expr, + exprl=exprl, + ch_comp=ch_comp, + chns_rm=chns_rm, + var_ph=var_ph, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) else: raise NotImplementedError( @@ -231,59 +263,69 @@ def evaluate_expression( elif "first_at:" in mode or "last_at:" 
in mode: sorter = tuple( re.findall( - r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", mode.split("first_at:")[-1] + rf"({evt_group}|{hit_group}|{dsp_group}).([a-zA-Z_$][\w$]*)", + mode.split("first_at:")[-1], )[0] ) return evaluate_to_first_or_last( - idx, - ids, - f_hit, - f_dsp, - chns, - chns_rm, - expr, - exprl, - qry_mask, - nrows, - sorter, - var_ph, - defv, + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns=chns, + chns_rm=chns_rm, + expr=expr, + exprl=exprl, + qry=qry_mask, + nrows=nrows, + sorter=sorter, + var_ph=var_ph, + defv=defv, is_first=True if "first_at:" in mode else False, tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) elif mode in ["sum", "any", "all"]: return evaluate_to_scalar( - mode, - idx, - ids, - f_hit, - f_dsp, - chns, - chns_rm, - expr, - exprl, - qry_mask, - nrows, - var_ph, - defv, - tcm_id_table_pattern, + mode=mode, + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns=chns, + chns_rm=chns_rm, + expr=expr, + exprl=exprl, + qry=qry_mask, + nrows=nrows, + var_ph=var_ph, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) elif "gather" == mode: return evaluate_to_vector( - idx, - ids, - f_hit, - f_dsp, - chns, - chns_rm, - expr, - exprl, - qry_mask, - nrows, - var_ph, - defv, - sorter, - tcm_id_table_pattern, + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns=chns, + chns_rm=chns_rm, + expr=expr, + exprl=exprl, + qry=qry_mask, + nrows=nrows, + var_ph=var_ph, + defv=defv, + sorter=sorter, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) else: raise ValueError(mode + " not a valid mode") @@ -295,6 +337,8 @@ def find_parameters( ch: str, idx_ch: NDArray, exprl: list, + hit_group: str = "hit", + dsp_group: str = "dsp", ) -> dict: """Wraps :func:`load_vars_to_nda` to return parameters from `hit` and `dsp` tiers. 
@@ -311,24 +355,32 @@ def find_parameters( index array of entries to be read from files. exprl list of tuples ``(tier, field)`` to be found in the `hit/dsp` tiers. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. """ # find fields in either dsp, hit - dsp_flds = [e[1] for e in exprl if e[0] == "dsp"] - hit_flds = [e[1] for e in exprl if e[0] == "hit"] + dsp_flds = [e[1] for e in exprl if e[0] == dsp_group] + hit_flds = [e[1] for e in exprl if e[0] == hit_group] store = LH5Store() hit_dict, dsp_dict = {}, {} if len(hit_flds) > 0: hit_ak = store.read( - f"{ch.replace('/','')}/hit/", f_hit, field_mask=hit_flds, idx=idx_ch + f"{ch.replace('/','')}/{hit_group}/", f_hit, field_mask=hit_flds, idx=idx_ch )[0].view_as("ak") - hit_dict = dict(zip(["hit_" + e for e in ak.fields(hit_ak)], ak.unzip(hit_ak))) + hit_dict = dict( + zip([f"{hit_group}_" + e for e in ak.fields(hit_ak)], ak.unzip(hit_ak)) + ) if len(dsp_flds) > 0: dsp_ak = store.read( - f"{ch.replace('/','')}/dsp/", f_dsp, field_mask=dsp_flds, idx=idx_ch + f"{ch.replace('/','')}/{dsp_group}/", f_dsp, field_mask=dsp_flds, idx=idx_ch )[0].view_as("ak") - dsp_dict = dict(zip(["dsp_" + e for e in ak.fields(dsp_ak)], ak.unzip(dsp_ak))) + dsp_dict = dict( + zip([f"{dsp_group}_" + e for e in ak.fields(dsp_ak)], ak.unzip(dsp_ak)) + ) return hit_dict | dsp_dict @@ -345,6 +397,9 @@ def get_data_at_channel( f_dsp: str, defv, tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", ) -> np.ndarray: """Evaluates an expression and returns the result. @@ -374,6 +429,12 @@ def get_data_at_channel( tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. 
""" # get index list for this channel to be loaded @@ -389,7 +450,15 @@ def get_data_at_channel( elif "tcm.index" == expr: res = np.where(ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch))[0] else: - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) + var = find_parameters( + f_hit=f_hit, + f_dsp=f_dsp, + ch=ch, + idx_ch=idx_ch, + exprl=exprl, + hit_group=hit_group, + dsp_group=dsp_group, + ) if var_ph is not None: var = var | var_ph @@ -397,7 +466,9 @@ def get_data_at_channel( # evaluate expression # move tier+dots in expression to underscores (e.g. evt.foo -> evt_foo) res = eval( - expr.replace("dsp.", "dsp_").replace("hit.", "hit_").replace("evt.", ""), + expr.replace(f"{dsp_group}.", f"{dsp_group}_") + .replace(f"{hit_group}.", f"{hit_group}_") + .replace(f"{evt_group}.", ""), var, ) @@ -426,6 +497,8 @@ def get_mask_from_query( idx_ch: NDArray, f_hit: str, f_dsp: str, + hit_group: str = "hit", + dsp_group: str = "dsp", ) -> np.ndarray: """Evaluates a query expression and returns a mask accordingly. @@ -443,13 +516,30 @@ def get_mask_from_query( path to `hit` tier file. f_dsp path to `dsp` tier file. + hit_group + LH5 root group in hit file. + dsp_group + LH5 root group in dsp file. 
""" # get sub evt based query condition if needed if isinstance(qry, str): qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) - qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) - limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) + qry_var = find_parameters( + f_hit=f_hit, + f_dsp=f_dsp, + ch=ch, + idx_ch=idx_ch, + exprl=qry_lst, + hit_group=hit_group, + dsp_group=dsp_group, + ) + limarr = eval( + qry.replace(f"{dsp_group}.", f"{dsp_group}_").replace( + f"{hit_group}.", f"{hit_group}_" + ), + qry_var, + ) # in case the expression evaluates to a single value blow it up if (not hasattr(limarr, "__len__")) or (isinstance(limarr, str)): @@ -492,6 +582,9 @@ def evaluate_to_first_or_last( defv: bool | int | float = np.nan, is_first: bool = True, tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", ) -> Array: """Aggregates across channels by returning the expression of the channel with value of `sorter`. @@ -529,6 +622,12 @@ def evaluate_to_first_or_last( tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. 
""" # define dimension of output array @@ -543,26 +642,38 @@ def evaluate_to_first_or_last( # evaluate at channel res = get_data_at_channel( - ch, - ids, - idx, - expr, - exprl, - var_ph, - ch not in chns_rm, - f_hit, - f_dsp, - defv, - tcm_id_table_pattern, + ch=ch, + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=ch not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) + limarr = get_mask_from_query( + qry=qry, + length=len(res), + ch=ch, + idx_ch=idx_ch, + f_hit=f_hit, + f_dsp=f_dsp, + hit_group=hit_group, + dsp_group=dsp_group, + ) # find if sorter is in hit or dsp t0 = store.read( f"{ch}/{sorter[0]}/{sorter[1]}", - f_hit if "hit" == sorter[0] else f_dsp, + f_hit if f"{hit_group}" == sorter[0] else f_dsp, idx=idx_ch, )[0].view_as("np") @@ -598,6 +709,9 @@ def evaluate_to_scalar( var_ph: dict = None, defv: bool | int | float = np.nan, tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", ) -> Array: """Aggregates by summation across channels. @@ -632,6 +746,12 @@ def evaluate_to_scalar( tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. 
""" # define dimension of output array @@ -642,21 +762,33 @@ def evaluate_to_scalar( idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] res = get_data_at_channel( - ch, - ids, - idx, - expr, - exprl, - var_ph, - ch not in chns_rm, - f_hit, - f_dsp, - defv, - tcm_id_table_pattern, + ch=ch, + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=ch not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) + limarr = get_mask_from_query( + qry=qry, + length=len(res), + ch=ch, + idx_ch=idx_ch, + f_hit=f_hit, + f_dsp=f_dsp, + hit_group=hit_group, + dsp_group=dsp_group, + ) # switch through modes if "sum" == mode: @@ -687,6 +819,9 @@ def evaluate_at_channel( var_ph: dict = None, defv: bool | int | float = np.nan, tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", ) -> Array: """Aggregates by evaluating the expression at a given channel. @@ -715,6 +850,12 @@ def evaluate_at_channel( tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. 
""" out = np.full(len(ch_comp.nda), defv, dtype=type(defv)) @@ -725,17 +866,21 @@ def evaluate_at_channel( continue idx_ch = idx[ids == ch] res = get_data_at_channel( - get_table_name_by_pattern(tcm_id_table_pattern, ch), - ids, - idx, - expr, - exprl, - var_ph, - get_table_name_by_pattern(tcm_id_table_pattern, ch) not in chns_rm, - f_hit, - f_dsp, - defv, - tcm_id_table_pattern, + ch=get_table_name_by_pattern(tcm_id_table_pattern, ch), + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=get_table_name_by_pattern(tcm_id_table_pattern, ch) + not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) out[idx_ch] = np.where(ch == ch_comp.nda[idx_ch], res, out[idx_ch]) @@ -755,6 +900,9 @@ def evaluate_at_channel_vov( var_ph: dict = None, defv: bool | int | float = np.nan, tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", ) -> VectorOfVectors: """Same as :func:`evaluate_at_channel` but evaluates expression at non flat channels :class:`.VectorOfVectors`. @@ -784,10 +932,16 @@ def evaluate_at_channel_vov( tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. 
""" # blow up vov to aoesa - out = ak.Array([[] for x in range(len(ch_comp))]) + out = ak.Array([[] for _ in range(len(ch_comp))]) chns = np.unique(ch_comp.flattened_data.nda).astype(int) ch_comp = ch_comp.view_as("ak") @@ -796,17 +950,21 @@ def evaluate_at_channel_vov( for ch in chns: idx_ch = idx[ids == ch] res = get_data_at_channel( - get_table_name_by_pattern(tcm_id_table_pattern, ch), - ids, - idx, - expr, - exprl, - var_ph, - get_table_name_by_pattern(tcm_id_table_pattern, ch) not in chns_rm, - f_hit, - f_dsp, - defv, - tcm_id_table_pattern, + ch=get_table_name_by_pattern(tcm_id_table_pattern, ch), + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=get_table_name_by_pattern(tcm_id_table_pattern, ch) + not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) # see in which events the current channel is present @@ -839,6 +997,9 @@ def evaluate_to_aoesa( defv: bool | int | float = np.nan, missv=np.nan, tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", ) -> ArrayOfEqualSizedArrays: """Aggregates by returning an :class:`.ArrayOfEqualSizedArrays` of evaluated expressions of channels that fulfill a query expression. @@ -878,6 +1039,12 @@ def evaluate_to_aoesa( tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. 
""" # define dimension of output array out = np.full((nrows, len(chns)), missv) @@ -886,21 +1053,33 @@ def evaluate_to_aoesa( for ch in chns: idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] res = get_data_at_channel( - ch, - ids, - idx, - expr, - exprl, - var_ph, - ch not in chns_rm, - f_hit, - f_dsp, - defv, - tcm_id_table_pattern, + ch=ch, + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=ch not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) + limarr = get_mask_from_query( + qry=qry, + length=len(res), + ch=ch, + idx_ch=idx_ch, + f_hit=f_hit, + f_dsp=f_dsp, + hit_group=hit_group, + dsp_group=dsp_group, + ) out[idx_ch, i] = np.where(limarr, res, out[idx_ch, i]) @@ -924,6 +1103,9 @@ def evaluate_to_vector( defv: bool | int | float = np.nan, sorter: str = None, tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", ) -> VectorOfVectors: """Aggregates by returning a :class:`.VectorOfVector` of evaluated expressions of channels that fulfill a query expression. @@ -963,39 +1145,52 @@ def evaluate_to_vector( tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. 
""" out = evaluate_to_aoesa( - idx, - ids, - f_hit, - f_dsp, - chns, - chns_rm, - expr, - exprl, - qry, - nrows, - var_ph, - defv, - np.nan, - tcm_id_table_pattern, + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns=chns, + chns_rm=chns_rm, + expr=expr, + exprl=exprl, + qry=qry, + nrows=nrows, + var_ph=var_ph, + defv=defv, + missv=np.nan, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ).view_as("np") # if a sorter is given sort accordingly if sorter is not None: md, fld = sorter.split(":") s_val = evaluate_to_aoesa( - idx, - ids, - f_hit, - f_dsp, - chns, - chns_rm, - fld, - [tuple(fld.split("."))], - None, - nrows, + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns=chns, + chns_rm=chns_rm, + expr=fld, + exprl=[tuple(fld.split("."))], + qry=None, + nrows=nrows, + missv=np.nan, tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ).view_as("np") if "ascend_by" == md: out = out[np.arange(len(out))[:, None], np.argsort(s_val)] @@ -1019,8 +1214,10 @@ def build_evt( f_evt: str, evt_config: str | dict, wo_mode: str = "write_safe", - group: str = "/evt/", - tcm_group: str = "/hardware_tcm_1/", + evt_group: str = "evt", + tcm_group: str = "hardware_tcm_1", + dsp_group: str = "dsp", + hit_group: str = "hit", tcm_id_table_pattern: str = "ch{}", ) -> None: """Transform data from the `hit` and `dsp` levels which a channel sorted to a @@ -1100,14 +1297,19 @@ def build_evt( wo_mode writing mode. - group - LH5 root group name. + evt group + LH5 root group name of evt tier. tcm_group LH5 root group in tcm file. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. 
""" + store = LH5Store() tbl_cfg = evt_config if not isinstance(tbl_cfg, (str, dict)): @@ -1179,7 +1381,7 @@ def build_evt( elif isinstance(v, list): chns[k] = [e for e in v] - nrows = store.read_n_rows(f"{tcm_group}/cumulative_length", f_tcm) + nrows = store.read_n_rows(f"/{tcm_group}/cumulative_length", f_tcm) table = Table(size=nrows) @@ -1191,7 +1393,7 @@ def build_evt( var = {} if "parameters" in v.keys(): var = var | v["parameters"] - res = table.eval(v["expression"].replace("evt.", ""), var) + res = table.eval(v["expression"].replace(f"{evt_group}.", ""), var) # add attribute if present if "lgdo_attrs" in v.keys(): @@ -1235,19 +1437,24 @@ def build_evt( srter = v["sort"] obj = evaluate_expression( - f_tcm, - f_hit, - f_dsp, - chns_e, - chns_rm, - v["aggregation_mode"], - v["expression"], - nrows, - table, - pars, - qry, - defaultv, - srter, + f_tcm=f_tcm, + f_hit=f_hit, + f_dsp=f_dsp, + chns=chns_e, + chns_rm=chns_rm, + mode=v["aggregation_mode"], + expr=v["expression"], + nrows=nrows, + table=table, + para=pars, + qry=qry, + defv=defaultv, + sorter=srter, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, + tcm_group=tcm_group, ) # add attribute if present @@ -1264,7 +1471,9 @@ def build_evt( clms_to_remove = [e for e in table.keys() if e not in tbl_cfg["outputs"]] for fld in clms_to_remove: table.remove_field(fld, True) - store.write(obj=table, name=group, lh5_file=f_evt, wo_mode=wo_mode) + store.write( + obj=table, name=f"/{evt_group}/", lh5_file=f_evt, wo_mode=wo_mode + ) else: log.warning("No output fields specified, no file will be written.") diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 90033209e..b72198a6f 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -5,6 +5,9 @@ - path to the hit file - path to the dsp file - path to the tcm file +- hit LH5 root group +- dsp LH5 root group +- tcm LH5 root group - list of channels 
processed additional parameters are free to the user and need to be defined in the JSON """ @@ -70,12 +73,24 @@ def get_spm_mask( # mode 2 -> return rawids # mode 3 -> return tcm_idx def get_masked_tcm_idx( - f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, mode=0 + f_hit, + f_dsp, + f_tcm, + hit_group, + dsp_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, + mode=0, ) -> VectorOfVectors: # load TCM data to define an event store = LH5Store() - ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") - idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np") + idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np") arr_lst = [] @@ -87,13 +102,17 @@ def get_masked_tcm_idx( for ch in chs: idx_ch = idx[ids == int(ch[2:])] - pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("np") + pe = store.read(f"{ch}/{hit_group}/energy_in_pe", f_hit, idx=idx_ch)[0].view_as( + "np" + ) tmp = np.full((np.max(idx) + 1, len(pe[0])), np.nan) tmp[idx_ch] = pe pe = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) # times are in sample units - times = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("np") + times = store.read(f"{ch}/{hit_group}/trigger_pos", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") tmp = np.full((np.max(idx) + 1, len(times[0])), np.nan) tmp[idx_ch] = times times = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) @@ -127,14 +146,16 @@ def get_masked_tcm_idx( return VectorOfVectors(array=ak.concatenate(arr_lst, axis=-1)) -def get_spm_ene_or_maj(f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, mode): +def get_spm_ene_or_maj( + f_hit, f_tcm, hit_group, tcm_group, chs, lim, trgr, tdefault, tmin, tmax, mode +): if mode not in ["energy_hc", "energy_dplms", "majority_hc", "majority_dplms"]: raise ValueError("Unknown mode") # load TCM data to define an event store = LH5Store() - ids = 
store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") - idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np") + idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np") out = np.zeros(np.max(idx) + 1) if isinstance(trgr, (float, int)): @@ -148,36 +169,36 @@ def get_spm_ene_or_maj(f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, mode) if mode in ["energy_dplms", "majority_dplms"]: pe = ak.drop_none( ak.nan_to_none( - store.read(f"{ch}/hit/energy_in_pe_dplms", f_hit, idx=idx_ch)[ - 0 - ].view_as("ak") + store.read( + f"{ch}/{hit_group}/energy_in_pe_dplms", f_hit, idx=idx_ch + )[0].view_as("ak") ) ) # times are in sample units times = ak.drop_none( ak.nan_to_none( - store.read(f"{ch}/hit/trigger_pos_dplms", f_hit, idx=idx_ch)[ - 0 - ].view_as("ak") + store.read( + f"{ch}/{hit_group}/trigger_pos_dplms", f_hit, idx=idx_ch + )[0].view_as("ak") ) ) else: pe = ak.drop_none( ak.nan_to_none( - store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as( - "ak" - ) + store.read(f"{ch}/{hit_group}/energy_in_pe", f_hit, idx=idx_ch)[ + 0 + ].view_as("ak") ) ) # times are in sample units times = ak.drop_none( ak.nan_to_none( - store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( - "ak" - ) + store.read(f"{ch}/{hit_group}/trigger_pos", f_hit, idx=idx_ch)[ + 0 + ].view_as("ak") ) ) @@ -196,34 +217,122 @@ def get_spm_ene_or_maj(f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, mode) # get LAr energy per event over all channels -def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: +def get_energy( + f_hit, + f_dsp, + f_tcm, + hit_group, + dsp_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, +) -> Array: return get_spm_ene_or_maj( - f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, "energy_hc" + f_hit, + f_tcm, + hit_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, + "energy_hc", ) # 
get LAr majority per event over all channels -def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: +def get_majority( + f_hit, + f_dsp, + f_tcm, + hit_group, + dsp_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, +) -> Array: return get_spm_ene_or_maj( - f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, "majority_hc" + f_hit, + f_tcm, + hit_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, + "majority_hc", ) # get LAr energy per event over all channels def get_energy_dplms( - f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax + f_hit, + f_dsp, + f_tcm, + hit_group, + dsp_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, ) -> Array: return get_spm_ene_or_maj( - f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, "energy_dplms" + f_hit, + f_tcm, + hit_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, + "energy_dplms", ) # get LAr majority per event over all channels def get_majority_dplms( - f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax + f_hit, + f_dsp, + f_tcm, + hit_group, + dsp_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, ) -> Array: return get_spm_ene_or_maj( - f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, "majority_dplms" + f_hit, + f_tcm, + hit_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, + "majority_dplms", ) @@ -236,6 +345,9 @@ def get_etc( f_hit, f_dsp, f_tcm, + hit_group, + dsp_group, + tcm_group, chs, lim, trgr, @@ -248,8 +360,8 @@ def get_etc( ) -> Array: # load TCM data to define an event store = LH5Store() - ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") - idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np") + idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np") pe_lst = [] time_lst = [] @@ -261,13 +373,17 @@ def get_etc( for ch in chs: idx_ch = 
idx[ids == int(ch[2:])] - pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("np") + pe = store.read(f"{ch}/{hit_group}/energy_in_pe", f_hit, idx=idx_ch)[0].view_as( + "np" + ) tmp = np.full((np.max(idx) + 1, len(pe[0])), np.nan) tmp[idx_ch] = pe pe = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) # times are in sample units - times = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("np") + times = store.read(f"{ch}/{hit_group}/trigger_pos", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") tmp = np.full((np.max(idx) + 1, len(times[0])), np.nan) tmp[idx_ch] = times times = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) @@ -310,11 +426,24 @@ def get_etc( # returns relative time shift of the first LAr pulse relative to the Ge trigger -def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: +def get_time_shift( + f_hit, + f_dsp, + f_tcm, + hit_group, + dsp_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, +) -> Array: store = LH5Store() # load TCM data to define an event - ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") - idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np") + idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np") time_all = ak.Array([[] for x in range(np.max(idx) + 1)]) if isinstance(trgr, (float, int)): @@ -325,13 +454,17 @@ def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> for ch in chs: idx_ch = idx[ids == int(ch[2:])] - pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("np") + pe = store.read(f"{ch}/{hit_group}/energy_in_pe", f_hit, idx=idx_ch)[0].view_as( + "np" + ) tmp = np.full((np.max(idx) + 1, len(pe[0])), np.nan) tmp[idx_ch] = pe pe = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) # times are in sample units - times = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("np") + times = 
store.read(f"{ch}/{hit_group}/trigger_pos", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") tmp = np.full((np.max(idx) + 1, len(times[0])), np.nan) tmp[idx_ch] = times times = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index ae7570f9f..0f193074c 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -25,7 +25,9 @@ def test_basics(lgnd_test_data, tmptestdir): f_evt=outfile, evt_config=f"{config_dir}/basic-evt-config.json", wo_mode="o", - group="/evt/", + evt_group="evt", + hit_group="hit", + dsp_group="dsp", tcm_group="hardware_tcm_1", ) assert "statement" in store.read("/evt/multiplicity", outfile)[0].getattrs().keys() @@ -75,7 +77,10 @@ def test_lar_module(lgnd_test_data, tmptestdir): f_evt=outfile, evt_config=f"{config_dir}/module-test-evt-config.json", wo_mode="o", - group="/evt/", + evt_group="evt", + hit_group="hit", + dsp_group="dsp", + tcm_group="hardware_tcm_1", ) assert os.path.exists(outfile) @@ -101,7 +106,10 @@ def test_lar_t0_vov_module(lgnd_test_data, tmptestdir): f_evt=outfile, evt_config=f"{config_dir}/module-test-t0-vov-evt-config.json", wo_mode="o", - group="/evt/", + evt_group="evt", + hit_group="hit", + dsp_group="dsp", + tcm_group="hardware_tcm_1", ) assert os.path.exists(outfile) @@ -131,7 +139,10 @@ def test_vov(lgnd_test_data, tmptestdir): f_evt=outfile, evt_config=f"{config_dir}/vov-test-evt-config.json", wo_mode="o", - group="/evt/", + evt_group="evt", + hit_group="hit", + dsp_group="dsp", + tcm_group="hardware_tcm_1", ) assert os.path.exists(outfile) @@ -221,7 +232,9 @@ def test_query(lgnd_test_data, tmptestdir): f_evt=outfile, evt_config=f"{config_dir}/query-test-evt-config.json", wo_mode="o", - group="/evt/", + evt_group="evt", + hit_group="hit", + dsp_group="dsp", tcm_group="hardware_tcm_1", ) assert len(lh5.ls(outfile, "/evt/")) == 12 diff --git a/tests/skm/test_build_skm.py b/tests/skm/test_build_skm.py index 6957e3333..b23137ec6 100644 --- 
a/tests/skm/test_build_skm.py +++ b/tests/skm/test_build_skm.py @@ -25,7 +25,9 @@ def test_basics(lgnd_test_data, tmptestdir): f_evt=outfile, evt_config=f"{evt_config_dir}/vov-test-evt-config.json", wo_mode="o", - group="/evt/", + evt_group="evt", + hit_group="hit", + dsp_group="dsp", tcm_group="hardware_tcm_1", ) @@ -86,7 +88,9 @@ def test_attribute_passing(lgnd_test_data, tmptestdir): f_evt=outfile, evt_config=f"{evt_config_dir}/vov-test-evt-config.json", wo_mode="o", - group="/evt/", + evt_group="evt", + hit_group="hit", + dsp_group="dsp", tcm_group="hardware_tcm_1", ) From eaa6cb4347f258e8fc28c727ec29b0eb64be0cff Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 24 Jan 2024 15:19:08 +0100 Subject: [PATCH 70/73] split build_evt into sub modules --- src/pygama/evt/aggregators.py | 653 ++++++++++++ src/pygama/evt/build_evt.py | 1815 ++++++++------------------------- src/pygama/evt/modules/spm.py | 47 +- src/pygama/evt/utils.py | 278 +++++ 4 files changed, 1422 insertions(+), 1371 deletions(-) create mode 100644 src/pygama/evt/aggregators.py create mode 100644 src/pygama/evt/utils.py diff --git a/src/pygama/evt/aggregators.py b/src/pygama/evt/aggregators.py new file mode 100644 index 000000000..f9131ed96 --- /dev/null +++ b/src/pygama/evt/aggregators.py @@ -0,0 +1,653 @@ +""" +This module provides aggregators to build the `evt` tier. +""" + +from __future__ import annotations +import re +import numpy as np +from numpy.typing import NDArray +import awkward as ak +from lgdo.lh5 import LH5Store +from lgdo import Array, ArrayOfEqualSizedArrays, Table, VectorOfVectors, lh5 + +from . 
import utils + +def evaluate_to_first_or_last( + idx: NDArray, + ids: NDArray, + f_hit: str, + f_dsp: str, + chns: list, + chns_rm: list, + expr: str, + exprl: list, + qry: str | NDArray, + nrows: int, + sorter: tuple, + var_ph: dict = None, + defv: bool | int | float = np.nan, + is_first: bool = True, + tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", +) -> Array: + """Aggregates across channels by returning the expression of the channel + with value of `sorter`. + + Parameters + ---------- + idx + `tcm` index array. + ids + `tcm` id array. + f_hit + path to `hit` tier file. + f_dsp + path to `dsp` tier file. + chns + list of channels to be aggregated. + chns_rm + list of channels to be skipped from evaluation and set to default value. + expr + expression string to be evaluated. + exprl + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. + qry + query expression to mask aggregation. + nrows + length of output array. + sorter + tuple of field in `hit/dsp/evt` tier to evaluate ``(tier, field)``. + var_ph + dictionary of `evt` and additional parameters and their values. + defv + default value. + is_first + defines if sorted by smallest or largest value of `sorter` + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. 
+ """ + + # define dimension of output array + out = np.full(nrows, defv, dtype=type(defv)) + outt = np.zeros(len(out)) + + store = LH5Store() + + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] + + # evaluate at channel + res = utils.get_data_at_channel( + ch=ch, + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=ch not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, + ) + + # get mask from query + limarr = utils.get_mask_from_query( + qry=qry, + length=len(res), + ch=ch, + idx_ch=idx_ch, + f_hit=f_hit, + f_dsp=f_dsp, + hit_group=hit_group, + dsp_group=dsp_group, + ) + + # find if sorter is in hit or dsp + t0 = store.read( + f"{ch}/{sorter[0]}/{sorter[1]}", + f_hit if f"{hit_group}" == sorter[0] else f_dsp, + idx=idx_ch, + )[0].view_as("np") + + if t0.ndim > 1: + raise ValueError(f"sorter '{sorter[0]}/{sorter[1]}' must be a 1D array") + + if is_first: + if ch == chns[0]: + outt[:] = np.inf + + out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) + outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) + + else: + out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) + outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) + + return Array(nda=out) + + +def evaluate_to_scalar( + mode: str, + idx: NDArray, + ids: NDArray, + f_hit: str, + f_dsp: str, + chns: list, + chns_rm: list, + expr: str, + exprl: list, + qry: str | NDArray, + nrows: int, + var_ph: dict = None, + defv: bool | int | float = np.nan, + tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", +) -> Array: + """Aggregates by summation across channels. + + Parameters + ---------- + mode + aggregation mode. + idx + tcm index array. + ids + tcm id array. 
+ f_hit + path to `hit` tier file. + f_dsp + path to `dsp` tier file. + chns + list of channels to be aggregated. + chns_rm + list of channels to be skipped from evaluation and set to default value. + expr + expression string to be evaluated. + exprl + list of dsp/hit/evt parameter tuples in expression (tier, field). + qry + query expression to mask aggregation. + nrows + length of output array + var_ph + dictionary of evt and additional parameters and their values. + defv + default value. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. + """ + + # define dimension of output array + out = np.full(nrows, defv, dtype=type(defv)) + + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] + + res = utils.get_data_at_channel( + ch=ch, + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=ch not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, + ) + + # get mask from query + limarr = utils.get_mask_from_query( + qry=qry, + length=len(res), + ch=ch, + idx_ch=idx_ch, + f_hit=f_hit, + f_dsp=f_dsp, + hit_group=hit_group, + dsp_group=dsp_group, + ) + + # switch through modes + if "sum" == mode: + if res.dtype == bool: + res = res.astype(int) + out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) + if "any" == mode: + if res.dtype != bool: + res = res.astype(bool) + out[idx_ch] = out[idx_ch] | (res & limarr) + if "all" == mode: + if res.dtype != bool: + res = res.astype(bool) + out[idx_ch] = out[idx_ch] & res & limarr + + return Array(nda=out) + + +def evaluate_at_channel( + idx: NDArray, + ids: NDArray, + f_hit: str, + 
f_dsp: str, + chns_rm: list, + expr: str, + exprl: list, + ch_comp: Array, + var_ph: dict = None, + defv: bool | int | float = np.nan, + tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", +) -> Array: + """Aggregates by evaluating the expression at a given channel. + + Parameters + ---------- + idx + `tcm` index array. + ids + `tcm` id array. + f_hit + path to `hit` tier file. + f_dsp + path to `dsp` tier file. + chns_rm + list of channels to be skipped from evaluation and set to default value. + expr + expression string to be evaluated. + exprl + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. + ch_comp + array of rawids at which the expression is evaluated. + var_ph + dictionary of `evt` and additional parameters and their values. + defv + default value. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. 
+ """ + + out = np.full(len(ch_comp.nda), defv, dtype=type(defv)) + + for ch in np.unique(ch_comp.nda.astype(int)): + # skip default value + if utils.get_table_name_by_pattern(tcm_id_table_pattern, ch) not in lh5.ls(f_hit): + continue + idx_ch = idx[ids == ch] + res = utils.get_data_at_channel( + ch=utils.get_table_name_by_pattern(tcm_id_table_pattern, ch), + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=utils.get_table_name_by_pattern(tcm_id_table_pattern, ch) + not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, + ) + + out[idx_ch] = np.where(ch == ch_comp.nda[idx_ch], res, out[idx_ch]) + + return Array(nda=out) + + +def evaluate_at_channel_vov( + idx: NDArray, + ids: NDArray, + f_hit: str, + f_dsp: str, + expr: str, + exprl: list, + ch_comp: VectorOfVectors, + chns_rm: list, + var_ph: dict = None, + defv: bool | int | float = np.nan, + tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", +) -> VectorOfVectors: + """Same as :func:`evaluate_at_channel` but evaluates expression at non + flat channels :class:`.VectorOfVectors`. + + Parameters + ---------- + idx + `tcm` index array. + ids + `tcm` id array. + f_hit + path to `hit` tier file. + f_dsp + path to `dsp` tier file. + expr + expression string to be evaluated. + exprl + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. + ch_comp + array of "rawid"s at which the expression is evaluated. + chns_rm + list of channels to be skipped from evaluation and set to default value. + var_ph + dictionary of `evt` and additional parameters and their values. + defv + default value. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. 
+ hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. + """ + + # blow up vov to aoesa + out = ak.Array([[] for _ in range(len(ch_comp))]) + + chns = np.unique(ch_comp.flattened_data.nda).astype(int) + ch_comp = ch_comp.view_as("ak") + + type_name = None + for ch in chns: + idx_ch = idx[ids == ch] + res = utils.get_data_at_channel( + ch=utils.get_table_name_by_pattern(tcm_id_table_pattern, ch), + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=utils.get_table_name_by_pattern(tcm_id_table_pattern, ch) + not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, + ) + + # see in which events the current channel is present + mask = ak.to_numpy(ak.any(ch_comp == ch, axis=-1), allow_missing=False) + cv = np.full(len(ch_comp), np.nan) + cv[idx_ch] = res + cv[~mask] = np.nan + cv = ak.drop_none(ak.nan_to_none(ak.Array(cv)[:, None])) + + out = ak.concatenate((out, cv), axis=-1) + + if ch == chns[0]: + type_name = res.dtype + + return VectorOfVectors(ak.values_astype(out, type_name)) + + +def evaluate_to_aoesa( + idx: NDArray, + ids: NDArray, + f_hit: str, + f_dsp: str, + chns: list, + chns_rm: list, + expr: str, + exprl: list, + qry: str | NDArray, + nrows: int, + var_ph: dict = None, + defv: bool | int | float = np.nan, + missv=np.nan, + tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", +) -> ArrayOfEqualSizedArrays: + """Aggregates by returning an :class:`.ArrayOfEqualSizedArrays` of evaluated + expressions of channels that fulfill a query expression. + + Parameters + ---------- + idx + `tcm` index array. + ids + `tcm` id array. + f_hit + path to `hit` tier file. + f_dsp + path to `dsp` tier file. + chns + list of channels to be aggregated. + chns_rm + list of channels to be skipped from evaluation and set to default value. 
+ expr + expression string to be evaluated. + exprl + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. + qry + query expression to mask aggregation. + nrows + length of output :class:`.VectorOfVectors`. + ch_comp + array of "rawid"s at which the expression is evaluated. + var_ph + dictionary of `evt` and additional parameters and their values. + defv + default value. + missv + missing value. + sorter + sorts the entries in the vector according to sorter expression. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. + """ + # define dimension of output array + out = np.full((nrows, len(chns)), missv) + + i = 0 + for ch in chns: + idx_ch = idx[ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] + res = utils.get_data_at_channel( + ch=ch, + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=ch not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, + ) + + # get mask from query + limarr = utils.get_mask_from_query( + qry=qry, + length=len(res), + ch=ch, + idx_ch=idx_ch, + f_hit=f_hit, + f_dsp=f_dsp, + hit_group=hit_group, + dsp_group=dsp_group, + ) + + out[idx_ch, i] = np.where(limarr, res, out[idx_ch, i]) + + i += 1 + + return ArrayOfEqualSizedArrays(nda=out) + + +def evaluate_to_vector( + idx: NDArray, + ids: NDArray, + f_hit: str, + f_dsp: str, + chns: list, + chns_rm: list, + expr: str, + exprl: list, + qry: str | NDArray, + nrows: int, + var_ph: dict = None, + defv: bool | int | float = np.nan, + sorter: str = None, + tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", +) -> VectorOfVectors: + """Aggregates by 
returning a :class:`.VectorOfVector` of evaluated + expressions of channels that fulfill a query expression. + + Parameters + ---------- + idx + `tcm` index array. + ids + `tcm` id array. + f_hit + path to `hit` tier file. + f_dsp + path to `dsp` tier file. + chns + list of channels to be aggregated. + chns_rm + list of channels to be skipped from evaluation and set to default value. + expr + expression string to be evaluated. + exprl + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. + qry + query expression to mask aggregation. + nrows + length of output :class:`.VectorOfVectors`. + ch_comp + array of "rawids" at which the expression is evaluated. + var_ph + dictionary of `evt` and additional parameters and their values. + defv + default value. + sorter + sorts the entries in the vector according to sorter expression. + ``ascend_by:`` results in an vector ordered ascending, + ``decend_by:`` sorts descending. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. 
+ """ + out = evaluate_to_aoesa( + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns=chns, + chns_rm=chns_rm, + expr=expr, + exprl=exprl, + qry=qry, + nrows=nrows, + var_ph=var_ph, + defv=defv, + missv=np.nan, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, + ).view_as("np") + + # if a sorter is given sort accordingly + if sorter is not None: + md, fld = sorter.split(":") + s_val = evaluate_to_aoesa( + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns=chns, + chns_rm=chns_rm, + expr=fld, + exprl=[tuple(fld.split("."))], + qry=None, + nrows=nrows, + missv=np.nan, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, + ).view_as("np") + if "ascend_by" == md: + out = out[np.arange(len(out))[:, None], np.argsort(s_val)] + + elif "descend_by" == md: + out = out[np.arange(len(out))[:, None], np.argsort(-s_val)] + else: + raise ValueError( + "sorter values can only have 'ascend_by' or 'descend_by' prefixes" + ) + + return VectorOfVectors( + ak.values_astype(ak.drop_none(ak.nan_to_none(ak.Array(out))), type(defv)) + ) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 2cb54b2fe..e0c0dafb3 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -14,124 +14,102 @@ import numpy as np from lgdo import Array, ArrayOfEqualSizedArrays, Table, VectorOfVectors, lh5 from lgdo.lh5 import LH5Store -from numpy.typing import NDArray -log = logging.getLogger(__name__) - - -def get_tcm_id_by_pattern(tcm_id_table_pattern: str, ch: str) -> int: - pre = tcm_id_table_pattern.split("{")[0] - post = tcm_id_table_pattern.split("}")[1] - return int(ch.strip(pre).strip(post)) - - -def get_table_name_by_pattern(tcm_id_table_pattern: str, ch_id: int) -> str: - # check tcm_id_table_pattern validity - pattern_check = re.findall(r"{([^}]*?)}", tcm_id_table_pattern)[0] - if pattern_check == "" or ":" == 
pattern_check[0]: - return tcm_id_table_pattern.format(ch_id) - else: - raise NotImplementedError( - "Only empty placeholders with format specifications are currently implemented" - ) +from . import aggregators, utils - -def num_and_pars(value: str, par_dic: dict): - # function tries to convert a string to a int, float, bool - # or returns the value if value is a key in par_dic - if value in par_dic.keys(): - return par_dic[value] - try: - value = int(value) - except ValueError: - try: - value = float(value) - except ValueError: - try: - value = bool(value) - except ValueError: - pass - return value +log = logging.getLogger(__name__) -def evaluate_expression( +def build_evt( f_tcm: str, - f_hit: str, f_dsp: str, - chns: list, - chns_rm: list, - mode: str, - expr: str, - nrows: int, - table: Table = None, - para: dict = None, - qry: str = None, - defv: bool | int | float = np.nan, - sorter: str = None, - tcm_id_table_pattern: str = "ch{}", + f_hit: str, + f_evt: str, + evt_config: str | dict, + wo_mode: str = "write_safe", evt_group: str = "evt", - hit_group: str = "hit", + tcm_group: str = "hardware_tcm_1", dsp_group: str = "dsp", - tcm_group: str = "tcm", -) -> Array | ArrayOfEqualSizedArrays | VectorOfVectors: - """Evaluates the expression defined by the user across all channels - according to the mode. + hit_group: str = "hit", + tcm_id_table_pattern: str = "ch{}", +) -> None: + """Transform data from the `hit` and `dsp` levels which a channel sorted to a + event sorted data format. Parameters ---------- f_tcm - path to `tcm` tier file. - f_hit - path to `hit` tier file. + input LH5 file of the tcm level. f_dsp - path to `dsp` tier file. - chns - list of channel names across which expression gets evaluated (form: - ``ch``). - chns_rm - list of channels which get set to default value during evaluation. In - function mode they are removed entirely (form: ``ch``) - mode - The mode determines how the event entry is calculated across channels. 
- Options are: + input LH5 file of the dsp level. + f_hit + input LH5 file of the hit level. + f_evt + name of the output file. + evt_config + name of configuration file or dictionary defining event fields. Channel + lists can be defined by importing a metadata module. - - ``first_at:sorter``: aggregates across channels by returning the - expression of the channel with smallest value of sorter. - - ``last_at``: aggregates across channels by returning the expression of - the channel with largest value of sorter. - - ``sum``: aggregates by summation. - - ``any``: aggregates by logical or. - - ``all``: aggregates by logical and. - - ``keep_at_ch:ch_field``: aggregates according to passed ch_field. - - ``keep_at_idx:tcm_idx_field``: aggregates according to passed tcm - index field. - - ``gather``: Channels are not combined, but result saved as - :class:`.VectorOfVectors`. + - ``operations`` defines the fields ``name=key``, where ``channels`` + specifies the channels used to for this field (either a string or a + list of strings), + - ``aggregation_mode`` defines how the channels should be combined (see + :func:`evaluate_expression`). + - ``expression`` defnies the mathematical/special function to apply + (see :func:`evaluate_expression`), + - ``query`` defines an expression to mask the aggregation. + - ``parameters`` defines any other parameter used in expression. - qry - a query that can mask the aggregation. - expr - the expression. That can be any mathematical equation/comparison. If - `mode` is ``function``, the expression needs to be a special processing - function defined in modules (e.g. :func:`.modules.spm.get_energy`). In - the expression parameters from either hit, dsp, evt tier (from - operations performed before this one! Dictionary operations order - matters), or from the ``parameters`` field can be used. - nrows - number of rows to be processed. - table - table of 'evt' tier data. 
- para - dictionary of parameters defined in the ``parameters`` field in the - configuration dictionary. - defv - default value of evaluation. - sorter - can be used to sort vector outputs according to sorter expression (see - :func:`evaluate_to_vector`). - tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + For example: + + .. code-block:: json + + { + "channels": { + "geds_on": ["ch1084803", "ch1084804", "ch1121600"], + "spms_on": ["ch1057600", "ch1059201", "ch1062405"], + "muon": "ch1027202", + }, + "operations": { + "energy_id":{ + "channels": "geds_on", + "aggregation_mode": "gather", + "query": "hit.cuspEmax_ctc_cal > 25", + "expression": "tcm.array_id", + "sort": "ascend_by:dsp.tp_0_est" + }, + "energy":{ + "aggregation_mode": "keep_at_ch:evt.energy_id", + "expression": "hit.cuspEmax_ctc_cal > 25" + } + "is_muon_rejected":{ + "channels": "muon", + "aggregation_mode": "any", + "expression": "dsp.wf_max>a", + "parameters": {"a":15100}, + "initial": false + }, + "multiplicity":{ + "channels": ["geds_on", "geds_no_psd", "geds_ac"], + "aggregation_mode": "sum", + "expression": "hit.cuspEmax_ctc_cal > a", + "parameters": {"a":25}, + "initial": 0 + }, + "t0":{ + "aggregation_mode": "keep_at_ch:evt.energy_id", + "expression": "dsp.tp_0_est" + }, + "lar_energy":{ + "channels": "spms_on", + "aggregation_mode": "function", + "expression": ".modules.spm.get_energy(0.5, evt.t0, 48000, 1000, 5000)" + }, + } + } + + wo_mode + writing mode. evt group LH5 root group name of evt tier. tcm_group @@ -140,1344 +118,457 @@ def evaluate_expression( LH5 root group in dsp file. hit_group LH5 root group in hit file. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. 
""" store = LH5Store() + tbl_cfg = evt_config + if not isinstance(tbl_cfg, (str, dict)): + raise TypeError() + if isinstance(tbl_cfg, str): + with open(tbl_cfg) as f: + tbl_cfg = json.load(f) - # find parameters in evt file or in parameters - exprl = re.findall( - rf"({evt_group}|{hit_group}|{dsp_group}).([a-zA-Z_$][\w$]*)", expr - ) - var_ph = {} - if table: - var_ph = var_ph | { - e: table[e].view_as("ak") - for e in table.keys() - if isinstance(table[e], (Array, ArrayOfEqualSizedArrays, VectorOfVectors)) - } - if para: - var_ph = var_ph | para + if "channels" not in tbl_cfg.keys(): + raise ValueError("channel field needs to be specified in the config") + if "operations" not in tbl_cfg.keys(): + raise ValueError("operations field needs to be specified in the config") - if mode == "function": - # evaluate expression - func, params = expr.split("(") - params = ( - params.replace(f"{dsp_group}.", f"{dsp_group}_") - .replace(f"{hit_group}.", f"{hit_group}_") - .replace(f"{evt_group}.", "") + # check tcm_id_table_pattern validity + pattern_check = re.findall(r"{([^}]*?)}", tcm_id_table_pattern) + if len(pattern_check) != 1: + raise ValueError( + f"tcm_id_table_pattern must have exactly one placeholder. {tcm_id_table_pattern} is invalid." + ) + elif "{" in pattern_check[0] or "}" in pattern_check[0]: + raise ValueError( + f"tcm_id_table_pattern {tcm_id_table_pattern} has an invalid placeholder." 
) - params = [ - f_hit, - f_dsp, - f_tcm, - hit_group, - dsp_group, - tcm_group, - [x for x in chns if x not in chns_rm], - ] + [num_and_pars(e, var_ph) for e in params[:-1].split(",")] - # load function dynamically - p, m = func.rsplit(".", 1) - met = getattr(import_module(p, package=__package__), m) - return met(*params) + if ( + utils.get_table_name_by_pattern( + tcm_id_table_pattern, + utils.get_tcm_id_by_pattern(tcm_id_table_pattern, lh5.ls(f_hit)[0]), + ) + != lh5.ls(f_hit)[0] + ): + raise ValueError( + f"tcm_id_table_pattern {tcm_id_table_pattern} does not match keys in data!" + ) - else: - # check if query is either on channel basis or evt basis (and not a mix) - qry_mask = qry - if qry is not None: - if f"{evt_group}." in qry and ( - f"{hit_group}." in qry or f"{dsp_group}." in qry - ): + # create channel list according to config + # This can be either read from the meta data + # or a list of channel names + log.debug("Creating channel dictionary") + + chns = {} + + for k, v in tbl_cfg["channels"].items(): + if isinstance(v, dict): + # it is a meta module. module_name must exist + if "module" not in v.keys(): raise ValueError( - f"Query can't be a mix of {evt_group} tier and lower tiers." + "Need module_name to load channel via a meta data module" ) - # if it is an evt query we can evaluate it directly here - if table and f"{evt_group}." 
in qry: - qry_mask = eval(qry.replace(f"{evt_group}.", ""), table) + attr = {} + # the time_key argument is set to the time key of the DSP file + # in case it is not provided by the config + if "time_key" not in v.keys(): + attr["time_key"] = re.search(r"\d{8}T\d{6}Z", f_dsp).group(0) - # load TCM data to define an event - ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np") - idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np") + # if "None" do None + elif "None" == v["time_key"]: + attr["time_key"] = None - # switch through modes - if table and (("keep_at_ch:" == mode[:11]) or ("keep_at_idx:" == mode[:12])): - if "keep_at_ch:" == mode[:11]: - ch_comp = table[mode[11:].replace(f"{evt_group}.", "")] - else: - ch_comp = table[mode[12:].replace(f"{evt_group}.", "")] - if isinstance(ch_comp, Array): - ch_comp = Array(nda=ids[ch_comp.view_as("np")]) - elif isinstance(ch_comp, VectorOfVectors): - ch_comp = ch_comp.view_as("ak") - ch_comp = VectorOfVectors( - array=ak.unflatten( - ids[ak.flatten(ch_comp)], ak.count(ch_comp, axis=-1) + # load module + p, m = v["module"].rsplit(".", 1) + met = getattr(import_module(p, package=__package__), m) + chns[k] = met(v | attr) + + elif isinstance(v, str): + chns[k] = [v] + + elif isinstance(v, list): + chns[k] = [e for e in v] + + nrows = store.read_n_rows(f"/{tcm_group}/cumulative_length", f_tcm) + + table = Table(size=nrows) + + for k, v in tbl_cfg["operations"].items(): + log.debug("Processing field " + k) + + # if mode not defined in operation, it can only be an operation on the evt level. 
+ if "aggregation_mode" not in v.keys(): + var = {} + if "parameters" in v.keys(): + var = var | v["parameters"] + res = table.eval(v["expression"].replace(f"{evt_group}.", ""), var) + + # add attribute if present + if "lgdo_attrs" in v.keys(): + res.attrs |= v["lgdo_attrs"] + + table.add_field(k, res) + + # Else we build the event entry + else: + if "channels" not in v.keys(): + chns_e = [] + elif isinstance(v["channels"], str): + chns_e = chns[v["channels"]] + elif isinstance(v["channels"], list): + chns_e = list( + itertools.chain.from_iterable([chns[e] for e in v["channels"]]) + ) + chns_rm = [] + if "exclude_channels" in v.keys(): + if isinstance(v["exclude_channels"], str): + chns_rm = chns[v["exclude_channels"]] + elif isinstance(v["exclude_channels"], list): + chns_rm = list( + itertools.chain.from_iterable( + [chns[e] for e in v["exclude_channels"]] ) ) - else: - raise NotImplementedError( - type(ch_comp) - + " not supported (only Array and VectorOfVectors are supported)" - ) - if isinstance(ch_comp, Array): - return evaluate_at_channel( - idx=idx, - ids=ids, - f_hit=f_hit, - f_dsp=f_dsp, - chns_rm=chns_rm, - expr=expr, - exprl=exprl, - ch_comp=ch_comp, - var_ph=var_ph, - defv=defv, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ) - elif isinstance(ch_comp, VectorOfVectors): - return evaluate_at_channel_vov( - idx=idx, - ids=ids, - f_hit=f_hit, - f_dsp=f_dsp, - expr=expr, - exprl=exprl, - ch_comp=ch_comp, - chns_rm=chns_rm, - var_ph=var_ph, - defv=defv, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ) - else: - raise NotImplementedError( - type(ch_comp) - + " not supported (only Array and VectorOfVectors are supported)" - ) - elif "first_at:" in mode or "last_at:" in mode: - sorter = tuple( - re.findall( - rf"({evt_group}|{hit_group}|{dsp_group}).([a-zA-Z_$][\w$]*)", - mode.split("first_at:")[-1], - )[0] - ) - return 
evaluate_to_first_or_last( - idx=idx, - ids=ids, - f_hit=f_hit, - f_dsp=f_dsp, - chns=chns, - chns_rm=chns_rm, - expr=expr, - exprl=exprl, - qry=qry_mask, - nrows=nrows, - sorter=sorter, - var_ph=var_ph, - defv=defv, - is_first=True if "first_at:" in mode else False, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ) - elif mode in ["sum", "any", "all"]: - return evaluate_to_scalar( - mode=mode, - idx=idx, - ids=ids, - f_hit=f_hit, - f_dsp=f_dsp, - chns=chns, - chns_rm=chns_rm, - expr=expr, - exprl=exprl, - qry=qry_mask, - nrows=nrows, - var_ph=var_ph, - defv=defv, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ) - elif "gather" == mode: - return evaluate_to_vector( - idx=idx, - ids=ids, + pars, qry, defaultv, srter = None, None, np.nan, None + if "parameters" in v.keys(): + pars = v["parameters"] + if "query" in v.keys(): + qry = v["query"] + if "initial" in v.keys(): + defaultv = v["initial"] + if isinstance(defaultv, str) and ( + defaultv in ["np.nan", "np.inf", "-np.inf"] + ): + defaultv = eval(defaultv) + if "sort" in v.keys(): + srter = v["sort"] + + obj = evaluate_expression( + f_tcm=f_tcm, f_hit=f_hit, f_dsp=f_dsp, - chns=chns, + chns=chns_e, chns_rm=chns_rm, - expr=expr, - exprl=exprl, - qry=qry_mask, + mode=v["aggregation_mode"], + expr=v["expression"], nrows=nrows, - var_ph=var_ph, - defv=defv, - sorter=sorter, + table=table, + para=pars, + qry=qry, + defv=defaultv, + sorter=srter, tcm_id_table_pattern=tcm_id_table_pattern, evt_group=evt_group, hit_group=hit_group, dsp_group=dsp_group, + tcm_group=tcm_group, ) - else: - raise ValueError(mode + " not a valid mode") - - -def find_parameters( - f_hit: str, - f_dsp: str, - ch: str, - idx_ch: NDArray, - exprl: list, - hit_group: str = "hit", - dsp_group: str = "dsp", -) -> dict: - """Wraps :func:`load_vars_to_nda` to return parameters from `hit` and `dsp` - tiers. 
- Parameters - ---------- - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - ch - "rawid" in the tiers. - idx_ch - index array of entries to be read from files. - exprl - list of tuples ``(tier, field)`` to be found in the `hit/dsp` tiers. - dsp_group - LH5 root group in dsp file. - hit_group - LH5 root group in hit file. - """ + # add attribute if present + if "lgdo_attrs" in v.keys(): + obj.attrs |= v["lgdo_attrs"] - # find fields in either dsp, hit - dsp_flds = [e[1] for e in exprl if e[0] == dsp_group] - hit_flds = [e[1] for e in exprl if e[0] == hit_group] + table.add_field(k, obj) - store = LH5Store() - hit_dict, dsp_dict = {}, {} - if len(hit_flds) > 0: - hit_ak = store.read( - f"{ch.replace('/','')}/{hit_group}/", f_hit, field_mask=hit_flds, idx=idx_ch - )[0].view_as("ak") - hit_dict = dict( - zip([f"{hit_group}_" + e for e in ak.fields(hit_ak)], ak.unzip(hit_ak)) - ) - if len(dsp_flds) > 0: - dsp_ak = store.read( - f"{ch.replace('/','')}/{dsp_group}/", f_dsp, field_mask=dsp_flds, idx=idx_ch - )[0].view_as("ak") - dsp_dict = dict( - zip([f"{dsp_group}_" + e for e in ak.fields(dsp_ak)], ak.unzip(dsp_ak)) - ) + # write output fields into f_evt + if "outputs" in tbl_cfg.keys(): + if len(tbl_cfg["outputs"]) < 1: + log.warning("No output fields specified, no file will be written.") + else: + clms_to_remove = [e for e in table.keys() if e not in tbl_cfg["outputs"]] + for fld in clms_to_remove: + table.remove_field(fld, True) + store.write( + obj=table, name=f"/{evt_group}/", lh5_file=f_evt, wo_mode=wo_mode + ) + else: + log.warning("No output fields specified, no file will be written.") - return hit_dict | dsp_dict + key = re.search(r"\d{8}T\d{6}Z", f_hit).group(0) + log.info( + f"Applied {len(tbl_cfg['operations'])} operations to key {key} and saved {len(tbl_cfg['outputs'])} evt fields across {len(chns)} channel groups" + ) -def get_data_at_channel( - ch: str, - ids: NDArray, - idx: NDArray, - expr: str, - exprl: list, - var_ph: dict, - 
is_evaluated: bool, +def evaluate_expression( + f_tcm: str, f_hit: str, f_dsp: str, - defv, + chns: list, + chns_rm: list, + mode: str, + expr: str, + nrows: int, + table: Table = None, + para: dict = None, + qry: str = None, + defv: bool | int | float = np.nan, + sorter: str = None, tcm_id_table_pattern: str = "ch{}", evt_group: str = "evt", hit_group: str = "hit", dsp_group: str = "dsp", -) -> np.ndarray: - """Evaluates an expression and returns the result. + tcm_group: str = "tcm", +) -> Array | ArrayOfEqualSizedArrays | VectorOfVectors: + """Evaluates the expression defined by the user across all channels + according to the mode. Parameters ---------- - ch - "rawid" of channel to be evaluated. - idx - `tcm` index array. - ids - `tcm` id array. - expr - expression to be evaluated. - exprl - list of parameter-tuples ``(root_group, field)`` found in the expression. - var_ph - dict of additional parameters that are not channel dependent. - is_evaluated - if false, the expression does not get evaluated but an array of default - values is returned. + f_tcm + path to `tcm` tier file. f_hit path to `hit` tier file. f_dsp path to `dsp` tier file. - defv - default value. - tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. - dsp_group - LH5 root group in dsp file. - hit_group - LH5 root group in hit file. - evt_group - LH5 root group in evt file. 
- """ - - # get index list for this channel to be loaded - idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] - outsize = len(idx_ch) - - if not is_evaluated: - res = np.full(outsize, defv, dtype=type(defv)) - elif "tcm.array_id" == expr: - res = np.full( - outsize, get_tcm_id_by_pattern(tcm_id_table_pattern, ch), dtype=int - ) - elif "tcm.index" == expr: - res = np.where(ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch))[0] - else: - var = find_parameters( - f_hit=f_hit, - f_dsp=f_dsp, - ch=ch, - idx_ch=idx_ch, - exprl=exprl, - hit_group=hit_group, - dsp_group=dsp_group, - ) - - if var_ph is not None: - var = var | var_ph - - # evaluate expression - # move tier+dots in expression to underscores (e.g. evt.foo -> evt_foo) - res = eval( - expr.replace(f"{dsp_group}.", f"{dsp_group}_") - .replace(f"{hit_group}.", f"{hit_group}_") - .replace(f"{evt_group}.", ""), - var, - ) - - # in case the expression evaluates to a single value blow it up - if (not hasattr(res, "__len__")) or (isinstance(res, str)): - return np.full(outsize, res) - - # the resulting arrays need to be 1D from the operation, - # this can only change once we support larger than two dimensional LGDOs - # ak.to_numpy() raises error if array not regular - res = ak.to_numpy(res, allow_missing=False) - - # in this method only 1D values are allowed - if res.ndim > 1: - raise ValueError( - f"expression '{expr}' must return 1D array. If you are using VectorOfVectors or ArrayOfEqualSizedArrays, use awkward reduction functions to reduce the dimension" - ) - - return res - + chns + list of channel names across which expression gets evaluated (form: + ``ch``). + chns_rm + list of channels which get set to default value during evaluation. In + function mode they are removed entirely (form: ``ch``) + mode + The mode determines how the event entry is calculated across channels. 
+ Options are: -def get_mask_from_query( - qry: str | NDArray, - length: int, - ch: str, - idx_ch: NDArray, - f_hit: str, - f_dsp: str, - hit_group: str = "hit", - dsp_group: str = "dsp", -) -> np.ndarray: - """Evaluates a query expression and returns a mask accordingly. + - ``first_at:sorter``: aggregates across channels by returning the + expression of the channel with smallest value of sorter. + - ``last_at``: aggregates across channels by returning the expression of + the channel with largest value of sorter. + - ``sum``: aggregates by summation. + - ``any``: aggregates by logical or. + - ``all``: aggregates by logical and. + - ``keep_at_ch:ch_field``: aggregates according to passed ch_field. + - ``keep_at_idx:tcm_idx_field``: aggregates according to passed tcm + index field. + - ``gather``: Channels are not combined, but result saved as + :class:`.VectorOfVectors`. - Parameters - ---------- qry - query expression. - length - length of the return mask. - ch - "rawid" of channel to be evaluated. - idx_ch - channel indices to be read. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - hit_group - LH5 root group in hit file. - dsp_group - LH5 root group in dsp file. - """ - - # get sub evt based query condition if needed - if isinstance(qry, str): - qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) - qry_var = find_parameters( - f_hit=f_hit, - f_dsp=f_dsp, - ch=ch, - idx_ch=idx_ch, - exprl=qry_lst, - hit_group=hit_group, - dsp_group=dsp_group, - ) - limarr = eval( - qry.replace(f"{dsp_group}.", f"{dsp_group}_").replace( - f"{hit_group}.", f"{hit_group}_" - ), - qry_var, - ) - - # in case the expression evaluates to a single value blow it up - if (not hasattr(limarr, "__len__")) or (isinstance(limarr, str)): - return np.full(len(idx_ch), limarr) - - limarr = ak.to_numpy(limarr, allow_missing=False) - if limarr.ndim > 1: - raise ValueError( - f"query '{qry}' must return 1D array. 
If you are using VectorOfVectors or ArrayOfEqualSizedArrays, use awkward reduction functions to reduce the dimension" - ) - - # or forward the array - elif isinstance(qry, np.ndarray): - limarr = qry - - # if no condition, it must be true - else: - limarr = np.ones(length).astype(bool) - - # explicit cast to bool - if limarr.dtype != bool: - limarr = limarr.astype(bool) - - return limarr - - -def evaluate_to_first_or_last( - idx: NDArray, - ids: NDArray, - f_hit: str, - f_dsp: str, - chns: list, - chns_rm: list, - expr: str, - exprl: list, - qry: str | NDArray, - nrows: int, - sorter: tuple, - var_ph: dict = None, - defv: bool | int | float = np.nan, - is_first: bool = True, - tcm_id_table_pattern: str = "ch{}", - evt_group: str = "evt", - hit_group: str = "hit", - dsp_group: str = "dsp", -) -> Array: - """Aggregates across channels by returning the expression of the channel - with value of `sorter`. - - Parameters - ---------- - idx - `tcm` index array. - ids - `tcm` id array. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - chns - list of channels to be aggregated. - chns_rm - list of channels to be skipped from evaluation and set to default value. + a query that can mask the aggregation. expr - expression string to be evaluated. - exprl - list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. - qry - query expression to mask aggregation. + the expression. That can be any mathematical equation/comparison. If + `mode` is ``function``, the expression needs to be a special processing + function defined in modules (e.g. :func:`.modules.spm.get_energy`). In + the expression parameters from either hit, dsp, evt tier (from + operations performed before this one! Dictionary operations order + matters), or from the ``parameters`` field can be used. nrows - length of output array. - sorter - tuple of field in `hit/dsp/evt` tier to evaluate ``(tier, field)``. - var_ph - dictionary of `evt` and additional parameters and their values. 
+ number of rows to be processed. + table + table of 'evt' tier data. + para + dictionary of parameters defined in the ``parameters`` field in the + configuration dictionary. defv - default value. - is_first - defines if sorted by smallest or largest value of `sorter` + default value of evaluation. + sorter + can be used to sort vector outputs according to sorter expression (see + :func:`evaluate_to_vector`). tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. + evt group + LH5 root group name of evt tier. + tcm_group + LH5 root group in tcm file. dsp_group LH5 root group in dsp file. hit_group LH5 root group in hit file. - evt_group - LH5 root group in evt file. """ - # define dimension of output array - out = np.full(nrows, defv, dtype=type(defv)) - outt = np.zeros(len(out)) - store = LH5Store() - for ch in chns: - # get index list for this channel to be loaded - idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] - - # evaluate at channel - res = get_data_at_channel( - ch=ch, - ids=ids, - idx=idx, - expr=expr, - exprl=exprl, - var_ph=var_ph, - is_evaluated=ch not in chns_rm, - f_hit=f_hit, - f_dsp=f_dsp, - defv=defv, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ) + # find parameters in evt file or in parameters + exprl = re.findall( + rf"({evt_group}|{hit_group}|{dsp_group}).([a-zA-Z_$][\w$]*)", expr + ) + var_ph = {} + if table: + var_ph = var_ph | { + e: table[e].view_as("ak") + for e in table.keys() + if isinstance(table[e], (Array, ArrayOfEqualSizedArrays, VectorOfVectors)) + } + if para: + var_ph = var_ph | para - # get mask from query - limarr = get_mask_from_query( - qry=qry, - length=len(res), - ch=ch, - idx_ch=idx_ch, - f_hit=f_hit, - f_dsp=f_dsp, - hit_group=hit_group, - dsp_group=dsp_group, + if mode == "function": + # evaluate expression + func, params = expr.split("(") + params = ( + 
params.replace(f"{dsp_group}.", f"{dsp_group}_") + .replace(f"{hit_group}.", f"{hit_group}_") + .replace(f"{evt_group}.", "") ) + params = [ + f_hit, + f_dsp, + f_tcm, + hit_group, + dsp_group, + tcm_group, + tcm_id_table_pattern, + [x for x in chns if x not in chns_rm], + ] + [utils.num_and_pars(e, var_ph) for e in params[:-1].split(",")] - # find if sorter is in hit or dsp - t0 = store.read( - f"{ch}/{sorter[0]}/{sorter[1]}", - f_hit if f"{hit_group}" == sorter[0] else f_dsp, - idx=idx_ch, - )[0].view_as("np") - - if t0.ndim > 1: - raise ValueError(f"sorter '{sorter[0]}/{sorter[1]}' must be a 1D array") - - if is_first: - if ch == chns[0]: - outt[:] = np.inf - - out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) - outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) - - else: - out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) - outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) - - return Array(nda=out) - - -def evaluate_to_scalar( - mode: str, - idx: NDArray, - ids: NDArray, - f_hit: str, - f_dsp: str, - chns: list, - chns_rm: list, - expr: str, - exprl: list, - qry: str | NDArray, - nrows: int, - var_ph: dict = None, - defv: bool | int | float = np.nan, - tcm_id_table_pattern: str = "ch{}", - evt_group: str = "evt", - hit_group: str = "hit", - dsp_group: str = "dsp", -) -> Array: - """Aggregates by summation across channels. - - Parameters - ---------- - mode - aggregation mode. - idx - tcm index array. - ids - tcm id array. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - chns - list of channels to be aggregated. - chns_rm - list of channels to be skipped from evaluation and set to default value. - expr - expression string to be evaluated. - exprl - list of dsp/hit/evt parameter tuples in expression (tier, field). - qry - query expression to mask aggregation. - nrows - length of output array - var_ph - dictionary of evt and additional parameters and their values. 
- defv - default value. - tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. - dsp_group - LH5 root group in dsp file. - hit_group - LH5 root group in hit file. - evt_group - LH5 root group in evt file. - """ - - # define dimension of output array - out = np.full(nrows, defv, dtype=type(defv)) + # load function dynamically + p, m = func.rsplit(".", 1) + met = getattr(import_module(p, package=__package__), m) + return met(*params) - for ch in chns: - # get index list for this channel to be loaded - idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] + else: + # check if query is either on channel basis or evt basis (and not a mix) + qry_mask = qry + if qry is not None: + if f"{evt_group}." in qry and ( + f"{hit_group}." in qry or f"{dsp_group}." in qry + ): + raise ValueError( + f"Query can't be a mix of {evt_group} tier and lower tiers." + ) - res = get_data_at_channel( - ch=ch, - ids=ids, - idx=idx, - expr=expr, - exprl=exprl, - var_ph=var_ph, - is_evaluated=ch not in chns_rm, - f_hit=f_hit, - f_dsp=f_dsp, - defv=defv, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ) + # if it is an evt query we can evaluate it directly here + if table and f"{evt_group}." 
in qry: + qry_mask = eval(qry.replace(f"{evt_group}.", ""), table) - # get mask from query - limarr = get_mask_from_query( - qry=qry, - length=len(res), - ch=ch, - idx_ch=idx_ch, - f_hit=f_hit, - f_dsp=f_dsp, - hit_group=hit_group, - dsp_group=dsp_group, - ) + # load TCM data to define an event + ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np") + idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np") # switch through modes - if "sum" == mode: - if res.dtype == bool: - res = res.astype(int) - out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) - if "any" == mode: - if res.dtype != bool: - res = res.astype(bool) - out[idx_ch] = out[idx_ch] | (res & limarr) - if "all" == mode: - if res.dtype != bool: - res = res.astype(bool) - out[idx_ch] = out[idx_ch] & res & limarr - - return Array(nda=out) - - -def evaluate_at_channel( - idx: NDArray, - ids: NDArray, - f_hit: str, - f_dsp: str, - chns_rm: list, - expr: str, - exprl: list, - ch_comp: Array, - var_ph: dict = None, - defv: bool | int | float = np.nan, - tcm_id_table_pattern: str = "ch{}", - evt_group: str = "evt", - hit_group: str = "hit", - dsp_group: str = "dsp", -) -> Array: - """Aggregates by evaluating the expression at a given channel. - - Parameters - ---------- - idx - `tcm` index array. - ids - `tcm` id array. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - chns_rm - list of channels to be skipped from evaluation and set to default value. - expr - expression string to be evaluated. - exprl - list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. - ch_comp - array of rawids at which the expression is evaluated. - var_ph - dictionary of `evt` and additional parameters and their values. - defv - default value. - tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. - dsp_group - LH5 root group in dsp file. - hit_group - LH5 root group in hit file. 
- evt_group - LH5 root group in evt file. - """ - - out = np.full(len(ch_comp.nda), defv, dtype=type(defv)) - - for ch in np.unique(ch_comp.nda.astype(int)): - # skip default value - if get_table_name_by_pattern(tcm_id_table_pattern, ch) not in lh5.ls(f_hit): - continue - idx_ch = idx[ids == ch] - res = get_data_at_channel( - ch=get_table_name_by_pattern(tcm_id_table_pattern, ch), - ids=ids, - idx=idx, - expr=expr, - exprl=exprl, - var_ph=var_ph, - is_evaluated=get_table_name_by_pattern(tcm_id_table_pattern, ch) - not in chns_rm, - f_hit=f_hit, - f_dsp=f_dsp, - defv=defv, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ) - - out[idx_ch] = np.where(ch == ch_comp.nda[idx_ch], res, out[idx_ch]) - - return Array(nda=out) - + if table and (("keep_at_ch:" == mode[:11]) or ("keep_at_idx:" == mode[:12])): + if "keep_at_ch:" == mode[:11]: + ch_comp = table[mode[11:].replace(f"{evt_group}.", "")] + else: + ch_comp = table[mode[12:].replace(f"{evt_group}.", "")] + if isinstance(ch_comp, Array): + ch_comp = Array(nda=ids[ch_comp.view_as("np")]) + elif isinstance(ch_comp, VectorOfVectors): + ch_comp = ch_comp.view_as("ak") + ch_comp = VectorOfVectors( + array=ak.unflatten( + ids[ak.flatten(ch_comp)], ak.count(ch_comp, axis=-1) + ) + ) + else: + raise NotImplementedError( + type(ch_comp) + + " not supported (only Array and VectorOfVectors are supported)" + ) -def evaluate_at_channel_vov( - idx: NDArray, - ids: NDArray, - f_hit: str, - f_dsp: str, - expr: str, - exprl: list, - ch_comp: VectorOfVectors, - chns_rm: list, - var_ph: dict = None, - defv: bool | int | float = np.nan, - tcm_id_table_pattern: str = "ch{}", - evt_group: str = "evt", - hit_group: str = "hit", - dsp_group: str = "dsp", -) -> VectorOfVectors: - """Same as :func:`evaluate_at_channel` but evaluates expression at non - flat channels :class:`.VectorOfVectors`. - - Parameters - ---------- - idx - `tcm` index array. - ids - `tcm` id array. 
- f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - expr - expression string to be evaluated. - exprl - list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. - ch_comp - array of "rawid"s at which the expression is evaluated. - chns_rm - list of channels to be skipped from evaluation and set to default value. - var_ph - dictionary of `evt` and additional parameters and their values. - defv - default value. - tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. - dsp_group - LH5 root group in dsp file. - hit_group - LH5 root group in hit file. - evt_group - LH5 root group in evt file. - """ - - # blow up vov to aoesa - out = ak.Array([[] for _ in range(len(ch_comp))]) - - chns = np.unique(ch_comp.flattened_data.nda).astype(int) - ch_comp = ch_comp.view_as("ak") - - type_name = None - for ch in chns: - idx_ch = idx[ids == ch] - res = get_data_at_channel( - ch=get_table_name_by_pattern(tcm_id_table_pattern, ch), - ids=ids, - idx=idx, - expr=expr, - exprl=exprl, - var_ph=var_ph, - is_evaluated=get_table_name_by_pattern(tcm_id_table_pattern, ch) - not in chns_rm, - f_hit=f_hit, - f_dsp=f_dsp, - defv=defv, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ) - - # see in which events the current channel is present - mask = ak.to_numpy(ak.any(ch_comp == ch, axis=-1), allow_missing=False) - cv = np.full(len(ch_comp), np.nan) - cv[idx_ch] = res - cv[~mask] = np.nan - cv = ak.drop_none(ak.nan_to_none(ak.Array(cv)[:, None])) - - out = ak.concatenate((out, cv), axis=-1) - - if ch == chns[0]: - type_name = res.dtype - - return VectorOfVectors(ak.values_astype(out, type_name)) - - -def evaluate_to_aoesa( - idx: NDArray, - ids: NDArray, - f_hit: str, - f_dsp: str, - chns: list, - chns_rm: list, - expr: str, - exprl: list, - qry: str | NDArray, - nrows: int, - var_ph: dict = None, - defv: bool | int 
| float = np.nan, - missv=np.nan, - tcm_id_table_pattern: str = "ch{}", - evt_group: str = "evt", - hit_group: str = "hit", - dsp_group: str = "dsp", -) -> ArrayOfEqualSizedArrays: - """Aggregates by returning an :class:`.ArrayOfEqualSizedArrays` of evaluated - expressions of channels that fulfill a query expression. - - Parameters - ---------- - idx - `tcm` index array. - ids - `tcm` id array. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - chns - list of channels to be aggregated. - chns_rm - list of channels to be skipped from evaluation and set to default value. - expr - expression string to be evaluated. - exprl - list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. - qry - query expression to mask aggregation. - nrows - length of output :class:`.VectorOfVectors`. - ch_comp - array of "rawid"s at which the expression is evaluated. - var_ph - dictionary of `evt` and additional parameters and their values. - defv - default value. - missv - missing value. - sorter - sorts the entries in the vector according to sorter expression. - tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. - dsp_group - LH5 root group in dsp file. - hit_group - LH5 root group in hit file. - evt_group - LH5 root group in evt file. 
- """ - # define dimension of output array - out = np.full((nrows, len(chns)), missv) - - i = 0 - for ch in chns: - idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] - res = get_data_at_channel( - ch=ch, - ids=ids, - idx=idx, - expr=expr, - exprl=exprl, - var_ph=var_ph, - is_evaluated=ch not in chns_rm, - f_hit=f_hit, - f_dsp=f_dsp, - defv=defv, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ) - - # get mask from query - limarr = get_mask_from_query( - qry=qry, - length=len(res), - ch=ch, - idx_ch=idx_ch, - f_hit=f_hit, - f_dsp=f_dsp, - hit_group=hit_group, - dsp_group=dsp_group, - ) - - out[idx_ch, i] = np.where(limarr, res, out[idx_ch, i]) - - i += 1 - - return ArrayOfEqualSizedArrays(nda=out) - - -def evaluate_to_vector( - idx: NDArray, - ids: NDArray, - f_hit: str, - f_dsp: str, - chns: list, - chns_rm: list, - expr: str, - exprl: list, - qry: str | NDArray, - nrows: int, - var_ph: dict = None, - defv: bool | int | float = np.nan, - sorter: str = None, - tcm_id_table_pattern: str = "ch{}", - evt_group: str = "evt", - hit_group: str = "hit", - dsp_group: str = "dsp", -) -> VectorOfVectors: - """Aggregates by returning a :class:`.VectorOfVector` of evaluated - expressions of channels that fulfill a query expression. - - Parameters - ---------- - idx - `tcm` index array. - ids - `tcm` id array. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - chns - list of channels to be aggregated. - chns_rm - list of channels to be skipped from evaluation and set to default value. - expr - expression string to be evaluated. - exprl - list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. - qry - query expression to mask aggregation. - nrows - length of output :class:`.VectorOfVectors`. - ch_comp - array of "rawids" at which the expression is evaluated. - var_ph - dictionary of `evt` and additional parameters and their values. - defv - default value. 
- sorter - sorts the entries in the vector according to sorter expression. - ``ascend_by:`` results in an vector ordered ascending, - ``decend_by:`` sorts descending. - tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. - dsp_group - LH5 root group in dsp file. - hit_group - LH5 root group in hit file. - evt_group - LH5 root group in evt file. - """ - out = evaluate_to_aoesa( - idx=idx, - ids=ids, - f_hit=f_hit, - f_dsp=f_dsp, - chns=chns, - chns_rm=chns_rm, - expr=expr, - exprl=exprl, - qry=qry, - nrows=nrows, - var_ph=var_ph, - defv=defv, - missv=np.nan, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ).view_as("np") - - # if a sorter is given sort accordingly - if sorter is not None: - md, fld = sorter.split(":") - s_val = evaluate_to_aoesa( - idx=idx, - ids=ids, - f_hit=f_hit, - f_dsp=f_dsp, - chns=chns, - chns_rm=chns_rm, - expr=fld, - exprl=[tuple(fld.split("."))], - qry=None, - nrows=nrows, - missv=np.nan, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ).view_as("np") - if "ascend_by" == md: - out = out[np.arange(len(out))[:, None], np.argsort(s_val)] - - elif "descend_by" == md: - out = out[np.arange(len(out))[:, None], np.argsort(-s_val)] - else: - raise ValueError( - "sorter values can only have 'ascend_by' or 'descend_by' prefixes" - ) - - return VectorOfVectors( - ak.values_astype(ak.drop_none(ak.nan_to_none(ak.Array(out))), type(defv)) - ) - - -def build_evt( - f_tcm: str, - f_dsp: str, - f_hit: str, - f_evt: str, - evt_config: str | dict, - wo_mode: str = "write_safe", - evt_group: str = "evt", - tcm_group: str = "hardware_tcm_1", - dsp_group: str = "dsp", - hit_group: str = "hit", - tcm_id_table_pattern: str = "ch{}", -) -> None: - """Transform data from the `hit` and `dsp` levels which a channel sorted to a - event sorted data 
format. - - Parameters - ---------- - f_tcm - input LH5 file of the tcm level. - f_dsp - input LH5 file of the dsp level. - f_hit - input LH5 file of the hit level. - f_evt - name of the output file. - evt_config - name of configuration file or dictionary defining event fields. Channel - lists can be defined by importing a metadata module. - - - ``operations`` defines the fields ``name=key``, where ``channels`` - specifies the channels used to for this field (either a string or a - list of strings), - - ``aggregation_mode`` defines how the channels should be combined (see - :func:`evaluate_expression`). - - ``expression`` defnies the mathematical/special function to apply - (see :func:`evaluate_expression`), - - ``query`` defines an expression to mask the aggregation. - - ``parameters`` defines any other parameter used in expression. - - For example: - - .. code-block:: json - - { - "channels": { - "geds_on": ["ch1084803", "ch1084804", "ch1121600"], - "spms_on": ["ch1057600", "ch1059201", "ch1062405"], - "muon": "ch1027202", - }, - "operations": { - "energy_id":{ - "channels": "geds_on", - "aggregation_mode": "gather", - "query": "hit.cuspEmax_ctc_cal > 25", - "expression": "tcm.array_id", - "sort": "ascend_by:dsp.tp_0_est" - }, - "energy":{ - "aggregation_mode": "keep_at_ch:evt.energy_id", - "expression": "hit.cuspEmax_ctc_cal > 25" - } - "is_muon_rejected":{ - "channels": "muon", - "aggregation_mode": "any", - "expression": "dsp.wf_max>a", - "parameters": {"a":15100}, - "initial": false - }, - "multiplicity":{ - "channels": ["geds_on", "geds_no_psd", "geds_ac"], - "aggregation_mode": "sum", - "expression": "hit.cuspEmax_ctc_cal > a", - "parameters": {"a":25}, - "initial": 0 - }, - "t0":{ - "aggregation_mode": "keep_at_ch:evt.energy_id", - "expression": "dsp.tp_0_est" - }, - "lar_energy":{ - "channels": "spms_on", - "aggregation_mode": "function", - "expression": ".modules.spm.get_energy(0.5, evt.t0, 48000, 1000, 5000)" - }, - } - } - - wo_mode - writing mode. 
- evt group - LH5 root group name of evt tier. - tcm_group - LH5 root group in tcm file. - dsp_group - LH5 root group in dsp file. - hit_group - LH5 root group in hit file. - tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. - """ - - store = LH5Store() - tbl_cfg = evt_config - if not isinstance(tbl_cfg, (str, dict)): - raise TypeError() - if isinstance(tbl_cfg, str): - with open(tbl_cfg) as f: - tbl_cfg = json.load(f) - - if "channels" not in tbl_cfg.keys(): - raise ValueError("channel field needs to be specified in the config") - if "operations" not in tbl_cfg.keys(): - raise ValueError("operations field needs to be specified in the config") - - # check tcm_id_table_pattern validity - pattern_check = re.findall(r"{([^}]*?)}", tcm_id_table_pattern) - if len(pattern_check) != 1: - raise ValueError( - f"tcm_id_table_pattern must have exactly one placeholder. {tcm_id_table_pattern} is invalid." - ) - elif "{" in pattern_check[0] or "}" in pattern_check[0]: - raise ValueError( - f"tcm_id_table_pattern {tcm_id_table_pattern} has an invalid placeholder." - ) - - if ( - get_table_name_by_pattern( - tcm_id_table_pattern, - get_tcm_id_by_pattern(tcm_id_table_pattern, lh5.ls(f_hit)[0]), - ) - != lh5.ls(f_hit)[0] - ): - raise ValueError( - f"tcm_id_table_pattern {tcm_id_table_pattern} does not match keys in data!" - ) - - # create channel list according to config - # This can be either read from the meta data - # or a list of channel names - log.debug("Creating channel dictionary") - - chns = {} - - for k, v in tbl_cfg["channels"].items(): - if isinstance(v, dict): - # it is a meta module. 
module_name must exist - if "module" not in v.keys(): - raise ValueError( - "Need module_name to load channel via a meta data module" + if isinstance(ch_comp, Array): + return aggregators.evaluate_at_channel( + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns_rm=chns_rm, + expr=expr, + exprl=exprl, + ch_comp=ch_comp, + var_ph=var_ph, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) - - attr = {} - # the time_key argument is set to the time key of the DSP file - # in case it is not provided by the config - if "time_key" not in v.keys(): - attr["time_key"] = re.search(r"\d{8}T\d{6}Z", f_dsp).group(0) - - # if "None" do None - elif "None" == v["time_key"]: - attr["time_key"] = None - - # load module - p, m = v["module"].rsplit(".", 1) - met = getattr(import_module(p, package=__package__), m) - chns[k] = met(v | attr) - - elif isinstance(v, str): - chns[k] = [v] - - elif isinstance(v, list): - chns[k] = [e for e in v] - - nrows = store.read_n_rows(f"/{tcm_group}/cumulative_length", f_tcm) - - table = Table(size=nrows) - - for k, v in tbl_cfg["operations"].items(): - log.debug("Processing field " + k) - - # if mode not defined in operation, it can only be an operation on the evt level. 
- if "aggregation_mode" not in v.keys(): - var = {} - if "parameters" in v.keys(): - var = var | v["parameters"] - res = table.eval(v["expression"].replace(f"{evt_group}.", ""), var) - - # add attribute if present - if "lgdo_attrs" in v.keys(): - res.attrs |= v["lgdo_attrs"] - - table.add_field(k, res) - - # Else we build the event entry - else: - if "channels" not in v.keys(): - chns_e = [] - elif isinstance(v["channels"], str): - chns_e = chns[v["channels"]] - elif isinstance(v["channels"], list): - chns_e = list( - itertools.chain.from_iterable([chns[e] for e in v["channels"]]) + elif isinstance(ch_comp, VectorOfVectors): + return aggregators.evaluate_at_channel_vov( + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + expr=expr, + exprl=exprl, + ch_comp=ch_comp, + chns_rm=chns_rm, + var_ph=var_ph, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) - chns_rm = [] - if "exclude_channels" in v.keys(): - if isinstance(v["exclude_channels"], str): - chns_rm = chns[v["exclude_channels"]] - elif isinstance(v["exclude_channels"], list): - chns_rm = list( - itertools.chain.from_iterable( - [chns[e] for e in v["exclude_channels"]] - ) - ) - - pars, qry, defaultv, srter = None, None, np.nan, None - if "parameters" in v.keys(): - pars = v["parameters"] - if "query" in v.keys(): - qry = v["query"] - if "initial" in v.keys(): - defaultv = v["initial"] - if isinstance(defaultv, str) and ( - defaultv in ["np.nan", "np.inf", "-np.inf"] - ): - defaultv = eval(defaultv) - if "sort" in v.keys(): - srter = v["sort"] - - obj = evaluate_expression( - f_tcm=f_tcm, + else: + raise NotImplementedError( + type(ch_comp) + + " not supported (only Array and VectorOfVectors are supported)" + ) + elif "first_at:" in mode or "last_at:" in mode: + sorter = tuple( + re.findall( + rf"({evt_group}|{hit_group}|{dsp_group}).([a-zA-Z_$][\w$]*)", + mode.split("first_at:")[-1], + )[0] + ) + return 
aggregators.evaluate_to_first_or_last( + idx=idx, + ids=ids, f_hit=f_hit, f_dsp=f_dsp, - chns=chns_e, + chns=chns, chns_rm=chns_rm, - mode=v["aggregation_mode"], - expr=v["expression"], + expr=expr, + exprl=exprl, + qry=qry_mask, nrows=nrows, - table=table, - para=pars, - qry=qry, - defv=defaultv, - sorter=srter, + sorter=sorter, + var_ph=var_ph, + defv=defv, + is_first=True if "first_at:" in mode else False, tcm_id_table_pattern=tcm_id_table_pattern, evt_group=evt_group, hit_group=hit_group, dsp_group=dsp_group, - tcm_group=tcm_group, ) - - # add attribute if present - if "lgdo_attrs" in v.keys(): - obj.attrs |= v["lgdo_attrs"] - - table.add_field(k, obj) - - # write output fields into f_evt - if "outputs" in tbl_cfg.keys(): - if len(tbl_cfg["outputs"]) < 1: - log.warning("No output fields specified, no file will be written.") - else: - clms_to_remove = [e for e in table.keys() if e not in tbl_cfg["outputs"]] - for fld in clms_to_remove: - table.remove_field(fld, True) - store.write( - obj=table, name=f"/{evt_group}/", lh5_file=f_evt, wo_mode=wo_mode + elif mode in ["sum", "any", "all"]: + return aggregators.evaluate_to_scalar( + mode=mode, + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns=chns, + chns_rm=chns_rm, + expr=expr, + exprl=exprl, + qry=qry_mask, + nrows=nrows, + var_ph=var_ph, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) - else: - log.warning("No output fields specified, no file will be written.") - - key = re.search(r"\d{8}T\d{6}Z", f_hit).group(0) - log.info( - f"Applied {len(tbl_cfg['operations'])} operations to key {key} and saved {len(tbl_cfg['outputs'])} evt fields across {len(chns)} channel groups" - ) + elif "gather" == mode: + return aggregators.evaluate_to_vector( + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns=chns, + chns_rm=chns_rm, + expr=expr, + exprl=exprl, + qry=qry_mask, + nrows=nrows, + var_ph=var_ph, + defv=defv, + sorter=sorter, 
+ tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, + ) + else: + raise ValueError(mode + " not a valid mode") diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index b72198a6f..9539c49f2 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -1,13 +1,14 @@ """ Module for special event level routines for SiPMs -functions must take as the first 4 args in order: +functions must take as the first 8 args in order: - path to the hit file -- path to the dsp file +- path to the dsp int: + pre = tcm_id_table_pattern.split("{")[0] + post = tcm_id_table_pattern.split("}")[1] + return int(ch.strip(pre).strip(post)) + + +def get_table_name_by_pattern(tcm_id_table_pattern: str, ch_id: int) -> str: + # check tcm_id_table_pattern validity + pattern_check = re.findall(r"{([^}]*?)}", tcm_id_table_pattern)[0] + if pattern_check == "" or ":" == pattern_check[0]: + return tcm_id_table_pattern.format(ch_id) + else: + raise NotImplementedError( + "Only empty placeholders with format specifications are currently implemented" + ) + + +def num_and_pars(value: str, par_dic: dict): + # function tries to convert a string to a int, float, bool + # or returns the value if value is a key in par_dic + if value in par_dic.keys(): + return par_dic[value] + try: + value = int(value) + except ValueError: + try: + value = float(value) + except ValueError: + try: + value = bool(value) + except ValueError: + pass + return value + +def find_parameters( + f_hit: str, + f_dsp: str, + ch: str, + idx_ch: NDArray, + exprl: list, + hit_group: str = "hit", + dsp_group: str = "dsp", +) -> dict: + """Wraps :func:`load_vars_to_nda` to return parameters from `hit` and `dsp` + tiers. + + Parameters + ---------- + f_hit + path to `hit` tier file. + f_dsp + path to `dsp` tier file. + ch + "rawid" in the tiers. + idx_ch + index array of entries to be read from files. 
+ exprl + list of tuples ``(tier, field)`` to be found in the `hit/dsp` tiers. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + """ + + # find fields in either dsp, hit + dsp_flds = [e[1] for e in exprl if e[0] == dsp_group] + hit_flds = [e[1] for e in exprl if e[0] == hit_group] + + store = LH5Store() + hit_dict, dsp_dict = {}, {} + if len(hit_flds) > 0: + hit_ak = store.read( + f"{ch.replace('/','')}/{hit_group}/", f_hit, field_mask=hit_flds, idx=idx_ch + )[0].view_as("ak") + hit_dict = dict( + zip([f"{hit_group}_" + e for e in ak.fields(hit_ak)], ak.unzip(hit_ak)) + ) + if len(dsp_flds) > 0: + dsp_ak = store.read( + f"{ch.replace('/','')}/{dsp_group}/", f_dsp, field_mask=dsp_flds, idx=idx_ch + )[0].view_as("ak") + dsp_dict = dict( + zip([f"{dsp_group}_" + e for e in ak.fields(dsp_ak)], ak.unzip(dsp_ak)) + ) + + return hit_dict | dsp_dict + + +def get_data_at_channel( + ch: str, + ids: NDArray, + idx: NDArray, + expr: str, + exprl: list, + var_ph: dict, + is_evaluated: bool, + f_hit: str, + f_dsp: str, + defv, + tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", +) -> np.ndarray: + """Evaluates an expression and returns the result. + + Parameters + ---------- + ch + "rawid" of channel to be evaluated. + idx + `tcm` index array. + ids + `tcm` id array. + expr + expression to be evaluated. + exprl + list of parameter-tuples ``(root_group, field)`` found in the expression. + var_ph + dict of additional parameters that are not channel dependent. + is_evaluated + if false, the expression does not get evaluated but an array of default + values is returned. + f_hit + path to `hit` tier file. + f_dsp + path to `dsp` tier file. + defv + default value. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. 
+ evt_group + LH5 root group in evt file. + """ + + # get index list for this channel to be loaded + idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] + outsize = len(idx_ch) + + if not is_evaluated: + res = np.full(outsize, defv, dtype=type(defv)) + elif "tcm.array_id" == expr: + res = np.full( + outsize, get_tcm_id_by_pattern(tcm_id_table_pattern, ch), dtype=int + ) + elif "tcm.index" == expr: + res = np.where(ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch))[0] + else: + var = find_parameters( + f_hit=f_hit, + f_dsp=f_dsp, + ch=ch, + idx_ch=idx_ch, + exprl=exprl, + hit_group=hit_group, + dsp_group=dsp_group, + ) + + if var_ph is not None: + var = var | var_ph + + # evaluate expression + # move tier+dots in expression to underscores (e.g. evt.foo -> evt_foo) + res = eval( + expr.replace(f"{dsp_group}.", f"{dsp_group}_") + .replace(f"{hit_group}.", f"{hit_group}_") + .replace(f"{evt_group}.", ""), + var, + ) + + # in case the expression evaluates to a single value blow it up + if (not hasattr(res, "__len__")) or (isinstance(res, str)): + return np.full(outsize, res) + + # the resulting arrays need to be 1D from the operation, + # this can only change once we support larger than two dimensional LGDOs + # ak.to_numpy() raises error if array not regular + res = ak.to_numpy(res, allow_missing=False) + + # in this method only 1D values are allowed + if res.ndim > 1: + raise ValueError( + f"expression '{expr}' must return 1D array. If you are using VectorOfVectors or ArrayOfEqualSizedArrays, use awkward reduction functions to reduce the dimension" + ) + + return res + + +def get_mask_from_query( + qry: str | NDArray, + length: int, + ch: str, + idx_ch: NDArray, + f_hit: str, + f_dsp: str, + hit_group: str = "hit", + dsp_group: str = "dsp", +) -> np.ndarray: + """Evaluates a query expression and returns a mask accordingly. + + Parameters + ---------- + qry + query expression. + length + length of the return mask. 
+ ch + "rawid" of channel to be evaluated. + idx_ch + channel indices to be read. + f_hit + path to `hit` tier file. + f_dsp + path to `dsp` tier file. + hit_group + LH5 root group in hit file. + dsp_group + LH5 root group in dsp file. + """ + + # get sub evt based query condition if needed + if isinstance(qry, str): + qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) + qry_var = find_parameters( + f_hit=f_hit, + f_dsp=f_dsp, + ch=ch, + idx_ch=idx_ch, + exprl=qry_lst, + hit_group=hit_group, + dsp_group=dsp_group, + ) + limarr = eval( + qry.replace(f"{dsp_group}.", f"{dsp_group}_").replace( + f"{hit_group}.", f"{hit_group}_" + ), + qry_var, + ) + + # in case the expression evaluates to a single value blow it up + if (not hasattr(limarr, "__len__")) or (isinstance(limarr, str)): + return np.full(len(idx_ch), limarr) + + limarr = ak.to_numpy(limarr, allow_missing=False) + if limarr.ndim > 1: + raise ValueError( + f"query '{qry}' must return 1D array. If you are using VectorOfVectors or ArrayOfEqualSizedArrays, use awkward reduction functions to reduce the dimension" + ) + + # or forward the array + elif isinstance(qry, np.ndarray): + limarr = qry + + # if no condition, it must be true + else: + limarr = np.ones(length).astype(bool) + + # explicit cast to bool + if limarr.dtype != bool: + limarr = limarr.astype(bool) + + return limarr \ No newline at end of file From 2c5b9b49cef22c7566bafd112b6e07049865f6bf Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 26 Jan 2024 16:02:42 +0100 Subject: [PATCH 71/73] agnostify also skm tier --- src/pygama/evt/aggregators.py | 13 +++++++---- src/pygama/evt/utils.py | 10 +++++--- src/pygama/skm/build_skm.py | 43 ++++++++++++++++++++++++----------- 3 files changed, 45 insertions(+), 21 deletions(-) diff --git a/src/pygama/evt/aggregators.py b/src/pygama/evt/aggregators.py index f9131ed96..b95db1b5e 100644 --- a/src/pygama/evt/aggregators.py +++ b/src/pygama/evt/aggregators.py @@ -3,15 +3,16 @@ """ from __future__ 
import annotations -import re -import numpy as np -from numpy.typing import NDArray + import awkward as ak +import numpy as np +from lgdo import Array, ArrayOfEqualSizedArrays, VectorOfVectors, lh5 from lgdo.lh5 import LH5Store -from lgdo import Array, ArrayOfEqualSizedArrays, Table, VectorOfVectors, lh5 +from numpy.typing import NDArray from . import utils + def evaluate_to_first_or_last( idx: NDArray, ids: NDArray, @@ -308,7 +309,9 @@ def evaluate_at_channel( for ch in np.unique(ch_comp.nda.astype(int)): # skip default value - if utils.get_table_name_by_pattern(tcm_id_table_pattern, ch) not in lh5.ls(f_hit): + if utils.get_table_name_by_pattern(tcm_id_table_pattern, ch) not in lh5.ls( + f_hit + ): continue idx_ch = idx[ids == ch] res = utils.get_data_at_channel( diff --git a/src/pygama/evt/utils.py b/src/pygama/evt/utils.py index fe1e35e94..175cd868a 100644 --- a/src/pygama/evt/utils.py +++ b/src/pygama/evt/utils.py @@ -3,11 +3,14 @@ """ from __future__ import annotations + import re -import numpy as np -from numpy.typing import NDArray + import awkward as ak +import numpy as np from lgdo.lh5 import LH5Store +from numpy.typing import NDArray + def get_tcm_id_by_pattern(tcm_id_table_pattern: str, ch: str) -> int: pre = tcm_id_table_pattern.split("{")[0] @@ -43,6 +46,7 @@ def num_and_pars(value: str, par_dic: dict): pass return value + def find_parameters( f_hit: str, f_dsp: str, @@ -275,4 +279,4 @@ def get_mask_from_query( if limarr.dtype != bool: limarr = limarr.astype(bool) - return limarr \ No newline at end of file + return limarr diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index 049012985..e1f9add39 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -14,6 +14,8 @@ from lgdo import Array, Table, lh5 from lgdo.lh5 import LH5Store +from pygama.evt import utils + log = logging.getLogger(__name__) @@ -25,7 +27,12 @@ def build_skm( f_skm: str, skm_conf: dict | str, wo_mode="w", - group: str = "/skm/", + 
skm_group: str = "skm", + evt_group: str = "evt", + tcm_group: str = "hardware_tcm_1", + dsp_group: str = "dsp", + hit_group: str = "hit", + tcm_id_table_pattern: str = "ch{}", ) -> None: """Builds a skimmed file from a (set) of evt/hit/dsp tier file(s). @@ -89,10 +96,21 @@ def build_skm( - ``append`` or ``a``: append to file. - ``overwrite`` or ``o``: replaces existing file. - group - LH5 root group name (only used if ``skim_format`` is ``lh5``). + skm_group + skm LH5 root group name. + evt_group + evt LH5 root group name. + hit_group + hit LH5 root group name. + dsp_group + dsp LH5 root group name. + tcm_group + tcm LH5 root group name. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. """ - f_dict = {"evt": f_evt, "hit": f_hit, "dsp": f_dsp, "tcm": f_tcm} + f_dict = {evt_group: f_evt, hit_group: f_hit, dsp_group: f_dsp, tcm_group: f_tcm} log = logging.getLogger(__name__) log.debug(f"I am skimming {len(f_evt) if isinstance(f_evt,list) else 1} files") @@ -122,11 +140,9 @@ def build_skm( miss_val = eval(miss_val) fw_fld = tbl_cfg["operations"][op]["forward_field"].split(".") - if fw_fld[0] not in ["evt", "hit", "dsp", "tcm"]: - raise ValueError(f"{fw_fld[0]} is not a valid tier") # load object if from evt tier - if fw_fld[0] == "evt": + if fw_fld[0] == evt_group: obj = store.read(f"/{fw_fld[0]}/{fw_fld[1]}", f_dict[fw_fld[0]])[ 0 ].view_as("ak") @@ -145,10 +161,10 @@ def build_skm( obj = ak.Array([[] for x in range(len(tcm_idx))]) # load TCM data to define an event - ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("ak") + ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("ak") ids = ak.unflatten(ids[ak.flatten(tcm_idx)], ak.count(tcm_idx, axis=-1)) - idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("ak") + idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("ak") idx = ak.unflatten(idx[ak.flatten(tcm_idx)], ak.count(tcm_idx, 
axis=-1)) if "tcm.array_id" == tbl_cfg["operations"][op]["forward_field"]: @@ -167,13 +183,14 @@ def build_skm( ct_idx = ak.count(ch_idx, axis=-1) fl_idx = ak.to_numpy(ak.flatten(ch_idx), allow_missing=False) - if f"ch{ch}/{fw_fld[0]}/{fw_fld[1]}" not in lh5.ls( - f_dict[fw_fld[0]], f"ch{ch}/{fw_fld[0]}/" + if ( + f"{utils.get_table_name_by_pattern(tcm_id_table_pattern,ch)}/{fw_fld[0]}/{fw_fld[1]}" + not in lh5.ls(f_dict[fw_fld[0]], f"ch{ch}/{fw_fld[0]}/") ): och = Array(nda=np.full(len(fl_idx), miss_val)) else: och, _ = store.read( - f"ch{ch}/{fw_fld[0]}/{fw_fld[1]}", + f"{utils.get_table_name_by_pattern(tcm_id_table_pattern,ch)}/{fw_fld[0]}/{fw_fld[1]}", f_dict[fw_fld[0]], idx=fl_idx, ) @@ -216,4 +233,4 @@ def build_skm( raise FileExistsError(f"Write_safe mode: {f_skm} exists.") wo = wo_mode if wo_mode not in ["o", "overwrite"] else "of" - store.write(obj=table, name=group, lh5_file=f_skm, wo_mode=wo) + store.write(obj=table, name=f"/{skm_group}/", lh5_file=f_skm, wo_mode=wo) From 013dc04c183a49f597a1dbd9e4d30d55acd725ae Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 30 Jan 2024 09:11:19 +0100 Subject: [PATCH 72/73] updated etc classifier --- src/pygama/evt/modules/spm.py | 11 +++++++++++ tests/evt/configs/module-test-evt-config.json | 2 +- tests/evt/configs/module-test-t0-vov-evt-config.json | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 9539c49f2..2dc5a4290 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -368,6 +368,7 @@ def get_majority_dplms( # trail = 1: Singlet window = [t_first_lar_pulse, t_first_lar_pulse+ swin] # trail = 2: Like trail = 1, but t_first_lar_pulse <= tge is ensured # min_first_pls_ene sets the minimum energy of the first pulse (only used in trail > 0) +# max_per_channel, maximum number of pes a channel is allowed to have, if above it gets excluded def get_etc( f_hit, f_dsp, @@ -385,6 +386,7 @@ def get_etc( 
swin, trail, min_first_pls_ene, + max_per_channel, ) -> Array: # load TCM data to define an event store = LH5Store() @@ -419,9 +421,18 @@ def get_etc( mask = get_spm_mask(lim, tge, tmin, tmax, pe, times) pe = pe[mask] + + # max pe mask + max_pe_mask = ak.nansum(pe, axis=-1) < max_per_channel + pe = ak.drop_none( + ak.nan_to_none(ak.where(max_pe_mask, pe, ak.Array([[np.nan]]))) + ) pe_lst.append(pe) times = times[mask] * 16 + times = ak.drop_none( + ak.nan_to_none(ak.where(max_pe_mask, times, ak.Array([[np.nan]]))) + ) time_lst.append(times) pe_all = ak.concatenate(pe_lst, axis=-1) diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index 6aba3bf75..0daa94658 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -51,7 +51,7 @@ "lar_classifier": { "channels": "spms_on", "aggregation_mode": "function", - "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1,0)" + "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1,0,50)" }, "lar_energy_dplms": { "channels": "spms_on", diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index 5d1c6f256..cda042337 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -51,7 +51,7 @@ "lar_classifier": { "channels": "spms_on", "aggregation_mode": "function", - "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1,0)" + "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1,0,50)" }, "lar_energy_dplms": { "channels": "spms_on", From 78134ca398df9303aa3326a18cee9877b4dd1685 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 30 Jan 2024 10:46:39 +0100 Subject: [PATCH 73/73] Docstring cosmetics --- src/pygama/evt/aggregators.py | 68 +++++++++++++++++------------------ src/pygama/evt/build_evt.py | 32 ++++++++--------- 
src/pygama/skm/build_skm.py | 60 +++++++++++++++---------------- 3 files changed, 80 insertions(+), 80 deletions(-) diff --git a/src/pygama/evt/aggregators.py b/src/pygama/evt/aggregators.py index b95db1b5e..993c0ffe6 100644 --- a/src/pygama/evt/aggregators.py +++ b/src/pygama/evt/aggregators.py @@ -67,14 +67,14 @@ def evaluate_to_first_or_last( is_first defines if sorted by smallest or largest value of `sorter` tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + pattern to format `tcm` id values to table name in higher tiers. Must have one + placeholder which is the `tcm` id. dsp_group - LH5 root group in dsp file. + LH5 root group in `dsp` file. hit_group - LH5 root group in hit file. + LH5 root group in `hit` file. evt_group - LH5 root group in evt file. + LH5 root group in `evt` file. """ # define dimension of output array @@ -167,9 +167,9 @@ def evaluate_to_scalar( mode aggregation mode. idx - tcm index array. + `tcm` index array. ids - tcm id array. + `tcm` id array. f_hit path to `hit` tier file. f_dsp @@ -181,24 +181,24 @@ def evaluate_to_scalar( expr expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier, field). + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. qry query expression to mask aggregation. nrows length of output array var_ph - dictionary of evt and additional parameters and their values. + dictionary of `evt` and additional parameters and their values. defv default value. tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + pattern to format `tcm` id values to table name in higher tiers. Must have one + placeholder which is the `tcm` id. dsp_group - LH5 root group in dsp file. + LH5 root group in `dsp` file. hit_group - LH5 root group in hit file. + LH5 root group in `hit` file. 
evt_group - LH5 root group in evt file. + LH5 root group in `evt` file. """ # define dimension of output array @@ -295,14 +295,14 @@ def evaluate_at_channel( defv default value. tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + pattern to format `tcm` id values to table name in higher tiers. Must have one + placeholder which is the `tcm` id. dsp_group - LH5 root group in dsp file. + LH5 root group in `dsp` file. hit_group - LH5 root group in hit file. + LH5 root group in `hit` file. evt_group - LH5 root group in evt file. + LH5 root group in `evt` file. """ out = np.full(len(ch_comp.nda), defv, dtype=type(defv)) @@ -379,14 +379,14 @@ def evaluate_at_channel_vov( defv default value. tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + pattern to format `tcm` id values to table name in higher tiers. Must have one + placeholder which is the `tcm` id. dsp_group - LH5 root group in dsp file. + LH5 root group in `dsp` file. hit_group - LH5 root group in hit file. + LH5 root group in `hit` file. evt_group - LH5 root group in evt file. + LH5 root group in `evt` file. """ # blow up vov to aoesa @@ -486,14 +486,14 @@ def evaluate_to_aoesa( sorter sorts the entries in the vector according to sorter expression. tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + pattern to format `tcm` id values to table name in higher tiers. Must have one + placeholder which is the `tcm` id. dsp_group - LH5 root group in dsp file. + LH5 root group in `dsp` file. hit_group - LH5 root group in hit file. + LH5 root group in `hit` file. evt_group - LH5 root group in evt file. + LH5 root group in `evt` file. 
""" # define dimension of output array out = np.full((nrows, len(chns)), missv) @@ -592,14 +592,14 @@ def evaluate_to_vector( ``ascend_by:`` results in an vector ordered ascending, ``decend_by:`` sorts descending. tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + pattern to format `tcm` id values to table name in higher tiers. Must have one + placeholder which is the `tcm` id. dsp_group - LH5 root group in dsp file. + LH5 root group in `dsp` file. hit_group - LH5 root group in hit file. + LH5 root group in `hit` file. evt_group - LH5 root group in evt file. + LH5 root group in `evt` file. """ out = evaluate_to_aoesa( idx=idx, diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index e0c0dafb3..66489c38c 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -39,11 +39,11 @@ def build_evt( Parameters ---------- f_tcm - input LH5 file of the tcm level. + input LH5 file of the `tcm` level. f_dsp - input LH5 file of the dsp level. + input LH5 file of the `dsp` level. f_hit - input LH5 file of the hit level. + input LH5 file of the `hit` level. f_evt name of the output file. evt_config @@ -111,16 +111,16 @@ def build_evt( wo_mode writing mode. evt group - LH5 root group name of evt tier. + LH5 root group name of `evt` tier. tcm_group - LH5 root group in tcm file. + LH5 root group in `tcm` file. dsp_group - LH5 root group in dsp file. + LH5 root group in `dsp` file. hit_group - LH5 root group in hit file. + LH5 root group in `hit` file. tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + pattern to format `tcm` id values to table name in higher tiers. Must + have one placeholder which is the `tcm` id. """ store = LH5Store() @@ -362,7 +362,7 @@ def evaluate_expression( nrows number of rows to be processed. table - table of 'evt' tier data. 
+ table of `evt` tier data. para dictionary of parameters defined in the ``parameters`` field in the configuration dictionary. @@ -372,16 +372,16 @@ def evaluate_expression( can be used to sort vector outputs according to sorter expression (see :func:`evaluate_to_vector`). tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the `tcm` id. evt group - LH5 root group name of evt tier. + LH5 root group name of `evt` tier. tcm_group - LH5 root group in tcm file. + LH5 root group in `tcm` file. dsp_group - LH5 root group in dsp file. + LH5 root group in `dsp` file. hit_group - LH5 root group in hit file. + LH5 root group in `hit` file. """ store = LH5Store() diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index e1f9add39..a92619b83 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -34,7 +34,7 @@ def build_skm( hit_group: str = "hit", tcm_id_table_pattern: str = "ch{}", ) -> None: - """Builds a skimmed file from a (set) of evt/hit/dsp tier file(s). + """Builds a skimmed file from a (set) of `evt/hit/dsp` tier file(s). Parameters ---------- @@ -65,28 +65,28 @@ def build_skm( .. 
code-block:: json - { - "multiplicity": 2, - "postfixes":["","aux"], - "operations": { - "timestamp":{ - "forward_field": "evt.timestamp" - }, - "multiplicity":{ - "forward_field": "evt.multiplicity" - }, - "energy":{ - "forward_field": "hit.cuspEmax_ctc_cal", - "missing_value": "np.nan", - "tcm_idx": "evt.energy_idx" - }, - "energy_id":{ - "forward_field": "tcm.array_id", - "missing_value": 0, - "tcm_idx": "evt.energy_idx" - } - } - } + { + "multiplicity": 2, + "postfixes":["", "aux"], + "operations": { + "timestamp":{ + "forward_field": "evt.timestamp" + }, + "multiplicity":{ + "forward_field": "evt.multiplicity" + }, + "energy":{ + "forward_field": "hit.cuspEmax_ctc_cal", + "missing_value": "np.nan", + "tcm_idx": "evt.energy_idx" + }, + "energy_id":{ + "forward_field": "tcm.array_id", + "missing_value": 0, + "tcm_idx": "evt.energy_idx" + } + } + } wo_mode writing mode. @@ -97,18 +97,18 @@ def build_skm( - ``overwrite`` or ``o``: replaces existing file. skm_group - skm LH5 root group name. + `skm` LH5 root group name. evt_group - evt LH5 root group name. + `evt` LH5 root group name. hit_group - hit LH5 root group name. + `hit` LH5 root group name. dsp_group - dsp LH5 root group name. + `dsp` LH5 root group name. tcm_group - tcm LH5 root group name. + `tcm` LH5 root group name. tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + pattern to format `tcm` id values to table name in higher tiers. Must have one + placeholder which is the `tcm` id. """ f_dict = {evt_group: f_evt, hit_group: f_hit, dsp_group: f_dsp, tcm_group: f_tcm} log = logging.getLogger(__name__)