-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathcalc_sumw.py
64 lines (52 loc) · 1.8 KB
/
calc_sumw.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from operator import ne
import ROOT
import argparse
import yaml
import os, sys
import time
import glob
from tqdm import tqdm
from multiprocessing import Pool, current_process, RLock
import uproot
import pandas as pd
def base_filename(path):
    """Return the part of ``path`` after its last ``/``.

    A path without any ``/`` is returned unchanged; a path that ends in
    ``/`` yields an empty string.
    """
    _, _, tail = path.rpartition("/")
    return tail
def job_wrapper(args):
    """Call ``calc`` with the positional arguments packed in ``args``.

    Adapter for mapping APIs that hand each work item to the worker as a
    single object: ``args`` is unpacked and forwarded unchanged, and
    whatever ``calc`` returns is passed back to the caller.
    """
    return calc(*args)
def calc(path, dataset_proc):
    """Print event-count and generator-weight statistics for one sample.

    Reads the ``genweight`` branch of the ``ntuple`` tree from every file
    matching ``{path}/mm/*.root`` and prints a block with the number of
    events, the sum of weights, the sum of weight signs, the fraction of
    negatively weighted events and the resulting generator weight.

    Args:
        path: Sample directory; its last path component labels the output.
        dataset_proc: Dataset entry belonging to this sample. It is not
            used by the computation and is kept for interface stability.

    Returns:
        None. The statistics are written to standard output only.
    """
    # Only ``genweight`` is needed, so restrict the read to that branch
    # instead of materialising every branch of the tree in memory.
    df = uproot.concatenate(
        f"{path}/mm/*.root:ntuple",
        filter_name="genweight",
        num_workers=8,
        library="pd",
    )
    nevents = len(df.index)
    # assert nevents == dataset_proc["nevents"]

    # Convert once and reuse; no helper column is written back into ``df``.
    weights = df["genweight"].astype("float64")
    sumw = weights.sum()

    # Sum of weight signs: +1 per positive and -1 per negative weight.
    # Zero (and NaN) weights contribute nothing, which matches what the
    # NaN-skipping sum of ``w / abs(w)`` produced.
    sumwnorm = float((weights > 0).sum() - (weights < 0).sum())

    # An empty sample has no defined mean sign; report NaN instead of
    # dividing by zero.
    mean_sign = sumwnorm / nevents if nevents else float("nan")
    negfrac = 0.5 * (1.0 - mean_sign)
    generator_weight = 1.0 - 2.0 * negfrac

    outstr = (
        f"\n{base_filename(path)}\n"
        f"nevents: {nevents:.10f}\n"
        f"sumw: {sumw:.10f}\n"
        f"sumwnorm: {sumwnorm:.10f}\n"
        f"negfrac: {negfrac:.10f}\n"
        f"generator_weight: {generator_weight:.10f}\n"
    )
    # Flush right away so the report is emitted promptly even when stdout
    # is block-buffered.
    print(outstr, flush=True)
def calc_sumw(dataset, ntuples, nthreads):
    """Run ``calc`` for every ntuple directory in a pool of processes.

    Args:
        dataset: Mapping from sample name (the last path component of an
            ntuple directory) to that sample's dataset entry.
        ntuples: Iterable of ntuple directory paths to process.
        nthreads: Requested number of worker processes. It is clamped to
            the range ``1 .. number of work items``.

    Returns:
        None. Each worker prints its own report.

    Raises:
        KeyError: If ``dataset`` has no entry for one of the ntuples.
    """
    arguments = [(ntuple, dataset[base_filename(ntuple)]) for ntuple in ntuples]
    if not arguments:
        # Nothing to do; this also avoids Pool(0), which raises ValueError.
        return

    workers = max(1, min(nthreads, len(arguments)))
    # The context manager shuts the pool down once all results have been
    # consumed, so no worker processes are left behind.
    with Pool(workers, initializer=tqdm.set_lock, initargs=(RLock(),)) as pool:
        # Results are not used; draining the iterator waits for every job
        # and re-raises any exception that occurred in a worker.
        for _ in pool.imap_unordered(job_wrapper, arguments):
            pass
if __name__ == "__main__":
    # Glob pattern selecting one directory per sample.
    base_path = "/ceph/jdriesch/CROWN_samples/Run3V07_sumw/ntuples/2022/*"

    # NOTE(review): yaml.Loader can construct arbitrary Python objects.
    # That is acceptable for a trusted local file; consider yaml.safe_load
    # if datasets.yaml may ever come from an untrusted source.
    with open("datasets.yaml") as dataset_file:
        dataset = yaml.load(dataset_file, Loader=yaml.Loader)

    ntuples = glob.glob(base_path)
    # Skip every path containing "Run20" (presumably recorded-data samples,
    # judging by the variable name - confirm).
    ntuples_wo_data = [ntuple for ntuple in ntuples if "Run20" not in ntuple]

    # Use at most 16 workers, never more than there are samples, and never
    # fewer than one so an empty glob does not request a zero-size pool.
    nthreads = max(1, min(16, len(ntuples_wo_data)))
    calc_sumw(dataset, ntuples_wo_data, nthreads)