#!/usr/bin/env python3
# make_plots.py -- forked from DUNE/larnd-sim
import argparse
import pickle
import h5py
import matplotlib as mpl
import matplotlib.pyplot as plt
from cycler import cycler
import numpy as np
from glob import glob
import os, sys
from optimize.ranges import ranges
import scipy
from scipy.ndimage import uniform_filter1d
# ----------------- Constants ----------------------- #
# Replace matplotlib's default color cycle with a longer one so that plots with
# many curves do not reuse colors. Hex color reference: https://www.colorhexa.com/
hexcolors = [
    '1f77b4',  # grey-blue
    'ff7f0e',  # orange
    '2ca02c',  # green
    'd62728',  # mid red
    '9467bd',  # purple
    '8c564b',  # brown
    'ffc0cb',  # pink
    '696969',  # dark grey
    '00008b',  # dark blue
    'ffaa88',  # peach
    '66ffaa',  # lime
    '8b0000',  # dark red
    '17becf',  # cyan
    '800080',  # purple
    'bbbbbb',  # light grey
    '87ceeb',  # sky blue
    'bcbd22',  # grey-yellow
    'eeee00',  # yellow
    '000000',  # black
]
_cycle_rgba = [mpl.colors.to_rgba('#' + code) for code in hexcolors]
mpl.rcParams['axes.prop_cycle'] = cycler('color', _cycle_rgba)
# Input samples used when making dEdx histograms.
# h5_dict is the directory (not a dictionary) holding the locally produced cuts.
h5_dict = '/sdf/home/b/bkroul/l-sim/h5/'
# short sample name -> path of the h5 file it refers to
files_dict = {
    'muon': f'{h5_dict}old_data_cut.h5',
    'proton': f'{h5_dict}first_data_cut_high_dEdx.h5',
    'dEdx > 5': f'{h5_dict}proton_min-dEdx5.h5',
    'dEdx < 2': f'{h5_dict}proton_max-dEdx2.h5',
    'proton_no_nuclei': f'{h5_dict}proton_dEdx_no_nuclei.h5',
    'muon_raw': '/fs/ddn/sdf/group/neutrino/cyifan/muon-sim/fake_data_S1/edepsim-output.h5',
    'proton_raw': '/fs/ddn/sdf/group/neutrino/cyifan/muon-sim/larndsim_output/f1_1000_p_high_KE/edepsim-output.h5',
}
# labels for y-axis when plotting parameter iterations (matplotlib mathtext).
# Raw strings are used so that TeX commands such as \mu and \tau are not read as
# Python escape sequences ("\m" is an invalid escape and triggers a warning).
labels = {
    'Ab': r"$A_{B}$",
    'kb': r"$k_{B}$ [g/cm$^2$/MeV]",
    'lifetime': r"$\tau$ [$\mu s$]",
    'long_diff': r"$D_{L}$ [$cm^2/\mu s$]",
    'tran_diff': r"$D_{T}$ [$cm^2/\mu s$]",
    # 'vdrift': r'vdrift [$cm/\mu s$]',  # we use link_vdrift_efield now
    'eField': r"$\epsilon$ [kV/cm]",
}
# PDG id -> short printable name, for every particle found in the input data so far.
# plot_dEdxs() adds entries at run time for nuclei that are not listed here.
pdgIds = {
    # quarks
    1: "d", 2: "u", 3: "s", 4: "c", 5: "b", 6: "t", 7: "b'", 8: "t'",
    # leptons
    -11: "e+", 11: "e-", 12: "νe", -13: "µ+", 13: "µ-", 14: "νµ",
    15: "τ-", 16: "ντ", 17: "τ'-", 18: "ντ'",
    # bosons, graviton
    21: "g", 22: "γ", 23: "Z0", 24: "W+", 37: "H+", 39: "G",
    # pions
    111: "π0", 211: "π+", -211: "π-",
    # strange mesons
    321: "K+",
    # light baryons, strange baryons
    2112: "n", 2212: "p", 3112: "Σ−", 3122: "Λ", 3222: "Σ+",
    # nuclei, keyed by the 10LZZZAAAI nuclear code
    1000010020: "deuteron", 1000010030: "triton", 1000010040: "H4",
    1000020030: "He3", 1000020040: "He4", 1000020060: "He6",
    1000030040: 'Li4', 1000030060: 'Li6', 1000030070: 'Li7',
    1000040080: 'Be8', 1000040090: 'Be9', 1000040100: 'Be10',
    1000050080: 'B8', 1000050100: 'B10', 1000050110: 'B11',
    1000060110: 'C11', 1000060120: 'C12', 1000060130: 'C13', 1000060140: 'C14',
    1000070120: 'N12', 1000070140: 'N14', 1000070150: 'N15',
    1000080160: 'O16', 1000080170: 'O17', 1000080180: 'O18', 1000080190: 'O19', 1000080200: 'O20',
    1000090170: 'F17', 1000090180: 'F18', 1000090190: 'F19', 1000090200: 'F20',
    1000100200: 'Ne20', 1000100210: 'Ne21', 1000100220: 'Ne22', 1000100230: 'Ne23', 1000100240: 'Ne24',
    1000110220: 'Na22', 1000110230: 'Na23', 1000110240: 'Na24', 1000110241: 'Na24', 1000110250: 'Na25',
    1000120230: 'Mg23', 1000120240: 'Mg24', 1000120250: 'Mg25', 1000120260: 'Mg26', 1000120270: 'Mg27', 1000120280: 'Mg28',
    1000130260: 'Al26', 1000130270: 'Al27', 1000130280: 'Al28', 1000130290: 'Al29', 1000130300: 'Al30', 1000130310: 'Al31',
    1000140270: 'Si27', 1000140280: 'Si28', 1000140290: 'Si29', 1000140300: 'Si30', 1000140310: 'Si31', 1000140320: 'Si32', 1000140330: 'Si33',
    1000150300: 'P30', 1000150310: 'P31', 1000150320: 'P32', 1000150330: 'P33', 1000150340: 'P34', 1000150350: 'P35', 1000150360: 'P36',
    1000160320: 'S32', 1000160330: 'S33', 1000160340: 'S34', 1000160350: 'S35', 1000160360: 'S36', 1000160370: 'S37', 1000160380: 'S38',
    1000170340: 'Cl34', 1000170350: 'Cl35', 1000170360: 'Cl36', 1000170370: 'Cl37', 1000170380: 'Cl38', 1000170389: 'Cl38', 1000170390: 'Cl39', 1000170400: 'Cl40',
    1000180350: 'Ar35', 1000180360: 'Ar36', 1000180370: 'Ar37', 1000180380: 'Ar38', 1000180390: 'Ar39', 1000180400: "Ar40", 1000180410: 'Ar41',
    1000190380: 'K38', 1000190390: 'K39', 1000190400: 'K40', 1000190410: 'K41',
}
# atoms[proton number] -> element symbol; index 0 is the placeholder "0"
atoms = [
    "0",
    "H", "He",
    "Li", "Be", "B", "C", "N", "O", "F", "Ne",
    "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar",
    "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn",
    "Ga", "Ge", "As", "Se", "Br", "Kr",
]
# ----------------------- Utility Functions ---------------------- #
def nucleus_to_label(pdgId):
    """
    Returns "Ar40"-esque labels of atom + num baryons for nuclei
    Labeling scheme from https://pdg.lbl.gov/2007/reviews/montecarlorpp.pdf
        10LZZZAAAI -- pdgId nuclear code
        L = num lambda baryons
        ZZZ = num protons = atomic number
        AAA = total num baryons = protons + neutrons + lambda baryons
        I = isomer number = excitation level, 0 for ground state
    - pdgId = anything whose str() is the particle code (python int, numpy int, str)
    Returns "N/A" (and prints a notice) when pdgId is not a ten digit code starting with "10".
    Elements whose atomic number is beyond the `atoms` table are labelled "Z<Z>A<A>"
    instead of raising an IndexError.
    """
    code = str(pdgId)
    # a nuclear code is exactly ten digits and always begins with "10"
    if len(code) != 10 or not code.isdigit() or not code.startswith('10'):
        print(f"pdgId {code} is not a nucleus!")
        return "N/A"
    # int() removes leading zeroes
    Z = int(code[3:6])
    A = int(code[6:9])
    if Z >= len(atoms):
        return f"Z{Z}A{A}"
    return f"{atoms[Z]}{A}"
def tolerant_mean(arrays):
    """
    Returns the mean, max, and min taken across a collection of 1D numpy arrays
    that may have different lengths.
    Position i of each result only uses the arrays that are long enough to have
    an entry i; the missing entries of shorter arrays stay masked.
    The three results are numpy masked arrays as long as the longest input.
    """
    sizes = [a.size for a in arrays]
    longest = np.max(sizes)
    # one column per input array, everything masked until it is filled in
    table = np.ma.empty((longest, len(arrays)))
    table.mask = True
    for column, values in enumerate(arrays):
        table[:sizes[column], column] = values
    mean_vals = table.mean(axis=-1)
    max_vals = table.max(axis=-1)
    min_vals = table.min(axis=-1)
    return mean_vals, max_vals, min_vals
def smooth(array, length):
    """
    Smooths a 1D numpy array with a running mean, similar in spirit to
    scipy.ndimage.uniform_filter1d(array, length), while leaving np.inf entries
    out of every average.
    Output sample i is the mean of array[i - length//2 : i + length//2]. Where that
    window runs past the start it is padded with array[0 : length//2 - i]; where it
    reaches the end it is padded with array[2*len(array) - 1 - i - length//2 :].
    Returns a new float array of the same length as array.
    """
    n_samples = len(array)
    half = length // 2
    smoothed = np.zeros(n_samples)
    for idx in range(n_samples):
        first = max(idx - half, 0)          # first valid index of the window
        last = min(idx + half, n_samples)   # one past the last valid index
        window = array[first:last]
        if first == 0:
            # window clipped at the start: pad with leading samples
            window = np.append(array[0:half - idx], window)
        if last == n_samples:
            # window clipped at the end: pad with trailing samples
            window = np.append(window, array[2*n_samples - 1 - idx - half:n_samples])
        keep = window != np.inf             # ignore np.inf values
        smoothed[idx] = np.mean(window[keep])
    return smoothed
def movingavg(array):
    """
    Returns the cumulative (running) average of array: output i is the mean of
    array[0..i] inclusive, taken over the finite entries only.
    - array = 1D sequence of numbers (list or numpy array)
    np.inf, -np.inf and np.nan entries are ignored, both in the sum and in the
    number of samples the sum is divided by. Positions where no finite sample has
    been seen yet are np.nan.
    """
    values = np.asarray(array, dtype=float)
    finite = np.isfinite(values)  # `x != np.nan` is always True, so test finiteness explicitly
    running_sum = np.cumsum(np.where(finite, values, 0.0))
    running_count = np.cumsum(finite)
    averaged = np.full(values.shape, np.nan)
    # only divide where at least one finite sample contributes
    np.divide(running_sum, running_count, out=averaged, where=running_count > 0)
    return averaged
# ----------------- Plotting Functions ----------------- #
def plot_losses(data, plot_name, unif_len, cut_to_min=False, print_info=False, label=None):
    """
    Plot the simulation loss of every run in data, saved in dat['loss']
    - data = sequence of run records; the fields 'seed', 'seed_init', 'data_seed' and 'loss' are read
    - plot_name = file name the figure is saved to
    - unif_len = smoothing length that was applied to the loss; not used by the plot itself
    - cut_to_min
        = True will cut every loss curve to the minimum num of iterations in data
        = False will plot every loss curve at its full length
    - print_info determines if initial --> final loss is printed for each run in data
    - label = 'seed', 'seed_init' or 'data_seed' picks the quantity that labels each curve;
              any other value labels curves with "seed-seed_init-data_seed"
    Relies on the module globals PLOT_FIGSIZE, LINEWIDTH and PLOT_DPI set in main().
    """
    if len(data) == 0:
        # nothing to draw, and the title below needs at least one run
        print(f"No data to plot for {plot_name}, skipping")
        return
    # None as a slice end keeps the whole curve
    min_iterations = min(len(dat['loss']) for dat in data) if cut_to_min else None
    plt.figure(figsize=PLOT_FIGSIZE)
    for count, dat in enumerate(data):
        seed_init = dat['seed_init']; seed = dat['seed']; data_seed = dat['data_seed']
        loss = dat['loss'][:min_iterations]
        # choose the legend entry from the requested label, same scheme as plot_params
        if label == 'seed':
            curve_label = f"seed {seed}"
        elif label == 'seed_init':
            curve_label = f"iseed {seed_init}"
        elif label == 'data_seed':
            curve_label = f"dtseed {data_seed}"
        else:
            curve_label = f"{seed}-{seed_init}-{data_seed}"
        if print_info and len(loss): print(f"\tloss {curve_label}: {loss[0]}-->{loss[-1]}")
        plt.plot(loss, c=f'C{count}', linewidth=LINEWIDTH, label=curve_label)
        #plt.plot(len(loss)+40, np.mean(loss[-unif_len:]), marker='_', linewidth=LINEWIDTH, markersize=6)
    # the title names the quantity the plotted runs have in common (taken from the last run)
    if label == 'seed':
        title_add = f" Initial Seed {seed_init} "
    elif label == 'seed_init':
        title_add = f" Seed {seed} "
    elif label == 'data_seed':
        title_add = f" Data Seed {data_seed} "
    else:
        title_add = ""
    plt.title(f"Loss{title_add}")
    plt.ylabel('Simulation Loss')
    plt.xlabel('Training Iteration')
    plt.legend(loc='best', fontsize="10")
    plt.tight_layout()
    plt.savefig(plot_name, dpi=PLOT_DPI)
    print(f'Saving plot to {plot_name}')
    plt.close()
def plot_params(data, plot_name, param, label=None):
    """
    Plot the iterations of one fitted parameter for every run in data.
    The values are read from dat['data'][f'{param}_iter'] and the target from
    dat['data'][f'{param}_target'][0]; each distinct target is drawn once as a dashed line.
    - plot_name = file name the figure is saved to
    - param = parameter name, also a key of the module-level `labels` dict
    - label = 'seed', 'seed_init' or 'data_seed' picks the legend text of each curve;
              any other value labels curves with "seed-seed_init-data_seed"
    Relies on the module globals PLOT_FIGSIZE, LINEWIDTH and PLOT_DPI set in main().
    """
    iter_key = f"{param}_iter"
    seen_targets = []
    label_target = False  # switch to True to put the target lines in the legend
    plt.figure(figsize=PLOT_FIGSIZE)
    for count, dat in enumerate(data):
        history = dat['data']
        color = f'C{count}'
        # PLOT TARGET VALUE (once per distinct target)
        target_val = history[f'{param}_target'][0]
        if target_val not in seen_targets:
            seen_targets.append(target_val)
            target_label = "target: %.3e" % target_val if label_target else None
            plt.plot([0, len(history[iter_key])], [target_val]*2, c=color, ls='dashed',
                     label=target_label, linewidth=LINEWIDTH*1.2)
        # plot & label iteration
        seed_init = dat['seed_init']
        seed = dat['seed']
        init_val = history[iter_key][0]
        data_seed = dat['data_seed']
        if label == 'seed':
            curve_label = f"seed {seed}"
        elif label == "seed_init":
            curve_label = "initial val %.3e" % init_val
        elif label == 'data_seed':
            curve_label = f"dtseed {data_seed}"
        else:
            curve_label = f"{seed}-{seed_init}-{data_seed}"
        plt.plot(history[iter_key], c=color, linewidth=LINEWIDTH, label=curve_label)
        # tick mark at the starting value
        plt.plot(0, init_val, c=color, marker='_', linewidth=LINEWIDTH, markersize=12)
    # title names the quantity shared by the plotted runs (values of the last run)
    if label == 'seed':
        title_add = f"Initial Seed {seed_init} "
    elif label == "seed_init":
        title_add = f" Seed {seed} "
    elif label == "data_seed":
        title_add = f" Data Seed {data_seed} "
    else:
        title_add = ""
    plt.title(f"{param} {title_add}Iterations")
    plt.ylabel(f'Fitting {labels[param]}')
    plt.xlabel('Training Iteration')
    #plt.legend(loc='best', fontsize="10") # w/ legend ofc
    plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')
    plt.tight_layout()
    plt.savefig(plot_name, dpi=PLOT_DPI)
    print(f'Saving plot to {plot_name}')
    plt.close()
def plot_convergences(data, plot_name, plot_individual_convergences=True, cut_to_min=False, print_info=False, logy=True, label=None,
                      iter_range=None):
    """
    Plot parameter convergence across all parameters in a run, stored in data['convergence']
    - data = numpy structured array of runs with fields 'seed', 'seed_init', 'data_seed', 'convergence'
    - plot_name = name of the file the plot is saved to
    - plot_individual_convergences
        = True will plot the convergence of each individual run along with the average
        = False will only plot the average, min, & max convergence for the data
    - cut_to_min
        = True will only use convergences up to the minimum num of iterations in data
        = False will use all iterations; avg, min & max at each iteration are taken over
          the runs that reach it (see tolerant_mean)
    - print_info determines if initial --> final convergence info is printed for each run in data
    - logy determines if plot has a logarithmic y axis or not
    - label = 'seed', 'seed_init' or 'data_seed' determines the label description, i.e. which
              quantity differs between the runs in data; anything else labels runs with
              "seed-seed_init-data_seed"
    - iter_range determines iteration (x) range to plot
    Relies on the module globals PLOT_FIGSIZE, LINEWIDTH and PLOT_DPI set in main().
    """
    if len(data) == 0:
        # the statistics and the title below need at least one run
        print(f"No data to plot for {plot_name}, skipping")
        return
    # None as a slice end keeps the whole curve
    min_iterations = min(len(dat['convergence']) for dat in data) if cut_to_min else None
    plt.figure(figsize=PLOT_FIGSIZE)
    if plot_individual_convergences:
        for count, dat in enumerate(data):
            seed = dat['seed']; seed_init = dat['seed_init']; data_seed = dat['data_seed']
            conv = dat['convergence']
            if print_info: print(f"convergence {seed}-{seed_init}: {conv[0]}-->{conv[-1]}")
            if label == 'seed':
                curve_label = f"seed {seed}"
            elif label == 'seed_init':
                curve_label = f"initial seed {seed_init}"
            elif label == 'data_seed':
                curve_label = f"data seed {data_seed}"
            else:
                curve_label = f"{seed}-{seed_init}-{data_seed}"
            plt.plot(conv[:min_iterations], c=f'C{count}', linewidth=LINEWIDTH, label=curve_label)
    if cut_to_min:
        # truncate every run to the common length so they stack into one 2D array
        stacked = np.stack([np.asarray(conv)[:min_iterations] for conv in data['convergence']])
        avg_iter = np.mean(stacked, 0)
        max_iter = np.max(stacked, 0)
        min_iter = np.min(stacked, 0)
    else: # dynamically change min, max, avg
        avg_iter, max_iter, min_iter = tolerant_mean(data['convergence'])
    if not plot_individual_convergences:
        plt.plot(min_iter, color='#6e77ff', linewidth=LINEWIDTH*0.5)
        plt.plot(max_iter, color='#6e77ff', linewidth=LINEWIDTH*0.5)
        plt.plot(avg_iter, color='red', linewidth=LINEWIDTH*2)
    else:
        plt.plot(avg_iter, label='avg', color='red', linewidth=LINEWIDTH*2, ls='dashed')
        plt.legend(loc='best', fontsize='8')
    # shade the band between the min and max convergence
    plt.fill_between(range(len(avg_iter)), min_iter, max_iter, alpha=0.1, color='#6e77ff')
    if logy: plt.yscale('log')
    # title names the quantity shared by the plotted runs
    if label == 'seed':
        title_add = f" Initial Seed {data['seed_init'][0]}"
    elif label == 'seed_init':
        title_add = f" Seed {data['seed'][0]}"
    elif label == 'data_seed':
        title_add = f" Data Seed {data['data_seed'][0]}"
    else:
        title_add = ""
    plt.title(f"Parameter Convergence{title_add}")
    plt.ylabel(f'Convergence to Target Parameters [%]')
    plt.xlabel('Training Iteration')
    if iter_range is not None:
        plt.xlim(iter_range)
    #plt.ylim(np.min(min_iter), np.max(max_iter)) # fit y max, min to max, min of data
    plt.tight_layout()
    plt.savefig(plot_name, dpi=PLOT_DPI)
    print(f'Saving plot to {plot_name}')
    plt.close()
def plot_elements(fname, print_info=False, xlog=True, ylog=True, nbins = 30, e_range=None):
    """
    Histogram the dEdx of all nuclei segments in one h5 file, one histogram per element.
    - fname = key of files_dict naming the file to read; its 'segments' dataset is used
    - print_info = True prints track counts and the nuclei / elements that were found
    - xlog, ylog = logarithmic x / y axis
    - nbins = number of bin edges handed to the histogram
    - e_range = [min, max] dEdx to keep (exclusive); defaults to [1e-10, 1e10] if xlog else [0, 100]
    Only segments with pdgId > 1e6 are kept; the element is read from digits 3:6 of the pdgId.
    Exits the program if no nuclei remain after the cuts.
    Relies on the module globals PLOT_FIGSIZE and PLOT_DPI set in main().
    """
    # get file, tracks (read into memory so the file can be closed right away)
    if print_info: print(f'getting {fname}, {files_dict[fname]}')
    with h5py.File(files_dict[fname], 'r') as hfile:
        tracks = hfile['segments'][:]
    # cut tracks to energy range & only nuclei
    if e_range is None:
        e_range = [1e-10,1e10] if xlog else [0,100]
    if print_info: print(tracks.size,"total tracks")
    tracks = tracks[np.logical_and(tracks['dEdx'] < e_range[1], tracks['dEdx'] > e_range[0])]
    tracks = tracks[tracks['pdgId'] > 1e6]
    if tracks.size == 0:
        sys.exit("No nuclei found in "+fname+"!")
    # get particles & sort by descending number of counts, keeping counts aligned with nuclei
    nuclei, counts = np.unique(tracks['pdgId'], return_counts=True)
    order = np.argsort(-counts, kind='stable')
    nuclei = nuclei[order].tolist()
    counts = counts[order].tolist()
    # atomic number of every track, computed once and reused for all element masks
    track_elements = np.array([int(str(p)[3:6]) for p in tracks['pdgId']])
    elements, element_counts = np.unique(track_elements, return_counts=True)
    elements = elements[np.argsort(-element_counts, kind='stable')].tolist()

    def element_symbol(z):
        # symbol from the atoms table, or a generic name when the table is too short
        return atoms[z] if z < len(atoms) else f"Z{z}"

    if print_info:
        print(tracks.size,"nuclei tracks")
        # nuclei missing from pdgIds get a label derived from their nuclear code
        print(f"nuclei found in {fname}: "+", ".join(f"{pdgIds.get(n) or nucleus_to_label(n)}-{k}" for n, k in zip(nuclei, counts)))
        print(f"elements in {fname}: "+", ".join(element_symbol(e) for e in elements))
    # plot!
    plt.figure(figsize=PLOT_FIGSIZE)
    min_e = np.min(tracks['dEdx'])
    max_e = np.max(tracks['dEdx'])
    if xlog: # set plot xlog, ylog
        plt.xscale('log')
        bins = np.logspace(np.log10(min_e), np.log10(max_e), nbins)
    else:
        bins = np.linspace(min_e, max_e, nbins)
    if ylog: # want to see all particles
        plt.yscale('log')
    else:
        # draw in reverse order on a linear axis
        elements = elements[::-1]
    for c, element in enumerate(elements):
        element_name = element_symbol(element)
        energies = tracks[track_elements == element]['dEdx']
        count = energies.size
        if print_info: print(f"plotting {element_name} {count}")
        # keep each element's color independent of the drawing order
        if not ylog: c = len(elements) - 1 - c
        #weight = [1/ncounts[c]]*counts[c] if normalize else None
        plt.hist(energies, bins=bins, color=f"C{c}", alpha=1, label=f"{element_name}: {count}", stacked=True)
    plot_name = f"plot_dEdx_{fname}_elements_{('T' if xlog else 'F')}{('T' if ylog else 'F')}_{nbins}.png"
    plt.xlabel("dE/dx [MeV/cm]")
    plt.ylabel("# of entries")
    plt.title(f"{fname} elements dEdx, MeV/cm")
    plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')
    plt.tight_layout()
    plt.savefig(plot_name, dpi=PLOT_DPI)
    print("Saved plot to",plot_name,"\n")
    plt.close()
# plot particles, stacked, for a file. plot one for nucleus, one for not
def plot_particle_dEdxs(fname, xlog=True, ylog=True, nbins = 30, e_range=None, nuclei=None, print_info=False):
    """
    Histogram the dEdx of the segments in one h5 file, one histogram per particle type.
    - fname = key of files_dict naming the file to read; its 'segments' dataset is used
    - xlog, ylog = logarithmic x / y axis
    - nbins = number of bin edges handed to the histogram
    - e_range = [min, max] dEdx to keep (exclusive); defaults to [1e-10, 1e10] if xlog else [0, 100]
    - nuclei
        = None plots all particles
        = True plots only nuclei (pdgId > 1e6)
        = False plots only non-nuclei (pdgId < 1e6)
    - print_info = True prints which file is being read
    Segments with pdgId 0 are left out. Particles missing from pdgIds are labelled
    with nucleus_to_label().
    Relies on the module globals PLOT_FIGSIZE and PLOT_DPI set in main().
    """
    # get file, tracks (read into memory so the file can be closed right away)
    if print_info: print(f'getting {fname}, {files_dict[fname]}')
    with h5py.File(files_dict[fname], 'r') as hfile:
        tracks = hfile['segments'][:]
    # cut tracks to energy range
    if e_range is None:
        e_range = [1e-10,1e10] if xlog else [0,100]
    tracks = tracks[np.logical_and(tracks['dEdx'] < e_range[1], tracks['dEdx'] > e_range[0])]
    # cut tracks to include or exclude nuclei
    add = "all"
    if nuclei: # plot only nuclei
        tracks = tracks[tracks['pdgId'] > 1e6]
        add = "nuclei"
    elif nuclei is not None: # plot only non-nuclei
        tracks = tracks[tracks['pdgId'] < 1e6]
        add = "no-nuclei"
    # dont include '0' particle
    tracks = tracks[tracks['pdgId'] != 0]
    if tracks.size == 0:
        print(f"no {add} particles left in {fname} after cuts, skipping plot")
        return
    # get particles & sort by descending number of counts
    particles, particle_counts = np.unique(tracks['pdgId'], return_counts=True)
    particles = particles[np.argsort(-particle_counts, kind='stable')].tolist()
    # printable name of each particle; unknown ids fall back to a nuclear label
    names = {p: (pdgIds.get(p) or nucleus_to_label(p)) for p in particles}
    print(f"particles found in {fname}: "+", ".join(names[p] for p in particles))
    # begin plotting
    min_e = np.min(tracks['dEdx'])
    max_e = np.max(tracks['dEdx'])
    plt.figure(figsize=PLOT_FIGSIZE)
    if xlog: # set plot xlog, ylog
        plt.xscale('log')
        bins = np.logspace(np.log10(min_e), np.log10(max_e), nbins)
    else:
        bins = np.linspace(min_e, max_e, nbins)
    if ylog: # want to see all particles
        plt.yscale('log')
    else:
        # draw in reverse order on a linear axis
        particles = particles[::-1]
    # plot all particles
    for c, p in enumerate(particles):
        # keep each particle's color independent of the drawing order
        if not ylog: c = len(particles) - c - 1
        energies = tracks[tracks['pdgId'] == p]['dEdx']
        count = energies.size
        #weight = [1/ncounts[c]]*counts[c] if normalize else None
        plt.hist(energies, bins=bins, color=f'C{c}', alpha=1, label=f"{names[p]}: {count}", stacked=True)
    # file-name tag: first character of every word of fname
    fadd = ''.join([''.join([j[0] for j in i.split(' ')]) for i in fname.split('_')])
    plot_name = f"plot_dEdx_{fadd}_particles-{add}_{('T' if xlog else 'F')}{('T' if ylog else 'F')}_{nbins}.png"
    plt.xlabel("dE/dx [MeV/cm]")
    plt.ylabel("# of entries")
    plt.title(f"{fname} {add} particles dEdx, MeV/cm")
    plt.legend(loc='best')
    #plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')
    plt.savefig(plot_name, dpi=PLOT_DPI)
    print("Saved plot to",plot_name,"\n")
    plt.close()
# Plot dEdxs comparing one or more edepsim-style h5 files
# if plot_particles, will make plots comparing each file
# for each particle in both files
# normalize = normalize histogram counts relative to size of each set
def plot_dEdxs(fnames, plot_particles=False, print_info=False, normalize=True,
               xlog=False, ylog=True, nbins = 30, e_range=None):
    """
    Plot overlaid dEdx histograms comparing one or more edepsim-style h5 files.
    - fnames = list of files_dict keys; the 'segments' dataset of each file is used
    - plot_particles
        = False makes a single plot with one histogram per file
        = True makes one plot per particle type that is present in at least two files
          (files without a 'pdgId' field are left out; exits if no file has one)
    - print_info = True prints segment lengths, summed dEdx and counts per file
    - normalize = weight every entry by 1 / (number of segments of its file after the dEdx cut)
    - xlog, ylog = logarithmic x / y axis
    - nbins = number of bin edges handed to the histogram
    - e_range = [min, max] dEdx to keep (exclusive); defaults to [1e-4, 1e10] if xlog else [0, 100]
    Particles missing from pdgIds are added to it with a label from nucleus_to_label().
    Relies on the module globals PLOT_FIGSIZE and PLOT_DPI set in main().
    """
    label_max_bin = False # switch to True to write the height of the tallest bin of each file
    numfiles = len(fnames)
    if e_range is None:
        e_range = [1e-4,1e10] if xlog else [0,100]
    # read every file into memory (so it can be closed) and cut to the energy range
    tracks = []
    for f in fnames:
        with h5py.File(files_dict[f], 'r') as hfile:
            segments = hfile['segments'][:]
        e_msk = np.logical_and(segments['dEdx'] < e_range[1], segments['dEdx'] > e_range[0])
        tracks.append(segments[e_msk])
    # file-name tag: first character of every word of each fname, joined with '.'
    fadd = '.'.join([''.join([''.join([j[0] for j in i.split(' ')]) for i in f.split('_')]) for f in fnames])
    plot_name = f"plot_dEdx_{fadd}_{('T' if xlog else 'F')}{('T' if ylog else 'F')}_{nbins}{('N' if normalize else '')}.png"
    cmsk = list(range(numfiles)) # indices of the files that take part
    uniq_particle_list = [None] # a single pass over all particles when not splitting by particle
    if plot_particles:
        # get list of all particles among all files
        cmsk = [c for c in range(numfiles) if 'pdgId' in tracks[c].dtype.names]
        print(cmsk)
        if not cmsk:
            print('no files',' ,'.join(fnames),'have pdgIds.\n quitting now...')
            sys.exit()
        all_ids = np.concatenate([tracks[c]['pdgId'] for c in cmsk])
        # dont include '0' particle
        uniq_particle_list = np.setdiff1d(all_ids, [0]).tolist()
        # print new particles found so I can add them to pdgId list
        found_new = False
        for p in uniq_particle_list:
            if p not in pdgIds:
                found_new = True
                pdgIds[p] = nucleus_to_label(p)
        if found_new: print(pdgIds)
        print("particles found: "+", ".join(pdgIds[p] for p in uniq_particle_list))
    tracks_ = list(tracks)
    ncounts = {c: tracks[c]['dEdx'].size for c in cmsk} # counts to normalize datasets
    counts = dict(ncounts)
    energies = {}
    for particle in uniq_particle_list:
        # # # SET TRACKS (CUTTING FOR PARTICLE TRACKS) # # # #
        cmsk2 = list(cmsk)
        if plot_particles: # CUT TO ONLY TRACKS WITH PARTICLE & within energy range
            cmsk2 = []
            for c in cmsk:
                msk = tracks[c]['pdgId'] == particle
                tracks_[c] = tracks[c][msk]
                if print_info: print(f"resized {tracks[c].size}-->{tracks_[c].size}")
                if tracks_[c].size > 0:
                    cmsk2.append(c)
            # a comparison needs the particle in at least two files
            if len(cmsk2) < 2: continue
            plot_name = f"plot_dEdx_{fadd}_{pdgIds[particle]}_{('T' if xlog else 'F')}{('T' if ylog else 'F')}_{nbins}{('N' if normalize else '')}.png"
        # # # SET ENERGIES AND COUNTS # # #
        for c in cmsk2:
            energies[c] = tracks_[c]['dEdx']
            counts[c] = energies[c].size
        add = pdgIds[particle]+" " if plot_particles else ""
        if print_info:
            string = ', '.join([ fnames[c]+"-"+str(np.sum(tracks_[c]['dx'])) for c in cmsk2])
            print(f"all {add}segments length: {string}")
            string = ', '.join([ fnames[c]+"-"+str(np.sum(energies[c])) for c in cmsk2])
            print(f"total {add}dEdx: {string}")
            string = ', '.join([ fnames[c]+"-"+str(counts[c]) for c in cmsk2])
            print(f"number of {add}segments: {string}")
        all_e = np.concatenate([energies[c] for c in cmsk2]) if cmsk2 else np.array([])
        if all_e.size == 0:
            print(f"no {add}segments left after cuts, skipping plot")
            continue
        min_e = np.min(all_e)
        max_e = np.max(all_e)
        plt.figure(figsize=PLOT_FIGSIZE)
        if xlog: # set plot xlog, ylog
            plt.xscale('log')
            logbins = np.logspace(np.log10(min_e),np.log10(max_e),nbins)
        else:
            logbins = np.linspace(min_e,max_e,nbins)
        if ylog: plt.yscale('log')
        # PLOT NORMALIZED HISTOGRAM OF ENERGIES FOR ALL FILES
        for c in cmsk2:
            # an empty file has nothing to weight (and would divide by zero)
            weight = np.full(counts[c], 1/ncounts[c]) if normalize and ncounts[c] else None
            (n, bins, patches) = plt.hist(np.asarray(energies[c]), bins=logbins, weights=weight,
                                          color=f'C{c}', alpha=0.5, label=f"{fnames[c]}: {counts[c]}")
            if label_max_bin:
                max_counts_idx = max(range(len(n)), key = lambda x: n[x])
                patch = patches[max_counts_idx]
                label = "%.2e" % n[max_counts_idx] if normalize else str(n[max_counts_idx]).split('.')[0]
                plt.text(patch.get_x() + patch.get_width() / 2, patch.get_height()+0.01, label,
                         ha='center', va='bottom') # ADD LABEL TO MAX BIN IN FILE
        plt.xlabel("dE/dx [MeV/cm]")
        norm_add = "normalized " if normalize else ""
        plt.ylabel(f"{norm_add}# of entries")
        string = ' vs. '.join([fnames[c] for c in cmsk2])
        plt.title( f"{string} {add}dEdx counts in MeV/cm" )
        plt.legend(loc='best')
        plt.savefig(plot_name, dpi=PLOT_DPI)
        print("Saved plot to",plot_name,"\n")
        plt.close()
# —.——.———.———.———.———.———.———.———.———.———.———.———.———.———.———.———.———.—————————.———.——————————————————————————————————————————
# ——.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.—.——————————————————————————————————————————
# ———.———.———.———.———.———.———.———.———.———.———.———.———.———.———.———.———.———.—.—.—.———.———————————————————————————————————————————
def main(config):
    """
    Make the plots requested on the command line.
    - config = parsed arguments; reads .plot, .linewidth, .seeds, .label, .label_add,
               .params, .convergence and .ext. config.plot, config.params and config.label
               are modified in place.
    config.plot tokens (case-insensitive):
        loss [UNIF_LEN] [avg] : loss plots, smoothed over UNIF_LEN iterations (default 320),
                                optionally followed by a cumulative average
        all | param           : parameter iteration plots
        conv | convergence    : parameter convergence plots
        dedx [NUM_BINS]       : dEdx histograms from the h5 files (default 200 bins), then exit
    Run histories are read from history*.pkl / losses*.pkl files in the current directory.
    Sets the module globals LINEWIDTH, PLOT_DPI and PLOT_FIGSIZE used by the plotting functions.
    """
    config.plot = [t.lower() for t in config.plot]
    global LINEWIDTH, PLOT_DPI, PLOT_FIGSIZE
    # the width may arrive as a string from the command line; it is used in arithmetic later
    LINEWIDTH = float(config.linewidth) if config.linewidth else .8
    PLOT_DPI = 600
    PLOT_FIGSIZE = (6.4, 4.8) # default matplotlib figure size
    # google slides is PLOT_FIGSIZE = (10, 5.625)
    # significant number of seeds having one seed_init
    # to plot for a single seed_init, & vice versa for seeds
    sig_num_to_plot = 3
    cut_to_min = False # cut graphs to minimum value?
    print_info = True # print more info while plotting?
    # GET UNIF_LEN FOR LOSSES SMOOTHING
    # defaults apply when 'loss' is not requested or has no options after it
    UNIF_LEN = 320
    # smooth with moving average of loss after smoothing with uniform?
    # (avg[loss[:i] for i in loss])
    do_moving_avg = False
    if 'loss' in config.plot:
        loss_at = config.plot.index('loss')
        loss_opts = config.plot[loss_at+1:loss_at+3] # at most the two tokens after 'loss'
        if loss_opts and loss_opts[0].isdigit():
            UNIF_LEN = int(loss_opts[0])
        do_moving_avg = 'avg' in loss_opts
    if "dedx" in config.plot:
        PLOT_FIGSIZE = (8,4.8)
        # number of histogram bins specified after dedx or default 200
        dedx_opts = config.plot[config.plot.index('dedx')+1:][:1]
        NUM = int(dedx_opts[0]) if dedx_opts and dedx_opts[0].isdigit() else 200
        for f in ['dEdx > 5', 'dEdx < 2']:
            for log_combo in [(True, True), (False, False), (False, True)]: #(xlog, ylog)
                #plot_elements(f, xlog=log_combo[0], ylog=log_combo[1], nbins = NUM)
                plot_particle_dEdxs(f, xlog=log_combo[0], ylog=log_combo[1], nbins = NUM)
                #plot_dEdxs(['dEdx > 5', 'dEdx < 2'], xlog=log_combo[0], ylog=log_combo[1], nbins=NUM, normalize=True,
                #           e_range=[1e-4,1e10], plot_particles=False)
        sys.exit() # dont look at any pickle data
    # how the normalized parameter deviations are combined into one convergence value
    reducers = {'max': np.max, 'sum': np.sum, 'min': np.min, 'mean': np.mean}
    if config.convergence not in reducers:
        sys.exit(f"Error: unknown --convergence '{config.convergence}', choose from {', '.join(reducers)}")
    # -------------- LOAD DATA FROM .pkl FILES --------------------------------
    data_entry = np.dtype([('seed', 'i4'),('seed_init', 'i4'),('data_seed', 'i4'),('data', 'O'),('convergence', 'O'),('loss', 'O')])
    data = np.array([], dtype=data_entry)
    fnames = []
    for seed in config.seeds:
        # search for all relevant seeds
        for start in ['history', 'losses']:
            if seed == -1: # no seeds specified
                # get all relevant files regardless of seed
                fnames.extend(glob(f"{start}*{config.label}*.pkl"))
            else:
                # try various combinations of seed, config_label
                for slabel in ['_dtseed','_seed','i=seed','i=dtseed']:
                    fnames.extend(glob(f'{start}*{slabel}{seed}_*{config.label}*.pkl'))
                    fnames.extend(glob(f'{start}*{config.label}_*{slabel}{seed}*.pkl'))
    fnames = list(set(fnames)) # dont repeat any filenames
    for f in fnames:
        # NOTE: pickle can execute arbitrary code on load; only read files you produced yourself
        with open(f, "rb") as pkl_file:
            history = pickle.load(pkl_file)
        # get seed, init_seed, and data_seed
        data_seed = 0
        if 'config' in history.keys():
            seed = history['config'].seed; seed_init = history['config'].seed_init; data_seed = history['config'].data_seed
        elif 'seed' in history.keys():
            seed = history['seed']; seed_init = history['seed_init']; data_seed = history['data_seed']
        else: # get seeds from filename lol
            # NOTE(review): only the single character after each tag is read, and when no tag
            # matches, seed / seed_init keep the value they had before this file -- confirm intent
            if 'i=dt_seed' in f:
                seed = seed_init = f.split('i=dt_seed')[1][0]
            if 'i=seed' in f:
                seed = seed_init = f.split('i=seed')[1][0]
            if 'dtseed' in f:
                data_seed = f.split('dtseed')[1][0]
            if 'iseed' in f:
                seed_init = f.split('iseed')[1][0]
        # get params
        if len(config.params) == 0: # if no --params label, automatically use all params available
            config.params = np.unique([key.split('_iter')[0] for key in history.keys() if key.split('_iter')[0] in labels])
            print('Params found:',', '.join(config.params))
        # STORE SMOOTHED LOSS IN dat['loss']
        loss = np.array(history['losses_iter'])
        if data_seed == 6: print(loss) # debug print
        if UNIF_LEN > 0: loss = smooth(loss, UNIF_LEN) # smooth loss!
        if do_moving_avg: loss = movingavg(loss)
        # STORE PARAMETER CONVERGENCE dat['convergence']
        try: # if history[f"{param}_iter"] is stored as a single value
            float(history[f"{config.params[0]}_iter"])
            size = int(loss.size)
            norm_iters = np.zeros((len(config.params), size))
            for c, param in enumerate(config.params):
                norm_iters[c] = [abs(history[f"{param}_iter"] - history[f"{param}_target"])/history[f"{param}_target"]]*size
        except TypeError: # stored as one value per iteration
            min_iterations = min([len(history[f"{param}_iter"]) for param in config.params])
            norm_iters = np.zeros((len(config.params), min_iterations))
            for c, param in enumerate(config.params):
                norm_iters[c] = (abs(np.array(history[f"{param}_iter"]) - history[f'{param}_target'][0])/history[f'{param}_target'][0])[:min_iterations]
        convergence = reducers[config.convergence](norm_iters, 0)
        entry = np.array([(seed, seed_init, data_seed, history, convergence, loss)], dtype=data_entry)
        data = np.append(data, entry)
    if data.shape == (0,):
        sys.exit(f"Error: found no .pkl files in {os.getcwd()} with label '{config.label}' and seeds {config.seeds}")
    print("Data found: "+', '.join([f"{dat['seed']}-{dat['seed_init']}"+(f"-{dat['data_seed']}" if dat['data_seed'] else "") for dat in data]))
    seed_inits = np.unique(data['seed_init'])
    seeds = np.unique(data['seed'])
    data_seeds = np.unique(data['data_seed'])
    # only make a per-value plot when at least sig_num_to_plot runs share that value
    msk = [len(data[data['seed'] == seed]) >= sig_num_to_plot for seed in seeds]
    msk1 = [len(data[data['seed_init'] == seed_init]) >= sig_num_to_plot for seed_init in seed_inits]
    msk2 = [len(data[data['data_seed'] == data_seed]) >= sig_num_to_plot for data_seed in data_seeds]
    plot_seeds = seeds[msk]
    plot_inits = seed_inits[msk1]
    # data_seed 0 means "no data seed", so it never gets its own plot
    plot_dtseeds = data_seeds[np.logical_and(msk2, [d != 0 for d in data_seeds])]
    # now use label for plotting purposes
    config.label = config.label + f"_seeds{'-'.join([str(s) for s in seeds])}"+config.label_add
    # multiple runs for each seed (varying initial condition), plot
    # plot each param, showing iterations of all seeds ran
    # -------------------------- CALL PLOTTING SCRIPTS ---------------------------------------
    # plot simulation losses
    if 'loss' in config.plot:
        add = "-avg" if do_moving_avg else ""
        PLOT_FIGSIZE = (8,4.8)
        # PLOT INITIAL VALUE w/ MULT SEEDS
        for seed_init in plot_inits:
            plot_losses(data[data['seed_init'] == seed_init],
                        f"plot_loss{add}_iseed{seed_init}_{UNIF_LEN}_{config.label}.{config.ext}", UNIF_LEN,
                        print_info = print_info, label='seed', cut_to_min=cut_to_min)
        # PLOT MULT INITAL VALUES per SEED
        for seed in plot_seeds:
            plot_losses(data[data['seed'] == seed],
                        f"plot_loss{add}_seed{seed}_{UNIF_LEN}_{config.label}.{config.ext}", UNIF_LEN,
                        print_info = print_info, label='seed_init', cut_to_min=cut_to_min)
        # PLOT ALL RUNS per DATA SEED
        for data_seed in plot_dtseeds:
            plot_losses(data[data['data_seed'] == data_seed],
                        f"plot_loss{add}_dtseed{data_seed}_{UNIF_LEN}_{config.label}.{config.ext}", UNIF_LEN,
                        print_info = print_info, label='data_seed', cut_to_min=cut_to_min)
        # PLOT ALL LOSSES
        if len(plot_inits) + len(plot_seeds) + len(plot_dtseeds) != 1:
            plot_losses(data, f"plot_loss{add}_{UNIF_LEN}_{config.label}.{config.ext}", UNIF_LEN,
                        print_info = print_info, cut_to_min=cut_to_min)
    # plot parameter iterations
    if 'all' in config.plot or "param" in config.plot:
        for param in config.params:
            # VARYING INITIAL VALUE CONVERGING TO ONE TARGET, per SEED
            for seed in plot_seeds:
                plot_name = f'plot_vary-init_{param}_seed{seed}_{config.label}.{config.ext}'
                plot_params(data[data['seed'] == seed], plot_name, param, 'seed_init')
            # MULTIPLE TARGETS / SEEDS, ONE PLOT per initial seed
            for seed_init in plot_inits: # only works for repeat !!!!!
                plot_name = f'plot_vary-seed_{param}_iseed{seed_init}_{config.label}.{config.ext}'
                plot_params(data[data['seed_init'] == seed_init], plot_name, param, 'seed')
            if len(plot_inits) + len(plot_seeds) != 1:
                plot_name = f'plot_{param}_{config.label}.{config.ext}'
                plot_params(data, plot_name, param)
    # plot parameter convergences
    if "convergence" in config.plot or "conv" in config.plot:
        #iter_range=[4500,5000] # determines range of x values to plot
        iter_range=None
        for logy in [False, True]:
            for plot_individual_convergences in [False, True]:
                add = "-all" if plot_individual_convergences else ""
                add += "-regy" if not logy else ""
                for seed_init in plot_inits:
                    plot_name = f"plot_{config.convergence}{add}_iseed{seed_init}_{config.label}.{config.ext}"
                    plot_convergences(data[data['seed_init'] == seed_init], plot_name, plot_individual_convergences, cut_to_min, print_info, logy, 'seed', iter_range)
                for seed in plot_seeds:
                    plot_name = f"plot_{add}{config.convergence}_seed{seed}_{config.label}.{config.ext}"
                    plot_convergences(data[data['seed'] == seed], plot_name, plot_individual_convergences, cut_to_min, print_info, logy, 'seed_init', iter_range)
                for data_seed in plot_dtseeds:
                    plot_name = f"plot_{add}{config.convergence}_dtseed{data_seed}_{config.label}.{config.ext}"
                    plot_convergences(data[data['data_seed'] == data_seed], plot_name, plot_individual_convergences, cut_to_min, print_info, logy, 'data_seed', iter_range)
                # plot all convergences if we havent plotted anything yet or have plotted more than one graph
                if len(plot_inits) + len(plot_seeds) + len(plot_dtseeds) != 1:
                    plot_name = f"plot_{add}{config.convergence}_all_{config.label}.{config.ext}"
                    # iter_range must go by keyword, the 7th positional parameter is `label`
                    plot_convergences(data, plot_name, plot_individual_convergences, cut_to_min, print_info, logy,
                                      label=None, iter_range=iter_range)
# Command-line entry point: parse the options and hand them to main().
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--params", dest="params", default=[], nargs="+",
                        help="List of parameters to plot.")
    parser.add_argument("--label", dest="label", default="",
                        help="Label of pkl file (after seed part).")
    # default [-1] means "no seeds specified": every matching file is used
    parser.add_argument("--seeds", dest="seeds", default=[-1], nargs="+",
                        help="List of target seeds to plot.")
    parser.add_argument("--ext", dest="ext", default="png",
                        help="Image extension (e.g., pdf or png)")
    parser.add_argument("--convergence", dest='convergence', default='max',
                        choices=['sum', 'max', 'min', 'mean'],
                        help="How normalized parameter iterations are combined into a total convergence level: 'sum', 'max', 'min', 'mean'")
    parser.add_argument("--plot", dest='plot', default=[], nargs="+",
                        help="List of plot specifications: \nloss [UNIF_LEN] [avg]: make loss plots, [int length for smoothing] [make moving average loss plot] \nall: plot parameter iterations, \
                        \nconv: make parameter convergence plots\n dedx [NUM_BINS]: make histogram of energy data [int number of histogram bins], edit configurations in main() \
                        ")
    # parsed as a number because main() multiplies the line width
    parser.add_argument("--linewidth", dest='linewidth', default=None, type=float,
                        help="Line width of the plotted curves (default 0.8).")
    parser.add_argument("--ladd", dest='label_add', default='',
                        help="Extra text appended to the label that is used in the names of the saved plots.")
    args = parser.parse_args()
    main(args)