forked from jchelly/SOAP
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathread_hbtplus.py
306 lines (263 loc) · 10.5 KB
/
read_hbtplus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
#!/bin/env python
import os.path
import numpy as np
import h5py
import unyt
import virgo.mpi.parallel_hdf5 as phdf5
import virgo.mpi.parallel_sort as psort
import virgo.mpi.util
def hbt_filename(hbt_basename, file_nr):
return f"{hbt_basename}.{file_nr}.hdf5"
def read_hbtplus_groupnr(basename):
"""
Read HBTplus output and return group number for each particle ID
"""
from mpi4py import MPI
comm = MPI.COMM_WORLD
comm_rank = comm.Get_rank()
comm_size = comm.Get_size()
# Find number of HBT output files
if comm_rank == 0:
with h5py.File(hbt_filename(basename, 0), "r") as infile:
nr_files = int(infile["NumberOfFiles"][...])
else:
nr_files = None
nr_files = comm.bcast(nr_files)
# Assign files to MPI ranks
files_per_rank = np.zeros(comm_size, dtype=int)
files_per_rank[:] = nr_files // comm_size
files_per_rank[: nr_files % comm_size] += 1
assert np.sum(files_per_rank) == nr_files
first_file_on_rank = np.cumsum(files_per_rank) - files_per_rank
# Read in the halos from the HBT output:
# 'halos' will be an array of structs with the halo catalogue
# 'ids_bound' will be an array of particle IDs in halos, sorted by halo
halos = []
ids_bound = []
for file_nr in range(
first_file_on_rank[comm_rank],
first_file_on_rank[comm_rank] + files_per_rank[comm_rank],
):
with h5py.File(hbt_filename(basename, file_nr), "r") as infile:
halos.append(infile["Subhalos"][...])
ids_bound.append(infile["SubhaloParticles"][...])
# Get the dtype for particle IDs
if len(ids_bound) > 0:
id_dtype = h5py.check_vlen_dtype(ids_bound[0].dtype)
else:
id_dtype = None
# Concatenate arrays of halos from different files
if len(halos) > 0:
halos = np.concatenate(halos)
else:
# This rank was assigned no files
halos = None
halos = virgo.mpi.util.replace_none_with_zero_size(halos, comm=comm)
# Combine arrays of particles in halos
if len(ids_bound) > 0:
ids_bound = np.concatenate(
ids_bound
) # Combine arrays of halos from different files
if len(ids_bound) > 0:
ids_bound = np.concatenate(
ids_bound
) # Combine arrays of particles from different halos
else:
# The files assigned to this rank contain zero halos
ids_bound = np.zeros(0, dtype=id_dtype)
else:
# This rank was assigned no files
ids_bound = None
ids_bound = virgo.mpi.util.replace_none_with_zero_size(ids_bound, comm=comm)
# Assign halo indexes to the particles
nr_local_halos = len(halos)
total_nr_halos = comm.allreduce(nr_local_halos)
halo_offset = comm.scan(len(halos), op=MPI.SUM) - len(halos)
halo_index = np.arange(nr_local_halos, dtype=int) + halo_offset
halo_size = halos["Nbound"]
del halos
grnr_bound = np.repeat(halo_index, halo_size)
# Assign ranking by binding energy to the particles
rank_bound = -np.ones(grnr_bound.shape[0], dtype=int)
offset = 0
for halo_nr in range(nr_local_halos):
rank_bound[offset : offset + halo_size[halo_nr]] = np.arange(
halo_size[halo_nr], dtype=int
)
offset += halo_size[halo_nr]
assert np.all(rank_bound >= 0) # HBT only outputs bound particles
del halo_size
del halo_offset
del halo_index
# HBTplus originally output duplicate particles, so this script previously
# assigned duplicate particles to a single subhalo based on their bound rank.
# HBT should no longer have duplicates, which is tested by this assert
unique_ids_bound, unique_counts = psort.parallel_unique(
ids_bound, comm=comm, arr_sorted=False, return_counts=True
)
assert len(unique_counts) == 0 or np.max(unique_counts) == 1
return total_nr_halos, ids_bound, grnr_bound, rank_bound
def read_hbtplus_catalogue(comm, basename, a_unit, registry, boxsize, keep_orphans=False):
"""
Read in the HBTplus halo catalogue, distributed over communicator comm.
comm - communicator to distribute catalogue over
basename - HBTPlus SubSnap filename without the .N suffix
a_unit - unyt a factor
registry - unyt unit registry
boxsize - box size as a unyt quantity
Returns a dict of unyt arrays with the halo properies.
Arrays which must always be returned:
index - index of each halo in the input catalogue
cofp - (N,3) array with centre to use for SO calculations
search_radius - initial search radius which includes all member particles
is_central - integer 1 for centrals, 0 for satellites
nr_bound_part - number of bound particles in each halo
Any other arrays will be passed through to the output ONLY IF they are
documented in property_table.py.
Note that in case of HBT we only want to compute properties of resolved
halos, so we discard those with 0-1 bound particles.
"""
comm_size = comm.Get_size()
comm_rank = comm.Get_rank()
# Get SWIFT's definition of physical and comoving Mpc units
swift_pmpc = unyt.Unit("swift_mpc", registry=registry)
swift_cmpc = unyt.Unit(a_unit * swift_pmpc, registry=registry)
swift_msun = unyt.Unit("swift_msun", registry=registry)
# Get km/s
kms = unyt.Unit("km/s", registry=registry)
# Get expansion factor as a float
a = a_unit.base_value
# Get h as a float
h_unit = unyt.Unit("h", registry=registry)
h = h_unit.base_value
# Get HBTplus unit information
if comm_rank == 0:
# Try to get units from the HDF5 output
have_units = False
filename = hbt_filename(basename, 0)
with h5py.File(filename, "r") as infile:
if "Units" in infile:
LengthInMpch = float(infile["Units/LengthInMpch"][...])
MassInMsunh = float(infile["Units/MassInMsunh"][...])
VelInKmS = float(infile["Units/VelInKmS"][...])
have_units = True
# Otherwise, will have to read the Parameters.log file
if not (have_units):
dirname = os.path.dirname(os.path.dirname(filename))
with open(dirname + "/Parameters.log", "r") as infile:
for line in infile:
fields = line.split()
if len(fields) == 2:
name, value = fields
if name == "MassInMsunh":
MassInMsunh = float(value)
elif name == "LengthInMpch":
LengthInMpch = float(value)
elif name == "VelInKmS":
VelInKmS = float(value)
else:
LengthInMpch = None
MassInMsunh = None
VelInKmS = None
(LengthInMpch, MassInMsunh, VelInKmS) = comm.bcast(
(LengthInMpch, MassInMsunh, VelInKmS)
)
# Read the subhalos for this snapshot
filename = f"{basename}.%(file_nr)d.hdf5"
mf = phdf5.MultiFile(filename, file_nr_dataset="NumberOfFiles", comm=comm)
subhalo = mf.read("Subhalos")
# Load the number of bound particles
nr_bound_part = unyt.unyt_array(
subhalo["Nbound"], units=unyt.dimensionless, dtype=int, registry=registry
)
# Do we only process resolved subhalos? (HBT also outputs unresolved "orphan" subhalos)
if not keep_orphans:
keep = nr_bound_part > 0
else:
keep = np.ones_like(nr_bound_part, dtype=bool)
# Assign indexes to halos: for each halo we're going to process we store the
# position in the input catalogue.
nr_local_halos = len(keep)
local_offset = comm.scan(nr_local_halos) - nr_local_halos
index = np.arange(nr_local_halos, dtype=int) + local_offset
index = index[keep]
index = unyt.unyt_array(
index, units=unyt.dimensionless, dtype=int, registry=registry
)
# Find centre of potential
cofp = (
subhalo["ComovingMostBoundPosition"][keep, :] * LengthInMpch / h
) * swift_cmpc
# Initial guess at search radius for each halos.
# Search radius will be expanded if we don't find all of the bound particles.
search_radius = (
1.01 * (subhalo["REncloseComoving"][keep] * LengthInMpch / h) * swift_cmpc
)
# Central halo flag
is_central = np.where(subhalo["Rank"] == 0, 1, 0)[keep]
is_central = unyt.unyt_array(
is_central, units=unyt.dimensionless, dtype=int, registry=registry
)
# Subhalo tracking information
track_id = unyt.unyt_array(
subhalo["TrackId"][keep], units=unyt.dimensionless, dtype=int, registry=registry
)
host_halo_id = unyt.unyt_array(
subhalo["HostHaloId"][keep],
units=unyt.dimensionless,
dtype=int,
registry=registry,
)
depth = unyt.unyt_array(
subhalo["Depth"][keep], units=unyt.dimensionless, dtype=int, registry=registry
)
snapshot_birth = unyt.unyt_array(
subhalo["SnapshotIndexOfBirth"][keep],
units=unyt.dimensionless,
dtype=int,
registry=registry,
)
parent_id = unyt.unyt_array(
subhalo["NestedParentTrackId"][keep],
units=unyt.dimensionless,
dtype=int,
registry=registry,
)
descendant_id = unyt.unyt_array(
subhalo["DescendantTrackId"][keep],
units=unyt.dimensionless,
dtype=int,
registry=registry,
)
# Peak mass
max_mass = (subhalo["LastMaxMass"][keep] * MassInMsunh / h) * swift_msun
snapshot_max_mass = subhalo["SnapshotIndexOfLastMaxMass"][keep]
snapshot_max_mass = unyt.unyt_array(
snapshot_max_mass, units=unyt.dimensionless, dtype=int, registry=registry
)
# Peak vmax
max_vmax = (subhalo["LastMaxVmaxPhysical"][keep] * VelInKmS) * kms
snapshot_max_vmax = subhalo["SnapshotIndexOfLastMaxVmax"][keep]
snapshot_max_vmax = unyt.unyt_array(
snapshot_max_vmax, units=unyt.dimensionless, dtype=int, registry=registry
)
# Number of bound particles
nr_bound_part = nr_bound_part[keep]
local_halo = {
"cofp": cofp,
"index": index,
"search_radius": search_radius,
"is_central": is_central,
"nr_bound_part": nr_bound_part,
"HostHaloId": host_halo_id,
"Depth": depth,
"TrackId": track_id,
"SnapshotIndexOfBirth": snapshot_birth,
"NestedParentTrackId": parent_id,
"DescendantTrackId": descendant_id,
"LastMaxMass": max_mass,
"SnapshotIndexOfLastMaxMass": snapshot_max_mass,
"LastMaxVmaxPhysical": max_vmax,
"SnapshotIndexOfLastMaxVmax": snapshot_max_vmax,
}
return local_halo