-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrunjobs_combine_statistics.m
219 lines (217 loc) · 11.5 KB
/
runjobs_combine_statistics.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
function runjobs_combine_statistics( Ns, dims, shapes, jobs, extend, divisions, deljobs )
%RUNJOBS_COMBINE_STATISTICS loads single jobs from a job sequence with the
% same N's, dimensions and shapes and combines them to a single data file.
% All files are read from and written to a subdirectory 'data'.
%
% Arguments:
% NS                  array of numbers of elements in the causet.
% DIMS                array of spacetime dimensions.
% SHAPES              string array of shape names.
% JOBS                array of job-indexes to be combined.
%
% Optional arguments:
% EXTEND              boolean flag to extend existing files.
%                     true: job file data is added to existing files
%                       (Default)
%                     false: combined job file data overwrites existing
%                       files
% DIVISIONS           create new job file by combining only a subset of all
%                     files; necessary for combining large data in
%                     parallel. New job files get the name extension JAxB
%                     where A stands for the new division order and
%                     B stands for the new job number.
%                     A: A is the new division order (Default: 1). The
%                       result is written to the master file without a
%                       JAxB extension.
%                     [ A, m, n ]: A is the new division order (in
%                       the first call it shall be 1), m is the subfile
%                       index of n new job files in this division.
%                       JOBS is cut into n contiguous slices of equal
%                       length and only the m-th slice is read, so
%                       length( JOBS ) is expected to be a multiple of n.
%                     If A > 1, the job files of division A-1 (name
%                     extension J(A-1)xB) are read instead of the plain
%                     job files (name extension JB).
% DELJOBS             boolean flag to delete job files.
%                     false: job files remain (Default)
%                     true: job files will be deleted after loading data
%
% Copyright 2021, C. Minz. BSD 3-Clause License.

    %% set start up parameter:
    if nargin < 5
        extend = true;
    end
    if nargin < 6
        divisions = 1;
    end
    if nargin < 7
        deljobs = false;
    end
    if length( divisions ) == 1
        divisionorder = divisions;
        subfile = 0; % 0 selects the master file name without JAxB extension
    else
        divisionorder = divisions( 1 );
        subfile = divisions( 2 );
        % restrict JOBS to the slice that belongs to this subfile:
        firstjobidx = length( jobs ) / divisions( 3 ) * ( subfile - 1 ) + 1;
        lastjobidx = length( jobs ) / divisions( 3 ) * subfile;
        jobs = jobs( firstjobidx : 1 : lastjobidx );
    end
    %% field names with special treatment (loop invariant):
    % top-level fields that are added up element-wise:
    addfields = { 'runs', ...
        'futureinfinities', 'eventcounts', ...
        'chains', 'simplices', 'dimestimators', ...
        'diamonds', 'diamondtimes', 'propertimes', ...
        'hyperbdistribution', 'comptime' };
    % top-level fields that are structs and are merged subfield-wise:
    structfields = { 'distribution', 'preffutures', 'geodesics' };
    % subfields that are added up element-wise:
    addsubfields = { 'Ns', 'bins', 'counts', 'diamonds', ...
        'dimestimators', 'propertimes', ...
        'hyperbdistribution', 'lgcount', 'lglength', ...
        'vgcount', 'vglength', 'midpointdim', ...
        'startcount', 'endcount', 'startfails', ...
        'endfails', 'fails' };
    %% run through Ns, dimensions and shapes:
    for N = Ns
        for d = dims
            for shape = shapes
                fprintf( 'Combining files for the parameters N%dD%d%s:\n', ...
                    N, d, lower( char( shape ) ) );
                runjobs_progress_initialize();
                runjobs_progress_update();
                %% load previous master data:
                if ~subfile
                    masterfilename = sprintf( 'data/N%dD%d%s.mat', ...
                        N, d, lower( char( shape ) ) );
                else
                    masterfilename = sprintf( 'data/N%dD%d%sJ%dx%d.mat', ...
                        N, d, lower( char( shape ) ), divisionorder, subfile );
                end
                if ( exist( masterfilename, 'file' ) > 0 ) && extend
                    masterdata = load( masterfilename );
                else
                    masterdata = struct();
                end
                %% load job data file and add to master data:
                jobcounter = 0;
                jobsnumber = length( jobs );
                jobsexist = zeros( size( jobs ) );
                jobsincompatible = zeros( size( jobs ) );
                incompatiblefield = '';
                for j = jobs
                    jobcounter = jobcounter + 1;
                    if divisionorder > 1
                        % read the output of the previous division:
                        filename = sprintf( 'data/N%dD%d%sJ%dx%d.mat', ...
                            N, d, lower( char( shape ) ), divisionorder - 1, j );
                    else
                        filename = sprintf( 'data/N%dD%d%sJ%d.mat', ...
                            N, d, lower( char( shape ) ), j );
                    end
                    % non-zero if the job file exists:
                    jobsexist( jobcounter ) = exist( filename, 'file' );
                    if jobsexist( jobcounter )
                        jobdata = load( filename );
                        for fcell = transpose( fieldnames( jobdata ) )
                            fieldname = char( fcell );
                            jobvalue = jobdata.(fieldname);
                            if ~isfield( masterdata, fieldname )
                                % fields to copy:
                                if strcmp( fieldname, 'shape' )
                                    masterdata.(fieldname) = char( jobvalue );
                                else
                                    masterdata.(fieldname) = jobvalue;
                                end
                            elseif any( strcmp( fieldname, addfields ) )
                                % fields to add up:
                                mastervalue = masterdata.(fieldname);
                                % isequal also copes with a different
                                % number of dimensions, where an
                                % element-wise == of the sizes would fail
                                if isequal( size( jobvalue ), size( mastervalue ) )
                                    mastervalue = mastervalue + jobvalue;
                                    masterdata.(fieldname) = mastervalue;
                                else
                                    jobsincompatible( jobcounter ) = 1;
                                    incompatiblefield = fieldname;
                                end
                            elseif any( strcmp( fieldname, structfields ) )
                                % field with subfields:
                                for subfcell = transpose( fieldnames( jobdata.(fieldname) ) )
                                    subfieldname = char( subfcell );
                                    jobvalue = jobdata.(fieldname).(subfieldname);
                                    if ~isfield( masterdata.(fieldname), subfieldname )
                                        masterdata.(fieldname).(subfieldname) = jobvalue;
                                    elseif any( strcmp( subfieldname, addsubfields ) )
                                        % subfields to add up:
                                        mastervalue = masterdata.(fieldname).(subfieldname);
                                        if isequal( size( jobvalue ), size( mastervalue ) )
                                            mastervalue = mastervalue + jobvalue;
                                        else
                                            jobsincompatible( jobcounter ) = 1;
                                            incompatiblefield = ...
                                                sprintf( '%s.%s', fieldname, subfieldname );
                                        end
                                        masterdata.(fieldname).(subfieldname) = mastervalue;
                                    elseif strcmp( subfieldname, 'unithyperboloid' )
                                        % subfields to extend (rows are
                                        % appended, so only the number of
                                        % columns has to agree):
                                        mastervalue = masterdata.(fieldname).(subfieldname);
                                        if size( jobvalue, 2 ) == size( mastervalue, 2 )
                                            mastervalue = [ mastervalue; jobvalue ]; %#ok<AGROW>
                                        else
                                            jobsincompatible( jobcounter ) = 1;
                                            incompatiblefield = ...
                                                sprintf( '%s.%s', fieldname, subfieldname );
                                        end
                                        masterdata.(fieldname).(subfieldname) = mastervalue;
                                    end
                                end
                            end
                        end
                        % NOTE: the job file is deleted even if some of
                        % its fields were flagged as incompatible.
                        if deljobs
                            delete( filename );
                        end
                    end
                    runjobs_progress_update( jobcounter / jobsnumber );
                end
                runjobs_progress_finalize();
                %% if any jobs had been processed:
                if sum( jobsexist ) > 0
                    %% set default max. dimension:
                    if ~isfield( masterdata, 'maxdimension' )
                        masterdata.maxdimension = 8;
                    end
                    %% set default runs:
                    if ~isfield( masterdata, 'runs' )
                        masterdata.runs = 1;
                    end
                    %% save to combined file:
                    % only format the computation time if a job provided
                    % it; otherwise the combined data would be lost by an
                    % error before saving
                    if isfield( masterdata, 'comptime' )
                        masterdata.comptimestr = ...
                            char( seconds( masterdata.comptime ), 'hh:mm:ss' );
                    end
                    save( masterfilename, '-struct', 'masterdata' );
                end
                %% show list of missing jobs:
                missingjobs = jobs( jobsexist == 0 );
                missingjobcount = length( missingjobs );
                if missingjobcount > 0
                    fprintf( 'N%dD%d%s had %d / %d missing job files: ', ...
                        N, d, lower( char( shape ) ), missingjobcount, length( jobs ) );
                    jobcounter = 0;
                    for j = missingjobs
                        jobcounter = jobcounter + 1;
                        if jobcounter < missingjobcount
                            fprintf( '%d, ', j );
                        else
                            fprintf( '%d.\n', j );
                        end
                    end
                else
                    fprintf( 'N%dD%d%s had %d job files.\n', ...
                        N, d, lower( char( shape ) ), length( jobs ) );
                end
                %% show list of jobs with incompatible fields:
                incompatiblejobs = jobs( jobsincompatible == 1 );
                incompatiblejobcount = length( incompatiblejobs );
                if incompatiblejobcount > 0
                    fprintf( 'N%dD%d%s had %d / %d job files with incompatible sizes for some fields (last error for .%s): ', ...
                        N, d, lower( char( shape ) ), incompatiblejobcount, length( jobs ), incompatiblefield );
                    jobcounter = 0;
                    % list the job indexes (not the count) of the
                    % incompatible job files:
                    for j = incompatiblejobs
                        jobcounter = jobcounter + 1;
                        if jobcounter < incompatiblejobcount
                            fprintf( '%d, ', j );
                        else
                            fprintf( '%d.\n', j );
                        end
                    end
                end
            end
        end
    end
end