-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpredict_sediment_thickness.py
429 lines (330 loc) · 19.9 KB
/
predict_sediment_thickness.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
"""
Copyright (C) 2017 The University of Sydney, Australia
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License, version 2, as published by
the Free Software Foundation.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
##############################################################################################
# Predict the compacted sediment thickness (metres) at paleo ocean basin point locations. #
##############################################################################################
import argparse
import math
import numpy as np
import os
# Try importing 'ptt' first. If that fails then try 'gplately.ptt' (GPlately now contains PlateTectonicTools).
try:
from ptt.utils.call_system_command import call_system_command
except ImportError:
from gplately.ptt.utils.call_system_command import call_system_command
import sys
# Reads the input xy file and returns a list of (lon, lat) points.
def read_input_points(input_points_filename):
input_points = []
with open(input_points_filename, 'r') as input_points_file:
for line_number, line in enumerate(input_points_file):
# Make line number 1-based instead of 0-based.
line_number = line_number + 1
# Split the line into strings (separated by whitespace).
line_string_list = line.split()
# Need at least two strings per line (for latitude and longitude).
if len(line_string_list) < 2:
print('Line {0}: Ignoring point - line does not have at least two white-space separated strings.'.format(
line_number), file=sys.stderr)
continue
# Attempt to convert each string into a floating-point number.
try:
# Use GMT (lon/lat) order.
lon = float(line_string_list[0])
lat = float(line_string_list[1])
except ValueError:
print('Line {0}: Ignoring point - cannot read lon/lat values.'.format(line_number), file=sys.stderr)
continue
input_points.append((lon, lat))
return np.array(input_points) # numpy array uses less memory
def generate_input_points_grid(grid_spacing_degrees):
if grid_spacing_degrees == 0:
return
# Data points start *on* dateline (-180).
# If 180 is an integer multiple of grid spacing then final longitude also lands on dateline (+180).
num_latitudes = int(math.floor(180.0 / grid_spacing_degrees)) + 1
num_longitudes = int(math.floor(360.0 / grid_spacing_degrees)) + 1
input_points = np.full((num_latitudes * num_longitudes, 2), (0.0, 0.0), dtype=float) # numpy array uses less memory
input_point_index = 0
for lat_index in range(num_latitudes):
lat = -90 + lat_index * grid_spacing_degrees
for lon_index in range(num_longitudes):
lon = -180 + lon_index * grid_spacing_degrees
input_points[input_point_index] = (lon, lat)
input_point_index += 1
return (input_points, num_longitudes, num_latitudes)
# Returns a list of scalars (one per (lon, lat) point in the 'input_points' list).
# For input points outside the scalar grid then scalars will be Nan (ie, 'math.isnan(scalar)' will return True).
def get_ages_and_distances(input_points, age_grid_filename, distance_grid_filename, max_age=None, max_distance=None):
input_points_data = ''.join('{0} {1}\n'.format(lon, lat) for lon, lat in input_points)
# The command-line strings to execute GMT 'grdtrack'.
grdtrack_command_line = ["gmt", "grdtrack", "-G{0}".format(age_grid_filename), "-G{0}".format(distance_grid_filename)]
stdout_data = call_system_command(grdtrack_command_line, stdin=input_points_data, return_stdout=True)
lon_lat_age_distance_list = []
# Read lon, lat and scalar values from the output of 'grdtrack'.
for line in stdout_data.splitlines():
if line.strip().startswith('#'):
continue
line_data = line.split()
num_values = len(line_data)
# If just a line containing white-space then skip to next line.
if num_values == 0:
continue
if num_values < 4:
print('Ignoring line "{0}" - has fewer than 4 white-space separated numbers.'.format(line), file=sys.stderr)
continue
try:
# Convert strings to numbers.
lon = float(line_data[0])
lat = float(line_data[1])
age = float(line_data[2])
distance = float(line_data[3])
# Exclude if either age or distance is NaN.
if math.isnan(age) or math.isnan(distance):
continue
# Clamp to max values if requested.
if max_age is not None and age > max_age:
age = max_age
if max_distance is not None and distance > max_distance:
distance = max_distance
except ValueError:
print('Ignoring line "{0}" - cannot read floating-point lon, lat, age and distance values.'.format(line), file=sys.stderr)
continue
lon_lat_age_distance_list.append((lon, lat, age, distance))
return np.array(lon_lat_age_distance_list) # numpy array uses less memory
def write_xyz_file(output_filename, output_data):
with open(output_filename, 'w') as output_file:
for output_line in output_data:
output_file.write(' '.join(str(item) for item in output_line) + '\n')
#print('..generated: {}'.format(os.path.basename(output_filename)))
def write_grd_file(grd_filename, output_data, grid_spacing, num_grid_longitudes, num_grid_latitudes):
# Convert array to a string for standard-input to GMT.
xyz_data = ''.join('{} {} {}\n'.format(lon, lat, scalar) for lon, lat, scalar in output_data)
# The command-line strings to execute GMT 'xyz2grd'.
# For example "xyz2grd output_sediment_thickness.xy -R-180/180/-90/90 -I1 -Goutput_sediment_thickness.nc".
gmt_command_line = [
"gmt",
"xyz2grd",
"-I{0}".format(grid_spacing),
# Use GMT gridline registration since our input point grid has data points on the grid lines.
# Gridline registration is the default so we don't need to force pixel registration...
# "-r", # Force pixel registration since data points are at centre of cells.
"-R{0}/{1}/{2}/{3}".format(-180, 180, -90, 90),
"-G{0}".format(grd_filename)]
call_system_command(gmt_command_line, stdin=xyz_data)
#print('..generated: {}'.format(os.path.basename(grd_filename)))
def predict_sediment_thickness(
age,
distance,
mean_age,
mean_distance,
std_deviation_age,
std_deviation_distance,
age_distance_polynomial_coefficients):
# We need to remove the mean and scale to unit variance (for the age and distance values)
# based on the machine learning training scaler.
# See http://scikit-learn.org/stable/modules/preprocessing.html#standardization-or-mean-removal-and-variance-scaling
age = (age - mean_age) / std_deviation_age
distance = (distance - mean_distance) / std_deviation_distance
age_distance_polynomial_features = [
1, age, distance, age*age, age*distance, distance*distance, age*age*age, age*age*distance, age*distance*distance, distance*distance*distance]
# Evaluate the polynomial to get the log of the predicated sediment thickness.
log_sediment_thickness = sum(age_distance_polynomial_coefficients[i] * age_distance_polynomial_features[i] for i in range(10))
# Return the predicted sediment thickness (not as a logarithm).
return math.exp(log_sediment_thickness)
def predict_sedimentation(
input_points, # List of (lon, lat) tuples,
age_grid_filename,
distance_grid_filename,
mean_age,
mean_distance,
variance_age,
variance_distance,
age_distance_polynomial_coefficients,
max_age = None,
max_distance = None):
"""
Predicts compacted sediment thickness for each ocean basin point based on its age and its
mean distance to passive continental margins.
Returns: A list containing 3-tuples of (lon, lat, sed_thickness).
"""
# Get the input point ages and mean distances to passive continental margins.
lon_lat_age_distance_array = get_ages_and_distances(input_points, age_grid_filename, distance_grid_filename, max_age, max_distance)
#
# Calculate mean/variance statistics on the age/distance data.
#
# Note: We don't use the mean/variance of data (instead need to use mean/variance from trained data).
#
#sum_ages = 0
#sum_square_ages = 0
#sum_distances = 0
#sum_square_distances = 0
#for lon, lat, age, distance in lon_lat_age_distance_array:
# sum_ages += age
# sum_square_ages += age * age
# sum_distances += distance
# sum_square_distances += distance * distance
#num_age_distance_points = len(lon_lat_age_distance_array)
#mean_age = sum_ages / num_age_distance_points
#mean_distance = sum_distances / num_age_distance_points
#variance_age = (sum_square_ages / num_age_distance_points) - (mean_age * mean_age)
#variance_distance = (sum_square_distances / num_age_distance_points) - (mean_distance * mean_distance)
std_deviation_age = math.sqrt(variance_age)
std_deviation_distance = math.sqrt(variance_distance)
# For each ocean basin point predict compacted sediment thickness.
lon_lat_sediment_thickness_array = np.full((len(lon_lat_age_distance_array), 3), (0.0, 0.0, 0.0), dtype=float) # numpy array uses less memory
point_index = 0
for lon, lat, age, distance in lon_lat_age_distance_array:
predicted_sediment_thickness = predict_sediment_thickness(
age, distance,
mean_age, mean_distance,
std_deviation_age, std_deviation_distance,
age_distance_polynomial_coefficients)
lon_lat_sediment_thickness_array[point_index] = (lon, lat, predicted_sediment_thickness)
point_index += 1
return lon_lat_sediment_thickness_array
def write_sediment_data(
sediment_thickness_data,
output_filename_prefix,
output_grd_file = None):
# Write sediment thickness grd (".nc") file or xyz (".xy") file.
if output_grd_file:
grid_spacing, num_grid_longitudes, num_grid_latitudes = output_grd_file
sediment_thickness_grd_filename = '{}.nc'.format(output_filename_prefix)
write_grd_file(
sediment_thickness_grd_filename,
sediment_thickness_data,
grid_spacing, num_grid_longitudes, num_grid_latitudes)
else:
sediment_thickness_xyz_filename = '{}.xy'.format(output_filename_prefix)
write_xyz_file(sediment_thickness_xyz_filename, sediment_thickness_data)
if __name__ == '__main__':
import traceback
__description__ = \
"""Find the compacted sedimentation thickness (metres) at ocean basin point locations.
An input xy file can be specified. If one is not specified then a uniform lon/lat grid of points will be generated internally
(its grid spacing is controlled with the '-i' option and the grid point latitudes/longitudes use GMT gridline registration,
eg, for a 1 degree spacing the latitudes are -90, -89, ..., 90). If an input xy file is specified then it can contain
arbitrary (lon, lat) point locations (it can also contain extra 3rd, 4th, etc, columns but they are ignored).
By default a single '.xy' file containing sediment thickness is output.
Alternatively, a '.nc' grid file can be output (see the '-w' option)
If an ocean basin point falls outside the age grid or the distance grid then it is ignored.
NOTE: Separate the positional and optional arguments with '--' (workaround for bug in argparse module).
For example...
python %(prog)s -d distance.grd -g age_grid.nc -i 1 -w -- sediment_thickness
"""
try:
# The command-line parser.
parser = argparse.ArgumentParser(description = __description__, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('-d', '--distance_grid_filename', type=str, required=True,
metavar='distance_grid_filename',
help='The distance grid filename containing the mean distances, to passive continental margins, of each '
'ocean basin point (reconstructed from its time of appearance to the paleo time of the age grid).')
parser.add_argument('-g', '--age_grid_filename', type=str, required=True,
metavar='age_grid_filename',
help='The age grid filename used to find the age of each ocean basin point.')
parser.add_argument('-w', '--output_grd_file', action='store_true',
help='Generate a grid file (".nc") instead of an xyz file (".xy"). '
'By default only an xyz file is written. '
'Can only be specified if "ocean_basin_points_filename" is not specified '
'(ie, ocean basin points must be on a uniform lon/lat grid).')
parser.add_argument('-i', '--ocean_basin_grid_spacing', type=float,
help='The grid spacing (in degrees) of ocean basin points in lon/lat space. '
'The grid point latitudes/longitudes are offset by half the grid spacing '
'(eg, for a 1 degree spacing the latitudes are -89.5, -88.5, ..., 89.5). '
'Can only be specified if "ocean_basin_points_filename" is not specified.')
parser.add_argument('-m', '--mean_age_distance', type=float, nargs=2, required=True,
metavar=('mean_age', 'mean_distance'),
help='The mean of age and distance as two consecutive values. '
'These values come from the machine learning training scaler and are used to '
'remove the mean (for age and distance).')
parser.add_argument('-v', '--variance_age_distance', type=float, nargs=2, required=True,
metavar=('variance_age', 'variance_distance'),
help='The variance of age and distance as two consecutive values. '
'These values come from the machine learning training scaler and are used to '
'scale to unit variance (for age and distance).')
parser.add_argument('-x', '--clamp_age_distance', type=float, nargs=2, default=None,
metavar=('max_age', 'max_distance'),
help='Optional maximum of age and distance as two consecutive values. '
'These values come from the machine learning training stage and are used to clamp the '
'age and distance (because values above these are not represented well in trained data). '
'Defaults to no clamping (of age or distance).')
parser.add_argument('-f', '--age_distance_polynomial_coefficients', type=float, nargs=10, required=True,
metavar=('constant', 'age', 'distance', 'age*age', 'age*distance', 'distance*distance',
'age*age*age', 'age*age*distance', 'age*distance*distance', 'distance*distance*distance'),
help='The polynomial coefficients used to predict sedimentation rate from age and distance. '
'There should be ten consecutive values representing the polynomial features: '
'constant, age, distance, age*age, age*distance, distance*distance, age*age*age, age*age*distance, '
'age*distance*distance and distance*distance*distance. '
'These values come from the machine learning training scaler.')
def parse_unicode(value_string):
#try:
# # Filename uses the system encoding - decode from 'str' to 'unicode'.
# filename = value_string.decode(sys.getfilesystemencoding())
#except UnicodeDecodeError:
# raise argparse.ArgumentTypeError("Unable to convert filename %s to unicode" % value_string)
#return filename
return value_string
parser.add_argument('ocean_basin_points_filename', type=parse_unicode, nargs='?',
metavar='ocean_basin_points_filename',
help='Optional input xy file containing the ocean basin point locations. '
'If not specified then a uniform lon/lat grid of points is generated. '
'Can only be specified if "ocean_basin_grid_spacing" and "output_grd_file" are not specified.')
parser.add_argument('output_filename_prefix', type=parse_unicode,
metavar='output_filename_prefix',
help='The output filename prefix (minus filename extension).')
# Parse command-line options.
args = parser.parse_args()
if args.ocean_basin_points_filename is not None:
if args.ocean_basin_grid_spacing is not None:
raise argparse.ArgumentTypeError(
"'ocean_basin_grid_spacing' and 'ocean_basin_points_filename' cannot both be specified.")
if args.output_grd_file is not None:
raise argparse.ArgumentTypeError(
"'output_grd_file' and 'ocean_basin_points_filename' cannot both be specified.")
else: # args.ocean_basin_points_filename not specified...
if args.ocean_basin_grid_spacing is None:
raise argparse.ArgumentTypeError("'ocean_basin_grid_spacing' must be specified if 'ocean_basin_points_filename' is not specified.")
# Get the input points.
if args.ocean_basin_points_filename is not None:
input_points = read_input_points(args.ocean_basin_points_filename)
else:
input_points, num_grid_longitudes, num_grid_latitudes = generate_input_points_grid(args.ocean_basin_grid_spacing)
if input_points is None:
sys.exit(1)
sediment_thickness_data = predict_sedimentation(
input_points,
args.age_grid_filename,
args.distance_grid_filename,
args.mean_age_distance[0], # mean_age
args.mean_age_distance[1], # mean_distance
args.variance_age_distance[0], # variance_age
args.variance_age_distance[1], # variance_distance
args.age_distance_polynomial_coefficients,
args.clamp_age_distance[0] if args.clamp_age_distance is not None else None, # max_age
args.clamp_age_distance[1] if args.clamp_age_distance is not None else None) # max_distance
if sediment_thickness_data is None:
sys.exit(1)
write_sediment_data(
sediment_thickness_data,
args.output_filename_prefix,
(args.ocean_basin_grid_spacing, num_grid_longitudes, num_grid_latitudes) if args.output_grd_file else None)
sys.exit(0)
except KeyboardInterrupt:
pass
except Exception as exc:
print('ERROR: {0}'.format(exc), file=sys.stderr)
# Uncomment this to print traceback to location of raised exception.
#traceback.print_exc()
sys.exit(1)