-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnetcdfNew.py
191 lines (152 loc) · 6.4 KB
/
netcdfNew.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import csv
import datetime, time
import os, sys
import netCDF4
from stat import S_ISREG, ST_CTIME, ST_MODE
# Fixed site metadata for Penlee Observatory; these values are written
# into the 'lat', 'lon' and 'altitude' variables of the output file.
station_lat = 50.317993
station_lon = -4.189128
station_altitude = 8

# Folder scanned for input files, and folder receiving the netCDF output.
# targetfolder is joined to the file name by plain string concatenation,
# so it has to end with a path separator.
sourcefolder = '/Users/arnoldas/Desktop/Fall 2016/ASRC/sourcefolder/'
targetfolder = '/Users/arnoldas/Desktop/Fall 2016/ASRC/targetfolder/'
outputfilenameprefix = 'test1'

# Naive datetime for 1970-01-01 00:00:00, used as the reference point when
# converting parsed timestamps into seconds.
epoch = datetime.datetime(1970, 1, 1)

# Unused string literal (evaluated and discarded): it preserves an earlier
# reader snippet for UTF-16, tab-delimited input.
'''
f=codecs.open(location,"rb","utf-16")
csvread=csv.reader(f,delimiter='\t')
csvread.next()
'''
def csv_to_list(csv_file, delimiter=','):
    """Return the raw text lines of *csv_file* as a list of strings.

    Each element is one line of the file, including its trailing newline.
    The lines are NOT split into columns.

    Args:
        csv_file: Path of the file to read.
        delimiter: Accepted for backward compatibility only; it is not used
            because no column splitting happens here.

    Returns:
        A new list with one string per line (empty for an empty file).

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    # The context manager guarantees the handle is closed even when reading
    # fails; the previous open(...).readlines() left that to the garbage
    # collector.
    with open(csv_file, 'r') as source:
        return source.readlines()
def append_to_avg_list(avg_list, source_list):
    """Append the mean of *source_list*, rounded to 3 decimals, to *avg_list*.

    Nothing is appended when *source_list* is empty.

    Args:
        avg_list: List that receives the rounded mean (modified in place).
        source_list: Sized collection of numbers to average.

    Returns:
        None. The result is delivered by mutating *avg_list*.
    """
    if len(source_list) > 0:
        # Force float division: this file targets Python 2, where dividing an
        # integer sum by an integer length silently truncates the mean.
        mean = sum(source_list) / float(len(source_list))
        avg_list.append(round(mean, 3))
def _parse_observations(obs_list, sourcefile):
    """Turn raw observation rows into parallel (timestamps, values) lists.

    Rows that cannot be parsed are reported on stdout and skipped.
    """
    timestamps = []
    values = []
    for row in obs_list:
        try:
            # csv_to_list() returns unsplit text lines; split them so the
            # indices below address CSV columns instead of single characters.
            # Rows that are already sequences of columns pass through as-is.
            columns = row.split(',') if hasattr(row, 'split') else row
            # the timestamp is the first 29 characters of the first column
            ob_timestamp = datetime.datetime.strptime(
                columns[0][0:29], '[%a %b %d %H:%M:%S.%f %Y')
            # the measurement is in column 6 (zero-indexed)
            ob_value = float(columns[6])
        except (ValueError, IndexError, TypeError):
            print('error in row: ' + str(row) +' in '+ sourcefile)
            continue
        # Append only after both fields parsed so the lists stay aligned.
        timestamps.append((ob_timestamp - epoch).total_seconds())
        values.append(ob_value)
    return timestamps, values


def _append_to_dataset(targetfile, timestamps, values):
    """Append the records to the 'time' and 'winds' variables of a file."""
    if not timestamps:
        return
    rootgrp = netCDF4.Dataset(targetfile, 'a', format='NETCDF4')
    try:
        times = rootgrp.variables['time']
        winds = rootgrp.variables['winds']
        # 'time' is an unlimited dimension, so writing past the current end
        # grows both variables.
        start = len(times)
        end = start + len(timestamps)
        times[start:end] = timestamps
        winds[start:end] = values
    finally:
        rootgrp.close()


def _write_new_dataset(targetfile, timestamps, values):
    """Create *targetfile* with metadata and variables, then store the records."""
    rootgrp = netCDF4.Dataset(targetfile, 'w', format='NETCDF4')
    try:
        # set the global attributes
        rootgrp.id = 'PML-Penlee-Met'
        rootgrp.naming_authority = 'Plymouth Marine Laboratory'
        rootgrp.Metadata_Conventions = 'Unidata Dataset Discovery v1.0'
        rootgrp.Conventions = 'CF-1.6'
        rootgrp.featureType = 'timeSeries'
        # publisher details
        rootgrp.publisher_name = 'Plymouth Marine Laboratory'
        rootgrp.publisher_phone = '+44 (0)1752 633100'
        rootgrp.publisher_url = 'http://www.westernchannelobservatory.org.uk/penlee'
        rootgrp.publisher_email = 'forinfo@pml.ac.uk'
        # NOTE(review): title/summary describe air temperature while the data
        # variable below is named 'winds' -- confirm which is intended.
        rootgrp.title = 'Penlee observatory meteorological data'
        rootgrp.summary = 'Air temperature measurements taken at Penlee Point observatory; measurements are taken every 4 seconds.'
        # creator details
        rootgrp.creator_name = 'Ben Calton'
        rootgrp.creator_email = 'bac@pml.ac.uk'
        rootgrp.creator_url = 'https://rsg.pml.ac.uk/'

        # create the dimensions (all unlimited)
        rootgrp.createDimension('name_str', None)
        rootgrp.createDimension('reconMeasure', None)
        rootgrp.createDimension('time', None)

        # create the variables
        station_name = rootgrp.createVariable('station_name', 'c', ('name_str',))
        station_name.cf_role = 'timeseries_id'
        station_name.long_name = 'station name'

        altitude = rootgrp.createVariable('altitude', 'f4', ())
        altitude.standard_name = 'altitude'
        altitude.long_name = 'Observatory altitude'
        altitude.units = 'm'

        latitudes = rootgrp.createVariable('lat', 'f4', ())
        latitudes.standard_name = 'latitude'
        latitudes.long_name = 'Observatory latitude'
        latitudes.units = 'degrees_north'

        longitudes = rootgrp.createVariable('lon', 'f4', ())
        longitudes.standard_name = 'longitude'
        longitudes.long_name = 'Observatory longitude'
        longitudes.units = 'degrees_east'

        # NOTE(review): parsed timestamps are float seconds but this variable
        # is 'i4'; sub-second precision is presumably lost on write -- confirm.
        times = rootgrp.createVariable('time', 'i4', ('time',))
        times.standard_name = 'time'
        times.long_name = 'Time of measurement'
        times.units = 'seconds since 1970-01-01 00:00:00'

        # The reconMeasure variables are declared but not filled here.
        elevation = rootgrp.createVariable("elevation", "f8", ("reconMeasure",))
        elevation.standard_name = 'elevation'
        elevation.units = "degrees"

        azimuth = rootgrp.createVariable("Azimuth", "f8", ("reconMeasure",))
        azimuth.units = "degrees"

        x = rootgrp.createVariable("x", "f4", ("reconMeasure",))
        x.standard_name = 'X-Wind Speed'
        x.units = 'm/s'

        # local name avoids shadowing the builtin range()
        range_var = rootgrp.createVariable("range", "f4", ("reconMeasure",))
        range_var.units = 'm'

        # ('time',) is a real one-element tuple; the former ('time') was a
        # plain string.
        winds = rootgrp.createVariable('winds', 'f4', ('time',))
        winds.coordinates = 'lat lon'
        winds.standard_name = 'winds'
        winds.units = 'm/s'

        # set the values of the variables
        station_name[:] = netCDF4.stringtoarr('Penlee', 50)
        altitude[:] = [station_altitude]
        latitudes[:] = [station_lat]
        longitudes[:] = [station_lon]
        if timestamps:
            times[:] = timestamps
            winds[:] = values
    finally:
        # close even if a write fails so the file handle is not leaked
        rootgrp.close()


def extract_and_format_data_from_source(sourcefile):
    """Read one source file and store its observations in the netCDF target.

    The target path is targetfolder + outputfilenameprefix + '.nc'.  Each row
    contributes a timestamp (seconds relative to ``epoch``) and the float in
    column 6.  If the target file does not exist it is created with its
    metadata, dimensions and variables; otherwise the new records are
    appended to its 'time' and 'winds' variables.

    Args:
        sourcefile: Path of the comma-separated source file to read.

    Returns:
        None.

    Side effects:
        Creates or extends the target netCDF file and prints one message for
        every row that cannot be parsed.
    """
    obs_list = csv_to_list(sourcefile)
    targetfile = targetfolder + outputfilenameprefix + '.nc'

    timestamps, values = _parse_observations(obs_list, sourcefile)

    if os.path.isfile(targetfile):
        # Previously this branch did nothing, so every file after the first
        # was silently dropped.
        _append_to_dataset(targetfile, timestamps, values)
    else:
        _write_new_dataset(targetfile, timestamps, values)
# Gather (st_ctime, path) pairs for every regular file in sourcefolder.
# st_ctime is the creation time on some platforms and the time of the last
# metadata change on others.
entries = []
for entry_name in os.listdir(sourcefolder):
    entry_path = os.path.join(sourcefolder, entry_name)
    entry_stat = os.stat(entry_path)
    if S_ISREG(entry_stat[ST_MODE]):
        entries.append((entry_stat[ST_CTIME], entry_path))

# Process the files in ascending (st_ctime, path) order.
entries.sort()
for cdate, path in entries:
    extract_and_format_data_from_source(path)