-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgeosAgainstSonde.py
executable file
·604 lines (533 loc) · 30 KB
/
geosAgainstSonde.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
#!/usr/bin/env python3
import os, h5py, argparse, glob, math,sys
import numpy as np
from scipy.interpolate import interp1d
from scipy.interpolate import InterpolatedUnivariateSpline
from scipy import interp
from lib.sonde_io import readWoudc,readShadoz
from lib.readTolnet import readTolnet
# note: you'll need matplotlib ;)
from lib.plots import plotSondeAndAnalysisStats
def go ( a ):
"""
Main program for this thing.
Input:
a: argparse which gives command line arguements to do stuff with.
Output:
a plot tagged with experiment and control, along with and hdf5 with stats plotted.
"""
print(a.sonde_path)
startLon = convertLongitude360(a.start_lon)
endLon = convertLongitude360(a.end_lon)
startLat = float(a.start_lat)
endLat = float(a.end_lat)
press_edges, press_int, nint = getPressureGridForOutput()
# *Sigh* This is not great, but work your way through and figure out which data we're reading
sondeFilesCsv = glob.glob(os.path.join(a.sonde_path,'*.csv'))
sondeFilesCsv.extend( glob.glob( os.path.join( a.sonde_path,'*.CSV' ) ) )
nsondesCsv = len(sondeFilesCsv)
sondeFilesHdf = glob.glob(os.path.join(a.sonde_path,'*.hdf'))
nsondesHdf = len(sondeFilesHdf)
sondeFilesDat = glob.glob(os.path.join(a.sonde_path,'*R0.dat'))
nsondesDat = len(sondeFilesDat)
if (nsondesCsv == 0 and nsondesHdf == 0 and nsondesDat == 0 ):
sondeType = 'shadoz'
sondeFiles = glob.glob(os.path.join(a.sonde_path,'*'))
nsondes = len(sondeFiles)
sondeFiles.sort()
elif (nsondesHdf > 0 or nsondesDat > 0):
#Note: we're going to be sneaky and read the tolnet files, and put them into a big
# structure. Then, use readProfile to extract each profile from the big structure
# that we'll call sondeFiles.
sondeFiles = readTolnet( sondeFilesHdf, sondeFilesDat )
sondeType = 'tolnet'
else:
sondeType = 'woudc'
sondeFiles = sondeFilesCsv
sondeFiles.sort()
# stats dictionary to populate
ss = initProfileStats(nint)
controlAnalysisFiles = []
experimentAnalysisFiles = []
sondeData = []
sondeFilesUsed = []
# A little bit inefficient, but make two passes. First, make a list of files to dmget off dirac, and grab sonde data.
# Second loop, actually do stuff.
for s in sondeFiles:
lonSonde, latSonde, dateSonde, timeSonde, ozSonde_mPa, pressSonde_hPa, tempSonde_C = readProfile(s, sondeType)
if(dateSonde >= a.start and dateSonde <= a.end and\
convertLongitude360(lonSonde) >= startLon and convertLongitude360(lonSonde) <= endLon and\
float(latSonde) >= startLat and float(latSonde) <= endLat):
# Do stuff for the experimental run (get idx for the experiment and the control along the way)
experimentAnalysisFiles.append(getFileName(a.ops, a.experiment, dateSonde, timeSonde))
# now for the control
controlAnalysisFiles.append(getFileName(a.ops, a.control, dateSonde, timeSonde))
sondeDict = {}
sondeDict['lon'] = lonSonde
sondeDict['lat'] = latSonde
sondeDict['date'] = dateSonde
sondeDict['time'] = timeSonde
sondeDict['oz_mPa'] = ozSonde_mPa
sondeDict['press_hPa'] = pressSonde_hPa
sondeDict['temperature_C'] = tempSonde_C
sondeData.append(sondeDict)
sondeFilesUsed.append(s)
# do dmget
controlString = " ".join(controlAnalysisFiles)
print("dmget on control analysis files "+controlString)
os.system('dmget '+ controlString)
print('done dmget.')
experimentString = " ".join(experimentAnalysisFiles)
print("dmget on experiment analysis files "+experimentString)
os.system('dmget '+ experimentString)
print('done dmget...for good!')
fcnt = 1
#Actually do stuff.
for i,s in enumerate(sondeData):
if (sondeType == 'tolnet'):
print('sonde count',fcnt)
print('Sonde Date and location Date:{} Lat:{} Lon:{}'.format(s['date'],s['lat'],s['lon']))
else:
print('file count',fcnt)
print("Sonde read in: {}".format(sondeFilesUsed[i]) )
# use sonde latitude to get x,y from analysis.
idxLon,idxLat = getIndexFromAnalysis(experimentAnalysisFiles[i], s['lat'], s['lon'])
print('Reading Experiment Analysis File: {}'.format(experimentAnalysisFiles[i]))
experimentOzone = getInterpolatedOzoneFromAnalysis(experimentAnalysisFiles[i], press_int, idxLon, idxLat)
#same grid, don't need to interpolate that again...
print('Reading Control Analysis File: {}'.format(controlAnalysisFiles[i]))
controlOzone = getInterpolatedOzoneFromAnalysis(controlAnalysisFiles[i], press_int, idxLon, idxLat)
# now for the sonde
# if it's a regular sonde with lots of points do Kris' thing and average points into a layer.
if (s['press_hPa'].shape[0] > 2*press_int.shape[0]):
print('using layer average for sonde interpolation.')
interpolatedSondeOzone = interpolateSonde(np.nan, s['press_hPa'], s['oz_mPa'], press_edges)
else:
print('using spline for sonde interpolation.')
interpolatedSondeOzone = interpolateSondeSpline(np.nan, s['press_hPa'], s['oz_mPa'], press_int)
if( a.strict and ( any( np.isnan(controlOzone) ) or any( np.isnan(experimentOzone) ) ) ):
# skip this profile in stats. because you're picky and don't like when the surface pressure goes above the bottom of the profile.
print("Skipping this sonde because analysis possibly bad (all nans for ozone in analysis):{} {}".format(os.path.basename(controlAnalysisFiles[i]), os.path.basename(experimentAnalysisFiles[i])))
print("control Ozone", controlOzone)
print("experiment Ozone", experimentOzone)
print("a.strict", a.strict)
continue
else:
print("Using sonde analysis is not suspect (all nans for ozone in analysis).")
# s for statistics on profiles
ss = updateStats(ss, interpolatedSondeOzone, controlOzone, experimentOzone, s['lat'], s['lon'] )
fcnt+=1
ss = finishStats( ss )
writeH5(a, ss, press_int)
# plot only pressure above 10 hPa
if(fcnt > 2):
idx = np.where( (ss['count_both'] > 1) & ( press_int > float(a.ptop) ) )
else:
idx = np.where( (ss['count_both'] > 0) & ( press_int > float(a.ptop) ) )
plotSondeAndAnalysisStats(press_int, ss, idx, float(a.ptop), float(a.pbot), a.control, a.experiment, os.path.join(a.output,'stats_'+a.experiment+'_'+a.control+'.pdf'), a.cname, a.ename)
def convertLongitude360(lon):
"""
convert longitude from -180 to 180 to 0 to 360.
"""
return float(lon)%360
def getPressureGridForOutput():
"""
Get the pressure grid we want to interpolate to.
Output:
press_edges: pressure grid edges in hPa
press_int: pressures in the middle of the grid hPa
nint : number of levels
"""
#global grid stuff
press_edges = np.asarray([1000., 900., 800., 700., 600., 500., 400., 300., 250.,\
200., 150., 100., \
90., 80., 70., 60., 50., 40., 30., 25., 20., 15., 10., \
9., 8., 7., 6., 5.])
nint = press_edges.shape[0] - 1
press_int = np.sqrt(press_edges[0:nint]*press_edges[1::])
return press_edges, press_int, nint
def initProfileStats (nint):
"""
Initialize dictionary with number of levels we want to interpolate against. (nint)
Input:
nint: Number of pressure levels in the grid
Output:
o: dictionary of stats
o['count_sonde'] : counts (# of sondes used at each interpolated profile gridpoint)
o['count_both'] : counts where there are valid analysis profile, and sonde profile points
o['av_sonde'] : average of sonde ozone (using count_both)
o['av_sonde_standalone']: average of sonde ozone (using count_sonde)
o['av_ana1']: average of control analysis profile ozone
o['av_ana2']: average of experiment analysis profile ozone
o['std_ana1']: rms between control analysis and sondes
o['std_ana2']: rms between experiment analysis and sondes
o['std_sonde']: std of the sondes
o['lat']: lat of the sondes
o['lon']: lon of the sondes
"""
o = {}
o['count_sonde'] = np.zeros(nint)
o['count_both'] = np.zeros(nint)
o['av_sonde'] = np.zeros(nint)
o['av_sonde_standalone'] = np.zeros(nint)
o['av_ana1'] = np.zeros(nint)
o['av_ana2'] = np.zeros(nint)
o['std_ana1'] = np.zeros(nint)
o['std_ana2'] = np.zeros(nint)
o['std_sonde'] = np.zeros(nint)
o['lat'] = []
o['lon'] = []
return o
def readProfile ( s, sondeType ):
"""
Given a filename, and type of sonde, read the data into a common set of variables.
If you're bored, probably should make this more "classy" by using a factory class
and avoid if statement, but meh...
Input:
s: path to the file to be read in (sonde)
Output:
lon : longitude of the sonde (singleton)
lat : latitude of the sonde (singleton)
date : date string YYYYMMDD (singleton)
time : time string hh:mm (singleton)
ozPartialPress_mPa : ozone partial pressure in mPa (vector)
press_hPa : pressure hPa (vector)
temp_C : Temperature in Celsius (vector)
"""
if(sondeType == 'shadoz'):
d,names = readShadoz(s)
for k in list(d.keys()):
if('Longitude' in k): lonKey = k
elif('Latitude' in k): latKey = k
elif('Launch Date' in k): lDate = k
elif('Launch Time' in k ): lTime = k
lon = d[lonKey]
lat = d[latKey]
date = d[lDate]
time = d[lTime]
print(list(d['PROFILE'].keys()))
if('O3 mPa' in list(d['PROFILE'].keys())):
ozPartialPress_mPa = d['PROFILE']['O3 mPa']
elif('Ozone mPa' in list(d['PROFILE'].keys())):
ozPartialPress_mPa = d['PROFILE']['Ozone mPa']
else:
sys.exit('No valid key for Ozone mPa in SHADOZ dataset.')
press_hPa = d['PROFILE']['Press hPa']
temp_C = d['PROFILE']['Temp C']
elif(sondeType == 'tolnet'):
lon = s['Longitude']
lat = s['Latitude']
if( type( s['startTime'] ) == type( np.zeros([2] ) ) ):
date = s['startTime'][0].strftime("%Y%m%d")
time = s['startTime'][0].strftime("%H:%M")
else:
date = s['startTime'].strftime("%Y%m%d")
time = s['startTime'].strftime("%H:%M")
#1 hectopascal = 100000 millipascal
# be careful when multiplying things that might have not flat shapes (1000,) vs (1000,1)
# you'll get matlab like square of matrix behavior...not what you're expecting?!?
# flatten the thing with the extra dimenaions (1000,1)
ozPartialPress_mPa = s['O3MR']*(1e-6)*s['Press'].flatten()*100000.0
press_hPa = s['Press'].flatten()
temp_C = s['Temp'].flatten()
elif(sondeType == 'woudc'):
d = readWoudc(s)
lon = float(d['LOCATION']['Longitude'])
lat = float(d['LOCATION']['Latitude'])
date = d['TIMESTAMP']['Date'].replace('-','')
time = d['TIMESTAMP']['Time']
ozPartialPress_mPa = d['PROFILE']['O3PartialPressure']
press_hPa = d['PROFILE']['Pressure']
temp_C = d['PROFILE']['Temperature']
else:
sys.exit("error don't know what profile this is!")
return lon, lat, date, time, ozPartialPress_mPa, press_hPa, temp_C
def timeLookup(dateString, timeString):
"""
Not 100% sure how this guy works, but looks up nearest analysis time for a given sonde.
Input:
dateString: date of sonde YYYYMMDD
timeString: time of sonde hh:mm
Output:
YYYY: Year (integer) for analysis lookup
MM: Month " " " "
DD: Day " " " "
hh: Hour " " " "
mm: Minute " " " "
"""
YYYY = int(dateString[0:4])
MM = int(dateString[4:6])
DD = int(dateString[6:8])
hh = int(timeString.split(':')[0])
mm = int(timeString.split(':')[1])
# following Kris' script to look up analysis time.
timefrac = (hh + mm/60)/24.0
ihh = math.floor(timefrac*8.0+ 0.5)
if(ihh > 7): ihh = 7
hh = '{:02d}'.format(ihh*3)
hh = int(hh)
return YYYY, MM, DD, hh, mm
def getFileName(ops, experiment, dateSonde, timeSonde):
"""
Get the sonde ilename given path to users experiments, experiment, datestring, timetring
Input:
ops: path to experiments (i.e. /archive/$USER)
experiment: name of GEOS experiment
dateSonde: date string (YYYYMMDD) of the sonde
timeSonde: time string (hh:mm) of the sonde
Output:
analysisFile: give the analysis file to read in
"""
YYYY, MM, DD, hh, mm = timeLookup(dateSonde, timeSonde)
analysisFile = os.path.join(ops, experiment,'diag','Y{:04d}'.format(YYYY), 'M{:02d}'.format(MM),\
experiment+'.inst3_3d_asm_Np.'+dateSonde+'_'+'{:02d}00z.nc4'.format(hh))
return analysisFile
def getIndexFromAnalysis(analysisFile, latSonde, lonSonde):
"""
Look up index on grid for nearest i,j for a given sonde lat/lon
Input:
analysisFile: analysis file to read in
latSonde: latitude of the sonde
lonSonde: longitude of the sonde
Output:
idxLat: index associated with latitude in GEOS analysis file
idxLon: index associated with longitude in GEOS analysis file
"""
h5 = h5py.File(analysisFile,'r')
lons = h5['lon']
lats = h5['lat']
"""
#Kris' way of finding nearest neighbor.
idx, = np.where(np.asarray(lons) <= float(lonSonde))
idxLon = idx[max(idx)]
if idxLon >= nlon: idxLon = 0
idx, = np.where(np.asarray(lats) <= float(latSonde))
idxLat = idx[max(idx)]
"""
# using python and the internets...
# https://stackoverflow.com/questions/30873844/identifying-the-nearest-grid-point
idxLon = np.nanargmin((np.asarray(lons)-float(lonSonde))**2.0)
idxLat = np.nanargmin((np.asarray(lats)-float(latSonde))**2.0)
print('Index Lon, Index Lat, grid lon, grid lat, sonde lon, sonde lat')
print(idxLon, idxLat, lons[idxLon], lats[idxLat], lonSonde, latSonde)
h5.close()
return idxLon, idxLat
def getInterpolatedOzoneFromAnalysis(analysisFile, press_int, idxLon, idxLat):
"""
Read analysis file, and interpolate ozone to output grid (press_int)
Input:
analysisFile: analysis file to read in
pres_int: pressure levels to interpolate to (hPa)
idxLat: index associated with latitude in GEOS analysis file
idxLon: index associated with longitude in GEOS analysis file
Output:
interpolatedOzone: ozone partial pressure (mPa) interpolated to press_int grid at idxLon, idxLat
"""
h5 = h5py.File(analysisFile,'r')
levs = np.asarray(h5['lev'])
geosO3 = np.asarray(h5['O3'])
idxBad = np.where( ( geosO3 >= 1e15 ) )
geosO3[idxBad] = np.nan
oz = np.asarray(geosO3[0,:,idxLat,idxLon])*604229.0*0.1*np.asarray(levs)
h5.close()
pressureAnalysisFlipped = np.flipud(np.asarray(levs))
pressureInterpolatedLevelsFlipped = np.flipud(press_int)
ozoneAnalysisFlipped = np.flipud(oz)
interpolatedOzone = np.zeros(press_int.shape[0])
# For this collection, if surface pressure greater than the level, you're going to fill values of 1e15, or NaNs.
# Let us only use "good" values for interpolation.
idxOz, = np.where( (ozoneAnalysisFlipped > 0.0) & np.isfinite(ozoneAnalysisFlipped) )
fOz = InterpolatedUnivariateSpline(np.log(pressureAnalysisFlipped[idxOz]), ozoneAnalysisFlipped[idxOz], k=3)
interpolatedOzone = fOz( np.log(pressureInterpolatedLevelsFlipped) )
maxValidP = pressureAnalysisFlipped[idxOz].max()
minValidP = pressureAnalysisFlipped[idxOz].min()
# Avoid generating data points, when we don't really have any. If we got an interpolated value which is outside
# the model grid (higher than the surface pressure), don't try to interpret it!
idxOutsideGrid, = np.where( (pressureInterpolatedLevelsFlipped > maxValidP ) | (pressureInterpolatedLevelsFlipped < minValidP ) )
interpolatedOzone[idxOutsideGrid] = np.nan
interpolatedOzone = np.flipud(interpolatedOzone)
return interpolatedOzone
def interpolateSonde(undefinedValue, pressureIn, profileIn, pressEdgesOut):
"""
Interpolate sonde ozone profile to desired pressure grid(presEdgesOut)
Input:
undefinedValue: value which says the value is undefined (nan-like)
pressureIn: pressure (hPa) of the sonde
profileIn: ozone partial pressure (mPa) of the sonde
pressEdgesOut: pressure edges used to place pressures on interpolate grid
Output:
interpolatedProfile: ozone partial pressure (mPa) interpolated to output pressure grid.
"""
nlevs = pressEdgesOut.shape[0]-1
interpolatedProfile = np.zeros(nlevs)
for i in np.arange(0,nlevs):
idx, = np.where( (pressureIn <= pressEdgesOut[i]) & (pressureIn >= pressEdgesOut[i+1]) & (profileIn > 0.0) )
if(len(idx) == 0): interpolatedProfile[i] = undefinedValue
else: interpolatedProfile[i] = np.mean(profileIn[idx])
return interpolatedProfile
def interpolateSondeSpline(undefinedValue, pressureIn, profileIn, press_int):
"""
Interpolate sonde ozone profile to desired pressure press_int
Input:
undefinedValue: value which says the value is undefined (nan-like)
pressureIn: pressure (hPa) of the sonde
profileIn: ozone partial pressure (mPa) of the sonde
press_int: pressure layer for interpolation
Output:
interpolatedProfile: ozone partial pressure (mPa) interpolated to output pressure grid.
"""
interpolatedProfile = np.zeros(press_int.shape[0])
pressureInFlipped = np.flipud(np.asarray(pressureIn))
pressureInterpolatedLevelsFlipped = np.flipud(press_int)
profileInFlipped = np.flipud(profileIn)
idxOz, = np.where( (profileInFlipped > 0.0) )
fOz = InterpolatedUnivariateSpline(np.log(pressureInFlipped[idxOz]), profileInFlipped[idxOz], k=3)
#fOz = interp1d(np.log(pressureAnalysisFlipped[idxOz]), ozoneAnalysisFlipped[idxOz], kind='cubic' )
interpolatedProfile = fOz( np.log(pressureInterpolatedLevelsFlipped) )
interpolatedProfile = np.flipud(interpolatedProfile)
validPressureInFlipped = pressureInFlipped[ idxOz ]
minValid, maxValid = validPressureInFlipped.min(), validPressureInFlipped.max()
idxInvalid = np.where((press_int <= minValid) | (press_int >= maxValid ) )
interpolatedProfile[idxInvalid] = undefinedValue
return interpolatedProfile
def updateStats(ss, interpolatedSondeOzone, controlOzone, experimentOzone, lat, lon ):
"""
Compute and update stats for a given profile.
These are really running tallies.
They aren't the actual statistics until finishStats() has been run.
Input:
ss: stats dictionary
ss['count_sonde'] : counts (# of sondes used at each interpolated profile gridpoint)
ss['count_both'] : counts where there are valid analysis profile, and sonde profile points
ss['av_sonde'] : average of sonde ozone (using count_both)
ss['av_sonde_standalone']: average of sonde ozone (using count_sonde)
ss['av_ana1']: average of control analysis profile ozone
ss['av_ana2']: average of experiment analysis profile ozone
ss['std_ana1']: rms between control analysis and sondes
ss['std_ana2']: rms between experiment analysis and sondes
ss['std_sonde']: std of the sondes
interpolatedSondeOzone: ozone sonde profile interpolated to grid (mPa)
controlOzone: ozone profile from control analysis interpolated to grid (mPa)
experimentOzone: ozone profile from experiment analysis interpolated to grid (mPa)
lat : latitude of profile
lon : longitude of profile
Output:
ss: stats dictionary
"""
# The introduction of potentially bad portions of analysis profile leads to having to break out index
# into sondes for sonde and "both" for rms error stats.
# using nans may not be the greatest idea, as these lines will give you a warning, but will work.
idxGoodSonde, = np.where( np.isfinite(interpolatedSondeOzone) & (interpolatedSondeOzone > 0.0) )
idxGoodBoth, = np.where ( np.isfinite(interpolatedSondeOzone) & (interpolatedSondeOzone > 0.0) &\
np.isfinite(controlOzone) & np.isfinite(experimentOzone) & (controlOzone > 0.0) & (experimentOzone > 0.0) )
ss['count_sonde'][idxGoodSonde] = ss['count_sonde'][idxGoodSonde] + 1.0
ss['count_both'][idxGoodBoth] = ss['count_both'][idxGoodBoth] + 1.0
ss['av_sonde'][idxGoodBoth] = ss['av_sonde'][idxGoodBoth] + interpolatedSondeOzone[idxGoodBoth]
ss['av_sonde_standalone'][idxGoodSonde] = ss['av_sonde_standalone'][idxGoodSonde] + interpolatedSondeOzone[idxGoodSonde]
ss['av_ana1'][idxGoodBoth] = ss['av_ana1'][idxGoodBoth] + controlOzone[idxGoodBoth]
ss['av_ana2'][idxGoodBoth] = ss['av_ana2'][idxGoodBoth] + experimentOzone[idxGoodBoth]
#here std is really rms error. and we need Both
ss['std_ana1'][idxGoodBoth] = ss['std_ana1'][idxGoodBoth] + (controlOzone[idxGoodBoth] -interpolatedSondeOzone[idxGoodBoth])**2
ss['std_ana2'][idxGoodBoth] = ss['std_ana2'][idxGoodBoth] + (experimentOzone[idxGoodBoth] -interpolatedSondeOzone[idxGoodBoth])**2
# here std is std
ss['std_sonde'][idxGoodSonde] = ss['std_sonde'][idxGoodSonde] + (interpolatedSondeOzone[idxGoodSonde])**2
ss['lat'].append(float(lat))
ss['lon'].append(float(lon))
return ss
def finishStats( ss ):
"""
Take running tally values from stats dictionary and convert to desired statistics.
Input:
ss: stats dictionary
ss['count_sonde'] : counts (# of sondes used at each interpolated profile gridpoint)
ss['count_both'] : counts where there are valid analysis profile, and sonde profile points
ss['av_sonde'] : average of sonde ozone (using count_both)
ss['av_sonde_standalone']: average of sonde ozone (using count_sonde)
ss['av_ana1']: average of control analysis profile ozone
ss['av_ana2']: average of experiment analysis profile ozone
ss['std_ana1']: rms between control analysis and sondes
ss['std_ana2']: rms between experiment analysis and sondes
ss['std_sonde']: std of the sondes
interpolatedSondeOzone: ozone sonde profile interpolated to grid (mPa)
controlOzone: ozone profile from control analysis interpolated to grid (mPa)
experimentOzone: ozone profile from experiment analysis interpolated to grid (mPa)
Output:
ss: stats dictionary
"""
diff1 = np.zeros(ss['count_sonde'].shape[0])
diff2 = np.zeros(ss['count_sonde'].shape[0])
idxGoodSonde, = np.where( ss['count_sonde']>1 )
idxGoodBoth, = np.where( ss['count_both'] > 1 )
ss['av_ana1'][idxGoodBoth] = ss['av_ana1'][idxGoodBoth]/ss['count_both'][idxGoodBoth]
ss['av_ana2'][idxGoodBoth] = ss['av_ana2'][idxGoodBoth]/ss['count_both'][idxGoodBoth]
ss['av_sonde'][idxGoodBoth] = ss['av_sonde'][idxGoodBoth]/ss['count_both'][idxGoodBoth]
ss['av_sonde_standalone'][idxGoodBoth] = ss['av_sonde_standalone'][idxGoodBoth]/ss['count_sonde'][idxGoodBoth]
diff1[idxGoodBoth] = ss['av_ana1'][idxGoodBoth] - ss['av_sonde'][idxGoodBoth]
diff2[idxGoodBoth] = ss['av_ana2'][idxGoodBoth] - ss['av_sonde'][idxGoodBoth]
ss['std_ana1'][idxGoodBoth] = ss['std_ana1'][idxGoodBoth] - ss['count_both'][idxGoodBoth]*(diff1[idxGoodBoth])**2
ss['std_ana1'][idxGoodBoth] = np.sqrt(ss['std_ana1'][idxGoodBoth]/(ss['count_both'][idxGoodBoth]-1) )
ss['std_ana2'][idxGoodBoth] = ss['std_ana2'][idxGoodBoth] - ss['count_both'][idxGoodBoth]*(diff2[idxGoodBoth])**2
ss['std_ana2'][idxGoodBoth] = np.sqrt(ss['std_ana2'][idxGoodBoth]/(ss['count_both'][idxGoodBoth]-1) )
ss['std_sonde'][idxGoodSonde] = ss['std_sonde'][idxGoodSonde] - ss['count_sonde'][idxGoodSonde]*(ss['av_sonde_standalone'][idxGoodSonde])**2
ss['std_sonde'][idxGoodSonde] = np.sqrt( ss['std_sonde'][idxGoodSonde] / (ss['count_sonde'][idxGoodSonde]-1) )
ss['lat'] = np.asarray(ss['lat'])
ss['lon'] = np.asarray(ss['lon'])
return ss
def writeH5( a, ss, press_int ):
"""
Write hdf5 tagged with input experiment and control.
Input:
a: command line argparse
ss: stats dictionary
Output:
hdf5 file: {experiment}_{control}_output_stats.h5 output to wherever the script is running. (cwd)
"""
print("writing {}".format(os.path.join(a.output, a.experiment +'_'+ a.control +'_output_stats.h5')))
with h5py.File(os.path.join(a.output, a.experiment +'_'+ a.control +'_output_stats.h5'),'w') as f:
for k in list(ss.keys()):
dset = f.create_dataset(k,data=ss[k])
dset = f.create_dataset('pressure', data = press_int )
if __name__ == "__main__":
parser = argparse.ArgumentParser( description = 'compare ozone sondes')
parser.add_argument('--experiment', help = 'experiment', required = True, dest = 'experiment')
parser.add_argument('--control',help = 'control', required = True, dest='control')
parser.add_argument('--start', help = 'start dtg YYYYMMDDhh', required = True, dest = 'start')
parser.add_argument('--end', help = 'end dtg YYYYMMDDhh', required = True, dest = 'end')
parser.add_argument('--ops', help = 'Optional arg to specify ops archive.', required = False, dest = 'ops',default="/archive/u/bkarpowi/")
parser.add_argument('--strict', help="reject using any bad analysis levels for ozone.", dest='strict', action='store_true' )
parser.add_argument('--top', help="top pressure to use in profile.", dest='ptop', default='1.0' )
parser.add_argument('--bottom', help="bottom pressure to use in profile.", dest='pbot', default='1000.0' )
parser.add_argument('--cname', help="control name.", dest='cname', default='control' )
parser.add_argument('--ename', help="experiment name.", dest='ename', default='experiment' )
parser.add_argument('--output', help="output path.", dest='output', default=os.getcwd() )
#Default uses ascension island for SHADOZ
#parser.add_argument('--slat', help = 'southern most latitude', required = False, dest = 'start_lat',default="-10")
#parser.add_argument('--nlat', help = 'northern most latitude', required = False, dest = 'end_lat',default="0")
#parser.add_argument('--wlon', help = 'western most longitude', required = False, dest = 'start_lon',default="-15")
#parser.add_argument('--elon', help = 'eastern most longitude', required = False, dest = 'end_lon',default="-14")
# tropical pacific
parser.add_argument('--slat', help = 'southern most latitude', required = False, dest = 'start_lat',default="-20")
parser.add_argument('--nlat', help = 'northern most latitude', required = False, dest = 'end_lat',default="20")
parser.add_argument('--wlon', help = 'western most longitude', required = False, dest = 'start_lon',default="60")
parser.add_argument('--elon', help = 'eastern most longitude', required = False, dest = 'end_lon',default="-90")
#Default uses THE WORLD.
#parser.add_argument('--slat', help = 'southern most latitude', required = False, dest = 'start_lat',default="-90.0")
#parser.add_argument('--nlat', help = 'northern most latitude', required = False, dest = 'end_lat',default="90.0")
#parser.add_argument('--wlon', help = 'western most longitude', required = False, dest = 'start_lon',default="0.0")
#parser.add_argument('--elon', help = 'eastern most longitude', required = False, dest = 'end_lon',default="359.9")
#parser.add_argument('--profiles', help = 'Optional arg to specify profile location.',\
# required = False, dest = 'sonde_path',default="/discover/nobackup/bkarpowi/github/ozonesondeVerifyCp/TOLnet/UAH/")
#parser.add_argument('--profiles', help = 'Optional arg to specify profile location.',\
# required = False, dest = 'sonde_path',default="/discover/nobackup/bkarpowi/github/ozonesondeVerifyCp/ftp.cpc.ncep.noaa.gov/ndacc/station/ohp/hdf/lidar/")
#parser.add_argument('--profiles', help = 'Optional arg to specify profile location.',\
# required = False, dest = 'sonde_path',default="/discover/nobackup/bkarpowi/github/ozonesondeVerify/ftp.cpc.ncep.noaa.gov/ndacc/station/maunaloa/hdf/mwave/")
parser.add_argument('--profiles', help = 'Optional arg to specify profile location.',\
required = False, dest = 'sonde_path',default="/archive/u/kwargan/data/SHADOZ/")
#parser.add_argument('--profiles', help = 'Optional arg to specify profile location.',\
# required = False, dest = 'sonde_path',default="/discover/nobackup/bkarpowi/SHADOZ_by_station/ascen/")
#parser.add_argument('--profiles', help = 'Optional arg to specify profile location.',\
# required = False, dest = 'sonde_path',default="/archive/u/kwargan/data/ozone_sondes/woudc2018/")
#parser.add_argument('--profiles', help = 'Optional arg to specify profile location.',\
# required = False, dest = 'sonde_path',default="/archive/u/kwargan/data/ozone_sondes/SouthPole/")
a = parser.parse_args()
go ( a )