# flash_stats.py -- forked from deeplycloudy/lmatools
from __future__ import absolute_import
from __future__ import print_function
import glob
import os
import numpy as np
from lmatools.io.LMA_h5_file import read_flashes
from lmatools.stream.subset import coroutine, Branchpoint
from lmatools.flashsort.flash_stats import hull_volume
from six.moves import range
from six.moves import zip
def gen_flash_events(events, flashes):
    """ Iterate over the flashes table, pairing each flash with its events.

    For every row of ``flashes`` the rows of ``events`` whose ``'flash_id'``
    equals that flash's ``'flash_id'`` are selected with a boolean mask, and
    the pair ``(matching_events, flash)`` is yielded.
    """
    for flash in flashes:
        belongs_to_flash = (events['flash_id'] == flash['flash_id'])
        yield events[belongs_to_flash], flash
def length_from_area(A,D,b_s):
    """ Return ``sqrt(A)**D / b_s**(D-1)``.

    Works element-wise on arrays as well as on scalars.
    NOTE(review): presumably A is a flash area, D a fractal dimension and b_s
    a base length scale -- confirm against the callers.
    """
    side_length = np.sqrt(A)
    scaled_side = side_length ** D
    base_scale = b_s ** (D - 1)
    return scaled_side / base_scale
def volumetric_length_from_points(x,y,z,D, b_s):
    """ Estimate a length from the hull of the points (x, y, z).

    The points are stacked into an (N, 3) array and handed to hull_volume,
    which returns the total volume, the simplex vertices and the (signed)
    simplex volumes.

    Returns
        simplex_centroids : average of each simplex's vertices (axis 1)
        abs(simplex_volumes) : unsigned volume of each simplex
        volume : total hull volume
        L_3 : (volume**(1/3))**D / b_s**(D-1)
        length_weighted : L_3 distributed over the simplices using the
            weights S_i/P_i, normalized so that the weights sum to one.
    """
    points = np.vstack((x, y, z)).T
    volume, vertices, simplex_volumes = hull_volume(points)
    simplex_centroids = np.average(vertices[:, :], axis=1)

    # The simplex volumes are oriented (think of the surface normal direction
    # for a triangle), so they can be negative; only the magnitude is used.
    unsigned_volumes = np.abs(simplex_volumes)

    base_scale = b_s**(D-1.0)
    S_i = unsigned_volumes**(1.0/3.0)
    P_i = (S_i**D) / base_scale
    L_3 = ((volume**(1.0/3.0))**D) / base_scale

    # The ratios S_i/P_i do not sum to one, so divide by their sum.
    weights = S_i / P_i
    length_weighted = weights * L_3 / weights.sum()

    return simplex_centroids, unsigned_volumes, volume, L_3, length_weighted
def vertical_length_distribution(src_alt, simplex_alt, simplex_lengths,
        alt_bins, norm=True):
    """ given input altitudes and lengths in km, create vertical
        profiles of source counts and total length.

        src_alt : altitudes of the sources
        simplex_alt : altitudes of the simplices
        simplex_lengths : length assigned to each simplex (same size as
            simplex_alt)
        alt_bins : monotonically increasing altitude bin edges

        Returns alt_bins, bin_total_src, bin_total_length

        Bin i of the returned profiles covers alt_bins[i] <= alt < alt_bins[i+1].
        Altitudes below the first edge are accumulated in the first bin and
        altitudes at or above the last edge in the last bin.

        If norm==True, divide the counts by the bin width, returning
        km, counts/km and km/km. Otherwise just return km, counts and km.
    """
    edges = np.asarray(alt_bins)
    n_bins = edges.shape[0] - 1
    d_alt = edges[1:] - edges[:-1]

    def _bin_index(altitudes):
        # np.digitize returns i with edges[i-1] <= alt < edges[i], i.e. a
        # one-based bin number; subtract one to index the profile arrays.
        # Out-of-range values (-1 or n_bins after the shift) are clamped to
        # the first / last bin.
        idx = np.digitize(np.ravel(altitudes), edges) - 1
        return np.clip(idx, 0, n_bins - 1)

    src_bin_idx = _bin_index(src_alt)
    tri_bin_idx = _bin_index(simplex_alt)

    # bincount accumulates repeated indices, unlike fancy-indexed +=
    bin_total_src = np.bincount(src_bin_idx, minlength=n_bins).astype(float)
    bin_total_length = np.bincount(tri_bin_idx,
                                   weights=np.ravel(simplex_lengths),
                                   minlength=n_bins).astype(float)

    if norm:
        return alt_bins, bin_total_src/d_alt, bin_total_length/d_alt
    return alt_bins, bin_total_src, bin_total_length
def bin_center(bins):
    """ Return the midpoint of each pair of adjacent bin edges. """
    lower_edges = bins[:-1]
    upper_edges = bins[1:]
    return (lower_edges + upper_edges) / 2.0
def energy_plot_setup(fig=None, subplot=111, bin_unit='km'):
    """ Create an energy spectrum plot with a 5/3 slope line and a spare line
        to be used for plotting the spectrum. The spare line is initially
        located on top of the 5/3 line.

        If fig is specified, the spectrum axes will be created on that figure
        in the subplot position given by subplot. Otherwise a new pyplot
        figure is created.

        bin_unit is the unit string shown in the x axis label.

        Returns fig, spectrum_ax, fivethirds_line_artist, spectrum_line_artist
        where the last two are the Line2D artists for the black reference
        line and the semi-transparent red spare line, respectively.
    """
    if fig is None:
        import matplotlib.pyplot as plt
        fig = plt.figure()

    spectrum_ax = fig.add_subplot(subplot)
    # Raw strings: the TeX commands contain backslashes that are not valid
    # Python escape sequences.
    spectrum_ax.set_xlabel(r'Flash width ($\sqrt{A_h}$, %s)' % (bin_unit,))
    spectrum_ax.set_ylabel(r'$E(l) \mathrm{(m^2 s^{-2} km^{-1})}$')
    spectrum_ax.set_xlim(10**-1, 10**2)
    spectrum_ax.set_ylim(10**0, 10**8)
    spectrum_ax.set_yscale('log')
    spectrum_ax.set_xscale('log')

    # Reference line is evaluated on widths from 1e-2 to 1e4
    min_pwr = -2
    max_pwr = 4
    delta_pwr = 0.1
    powers = np.arange(min_pwr, max_pwr+delta_pwr, delta_pwr)
    flash_1d_extent = 10**powers
    wavenumber = (2*np.pi)/flash_1d_extent
    inertialsubrange = 10**6 * (wavenumber)**(-5.0/3.0)

    # Both lines start with the same data; callers replace the data of the
    # spare (red) line with the measured spectrum.
    spectrum_line_artist = spectrum_ax.loglog(flash_1d_extent, inertialsubrange, 'r', alpha=0.5)[0]
    fivethirds_line_artist = spectrum_ax.loglog(flash_1d_extent, inertialsubrange, 'k')[0]

    return fig, spectrum_ax, fivethirds_line_artist, spectrum_line_artist
def calculate_energy_from_area_histogram(histo, bin_edges, duration, scaling=1.0):
    """ Convert a flash-area histogram into a specific energy density spectrum.

        The result is in units of (m^2/s^2) / km, as in Bruning and MacGorman
        2013, J. Atmos. Sci.

        histo : counts for the intervals defined by bin_edges
        bin_edges : flash area bin edges, assumed to be in km^2
        duration : total number of seconds over which the flashes were counted
        scaling : factor applied to the spectrum before returning (default 1.0)

        Returns flash_1d_extent, specific_energy, where flash_1d_extent is the
        center of each bin expressed as a width (square root of area).
    """
    seconds = float(duration)
    flash_1d_extent = bin_center(np.sqrt(bin_edges))
    area_span = bin_edges[1:] - bin_edges[:-1]
    bin_widths = np.sqrt(area_span)

    # Flash rate times width (converted from km to m), squared, per bin width.
    # This should give s^-2 m^2 km^-1 = m s^-2 km^-1
    flash_rate = histo / seconds
    rate_times_width = flash_rate * flash_1d_extent * 1000.0
    specific_energy = rate_times_width**2.0 / bin_widths
    specific_energy *= scaling

    return flash_1d_extent, specific_energy
def plot_energy_from_area_histogram(histo, bin_edges, bin_unit='km', save=False, fig=None, color_cycle_length=1, color_map='gist_earth', duration=600.0):
    """ Plot the energy spectrum derived from a flash area histogram.

        histo, bin_edges : flash area histogram and its bin edges
        bin_unit : unit string used in the x axis label
        save : path of the figure file to write. If false-y (the default),
            the figure is shown interactively instead.
        fig : figure to draw on; a new one is created when None
        duration : number of seconds over which the flashes were counted

        color_cycle_length and color_map are accepted for backward
        compatibility and are not used.

        When saving, the plot title is the file name of save up to its first
        '.', and the figure is cleared after it has been written so that it
        can be reused for the next plot.
    """
    fig, spectrum_ax, fivethirds_line_artist, spectrum_artist = energy_plot_setup(
        fig=fig, bin_unit=bin_unit)

    if save:
        # Only a real path can provide a title; save=False has no name.
        spectrum_ax.set_title(os.path.basename(save).split('.')[0])

    flash_1d_extent, specific_energy = calculate_energy_from_area_histogram(histo, bin_edges, duration)
    spectrum_artist.set_data(flash_1d_extent, specific_energy)

    if save:
        fig.savefig(save)
        fig.clf()
    else:
        # pyplot is not imported at module level, so bring it in here.
        import matplotlib.pyplot as plt
        plt.show()
#######ADDED ESTIMATED ENERGY ####################
def plot_energies(footprint_bin_edges,time_array,scalar_map,flashes_series,
                  flashes_in_poly_edges,spectrum_save_file_base_en,which_energy,title):
    '''
    Plots estimated total and specific energy fields in .h5 files.

    Arguments:
      -) flash area bin edges: footprint_bin_edges
      -) times in polygon:     time_array
      -) color by time cmap:   scalar_map (ScalarMappable; its to_rgba colors
                               each spectrum and it supplies the colorbar)
      -) flash table series:   flashes_series (one flash table per time window)
      -) flashes in lasso:     flashes_in_poly_edges (time window edges; the
                               min and max are formatted into the file name)
      -) save path:            spectrum_save_file_base_en (format string with
                               {0} and {1} for the start and end times)
      -) energy field:         which_energy (field name in the flash tables)
      -) title of plot:        title

    One spectrum is drawn per time window that contains more than one flash.
    The figure is saved and then closed; nothing is returned.
    '''
    import matplotlib.pyplot as plt

    figure_energy = plt.figure(figsize=(14,9))
    ax_energy = figure_energy.add_subplot(111)

    tmin = min(flashes_in_poly_edges)
    tmax = max(flashes_in_poly_edges)

    width_bin_edges = np.sqrt(footprint_bin_edges)
    flash_1d_extent = bin_center(width_bin_edges)
    times = np.asarray(time_array)

    # zip with the window edges so that no more series than windows are used.
    windows = zip(flashes_series, flashes_in_poly_edges[:-1], flashes_in_poly_edges[1:])
    for f, (flashes, _t0, _t1) in enumerate(windows):
        if flashes.shape[0] <= 1:
            # If there is no data in this time window, skip it
            continue
        histo_cd, _ = np.histogram(np.sqrt(flashes['area']),
                                   bins=width_bin_edges,
                                   weights=np.abs(flashes[which_energy]))
        ax_energy.loglog(flash_1d_extent,
                         np.abs(histo_cd)/np.sqrt(flash_1d_extent),
                         color=scalar_map.to_rgba(times[f]),
                         alpha=0.7)

    # NOTE(review): the reference-line setup is taken to run once, after the
    # loop, since it does not depend on any per-window value -- confirm.
    wavenumber = (2.*np.pi)/flash_1d_extent
    if which_energy in ('Energy', 'total_energy'):
        ax_energy.set_ylim(1e7,1e13)
        inertialsubrange = 10**6 * (wavenumber*0.0002)**(-5.0/3.0)
    else:
        inertialsubrange = 10**1 * (wavenumber)**(5.0/3.0)

    # The limits are set descending and then flipped, which leaves the x axis
    # running from 1e-1 up to 1e2.
    ax_energy.set_xlim(1e2,1e-1)
    ax_energy.set_xlim(ax_energy.get_xlim()[::-1])

    spectrum_save_file_en = spectrum_save_file_base_en.format(tmin.strftime('%y%m%d%H%M%S'),
                                                              tmax.strftime('%y%m%d%H%M%S'))

    # scalar_map is not attached to any axes, so tell the colorbar explicitly
    # which axes to take space from.
    cbar = figure_energy.colorbar(scalar_map, ax=ax_energy)
    ax_energy.loglog(flash_1d_extent, inertialsubrange,'k-',alpha=0.5)

    ax_energy.set_title('Estimated {0} Spectra for Cell Selection'.format(title),fontsize=15)
    ax_energy.set_xlabel(r'Flash width ($\sqrt{A_h}$, $km$)',fontsize=15)
    ax_energy.set_ylabel(r'Energy ($J$)',fontsize=15)
    cbar.ax.tick_params(labelsize=15)
    ax_energy.tick_params(labelsize=15)

    figure_energy.savefig(spectrum_save_file_en)
    plt.close(figure_energy)
########################################################
@coroutine
def histogram_for_parameter(parameter, bin_edges, target=None):
    """ Coroutine that histograms one field of each array it receives.

        Every array sent in is indexed by the field name parameter and
        histogrammed with bin_edges. If a target is given, it is sent the
        tuple (histo, edges) for each received array.
    """
    while True:
        received = (yield)
        histo_and_edges = np.histogram(received[parameter], bins=bin_edges)
        if target is None:
            continue
        target.send(histo_and_edges)
@coroutine
def events_flashes_receiver(target=None):
    """ Receives (events, flashes) pairs and forwards only the flashes
        to target, if there is one.
    """
    while True:
        _events, flashes = (yield)
        if target is None:
            continue
        target.send(flashes)
@coroutine
def histogram_accumulate_plot(plotter, histo_array=None, save=False, fig=None):
    """ Coroutine that sums the histograms it receives and plots the total.

        Receives (histo, edges) tuples. The edges of every tuple must equal
        those of the first one received. Histograms are added in place into
        histo_array; when histo_array is None the first received histogram
        becomes the accumulator.

        When the coroutine is closed, plotter is called as
        plotter(histo_array, bin_edges, save=save, fig=fig).
    """
    bin_edges = None
    try:
        while True:
            histo, edges = (yield)

            # Remember the first set of edges; all later ones must match it.
            if bin_edges is not None:
                assert (bin_edges == edges).all()
            else:
                bin_edges = edges

            if histo_array is not None:
                histo_array += histo
            else:
                histo_array = histo
    except GeneratorExit:
        plotter(histo_array, bin_edges, save=save, fig=fig)
@coroutine
def raw_moments_for_parameter(parameter, preprocess=None, n_moments=5, output_target=None):
    """ Coroutine that accumulates raw moments from streamed samples.

        Each received data array is indexed by the field name parameter;
        if preprocess is given, it is applied to those values first. The sums
        of the values raised to the powers 0 ... n_moments-1 are added to a
        running total.

        When the samples are exhausted (GeneratorExit), the higher moments
        are divided by the zeroth moment and, if output_target is given, the
        array of size n_moments is sent to it.
    """
    running_totals = np.zeros(n_moments, dtype='f8')
    try:
        while True:
            data = (yield)
            values = data[parameter]
            if preprocess is not None:
                values = preprocess(values)

            # add this batch's contribution to each moment
            for order in range(n_moments):
                running_totals[order] += (values**order).sum()
    except GeneratorExit:
        sample_count = running_totals[0]
        running_totals[1:] /= sample_count
        if output_target is not None:
            output_target.send(running_totals)
def raw_moments(a, n_moments=5):
    """ Compute the raw moments of the data in array a.

        Element 0 of the result is sum(a^0), i.e. the number of samples.
        Elements m = 1...(n_moments-1) are sum(a^m) divided by element 0.

        Return is an array of shape (n_moments,).
    """
    moments = np.zeros(n_moments, dtype='f8')
    for order in range(n_moments):
        moments[order] = (a**order).sum()
    sample_count = moments[0]
    moments[1:] /= sample_count
    return moments
def central_moments_from_raw(raw):
    """ Convert raw moments (as produced with five moments) to central ones.

        Returns ctr, std where
        ctr = the zero through fourth central moments; the zeroth and first
              entries are left at zero.
        std = (number, mean, variance, skewness, kurtosis), with three
              subtracted from the kurtosis.
    """
    number, mean = raw[0], raw[1]
    second, third, fourth = raw[2], raw[3], raw[4]

    ctr = np.zeros_like(raw)
    ctr[2] = second - mean*mean
    ctr[3] = 2*(mean**3) - 3*mean*second + third
    ctr[4] = -3*(mean**4) + 6*mean*mean*second - 4*third*mean + fourth

    variance = ctr[2]
    skewness = ctr[3]/(variance**1.5)
    kurtosis = ctr[4]/(variance*variance) - 3
    std = number, mean, variance, skewness, kurtosis
    return ctr, std
def get_energy_spectrum_bins(min_pwr=-2, max_pwr = 4, delta_pwr=0.1):
    """ Return bin edges 10**p for p stepping from min_pwr by delta_pwr.

        The exponents come from np.arange(min_pwr, max_pwr+delta_pwr,
        delta_pwr); the defaults span 1e-2 to 1e4.
    """
    stop_pwr = max_pwr + delta_pwr
    exponents = np.arange(min_pwr, stop_pwr, delta_pwr)
    return 10**exponents
def footprint_stats(h5_filenames, save=False, fig=None, min_points=10,
                    base_date=None, other_analysis_targets=None, filterer=None):
    """ Read flashes from h5_filenames and plot the spectrum of flash area.

        The (events, flashes) arrays emitted by read_flashes are broadcast to
        a chain that keeps the flashes, histograms their 'area' field on the
        default energy spectrum bins, accumulates the histograms and finally
        calls plot_energy_from_area_histogram with save and fig.

        other_analysis_targets, if given, are additional targets added to the
        broadcast.

        filterer should be a non-running coroutine that receives the
        (events, flashes) arrays emitted by read_flashes and sends filtered
        (events, flashes) arrays to a target defined by this function and
        passed to filterer by keyword argument.
    """
    # The pipeline is assembled back to front: each stage needs its target.
    accumulator = histogram_accumulate_plot(plot_energy_from_area_histogram,
                                            save=save, fig=fig)
    # bin edges default to 1e-2 to 1e4
    area_histogrammer = histogram_for_parameter('area', get_energy_spectrum_bins(),
                                                target=accumulator)
    flashes_only = events_flashes_receiver(target=area_histogrammer)

    brancher = Branchpoint([flashes_only])
    if other_analysis_targets is not None:
        for extra_target in other_analysis_targets:
            brancher.targets.add(extra_target)

    rcvr = brancher.broadcast()
    if filterer is not None:
        rcvr = filterer(target=rcvr)

    read_flashes(h5_filenames, rcvr, min_points=min_points, base_date=base_date)
def plot_spectra_for_files(h5_filenames, min_points, time_criteria, distance_criteria,
                           outdir_template='./figures/thresh-{0}_dist-{1}_pts-{2}/',
                           other_analysis_targets=None, base_date=None, filterer=None):
    """ Make standard plots of the flash energy spectrum. There will be one spectrum created
        for each file in h5_filenames.

        min_points, time_criteria, distance_criteria are tuples of point and
        time-space thresholds. Every combination of them is looped over and
        used to fill in outdir_template, which needs {0},{1},{2}; these are
        replaced with the time, distance, and min_points criteria.

        The path in outdir_template will be created if it does not exist.
        Each figure is written there as '<name>-energy.pdf', where <name> is
        the input file's name up to its first '.'.

        other_analysis_targets, base_date and filterer are passed through to
        footprint_stats.
    """
    import matplotlib.pyplot as plt
    fig = plt.figure()

    for dpt in min_points:
        for dt in time_criteria:
            for dx in distance_criteria:
                outdir = outdir_template.format(dt,dx,dpt)
                if not os.path.exists(outdir):
                    os.makedirs(outdir)

                for h5_file in h5_filenames:
                    # Everything before the first '.', regardless of how many
                    # extensions follow (e.g. both x.dat.flash.h5 and x.flash.h5).
                    file_basename = os.path.basename(h5_file).split('.')[0]
                    figure_file = os.path.join(outdir, '{0}-energy.pdf'.format(file_basename))
                    footprint_stats([h5_file], save=figure_file, fig=fig, min_points=dpt,
                                    base_date=base_date, other_analysis_targets=other_analysis_targets,
                                    filterer=filterer)
if __name__ == '__main__':
    # Example driver: 0130 - 0140 UTC test data, looping over a range of
    # space/time flash sorting criteria. For each matching flash file a
    # footprint histogram PDF is written to the 'histos' directory next to it.
    #
    # Earlier runs used all times (6+ hours) at .15 s and 3 km, e.g.
    #   h5_filenames = glob.glob('29may-thresh-0.15_dist-3000.0/LYL*.flash.h5')
    #   footprint_stats(h5_filenames, min_points=10)
    import matplotlib.pyplot as plt

    fig = plt.figure()

    min_points = (10,)
    time_critera = (0.15,)
    distance_critera = (3000.0,)

    filename_template = '/Users/ebruning/code/McCaul Flash/test20040529/fixed-area-run-expandedtime/thresh-{0}_dist-{1}/LYL*.flash.h5'
    figure_template = '/Users/ebruning/code/McCaul Flash/test20040529/fixed-area-run-expandedtime/thresh-{0}_dist-{1}/histos/{2}-footprint_histogram-{3}pts.pdf'

    for dpt in min_points:
        for dt in time_critera:
            for dx in distance_critera:
                pattern = filename_template.format(dt,dx)
                print(pattern)
                h5_filenames = glob.glob(pattern)
                for h5_file in h5_filenames:
                    name_parts = os.path.split(h5_file)[-1].split('.')
                    file_basename = name_parts[:-3][0]
                    figure_file = figure_template.format(dt,dx,file_basename,dpt)
                    footprint_stats([h5_file], save=figure_file, fig=fig, min_points=dpt)

    # To open all figures in Preview, you can use a pattern like so from sh/bash
    # open thresh-0.{05,1,15,2,25}_dist*/footprint_histogram-10pts.pdf