Skip to content

Commit

Permalink
Upload LFR scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
zxdawn committed Apr 8, 2021
1 parent be01cb0 commit c90c764
Show file tree
Hide file tree
Showing 5 changed files with 449 additions and 0 deletions.
59 changes: 59 additions & 0 deletions LFR/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
## Scripts


- create_lfr.py

Create the LFR data

- create_zero_lfr.py

Generate the LFR data with zero lightning flashes

- entln.py and entln.yaml

Necessary scripts for running create_*.py.




Please check the [Satpy guide](https://satpy.readthedocs.io/en/stable/dev_guide/custom_reader.html) to add `entln.py` and `entln.yaml` by yourself as Xin Zhang did not make the Pull Request.

The quickest way is to copy `entln.py` and `entln.yaml` to `<satpy_dir>/readers/` and `<satpy_dir>/etc/readers/`, respectively.

Here's the method to check the `<satpy_dir>` after installing Satpy:

```
>>> import satpy
>>> satpy.__file__
'/home/xin/miniconda3/envs/python38/lib/python3.8/site-packages/satpy/__init__.py'
```

Then, `<satpy_dir>` is `/home/xin/miniconda3/envs/python38/lib/python3.8/site-packages/satpy/` in this case.

## Settings

```
NOTE: The LFR codes are based on WRFV4.1.4. You can check the commits, and add the LFR related codes to your WRF.
Part one: WRF CODE
&time_control
io_form_auxinput16 = 2,
frames_per_auxinput16 = 1, 1, 1, 1,
auxinput16_inname = 'wrflfr_d<domain>_<date>',
auxinput16_interval_m = 10, 10, 10, 10,
&physics:
lightning_option = 16, 16, 16, 16,
Part Two: Prepare LFR flashes data.
(1) Install necessary Python packages.
(2) Change line 27-43 in create_lfr.py.
(3) run create_lfr.py, then you will get wrflfr_d<domain>_<date>
(4) If you want to create LFR files with zero lightning flash, you can run create_zero_lfr.py
(5) put wrflfr_d<domain>_<date> under your test/em_real/
Note: Before running WRF-LDA-LFR, you can use ncview to check the gridded flashes data in wrflfr_d<domain>_<date>
```

201 changes: 201 additions & 0 deletions LFR/create_lfr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
'''
INPUT:
ENTLN DATA
WPS GEO DATA
OUTPUT:
LFR WRFINPUT FILES EVERY DELTA MINS
UPDATE:
Xin Zhang 10/19/2019
'''

import os, glob, copy
import numpy as np
import pandas as pd
import xarray as xr
from datetime import datetime, timedelta
from satpy.scene import Scene
from satpy.utils import debug_on
from pyresample.geometry import AreaDefinition, SwathDefinition

# debug_on()
# os.environ['PPP_CONFIG_DIR'] = '/xin/scripts/satpy_config'

# --------------- set paras --------------- #
# -----date---- #
# Day and hour range of the ENTLN CSV to process.
yyyy = 2020
mm = 9
dd = 1
minhour = 0
maxhour = 12
delta = 10  # unit: minutes; must match the LDA interval set in the WRF namelist
offset = timedelta(minutes=0)  # unit: minutes; e.g. 30 means forward 30 min

# ----entln---- #
# ENTLN detection efficiencies used to scale raw counts to true flash counts.
ic_de = 0.50  # intra-cloud
cg_de = 0.95  # cloud-to-ground

# ----dir---- #
wps_path = '/yin_raid/xin/tmp_lfr/v4.1.4/WPS/'   # location of geo_em.<domain>.nc
entln_path = '/xin/data/ENGTLN/'                 # location of LtgFlashPortions*.csv
output_dir = './lfr/20200901/'                   # where wrflfr_* files are written
domain = 'd01'

# Map WRF MAP_PROJ codes to proj.4 projection names.
wrf_projs = {1: 'lcc',
             2: 'npstere',
             3: 'merc',
             6: 'eqc'
             }


class entln(object):
    """Grid ENTLN lightning pulses onto the WRF domain as flash-rate files.

    Workflow: read grid/projection info from the WPS ``geo_em`` file, crop
    the day's ENTLN CSV into ``delta``-minute windows, split pulses into
    IC and CG, bin the counts onto the WRF grid with pyresample, and write
    one ``wrflfr_<domain>_<date>`` netCDF per window.

    Relies on the module-level settings (``wps_path``, ``entln_path``,
    ``output_dir``, ``domain``, ``offset``, ``ic_de``, ``cg_de``).
    """

    def __init__(self, st, et, delta):
        # st/et: datetimes bounding the period; delta: window length in minutes
        self.get_info()
        self.crop(st, et, delta)

    def get_info(self,):
        """Read domain attributes from the WPS geo file and build the
        matching pyresample ``AreaDefinition``."""
        # read basic info from geo file generated by WPS
        self.geo = xr.open_dataset(wps_path + 'geo_em.'+domain+'.nc')
        attrs = self.geo.attrs

        self.map = attrs['MAP_PROJ']  # projection code, mapped via wrf_projs
        self.mminlu = attrs['MMINLU']
        self.moad_cen_lat = attrs['MOAD_CEN_LAT']

        self.dx = attrs['DX']
        self.dy = attrs['DY']
        self.stand_lon = attrs['STAND_LON']
        self.lon_0 = attrs['CEN_LON']
        self.lat_0 = attrs['CEN_LAT']
        self.lat_1 = attrs['TRUELAT1']
        self.lat_2 = attrs['TRUELAT2']
        self.eta = attrs['BOTTOM-TOP_GRID_DIMENSION']
        # unstaggered (mass-grid) dimensions: staggered dimension minus one
        self.i = attrs['WEST-EAST_GRID_DIMENSION'] - 1
        self.j = attrs['SOUTH-NORTH_GRID_DIMENSION'] - 1

        # calculate attrs for area definition
        shape = (self.j, self.i)
        # half-extent of the domain in metres along x and y
        radius = (self.i*attrs['DX']/2, self.j*attrs['DY']/2)

        # create area as same as WRF
        area_id = 'wrf_circle'
        proj_dict = {'proj': wrf_projs[self.map],
                     'lat_0': self.lat_0,
                     'lon_0': self.lon_0,
                     'lat_1': self.lat_1,
                     'lat_2': self.lat_2,
                     'a': 6370000,  # WRF assumes a 6370 km sphere
                     'b': 6370000}
        center = (0, 0)  # domain centre in projection coordinates

        self.area_def = AreaDefinition.from_circle(area_id,
                                                   proj_dict,
                                                   center,
                                                   radius,
                                                   shape=shape)

    def crop(self, st, et, delta):
        """Crop data every ``delta`` minutes, split into IC and CG, resample
        to the WRF grid, and save each window to its own file."""
        scn = Scene(glob.glob(entln_path + 'LtgFlashPortions' + st.strftime('%Y%m%d') + '.csv'), reader='entln')
        vname = 'timestamp'  # any name in data is OK, because we just bin the counts
        scn.load([vname])

        # ---- loop through hour and delta interval ----- #
        # NOTE(review): the range stops at et.hour, so et's own hour is
        # excluded -- confirm this matches the intended time coverage.
        for h in range(st.hour, et.hour):
            for m in range(0, 60, delta):
                # 1. -----Crop by delta----- #
                # boolean mask selecting pulses inside [start, start+delta),
                # shifted by the global `offset`
                timestamp = scn[vname].timestamp.values.astype('datetime64[s]')
                if m+delta < 60:
                    cond = (timestamp >= st.replace(hour=h, minute=m)+offset) & (timestamp < st.replace(hour=h, minute=m+delta)+offset)
                else:
                    # last window of the hour ends at the top of the next hour
                    cond = (timestamp >= st.replace(hour=h, minute=m)+offset) & (timestamp < st.replace(hour=h+1, minute=0)+offset)

                # 2. -----Crop by type ----- #
                # type == 1 is IC; everything else is treated as CG
                self.ic = copy.deepcopy(scn)
                self.cg = copy.deepcopy(scn)
                cond_ic = (scn[vname].type == 1) & (cond)
                cond_cg = (scn[vname].type != 1) & (cond)
                self.ic[vname] = self.ic[vname][cond_ic]
                self.cg[vname] = self.cg[vname][cond_cg]
                # Correct attrs: the resampler reads area/start/end from attrs,
                # so rebuild them for the cropped swaths.
                # NOTE(review): correct_attrs indexes values[0]/[-1] and will
                # raise IndexError for a window with zero flashes; use
                # create_zero_lfr.py for flash-free periods.
                area_ic = SwathDefinition(lons=self.ic[vname].coords['longitude'],
                                          lats=self.ic[vname].coords['latitude']
                                          )
                area_cg = SwathDefinition(lons=self.cg[vname].coords['longitude'],
                                          lats=self.cg[vname].coords['latitude']
                                          )
                self.correct_attrs(self.ic, area_ic, vname)
                self.correct_attrs(self.cg, area_cg, vname)

                # 3. -----Crop by WRF_grid ----- #
                self.resample_WRF()
                # Flash rate per grid cell: CG counts corrected by detection
                # efficiency, scaled by 4 assuming IC/CG = 3 (total = 4*CG),
                # divided by the window length in seconds.
                # self.tl = self.ic[vname]/ic_de + self.cg[vname]/cg_de
                self.tl = 4*(self.cg[vname]/cg_de)/(60*delta)  # IC/CG = 3, unit: #/s
                self.save(vname, h, m)
                # break
        # break

    def correct_attrs(self, scn_data, area, vname):
        # Because the resample method reads the area and times from attrs,
        # we need to set them to match the cropped selection.
        scn_data[vname].attrs['area'] = area
        scn_data[vname].attrs['start_time'] = scn_data['timestamp'].values[0]
        scn_data[vname].attrs['end_time'] = scn_data['timestamp'].values[-1]

    def resample_WRF(self,):
        # bucket_count bins the pulse count into each WRF grid cell;
        # note this replaces self.ic/self.cg with resampled Scenes
        self.ic = self.ic.resample(self.area_def, resampler='bucket_count')
        self.cg = self.cg.resample(self.area_def, resampler='bucket_count')

    def save(self, vname, h, m):
        """Write the gridded flash rate of one window to a WRF-readable
        netCDF named ``wrflfr_<domain>_<date>``."""
        t = self.ic[vname].attrs['start_time']
        # file time uses the window start (h:m), keeping only the date from t
        tstr = pd.to_datetime(str(t)).strftime('%Y-%m-%d_{}:{}:00'.format(str(h).zfill(2), str(m).zfill(2)))
        ncfile = output_dir + 'wrflfr_' + domain + '_' + tstr

        # WRF expects Times as a 19-character string per time step
        Times = xr.DataArray(np.array([tstr], dtype=np.dtype(('S', 19))), dims=['Time'])

        # Because xarray's coordinates start from the upper left
        # and WRF's grids start from the lower left,
        # we need to flip the y-axis.
        lfr = xr.DataArray(np.fliplr(self.tl.values[np.newaxis, ...].astype('f4')),
                           dims=['Time', 'south_north', 'west_east'],
                           attrs={'FieldType': np.int32(104), 'MemoryOrder': 'XY',
                                  'description': 'lightning flash rate data', 'units': '', 'stagger': ''},
                           )

        # create the dataset with the global attributes WRF checks on input
        ds = xr.Dataset({'Times': Times,
                         'LFR': lfr,
                         },
                        attrs={'TITLE': 'OUTPUT FROM V4.1, Created by Xin Zhang {}'.format(datetime.utcnow()),
                               'WEST-EAST_GRID_DIMENSION': self.i+1,
                               'WEST-EAST_PATCH_END_UNSTAG': self.i,
                               'SOUTH-NORTH_GRID_DIMENSION': self.j+1,
                               'SOUTH-NORTH_PATCH_END_UNSTAG': self.j,
                               'BOTTOM-TOP_GRID_DIMENSION': self.eta,
                               'DX': self.dx, 'DY': self.dy,
                               'CEN_LAT': self.lat_0, 'CEN_LON': self.lon_0,
                               'TRUELAT1': self.lat_1, 'TRUELAT2': self.lat_2,
                               'MOAD_CEN_LAT': self.moad_cen_lat, 'STAND_LON': self.stand_lon,
                               'MAP_PROJ': self.map, 'MMINLU': self.mminlu}
                        )

        # save dataset to nc file
        print('Saving to {}'.format(ncfile))
        os.makedirs(output_dir, exist_ok=True)
        ds.to_netcdf(ncfile, format='NETCDF4',
                     encoding={
                         'Times': {
                             'zlib': True,
                             'complevel': 5,
                             'char_dim_name': 'DateStrLen'
                         },
                         'LFR': {'zlib': True, 'complevel': 5}
                     },
                     unlimited_dims={'Time': True})


if __name__ == '__main__':
    # Build the processing window from the module-level settings and run
    # the full pipeline (instantiation does all the work).
    st = datetime(yyyy, mm, dd, minhour)
    et = datetime(yyyy, mm, dd, maxhour)
    entln(st, et, delta)
53 changes: 53 additions & 0 deletions LFR/create_zero_lfr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
'''
INPUT:
LFR OUTPUT
OUTPUT:
LFR OUTPUT with zero lightning flash
UPDATE:
Xin Zhang 10/19/2019
'''

import xarray as xr
import numpy as np
import pandas as pd

# Date range of the zero-flash output files.
st = '20200901 00:00'
et = '20200901 11:50'
dt = 10  # unit: min

# Template file: any existing LFR file supplies the grid and attributes.
datadir = './lfr/20200901/'
# base = 'wrflfr_d01_2020-09-01_00:00:00'
base = 'wrflfr_d01_2020-08-31_23:00:00'
ds = xr.open_dataset(datadir + base)

# Replace the flash-rate field with zeros, keeping shape, dtype and attrs.
template = ds['LFR']
ds['LFR'] = template.copy(data=np.full_like(template, 0.))

# One output file per dt-minute step between st and et (inclusive).
dates = pd.date_range(st, et, freq=str(dt) + 'T')

# write one zero-valued file per timestamp
for stamp in dates:
    tstr = stamp.strftime('%Y-%m-%d_%H:%M:%S')

    # WRF reads the time both from the filename and the 'Times' variable
    # (a 19-character string per time step).
    ds['Times'] = xr.DataArray(np.array([tstr], dtype=np.dtype(('S', 19))),
                               dims=['Time'])

    # filename pattern: wrflfr_<domain>_<date> (prefix taken from the template)
    ncfile = datadir + base[:10] + '_' + tstr
    print('Saving to {}'.format(ncfile))
    ds.to_netcdf(ncfile, format='NETCDF4',
                 encoding={
                     'Times': {
                         'zlib': True,
                         'complevel': 5,
                         'char_dim_name': 'DateStrLen'
                     },
                     'LFR': {'zlib': True, 'complevel': 5}
                 },
                 unlimited_dims={'Time': True})
76 changes: 76 additions & 0 deletions LFR/entln.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2019 Satpy developers
#
# This file is part of satpy.
#
# satpy is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# satpy. If not, see <http://www.gnu.org/licenses/>.
"""Earth Networks Total Lightning Network Dataset reader
With over 1,700 sensors covering over 100 countries around the world,
ENTLN is the most extensive and technologically-advanced global lightning network.
The data provided is generated by the ENTLN worker.
References:
- [ENTLN] https://www.earthnetworks.com/why-us/networks/lightning
"""

import logging
import pandas as pd
import dask.array as da
import xarray as xr

from satpy import CHUNK_SIZE
from satpy.readers.file_handlers import BaseFileHandler

logger = logging.getLogger(__name__)


class ENTLNFileHandler(BaseFileHandler):
    """ASCII (CSV) reader for ENTLN pulse data.

    Parses one ``LtgFlashPortions*.csv`` file into a pandas DataFrame and
    exposes each column as a dask-backed 1-D DataArray (dim ``y``) with the
    flash type, time and position attached as coordinates.
    """

    def __init__(self, filename, filename_info, filetype_info):
        super(ENTLNFileHandler, self).__init__(filename, filename_info, filetype_info)

        # expected CSV columns and their dtypes; timestamps are parsed to
        # datetime64 by read_csv below
        names = ['type', 'timestamp', 'latitude', 'longitude', 'peakcurrent',
                 'icheight', 'numbersensors', 'icmultiplicity', 'cgmultiplicity',
                 'starttime', 'endtime', 'duration', 'ullatitude', 'ullongitude',
                 'lrlatitude', 'lrlongitude']
        types = ['int', 'str', 'float', 'float', 'float',
                 'float', 'int', 'int', 'int',
                 'str', 'str', 'float', 'float', 'float',
                 'float', 'float']
        dtypes = dict(zip(names, types))

        self.data = pd.read_csv(filename, delimiter=',', dtype=dtypes, parse_dates=['timestamp'], skipinitialspace=True)

    @property
    def start_time(self):
        """Earliest flash time in the file.

        Bug fix: the original returned ``self.data['timestamp'].index[0]``,
        which is the positional row label (always 0 for a default
        RangeIndex), not a timestamp. ``min()`` returns the actual first
        time even if rows are not chronologically sorted.
        """
        return self.data['timestamp'].min()

    @property
    def end_time(self):
        """Latest flash time in the file (see ``start_time`` for the fix)."""
        return self.data['timestamp'].max()

    def get_dataset(self, dataset_id, dataset_info):
        """Load one column as a dask-backed DataArray along dim ``y``."""
        xarr = xr.DataArray(da.from_array(self.data[dataset_id.name],
                                          chunks=CHUNK_SIZE), dims=['y'])

        # Add variables as non-dimensional y-coordinates so downstream code
        # can filter by type/time and build a SwathDefinition from lat/lon.
        for column in ['type', 'timestamp', 'latitude', 'longitude']:
            xarr[column] = ('y', self.data[column])
        xarr.attrs.update(dataset_info)

        return xarr
Loading

0 comments on commit c90c764

Please sign in to comment.