Skip to content

Commit

Permalink
Upload LFR scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
zxdawn committed Apr 8, 2021
1 parent be01cb0 commit c90c764
Show file tree
Hide file tree
Showing 5 changed files with 449 additions and 0 deletions.
59 changes: 59 additions & 0 deletions LFR/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
## Scripts


- create_lfr.py

Create the LFR data

- create_zero_lfr.py

Generate the LFR data with zero lightning flashes

- entln.py and entln.yaml

Necessary scripts for running create_*.py.




Please check the [Satpy guide](https://satpy.readthedocs.io/en/stable/dev_guide/custom_reader.html) to add `entln.py` and `entln.yaml` by yourself as Xin Zhang did not make the Pull Request.

The quickest way is to copy `entln.py` and `entln.yaml` to `<satpy_dir>/readers/` and `<satpy_dir>/etc/readers/`, respectively.

Here's the method to check the `<satpy_dir>` after installing Satpy:

```
>>> import satpy
>>> satpy.__file__
'/home/xin/miniconda3/envs/python38/lib/python3.8/site-packages/satpy/__init__.py'
```

Then, `<satpy_dir>` is `/home/xin/miniconda3/envs/python38/lib/python3.8/site-packages/satpy/` in this case.

## Settings

```
NOTE: The LFR codes are based on WRFV4.1.4. You can check the commits, and add the LFR related codes to your WRF.
Part one: WRF CODE
&time_control
io_form_auxinput16 = 2,
frames_per_auxinput16 = 1, 1, 1, 1,
auxinput16_inname = 'wrflfr_d<domain>_<date>',
auxinput16_interval_m = 10, 10, 10, 10,
&physics:
lightning_option = 16, 16, 16, 16,
Part Two: Prepare LFR flashes data.
(1) Install necessary Python packages.
(2) Change line 27-43 in create_lfr.py.
(3) run create_lfr.py, then you will get wrflfr_d<domain>_<date>
(4) If you want to create LFR files with zero lightning flash, you can run create_zero_lfr.py
(5) put wrflfr_d<domain>_<date> under your test/em_real/
Note: Before running WRF-LDA-LFR, you can use ncview to check the gridded flashes data in wrflfr_d<domain>_<date>
```

201 changes: 201 additions & 0 deletions LFR/create_lfr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
'''
INPUT:
ENTLN DATA
WPS GEO DATA
OUTPUT:
LFR WRFINPUT FILES EVERY DELTA MINS
UPDATE:
Xin Zhang 10/19/2019
'''

import os, glob, copy
import numpy as np
import pandas as pd
import xarray as xr
from datetime import datetime, timedelta
from satpy.scene import Scene
from satpy.utils import debug_on
from pyresample.geometry import AreaDefinition, SwathDefinition

# debug_on()
# os.environ['PPP_CONFIG_DIR'] = '/xin/scripts/satpy_config'

# --------------- set paras --------------- #
# -----date---- #
# Day and hour range of the ENTLN CSV to process.
yyyy = 2020
mm = 9
dd = 1
minhour = 0
maxhour = 12
delta = 10  # unit: minutes; must match the LDA interval set in the WRF namelist
offset = timedelta(minutes=0)  # unit: minutes; e.g. 30 means forward 30 min

# ----entln---- #
# ENTLN detection efficiencies used to scale raw counts to true flash counts.
ic_de = 0.50  # intra-cloud
cg_de = 0.95  # cloud-to-ground

# ----dir---- #
wps_path = '/yin_raid/xin/tmp_lfr/v4.1.4/WPS/'   # location of geo_em.<domain>.nc
entln_path = '/xin/data/ENGTLN/'                 # location of LtgFlashPortions*.csv
output_dir = './lfr/20200901/'                   # where wrflfr_* files are written
domain = 'd01'

# Map WRF MAP_PROJ codes to proj.4 projection names.
wrf_projs = {1: 'lcc',
             2: 'npstere',
             3: 'merc',
             6: 'eqc'
             }


class entln(object):
    """Grid ENTLN lightning pulses onto the WRF domain as flash-rate files.

    Workflow: read grid/projection info from the WPS ``geo_em`` file, crop
    the day's ENTLN CSV into ``delta``-minute windows, split pulses into
    IC and CG, bin the counts onto the WRF grid with pyresample, and write
    one ``wrflfr_<domain>_<date>`` netCDF per window.

    Relies on the module-level settings (``wps_path``, ``entln_path``,
    ``output_dir``, ``domain``, ``offset``, ``ic_de``, ``cg_de``).
    """

    def __init__(self, st, et, delta):
        # st/et: datetimes bounding the period; delta: window length in minutes
        self.get_info()
        self.crop(st, et, delta)

    def get_info(self,):
        """Read domain attributes from the WPS geo file and build the
        matching pyresample ``AreaDefinition``."""
        # read basic info from geo file generated by WPS
        self.geo = xr.open_dataset(wps_path + 'geo_em.'+domain+'.nc')
        attrs = self.geo.attrs

        self.map = attrs['MAP_PROJ']  # projection code, mapped via wrf_projs
        self.mminlu = attrs['MMINLU']
        self.moad_cen_lat = attrs['MOAD_CEN_LAT']

        self.dx = attrs['DX']
        self.dy = attrs['DY']
        self.stand_lon = attrs['STAND_LON']
        self.lon_0 = attrs['CEN_LON']
        self.lat_0 = attrs['CEN_LAT']
        self.lat_1 = attrs['TRUELAT1']
        self.lat_2 = attrs['TRUELAT2']
        self.eta = attrs['BOTTOM-TOP_GRID_DIMENSION']
        # unstaggered (mass-grid) dimensions: staggered dimension minus one
        self.i = attrs['WEST-EAST_GRID_DIMENSION'] - 1
        self.j = attrs['SOUTH-NORTH_GRID_DIMENSION'] - 1

        # calculate attrs for area definition
        shape = (self.j, self.i)
        # half-extent of the domain in metres along x and y
        radius = (self.i*attrs['DX']/2, self.j*attrs['DY']/2)

        # create area as same as WRF
        area_id = 'wrf_circle'
        proj_dict = {'proj': wrf_projs[self.map],
                     'lat_0': self.lat_0,
                     'lon_0': self.lon_0,
                     'lat_1': self.lat_1,
                     'lat_2': self.lat_2,
                     'a': 6370000,  # WRF assumes a 6370 km sphere
                     'b': 6370000}
        center = (0, 0)  # domain centre in projection coordinates

        self.area_def = AreaDefinition.from_circle(area_id,
                                                   proj_dict,
                                                   center,
                                                   radius,
                                                   shape=shape)

    def crop(self, st, et, delta):
        """Crop data every ``delta`` minutes, split into IC and CG, resample
        to the WRF grid, and save each window to its own file."""
        scn = Scene(glob.glob(entln_path + 'LtgFlashPortions' + st.strftime('%Y%m%d') + '.csv'), reader='entln')
        vname = 'timestamp'  # any name in data is OK, because we just bin the counts
        scn.load([vname])

        # ---- loop through hour and delta interval ----- #
        # NOTE(review): the range stops at et.hour, so et's own hour is
        # excluded -- confirm this matches the intended time coverage.
        for h in range(st.hour, et.hour):
            for m in range(0, 60, delta):
                # 1. -----Crop by delta----- #
                # boolean mask selecting pulses inside [start, start+delta),
                # shifted by the global `offset`
                timestamp = scn[vname].timestamp.values.astype('datetime64[s]')
                if m+delta < 60:
                    cond = (timestamp >= st.replace(hour=h, minute=m)+offset) & (timestamp < st.replace(hour=h, minute=m+delta)+offset)
                else:
                    # last window of the hour ends at the top of the next hour
                    cond = (timestamp >= st.replace(hour=h, minute=m)+offset) & (timestamp < st.replace(hour=h+1, minute=0)+offset)

                # 2. -----Crop by type ----- #
                # type == 1 is IC; everything else is treated as CG
                self.ic = copy.deepcopy(scn)
                self.cg = copy.deepcopy(scn)
                cond_ic = (scn[vname].type == 1) & (cond)
                cond_cg = (scn[vname].type != 1) & (cond)
                self.ic[vname] = self.ic[vname][cond_ic]
                self.cg[vname] = self.cg[vname][cond_cg]
                # Correct attrs: the resampler reads area/start/end from attrs,
                # so rebuild them for the cropped swaths.
                # NOTE(review): correct_attrs indexes values[0]/[-1] and will
                # raise IndexError for a window with zero flashes; use
                # create_zero_lfr.py for flash-free periods.
                area_ic = SwathDefinition(lons=self.ic[vname].coords['longitude'],
                                          lats=self.ic[vname].coords['latitude']
                                          )
                area_cg = SwathDefinition(lons=self.cg[vname].coords['longitude'],
                                          lats=self.cg[vname].coords['latitude']
                                          )
                self.correct_attrs(self.ic, area_ic, vname)
                self.correct_attrs(self.cg, area_cg, vname)

                # 3. -----Crop by WRF_grid ----- #
                self.resample_WRF()
                # Flash rate per grid cell: CG counts corrected by detection
                # efficiency, scaled by 4 assuming IC/CG = 3 (total = 4*CG),
                # divided by the window length in seconds.
                # self.tl = self.ic[vname]/ic_de + self.cg[vname]/cg_de
                self.tl = 4*(self.cg[vname]/cg_de)/(60*delta)  # IC/CG = 3, unit: #/s
                self.save(vname, h, m)
                # break
        # break

    def correct_attrs(self, scn_data, area, vname):
        # Because the resample method reads the area and times from attrs,
        # we need to set them to match the cropped selection.
        scn_data[vname].attrs['area'] = area
        scn_data[vname].attrs['start_time'] = scn_data['timestamp'].values[0]
        scn_data[vname].attrs['end_time'] = scn_data['timestamp'].values[-1]

    def resample_WRF(self,):
        # bucket_count bins the pulse count into each WRF grid cell;
        # note this replaces self.ic/self.cg with resampled Scenes
        self.ic = self.ic.resample(self.area_def, resampler='bucket_count')
        self.cg = self.cg.resample(self.area_def, resampler='bucket_count')

    def save(self, vname, h, m):
        """Write the gridded flash rate of one window to a WRF-readable
        netCDF named ``wrflfr_<domain>_<date>``."""
        t = self.ic[vname].attrs['start_time']
        # file time uses the window start (h:m), keeping only the date from t
        tstr = pd.to_datetime(str(t)).strftime('%Y-%m-%d_{}:{}:00'.format(str(h).zfill(2), str(m).zfill(2)))
        ncfile = output_dir + 'wrflfr_' + domain + '_' + tstr

        # WRF expects Times as a 19-character string per time step
        Times = xr.DataArray(np.array([tstr], dtype=np.dtype(('S', 19))), dims=['Time'])

        # Because xarray's coordinates start from the upper left
        # and WRF's grids start from the lower left,
        # we need to flip the y-axis.
        lfr = xr.DataArray(np.fliplr(self.tl.values[np.newaxis, ...].astype('f4')),
                           dims=['Time', 'south_north', 'west_east'],
                           attrs={'FieldType': np.int32(104), 'MemoryOrder': 'XY',
                                  'description': 'lightning flash rate data', 'units': '', 'stagger': ''},
                           )

        # create the dataset with the global attributes WRF checks on input
        ds = xr.Dataset({'Times': Times,
                         'LFR': lfr,
                         },
                        attrs={'TITLE': 'OUTPUT FROM V4.1, Created by Xin Zhang {}'.format(datetime.utcnow()),
                               'WEST-EAST_GRID_DIMENSION': self.i+1,
                               'WEST-EAST_PATCH_END_UNSTAG': self.i,
                               'SOUTH-NORTH_GRID_DIMENSION': self.j+1,
                               'SOUTH-NORTH_PATCH_END_UNSTAG': self.j,
                               'BOTTOM-TOP_GRID_DIMENSION': self.eta,
                               'DX': self.dx, 'DY': self.dy,
                               'CEN_LAT': self.lat_0, 'CEN_LON': self.lon_0,
                               'TRUELAT1': self.lat_1, 'TRUELAT2': self.lat_2,
                               'MOAD_CEN_LAT': self.moad_cen_lat, 'STAND_LON': self.stand_lon,
                               'MAP_PROJ': self.map, 'MMINLU': self.mminlu}
                        )

        # save dataset to nc file
        print('Saving to {}'.format(ncfile))
        os.makedirs(output_dir, exist_ok=True)
        ds.to_netcdf(ncfile, format='NETCDF4',
                     encoding={
                         'Times': {
                             'zlib': True,
                             'complevel': 5,
                             'char_dim_name': 'DateStrLen'
                         },
                         'LFR': {'zlib': True, 'complevel': 5}
                     },
                     unlimited_dims={'Time': True})


if __name__ == '__main__':
    # Build the processing window from the module-level settings and run
    # the full pipeline (instantiation does all the work).
    st = datetime(yyyy, mm, dd, minhour)
    et = datetime(yyyy, mm, dd, maxhour)
    entln(st, et, delta)
53 changes: 53 additions & 0 deletions LFR/create_zero_lfr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
'''
INPUT:
LFR OUTPUT
OUTPUT:
LFR OUTPUT with zero lightning flash
UPDATE:
Xin Zhang 10/19/2019
'''

import xarray as xr
import numpy as np
import pandas as pd

# Date range of the zero-flash output files.
st = '20200901 00:00'
et = '20200901 11:50'
dt = 10  # unit: min

# Template file: any existing LFR file supplies the grid and attributes.
datadir = './lfr/20200901/'
# base = 'wrflfr_d01_2020-09-01_00:00:00'
base = 'wrflfr_d01_2020-08-31_23:00:00'
ds = xr.open_dataset(datadir + base)

# Replace the flash-rate field with zeros, keeping shape, dtype and attrs.
template = ds['LFR']
ds['LFR'] = template.copy(data=np.full_like(template, 0.))

# One output file per dt-minute step between st and et (inclusive).
dates = pd.date_range(st, et, freq=str(dt) + 'T')

# write one zero-valued file per timestamp
for stamp in dates:
    tstr = stamp.strftime('%Y-%m-%d_%H:%M:%S')

    # WRF reads the time both from the filename and the 'Times' variable
    # (a 19-character string per time step).
    ds['Times'] = xr.DataArray(np.array([tstr], dtype=np.dtype(('S', 19))),
                               dims=['Time'])

    # filename pattern: wrflfr_<domain>_<date> (prefix taken from the template)
    ncfile = datadir + base[:10] + '_' + tstr
    print('Saving to {}'.format(ncfile))
    ds.to_netcdf(ncfile, format='NETCDF4',
                 encoding={
                     'Times': {
                         'zlib': True,
                         'complevel': 5,
                         'char_dim_name': 'DateStrLen'
                     },
                     'LFR': {'zlib': True, 'complevel': 5}
                 },
                 unlimited_dims={'Time': True})
76 changes: 76 additions & 0 deletions LFR/entln.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2019 Satpy developers
#
# This file is part of satpy.
#
# satpy is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# satpy. If not, see <http://www.gnu.org/licenses/>.
"""Earth Networks Total Lightning Network Dataset reader
With over 1,700 sensors covering over 100 countries around the world,
ENTLN is the most extensive and technologically-advanced global lightning network.
The data provided is generated by the ENTLN worker.
References:
- [ENTLN] https://www.earthnetworks.com/why-us/networks/lightning
"""

import logging
import pandas as pd
import dask.array as da
import xarray as xr

from satpy import CHUNK_SIZE
from satpy.readers.file_handlers import BaseFileHandler

logger = logging.getLogger(__name__)


class ENTLNFileHandler(BaseFileHandler):
    """ASCII (CSV) reader for ENTLN pulse data.

    Parses one ``LtgFlashPortions*.csv`` file into a pandas DataFrame and
    exposes each column as a dask-backed 1-D DataArray (dim ``y``) with the
    flash type, time and position attached as coordinates.
    """

    def __init__(self, filename, filename_info, filetype_info):
        super(ENTLNFileHandler, self).__init__(filename, filename_info, filetype_info)

        # expected CSV columns and their dtypes; timestamps are parsed to
        # datetime64 by read_csv below
        names = ['type', 'timestamp', 'latitude', 'longitude', 'peakcurrent',
                 'icheight', 'numbersensors', 'icmultiplicity', 'cgmultiplicity',
                 'starttime', 'endtime', 'duration', 'ullatitude', 'ullongitude',
                 'lrlatitude', 'lrlongitude']
        types = ['int', 'str', 'float', 'float', 'float',
                 'float', 'int', 'int', 'int',
                 'str', 'str', 'float', 'float', 'float',
                 'float', 'float']
        dtypes = dict(zip(names, types))

        self.data = pd.read_csv(filename, delimiter=',', dtype=dtypes, parse_dates=['timestamp'], skipinitialspace=True)

    @property
    def start_time(self):
        """Earliest flash time in the file.

        Bug fix: the original returned ``self.data['timestamp'].index[0]``,
        which is the positional row label (always 0 for a default
        RangeIndex), not a timestamp. ``min()`` returns the actual first
        time even if rows are not chronologically sorted.
        """
        return self.data['timestamp'].min()

    @property
    def end_time(self):
        """Latest flash time in the file (see ``start_time`` for the fix)."""
        return self.data['timestamp'].max()

    def get_dataset(self, dataset_id, dataset_info):
        """Load one column as a dask-backed DataArray along dim ``y``."""
        xarr = xr.DataArray(da.from_array(self.data[dataset_id.name],
                                          chunks=CHUNK_SIZE), dims=['y'])

        # Add variables as non-dimensional y-coordinates so downstream code
        # can filter by type/time and build a SwathDefinition from lat/lon.
        for column in ['type', 'timestamp', 'latitude', 'longitude']:
            xarr[column] = ('y', self.data[column])
        xarr.attrs.update(dataset_info)

        return xarr
Loading

0 comments on commit c90c764

Please sign in to comment.