# create_wrf_netcdf.py: reads all of the WRF radout and surfout files, merges six variables
# (hourly precipitation, 2 m temperature and relative humidity, 2 m wind, incoming SW and LW radiation)
# and concatenates them along the time dimension.
# Update the data path, WRF model name and output file name for each run of the script.
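# Expected inputs, for reference (these exact file names are illustrative, not guaranteed):
#   path + 'radout_NARR_Morr_2008-01.nc'   hourly radiation output for one month
#   path + 'surfout_NARR_Morr_2008-01.nc'  hourly surface output for the same month
# The merged, time-concatenated record is subset to the upper Tuolumne box and written to fileName.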
#Import Python libraries
import glob
import re
from datetime import datetime

import numpy as np
import xarray
from pytz import timezone
western = timezone('US/Pacific')
utc = timezone('UTC')
path = '/Users/carina/Desktop/WRF_data/'
fileName = 'ds_reduced_NARR_Morr.nc'
# Create a file list of all the netCDF files in the data directory
fileList = glob.glob(path + '*.nc')
fileList.sort()
files_dict = {}
for file in fileList:
    radout = re.search('radout', file) is not None
    surfout = re.search('surfout', file) is not None
    if not (radout or surfout):
        continue
    #NARR = re.search('NARR', file) != None
    #Morr = re.search('Morr', file) != None
    # file names are assumed to end in the year and month (e.g. ..._YYYY-MM.nc),
    # so these slices pull out YYYY and MM
    year = file[len(file)-10:len(file)-6]
    month = file[len(file)-5:len(file)-3]
    index = "{}{}".format(year, month)
    if index in files_dict:
        file_dict = files_dict.get(index)
    else:
        file_dict = {}
    if radout:
        file_dict['radout'] = file
    if surfout:
        file_dict['surfout'] = file
    files_dict[index] = file_dict
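# After the loop, files_dict maps a 'YYYYMM' key to the radout/surfout pair for that month.
# Illustrative shape of one entry (the file names here are hypothetical):
#   files_dict['200801'] == {'radout':  path + 'radout_NARR_Morr_2008-01.nc',
#                            'surfout': path + 'surfout_NARR_Morr_2008-01.nc'}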
#use this fcn if you want to export the netCDF in UTC timezone
#def decode(d):
#    decoded = datetime.strptime(d.decode(encoding='UTF-8'), "%Y-%m-%d_%H:%M:%S")
#    return decoded
#use this fcn if you want to export in the timezone of your choice - in this case Pacific
def decode(d):
    decoded = datetime.strptime(d.decode(encoding='UTF-8'), "%Y-%m-%d_%H:%M:%S")
    western_dt = utc.localize(decoded).astimezone(western)
    return western_dt
#utc = pytz.timezone('UTC')
#naive_dt = datetime(2020, 10, 5, 15, 0, 0)
#western = timezone('US/Pacific')
#western_dt = tz.localize(naive_dt, is_dst=None).astimezone(western)
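# Illustrative example of what decode does, assuming WRF writes its Times strings in UTC:
#   decode(b'2008-01-01_08:00:00')  ->  2008-01-01 00:00:00-08:00 (US/Pacific)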
def process_month(radFile, surfFile, month):
    # engine='scipy' is used here; engine='netcdf4' may also work depending on the file format
    radDS = xarray.open_dataset(radFile, engine='scipy')
    # decode the Times variable into a series of timezone-aware datetimes
    radDates = radDS.Times.to_series().apply(decode)
    dsTotal_rad = xarray.Dataset({'SWdown': (['time', 'x', 'y'], radDS.swdnbhourly.values),
                                  'LWdown': (['time', 'x', 'y'], radDS.lwdnbhourly.values)},
                                 coords={'longitude': (['x', 'y'], radDS.lon.values),
                                         'latitude': (['x', 'y'], radDS.lat.values),
                                         'time': xarray.DataArray.from_series(radDates).values})
    surfDS = xarray.open_dataset(surfFile, engine='scipy')
    surfDates = surfDS.Times.to_series().apply(decode)
    # estimate 2 m wind speed from the 10 m u10 and v10 components (0.54 is an empirical scaling factor)
    wind2m = 0.54 * np.sqrt(surfDS.u10.values * surfDS.u10.values + surfDS.v10.values * surfDS.v10.values)
    dsTotal_surf = xarray.Dataset({'prec': (['time', 'x', 'y'], surfDS.prehourly.values),
                                   'temp2m': (['time', 'x', 'y'], surfDS.t2.values),
                                   'rh2m': (['time', 'x', 'y'], surfDS.rh2.values),
                                   'wind2m': (['time', 'x', 'y'], wind2m)},
                                  coords={'longitude': (['x', 'y'], surfDS.lon.values),
                                          'latitude': (['x', 'y'], surfDS.lat.values),
                                          'time': xarray.DataArray.from_series(surfDates).values})
    dsTotal_surf = dsTotal_surf.merge(dsTotal_rad)
    # add attributes describing each variable to the netCDF
    dsTotal_surf.attrs['prec'] = 'Precipitation Hourly [mm]'
    dsTotal_surf.attrs['temp2m'] = 'Two Meter Temperature [deg K]'
    dsTotal_surf.attrs['rh2m'] = 'Two Meter Relative Humidity [%]'
    dsTotal_surf.attrs['wind2m'] = 'Two Meter Wind Speed [m/s]'
    dsTotal_surf.attrs['SWdown'] = 'Incoming shortwave radiation [W/m2]'
    dsTotal_surf.attrs['LWdown'] = 'Incoming longwave radiation [W/m2]'
    # to write each month back to disk separately, using the year-month as the name:
    #dsTotal_surf.to_netcdf(path + 'temp1/' + month + '.nc', mode='w')
    #print('done cleaning files')
    return dsTotal_surf
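# process_month returns one xarray.Dataset per month with dimensions (time, x, y),
# data variables prec, temp2m, rh2m, wind2m, SWdown and LWdown, and 2-D longitude/latitude
# coordinates, ready to be concatenated along 'time' below.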
ds_total = None
for k in sorted(files_dict):
    print("Processing month {}".format(k))
    v = files_dict[k]
    radout_file = v['radout']
    surfout_file = v['surfout']
    ds = process_month(radout_file, surfout_file, k)
    # chunk with dask so the concatenation stays lazy and the full record need not fit in memory
    ds = ds.chunk({'x': 60, 'y': 60, 'time': 275})
    if ds_total is not None:
        ds_total = xarray.concat([ds_total, ds], dim='time')
    else:
        ds_total = ds
# ds_total.to_netcdf(path + 'ds_total_NARR_Morr')
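# Optional sketch (not part of the original workflow): since ds_total is dask-backed,
# a dask progress bar can be used to watch a full-domain export (output name illustrative):
#   from dask.diagnostics import ProgressBar
#   with ProgressBar():
#       ds_total.to_netcdf(path + 'ds_total_NARR_Morr.nc')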
#subsetting the WRF dataset for the upper Tuolumne area
bb = {'minLong': -119.5, 'maxLong': -119.13, 'minLat': 37.66, 'maxLat': 38.03}
long = ds_total.coords['longitude'].values
lat = ds_total.coords['latitude'].values
xycord = np.where((long > bb['minLong']) & (long < bb['maxLong']) & (lat > bb['minLat']) & (lat < bb['maxLat']))
# np.where returns one (x, y) index pair per matching grid cell; keep only the unique row and
# column indices, otherwise isel would repeat rows/columns for every duplicated index
xcord = np.unique(xycord[0])
ycord = np.unique(xycord[1])
# isel with index arrays does orthogonal (outer) indexing, so this keeps the full rectangle
# of rows and columns that intersect the bounding box
reduced_ds = ds_total.isel(x=xcord, y=ycord)
# df = reduced_ds.to_dataframe()
# df.to_csv(path + 'reduced.csv')
#reduced_ds.to_netcdf(path + 'ds_reduced_NARR_Morr.nc', format='NETCDF4', mode='w')
reduced_ds.to_netcdf(path + fileName)
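# Optional sanity check (illustrative, not part of the original workflow):
#   check = xarray.open_dataset(path + fileName)
#   print(check)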