
Developed to_fsm2oshd further; added functionality to work with fsm2oshd simulations
ArcticSnow committed Dec 20, 2023
1 parent b6cbb55 commit 3c11926
Showing 3 changed files with 169 additions and 52 deletions.
120 changes: 107 additions & 13 deletions TopoPyScale/sim_fsm2oshd.py
@@ -12,28 +12,38 @@
'''
from TopoPyScale import topo_utils as tu
from TopoPyScale import topo_export as te
from pathlib import Path
import glob, os
import pandas as pd
import xarray as xr
import linecache
import glob, os, re
import datetime as dt  # needed by to_netcdf() below for the date_created attribute


def _run_fsm2oshd(fsm_exec, nlstfile):
def _run_fsm2oshd(fsm_exec, nam_file):
'''
Function to execute FSM
Args:
fsm_exec (str): path to FSM executable
nlstfile (str): path to FSM simulation config file
nam_file (str): path to FSM simulation config file .nam
Returns:
NULL (FSM simulation file written to disk)
'''
os.system(fsm_exec + ' ' + nlstfile)
print('Simulation done: ' + nlstfile)

def fsm2oshd_sim_parallel(fsm_forest_nam='fsm_sim/fsm__forest*.nam',
fsm_open_nam='fsm_sim/fsm__open*.nam',
fsm_exec='./FSM_OSHD',
n_cores=6,
delete_nlst_files=False):
os.system(fsm_exec + ' ' + nam_file)
print('Simulation done: ' + nam_file)

# convert output to netcdf file
fout = linecache.getline(nam_file, 16).split("'")[1]
to_netcdf(fout)
os.remove(fout)
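# Editorial sketch (not part of this commit): the conversion above assumes the
# output filename is single-quoted on line 16 of the .nam file. A hypothetical,
# line-number-free alternative scans for the key instead; the key name
# 'out_file' below is a placeholder and must match the actual namelist template.
def _find_out_file(nam_file, key='out_file'):
    with open(nam_file) as f:
        for line in f:
            if line.strip().startswith(key):
                return line.split("'")[1]
    raise ValueError(f'{key} not found in {nam_file}')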

def fsm2oshd_sim_parallel(simulation_path='./fsm_sim',
fsm_forest_nam='fsm__forest*.nam',
fsm_open_nam='fsm__open*.nam',
fsm_exec='./FSM_OSHD',
n_cores=6,
delete_nlst_files=False):
'''
Function to run parallelised simulations of FSM
@@ -69,6 +79,90 @@ def fsm2oshd_sim_parallel(fsm_forest_nam='fsm_sim/fsm__forest*.nam',
for file in nlst_open:
os.remove(file)

def combine_outputs(df_centroids, fname='fsm_sim/fsm__ou*.txt'):
# TODO: placeholder, not yet implemented. Intended to combine per-point FSM
# outputs matching fname into a single dataset; see the sketch after txt2ds().
return

def txt2ds(fname):
'''
Function to read a single FSM text file output as a xarray dataset
Args:
fname (str): filename
Returns:
xarray dataset of dimension (time, point_id)
'''
df = read_pt_fsm2oshd(fname)
point_id = int( re.findall(r'\d+', fname.split('/')[-1])[-1])
print(f'---> Reading FSM data for point_id = {point_id}')
ds = xr.Dataset({
"sd": (['time'], df.sd.values),
"scf": (['time'], df.scf.values),
"swe": (['time'], df.swe.values),
"t_surface": (['time'], df.tsurf.values),
"t_soil": (['time'], df.tsoil.values),
},
coords={
"point_id": point_id,
"time": df.index,
"reference_time": pd.Timestamp(df.index[0])
})

return ds
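# Editorial sketch (not part of this commit): per-point datasets from txt2ds()
# can be concatenated along point_id, which is presumably what combine_outputs()
# above is meant to do. The glob pattern is an assumption based on the filenames
# written by to_fsm2oshd() in topo_export.py.
def _combine_pt_outputs(pattern='fsm_sim/fsm__outputs_open_*.txt'):
    flist = sorted(glob.glob(pattern))
    return xr.concat([txt2ds(f) for f in flist], dim='point_id')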


def to_netcdf(fname_fsm_sim, complevel=9):
'''
Function to convert a single FSM simulation output file (.txt) to a compressed netcdf file (.nc)
Args:
fname_fsm_sim (str): filename to convert from txt to nc
complevel (int): Compression level. 1-9
Returns:
NULL (FSM simulation file written to disk)
'''
ver_dict = tu.get_versionning()

ds = txt2ds(fname_fsm_sim)
ds.sd.attrs = {'units':'m', 'standard_name':'sd', 'long_name':'Average snow depth', '_FillValue': -9999999.0}
ds.scf.attrs = {'units':'%', 'standard_name':'scf', 'long_name':'Average snow cover fraction', '_FillValue': -9999999.0}
ds.swe.attrs = {'units':'kg m-2', 'standard_name':'swe', 'long_name':'Average snow water equivalent', '_FillValue': -9999999.0}
ds.t_surface.attrs = {'units':'°C', 'standard_name':'t_surface', 'long_name':'Average surface temperature', '_FillValue': -9999999.0}
ds.t_soil.attrs = {'units':'°C', 'standard_name':'t_soil', 'long_name':'Average soil temperature at 20 cm depth', '_FillValue': -9999999.0}
ds.attrs = {'title':'FSM2oshd simulation outputs',
'source': 'Data downscaled with TopoPyScale and simulated with FSM',
'package_TopoPyScale_version':ver_dict.get('package_version'),
'url_TopoPyScale': 'https://github.com/ArcticSnow/TopoPyScale',
'url_FSM': 'https://github.com/ArcticSnow/FSM2oshd',
'git_commit': ver_dict.get('git_commit'),
'date_created':dt.datetime.now().strftime('%Y/%m/%d %H:%M:%S')}
fout = f"{fname_fsm_sim[:-4]}.nc"
te.to_netcdf(ds, fout, complevel=complevel)
print(f"File {fout} saved")



def combine_open_forest(df_forest,
fout_forest='fsm_sim/fsm_out_forest.nc',
fout_open='fsm_sim/fsm_out_open.nc'):
'''
Function to compute weighted average of forest and open simulations
Args:
df_forest: dataframe look up table for proportion of forested pixels vs open in a given cluster
fout_forest: filename of netcdf file with forest simulation output
fout_open: filename of netcdf file with open simulation output
Returns:
xarray dataset, weighted average of the forest and open simulations
'''
dsf = xr.open_dataset(fout_forest)
dso = xr.open_dataset(fout_open)
point_id = dsf.point_id.values
ds = dsf * df_forest.proportion_with_forest[point_id] + dso * (1-df_forest.proportion_with_forest[point_id])

return ds
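# Illustrative usage (editorial, not part of this commit), assuming df_forest
# was pickled by topo_export.to_fsm2oshd() and is indexed by point_id:
#   df_forest = pd.read_pickle('fsm_sim/df_forest.pckl')
#   ds_avg = combine_open_forest(df_forest,
#                                fout_forest='fsm_sim/fsm_out_forest.nc',
#                                fout_open='fsm_sim/fsm_out_open.nc')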

def read_pt_fsm2oshd(fname):
df = pd.read_csv(fname, delim_whitespace=True, header=None, names=['year', 'month', 'day', 'hour', 'sd', 'scf', 'swe', 'tsurf', 'tsoil'])
df['time'] = pd.to_datetime(df[['year', 'month', 'day', 'hour']])
df = df.set_index('time')
return df
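# Editorial note (sketch; values invented for illustration): each row of the
# FSM2oshd text output is whitespace-delimited as
#   year month day hour sd scf swe tsurf tsoil
# e.g. 2020 12 1 6 0.52 0.94 142.1 -4.3 -1.2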
89 changes: 56 additions & 33 deletions TopoPyScale/topo_export.py
@@ -16,6 +16,8 @@
from scipy import io
from TopoPyScale import meteo_util as mu
from TopoPyScale import topo_utils as tu
from TopoPyScale import topo_param as tp
from TopoPyScale import topo_sub as ts
from multiprocessing.dummy import Pool as ThreadPool
import multiprocessing as mproc
from pathlib import Path
@@ -229,12 +231,14 @@ def to_cryogrid(ds,

def to_fsm2oshd(ds_down,
fsm_param,
df_centroids,
ds_tvt,
fname_format='fsm_sim/fsm_',
simulation_path='fsm_sim',
fname_format='fsm_',
namelist_options=None,
n_digits=None,
snow_partition_method='continuous'):
snow_partition_method='continuous',
cluster_method=True,
epsg_ds_param=2056):
'''
Function to generate forcing files for FSM2oshd (https://github.com/oshd-slf/FSM2oshd).
FSM2oshd includes canopy-structure processes
@@ -245,14 +249,14 @@
Args:
ds_down: Downscaled weather variable dataset
ds_param_canop: terrain and canopy parameter dataset
fsm_param: terrain and canopy parameter dataset
df_centroids: cluster centroids statistics (terrain + canopy)
ds_tvt (dataset): transmissivity dataset
namelist_param (dict): {'precip_multiplier':1, 'max_sd':4,'z_snow':[0.1, 0.2, 0.4], 'z_soil':[0.1, 0.2, 0.4, 0.8]}
'''

def write_fsm2oshd_namelist(row_centroids,
def write_fsm2oshd_namelist(row,
pt_name,
n_digits,
fname_format='fsm_sim/fsm_',
@@ -261,9 +265,9 @@ def write_fsm2oshd_namelist(row_centroids,
modconf=None):
# Function to write the namelist file (.nam) for each point at which FSM is run.

file_namelist = fname_format + f'_{mode}_' + str(pt_name).zfill(n_digits) + '.nam'
file_met = fname_format + '_met_' + str(pt_name).zfill(n_digits) + '.txt'
file_output = fname_format + f'_outputs_{mode}_' + str(pt_name).zfill(n_digits) + '.txt'
file_namelist = str(fname_format) + f'_{mode}_' + str(pt_name).zfill(n_digits) + '.nam'
file_met = str(fname_format) + '_met_' + str(pt_name).zfill(n_digits) + '.txt'
file_output = str(fname_format) + f'_outputs_{mode}_' + str(pt_name).zfill(n_digits) + '.txt'
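# Editorial note (sketch, not part of this commit): with fname_format built as
# simulation_path / 'fsm_', these lines yield names such as
#   fsm_sim/fsm__forest_012.nam, fsm_sim/fsm__met_012.txt,
#   fsm_sim/fsm__outputs_forest_012.txt   (point 12, n_digits=3 assumed)
# hence the double underscore in the 'fsm__forest*.nam' glob defaults used by
# fsm2oshd_sim_parallel().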

if modconf is None:
modconf = {
@@ -353,7 +357,7 @@ def write_fsm2oshd_namelist(row_centroids,
fsky_terr = {np.round(row.svf,3)}, ! terrain svf
slopemu = {np.round(row.slope,3)}, ! slope in rad
xi = 0, ! to be ignored; relevant for coarse-scale runs. See Nora's paper
Ld = {np.round(row.cluster_size,3)}, ! grid cell size in meters (used in snow fractional cover) linked to Nora's paper
Ld = {np.round(row.cluster_domain_size,3)}, ! grid cell size in meters (used in snow fractional cover) linked to Nora's paper
lat = {np.round(row.lat,3)}, ! DD.DDD
lon = {np.round(row.lon,3)}, ! DD.DDD
dem = {np.round(row.elevation,0)}, ! elevation
@@ -377,7 +381,7 @@ def write_fsm2oshd_met(ds_pt,
ds_tvt,
pt_name,
n_digits,
fname_format='fsm_sim/fsm_*.txt'):
fname_format='fsm_sim/fsm_'):
'''
Function to write meteorological forcing for FSM
@@ -391,12 +395,12 @@
'''

# for storage optimization, transmissivity (for_tau) may be stored in percent.
if ds_tvt.tvt.max()>10:
if ds_tvt.for_tau.max()>10:
scale_tvt = 100
else:
scale_tvt = 1

foutput = fname_format + '_met_' + str(pt_name).zfill(n_digits) + '.txt'
foutput = str(fname_format) + '_met_' + str(pt_name).zfill(n_digits) + '.txt'
df = pd.DataFrame()
df['year'] = pd.to_datetime(ds_pt.time.values).year
df['month'] = pd.to_datetime(ds_pt.time.values).month
@@ -418,8 +422,8 @@
arr.loc[np.isnan(arr)] = 0
df['sf24'] = np.round(arr,3)

ds_pt['t_iter'] = ds_pt.time.dt.month*10000 + ds_pt.time.dt.day*100 + ds_pt.time.dt.hour
df['tvt'] = np.round(tvt_pt.sel(time=ds_pt.t_iter.values).tvt.values,4)/scale_tvt
#ds_pt['t_iter'] = ds_pt.time.dt.month*10000 + ds_pt.time.dt.day*100 + ds_pt.time.dt.hour
df['tvt'] = np.round(ds_tvt.sel(cluster_labels=pt_name).for_tau.values,4)/scale_tvt

df.to_csv(foutput, index=False, header=False, sep=' ')
print(f'---> Met file {foutput} saved')
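# Editorial note (sketch): with for_tau stored in percent, a stored value of 85
# and scale_tvt = 100 write a transmissivity of 0.85 to the met file; data
# already in [0, 1] pass through unchanged (scale_tvt = 1).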
@@ -447,45 +451,64 @@ def write_fsm2oshd_met(ds_pt,
if n_digits is None:
n_digits = len(str(ds_down.point_id.values.max())) + 1

# extract FSM forest parameters for each clusters

# TODO: canopy parameters are to be averaged over the forest cover only; exclude pixels without forest from averaging. Correct code here:
df_centroids = pd.concat([df_centroids, fsm_param.groupby('cluster_labels').mean().to_dataframe()], axis=1)
df_centroids['cluster_size'] = np.sqrt(fsm_param.groupby('cluster_labels').count().to_dataframe().LAI5)*np.diff(fsm_param.x.values).mean()

if cluster_method:
# extract FSM forest parameters for each cluster
# Aggregate forest parameters over the forested area only
fsm_df = ts.ds_to_indexed_dataframe(fsm_param)
fsm_df['lon'], fsm_df['lat'] = tp.convert_epsg_pts(fsm_df.x, fsm_df.y, epsg_ds_param, 4326)
df_forest = fsm_df.where(fsm_df.forcov>0.).dropna().groupby('cluster_labels').mean()
df_open = fsm_df.where(fsm_df.forcov==0.).dropna().groupby('cluster_labels').mean()

dx = np.abs(np.diff(fsm_param.x)[0])
dy = np.abs(np.diff(fsm_param.y)[0])

df_forest['cluster_total_area'] = fsm_df.groupby('cluster_labels').count().elevation.values * dx * dy
df_forest['proportion_with_forest'] = fsm_df.where(fsm_df.forcov > 0.).groupby('cluster_labels').count().elevation.values / fsm_df.groupby('cluster_labels').count().elevation.values
df_forest['cluster_domain_size'] = np.sqrt(df_forest.cluster_total_area)
#df_forest['cluster_domain_size'] = np.sqrt(fsm_param.drop('cluster_labels').groupby(fsm_param.cluster_labels).count().to_dataframe().LAI5)*dx
df_forest['forest_cover'] = fsm_param.drop('cluster_labels').groupby(fsm_param.cluster_labels).mean().forcov.values
else:
pass

p = Path(simulation_path)
# rename variable columns to match the namelist function's varnames
new_name = {'LAI5':'lai5', 'LAI50':'lai50', 'vf':'vfhp', 'cc5':'fveg', 'cc50':'fves', 'mch5':'hcan'}
df_centroids = df_centroids.rename(columns=new_name)
print(df_centroids)
new_name = {'LAI5':'lai5', 'LAI50':'lai50', 'svf_for':'vfhp', 'CC5':'fveg', 'CC50':'fves', 'CH5':'hcan'}
df_forest = df_forest.rename(columns=new_name)
print(df_forest)

# ----- Loop through all points-------
# NOTE: eventually this for loop could be parallelized across several cores -----
for pt in ds_down.point_id.values:

ds_pt = ds_down.sel(point_id=pt).copy()
tvt_pt = ds_tvt.sel(cluster_labels=pt).copy()
row = df_centroids.loc[pt]
row_forest = df_forest.loc[pt]
write_fsm2oshd_met(ds_pt,
ds_tvt=ds_tvt,
n_digits=n_digits,
pt_name=pt,
fname_format=fname_format)
write_fsm2oshd_namelist(row,
fname_format=p/fname_format)
write_fsm2oshd_namelist(row_forest,
pt_name=pt,
n_digits=n_digits,
fname_format=fname_format,
mode='open',
namelist_param=namelist_param) # write open namelist
write_fsm2oshd_namelist(row,
pt_name=pt,
n_digits=n_digits,
fname_format=fname_format,
fname_format=p/fname_format,
mode='forest',
namelist_param=namelist_param) # write forest namelist

if cluster_method:
row_open = df_forest.loc[pt]
write_fsm2oshd_namelist(row_open,
pt_name=pt,
n_digits=n_digits,
fname_format=p/fname_format,
mode='open',
namelist_param=namelist_param) # write open namelist

ds_pt = None
tvt_pt = None
# [ ] add logic to compute weighted-average outputs based on forest-cover fraction per point.

df_forest.to_pickle(p/'df_forest.pckl')
return
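# Illustrative end-to-end usage (editorial, not part of this commit; module
# aliases and argument values are assumptions based on the defaults above):
#   from TopoPyScale import topo_export as te, sim_fsm2oshd as sim
#   te.to_fsm2oshd(ds_down, fsm_param, ds_tvt, simulation_path='fsm_sim')
#   sim.fsm2oshd_sim_parallel(simulation_path='./fsm_sim', fsm_exec='./FSM_OSHD', n_cores=6)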

def to_fsm(ds, fname_format='FSM_pt_*.tx', snow_partition_method='continuous', n_digits=None):
12 changes: 6 additions & 6 deletions TopoPyScale/topoclass.py
@@ -273,6 +273,8 @@ def extract_pts_param(self, method='nearest', **kwargs):
df_centroids = tp.extract_pts_param(df_centroids, self.toposub.ds_param,
method=method)
self.toposub.df_centroids = df_centroids
def extract_grid_param(self):
# TODO: placeholder, not yet implemented
return

def extract_topo_cluster_param(self):
"""
@@ -335,17 +337,15 @@ def extract_topo_param(self):
self.config.project.directory + 'outputs/' + self.config.outputs.file.df_centroids)
print(f'---> Centroids file {self.config.outputs.file.df_centroids} exists and loaded')
else:
if self.config.sampling.method in ['points', 'point']:
if self.config.sampling.method.lower() in ['points', 'point']:
self.extract_pts_param()
# if self.config.sampling.points.ID_col:
# self.config.sampling.pt_names = list(
# self.toposub.df_centroids[self.config.sampling.points.ID_col])
elif self.config.sampling.method == 'toposub':
elif self.config.sampling.method.lower() in ['toposub', 'cluster', 'clusters']:
self.extract_topo_cluster_param()
elif self.config.sampling.method == 'both':

# implement the case one want to run both toposub and a list of points
print('ERROR: method not yet implemented')
elif self.config.sampling.method == 'grid':
self.toposub.df_centroids = ts.ds_to_indexed_dataframe(self.toposub.ds_param)

else:
print('ERROR: Extraction method not available')
