From 16227b76bd480a07d2834241d42b6e1b91567bf9 Mon Sep 17 00:00:00 2001
From: andres
Date: Fri, 8 Oct 2021 13:39:13 +0200
Subject: [PATCH] v 0.0.1 commit

---
 Pipeline/README.md                             |  14 +
 Pipeline/docs/README.md                        |   0
 Pipeline/src/README.md                         |   0
 .../set_arguments_pipeline.cpython-36.pyc      | Bin 0 -> 1733 bytes
 .../src/modules/data_exchange/__init__.py      |   0
 .../src/modules/data_processing/__init__.py    |   0
 .../data_processing/feature_extraction.py      | 136 ++++++++
 .../data_processing/knmi_computation.py        | 272 ++++++++++++++++
 .../data_processing/knmi_interpolation.py      | 233 +++++++++++++
 Pipeline/src/modules/data_retrieval.py         |  25 ++
 .../src/modules/data_retrieval/__init__.py     |   0
 .../__pycache__/__init__.cpython-36.pyc        | Bin 0 -> 206 bytes
 ...data_source_files_retrieval.cpython-36.pyc  | Bin 0 -> 6372 bytes
 .../knmi_data_source_files_retrieval.py        | 268 +++++++++++++++
 .../modules/data_retrieval/modis_retrieval.py  | 264 +++++++++++++++
 .../src/modules/set_arguments_pipeline.py      |  40 +++
 Pipeline/src/modules/test.py                   | 307 ++++++++++++++++++
 Pipeline/src/modules/utils/__init__.py         |   0
 .../utils/__pycache__/__init__.cpython-36.pyc  | Bin 0 -> 197 bytes
 .../utils/__pycache__/api_keys.cpython-36.pyc  | Bin 0 -> 737 bytes
 .../argument_handler.cpython-36.pyc            | Bin 0 -> 2597 bytes
 .../set_arguments_pipeline.cpython-36.pyc      | Bin 0 -> 1721 bytes
 .../src/modules/utils/argument_handler.py      |  64 ++++
 .../src/modules/utils/data_base_management.py  |  98 ++++++
 Pipeline/src/modules/utils/set_up_database.py  |  19 ++
 Validation_Pipeline/README.md                  |   6 +
 .../validation_interpolation_knmi.R            |  80 +++++
 27 files changed, 1826 insertions(+)
 create mode 100644 Pipeline/README.md
 create mode 100644 Pipeline/docs/README.md
 create mode 100644 Pipeline/src/README.md
 create mode 100644 Pipeline/src/modules/__pycache__/set_arguments_pipeline.cpython-36.pyc
 create mode 100644 Pipeline/src/modules/data_exchange/__init__.py
 create mode 100644 Pipeline/src/modules/data_processing/__init__.py
 create mode 100644 Pipeline/src/modules/data_processing/feature_extraction.py
 create mode 100644 Pipeline/src/modules/data_processing/knmi_computation.py
 create mode 100644 Pipeline/src/modules/data_processing/knmi_interpolation.py
 create mode 100644 Pipeline/src/modules/data_retrieval.py
 create mode 100644 Pipeline/src/modules/data_retrieval/__init__.py
 create mode 100644 Pipeline/src/modules/data_retrieval/__pycache__/__init__.cpython-36.pyc
 create mode 100644 Pipeline/src/modules/data_retrieval/__pycache__/knmi_data_source_files_retrieval.cpython-36.pyc
 create mode 100644 Pipeline/src/modules/data_retrieval/knmi_data_source_files_retrieval.py
 create mode 100644 Pipeline/src/modules/data_retrieval/modis_retrieval.py
 create mode 100644 Pipeline/src/modules/set_arguments_pipeline.py
 create mode 100644 Pipeline/src/modules/test.py
 create mode 100644 Pipeline/src/modules/utils/__init__.py
 create mode 100644 Pipeline/src/modules/utils/__pycache__/__init__.cpython-36.pyc
 create mode 100644 Pipeline/src/modules/utils/__pycache__/api_keys.cpython-36.pyc
 create mode 100644 Pipeline/src/modules/utils/__pycache__/argument_handler.cpython-36.pyc
 create mode 100644 Pipeline/src/modules/utils/__pycache__/set_arguments_pipeline.cpython-36.pyc
 create mode 100644 Pipeline/src/modules/utils/argument_handler.py
 create mode 100644 Pipeline/src/modules/utils/data_base_management.py
 create mode 100644 Pipeline/src/modules/utils/set_up_database.py
 create mode 100644 Validation_Pipeline/README.md
 create mode 100644 Validation_Pipeline/validation_interpolation_knmi.R

diff --git
a/Pipeline/README.md b/Pipeline/README.md
new file mode 100644
index 0000000..b85ed0c
--- /dev/null
+++ b/Pipeline/README.md
@@ -0,0 +1,14 @@
+# Tick Spatio-temporal dynamics modeler
+
+The tick spatio-temporal dynamics modeler uses a facade pattern to hide the complexity of the implementation while providing a transparent interface for users.
+The facade class, named TickPipeline, uses several underlying classes to implement the pipeline, namely ExternalDataHandling and ModelingSpatioTemporal.
+
+## ExternalDataHandling
+
+This class is responsible for data retrieval and preprocessing, and for providing an interface through which external applications can read the resulting data.
+
+Results should be returned via a web API (e.g. using Flask).
+
+## TickModeler
+
+This class is the main implementation of the modeling stage. The model generator class is designed to implement a model-selection pre-stage, training, evaluation of results, and an independent method for forward forecasting. It is also designed to allow extended implementations of different models (initially as an interface for processing inputs and outputs from an external process).
\ No newline at end of file
diff --git a/Pipeline/docs/README.md b/Pipeline/docs/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/Pipeline/src/README.md b/Pipeline/src/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/Pipeline/src/modules/__pycache__/set_arguments_pipeline.cpython-36.pyc b/Pipeline/src/modules/__pycache__/set_arguments_pipeline.cpython-36.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6070458097d350eec45bfa2de18cea2f73ce6754
GIT binary patch
literal 1733
= max(self.periods):
+            self.create_rasters(rasters_to_compute, dates_to_compute)
+
+class MODIS_Compute(Knmi_create_features):
+    product = None
+    vars_names = []
+
+    def __init__(self, parent_folder='./data'):
+        super().__init__(parent_folder)
+
+    def set_product(self, product):
+        self.product = product
+
+    def set_vars_names(self, vars_names):
+        self.vars_names = vars_names
+
+    def array_to_tif(self, data_to_save):
+        band_array = data_to_save['band_array']
+        band_ds = data_to_save['template']
+        band_path = self.output_filename
+        scale_raster_values = 1.0000
+        try:
+            band_array = np.round(band_array * scale_raster_values, decimals=4)
+            out_ds = 
gdal.GetDriverByName('GTiff').Create(band_path, + band_array.shape[0], + band_array.shape[1], + 1, + gdal.GDT_Int16, + ['COMPRESS=LZW', 'TILED=YES']) + out_ds.SetGeoTransform(band_ds.GetGeoTransform()) + out_ds.SetProjection(band_ds.GetProjection()) + out_ds.GetRasterBand(1).SetNoDataValue(-32768 * scale_raster_values) + out_ds.GetRasterBand(1).WriteArray(band_array) + print('file {0} created successfully'.format(band_path)) + except Exception as e: print(e) + + def NDWI_pipeline(self, dt): + rasters = ['{0}_{1}_{2}.tiff'.format(dt, self.product, element) for element in self.vars_names] + raster_nir_filename = rasters[0] + raster_sw_filename = rasters[1] + raster_template = gdal.Open(raster_nir_filename) + raster_nir = self.raster_to_array(raster_nir_filename) + raster_sw = self.raster_to_array(raster_sw_filename) + array_ndwi = (raster_nir.astype(float) - raster_sw.astype(float))/(raster_nir.astype(float) + raster_sw.astype(float)) + self.set_filename_output('{0}_{1}_{2}.tiff'.format(dt, self.product, 'ndwi')) + self.array_to_tif({'band_array':array_ndwi, 'template':raster_template}) + print(self.output_filename) + + def compute_NDWI(self): + days_timedelta = self.end_date - self.start_date + days_int = days_timedelta.days + if days_int < 0: + raise Exception('Start date should be set before end date') + rasters_to_compute = [] + dates_to_compute = [] + last_ignored_day = None + days_timedelta = self.end_date - self.start_date + days_int = days_timedelta.days + if days_int < 0: + raise Exception('Start date should be set before end date') + start_date_filter = self.start_date.strftime('%Y/%m/%d') + end_date_filter = self.end_date.strftime('%Y/%m/%d') + range_dates = pd.date_range(start_date_filter, end_date_filter, freq="MS") + diff_months = len(range_dates) + for i in range(1, diff_months + 1): + today = self.start_date + j = i - 1 + d = today.replace(day=1) + relativedelta(months=j) + dt = d.strftime('%Y.%m.%d') + self.NDWI_pipeline(dt) + + def compute_aggregated_stats(self): + days_timedelta = self.end_date - self.start_date + days_int = days_timedelta.days + if days_int < 0: + raise Exception('Start date should be set before end date') + start_date_filter = self.start_date.strftime('%Y') + end_date_filter = self.end_date.strftime('%Y') + for year in range(int(start_date_filter), int(end_date_filter)+1): + os_generator = os.walk(".") + da_list = [] + for root, dirs, file_list in os_generator: + for file_name in file_list: + if file_name.find(str(year)) != -1: + da_raster_content = self.raster_to_array(file_name) + da_list.append(da_raster_content) + da_ensembled_raster = np.stack(da_list, axis=-1) + raster_result_max = np.apply_along_axis(np.max, 2, da_ensembled_raster) + raster_result_min = np.apply_along_axis(np.min, 2, da_ensembled_raster) + raster_result_range = raster_result_max - raster_result_min + raster_template = gdal.Open(file_name) + self.set_filename_output('{0}_{1}_{2}.tiff'.format(year, self.target_folder, 'min')) + self.array_to_tif({'band_array':raster_result_min, 'template':raster_template}) + self.set_filename_output('{0}_{1}_{2}.tiff'.format(year, self.target_folder, 'max')) + self.array_to_tif({'band_array':raster_result_max, 'template':raster_template}) + self.set_filename_output('{0}_{1}_{2}.tiff'.format(year, self.target_folder, 'range')) + self.array_to_tif({'band_array':raster_result_range, 'template':raster_template}) + print('rasters created successfully') + +class LGN_Compute(Knmi_create_features): + + def __init__(self, parent_folder): + 
Knmi_create_features.__init__(self, parent_folder) + super().__init__(parent_folder) + + def resample(self): + return 0 + + def reclassify(self): + return 0 diff --git a/Pipeline/src/modules/data_processing/knmi_interpolation.py b/Pipeline/src/modules/data_processing/knmi_interpolation.py new file mode 100644 index 0000000..90a0303 --- /dev/null +++ b/Pipeline/src/modules/data_processing/knmi_interpolation.py @@ -0,0 +1,233 @@ +import argparse +import datetime +import re +import numpy as np +import pandas as pd +import geopandas as gpd +import rasterio +import rioxarray +import shapely +import pykrige.kriging_tools as kt + +from osgeo import ogr, osr, gdal +from rasterio.crs import CRS +from rasterio.warp import reproject, Resampling +from rbf.interpolate import RBFInterpolant +from geocube.api.core import make_geocube +from geocube.rasterize import rasterize_points_griddata, rasterize_points_radial, rasterize_image +from pykrige.ok import OrdinaryKriging +from scipy import signal + +from data_retrieval.knmi_data_source_files_retrieval import User_input_pipeline +from utils.set_up_database import set_up_database + + +class Knmi_Interpolator(User_input_pipeline): + db_query_manager = None + raster_template = None + boundaries = [] + width = 0 + height = 0 + interpolation_output_crs = None + output_filename = None + multiplier = 1 + + def __init__(self, parent_folder='./data'): + User_input_pipeline.__init__(self, parent_folder) + super().__init__(parent_folder) + self.db_query_manager = set_up_database() + + def interpolate_pipeline(self, columns): + station_locations = self.__get_station_locations() + days_timedelta = self.end_date - self.start_date + days_int = days_timedelta.days + if days_int < 0: + raise Exception('Start date should be set before end date') + self.set_interpolation_boundaries() + for current_day_to_download_relative in range(0, days_int): + current_day_to_download_date = self.start_date + datetime.timedelta(current_day_to_download_relative) + current_day_to_download_formatted = "'"+current_day_to_download_date.strftime('%Y%m%d')+"'" + interpolation_params = { + 'column':'std,'+','.join(columns), + 'variable':'date', + 'value':current_day_to_download_formatted + } + values_to_interpolate = self.__get_data_to_interpolate(interpolation_params) + values_locations = pd.merge(station_locations, values_to_interpolate, on='STN') + for column_in_df in columns: + df_to_grid = values_locations[['LON', 'LAT', column_in_df.upper()]] + df_to_grid_renamed = df_to_grid.rename( + columns = {'LON': 'x', 'LAT': 'y', column_in_df.upper():'value'}, inplace = False + ) + self.set_filename_output(column_in_df.upper()+'_'+current_day_to_download_formatted[1:9]+'.tif') + self.__interpolate(df_to_grid_renamed) + + def __interpolate(self, scattered_data): + interpolation_method = self.algorithm['method'] + if (interpolation_method == 'invdist'): + self.__interpolate_iwd(scattered_data) + elif(interpolation_method == 'tps'): + self.__interpolate_rbf(scattered_data) + elif(interpolation_method == 'ordinary_kriging'): + self.__interpolate_kriging(scattered_data) + else: + print('not interpolation method is recognized:', self.algorithm['method']) + + def __interpolate_iwd(self, scattered_data): + interpolation_params_formatted = self.__get_interpolation_params() + try: + print('iwd_interpolation') + gridopt = gdal.GridOptions( + format='GTiff', + algorithm=interpolation_params_formatted, + outputBounds = self.boundaries, + outputSRS = self.interpolation_output_crs, + width = self.width, + height = 
self.height, + outputType = gdal.GDT_Float32, + z_multiply = self.multiplier + ) + data_to_interpolate = scattered_data.dropna() + data_to_interpolate.to_csv('points.csv', index=False, sep=';') + gdal.Grid(self.output_filename, 'points.vrt', options=gridopt) + except Exception as e: print(e) + + def __interpolate_rbf(self, scattered_data): + interpolation_params_formatted = self.__get_interpolation_params() + tps_params = interpolation_params_formatted.split(':')[1:] + phi_param = tps_params[0].split('=')[1] + order_param = float(tps_params[1].split('=')[1]) + sigma_param = float(tps_params[2].split('=')[1]) + try: + print('rbf_interpolation') + data_to_interpolate = scattered_data.dropna() + data_to_interpolate['value'] = data_to_interpolate['value'] * self.multiplier + x_obs = data_to_interpolate[['x', 'y']].to_numpy() + u_obs = data_to_interpolate[['value']].to_numpy() + u_obs = np.asarray([element[0] for element in u_obs]) + I = RBFInterpolant(x_obs, u_obs, sigma=sigma_param, phi=phi_param, order=order_param) + x1, x2 = np.linspace(round(self.boundaries[0],3), round(self.boundaries[2],3), self.width),\ + np.linspace(round(self.boundaries[3],3), round(self.boundaries[1],3), self.height) + x_itp = np.reshape(np.meshgrid(x1, x2), (2, self.width*self.height)).T + u_itp = I(x_itp) + x_itp_df = pd.DataFrame(x_itp) + x_itp_df['value'] = u_itp + x_itp_df_rn = x_itp_df.rename(columns = {0: 'x', 1: 'y', 'value': 'value'}, inplace = False) + x_itp_df_rn.to_csv(self.output_filename+'.csv') + self.array2raster(x_itp_df_rn) + except Exception as e: print(e) + + def __interpolate_kriging(self, scattered_data): + print(scattered_data) + interpolation_params_formatted = self.__get_interpolation_params() + kriging_params = interpolation_params_formatted.split(':')[1:] + sill_param = float(kriging_params[0].split('=')[1]) + range_param = float(kriging_params[1].split('=')[1]) + nugget_param = float(kriging_params[2].split('=')[1]) + bs_param = int(kriging_params[3].split('=')[1]) + try: + print('kriging_interpolation') + data_to_interpolate = scattered_data.dropna() + data_to_interpolate['value'] = data_to_interpolate['value'] * self.multiplier + OrdonaryKrigingInstance = OrdinaryKriging( + data_to_interpolate[['x']], + data_to_interpolate[['y']], + data_to_interpolate[['value']], + variogram_model="exponential", + variogram_parameters = {'sill': sill_param, 'range':range_param, 'nugget':nugget_param}, + verbose=False, + enable_plotting=False + ) + x1, x2 = np.linspace(round(self.boundaries[0],3), round(self.boundaries[2],3), self.width),\ + np.linspace(round(self.boundaries[1],3), round(self.boundaries[3],3), self.height) + z, ss = OrdonaryKrigingInstance.execute("grid", x1, x2) + z_filtered = self.__smmothing_array(z, bs_param) + self.array2raster(np.array(z_filtered.T)) + except Exception as e: print(e) + + def __get_station_locations(self): + self.db_query_manager.db_query_params['table'] = 'stn_metadata' + station_locations = self.db_query_manager.get_data('*') + df_station_locations = pd.DataFrame.from_records(station_locations, columns =['STN', 'ALT', 'LON', 'LAT', 'NAME']) + df_station_locations['NAME'] = df_station_locations['NAME'].apply(lambda x: re.sub(r' +', '', x)) + return(df_station_locations) + + def __get_data_to_interpolate(self, condition): + self.db_query_manager.db_query_params['table'] = 'test_phenology' + values_to_interpolate = self.db_query_manager.get_data(condition['column'], condition) + dataframe_titles = ['STN'] + 
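+        # Illustrative note: with condition = {'column': 'std,tg', 'variable': 'date',
+        # 'value': "'20050101'"} (as built in interpolate_pipeline above), the records
+        # come back as rows like (260, 25) and the frame below gets columns
+        # ['STN', 'TG']; 'std' is the station-number column, so the titles start
+        # from 'STN' and the remaining selected columns are appended upper-cased.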
dataframe_titles.extend(condition['column'].upper().split(',')[1:]) + df_to_interpolate = pd.DataFrame.from_records(values_to_interpolate, columns = dataframe_titles) + return(df_to_interpolate) + + def set_raster_template(self, raster_template): + self.raster_template = raster_template + + def set_crs_output(self, interpolation_output_crs): + self.interpolation_output_crs = interpolation_output_crs + + def set_interpolation_algorithm(self, interpolation_params): + self.algorithm = interpolation_params + + def set_output_scale(self, multiplier): + self.multiplier = multiplier + + def set_filename_output(self, output_filename): + self.output_filename = output_filename + + def set_interpolation_boundaries(self): + raster_bestand = gdal.Open(self.raster_template) + nr_rows = raster_bestand.RasterYSize + nr_cols = raster_bestand.RasterXSize + geotransform = raster_bestand.GetGeoTransform() + x_origin = geotransform[0] + y_origin = geotransform[3] + cel_width = geotransform[1] + cel_height = geotransform[5] + raster_bestand = None + raster_bestand = None + ext_factor = 60 + width_rows_factor_added = ext_factor + width_cols_factor_added = ext_factor + x_ll = x_origin + x_ll -= width_cols_factor_added * cel_width + x_ur = x_origin + nr_cols * cel_width + x_ur += width_cols_factor_added * cel_width + y_ur = y_origin + y_ur -= width_rows_factor_added * cel_height + y_ll = y_origin + nr_rows * cel_height + y_ll += width_rows_factor_added * cel_height + self.width = nr_cols + width_cols_factor_added * 2 + self.height = nr_rows + width_rows_factor_added * 2 + self.boundaries = [x_ll, y_ll, x_ur, y_ur] + + def array2raster(self, input_array): + if isinstance(input_array, np.ndarray): + gdf_pivot = input_array + else: + gdf_pivot = np.array(input_array.pivot(index='x', columns='y', values='value')) + xmin,ymax,xmax,ymin = self.boundaries + ncols = self.width + nrows = self.height + array_df = gdf_pivot + xres = (xmax-xmin)/float(ncols) + yres = (ymax-ymin)/float(nrows) + geotransform = (xmin,xres,0,ymax,0, - yres) + output_raster = gdal.GetDriverByName('GTiff').Create(str(self.output_filename), ncols, nrows, 1 ,gdal.GDT_Float32) + output_raster.SetGeoTransform(geotransform) + srs = osr.SpatialReference() + srs.ImportFromEPSG(int(self.interpolation_output_crs.split(':')[1])) + output_raster.SetProjection( srs.ExportToWkt() ) + output_raster.GetRasterBand(1).WriteArray(array_df.T) + output_raster.FlushCache() + + def __get_interpolation_params(self): + list_params = [] + for key, value in self.algorithm.items(): + list_params.append(key+'='+value) + interpolation_params = self.algorithm['method'] + ':' + ":".join(list_params[1:]) + return (interpolation_params) + + def __smmothing_array(self, input_array_2d, smoothing_block_size): + filter_kernel = np.ones((smoothing_block_size, smoothing_block_size))/smoothing_block_size**2 + return signal.convolve2d(input_array_2d, filter_kernel, 'valid') \ No newline at end of file diff --git a/Pipeline/src/modules/data_retrieval.py b/Pipeline/src/modules/data_retrieval.py new file mode 100644 index 0000000..95234a1 --- /dev/null +++ b/Pipeline/src/modules/data_retrieval.py @@ -0,0 +1,25 @@ +from data_processing import knmi_interpolation, knmi_computation, feature_extraction +from data_retrieval import knmi_data_source_files_retrieval, modis_retrieval +from utils import api_keys, data_base_management + +import set_arguments_pipeline +import re + + +def main(): + input_arguments = set_arguments_pipeline.set_arguments_pipeline() + + # Download KNMI Source Files + 
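+    # The calls below drive the knmi_collector facade end to end: fetch the yearly
+    # KNMI archive, write one CSV per day of the requested range, prepend the
+    # station-coordinate header files, and remove the temporary zip and csv files.
+    # Note that parent_folder is a machine-specific absolute path and has to be
+    # adapted per deployment.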
parent_folder = '/home/jupyter-andres/Andres_Folder/Phenology_data/Datasets/'
+    knmi_stations_data_collector = knmi_data_source_files_retrieval.knmi_collector(parent_folder)
+    knmi_stations_data_collector.set_end_date(input_arguments['end_date'])
+    knmi_stations_data_collector.set_start_date(input_arguments['start_date'])
+    knmi_stations_data_collector.set_target_folder(input_arguments['folder'])
+    knmi_stations_data_collector.read_knmi_all_stations_file().compute()
+    knmi_stations_data_collector.remove_temp_files()
+    knmi_stations_data_collector.add_head_files()
+    knmi_stations_data_collector.remove_temp_files('csv')
+
+if __name__ == '__main__':
+    main()
+
diff --git a/Pipeline/src/modules/data_retrieval/__init__.py b/Pipeline/src/modules/data_retrieval/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/Pipeline/src/modules/data_retrieval/__pycache__/__init__.cpython-36.pyc b/Pipeline/src/modules/data_retrieval/__pycache__/__init__.cpython-36.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6980586e37b9a47691e427bcbb83913b01298c60
GIT binary patch
literal 206
literal 0
HcmV?d00001
diff --git a/Pipeline/src/modules/data_retrieval/__pycache__/knmi_data_source_files_retrieval.cpython-36.pyc b/Pipeline/src/modules/data_retrieval/__pycache__/knmi_data_source_files_retrieval.cpython-36.pyc
new file mode 100644
GIT binary patch
literal 6372
literal 0
HcmV?d00001
diff --git a/Pipeline/src/modules/data_retrieval/knmi_data_source_files_retrieval.py b/Pipeline/src/modules/data_retrieval/knmi_data_source_files_retrieval.py
new file mode 100644
index 0000000..044ef68
--- /dev/null
+++ b/Pipeline/src/modules/data_retrieval/knmi_data_source_files_retrieval.py
@@ -0,0 +1,268 @@
+import datetime
+import requests
+import os
+import argparse
+import re
+
+import numpy as np
+import dask
+import dask.array as da
+import dask.bag as db
+
+from osgeo import gdal, ogr, gdalnumeric, gdalconst
+from pathlib import Path
+from utils.set_up_database import set_up_database
+
+
+class User_input_pipeline(object):
+    start_date = None
+    end_date = None
+    parent_folder = ''
+    target_folder = ''
+
+    def __init__(self, parent_folder='./data'):
+        self.parent_folder = parent_folder
+        self._check_create_folder(self.parent_folder)
+
+    def set_start_date(self, start_date):
+        self.start_date = start_date
+
+    def set_end_date(self, end_date):
+        self.end_date = end_date
+
+    def set_target_folder(self, target_folder):
+        self.target_folder = target_folder
+        self._check_create_folder(self.target_folder)
+
+    def _check_create_folder(self, folder_name):
+        # create the folder when missing and make it the current working directory
+        if not os.path.isdir(folder_name):
+            os.mkdir(folder_name)
+        os.chdir(folder_name)
+
+
+class Knmi_data_source_retrieval(User_input_pipeline):
+    def __init__(self, parent_folder='./data'):
+        super().__init__(parent_folder)
+
+    def download_knmi_source_files(self, variables):
+        # KNMI daily station data (daggegevens) endpoint
+        base_url = 'http://projects.knmi.nl/klimatologie/daggegevens/getdata_dag.cgi'
+#        base_url = 'https://cdn.knmi.nl/knmi/map/page/klimatologie/gegevens/daggegevens/jaar.zip'
+        days_timedelta = self.end_date - self.start_date
+        days_int = days_timedelta.days
+        if days_int < 0:
+            raise Exception('Start date should be set before end date')
+        variables_download = ':'.join(variables)
+        current_day_to_download_end = self.start_date + datetime.timedelta(days_int)
+        current_day_to_download_formatted = self.start_date.strftime('%Y%m%d')
+        current_day_to_download_end_formatted = current_day_to_download_end.strftime('%Y%m%d')
+        http_request_params = {
+            'stns': 'ALL',
+            'start': current_day_to_download_formatted,
+            'end': current_day_to_download_end_formatted,
+            'vars': variables_download
+        }
+        http_knmi_source_request_handler = Knmi_http_request_handler(base_url)
+        http_knmi_source_request_handler.set_http_request_params(http_request_params)
+        try:
+            response = http_knmi_source_request_handler.handle_http_request()
+            result = response.text
+            self.__text_to_database(result)
+            fn = 'knmi_source_' + current_day_to_download_formatted + '_to_' + current_day_to_download_end_formatted + '.txt'
+            with open(fn, 'w') as out_file:
+                out_file.write(result)
+            print('file has been created: ' + fn)
+        except Exception as e:
+            print(e)
+
+    def __text_to_database(self, input_raw_data):
+        raw_data_split = input_raw_data.split('\n')
+        db_query_manager = set_up_database()
+        insert_dataset = []
+        for line in raw_data_split:
+            line = re.sub(r" +", "", line)
+            # skip empty lines and the '#' comment/header lines
+            if len(line) > 1 and line[0] != '#':
+                line_format = line.rstrip().split(',')
+                line_format_with_null = [x if x != '' else 'NULL' for x in line_format]
+                insert_dataset.append(line_format_with_null)
print(line) + db_query_manager.insert_data(insert_dataset) + + +class Knmi_interpolated_raster_retrieval(User_input_pipeline): + api_key = '' + dataset_raster = {} + + def __init__(self, parent_folder='./data'): + User_input_pipeline.__init__(self, parent_folder) + super().__init__(parent_folder) + + def set_dataset_raster(self, dataset_raster): + self.dataset_raster = dataset_raster + + def set_api_key(self, api_key): + self.api_key = api_key[self.dataset_raster['name']] + + def __get_api_list_files(self): + api_url = 'https://api.dataplatform.knmi.nl/open-data' + api_version = 'v1' + dataset_name = self.dataset_raster['name'] + dataset_version = self.dataset_raster['version'] + ensembled_url = f'{api_url}/{api_version}/datasets/{dataset_name}/versions/{dataset_version}/files' + timestamp_pattern = self.start_date.strftime('%Y%m%d') + dataset_name_upper = dataset_name.upper() + start_after_filename_prefix = f'INTER_OPER_R___{dataset_name_upper}____L3__{timestamp_pattern}' + ensembled_url = f'{api_url}/{api_version}/datasets/{dataset_name}/versions/{dataset_version}/files' + days_timedelta = self.end_date - self.start_date + days_int = days_timedelta.days + if days_int < 1: + raise Exception('Start date should be set before end date') + http_request_params = { + 'maxKeys': str(days_int + 1), + 'Access-Control-Allow-Origin': '*', + 'startAfterFilename': start_after_filename_prefix + } + http_request_headers = {'Authorization': self.api_key} + http_raster_list_request_handler = Knmi_http_request_handler(ensembled_url) + http_raster_list_request_handler.set_http_request_params(http_request_params) + http_raster_list_request_handler.set_http_request_headers(http_request_headers) + list_files_response = http_raster_list_request_handler.handle_http_request() + list_files = list_files_response.json() + dataset_files = list_files.get('files') + return dataset_files + + def __get_temporary_download_url(self, file): + filename = file.get('filename') + api_url = 'https://api.dataplatform.knmi.nl/open-data' + api_version = 'v1' + dataset_name = self.dataset_raster['name'] + dataset_version = self.dataset_raster['version'] + endpoint = f'{api_url}/{api_version}/datasets/{dataset_name}/versions/{dataset_version}/files/{filename}/url' + http_request_headers = {'Authorization': self.api_key} + http_raster_url_request_handler = Knmi_http_request_handler(endpoint) + http_raster_url_request_handler.set_http_request_headers(http_request_headers) + url_response = http_raster_url_request_handler.handle_http_request() + download_url = url_response.json().get('temporaryDownloadUrl') + return download_url + + def __download_knmi_raster(self, file, knmi_url_download): + filename = file.get('filename') + http_raster_request = Knmi_http_request_handler(knmi_url_download) + url_response = http_raster_request.handle_http_request() + p = Path(filename) + p.write_bytes(url_response.content) + print(f'{filename} created successfully') + + def download_knmi_interpolated_rasters(self): + list_rasters_download = self.__get_api_list_files() + for file in list_rasters_download: + download_url = self.__get_temporary_download_url(file) + self.__download_knmi_raster(file, download_url) + +class Knmi_http_request_handler(object): + base_url_request = '' + params_url_request = {} + headers_url_request = {} + + def __init__(self, base_url): + self.base_url_request = base_url + + def set_http_request_params(self, params_url_request): + self.params_url_request = params_url_request + + def set_http_request_headers(self, 
headers_url_request):
+        self.headers_url_request = headers_url_request
+
+    def handle_http_request(self):
+        try:
+            r = requests.get(
+                self.base_url_request,
+                headers = self.headers_url_request,
+                params = self.params_url_request
+            )
+            r.raise_for_status()
+            # return the full response object so callers can use .text, .json() or .content
+            return r
+        except requests.exceptions.HTTPError as e:
+            print(e.response.text)
+
+class knmi_collector(User_input_pipeline):
+    collect_climate_vars_names = ['# STN', 'YYYYMMDD', 'TN', 'TX', 'RH', 'EV24', 'UG', 'TG']
+
+    def __init__(self, parent_folder):
+        super().__init__(parent_folder)
+
+    def read_knmi_all_stations_file(self):
+        base_url = 'https://cdn.knmi.nl/knmi/map/page/klimatologie/gegevens/daggegevens/jaar.zip'
+        temp_filename = 'temp_data.zip'
+        http_request_params = {}
+        http_knmi_source_request_handler = Knmi_http_request_handler(base_url)
+        http_knmi_source_request_handler.set_http_request_params(http_request_params)
+        try:
+            response = http_knmi_source_request_handler.handle_http_request()
+            result = response.content
+            with open(temp_filename, 'wb') as compressed_knmi_data:
+                compressed_knmi_data.write(result)
+            raw_knmi_station_data = db.read_text(temp_filename, compression='zip')
+            cleaned_knmi_station_data = self.clean_knmi_raw_data(raw_knmi_station_data)
+            filtered_dataframe = self.get_knmi_columns(cleaned_knmi_station_data)
+            for str_date in self.get_current_last_dates():
+                filtered_dates = self.filter_dates_knmi(filtered_dataframe, str_date)
+                filename_current = '{0}.csv'.format(str_date)
+                filtered_dates.to_csv(filename_current, index=False, single_file=True)
+            return filtered_dates
+        except Exception as e:
+            print(e)
+
+    def clean_knmi_raw_data(self, raw_data):
+        stripped_rows = db.map(lambda x: x.lstrip(), raw_data)
+        stripped_no_blanks = stripped_rows.filter(lambda x: x != '')
+        filtered_knmi_data = self.get_knmi_current_stations_data(stripped_no_blanks)
+        return filtered_knmi_data
+
+    def get_knmi_current_stations_data(self, knmi_data_bag):
+        filtered_csv_only_titles = knmi_data_bag.filter(lambda x: x[0] == '#')
+        filtered_csv_only_values = knmi_data_bag.filter(lambda x: re.match('[0-9]', x[0]))
+        titles_row = filtered_csv_only_titles.map(lambda x: x.strip().split(',')).compute()[0]
+        titles_row_strip = list(map(lambda x: x.lstrip(), titles_row))
+        titles_dict = dict(zip(range(len(titles_row_strip)), titles_row_strip))
+        knmi_stations_dataframe_allyear = filtered_csv_only_values.map(lambda x: x.strip().split(',')).to_dataframe()
+        knmi_stations_dataframe_allyear_renamed = knmi_stations_dataframe_allyear.rename(columns=titles_dict)
+        return knmi_stations_dataframe_allyear_renamed
+
+    def get_knmi_columns(self, knmi_all_columns):
+        knmi_filtered_columns = knmi_all_columns[self.collect_climate_vars_names]
+        return knmi_filtered_columns
+
+    def filter_dates_knmi(self, knmi_filtered_columns, str_date):
+        filtered_dates = knmi_filtered_columns[knmi_filtered_columns.YYYYMMDD == str_date]
+        return filtered_dates
+
+    def get_current_last_dates(self):
+        dates_list = []
+        days_timedelta = self.end_date - self.start_date
+        days_int = days_timedelta.days
+        if days_int < 0:
+            raise Exception('Start date should be set before end date')
+        for days_past in range(1, days_int):
+            current_date = self.end_date - datetime.timedelta(days_past)
+            dates_list.append(datetime.datetime.strftime(current_date, '%Y%m%d'))
+        return dates_list
+
+    def remove_temp_files(self,
wild_card = 'zip'): + test = os.listdir('.') + for item in test: + if item.endswith("{0}".format(wild_card)): + os.remove(item) + + def add_head_files(self): + absolute_path = os.path.join(self.parent_folder, self.target_folder) + filenames = [file for file in os.listdir('.') if file.split('.')[1] == 'csv'] + for file in filenames: + with open(file, 'r') as input_file: + with open('coordinate_info.txt', 'r') as coordinate_file: + with open('{0}.txt'.format(file.split('.')[0]),'w') as newf: + newf.write(coordinate_file.read()) + newf.write(input_file.read()) + \ No newline at end of file diff --git a/Pipeline/src/modules/data_retrieval/modis_retrieval.py b/Pipeline/src/modules/data_retrieval/modis_retrieval.py new file mode 100644 index 0000000..59a26aa --- /dev/null +++ b/Pipeline/src/modules/data_retrieval/modis_retrieval.py @@ -0,0 +1,264 @@ +import os +import sys +import argparse + +import datetime +import time +import re +import numpy as np +import pandas as pd +import ee +import geopandas as gpd +import rasterio +import rioxarray +import shapely +import requests +import lxml.html +import urllib + +from dateutil.relativedelta import * +from osgeo import ogr, osr, gdal +from rasterio.crs import CRS +from rasterio.warp import reproject, Resampling +from subprocess import Popen +from getpass import getpass +from netrc import netrc + +from data_processing.knmi_interpolation import Knmi_Interpolator + + +class ModisRetrieval(Knmi_Interpolator): + vars_names = [] + bounds = [] + + def __init__(self, parent_folder='./data'): + Knmi_Interpolator.__init__(self, parent_folder) + super().__init__(parent_folder) + try: + ee.Initialize() + print('Google Earth Engine has initialized successfully!') + except ee.EEException as e: + print('Google Earth Engine has failed to initialize!') + except: + print("Unexpected error:", sys.exc_info()[0]) + raise + + def get_crs_image_collection(self, image_collection): + return ee.Image(image_collection.first()).projection().crs() + + def set_bounds(self, bounds): + self.bounds = bounds + + def set_vars_names(self, vars_names): + self.vars_names = vars_names + + def get_ee_data(self, dataset_name): + days_timedelta = self.end_date - self.start_date + days_int = days_timedelta.days + if days_int < 0: + raise Exception('Start date should be set before end date') + start_date_filter = self.start_date.strftime('%Y-%m-%d') + end_date_filter = self.end_date.strftime('%Y-%m-%d') + self.set_interpolation_boundaries() + try: + data_ee = ee.ImageCollection(dataset_name).filterDate(start_date_filter, end_date_filter).select(self.vars_names) + mosaic = data_ee.mean() + proj = ee.Projection(self.interpolation_output_crs); + ext_factor = 1 + aoi = ee.Geometry.Polygon( + [[[self.boundaries[1] * ext_factor, self.boundaries[2] * ext_factor], + [self.boundaries[1] * ext_factor, self.boundaries[0] * ext_factor], + [self.boundaries[3] * ext_factor, self.boundaries[0] * ext_factor], + [self.boundaries[3] * ext_factor, self.boundaries[2] * ext_factor]]], proj, False) + reprojected = mosaic.reproject(proj, None, 1); + band_arrs = reprojected.sampleRectangle(region=aoi).getInfo() + return band_arrs + except ee.EEException as e: + print('error', e) + except: + print("Unexpected error:", sys.exc_info()[0]) + raise + +class NASARetrieval(Knmi_Interpolator): + fileList = None + urs = None + netrcDir = None + vars_names = [] + tiles = None + product = None + + def __init__(self, parent_folder='./data'): + Knmi_Interpolator.__init__(self, parent_folder) + super().__init__(parent_folder) + + 
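+    # The setters below configure retrieval from the NASA LP DAAC archive:
+    # authentication relies on a ~/.netrc entry for urs.earthdata.nasa.gov
+    # (created on demand in set_up_credentials), and the download list is
+    # scraped from the e4ftl01.cr.usgs.gov monthly directory listings for the
+    # configured product, tile and data type.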
def set_urs(self, urs): + self.urs = urs + + def set_file_list(self, files): + self.fileList = list(files) + + def set_netrcDir(self, netrc_name): + self.netrcDir = os.path.expanduser(netrc_name) + + def set_vars_names(self, vars_names): + self.vars_names = vars_names + + def set_type_data(self, type_data): + self.type_data = type_data + + def set_tiles(self, tiles): + self.tiles = tiles + + def set_product(self, product): + self.product = product + + def download_list(self, nmonths): + fileList_2 = [] + for i in range(1, nmonths + 1): + today = self.start_date + j = i - 1 + d = today.replace(day=1) + relativedelta(months=j) + dt = d.strftime('%Y.%m.%d') + url = "https://e4ftl01.cr.usgs.gov/{2}/{0}/{1}/".format(self.product, dt, self.type_data) + f = requests.get(url) + element_tree = lxml.html.fromstring(f.text) + url_ahref = element_tree.xpath('//a/@href') + arr = np.array(url_ahref) + arr_f = [] + for element in arr: + if self.tiles in element and element.endswith('.hdf'): + arr_f.append(True) + else: + arr_f.append(False) + arr_filterd = arr[arr_f] + filename_hrz = arr_filterd[0] + download_url = url + filename_hrz + fileList_2.append(download_url) + self.set_file_list(fileList_2) + + def derive_file_list(self): + days_timedelta = self.end_date - self.start_date + days_int = days_timedelta.days + if days_int < 0: + raise Exception('Start date should be set before end date') + start_date_filter = self.start_date.strftime('%Y/%m/%d') + end_date_filter = self.end_date.strftime('%Y/%m/%d') + range_dates = pd.date_range(start_date_filter, end_date_filter, freq="MS") + diff_months = len(range_dates) + self.download_list(diff_months) + + def set_up_credentials(self): + prompts = [ + 'Enter NASA Earthdata Login Username \n(or create an account at urs.earthdata.nasa.gov): ', + 'Enter NASA Earthdata Login Password: ' + ] + try: + self.set_netrcDir("~/.netrc") + netrc(self.netrcDir).authenticators(self.urs)[0] + + except FileNotFoundError: + homeDir = os.path.expanduser("~") + Popen('touch {0}.netrc | chmod og-rw {0}.netrc | echo machine {1} >> {0}.netrc'.\ + format(homeDir + os.sep, self.urs), shell=True) + Popen('echo login {} >> {}.netrc'.format(getpass(prompt=prompts[0]), homeDir + os.sep), shell=True) + Popen('echo password {} >> {}.netrc'.format(getpass(prompt=prompts[1]), homeDir + os.sep), shell=True) + + except TypeError: + homeDir = os.path.expanduser("~") + Popen('echo machine {1} >> {0}.netrc'.format(homeDir + os.sep, self.urs), shell=True) + Popen('echo login {} >> {}.netrc'.format(getpass(prompt=prompts[0]), homeDir + os.sep), shell=True) + Popen('echo password {} >> {}.netrc'.format(getpass(prompt=prompts[1]), homeDir + os.sep), shell=True) + + tries = 0 + while tries < 30: + try: + netrc(netrcDir).authenticators(urs)[2] + except: + time.sleep(2.0) + tries += 1 + + def download_files(self): + for f in self.fileList: + self.set_filename_output(f.split('/')[5]+'_'+self.product+'.hdf') + with requests.get(f.strip(),\ + verify=False,\ + stream=True,\ + auth=(netrc(self.netrcDir).authenticators(self.urs)[0],\ + netrc(self.netrcDir).authenticators(self.urs)[2])) as response: + if response.status_code != 200: + print("{} not downloaded. 
Verify that your username and password are correct in {}".\ + format(f.split('/')[-1].strip(), self.netrcDir)) + else: + response.raw.decode_content = True + content = response.raw + with open(self.output_filename, 'wb') as d: + while True: + chunk = content.read(16 * 1024) + if not chunk: + break + d.write(chunk) + print('Downloaded file: {}'.format(self.output_filename )) + + def array_to_tif(self, data_to_save): + band_array = data_to_save['band_array'] + band_ds = data_to_save['template'] + band_path = data_to_save['name'] + band_array[band_array == -3000] = -32768 + scale_raster_values = 1.0000 + if data_to_save['layer'] == 'NVDI': + scale_raster_values = 0.0001 + elif data_to_save['layer'] == 'EVI': + scale_raster_values = 1.0000 + try: + band_array = np.round(band_array*scale_raster_values, decimals=4) + out_ds = gdal.GetDriverByName('GTiff').Create(band_path, + band_array.shape[0], + band_array.shape[1], + 1, + gdal.GDT_Int16, + ['COMPRESS=LZW', 'TILED=YES']) + out_ds.SetGeoTransform(band_ds.GetGeoTransform()) + out_ds.SetProjection(band_ds.GetProjection()) + out_ds.GetRasterBand(1).SetNoDataValue(-32768 * scale_raster_values) + out_ds.GetRasterBand(1).WriteArray(band_array) + print('file {0} created successfully'.format(band_path)) + except Exception as e: print(e) + + def __print_layers_subdatasets(self, subdatasets): + for fname, name in subdatasets: + print (name, "------", fname) + + def read_sub_datasets(self, filename_raster): + filename_raster_no_folder = filename_raster.split('/')[-1] + nasa_raster = gdal.Open(filename_raster) + if nasa_raster is None: + print ("Problem opening file!") + return None + else: + subdatasets = nasa_raster.GetSubDatasets() + self.__print_layers_subdatasets(subdatasets) + if self.type_data == 'MOLT': + file_template = 'HDF4_EOS:EOS_GRID:"%s":MOD_Grid_monthly_1km_VI:%s' + elif self.type_data == 'MOTA': + file_template = 'HDF4_EOS:EOS_GRID:"%s":MOD_Grid_BRDF:%s' + for i, layer in enumerate ( self.vars_names ): + data = {} + this_file = file_template % ( filename_raster, layer ) + g = gdal.Open(this_file) + if g is None: + raise IOError + data['band_array'] = g.ReadAsArray().astype(np.int16) + data['name'] = '.'.join(filename_raster_no_folder.split('.')[:-1])+'_'+layer+'.tiff' + data['template'] = g + data['layer'] = layer + self.array_to_tif(data) + return data + + def hdf_to_tiff(self): + directory = self.parent_folder + self.target_folder + for filename in os.listdir(directory): + if filename.endswith(self.product+".hdf"): + filename_raster = os.path.join(directory, filename) + data_NASA = self.read_sub_datasets(filename_raster) + else: + continue diff --git a/Pipeline/src/modules/set_arguments_pipeline.py b/Pipeline/src/modules/set_arguments_pipeline.py new file mode 100644 index 0000000..2f38d77 --- /dev/null +++ b/Pipeline/src/modules/set_arguments_pipeline.py @@ -0,0 +1,40 @@ +from utils import argument_handler +import datetime + +def set_arguments_pipeline(): + arg_handler = argument_handler.Arguments_handler() + arg_folder = argument_handler.Argument("folder") + arg_folder.set_argument_type("String") + arg_folder.set_argument_help_message("define the folder to write the result data (relative to the parent folder)") + arg_folder.set_argument_default_value("data/klimatologie_temp") + arg_handler.add_input_argument(arg_folder) + arg_start_date = argument_handler.Argument("start_date") + arg_start_date.set_argument_type("Date") + arg_start_date.set_argument_help_message("define the start date to process format DD-MM-YYYY (hyphen sep)") + 
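+    # e.g. the default '01-01-2005' below is parsed with
+    # datetime.datetime.strptime(value, '%d-%m-%Y') in the validation loop further down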
arg_start_date.set_argument_default_value("01-01-2005") + arg_handler.add_input_argument(arg_start_date) + arg_end_date = argument_handler.Argument("end_date") + arg_end_date.set_argument_type("Date") + arg_end_date.set_argument_help_message("define the end date to process format DD-MM-YYYY (hyphen sep)") + arg_end_date.set_argument_default_value("31-12-2014") + arg_handler.add_input_argument(arg_end_date) + input_parameters_raw = arg_handler.get_input_arguments() + input_parameters_checked = {} + for arg in arg_handler.arguments: + if arg.argument_type_name == "Date": + try: + if type(input_parameters_raw[arg.argument_name]) is datetime.datetime: + input_parameters_checked[arg.argument_name] = input_parameters_raw[arg.argument_name] + elif type(input_parameters_raw[arg.argument_name]) is str: + temp_argument_type_date = datetime.datetime.strptime(input_parameters_raw[arg.argument_name], '%d-%m-%Y') + input_parameters_checked[arg.argument_name] = temp_argument_type_date + except: + print("wrong date format {0} date should match dd-mm-yyyy. {0} ommited".format(arg.argument_name)) + elif arg.argument_type_name == "Integer" or arg.argument_type_name == "Integer_no_zero": + if not type(input_parameters_raw[arg.argument_name]) is int: + raise TypeError("Only integers are allowed") + elif arg.argument_type_name == "Integer_no_zero" and input_parameters_raw[arg.argument_name] < 0: + raise Exception("Sorry, no numbers below zero") + else: + input_parameters_checked[arg.argument_name] = input_parameters_raw[arg.argument_name] + return input_parameters_checked \ No newline at end of file diff --git a/Pipeline/src/modules/test.py b/Pipeline/src/modules/test.py new file mode 100644 index 0000000..3c38f90 --- /dev/null +++ b/Pipeline/src/modules/test.py @@ -0,0 +1,307 @@ +from data_processing import knmi_interpolation, knmi_computation, feature_extraction +from data_retrieval import knmi_data_source_files_retrieval, modis_retrieval +from utils import api_keys, data_base_management + +import set_arguments_pipeline +import re + + +def main(): + + input_arguments = set_arguments_pipeline.set_arguments_pipeline() + + DB_params = { + 'PGHOST': api_keys.PGHOST, + 'PGDATABASE': api_keys.PGDATABASE, + 'PGUSER': api_keys.PGUSER, + 'PGPASSWORD': api_keys.PGPASSWORD, + 'TIMEOUT': api_keys.TIMEOUT, + 'PORT': api_keys.PORT + } + + # Download KNMI Source Files + data_retrieval_handler = knmi_data_source_files_retrieval.Knmi_data_source_retrieval('/home/jupyter-andres/Andres_Folder/Phenology_data/Datasets/') + data_retrieval_handler.set_end_date(input_arguments['end_date']) + data_retrieval_handler.set_start_date(input_arguments['start_date']) + data_retrieval_handler.set_target_folder(input_arguments['folder']) + data_retrieval_handler.download_knmi_source_files(['TN','TX','RH','EV24','UG','TG']) + + # Download KNMI Interpolated rasters + # names 'Tn1_', ... 
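+# A KNMI Data Platform raster dataset is identified by a short name plus a
+# version string; the commented-out example below would request the daily
+# Makkink reference evaporation grid ('EV24', version '2').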
+# raster_dataset = { +# 'name':'EV24', +# 'version':'2' +# } + +# data_retrieval_handler_rasters = knmi_data_source_files_retrieval.Knmi_interpolated_raster_retrieval('/home/jupyter-andres/Andres_Folder/Phenology_data/Datasets/') +# data_retrieval_handler_rasters.set_dataset_raster(raster_dataset) +# data_retrieval_handler_rasters.set_end_date(input_arguments['end_date']) +# data_retrieval_handler_rasters.set_start_date(input_arguments['start_date']) +# data_retrieval_handler_rasters.set_target_folder(input_arguments['folder']) +# data_retrieval_handler_rasters.set_api_key(api_keys.api_keys) +# data_retrieval_handler_rasters.download_knmi_interpolated_rasters() + +# ### Create table in db for knmi raw data + +# db_setting_manager = data_base_management.DB_manager(DB_params) + +# table_name = 'test_phenology' +# table_params = [ +# { +# 'name_field':'STD' , +# 'type_field':'INT NOT NULL' +# }, +# { +# 'name_field':'DATE' , +# 'type_field':'CHAR(8) NOT NULL' +# }, +# { +# 'name_field':'TN' , +# 'type_field':'INT' +# }, +# { +# 'name_field':'TX' , +# 'type_field':'INT' +# }, +# { +# 'name_field':'RH' , +# 'type_field':'INT' +# }, +# { +# 'name_field':'EV24' , +# 'type_field':'INT' +# }, +# { +# 'name_field':'UG' , +# 'type_field':'INT' +# }, +# { +# 'name_field':'TG' , +# 'type_field':'INT' +# } +# ] + +# db_setting_manager.create_data_base_table(table_name, table_params) +# query_params = { +# 'columns': '*', +# 'schema': 'andres', +# 'table': 'stn_metadata' +# } + +# ### Populate db table with the knmi text files + +# db_query_manager = data_base_management.DB_query(db_setting_manager.get_conn(), query_params) +# data = db_query_manager.get_data('*') +# with open('/home/jupyter-andres/Andres_Folder/Phenology_data/Datasets/Stations_Metadata/station_coordinates.csv') as stn_metadata: +# lines_info = stn_metadata.readlines() +# insert_data = [] +# for line in lines_info[1:]: +# line = line.strip().split(",") +# line[0] = re.sub(r'"', '', line[0]) +# line[2] = re.sub(r'"', "'", line[2]) +# line[2] = re.sub(r'-', " ", line[2]) +# print(line) +# insert_data.append(line) + +# insert_data = [['1'],['2']] +# db_query_manager.insert_data(insert_data) + +# ### Create interpolated rasters + +# Interpolator = knmi_interpolation.Knmi_Interpolator('/home/jupyter-andres/Andres_Folder/Phenology_data/Datasets/') + +# Interpolator.set_start_date(input_arguments['start_date']) +# Interpolator.set_end_date(input_arguments['end_date']) +# Interpolator.set_target_folder(input_arguments['folder']) + +# Interpolator.set_raster_template('/home/jupyter-andres/Andres_Folder/Phenology_data/Validation_Files/nederland_1000.tif') +# Interpolator.set_crs_output('EPSG:28992') + +# Interpolator.set_output_scale(0.1) + +# interpolation_params = {'method':'invdist', 'power':'2.5', 'smoothing':'10000'} +# Interpolator.set_interpolation_algorithm(interpolation_params) +# columns = ['tg','tn','tx'] +# Interpolator.interpolate_pipeline(columns) + +# smoothing_tps = 43496 * 2 +# interpolation_params = {'method':'tps', 'phi':'phs2', 'order':'1', 'sigma':str(smoothing_tps)} +# Interpolator.set_interpolation_algorithm(interpolation_params) +# columns = ['ev24',] +# Interpolator.interpolate_pipeline(columns) + +# range_krig = 121505 * 43496 +# interpolation_params = {'method':'ordinary_kriging', 'sill':'0.04', 'range':str(range_krig), +# 'nugget':'0', 'smoothing':str(20)} +# Interpolator.set_interpolation_algorithm(interpolation_params) +# Interpolator.set_output_scale(1) +# columns = ['ug'] +# 
Interpolator.interpolate_pipeline(columns) + +# Interpolator.set_output_scale(1) +# columns = ['rh'] +# Interpolator.interpolate_pipeline(columns) + +# ### Compute SD and VP + +# Computing_raster = knmi_computation.Knmi_compute(\ +# '/home/jupyter-andres/Andres_Folder/Phenology_data/Datasets/Self_Interpolated_Data') +# Computing_raster.set_start_date(input_arguments['start_date']) +# Computing_raster.set_end_date(input_arguments['end_date']) +# Computing_raster.set_target_folder(input_arguments['folder']) +# Computing_raster.set_raster_template('/home/jupyter-andres/Andres_Folder/Phenology_data/Validation_Files/nederland_1000.tif') +# Computing_raster.set_crs_output('EPSG:28992') + +# Computing_raster.set_inputs_computation(['UG', 'TG']) +# Computing_raster.set_operation('compute_sd') +# Computing_raster.compute_pipeline() + +# Computing_raster.set_operation('compute_vp') +# Computing_raster.compute_pipeline() + +# ### FEATURE CREATION (Averaging rasters by period) + +# Features = knmi_computation.Knmi_create_features(\ +# '/home/jupyter-andres/Andres_Folder/Phenology_data/Datasets/Self_Interpolated_Data') +# Features.set_start_date(input_arguments['start_date']) +# Features.set_end_date(input_arguments['end_date']) +# Features.set_target_folder(input_arguments['folder']) +# Features.set_raster_template('/home/jupyter-andres/Andres_Folder/Phenology_data/Validation_Files/nederland_1000.tif') +# Features.set_crs_output('EPSG:28992') +# Features.set_operation('mean') + +# Features.set_var_name('EV24') +# Features.set_periods([1, 2, 3, 5, 6, 7, 14, 365]) +# Features.compute_pipeline() + +# Features.set_var_name('UG') +# Features.set_periods([1, 2, 3, 4, 7, 14, 30, 90, 365]) +# Features.compute_pipeline() + +# Features.set_var_name('SD') +# Features.set_periods([2, 3, 4, 5, 6, 7, 14, 90, 365]) +# Features.compute_pipeline() + +# Features.set_var_name('RH') +# Features.set_periods([14, 90, 365]) +# Features.compute_pipeline() + +# Features.set_var_name('TN') +# Features.set_periods([90, 365]) +# Features.compute_pipeline() + +# Features.set_var_name('TX') +# Features.set_periods([90, 365]) +# Features.compute_pipeline() + +# ### Download MODIS data using GEE + +# modisInstance = modis_retrieval.ModisRetrieval('/home/jupyter-andres/Andres_Folder/Phenology_data/Datasets/') +# modisInstance.set_end_date(input_arguments['end_date']) +# modisInstance.set_start_date(input_arguments['start_date']) +# modisInstance.set_target_folder(input_arguments['folder']) +# modisInstance.set_raster_template('/home/jupyter-andres/Andres_Folder/Phenology_data/Validation_Files/nederland_1000.tif') +# modisInstance.set_crs_output('EPSG:28992') +# modisInstance.set_vars_names(['NDVI', 'EVI']) + + ### Download MODIS data using NASA +# NasaInstance = modis_retrieval.NASARetrieval('/home/jupyter-andres/Andres_Folder/Phenology_data/Datasets/NASA/HDF/') +# NasaInstance.set_end_date(input_arguments['end_date']) +# NasaInstance.set_start_date(input_arguments['start_date']) +# NasaInstance.set_target_folder(input_arguments['folder']) + ### SET PRODUCT +# NasaInstance.set_product('MOD13A3.006') +# NasaInstance.set_type_data('MOLT') +# NasaInstance.set_tiles('h18v03') +# NasaInstance.set_urs('urs.earthdata.nasa.gov') +# NasaInstance.set_up_credentials() +# NasaInstance.derive_file_list() +# NasaInstance.download_files() + ## NASA HDF to TIFF +# NasaInstance.set_vars_names(['1 km monthly NDVI', '1 km monthly EVI']) +# NasaInstance.hdf_to_tiff() + +# NasaInstance.set_product('MCD43A1.006') +# NasaInstance.set_tiles('h18v03') +# 
NasaInstance.set_type_data('MOTA') + # SET CREDENTIALS FIRST +# NasaInstance.set_urs('urs.earthdata.nasa.gov') +# NasaInstance.set_up_credentials() +# NasaInstance.derive_file_list() +# NasaInstance.download_files() + + ## NASA HDF to TIFF +# NasaInstance.set_vars_names([ +# 'BRDF_Albedo_Band_Mandatory_Quality_Band1', +# 'BRDF_Albedo_Band_Mandatory_Quality_Band2', +# 'BRDF_Albedo_Band_Mandatory_Quality_Band3', +# 'BRDF_Albedo_Band_Mandatory_Quality_Band4', +# 'BRDF_Albedo_Band_Mandatory_Quality_Band5', +# 'BRDF_Albedo_Band_Mandatory_Quality_Band6', +# 'BRDF_Albedo_Band_Mandatory_Quality_Band7', +# 'BRDF_Albedo_Band_Mandatory_Quality_nir', +# 'BRDF_Albedo_Band_Mandatory_Quality_shortwave' +# ]) +# NasaInstance.hdf_to_tiff() + + # Compute NDWI from NASA MCD43A1 + +# Features_NDWI = knmi_computation.MODIS_Compute('/home/jupyter-andres/Andres_Folder/Phenology_data/Datasets/NASA/TIFF') +# Features_NDWI.set_start_date(input_arguments['start_date']) +# Features_NDWI.set_end_date(input_arguments['end_date']) +# Features_NDWI.set_target_folder(input_arguments['folder']) +# Features_NDWI.set_raster_template('/home/jupyter-andres/Andres_Folder/Phenology_data/Validation_Files/nederland_1000.tif') +# Features_NDWI.set_crs_output('EPSG:28992') +# Features_NDWI.set_product('MCD43A1.006') +# Features_NDWI.set_vars_names([ +# 'BRDF_Albedo_Band_Mandatory_Quality_Band2', +# 'BRDF_Albedo_Band_Mandatory_Quality_Band6' +# 'BRDF_Albedo_Band_Mandatory_Quality_nir', +# 'BRDF_Albedo_Band_Mandatory_Quality_shortwave' +# ]) +# Features_NDWI.compute_NDWI() + + # Create Features for NASA +# Features_NDWI = knmi_computation.MODIS_Compute('/home/jupyter-andres/Andres_Folder/Phenology_data/Datasets/Self_Interpolated_Data') +# Features_NDWI.set_start_date(input_arguments['start_date']) +# Features_NDWI.set_end_date(input_arguments['end_date']) +# Features_NDWI.set_target_folder(input_arguments['folder']) +# Features_NDWI.set_raster_template('/home/jupyter-andres/Andres_Folder/Phenology_data/Validation_Files/nederland_1000.tif') +# Features_NDWI.set_crs_output('EPSG:28992') +# Features_NDWI.compute_aggregated_stats() + + # Get landuse map + + # Extract values +# Extract_values = feature_extraction.Feature_collector('/home/jupyter-andres/Andres_Folder/Phenology_data/Datasets/Self_Interpolated_Data/Features') +# Extract_values.set_start_date(input_arguments['start_date']) +# Extract_values.set_end_date(input_arguments['end_date']) +# Extract_values.set_target_folder(input_arguments['folder']) +# Extract_values.set_raster_template('/home/jupyter-andres/Andres_Folder/Phenology_data/Validation_Files/nederland_1000.tif') +# Extract_values.set_crs_output('EPSG:28992') +# coordinate_list = [ +# (6.34, 52.93),#'Appelscha' +# (5.23, 52.15),#'Bilthoven' +# (5.69, 52.53),#'Dronten' +# (5.69, 52.03),#'Ede' +# (6.76, 53.02),#'Gieten' +# (5.87, 52.11),#'HoogBaarlo' +# (3.98, 51.84),#'KwadeHoek' +# (6.22, 51.89),#'Montferland' +# (6.42, 52.34),#'Nijverdal' +# (6.16, 53.49),#'Schiermonnikoog' +# (4.89, 52.45),#'Twiske' +# (5.96, 50.79),#'Vaals' +# (5.33, 51.42),#'Veldhoven' +# (4.36, 52.16),#'Wassenaar' +# ] +# Extract_values.activate_transform() +# Extract_values.set_extraction_coordinates(coordinate_list) +# Extract_values.extract_features_from_folder() + + ## assemble features into one file +# Merge_Features = feature_extraction.Feature_merge('/home/jupyter-andres/Andres_Folder/Phenology_data/Datasets/Self_Interpolated_Data/Features') +# Merge_Features.feature_merge() + +if __name__ == '__main__': + main() diff --git 
a/Pipeline/src/modules/utils/__init__.py b/Pipeline/src/modules/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Pipeline/src/modules/utils/__pycache__/__init__.cpython-36.pyc b/Pipeline/src/modules/utils/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9100ac036deaf042930e2f2f56411888cb687682 GIT binary patch literal 197 zcmXr!<>k_}I~mUa1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnuOj`7{M=OitkQzY zlGGyI#JrTE)M9-{Fct5XpOcbWq#uxxnwOuGpI#ZCl30?c@0*yJ7ax#WkeZX3m#Xic zSrQ*ml%JKFT%r#bEG|mc&&^LM%>kNKT9TPltREkrnFmxEuUAlci^B#YX9sdiF%UBV E0A!Ci2><{9 literal 0 HcmV?d00001 diff --git a/Pipeline/src/modules/utils/__pycache__/api_keys.cpython-36.pyc b/Pipeline/src/modules/utils/__pycache__/api_keys.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f1f0cb5dab6d0a692965ad5c8b8138e4a67f0714 GIT binary patch literal 737 zcmb`FPmY@~6voM)snU^V(_3WGQj<`rQdJcN5)suVFwP*nK?qxk2?hjYz&T4V(5rOQ zeYU+tS9NC8YBsZ}#@6@Q@{`}sdVaH;o9myuAMtdgX}`25<4WznC^c`CQH>7hkdEm1 z+n8wd>QY(k&2%|j89R78&uE@gu;EF)N zY%ieL_AvBko*f*J5t$LTdLp73!n-*-StMb9?n7 zp@;^-0f)wt6XBXfKr=d75{63w!$`g`IYT+j(z$0_0G{pyv;#rHXR znhwsBq~h^`$>zIOtNwYk-?hFk1=F)*C0ka%O$u7GMze}_$->Y}8_T;)2pJa6>B@OPCwPd+`yJ`XP~J5e?q IT;}BU9|c(Ej{pDw literal 0 HcmV?d00001 diff --git a/Pipeline/src/modules/utils/__pycache__/argument_handler.cpython-36.pyc b/Pipeline/src/modules/utils/__pycache__/argument_handler.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..037b7fd8d9c7e96fb744932a9ef180aba1277d87 GIT binary patch literal 2597 zcmaJ@&2Jk;6rY)0uQ#rprk}K>&=eYkD-t8Y2~{YCiV6v$NV&jXMoVVu*j=y9%(_yn zR6=sDTsa{Q-1t}c6XwdP{{km|Z`Qks6PLB-&DVQx-uLroolYxx@VB&UjQzvTeIeGn z81@N(Vv1)hDQBK=zZZ#k&XiEWDN}(C=0Or-AF2lS4Zm+Fe!#lX8*pHIhXkvyfyFL{ z{Q*ET;+P1;c0A&(kGwutB$&& z+NV6Z>R(=W@l(^Ej&xq^bwd|EOAGCq-{ytxYvZmi7BU~pnKok=9TaAe_YVbRqK{@8 zUt`!7zzIKL5=MYhe6P!0CtYxtc{i(RieL!|CL>&}KW$ zmC?5M)KBu8ai+BC?H}oUoQ?a(Ql&-O`#v4ya(^(<*&x@w=YvA-oAFQ|7QOn=n#0~` ztfm=wO^ZQhd$lA-cp}qgXL9UfDF->o$2^;bgewRUa zfpsvaRajY};jY(j7snIrP2n1}wC4;&d-qUQk4tHFA(eiSPP0P3O0%h!9_4n;Z0WKq zObmg{Isr0l3CJn#8nLR$h|5hJ+X#U1h`0IdUiIwGCDhf*(=!FspOeE8o2A2r3v6NB zt@^DaolWFOTbuUb{ZhgkAiXHz-KA&GQ{4r2<(UswOL-oBwX_>x0>@mI*IXrG=(>`Q z))%F_wXw07K=^kOs4OOKoqBwzqyX6j6?K*GwA=6rJw-aE zxEKkni@``IF(T!KUxjp*G>WlGk&-x`(6PA=9{=N&~WTJDm2;GDX<8LljmFvWGkWZ#(wfR%9vc!+da^?J%m_4dYdS;Z% z#I?UDDQkb-#-=pSoW16SFA%TA3DTbeJt*~Sc^8p})`hR&He66QNjhnk)r#9>qZAj| z$PILBdr_wweRP^-o$ME2wCFW0C^g%LPyDUY*?sf(YKmEcRt2ulWv Kp-?O~w*CXYAZ1hl literal 0 HcmV?d00001 diff --git a/Pipeline/src/modules/utils/__pycache__/set_arguments_pipeline.cpython-36.pyc b/Pipeline/src/modules/utils/__pycache__/set_arguments_pipeline.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5c0a3662786d0c4bb7a4ba04d75900e8dc9c27f9 GIT binary patch literal 1721 zcmb7F-EP}96ecDA$d2v&8C^33gMcAG4a{|tu6OO)VrhpBNEaXj+7KHA)nskS`YTB% zwI&0@ya(t5^aTd&GEcEb@a?V!^Z|-p?U0f^8|zJ}kUBi)eBU`bJaq0foAq~}|LMH1 zA@mno83xD?VTyfND55w(B!6RqsX@)(5HTn|N1f8&;DMg@)vAUCvkvoXnBp2EB$1D2 zXsX_TnBghTuy0ahhA{%EM9o4f(~_2uU&+7^=ZG>OjLZN;MOzI(R12w0ttCS9=@>OY zn3=gmn1HBjKhrPMjU^(Fi~K5NSMt!b{06@^GUQt)=%@G7N>=e}v@$Z@VE%COqpW8o z&}{6X8I}#dF|zo(g(fR2%RLkwuX!~`7GOV9-hON0OB*v|TFt5p%rg}2W27FrNt^zb zVw&l%zd(`k9KE{Md9+j>;<=VLbxxOSv9?Dwi1mlup<6TjD=tvE=i6oM?GXa6w{n;6 z6kf$QEwXua{S0MgIMv;!r1CNjDC4AZF1Z&C((fpH=|#+zLuThDTgLVk_hhc)OpF6* zQ&+n7EzSa0dY52{H6U?0i)4Z8q&JjTZxnc;E8`#@c+8P3Oh{cwmrDn%jFe7bO&_d8 z)kYOS(Z*$@bE6NSVT2>dVhr$ 
zlpjrc=cs#h45UY2rq%)N9)#V4i}b-2kE21(Rb=(^;WXciVLT3~4Vm_bHl^J#>`q|Z zwv|U5hEPRHs^3PE4WQ~k QEW%!@CD<-e!;+QD|Dwl-*Wk29KsPeLpP!UgxG#mXv9)wEmUM%FzGBIa*{Bzx277h zAaOz{zdK;$(=`)R)`doJF5O_v$c{^?<3-8%KR&WM0QPmAAhks}0hmRyDzX!~A)A+U z63wMWD=pOv(gU+8BqtUuiQbkIXjwUj7FNA6H`S*01CTxDJm&M-t3GvSE|p2^0lyK F{0q~H0r3C;

literal 0
HcmV?d00001

diff --git a/Pipeline/src/modules/utils/argument_handler.py b/Pipeline/src/modules/utils/argument_handler.py
new file mode 100644
index 0000000..ac27a27
--- /dev/null
+++ b/Pipeline/src/modules/utils/argument_handler.py
@@ -0,0 +1,64 @@
+import datetime
+import argparse
+import enum
+
+class Argument(object):
+    argument_name = None
+    argument_type = None
+    argument_type_name = None
+    argument_help_message = None
+    argument_default_value = None
+    types = {
+        "Date":str,
+        "Integer":int,
+        "Integer_no_zero":int,
+        "String":str
+    }
+
+    def __init__(self, argument_name):
+        self.argument_name = argument_name
+
+    def set_argument_type(self, argument_type):
+        self.argument_type_name = argument_type
+        self.argument_type = self.types[argument_type]
+        self.argument_default_value = self.__set_default_value_type()
+
+    def set_argument_help_message(self, argument_help_message):
+        self.argument_help_message = str(argument_help_message)
+
+    def set_argument_default_value(self, argument_default_value):
+        self.argument_default_value = self.__set_default_value_type(argument_default_value)
+
+    def __set_default_value_type(self, argument_default_value = None):
+        if self.argument_type_name == "Integer_no_zero":
+            return 1 if argument_default_value is None or int(argument_default_value) < 1 else int(argument_default_value)
+        elif self.argument_type_name == "Integer":
+            return 1 if argument_default_value is None else int(argument_default_value)
+        elif self.argument_type_name == "Date":
+            return datetime.datetime.strftime(datetime.date.today(), '%d-%m-%Y') if argument_default_value is None\
+                else datetime.datetime.strptime(argument_default_value, '%d-%m-%Y').strftime('%d-%m-%Y')
+        elif self.argument_type_name == "String":
+            return '' if argument_default_value is None else str(argument_default_value)
+
+
+class Arguments_handler(object):
+    arguments = []
+    parser = None
+
+    def __init__(self):
+        self.parser = argparse.ArgumentParser()
+
+    def add_input_argument(self, argument):
+        self.parser.add_argument(
+            "--"+argument.argument_name,
+            dest = argument.argument_name,
+            default = argument.argument_default_value,
+            help = argument.argument_help_message,
+            type = argument.argument_type
+        )
+        self.arguments.append(argument)
+
+    def get_input_arguments(self):
+        all_arguments = self.parser.parse_args()
+        input_parameters = vars(all_arguments)
+        return input_parameters
\ No newline at end of file
diff --git a/Pipeline/src/modules/utils/data_base_management.py b/Pipeline/src/modules/utils/data_base_management.py
new file mode 100644
index 0000000..3d96e77
--- /dev/null
+++ b/Pipeline/src/modules/utils/data_base_management.py
@@ -0,0 +1,98 @@
+import psycopg2
+import sys, os
+import numpy as np
+import pandas as pd
+import pandas.io.sql as psql
+
+from psycopg2 import sql
+
+class DB_query(object):
+    conn = None
+    cursor = None
+    db_query_params = None
+    def __init__(self, conn, db_query_params):
+        self.conn = conn
+        self.cursor = self.conn.cursor()
+        self.db_query_params = db_query_params
+
+    def execute_query(self, sql_command):
+        try:
+            self.cursor.execute(sql_command)
+            result = self.cursor.fetchall()
+            self.conn.commit()
+            return (result)
+        except Exception as e: print(e)
+
+    def get_data(self, columns, condition = None):
+        if condition is None:
+            sql_command = sql.SQL('SELECT {} FROM {}.{};'.format(columns,\
+                self.db_query_params['schema'],\
+                self.db_query_params['table']))
+        else:
+            sql_command = sql.SQL('SELECT {} FROM {}.{} WHERE {} = {};'.format(\
+                columns, self.db_query_params['schema'],\
+                self.db_query_params['table'], condition['variable'], condition['value']))
+        return(self.execute_query(sql_command))
+
+    def insert_data(self, insert_data):
+        for datum in insert_data:
+            sql_command = 'INSERT INTO {}.{}({}) '.format(self.db_query_params['schema'],\
+                self.db_query_params['table'], 'stn, alt, name, lon, lat')
+            sql_command += 'VALUES (' + ','.join(datum) + ')'
+            self.execute_query(sql_command)
+
+class DB_manager(object):
+    conn = None
+    cursor = None
+    db_params = {}
+
+    def __init__(self, db_params):
+        self.db_params = db_params
+        self.conn, self.cursor = self._set_up_db_cursor()
+
+    def _set_up_db_cursor(self):
+        conn_string = "host=" + self.db_params['PGHOST'] +\
+            " port=" + self.db_params['PORT'] +\
+            " dbname=" + self.db_params['PGDATABASE'] +\
+            " user=" + self.db_params['PGUSER'] +\
+            " password="+ self.db_params['PGPASSWORD'] +\
+            " connect_timeout=" + str(self.db_params['TIMEOUT'])
+        try:
+            conn=psycopg2.connect(conn_string)
+            conn.autocommit = True
+            cursor = conn.cursor()
+            print("Connection with the db done")
+            return (conn, cursor)
+        except Exception as e: print(e)
+
+    def get_cursor(self):
+        return self.cursor
+
+    def get_conn(self):
+        return self.conn
+
+    def create_data_base(self, db_name):
+        sql_command = sql.SQL('CREATE DATABASE {};'.format(db_name))
+        try:
+            self.cursor.execute(sql_command)
+            self.db_params['PGDATABASE'] = db_name
+            print('Database created successfully')
+        except Exception as e: print(e)
+
+    def create_data_base_table(self, table_name, table_fields = []):
+        sql_command = 'CREATE TABLE {}'.format(table_name)
+        if len(table_fields) > 0:
+            sql_command += ' ( '
+            list_sql_command = []
+            for field in table_fields:
+                list_sql_command.append(f'{field["name_field"]} {field["type_field"]}')
+            sql_command += ','.join(list_sql_command) + ' )'
+        print(sql_command)
+        try:
+            self.cursor.execute(f'DROP TABLE IF EXISTS {table_name}')
+            self.cursor.execute(sql_command)
+            print("Table created successfully")
+        except Exception as e: print(e)
+
+    def close_connection(self):
+        self.conn.close()
diff --git a/Pipeline/src/modules/utils/set_up_database.py b/Pipeline/src/modules/utils/set_up_database.py
new file mode 100644
index 0000000..5aee8dd
--- /dev/null
+++ b/Pipeline/src/modules/utils/set_up_database.py
@@ -0,0 +1,19 @@
+from utils import api_keys, data_base_management
+
+def set_up_database():
+    DB_params = {
+        'PGHOST': api_keys.PGHOST,
+        'PGDATABASE': api_keys.PGDATABASE,
+        'PGUSER': api_keys.PGUSER,
+        'PGPASSWORD': api_keys.PGPASSWORD,
+        'TIMEOUT': api_keys.TIMEOUT,
+        'PORT': api_keys.PORT
+    }
+    db_setting_manager = data_base_management.DB_manager(DB_params)
+    query_params = {
+        'columns': '*',
+        'schema': 'andres',
+        'table': 'test_phenology'
+    }
+    db_query_manager = data_base_management.DB_query(db_setting_manager.get_conn(), query_params)
+    return db_query_manager
diff --git a/Validation_Pipeline/README.md b/Validation_Pipeline/README.md
new file mode 100644
index 0000000..39edeaf
--- /dev/null
+++ b/Validation_Pipeline/README.md
@@ -0,0 +1,6 @@
+## Validation Pipeline
+
+These scripts are intended to validate the results of the main pipeline; most of them use R. They are not written to clean-code standards, since they exist only for validation and are not expected to be included in the final product.
+
+### Interpolation Validation Scripts
+
diff --git a/Validation_Pipeline/validation_interpolation_knmi.R b/Validation_Pipeline/validation_interpolation_knmi.R
new file mode 100644
index 0000000..5f71ba6
--- /dev/null
+++ b/Validation_Pipeline/validation_interpolation_knmi.R
@@ -0,0 +1,80 @@
+library(ncdf4)
+library(raster)
+library(BAMMtools)
+library(RColorBrewer)
+library(oceanmap)
+library(rgeos)
+
+# Set up the environment for processing in Rstudio
+# rm(list = ls())
+# path <- rstudioapi::getActiveDocumentContext()$path
+# setwd(dirname(path))
+
+RasterTemplate <- raster('nederland_1000.tif')
+test_variable <- 'TN'
+test_date <- '20140801'
+
+file_path <- paste0('./',test_variable,'/',test_date,'/KNMITEST.nc')
+pal <- colorRampPalette(c("blue", "red"))
+
+knmi_stations <- nc2raster(file_path, "stations",layer=1)
+names(knmi_stations) <- c('values')
+vals_stations <- na.omit(c(as.matrix(knmi_stations$values)))
+
+knmi_stations_values <- nc2raster(file_path, "stationvalues",layer=1)
+names(knmi_stations_values) <- c('values')
+vals_stations_values <- na.omit(c(as.matrix(knmi_stations_values$values)))
+
+knmi_longitude <- nc2raster(file_path, "lon",layer=1)
+names(knmi_longitude) <- c('values')
+vals_longitude <- na.omit(c(as.matrix(knmi_longitude$values)))
+vals_station_longitude <- vals_longitude[!c(as.matrix(is.na(knmi_stations_values)))]
+
+knmi_latitude <- nc2raster(file_path, "lat",layer=1)
+names(knmi_latitude) <- c('values')
+vals_latitude <- na.omit(c(as.matrix(knmi_latitude$values)))
+vals_station_latitude <- vals_latitude[!c(as.matrix(is.na(knmi_stations_values)))]
+
+station_complete_from_knmi <- data.frame(vals_stations, vals_station_longitude, vals_station_latitude, vals_stations_values)
+names(station_complete_from_knmi) <- c("STN", "LON", "LAT", "value")
+P_2 <- SpatialPointsDataFrame(station_complete_from_knmi[,2:3],
+                              station_complete_from_knmi,
+                              proj4string = CRS("+proj=longlat +datum=WGS84"))
+
+station_coordinates <- read.csv('StationCoordinates')
+station_values <- read.csv(paste0('./',test_variable,'/',test_date,'/STATION_VALUES'))
+station_complete <- merge(station_coordinates, station_values, by="STN")
+p_temp <- data.frame(station_complete$STN, station_complete$LON, station_complete$LAT, station_complete$TG*0.1)
+names(p_temp) <- c("STN", "LON", "LAT", "value")
+p_temp <- p_temp[complete.cases(p_temp),]
+P <- SpatialPointsDataFrame(p_temp[,2:3],
+                            p_temp,
+                            proj4string = CRS("+proj=longlat +datum=WGS84"))
+
+RasterTemplate <- raster('nederland_1000.tif')
+RasterTemplate_Proj <- projectRaster(RasterTemplate, crs = CRS("+proj=longlat +datum=WGS84"))
+knmi_prediction <- nc2raster(file_path, "prediction",layer=1)
+
+g <- as(!is.na(RasterTemplate_Proj), 'SpatialGridDataFrame')
+P.idw <- gstat::idw(value ~ 1, P, newdata=g, idp=2.5)
+Interpolated_raster <- raster(P.idw)
+
+mask_raster <- !is.na(knmi_prediction)
+mask_raster[mask_raster==0]<- NA
+
+# global_error <- c()
+# for (block_size in seq(0,1,0.05)){
+P_2.idw <- gstat::idw(value ~ 1, P_2, newdata=g, idp=2.5, block=c(0.3, 0.3))
+Interpolated_raster_2 <- raster(P_2.idw)
+Interpolated_raster_2_rs <- resample(Interpolated_raster_2, knmi_prediction)
+Interpolated_raster_clipped <- mask(Interpolated_raster_2_rs, mask_raster)
+sqr_error <- (Interpolated_raster_clipped-knmi_prediction)**2
+error <- sum(na.omit(c(as.matrix(sqr_error))))
+# global_error <- c(global_error, error)
+# }
+# global_error
+
+plot(Interpolated_raster_clipped)
+plot(knmi_prediction)
+plot(sqr_error)
\ No newline at end of file
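Editor's note on the metric in the R script above: it scores the locally interpolated IDW surface against the KNMI prediction by summing squared differences over the valid (non-NA) cells only. A hypothetical Python counterpart of that computation, sketched with rasterio; the file paths are placeholders, and both rasters are assumed to share one grid, as `resample` guarantees in the script:

```python
# Illustrative sketch only -- assumes both rasters are already on the same grid.
import numpy as np
import rasterio

def sum_squared_error(path_a: str, path_b: str) -> float:
    with rasterio.open(path_a) as a, rasterio.open(path_b) as b:
        arr_a = a.read(1, masked=True).astype(np.float64)
        arr_b = b.read(1, masked=True).astype(np.float64)
    # NoData cells stay masked through the arithmetic and drop out of the sum.
    return float(((arr_a - arr_b) ** 2).sum())
```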