From 547cfbc2c3aa82b33f6ede85b5bfda33b07351b8 Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 13:42:37 -0800 Subject: [PATCH 01/21] Update ARGOS_service_data_converter.py - use xarray to make netcdf --- ARGOS_service_data_converter.py | 106 ++++++++++++++++++++++---------- 1 file changed, 75 insertions(+), 31 deletions(-) diff --git a/ARGOS_service_data_converter.py b/ARGOS_service_data_converter.py index 8950a4c..3300e1b 100755 --- a/ARGOS_service_data_converter.py +++ b/ARGOS_service_data_converter.py @@ -52,17 +52,19 @@ and output seconds since midnight. 2018-03-12: Add netcdf output option 2024-12-04: Modify pandas date_parse to be pandas 2.X compliant - + 2024-12-09: Swap Xarray for lowerlevel NetCDF creation + Compatibility: ============== python >=3.9 **tested** """ import argparse -import datetime import sys import pandas as pd import numpy as np +import xarray as xa +from datetime import datetime, timezone from netCDF4 import date2num, num2date @@ -130,11 +132,10 @@ def get_data(fobj=None): df["latitude"] = df.latitude.round(3) df.set_index(pd.DatetimeIndex(df["year_doy_hhmm"]), inplace=True) - # df.drop('year_doy_hhmm',axis=1,inplace=True) + df.drop('year_doy_hhmm',axis=1,inplace=True) return df - class ARGOS_SERVICE_Drifter(object): r""" @@ -198,7 +199,7 @@ def get_data(fobj=None): df["latitude"] = df.latitude.round(3) df.set_index(pd.DatetimeIndex(df["year_doy_hhmm"]), inplace=True) - # df.drop('year_doy_hhmm',axis=1,inplace=True) + df.drop('year_doy_hhmm',axis=1,inplace=True) return df @@ -334,6 +335,7 @@ def get_data(fobj=None, time="current"): df["longitude"] = df["longitude"] * -1 # convert to +W df.set_index(pd.DatetimeIndex(df["year_doy_hhmm"]), inplace=True) + df.drop('year_doy_hhmm',axis=1,inplace=True) return df @@ -449,33 +451,74 @@ def AZ(self, s1): return output -def pandas2netcdf(df=None, ofile="data.nc"): +def pandas2netcdf(df=None, ofile="data.nc",isxa=True): if df.empty: return else: - df["time"] = [ - date2num(x[1], "hours since 1900-01-01T00:00:00Z") - for x in enumerate(df.index) - ] + if isxa: + EPIC_VARS_yaml = ConfigParserLocal.get_config( + "config_files/drifters.yaml", "yaml" + ) - EPIC_VARS_dict = ConfigParserLocal.get_config( - "config_files/drifters.yaml", "yaml" - ) + df = df.reset_index() + df.index = df.reset_index().index.rename('record_number') + xdf = df.rename(columns={'year_doy_hhmm':'time'}).to_xarray() + + #rename variables and add attributes + drop_missing = True + + for var in EPIC_VARS_yaml.keys(): + try: + xdf[var].attrs = EPIC_VARS_yaml[var] + except (ValueError, KeyError): + if drop_missing: + try: + xdf = xdf.drop_vars(var) + except (ValueError, KeyError): + pass + else: + pass + + #global attributes + xdf.attrs["CREATION_DATE"] = datetime.now(timezone.utc).strftime("%B %d, %Y %H:%M UTC") + xdf.attrs["INST_TYPE"] = '' + xdf.attrs["DATA_CMNT"] = '' + xdf.attrs["NC_FILE_GENERATOR"] = __file__.split('/')[-1] + ' ' + __version__ + xdf.attrs["WATER_DEPTH"] = '' + xdf.attrs["MOORING"] = '' + xdf.attrs["WATER_MASS"] = '' + xdf.attrs["EXPERIMENT"] = '' + xdf.attrs["PROJECT"] = '' + xdf.attrs["SERIAL_NUMBER"] = '' + xdf.attrs['History']="File Created from ARGSOS Drifter Data." 
+ + xdf.to_netcdf(ofile, + format='NETCDF3_CLASSIC', + encoding={'time':{'units':'days since 1900-01-01'}}) + else: + df["time"] = [ + date2num(x[1], "hours since 1900-01-01T00:00:00Z") + for x in enumerate(df.index) + ] + + EPIC_VARS_dict = ConfigParserLocal.get_config( + "config_files/drifters.yaml", "yaml" + ) - # create new netcdf file - ncinstance = EcF_write.NetCDF_Create_Profile_Ragged1D(savefile=ofile) - ncinstance.file_create() - ncinstance.sbeglobal_atts( - raw_data_file="", History="File Created from ARGSOS Drifter Data." - ) - ncinstance.dimension_init(recnum_len=len(df)) - ncinstance.variable_init(EPIC_VARS_dict) - ncinstance.add_coord_data(recnum=range(1, len(df) + 1)) - ncinstance.add_data( - EPIC_VARS_dict, data_dic=df, missing_values=np.nan, pandas=True - ) - ncinstance.close() + # create new netcdf file + ncinstance = EcF_write.NetCDF_Create_Profile_Ragged1D(savefile=ofile) + ncinstance.file_create() + ncinstance.sbeglobal_atts( + raw_data_file="", History="File Created from ARGSOS Drifter Data." + ) + ncinstance.dimension_init(recnum_len=len(df)) + ncinstance.variable_init(EPIC_VARS_dict) + ncinstance.add_coord_data(recnum=range(1, len(df) + 1)) + ncinstance.add_data( + EPIC_VARS_dict, data_dic=df, missing_values=np.nan, pandas=True + ) + ncinstance.close() """--------------------- Main ----------------------------------------------""" @@ -494,7 +537,7 @@ def pandas2netcdf(df=None, ofile="data.nc"): "version", metavar="version", type=str, - help="beacon,buoy,buoy_3hr,v1-metocean(pre-2017),v2-vendor(2017)", + help="beacon,buoy,buoy_3hr,v1-(pre-2017),v2-(post-2017)", ) parser.add_argument("-csv", "--csv", type=str, help="output as csv - full path") parser.add_argument( @@ -525,9 +568,12 @@ def pandas2netcdf(df=None, ofile="data.nc"): df = atseadata.get_data(args.sourcefile) + df["location_quality"] = df["s2"] + df.drop_duplicates( subset=["year_doy_hhmm", "latitude", "longitude"], keep="last", inplace=True ) + df.drop(['year_doy_hhmm',"s1", "s2", "s3", "s4"], axis=1, inplace=True) elif args.version in ["v1", "V1", "version1", "v1-metocean"]: atseadata = ARGOS_SERVICE_Drifter() @@ -548,11 +594,10 @@ def pandas2netcdf(df=None, ofile="data.nc"): df["location_quality"] = df["s8"] df["location_quality"] = pd.to_numeric(df["location_quality"], errors="coerce") - df.drop(["s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8"], axis=1, inplace=True) - df.drop(df.index[~df["checksum"]], inplace=True) df.drop_duplicates(subset="year_doy_hhmm", keep="last", inplace=True) + df.drop(["year_doy_hhmm","s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8"], axis=1, inplace=True) elif args.version in ["v2", "V2", "version2", "v2-vendor(2017)"]: @@ -572,8 +617,6 @@ def pandas2netcdf(df=None, ofile="data.nc"): df["location_quality"] = df["s8"] df["location_quality"] = pd.to_numeric(df["location_quality"], errors="coerce") - df.drop(["s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8"], axis=1, inplace=True) - try: df.drop(df.index[~df["checksum"]], inplace=True) except TypeError: @@ -582,6 +625,7 @@ def pandas2netcdf(df=None, ofile="data.nc"): df.drop_duplicates( subset=["year_doy_hhmm", "latitude", "longitude"], keep="last", inplace=True ) + df.drop(["year_doy_hhmm","s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8"], axis=1, inplace=True) elif args.version in ["buoy", "met", "sfc_package"]: From 0a5ea9bc6aaf15380b6393102f63f05303695c7d Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 14:09:33 -0800 Subject: [PATCH 02/21] Update ARGOS_service_data_converter.py keep column a bit longer --- 
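Reviewer note: PATCH 01 swaps the hand-rolled EcF_write.NetCDF_Create_Profile_Ragged1D writer for xarray's Dataset.to_netcdf. For anyone unfamiliar with that pattern, here is a minimal, self-contained sketch of the same pipeline — a DataFrame indexed by record_number, variable attributes attached from a dict, and time written as numeric values via an encoding entry. The column names, attribute values, and output path below are illustrative stand-ins, not the project's YAML-driven ones:

    # Sketch of the pandas -> xarray -> netCDF path adopted in PATCH 01.
    # Names, attributes, and the output file are examples only.
    import pandas as pd
    import xarray as xr

    df = pd.DataFrame(
        {
            "time": pd.date_range("2024-01-01", periods=3, freq="h"),
            "latitude": [57.001, 57.002, 57.003],
            "longitude": [170.501, 170.502, 170.503],
        }
    )
    df.index = df.index.rename("record_number")

    xdf = df.to_xarray()
    xdf["latitude"].attrs = {"units": "degrees_north", "longname": "latitude"}
    xdf.attrs["History"] = "File created from example data."

    # On disk, 'time' becomes numbers counted from the epoch in the units
    # string; CF-aware readers decode it back to timestamps.
    xdf.to_netcdf(
        "example.nc",
        format="NETCDF3_CLASSIC",
        encoding={"time": {"units": "days since 1900-01-01"}},
    )

One practical point about the classic format: it has no 64-bit integer type, so xarray down-casts the int64 record_number index to int32 when the values fit, and raises when they do not.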
ARGOS_service_data_converter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ARGOS_service_data_converter.py b/ARGOS_service_data_converter.py index 3300e1b..2c7c080 100755 --- a/ARGOS_service_data_converter.py +++ b/ARGOS_service_data_converter.py @@ -132,7 +132,7 @@ def get_data(fobj=None): df["latitude"] = df.latitude.round(3) df.set_index(pd.DatetimeIndex(df["year_doy_hhmm"]), inplace=True) - df.drop('year_doy_hhmm',axis=1,inplace=True) + # df.drop('year_doy_hhmm',axis=1,inplace=True) return df @@ -199,7 +199,7 @@ def get_data(fobj=None): df["latitude"] = df.latitude.round(3) df.set_index(pd.DatetimeIndex(df["year_doy_hhmm"]), inplace=True) - df.drop('year_doy_hhmm',axis=1,inplace=True) + # df.drop('year_doy_hhmm',axis=1,inplace=True) return df @@ -335,7 +335,7 @@ def get_data(fobj=None, time="current"): df["longitude"] = df["longitude"] * -1 # convert to +W df.set_index(pd.DatetimeIndex(df["year_doy_hhmm"]), inplace=True) - df.drop('year_doy_hhmm',axis=1,inplace=True) + # df.drop('year_doy_hhmm',axis=1,inplace=True) return df From 363cd86b28030f454b03ceb65970abc49904873b Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 14:12:17 -0800 Subject: [PATCH 03/21] Update ARGOS_service_data_converter.py --- ARGOS_service_data_converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ARGOS_service_data_converter.py b/ARGOS_service_data_converter.py index 2c7c080..7154aa8 100755 --- a/ARGOS_service_data_converter.py +++ b/ARGOS_service_data_converter.py @@ -484,7 +484,7 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): xdf.attrs["CREATION_DATE"] = datetime.now(timezone.utc).strftime("%B %d, %Y %H:%M UTC") xdf.attrs["INST_TYPE"] = '' xdf.attrs["DATA_CMNT"] = '' - xdf.attrs["NC_FILE_GENERATOR"] = __file__.split('/')[-1] + ' ' + __version__ + xdf.attrs["NC_FILE_GENERATOR"] = 'Generated with Xarray' xdf.attrs["WATER_DEPTH"] = '' xdf.attrs["MOORING"] = '' xdf.attrs["WATER_MASS"] = '' From b722fa30cf6ff8afc3bf78efc4adf4d5b7b8f182 Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 14:14:50 -0800 Subject: [PATCH 04/21] Update drifters.yaml time is captured in xarray output --- config_files/drifters.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/config_files/drifters.yaml b/config_files/drifters.yaml index bb5a930..c458465 100644 --- a/config_files/drifters.yaml +++ b/config_files/drifters.yaml @@ -1,10 +1,10 @@ --- -time: - name: time - generic_name: time - longname: date and time since reference time - time_origin: '1900-01-01 00:00:00' - units: 'hours since 1900-01-01T00:00:00Z' +# time: +# name: time +# generic_name: time +# longname: date and time since reference time +# time_origin: '1900-01-01 00:00:00' +# units: 'hours since 1900-01-01T00:00:00Z' latitude: name: latitude generic_name: latitude From 6d10f814ca34bbe553c97453aef77e065ee40d08 Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 14:23:09 -0800 Subject: [PATCH 05/21] Update ARGOS_service_data_converter.py force checksum --- ARGOS_service_data_converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ARGOS_service_data_converter.py b/ARGOS_service_data_converter.py index 7154aa8..fb2fa4f 100755 --- a/ARGOS_service_data_converter.py +++ b/ARGOS_service_data_converter.py @@ -495,7 +495,7 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): xdf.to_netcdf(ofile, format='NETCDF3_CLASSIC', - encoding={'time':{'units':'days since 1900-01-01'}}) + encoding={'time':{'units':'days since 
1900-01-01'},'checksum':{"dtype":"bool"}}) else: df["time"] = [ date2num(x[1], "hours since 1900-01-01T00:00:00Z") From ae13c1865aa25c1eb8f172bd00394ebfc5f05cf3 Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 14:47:39 -0800 Subject: [PATCH 06/21] Update ARGOS_service_data_converter.py checksum force to boolean --- ARGOS_service_data_converter.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ARGOS_service_data_converter.py b/ARGOS_service_data_converter.py index fb2fa4f..3a8b05c 100755 --- a/ARGOS_service_data_converter.py +++ b/ARGOS_service_data_converter.py @@ -465,6 +465,11 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): df.index = df.reset_index().index.rename('record_number') xdf = df.rename(columns={'year_doy_hhmm':'time'}).to_xarray() + try: + df['checksum'] = df['checksum'].astype(bool) + except: + pass + #rename variables and add attributes drop_missing = True @@ -495,7 +500,7 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): xdf.to_netcdf(ofile, format='NETCDF3_CLASSIC', - encoding={'time':{'units':'days since 1900-01-01'},'checksum':{"dtype":"bool"}}) + encoding={'time':{'units':'days since 1900-01-01'}}) else: df["time"] = [ date2num(x[1], "hours since 1900-01-01T00:00:00Z") From c9d83256da0883fe890dd10ab0eb7278bdefa5f1 Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 14:49:04 -0800 Subject: [PATCH 07/21] Update ARGOS_service_data_converter.py --- ARGOS_service_data_converter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ARGOS_service_data_converter.py b/ARGOS_service_data_converter.py index 3a8b05c..ab1d6c8 100755 --- a/ARGOS_service_data_converter.py +++ b/ARGOS_service_data_converter.py @@ -466,10 +466,10 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): xdf = df.rename(columns={'year_doy_hhmm':'time'}).to_xarray() try: - df['checksum'] = df['checksum'].astype(bool) + df['checksum'] = df['checksum'].astype("bool") except: pass - + #rename variables and add attributes drop_missing = True From 3f1884904669d9c1db60821c7b0b7ad625d7f08e Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 14:51:39 -0800 Subject: [PATCH 08/21] Update ARGOS_service_data_converter.py drop checksum from potential output file --- ARGOS_service_data_converter.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/ARGOS_service_data_converter.py b/ARGOS_service_data_converter.py index ab1d6c8..222e869 100755 --- a/ARGOS_service_data_converter.py +++ b/ARGOS_service_data_converter.py @@ -465,11 +465,6 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): df.index = df.reset_index().index.rename('record_number') xdf = df.rename(columns={'year_doy_hhmm':'time'}).to_xarray() - try: - df['checksum'] = df['checksum'].astype("bool") - except: - pass - #rename variables and add attributes drop_missing = True @@ -602,7 +597,7 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): df.drop(df.index[~df["checksum"]], inplace=True) df.drop_duplicates(subset="year_doy_hhmm", keep="last", inplace=True) - df.drop(["year_doy_hhmm","s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8"], axis=1, inplace=True) + df.drop(["checksum","year_doy_hhmm","s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8"], axis=1, inplace=True) elif args.version in ["v2", "V2", "version2", "v2-vendor(2017)"]: @@ -630,7 +625,7 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): df.drop_duplicates( subset=["year_doy_hhmm", "latitude", "longitude"], keep="last", inplace=True ) - 
df.drop(["year_doy_hhmm","s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8"], axis=1, inplace=True) + df.drop(["checksum","year_doy_hhmm","s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8"], axis=1, inplace=True) elif args.version in ["buoy", "met", "sfc_package"]: From fc88c35a257169fe1d9b7fed665f6a7fbeb65ebc Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 14:54:13 -0800 Subject: [PATCH 09/21] Update drifters.yaml force datatypes --- config_files/drifters.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/config_files/drifters.yaml b/config_files/drifters.yaml index c458465..45cf755 100644 --- a/config_files/drifters.yaml +++ b/config_files/drifters.yaml @@ -10,27 +10,32 @@ latitude: generic_name: latitude units: degrees_north longname: 'latitude' + dtype: float longitude: name: longitude generic_name: longitude units: degrees_west longname: 'longitude' + dtype: float sst: name: sea surface temperature generic_name: sst units: degree_C longname: 'sea surface temperature (degree_C)' standard_name: 'sea_surface_temperature' + dtype: float strain: name: strain generic_name: strain units: percent longname: 'strain gauge percent' + dtype: float voltage: name: voltage generic_name: voltage longname: 'battery voltage' units: volts + dtype: float location_quality: name: location_quality generic_name: location_quality From 794c20336a7d86ef67d1c82fc38b32ac8cb43e83 Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 14:59:42 -0800 Subject: [PATCH 10/21] Update drifters.yaml --- config_files/drifters.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/config_files/drifters.yaml b/config_files/drifters.yaml index 45cf755..c11965d 100644 --- a/config_files/drifters.yaml +++ b/config_files/drifters.yaml @@ -10,32 +10,32 @@ latitude: generic_name: latitude units: degrees_north longname: 'latitude' - dtype: float + _Encoding: float longitude: name: longitude generic_name: longitude units: degrees_west longname: 'longitude' - dtype: float + _Encoding: float sst: name: sea surface temperature generic_name: sst units: degree_C longname: 'sea surface temperature (degree_C)' standard_name: 'sea_surface_temperature' - dtype: float + _Encoding: float strain: name: strain generic_name: strain units: percent longname: 'strain gauge percent' - dtype: float + _Encoding: float voltage: name: voltage generic_name: voltage longname: 'battery voltage' units: volts - dtype: float + _Encoding: float location_quality: name: location_quality generic_name: location_quality From e026987b2d5a175dd4a43c389dabedb7967b331d Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 15:04:35 -0800 Subject: [PATCH 11/21] Update ARGOS_service_data_converter.py switch to netcdf4 --- ARGOS_service_data_converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ARGOS_service_data_converter.py b/ARGOS_service_data_converter.py index 222e869..ec261c3 100755 --- a/ARGOS_service_data_converter.py +++ b/ARGOS_service_data_converter.py @@ -494,7 +494,7 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): xdf.attrs['History']="File Created from ARGSOS Drifter Data." 
xdf.to_netcdf(ofile, - format='NETCDF3_CLASSIC', + format='NETCDF4', encoding={'time':{'units':'days since 1900-01-01'}}) else: df["time"] = [ From 4dd04d9093e3ef9fbb8657fd3caf6c37b61f8560 Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 15:04:38 -0800 Subject: [PATCH 12/21] Update drifters.yaml --- config_files/drifters.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/config_files/drifters.yaml b/config_files/drifters.yaml index c11965d..c458465 100644 --- a/config_files/drifters.yaml +++ b/config_files/drifters.yaml @@ -10,32 +10,27 @@ latitude: generic_name: latitude units: degrees_north longname: 'latitude' - _Encoding: float longitude: name: longitude generic_name: longitude units: degrees_west longname: 'longitude' - _Encoding: float sst: name: sea surface temperature generic_name: sst units: degree_C longname: 'sea surface temperature (degree_C)' standard_name: 'sea_surface_temperature' - _Encoding: float strain: name: strain generic_name: strain units: percent longname: 'strain gauge percent' - _Encoding: float voltage: name: voltage generic_name: voltage longname: 'battery voltage' units: volts - _Encoding: float location_quality: name: location_quality generic_name: location_quality From a5e0a80a9e63921b0c6d5353403886f4098510f2 Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 15:10:46 -0800 Subject: [PATCH 13/21] Update ARGOS_service_data_converter.py --- ARGOS_service_data_converter.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ARGOS_service_data_converter.py b/ARGOS_service_data_converter.py index ec261c3..d1a338f 100755 --- a/ARGOS_service_data_converter.py +++ b/ARGOS_service_data_converter.py @@ -479,6 +479,11 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): pass else: pass + + #xarray casting issue? + for var in xdf.variables: + if xdf[var].dtype == 'float64': + xdf[var] = xdf[var].astype('float32') #global attributes xdf.attrs["CREATION_DATE"] = datetime.now(timezone.utc).strftime("%B %d, %Y %H:%M UTC") From c81fffc54c74df0ab5f1a04c8d67d59db36f068d Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 15:16:01 -0800 Subject: [PATCH 14/21] Update ARGOS_service_data_converter.py --- ARGOS_service_data_converter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ARGOS_service_data_converter.py b/ARGOS_service_data_converter.py index d1a338f..b86b6f8 100755 --- a/ARGOS_service_data_converter.py +++ b/ARGOS_service_data_converter.py @@ -483,7 +483,7 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): #xarray casting issue? for var in xdf.variables: if xdf[var].dtype == 'float64': - xdf[var] = xdf[var].astype('float32') + xdf[var] = xdf[var].astype('float') #global attributes xdf.attrs["CREATION_DATE"] = datetime.now(timezone.utc).strftime("%B %d, %Y %H:%M UTC") @@ -499,7 +499,7 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): xdf.attrs['History']="File Created from ARGSOS Drifter Data." 
xdf.to_netcdf(ofile, - format='NETCDF4', + format='NETCDF3_CLASSIC', encoding={'time':{'units':'days since 1900-01-01'}}) else: df["time"] = [ From f706ea618dec6f11c20085c3c5b2e6768b93c52a Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 15:17:09 -0800 Subject: [PATCH 15/21] Update ARGOS_service_data_converter.py --- ARGOS_service_data_converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ARGOS_service_data_converter.py b/ARGOS_service_data_converter.py index b86b6f8..dfdfb2c 100755 --- a/ARGOS_service_data_converter.py +++ b/ARGOS_service_data_converter.py @@ -483,7 +483,7 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): #xarray casting issue? for var in xdf.variables: if xdf[var].dtype == 'float64': - xdf[var] = xdf[var].astype('float') + xdf[var] = xdf[var].astype('int') #global attributes xdf.attrs["CREATION_DATE"] = datetime.now(timezone.utc).strftime("%B %d, %Y %H:%M UTC") From 9a73cc0cd03d0bda95fe0db4907abc904f9872cf Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 15:18:52 -0800 Subject: [PATCH 16/21] Update ARGOS_service_data_converter.py --- ARGOS_service_data_converter.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/ARGOS_service_data_converter.py b/ARGOS_service_data_converter.py index dfdfb2c..4da720e 100755 --- a/ARGOS_service_data_converter.py +++ b/ARGOS_service_data_converter.py @@ -479,11 +479,6 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): pass else: pass - - #xarray casting issue? - for var in xdf.variables: - if xdf[var].dtype == 'float64': - xdf[var] = xdf[var].astype('int') #global attributes xdf.attrs["CREATION_DATE"] = datetime.now(timezone.utc).strftime("%B %d, %Y %H:%M UTC") @@ -500,7 +495,7 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): xdf.to_netcdf(ofile, format='NETCDF3_CLASSIC', - encoding={'time':{'units':'days since 1900-01-01'}}) + encoding={'time':{'units':'days since 1900-01-01'},'latitude':{'dtype':'float'}}) else: df["time"] = [ date2num(x[1], "hours since 1900-01-01T00:00:00Z") From d6a1fbbe6e789d1acc6a5226042db87ed388fbc3 Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 15:21:38 -0800 Subject: [PATCH 17/21] Update ARGOS_service_data_converter.py --- ARGOS_service_data_converter.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ARGOS_service_data_converter.py b/ARGOS_service_data_converter.py index 4da720e..5cf7b4e 100755 --- a/ARGOS_service_data_converter.py +++ b/ARGOS_service_data_converter.py @@ -495,7 +495,12 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): xdf.to_netcdf(ofile, format='NETCDF3_CLASSIC', - encoding={'time':{'units':'days since 1900-01-01'},'latitude':{'dtype':'float'}}) + encoding={'time':{'units':'days since 1900-01-01'}, + 'latitude':{'dtype':'float'}, + 'longitude':{'dtype':'float'}, + 'strain':{'dtype':'float'}, + 'voltage':{'dtype':'float'}, + 'sst':{'dtype':'float'}}) else: df["time"] = [ date2num(x[1], "hours since 1900-01-01T00:00:00Z") From 2457ca3053d9dad98abfda91238cc4a454fa9bb2 Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 15:31:03 -0800 Subject: [PATCH 18/21] Update ARGOS_service_data_converter.py --- ARGOS_service_data_converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ARGOS_service_data_converter.py b/ARGOS_service_data_converter.py index 5cf7b4e..b5a6300 100755 --- a/ARGOS_service_data_converter.py +++ b/ARGOS_service_data_converter.py @@ -503,7 +503,7 @@ def pandas2netcdf(df=None, 
ofile="data.nc",isxa=True): 'sst':{'dtype':'float'}}) else: df["time"] = [ - date2num(x[1], "hours since 1900-01-01T00:00:00Z") + date2num(x[1], "days since 1900-01-01T00:00:00Z") for x in enumerate(df.index) ] From 323fa6ee5830c82201f4d05109a94b7702a666a9 Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 15:41:22 -0800 Subject: [PATCH 19/21] Update ARGOS_service_data_converter.py --- ARGOS_service_data_converter.py | 36 +++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/ARGOS_service_data_converter.py b/ARGOS_service_data_converter.py index b5a6300..d3745f6 100755 --- a/ARGOS_service_data_converter.py +++ b/ARGOS_service_data_converter.py @@ -465,6 +465,19 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): df.index = df.reset_index().index.rename('record_number') xdf = df.rename(columns={'year_doy_hhmm':'time'}).to_xarray() + #global attributes + xdf.attrs["CREATION_DATE"] = datetime.now(timezone.utc).strftime("%B %d, %Y %H:%M UTC") + xdf.attrs["INST_TYPE"] = '' + xdf.attrs["DATA_CMNT"] = '' + xdf.attrs["NC_FILE_GENERATOR"] = 'Generated with Xarray' + xdf.attrs["WATER_DEPTH"] = '' + xdf.attrs["MOORING"] = '' + xdf.attrs["WATER_MASS"] = '' + xdf.attrs["EXPERIMENT"] = '' + xdf.attrs["PROJECT"] = '' + xdf.attrs["SERIAL_NUMBER"] = '' + xdf.attrs['History']="File Created from ARGSOS Drifter Data." + #rename variables and add attributes drop_missing = True @@ -479,21 +492,8 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): pass else: pass - - #global attributes - xdf.attrs["CREATION_DATE"] = datetime.now(timezone.utc).strftime("%B %d, %Y %H:%M UTC") - xdf.attrs["INST_TYPE"] = '' - xdf.attrs["DATA_CMNT"] = '' - xdf.attrs["NC_FILE_GENERATOR"] = 'Generated with Xarray' - xdf.attrs["WATER_DEPTH"] = '' - xdf.attrs["MOORING"] = '' - xdf.attrs["WATER_MASS"] = '' - xdf.attrs["EXPERIMENT"] = '' - xdf.attrs["PROJECT"] = '' - xdf.attrs["SERIAL_NUMBER"] = '' - xdf.attrs['History']="File Created from ARGSOS Drifter Data." 
- - xdf.to_netcdf(ofile, + try: #others + xdf.to_netcdf(ofile, format='NETCDF3_CLASSIC', encoding={'time':{'units':'days since 1900-01-01'}, 'latitude':{'dtype':'float'}, @@ -501,6 +501,12 @@ def pandas2netcdf(df=None, ofile="data.nc",isxa=True): 'strain':{'dtype':'float'}, 'voltage':{'dtype':'float'}, 'sst':{'dtype':'float'}}) + except: #beacon file + xdf.to_netcdf(ofile, + format='NETCDF3_CLASSIC', + encoding={'time':{'units':'days since 1900-01-01'}, + 'latitude':{'dtype':'float'}, + 'longitude':{'dtype':'float'}}) else: df["time"] = [ date2num(x[1], "days since 1900-01-01T00:00:00Z") From 79b0b662809d9a7ce72e512f4986589da6e01ca4 Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 15:56:21 -0800 Subject: [PATCH 20/21] clean old routines --- ARGOS_service_data_converter.py | 129 ++++++++----------- io_utils/EcoFOCI_netCDF_write.py | 204 ------------------------------- 2 files changed, 50 insertions(+), 283 deletions(-) delete mode 100644 io_utils/EcoFOCI_netCDF_write.py diff --git a/ARGOS_service_data_converter.py b/ARGOS_service_data_converter.py index d3745f6..8daff10 100755 --- a/ARGOS_service_data_converter.py +++ b/ARGOS_service_data_converter.py @@ -67,11 +67,6 @@ from datetime import datetime, timezone from netCDF4 import date2num, num2date - -# User Stack -import io_utils.EcoFOCI_netCDF_write as EcF_write -import io_utils.ConfigParserLocal as ConfigParserLocal - from io_utils import ConfigParserLocal """-----------------------------------------------------Data Classes----------------------------------------------------------""" @@ -451,86 +446,62 @@ def AZ(self, s1): return output -def pandas2netcdf(df=None, ofile="data.nc",isxa=True): +def pandas2netcdf(df=None, ofile="data.nc": if df.empty: return - else: - if isxa: - EPIC_VARS_yaml = ConfigParserLocal.get_config( - "config_files/drifters.yaml", "yaml" - ) - df = df.reset_index() - df.index = df.reset_index().index.rename('record_number') - xdf = df.rename(columns={'year_doy_hhmm':'time'}).to_xarray() - - #global attributes - xdf.attrs["CREATION_DATE"] = datetime.now(timezone.utc).strftime("%B %d, %Y %H:%M UTC") - xdf.attrs["INST_TYPE"] = '' - xdf.attrs["DATA_CMNT"] = '' - xdf.attrs["NC_FILE_GENERATOR"] = 'Generated with Xarray' - xdf.attrs["WATER_DEPTH"] = '' - xdf.attrs["MOORING"] = '' - xdf.attrs["WATER_MASS"] = '' - xdf.attrs["EXPERIMENT"] = '' - xdf.attrs["PROJECT"] = '' - xdf.attrs["SERIAL_NUMBER"] = '' - xdf.attrs['History']="File Created from ARGSOS Drifter Data." - - #rename variables and add attributes - drop_missing = True - - for var in EPIC_VARS_yaml.keys(): + EPIC_VARS_yaml = ConfigParserLocal.get_config( + "config_files/drifters.yaml", "yaml" + ) + + df = df.reset_index() + df.index = df.reset_index().index.rename('record_number') + xdf = df.rename(columns={'year_doy_hhmm':'time'}).to_xarray() + + #global attributes + xdf.attrs["CREATION_DATE"] = datetime.now(timezone.utc).strftime("%B %d, %Y %H:%M UTC") + xdf.attrs["INST_TYPE"] = '' + xdf.attrs["DATA_CMNT"] = '' + xdf.attrs["NC_FILE_GENERATOR"] = 'Generated with Xarray' + xdf.attrs["WATER_DEPTH"] = '' + xdf.attrs["MOORING"] = '' + xdf.attrs["WATER_MASS"] = '' + xdf.attrs["EXPERIMENT"] = '' + xdf.attrs["PROJECT"] = '' + xdf.attrs["SERIAL_NUMBER"] = '' + xdf.attrs['History']="File Created from ARGSOS Drifter Data." 
+ + #rename variables and add attributes + drop_missing = True + + for var in EPIC_VARS_yaml.keys(): + try: + xdf[var].attrs = EPIC_VARS_yaml[var] + except (ValueError, KeyError): + if drop_missing: try: - xdf[var].attrs = EPIC_VARS_yaml[var] + xdf = xdf.drop_vars(var) except (ValueError, KeyError): - if drop_missing: - try: - xdf = xdf.drop_vars(var) - except (ValueError, KeyError): - pass - else: - pass - try: #others - xdf.to_netcdf(ofile, - format='NETCDF3_CLASSIC', - encoding={'time':{'units':'days since 1900-01-01'}, - 'latitude':{'dtype':'float'}, - 'longitude':{'dtype':'float'}, - 'strain':{'dtype':'float'}, - 'voltage':{'dtype':'float'}, - 'sst':{'dtype':'float'}}) - except: #beacon file - xdf.to_netcdf(ofile, - format='NETCDF3_CLASSIC', - encoding={'time':{'units':'days since 1900-01-01'}, - 'latitude':{'dtype':'float'}, - 'longitude':{'dtype':'float'}}) - else: - df["time"] = [ - date2num(x[1], "days since 1900-01-01T00:00:00Z") - for x in enumerate(df.index) - ] - - EPIC_VARS_dict = ConfigParserLocal.get_config( - "config_files/drifters.yaml", "yaml" - ) - - # create new netcdf file - ncinstance = EcF_write.NetCDF_Create_Profile_Ragged1D(savefile=ofile) - ncinstance.file_create() - ncinstance.sbeglobal_atts( - raw_data_file="", History="File Created from ARGSOS Drifter Data." - ) - ncinstance.dimension_init(recnum_len=len(df)) - ncinstance.variable_init(EPIC_VARS_dict) - ncinstance.add_coord_data(recnum=range(1, len(df) + 1)) - ncinstance.add_data( - EPIC_VARS_dict, data_dic=df, missing_values=np.nan, pandas=True - ) - ncinstance.close() - + pass + else: + pass + try: #others + xdf.to_netcdf(ofile, + format='NETCDF3_CLASSIC', + encoding={'time':{'units':'days since 1900-01-01'}, + 'latitude':{'dtype':'float'}, + 'longitude':{'dtype':'float'}, + 'strain':{'dtype':'float'}, + 'voltage':{'dtype':'float'}, + 'sst':{'dtype':'float'}}) + except: #beacon file + xdf.to_netcdf(ofile, + format='NETCDF3_CLASSIC', + encoding={'time':{'units':'days since 1900-01-01'}, + 'latitude':{'dtype':'float'}, + 'longitude':{'dtype':'float'}}) + """--------------------- Main ----------------------------------------------""" diff --git a/io_utils/EcoFOCI_netCDF_write.py b/io_utils/EcoFOCI_netCDF_write.py deleted file mode 100644 index 874562b..0000000 --- a/io_utils/EcoFOCI_netCDF_write.py +++ /dev/null @@ -1,204 +0,0 @@ -""" - EcoFOCI_netCDF_write.py - - class for building netcdf files from specified instruments - - - History: - -------- - 2016-08-02: Migrate to EcoFOCI_MooringAnalysis pkg and unify netcdf creation code so - that it is no longer instrument dependant - -""" - -# Standard library. -import datetime, os - -# Scientific stack. -from netCDF4 import Dataset - -__author__ = 'Shaun Bell' -__email__ = 'shaun.bell@noaa.gov' -__created__ = datetime.datetime(2014, 1, 13) -__modified__ = datetime.datetime(2014, 12, 2) -__version__ = "0.3.0" -__status__ = "Development" - - -"""-------------------------------NCFile Creation--------------------------------------""" - -class NetCDF_Create_Profile_Ragged1D(object): - """ Class instance to generate a NetCDF file. 
- - Standards - --------- - EPICNetCDF (PMEL) Standards - - - Usage - ----- - - Order of routines matters and no error checking currently exists - ToDo: Error Checking - - Use this to create a nc file with all default values - ncinstance = NetCDF_Create_Profile_Ragged1D() - ncinstance.file_create() - ncinstance.sbeglobal_atts() - ncinstance.dimension_init() - ncinstance.variable_init() - ncinstance.add_coord_data() - ncinstance.add_data() - ncinstance.close() - """ - - - nc_format = 'NETCDF3_CLASSIC' - nc_read = 'w' - - def __init__(self, savefile='data/test.nc'): - """initialize output file path""" - - self.savefile = savefile - - def file_create(self): - rootgrpID = Dataset(self.savefile, NetCDF_Create_Profile_Ragged1D.nc_read, - format=NetCDF_Create_Profile_Ragged1D.nc_format) - self.rootgrpID = rootgrpID - return ( rootgrpID ) - - def sbeglobal_atts(self, raw_data_file='', Water_Mass='', Water_Depth=9999, - Experiment='', Station_Name='', SerialNumber='', - Instrument_Type='', History='', Project=''): - """ - Assumptions - ----------- - - Format of DataFrame.name = 'dy1309l1_ctd001' - - seabird related global attributes found in DataFrame.header list - - """ - - self.rootgrpID.CREATION_DATE = datetime.datetime.utcnow().strftime("%B %d, %Y %H:%M UTC") - self.rootgrpID.INST_TYPE = Instrument_Type - self.rootgrpID.DATA_CMNT = raw_data_file - self.rootgrpID.NC_FILE_GENERATOR = __file__.split('/')[-1] + ' ' + __version__ - self.rootgrpID.WATER_DEPTH = Water_Depth - self.rootgrpID.MOORING = Station_Name - self.rootgrpID.WATER_MASS = Water_Mass - self.rootgrpID.EXPERIMENT = Experiment - self.rootgrpID.PROJECT = Project - self.rootgrpID.SERIAL_NUMBER = SerialNumber - self.rootgrpID.History = History - - def dimension_init(self, recnum_len=1): - """ - Assumes - ------- - Dimensions will be 'record_number' - - Todo - ---- - User defined dimensions - """ - - self.dim_vars = ['record_number'] - - self.rootgrpID.createDimension( self.dim_vars[0], recnum_len ) #recnumber - - - def variable_init(self, EPIC_VARS_dict, verbose=False): - """ - EPIC keys: - passed in as a dictionary (similar syntax as json data file) - The dictionary keys are what defines the variable names. 
- """ - #exit if the variable dictionary is not passed - if not bool(EPIC_VARS_dict): - raise RuntimeError('Empty EPIC Dictionary is passed to variable_init.') - - #build record variable attributes - rec_vars, rec_var_name, rec_var_longname = [], [], [] - rec_var_generic_name, rec_var_FORTRAN, rec_var_units, rec_var_epic = [], [], [], [] - - #cycle through epic dictionary and create nc parameters - for evar in EPIC_VARS_dict.keys(): - if verbose: - print("Creating Variable {0}".format(EPIC_VARS_dict[evar]['name'])) - rec_vars.append(evar) - rec_var_name.append( EPIC_VARS_dict[evar]['name'] ) - rec_var_longname.append( EPIC_VARS_dict[evar]['longname'] ) - rec_var_generic_name.append( EPIC_VARS_dict[evar]['generic_name'] ) - rec_var_units.append( EPIC_VARS_dict[evar]['units'] ) - - rec_vars = ['record_number'] + rec_vars - - rec_var_name = [''] + rec_var_name - rec_var_longname = [''] + rec_var_longname - rec_var_generic_name = [''] + rec_var_generic_name - rec_var_units = ['sequential measurement id'] + rec_var_units - rec_var_type= ['f4'] + ['f8' for spot in rec_vars[1:]] - - var_class = [] - var_class.append(self.rootgrpID.createVariable(rec_vars[0], rec_var_type[0], self.dim_vars[0]))#time1 - - for i, v in enumerate(rec_vars[1:]): #1D coordinate variables - var_class.append(self.rootgrpID.createVariable(rec_vars[i+1], rec_var_type[i+1], self.dim_vars)) - - ### add variable attributes - for i, v in enumerate(var_class): #4dimensional for all vars - print("Adding Variable {0}".format(v))# - v.setncattr('name',rec_var_name[i]) - v.long_name = rec_var_longname[i] - v.generic_name = rec_var_generic_name[i] - v.units = rec_var_units[i] - - self.var_class = var_class - self.rec_vars = rec_vars - - - def add_coord_data(self, recnum=None): - """ """ - self.var_class[0][:] = recnum - - def add_data(self, EPIC_VARS_dict, data_dic=None, missing_values=99999, pandas=False): - """ - using the same dictionary to define the variables, and a new dictionary - that associates each data array with an epic key, cycle through and populate - the desired variables. 
If a variable is defined in the epic keys but not passed - to the add_data routine, it should be populated with missing data - """ - #exit if the variable dictionary is not passed - if not bool(EPIC_VARS_dict): - raise RuntimeError('Empty EPIC Dictionary is passed to add_data.') - - #cycle through EPIC_Vars and populate with data - this is a comprehensive list of - # all variables expected - # if no data is passed but an epic dictionary is, complete routine leaving variables - # with missing data if not found - - if pandas: - for EPICdic_key in EPIC_VARS_dict.keys(): - print("Adding {0}".format(EPICdic_key)) - di = self.rec_vars.index(EPICdic_key) - try: - self.var_class[di][:] = data_dic[EPICdic_key].values - except KeyError: - self.var_class[di][:] = missing_values - else: - for EPICdic_key in EPIC_VARS_dict.keys(): - print("Adding {0}".format(EPICdic_key)) - di = self.rec_vars.index(EPICdic_key) - try: - self.var_class[di][:] = data_dic[EPICdic_key] - except KeyError: - self.var_class[di][:] = missing_values - - def add_history(self, new_history): - """Adds timestamp (UTC time) and history to existing information""" - self.rootgrpID.History = self.rootgrpID.History + '\n' + datetime.datetime.utcnow().strftime("%B %d, %Y %H:%M UTC")\ - + ' ' + new_history - - def close(self): - self.rootgrpID.close() \ No newline at end of file From 58d53d1c5c47475c193669ec7567e59d2afafd3a Mon Sep 17 00:00:00 2001 From: shaunwbell Date: Tue, 10 Dec 2024 15:57:15 -0800 Subject: [PATCH 21/21] Update ARGOS_service_data_converter.py --- ARGOS_service_data_converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ARGOS_service_data_converter.py b/ARGOS_service_data_converter.py index 8daff10..ce05ec6 100755 --- a/ARGOS_service_data_converter.py +++ b/ARGOS_service_data_converter.py @@ -446,7 +446,7 @@ def AZ(self, s1): return output -def pandas2netcdf(df=None, ofile="data.nc": +def pandas2netcdf(df=None, ofile="data.nc"): if df.empty: return