diff --git a/podaac/subsetter/gpm_cleanup.py b/podaac/subsetter/gpm_cleanup.py index bbdec9ce..31a125b4 100644 --- a/podaac/subsetter/gpm_cleanup.py +++ b/podaac/subsetter/gpm_cleanup.py @@ -1,6 +1,11 @@ """ -Module designed for mapping the dimensions in GPM. Phony dimensions are changed -to nscan, nbin, nfreq by using the DimensionNames variable attribute +Module designed for mapping the dimensions in GPM, and changing calendar time type +to seconds since 1980-1-6 T00:00:00Z +Problem: each variable will have unique phony dims. When time for subset_with_bbox +is called, each unique group will have to have common dimensions. +Solution: Phony dimensions are changed to nscan, nbin, nfreq and will have the same name for each +unique group that will be subsetted. +nscan, nbin, and nfreq are named in the variable attributes. """ import datetime @@ -14,9 +19,10 @@ def compute_new_time_data(time_group, nc_dataset): create a time variable, timeMidScan, that is present in other GPM collections but not the ENV collections. 
""" - # set the time unit for GPM + # set the time unit for GPM - this is the case for all GPM collections time_unit_out = "seconds since 1980-01-06 00:00:00" - # conver to a float, seconds variable + # convert to a float variable, seconds since the above date + # this list will be a new time variable new_time_list = [date2num(datetime.datetime( nc_dataset[time_group+'__Year'][:][i], nc_dataset[time_group+'__Month'][:][i], @@ -37,7 +43,7 @@ def change_var_dims(nc_dataset, variables=None, time_name="__timeMidScan"): dimensions to have the name in the DimensionName attribute rather than phony_dim """ var_list = list(nc_dataset.variables.keys()) - # loop through variable list to avoid netcdf4 runtime error + # loop through the entire variable list to avoid netcdf4 runtime error for var_name in var_list: # GPM will always need to be cleaned up via netCDF # generalizing coordinate variables in netCDF file to speed variable subsetting up @@ -57,14 +63,16 @@ def change_var_dims(nc_dataset, variables=None, time_name="__timeMidScan"): dim_prefix = var_name.split('__')[1] # new dimension name new_dim = '__'+dim_prefix+'__'+dim - length = var.shape[count] + # get dim size of the newly created variable + dim_size = var.shape[count] # check if the dimension name created has already been created in the dataset if new_dim not in dim_dict: # create the new dimension - nc_dataset.createDimension(new_dim, length) - dim_dict[new_dim] = length + nc_dataset.createDimension(new_dim, dim_size) + dim_dict[new_dim] = dim_size # utilized from Dimension Cleanup module attrs_contents = {} + # new variable that will point to the updated dimensions new_mapped_var = {} # if the variable has attributes, get the attributes to then be copied to the new variable if len(var.ncattrs()) > 0: @@ -99,7 +107,7 @@ def change_var_dims(nc_dataset, variables=None, time_name="__timeMidScan"): comp_args = {"zlib": True, "complevel": 1} nc_dataset.createVariable(new_time_var_name, 'f8', var_dims, **comp_args) 
nc_dataset.variables[new_time_var_name].setncattr('unit', time_unit) - # copy the data in + # copy the new time list into the new time variable nc_dataset.variables[new_time_var_name][:] = time_data return nc_dataset diff --git a/podaac/subsetter/group_handling.py b/podaac/subsetter/group_handling.py index 5a08528b..fa599b55 100644 --- a/podaac/subsetter/group_handling.py +++ b/podaac/subsetter/group_handling.py @@ -108,7 +108,7 @@ def recombine_grouped_datasets(datasets: List[xr.Dataset], output_file: str, sta Name of the output file to write the resulting NetCDF file to. TODO: add docstring and type hint for `start_date` parameter. """ - + # base_dataset = nc.Dataset(output_file, mode='w') for dataset in datasets: group_lst = [] @@ -179,7 +179,7 @@ def _rename_variables(dataset: xr.Dataset, base_dataset: nc.Dataset, start_date, comp_args = {"zlib": True, "complevel": 1} var_data = variable.data - + # create variable based upon the original data type if variable.dtype in [object, '|S27']: comp_args = {"zlib": False, "complevel": 1} var_group.createVariable(new_var_name, 'S4', var_dims, fill_value=fill_value, **comp_args) diff --git a/podaac/subsetter/time_converting.py b/podaac/subsetter/time_converting.py index bf1e361d..cfb169fe 100644 --- a/podaac/subsetter/time_converting.py +++ b/podaac/subsetter/time_converting.py @@ -22,7 +22,8 @@ def get_start_date(instrument_type): """ - returns the start date based on the instrument type + returns the start date based on the instrument type. The start date is the + reference date from which seconds are counted. 
""" if instrument_type in ['OMI', 'MLS']: start_date = datetime.datetime.strptime("1993-01-01T00:00:00.00", "%Y-%m-%dT%H:%M:%S.%f") @@ -51,6 +52,7 @@ def convert_to_datetime(dataset: xr.Dataset, time_vars: list, instrument_type: s dataset[var].values = date_time_array.astype("datetime64[ns]") + dataset[var].astype('timedelta64[s]').values continue # if there isn't a start_date, get it from the UTC variable + # subtract the seconds from the first UTC time to get the reference date the seconds are counted from utc_var_name = subset.compute_utc_name(dataset) if utc_var_name: start_seconds = dataset[var].values[0]