diff --git a/podaac/subsetter/gpm_cleanup.py b/podaac/subsetter/gpm_cleanup.py index 456c08d7..c7b85404 100644 --- a/podaac/subsetter/gpm_cleanup.py +++ b/podaac/subsetter/gpm_cleanup.py @@ -8,8 +8,14 @@ dim_dict = {} -def create_new_time_var(time_group, nc_dataset): +def compute_new_time_data(time_group, nc_dataset): + """ + compute the data for a time variable, timeMidScan, that is present in other + GPM collections but not the ENV collections. + """ + # set the time unit for GPM time_unit_out = "seconds since 1980-01-06 00:00:00" + # convert to a float seconds variable new_time_list = [date2num(datetime.datetime(nc_dataset[time_group+'__Year'][:][i], nc_dataset[time_group+'__Month'][:][i], nc_dataset[time_group+'__DayOfMonth'][:][i], hour=nc_dataset[time_group+'__Hour'][:][i], minute=nc_dataset[time_group+'__Minute'][:][i], second=nc_dataset[time_group+'__Second'][:][i], microsecond=nc_dataset[time_group+'__Second'][:][i]*1000), time_unit_out) @@ -74,14 +80,19 @@ def change_var_dims(nc_dataset, variables=None): new_mapped_var[var_name][:] = var[:] if not any("timeMidScan" in var for var in var_list): + # if there aren't any timeMidScan variables, create one scan_time_groups = ["__".join(i.split('__')[:-1]) for i in var_list if 'ScanTime' in i] for time_group in list(set(scan_time_groups)): - time_data, time_unit = create_new_time_var(time_group, nc_dataset) + # get the seconds since Jan 6, 1980 + time_data, time_unit = compute_new_time_data(time_group, nc_dataset) + # make a new variable for each ScanTime group new_time_var_name = time_group+'__timeMidScan' + # copy dimensions from the Year variable var_dims = nc_dataset.variables[time_group+'__Year'].dimensions comp_args = {"zlib": True, "complevel": 1} nc_dataset.createVariable(new_time_var_name, 'f8', var_dims, **comp_args) nc_dataset.variables[new_time_var_name].setncattr('unit', time_unit) + # copy the data in nc_dataset.variables[new_time_var_name][:] = time_data return nc_dataset diff --git a/tests/test_subset.py 
b/tests/test_subset.py index 17f51102..e1485e0b 100644 --- a/tests/test_subset.py +++ b/tests/test_subset.py @@ -2309,3 +2309,22 @@ def test_gpm_dimension_map(data_dir, subset_output_dir, request): for dim in dims: assert 'phony' not in dim + +def test_gpm_compute_new_var_data(data_dir, subset_output_dir, request): + """Test that GPM files with ScanTime variables can compute the time in seconds + since 1980-01-06""" + + gpm_dir = join(data_dir, 'GPM') + gpm_file = 'GPM_test_file_3.HDF5' + bbox = np.array(((-180, 180), (-90, 90))) + shutil.copyfile( + os.path.join(gpm_dir, gpm_file), + os.path.join(subset_output_dir, gpm_file) + ) + + nc_dataset, has_groups, file_extension = subset.open_as_nc_dataset(join(subset_output_dir, gpm_file)) + + + time_data, time_unit = gc.compute_new_time_data("__FS__ScanTime", nc_dataset) + + assert int(time_data[0]) == 1325120552