Skip to content

Commit

Permalink
update get time functions
Browse files Browse the repository at this point in the history
  • Loading branch information
sliu008 committed Jun 20, 2024
1 parent 26be201 commit 6f85237
Show file tree
Hide file tree
Showing 8 changed files with 46 additions and 23 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ jobs:
poetry run flake8 podaac
- name: Test and coverage
run: |
poetry run pytest --junitxml=build/reports/pytest.xml --cov=podaac/ --cov-report=xml:build/reports/coverage.xml -m "not aws and not integration" tests/
poetry run pytest -n auto --junitxml=build/reports/pytest.xml --cov=podaac/ --cov-report=xml:build/reports/coverage.xml -m "not aws and not integration" tests/
- name: SonarCloud Scan
id: sonarcloud
uses: sonarsource/sonarcloud-github-action@master
Expand Down
4 changes: 2 additions & 2 deletions podaac/subsetter/dimension_cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ def recreate_pixcore_dimensions(datasets: list):
dim_dict = {}
count = 0
for dataset in datasets:
dim_list_shape = list(dataset.dims.values())
current_dims = list(dataset.dims.keys())
dim_list_shape = list(dataset.sizes.values())
current_dims = list(dataset.sizes.keys())
rename_list = []
for current_dim, dim_value in zip(current_dims, dim_list_shape):
if current_dim not in dim_dict:
Expand Down
5 changes: 2 additions & 3 deletions podaac/subsetter/group_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,11 @@ def recombine_grouped_datasets(datasets: List[xr.Dataset], output_file: str, sta
for group in groups:
base_dataset.createGroup(group)

for dim_name in list(dataset.dims.keys()):
for dim_name in list(dataset.sizes.keys()):
new_dim_name = dim_name.split(GROUP_DELIM)[-1]
dim_group = _get_nested_group(base_dataset, dim_name)
if new_dim_name not in dim_group.dimensions:
dim_group.createDimension(new_dim_name, dataset.dims[dim_name])

dim_group.createDimension(new_dim_name, dataset.sizes[dim_name])
# Rename variables
_rename_variables(dataset, base_dataset, start_date, time_vars)

Expand Down
21 changes: 18 additions & 3 deletions podaac/subsetter/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import xarray.coding.times
from shapely.geometry import Point, Polygon, MultiPolygon
from shapely.ops import transform
import re

from podaac.subsetter import gpm_cleanup as gc
from podaac.subsetter import time_converting as tc
Expand Down Expand Up @@ -270,11 +271,11 @@ def calculate_chunks(dataset: xr.Dataset) -> dict:
"""
if len(dataset.dims) <= 3:
chunk = {dim: 4000 for dim in dataset.dims
if dataset.dims[dim] > 4000
if dataset.sizes[dim] > 4000
and len(dataset.dims) > 1}
else:
chunk = {dim: 500 for dim in dataset.dims
if dataset.dims[dim] > 500}
if dataset.sizes[dim] > 500}

return chunk

Expand Down Expand Up @@ -528,7 +529,7 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable, total_
return time_vars[0]

# Filter variables with 'time' in the name to avoid extra work
time_vars = list(filter(lambda var_name: 'time' in var_name, dataset.dims.keys()))
time_vars = list(filter(lambda var_name: 'time' in var_name, dataset.sizes.keys()))

for var_name in time_vars:
if var_name not in total_time_vars and "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims:
Expand All @@ -542,12 +543,22 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable, total_
if var_name not in total_time_vars and ('time' == var_name_time.lower() or 'timeMidScan' == var_name_time) and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims:
return var_name

time_units_pattern = re.compile(r"(days|d|hours|hr|h|minutes|min|m|seconds|sec|s) since \d{4}-\d{2}-\d{2}( \d{2}:\d{2}:\d{2})?")
# Check variables for common time variable indicators
for var_name, var in dataset.variables.items():
if ((('standard_name' in var.attrs and var.attrs['standard_name'] == 'time') or \
('axis' in var.attrs and var.attrs['axis'] == 'T') or \
('units' in var.attrs and time_units_pattern.match(var.attrs['units'])))) and var_name not in total_time_vars:
print(var_name)
return var_name

# then check if any variables have 'time' in the string if the above loop doesn't return anything
for var_name in list(dataset.data_vars.keys()):
var_name_time = var_name.strip(GROUP_DELIM).split(GROUP_DELIM)[-1]
if len(dataset[var_name].squeeze().dims) == 0:
continue
if var_name not in total_time_vars and 'time' in var_name_time.lower() and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims:
print(var_name)
return var_name

raise ValueError('Unable to determine time variable')
Expand Down Expand Up @@ -1243,6 +1254,10 @@ def subset(file_to_subset: str, bbox: np.ndarray, output_file: str,
time_var_names=time_var_names
)

print("#############################################")
print(time_var_names)
print("#############################################")

start_date = None
if hdf_type and (min_time or max_time):
dataset, start_date = tc.convert_to_datetime(dataset, time_var_names, hdf_type)
Expand Down
30 changes: 17 additions & 13 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ importlib-metadata = "^7.0.1"
h5py = "^3.6.0"
cf-xarray = "*"
numpy = "^1.26.3"
pytest-rerunfailures = "^14.0"

[tool.poetry.dev-dependencies]
pytest = "^8.0.2"
Expand Down
2 changes: 2 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[pytest]
addopts = --reruns 3 --reruns-delay 10
4 changes: 3 additions & 1 deletion tests/test_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1527,6 +1527,7 @@ def test_get_time_squeeze(data_dir, subset_output_dir):
os.path.join(subset_output_dir, tropomi_file_name))

nc_dataset = nc.Dataset(os.path.join(subset_output_dir, tropomi_file_name))
total_time_vars = ['__PRODUCT__time']

args = {
'decode_coords': False,
Expand All @@ -1540,7 +1541,8 @@ def test_get_time_squeeze(data_dir, subset_output_dir):
**args
) as dataset:
lat_var_name = subset.compute_coordinate_variable_names(dataset)[0][0]
time_var_name = subset.compute_time_variable_name(dataset, dataset[lat_var_name], [])
time_var_name = subset.compute_time_variable_name(dataset, dataset[lat_var_name], total_time_vars)
print(time_var_name)
lat_dims = dataset[lat_var_name].squeeze().dims
time_dims = dataset[time_var_name].squeeze().dims
assert lat_dims == time_dims
Expand Down

0 comments on commit 6f85237

Please sign in to comment.