diff --git a/.github/workflows/build-pipeline.yml b/.github/workflows/build-pipeline.yml index db822ec3..7eb7bc46 100644 --- a/.github/workflows/build-pipeline.yml +++ b/.github/workflows/build-pipeline.yml @@ -84,7 +84,7 @@ jobs: poetry run flake8 podaac - name: Test and coverage run: | - poetry run pytest --junitxml=build/reports/pytest.xml --cov=podaac/ --cov-report=xml:build/reports/coverage.xml -m "not aws and not integration" tests/ + poetry run pytest -n auto --junitxml=build/reports/pytest.xml --cov=podaac/ --cov-report=xml:build/reports/coverage.xml -m "not aws and not integration" tests/ - name: SonarCloud Scan id: sonarcloud uses: sonarsource/sonarcloud-github-action@master diff --git a/podaac/subsetter/dimension_cleanup.py b/podaac/subsetter/dimension_cleanup.py index 4ba76406..6904ee50 100644 --- a/podaac/subsetter/dimension_cleanup.py +++ b/podaac/subsetter/dimension_cleanup.py @@ -117,8 +117,8 @@ def recreate_pixcore_dimensions(datasets: list): dim_dict = {} count = 0 for dataset in datasets: - dim_list_shape = list(dataset.dims.values()) - current_dims = list(dataset.dims.keys()) + dim_list_shape = list(dataset.sizes.values()) + current_dims = list(dataset.sizes.keys()) rename_list = [] for current_dim, dim_value in zip(current_dims, dim_list_shape): if current_dim not in dim_dict: diff --git a/podaac/subsetter/group_handling.py b/podaac/subsetter/group_handling.py index ee61c76e..5a08528b 100644 --- a/podaac/subsetter/group_handling.py +++ b/podaac/subsetter/group_handling.py @@ -119,12 +119,11 @@ def recombine_grouped_datasets(datasets: List[xr.Dataset], output_file: str, sta for group in groups: base_dataset.createGroup(group) - for dim_name in list(dataset.dims.keys()): + for dim_name in list(dataset.sizes.keys()): new_dim_name = dim_name.split(GROUP_DELIM)[-1] dim_group = _get_nested_group(base_dataset, dim_name) if new_dim_name not in dim_group.dimensions: - dim_group.createDimension(new_dim_name, dataset.dims[dim_name]) - + dim_group.createDimension(new_dim_name, dataset.sizes[dim_name]) # Rename variables _rename_variables(dataset, base_dataset, start_date, time_vars) diff --git a/podaac/subsetter/subset.py b/podaac/subsetter/subset.py index ee7ba7a0..a1cc0ab5 100644 --- a/podaac/subsetter/subset.py +++ b/podaac/subsetter/subset.py @@ -40,6 +40,7 @@ import xarray.coding.times from shapely.geometry import Point, Polygon, MultiPolygon from shapely.ops import transform +import re from podaac.subsetter import gpm_cleanup as gc from podaac.subsetter import time_converting as tc @@ -270,11 +271,11 @@ def calculate_chunks(dataset: xr.Dataset) -> dict: """ if len(dataset.dims) <= 3: chunk = {dim: 4000 for dim in dataset.dims - if dataset.dims[dim] > 4000 + if dataset.sizes[dim] > 4000 and len(dataset.dims) > 1} else: chunk = {dim: 500 for dim in dataset.dims - if dataset.dims[dim] > 500} + if dataset.sizes[dim] > 500} return chunk @@ -528,7 +529,7 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable, total_ return time_vars[0] # Filter variables with 'time' in the name to avoid extra work - time_vars = list(filter(lambda var_name: 'time' in var_name, dataset.dims.keys())) + time_vars = list(filter(lambda var_name: 'time' in var_name, dataset.sizes.keys())) for var_name in time_vars: if var_name not in total_time_vars and "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims: @@ -542,12 +543,22 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable, total_ if var_name not in total_time_vars and ('time' == var_name_time.lower() or 'timeMidScan' == var_name_time) and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims: return var_name + time_units_pattern = re.compile(r"(days|d|hours|hr|h|minutes|min|m|seconds|sec|s) since \d{4}-\d{2}-\d{2}( \d{2}:\d{2}:\d{2})?") + # Check variables for common time variable indicators + for var_name, var in dataset.variables.items(): + if ((('standard_name' in var.attrs and var.attrs['standard_name'] == 'time') or \ + ('axis' in var.attrs and var.attrs['axis'] == 'T') or \ + ('units' in var.attrs and time_units_pattern.match(var.attrs['units'])))) and var_name not in var_name not in total_time_vars: + print(var_name) + return var_name + # then check if any variables have 'time' in the string if the above loop doesn't return anything for var_name in list(dataset.data_vars.keys()): var_name_time = var_name.strip(GROUP_DELIM).split(GROUP_DELIM)[-1] if len(dataset[var_name].squeeze().dims) == 0: continue if var_name not in total_time_vars and 'time' in var_name_time.lower() and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims: + print(var_name) return var_name raise ValueError('Unable to determine time variable') @@ -1243,6 +1254,10 @@ def subset(file_to_subset: str, bbox: np.ndarray, output_file: str, time_var_names=time_var_names ) + print("#############################################") + print(time_var_names) + print("#############################################") + start_date = None if hdf_type and (min_time or max_time): dataset, start_date = tc.convert_to_datetime(dataset, time_var_names, hdf_type) diff --git a/poetry.lock b/poetry.lock index 7de16d5b..1ae6a523 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. [[package]] name = "alabaster" @@ -1943,6 +1943,21 @@ pytest = ">=4.6" [package.extras] testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] +[[package]] +name = "pytest-rerunfailures" +version = "14.0" +description = "pytest plugin to re-run tests to eliminate flaky failures" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-rerunfailures-14.0.tar.gz", hash = "sha256:4a400bcbcd3c7a4ad151ab8afac123d90eca3abe27f98725dc4d9702887d2e92"}, + {file = "pytest_rerunfailures-14.0-py3-none-any.whl", hash = "sha256:4197bdd2eaeffdbf50b5ea6e7236f47ff0e44d1def8dae08e409f536d84e7b32"}, +] + +[package.dependencies] +packaging = ">=17.1" +pytest = ">=7.2" + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -1991,7 +2006,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -1999,16 +2013,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -2025,7 +2031,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -2033,7 +2038,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -2669,4 +2673,4 @@ harmony = ["harmony-service-lib", "pystac"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "dded8e00b53f4c441d551560bb878d1308d3a16393e137105ca360b8571217be" +content-hash = "85c6f89163736a3e14c2330d649d5606f2bb45ae6f4c274f3e3970e689246586" diff --git a/pyproject.toml b/pyproject.toml index e69253c6..2dba10c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ importlib-metadata = "^7.0.1" h5py = "^3.6.0" cf-xarray = "*" numpy = "^1.26.3" +pytest-rerunfailures = "^14.0" [tool.poetry.dev-dependencies] pytest = "^8.0.2" diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..4d3cbcf2 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = --reruns 3 --reruns-delay 10 diff --git a/tests/test_subset.py b/tests/test_subset.py index d6bb6fb9..faea2bcd 100644 --- a/tests/test_subset.py +++ b/tests/test_subset.py @@ -1527,6 +1527,7 @@ def test_get_time_squeeze(data_dir, subset_output_dir): os.path.join(subset_output_dir, tropomi_file_name)) nc_dataset = nc.Dataset(os.path.join(subset_output_dir, tropomi_file_name)) + total_time_vars = ['__PRODUCT__time'] args = { 'decode_coords': False, @@ -1540,7 +1541,8 @@ def test_get_time_squeeze(data_dir, subset_output_dir): **args ) as dataset: lat_var_name = subset.compute_coordinate_variable_names(dataset)[0][0] - time_var_name = subset.compute_time_variable_name(dataset, dataset[lat_var_name], []) + time_var_name = subset.compute_time_variable_name(dataset, dataset[lat_var_name], total_time_vars) + print(time_var_name) lat_dims = dataset[lat_var_name].squeeze().dims time_dims = dataset[time_var_name].squeeze().dims assert lat_dims == time_dims