From 0eb65d2ad9a504828c66fd94f85989b5be6c3803 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 13 Jun 2024 18:05:59 -0400 Subject: [PATCH 01/11] add and configure deptry, split docs requirements from dev requirements, add deptry to pre-commit, update CONTRIBUTING.rst and installation.rst to modern conventions --- .pre-commit-config.yaml | 4 ++++ CONTRIBUTING.rst | 4 ++-- Makefile | 2 +- docs/installation.rst | 12 ++++++------ pyproject.toml | 21 +++++++++++++++++---- tox.ini | 4 +++- xclim/testing/diagnostics.py | 7 +++++++ 7 files changed, 40 insertions(+), 14 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5cb38c26e..a343be338 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -86,6 +86,10 @@ repos: hooks: - id: codespell additional_dependencies: [ 'tomli' ] + - repo: https://github.com/fpgmaas/deptry.git + rev: "0.16.1" + hooks: + - id: deptry - repo: https://github.com/gitleaks/gitleaks rev: v8.18.3 hooks: diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 2c35af94d..a992e60cb 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -122,7 +122,7 @@ Ready to contribute? Here's how to set up `xclim` for local development. #. Create a development environment. We recommend using ``conda``:: $ conda create -n xclim python=3.10 --file=environment.yml - $ python -m pip install -e ".[dev]" + $ python -m pip install -e --no-deps . #. Create a branch for local development:: @@ -174,7 +174,7 @@ Ready to contribute? Here's how to set up `xclim` for local development. .. warning:: Starting from `xclim` v0.46.0, when running tests with `tox`, any `pytest` markers passed to `pyXX` builds (e.g. `-m "not slow"`) must be passed to `tox` directly. This can be done as follows:: - $ tox -e py38 -- -m "not slow" + $ tox -e py310 -- -m "not slow" The exceptions to this rule are: `notebooks_doctests`: this configuration does not pass test markers to its `pytest` call. diff --git a/Makefile b/Makefile index f01f59948..ed31681c0 100644 --- a/Makefile +++ b/Makefile @@ -115,7 +115,7 @@ install: clean ## install the package to the active Python's site-packages python -m pip install --no-user . develop: clean ## install the package and development dependencies in editable mode to the active Python's site-packages - python -m pip install --no-user --editable ".[dev]" + python -m pip install --no-user --editable ".[dev,docs]" upstream: clean develop ## install the GitHub-based development branches of dependencies in editable mode to the active Python's site-packages python -m pip install --no-user --requirement requirements_upstream.txt diff --git a/docs/installation.rst b/docs/installation.rst index 80ec03412..271f83eb9 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -110,7 +110,7 @@ Afterwards, `SBCK` can be installed from PyPI using `pip`: .. _SBCK: https://github.com/yrobink/SBCK .. _Eigen3: https://eigen.tuxfamily.org/index.php -From sources +From Sources ------------ .. warning:: @@ -132,11 +132,11 @@ Or download the `tarball`_: $ curl -OL https://github.com/Ouranosinc/xclim/tarball/main -Once you have extracted a copy of the source, you can install it with pip: +Once you have extracted a copy of the source, you can install it with `pip`_: .. code-block:: shell - $ python -m pip install -e ".[dev]" + $ python -m pip install -e ".[dev,docs]" Alternatively, you can also install a local development copy via `flit`_: @@ -148,13 +148,13 @@ Alternatively, you can also install a local development copy via `flit`_: .. _tarball: https://github.com/Ouranosinc/xclim/tarball/main .. _flit: https://flit.pypa.io/en/stable -Creating a Conda environment +Creating a Conda Environment ---------------------------- To create a conda environment including `xclim`'s dependencies and several optional libraries (notably: `clisops`, `eigen`, `sbck`, and `flox`) and development dependencies, run the following command from within your cloned repo: .. code-block:: console - $ conda env create -n my_xclim_env python=3.8 --file=environment.yml + $ conda env create -n my_xclim_env python=3.10 --file=environment.yml $ conda activate my_xclim_env - (my_xclim_env) $ python -m pip install -e . + (my_xclim_env) $ python -m pip install -e --no-deps . diff --git a/pyproject.toml b/pyproject.toml index 21ec74787..eae04b0ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ dependencies = [ "numpy>=1.20.0", "pandas>=2.2", "pint>=0.10,<0.24", + "platformdirs >=3.2", "pyarrow", # Strongly encouraged for pandas v2.2.0+ "pyyaml", "scikit-learn>=0.21.3", @@ -75,20 +76,21 @@ dev = [ "pandas-stubs >=2.2", "platformdirs >=3.2", "pre-commit >=3.7", - "pybtex", "pylint >=3.1", "pytest <8.0", # Pinned due to breakage with xdoctest. See: https://github.com/Erotemic/xdoctest/issues/151 "pytest-cov", "pytest-socket", "pytest-xdist[psutil] >=3.2", - "ruff >=0.3.0", + "ruff >=0.4.0", "tokenize-rt", - "tox >=4.5", + "tox >=4.15.1", # "tox-conda", # Will be added when a tox@v4.0+ compatible plugin is released. "tox-gh >=1.3.1", "vulture ==2.11", "xdoctest", - "yamllint ==1.35.1", + "yamllint ==1.35.1" +] +docs = [ # Documentation and examples "distributed >=2.0", "furo >=2023.9.10", @@ -97,6 +99,7 @@ dev = [ "nbsphinx", "nc-time-axis", "pooch", + "pybtex", "sphinx", "sphinx-autobuild >=2024.4.16", "sphinx-autodoc-typehints", @@ -162,6 +165,16 @@ ignore-words-list = "absolue,bloc,bui,callendar,degreee,environnement,hanel,infe relative_files = true omit = ["tests/*.py"] +[tool.deptry] +extend_exclude = ["docs"] +ignore_notebooks = true +pep621_dev_dependency_groups = ["dev", "docs"] + +[tool.deptry.per_rule_ignores] +DEP001 = ["SBCK"] +DEP002 = ["bottleneck", "pyarrow"] +DEP004 = ["matplotlib", "pytest_socket"] + [tool.flit.sdist] include = [ "AUTHORS.rst", diff --git a/tox.ini b/tox.ini index 8aeecb455..a8e818241 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -min_version = 4.15 +min_version = 4.15.1 env_list = lint docs @@ -58,6 +58,8 @@ description = Build the documentation with makefile under {basepython} setenv = PYTHONPATH = {toxinidir} READTHEDOCS = 1 +deps = + docs commands_pre = commands = make docs diff --git a/xclim/testing/diagnostics.py b/xclim/testing/diagnostics.py index 875e68d1d..5506d53d8 100644 --- a/xclim/testing/diagnostics.py +++ b/xclim/testing/diagnostics.py @@ -25,6 +25,7 @@ from matplotlib import pyplot as plt except ModuleNotFoundError: warnings.warn("Matplotlib not found, plot-generating functions will not work.") + plt = None __all__ = ["adapt_freq_graph", "cannon_2015_figure_2", "synth_rainfall"] @@ -51,6 +52,9 @@ def synth_rainfall(shape, scale=1, wet_freq=0.25, size=1): def cannon_2015_figure_2(): """Create a graphic similar to figure 2 of Cannon et al. 2015.""" # noqa: D103 + if plt is None: + raise ModuleNotFoundError("Matplotlib not found.") + n = 10000 ref, hist, sim = cannon_2015_rvs(n, random=False) QM = EmpiricalQuantileMapping(kind="*", group="time", interp="linear") @@ -127,6 +131,9 @@ def cannon_2015_figure_2(): def adapt_freq_graph(): """Create a graphic with the additive adjustment factors estimated after applying the adapt_freq method.""" + if plt is None: + raise ModuleNotFoundError("Matplotlib not found.") + n = 10000 x = series(synth_rainfall(2, 2, wet_freq=0.25, size=n), "pr") # sim y = series(synth_rainfall(2, 2, wet_freq=0.5, size=n), "pr") # ref From 2f7a9143013e0465fbb625c8fd23077340ef5b07 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 26 Jun 2024 10:09:03 -0400 Subject: [PATCH 02/11] add "all" recipe --- docs/installation.rst | 2 +- pyproject.toml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 271f83eb9..62200815c 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -136,7 +136,7 @@ Once you have extracted a copy of the source, you can install it with `pip`_: .. code-block:: shell - $ python -m pip install -e ".[dev,docs]" + $ python -m pip install -e ".[all]" Alternatively, you can also install a local development copy via `flit`_: diff --git a/pyproject.toml b/pyproject.toml index d435ade9b..90eaaac14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,6 +108,7 @@ docs = [ "sphinxcontrib-bibtex", "sphinxcontrib-svg2pdfconverter[Cairosvg]" ] +all = ["xclim[dev]", "xclim[docs]"] [project.scripts] xclim = "xclim.cli:cli" @@ -167,7 +168,7 @@ omit = ["tests/*.py"] [tool.deptry] extend_exclude = ["docs"] ignore_notebooks = true -pep621_dev_dependency_groups = ["dev", "docs"] +pep621_dev_dependency_groups = ["all", "dev", "docs"] [tool.deptry.per_rule_ignores] DEP001 = ["SBCK"] From 7021d8f84231e34ba4d71d7931b887e483e464d1 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 26 Jun 2024 13:22:09 -0400 Subject: [PATCH 03/11] add deptry to linters and add deptry configuration, place pins on some linting dependencies --- CI/requirements_ci.txt | 1 + Makefile | 1 + environment.yml | 9 +++++---- pyproject.toml | 23 ++++++++++++++++------- tox.ini | 7 +++++-- 5 files changed, 28 insertions(+), 13 deletions(-) diff --git a/CI/requirements_ci.txt b/CI/requirements_ci.txt index f636973a3..b693ea80d 100644 --- a/CI/requirements_ci.txt +++ b/CI/requirements_ci.txt @@ -1,5 +1,6 @@ bump-my-version==0.23.0 coveralls==4.0.0 +deptry==0.16.1 flit==3.9.0 pip==24.0 pylint==3.2.2 diff --git a/Makefile b/Makefile index ed31681c0..3e51fad2d 100644 --- a/Makefile +++ b/Makefile @@ -62,6 +62,7 @@ lint: ## check style with flake8 and black blackdoc --check --exclude=xclim/indices/__init__.py xclim blackdoc --check docs codespell xclim tests docs + deptry . yamllint --config-file=.yamllint.yaml xclim test: ## run tests quickly with the default Python diff --git a/environment.yml b/environment.yml index 6a91bebd9..7e543d0a4 100644 --- a/environment.yml +++ b/environment.yml @@ -35,13 +35,14 @@ dependencies: - codespell ==2.3.0 - coverage >=7.5.0 - coveralls >=4.0.0 + - deptry ==0.16.1 - distributed >=2.0 - filelock - flake8 >=7.0.0 - flake8-rst-docstrings - flit >=3.9.0 - furo >=2023.9.10 - - h5netcdf + - h5netcdf >=1.3.0 - ipykernel - ipython - isort ==5.13.2 @@ -50,7 +51,7 @@ dependencies: - nbconvert <7.14 # Pinned due to directive errors in sphinx. See: https://github.com/jupyter/nbconvert/issues/2092 - nbqa - nbsphinx - - nbval + - nbval >=0.11.0 - nc-time-axis - netCDF4 >=1.4,<1.7 - notebook @@ -58,13 +59,13 @@ dependencies: - platformdirs - pooch - pre-commit >=3.7 - - pybtex + - pybtex >=0.24.0 - pylint >=3.1 - pytest <8.0 # Pinned due to breakage with xdoctest. See: https://github.com/Erotemic/xdoctest/issues/151 - pytest-cov - pytest-socket - pytest-xdist >=3.2 - - ruff >=0.4.0 + - ruff >=0.4.10 - sphinx >=7.0.0 - sphinx-autobuild >=2024.4.16 - sphinx-autodoc-typehints diff --git a/pyproject.toml b/pyproject.toml index 90eaaac14..acbe170cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,29 +58,31 @@ dev = [ # Dev tools and testing "black[jupyter] ==24.4.2", "blackdoc ==0.3.9", - "bump-my-version >=0.23.0", + "bump-my-version ==0.23.0", "codespell ==2.3.0", "coverage[toml] >=7.5.0", "coveralls >=4.0.0", + "deptry ==0.16.1", "flake8 >=7.0.0", "flake8-rst-docstrings", - "h5netcdf", + "h5netcdf>=1.3.0", "ipython", "isort ==5.13.2", "mypy", "nbconvert <7.14", # Pinned due to directive errors in sphinx. See: https://github.com/jupyter/nbconvert/issues/2092 "nbqa >=1.8.2", - "nbval", + "nbval >=0.11.0", "netCDF4 >=1.4,<1.7", "pandas-stubs >=2.2", "platformdirs >=3.2", + "pooch", "pre-commit >=3.7", "pylint >=3.1", "pytest <8.0", # Pinned due to breakage with xdoctest. See: https://github.com/Erotemic/xdoctest/issues/151 "pytest-cov", "pytest-socket", "pytest-xdist[psutil] >=3.2", - "ruff >=0.4.0", + "ruff >=0.4.10", "tokenize-rt", "tox >=4.15.1", # "tox-conda", # Will be added when a tox@v4.0+ compatible plugin is released. @@ -98,7 +100,7 @@ docs = [ "nbsphinx", "nc-time-axis", "pooch", - "pybtex", + "pybtex >=0.24.0", "sphinx >=7.0.0", "sphinx-autobuild >=2024.4.16", "sphinx-autodoc-typehints", @@ -170,6 +172,10 @@ extend_exclude = ["docs"] ignore_notebooks = true pep621_dev_dependency_groups = ["all", "dev", "docs"] +[tool.deptry.package_module_name_map] +"scikit-learn" = "sklearn" +"pyyaml" = "yaml" + [tool.deptry.per_rule_ignores] DEP001 = ["SBCK"] DEP002 = ["bottleneck", "pyarrow"] @@ -267,10 +273,12 @@ line-length = 150 target-version = "py39" exclude = [ ".git", - "docs", "build", ".eggs" ] +extend-include = [ + "*.ipynb" # Include notebooks +] [tool.ruff.format] line-ending = "auto" @@ -291,6 +299,7 @@ select = [ "E", # pycodestyle errors "F", # pyflakes "N802", # invalid-function-name + "S", # bandit "W" # pycodestyle warnings ] @@ -317,7 +326,7 @@ max-complexity = 20 [tool.ruff.lint.per-file-ignores] "docs/*.py" = ["D100", "D101", "D102", "D103"] -"tests/**/*test*.py" = ["D100", "D101", "D102", "D103", "N802"] +"tests/**/*test*.py" = ["D100", "D101", "D102", "D103", "N802", "S101"] "xclim/**/__init__.py" = ["F401", "F403"] "xclim/core/indicator.py" = ["D214", "D405", "D406", "D407", "D411"] "xclim/core/locales.py" = ["E501", "W505"] diff --git a/tox.ini b/tox.ini index 8a2de655f..dcb355920 100644 --- a/tox.ini +++ b/tox.ini @@ -29,13 +29,14 @@ skip_install = True extras = deps = codespell ==2.3.0 + deptry==0.16.1 flake8 >=7.0.0 flake8-rst-docstrings black[jupyter]==24.4.2 blackdoc==0.3.9 isort==5.13.2 nbqa - ruff==0.4.0 + ruff==0.4.10 vulture==2.11 yamllint==1.35.1 commands_pre = @@ -49,6 +50,7 @@ commands = blackdoc --check --exclude=xclim/indices/__init__.py xclim blackdoc --check docs codespell xclim tests docs + deptry . yamllint --config-file=.yamllint.yaml xclim commands_post = @@ -78,6 +80,8 @@ allowlist_externals = [testenv:notebooks{-prefetch,}] description = Run notebooks with pytest under {basepython} +extras = + all deps = lmoments3 commands = @@ -111,7 +115,6 @@ passenv = XCLIM_* extras = dev deps = - coverage: coveralls upstream: -r CI/requirements_upstream.txt sbck: pybind11 lmoments: lmoments3 From 7e1fffbff9ba28313d7e1632d456642d4b2c6aec Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 26 Jun 2024 13:23:25 -0400 Subject: [PATCH 04/11] address bandit-like security issues --- tests/test_sdba/test_base.py | 2 +- xclim/cli.py | 2 +- xclim/core/formatting.py | 3 ++- xclim/core/indicator.py | 5 +++-- xclim/core/units.py | 6 ++---- xclim/indicators/atmos/_conversion.py | 2 +- xclim/sdba/adjustment.py | 3 ++- xclim/sdba/base.py | 2 +- xclim/testing/helpers.py | 4 ++-- 9 files changed, 15 insertions(+), 14 deletions(-) diff --git a/tests/test_sdba/test_base.py b/tests/test_sdba/test_base.py index c40c39165..31d63d307 100644 --- a/tests/test_sdba/test_base.py +++ b/tests/test_sdba/test_base.py @@ -29,7 +29,7 @@ def test_param_class(): ) s = jsonpickle.encode(obj) - obj2 = jsonpickle.decode(s) + obj2 = jsonpickle.decode(s) # noqa: S301 assert obj.parameters == obj2.parameters diff --git a/xclim/cli.py b/xclim/cli.py index 9cddfe75c..fd4ddedbb 100644 --- a/xclim/cli.py +++ b/xclim/cli.py @@ -24,7 +24,7 @@ from dask.distributed import Client, progress distributed = True -except ImportError: +except ImportError: # noqa: S110 # Distributed is not a dependency of xclim pass diff --git a/xclim/core/formatting.py b/xclim/core/formatting.py index 02855fe09..aaa18025b 100644 --- a/xclim/core/formatting.py +++ b/xclim/core/formatting.py @@ -274,7 +274,8 @@ def _parse_parameters(section): try: choices = literal_eval(match.groups()[0]) params[curr_key]["choices"] = choices - except ValueError: + except ValueError: # noqa: S110 + # If the literal_eval fails, we just ignore the choices. pass return params diff --git a/xclim/core/indicator.py b/xclim/core/indicator.py index 40d68ecc4..74777dbe9 100644 --- a/xclim/core/indicator.py +++ b/xclim/core/indicator.py @@ -1291,7 +1291,7 @@ def cfcheck(self, **das): for varname, vardata in das.items(): try: cfcheck_from_name(varname, vardata) - except KeyError: + except KeyError: # noqa: S110 # Silently ignore unknown variables. pass @@ -1845,7 +1845,8 @@ def _merge_attrs(dbase, dextra, attr, sep): if indice_func is None and hasattr(indices, "__getitem__"): try: indice_func = indices[indice_name] - except KeyError: + except KeyError: # noqa: S110 + # The indice is not found in the mapping. pass if indice_func is not None: diff --git a/xclim/core/units.py b/xclim/core/units.py index f06e00040..195ecfdd2 100644 --- a/xclim/core/units.py +++ b/xclim/core/units.py @@ -349,10 +349,8 @@ def convert_units_to( # noqa: C901 # The new cf standard name is inserted by the converter try: source = _CONVERSIONS[(convname, direction)](source) - except Exception: - # FIXME: This is a broad exception. Bad practice. - # Failing automatic conversion - # It will anyway fail further down with a correct error message. + except KeyError: # noqa: S110 + # There is no conversion available for this standard name pass else: source_unit = units2pint(source) diff --git a/xclim/indicators/atmos/_conversion.py b/xclim/indicators/atmos/_conversion.py index f2cd58d6a..c4e2450cf 100644 --- a/xclim/indicators/atmos/_conversion.py +++ b/xclim/indicators/atmos/_conversion.py @@ -44,7 +44,7 @@ def cfcheck(self, **das): try: # Only check standard_name, and not cell_methods which depends on the variable's frequency. cfcheck_from_name(varname, vardata, attrs=["standard_name"]) - except KeyError: + except KeyError: # noqa: S110 # Silently ignore unknown variables. pass diff --git a/xclim/sdba/adjustment.py b/xclim/sdba/adjustment.py index 957359bc8..a01fc81e5 100644 --- a/xclim/sdba/adjustment.py +++ b/xclim/sdba/adjustment.py @@ -1206,7 +1206,8 @@ def _adjust( try: import SBCK -except ImportError: +except ImportError: # noqa: S110 + # SBCK is not installed, we will not generate the SBCK classes. pass else: diff --git a/xclim/sdba/base.py b/xclim/sdba/base.py index 6d2b6b593..1f3554b37 100644 --- a/xclim/sdba/base.py +++ b/xclim/sdba/base.py @@ -89,7 +89,7 @@ def from_dataset(cls, ds: xr.Dataset): and that attribute must be the result of `jsonpickle.encode(object)` where object is of the same type as this object. """ - obj = jsonpickle.decode(ds.attrs[cls._attribute]) + obj = jsonpickle.decode(ds.attrs[cls._attribute]) # noqa: S301 obj.set_dataset(ds) return obj diff --git a/xclim/testing/helpers.py b/xclim/testing/helpers.py index 935750101..c407323f3 100644 --- a/xclim/testing/helpers.py +++ b/xclim/testing/helpers.py @@ -151,7 +151,7 @@ def populate_testing_data( data[filepattern] = _get_file( filepattern, branch=branch, cache_dir=_local_cache ) - except FileNotFoundError: + except FileNotFoundError: # noqa: S112 continue elif temp_folder: try: @@ -161,7 +161,7 @@ def populate_testing_data( branch=branch, _local_cache=_local_cache, ) - except FileNotFoundError: + except FileNotFoundError: # noqa: S112 continue return From 3d4ce75eb1b10d3fc4f576c77889542ed2295dd8 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 26 Jun 2024 13:24:08 -0400 Subject: [PATCH 05/11] better URL security auditing --- tests/test_testing_utils.py | 17 ++++++----- xclim/testing/utils.py | 60 ++++++++++++++++++++++++------------- 2 files changed, 49 insertions(+), 28 deletions(-) diff --git a/tests/test_testing_utils.py b/tests/test_testing_utils.py index 17a6f2f9e..10a080cbb 100644 --- a/tests/test_testing_utils.py +++ b/tests/test_testing_utils.py @@ -3,6 +3,7 @@ import platform import sys from pathlib import Path +from urllib.error import URLError import numpy as np import pytest @@ -139,24 +140,26 @@ def test_release_notes_file_not_implemented(self, tmp_path): class TestTestingFileAccessors: def test_unsafe_urls(self): with pytest.raises( - ValueError, match="GitHub URL not safe: 'ftp://domain.does.not.exist/'." + ValueError, match="GitHub URL not secure: 'ftp://domain.does.not.exist/'." ): utilities.open_dataset( "doesnt_exist.nc", github_url="ftp://domain.does.not.exist/" ) with pytest.raises( - ValueError, match="OPeNDAP URL not safe: 'ftp://domain.does.not.exist/'." + OSError, + match="OPeNDAP file not read. Verify that the service is available: " + "'https://seemingly.trustworthy.com/doesnt_exist.nc'", ): utilities.open_dataset( - "doesnt_exist.nc", dap_url="ftp://domain.does.not.exist/" + "doesnt_exist.nc", dap_url="https://seemingly.trustworthy.com/" ) - def test_bad_opendap_url(self): + def test_malicious_urls(self): with pytest.raises( - OSError, - match="OPeNDAP file not read. Verify that the service is available.", + URLError, + match="urlopen error OPeNDAP URL is not well-formed: 'doesnt_exist.nc'", ): utilities.open_dataset( - "doesnt_exist.nc", dap_url="https://dap.service.does.not.exist/" + "doesnt_exist.nc", dap_url="Robert'); DROP TABLE STUDENTS; --" ) diff --git a/xclim/testing/utils.py b/xclim/testing/utils.py index 009fffda6..b7e866f61 100644 --- a/xclim/testing/utils.py +++ b/xclim/testing/utils.py @@ -21,7 +21,7 @@ from shutil import copy from typing import TextIO from urllib.error import HTTPError, URLError -from urllib.parse import urljoin +from urllib.parse import urljoin, urlparse from urllib.request import urlopen, urlretrieve import pandas as pd @@ -62,6 +62,7 @@ __all__ = [ "_default_cache_dir", + "audit_url", "get_file", "get_local_testdata", "list_datasets", @@ -73,12 +74,33 @@ def file_md5_checksum(f_name): - hash_md5 = hashlib.md5() # nosec + hash_md5 = hashlib.md5() # noqa: S324 with open(f_name, "rb") as f: hash_md5.update(f.read()) return hash_md5.hexdigest() +def audit_url(url: str, context: str = None) -> str: + """Check if the URL is well-formed. + + Raises + ------ + URLError + If the URL is not well-formed. + """ + msg = "" + result = urlparse(url) + if result.scheme == "http": + msg = f"{context if context else ''} URL is not using secure HTTP: '{url}'".strip() + if not all([result.scheme, result.netloc]): + msg = f"{context if context else ''} URL is not well-formed: '{url}'".strip() + + if msg: + logger.error(msg) + raise URLError(msg) + return url + + def get_file( name: str | os.PathLike[str] | Sequence[str | os.PathLike[str]], github_url: str = "https://github.com/Ouranosinc/xclim-testdata", @@ -197,8 +219,8 @@ def _get( md5_name = fullname.with_suffix(f"{suffix}.md5") md5_file = cache_dir / branch / md5_name - if not github_url.lower().startswith("http"): - raise ValueError(f"GitHub URL not safe: '{github_url}'.") + if not github_url.startswith("https"): + raise ValueError(f"GitHub URL not secure: '{github_url}'.") if local_file.is_file(): local_md5 = file_md5_checksum(local_file) @@ -206,7 +228,7 @@ def _get( url = "/".join((github_url, "raw", branch, md5_name.as_posix())) msg = f"Attempting to fetch remote file md5: {md5_name.as_posix()}" logger.info(msg) - urlretrieve(url, md5_file) # nosec + urlretrieve(audit_url(url), md5_file) # noqa: S310 with open(md5_file) as f: remote_md5 = f.read() if local_md5.strip() != remote_md5.strip(): @@ -241,7 +263,7 @@ def _get( msg = f"Fetching remote file: {fullname.as_posix()}" logger.info(msg) try: - urlretrieve(url, local_file) # nosec + urlretrieve(audit_url(url), local_file) # noqa: S310 except HTTPError as e: msg = f"{fullname.as_posix()} not accessible in remote repository. Aborting file retrieval." raise FileNotFoundError(msg) from e @@ -262,7 +284,7 @@ def _get( url = "/".join((github_url, "raw", branch, md5_name.as_posix())) msg = f"Fetching remote file md5: {md5_name.as_posix()}" logger.info(msg) - urlretrieve(url, md5_file) # nosec + urlretrieve(audit_url(url), md5_file) # noqa: S310 except (HTTPError, URLError) as e: msg = ( f"{md5_name.as_posix()} not accessible online. " @@ -337,21 +359,17 @@ def open_dataset( suffix = ".nc" fullname = name.with_suffix(suffix) - if not github_url.lower().startswith("http"): - raise ValueError(f"GitHub URL not safe: '{github_url}'.") - if dap_url is not None: - if not dap_url.lower().startswith("http"): - raise ValueError(f"OPeNDAP URL not safe: '{dap_url}'.") - - dap_file = urljoin(dap_url, str(name)) + dap_file_address = urljoin(dap_url, str(name)) try: - ds = _open_dataset(dap_file, **kwargs) + ds = _open_dataset(audit_url(dap_file_address, context="OPeNDAP"), **kwargs) return ds - except OSError as err: - msg = "OPeNDAP file not read. Verify that the service is available." + except URLError: + raise + except OSError: + msg = f"OPeNDAP file not read. Verify that the service is available: '{dap_file_address}'" logger.error(msg) - raise OSError(msg) from err + raise OSError(msg) local_file = _get( fullname=fullname, @@ -378,8 +396,8 @@ def list_datasets(github_repo="Ouranosinc/xclim-testdata", branch="main"): This uses an unauthenticated call to GitHub's REST API, so it is limited to 60 requests per hour (per IP). A single call of this function triggers one request per subdirectory, so use with parsimony. """ - with urlopen( # nosec - f"https://api.github.com/repos/{github_repo}/contents?ref={branch}" + with urlopen( # noqa: S310 + audit_url(f"https://api.github.com/repos/{github_repo}/contents?ref={branch}") ) as res: base = json.loads(res.read().decode()) records = [] @@ -387,7 +405,7 @@ def list_datasets(github_repo="Ouranosinc/xclim-testdata", branch="main"): if folder["path"].startswith(".") or folder["size"] > 0: # drop hidden folders and other files. continue - with urlopen(folder["url"]) as res: # nosec + with urlopen(audit_url(folder["url"])) as res: # noqa: S310 listing = json.loads(res.read().decode()) for file in listing: if file["path"].endswith(".nc"): From 21e72f5606516f0a92070a943cf6864ce565a38f Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 26 Jun 2024 15:25:53 -0400 Subject: [PATCH 06/11] update pre-commit deps, synchronize --- .pre-commit-config.yaml | 14 +++++++------- CI/requirements_ci.txt | 2 +- pyproject.toml | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cbc8dbc9b..7e1e30844 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ default_language_version: repos: - repo: https://github.com/asottile/pyupgrade - rev: v3.15.2 + rev: v3.16.0 hooks: - id: pyupgrade args: ['--py39-plus'] @@ -41,16 +41,16 @@ repos: hooks: - id: isort - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.7 + rev: v0.4.10 hooks: - id: ruff - repo: https://github.com/pylint-dev/pylint - rev: v3.2.2 + rev: v3.2.4 hooks: - id: pylint args: [ '--rcfile=.pylintrc.toml', '--errors-only', '--jobs=0', '--disable=import-error' ] - repo: https://github.com/pycqa/flake8 - rev: 7.0.0 + rev: 7.1.0 hooks: - id: flake8 additional_dependencies: [ 'flake8-rst-docstrings '] @@ -64,7 +64,7 @@ repos: hooks: - id: nbqa-pyupgrade args: [ '--py39-plus' ] - additional_dependencies: [ 'pyupgrade==3.15.2' ] + additional_dependencies: [ 'pyupgrade==3.16.0' ] - id: nbqa-black additional_dependencies: [ 'black==24.4.2' ] - id: nbqa-isort @@ -91,11 +91,11 @@ repos: hooks: - id: deptry - repo: https://github.com/gitleaks/gitleaks - rev: v8.18.3 + rev: v8.18.4 hooks: - id: gitleaks - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.28.4 + rev: 0.28.6 hooks: - id: check-github-workflows - id: check-readthedocs diff --git a/CI/requirements_ci.txt b/CI/requirements_ci.txt index b693ea80d..0036c2228 100644 --- a/CI/requirements_ci.txt +++ b/CI/requirements_ci.txt @@ -3,6 +3,6 @@ coveralls==4.0.0 deptry==0.16.1 flit==3.9.0 pip==24.0 -pylint==3.2.2 +pylint==3.2.4 tox==4.15.1 tox-gh==1.3.1 diff --git a/pyproject.toml b/pyproject.toml index acbe170cf..199def5ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,7 @@ dev = [ "coverage[toml] >=7.5.0", "coveralls >=4.0.0", "deptry ==0.16.1", - "flake8 >=7.0.0", + "flake8 >=7.1.0", "flake8-rst-docstrings", "h5netcdf>=1.3.0", "ipython", @@ -77,7 +77,7 @@ dev = [ "platformdirs >=3.2", "pooch", "pre-commit >=3.7", - "pylint >=3.1", + "pylint >=3.2.4", "pytest <8.0", # Pinned due to breakage with xdoctest. See: https://github.com/Erotemic/xdoctest/issues/151 "pytest-cov", "pytest-socket", From 7ca3d2c131ac73b902d7ea4b634d539624aa8518 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 26 Jun 2024 15:26:25 -0400 Subject: [PATCH 07/11] do not run deptry via pre-commit as it requires the library be installed globally --- .pre-commit-config.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7e1e30844..366f07d08 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -86,10 +86,6 @@ repos: hooks: - id: codespell additional_dependencies: [ 'tomli' ] - - repo: https://github.com/fpgmaas/deptry.git - rev: "0.16.1" - hooks: - - id: deptry - repo: https://github.com/gitleaks/gitleaks rev: v8.18.4 hooks: From db90c2c4376ec7299f8125c816405621f0a759c0 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 26 Jun 2024 15:43:00 -0400 Subject: [PATCH 08/11] update CHANGES.rst --- CHANGES.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 42ff6c252..fbf02a8f2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,12 +6,23 @@ v0.51.0 (unreleased) -------------------- Contributors to this version: Trevor James Smith (:user:`Zeitsperre`). +New features and enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* `xclim` now separates the optional dependencies into `dev` and `docs` recipes. Both can be installed with the `all` option (`$ python -m pip install xclim[all]`). (:pull:`1806`). + +Breaking changes +^^^^^^^^^^^^^^^^ +* Added the `deptry `_ package to the `dev` linter tools and linting workflows for performing dependency analyses. (:pull:`1806`). + Internal changes ^^^^^^^^^^^^^^^^ * GitHub repository now uses Rulesets for branch protection. (:pull:`1790`). * Version bumping and project triage is now handled by the Ouranos Helper GitHub App. (:pull:`1790`). * `netcdf4` has been pinned below v1.7 for test stability reasons. (:pull:`1791`). * `bump-my-version` has been updated to v0.23.0. (:pull:`1790`). +* Several linting tools have been updated to the latest versions and pinned. (:pull:`1806`). +* `flake8-bandit`-like checks have been enabled via `ruff`, with fixes for a few security-related issues. (:pull:`1806`). +* ``xclim.testing.utils`` now employs more secure URL auditing checks. (:pull:`1806`). v0.50.0 (2024-06-17) -------------------- From 4aa8b25f13edd673f655d6e17a166e1d5a7e65a2 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 27 Jun 2024 14:26:12 -0400 Subject: [PATCH 09/11] address review comments --- CHANGES.rst | 14 +++++++------- xclim/core/units.py | 7 ++++--- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index fbf02a8f2..2d32b9bdb 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,19 +10,19 @@ New features and enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * `xclim` now separates the optional dependencies into `dev` and `docs` recipes. Both can be installed with the `all` option (`$ python -m pip install xclim[all]`). (:pull:`1806`). -Breaking changes -^^^^^^^^^^^^^^^^ -* Added the `deptry `_ package to the `dev` linter tools and linting workflows for performing dependency analyses. (:pull:`1806`). - Internal changes ^^^^^^^^^^^^^^^^ +* `netcdf4` has been pinned below v1.7 for test stability reasons. (:pull:`1791`). +* `flake8-bandit`-like checks have been enabled via `ruff`, with fixes for a few security-related issues. (:pull:`1806`). +* ``xclim.testing.utils`` now employs more secure URL auditing checks. (:pull:`1806`). + +CI changes +^^^^^^^^^^ * GitHub repository now uses Rulesets for branch protection. (:pull:`1790`). * Version bumping and project triage is now handled by the Ouranos Helper GitHub App. (:pull:`1790`). -* `netcdf4` has been pinned below v1.7 for test stability reasons. (:pull:`1791`). * `bump-my-version` has been updated to v0.23.0. (:pull:`1790`). +* Added the `deptry `_ package to the `dev` linter tools and linting workflows for performing dependency analyses. (:pull:`1806`). * Several linting tools have been updated to the latest versions and pinned. (:pull:`1806`). -* `flake8-bandit`-like checks have been enabled via `ruff`, with fixes for a few security-related issues. (:pull:`1806`). -* ``xclim.testing.utils`` now employs more secure URL auditing checks. (:pull:`1806`). v0.50.0 (2024-06-17) -------------------- diff --git a/xclim/core/units.py b/xclim/core/units.py index 195ecfdd2..6d370e29e 100644 --- a/xclim/core/units.py +++ b/xclim/core/units.py @@ -349,9 +349,10 @@ def convert_units_to( # noqa: C901 # The new cf standard name is inserted by the converter try: source = _CONVERSIONS[(convname, direction)](source) - except KeyError: # noqa: S110 - # There is no conversion available for this standard name - pass + except Exception as err: + raise ValueError( + f"No conversion available for this standard name: {standard_name}" + ) from err else: source_unit = units2pint(source) From 9237dc9d2bbf3220f7476a9e5d5d10ff95ad44ae Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 27 Jun 2024 14:49:27 -0400 Subject: [PATCH 10/11] Update xclim/core/units.py Co-authored-by: Pascal Bourgault --- xclim/core/units.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xclim/core/units.py b/xclim/core/units.py index 6d370e29e..d0ba4b183 100644 --- a/xclim/core/units.py +++ b/xclim/core/units.py @@ -351,7 +351,7 @@ def convert_units_to( # noqa: C901 source = _CONVERSIONS[(convname, direction)](source) except Exception as err: raise ValueError( - f"No conversion available for this standard name: {standard_name}" + f"There is a dimensionality incompatibility between the source and the target and no CF-based conversions have been found for this standard name: {standard_name}" ) from err else: source_unit = units2pint(source) From 59fa5c1e06f96adc7da5cd27801504f888ffb20e Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 27 Jun 2024 15:01:28 -0400 Subject: [PATCH 11/11] fix too long line --- xclim/core/units.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xclim/core/units.py b/xclim/core/units.py index d0ba4b183..3282de6f8 100644 --- a/xclim/core/units.py +++ b/xclim/core/units.py @@ -351,7 +351,8 @@ def convert_units_to( # noqa: C901 source = _CONVERSIONS[(convname, direction)](source) except Exception as err: raise ValueError( - f"There is a dimensionality incompatibility between the source and the target and no CF-based conversions have been found for this standard name: {standard_name}" + f"There is a dimensionality incompatibility between the source and the target " + f"and no CF-based conversions have been found for this standard name: {standard_name}" ) from err else: source_unit = units2pint(source)