diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 419ed77..8c4c3da 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,49 +1,106 @@ -name: build +name: CI/CD Workflow -on: [push, pull_request] +on: + push: + branches: [ main, develop ] + pull_request: + branches: [ main ] jobs: - test-on-linux: + build: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: ['3.10', '3.12'] steps: - - uses: actions/checkout@v2 + - name: Checkout code + uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - architecture: x64 +# cache: 'pip' + - name: Cache pip - uses: actions/cache@v1 + uses: actions/cache@v4 with: - path: ~/.cache/pip # This path is specific to Ubuntu - # Look to see if there is a cache hit for the corresponding requirements file + path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} restore-keys: | ${{ runner.os }}-pip- - ${{ runner.os }}- + - name: Install dependencies - run: | - python -m pip install --upgrade pip - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with flake8 - run: | + run: pip install -r requirements.txt + + - name: Install local package + run: pip install -e . + + - name: Run flake8 + run: | pip install flake8 flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # stop the build if there are Python syntax errors or undefined names flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - - name: Test with pytest + + - name: Run tests run: | - pip install pyscaffold pip install pytest pytest-cov - python setup.py develop - pytest -v --doctest-modules --cov=./ --cov-report=xml --cov-report=html - - uses: codecov/codecov-action@v1 + pytest src/ --doctest-modules --doctest-continue-on-failure -ra -v --cov=./ --cov-report=xml --cov-report=html + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + files: ./coverage.xml + flags: unittests + name: codecov-${{ matrix.python-version }} + fail_ci_if_error: false + + + build-documentation: + runs-on: ubuntu-latest + needs: build + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Cache pip + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('docs/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install documentation dependencies + run: pip install -r docs/requirements.txt + + - name: Build documentation + run: make documentation + + - name: Upload documentation artifacts + uses: actions/upload-pages-artifact@v3 + with: + name: documentation + path: docs/_build/html/ + + deploy-documentation: + runs-on: ubuntu-latest + needs: build-documentation + if: github.ref == 'refs/heads/main' + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + permissions: + pages: write + id-token: write + steps: + - name: Deploy documentation + uses: actions/deploy-pages@v4 with: - token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos - file: ./coverage.xml # optional - flags: pytest # optional - name: codecov-umbrella # optional - fail_ci_if_error: true # optional (default = false) \ 
No newline at end of file + artifact_name: documentation diff --git a/.gitignore b/.gitignore index d7c9832..fcf62b7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,13 +1,13 @@ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] +*$py.class # C extensions *.so # Distribution / packaging .Python -env/ build/ develop-eggs/ dist/ @@ -19,9 +19,12 @@ lib64/ parts/ sdist/ var/ +wheels/ +share/python-wheels/ *.egg-info/ .installed.cfg *.egg +MANIFEST # PyInstaller # Usually these files are written by a python script from a template @@ -36,12 +39,17 @@ pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ +.nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ # Translations *.mo @@ -49,41 +57,105 @@ coverage.xml # Django stuff: *.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy # Sphinx documentation -docs/_build/ +_build/ +_autoapi/ # PyBuilder +.pybuilder/ target/ -# DotEnv configuration +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments .env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ -# Database -*.db -*.rdb - -# Pycharm -.idea +# Spyder project settings +.spyderproject +.spyproject -# VS Code -.vscode/ +# Rope project settings +.ropeproject -# Spyder -.spyproject/ +# mkdocs documentation +/site -# Jupyter NB Checkpoints -.ipynb_checkpoints/ +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json -# exclude data from source control by default -/data/ +# Pyre type checker +.pyre/ -# Mac OS-specific storage files -.DS_Store +# pytype static type analyzer +.pytype/ -# vim -*.swp -*.swo +# Cython debug symbols +cython_debug/ -# Mypy cache -.mypy_cache/ +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ diff --git a/LICENSE b/LICENSE index ced5a80..72abd0f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,10 +1,22 @@ +MIT License -The MIT License (MIT) -Copyright (c) 2020, Markus Ritschel +Copyright (c) 2024, Markus Ritschel -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Makefile b/Makefile index 49eb54b..e0d3bfa 100644 --- a/Makefile +++ b/Makefile @@ -1,124 +1,134 @@ -.PHONY: clean data lint requirements sync_data_to_s3 sync_data_from_s3 +# This is a self-documenting Makefile. +# For details, check out the following resources: +# https://gist.github.com/klmr/575726c7e05d8780505a +# https://marmelab.com/blog/2016/02/29/auto-documented-makefile.html -################################################################################# -# GLOBALS # -################################################################################# +# ======= Put your targets etc. 
between here and the line which is starting with ".DEFAULT_GOAL" ======= +# Document any rules by adding a single line starting with ## right before the rule (see examples below) +# ====================================================================================================== -PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) -BUCKET = [OPTIONAL] your-bucket-for-syncing-data (do not include 's3://') -PROFILE = default -PROJECT_NAME = dkrz-cera -PYTHON_INTERPRETER = python3 +# If you have an .env file +# include .env -ifeq (,$(shell which conda)) -HAS_CONDA=False +# Check if Mamba is installed +CONDA := $(shell command -v conda 2> /dev/null) +MAMBA := $(shell command -v mamba 2> /dev/null) + +# Set the package manager to use +ifeq ($(MAMBA),) + PACKAGE_MANAGER := conda else -HAS_CONDA=True + PACKAGE_MANAGER := mamba endif -################################################################################# -# COMMANDS # -################################################################################# -## Install Python Dependencies -requirements: test_environment - $(PYTHON_INTERPRETER) -m pip install -U pip setuptools wheel - $(PYTHON_INTERPRETER) -m pip install -r requirements.txt +.PHONY: cleanup clean-jupyter-book clean-pyc, clean-logs, documentation, book, save-requirements, requirements, src-available, conda-env, test-requirements, tests, clear-images, convert-images, figures, crop-pdf, crop-png, show-help + +## Clean-up python artifacts, logs and jupyter-book built +cleanup: clean-pyc clean-logs clean-docs + +## Cleanup documentation built +clean-docs: + rm -rf doc/_build/* + jb clean --all docs/ + +# Remove Python file artifacts +clean-pyc: + find . -name '*.pyc' -exec rm -f {} + + find . -name '*.pyo' -exec rm -f {} + + find . -name '*~' -exec rm -f {} + + find . -name '__pycache__' -exec rm -fr {} + + + +## Remove all log files +clean-logs: + find ./logs -iname '*.log' -type f -exec rm {} + + + +## Build the code documentation with Jupyter-Book +documentation: + jb build docs/ -v + -## Delete all compiled Python files -clean: - find . -type f -name "*.py[co]" -delete - find . -type d -name "__pycache__" -delete -## Lint using flake8 +## Run flake8 linter lint: - flake8 dkrz_cera - -## Set up python interpreter environment -create_environment: -ifeq (True,$(HAS_CONDA)) - @echo ">>> Detected conda, creating conda environment." -ifeq (3,$(findstring 3,$(PYTHON_INTERPRETER))) - conda create --name $(PROJECT_NAME) python=3 -else - conda create --name $(PROJECT_NAME) python=2.7 -endif - @echo ">>> New conda env created. 
Activate with:\nsource activate $(PROJECT_NAME)" + flake8 ./src/ + + +## Synchronize Jupyter notebooks according to the rules in pyproject.toml +sync-notebooks: + jupytext --sync notebooks/**/*.ipynb + + +## Update the requirements.txt +save-requirements: + pip list --format=freeze > requirements.txt + + +## Create a conda environment.yml file +save-conda-env: + @pip_packages=$$(conda env export | grep -A9999 ".*- pip:" | grep -v "^prefix: ") ;\ + conda env export --from-history | grep -v "^prefix: " > environment.yml;\ + echo "$$pip_packages" >> environment.yml ;\ + sed -i 's/name: base/name: $(CONDA_DEFAULT_ENV)/g' environment.yml + @echo exported \"$$CONDA_DEFAULT_ENV\" environment to environment.yml + + +## Create a conda environment named 'dkrz-cera', install packages, and activate it +conda-env: + @echo "Create conda environment 'dkrz-cera" + conda create --name dkrz-cera python=3.10 --no-default-packages + @echo "Activate conda environment 'dkrz-cera'" + conda activate dkrz-cera + + + +## Install Python Dependencies +install-requirements: + @echo "Install required packages into current environment" +ifeq ($(CONDA),) + @echo "Conda not found, using pip." + python -m pip install -U pip setuptools wheel + python -m pip install -r requirements.txt else - $(PYTHON_INTERPRETER) -m pip install -q virtualenv virtualenvwrapper - @echo ">>> Installing virtualenvwrapper if not already installed.\nMake sure the following lines are in shell startup file\n\ - export WORKON_HOME=$$HOME/.virtualenvs\nexport PROJECT_HOME=$$HOME/Devel\nsource /usr/local/bin/virtualenvwrapper.sh\n" - @bash -c "source `which virtualenvwrapper.sh`;mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER)" - @echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)" + $(PACKAGE_MANAGER) env update --file environment.yml endif -## Test python environment is setup correctly -test_environment: - $(PYTHON_INTERPRETER) test_environment.py - -################################################################################# -# PROJECT RULES # -################################################################################# - - - -################################################################################# -# Self Documenting Commands # -################################################################################# - -.DEFAULT_GOAL := help - -# Inspired by -# sed script explained: -# /^##/: -# * save line in hold space -# * purge line -# * Loop: -# * append newline + line to hold space -# * go to next line -# * if line starts with doc comment, strip comment character off and loop -# * remove target prerequisites -# * append hold space (+ newline) to line -# * replace newline plus comments by `---` -# * print line -# Separate expressions are necessary because labels cannot be delimited by -# semicolon; see -.PHONY: help -help: - @echo "$$(tput bold)Available rules:$$(tput sgr0)" - @echo - @sed -n -e "/^## / { \ - h; \ - s/.*//; \ - :doc" \ - -e "H; \ - n; \ - s/^## //; \ - t doc" \ - -e "s/:.*//; \ - G; \ - s/\\n## /---/; \ - s/\\n/ /g; \ - p; \ - }" ${MAKEFILE_LIST} \ - | LC_ALL='C' sort --ignore-case \ - | awk -F '---' \ - -v ncol=$$(tput cols) \ - -v indent=19 \ - -v col_on="$$(tput setaf 6)" \ - -v col_off="$$(tput sgr0)" \ - '{ \ - printf "%s%*s%s ", col_on, -indent, $$1, col_off; \ - n = split($$2, words, " "); \ - line_length = ncol - indent; \ - for (i = 1; i <= n; i++) { \ - line_length -= length(words[i]) + 1; \ - if (line_length <= 0) { \ - line_length = ncol - indent - length(words[i]) - 1; 
\ - printf "\n%*s ", -indent, " "; \ - } \ - printf "%s ", words[i]; \ - } \ - printf "\n"; \ - }' \ - | more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars') + +## Install requirements for building the docs +install-doc-requirements: + python -m pip install -r docs/requirements.txt + + +## Make the source code as package available +src-available: + pip install -e . + + +## Check if all packages listed in requirements.txt are installed in the current environment +test-requirements: + @echo "Check if all packages listed in requirements.txt are installed in the current environment:" + # the "|| true" prevents the command returning an error if grep does not find a match + python -m pip -vvv freeze -r requirements.txt | grep "not installed" || true + + +## Run pytest for the source code +tests: test-requirements + python -m pytest -v + + +## Test github actions locally +test-gh-actions: + mkdir -p /tmp/artifacts + act push --artifact-server-path /tmp/artifacts --container-options "--userns host" --action-offline-mode + + + + + +# ==================== Don't put anything below this line ==================== +# https://www.digitalocean.com/community/tutorials/how-to-use-makefiles-to-automate-repetitive-tasks-on-an-ubuntu-vps +.DEFAULT_GOAL := show-help +show-help: + @echo "$$(tput bold)Available rules:$$(tput sgr0)";echo;sed -ne"/^## /{h;s/.*//;:d" -e"H;n;s/^## //;td" -e"s/:.*//;G;s/\\n## /---/;s/\\n/ /g;p;}" ${MAKEFILE_LIST}|LC_ALL='C' sort -f|awk -F --- -v n=$$(tput cols) -v i=21 -v a="$$(tput setaf 6)" -v z="$$(tput sgr0)" '{printf"%s%*s%s ",a,-i,$$1,z;m=split($$2,w," ");l=n-i;for(j=1;j<=m;j++){l-=length(w[j])+1;if(l<= 0){l=n-i-length(w[j])-1;printf"\n%*s ",-i," ";}printf"%s ",w[j];}printf"\n";}'|more diff --git a/README.md b/README.md index 79e2925..ee87113 100644 --- a/README.md +++ b/README.md @@ -1,36 +1,38 @@ -dkrz-cera -========= -[![GitHub license](https://img.shields.io/github/license/markusritschel/dkrz-cera)](https://github.com/markusritschel/dkrz-cera/blob/master/LICENSE) -![build](https://github.com/markusritschel/dkrz-cera/workflows/build/badge.svg) -[![codecov](https://codecov.io/gh/markusritschel/dkrz-cera/branch/master/graph/badge.svg)](https://codecov.io/gh/markusritschel/dkrz-cera) +# DKRZ-CERA -This package provides an interface to the CERA database of the DKRZ (_Deutsches Klimarechenzentrum_). +![build](https://github.com/markusritschel/dkrz-cera/actions/workflows/main.yml/badge.svg) +[![License MIT license](https://img.shields.io/github/license/markusritschel/dkrz-cera)](./LICENSE) + + +This package provides an interface to the CERA database of the DKRZ (Deutsches Klimarechenzentrum). This allows the user to scrape the database for CMIP data, for example, and prepare files for the remote download via [Jblob](https://cera-www.dkrz.de/WDCC/ui/cerasearch/info?site=jblob), a program written in Java and provided by the DKRZ. - -Installation ------------- -Via pip: +## Installation +Clone this repo via ```bash -pip install git+https://github.com/markusritschel/dkrz-cera.git +git clone https://github.com/markusritschel/dkrz-cera +``` +Then, in the new directory (`cd dkrz-cera/`) install the package via: ``` +pip install . +``` +or via +``` +pip install -e . +``` +if you plan on making changes to the code. 
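+
+A quick way to confirm that the package is importable from the active
+environment (this is just a plain import check, not part of the package's API):
+```python
+import dkrz_cera  # raises ImportError if the installation did not succeed
+print(dkrz_cera.__name__)
+```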
-Or, for installing from the sources, simply clone this repo via -```bash -git clone https://github.com/markusritschel/dkrz-cera.git +Alternatively, install directly from GitHub via ``` -and then install the package by running +pip install 'git+https://github.com/markusritschel/dkrz-cera.git' ``` -python setup.py install -``` -in the directory of the cloned repository. -Usage ------ +## Usage + The database can be scraped by creating an instance of the Cera class and using it's `search` method: ```python from dkrz_cera import Cera @@ -62,8 +64,15 @@ This can be done by using the function `unzip_files()` which takes the root path as a mandatory argument. -ToDos ------ + +## Testing +Run `make tests` in the source directory to test the code. +This will execute both the unit tests and docstring examples (using `pytest`). + +Run `make lint` to check code style consistency. + +## ToDos + - [x] Routine for scraping the CERA database based on multiple keywords - [x] sort files depending on configuration file => creates directory structure automatically during jblob download - [x] create intake-esm catalog files => this will be implemented in another package @@ -72,11 +81,13 @@ ToDos - [ ] implement [click](https://click.palletsprojects.com/) for command line tooling -Contact -------- -Feel free to contact me via git@markusritschel.de. -For problems or feature requests please open an [issue](https://github.com/markusritschel/dkrz-cera/issues). -Of course, you are also welcome to contribute and start a pull-request :-) +## Maintainer +- [markusritschel](https://github.com/markusritschel) + + +## Contact & Issues +For any questions or issues, please contact me via git@markusritschel.de or open an [issue](https://github.com/markusritschel/dkrz-cera/issues). + --------- -

-Project based on the cookiecutter data science project template. #cookiecutterdatascience

+--- +© Markus Ritschel 2024 diff --git a/requirements.txt b/requirements.txt index 50d11d7..2707b83 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,15 @@ -# local package --e . - -# external requirements -click -Sphinx -coverage -awscli flake8 -python-dotenv>=0.5.1 +jupyter-book +jupytext +sphinx-autoapi +sphinx-autodoc-defaultargs +sphinx-book-theme +sphinx-comments +sphinx-issues +sphinx_rtd_theme +sphinxcontrib-apidoc +sphinxcontrib-napoleon +Click -setuptools pytest -pandas -requests \ No newline at end of file +
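
The CI job above runs `pytest src/ --doctest-modules --doctest-continue-on-failure`, so examples embedded in docstrings are collected and executed alongside the regular unit tests. A minimal sketch of the docstring style this picks up (the function below is a hypothetical illustration, not part of dkrz-cera):

```python
# Hypothetical helper as it might appear in a module under src/; the example
# in its docstring is run as a test by `pytest --doctest-modules`.

def join_keywords(keywords):
    """Join CERA search keywords into a single query string.

    >>> join_keywords(["CMIP6", "tas", "historical"])
    'CMIP6 tas historical'
    """
    return " ".join(keywords)
```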