From 095d99c03314ba04249a1f4762d1e42c1a519bc1 Mon Sep 17 00:00:00 2001 From: nitreb <51701372+nitreb@users.noreply.github.com> Date: Wed, 20 Mar 2024 16:55:42 +0100 Subject: [PATCH] Migrate code on github (#1) Co-authored-by: mbertin --- .condarc | 2 - .github/workflows/check-format.yml | 29 ++ .github/workflows/ci.yml | 66 ---- .github/workflows/tests.yml | 30 ++ .github/workflows/tox-tests.yml | 30 ++ .gitlab-ci.yml | 121 ------- CONTRIBUTION.md | 17 +- Dockerfile.ci | 18 - Dockerfile.ci.tests | 12 - Dockerfile.ci.tests.tox | 24 -- Dockerfile.dockerhub | 7 + LICENSE.txt | 287 ++++++++++++++++ Makefile | 70 +--- README.md | 90 +++-- .../catalogue_parser/catalogue_parser.py | 317 +++++++++++------- .../catalogue_parser/request_structure.py | 42 ++- .../command_line_interface/group_get.py | 76 +++-- .../command_line_interface/group_login.py | 10 +- .../core_functions/credentials_utils.py | 33 +- copernicusmarine/core_functions/get.py | 83 +++-- copernicusmarine/core_functions/login.py | 14 + .../core_functions/services_utils.py | 25 +- copernicusmarine/core_functions/subset.py | 31 +- copernicusmarine/core_functions/utils.py | 40 ++- .../download_functions/download_ftp.py | 25 +- .../download_functions/download_get.py | 3 + .../download_original_files.py | 109 ++++-- .../download_functions/subset_xarray.py | 21 +- copernicusmarine/python_interface/get.py | 52 +-- copernicusmarine/python_interface/login.py | 2 + pip.conf | 4 - poetry.lock | 21 +- pyproject.toml | 3 +- release.sh | 50 +-- tests/resources/file_list_example.txt | 2 + ..._get_request_with_one_wrong_attribute.json | 2 +- .../test_get_request_with_request_file.json | 2 +- ...t_request_with_dataset_not_in_catalog.json | 2 +- ...test_subset_request_with_request_file.json | 2 +- tests/test_command_line_interface.py | 316 ++++++----------- ...and_line_interface_nearest_layer_subset.py | 80 ----- tests/test_dataset_version_selection.py | 2 +- tests/test_get_index_files_insitu.py | 79 +++++ tests/test_get_sync.py | 1 + tests/test_overwrite_output_data.py | 34 +- tests/test_python_interface.py | 12 +- tests/test_sqlite_subsetting.py | 45 +++ 47 files changed, 1374 insertions(+), 969 deletions(-) create mode 100644 .github/workflows/check-format.yml delete mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/tests.yml create mode 100644 .github/workflows/tox-tests.yml delete mode 100644 .gitlab-ci.yml delete mode 100644 Dockerfile.ci delete mode 100644 Dockerfile.ci.tests delete mode 100644 Dockerfile.ci.tests.tox create mode 100644 Dockerfile.dockerhub create mode 100644 LICENSE.txt delete mode 100644 pip.conf create mode 100644 tests/resources/file_list_example.txt create mode 100644 tests/test_get_index_files_insitu.py create mode 100644 tests/test_sqlite_subsetting.py diff --git a/.condarc b/.condarc index 242f7c09..42825518 100644 --- a/.condarc +++ b/.condarc @@ -1,4 +1,2 @@ -channel_alias: https://nexus.mercator-ocean.fr/repository channels: - - conda-proxy - conda-forge diff --git a/.github/workflows/check-format.yml b/.github/workflows/check-format.yml new file mode 100644 index 00000000..3d4fa5ac --- /dev/null +++ b/.github/workflows/check-format.yml @@ -0,0 +1,29 @@ +name: Check format + +on: push + +jobs: + check-format: + runs-on: self-hosted + + steps: + - name: Check out code + uses: actions/checkout@v4 + + - uses: mamba-org/setup-micromamba@v1 + with: + micromamba-version: '1.5.6-0' + micromamba-binary-path: ${{ runner.temp }}/bin/micromamba + environment-file: conda_environment.yaml + 
environment-name: copernicusmarine + condarc-file: .condarc + cache-environment: true + post-cleanup: 'all' + + - name: Poetry install + run: poetry install + shell: micromamba-shell {0} + + - name: Check format + run: make check-format + shell: micromamba-shell {0} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index b3f5dbd5..00000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: CI - -on: - push: - branches: [ "main" ] - pull_request: - branches: [ "main" ] - - workflow_dispatch: - -jobs: - - prepare: - runs-on: self-hosted - outputs: - docker_ci_image: ${{ steps.ci-image.outputs.DOCKER_CI_IMAGE }} - docker_ci_image_tests: ${{ steps.ci-image-tests.outputs.DOCKER_CI_IMAGE_TESTS }} - - steps: - - name: Check out code - uses: actions/checkout@v4 - - - name: build-image-ci - run: make build-and-publish-ci-image - env: - REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }} - - - name: build-image-ci-tests - run: make build-and-publish-ci-image-tests - env: - REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }} - - - id: ci-image - run: echo "DOCKER_CI_IMAGE=$(make get_ci_image)" >> $GITHUB_OUTPUT - - - id: ci-image-tests - run: echo "DOCKER_CI_IMAGE_TESTS=$(make get_ci_image_tests)" >> $GITHUB_OUTPUT - - check-format: - runs-on: self-hosted - needs: [prepare] - - steps: - - name: Check out code - uses: actions/checkout@v4 - - - name: check-format - run: | - chown 1000:1000 * .* -R - docker run --rm -v $(pwd):/workspace -w /workspace ${{needs.prepare.outputs.docker_ci_image}} make check-format - - tests: - runs-on: self-hosted - needs: [prepare] - - steps: - - name: Check out code - uses: actions/checkout@v4 - - - name: tests - env: - COPERNICUS_MARINE_SERVICE_PASSWORD: ${{ secrets.COPERNICUS_MARINE_SERVICE_PASSWORD }} - COPERNICUS_MARINE_SERVICE_USERNAME: ${{ secrets.COPERNICUS_MARINE_SERVICE_USERNAME }} - run: | - chown 1000:1000 * .* -R - docker run --rm -v $(pwd):/workspace -w /workspace ${{needs.prepare.outputs.docker_ci_image_tests}} make run-tests diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..30ae714b --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,30 @@ +name: Run tests + +on: + pull_request: + branches: [ "main" ] + +jobs: + tests: + runs-on: self-hosted + + steps: + - name: Check out code + uses: actions/checkout@v4 + + - uses: mamba-org/setup-micromamba@v1 + with: + micromamba-version: '1.5.6-0' + micromamba-binary-path: ${{ runner.temp }}/bin/micromamba + environment-file: conda_environment_test.yaml + environment-name: copernicusmarine_test + condarc-file: .condarc + cache-environment: true + post-cleanup: 'all' + + - name: Run tests + env: + COPERNICUS_MARINE_SERVICE_USERNAME: ${{ secrets.COPERNICUS_MARINE_SERVICE_USERNAME }} + COPERNICUS_MARINE_SERVICE_PASSWORD: ${{ secrets.COPERNICUS_MARINE_SERVICE_PASSWORD }} + run: make run-tests + shell: micromamba-shell {0} diff --git a/.github/workflows/tox-tests.yml b/.github/workflows/tox-tests.yml new file mode 100644 index 00000000..7b60b3d5 --- /dev/null +++ b/.github/workflows/tox-tests.yml @@ -0,0 +1,30 @@ +name: Run tests + +on: + schedule: + - cron: '0 0 * * 6' + +jobs: + tests: + runs-on: self-hosted + + steps: + - name: Check out code + uses: actions/checkout@v4 + + - uses: mamba-org/setup-micromamba@v1 + with: + micromamba-version: '1.5.6-0' + micromamba-binary-path: ${{ runner.temp }}/bin/micromamba + environment-file: conda_environment_test_tox.yaml + environment-name: 
copernicusmarine_test_tox + condarc-file: .condarc + cache-environment: true + post-cleanup: 'all' + + - name: Run tests + env: + COPERNICUS_MARINE_SERVICE_USERNAME: ${{ secrets.COPERNICUS_MARINE_SERVICE_USERNAME }} + COPERNICUS_MARINE_SERVICE_PASSWORD: ${{ secrets.COPERNICUS_MARINE_SERVICE_PASSWORD }} + run: make run-tests-dependencie-versions + shell: micromamba-shell {0} diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index f31dba7a..00000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,121 +0,0 @@ -stages: - - prepare - - tests - -build-image-ci: - stage: prepare - tags: - - shell-docker - script: - - make build-and-publish-ci-image - retry: - max: 2 - rules: - - if: $CI_PIPELINE_SOURCE != "merge_request_event" - when: always - -build-image-ci-tests: - stage: prepare - tags: - - shell-docker - script: - - make build-and-publish-ci-image-tests - retry: - max: 2 - rules: - - if: $CI_PIPELINE_SOURCE != "merge_request_event" - when: always - -build-image-ci-tests-tox: - stage: prepare - tags: - - shell-docker - script: - - make build-and-publish-ci-image-tests-tox - retry: - max: 2 - rules: - - if: $CI_PIPELINE_SOURCE == "schedule" && $IS_TOX_SCHEDULE_PIPELINE - -ci-image: - stage: prepare - tags: - - shell - script: - - echo "DOCKER_CI_IMAGE=$(make get_ci_image)" > out.env - - cat out.env - artifacts: - reports: - dotenv: out.env - rules: - - if: $CI_PIPELINE_SOURCE != "merge_request_event" - when: always - -ci-image-tests: - stage: prepare - tags: - - shell - script: - - echo "DOCKER_CI_IMAGE_TESTS=$(make get_ci_image_tests)" > out_tests.env - - cat out_tests.env - artifacts: - reports: - dotenv: out_tests.env - rules: - - if: $CI_PIPELINE_SOURCE != "merge_request_event" - when: always - -ci-image-tests-tox: - stage: prepare - tags: - - shell - script: - - echo "DOCKER_CI_IMAGE_TESTS_TOX=$(make get_ci_image_tests_tox)" > out_tests_tox.env - - cat out_tests_tox.env - artifacts: - reports: - dotenv: out_tests_tox.env - rules: - - if: $CI_PIPELINE_SOURCE == "schedule" && $IS_TOX_SCHEDULE_PIPELINE - -check-format: - stage: tests - image: - name: ${DOCKER_CI_IMAGE} - tags: - - docker - script: - - make check-format - rules: - - if: $CI_PIPELINE_SOURCE != "merge_request_event" - when: always - -tests: - stage: tests - image: - name: ${DOCKER_CI_IMAGE_TESTS} - tags: - - docker - script: - - make run-tests - rules: - - if: $CI_PIPELINE_SOURCE != "schedule" && $CI_PIPELINE_SOURCE != "merge_request_event" - when: always - artifacts: - when: always - paths: - - report.xml - reports: - junit: report.xml - -tests-dependencie-versions: - stage: tests - timeout: 4h - image: - name: ${DOCKER_CI_IMAGE_TESTS_TOX} - tags: - - docker - script: - - make run-tests-dependencie-versions - rules: - - if: $CI_PIPELINE_SOURCE == "schedule" && $IS_TOX_SCHEDULE_PIPELINE diff --git a/CONTRIBUTION.md b/CONTRIBUTION.md index 442d44a3..1464faa1 100644 --- a/CONTRIBUTION.md +++ b/CONTRIBUTION.md @@ -72,7 +72,7 @@ pytest tests --log-level -vv tests --durations=0 --log-level=info If you have the [`moi`](https://gitlab.mercator-ocean.fr/internal/shell-utils) command installed: ```sh -VERSION= PYPI_TOKEN=`moi read-secret --name PYPI_TOKEN` make release +VERSION= DOCKER_HUB_USERNAME=`moi read-secret --name DOCKER_HUB_USERNAME` DOCKER_HUB_PUSH_TOKEN=`moi read-secret --name DOCKER_HUB_PUSH_TOKEN` PYPI_TOKEN=`moi read-secret --name PYPI_TOKEN` make release ``` Otherwise: @@ -83,3 +83,18 @@ poetry publish --build --username __token__ --password $PYPI_TOKEN ``` Then tag the appropriate persons and add the 
changelog to the Jira issue, before merging the branch.
+
+## Build the Docker image
+
+If you have the [`moi`](https://gitlab.mercator-ocean.fr/internal/shell-utils) command installed:
+```sh
+VERSION= DOCKER_HUB_USERNAME=`moi read-secret --name DOCKER_HUB_USERNAME` DOCKER_HUB_PUSH_TOKEN=`moi read-secret --name DOCKER_HUB_PUSH_TOKEN` make build-and-publish-dockerhub-image
+```
+
+## Update the conda-forge feedstock repository
+
+First, here is the link to the conda-forge feedstock repository: [https://github.com/conda-forge/copernicusmarine-feedstock](https://github.com/conda-forge/copernicusmarine-feedstock).
+
+All the conda-forge information about this repository is available [here in the README](https://github.com/orgs/conda-forge/teams/copernicusmarine). To update it (new version, new maintainer...), please follow the indicated procedure.
+
+Please also take a look at [this conda-forge documentation](https://conda-forge.org/docs/maintainer/updating_pkgs/#example-workflow-for-updating-a-package) for more information about the update procedure.
diff --git a/Dockerfile.ci b/Dockerfile.ci
deleted file mode 100644
index 5f3c0132..00000000
--- a/Dockerfile.ci
+++ /dev/null
@@ -1,18 +0,0 @@
-ARG REGISTRY
-
-FROM ${REGISTRY}mambaorg/micromamba:1.4.1-kinetic
-
-COPY --chown=$MAMBA_USER:$MAMBA_USER Makefile /tmp/Makefile
-COPY --chown=$MAMBA_USER:$MAMBA_USER .condarc /tmp/.condarc
-COPY --chown=$MAMBA_USER:$MAMBA_USER conda_environment.yaml /tmp/conda_environment.yaml
-COPY --chown=$MAMBA_USER:$MAMBA_USER pyproject.toml /tmp/pyproject.toml
-COPY --chown=$MAMBA_USER:$MAMBA_USER copernicusmarine /tmp/copernicusmarine
-COPY --chown=$MAMBA_USER:$MAMBA_USER README.md /tmp/README.md
-RUN CONDARC=.condarc micromamba install -y -n base make
-ARG MAMBA_DOCKERFILE_ACTIVATE=1
-COPY --chown=$MAMBA_USER:$MAMBA_USER pip.conf /tmp/pip.conf
-RUN /usr/local/bin/_entrypoint.sh make create-environment
-RUN ENV_NAME=copernicusmarine /usr/local/bin/_entrypoint.sh git init
-COPY --chown=$MAMBA_USER:$MAMBA_USER .pre-commit-config.yaml /tmp/.pre-commit-config.yaml
-RUN ENV_NAME=copernicusmarine /usr/local/bin/_entrypoint.sh git config --global --add safe.directory /tmp
-RUN ENV_NAME=copernicusmarine /usr/local/bin/_entrypoint.sh pre-commit install --install-hooks
diff --git a/Dockerfile.ci.tests b/Dockerfile.ci.tests
deleted file mode 100644
index e29dc850..00000000
--- a/Dockerfile.ci.tests
+++ /dev/null
@@ -1,12 +0,0 @@
-ARG REGISTRY
-
-FROM ${REGISTRY}mambaorg/micromamba:1.4.1-kinetic
-
-COPY --chown=$MAMBA_USER:$MAMBA_USER Makefile /tmp/Makefile
-COPY --chown=$MAMBA_USER:$MAMBA_USER .condarc /tmp/.condarc
-COPY --chown=$MAMBA_USER:$MAMBA_USER conda_environment_test.yaml /tmp/conda_environment_test.yaml
-COPY --chown=$MAMBA_USER:$MAMBA_USER copernicusmarine /tmp/copernicusmarine
-RUN CONDARC=.condarc micromamba install -y -n base make
-ARG MAMBA_DOCKERFILE_ACTIVATE=1
-COPY --chown=$MAMBA_USER:$MAMBA_USER pip.conf /tmp/pip.conf
-RUN /usr/local/bin/_entrypoint.sh make create-test-environment
diff --git a/Dockerfile.ci.tests.tox b/Dockerfile.ci.tests.tox
deleted file mode 100644
index cae4dad1..00000000
--- a/Dockerfile.ci.tests.tox
+++ /dev/null
@@ -1,24 +0,0 @@
-ARG REGISTRY
-
-FROM ubuntu:latest
-
-ARG DEBIAN_FRONTEND=noninteractive
-
-RUN apt update
-RUN apt install software-properties-common -y
-RUN add-apt-repository ppa:deadsnakes/ppa
-RUN apt upgrade -y
-
-RUN apt-get install -y python3.12 python3.11 python3.10 python3.9
-RUN apt-get install -y python3.11-dev python3.10-dev python3.9-dev
-RUN apt-get install
-y python3.11-distutils python3.10-distutils python3.9-distutils -RUN apt-get install -y python3-pip - -RUN python3.11 -m pip install --upgrade pip -RUN python3.11 -m pip install tox - -WORKDIR /tmp - -COPY Makefile /tmp/Makefile -COPY tox.ini /tmp/tox.ini -COPY copernicusmarine /tmp/copernicusmarine diff --git a/Dockerfile.dockerhub b/Dockerfile.dockerhub new file mode 100644 index 00000000..6cad2de2 --- /dev/null +++ b/Dockerfile.dockerhub @@ -0,0 +1,7 @@ +FROM python:3.12.2 + +ARG VERSION + +RUN pip install copernicusmarine==$VERSION + +ENTRYPOINT [ "copernicusmarine" ] diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 00000000..4153cd37 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,287 @@ + EUROPEAN UNION PUBLIC LICENCE v. 1.2 + EUPL © the European Union 2007, 2016 + +This European Union Public Licence (the ‘EUPL’) applies to the Work (as defined +below) which is provided under the terms of this Licence. Any use of the Work, +other than as authorised under this Licence is prohibited (to the extent such +use is covered by a right of the copyright holder of the Work). + +The Work is provided under the terms of this Licence when the Licensor (as +defined below) has placed the following notice immediately following the +copyright notice for the Work: + + Licensed under the EUPL + +or has expressed by any other means his willingness to license under the EUPL. + +1. Definitions + +In this Licence, the following terms have the following meaning: + +- ‘The Licence’: this Licence. + +- ‘The Original Work’: the work or software distributed or communicated by the + Licensor under this Licence, available as Source Code and also as Executable + Code as the case may be. + +- ‘Derivative Works’: the works or software that could be created by the + Licensee, based upon the Original Work or modifications thereof. This Licence + does not define the extent of modification or dependence on the Original Work + required in order to classify a work as a Derivative Work; this extent is + determined by copyright law applicable in the country mentioned in Article 15. + +- ‘The Work’: the Original Work or its Derivative Works. + +- ‘The Source Code’: the human-readable form of the Work which is the most + convenient for people to study and modify. + +- ‘The Executable Code’: any code which has generally been compiled and which is + meant to be interpreted by a computer as a program. + +- ‘The Licensor’: the natural or legal person that distributes or communicates + the Work under the Licence. + +- ‘Contributor(s)’: any natural or legal person who modifies the Work under the + Licence, or otherwise contributes to the creation of a Derivative Work. + +- ‘The Licensee’ or ‘You’: any natural or legal person who makes any usage of + the Work under the terms of the Licence. + +- ‘Distribution’ or ‘Communication’: any act of selling, giving, lending, + renting, distributing, communicating, transmitting, or otherwise making + available, online or offline, copies of the Work or providing access to its + essential functionalities at the disposal of any other natural or legal + person. + +2. 
Scope of the rights granted by the Licence + +The Licensor hereby grants You a worldwide, royalty-free, non-exclusive, +sublicensable licence to do the following, for the duration of copyright vested +in the Original Work: + +- use the Work in any circumstance and for all usage, +- reproduce the Work, +- modify the Work, and make Derivative Works based upon the Work, +- communicate to the public, including the right to make available or display + the Work or copies thereof to the public and perform publicly, as the case may + be, the Work, +- distribute the Work or copies thereof, +- lend and rent the Work or copies thereof, +- sublicense rights in the Work or copies thereof. + +Those rights can be exercised on any media, supports and formats, whether now +known or later invented, as far as the applicable law permits so. + +In the countries where moral rights apply, the Licensor waives his right to +exercise his moral right to the extent allowed by law in order to make effective +the licence of the economic rights here above listed. + +The Licensor grants to the Licensee royalty-free, non-exclusive usage rights to +any patents held by the Licensor, to the extent necessary to make use of the +rights granted on the Work under this Licence. + +3. Communication of the Source Code + +The Licensor may provide the Work either in its Source Code form, or as +Executable Code. If the Work is provided as Executable Code, the Licensor +provides in addition a machine-readable copy of the Source Code of the Work +along with each copy of the Work that the Licensor distributes or indicates, in +a notice following the copyright notice attached to the Work, a repository where +the Source Code is easily and freely accessible for as long as the Licensor +continues to distribute or communicate the Work. + +4. Limitations on copyright + +Nothing in this Licence is intended to deprive the Licensee of the benefits from +any exception or limitation to the exclusive rights of the rights owners in the +Work, of the exhaustion of those rights or of other applicable limitations +thereto. + +5. Obligations of the Licensee + +The grant of the rights mentioned above is subject to some restrictions and +obligations imposed on the Licensee. Those obligations are the following: + +Attribution right: The Licensee shall keep intact all copyright, patent or +trademarks notices and all notices that refer to the Licence and to the +disclaimer of warranties. The Licensee must include a copy of such notices and a +copy of the Licence with every copy of the Work he/she distributes or +communicates. The Licensee must cause any Derivative Work to carry prominent +notices stating that the Work has been modified and the date of modification. + +Copyleft clause: If the Licensee distributes or communicates copies of the +Original Works or Derivative Works, this Distribution or Communication will be +done under the terms of this Licence or of a later version of this Licence +unless the Original Work is expressly distributed only under this version of the +Licence — for example by communicating ‘EUPL v. 1.2 only’. The Licensee +(becoming Licensor) cannot offer or impose any additional terms or conditions on +the Work or Derivative Work that alter or restrict the terms of the Licence. 
+ +Compatibility clause: If the Licensee Distributes or Communicates Derivative +Works or copies thereof based upon both the Work and another work licensed under +a Compatible Licence, this Distribution or Communication can be done under the +terms of this Compatible Licence. For the sake of this clause, ‘Compatible +Licence’ refers to the licences listed in the appendix attached to this Licence. +Should the Licensee's obligations under the Compatible Licence conflict with +his/her obligations under this Licence, the obligations of the Compatible +Licence shall prevail. + +Provision of Source Code: When distributing or communicating copies of the Work, +the Licensee will provide a machine-readable copy of the Source Code or indicate +a repository where this Source will be easily and freely available for as long +as the Licensee continues to distribute or communicate the Work. + +Legal Protection: This Licence does not grant permission to use the trade names, +trademarks, service marks, or names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the copyright notice. + +6. Chain of Authorship + +The original Licensor warrants that the copyright in the Original Work granted +hereunder is owned by him/her or licensed to him/her and that he/she has the +power and authority to grant the Licence. + +Each Contributor warrants that the copyright in the modifications he/she brings +to the Work are owned by him/her or licensed to him/her and that he/she has the +power and authority to grant the Licence. + +Each time You accept the Licence, the original Licensor and subsequent +Contributors grant You a licence to their contributions to the Work, under the +terms of this Licence. + +7. Disclaimer of Warranty + +The Work is a work in progress, which is continuously improved by numerous +Contributors. It is not a finished work and may therefore contain defects or +‘bugs’ inherent to this type of development. + +For the above reason, the Work is provided under the Licence on an ‘as is’ basis +and without warranties of any kind concerning the Work, including without +limitation merchantability, fitness for a particular purpose, absence of defects +or errors, accuracy, non-infringement of intellectual property rights other than +copyright as stated in Article 6 of this Licence. + +This disclaimer of warranty is an essential part of the Licence and a condition +for the grant of any rights to the Work. + +8. Disclaimer of Liability + +Except in the cases of wilful misconduct or damages directly caused to natural +persons, the Licensor will in no event be liable for any direct or indirect, +material or moral, damages of any kind, arising out of the Licence or of the use +of the Work, including without limitation, damages for loss of goodwill, work +stoppage, computer failure or malfunction, loss of data or any commercial +damage, even if the Licensor has been advised of the possibility of such damage. +However, the Licensor will be liable under statutory product liability laws as +far such laws apply to the Work. + +9. Additional agreements + +While distributing the Work, You may choose to conclude an additional agreement, +defining obligations or services consistent with this Licence. 
However, if +accepting obligations, You may act only on your own behalf and on your sole +responsibility, not on behalf of the original Licensor or any other Contributor, +and only if You agree to indemnify, defend, and hold each Contributor harmless +for any liability incurred by, or claims asserted against such Contributor by +the fact You have accepted any warranty or additional liability. + +10. Acceptance of the Licence + +The provisions of this Licence can be accepted by clicking on an icon ‘I agree’ +placed under the bottom of a window displaying the text of this Licence or by +affirming consent in any other similar way, in accordance with the rules of +applicable law. Clicking on that icon indicates your clear and irrevocable +acceptance of this Licence and all of its terms and conditions. + +Similarly, you irrevocably accept this Licence and all of its terms and +conditions by exercising any rights granted to You by Article 2 of this Licence, +such as the use of the Work, the creation by You of a Derivative Work or the +Distribution or Communication by You of the Work or copies thereof. + +11. Information to the public + +In case of any Distribution or Communication of the Work by means of electronic +communication by You (for example, by offering to download the Work from a +remote location) the distribution channel or media (for example, a website) must +at least provide to the public the information requested by the applicable law +regarding the Licensor, the Licence and the way it may be accessible, concluded, +stored and reproduced by the Licensee. + +12. Termination of the Licence + +The Licence and the rights granted hereunder will terminate automatically upon +any breach by the Licensee of the terms of the Licence. + +Such a termination will not terminate the licences of any person who has +received the Work from the Licensee under the Licence, provided such persons +remain in full compliance with the Licence. + +13. Miscellaneous + +Without prejudice of Article 9 above, the Licence represents the complete +agreement between the Parties as to the Work. + +If any provision of the Licence is invalid or unenforceable under applicable +law, this will not affect the validity or enforceability of the Licence as a +whole. Such provision will be construed or reformed so as necessary to make it +valid and enforceable. + +The European Commission may publish other linguistic versions or new versions of +this Licence or updated versions of the Appendix, so far this is required and +reasonable, without reducing the scope of the rights granted by the Licence. New +versions of the Licence will be published with a unique version number. + +All linguistic versions of this Licence, approved by the European Commission, +have identical value. Parties can take advantage of the linguistic version of +their choice. + +14. Jurisdiction + +Without prejudice to specific agreement between parties, + +- any litigation resulting from the interpretation of this License, arising + between the European Union institutions, bodies, offices or agencies, as a + Licensor, and any Licensee, will be subject to the jurisdiction of the Court + of Justice of the European Union, as laid down in article 272 of the Treaty on + the Functioning of the European Union, + +- any litigation arising between other parties and resulting from the + interpretation of this License, will be subject to the exclusive jurisdiction + of the competent court where the Licensor resides or conducts its primary + business. + +15. 
Applicable Law + +Without prejudice to specific agreement between parties, + +- this Licence shall be governed by the law of the European Union Member State + where the Licensor has his seat, resides or has his registered office, + +- this licence shall be governed by Belgian law if the Licensor has no seat, + residence or registered office inside a European Union Member State. + +Appendix + +‘Compatible Licences’ according to Article 5 EUPL are: + +- GNU General Public License (GPL) v. 2, v. 3 +- GNU Affero General Public License (AGPL) v. 3 +- Open Software License (OSL) v. 2.1, v. 3.0 +- Eclipse Public License (EPL) v. 1.0 +- CeCILL v. 2.0, v. 2.1 +- Mozilla Public Licence (MPL) v. 2 +- GNU Lesser General Public Licence (LGPL) v. 2.1, v. 3 +- Creative Commons Attribution-ShareAlike v. 3.0 Unported (CC BY-SA 3.0) for + works other than software +- European Union Public Licence (EUPL) v. 1.1, v. 1.2 +- Québec Free and Open-Source Licence — Reciprocity (LiLiQ-R) or Strong + Reciprocity (LiLiQ-R+). + +The European Commission may update this Appendix to later versions of the above +licences without producing a new version of the EUPL, as long as they provide +the rights granted in Article 2 of this Licence and protect the covered Source +Code from exclusive appropriation. + +All other changes or additions to this Appendix require the production of a new +EUPL version. diff --git a/Makefile b/Makefile index 9250d0d0..dff1a853 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,7 @@ ENVIRONMENT_NAME = ${PROJECT_NAME} ENVIRONMENT_FILE_NAME = conda_environment.yaml TEST_ENVIRONMENT_NAME = ${PROJECT_NAME}_test TEST_ENVIRONMENT_FILE_NAME = conda_environment_test.yaml +TEST_TOX_ENVIRONMENT_NAME = ${PROJECT_NAME}_test_tox TEST_TOX_ENVIRONMENT_FILE_NAME = conda_environment_test_tox.yaml .ONESHELL: @@ -13,26 +14,13 @@ SHELL := /bin/bash MICROMAMBA_ACTIVATE=eval "$$(micromamba shell hook --shell=bash)" && micromamba activate && micromamba activate CONDA_ACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh && conda activate && conda activate ACTIVATE_ENVIRONMENT=${MICROMAMBA_ACTIVATE} ${SELECTED_ENVIRONMENT_NAME} || ${CONDA_ACTIVATE} ${SELECTED_ENVIRONMENT_NAME} -BUILD_ENVIRONMENT_CHECKSUM=$$(sha256sum ${ENVIRONMENT_FILE_NAME} .pre-commit-config.yaml .condarc pip.conf Dockerfile.ci | sha256sum -z | cut -d ' ' -f 1) -BUILD_ENVIRONMENT_CHECKSUM_TESTS=$$(sha256sum ${TEST_ENVIRONMENT_FILE_NAME} .condarc pip.conf Dockerfile.ci.tests | sha256sum -z | cut -d ' ' -f 1) -BUILD_ENVIRONMENT_CHECKSUM_TESTS_TOX=$$(sha256sum ${TEST_TOX_ENVIRONMENT_FILE_NAME} .condarc pip.conf Dockerfile.ci.tests.tox | sha256sum -z | cut -d ' ' -f 1) - -REGISTRY="docker.mercator-ocean.fr" -REGISTRY_URI="https://${REGISTRY}" -REGISTRY_REPOSITORY="${REGISTRY}/moi-docker" -REGISTRY_USERNAME="ci-robot" define conda-command micromamba $1 || mamba $1 || conda $1 endef -# Make sure you use it for nothing but networking stuff (think about race conditons) -# # Example: $(call retry,3,some_script.sh) -retry = $(2) $(foreach t,$(shell seq 1 ${1}),|| (echo -e "\033[33m Failed ($$?): '$(2)'\n Retrying $t ... 
\033[0m"; $(2)))"]]")) - create-update-environment: export CONDARC=.condarc - export PIP_CONFIG_FILE=pip.conf ($(call conda-command, env update --file ${SELECTED_ENVIRONMENT_FILE_NAME} --name ${SELECTED_ENVIRONMENT_NAME}) \ || $(call conda-command, update --file ${SELECTED_ENVIRONMENT_FILE_NAME} --name ${SELECTED_ENVIRONMENT_NAME}) \ || $(call conda-command, env create --file ${SELECTED_ENVIRONMENT_FILE_NAME} --name ${SELECTED_ENVIRONMENT_NAME})) @@ -42,62 +30,36 @@ create-environment: SELECTED_ENVIRONMENT_FILE_NAME = ${ENVIRONMENT_FILE_NAME} create-environment: create-update-environment $(call conda-command, run --name ${ENVIRONMENT_NAME} poetry install) -check-format: SELECTED_ENVIRONMENT_NAME = ${ENVIRONMENT_NAME} -check-format: - ${ACTIVATE_ENVIRONMENT} - pre-commit run --all-files --show-diff-on-failure - -get_ci_image: - @echo ${REGISTRY_REPOSITORY}/${PROJECT_NAME}-ci:${BUILD_ENVIRONMENT_CHECKSUM} - -get_ci_image_tests: - @echo ${REGISTRY_REPOSITORY}/${PROJECT_NAME}-ci-tests:${BUILD_ENVIRONMENT_CHECKSUM_TESTS} - -get_ci_image_tests_tox: - @echo ${REGISTRY_REPOSITORY}/${PROJECT_NAME}-ci-tests-tox:${BUILD_ENVIRONMENT_CHECKSUM_TESTS_TOX} - -build-and-publish-image: - docker login ${REGISTRY_URI} --username ${REGISTRY_USERNAME} --password $${REGISTRY_PASSWORD} - @if docker manifest inspect ${REGISTRY_REPOSITORY}/${CONTAINER_IMAGE_NAME} > /dev/null ; then - echo "The image already exist on nexus" - else - echo "The image does not exists on nexus" - docker build --ulimit nofile=65536:65536 --tag ${REGISTRY_REPOSITORY}/${CONTAINER_IMAGE_NAME} -f ${CONTAINER_IMAGE_DOCKERFILE} --build-arg REGISTRY="${REGISTRY}/" . - docker push ${REGISTRY_REPOSITORY}/${CONTAINER_IMAGE_NAME} - fi - -build-and-publish-ci-image: CONTAINER_IMAGE_NAME = ${PROJECT_NAME}-ci:${BUILD_ENVIRONMENT_CHECKSUM} -build-and-publish-ci-image: CONTAINER_IMAGE_DOCKERFILE = Dockerfile.ci -build-and-publish-ci-image: build-and-publish-image - -build-and-publish-ci-image-tests: CONTAINER_IMAGE_NAME = ${PROJECT_NAME}-ci-tests:${BUILD_ENVIRONMENT_CHECKSUM_TESTS} -build-and-publish-ci-image-tests: CONTAINER_IMAGE_DOCKERFILE = Dockerfile.ci.tests -build-and-publish-ci-image-tests: build-and-publish-image - -build-and-publish-ci-image-tests-tox: CONTAINER_IMAGE_NAME = ${PROJECT_NAME}-ci-tests-tox:${BUILD_ENVIRONMENT_CHECKSUM_TESTS_TOX} -build-and-publish-ci-image-tests-tox: CONTAINER_IMAGE_DOCKERFILE = Dockerfile.ci.tests.tox -build-and-publish-ci-image-tests-tox: build-and-publish-image - create-test-environment: SELECTED_ENVIRONMENT_NAME = ${TEST_ENVIRONMENT_NAME} create-test-environment: SELECTED_ENVIRONMENT_FILE_NAME = ${TEST_ENVIRONMENT_FILE_NAME} create-test-environment: create-update-environment -create-test-environment-tox: SELECTED_ENVIRONMENT_NAME = ${TEST_ENVIRONMENT_NAME}-tox +create-test-environment-tox: SELECTED_ENVIRONMENT_NAME = ${TEST_TOX_ENVIRONMENT_NAME} create-test-environment-tox: SELECTED_ENVIRONMENT_FILE_NAME = ${TEST_TOX_ENVIRONMENT_FILE_NAME} create-test-environment-tox: create-update-environment +check-format: SELECTED_ENVIRONMENT_NAME = ${ENVIRONMENT_NAME} +check-format: + ${ACTIVATE_ENVIRONMENT} + pre-commit run --all-files --show-diff-on-failure + run-tests: SELECTED_ENVIRONMENT_NAME = ${TEST_ENVIRONMENT_NAME} run-tests: ${ACTIVATE_ENVIRONMENT} pip install --editable . 
pytest tests --verbose -vv --durations=0 --log-cli-level=info --basetemp="tests/downloads" --junitxml=report.xml --log-format "%(asctime)s %(levelname)s %(message)s" --log-date-format "%Y-%m-%d %H:%M:%S"
 
+run-tests-dependencie-versions: SELECTED_ENVIRONMENT_NAME = ${TEST_TOX_ENVIRONMENT_NAME}
+run-tests-dependencie-versions:
+	${ACTIVATE_ENVIRONMENT}
+	tox run
+
 release: SELECTED_ENVIRONMENT_NAME = ${ENVIRONMENT_NAME}
 release:
 	${ACTIVATE_ENVIRONMENT}
 	PYPI_TOKEN=$${PYPI_TOKEN} VERSION=$${VERSION} ./release.sh
 
-run-tests-dependencie-versions: SELECTED_ENVIRONMENT_NAME = ${TEST_ENVIRONMENT_NAME}
-run-tests-dependencie-versions:
-	${ACTIVATE_ENVIRONMENT}
-	tox run
+build-and-publish-dockerhub-image:
+	docker login --username $${DOCKER_HUB_USERNAME} --password $${DOCKER_HUB_PUSH_TOKEN}
+	docker build --ulimit nofile=65536:65536 --tag copernicusmarine/copernicusmarine:$${VERSION} -f Dockerfile.dockerhub --build-arg VERSION="$${VERSION}" .
+	docker push copernicusmarine/copernicusmarine:$${VERSION}
diff --git a/README.md b/README.md
index 93a9980c..b319747d 100644
--- a/README.md
+++ b/README.md
@@ -20,34 +20,18 @@ The `copernicusmarine` offers capabilities through both **Command Line Interface
 
 For installation, multiple options are available depending on your setup:
 
 ### Conda|Mamba
-Though no conda package has been created yet, these steps cover the installation in a new isolated environment (safer). Replace conda by mamba if necessary.
-
-- Create the file `copernicusmarine-env.yml` that contains:
-```yaml
-name: cmc
-channels:
-  - conda-forge
-dependencies:
-  - python>=3.9,<3.13
-  - pip
-  - pip:
-    - copernicusmarine
-```
-- Use the terminal or a [Conda|Mamba] Prompt to create the `cmc` environment from the `yml` file:
-```bash
-conda env create --file copernicusmarine-env.yml
-```
+A conda package is available and has been uploaded to the conda-forge channel.
 
-- Open the new `cmc` environment by running:
-```bash
-conda activate cmc
-```
+Here is its main web page: [https://anaconda.org/conda-forge/copernicusmarine](https://anaconda.org/conda-forge/copernicusmarine)
 
-- Verify that the new `cmc` environment was installed correctly:
-```bash
-conda env list
-```
+You can install it using conda through the conda-forge channel with the following command: `conda install copernicusmarine -c conda-forge`
+
+### Docker
+
+A Docker image is also available here: [https://hub.docker.com/r/copernicusmarine/copernicusmarine](https://hub.docker.com/r/copernicusmarine/copernicusmarine)
+
+Here is a basic command to run it: `docker run -it --rm copernicusmarine/copernicusmarine:1.0.5 --version`
 
 ### Pip
 Otherwise, if you already have an environment (safer to clone it), the package can be installed using the `pip` command:
@@ -71,7 +55,7 @@ Cachier library is used for caching part of the requests (as describe result or
 
 #### Disable SSL
 
-A global SSL context is used when making HTTP calls using the `copernicusmarine` toolbox. For some reason, it can lead to unexpected behavior depending on your network configuration. You can set the `COPERNICUSMARINE_DISABLE_SSL_CONTEXT` environmnent variable to globally disable the usage of SSL in the client.
+A global SSL context is used when making HTTP calls using the `copernicusmarine` toolbox. Depending on your network configuration, it can lead to unexpected behavior. You can set the `COPERNICUSMARINE_DISABLE_SSL_CONTEXT` environment variable to any value to globally disable the usage of SSL in the client (e.g. 
`COPERNICUSMARINE_DISABLE_SSL_CONTEXT=True`).
 
 ## Command Line Interface (CLI)
 
@@ -121,6 +105,8 @@ INFO - Configuration files stored in /Users/foo/.copernicusmarine
 
 If `.copernicusmarine-credentials` already exists, the user is asked for confirmation to overwrite (`--overwrite`/`--overwrite-configuration-file`).
 
+You can use the `--skip-if-user-logged-in` option to skip the configuration file overwrite if the user is already logged in.
+
 #### Access points migration and evolution
 
 If you already have a configuration for current services (e.g. `~/motuclient/motuclient-python.ini`, `~/.netrc` or `~/_netrc`) in your home directory, it will automatically be taken into account with commands `get` and `subset` without the need for running the `login` command.
 
@@ -213,6 +199,8 @@ By default:
 
 Option `--show-outputnames` displays the full paths of the output files, if required.
 
+Option `--download-file-list` only creates a file `files_to_download.txt` containing the names of the targeted files instead of downloading them. If specified, no other action will be performed.
+
 #### Note about sync option
 
 Option `--sync` allows to download original files only if not exist and not up to date. The toolbox checks the destination folder against the source folder. It can be combined with filters. Note that if set with `--overwrite-output-data`, the latter will be ignored.
 
@@ -256,6 +244,56 @@ Total size of the download: 26.59 MB
 Do you want to proceed with download? [Y/n]:
 ```
 
+Option `--file-list` allows you to specify a list of files for more advanced file selection:
+
+An example `file_list.txt` would look like this:
+```txt
+CMEMS_v5r1_IBI_PHY_MY_NL_01yav_20000101_20001231_R20221101_RE01.nc
+CMEMS_v5r1_IBI_PHY_MY_NL_01yav_20010101_20011231_R20221101_RE01.nc
+CMEMS_v5r1_IBI_PHY_MY_NL_01yav_20020101_20021231_R20221101_RE01.nc
+```
+> **_NOTE:_** This option is compatible with the file generated by the `--download-file-list` option.
+
+Then the following command:
+```bash
+copernicusmarine get -i cmems_mod_ibi_phy_my_0.083deg-3D_P1Y-m --file-list file_list.txt
+```
+Returns:
+```bash
+INFO - Downloading using service files...
+INFO - You requested the download of the following files:
+s3://mdl-native/native/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1Y-m_202211/CMEMS_v5r1_IBI_PHY_MY_NL_01yav_20000101_20001231_R20221101_RE01.nc - 8.93 MB
+s3://mdl-native/native/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1Y-m_202211/CMEMS_v5r1_IBI_PHY_MY_NL_01yav_20010101_20011231_R20221101_RE01.nc - 8.91 MB
+s3://mdl-native/native/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1Y-m_202211/CMEMS_v5r1_IBI_PHY_MY_NL_01yav_20020101_20021231_R20221101_RE01.nc - 8.75 MB
+
+Total size of the download: 26.59 MB
+Do you want to proceed with download? [Y/n]:
+```
+
+Also, there is a specific option `--index-parts` to retrieve the index files of INSITU datasets (for an example of index files, see [this link on Copernicus Marine Service](https://data.marine.copernicus.eu/product/INSITU_BLK_PHYBGCWAV_DISCRETE_MYNRT_013_034/files?subdataset=cmems_obs-ins_blk_phybgcwav_mynrt_na_irr_202311--ext--history&path=INSITU_BLK_PHYBGCWAV_DISCRETE_MYNRT_013_034%2Fcmems_obs-ins_blk_phybgcwav_mynrt_na_irr_202311%2F)). Note that in the future, we plan to make the index files for those datasets directly available through the filter, regex and/or file-list options.
+ +Then the following command: +``` +copernicusmarine get --dataset-id cmems_obs-ins_blk_phybgcwav_mynrt_na_irr --index-parts +``` + +Returns: +``` +INFO - 2024-03-13T08:08:12Z - Dataset version was not specified, the latest one was selected: "202311" +INFO - 2024-03-13T08:08:12Z - Dataset part was not specified, the first one was selected: "history" +INFO - 2024-03-13T08:08:12Z - You forced selection of service: original-files +INFO - 2024-03-13T08:08:12Z - Downloading using service original-files... +INFO - 2024-03-13T08:08:13Z - You requested the download of the following files: +s3://mdl-native-08/native/INSITU_BLK_PHYBGCWAV_DISCRETE_MYNRT_013_034/cmems_obs-ins_blk_phybgcwav_mynrt_na_irr_202311/index_history.txt - 244.61 kB - 2023-11-30T17:01:25Z +s3://mdl-native-08/native/INSITU_BLK_PHYBGCWAV_DISCRETE_MYNRT_013_034/cmems_obs-ins_blk_phybgcwav_mynrt_na_irr_202311/index_latest.txt - 572.09 kB - 2024-03-13T07:21:00Z +s3://mdl-native-08/native/INSITU_BLK_PHYBGCWAV_DISCRETE_MYNRT_013_034/cmems_obs-ins_blk_phybgcwav_mynrt_na_irr_202311/index_monthly.txt - 1.51 MB - 2024-03-05T18:09:43Z +s3://mdl-native-08/native/INSITU_BLK_PHYBGCWAV_DISCRETE_MYNRT_013_034/cmems_obs-ins_blk_phybgcwav_mynrt_na_irr_202311/index_platform.txt - 209.11 kB - 2024-03-13T07:21:00Z + +Total size of the download: 2.53 MB + + +Do you want to proceed with download? [Y/n]: +``` ### Shared options Both `subset` and `get` commands provide these options: diff --git a/copernicusmarine/catalogue_parser/catalogue_parser.py b/copernicusmarine/catalogue_parser/catalogue_parser.py index ab2da5e6..421e8c90 100644 --- a/copernicusmarine/catalogue_parser/catalogue_parser.py +++ b/copernicusmarine/catalogue_parser/catalogue_parser.py @@ -1,5 +1,6 @@ import asyncio import logging +import os import re from abc import ABC, abstractmethod from collections import defaultdict @@ -13,7 +14,9 @@ import nest_asyncio import pystac -from cachier import cachier +from aiohttp import ContentTypeError +from aioretry import RetryInfo, RetryPolicyStrategy, retry +from cachier.core import cachier from tqdm import tqdm from copernicusmarine.command_line_interface.exception_handler import ( @@ -26,8 +29,10 @@ from copernicusmarine.core_functions.utils import ( CACHE_BASE_DIRECTORY, construct_query_params_for_marine_data_store_monitoring, + datetime_parser, map_reject_none, next_or_raise_exception, + rolling_batch_gather, ) logger = logging.getLogger("copernicus_marine_root_logger") @@ -73,6 +78,10 @@ class _ServiceShortName(str, Enum): MARINE_DATA_STORE_STAC_BASE_URL_STAGING + "/catalog.stac.json" ) +MAX_CONCURRENT_REQUESTS = int( + os.getenv("COPERNICUSMARINE_MAX_CONCURRENT_REQUESTS", "15") +) + @dataclass(frozen=True) class _Service: @@ -109,6 +118,11 @@ class CopernicusMarineDatasetServiceType(_Service, Enum): STATIC_ARCO = _ServiceName.STATIC_ARCO, _ServiceShortName.STATIC_ARCO +class CopernicusMarineServiceFormat(str, Enum): + ZARR = "zarr" + SQLITE = "sqlite" + + def _service_type_from_web_api_string( name: str, ) -> CopernicusMarineDatasetServiceType: @@ -163,6 +177,10 @@ class CopernicusMarineCoordinates: maximum_value: Optional[float] step: Optional[float] values: Optional[str] + chunking_length: Optional[int] + chunk_type: Optional[str] + chunk_reference_coordinate: Optional[int] + chunk_geometric_factor: Optional[int] @dataclass @@ -177,6 +195,7 @@ class CopernicusMarineVariable: @dataclass class CopernicusMarineService: service_type: CopernicusMarineDatasetServiceType + service_format: Optional[CopernicusMarineServiceFormat] uri: str 
variables: list[CopernicusMarineVariable] @@ -431,7 +450,10 @@ class CatalogParserConnection: def __init__(self, proxy: Optional[str] = None) -> None: self.proxy = proxy self.session = get_configured_aiohttp_session() + self.__max_retries = 5 + self.__sleep_time = 1 + @retry("_retry_policy") async def get_json_file(self, url: str) -> dict[str, Any]: logger.debug(f"Fetching json file at this url: {url}") async with self.session.get( @@ -443,6 +465,24 @@ async def get_json_file(self, url: str) -> dict[str, Any]: async def close(self) -> None: await self.session.close() + def _retry_policy(self, info: RetryInfo) -> RetryPolicyStrategy: + if not isinstance( + info.exception, + ( + TimeoutError, + ConnectionResetError, + ContentTypeError, + ), + ): + logger.error( + f"Unexpected error while downloading: {info.exception}" + ) + return True, 0 + logger.debug( + f"Retrying {info.fails} times after error: {info.exception}" + ) + return info.fails >= self.__max_retries, info.fails * self.__sleep_time + def _construct_copernicus_marine_service( stac_service_name, stac_asset, datacube @@ -450,17 +490,18 @@ def _construct_copernicus_marine_service( try: service_uri = stac_asset.get_absolute_href() service_type = _service_type_from_web_api_string(stac_service_name) - if service_type in ( - CopernicusMarineDatasetServiceType.GEOSERIES, - CopernicusMarineDatasetServiceType.TIMESERIES, - ): - if not service_uri.endswith(".zarr"): - return None + service_format = None + if stac_asset.media_type and "zarr" in stac_asset.media_type: + service_format = CopernicusMarineServiceFormat.ZARR + elif stac_asset.media_type and "sqlite3" in stac_asset.media_type: + service_format = CopernicusMarineServiceFormat.SQLITE + if not service_uri.endswith("/"): return CopernicusMarineService( service_type=service_type, - uri=stac_asset.get_absolute_href(), - variables=_get_variables(datacube), + uri=service_uri, + variables=_get_variables(datacube, stac_asset), + service_format=service_format, ) return None except ServiceNotHandled as service_not_handled: @@ -531,78 +572,93 @@ def _get_services( ] -def _get_coordinates( - dimensions_cube: dict, - arco_data_metadata_producer_valid_start_date: Optional[str], -) -> dict[str, CopernicusMarineCoordinates]: - def _create_coordinate( - key: str, - value: dict, - arco_data_metadata_producer_valid_start_date: Optional[str], - ) -> CopernicusMarineCoordinates: - if arco_data_metadata_producer_valid_start_date: - minimum_value = ( - _format_arco_data_metadata_producer_valid_start_date( - arco_data_metadata_producer_valid_start_date - ) - ) - else: - minimum_value = value["extent"][0] if "extent" in value else None - return CopernicusMarineCoordinates( - coordinates_id="depth" if key == "elevation" else key, - units=value.get("unit") or "", - minimum_value=minimum_value, # type: ignore - maximum_value=value["extent"][1] if "extent" in value else None, - step=value.get("step"), - values=value.get("values"), - ) - - coordinates_dict = {} - for key, value in dimensions_cube.items(): - coordinates_dict[key] = _create_coordinate( - key, - value, - ( - arco_data_metadata_producer_valid_start_date - if key == "time" - else None - ), - ) - return coordinates_dict - - def _format_arco_data_metadata_producer_valid_start_date( arco_data_metadata_producer_valid_start_date: str, -) -> str: + to_timestamp: bool = False, +) -> Union[str, int]: + if to_timestamp: + return int( + datetime_parser( + arco_data_metadata_producer_valid_start_date.split(".")[0] + ).timestamp() + * 1000 + ) return 
arco_data_metadata_producer_valid_start_date.split(".")[0] def _get_variables( stac_dataset: pystac.Item, + stac_asset: pystac.Asset, ) -> list[CopernicusMarineVariable]: - def _create_variable( - variable_cube: dict[str, Any], - bbox: tuple[float, float, float, float], - coordinates_dict: dict[str, CopernicusMarineCoordinates], - ) -> Union[CopernicusMarineVariable, None]: - coordinates = variable_cube["dimensions"] - return CopernicusMarineVariable( - short_name=variable_cube["id"], - standard_name=variable_cube["standardName"], - units=variable_cube.get("unit") or "", + bbox = stac_dataset.bbox + return [ + CopernicusMarineVariable( + short_name=var_cube["id"], + standard_name=var_cube["standardName"], + units=var_cube.get("unit") or "", bbox=bbox, - coordinates=[coordinates_dict[key] for key in coordinates], + coordinates=_get_coordinates( + var_cube["id"], + stac_asset, + stac_dataset.properties.get("admp_valid_start_date"), + ) + or [], ) + for var_cube in stac_dataset.properties["cube:variables"].values() + ] - coordinates_dict = _get_coordinates( - stac_dataset.properties["cube:dimensions"], - stac_dataset.properties.get("admp_valid_start_date"), - ) - bbox = stac_dataset.bbox - variables: list[Optional[CopernicusMarineVariable]] = [] - for var_cube in stac_dataset.properties["cube:variables"].values(): - variables += [_create_variable(var_cube, bbox, coordinates_dict)] - return [var for var in variables if var] + +def _get_coordinates( + variable_id: str, + stac_asset: pystac.Asset, + arco_data_metadata_producer_valid_start_date: Optional[str], +) -> Optional[list[CopernicusMarineCoordinates]]: + extra_fields_asset = stac_asset.extra_fields + dimensions = extra_fields_asset.get("viewDims") + if dimensions: + coordinates = [] + for dimension, dimension_metadata in dimensions.items(): + coordinates_info = dimension_metadata.get("coords", {}) + if ( + arco_data_metadata_producer_valid_start_date + and dimension == "time" + ): + minimum_value = ( + _format_arco_data_metadata_producer_valid_start_date( + arco_data_metadata_producer_valid_start_date, + to_timestamp=isinstance( + coordinates_info.get("min"), int + ), + ) + ) + else: + minimum_value = coordinates_info.get("min") + chunking_length = dimension_metadata.get("chunkLen") + if isinstance(chunking_length, dict): + chunking_length = chunking_length.get(variable_id) + coordinates.append( + CopernicusMarineCoordinates( + coordinates_id=( + "depth" if dimension == "elevation" else dimension + ), + units=dimension_metadata.get("units") or "", + minimum_value=minimum_value, # type: ignore + maximum_value=coordinates_info.get("max"), + step=coordinates_info.get("step"), + values=coordinates_info.get("values"), + chunking_length=chunking_length, + chunk_type=dimension_metadata.get("chunkType"), + chunk_reference_coordinate=dimension_metadata.get( + "chunkRefCoord" + ), + chunk_geometric_factor=dimension_metadata.get( + "chunkGeometricFactor", {} + ).get(variable_id), + ) + ) + return coordinates + else: + return None def _construct_marine_data_store_dataset( @@ -735,19 +791,18 @@ async def async_fetch_childs( tasks = [] for link in child_links: tasks.append( - asyncio.ensure_future( - async_fetch_collection( - root_url, connection, link.absolute_href - ) - ) + async_fetch_collection(root_url, connection, link.absolute_href) ) - return filter(lambda x: x is not None, await asyncio.gather(*tasks)) + return filter( + lambda x: x is not None, + await rolling_batch_gather(tasks, MAX_CONCURRENT_REQUESTS), + ) async def async_fetch_catalog( 
connection: CatalogParserConnection, staging: bool = False, -) -> Tuple[pystac.Catalog, Iterator[pystac.Collection]]: +) -> Iterator[pystac.Collection]: catalog_root_url = ( MARINE_DATA_STORE_STAC_ROOT_CATALOG_URL if not staging @@ -762,7 +817,7 @@ async def async_fetch_catalog( else (MARINE_DATA_STORE_STAC_BASE_URL_STAGING) ) childs = await async_fetch_childs(root_url, connection, child_links) - return catalog, childs + return childs def _retrieve_marine_data_store_products( @@ -771,7 +826,7 @@ def _retrieve_marine_data_store_products( ) -> list[ProductFromMarineDataStore]: nest_asyncio.apply() loop = asyncio.get_event_loop() - _, marine_data_store_root_collections = loop.run_until_complete( + marine_data_store_root_collections = loop.run_until_complete( async_fetch_catalog(connection=connection, staging=staging) ) @@ -789,12 +844,20 @@ def parse_catalogue( staging: bool = False, ) -> CopernicusMarineCatalogue: logger.debug("Parsing catalogue...") - catalog = _parse_catalogue( - ignore_cache=no_metadata_cache, - _version=package_version("copernicusmarine"), - disable_progress_bar=disable_progress_bar, - staging=staging, - ) + try: + catalog = _parse_catalogue( + ignore_cache=no_metadata_cache, + _versions=package_version("copernicusmarine"), + disable_progress_bar=disable_progress_bar, + staging=staging, + ) + except ValueError: + catalog = _parse_catalogue( + ignore_cache=True, + _versions=package_version("copernicusmarine"), + disable_progress_bar=disable_progress_bar, + staging=staging, + ) logger.debug("Catalogue parsed") return catalog @@ -803,11 +866,10 @@ def merge_products( products_from_marine_data_store: List[ProductFromMarineDataStore], products_from_portal: List[ProductFromPortal], ) -> List[CopernicusMarineProduct]: - merged_products: List[CopernicusMarineProduct] = [] - for marine_data_store_product in products_from_marine_data_store: - merged_products.append( - marine_data_store_product.to_copernicus_marine_product() - ) + merged_products: List[CopernicusMarineProduct] = [ + marine_data_store_product.to_copernicus_marine_product() + for marine_data_store_product in products_from_marine_data_store + ] for portal_product in products_from_portal: maybe_merged_product = list( @@ -894,7 +956,7 @@ def merge_products( @cachier(cache_dir=CACHE_BASE_DIRECTORY, stale_after=timedelta(hours=24)) def _parse_catalogue( - _version: str, # force cachier to overwrite cache in case of version update + _versions: str, # force cachier to overwrite cache in case of version update disable_progress_bar: bool, staging: bool = False, ) -> CopernicusMarineCatalogue: @@ -942,13 +1004,9 @@ async def _async_fetch_raw_products( ): tasks = [] for product_id in product_ids: - tasks.append( - asyncio.ensure_future( - connection.get_json_file(product_url(product_id)) - ) - ) + tasks.append(connection.get_json_file(product_url(product_id))) - return await asyncio.gather(*tasks) + return await rolling_batch_gather(tasks, MAX_CONCURRENT_REQUESTS) def product_url(product_id: str) -> str: @@ -969,23 +1027,29 @@ def variable_to_pick(layer: dict[str, Any]) -> bool: def _to_service( - service_name: str, service_url: str, layer_elements + service_name: str, + stac_asset: dict, + layer_elements, ) -> Optional[CopernicusMarineService]: + service_format_asset = stac_asset.get("type") + service_url = stac_asset.get("href") try: service_type = _service_type_from_web_api_string(service_name) - if service_type in ( - CopernicusMarineDatasetServiceType.GEOSERIES, - CopernicusMarineDatasetServiceType.TIMESERIES, - ): - if 
not service_url.endswith(".zarr"): - return None - if not service_url.endswith("thredds/dodsC/"): + service_format = None + if service_format_asset and "zarr" in service_format_asset: + service_format = CopernicusMarineServiceFormat.ZARR + elif service_format_asset and "sqlite3" in service_format_asset: + service_format = CopernicusMarineServiceFormat.SQLITE + if service_url and not service_url.endswith("thredds/dodsC/"): return CopernicusMarineService( service_type=service_type, uri=service_url, - variables=list( - map(to_variable, filter(variable_to_pick, layer_elements)) - ), + service_format=service_format, + variables=[ + to_variable(layer, stac_asset) + for layer in layer_elements + if variable_to_pick(layer) + ], ) else: return None @@ -995,7 +1059,9 @@ def _to_service( def to_coordinates( - subset_attributes: Tuple[str, dict[str, Any]], layer: dict[str, Any] + subset_attributes: Tuple[str, dict[str, Any]], + layer: dict[str, Any], + asset: dict, ) -> CopernicusMarineCoordinates: coordinate_name = subset_attributes[0] values: Optional[str] @@ -1005,25 +1071,38 @@ def to_coordinates( values = layer.get("tValues") else: values = None + view_dim = asset.get("viewDims", {}).get(coordinate_name, {}) + chunking_length = view_dim.get("chunkLen") + if isinstance(chunking_length, dict): + chunking_length = chunking_length.get(layer["variableId"]) return CopernicusMarineCoordinates( coordinates_id=subset_attributes[0], - units=subset_attributes[1]["units"], - minimum_value=subset_attributes[1]["min"], - maximum_value=subset_attributes[1]["max"], - step=subset_attributes[1].get("step"), + units=view_dim.get("units", ""), + minimum_value=view_dim.get("min") or view_dim.get(values, [None])[0], + maximum_value=view_dim.get("max") or view_dim.get(values, [None])[0], + step=view_dim.get("step"), values=values, + chunking_length=chunking_length, + chunk_type=view_dim.get("chunkType"), + chunk_reference_coordinate=view_dim.get("chunkRefCoord"), + chunk_geometric_factor=view_dim.get("chunkGeometricFactor", {}) + .get("chunkGeometricFactor", {}) + .get(layer["variableId"]), ) -def to_variable(layer: dict[str, Any]) -> CopernicusMarineVariable: +def to_variable( + layer: dict[str, Any], asset: dict +) -> CopernicusMarineVariable: return CopernicusMarineVariable( short_name=layer["variableId"], standard_name=variable_title_to_standard_name(layer["variableTitle"]), units=layer["units"], bbox=layer["bbox"], - coordinates=list( - map(to_coordinates, layer["subsetAttrs"].items(), repeat(layer)) - ), + coordinates=[ + to_coordinates(subset_attr, layer, asset) + for subset_attr in layer["subsetAttrs"].items() + ], ) @@ -1053,7 +1132,7 @@ def mds_stac_to_services( ) in stac_assets.items(): service = _to_service( service_name, - service_url["href"], + service_url, distinct_dataset_version.layer_elements, ) if service: @@ -1074,7 +1153,7 @@ def portal_services_to_services( copernicus_marine_services.append( _to_service( service_name, - service_url, + {"href": service_url}, layer_elements, ) ) diff --git a/copernicusmarine/catalogue_parser/request_structure.py b/copernicusmarine/catalogue_parser/request_structure.py index 923cd7e2..ae455a79 100644 --- a/copernicusmarine/catalogue_parser/request_structure.py +++ b/copernicusmarine/catalogue_parser/request_structure.py @@ -1,6 +1,7 @@ import fnmatch import logging import pathlib +import re from dataclasses import dataclass, field from datetime import datetime from json import load @@ -208,8 +209,10 @@ class GetRequest: force_service: Optional[str] = None filter: 
Optional[str] = None regex: Optional[str] = None + file_list: Optional[pathlib.Path] = None sync: bool = False sync_delete: bool = False + index_parts: bool = False def update(self, new_dict: dict): """Method to update values in GetRequest object. @@ -253,17 +256,19 @@ def get_request_from_file(filepath: pathlib.Path) -> GetRequest: get_request = GetRequest() get_request.__dict__.update(json_with_mapped_options) get_request.enforce_types() + full_regex = get_request.regex if get_request.filter: - if get_request.regex: - get_request.regex = ( - "(" - + get_request.regex - + "|" - + fnmatch.translate(get_request.filter) - + ")" - ) - else: - get_request.regex = fnmatch.translate(get_request.filter) + filter_regex = filter_to_regex(get_request.filter) + full_regex = overload_regex_with_additionnal_filter( + filter_regex, full_regex + ) + if get_request.file_list: + file_list_regex = file_list_to_regex(get_request.file_list) + full_regex = overload_regex_with_additionnal_filter( + file_list_regex, full_regex + ) + get_request.regex = full_regex + return get_request @@ -300,3 +305,20 @@ def get_time_and_geographical_subset( start_datetime=self.temporal_parameters.start_datetime, end_datetime=self.temporal_parameters.end_datetime, ) + + +def filter_to_regex(filter: str) -> str: + return fnmatch.translate(filter) + + +def file_list_to_regex(file_list_path: pathlib.Path) -> str: + pattern = "" + with open(file_list_path) as file_list: + pattern = "|".join(map(re.escape, file_list.read().splitlines())) + return pattern + + +def overload_regex_with_additionnal_filter( + regex: str, filter: Optional[str] +) -> str: + return "(" + regex + "|" + filter + ")" if filter else regex diff --git a/copernicusmarine/command_line_interface/group_get.py b/copernicusmarine/command_line_interface/group_get.py index 32122935..7a2039b7 100644 --- a/copernicusmarine/command_line_interface/group_get.py +++ b/copernicusmarine/command_line_interface/group_get.py @@ -191,6 +191,25 @@ def cli_group_get() -> None: help="The regular expression that must match the absolute paths of " "the files to download.", ) +@click.option( + "--file-list", + type=pathlib.Path, + default=None, + help="A path to a text file that list filenames line by line. " + "Filenames must match the absolute paths of " + "the files to download.", +) +@click.option( + "--download-file-list", + type=bool, + is_flag=True, + default=False, + help="Option to only create a file files_to_download.txt containing " + "the names of the targeted files instead of downloading them. " + "It writes the file in the directory specified with the " + "--output-directory option (default to current directory). " + "If specified, no other action will be performed.", +) @click.option( "--sync", cls=MutuallyExclusiveOption, @@ -209,6 +228,13 @@ def cli_group_get() -> None: "the remote server while applying sync.", mutually_exclusive=["no-directories"], ) +@click.option( + "--index-parts", + type=bool, + is_flag=True, + default=False, + help="Option to get the index files of an INSITU dataset. 
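To see how the three request_structure helpers compose, here is a worked example (file names taken from tests/resources/file_list_example.txt): --filter is translated from a glob, --file-list entries are escaped literally, and the results are OR-ed into one pattern.

import fnmatch
import re

filter_regex = fnmatch.translate("*01yav_200[0-2]*")  # glob -> regex
file_list_regex = "|".join(map(re.escape, [
    "2022/01/nrt_global_allsat_phy_l4_20220119_20220125.nc",
    "2022/01/nrt_global_allsat_phy_l4_20220120_20220126.nc",
]))
combined = "(" + file_list_regex + "|" + filter_regex + ")"
assert re.search(combined, "2022/01/nrt_global_allsat_phy_l4_20220119_20220125.nc")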
Temporary option.", +) @tqdm_disable_option @click.option( "--log-level", @@ -247,8 +273,11 @@ def get( no_metadata_cache: bool, filter: Optional[str], regex: Optional[str], + file_list: Optional[pathlib.Path], + download_file_list: bool, sync: bool, sync_delete: bool, + index_parts: bool, disable_progress_bar: bool, log_level: str, staging: bool, @@ -270,26 +299,29 @@ def get( return return get_function( - dataset_url, - dataset_id, - dataset_version, - dataset_part, - username, - password, - no_directories, - show_outputnames, - output_directory, - credentials_file, - force_download, - overwrite_output_data, - request_file, - service, - overwrite_metadata_cache, - no_metadata_cache, - filter, - regex, - sync, - sync_delete, - disable_progress_bar, - staging, + dataset_url=dataset_url, + dataset_id=dataset_id, + force_dataset_version=dataset_version, + force_dataset_part=dataset_part, + username=username, + password=password, + no_directories=no_directories, + show_outputnames=show_outputnames, + output_directory=output_directory, + credentials_file=credentials_file, + force_download=force_download, + overwrite_output_data=overwrite_output_data, + request_file=request_file, + force_service=service, + overwrite_metadata_cache=overwrite_metadata_cache, + no_metadata_cache=no_metadata_cache, + filter=filter, + regex=regex, + file_list_path=file_list, + download_file_list=download_file_list, + sync=sync, + sync_delete=sync_delete, + index_parts=index_parts, + disable_progress_bar=disable_progress_bar, + staging=staging, ) diff --git a/copernicusmarine/command_line_interface/group_login.py b/copernicusmarine/command_line_interface/group_login.py index d284b82b..c1e1e9ef 100644 --- a/copernicusmarine/command_line_interface/group_login.py +++ b/copernicusmarine/command_line_interface/group_login.py @@ -43,7 +43,6 @@ def cli_group_login() -> None: ) @click.option( "--username", - prompt="username", hide_input=False, help="If not set, search for environment variable" + " COPERNICUS_MARINE_SERVICE_USERNAME" @@ -51,7 +50,6 @@ def cli_group_login() -> None: ) @click.option( "--password", - prompt="password", hide_input=True, help="If not set, search for environment variable" + " COPERNICUS_MARINE_SERVICE_PASSWORD" @@ -70,6 +68,12 @@ def cli_group_login() -> None: default=False, help="Flag to skip confirmation before overwriting configuration file.", ) +@click.option( + "--skip-if-user-logged-in", + is_flag=True, + default=False, + help="Flag to skip the logging process if the user is already logged in.", +) @click.option( "--log-level", type=click.Choice(["DEBUG", "INFO", "WARN", "ERROR", "CRITICAL", "QUIET"]), @@ -85,6 +89,7 @@ def login( password: Optional[str], configuration_file_directory: pathlib.Path, overwrite_configuration_file: bool, + skip_if_user_logged_in: bool, log_level: str = "INFO", ) -> None: if log_level == "QUIET": @@ -97,4 +102,5 @@ def login( password=password, configuration_file_directory=configuration_file_directory, overwrite_configuration_file=overwrite_configuration_file, + skip_if_user_logged_in=skip_if_user_logged_in, ) diff --git a/copernicusmarine/core_functions/credentials_utils.py b/copernicusmarine/core_functions/credentials_utils.py index 448b18c9..ec8a0aa6 100644 --- a/copernicusmarine/core_functions/credentials_utils.py +++ b/copernicusmarine/core_functions/credentials_utils.py @@ -167,6 +167,34 @@ def _retrieve_credential_from_configuration_files( return credential +def copernicusmarine_configuration_file_exists( + configuration_file_directory: pathlib.Path, +) -> 
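The new --skip-if-user-logged-in flag short-circuits login when a configuration file already exists and its credentials still validate against CAS. A compressed sketch of that gate, where the credentials filename and the validator are stand-ins for the real helpers in credentials_utils:

import pathlib

CREDENTIALS_FILENAME = ".copernicusmarine-credentials"  # assumed default name

def _credentials_are_valid(config_file: pathlib.Path) -> bool:
    # The real code re-reads username/password from the file and
    # re-checks them against the CAS endpoint.
    return config_file.exists()

def should_skip_login(config_dir: pathlib.Path) -> bool:
    config_file = config_dir / CREDENTIALS_FILENAME
    return config_file.exists() and _credentials_are_valid(config_file)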
bool: + configuration_filename = pathlib.Path( + configuration_file_directory / DEFAULT_CLIENT_CREDENTIALS_FILENAME + ) + return configuration_filename.exists() + + +def copernicusmarine_configuration_file_is_valid( + configuration_file_directory: pathlib.Path, +) -> bool: + configuration_filename = pathlib.Path( + configuration_file_directory / DEFAULT_CLIENT_CREDENTIALS_FILENAME + ) + username = _retrieve_credential_from_configuration_files( + "username", configuration_filename + ) + password = _retrieve_credential_from_configuration_files( + "password", configuration_filename + ) + return ( + username is not None + and password is not None + and _check_credentials_with_cas(username, password) + ) + + def create_copernicusmarine_configuration_file( username: str, password: str, @@ -198,7 +226,6 @@ def create_copernicusmarine_configuration_file( def _check_credentials_with_cas(username: str, password: str) -> bool: logger.debug("Checking user credentials...") - user_is_active = False service = "copernicus-marine-client" cmems_cas_login_url = ( f"https://cmems-cas.cls.fr/cas/login?service={service}" @@ -226,10 +253,8 @@ def _check_credentials_with_cas(username: str, password: str) -> bool: .replace("login?", f"serviceValidate?service={service}&") ) logger.debug(f"Getting profile to {get_profile_url}...") - profile_response = conn_session.get(get_profile_url) - user_is_active = '"status" value="active"' in profile_response.text logger.debug("User credentials checked") - return user_is_active + return True @cachier(stale_after=timedelta(hours=48), cache_dir=CACHE_BASE_DIRECTORY) diff --git a/copernicusmarine/core_functions/get.py b/copernicusmarine/core_functions/get.py index 0ae263c7..8bfc2fd8 100644 --- a/copernicusmarine/core_functions/get.py +++ b/copernicusmarine/core_functions/get.py @@ -1,4 +1,3 @@ -import fnmatch import json import logging import pathlib @@ -10,7 +9,10 @@ ) from copernicusmarine.catalogue_parser.request_structure import ( GetRequest, + file_list_to_regex, + filter_to_regex, get_request_from_file, + overload_regex_with_additionnal_filter, ) from copernicusmarine.core_functions.credentials_utils import ( get_and_check_username_password, @@ -53,8 +55,11 @@ def get_function( no_metadata_cache: bool, filter: Optional[str], regex: Optional[str], + file_list_path: Optional[pathlib.Path], + download_file_list: bool, sync: bool, sync_delete: bool, + index_parts: bool, disable_progress_bar: bool, staging: bool, ) -> List[pathlib.Path]: @@ -101,9 +106,16 @@ def get_function( if force_service: get_request.force_service = force_service if filter: - get_request.regex = _filter_to_regex(filter) + get_request.regex = filter_to_regex(filter) + if file_list_path: + file_list_regex = file_list_to_regex(file_list_path) + get_request.regex = overload_regex_with_additionnal_filter( + file_list_regex, get_request.regex + ) if regex: - get_request.regex = _overload_regex_with_filter(regex, filter) + get_request.regex = overload_regex_with_additionnal_filter( + regex, get_request.regex + ) if sync or sync_delete: get_request.sync = True if not get_request.force_dataset_version: @@ -113,32 +125,35 @@ def get_function( ) if sync_delete: get_request.sync_delete = sync_delete + if index_parts: + if force_service == "ftp": + raise ValueError( + "Index part flag is not supported for FTP services. " + "Please use '--force-service files' option." 
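When --index-parts is set, get_function forces the "files" service and widens the regex so the dataset's index files always match. A worked example of the resulting pattern (the user-supplied --regex value is illustrative):

import fnmatch
import re

index_regex = fnmatch.translate("*index_*")
user_regex = r".*_latest\.txt"
effective = "(" + index_regex + "|" + user_regex + ")"
assert re.search(effective, "native/INSITU_GLO/index_history.txt")
assert re.search(effective, "native/INSITU_GLO/index_latest.txt")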
+ ) + get_request.index_parts = index_parts + get_request.force_service = "files" + get_request.regex = overload_regex_with_additionnal_filter( + filter_to_regex("*index_*"), get_request.regex + ) return _run_get_request( - username, - password, - get_request, - credentials_file, - no_metadata_cache, - disable_progress_bar, + username=username, + password=password, + get_request=get_request, + download_file_list=download_file_list, + credentials_file=credentials_file, + no_metadata_cache=no_metadata_cache, + disable_progress_bar=disable_progress_bar, staging=staging, ) -def _filter_to_regex(filter: str) -> str: - return fnmatch.translate(filter) - - -def _overload_regex_with_filter(regex: str, filter: Optional[str]) -> str: - return ( - "(" + regex + "|" + _filter_to_regex(filter) + ")" if filter else regex - ) - - def _run_get_request( username: Optional[str], password: Optional[str], get_request: GetRequest, + download_file_list: bool, credentials_file: Optional[pathlib.Path], no_metadata_cache: bool, disable_progress_bar: bool, @@ -163,6 +178,7 @@ def _run_get_request( get_request.force_dataset_part, get_request.force_service, CommandType.GET, + get_request.index_parts, dataset_sync=get_request.sync, ) get_request.dataset_url = retrieval_service.uri @@ -182,11 +198,16 @@ def _run_get_request( password, get_request, disable_progress_bar, + download_file_list, ) if retrieval_service.service_type == CopernicusMarineDatasetServiceType.FTP else download_original_files( - username, password, get_request, disable_progress_bar + username, + password, + get_request, + disable_progress_bar, + download_file_list, ) ) logger.debug(downloaded_files) @@ -200,21 +221,27 @@ def create_get_template() -> None: with open(filename, "w") as output_file: json.dump( { - "dataset_url": ( - "ftp://my.cmems-du.eu/Core/" - "IBI_MULTIYEAR_PHY_005_002/" - "cmems_mod_ibi_phy_my_0.083deg-3D_P1Y-m" - ), + "dataset_id": "cmems_mod_ibi_phy_my_0.083deg-3D_P1Y-m", + "dataset_version": None, + "dataset_part": None, + "username": None, + "password": None, + "no_directories": False, "filter": "*01yav_200[0-2]*", - "regex": False, + "regex": None, "output_directory": "copernicusmarine_data", "show_outputnames": True, - "force_service": "files", + "service": "files", "force_download": False, - "request_file": False, + "file_list": None, + "sync": False, + "sync_delete": False, + "index_parts": False, + "disable_progress_bar": False, "overwrite_output_data": False, "overwrite_metadata_cache": False, "no_metadata_cache": False, + "log_level": "INFO", }, output_file, indent=4, diff --git a/copernicusmarine/core_functions/login.py b/copernicusmarine/core_functions/login.py index 0ae66d39..6635d017 100644 --- a/copernicusmarine/core_functions/login.py +++ b/copernicusmarine/core_functions/login.py @@ -3,6 +3,8 @@ from typing import Optional from copernicusmarine.core_functions.credentials_utils import ( + copernicusmarine_configuration_file_exists, + copernicusmarine_configuration_file_is_valid, credentials_file_builder, ) @@ -14,7 +16,19 @@ def login_function( password: Optional[str], configuration_file_directory: pathlib.Path, overwrite_configuration_file: bool, + skip_if_user_logged_in: bool, ) -> bool: + if ( + skip_if_user_logged_in + and copernicusmarine_configuration_file_exists( + configuration_file_directory + ) + and copernicusmarine_configuration_file_is_valid( + configuration_file_directory + ) + ): + logger.info("You are already logged in. 
Skipping login.") + return True credentials_file = credentials_file_builder( username=username, password=password, diff --git a/copernicusmarine/core_functions/services_utils.py b/copernicusmarine/core_functions/services_utils.py index 8ddba624..7850ab5e 100644 --- a/copernicusmarine/core_functions/services_utils.py +++ b/copernicusmarine/core_functions/services_utils.py @@ -11,6 +11,7 @@ CopernicusMarineDatasetVersion, CopernicusMarineProductDataset, CopernicusMarineService, + CopernicusMarineServiceFormat, CopernicusMarineVersionPart, dataset_version_not_found_exception, ) @@ -18,7 +19,10 @@ DatasetTimeAndGeographicalSubset, ) from copernicusmarine.core_functions import sessions -from copernicusmarine.core_functions.utils import next_or_raise_exception +from copernicusmarine.core_functions.utils import ( + FormatNotSupported, + next_or_raise_exception, +) from copernicusmarine.download_functions.subset_xarray import ( get_size_of_coordinate_subset, ) @@ -249,6 +253,11 @@ def _select_service_by_priority( and command_type in [CommandType.SUBSET, CommandType.LOAD] and dataset_subset is not None ): + if ( + first_available_service.service_format + == CopernicusMarineServiceFormat.SQLITE + ): + raise FormatNotSupported(first_available_service.service_format) best_arco_service_type: CopernicusMarineDatasetServiceType = ( _get_best_arco_service_type( dataset_subset, first_available_service.uri @@ -263,7 +272,7 @@ def _select_service_by_priority( def parse_dataset_id_and_service_and_suffix_path_from_url( catalogue: CopernicusMarineCatalogue, dataset_url: Optional[str], -) -> Tuple[str, CopernicusMarineDatasetServiceType, str]: +) -> Tuple[str, CopernicusMarineDatasetServiceType, str,]: if dataset_url is None: syntax_error = SyntaxError( "Must specify at least one of " @@ -297,6 +306,7 @@ def parse_dataset_id_and_service_and_suffix_path_from_url( class RetrievalService: dataset_id: str service_type: CopernicusMarineDatasetServiceType + service_format: Optional[CopernicusMarineServiceFormat] uri: str dataset_valid_start_date: Optional[str] @@ -309,6 +319,7 @@ def get_retrieval_service( force_dataset_part_label: Optional[str], force_service_type_string: Optional[str], command_type: CommandType, + index_parts: bool = False, dataset_subset: Optional[DatasetTimeAndGeographicalSubset] = None, dataset_sync: bool = False, ) -> RetrievalService: @@ -344,6 +355,7 @@ def get_retrieval_service( force_dataset_part_label=force_dataset_part_label, force_service_type=force_service_type, command_type=command_type, + index_parts=index_parts, dataset_subset=dataset_subset, dataset_sync=dataset_sync, ) @@ -357,6 +369,7 @@ def _get_retrieval_service_from_dataset_id( force_dataset_part_label: Optional[str], force_service_type: Optional[CopernicusMarineDatasetServiceType], command_type: CommandType, + index_parts: bool, dataset_subset: Optional[DatasetTimeAndGeographicalSubset], dataset_sync: bool, ) -> RetrievalService: @@ -380,6 +393,7 @@ def _get_retrieval_service_from_dataset_id( force_dataset_part_label=force_dataset_part_label, force_service_type=force_service_type, command_type=command_type, + index_parts=index_parts, dataset_subset=dataset_subset, dataset_sync=dataset_sync, ) @@ -392,6 +406,7 @@ def _get_retrieval_service_from_dataset( force_dataset_part_label: Optional[str], force_service_type: Optional[CopernicusMarineDatasetServiceType], command_type: CommandType, + index_parts: bool, dataset_subset: Optional[DatasetTimeAndGeographicalSubset], dataset_sync: bool, ) -> RetrievalService: @@ -423,6 +438,7 @@ 
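Services whose format is sqlite (the in-situ ARCO flavour) cannot be subset yet, so service selection fails fast with FormatNotSupported. A minimal sketch of the guard, with a plain string standing in for the enum:

from typing import Optional

class FormatNotSupported(Exception):
    def __init__(self, format_type):
        super().__init__(
            f"Subsetting format type {format_type} not supported yet."
        )

def ensure_subsettable(service_format: Optional[str]) -> None:
    if service_format == "sqlite":
        raise FormatNotSupported(service_format)

ensure_subsettable("zarr")      # passes silently
# ensure_subsettable("sqlite")  # -> FormatNotSupported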
def _get_retrieval_service_from_dataset( suffix_path=suffix_path, force_service_type=force_service_type, command_type=command_type, + index_parts=index_parts, dataset_subset=dataset_subset, dataset_sync=dataset_sync, ) @@ -435,6 +451,7 @@ def _get_retrieval_service_from_dataset_version( suffix_path: str, force_service_type: Optional[CopernicusMarineDatasetServiceType], command_type: CommandType, + index_parts: bool, dataset_subset: Optional[DatasetTimeAndGeographicalSubset], dataset_sync: bool, ) -> RetrievalService: @@ -445,6 +462,7 @@ def _get_retrieval_service_from_dataset_version( if ( force_service_type == CopernicusMarineDatasetServiceType.FILES and not force_dataset_part_label + and not index_parts and len(dataset_version.parts) > 1 ): raise Exception( @@ -475,6 +493,8 @@ def _get_retrieval_service_from_dataset_version( force_service_type=force_service_type, command_type=command_type, ) + if service.service_format == CopernicusMarineServiceFormat.SQLITE: + raise FormatNotSupported(service.service_format) else: service = _select_service_by_priority( dataset_version_part=dataset_part, @@ -492,6 +512,7 @@ def _get_retrieval_service_from_dataset_version( service_type=service.service_type, uri=service.uri + suffix_path, dataset_valid_start_date=dataset_start_date, + service_format=service.service_format, ) diff --git a/copernicusmarine/core_functions/subset.py b/copernicusmarine/core_functions/subset.py index 45212011..32439663 100644 --- a/copernicusmarine/core_functions/subset.py +++ b/copernicusmarine/core_functions/subset.py @@ -7,6 +7,7 @@ from copernicusmarine.catalogue_parser.catalogue_parser import ( CopernicusMarineDatasetServiceType, + CopernicusMarineServiceFormat, parse_catalogue, ) from copernicusmarine.catalogue_parser.request_structure import ( @@ -204,7 +205,7 @@ def subset_function( subset_request.force_dataset_part, subset_request.force_service, CommandType.SUBSET, - subset_request.get_time_and_geographical_subset(), + dataset_subset=subset_request.get_time_and_geographical_subset(), ) subset_request.dataset_url = retrieval_service.uri if ( @@ -230,18 +231,26 @@ def subset_function( CopernicusMarineDatasetServiceType.OMI_ARCO, CopernicusMarineDatasetServiceType.STATIC_ARCO, ]: - output_path = download_zarr( - username, - password, - subset_request, - retrieval_service.dataset_id, - disable_progress_bar, - retrieval_service.dataset_valid_start_date, - ) + if ( + retrieval_service.service_format + == CopernicusMarineServiceFormat.ZARR + ): + output_path = download_zarr( + username, + password, + subset_request, + retrieval_service.dataset_id, + disable_progress_bar, + retrieval_service.dataset_valid_start_date, + ) elif ( retrieval_service.service_type == CopernicusMarineDatasetServiceType.OPENDAP ): + logger.warning( + "The OPeNDAP service is deprecated, please use one of " + "'arco-geo-series', 'arco-time-series', 'omi-arco', 'static-arco' instead." + ) output_path = download_opendap( username, password, @@ -253,6 +262,10 @@ def subset_function( retrieval_service.service_type == CopernicusMarineDatasetServiceType.MOTU ): + logger.warning( + "The MOTU service is deprecated, please use one of " + "'arco-geo-series', 'arco-time-series', 'omi-arco', 'static-arco' instead." 
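Given the deprecation warnings added to the OPeNDAP and MOTU branches, subset requests should now target an ARCO service explicitly. A sketch of the equivalent Python call (parameter names mirror the CLI flags; values are illustrative, not a definitive invocation):

import copernicusmarine

copernicusmarine.subset(
    dataset_id="cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m",
    variables=["thetao"],
    start_datetime="2023-05-10",
    end_datetime="2023-05-12",
    minimum_longitude=-18,
    maximum_longitude=-10,
    minimum_latitude=35,
    maximum_latitude=40,
    service="arco-geo-series",  # instead of the deprecated "opendap"/"motu"
    output_filename="data.zarr",
    force_download=True,
)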
+ ) output_path = download_motu( username, password, diff --git a/copernicusmarine/core_functions/utils.py b/copernicusmarine/core_functions/utils.py index 6ddb6e6d..76aa7b12 100644 --- a/copernicusmarine/core_functions/utils.py +++ b/copernicusmarine/core_functions/utils.py @@ -1,9 +1,21 @@ +import asyncio import logging import os import pathlib from datetime import datetime from importlib.metadata import version -from typing import Callable, Iterable, Iterator, Optional, TypeVar +from typing import ( + Any, + Awaitable, + Callable, + Coroutine, + Iterable, + Iterator, + List, + Optional, + TypeVar, + Union, +) import xarray from requests import PreparedRequest @@ -62,6 +74,13 @@ def __init__(self, service_type): super().__init__(f"Service type {service_type} not supported.") +class FormatNotSupported(Exception): + def __init__(self, format_type): + super().__init__( + f"Subsetting format type {format_type} not supported yet." + ) + + _T = TypeVar("_T") _S = TypeVar("_S") @@ -135,3 +154,22 @@ def delete_cache_folder(quiet: bool = False): except Exception as exc: logger.warning("Error occurred while deleting old cache files") raise exc + + +async def rolling_batch_gather( + promises: Union[List[Coroutine[Any, Any, Any]], List[Awaitable[Any]]], + per_batch: int, +) -> List[Any]: + tasks: asyncio.Queue = asyncio.Queue() + for promise in promises: + tasks.put_nowait(promise) + + async def worker(): + res = [] + while not tasks.empty(): + res.append(await tasks.get_nowait()) + + return res + + results = await asyncio.gather(*[worker() for _ in range(per_batch)]) + return [s for r in results for s in r] diff --git a/copernicusmarine/download_functions/download_ftp.py b/copernicusmarine/download_functions/download_ftp.py index ad10970b..a6aa071f 100644 --- a/copernicusmarine/download_functions/download_ftp.py +++ b/copernicusmarine/download_functions/download_ftp.py @@ -65,11 +65,16 @@ def download_ftp( password: str, get_request: GetRequest, disable_progress_bar: bool, + download_file_list: bool, ) -> list[pathlib.Path]: + logger.warning( + "The FTP service is deprecated, please use 'original-files' instead." 
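rolling_batch_gather above replaces a flat asyncio.gather so that at most per_batch requests are in flight at once: per_batch workers drain a shared queue of coroutines. A usage sketch with a dummy coroutine:

import asyncio

from copernicusmarine.core_functions.utils import rolling_batch_gather

async def fetch(i: int) -> int:
    await asyncio.sleep(0.01)  # stand-in for one catalogue HTTP request
    return i

async def main() -> None:
    coroutines = [fetch(i) for i in range(100)]
    # Only 15 coroutines run concurrently; per-worker result lists are
    # flattened at the end, so ordering is not guaranteed, unlike
    # asyncio.gather on the flat list.
    results = await rolling_batch_gather(coroutines, per_batch=15)
    assert sorted(results) == list(range(100))

asyncio.run(main())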
+ ) filenames_in, filenames_out, host = download_get( username, password, get_request, + download_file_list, download_header, create_filenames_out, ) @@ -127,7 +132,12 @@ def download_files( def download_header( - data_path: str, regex: Optional[str], username: str, password: str + data_path: str, + regex: Optional[str], + username: str, + password: str, + output_directory: pathlib.Path, + download_file_list: bool, ) -> Tuple[str, str, list[str], float]: (host, path) = parse_ftp_dataset_url(data_path) logger.debug(f"Downloading header via FTP on {host + path}") @@ -137,9 +147,20 @@ def download_header( raw_filenames = get_filenames_recursively(ftp, path) if regex: regex_compiled = re.compile(regex) - filenames = list(filter(regex_compiled.match, raw_filenames)) + filenames = list(filter(regex_compiled.search, raw_filenames)) else: filenames = raw_filenames + + if download_file_list: + download_filename = get_unique_filename( + output_directory / "files_to_download.txt", False + ) + logger.info(f"The file list is written at {download_filename}") + with open(download_filename, "w") as file_out: + for filename in filenames: + file_out.write(f"{filename}\n") + exit(0) + pool = ThreadPool() nfilenames_per_process, nfilenames = 100, len(filenames) indexes = append( diff --git a/copernicusmarine/download_functions/download_get.py b/copernicusmarine/download_functions/download_get.py index 6b857afd..f0cbd5a9 100644 --- a/copernicusmarine/download_functions/download_get.py +++ b/copernicusmarine/download_functions/download_get.py @@ -17,6 +17,7 @@ def download_get( username: str, password: str, get_request: GetRequest, + download_file_list: bool, download_header: Callable, create_filenames_out: Callable, ) -> Tuple[List[str], List[pathlib.Path], Any]: @@ -26,6 +27,8 @@ def download_get( get_request.regex, username, password, + pathlib.Path(get_request.output_directory), + download_file_list, ) filenames_out = create_filenames_out( filenames_in=filenames_in, diff --git a/copernicusmarine/download_functions/download_original_files.py b/copernicusmarine/download_functions/download_original_files.py index 0725c1ec..22b53454 100644 --- a/copernicusmarine/download_functions/download_original_files.py +++ b/copernicusmarine/download_functions/download_original_files.py @@ -45,43 +45,62 @@ def download_original_files( password: str, get_request: GetRequest, disable_progress_bar: bool, + download_file_list: bool, ) -> list[pathlib.Path]: - message, locator, filenames_in, total_size = _download_header( + ( + message, + locator, + filenames_in, + total_size, + filenames_in_sync_ignored, + ) = _download_header( str(get_request.dataset_url), get_request.regex, username, password, get_request.sync, + download_file_list, pathlib.Path(get_request.output_directory), + only_list_root_path=get_request.index_parts, ) filenames_out = create_filenames_out( filenames_in=filenames_in, output_directory=pathlib.Path(get_request.output_directory), no_directories=get_request.no_directories, - overwrite=get_request.overwrite_output_data - if not get_request.sync - else False, + overwrite=( + get_request.overwrite_output_data + if not get_request.sync + else False + ), ) if not get_request.force_download: logger.info(message) - if not total_size: - logger.info("No data to download") - exit(1) if get_request.show_outputnames: logger.info("Output filenames:") for filename_out in filenames_out: logger.info(filename_out) files_to_delete = [] if get_request.sync_delete: + filenames_out_sync_ignored = create_filenames_out( + 
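Both downloaders now apply the regex with search instead of match, which matters because filters target full remote paths, not bare filenames. For example (the path layout is illustrative):

import re

pattern = re.compile(r"nrt_global_allsat_phy_l4_20220119_20220125\.nc")
path = (
    "native/SEALEVEL_GLO_PHY_L4_NRT_008_046/"
    "cmems_obs-sl_glo_phy-ssh_nrt_allsat-l4-duacs-0.25deg_P1D_202112/"
    "2022/01/nrt_global_allsat_phy_l4_20220119_20220125.nc"
)
assert pattern.match(path) is None       # match anchors at position 0
assert pattern.search(path) is not None  # search scans the whole path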
filenames_in=filenames_in_sync_ignored, + output_directory=pathlib.Path(get_request.output_directory), + no_directories=get_request.no_directories, + overwrite=False, + unique_names_compared_to_local_files=False, + ) files_to_delete = _get_files_to_delete_with_sync( - filenames_in=filenames_in, + filenames_in=filenames_in_sync_ignored, output_directory=pathlib.Path(get_request.output_directory), - filenames_out=filenames_out, + filenames_out=filenames_out_sync_ignored, ) if files_to_delete: logger.info("Some files will be deleted due to sync delete:") for file_to_delete in files_to_delete: logger.info(file_to_delete) + if not total_size: + logger.info("No data to download") + if not files_to_delete: + exit(1) if not get_request.force_download: click.confirm( FORCE_DOWNLOAD_CLI_PROMPT_MESSAGE, default=True, abort=True @@ -174,38 +193,42 @@ def _download_header( username: str, _password: str, sync: bool, + download_file_list: bool, directory_out: pathlib.Path, - sync_delete: bool = False, -) -> Tuple[str, Tuple[str, str], list[str], float]: - (endpoint_url, bucket, path) = parse_original_files_dataset_url(data_path) + only_list_root_path: bool = False, +) -> Tuple[str, Tuple[str, str], list[str], float, list[str]]: + (endpoint_url, bucket, path) = parse_original_files_dataset_url( + data_path, only_list_root_path + ) + filenames, sizes, total_size = [], [], 0.0 raw_filenames = _list_files_on_marine_data_lake_s3( - username, endpoint_url, bucket, path + username, endpoint_url, bucket, path, not only_list_root_path ) filename_filtered = [] + filenames_without_sync = [] for filename, size, last_modified_datetime in raw_filenames: - if not regex or re.match(regex, filename): + if not regex or re.search(regex, filename): + filenames_without_sync.append(filename) if not sync or _check_needs_to_be_synced( filename, size, last_modified_datetime, directory_out ): - filenames += [filename] - sizes += [float(size)] + filenames.append(filename) + sizes.append(float(size)) total_size += float(size) filename_filtered.append( (filename, size, last_modified_datetime) ) - if sync_delete: - files_to_delete = list(directory_out.glob("**/*")) - for file_to_delete in files_to_delete: - if not any( - [ - _local_path_from_s3_url(filename, directory_out) - == file_to_delete - for filename, _, _ in filename_filtered - ] - ): - file_to_delete.unlink() + if download_file_list: + download_filename = get_unique_filename( + directory_out / "files_to_download.txt", False + ) + logger.info(f"The file list is written at {download_filename}") + with open(download_filename, "w") as file_out: + for filename, _, _ in filename_filtered: + file_out.write(f"{filename}\n") + exit(0) message = "You requested the download of the following files:\n" for filename, size, last_modified_datetime in filename_filtered[:20]: @@ -224,7 +247,7 @@ def _download_header( f"\nTotal size of the download: {format_file_size(total_size)}\n\n" ) locator = (endpoint_url, bucket) - return (message, locator, filenames, total_size) + return (message, locator, filenames, total_size, filenames_without_sync) def _check_needs_to_be_synced( @@ -258,6 +281,7 @@ def _list_files_on_marine_data_lake_s3( endpoint_url: str, bucket: str, prefix: str, + recursive: bool, ) -> list[tuple[str, int, datetime.datetime]]: def _add_custom_query_param(params, context, **kwargs): """ @@ -293,9 +317,15 @@ def _add_custom_query_param(params, context, **kwargs): ) paginator = s3_client.get_paginator("list_objects") - page_iterator = paginator.paginate(Bucket=bucket, Prefix=prefix) 
+ page_iterator = paginator.paginate( + Bucket=bucket, + Prefix=prefix, + Delimiter="/" if not recursive else "", + ) - s3_objects = chain(*map(lambda page: page["Contents"], page_iterator)) + s3_objects = chain( + *map(lambda page: page.get("Contents", []), page_iterator) + ) files_already_found = [] for s3_object in s3_objects: @@ -384,7 +414,9 @@ def _original_files_file_download( # Example data_path # https://s3.waw3-1.cloudferro.com/mdl-native-01/native/NWSHELF_MULTIYEAR_BGC_004_011/cmems_mod_nws_bgc-pft_myint_7km-3D-diato_P1M-m_202105 -def parse_original_files_dataset_url(data_path: str) -> Tuple[str, str, str]: +def parse_original_files_dataset_url( + data_path: str, only_dataset_root_path: bool +) -> Tuple[str, str, str]: match = re.search( r"^(http|https):\/\/([\w\-\.]+)(:[\d]+)?(\/.*)", data_path ) @@ -393,7 +425,11 @@ def parse_original_files_dataset_url(data_path: str) -> Tuple[str, str, str]: full_path = match.group(4) segments = full_path.split("/") bucket = segments[1] - path = "/".join(segments[2:]) + path = ( + "/".join(segments[2:]) + if not only_dataset_root_path + else "/".join(segments[2:5]) + "/" + ) return endpoint_url, bucket, path else: raise Exception(f"Invalid data path: {data_path}") @@ -404,6 +440,7 @@ def create_filenames_out( overwrite: bool, output_directory: pathlib.Path = pathlib.Path("."), no_directories=False, + unique_names_compared_to_local_files=True, ) -> list[pathlib.Path]: filenames_out = [] for filename_in in filenames_in: @@ -416,10 +453,10 @@ def create_filenames_out( filename_out = _local_path_from_s3_url( filename_in, output_directory ) - - filename_out = get_unique_filename( - filepath=filename_out, overwrite_option=overwrite - ) + if unique_names_compared_to_local_files: + filename_out = get_unique_filename( + filepath=filename_out, overwrite_option=overwrite + ) filenames_out.append(filename_out) return filenames_out diff --git a/copernicusmarine/download_functions/subset_xarray.py b/copernicusmarine/download_functions/subset_xarray.py index 8e8a7339..c1dca27e 100644 --- a/copernicusmarine/download_functions/subset_xarray.py +++ b/copernicusmarine/download_functions/subset_xarray.py @@ -418,15 +418,15 @@ def check_dataset_subset_bounds( if coordinate_label in dataset.dims: times = dataset_coordinates[coordinate_label].values if dataset_valid_date: - times_min = dataset_valid_date.replace("Z", "") + times_min = ( + dataset_valid_date.replace("Z", "") + if isinstance(dataset_valid_date, str) + else dataset_valid_date + ) else: times_min = times.min() - dataset_minimum_coordinate_value = Timestamp( - times_min - ).to_pydatetime() - dataset_maximum_coordinate_value = Timestamp( - times.max() - ).to_pydatetime() + dataset_minimum_coordinate_value = _date_to_datetime(times_min) + dataset_maximum_coordinate_value = _date_to_datetime(times.max()) user_minimum_coordinate_value = ( dataset_subset.start_datetime if dataset_subset.start_datetime is not None @@ -447,6 +447,13 @@ def check_dataset_subset_bounds( ) +def _date_to_datetime(date: Union[str, int]) -> datetime: + if isinstance(date, int): + return Timestamp(date * 1e6).to_pydatetime() + else: + return Timestamp(date).to_pydatetime() + + @typing.no_type_check def _check_coordinate_overlap( dimension: str, diff --git a/copernicusmarine/python_interface/get.py b/copernicusmarine/python_interface/get.py index 4d9b4014..4547dc39 100644 --- a/copernicusmarine/python_interface/get.py +++ b/copernicusmarine/python_interface/get.py @@ -32,6 +32,9 @@ def get( no_metadata_cache: bool = False, filter: 
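Passing Delimiter="/" makes list_objects stop at the first level below the prefix, which is how --index-parts lists only the index files at the dataset root; pages holding only CommonPrefixes carry no "Contents" key, hence the page.get("Contents", []) fix. A standalone boto3 sketch, assuming anonymous access and an illustrative bucket/prefix:

import boto3
from botocore import UNSIGNED
from botocore.config import Config

s3_client = boto3.client(
    "s3",
    endpoint_url="https://s3.waw3-1.cloudferro.com",
    config=Config(signature_version=UNSIGNED),
)
paginator = s3_client.get_paginator("list_objects")
pages = paginator.paginate(
    Bucket="mdl-native-01",
    Prefix="native/INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/"
    "cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/",
    Delimiter="/",  # do not recurse into deeper "directories"
)
for page in pages:
    for s3_object in page.get("Contents", []):
        print(s3_object["Key"], s3_object["Size"])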
Optional[str] = None, regex: Optional[str] = None, + file_list: Optional[Union[pathlib.Path, str]] = None, + download_file_list: bool = False, + index_parts: bool = False, sync: bool = False, sync_delete: bool = False, disable_progress_bar: bool = False, @@ -59,6 +62,9 @@ def get( no_metadata_cache (bool, optional): If True, do not use the metadata cache. filter (str, optional): Apply a filter to the downloaded data. regex (str, optional): Apply a regular expression filter to the downloaded data. + file_list (Union[pathlib.Path, str], optional): A path to a text file that list filenames line by line. Filenames must match the absolute paths of the files to download. + download_file_list (bool, optional): Option to only create a file files_to_download.txt containing the names of the the targeted files instead of downloading them. It writes the file in the directory specified with the --output-directory option (default to current directory). If specified, no other action will be performed. + index_parts (bool, optional): If True, download index files. Only for INSITU datasets. Temporary option. sync (bool, optional): If True, synchronize the local directory with the remote directory. sync_delete (bool, optional): If True, delete local files that are not present on the remote server while applying sync. @@ -71,28 +77,32 @@ def get( credentials_file = ( pathlib.Path(credentials_file) if credentials_file else None ) + file_list = pathlib.Path(file_list) if file_list else None request_file = pathlib.Path(request_file) if request_file else None return get_function( - dataset_url, - dataset_id, - dataset_version, - dataset_part, - username, - password, - no_directories, - show_outputnames, - output_directory, - credentials_file, - force_download, - overwrite_output_data, - request_file, - service, - overwrite_metadata_cache, - no_metadata_cache, - filter, - regex, - sync, - sync_delete, - disable_progress_bar, + dataset_url=dataset_url, + dataset_id=dataset_id, + force_dataset_version=dataset_version, + force_dataset_part=dataset_part, + username=username, + password=password, + no_directories=no_directories, + show_outputnames=show_outputnames, + output_directory=output_directory, + credentials_file=credentials_file, + force_download=force_download, + overwrite_output_data=overwrite_output_data, + request_file=request_file, + force_service=service, + overwrite_metadata_cache=overwrite_metadata_cache, + no_metadata_cache=no_metadata_cache, + filter=filter, + regex=regex, + file_list_path=file_list, + download_file_list=download_file_list, + index_parts=index_parts, + sync=sync, + sync_delete=sync_delete, + disable_progress_bar=disable_progress_bar, staging=staging, ) diff --git a/copernicusmarine/python_interface/login.py b/copernicusmarine/python_interface/login.py index 48441ce4..1c269559 100644 --- a/copernicusmarine/python_interface/login.py +++ b/copernicusmarine/python_interface/login.py @@ -10,10 +10,12 @@ def login( password: Optional[str] = None, configuration_file_directory: pathlib.Path = DEFAULT_CLIENT_BASE_DIRECTORY, overwrite_configuration_file: bool = False, + skip_if_user_logged_in: bool = False, ) -> bool: return login_function( username=username, password=password, configuration_file_directory=configuration_file_directory, overwrite_configuration_file=overwrite_configuration_file, + skip_if_user_logged_in=skip_if_user_logged_in, ) diff --git a/pip.conf b/pip.conf deleted file mode 100644 index ea76e179..00000000 --- a/pip.conf +++ /dev/null @@ -1,4 +0,0 @@ -[global] -index = 
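The Python interface mirrors the new CLI flags; for example, writing the list of matching files without downloading them (dataset id and list file are taken from the tests, and note that the current implementation exits the process after writing files_to_download.txt):

import copernicusmarine

copernicusmarine.get(
    dataset_id="cmems_obs-sl_glo_phy-ssh_nrt_allsat-l4-duacs-0.25deg_P1D",
    file_list="./tests/resources/file_list_example.txt",  # one filename per line
    download_file_list=True,  # only write files_to_download.txt, no download
    output_directory="copernicusmarine_data",
)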
https://nexus.mercator-ocean.fr/repository/pypi-all/pypi -index-url = https://nexus.mercator-ocean.fr/repository/pypi-all/simple -no-cache-dir = false diff --git a/poetry.lock b/poetry.lock index a69b81ca..140b6706 100644 --- a/poetry.lock +++ b/poetry.lock @@ -96,6 +96,17 @@ yarl = ">=1.0,<2.0" [package.extras] speedups = ["Brotli", "aiodns", "brotlicffi"] +[[package]] +name = "aioretry" +version = "5.0.2" +description = "Asyncio retry utility for Python 3.7+" +optional = false +python-versions = ">=3.7" +files = [ + {file = "aioretry-5.0.2-py3-none-any.whl", hash = "sha256:ae0970f4079b91ef647fc342b4b24c3108c57c31ec64a9ec682daeb9936db302"}, + {file = "aioretry-5.0.2.tar.gz", hash = "sha256:fdfe11ab0a54b762e206f5485b2240e56c631f4dc6594f2d005af4c37e10511b"}, +] + [[package]] name = "aiosignal" version = "1.3.1" @@ -302,7 +313,10 @@ files = [ ] [package.dependencies] -numpy = {version = ">1.13.3", markers = "python_version < \"3.12.0.rc1\""} +numpy = [ + {version = ">1.13.3", markers = "python_version < \"3.12.0.rc1\""}, + {version = ">=1.26.0b1", markers = "python_version >= \"3.12.0.rc1\""}, +] [[package]] name = "charset-normalizer" @@ -1306,6 +1320,7 @@ files = [ numpy = [ {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""}, {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -2143,5 +2158,5 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" -python-versions = ">=3.9,<3.12" -content-hash = "35e035f93eb1482a9adc66985eafd3e8627028768291d22f43950e8822557cd5" +python-versions = ">=3.9,<3.13" +content-hash = "5897b3bc524b5d1d912d0a174a40e7bc46c7eb5c2dd9874007e200da38ae4c3a" diff --git a/pyproject.toml b/pyproject.toml index 412e0af5..b29ffd0e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "copernicusmarine" -version = "1.0.1" +version = "1.0.6" description = "" authors = ["Copernicus Marine User Support "] readme = "README.md" @@ -26,6 +26,7 @@ semver = ">=3.0.2" nest-asyncio = ">=1.5.8" pystac = ">=1.8.3" lxml = ">=4.9.0" +aioretry = "^5.0.2" [tool.poetry.dev-dependencies] pre-commit = "^2.20.0" diff --git a/release.sh b/release.sh index 04dde3f6..09f34022 100755 --- a/release.sh +++ b/release.sh @@ -2,48 +2,28 @@ set -eufo pipefail -RELEASE_WATCHER_JIRA_ID="70121:ac53a7e4-e096-45f2-9408-118133f1c9ca" -RELEASE_ISSUE_PREFIX="New copernicusmarine package release " -PYPI_URL="https://pypi.org/project/copernicusmarine/${VERSION}/" -LAST_RELEASE_COMMIT=$(git log --grep "${RELEASE_ISSUE_PREFIX}" -n 1 --pretty=format:'%h') - -if [[ -z "$LAST_RELEASE_COMMIT" ]]; then - RELATED_CMCD_ISSUE_KEYS=$(git log | grep -E "CMCD-[0-9]+" -wo | xargs | sed 's/ /,/g') -else - RELATED_CMCD_ISSUE_KEYS=$(git log "${LAST_RELEASE_COMMIT}..HEAD" | grep -E "CMCD-[0-9]+" -wo | xargs | sed 's/ /,/g') -fi - -RELATED_CMCS_ISSUE_KEYS=$(moi related-issues --issue-keys "${RELATED_CMCD_ISSUE_KEYS}" | grep -E "^CMCS-[0-9]+" -wo || echo "") -RELEASE_DESCRIPTION="Pypi link: ${PYPI_URL}" -MOI_TOOL=$(command -v moi) - git switch main git pull -if [ ! -z "${MOI_TOOL}" ] && [ -z `git status --porcelain` ] && [ ! -z "${VERSION}" ] && [ ! -z "${PYPI_TOKEN}" ] ; then +if [ -z `git status --porcelain` ] && [ ! -z "${VERSION}" ] && [ ! -z "${PYPI_TOKEN}" ] ; then + RELEASE_BRANCH_NAME="New-copernicusmarine-package-release-${VERSION}" echo "Starting release..." 
- moi new -k CMCD --summary "New copernicusmarine package release ${VERSION}" --epic-key CMCD-96 --description "${RELEASE_DESCRIPTION}" --watchers "${RELEASE_WATCHER_JIRA_ID}" --related-issue-keys "${RELATED_CMCD_ISSUE_KEYS}" --sprint-project-key DO --start + git checkout -b $RELEASE_BRANCH_NAME poetry version ${VERSION} - git commit -am "`moi commit-message`" - moi review + git commit -am "New copernicusmarine package release ${VERSION}" poetry publish --build --username __token__ --password "${PYPI_TOKEN}" - moi merge --force-ci-pipeline-not-yet-successful + git switch main + git merge $RELEASE_BRANCH_NAME rm -rf dist/ - for cmcs in ${RELATED_CMCS_ISSUE_KEYS} - do - status=`moi details --issue-key "${cmcs}"| grep -o -P '(?<=status: ).*'` - case "${status}" in - "Cancelled"|"Done") - echo "${cmcs} is ${status}" - ;; - *) - echo "Updating ${cmcs}..." - moi comment-issue --issue-key "${cmcs}" --comment "[AUTOMATIC] Some related issues are part of release ${VERSION}. Moving to In Progress" - moi change-issue-status --issue-key "${cmcs}" --new-status "In Progress" - ;; - esac - done echo "Release ${VERSION} done." else git status - echo "Release ${VERSION} aborted. Install the 'moi' tool, clean your repository and specify VERSION and PYPI_TOKEN." + echo "Release ${VERSION} aborted. Clean your repository and specify VERSION and PYPI_TOKEN." fi + +while [[ $(poetry search copernicusmarine | grep copernicusmarine | awk '{print $2}') != "($VERSION)" ]] +do + echo "Waiting for version $VERSION to be available on Pypi..." + sleep 10 +done + +make build-and-publish-dockerhub-image diff --git a/tests/resources/file_list_example.txt b/tests/resources/file_list_example.txt new file mode 100644 index 00000000..38c3ce1f --- /dev/null +++ b/tests/resources/file_list_example.txt @@ -0,0 +1,2 @@ +2022/01/nrt_global_allsat_phy_l4_20220119_20220125.nc +2022/01/nrt_global_allsat_phy_l4_20220120_20220126.nc diff --git a/tests/resources/request_files/test_get_request_with_one_wrong_attribute.json b/tests/resources/request_files/test_get_request_with_one_wrong_attribute.json index d2889fdf..e4d2b73a 100644 --- a/tests/resources/request_files/test_get_request_with_one_wrong_attribute.json +++ b/tests/resources/request_files/test_get_request_with_one_wrong_attribute.json @@ -2,6 +2,6 @@ "dataset_id": "cmems_mod_nws_bgc-pft_myint_7km-3D-diato_P1M-m", "force_download": false, "no_directories": false, - "service": "ftp", + "service": "files", "toto": false } diff --git a/tests/resources/request_files/test_get_request_with_request_file.json b/tests/resources/request_files/test_get_request_with_request_file.json index b608cf57..7c8f53d6 100644 --- a/tests/resources/request_files/test_get_request_with_request_file.json +++ b/tests/resources/request_files/test_get_request_with_request_file.json @@ -2,5 +2,5 @@ "dataset_id": "cmems_mod_nws_bgc-pft_myint_7km-3D-diato_P1M-m", "force_download": false, "no_directories": false, - "service": "ftp" + "service": "files" } diff --git a/tests/resources/request_files/test_subset_request_with_dataset_not_in_catalog.json b/tests/resources/request_files/test_subset_request_with_dataset_not_in_catalog.json index 5121d977..00ca6117 100644 --- a/tests/resources/request_files/test_subset_request_with_dataset_not_in_catalog.json +++ b/tests/resources/request_files/test_subset_request_with_dataset_not_in_catalog.json @@ -6,7 +6,7 @@ "maximum_longitude": 0.1, "minimum_latitude": 0.0, "minimum_longitude": 0.0, - "service": "opendap", + "service": "arco-geo-series", "start_datetime": 
"2021-01-01", "variables": [ "analysed_sst" diff --git a/tests/resources/request_files/test_subset_request_with_request_file.json b/tests/resources/request_files/test_subset_request_with_request_file.json index 8ab83398..8b9de7af 100644 --- a/tests/resources/request_files/test_subset_request_with_request_file.json +++ b/tests/resources/request_files/test_subset_request_with_request_file.json @@ -7,7 +7,7 @@ "maximum_longitude": 0.1, "minimum_latitude": 0.0, "minimum_longitude": 0.0, - "service": "opendap", + "service": "arco-geo-series", "start_datetime": "2021-01-01", "variables": [ "analysed_sst" diff --git a/tests/test_command_line_interface.py b/tests/test_command_line_interface.py index b1ac9bf8..df8ff9e7 100644 --- a/tests/test_command_line_interface.py +++ b/tests/test_command_line_interface.py @@ -184,7 +184,6 @@ def then_products_from_marine_data_store_catalog_are_available(self): def then_all_dataset_parts_are_filled(self): expected_product_id = "BALTICSEA_ANALYSISFORECAST_BGC_003_007" expected_dataset_id = "cmems_mod_bal_bgc_anfc_static" - default_part_uri = "ftp://nrt.cmems-du.eu/Core/BALTICSEA_ANALYSISFORECAST_BGC_003_007/cmems_mod_bal_bgc_anfc_static" # noqa json_result = loads(self.output) expected_product = list( @@ -217,9 +216,9 @@ def then_all_dataset_parts_are_filled(self): version_ordered = sorted( dataset["versions"], - key=lambda x: x["label"] - if x["label"] != VERSION_DEFAULT - else "110001", + key=lambda x: ( + x["label"] if x["label"] != VERSION_DEFAULT else "110001" + ), reverse=True, ) @@ -232,23 +231,6 @@ def then_all_dataset_parts_are_filled(self): ) assert len(maybe_default_part) == 0 - parts = list( - filter( - lambda part: part["name"] != PART_DEFAULT, - latest_version["parts"], - ) - ) - for part in parts: - maybe_ftp_service = list( - filter( - lambda x: x["service_type"]["service_name"] == "ftp", - part["services"], - ) - ) - assert len(maybe_ftp_service) == 1 - ftp_service = maybe_ftp_service[0] - assert ftp_service["uri"] == default_part_uri - def when_I_run_copernicus_marine_describe_with_contains_option(self): filter_token = "OMI_HEALTH_CHL_GLOBAL_OCEANCOLOUR_oligo_n" command = f"copernicusmarine describe --contains {filter_token}" @@ -400,14 +382,6 @@ def then_I_can_read_the_json_including_datasets(self): CopernicusMarineDatasetServiceType.STATIC_ARCO.service_name.value # noqa not in service_names ) - for service in services: - service_uri = service["uri"] - assert isinstance(service_uri, str) - if service["service_type"]["service_name"] in ( - CopernicusMarineDatasetServiceType.GEOSERIES.service_name.value, # noqa - CopernicusMarineDatasetServiceType.TIMESERIES.service_name.value, # noqa - ): - assert service_uri.endswith(".zarr") def when_I_use_staging_environment_in_debug_logging_level(self): command = [ @@ -433,14 +407,6 @@ class SubsetServiceToTest: subpath: str dataset_url: str - OPENDAP = SubsetServiceToTest( - "opendap", - "download_opendap", - ( - "https://nrt.cmems-du.eu/thredds/dodsC/" - "cmems_mod_glo_phy-so_anfc_0.083deg_P1D-m" - ), - ) GEOSERIES = SubsetServiceToTest( "geoseries", "download_zarr", @@ -461,9 +427,6 @@ class SubsetServiceToTest: ), ) - def test_subset_opendap_functionnality(self, tmp_path): - self._test_subset_functionnalities(self.OPENDAP, tmp_path) - def flatten_request_dict( self, request_dict: dict[str, Optional[Union[str, Path]]] ) -> List: @@ -571,42 +534,6 @@ def check_subset_request_with_no_subsetting(self): b"copernicusmarine get --dataset-id " + bytes(dataset_id, "utf-8") ) in output.stdout - def 
test_subset_request_input_hierarchy(self, tmp_path): - filepath = ( - "./tests/resources/request_files/" - "test_subset_request_with_request_file.json" - ) - motu_api_request = ( - "python -m motuclient --motu https://nrt.cmems-du.eu/motu-web/Motu " - "--service-id SST_GLO_SST_L4_NRT_OBSERVATIONS_010_001-TDS " - "--product-id METOFFICE-GLO-SST-L4-NRT-OBS-SST-V2 " - "--longitude-min -1.8080337053571427 " - "--longitude-max 17.07587388392857 " - "--latitude-min 32.670164592633924 " - "--latitude-max 47.53621950334821 " - "--variable analysis_error " - "--variable mask " - "--out-dir " - "--out-name " - "--user --pwd " - ) - - command = [ - "copernicusmarine", - "subset", - "--force-download", - "--output-directory", - f"{tmp_path}", - "--minimum-latitude", - "30.", - "--request-file", - f"{filepath}", - "--motu-api-request", - f"'{motu_api_request}'", - ] - output = subprocess.run(command) - assert output.returncode == 0 - def test_if_dataset_coordinate_valid_minmax_attributes_are_setted( self, tmp_path ): @@ -625,7 +552,7 @@ def test_if_dataset_coordinate_valid_minmax_attributes_are_setted( "--output-directory": tmp_path, } - self.check_default_subset_request(self.OPENDAP.subpath, tmp_path) + self.check_default_subset_request(self.GEOSERIES.subpath, tmp_path) dataset_path = pathlib.Path(tmp_path) / "output.nc" dataset = xarray.open_dataset(dataset_path) @@ -651,12 +578,8 @@ def test_if_dataset_coordinate_valid_minmax_attributes_are_setted( class GetServiceToTest: name: str - FTP = GetServiceToTest("ftp") FILES = GetServiceToTest("files") - def test_get_ftp_functionnality(self, tmp_path): - self._test_get_functionalities(self.FTP, tmp_path) - def test_get_original_files_functionnality(self, tmp_path): self._test_get_functionalities(self.FILES, tmp_path) @@ -717,7 +640,7 @@ def test_get_download_s3_with_regex(self, tmp_path): "-i", f"{dataset_id}", "--service", - f"{self.FTP.name}", + f"{self.FILES.name}", "--regex", f"{regex}", "--force-download", @@ -742,7 +665,7 @@ def test_files_to_download_are_displayed(self, tmp_path): "-i", f"{dataset_id}", "--service", - f"{self.FTP.name}", + f"{self.FILES.name}", "--regex", f"{regex}", "--output-directory", @@ -766,7 +689,7 @@ def test_downloaded_files_are_not_displayed_with_force_download_option( "-i", f"{dataset_id}", "--service", - f"{self.FTP.name}", + f"{self.FILES.name}", "--regex", f"{regex}", "--force-download", @@ -817,7 +740,7 @@ def test_subset_output_file_as_netcdf(self, tmp_path): assert output.returncode == 0 assert is_file - def test_process_is_stopped_when_credentials_are_invalid(self): + def test_process_is_not_stopped_when_credentials_are_invalid(self): dataset_id = "cmems_mod_ibi_phy_my_0.083deg-3D_P1Y-m" command = [ @@ -831,12 +754,13 @@ def test_process_is_stopped_when_credentials_are_invalid(self): f"{dataset_id}", "--variable", "thetao", + "--force-download", ] output = subprocess.run(command, capture_output=True) - assert output.returncode == 1 - assert b"Invalid username or password" in output.stdout + assert output.returncode == 0 + assert b"Invalid username or password" not in output.stdout def test_login_is_prompt_when_configuration_file_doest_not_exist( self, tmp_path @@ -1017,6 +941,16 @@ def test_login(self, tmp_path): assert output.returncode == 0 assert non_existing_directory.is_dir() + command_with_skip = [ + "copernicusmarine", + "login", + "--configuration-file-directory", + f"{non_existing_directory}", + "--skip-if-user-logged-in", + ] + output_with_skip = subprocess.run(command_with_skip) + assert 
output_with_skip.returncode == 0 + def test_subset_error_when_forced_service_does_not_exist(self): self.when_I_run_copernicus_marine_subset_forcing_a_service_not_available() self.then_I_got_a_clear_output_with_available_service_for_subset() @@ -1207,18 +1141,6 @@ def then_files_are_created_without_tree_folder( assert set(expected_files).issubset(downloaded_files) - def test_no_directories_option_ftp(self, tmp_path): - self.when_I_run_copernicus_marine_command_using_no_directories_option( - tmp_path, force_service=self.FTP - ) - self.then_files_are_created_without_tree_folder(tmp_path) - self.when_I_run_copernicus_marine_command_using_no_directories_option( - tmp_path, force_service=self.FTP, output_directory="test" - ) - self.then_files_are_created_without_tree_folder( - tmp_path, output_directory="test" - ) - def test_no_directories_option_original_files(self, tmp_path): self.when_I_run_copernicus_marine_command_using_no_directories_option( tmp_path, force_service=self.FILES @@ -1231,44 +1153,6 @@ def test_no_directories_option_original_files(self, tmp_path): tmp_path, output_directory="test" ) - def test_motu_subset_with_non_existing_directory(self, tmp_path): - non_existing_directory = Path(tmp_path, "i_dont_exist") - command = [ - "copernicusmarine", - "subset", - "-i", - "cmems_obs-ins_glo_phy-temp-sal_nrt_oa_P1M", - "-v", - "TEMP", - "-t", - "2022-12-15T00:00:00", - "-T", - "2022-12-15T00:00:00", - "-x", - "20", - "-X", - "21", - "-y", - "30", - "-Y", - "31", - "-z", - "0", - "-Z", - "5", - "-o", - f"{non_existing_directory}", - "-f", - "data.nc", - "--service", - "motu", - "--force-download", - ] - output = subprocess.run(command) - - assert output.returncode == 0, output.stderr - assert (non_existing_directory / "data.nc").is_file() - def test_default_prompt_for_get_command(self, tmp_path): command = [ "copernicusmarine", @@ -1365,7 +1249,8 @@ def test_no_traceback_is_printed_on_dataset_url_error(self): "copernicusmarine", "get", "--dataset-url", - "ftp://nrt.cmems-du.eu/Core/GLOBAL_ANALYSISFORECAST_PHY_001_024_XXXXX/" + "https://s3.waw3-1.cloudferro.com/mdl-arco-time-013/arco/" + "GLOBAL_ANALYSISFORECAST_PHY_XXXXXXX/" "cmems_mod_glo_phy_anfc_0.083deg_P1D-m/2023", ] @@ -1426,13 +1311,14 @@ def test_dataset_url_suffix_path_are_used_as_filter(self): "copernicusmarine", "get", "--dataset-url", - "ftp://nrt.cmems-du.eu/Core/GLOBAL_ANALYSISFORECAST_PHY_001_024/" - "cmems_mod_glo_phy_anfc_0.083deg_P1D-m/2023/08", + "https://s3.waw3-1.cloudferro.com/mdl-native-14/native/" + "GLOBAL_ANALYSISFORECAST_PHY_001_024/" + "cmems_mod_glo_phy_anfc_0.083deg_P1D-m_202211/2023/11", ] output = subprocess.run(command, capture_output=True) - assert b"Printed 20 out of 31 files" in output.stdout + assert b"Printed 20 out of 30 files" in output.stdout def test_short_option_for_copernicus_marine_command_helper(self): short_option_command = [ @@ -1546,43 +1432,6 @@ def test_error_log_for_service_that_does_not_exist(self): assert b"Service ft does not exist for command subset" in output.stdout - def when_I_request_subset_dataset_with_opendap_service(self, output_path): - command = [ - "copernicusmarine", - "subset", - "-i", - "cmems_mod_glo_phy-thetao_anfc_0.083deg_P1D-m", - "-t", - "2023-05-10", - "-T", - "2023-05-12", - "-x", - "-18", - "-X", - "-10", - "-y", - "35", - "-Y", - "40", - "-z", - "0", - "-Z", - "10", - "-v", - "thetao", - "--service", - "opendap", - "-o", - f"{output_path}", - "-f", - "data.nc", - "--force-download", - ] - - self.run_output = subprocess.run( - command, stderr=subprocess.PIPE, 
stdout=subprocess.PIPE - ) - def then_I_can_read_copernicusmarine_version_in_the_dataset_attributes( self, filepath ): @@ -1597,14 +1446,6 @@ def test_copernicusmarine_version_in_dataset_attributes_with_arco( tmp_path / "data.zarr" ) - def test_copernicusmarine_version_in_dataset_attributes_with_opendap( - self, tmp_path - ): - self.when_I_request_subset_dataset_with_opendap_service(tmp_path) - self.then_I_can_read_copernicusmarine_version_in_the_dataset_attributes( - tmp_path / "data.nc" - ) - def test_subset_filter_by_standard_name(self, tmp_path): dataset_id = "cmems_mod_ibi_phy_my_0.083deg-3D_P1Y-m" output_filename = "data.zarr" @@ -1774,12 +1615,6 @@ def test_dataset_size_is_displayed_when_downloading_with_arco_service( self.when_I_request_subset_dataset_with_zarr_service(tmp_path, True) self.then_I_can_read_dataset_size() - def test_dataset_size_is_displayed_when_downloading_with_opendap_service( - self, tmp_path - ): - self.when_I_request_subset_dataset_with_opendap_service(tmp_path) - self.then_I_can_read_dataset_size() - def test_dataset_has_always_every_dimensions(self, tmp_path): output_filename = "data.nc" command = [ @@ -1825,20 +1660,6 @@ def test_dataset_has_always_every_dimensions(self, tmp_path): == 4 ) - def test_i_can_get_ftp_url_subpath(self, tmp_path): - command = [ - "copernicusmarine", - "get", - "--dataset-url", - "ftp://nrt.cmems-du.eu/Core/INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr/index_latest.txt", # noqa - "--force-download", - "-o", - f"{tmp_path}", - ] - - output = subprocess.run(command, capture_output=True) - assert output.returncode == 0 - def when_I_request_a_dataset_with_subset_method_option( self, subset_method ): @@ -1892,18 +1713,6 @@ def test_subset_nearest_method(self): self.when_I_request_a_dataset_with_subset_method_option("nearest") self.then_I_can_read_a_warning_in_stdout() - def test_request_file_over_ftp_is_quick(self): - command = [ - "copernicusmarine", - "get", - "--dataset-url", - "ftp://nrt.cmems-du.eu/Core/INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr/index_latest.txt", # noqa - "-s", - "ftp", - ] - output = execute_in_terminal(command=command, timeout_second=20) - assert b"Total size of the download:" in output.stdout - def test_netcdf_compression_option(self, tmp_path): filename_without_option = "without_option.nc" filename_with_option = "with_option.nc" @@ -2171,3 +1980,74 @@ def test_that_cache_folder_isnt_created_when_no_metadata_cache_option_was_provid assert cache_path.is_dir() is True del os.environ["COPERNICUSMARINE_CACHE_DIRECTORY"] + + def test_file_list_filter(self, tmp_path): + dataset_id = "cmems_obs-sl_glo_phy-ssh_nrt_allsat-l4-duacs-0.25deg_P1D" + command = [ + "copernicusmarine", + "get", + "-i", + f"{dataset_id}", + "--service", + f"{self.FILES.name}", + "--file-list", + "./tests/resources/file_list_example.txt", + "--force-download", + "--output-directory", + f"{tmp_path}", + ] + + output = subprocess.run(command) + downloaded_files = get_all_files_in_folder_tree(folder=tmp_path) + assert output.returncode == 0 + assert len(downloaded_files) == 2 + + for filename in downloaded_files: + assert ( + re.search( + ( + r"nrt_global_allsat_phy_l4_20220119_20220125\.nc|" + r"nrt_global_allsat_phy_l4_20220120_20220126\.nc" + ), + filename, + ) + is not None + ) + + def test_get_download_file_list(self, tmp_path): + regex = ".*_(2001|2002|2003).*.nc" + dataset_id = "cmems_mod_ibi_phy_my_0.083deg-3D_P1Y-m" + command = [ + "copernicusmarine", + "get", 
+ "-i", + f"{dataset_id}", + "--service", + "files", + "--regex", + f"{regex}", + "--download-file-list", + "--output-directory", + f"{tmp_path}", + ] + + output_filename = pathlib.Path(tmp_path) / "files_to_download.txt" + + output = subprocess.run(command) + assert output.returncode == 0 + assert output_filename.is_file() + with open(output_filename) as file: + lines = file.read().splitlines() + assert len(lines) == 3 + assert ( + "CMEMS_v5r1_IBI_PHY_MY_NL_01yav_20010101_20011231_R20221101_RE01.nc" + in lines[0] + ) + assert ( + "CMEMS_v5r1_IBI_PHY_MY_NL_01yav_20020101_20021231_R20221101_RE01.nc" + in lines[1] + ) + assert ( + "CMEMS_v5r1_IBI_PHY_MY_NL_01yav_20030101_20031231_R20221101_RE01.nc" + in lines[2] + ) diff --git a/tests/test_command_line_interface_nearest_layer_subset.py b/tests/test_command_line_interface_nearest_layer_subset.py index 4281b5ec..07e582a8 100644 --- a/tests/test_command_line_interface_nearest_layer_subset.py +++ b/tests/test_command_line_interface_nearest_layer_subset.py @@ -165,23 +165,6 @@ def test_subset_same_depth_zarr(self, tmp_path): assert min_depth == SUBSET_NEAREST_LAYER_OPTIONS["expected_depth"] assert max_depth == SUBSET_NEAREST_LAYER_OPTIONS["expected_depth"] - def test_subset_same_depth_opendap(self, tmp_path): - output_filename = "test_subset_same_depth_opendap.nc" - - command = self._nearest_layer_subset( - tmp_path, output_filename, "opendap", same_depth=True - ) - output = subprocess.run(command) - - dataset = xarray.open_dataset(pathlib.Path(tmp_path, output_filename)) - min_depth = dataset.depth.values.min() - max_depth = dataset.depth.values.max() - - assert output.returncode == 0 - assert dataset.depth.size == 1 - assert min_depth == SUBSET_NEAREST_LAYER_OPTIONS["expected_depth"] - assert max_depth == SUBSET_NEAREST_LAYER_OPTIONS["expected_depth"] - def test_subset_same_depth_with_vertical_dimension_as_originally_produced( self, tmp_path ): @@ -257,27 +240,6 @@ def test_subset_same_datetime_zarr(self, tmp_path): max_datetime == SUBSET_NEAREST_LAYER_OPTIONS["expected_datetime"] ) - def test_subset_same_datetime_opendap(self, tmp_path): - output_filename = "test_subset_same_datetime_opendap.nc" - - command = self._nearest_layer_subset( - tmp_path, output_filename, "opendap", same_datetime=True - ) - output = subprocess.run(command) - - dataset = xarray.open_dataset(pathlib.Path(tmp_path, output_filename)) - min_datetime = dataset.time.values.min() - max_datetime = dataset.time.values.max() - - assert output.returncode == 0 - assert dataset.time.size == 1 - assert min_datetime == numpy.datetime64( - "2023-04-26 12:00:00" - ) # SUBSET_NEAREST_LAYER_OPTIONS["expected_datetime"] - assert max_datetime == numpy.datetime64( - "2023-04-26 12:00:00" - ) # SUBSET_NEAREST_LAYER_OPTIONS["expected_datetime"] - # -----------------------# # Test on same longitude # # -----------------------# @@ -305,27 +267,6 @@ def test_subset_same_longitude_zarr(self, tmp_path): max_elevation == SUBSET_NEAREST_LAYER_OPTIONS["expected_longitude"] ) - def test_subset_same_longitude_opendap(self, tmp_path): - output_filename = "test_subset_same_longitude_opendap.nc" - - command = self._nearest_layer_subset( - tmp_path, output_filename, "opendap", same_longitude=True - ) - output = subprocess.run(command) - - dataset = xarray.open_dataset(pathlib.Path(tmp_path, output_filename)) - min_longitude = dataset.longitude.values.min() - max_longitude = dataset.longitude.values.max() - - assert output.returncode == 0 - assert dataset.longitude.size == 1 - assert ( - min_longitude == 
-            min_longitude == SUBSET_NEAREST_LAYER_OPTIONS["expected_longitude"]
-        )
-        assert (
-            max_longitude == SUBSET_NEAREST_LAYER_OPTIONS["expected_longitude"]
-        )
-
     # ----------------------#
     # Test on same latitude #
     # ----------------------#
@@ -353,27 +294,6 @@ def test_subset_same_latitude_zarr(self, tmp_path):
             max_elevation == SUBSET_NEAREST_LAYER_OPTIONS["expected_latitude"]
         )
 
-    def test_subset_same_latitude_opendap(self, tmp_path):
-        output_filename = "test_subset_same_latitude_opendap.nc"
-
-        command = self._nearest_layer_subset(
-            tmp_path, output_filename, "opendap", same_latitude=True
-        )
-        output = subprocess.run(command)
-
-        dataset = xarray.open_dataset(pathlib.Path(tmp_path, output_filename))
-        min_latitude = dataset.latitude.values.min()
-        max_latitude = dataset.latitude.values.max()
-
-        assert output.returncode == 0
-        assert dataset.latitude.size == 1
-        assert (
-            min_latitude == SUBSET_NEAREST_LAYER_OPTIONS["expected_latitude"]
-        )
-        assert (
-            max_latitude == SUBSET_NEAREST_LAYER_OPTIONS["expected_latitude"]
-        )
-
     def test_subset_with_coordinates_range_falling_between_two_values(
         self, tmp_path
     ):
diff --git a/tests/test_dataset_version_selection.py b/tests/test_dataset_version_selection.py
index ba6a95bb..5498b6d9 100644
--- a/tests/test_dataset_version_selection.py
+++ b/tests/test_dataset_version_selection.py
@@ -90,7 +90,7 @@ def test_get_when_dataset_specified_version_does_not_exist_with_forced_service(
         "--dataset-version",
         "default",
         "--service",
-        "ftp",
+        "files",
     ]
 
     output = execute_in_terminal(command)
diff --git a/tests/test_get_index_files_insitu.py b/tests/test_get_index_files_insitu.py
new file mode 100644
index 00000000..cdaa7425
--- /dev/null
+++ b/tests/test_get_index_files_insitu.py
@@ -0,0 +1,79 @@
+from copernicusmarine import get
+from tests.test_utils import execute_in_terminal
+
+
+class TestGetIndexInsituFiles:
+    def test_get_index_insitu_files(self):
+        self.command = [
+            "copernicusmarine",
+            "get",
+            "--dataset-id",
+            "cmems_obs-ins_glo_phybgcwav_mynrt_na_irr",
+            "--index-parts",
+        ]
+        self.output = execute_in_terminal(self.command)
+
+        assert (
+            b"s3://mdl-native-01/native/INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030"
+            b"/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/index_reference.txt"
+            in self.output.stdout
+        )
+        assert (
+            b"s3://mdl-native-01/native/INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030"
+            b"/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/index_history.txt"
+            in self.output.stdout
+        )
+        assert (
+            b"s3://mdl-native-01/native/INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030"
+            b"/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/index_latest.txt"
+            in self.output.stdout
+        )
+        assert (
+            b"s3://mdl-native-01/native/INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030"
+            b"/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/index_monthly.txt"
+            in self.output.stdout
+        )
+        assert (
+            b"s3://mdl-native-01/native/INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030"
+            b"/cmems_obs-ins_glo_phybgcwav_mynrt_na_irr_202311/index_platform.txt"
+            in self.output.stdout
+        )
+
+    def test_get_index_insitu_files_fails_if_ftp_forced(self):
+        self.command = [
+            "copernicusmarine",
+            "get",
+            "--dataset-id",
+            "cmems_obs-ins_glo_phybgcwav_mynrt_na_irr",
+            "--index-parts",
+            "--force-service",
+            "ftp",
+        ]
+        self.output = execute_in_terminal(self.command)
+
+        assert (
+            b"Index part flag is not supported for FTP services."
+            b" Please use '--force-service files' option."
+            in self.output.stdout
+        )
+
+    def test_get_index_insitu_files_not_an_insitu(self):
+        self.command = [
+            "copernicusmarine",
+            "get",
+            "--dataset-id",
+            "cmems_obs-sl_eur_phy-ssh_my_al-l3-duacs_PT1S",
+            "--index-parts",
+        ]
+        self.output = execute_in_terminal(self.command)
+
+        assert b"No data to download" in self.output.stdout
+
+    def test_get_index_insitu_files_python(self):
+        get_result = get(
+            dataset_id="cmems_obs-ins_blk_phybgcwav_mynrt_na_irr",
+            index_parts=True,
+            force_download=True,
+        )
+        assert get_result is not None
+        assert all(map(lambda x: x.exists(), get_result))
diff --git a/tests/test_get_sync.py b/tests/test_get_sync.py
index cc597161..f0d70fa3 100644
--- a/tests/test_get_sync.py
+++ b/tests/test_get_sync.py
@@ -11,6 +11,7 @@ def test_get_sync(self):
         self.then_same_command_with_sync_should_download_only_one_file()
 
     def test_get_sync_delete(self):
+        self.when_I_get_some_native_files_with_sync()
         self.when_I_add_a_file_locally()
         self.then_command_sync_delete_should_propose_to_delete_it_and_delete_it()
 
diff --git a/tests/test_overwrite_output_data.py b/tests/test_overwrite_output_data.py
index 8417831d..c4a72198 100644
--- a/tests/test_overwrite_output_data.py
+++ b/tests/test_overwrite_output_data.py
@@ -18,16 +18,6 @@ def expected_downloaded_filepath_with_counter(
         extension = self.expected_downloaded_filepath.suffix
         return parent / (filename + "_(" + str(counter) + ")" + extension)
 
-    def test_download_ftp(self, tmp_path):
-        self.service = "ftp"
-        self.filename = "data.nc"
-        self.tmp_path = tmp_path
-        self.expected_downloaded_filepath = pathlib.Path(
-            "IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1Y-m/CMEMS_v5r1_IBI_PHY_MY_NL_01yav_20120101_20121231_R20221101_RE01.nc"  # noqa
-        )
-        self._test_download_with_overwrite_option()
-        self._test_download_without_overwrite_option()
-
     def test_download_original_files(self, tmp_path):
         self.service = "original-files"
         self.filename = "data.nc"
@@ -38,14 +28,6 @@ def test_download_original_files(self, tmp_path):
         self._test_download_with_overwrite_option()
         self._test_download_without_overwrite_option()
 
-    def test_download_opendap(self, tmp_path):
-        self.service = "opendap"
-        self.filename = "data.nc"
-        self.tmp_path = tmp_path
-        self.expected_downloaded_filepath = pathlib.Path(self.filename)
-        self._test_download_with_overwrite_option()
-        self._test_download_without_overwrite_option()
-
     def test_download_zarr(self, tmp_path):
         self.service = "arco-geo-series"
         self.filename = "data.zarr"
@@ -207,12 +189,6 @@ def request_data_download(
                     .stat()
                     .st_mtime
                 )
-            elif service == "ftp":
-                last_modification_time = (
-                    pathlib.Path(folder, self.expected_downloaded_filepath)
-                    .stat()
-                    .st_mtime
-                )
         elif command == "subset":
             if service == "arco-time-series":
                 last_modification_time = (
@@ -226,12 +202,6 @@ def request_data_download(
                     .stat()
                     .st_mtime
                 )
-            elif service == "opendap":
-                last_modification_time = (
-                    pathlib.Path(folder, self.expected_downloaded_filepath)
-                    .stat()
-                    .st_mtime
-                )
 
         attribute = (
             "initial_output_data_modification_time"
@@ -267,13 +237,11 @@ def test_that_overwrite_option_does_not_create_subdirectory(self):
 
 
 def command_from_service(service: str) -> Optional[str]:
-    if service in ["ftp", "original-files"]:
+    if service in ["original-files"]:
        return "get"
    elif service in [
-        "opendap",
        "arco-time-series",
        "arco-geo-series",
-        "motu",
    ]:
        return "subset"
    return None
diff --git a/tests/test_python_interface.py b/tests/test_python_interface.py
index 7c3cf89e..848fd2bd 100644
--- a/tests/test_python_interface.py
+++ b/tests/test_python_interface.py
@@ -97,7 +97,13 @@ def test_login_ok(self, tmp_path):
             non_existing_directory / ".copernicusmarine-credentials"
         ).is_file()
 
-    def test_login_not_ok(self, tmp_path):
+        is_valid_with_skip = login(
+            configuration_file_directory=non_existing_directory,
+            skip_if_user_logged_in=True,
+        )
+        assert is_valid_with_skip is True
+
+    def test_login_ok_with_wrong_credentials(self, tmp_path):
         non_existing_directory = Path(tmp_path, "i_dont_exist")
         is_valid = login(
             username=os.getenv("COPERNICUS_MARINE_SERVICE_USERNAME"),
@@ -106,8 +112,8 @@ def test_login_not_ok(self, tmp_path):
             overwrite_configuration_file=True,
         )
 
-        assert is_valid is False
-        assert non_existing_directory.is_dir() is False
+        assert is_valid is True
+        assert non_existing_directory.is_dir() is True
 
     def test_signature_inspection_is_working(self):
         assert inspect.signature(describe).parameters[
diff --git a/tests/test_sqlite_subsetting.py b/tests/test_sqlite_subsetting.py
new file mode 100644
index 00000000..6e824189
--- /dev/null
+++ b/tests/test_sqlite_subsetting.py
@@ -0,0 +1,45 @@
+from tests.test_utils import execute_in_terminal
+
+
+class TestSqliteSubsetting:
+    def test_sqlite_subsetting_not_supported_yet(self):
+        command = [
+            "copernicusmarine",
+            "subset",
+            "--dataset-id",
+            "cmems_obs-wave_glo_phy-swh_nrt_j3-l3_PT1S",
+            "--variable",
+            "VAVH",
+            "--start-datetime",
+            "2024-01-01T00:00:00",
+            "--end-datetime",
+            "2024-01-01T03:00:00",
+        ]
+        output = execute_in_terminal(command)
+        assert (
+            b"Format not supported: Subsetting format type sqlite not supported yet."
+            in output.stdout
+        )
+        assert output.returncode == 1
+
+    def test_sqlite_subsetting_not_supported_yet_even_when_force_service(self):
+        command = [
+            "copernicusmarine",
+            "subset",
+            "--dataset-id",
+            "cmems_obs-wave_glo_phy-swh_nrt_j3-l3_PT1S",
+            "--variable",
+            "VAVH",
+            "--start-datetime",
+            "2024-01-01T00:00:00",
+            "--end-datetime",
+            "2024-01-01T03:00:00",
+            "--service",
+            "geoseries",
+        ]
+        output = execute_in_terminal(command)
+        assert (
+            b"Format not supported: Subsetting format type sqlite not supported yet."
+            in output.stdout
+        )
+        assert output.returncode == 1
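Editor's note, not part of the patch: the snippet below is a minimal usage sketch of the Python entry point exercised by the new tests in tests/test_get_index_files_insitu.py, assuming the patched `copernicusmarine` package is installed. The dataset ID and the `index_parts`/`force_download` arguments are taken verbatim from those tests; printing the returned paths is illustrative only.

    # Sketch: download the index files of an in-situ dataset via the
    # Python interface, mirroring test_get_index_insitu_files_python.
    from copernicusmarine import get

    # index_parts=True requests the index_*.txt parts of the dataset;
    # force_download=True skips the interactive confirmation prompt.
    downloaded_paths = get(
        dataset_id="cmems_obs-ins_blk_phybgcwav_mynrt_na_irr",
        index_parts=True,
        force_download=True,
    )
    for path in downloaded_paths:
        print(path)  # each entry is a local pathlib.Path that should exist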