diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000000..5d86975b75 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,2 @@ +# introduce formatting with black +c3ddbaa6e61c44a3809e625c802cb4c7632934a3 diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index b597d49e6b..96dae8044c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -13,11 +13,19 @@ jobs: uses: ./.github/workflows/get_docs_changes.yml run_lint: - name: Runs mypy, flake and bandit + name: Lint needs: get_docs_changes if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest", "macos-latest", "windows-latest"] + python-version: ["3.8.x", "3.9.x", "3.10.x", "3.11.x"] - runs-on: ubuntu-latest + defaults: + run: + shell: bash + runs-on: ${{ matrix.os }} steps: @@ -27,34 +35,44 @@ jobs: - name: Setup Python uses: actions/setup-python@v4 with: - python-version: "3.10.x" + python-version: ${{ matrix.python-version }} - name: Install Poetry uses: snok/install-poetry@v1 with: virtualenvs-create: true virtualenvs-in-project: true - installer-parallel: true + installer-parallel: true - name: Load cached venv id: cached-poetry-dependencies uses: actions/cache@v3 with: path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }} + key: venv-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }} - name: Install dependencies # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction --all-extras --with airflow - - # - name: Install self - # run: poetry install --no-interaction + run: poetry install --all-extras --with airflow,providers,pipeline,sentry-sdk - - name: Run lint - run: make lint + - name: Run make lint + run: | + export PATH=$PATH:"/c/Program Files/usr/bin" # needed for Windows + make lint # - name: print envs # run: | # echo "The GitHub Actor's username is: $GITHUB_ACTOR" # echo "The GitHub repo owner is: $GITHUB_REPOSITORY_OWNER" # echo "The GitHub repo is: $GITHUB_REPOSITORY" + + matrix_job_required_check: + name: Lint results + needs: run_lint + runs-on: ubuntu-latest + if: always() + steps: + - name: Check matrix job results + if: contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') + run: | + echo "One or more matrix job tests failed or were cancelled. You may need to re-run them." 
&& exit 1 diff --git a/.github/workflows/test_airflow.yml b/.github/workflows/test_airflow.yml index d78a48e8f7..bbed326344 100644 --- a/.github/workflows/test_airflow.yml +++ b/.github/workflows/test_airflow.yml @@ -41,7 +41,7 @@ jobs: key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-airflow-runner - name: Install dependencies - run: poetry install --no-interaction --with airflow -E duckdb -E parquet + run: poetry install --no-interaction --with airflow --with pipeline -E duckdb -E parquet --with sentry-sdk - run: | poetry run pytest tests/helpers/airflow_tests diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index 23b6eb9fdd..dd7c7db2fe 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -55,40 +55,67 @@ jobs: virtualenvs-in-project: true installer-parallel: true - - name: Load cached venv - id: cached-poetry-dependencies - uses: actions/cache@v3 - with: - # path: ${{ steps.pip-cache.outputs.dir }} - path: .venv - key: venv-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }} + # NOTE: do not cache. we want to have a clean state each run and we upgrade depdendencies later + # - name: Load cached venv + # id: cached-poetry-dependencies + # uses: actions/cache@v3 + # with: + # # path: ${{ steps.pip-cache.outputs.dir }} + # path: .venv + # key: venv-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }} + + - name: Install dependencies + run: poetry install --no-interaction --with sentry-sdk + + - run: | + poetry run pytest tests/common tests/normalize tests/reflection tests/sources tests/load/test_dummy_client.py tests/extract/test_extract.py tests/extract/test_sources.py tests/pipeline/test_pipeline_state.py + if: runner.os != 'Windows' + name: Run common tests with minimum dependencies Linux/MAC + - run: | + poetry run pytest tests/common tests/normalize tests/reflection tests/sources tests/load/test_dummy_client.py tests/extract/test_extract.py tests/extract/test_sources.py tests/pipeline/test_pipeline_state.py -m "not forked" + if: runner.os == 'Windows' + name: Run common tests with minimum dependencies Windows + shell: cmd - - name: Install dependencies + sentry - run: poetry install --no-interaction -E parquet -E pydantic && pip install sentry-sdk + - name: Install duckdb dependencies + run: poetry install --no-interaction -E duckdb --with sentry-sdk - run: | - poetry run pytest tests/common tests/normalize tests/reflection tests/sources + poetry run pytest tests/pipeline/test_pipeline.py if: runner.os != 'Windows' - name: Run tests Linux/MAC + name: Run pipeline smoke tests with minimum deps Linux/MAC - run: | - poetry run pytest tests/common tests/normalize tests/reflection tests/sources -m "not forked" + poetry run pytest tests/pipeline/test_pipeline.py if: runner.os == 'Windows' - name: Run tests Windows + name: Run smoke tests with minimum deps Windows shell: cmd - - name: Install extra dependencies - run: poetry install --no-interaction -E duckdb -E cli -E parquet -E pydantic + - name: Install pipeline dependencies + run: poetry install --no-interaction -E duckdb -E cli -E parquet --with sentry-sdk --with pipeline - run: | - poetry run pytest tests/extract tests/pipeline tests/cli/common + poetry run pytest tests/extract tests/pipeline tests/libs tests/cli/common tests/destinations if: runner.os != 'Windows' - name: Run extra tests Linux/MAC + name: Run extract and pipeline tests Linux/MAC - 
run: | - poetry run pytest tests/extract tests/pipeline tests/cli/common + poetry run pytest tests/extract tests/pipeline tests/libs tests/cli/common tests/destinations if: runner.os == 'Windows' - name: Run extra tests Windows + name: Run extract tests Windows shell: cmd + # - name: Install Pydantic 1.0 + # run: pip install "pydantic<2" + + # - run: | + # poetry run pytest tests/libs + # if: runner.os != 'Windows' + # name: Run extract and pipeline tests Linux/MAC + # - run: | + # poetry run pytest tests/libs + # if: runner.os == 'Windows' + # name: Run extract tests Windows + # shell: cmd + matrix_job_required_check: name: Common tests needs: run_common diff --git a/.github/workflows/test_dbt_runner.yml b/.github/workflows/test_dbt_runner.yml index db3b53e9fa..1803a53fc1 100644 --- a/.github/workflows/test_dbt_runner.yml +++ b/.github/workflows/test_dbt_runner.yml @@ -68,7 +68,7 @@ jobs: - name: Install dependencies # install dlt with postgres support - run: poetry install --no-interaction -E postgres -E dbt + run: poetry install --no-interaction -E postgres -E dbt --with sentry-sdk - run: | poetry run pytest tests/helpers/dbt_tests -k '(not venv)' diff --git a/.github/workflows/test_destination_athena.yml b/.github/workflows/test_destination_athena.yml index 704e66522b..2d57f26a51 100644 --- a/.github/workflows/test_destination_athena.yml +++ b/.github/workflows/test_destination_athena.yml @@ -9,9 +9,9 @@ on: workflow_dispatch: env: - DESTINATION__FILESYSTEM__CREDENTIALS__AWS_ACCESS_KEY_ID: AKIAT4QMVMC4J46G55G4 + DESTINATION__FILESYSTEM__CREDENTIALS__AWS_ACCESS_KEY_ID: AKIAT4QMVMC4LGORLZOK DESTINATION__FILESYSTEM__CREDENTIALS__AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - DESTINATION__ATHENA__CREDENTIALS__AWS_ACCESS_KEY_ID: AKIAT4QMVMC4J46G55G4 + DESTINATION__ATHENA__CREDENTIALS__AWS_ACCESS_KEY_ID: AKIAT4QMVMC4LGORLZOK DESTINATION__ATHENA__CREDENTIALS__AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} DESTINATION__ATHENA__CREDENTIALS__REGION_NAME: eu-central-1 DESTINATION__ATHENA__QUERY_RESULT_BUCKET: s3://dlt-athena-output @@ -70,7 +70,7 @@ jobs: - name: Install dependencies # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction -E athena + run: poetry install --no-interaction -E athena --with sentry-sdk --with pipeline - run: | poetry run pytest tests/load diff --git a/.github/workflows/test_destination_athena_iceberg.yml b/.github/workflows/test_destination_athena_iceberg.yml index 6892a96bf1..d8d8521063 100644 --- a/.github/workflows/test_destination_athena_iceberg.yml +++ b/.github/workflows/test_destination_athena_iceberg.yml @@ -9,9 +9,9 @@ on: workflow_dispatch: env: - DESTINATION__FILESYSTEM__CREDENTIALS__AWS_ACCESS_KEY_ID: AKIAT4QMVMC4J46G55G4 + DESTINATION__FILESYSTEM__CREDENTIALS__AWS_ACCESS_KEY_ID: AKIAT4QMVMC4LGORLZOK DESTINATION__FILESYSTEM__CREDENTIALS__AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - DESTINATION__ATHENA__CREDENTIALS__AWS_ACCESS_KEY_ID: AKIAT4QMVMC4J46G55G4 + DESTINATION__ATHENA__CREDENTIALS__AWS_ACCESS_KEY_ID: AKIAT4QMVMC4LGORLZOK DESTINATION__ATHENA__CREDENTIALS__AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} DESTINATION__ATHENA__CREDENTIALS__REGION_NAME: eu-central-1 DESTINATION__ATHENA__QUERY_RESULT_BUCKET: s3://dlt-athena-output @@ -70,7 +70,7 @@ jobs: - name: Install dependencies # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction -E athena + run: poetry install --no-interaction -E athena --with
sentry-sdk --with pipeline - run: | poetry run pytest tests/load diff --git a/.github/workflows/test_destination_bigquery.yml b/.github/workflows/test_destination_bigquery.yml index dcc7e7ba9b..45dc78a085 100644 --- a/.github/workflows/test_destination_bigquery.yml +++ b/.github/workflows/test_destination_bigquery.yml @@ -18,7 +18,7 @@ env: CREDENTIALS__REFRESH_TOKEN: ${{ secrets.CREDENTIALS__REFRESH_TOKEN }} # needed for bigquery staging tests - # DESTINATION__FILESYSTEM__CREDENTIALS__AWS_ACCESS_KEY_ID: AKIAT4QMVMC4J46G55G4 + # DESTINATION__FILESYSTEM__CREDENTIALS__AWS_ACCESS_KEY_ID: AKIAT4QMVMC4LGORLZOK # DESTINATION__FILESYSTEM__CREDENTIALS__AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} RUNTIME__SENTRY_DSN: https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752 @@ -79,7 +79,7 @@ jobs: - name: Install dependencies # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction -E bigquery --with providers -E parquet + run: poetry install --no-interaction -E bigquery --with providers -E parquet --with sentry-sdk --with pipeline # - name: Install self # run: poetry install --no-interaction diff --git a/.github/workflows/test_destination_mssql.yml b/.github/workflows/test_destination_mssql.yml index bba44e750d..6eb4427bbf 100644 --- a/.github/workflows/test_destination_mssql.yml +++ b/.github/workflows/test_destination_mssql.yml @@ -65,7 +65,7 @@ jobs: key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp - name: Install dependencies - run: poetry install --no-interaction -E mssql -E s3 -E gs -E az -E parquet + run: poetry install --no-interaction -E mssql -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline - run: | poetry run pytest tests/load --ignore tests/load/pipeline/test_dbt_helper.py diff --git a/.github/workflows/test_destination_qdrant.yml b/.github/workflows/test_destination_qdrant.yml index 09ded40f59..0ce3e3a3f9 100644 --- a/.github/workflows/test_destination_qdrant.yml +++ b/.github/workflows/test_destination_qdrant.yml @@ -59,7 +59,7 @@ jobs: key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp - name: Install dependencies - run: poetry install --no-interaction -E qdrant -E parquet + run: poetry install --no-interaction -E qdrant -E parquet --with sentry-sdk --with pipeline - run: | poetry run pytest tests/load/ if: runner.os != 'Windows' diff --git a/.github/workflows/test_destination_snowflake.yml b/.github/workflows/test_destination_snowflake.yml index 4aae3ec62e..afc4263daf 100644 --- a/.github/workflows/test_destination_snowflake.yml +++ b/.github/workflows/test_destination_snowflake.yml @@ -13,7 +13,7 @@ env: CREDENTIALS__PASSWORD: ${{ secrets.PG_PASSWORD }} # needed for snowflake staging tests - DESTINATION__FILESYSTEM__CREDENTIALS__AWS_ACCESS_KEY_ID: AKIAT4QMVMC4J46G55G4 + DESTINATION__FILESYSTEM__CREDENTIALS__AWS_ACCESS_KEY_ID: AKIAT4QMVMC4LGORLZOK DESTINATION__FILESYSTEM__CREDENTIALS__AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} DESTINATION__FILESYSTEM__CREDENTIALS__PROJECT_ID: chat-analytics-rasa-ci DESTINATION__FILESYSTEM__CREDENTIALS__CLIENT_EMAIL: chat-analytics-loader@chat-analytics-rasa-ci.iam.gserviceaccount.com @@ -71,7 +71,7 @@ jobs: key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp - name: Install dependencies - run: poetry install --no-interaction -E snowflake 
-E s3 -E gs -E az + run: poetry install --no-interaction -E snowflake -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline - run: | poetry run pytest tests/load diff --git a/.github/workflows/test_destination_synapse.yml b/.github/workflows/test_destination_synapse.yml index e86e29ebf6..d0f364c382 100644 --- a/.github/workflows/test_destination_synapse.yml +++ b/.github/workflows/test_destination_synapse.yml @@ -5,9 +5,9 @@ on: branches: - master - devel - + workflow_dispatch: - + env: DESTINATION__SYNAPSE__CREDENTIALS: ${{ secrets.SYNAPSE_CREDENTIALS }} DESTINATION__SYNAPSE__CREDENTIALS__PASSWORD: ${{ secrets.SYNAPSE_PASSWORD }} @@ -42,7 +42,7 @@ jobs: runs-on: ${{ matrix.os }} steps: - + - name: Check out uses: actions/checkout@master @@ -70,7 +70,7 @@ jobs: key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp - name: Install dependencies - run: poetry install --no-interaction -E synapse -E s3 -E gs -E az + run: poetry install --no-interaction -E synapse -E s3 -E gs -E az --with sentry-sdk --with pipeline - run: | poetry run pytest tests/load --ignore tests/load/pipeline/test_dbt_helper.py diff --git a/.github/workflows/test_destination_weaviate.yml b/.github/workflows/test_destination_weaviate.yml index 6a7a2e95cd..c771a28204 100644 --- a/.github/workflows/test_destination_weaviate.yml +++ b/.github/workflows/test_destination_weaviate.yml @@ -61,7 +61,7 @@ jobs: key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp - name: Install dependencies - run: poetry install --no-interaction -E weaviate -E parquet + run: poetry install --no-interaction -E weaviate -E parquet --with sentry-sdk --with pipeline - run: | poetry run pytest tests/load/ if: runner.os != 'Windows' diff --git a/.github/workflows/test_destinations.yml b/.github/workflows/test_destinations.yml index f3f6c492db..6aaf440dcd 100644 --- a/.github/workflows/test_destinations.yml +++ b/.github/workflows/test_destinations.yml @@ -12,7 +12,7 @@ env: DESTINATION__POSTGRES__CREDENTIALS: postgresql://loader@dlttests.cwz0jfxu0m7m.eu-central-1.rds.amazonaws.com:5432/dlt_data DESTINATION__DUCKDB__CREDENTIALS: duckdb:///_storage/test_quack.duckdb DESTINATION__REDSHIFT__CREDENTIALS: postgresql://loader@3.73.90.3:5439/dlt_ci - DESTINATION__FILESYSTEM__CREDENTIALS__AWS_ACCESS_KEY_ID: AKIAT4QMVMC4J46G55G4 + DESTINATION__FILESYSTEM__CREDENTIALS__AWS_ACCESS_KEY_ID: AKIAT4QMVMC4LGORLZOK DESTINATION__FILESYSTEM__CREDENTIALS__AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} DESTINATION__FILESYSTEM__CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME: dltdata DESTINATION__FILESYSTEM__CREDENTIALS__AZURE_STORAGE_ACCOUNT_KEY: ${{ secrets.AZURE_STORAGE_ACCOUNT_KEY }} @@ -22,7 +22,7 @@ env: TESTS__R2_AWS_SECRET_ACCESS_KEY: ${{ secrets.CLOUDFLARE_R2_SECRET_ACCESS_KEY }} TESTS__R2_ENDPOINT_URL: https://9830548e4e4b582989be0811f2a0a97f.r2.cloudflarestorage.com - # DESTINATION__ATHENA__CREDENTIALS__AWS_ACCESS_KEY_ID: AKIAT4QMVMC4J46G55G4 + # DESTINATION__ATHENA__CREDENTIALS__AWS_ACCESS_KEY_ID: AKIAT4QMVMC4LGORLZOK # DESTINATION__ATHENA__CREDENTIALS__AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} # DESTINATION__ATHENA__CREDENTIALS__REGION_NAME: eu-central-1 # DESTINATION__ATHENA__QUERY_RESULT_BUCKET: s3://dlt-athena-output @@ -87,7 +87,7 @@ jobs: - name: Install dependencies # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction -E redshift -E gs -E s3 -E az -E 
parquet -E duckdb -E cli + run: poetry install --no-interaction -E redshift -E gs -E s3 -E az -E parquet -E duckdb -E cli --with sentry-sdk --with pipeline # - name: Install self # run: poetry install --no-interaction diff --git a/.github/workflows/test_doc_snippets.yml b/.github/workflows/test_doc_snippets.yml index 83c7332d52..5c6482e3ea 100644 --- a/.github/workflows/test_doc_snippets.yml +++ b/.github/workflows/test_doc_snippets.yml @@ -63,7 +63,7 @@ jobs: - name: Install dependencies # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction -E duckdb -E weaviate -E parquet -E qdrant --with docs --without airflow + run: poetry install --no-interaction -E duckdb -E weaviate -E parquet -E qdrant --with docs,sentry-sdk --without airflow - name: Run linter and tests run: make test-and-lint-snippets diff --git a/.github/workflows/test_local_destinations.yml b/.github/workflows/test_local_destinations.yml index 6c538d1968..42c3c2d13a 100644 --- a/.github/workflows/test_local_destinations.yml +++ b/.github/workflows/test_local_destinations.yml @@ -84,7 +84,7 @@ jobs: key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-local-destinations - name: Install dependencies - run: poetry install --no-interaction -E postgres -E duckdb -E parquet -E filesystem -E cli -E weaviate + run: poetry install --no-interaction -E postgres -E duckdb -E parquet -E filesystem -E cli -E weaviate --with sentry-sdk --with pipeline - run: poetry run pytest tests/load && poetry run pytest tests/cli name: Run tests Linux diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c5fb6f9658..29601f50ce 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -30,9 +30,21 @@ When you're ready to contribute, follow these steps: 3. Write your code and tests. 4. Lint your code by running `make lint` and test common modules with `make test-common`. 5. If you're working on destination code, contact us to get access to test destinations. -6. Create a pull request targeting the `devel` branch of the main repository. +6. Create a pull request targeting the **devel** branch of the main repository. **Note:** for some special cases, you'd need to contact us to create a branch in this repository (not a fork). See below. + +### Active branches + +We use **devel** (which is our default Github branch) to prepare the next release of `dlt`. We accept all regular contributions there (including most of the bugfixes). + +We use the **master** branch for hotfixes (including documentation) that need to be released outside the normal schedule. + +On the release day, the **devel** branch is merged into **master**. All releases of `dlt` happen only from **master**. + +### Submitting a hotfix +We'll fix critical bugs and release `dlt` outside the regular schedule. Follow the regular procedure, but make your PR against the **master** branch. Please ping us on Slack if you do it. + ### Testing with Github Actions We enable our CI to run tests for contributions from forks. All the tests are run, but not all destinations are available due to credentials. Currently only the `duckdb` and `postgres` are available to forks. @@ -77,33 +89,42 @@ We'll provide you with access to the resources above if you wish to test locally Use Python 3.8 for development, as it's the lowest supported version for `dlt`. You'll need `distutils` and `venv`. You may also use `pyenv`, as suggested by [poetry](https://python-poetry.org/docs/managing-environments/).
-# Publishing (Maintainers Only) +## Publishing (Maintainers Only) This section is intended for project maintainers who have the necessary permissions to manage the project's versioning and publish new releases. If you're a contributor, you can skip this section. -## Project Versioning +Please read how we [version the library](README.md#adding-as-dependency) first. -`dlt` follows the semantic versioning with the [`MAJOR.MINOR.PATCH`](https://peps.python.org/pep-0440/#semantic-versioning) pattern. Currently, we are using **pre-release versioning** with the major version being 0. +The source of truth for the current version is `pyproject.toml`, and we use `poetry` to manage it. -- `minor` version change means breaking changes -- `patch` version change means new features that should be backward compatible -- any suffix change, e.g., `post10` -> `post11`, is considered a patch +### Regular release Before publishing a new release, make sure to bump the project's version accordingly: -1. Modify `pyproject.toml` to add a `post` label or increase post release number ie: `version = "0.2.6.post1"` -2. Run `make build-library` to apply the changes to the project. -3. The source of the version is `pyproject.toml`, and we use `poetry` to manage it. +1. Check out the **devel** branch. +2. Use `poetry version patch` to increase the **patch** version +3. Run `make build-library` to apply the changes to the project. +4. Create a new branch, and submit the PR to **devel**. Go through the standard process to merge it. +5. Create a merge PR from `devel` to `master` and merge it. + +### Hotfix release +1. Check out the **master** branch +2. Use `poetry version patch` to increase the **patch** version +3. Run `make build-library` to apply the changes to the project. +4. Create a new branch, and submit the PR to **master** and merge it. -For pre-release please replace step (1) with: -1. Make sure you are not bumping post-release version. There are reports of `poetry` not working in that case. -2. Use `poetry version prerelease` to bump the pre-release version. +### Pre-release +Occasionally we may release an alpha version directly from the **devel** branch. +1. Check out the **devel** branch +2. Use `poetry version prerelease` to increase the **alpha** version +3. Run `make build-library` to apply the changes to the project. +4. Create a new branch, and submit the PR to **devel** and merge it. -## Publishing to PyPI +### Publishing to PyPI Once the version has been bumped, follow these steps to publish the new release to PyPI: -1. Ensure that you are on the `devel` branch and have the latest code that has passed all tests on CI. +1. Ensure that you are on the **master** branch and have the latest code that has passed all tests on CI. 2. Verify the current version with `poetry version`. 3. Obtain a PyPI access token and configure it with `poetry config pypi-token.pypi your-api-token`. 4. Run `make publish-library` to publish the new version.
diff --git a/Makefile b/Makefile index 85f67818ac..c1cb9bec98 100644 --- a/Makefile +++ b/Makefile @@ -44,15 +44,21 @@ has-poetry: poetry --version dev: has-poetry - poetry install --all-extras --with airflow --with docs --with providers + poetry install --all-extras --with airflow --with docs --with providers --with pipeline --with sentry-sdk lint: ./check-package.sh poetry run mypy --config-file mypy.ini dlt tests poetry run flake8 --max-line-length=200 dlt poetry run flake8 --max-line-length=200 tests --exclude tests/reflection/module_cases + poetry run black dlt docs tests --diff --extend-exclude=".*syntax_error.py" + # poetry run isort ./ --diff # $(MAKE) lint-security +format: + poetry run black dlt docs tests --exclude=".*syntax_error.py|\.venv.*|_storage/.*" + # poetry run isort ./ + test-and-lint-snippets: poetry run mypy --config-file mypy.ini docs/website docs/examples poetry run flake8 --max-line-length=200 docs/website docs/examples @@ -87,3 +93,4 @@ test-build-images: build-library grep `cat compiled_packages.txt` _gen_requirements.txt > compiled_requirements.txt docker build -f deploy/dlt/Dockerfile.airflow --build-arg=COMMIT_SHA="$(shell git log -1 --pretty=%h)" --build-arg=IMAGE_VERSION="$(shell poetry version -s)" . docker build -f deploy/dlt/Dockerfile --build-arg=COMMIT_SHA="$(shell git log -1 --pretty=%h)" --build-arg=IMAGE_VERSION="$(shell poetry version -s)" . + diff --git a/README.md b/README.md index ec56d97ab7..5a1376f08a 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,17 @@ For detailed usage and configuration, please refer to the [official documentatio You can find examples for various use cases in the [examples](docs/examples) folder. +## Adding as dependency + +`dlt` follows semantic versioning with the [`MAJOR.MINOR.PATCH`](https://peps.python.org/pep-0440/#semantic-versioning) pattern. Currently, we are using **pre-release versioning** with the major version being 0. + +- `minor` version change means breaking changes +- `patch` version change means new features that should be backward compatible +- any suffix change, e.g., `post10` -> `post11`, is considered a patch + +We suggest that you allow only `patch` level updates automatically: +* Using the [Compatible Release Specifier](https://packaging.python.org/en/latest/specifications/version-specifiers/#compatible-release). For example **dlt~=0.3.10** allows only versions **>=0.3.10** and **<0.4** +* Poetry [caret requirements](https://python-poetry.org/docs/dependency-specification/). For example **^0.3.10** allows only versions **>=0.3.10** and **<0.4** ## Get Involved The dlt project is quickly growing, and we're excited to have you join our community!
Here's how you can get involved: diff --git a/dlt/__init__.py b/dlt/__init__.py index f5dde3f204..e2a6b1a3a7 100644 --- a/dlt/__init__.py +++ b/dlt/__init__.py @@ -29,8 +29,17 @@ from dlt import sources from dlt.extract.decorators import source, resource, transformer, defer -from dlt.pipeline import pipeline as _pipeline, run, attach, Pipeline, dbt, current as _current, mark as _mark +from dlt.pipeline import ( + pipeline as _pipeline, + run, + attach, + Pipeline, + dbt, + current as _current, + mark as _mark, +) from dlt.pipeline import progress +from dlt import destinations pipeline = _pipeline current = _current @@ -64,4 +73,5 @@ "TSecretValue", "TCredentials", "sources", + "destinations", ] diff --git a/dlt/cli/_dlt.py b/dlt/cli/_dlt.py index dfda2966b9..1d5f7ce932 100644 --- a/dlt/cli/_dlt.py +++ b/dlt/cli/_dlt.py @@ -14,13 +14,28 @@ from dlt.cli import utils from dlt.pipeline.exceptions import CannotRestorePipelineException -from dlt.cli.init_command import init_command, list_verified_sources_command, DLT_INIT_DOCS_URL, DEFAULT_VERIFIED_SOURCES_REPO +from dlt.cli.init_command import ( + init_command, + list_verified_sources_command, + DLT_INIT_DOCS_URL, + DEFAULT_VERIFIED_SOURCES_REPO, +) from dlt.cli.pipeline_command import pipeline_command, DLT_PIPELINE_COMMAND_DOCS_URL -from dlt.cli.telemetry_command import DLT_TELEMETRY_DOCS_URL, change_telemetry_status_command, telemetry_status_command +from dlt.cli.telemetry_command import ( + DLT_TELEMETRY_DOCS_URL, + change_telemetry_status_command, + telemetry_status_command, +) try: from dlt.cli import deploy_command - from dlt.cli.deploy_command import PipelineWasNotRun, DLT_DEPLOY_DOCS_URL, DeploymentMethods, COMMAND_DEPLOY_REPO_LOCATION, SecretFormats + from dlt.cli.deploy_command import ( + PipelineWasNotRun, + DLT_DEPLOY_DOCS_URL, + DeploymentMethods, + COMMAND_DEPLOY_REPO_LOCATION, + SecretFormats, + ) except ModuleNotFoundError: pass @@ -35,10 +50,16 @@ def on_exception(ex: Exception, info: str) -> None: raise ex -@utils.track_command("init", False, "source_name", "destination_name") -def init_command_wrapper(source_name: str, destination_name: str, use_generic_template: bool, repo_location: str, branch: str) -> int: +@utils.track_command("init", False, "source_name", "destination_type") +def init_command_wrapper( + source_name: str, + destination_type: str, + use_generic_template: bool, + repo_location: str, + branch: str, +) -> int: try: - init_command(source_name, destination_name, use_generic_template, repo_location, branch) + init_command(source_name, destination_type, use_generic_template, repo_location, branch) except Exception as ex: on_exception(ex, DLT_INIT_DOCS_URL) return -1 @@ -56,7 +77,12 @@ def list_verified_sources_command_wrapper(repo_location: str, branch: str) -> in @utils.track_command("deploy", False, "deployment_method") -def deploy_command_wrapper(pipeline_script_path: str, deployment_method: str, repo_location: str, branch: Optional[str] = None, **kwargs: Any +def deploy_command_wrapper( + pipeline_script_path: str, + deployment_method: str, + repo_location: str, + branch: Optional[str] = None, + **kwargs: Any, ) -> int: try: utils.ensure_git_command("deploy") @@ -65,35 +91,41 @@ def deploy_command_wrapper(pipeline_script_path: str, deployment_method: str, re return -1 from git import InvalidGitRepositoryError, NoSuchPathError + try: deploy_command.deploy_command( pipeline_script_path=pipeline_script_path, deployment_method=deployment_method, repo_location=repo_location, branch=branch, - **kwargs + **kwargs, ) 
except (CannotRestorePipelineException, PipelineWasNotRun) as ex: - fmt.note("You must run the pipeline locally successfully at least once in order to deploy it.") + fmt.note( + "You must run the pipeline locally successfully at least once in order to deploy it." + ) on_exception(ex, DLT_DEPLOY_DOCS_URL) return -2 except InvalidGitRepositoryError: click.secho( "No git repository found for pipeline script %s." % fmt.bold(pipeline_script_path), err=True, - fg="red" + fg="red", ) fmt.note("If you do not have a repository yet, you can do either of:") - fmt.note("- Run the following command to initialize new repository: %s" % fmt.bold("git init")) - fmt.note("- Add your local code to Github as described here: %s" % fmt.bold("https://docs.github.com/en/get-started/importing-your-projects-to-github/importing-source-code-to-github/adding-locally-hosted-code-to-github")) + fmt.note( + "- Run the following command to initialize new repository: %s" % fmt.bold("git init") + ) + fmt.note( + "- Add your local code to Github as described here: %s" + % fmt.bold( + "https://docs.github.com/en/get-started/importing-your-projects-to-github/importing-source-code-to-github/adding-locally-hosted-code-to-github" + ) + ) fmt.note("Please refer to %s for further assistance" % fmt.bold(DLT_DEPLOY_DOCS_URL)) return -3 except NoSuchPathError as path_ex: - click.secho( - "The pipeline script does not exist\n%s" % str(path_ex), - err=True, - fg="red" - ) + click.secho("The pipeline script does not exist\n%s" % str(path_ex), err=True, fg="red") return -4 except Exception as ex: on_exception(ex, DLT_DEPLOY_DOCS_URL) @@ -103,14 +135,17 @@ def deploy_command_wrapper(pipeline_script_path: str, deployment_method: str, re @utils.track_command("pipeline", True, "operation") def pipeline_command_wrapper( - operation: str, pipeline_name: str, pipelines_dir: str, verbosity: int, **command_kwargs: Any + operation: str, pipeline_name: str, pipelines_dir: str, verbosity: int, **command_kwargs: Any ) -> int: try: pipeline_command(operation, pipeline_name, pipelines_dir, verbosity, **command_kwargs) return 0 except CannotRestorePipelineException as ex: click.secho(str(ex), err=True, fg="red") - click.secho("Try command %s to restore the pipeline state from destination" % fmt.bold(f"dlt pipeline {pipeline_name} sync")) + click.secho( + "Try command %s to restore the pipeline state from destination" + % fmt.bold(f"dlt pipeline {pipeline_name} sync") + ) return -1 except Exception as ex: on_exception(ex, DLT_PIPELINE_COMMAND_DOCS_URL) @@ -155,21 +190,31 @@ def telemetry_change_status_command_wrapper(enabled: bool) -> int: ACTION_EXECUTED = False + def print_help(parser: argparse.ArgumentParser) -> None: if not ACTION_EXECUTED: parser.print_help() class TelemetryAction(argparse.Action): - def __init__(self, option_strings: Sequence[str], dest: Any = argparse.SUPPRESS, default: Any = argparse.SUPPRESS, help: str = None) -> None: # noqa + def __init__( + self, + option_strings: Sequence[str], + dest: Any = argparse.SUPPRESS, + default: Any = argparse.SUPPRESS, + help: str = None, # noqa + ) -> None: super(TelemetryAction, self).__init__( - option_strings=option_strings, - dest=dest, - default=default, - nargs=0, - help=help + option_strings=option_strings, dest=dest, default=default, nargs=0, help=help ) - def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: Any, option_string: str = None) -> None: + + def __call__( + self, + parser: argparse.ArgumentParser, + namespace: argparse.Namespace, + values: 
Any, + option_string: str = None, + ) -> None: global ACTION_EXECUTED ACTION_EXECUTED = True @@ -177,129 +222,347 @@ def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespac class NonInteractiveAction(argparse.Action): - def __init__(self, option_strings: Sequence[str], dest: Any = argparse.SUPPRESS, default: Any = argparse.SUPPRESS, help: str = None) -> None: # noqa + def __init__( + self, + option_strings: Sequence[str], + dest: Any = argparse.SUPPRESS, + default: Any = argparse.SUPPRESS, + help: str = None, # noqa + ) -> None: super(NonInteractiveAction, self).__init__( - option_strings=option_strings, - dest=dest, - default=default, - nargs=0, - help=help + option_strings=option_strings, dest=dest, default=default, nargs=0, help=help ) - def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: Any, option_string: str = None) -> None: + + def __call__( + self, + parser: argparse.ArgumentParser, + namespace: argparse.Namespace, + values: Any, + option_string: str = None, + ) -> None: fmt.ALWAYS_CHOOSE_DEFAULT = True class DebugAction(argparse.Action): - def __init__(self, option_strings: Sequence[str], dest: Any = argparse.SUPPRESS, default: Any = argparse.SUPPRESS, help: str = None) -> None: # noqa + def __init__( + self, + option_strings: Sequence[str], + dest: Any = argparse.SUPPRESS, + default: Any = argparse.SUPPRESS, + help: str = None, # noqa + ) -> None: super(DebugAction, self).__init__( - option_strings=option_strings, - dest=dest, - default=default, - nargs=0, - help=help + option_strings=option_strings, dest=dest, default=default, nargs=0, help=help ) - def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: Any, option_string: str = None) -> None: + + def __call__( + self, + parser: argparse.ArgumentParser, + namespace: argparse.Namespace, + values: Any, + option_string: str = None, + ) -> None: global DEBUG_FLAG # will show stack traces (and maybe more debug things) DEBUG_FLAG = True def main() -> int: - parser = argparse.ArgumentParser(description="Creates, adds, inspects and deploys dlt pipelines.", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--version', action="version", version='%(prog)s {version}'.format(version=__version__)) - parser.add_argument('--disable-telemetry', action=TelemetryAction, help="Disables telemetry before command is executed") - parser.add_argument('--enable-telemetry', action=TelemetryAction, help="Enables telemetry before command is executed") - parser.add_argument('--non-interactive', action=NonInteractiveAction, help="Non interactive mode. Default choices are automatically made for confirmations and prompts.") - parser.add_argument('--debug', action=DebugAction, help="Displays full stack traces on exceptions.") + parser = argparse.ArgumentParser( + description="Creates, adds, inspects and deploys dlt pipelines.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--version", action="version", version="%(prog)s {version}".format(version=__version__) + ) + parser.add_argument( + "--disable-telemetry", + action=TelemetryAction, + help="Disables telemetry before command is executed", + ) + parser.add_argument( + "--enable-telemetry", + action=TelemetryAction, + help="Enables telemetry before command is executed", + ) + parser.add_argument( + "--non-interactive", + action=NonInteractiveAction, + help=( + "Non interactive mode. 
Default choices are automatically made for confirmations and" + " prompts." + ), + ) + parser.add_argument( + "--debug", action=DebugAction, help="Displays full stack traces on exceptions." + ) subparsers = parser.add_subparsers(dest="command") - init_cmd = subparsers.add_parser("init", help="Creates a pipeline project in the current folder by adding existing verified source or creating a new one from template.") - init_cmd.add_argument("--list-verified-sources", "-l", default=False, action="store_true", help="List available verified sources") - init_cmd.add_argument("source", nargs='?', help="Name of data source for which to create a pipeline. Adds existing verified source or creates a new pipeline template if verified source for your data source is not yet implemented.") - init_cmd.add_argument("destination", nargs='?', help="Name of a destination ie. bigquery or redshift") - init_cmd.add_argument("--location", default=DEFAULT_VERIFIED_SOURCES_REPO, help="Advanced. Uses a specific url or local path to verified sources repository.") - init_cmd.add_argument("--branch", default=None, help="Advanced. Uses specific branch of the init repository to fetch the template.") - init_cmd.add_argument("--generic", default=False, action="store_true", help="When present uses a generic template with all the dlt loading code present will be used. Otherwise a debug template is used that can be immediately run to get familiar with the dlt sources.") + init_cmd = subparsers.add_parser( + "init", + help=( + "Creates a pipeline project in the current folder by adding existing verified source or" + " creating a new one from template." + ), + ) + init_cmd.add_argument( + "--list-verified-sources", + "-l", + default=False, + action="store_true", + help="List available verified sources", + ) + init_cmd.add_argument( + "source", + nargs="?", + help=( + "Name of data source for which to create a pipeline. Adds existing verified source or" + " creates a new pipeline template if verified source for your data source is not yet" + " implemented." + ), + ) + init_cmd.add_argument( + "destination", nargs="?", help="Name of a destination ie. bigquery or redshift" + ) + init_cmd.add_argument( + "--location", + default=DEFAULT_VERIFIED_SOURCES_REPO, + help="Advanced. Uses a specific url or local path to verified sources repository.", + ) + init_cmd.add_argument( + "--branch", + default=None, + help="Advanced. Uses specific branch of the init repository to fetch the template.", + ) + init_cmd.add_argument( + "--generic", + default=False, + action="store_true", + help=( + "When present uses a generic template with all the dlt loading code present will be" + " used. Otherwise a debug template is used that can be immediately run to get familiar" + " with the dlt sources." + ), + ) # deploy command requires additional dependencies try: # make sure the name is defined _ = deploy_command - deploy_comm = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, add_help=False) - deploy_comm.add_argument("--location", default=COMMAND_DEPLOY_REPO_LOCATION, help="Advanced. Uses a specific url or local path to pipelines repository.") - deploy_comm.add_argument("--branch", help="Advanced. Uses specific branch of the deploy repository to fetch the template.") + deploy_comm = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, add_help=False + ) + deploy_comm.add_argument( + "--location", + default=COMMAND_DEPLOY_REPO_LOCATION, + help="Advanced. 
Uses a specific url or local path to pipelines repository.", + ) + deploy_comm.add_argument( + "--branch", + help="Advanced. Uses specific branch of the deploy repository to fetch the template.", + ) - deploy_cmd = subparsers.add_parser("deploy", help="Creates a deployment package for a selected pipeline script") - deploy_cmd.add_argument("pipeline_script_path", metavar="pipeline-script-path", help="Path to a pipeline script") + deploy_cmd = subparsers.add_parser( + "deploy", help="Creates a deployment package for a selected pipeline script" + ) + deploy_cmd.add_argument( + "pipeline_script_path", metavar="pipeline-script-path", help="Path to a pipeline script" + ) deploy_sub_parsers = deploy_cmd.add_subparsers(dest="deployment_method") # deploy github actions - deploy_github_cmd = deploy_sub_parsers.add_parser(DeploymentMethods.github_actions.value, help="Deploys the pipeline to Github Actions", parents=[deploy_comm]) - deploy_github_cmd.add_argument("--schedule", required=True, help="A schedule with which to run the pipeline, in cron format. Example: '*/30 * * * *' will run the pipeline every 30 minutes. Remember to enclose the scheduler expression in quotation marks!") - deploy_github_cmd.add_argument("--run-manually", default=True, action="store_true", help="Allows the pipeline to be run manually form Github Actions UI.") - deploy_github_cmd.add_argument("--run-on-push", default=False, action="store_true", help="Runs the pipeline with every push to the repository.") + deploy_github_cmd = deploy_sub_parsers.add_parser( + DeploymentMethods.github_actions.value, + help="Deploys the pipeline to Github Actions", + parents=[deploy_comm], + ) + deploy_github_cmd.add_argument( + "--schedule", + required=True, + help=( + "A schedule with which to run the pipeline, in cron format. Example: '*/30 * * * *'" + " will run the pipeline every 30 minutes. Remember to enclose the scheduler" + " expression in quotation marks!" 
+ ), + ) + deploy_github_cmd.add_argument( + "--run-manually", + default=True, + action="store_true", + help="Allows the pipeline to be run manually form Github Actions UI.", + ) + deploy_github_cmd.add_argument( + "--run-on-push", + default=False, + action="store_true", + help="Runs the pipeline with every push to the repository.", + ) # deploy airflow composer - deploy_airflow_cmd = deploy_sub_parsers.add_parser(DeploymentMethods.airflow_composer.value, help="Deploys the pipeline to Airflow", parents=[deploy_comm]) - deploy_airflow_cmd.add_argument("--secrets-format", default=SecretFormats.toml.value, choices=[v.value for v in SecretFormats], required=False, help="Format of the secrets") + deploy_airflow_cmd = deploy_sub_parsers.add_parser( + DeploymentMethods.airflow_composer.value, + help="Deploys the pipeline to Airflow", + parents=[deploy_comm], + ) + deploy_airflow_cmd.add_argument( + "--secrets-format", + default=SecretFormats.toml.value, + choices=[v.value for v in SecretFormats], + required=False, + help="Format of the secrets", + ) except NameError: # create placeholder command - deploy_cmd = subparsers.add_parser("deploy", help='Install additional dependencies with pip install "dlt[cli]" to create deployment packages', add_help=False) + deploy_cmd = subparsers.add_parser( + "deploy", + help=( + 'Install additional dependencies with pip install "dlt[cli]" to create deployment' + " packages" + ), + add_help=False, + ) deploy_cmd.add_argument("--help", "-h", nargs="?", const=True) - deploy_cmd.add_argument("pipeline_script_path", metavar="pipeline-script-path", nargs=argparse.REMAINDER) + deploy_cmd.add_argument( + "pipeline_script_path", metavar="pipeline-script-path", nargs=argparse.REMAINDER + ) schema = subparsers.add_parser("schema", help="Shows, converts and upgrades schemas") - schema.add_argument("file", help="Schema file name, in yaml or json format, will autodetect based on extension") - schema.add_argument("--format", choices=["json", "yaml"], default="yaml", help="Display schema in this format") - schema.add_argument("--remove-defaults", action="store_true", help="Does not show default hint values") + schema.add_argument( + "file", help="Schema file name, in yaml or json format, will autodetect based on extension" + ) + schema.add_argument( + "--format", choices=["json", "yaml"], default="yaml", help="Display schema in this format" + ) + schema.add_argument( + "--remove-defaults", action="store_true", help="Does not show default hint values" + ) - pipe_cmd = subparsers.add_parser("pipeline", help="Operations on pipelines that were ran locally") - pipe_cmd.add_argument("--list-pipelines", "-l", default=False, action="store_true", help="List local pipelines") - pipe_cmd.add_argument("pipeline_name", nargs='?', help="Pipeline name") + pipe_cmd = subparsers.add_parser( + "pipeline", help="Operations on pipelines that were ran locally" + ) + pipe_cmd.add_argument( + "--list-pipelines", "-l", default=False, action="store_true", help="List local pipelines" + ) + pipe_cmd.add_argument("pipeline_name", nargs="?", help="Pipeline name") pipe_cmd.add_argument("--pipelines-dir", help="Pipelines working directory", default=None) - pipe_cmd.add_argument("--verbose", "-v", action='count', default=0, help="Provides more information for certain commands.", dest="verbosity") + pipe_cmd.add_argument( + "--verbose", + "-v", + action="count", + default=0, + help="Provides more information for certain commands.", + dest="verbosity", + ) pipeline_subparsers = 
pipe_cmd.add_subparsers(dest="operation", required=False) pipe_cmd_sync_parent = argparse.ArgumentParser(add_help=False) - pipe_cmd_sync_parent.add_argument("--destination", help="Sync from this destination when local pipeline state is missing.") - pipe_cmd_sync_parent.add_argument("--dataset-name", help="Dataset name to sync from when local pipeline state is missing.") + pipe_cmd_sync_parent.add_argument( + "--destination", help="Sync from this destination when local pipeline state is missing." + ) + pipe_cmd_sync_parent.add_argument( + "--dataset-name", help="Dataset name to sync from when local pipeline state is missing." + ) - pipeline_subparsers.add_parser("info", help="Displays state of the pipeline, use -v or -vv for more info") - pipeline_subparsers.add_parser("show", help="Generates and launches Streamlit app with the loading status and dataset explorer") - pipeline_subparsers.add_parser("failed-jobs", help="Displays information on all the failed loads in all completed packages, failed jobs and associated error messages") - pipeline_subparsers.add_parser("drop-pending-packages", help="Deletes all extracted and normalized packages including those that are partially loaded.") + pipeline_subparsers.add_parser( + "info", help="Displays state of the pipeline, use -v or -vv for more info" + ) + pipeline_subparsers.add_parser( + "show", + help="Generates and launches Streamlit app with the loading status and dataset explorer", + ) + pipeline_subparsers.add_parser( + "failed-jobs", + help=( + "Displays information on all the failed loads in all completed packages, failed jobs" + " and associated error messages" + ), + ) + pipeline_subparsers.add_parser( + "drop-pending-packages", + help=( + "Deletes all extracted and normalized packages including those that are partially" + " loaded." + ), + ) pipeline_subparsers.add_parser( "sync", - help="Drops the local state of the pipeline and resets all the schemas and restores it from destination. The destination state, data and schemas are left intact.", - parents=[pipe_cmd_sync_parent] + help=( + "Drops the local state of the pipeline and resets all the schemas and restores it from" + " destination. The destination state, data and schemas are left intact." + ), + parents=[pipe_cmd_sync_parent], + ) + pipeline_subparsers.add_parser( + "trace", help="Displays last run trace, use -v or -vv for more info" ) - pipeline_subparsers.add_parser("trace", help="Displays last run trace, use -v or -vv for more info") pipe_cmd_schema = pipeline_subparsers.add_parser("schema", help="Displays default schema") - pipe_cmd_schema.add_argument("--format", choices=["json", "yaml"], default="yaml", help="Display schema in this format") - pipe_cmd_schema.add_argument("--remove-defaults", action="store_true", help="Does not show default hint values") + pipe_cmd_schema.add_argument( + "--format", choices=["json", "yaml"], default="yaml", help="Display schema in this format" + ) + pipe_cmd_schema.add_argument( + "--remove-defaults", action="store_true", help="Does not show default hint values" + ) pipe_cmd_drop = pipeline_subparsers.add_parser( "drop", help="Selectively drop tables and reset state", parents=[pipe_cmd_sync_parent], - epilog=f"See {DLT_PIPELINE_COMMAND_DOCS_URL}#selectively-drop-tables-and-reset-state for more info" + epilog=( + f"See {DLT_PIPELINE_COMMAND_DOCS_URL}#selectively-drop-tables-and-reset-state for more" + " info" + ), + ) + pipe_cmd_drop.add_argument( + "resources", + nargs="*", + help=( + "One or more resources to drop. 
Can be exact resource name(s) or regex pattern(s)." + " Regex patterns must start with re:" + ), + ) + pipe_cmd_drop.add_argument( + "--drop-all", + action="store_true", + default=False, + help="Drop all resources found in schema. Supersedes [resources] argument.", + ) + pipe_cmd_drop.add_argument( + "--state-paths", nargs="*", help="State keys or json paths to drop", default=() + ) + pipe_cmd_drop.add_argument( + "--schema", + help="Schema name to drop from (if other than default schema).", + dest="schema_name", + ) + pipe_cmd_drop.add_argument( + "--state-only", + action="store_true", + help="Only wipe state for matching resources without dropping tables.", + default=False, ) - pipe_cmd_drop.add_argument("resources", nargs="*", help="One or more resources to drop. Can be exact resource name(s) or regex pattern(s). Regex patterns must start with re:") - pipe_cmd_drop.add_argument("--drop-all", action="store_true", default=False, help="Drop all resources found in schema. Supersedes [resources] argument.") - pipe_cmd_drop.add_argument("--state-paths", nargs="*", help="State keys or json paths to drop", default=()) - pipe_cmd_drop.add_argument("--schema", help="Schema name to drop from (if other than default schema).", dest="schema_name") - pipe_cmd_drop.add_argument("--state-only", action="store_true", help="Only wipe state for matching resources without dropping tables.", default=False) - pipe_cmd_package = pipeline_subparsers.add_parser("load-package", help="Displays information on load package, use -v or -vv for more info") - pipe_cmd_package.add_argument("load_id", metavar="load-id", nargs='?', help="Load id of completed or normalized package. Defaults to the most recent package.") + pipe_cmd_package = pipeline_subparsers.add_parser( + "load-package", help="Displays information on load package, use -v or -vv for more info" + ) + pipe_cmd_package.add_argument( + "load_id", + metavar="load-id", + nargs="?", + help="Load id of completed or normalized package. Defaults to the most recent package.", + ) subparsers.add_parser("telemetry", help="Shows telemetry status") args = parser.parse_args() if Venv.is_virtual_env() and not Venv.is_venv_activated(): - fmt.warning("You are running dlt installed in the global environment, however you have virtual environment activated. The dlt command will not see dependencies from virtual environment. You should uninstall the dlt from global environment and install it in the current virtual environment instead.") + fmt.warning( + "You are running dlt installed in the global environment, however you have virtual" + " environment activated. The dlt command will not see dependencies from virtual" + " environment. You should uninstall the dlt from global environment and install it in" + " the current virtual environment instead." 
+ ) if args.command == "schema": return schema_command_wrapper(args.file, args.format, args.remove_defaults) @@ -311,7 +574,7 @@ def main() -> int: if not command_kwargs.get("pipeline_name"): pipe_cmd.print_usage() return -1 - command_kwargs['operation'] = args.operation or "info" + command_kwargs["operation"] = args.operation or "info" del command_kwargs["command"] del command_kwargs["list_pipelines"] return pipeline_command_wrapper(**command_kwargs) @@ -323,7 +586,9 @@ def main() -> int: init_cmd.print_usage() return -1 else: - return init_command_wrapper(args.source, args.destination, args.generic, args.location, args.branch) + return init_command_wrapper( + args.source, args.destination, args.generic, args.location, args.branch + ) elif args.command == "deploy": try: deploy_args = vars(args) @@ -332,12 +597,17 @@ def main() -> int: deployment_method=deploy_args.pop("deployment_method"), repo_location=deploy_args.pop("location"), branch=deploy_args.pop("branch"), - **deploy_args + **deploy_args, ) except (NameError, KeyError): - fmt.warning("Please install additional command line dependencies to use deploy command:") + fmt.warning( + "Please install additional command line dependencies to use deploy command:" + ) fmt.secho('pip install "dlt[cli]"', bold=True) - fmt.echo("We ask you to install those dependencies separately to keep our core library small and make it work everywhere.") + fmt.echo( + "We ask you to install those dependencies separately to keep our core library small" + " and make it work everywhere." + ) return -1 elif args.command == "telemetry": return telemetry_status_command_wrapper() diff --git a/dlt/cli/config_toml_writer.py b/dlt/cli/config_toml_writer.py index ca2e74fd15..8cf831d725 100644 --- a/dlt/cli/config_toml_writer.py +++ b/dlt/cli/config_toml_writer.py @@ -5,7 +5,11 @@ from collections.abc import Sequence as C_Sequence from dlt.common import pendulum -from dlt.common.configuration.specs import BaseConfiguration, is_base_configuration_inner_hint, extract_inner_hint +from dlt.common.configuration.specs import ( + BaseConfiguration, + is_base_configuration_inner_hint, + extract_inner_hint, +) from dlt.common.data_types import py_type_to_sc_type from dlt.common.typing import AnyType, is_final_type, is_optional_type @@ -53,13 +57,15 @@ def write_value( hint: AnyType, overwrite_existing: bool, default_value: Any = None, - is_default_of_interest: bool = False + is_default_of_interest: bool = False, ) -> None: # skip if table contains the name already if name in toml_table and not overwrite_existing: return # do not dump final and optional fields if they are not of special interest - if (is_final_type(hint) or is_optional_type(hint) or default_value is not None) and not is_default_of_interest: + if ( + is_final_type(hint) or is_optional_type(hint) or default_value is not None + ) and not is_default_of_interest: return # get the inner hint to generate cool examples hint = extract_inner_hint(hint) @@ -84,10 +90,19 @@ def write_spec(toml_table: TOMLTable, config: BaseConfiguration, overwrite_exist default_value = getattr(config, name, None) # check if field is of particular interest and should be included if it has default is_default_of_interest = name in config.__config_gen_annotations__ - write_value(toml_table, name, hint, overwrite_existing, default_value=default_value, is_default_of_interest=is_default_of_interest) + write_value( + toml_table, + name, + hint, + overwrite_existing, + default_value=default_value, + is_default_of_interest=is_default_of_interest, + ) 
-def write_values(toml: TOMLContainer, values: Iterable[WritableConfigValue], overwrite_existing: bool) -> None: +def write_values( + toml: TOMLContainer, values: Iterable[WritableConfigValue], overwrite_existing: bool +) -> None: for value in values: toml_table: TOMLTable = toml # type: ignore for section in value.sections: @@ -98,4 +113,11 @@ def write_values(toml: TOMLContainer, values: Iterable[WritableConfigValue], ove else: toml_table = toml_table[section] # type: ignore - write_value(toml_table, value.name, value.hint, overwrite_existing, default_value=value.default_value, is_default_of_interest=True) + write_value( + toml_table, + value.name, + value.hint, + overwrite_existing, + default_value=value.default_value, + is_default_of_interest=True, + ) diff --git a/dlt/cli/deploy_command.py b/dlt/cli/deploy_command.py index 7634f173b3..5a25752a6d 100644 --- a/dlt/cli/deploy_command.py +++ b/dlt/cli/deploy_command.py @@ -11,16 +11,26 @@ from dlt.cli import utils from dlt.cli import echo as fmt -from dlt.cli.deploy_command_helpers import (PipelineWasNotRun, BaseDeployment, ask_files_overwrite, generate_pip_freeze, github_origin_to_url, serialize_templated_yaml, - wrap_template_str, get_schedule_description) +from dlt.cli.deploy_command_helpers import ( + PipelineWasNotRun, + BaseDeployment, + ask_files_overwrite, + generate_pip_freeze, + github_origin_to_url, + serialize_templated_yaml, + wrap_template_str, + get_schedule_description, +) from dlt.version import DLT_PKG_NAME -from dlt.common.destination.reference import DestinationReference +from dlt.common.destination.reference import Destination REQUIREMENTS_GITHUB_ACTION = "requirements_github_action.txt" DLT_DEPLOY_DOCS_URL = "https://dlthub.com/docs/walkthroughs/deploy-a-pipeline" -DLT_AIRFLOW_GCP_DOCS_URL = "https://dlthub.com/docs/walkthroughs/deploy-a-pipeline/deploy-with-airflow-composer" +DLT_AIRFLOW_GCP_DOCS_URL = ( + "https://dlthub.com/docs/walkthroughs/deploy-a-pipeline/deploy-with-airflow-composer" +) AIRFLOW_GETTING_STARTED = "https://airflow.apache.org/docs/apache-airflow/stable/start.html" AIRFLOW_DAG_TEMPLATE_SCRIPT = "dag_template.py" AIRFLOW_CLOUDBUILD_YAML = "cloudbuild.yaml" @@ -38,9 +48,13 @@ class SecretFormats(Enum): toml = "toml" -def deploy_command(pipeline_script_path: str, deployment_method: str, repo_location: str, branch: Optional[str] = None, **kwargs: Any +def deploy_command( + pipeline_script_path: str, + deployment_method: str, + repo_location: str, + branch: Optional[str] = None, + **kwargs: Any, ) -> None: - # get current repo local folder deployment_class: Type[BaseDeployment] = None if deployment_method == DeploymentMethods.github_actions.value: @@ -48,10 +62,15 @@ def deploy_command(pipeline_script_path: str, deployment_method: str, repo_locat elif deployment_method == DeploymentMethods.airflow_composer.value: deployment_class = AirflowDeployment else: - raise ValueError(f"Deployment method '{deployment_method}' is not supported. Only {', '.join([m.value for m in DeploymentMethods])} are available.'") + raise ValueError( + f"Deployment method '{deployment_method}' is not supported. 
Only" + f" {', '.join([m.value for m in DeploymentMethods])} are available.'" + ) # command no longer needed kwargs.pop("command", None) - deployment_class(pipeline_script_path=pipeline_script_path, location=repo_location, branch=branch, **kwargs).run_deployment() + deployment_class( + pipeline_script_path=pipeline_script_path, location=repo_location, branch=branch, **kwargs + ).run_deployment() class GithubActionDeployment(BaseDeployment): @@ -77,22 +96,25 @@ def _generate_workflow(self, *args: Optional[Any]) -> None: if self.schedule_description is None: # TODO: move that check to _dlt and some intelligent help message on missing arg raise ValueError( - f"Setting 'schedule' for '{self.deployment_method}' is required! Use deploy command as 'dlt deploy chess.py {self.deployment_method} --schedule \"*/30 * * * *\"'." + f"Setting 'schedule' for '{self.deployment_method}' is required! Use deploy command" + f" as 'dlt deploy chess.py {self.deployment_method} --schedule \"*/30 * * * *\"'." ) workflow = self._create_new_workflow() serialized_workflow = serialize_templated_yaml(workflow) serialized_workflow_name = f"run_{self.state['pipeline_name']}_workflow.yml" - self.artifacts['serialized_workflow'] = serialized_workflow - self.artifacts['serialized_workflow_name'] = serialized_workflow_name + self.artifacts["serialized_workflow"] = serialized_workflow + self.artifacts["serialized_workflow_name"] = serialized_workflow_name # pip freeze special requirements file - with self.template_storage.open_file(os.path.join(self.deployment_method, "requirements_blacklist.txt")) as f: + with self.template_storage.open_file( + os.path.join(self.deployment_method, "requirements_blacklist.txt") + ) as f: requirements_blacklist = f.readlines() requirements_txt = generate_pip_freeze(requirements_blacklist, REQUIREMENTS_GITHUB_ACTION) requirements_txt_name = REQUIREMENTS_GITHUB_ACTION # if repo_storage.has_file(utils.REQUIREMENTS_TXT): - self.artifacts['requirements_txt'] = requirements_txt - self.artifacts['requirements_txt_name'] = requirements_txt_name + self.artifacts["requirements_txt"] = requirements_txt + self.artifacts["requirements_txt_name"] = requirements_txt_name def _make_modification(self) -> None: if not self.repo_storage.has_folder(utils.GITHUB_WORKFLOWS_DIR): @@ -100,15 +122,21 @@ def _make_modification(self) -> None: self.repo_storage.save( os.path.join(utils.GITHUB_WORKFLOWS_DIR, self.artifacts["serialized_workflow_name"]), - self.artifacts["serialized_workflow"] + self.artifacts["serialized_workflow"], + ) + self.repo_storage.save( + self.artifacts["requirements_txt_name"], self.artifacts["requirements_txt"] ) - self.repo_storage.save(self.artifacts["requirements_txt_name"], self.artifacts["requirements_txt"]) def _create_new_workflow(self) -> Any: - with self.template_storage.open_file(os.path.join(self.deployment_method, "run_pipeline_workflow.yml")) as f: + with self.template_storage.open_file( + os.path.join(self.deployment_method, "run_pipeline_workflow.yml") + ) as f: workflow = yaml.safe_load(f) # customize the workflow - workflow["name"] = f"Run {self.state['pipeline_name']} pipeline from {self.pipeline_script_path}" + workflow["name"] = ( + f"Run {self.state['pipeline_name']} pipeline from {self.pipeline_script_path}" + ) if self.run_on_push is False: del workflow["on"]["push"] if self.run_manually is False: @@ -137,51 +165,98 @@ def _create_new_workflow(self) -> Any: return workflow def _echo_instructions(self, *args: Optional[Any]) -> None: - fmt.echo("Your %s deployment for 
pipeline %s in script %s is ready!" % ( - fmt.bold(self.deployment_method), fmt.bold(self.state["pipeline_name"]), fmt.bold(self.pipeline_script_path) - )) + fmt.echo( + "Your %s deployment for pipeline %s in script %s is ready!" + % ( + fmt.bold(self.deployment_method), + fmt.bold(self.state["pipeline_name"]), + fmt.bold(self.pipeline_script_path), + ) + ) # It contains all relevant configurations and references to credentials that are needed to run the pipeline - fmt.echo("* A github workflow file %s was created in %s." % ( - fmt.bold(self.artifacts["serialized_workflow_name"]), fmt.bold(utils.GITHUB_WORKFLOWS_DIR) - )) - fmt.echo("* The schedule with which the pipeline is run is: %s.%s%s" % ( - fmt.bold(self.schedule_description), - " You can also run the pipeline manually." if self.run_manually else "", - " Pipeline will also run on each push to the repository." if self.run_on_push else "", - )) fmt.echo( - "* The dependencies that will be used to run the pipeline are stored in %s. If you change add more dependencies, remember to refresh your deployment by running the same 'deploy' command again." % fmt.bold( - self.artifacts['requirements_txt_name']) + "* A github workflow file %s was created in %s." + % ( + fmt.bold(self.artifacts["serialized_workflow_name"]), + fmt.bold(utils.GITHUB_WORKFLOWS_DIR), + ) + ) + fmt.echo( + "* The schedule with which the pipeline is run is: %s.%s%s" + % ( + fmt.bold(self.schedule_description), + " You can also run the pipeline manually." if self.run_manually else "", + ( + " Pipeline will also run on each push to the repository." + if self.run_on_push + else "" + ), + ) + ) + fmt.echo( + "* The dependencies that will be used to run the pipeline are stored in %s. If you" + " change add more dependencies, remember to refresh your deployment by running the same" + " 'deploy' command again." + % fmt.bold(self.artifacts["requirements_txt_name"]) ) fmt.echo() if len(self.secret_envs) == 0 and len(self.envs) == 0: fmt.echo("1. Your pipeline does not seem to need any secrets.") else: - fmt.echo("You should now add the secrets to github repository secrets, commit and push the pipeline files to github.") - fmt.echo("1. Add the following secret values (typically stored in %s): \n%s\nin %s" % ( - fmt.bold(make_dlt_settings_path(SECRETS_TOML)), - fmt.bold("\n".join(self.env_prov.get_key_name(s_v.key, *s_v.sections) for s_v in self.secret_envs)), - fmt.bold(github_origin_to_url(self.origin, "/settings/secrets/actions")) - )) + fmt.echo( + "You should now add the secrets to github repository secrets, commit and push the" + " pipeline files to github." + ) + fmt.echo( + "1. Add the following secret values (typically stored in %s): \n%s\nin %s" + % ( + fmt.bold(make_dlt_settings_path(SECRETS_TOML)), + fmt.bold( + "\n".join( + self.env_prov.get_key_name(s_v.key, *s_v.sections) + for s_v in self.secret_envs + ) + ), + fmt.bold(github_origin_to_url(self.origin, "/settings/secrets/actions")), + ) + ) fmt.echo() self._echo_secrets() - fmt.echo("2. Add stage deployment files to commit. Use your Git UI or the following command") - new_req_path = self.repo_storage.from_relative_path_to_wd(self.artifacts['requirements_txt_name']) - new_workflow_path = self.repo_storage.from_relative_path_to_wd(os.path.join(utils.GITHUB_WORKFLOWS_DIR, self.artifacts['serialized_workflow_name'])) - fmt.echo(fmt.bold( - f"git add {new_req_path} {new_workflow_path}")) + fmt.echo( + "2. Add stage deployment files to commit. 
Use your Git UI or the following command" + ) + new_req_path = self.repo_storage.from_relative_path_to_wd( + self.artifacts["requirements_txt_name"] + ) + new_workflow_path = self.repo_storage.from_relative_path_to_wd( + os.path.join(utils.GITHUB_WORKFLOWS_DIR, self.artifacts["serialized_workflow_name"]) + ) + fmt.echo(fmt.bold(f"git add {new_req_path} {new_workflow_path}")) fmt.echo() fmt.echo("3. Commit the files above. Use your Git UI or the following command") - fmt.echo(fmt.bold(f"git commit -m 'run {self.state['pipeline_name']} pipeline with github action'")) + fmt.echo( + fmt.bold( + f"git commit -m 'run {self.state['pipeline_name']} pipeline with github action'" + ) + ) if is_dirty(self.repo): - fmt.warning("You have modified files in your repository. Do not forget to push changes to your pipeline script as well!") + fmt.warning( + "You have modified files in your repository. Do not forget to push changes to your" + " pipeline script as well!" + ) fmt.echo() fmt.echo("4. Push changes to github. Use your Git UI or the following command") fmt.echo(fmt.bold("git push origin")) fmt.echo() fmt.echo("5. Your pipeline should be running! You can monitor it here:") - fmt.echo(fmt.bold(github_origin_to_url(self.origin, f"/actions/workflows/{self.artifacts['serialized_workflow_name']}"))) + fmt.echo( + fmt.bold( + github_origin_to_url( + self.origin, f"/actions/workflows/{self.artifacts['serialized_workflow_name']}" + ) + ) + ) class AirflowDeployment(BaseDeployment): @@ -198,7 +273,7 @@ def __init__( def _generate_workflow(self, *args: Optional[Any]) -> None: self.deployment_method = DeploymentMethods.airflow_composer.value - req_dep = f"{DLT_PKG_NAME}[{DestinationReference.to_name(self.state['destination'])}]" + req_dep = f"{DLT_PKG_NAME}[{Destination.to_name(self.state['destination_type'])}]" req_dep_line = f"{req_dep}>={pkg_version(DLT_PKG_NAME)}" self.artifacts["requirements_txt"] = req_dep_line @@ -206,11 +281,15 @@ def _generate_workflow(self, *args: Optional[Any]) -> None: dag_script_name = f"dag_{self.state['pipeline_name']}.py" self.artifacts["dag_script_name"] = dag_script_name - cloudbuild_file = self.template_storage.load(os.path.join(self.deployment_method, AIRFLOW_CLOUDBUILD_YAML)) + cloudbuild_file = self.template_storage.load( + os.path.join(self.deployment_method, AIRFLOW_CLOUDBUILD_YAML) + ) self.artifacts["cloudbuild_file"] = cloudbuild_file # TODO: rewrite dag file to at least set the schedule - dag_file = self.template_storage.load(os.path.join(self.deployment_method, AIRFLOW_DAG_TEMPLATE_SCRIPT)) + dag_file = self.template_storage.load( + os.path.join(self.deployment_method, AIRFLOW_DAG_TEMPLATE_SCRIPT) + ) self.artifacts["dag_file"] = dag_file # ask user if to overwrite the files @@ -227,61 +306,92 @@ def _make_modification(self) -> None: # save cloudbuild.yaml only if not exist to allow to run the deploy command for many different pipelines dest_cloud_build = os.path.join(utils.AIRFLOW_BUILD_FOLDER, AIRFLOW_CLOUDBUILD_YAML) if not self.repo_storage.has_file(dest_cloud_build): - self.repo_storage.save( - dest_cloud_build, - self.artifacts["cloudbuild_file"] - ) + self.repo_storage.save(dest_cloud_build, self.artifacts["cloudbuild_file"]) else: - fmt.warning(f"{AIRFLOW_CLOUDBUILD_YAML} already created. Delete the file and run the deploy command again to re-create.") + fmt.warning( + f"{AIRFLOW_CLOUDBUILD_YAML} already created. Delete the file and run the deploy" + " command again to re-create." 
+ ) dest_dag_script = os.path.join(utils.AIRFLOW_DAGS_FOLDER, self.artifacts["dag_script_name"]) - self.repo_storage.save( - dest_dag_script, - self.artifacts["dag_file"] - ) - + self.repo_storage.save(dest_dag_script, self.artifacts["dag_file"]) def _echo_instructions(self, *args: Optional[Any]) -> None: - fmt.echo("Your %s deployment for pipeline %s is ready!" % ( - fmt.bold(self.deployment_method), fmt.bold(self.state["pipeline_name"]), - )) - fmt.echo("* The airflow %s file was created in %s." % ( - fmt.bold(AIRFLOW_CLOUDBUILD_YAML), fmt.bold(utils.AIRFLOW_BUILD_FOLDER) - )) - fmt.echo("* The %s script was created in %s." % ( - fmt.bold(self.artifacts["dag_script_name"]), fmt.bold(utils.AIRFLOW_DAGS_FOLDER) - )) + fmt.echo( + "Your %s deployment for pipeline %s is ready!" + % ( + fmt.bold(self.deployment_method), + fmt.bold(self.state["pipeline_name"]), + ) + ) + fmt.echo( + "* The airflow %s file was created in %s." + % (fmt.bold(AIRFLOW_CLOUDBUILD_YAML), fmt.bold(utils.AIRFLOW_BUILD_FOLDER)) + ) + fmt.echo( + "* The %s script was created in %s." + % (fmt.bold(self.artifacts["dag_script_name"]), fmt.bold(utils.AIRFLOW_DAGS_FOLDER)) + ) fmt.echo() fmt.echo("You must prepare your DAG first:") - fmt.echo("1. Import your sources in %s, configure the DAG ans tasks as needed." % (fmt.bold(self.artifacts["dag_script_name"]))) - fmt.echo("2. Test the DAG with Airflow locally .\nSee Airflow getting started: %s" % (fmt.bold(AIRFLOW_GETTING_STARTED))) + fmt.echo( + "1. Import your sources in %s, configure the DAG ans tasks as needed." + % (fmt.bold(self.artifacts["dag_script_name"])) + ) + fmt.echo( + "2. Test the DAG with Airflow locally .\nSee Airflow getting started: %s" + % (fmt.bold(AIRFLOW_GETTING_STARTED)) + ) fmt.echo() - fmt.echo("If you are planning run the pipeline with Google Cloud Composer, follow the next instructions:\n") - fmt.echo("1. Read this doc and set up the Environment: %s" % ( - fmt.bold(DLT_AIRFLOW_GCP_DOCS_URL) - )) - fmt.echo("2. Set _BUCKET_NAME up in %s/%s file. " % ( - fmt.bold(utils.AIRFLOW_BUILD_FOLDER), fmt.bold(AIRFLOW_CLOUDBUILD_YAML), - )) + fmt.echo( + "If you are planning run the pipeline with Google Cloud Composer, follow the next" + " instructions:\n" + ) + fmt.echo( + "1. Read this doc and set up the Environment: %s" % (fmt.bold(DLT_AIRFLOW_GCP_DOCS_URL)) + ) + fmt.echo( + "2. Set _BUCKET_NAME up in %s/%s file. " + % ( + fmt.bold(utils.AIRFLOW_BUILD_FOLDER), + fmt.bold(AIRFLOW_CLOUDBUILD_YAML), + ) + ) if len(self.secret_envs) == 0 and len(self.envs) == 0: fmt.echo("3. Your pipeline does not seem to need any secrets.") else: if self.secrets_format == SecretFormats.env.value: - fmt.echo("3. Add the following secret values (typically stored in %s): \n%s\n%s\nin ENVIRONMENT VARIABLES using Google Composer UI" % ( - fmt.bold(make_dlt_settings_path(SECRETS_TOML)), - fmt.bold("\n".join(self.env_prov.get_key_name(s_v.key, *s_v.sections) for s_v in self.secret_envs)), - fmt.bold("\n".join(self.env_prov.get_key_name(v.key, *v.sections) for v in self.envs)), - )) + fmt.echo( + "3. 
Add the following secret values (typically stored in %s): \n%s\n%s\nin" + " ENVIRONMENT VARIABLES using Google Composer UI" + % ( + fmt.bold(make_dlt_settings_path(SECRETS_TOML)), + fmt.bold( + "\n".join( + self.env_prov.get_key_name(s_v.key, *s_v.sections) + for s_v in self.secret_envs + ) + ), + fmt.bold( + "\n".join( + self.env_prov.get_key_name(v.key, *v.sections) for v in self.envs + ) + ), + ) + ) fmt.echo() # if fmt.confirm("Do you want to list the environment variables in the format suitable for Airflow?", default=True): self._echo_secrets() self._echo_envs() elif self.secrets_format == SecretFormats.toml.value: # build toml - fmt.echo(f"3. Add the following toml-string in the Google Composer UI as the {SECRETS_TOML_KEY} variable.") + fmt.echo( + "3. Add the following toml-string in the Google Composer UI as the" + f" {SECRETS_TOML_KEY} variable." + ) fmt.echo() toml_provider = StringTomlProvider("") for s_v in self.secret_envs: @@ -294,18 +404,34 @@ def _echo_instructions(self, *args: Optional[Any]) -> None: fmt.echo("4. Add dlt package below using Google Composer UI.") fmt.echo(fmt.bold(self.artifacts["requirements_txt"])) - fmt.note("You may need to add more packages ie. when your source requires additional dependencies") + fmt.note( + "You may need to add more packages ie. when your source requires additional" + " dependencies" + ) fmt.echo("5. Commit and push the pipeline files to github:") - fmt.echo("a. Add stage deployment files to commit. Use your Git UI or the following command") + fmt.echo( + "a. Add stage deployment files to commit. Use your Git UI or the following command" + ) - dag_script_path = self.repo_storage.from_relative_path_to_wd(os.path.join(utils.AIRFLOW_DAGS_FOLDER, self.artifacts["dag_script_name"])) - cloudbuild_path = self.repo_storage.from_relative_path_to_wd(os.path.join(utils.AIRFLOW_BUILD_FOLDER, AIRFLOW_CLOUDBUILD_YAML)) + dag_script_path = self.repo_storage.from_relative_path_to_wd( + os.path.join(utils.AIRFLOW_DAGS_FOLDER, self.artifacts["dag_script_name"]) + ) + cloudbuild_path = self.repo_storage.from_relative_path_to_wd( + os.path.join(utils.AIRFLOW_BUILD_FOLDER, AIRFLOW_CLOUDBUILD_YAML) + ) fmt.echo(fmt.bold(f"git add {dag_script_path} {cloudbuild_path}")) fmt.echo("b. Commit the files above. Use your Git UI or the following command") - fmt.echo(fmt.bold(f"git commit -m 'initiate {self.state['pipeline_name']} pipeline with Airflow'")) + fmt.echo( + fmt.bold( + f"git commit -m 'initiate {self.state['pipeline_name']} pipeline with Airflow'" + ) + ) if is_dirty(self.repo): - fmt.warning("You have modified files in your repository. Do not forget to push changes to your pipeline script as well!") + fmt.warning( + "You have modified files in your repository. Do not forget to push changes to your" + " pipeline script as well!" + ) fmt.echo("c. Push changes to github. Use your Git UI or the following command") fmt.echo(fmt.bold("git push origin")) fmt.echo("6. 
You should see your pipeline in Airflow.") diff --git a/dlt/cli/deploy_command_helpers.py b/dlt/cli/deploy_command_helpers.py index 81852f3ce1..5065ba1cfc 100644 --- a/dlt/cli/deploy_command_helpers.py +++ b/dlt/cli/deploy_command_helpers.py @@ -6,6 +6,7 @@ from itertools import chain from typing import List, Optional, Sequence, Tuple, Any, Dict from astunparse import unparse + # optional dependencies import pipdeptree import cron_descriptor @@ -77,20 +78,36 @@ def _prepare_deployment(self) -> None: # make sure the repo has origin self.origin = self._get_origin() # convert to path relative to repo - self.repo_pipeline_script_path = self.repo_storage.from_wd_to_relative_path(self.pipeline_script_path) + self.repo_pipeline_script_path = self.repo_storage.from_wd_to_relative_path( + self.pipeline_script_path + ) # load a pipeline script and extract full_refresh and pipelines_dir args self.pipeline_script = self.repo_storage.load(self.repo_pipeline_script_path) - fmt.echo("Looking up the deployment template scripts in %s...\n" % fmt.bold(self.repo_location)) - self.template_storage = git.get_fresh_repo_files(self.repo_location, get_dlt_repos_dir(), branch=self.branch) + fmt.echo( + "Looking up the deployment template scripts in %s...\n" % fmt.bold(self.repo_location) + ) + self.template_storage = git.get_fresh_repo_files( + self.repo_location, get_dlt_repos_dir(), branch=self.branch + ) self.working_directory = os.path.split(self.pipeline_script_path)[0] def _get_origin(self) -> str: try: origin = get_origin(self.repo) if "github.com" not in origin: - raise CliCommandException("deploy", f"Your current repository origin is not set to github but to {origin}.\nYou must change it to be able to run the pipelines with github actions: https://docs.github.com/en/get-started/getting-started-with-git/managing-remote-repositories") + raise CliCommandException( + "deploy", + f"Your current repository origin is not set to github but to {origin}.\nYou" + " must change it to be able to run the pipelines with github actions:" + " https://docs.github.com/en/get-started/getting-started-with-git/managing-remote-repositories", + ) except ValueError: - raise CliCommandException("deploy", "Your current repository has no origin set. Please set it up to be able to run the pipelines with github actions: https://docs.github.com/en/get-started/importing-your-projects-to-github/importing-source-code-to-github/adding-locally-hosted-code-to-github") + raise CliCommandException( + "deploy", + "Your current repository has no origin set. 
Please set it up to be able to run the" + " pipelines with github actions:" + " https://docs.github.com/en/get-started/importing-your-projects-to-github/importing-source-code-to-github/adding-locally-hosted-code-to-github", + ) return origin @@ -104,14 +121,18 @@ def run_deployment(self) -> None: pipeline_name: str = None pipelines_dir: str = None - uniq_possible_pipelines = {t[0]:t for t in possible_pipelines} + uniq_possible_pipelines = {t[0]: t for t in possible_pipelines} if len(uniq_possible_pipelines) == 1: pipeline_name, pipelines_dir = possible_pipelines[0] elif len(uniq_possible_pipelines) > 1: choices = list(uniq_possible_pipelines.keys()) - choices_str = "".join([str(i+1) for i in range(len(choices))]) + choices_str = "".join([str(i + 1) for i in range(len(choices))]) choices_selection = [f"{idx+1}-{name}" for idx, name in enumerate(choices)] - sel = fmt.prompt("Several pipelines found in script, please select one: " + ", ".join(choices_selection), choices=choices_str) + sel = fmt.prompt( + "Several pipelines found in script, please select one: " + + ", ".join(choices_selection), + choices=choices_str, + ) pipeline_name, pipelines_dir = uniq_possible_pipelines[choices[int(sel) - 1]] if pipelines_dir: @@ -126,11 +147,17 @@ def run_deployment(self) -> None: self.pipeline_name = dlt.config.get("pipeline_name") if not self.pipeline_name: self.pipeline_name = get_default_pipeline_name(self.pipeline_script_path) - fmt.warning(f"Using default pipeline name {self.pipeline_name}. The pipeline name is not passed as argument to dlt.pipeline nor configured via config provides ie. config.toml") + fmt.warning( + f"Using default pipeline name {self.pipeline_name}. The pipeline name" + " is not passed as argument to dlt.pipeline nor configured via config" + " provides ie. 
config.toml" + ) # fmt.echo("Generating deployment for pipeline %s" % fmt.bold(self.pipeline_name)) # attach to pipeline name, get state and trace - pipeline = dlt.attach(pipeline_name=self.pipeline_name, pipelines_dir=self.pipelines_dir) + pipeline = dlt.attach( + pipeline_name=self.pipeline_name, pipelines_dir=self.pipelines_dir + ) self.state, trace = get_state_and_trace(pipeline) self._update_envs(trace) @@ -148,12 +175,26 @@ def _update_envs(self, trace: PipelineTrace) -> None: for resolved_value in trace.resolved_config_values: if resolved_value.is_secret_hint: # generate special forms for all secrets - self.secret_envs.append(LookupTrace(self.env_prov.name, tuple(resolved_value.sections), resolved_value.key, resolved_value.value)) + self.secret_envs.append( + LookupTrace( + self.env_prov.name, + tuple(resolved_value.sections), + resolved_value.key, + resolved_value.value, + ) + ) # fmt.echo(f"{resolved_value.key}:{resolved_value.value}{type(resolved_value.value)} in {resolved_value.sections} is SECRET") else: # move all config values that are not in config.toml into env if resolved_value.provider_name != self.config_prov.name: - self.envs.append(LookupTrace(self.env_prov.name, tuple(resolved_value.sections), resolved_value.key, resolved_value.value)) + self.envs.append( + LookupTrace( + self.env_prov.name, + tuple(resolved_value.sections), + resolved_value.key, + resolved_value.value, + ) + ) # fmt.echo(f"{resolved_value.key} in {resolved_value.sections} moved to CONFIG") def _echo_secrets(self) -> None: @@ -189,12 +230,20 @@ def get_state_and_trace(pipeline: Pipeline) -> Tuple[TPipelineState, PipelineTra # trace must exist and end with a successful loading step trace = pipeline.last_trace if trace is None or len(trace.steps) == 0: - raise PipelineWasNotRun("Pipeline run trace could not be found. Please run the pipeline at least once locally.") + raise PipelineWasNotRun( + "Pipeline run trace could not be found. Please run the pipeline at least once locally." + ) last_step = trace.steps[-1] if last_step.step_exception is not None: - raise PipelineWasNotRun(f"The last pipeline run ended with error. Please make sure that pipeline runs correctly before deployment.\n{last_step.step_exception}") + raise PipelineWasNotRun( + "The last pipeline run ended with error. Please make sure that pipeline runs correctly" + f" before deployment.\n{last_step.step_exception}" + ) if not isinstance(last_step.step_info, LoadInfo): - raise PipelineWasNotRun("The last pipeline run did not reach the load step. Please run the pipeline locally until it loads data into destination.") + raise PipelineWasNotRun( + "The last pipeline run did not reach the load step. Please run the pipeline locally" + " until it loads data into destination." 
+ ) return pipeline.state, trace @@ -202,7 +251,10 @@ def get_state_and_trace(pipeline: Pipeline) -> Tuple[TPipelineState, PipelineTra def get_visitors(pipeline_script: str, pipeline_script_path: str) -> PipelineScriptVisitor: visitor = utils.parse_init_script("deploy", pipeline_script, pipeline_script_path) if n.RUN not in visitor.known_calls: - raise CliCommandException("deploy", f"The pipeline script {pipeline_script_path} does not seem to run the pipeline.") + raise CliCommandException( + "deploy", + f"The pipeline script {pipeline_script_path} does not seem to run the pipeline.", + ) return visitor @@ -215,22 +267,40 @@ def parse_pipeline_info(visitor: PipelineScriptVisitor) -> List[Tuple[str, Optio if f_r_node: f_r_value = evaluate_node_literal(f_r_node) if f_r_value is None: - fmt.warning(f"The value of `full_refresh` in call to `dlt.pipeline` cannot be determined from {unparse(f_r_node).strip()}. We assume that you know what you are doing :)") + fmt.warning( + "The value of `full_refresh` in call to `dlt.pipeline` cannot be" + f" determined from {unparse(f_r_node).strip()}. We assume that you know" + " what you are doing :)" + ) if f_r_value is True: - if fmt.confirm("The value of 'full_refresh' is set to True. Do you want to abort to set it to False?", default=True): + if fmt.confirm( + "The value of 'full_refresh' is set to True. Do you want to abort to set it" + " to False?", + default=True, + ): raise CliCommandException("deploy", "Please set the full_refresh to False") p_d_node = call_args.arguments.get("pipelines_dir") if p_d_node: pipelines_dir = evaluate_node_literal(p_d_node) if pipelines_dir is None: - raise CliCommandException("deploy", f"The value of 'pipelines_dir' argument in call to `dlt_pipeline` cannot be determined from {unparse(p_d_node).strip()}. Pipeline working dir will be found. Pass it directly with --pipelines-dir option.") + raise CliCommandException( + "deploy", + "The value of 'pipelines_dir' argument in call to `dlt_pipeline` cannot be" + f" determined from {unparse(p_d_node).strip()}. Pipeline working dir will" + " be found. Pass it directly with --pipelines-dir option.", + ) p_n_node = call_args.arguments.get("pipeline_name") if p_n_node: pipeline_name = evaluate_node_literal(p_n_node) if pipeline_name is None: - raise CliCommandException("deploy", f"The value of 'pipeline_name' argument in call to `dlt_pipeline` cannot be determined from {unparse(p_d_node).strip()}. Pipeline working dir will be found. Pass it directly with --pipeline-name option.") + raise CliCommandException( + "deploy", + "The value of 'pipeline_name' argument in call to `dlt_pipeline` cannot be" + f" determined from {unparse(p_d_node).strip()}. Pipeline working dir will" + " be found. 
Pass it directly with --pipeline-name option.", + ) pipelines.append((pipeline_name, pipelines_dir)) return pipelines @@ -240,8 +310,8 @@ def str_representer(dumper: yaml.Dumper, data: str) -> yaml.ScalarNode: # format multiline strings as blocks with the exception of placeholders # that will be expanded as yaml if len(data.splitlines()) > 1 and "{{ toYaml" not in data: # check for multiline string - return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|') - return dumper.represent_scalar('tag:yaml.org,2002:str', data) + return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|") + return dumper.represent_scalar("tag:yaml.org,2002:str", data) def wrap_template_str(s: str) -> str: @@ -253,17 +323,14 @@ def serialize_templated_yaml(tree: StrAny) -> str: try: yaml.add_representer(str, str_representer) # pretty serialize yaml - serialized: str = yaml.dump(tree, allow_unicode=True, default_flow_style=False, sort_keys=False) + serialized: str = yaml.dump( + tree, allow_unicode=True, default_flow_style=False, sort_keys=False + ) # removes apostrophes around the template - serialized = re.sub(r"'([\s\n]*?\${{.+?}})'", - r"\1", - serialized, - flags=re.DOTALL) + serialized = re.sub(r"'([\s\n]*?\${{.+?}})'", r"\1", serialized, flags=re.DOTALL) # print(serialized) # fix the new lines in templates ending }} - serialized = re.sub(r"(\${{.+)\n.+(}})", - r"\1 \2", - serialized) + serialized = re.sub(r"(\${{.+)\n.+(}})", r"\1 \2", serialized) return serialized finally: yaml.add_representer(str, old_representer) @@ -292,7 +359,10 @@ def generate_pip_freeze(requirements_blacklist: List[str], requirements_file_nam conflicts = pipdeptree.conflicting_deps(tree) cycles = pipdeptree.cyclic_deps(tree) if conflicts: - fmt.warning(f"Unable to create dependencies for the github action. Please edit {requirements_file_name} yourself") + fmt.warning( + "Unable to create dependencies for the github action. 
Please edit" + f" {requirements_file_name} yourself" + ) pipdeptree.render_conflicts_text(conflicts) pipdeptree.render_cycles_text(cycles) fmt.echo() diff --git a/dlt/cli/echo.py b/dlt/cli/echo.py index 41c9fc1f7f..bd9cf24f64 100644 --- a/dlt/cli/echo.py +++ b/dlt/cli/echo.py @@ -20,7 +20,6 @@ def always_choose(always_choose_default: bool, always_choose_value: Any) -> Iter ALWAYS_CHOOSE_VALUE = _always_choose_value - echo = click.echo secho = click.secho style = click.style @@ -65,5 +64,6 @@ def prompt(text: str, choices: Iterable[str], default: Optional[Any] = None) -> click_choices = click.Choice(choices) return click.prompt(text, type=click_choices, default=default) + def text_input(text: str) -> str: return click.prompt(text) # type: ignore[no-any-return] diff --git a/dlt/cli/init_command.py b/dlt/cli/init_command.py index c246ac87de..522b3a6712 100644 --- a/dlt/cli/init_command.py +++ b/dlt/cli/init_command.py @@ -8,11 +8,16 @@ from dlt.common import git from dlt.common.configuration.paths import get_dlt_settings_dir, make_dlt_settings_path from dlt.common.configuration.specs import known_sections -from dlt.common.configuration.providers import CONFIG_TOML, SECRETS_TOML, ConfigTomlProvider, SecretsTomlProvider +from dlt.common.configuration.providers import ( + CONFIG_TOML, + SECRETS_TOML, + ConfigTomlProvider, + SecretsTomlProvider, +) from dlt.common.pipeline import get_dlt_repos_dir from dlt.common.source import _SOURCES from dlt.version import DLT_PKG_NAME, __version__ -from dlt.common.destination import DestinationReference +from dlt.common.destination import Destination from dlt.common.reflection.utils import rewrite_python_script from dlt.common.schema.utils import is_valid_schema_name from dlt.common.schema.exceptions import InvalidSchemaName @@ -24,7 +29,11 @@ from dlt.cli import echo as fmt, pipeline_files as files_ops, source_detection from dlt.cli import utils from dlt.cli.config_toml_writer import WritableConfigValue, write_values -from dlt.cli.pipeline_files import VerifiedSourceFiles, TVerifiedSourceFileEntry, TVerifiedSourceFileIndex +from dlt.cli.pipeline_files import ( + VerifiedSourceFiles, + TVerifiedSourceFileEntry, + TVerifiedSourceFileIndex, +) from dlt.cli.exceptions import CliCommandException from dlt.cli.requirements import SourceRequirements @@ -34,7 +43,9 @@ SOURCES_MODULE_NAME = "sources" -def _get_template_files(command_module: ModuleType, use_generic_template: bool) -> Tuple[str, List[str]]: +def _get_template_files( + command_module: ModuleType, use_generic_template: bool +) -> Tuple[str, List[str]]: template_files: List[str] = command_module.TEMPLATE_FILES pipeline_script: str = command_module.PIPELINE_SCRIPT if use_generic_template: @@ -48,22 +59,41 @@ def _select_source_files( remote_modified: Dict[str, TVerifiedSourceFileEntry], remote_deleted: Dict[str, TVerifiedSourceFileEntry], conflict_modified: Sequence[str], - conflict_deleted: Sequence[str] + conflict_deleted: Sequence[str], ) -> Tuple[str, Dict[str, TVerifiedSourceFileEntry], Dict[str, TVerifiedSourceFileEntry]]: # some files were changed and cannot be updated (or are created without index) - fmt.echo("Existing files for %s source were changed and cannot be automatically updated" % fmt.bold(source_name)) + fmt.echo( + "Existing files for %s source were changed and cannot be automatically updated" + % fmt.bold(source_name) + ) if conflict_modified: - fmt.echo("Following files are MODIFIED locally and CONFLICT with incoming changes: %s" % fmt.bold(", ".join(conflict_modified))) + fmt.echo( + 
"Following files are MODIFIED locally and CONFLICT with incoming changes: %s" + % fmt.bold(", ".join(conflict_modified)) + ) if conflict_deleted: - fmt.echo("Following files are DELETED locally and CONFLICT with incoming changes: %s" % fmt.bold(", ".join(conflict_deleted))) + fmt.echo( + "Following files are DELETED locally and CONFLICT with incoming changes: %s" + % fmt.bold(", ".join(conflict_deleted)) + ) can_update_files = set(remote_modified.keys()) - set(conflict_modified) can_delete_files = set(remote_deleted.keys()) - set(conflict_deleted) if len(can_update_files) > 0 or len(can_delete_files) > 0: if len(can_update_files) > 0: - fmt.echo("Following files can be automatically UPDATED: %s" % fmt.bold(", ".join(can_update_files))) + fmt.echo( + "Following files can be automatically UPDATED: %s" + % fmt.bold(", ".join(can_update_files)) + ) if len(can_delete_files) > 0: - fmt.echo("Following files can be automatically DELETED: %s" % fmt.bold(", ".join(can_delete_files))) - prompt = "Should incoming changes be Skipped, Applied (local changes will be lost) or Merged (%s UPDATED | %s DELETED | all local changes remain)?" % (fmt.bold(",".join(can_update_files)), fmt.bold(",".join(can_delete_files))) + fmt.echo( + "Following files can be automatically DELETED: %s" + % fmt.bold(", ".join(can_delete_files)) + ) + prompt = ( + "Should incoming changes be Skipped, Applied (local changes will be lost) or Merged (%s" + " UPDATED | %s DELETED | all local changes remain)?" + % (fmt.bold(",".join(can_update_files)), fmt.bold(",".join(can_delete_files))) + ) choices = "sam" else: prompt = "Should incoming changes be Skipped or Applied?" @@ -78,8 +108,8 @@ def _select_source_files( elif resolution == "m": # update what we can fmt.echo("Merging the incoming changes. No files with local changes were modified.") - remote_modified = {n:e for n, e in remote_modified.items() if n in can_update_files} - remote_deleted = {n:e for n, e in remote_deleted.items() if n in can_delete_files} + remote_modified = {n: e for n, e in remote_modified.items() if n in can_update_files} + remote_deleted = {n: e for n, e in remote_deleted.items() if n in can_delete_files} else: # fully overwrite, leave all files to be copied fmt.echo("Applying all incoming changes to local files.") @@ -96,7 +126,9 @@ def _get_dependency_system(dest_storage: FileStorage) -> str: return None -def _list_verified_sources(repo_location: str, branch: str = None) -> Dict[str, VerifiedSourceFiles]: +def _list_verified_sources( + repo_location: str, branch: str = None +) -> Dict[str, VerifiedSourceFiles]: clone_storage = git.get_fresh_repo_files(repo_location, get_dlt_repos_dir(), branch=branch) sources_storage = FileStorage(clone_storage.make_full_path(SOURCES_MODULE_NAME)) @@ -110,41 +142,73 @@ def _list_verified_sources(repo_location: str, branch: str = None) -> Dict[str, return sources -def _welcome_message(source_name: str, destination_name: str, source_files: VerifiedSourceFiles, dependency_system: str, is_new_source: bool) -> None: +def _welcome_message( + source_name: str, + destination_type: str, + source_files: VerifiedSourceFiles, + dependency_system: str, + is_new_source: bool, +) -> None: fmt.echo() if source_files.is_template: fmt.echo("Your new pipeline %s is ready to be customized!" 
% fmt.bold(source_name)) - fmt.echo("* Review and change how dlt loads your data in %s" % fmt.bold(source_files.dest_pipeline_script)) + fmt.echo( + "* Review and change how dlt loads your data in %s" + % fmt.bold(source_files.dest_pipeline_script) + ) else: if is_new_source: fmt.echo("Verified source %s was added to your project!" % fmt.bold(source_name)) - fmt.echo("* See the usage examples and code snippets to copy from %s" % fmt.bold(source_files.dest_pipeline_script)) + fmt.echo( + "* See the usage examples and code snippets to copy from %s" + % fmt.bold(source_files.dest_pipeline_script) + ) else: - fmt.echo("Verified source %s was updated to the newest version!" % fmt.bold(source_name)) + fmt.echo( + "Verified source %s was updated to the newest version!" % fmt.bold(source_name) + ) if is_new_source: - fmt.echo("* Add credentials for %s and other secrets in %s" % (fmt.bold(destination_name), fmt.bold(make_dlt_settings_path(SECRETS_TOML)))) + fmt.echo( + "* Add credentials for %s and other secrets in %s" + % (fmt.bold(destination_type), fmt.bold(make_dlt_settings_path(SECRETS_TOML))) + ) if dependency_system: fmt.echo("* Add the required dependencies to %s:" % fmt.bold(dependency_system)) compiled_requirements = source_files.requirements.compiled() for dep in compiled_requirements: fmt.echo(" " + fmt.bold(dep)) - fmt.echo(" If the dlt dependency is already added, make sure you install the extra for %s to it" % fmt.bold(destination_name)) + fmt.echo( + " If the dlt dependency is already added, make sure you install the extra for %s to it" + % fmt.bold(destination_type) + ) if dependency_system == utils.REQUIREMENTS_TXT: qs = "' '" - fmt.echo(" To install with pip: %s" % fmt.bold(f"pip3 install '{qs.join(compiled_requirements)}'")) + fmt.echo( + " To install with pip: %s" + % fmt.bold(f"pip3 install '{qs.join(compiled_requirements)}'") + ) elif dependency_system == utils.PYPROJECT_TOML: fmt.echo(" If you are using poetry you may issue the following command:") - fmt.echo(fmt.bold(" poetry add %s -E %s" % (DLT_PKG_NAME, destination_name))) + fmt.echo(fmt.bold(" poetry add %s -E %s" % (DLT_PKG_NAME, destination_type))) fmt.echo() else: - fmt.echo("* %s was created. Install it with:\npip3 install -r %s" % (fmt.bold(utils.REQUIREMENTS_TXT), utils.REQUIREMENTS_TXT)) + fmt.echo( + "* %s was created. 
Install it with:\npip3 install -r %s" + % (fmt.bold(utils.REQUIREMENTS_TXT), utils.REQUIREMENTS_TXT) + ) if is_new_source: - fmt.echo("* Read %s for more information" % fmt.bold("https://dlthub.com/docs/walkthroughs/create-a-pipeline")) + fmt.echo( + "* Read %s for more information" + % fmt.bold("https://dlthub.com/docs/walkthroughs/create-a-pipeline") + ) else: - fmt.echo("* Read %s for more information" % fmt.bold("https://dlthub.com/docs/walkthroughs/add-a-verified-source")) + fmt.echo( + "* Read %s for more information" + % fmt.bold("https://dlthub.com/docs/walkthroughs/add-a-verified-source") + ) def list_verified_sources_command(repo_location: str, branch: str = None) -> None: @@ -158,10 +222,16 @@ def list_verified_sources_command(repo_location: str, branch: str = None) -> Non fmt.echo(msg) -def init_command(source_name: str, destination_name: str, use_generic_template: bool, repo_location: str, branch: str = None) -> None: +def init_command( + source_name: str, + destination_type: str, + use_generic_template: bool, + repo_location: str, + branch: str = None, +) -> None: # try to import the destination and get config spec - destination_reference = DestinationReference.from_name(destination_name) - destination_spec = destination_reference.spec() + destination_reference = Destination.from_reference(destination_type) + destination_spec = destination_reference.spec fmt.echo("Looking up the init scripts in %s..." % fmt.bold(repo_location)) clone_storage = git.get_fresh_repo_files(repo_location, get_dlt_repos_dir(), branch=branch) @@ -192,76 +262,115 @@ def init_command(source_name: str, destination_name: str, use_generic_template: source_files = files_ops.get_verified_source_files(sources_storage, source_name) # get file index from remote verified source files being copied remote_index = files_ops.get_remote_source_index( - source_files.storage.storage_path, source_files.files, source_files.requirements.dlt_version_constraint() + source_files.storage.storage_path, + source_files.files, + source_files.requirements.dlt_version_constraint(), ) # diff local and remote index to get modified and deleted files - remote_new, remote_modified, remote_deleted = files_ops.gen_index_diff(local_index, remote_index) + remote_new, remote_modified, remote_deleted = files_ops.gen_index_diff( + local_index, remote_index + ) # find files that are modified locally - conflict_modified, conflict_deleted = files_ops.find_conflict_files(local_index, remote_new, remote_modified, remote_deleted, dest_storage) + conflict_modified, conflict_deleted = files_ops.find_conflict_files( + local_index, remote_new, remote_modified, remote_deleted, dest_storage + ) # add new to modified remote_modified.update(remote_new) if conflict_modified or conflict_deleted: # select source files that can be copied/updated _, remote_modified, remote_deleted = _select_source_files( - source_name, - remote_modified, - remote_deleted, - conflict_modified, - conflict_deleted + source_name, remote_modified, remote_deleted, conflict_modified, conflict_deleted ) if not remote_deleted and not remote_modified: fmt.echo("No files to update, exiting") return if remote_index["is_dirty"]: - fmt.warning(f"The verified sources repository is dirty. {source_name} source files may not update correctly in the future.") + fmt.warning( + f"The verified sources repository is dirty. {source_name} source files may not" + " update correctly in the future." 
+ ) # add template files source_files.files.extend(template_files) else: - if not is_valid_schema_name(source_name): raise InvalidSchemaName(source_name) dest_pipeline_script = source_name + ".py" - source_files = VerifiedSourceFiles(True, init_storage, pipeline_script, dest_pipeline_script, template_files, SourceRequirements([]), "") + source_files = VerifiedSourceFiles( + True, + init_storage, + pipeline_script, + dest_pipeline_script, + template_files, + SourceRequirements([]), + "", + ) if dest_storage.has_file(dest_pipeline_script): fmt.warning("Pipeline script %s already exist, exiting" % dest_pipeline_script) return # add .dlt/*.toml files to be copied - source_files.files.extend([make_dlt_settings_path(CONFIG_TOML), make_dlt_settings_path(SECRETS_TOML)]) + source_files.files.extend( + [make_dlt_settings_path(CONFIG_TOML), make_dlt_settings_path(SECRETS_TOML)] + ) # add dlt extras line to requirements - source_files.requirements.update_dlt_extras(destination_name) + source_files.requirements.update_dlt_extras(destination_type) # Check compatibility with installed dlt if not source_files.requirements.is_installed_dlt_compatible(): - msg = f"This pipeline requires a newer version of dlt than your installed version ({source_files.requirements.current_dlt_version()}). " \ - f"Pipeline requires '{source_files.requirements.dlt_requirement_base}'" + msg = ( + "This pipeline requires a newer version of dlt than your installed version" + f" ({source_files.requirements.current_dlt_version()}). Pipeline requires" + f" '{source_files.requirements.dlt_requirement_base}'" + ) fmt.warning(msg) - if not fmt.confirm("Would you like to continue anyway? (you can update dlt after this step)", default=True): - fmt.echo(f'You can update dlt with: pip3 install -U "{source_files.requirements.dlt_requirement_base}"') + if not fmt.confirm( + "Would you like to continue anyway? (you can update dlt after this step)", default=True + ): + fmt.echo( + "You can update dlt with: pip3 install -U" + f' "{source_files.requirements.dlt_requirement_base}"' + ) return # read module source and parse it - visitor = utils.parse_init_script("init", source_files.storage.load(source_files.pipeline_script), source_files.pipeline_script) + visitor = utils.parse_init_script( + "init", + source_files.storage.load(source_files.pipeline_script), + source_files.pipeline_script, + ) if visitor.is_destination_imported: - raise CliCommandException("init", f"The pipeline script {source_files.pipeline_script} import a destination from dlt.destinations. You should specify destinations by name when calling dlt.pipeline or dlt.run in init scripts.") + raise CliCommandException( + "init", + f"The pipeline script {source_files.pipeline_script} import a destination from" + " dlt.destinations. You should specify destinations by name when calling dlt.pipeline" + " or dlt.run in init scripts.", + ) if n.PIPELINE not in visitor.known_calls: - raise CliCommandException("init", f"The pipeline script {source_files.pipeline_script} does not seem to initialize pipeline with dlt.pipeline. Please initialize pipeline explicitly in init scripts.") + raise CliCommandException( + "init", + f"The pipeline script {source_files.pipeline_script} does not seem to initialize" + " pipeline with dlt.pipeline. 
Please initialize pipeline explicitly in init scripts.", + ) # find all arguments in all calls to replace transformed_nodes = source_detection.find_call_arguments_to_replace( visitor, - [("destination", destination_name), ("pipeline_name", source_name), ("dataset_name", source_name + "_data")], - source_files.pipeline_script + [ + ("destination", destination_type), + ("pipeline_name", source_name), + ("dataset_name", source_name + "_data"), + ], + source_files.pipeline_script, ) # inspect the script inspect_pipeline_script( source_files.storage.storage_path, source_files.storage.to_relative_path(source_files.pipeline_script), - ignore_missing_imports=True + ignore_missing_imports=True, ) # detect all the required secrets and configs that should go into tomls files @@ -269,32 +378,57 @@ def init_command(source_name: str, destination_name: str, use_generic_template: # replace destination, pipeline_name and dataset_name in templates transformed_nodes = source_detection.find_call_arguments_to_replace( visitor, - [("destination", destination_name), ("pipeline_name", source_name), ("dataset_name", source_name + "_data")], - source_files.pipeline_script + [ + ("destination", destination_type), + ("pipeline_name", source_name), + ("dataset_name", source_name + "_data"), + ], + source_files.pipeline_script, ) # template sources are always in module starting with "pipeline" # for templates, place config and secrets into top level section - required_secrets, required_config, checked_sources = source_detection.detect_source_configs(_SOURCES, "pipeline", ()) + required_secrets, required_config, checked_sources = source_detection.detect_source_configs( + _SOURCES, "pipeline", () + ) # template has a strict rules where sources are placed for source_q_name, source_config in checked_sources.items(): if source_q_name not in visitor.known_sources_resources: - raise CliCommandException("init", f"The pipeline script {source_files.pipeline_script} imports a source/resource {source_config.f.__name__} from module {source_config.module.__name__}. In init scripts you must declare all sources and resources in single file.") + raise CliCommandException( + "init", + f"The pipeline script {source_files.pipeline_script} imports a source/resource" + f" {source_config.f.__name__} from module {source_config.module.__name__}. 
In" + " init scripts you must declare all sources and resources in single file.", + ) # rename sources and resources - transformed_nodes.extend(source_detection.find_source_calls_to_replace(visitor, source_name)) + transformed_nodes.extend( + source_detection.find_source_calls_to_replace(visitor, source_name) + ) else: # replace only destination for existing pipelines - transformed_nodes = source_detection.find_call_arguments_to_replace(visitor, [("destination", destination_name)], source_files.pipeline_script) + transformed_nodes = source_detection.find_call_arguments_to_replace( + visitor, [("destination", destination_type)], source_files.pipeline_script + ) # pipeline sources are in module with name starting from {pipeline_name} # for verified pipelines place in the specific source section - required_secrets, required_config, checked_sources = source_detection.detect_source_configs(_SOURCES, source_name, (known_sections.SOURCES, source_name)) + required_secrets, required_config, checked_sources = source_detection.detect_source_configs( + _SOURCES, source_name, (known_sections.SOURCES, source_name) + ) if len(checked_sources) == 0: - raise CliCommandException("init", f"The pipeline script {source_files.pipeline_script} is not creating or importing any sources or resources") + raise CliCommandException( + "init", + f"The pipeline script {source_files.pipeline_script} is not creating or importing any" + " sources or resources", + ) # add destination spec to required secrets - required_secrets["destinations:" + destination_name] = WritableConfigValue(destination_name, destination_spec, None, ("destination",)) + required_secrets["destinations:" + destination_type] = WritableConfigValue( + destination_type, destination_spec, None, ("destination",) + ) # add the global telemetry to required config - required_config["runtime.dlthub_telemetry"] = WritableConfigValue("dlthub_telemetry", bool, utils.get_telemetry_status(), ("runtime", )) + required_config["runtime.dlthub_telemetry"] = WritableConfigValue( + "dlthub_telemetry", bool, utils.get_telemetry_status(), ("runtime",) + ) # modify the script script_lines = rewrite_python_script(visitor.source_lines, transformed_nodes) @@ -305,16 +439,22 @@ def init_command(source_name: str, destination_name: str, use_generic_template: # ask for confirmation if is_new_source: if source_files.is_template: - fmt.echo("A verified source %s was not found. Using a template to create a new source and pipeline with name %s." % (fmt.bold(source_name), fmt.bold(source_name))) + fmt.echo( + "A verified source %s was not found. Using a template to create a new source and" + " pipeline with name %s." 
% (fmt.bold(source_name), fmt.bold(source_name)) + ) else: - fmt.echo("Cloning and configuring a verified source %s (%s)" % (fmt.bold(source_name), source_files.doc)) + fmt.echo( + "Cloning and configuring a verified source %s (%s)" + % (fmt.bold(source_name), source_files.doc) + ) if use_generic_template: fmt.warning("--generic parameter is meaningless if verified source is found") if not fmt.confirm("Do you want to proceed?", default=True): raise CliCommandException("init", "Aborted") dependency_system = _get_dependency_system(dest_storage) - _welcome_message(source_name, destination_name, source_files, dependency_system, is_new_source) + _welcome_message(source_name, destination_type, source_files, dependency_system, is_new_source) # copy files at the very end for file_name in source_files.files: @@ -339,7 +479,9 @@ def init_command(source_name: str, destination_name: str, use_generic_template: for file_name in remote_deleted: if dest_storage.has_file(file_name): dest_storage.delete(file_name) - files_ops.save_verified_source_local_index(source_name, remote_index, remote_modified, remote_deleted) + files_ops.save_verified_source_local_index( + source_name, remote_index, remote_modified, remote_deleted + ) # create script if not dest_storage.has_file(source_files.dest_pipeline_script): dest_storage.save(source_files.dest_pipeline_script, dest_script_source) diff --git a/dlt/cli/pipeline_command.py b/dlt/cli/pipeline_command.py index b17981c1b1..91f64763d3 100644 --- a/dlt/cli/pipeline_command.py +++ b/dlt/cli/pipeline_command.py @@ -9,7 +9,7 @@ from dlt.common.runners import Venv from dlt.common.runners.stdout import iter_stdout from dlt.common.schema.utils import group_tables_by_resource, remove_defaults -from dlt.common.storages import FileStorage, LoadStorage +from dlt.common.storages import FileStorage, PackageStorage from dlt.pipeline.helpers import DropCommand from dlt.pipeline.exceptions import CannotRestorePipelineException @@ -18,7 +18,15 @@ DLT_PIPELINE_COMMAND_DOCS_URL = "https://dlthub.com/docs/reference/command-line-interface" -def pipeline_command(operation: str, pipeline_name: str, pipelines_dir: str, verbosity: int, dataset_name: str = None, destination: TDestinationReferenceArg = None, **command_kwargs: Any) -> None: +def pipeline_command( + operation: str, + pipeline_name: str, + pipelines_dir: str, + verbosity: int, + dataset_name: str = None, + destination: TDestinationReferenceArg = None, + **command_kwargs: Any, +) -> None: if operation == "list": pipelines_dir = pipelines_dir or get_dlt_pipelines_dir() storage = FileStorage(pipelines_dir) @@ -39,35 +47,55 @@ def pipeline_command(operation: str, pipeline_name: str, pipelines_dir: str, ver if operation not in {"sync", "drop"}: raise fmt.warning(str(e)) - if not fmt.confirm("Do you want to attempt to restore the pipeline state from destination?", default=False): + if not fmt.confirm( + "Do you want to attempt to restore the pipeline state from destination?", default=False + ): return - destination = destination or fmt.text_input(f"Enter destination name for pipeline {fmt.bold(pipeline_name)}") - dataset_name = dataset_name or fmt.text_input(f"Enter dataset name for pipeline {fmt.bold(pipeline_name)}") - p = dlt.pipeline(pipeline_name, pipelines_dir, destination=destination, dataset_name=dataset_name) + destination = destination or fmt.text_input( + f"Enter destination name for pipeline {fmt.bold(pipeline_name)}" + ) + dataset_name = dataset_name or fmt.text_input( + f"Enter dataset name for pipeline 
{fmt.bold(pipeline_name)}" + ) + p = dlt.pipeline( + pipeline_name, pipelines_dir, destination=destination, dataset_name=dataset_name + ) p.sync_destination() if p.first_run: # remote state was not found p._wipe_working_folder() - fmt.error(f"Pipeline {pipeline_name} was not found in dataset {dataset_name} in {destination}") + fmt.error( + f"Pipeline {pipeline_name} was not found in dataset {dataset_name} in {destination}" + ) return if operation == "sync": return # No need to sync again def _display_pending_packages() -> Tuple[Sequence[str], Sequence[str]]: - extracted_files = p.list_extracted_resources() - if extracted_files: - fmt.echo("Has %s extracted files ready to be normalized" % fmt.bold(str(len(extracted_files)))) + extracted_packages = p.list_extracted_load_packages() + if extracted_packages: + fmt.echo( + "Has %s extracted packages ready to be normalized with following load ids:" + % fmt.bold(str(len(extracted_packages))) + ) + for load_id in extracted_packages: + fmt.echo(load_id) norm_packages = p.list_normalized_load_packages() if norm_packages: - fmt.echo("Has %s load packages ready to be loaded with following load ids:" % fmt.bold(str(len(norm_packages)))) + fmt.echo( + "Has %s normalized packages ready to be loaded with following load ids:" + % fmt.bold(str(len(norm_packages))) + ) for load_id in norm_packages: fmt.echo(load_id) # load first (oldest) package first_package_info = p.get_load_package_info(norm_packages[0]) - if LoadStorage.is_package_partially_loaded(first_package_info): - fmt.warning("This package is partially loaded. Data in the destination may be modified.") + if PackageStorage.is_package_partially_loaded(first_package_info): + fmt.warning( + "This package is partially loaded. Data in the destination may be modified." 
+ ) fmt.echo() - return extracted_files, norm_packages + return extracted_packages, norm_packages fmt.echo("Found pipeline %s in %s" % (fmt.bold(p.pipeline_name), fmt.bold(p.pipelines_dir))) @@ -77,7 +105,9 @@ def _display_pending_packages() -> Tuple[Sequence[str], Sequence[str]]: with signals.delayed_signals(): venv = Venv.restore_current() - for line in iter_stdout(venv, "streamlit", "run", streamlit_helper.__file__, pipeline_name): + for line in iter_stdout( + venv, "streamlit", "run", streamlit_helper.__file__, pipeline_name + ): fmt.echo(line) if operation == "info": @@ -105,24 +135,38 @@ def _display_pending_packages() -> Tuple[Sequence[str], Sequence[str]]: fmt.warning("This pipeline does not have a default schema") else: is_single_schema = len(p.schema_names) == 1 - for schema_name in p.schema_names: + for schema_name in p.schema_names: fmt.echo("Resources in schema: %s" % fmt.bold(schema_name)) schema = p.schemas[schema_name] data_tables = {t["name"]: t for t in schema.data_tables()} for resource_name, tables in group_tables_by_resource(data_tables).items(): res_state_slots = 0 if sources_state: - source_state = next(iter(sources_state.items()))[1] if is_single_schema else sources_state.get(schema_name) + source_state = ( + next(iter(sources_state.items()))[1] + if is_single_schema + else sources_state.get(schema_name) + ) if source_state: resource_state_ = resource_state(resource_name, source_state) res_state_slots = len(resource_state_) - fmt.echo("%s with %s table(s) and %s resource state slot(s)" % (fmt.bold(resource_name), fmt.bold(str(len(tables))), fmt.bold(str(res_state_slots)))) + fmt.echo( + "%s with %s table(s) and %s resource state slot(s)" + % ( + fmt.bold(resource_name), + fmt.bold(str(len(tables))), + fmt.bold(str(res_state_slots)), + ) + ) fmt.echo() fmt.echo("Working dir content:") _display_pending_packages() loaded_packages = p.list_completed_load_packages() if loaded_packages: - fmt.echo("Has %s completed load packages with following load ids:" % fmt.bold(str(len(loaded_packages)))) + fmt.echo( + "Has %s completed load packages with following load ids:" + % fmt.bold(str(len(loaded_packages))) + ) for load_id in loaded_packages: fmt.echo(load_id) fmt.echo() @@ -130,7 +174,10 @@ def _display_pending_packages() -> Tuple[Sequence[str], Sequence[str]]: if trace is None or len(trace.steps) == 0: fmt.echo("Pipeline does not have last run trace.") else: - fmt.echo("Pipeline has last run trace. Use 'dlt pipeline %s trace' to inspect " % pipeline_name) + fmt.echo( + "Pipeline has last run trace. 
Use 'dlt pipeline %s trace' to inspect " + % pipeline_name + ) if operation == "trace": trace = p.last_trace @@ -147,7 +194,13 @@ def _display_pending_packages() -> Tuple[Sequence[str], Sequence[str]]: failed_jobs = p.list_failed_jobs_in_package(load_id) if failed_jobs: for failed_job in p.list_failed_jobs_in_package(load_id): - fmt.echo("JOB: %s(%s)" % (fmt.bold(failed_job.job_file_info.job_id()), fmt.bold(failed_job.job_file_info.table_name))) + fmt.echo( + "JOB: %s(%s)" + % ( + fmt.bold(failed_job.job_file_info.job_id()), + fmt.bold(failed_job.job_file_info.table_name), + ) + ) fmt.echo("JOB file type: %s" % fmt.bold(failed_job.job_file_info.file_format)) fmt.echo("JOB file path: %s" % fmt.bold(failed_job.file_path)) if verbosity > 0: @@ -158,32 +211,42 @@ def _display_pending_packages() -> Tuple[Sequence[str], Sequence[str]]: fmt.echo("No failed jobs found") if operation == "drop-pending-packages": - extracted_files, norm_packages = _display_pending_packages() - if len(extracted_files) == 0 and len(norm_packages) == 0: + extracted_packages, norm_packages = _display_pending_packages() + if len(extracted_packages) == 0 and len(norm_packages) == 0: fmt.echo("No pending packages found") if fmt.confirm("Delete the above packages?", default=False): p.drop_pending_packages(with_partial_loads=True) fmt.echo("Pending packages deleted") if operation == "sync": - if fmt.confirm("About to drop the local state of the pipeline and reset all the schemas. The destination state, data and schemas are left intact. Proceed?", default=False): + if fmt.confirm( + "About to drop the local state of the pipeline and reset all the schemas. The" + " destination state, data and schemas are left intact. Proceed?", + default=False, + ): fmt.echo("Dropping local state") p = p.drop() fmt.echo("Restoring from destination") p.sync_destination() if operation == "load-package": - load_id = command_kwargs.get('load_id') + load_id = command_kwargs.get("load_id") if not load_id: - packages = sorted(p.list_normalized_load_packages()) + packages = sorted(p.list_extracted_load_packages()) + if not packages: + packages = sorted(p.list_normalized_load_packages()) if not packages: packages = sorted(p.list_completed_load_packages()) if not packages: - raise CliCommandException("pipeline", "There are no load packages for that pipeline") + raise CliCommandException( + "pipeline", "There are no load packages for that pipeline" + ) load_id = packages[-1] package_info = p.get_load_package_info(load_id) - fmt.echo("Package %s found in %s" % (fmt.bold(load_id), fmt.bold(package_info.package_path))) + fmt.echo( + "Package %s found in %s" % (fmt.bold(load_id), fmt.bold(package_info.package_path)) + ) fmt.echo(package_info.asstr(verbosity)) if len(package_info.schema_update) > 0: if verbosity == 0: @@ -191,7 +254,9 @@ def _display_pending_packages() -> Tuple[Sequence[str], Sequence[str]]: else: tables = remove_defaults({"tables": package_info.schema_update}) # type: ignore fmt.echo(fmt.bold("Schema update:")) - fmt.echo(yaml.dump(tables, allow_unicode=True, default_flow_style=False, sort_keys=False)) + fmt.echo( + yaml.dump(tables, allow_unicode=True, default_flow_style=False, sort_keys=False) + ) if operation == "schema": if not p.default_schema_name: @@ -204,7 +269,10 @@ def _display_pending_packages() -> Tuple[Sequence[str], Sequence[str]]: if operation == "drop": drop = DropCommand(p, **command_kwargs) if drop.is_empty: - fmt.echo("Could not select any resources to drop and no resource/source state to reset. 
Use the command below to inspect the pipeline:") + fmt.echo( + "Could not select any resources to drop and no resource/source state to reset. Use" + " the command below to inspect the pipeline:" + ) fmt.echo(f"dlt pipeline -v {p.pipeline_name} info") if len(drop.info["warnings"]): fmt.echo("Additional warnings are available") @@ -212,12 +280,23 @@ def _display_pending_packages() -> Tuple[Sequence[str], Sequence[str]]: fmt.warning(warning) return - fmt.echo("About to drop the following data in dataset %s in destination %s:" % (fmt.bold(drop.info["dataset_name"]), fmt.bold(p.destination.__name__))) + fmt.echo( + "About to drop the following data in dataset %s in destination %s:" + % (fmt.bold(drop.info["dataset_name"]), fmt.bold(p.destination.destination_name)) + ) fmt.echo("%s: %s" % (fmt.style("Selected schema", fg="green"), drop.info["schema_name"])) - fmt.echo("%s: %s" % (fmt.style("Selected resource(s)", fg="green"), drop.info["resource_names"])) + fmt.echo( + "%s: %s" % (fmt.style("Selected resource(s)", fg="green"), drop.info["resource_names"]) + ) fmt.echo("%s: %s" % (fmt.style("Table(s) to drop", fg="green"), drop.info["tables"])) - fmt.echo("%s: %s" % (fmt.style("Resource(s) state to reset", fg="green"), drop.info["resource_states"])) - fmt.echo("%s: %s" % (fmt.style("Source state path(s) to reset", fg="green"), drop.info["state_paths"])) + fmt.echo( + "%s: %s" + % (fmt.style("Resource(s) state to reset", fg="green"), drop.info["resource_states"]) + ) + fmt.echo( + "%s: %s" + % (fmt.style("Source state path(s) to reset", fg="green"), drop.info["state_paths"]) + ) # for k, v in drop.info.items(): # fmt.echo("%s: %s" % (fmt.style(k, fg="green"), v)) for warning in drop.info["warnings"]: diff --git a/dlt/cli/pipeline_files.py b/dlt/cli/pipeline_files.py index acd3a95e80..49c0f71b21 100644 --- a/dlt/cli/pipeline_files.py +++ b/dlt/cli/pipeline_files.py @@ -65,17 +65,14 @@ def _load_dot_sources() -> TVerifiedSourcesFileIndex: raise FileNotFoundError(SOURCES_INIT_INFO_FILE) return index except FileNotFoundError: - return { - "engine_version": SOURCES_INIT_INFO_ENGINE_VERSION, - "sources": {} - } + return {"engine_version": SOURCES_INIT_INFO_ENGINE_VERSION, "sources": {}} def _merge_remote_index( local_index: TVerifiedSourceFileIndex, remote_index: TVerifiedSourceFileIndex, remote_modified: Dict[str, TVerifiedSourceFileEntry], - remote_deleted: Dict[str, TVerifiedSourceFileEntry] + remote_deleted: Dict[str, TVerifiedSourceFileEntry], ) -> TVerifiedSourceFileIndex: # update all modified files local_index["files"].update(remote_modified) @@ -92,13 +89,15 @@ def _merge_remote_index( def load_verified_sources_local_index(source_name: str) -> TVerifiedSourceFileIndex: - return _load_dot_sources()["sources"].get(source_name, { - "is_dirty": False, - "last_commit_sha": None, - "last_commit_timestamp": None, - "files": {}, - "dlt_version_constraint": ">=0.1.0" - } + return _load_dot_sources()["sources"].get( + source_name, + { + "is_dirty": False, + "last_commit_sha": None, + "last_commit_timestamp": None, + "files": {}, + "dlt_version_constraint": ">=0.1.0", + }, ) @@ -106,17 +105,17 @@ def save_verified_source_local_index( source_name: str, remote_index: TVerifiedSourceFileIndex, remote_modified: Dict[str, TVerifiedSourceFileEntry], - remote_deleted: Dict[str, TVerifiedSourceFileEntry] + remote_deleted: Dict[str, TVerifiedSourceFileEntry], ) -> None: - all_sources = _load_dot_sources() local_index = all_sources["sources"].setdefault(source_name, remote_index) _merge_remote_index(local_index, 
remote_index, remote_modified, remote_deleted) _save_dot_sources(all_sources) -def get_remote_source_index(repo_path: str, files: Sequence[str], dlt_version_constraint: str) -> TVerifiedSourceFileIndex: - +def get_remote_source_index( + repo_path: str, files: Sequence[str], dlt_version_constraint: str +) -> TVerifiedSourceFileIndex: with git.get_repo(repo_path) as repo: tree = repo.tree() commit_sha = repo.head.commit.hexsha @@ -136,7 +135,7 @@ def get_remote_source_index(repo_path: str, files: Sequence[str], dlt_version_co files_sha[file] = { "commit_sha": commit_sha, "git_sha": blob_sha3, - "sha3_256": hashlib.sha3_256(file_blob).hexdigest() + "sha3_256": hashlib.sha3_256(file_blob).hexdigest(), } return { @@ -144,26 +143,37 @@ def get_remote_source_index(repo_path: str, files: Sequence[str], dlt_version_co "last_commit_sha": commit_sha, "last_commit_timestamp": repo.head.commit.committed_datetime.isoformat(), "files": files_sha, - "dlt_version_constraint": dlt_version_constraint + "dlt_version_constraint": dlt_version_constraint, } def get_verified_source_names(sources_storage: FileStorage) -> List[str]: candidates: List[str] = [] - for name in [n for n in sources_storage.list_folder_dirs(".", to_root=False) if not any(fnmatch.fnmatch(n, ignore) for ignore in IGNORE_SOURCES)]: + for name in [ + n + for n in sources_storage.list_folder_dirs(".", to_root=False) + if not any(fnmatch.fnmatch(n, ignore) for ignore in IGNORE_SOURCES) + ]: # must contain at least one valid python script if any(f.endswith(".py") for f in sources_storage.list_folder_files(name, to_root=False)): candidates.append(name) return candidates -def get_verified_source_files(sources_storage: FileStorage, source_name: str) -> VerifiedSourceFiles: +def get_verified_source_files( + sources_storage: FileStorage, source_name: str +) -> VerifiedSourceFiles: if not sources_storage.has_folder(source_name): - raise VerifiedSourceRepoError(f"Verified source {source_name} could not be found in the repository", source_name) + raise VerifiedSourceRepoError( + f"Verified source {source_name} could not be found in the repository", source_name + ) # find example script example_script = f"{source_name}_pipeline.py" if not sources_storage.has_file(example_script): - raise VerifiedSourceRepoError(f"Pipeline example script {example_script} could not be found in the repository", source_name) + raise VerifiedSourceRepoError( + f"Pipeline example script {example_script} could not be found in the repository", + source_name, + ) # get all files recursively files: List[str] = [] for root, subdirs, _files in os.walk(sources_storage.make_full_path(source_name)): @@ -172,9 +182,15 @@ def get_verified_source_files(sources_storage: FileStorage, source_name: str) -> if any(fnmatch.fnmatch(subdir, ignore) for ignore in IGNORE_FILES): subdirs.remove(subdir) rel_root = sources_storage.to_relative_path(root) - files.extend([os.path.join(rel_root, file) for file in _files if all(not fnmatch.fnmatch(file, ignore) for ignore in IGNORE_FILES)]) + files.extend( + [ + os.path.join(rel_root, file) + for file in _files + if all(not fnmatch.fnmatch(file, ignore) for ignore in IGNORE_FILES) + ] + ) # read the docs - init_py = os.path.join(source_name, utils.MODULE_INIT) + init_py = os.path.join(source_name, utils.MODULE_INIT) docstring: str = "" if sources_storage.has_file(init_py): docstring = get_module_docstring(sources_storage.load(init_py)) @@ -187,14 +203,18 @@ def get_verified_source_files(sources_storage: FileStorage, source_name: str) -> else: requirements 
= SourceRequirements([]) # find requirements - return VerifiedSourceFiles(False, sources_storage, example_script, example_script, files, requirements, docstring) + return VerifiedSourceFiles( + False, sources_storage, example_script, example_script, files, requirements, docstring + ) def gen_index_diff( - local_index: TVerifiedSourceFileIndex, - remote_index: TVerifiedSourceFileIndex -) -> Tuple[Dict[str, TVerifiedSourceFileEntry], Dict[str, TVerifiedSourceFileEntry], Dict[str, TVerifiedSourceFileEntry]]: - + local_index: TVerifiedSourceFileIndex, remote_index: TVerifiedSourceFileIndex +) -> Tuple[ + Dict[str, TVerifiedSourceFileEntry], + Dict[str, TVerifiedSourceFileEntry], + Dict[str, TVerifiedSourceFileEntry], +]: deleted: Dict[str, TVerifiedSourceFileEntry] = {} modified: Dict[str, TVerifiedSourceFileEntry] = {} new: Dict[str, TVerifiedSourceFileEntry] = {} @@ -223,7 +243,7 @@ def find_conflict_files( remote_new: Dict[str, TVerifiedSourceFileEntry], remote_modified: Dict[str, TVerifiedSourceFileEntry], remote_deleted: Dict[str, TVerifiedSourceFileEntry], - dest_storage: FileStorage + dest_storage: FileStorage, ) -> Tuple[List[str], List[str]]: """Use files index from .sources to identify modified files via sha3 content hash""" diff --git a/dlt/cli/requirements.py b/dlt/cli/requirements.py index 79907ae01c..5b16f7a60f 100644 --- a/dlt/cli/requirements.py +++ b/dlt/cli/requirements.py @@ -7,6 +7,7 @@ class SourceRequirements: """Helper class to parse and manipulate entries in source's requirements.txt""" + dlt_requirement: Requirement """Final dlt requirement that may be updated with destination extras""" dlt_requirement_base: Requirement diff --git a/dlt/cli/source_detection.py b/dlt/cli/source_detection.py index 369663b82f..636615af61 100644 --- a/dlt/cli/source_detection.py +++ b/dlt/cli/source_detection.py @@ -14,7 +14,9 @@ from dlt.reflection.script_visitor import PipelineScriptVisitor -def find_call_arguments_to_replace(visitor: PipelineScriptVisitor, replace_nodes: List[Tuple[str, str]], init_script_name: str) -> List[Tuple[ast.AST, ast.AST]]: +def find_call_arguments_to_replace( + visitor: PipelineScriptVisitor, replace_nodes: List[Tuple[str, str]], init_script_name: str +) -> List[Tuple[ast.AST, ast.AST]]: # the input tuple (call argument name, replacement value) # the returned tuple (node, replacement value, node type) transformed_nodes: List[Tuple[ast.AST, ast.AST]] = [] @@ -26,7 +28,11 @@ def find_call_arguments_to_replace(visitor: PipelineScriptVisitor, replace_nodes dn_node: ast.AST = args.arguments.get(t_arg_name) if dn_node is not None: if not isinstance(dn_node, ast.Constant) or not isinstance(dn_node.value, str): - raise CliCommandException("init", f"The pipeline script {init_script_name} must pass the {t_arg_name} as string to '{arg_name}' function in line {dn_node.lineno}") + raise CliCommandException( + "init", + f"The pipeline script {init_script_name} must pass the {t_arg_name} as" + f" string to '{arg_name}' function in line {dn_node.lineno}", + ) else: transformed_nodes.append((dn_node, ast.Constant(value=t_value, kind=None))) replaced_args.add(t_arg_name) @@ -34,27 +40,39 @@ def find_call_arguments_to_replace(visitor: PipelineScriptVisitor, replace_nodes # there was at least one replacement for t_arg_name, _ in replace_nodes: if t_arg_name not in replaced_args: - raise CliCommandException("init", f"The pipeline script {init_script_name} is not explicitly passing the '{t_arg_name}' argument to 'pipeline' or 'run' function. 
In init script the default and configured values are not accepted.") + raise CliCommandException( + "init", + f"The pipeline script {init_script_name} is not explicitly passing the" + f" '{t_arg_name}' argument to 'pipeline' or 'run' function. In init script the" + " default and configured values are not accepted.", + ) return transformed_nodes -def find_source_calls_to_replace(visitor: PipelineScriptVisitor, pipeline_name: str) -> List[Tuple[ast.AST, ast.AST]]: +def find_source_calls_to_replace( + visitor: PipelineScriptVisitor, pipeline_name: str +) -> List[Tuple[ast.AST, ast.AST]]: transformed_nodes: List[Tuple[ast.AST, ast.AST]] = [] for source_def in visitor.known_sources_resources.values(): # append function name to be replaced - transformed_nodes.append((creates_func_def_name_node(source_def, visitor.source_lines), ast.Name(id=pipeline_name + "_" + source_def.name))) + transformed_nodes.append( + ( + creates_func_def_name_node(source_def, visitor.source_lines), + ast.Name(id=pipeline_name + "_" + source_def.name), + ) + ) for calls in visitor.known_sources_resources_calls.values(): for call in calls: - transformed_nodes.append((call.func, ast.Name(id=pipeline_name + "_" + unparse(call.func)))) + transformed_nodes.append( + (call.func, ast.Name(id=pipeline_name + "_" + unparse(call.func))) + ) return transformed_nodes def detect_source_configs( - sources: Dict[str, SourceInfo], - module_prefix: str, - section: Tuple[str, ...] + sources: Dict[str, SourceInfo], module_prefix: str, section: Tuple[str, ...] ) -> Tuple[Dict[str, WritableConfigValue], Dict[str, WritableConfigValue], Dict[str, SourceInfo]]: # all detected secrets with sections required_secrets: Dict[str, WritableConfigValue] = {} @@ -75,11 +93,15 @@ def detect_source_configs( if is_secret_hint(field_type): val_store = required_secrets # all configs that are required and do not have a default value must go to config.toml - elif not is_optional_type(field_type) and getattr(source_config, field_name) is None: + elif ( + not is_optional_type(field_type) and getattr(source_config, field_name) is None + ): val_store = required_config if val_store is not None: # we are sure that all resources come from single file so we can put them in single section - val_store[source_name + ":" + field_name] = WritableConfigValue(field_name, field_type, None, section) + val_store[source_name + ":" + field_name] = WritableConfigValue( + field_name, field_type, None, section + ) return required_secrets, required_config, checked_sources diff --git a/dlt/cli/telemetry_command.py b/dlt/cli/telemetry_command.py index 574005797a..bb451ea979 100644 --- a/dlt/cli/telemetry_command.py +++ b/dlt/cli/telemetry_command.py @@ -24,7 +24,9 @@ def telemetry_status_command() -> None: def change_telemetry_status_command(enabled: bool) -> None: # value to write - telemetry_value = [WritableConfigValue("dlthub_telemetry", bool, enabled, (RunConfiguration.__section__, ))] + telemetry_value = [ + WritableConfigValue("dlthub_telemetry", bool, enabled, (RunConfiguration.__section__,)) + ] # write local config config = ConfigTomlProvider(add_global_config=False) if not config.is_empty: diff --git a/dlt/cli/utils.py b/dlt/cli/utils.py index 996770b40d..5ea4471d7e 100644 --- a/dlt/cli/utils.py +++ b/dlt/cli/utils.py @@ -25,14 +25,20 @@ MODULE_INIT = "__init__.py" -def parse_init_script(command: str, script_source: str, init_script_name: str) -> PipelineScriptVisitor: +def parse_init_script( + command: str, script_source: str, init_script_name: str +) -> 
PipelineScriptVisitor: # parse the script first tree = ast.parse(source=script_source) set_ast_parents(tree) visitor = PipelineScriptVisitor(script_source) visitor.visit_passes(tree) if len(visitor.mod_aliases) == 0: - raise CliCommandException(command, f"The pipeline script {init_script_name} does not import dlt and does not seem to run any pipelines") + raise CliCommandException( + command, + f"The pipeline script {init_script_name} does not import dlt and does not seem to run" + " any pipelines", + ) return visitor @@ -45,8 +51,9 @@ def ensure_git_command(command: str) -> None: raise raise CliCommandException( command, - "'git' command is not available. Install and setup git with the following the guide %s" % "https://docs.github.com/en/get-started/quickstart/set-up-git", - imp_ex + "'git' command is not available. Install and setup git with the following the guide %s" + % "https://docs.github.com/en/get-started/quickstart/set-up-git", + imp_ex, ) from imp_ex diff --git a/dlt/common/arithmetics.py b/dlt/common/arithmetics.py index 5277acad4f..56d8fcd49b 100644 --- a/dlt/common/arithmetics.py +++ b/dlt/common/arithmetics.py @@ -1,7 +1,18 @@ -import decimal # noqa: I251 +import decimal # noqa: I251 from contextlib import contextmanager from typing import Iterator -from decimal import ROUND_HALF_UP, Decimal, Inexact, DivisionByZero, DefaultContext, InvalidOperation, localcontext, Context, Subnormal, ConversionSyntax # noqa: I251 +from decimal import ( # noqa: I251 + ROUND_HALF_UP, + Decimal, + Inexact, + DivisionByZero, + DefaultContext, + InvalidOperation, + localcontext, + Context, + Subnormal, + ConversionSyntax, +) DEFAULT_NUMERIC_PRECISION = 38 diff --git a/dlt/common/configuration/__init__.py b/dlt/common/configuration/__init__.py index a5ffd3e7b8..b7d868ff8b 100644 --- a/dlt/common/configuration/__init__.py +++ b/dlt/common/configuration/__init__.py @@ -7,7 +7,7 @@ ConfigFieldMissingException, ConfigValueCannotBeCoercedException, ConfigFileNotFoundException, - ConfigurationValueError + ConfigurationValueError, ) diff --git a/dlt/common/configuration/accessors.py b/dlt/common/configuration/accessors.py index cf71db7030..dfadc97fa3 100644 --- a/dlt/common/configuration/accessors.py +++ b/dlt/common/configuration/accessors.py @@ -15,8 +15,8 @@ DLT_CONFIG_VALUE = "config.value" TConfigAny = TypeVar("TConfigAny", bound=Any) -class _Accessor(abc.ABC): +class _Accessor(abc.ABC): def __getitem__(self, field: str) -> Any: value, traces = self._get_value(field) if value is None: @@ -100,9 +100,13 @@ def default_type(self) -> AnyType: @property def writable_provider(self) -> ConfigProvider: """find first writable provider that does not support secrets - should be config.toml""" - return next(p for p in self._get_providers_from_context() if p.is_writable and not p.supports_secrets) + return next( + p + for p in self._get_providers_from_context() + if p.is_writable and not p.supports_secrets + ) - value: ClassVar[None] = ConfigValue + value: ClassVar[Any] = ConfigValue "A placeholder that tells dlt to replace it with actual config value during the call to a source or resource decorated function." 
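For orientation, a minimal sketch of how the `ConfigValue` placeholder surfaced as `dlt.config.value` and `dlt.secrets.value` is typically consumed; the source and argument names below are illustrative only, not part of this change:

    import dlt


    @dlt.source
    def github_source(org: str = dlt.config.value, access_token: str = dlt.secrets.value):
        # the defaults are placeholders: dlt replaces them with values looked up in
        # config.toml / secrets.toml / environment variables when the source is called
        @dlt.resource
        def repos():
            yield {"org": org, "has_token": access_token is not None}

        return repos


    # the indexer implemented by _Accessor.__getitem__ above can also be used directly
    # and raises KeyError on a missing key, e.g. dlt.secrets["sources.github_source.access_token"]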
@@ -121,9 +125,11 @@ def default_type(self) -> AnyType: @property def writable_provider(self) -> ConfigProvider: """find first writable provider that supports secrets - should be secrets.toml""" - return next(p for p in self._get_providers_from_context() if p.is_writable and p.supports_secrets) + return next( + p for p in self._get_providers_from_context() if p.is_writable and p.supports_secrets + ) - value: ClassVar[None] = ConfigValue + value: ClassVar[Any] = ConfigValue "A placeholder that tells dlt to replace it with actual secret during the call to a source or resource decorated function." diff --git a/dlt/common/configuration/container.py b/dlt/common/configuration/container.py index 46d64f7a37..ad20765489 100644 --- a/dlt/common/configuration/container.py +++ b/dlt/common/configuration/container.py @@ -1,8 +1,13 @@ from contextlib import contextmanager -from typing import Dict, Iterator, Type, TypeVar +import re +import threading +from typing import ClassVar, Dict, Iterator, Tuple, Type, TypeVar from dlt.common.configuration.specs.base_configuration import ContainerInjectableContext -from dlt.common.configuration.exceptions import ContainerInjectableContextMangled, ContextDefaultCannotBeCreated +from dlt.common.configuration.exceptions import ( + ContainerInjectableContextMangled, + ContextDefaultCannotBeCreated, +) TConfiguration = TypeVar("TConfiguration", bound=ContainerInjectableContext) @@ -13,20 +18,33 @@ class Container: Injection context is identified by its type and available via dict indexer. The common pattern is to instantiate default context value if it is not yet present in container. - The indexer is settable and allows to explicitly set the value. This is required by for context that needs to be explicitly instantiated. + By default, the context is thread-affine so it can be injected only in the thread that originally set it. This behavior may be changed + in a particular context type (spec). + + The indexer is settable and allows to explicitly set the value. This is required for any context that needs to be explicitly instantiated. The `injectable_context` allows to set a context with a `with` keyword and then restore the previous one after it gets out of scope.
""" - _INSTANCE: "Container" = None + _INSTANCE: ClassVar["Container"] = None + _LOCK: ClassVar[threading.Lock] = threading.Lock() + _MAIN_THREAD_ID: ClassVar[int] = threading.get_ident() + """A main thread id to which get item will fallback for contexts without default""" - contexts: Dict[Type[ContainerInjectableContext], ContainerInjectableContext] + thread_contexts: Dict[int, Dict[Type[ContainerInjectableContext], ContainerInjectableContext]] + """A thread aware mapping of injection context """ + main_context: Dict[Type[ContainerInjectableContext], ContainerInjectableContext] + """Injection context for the main thread""" def __new__(cls: Type["Container"]) -> "Container": if not cls._INSTANCE: cls._INSTANCE = super().__new__(cls) - cls._INSTANCE.contexts = {} + cls._INSTANCE.thread_contexts = {} + cls._INSTANCE.main_context = cls._INSTANCE.thread_contexts[ + Container._MAIN_THREAD_ID + ] = {} + return cls._INSTANCE def __init__(self) -> None: @@ -37,49 +55,105 @@ def __getitem__(self, spec: Type[TConfiguration]) -> TConfiguration: if not issubclass(spec, ContainerInjectableContext): raise KeyError(f"{spec.__name__} is not a context") - item = self.contexts.get(spec) + context, item = self._thread_getitem(spec) if item is None: if spec.can_create_default: item = spec() - self.contexts[spec] = item + self._thread_setitem(context, spec, item) item.add_extras() else: raise ContextDefaultCannotBeCreated(spec) - return item # type: ignore + return item # type: ignore[return-value] def __setitem__(self, spec: Type[TConfiguration], value: TConfiguration) -> None: # value passed to container must be final value.resolve() # put it into context - self.contexts[spec] = value + self._thread_setitem(self._thread_context(spec), spec, value) def __delitem__(self, spec: Type[TConfiguration]) -> None: - del self.contexts[spec] + context = self._thread_context(spec) + self._thread_delitem(context, spec) def __contains__(self, spec: Type[TConfiguration]) -> bool: - return spec in self.contexts - + context = self._thread_context(spec) + return spec in context + + def _thread_context( + self, spec: Type[TConfiguration] + ) -> Dict[Type[ContainerInjectableContext], ContainerInjectableContext]: + if spec.global_affinity: + context = self.main_context + else: + # thread pool names used in dlt contain originating thread id. 
use this id over pool id + if m := re.match(r"dlt-pool-(\d+)-", threading.currentThread().getName()): + thread_id = int(m.group(1)) + else: + thread_id = threading.get_ident() + # return main context for main thread + if thread_id == Container._MAIN_THREAD_ID: + return self.main_context + # we may add a new empty thread context so lock here + with Container._LOCK: + context = self.thread_contexts.get(thread_id) + if context is None: + context = self.thread_contexts[thread_id] = {} + return context + + def _thread_getitem( + self, spec: Type[TConfiguration] + ) -> Tuple[ + Dict[Type[ContainerInjectableContext], ContainerInjectableContext], + ContainerInjectableContext, + ]: + context = self._thread_context(spec) + item = context.get(spec) + return context, item + + def _thread_setitem( + self, + context: Dict[Type[ContainerInjectableContext], ContainerInjectableContext], + spec: Type[ContainerInjectableContext], + value: TConfiguration, + ) -> None: + context[spec] = value + + def _thread_delitem( + self, + context: Dict[Type[ContainerInjectableContext], ContainerInjectableContext], + spec: Type[ContainerInjectableContext], + ) -> None: + del context[spec] @contextmanager def injectable_context(self, config: TConfiguration) -> Iterator[TConfiguration]: """A context manager that will insert `config` into the container and restore the previous value when it gets out of scope.""" + config.resolve() spec = type(config) previous_config: ContainerInjectableContext = None - if spec in self.contexts: - previous_config = self.contexts[spec] + context, previous_config = self._thread_getitem(spec) + # set new config and yield context + self._thread_setitem(context, spec, config) try: - self[spec] = config yield config finally: # before setting the previous config for given spec, check if there was no overlapping modification - if self.contexts[spec] is config: + context, current_config = self._thread_getitem(spec) + if current_config is config: # config is injected for spec so restore previous if previous_config is None: - del self.contexts[spec] + self._thread_delitem(context, spec) else: - self.contexts[spec] = previous_config + self._thread_setitem(context, spec, previous_config) else: # value was modified in the meantime and not restored - raise ContainerInjectableContextMangled(spec, self.contexts[spec], config) + raise ContainerInjectableContextMangled(spec, context[spec], config) + + @staticmethod + def thread_pool_prefix() -> str: + """Creates a container friendly pool prefix that contains starting thread id. 
Container implementation will automatically use it + for any thread-affine contexts instead of using id of the pool thread + """ + return f"dlt-pool-{threading.get_ident()}-" diff --git a/dlt/common/configuration/exceptions.py b/dlt/common/configuration/exceptions.py index f019565013..1d8423057f 100644 --- a/dlt/common/configuration/exceptions.py +++ b/dlt/common/configuration/exceptions.py @@ -1,5 +1,5 @@ import os -from typing import Any, Mapping, Type, Tuple, NamedTuple, Sequence +from typing import Any, Dict, Mapping, Type, Tuple, NamedTuple, Sequence from dlt.common.exceptions import DltException, TerminalException from dlt.common.utils import main_module_file_path @@ -22,17 +22,22 @@ class ConfigurationValueError(ConfigurationException, ValueError): class ContainerException(DltException): """base exception for all exceptions related to injectable container""" + pass class ConfigProviderException(ConfigurationException): """base exceptions for all exceptions raised by config providers""" + pass class ConfigurationWrongTypeException(ConfigurationException): def __init__(self, _typ: type) -> None: - super().__init__(f"Invalid configuration instance type {_typ}. Configuration instances must derive from BaseConfiguration.") + super().__init__( + f"Invalid configuration instance type {_typ}. Configuration instances must derive from" + " BaseConfiguration." + ) class ConfigFieldMissingException(KeyError, ConfigurationException): @@ -45,42 +50,70 @@ def __init__(self, spec_name: str, traces: Mapping[str, Sequence[LookupTrace]]) super().__init__(spec_name) def __str__(self) -> str: - msg = f"Following fields are missing: {str(self.fields)} in configuration with spec {self.spec_name}\n" + msg = ( + f"Following fields are missing: {str(self.fields)} in configuration with spec" + f" {self.spec_name}\n" + ) for f, field_traces in self.traces.items(): msg += f'\tfor field "{f}" config providers and keys were tried in following order:\n' for tr in field_traces: - msg += f'\t\tIn {tr.provider} key {tr.key} was not found.\n' + msg += f"\t\tIn {tr.provider} key {tr.key} was not found.\n" # check if entry point is run with path. 
this is common problem so warn the user main_path = main_module_file_path() - main_dir = os.path.dirname(main_path) - abs_main_dir = os.path.abspath(main_dir) - if abs_main_dir != os.getcwd(): - # directory was specified - msg += "WARNING: dlt looks for .dlt folder in your current working directory and your cwd (%s) is different from directory of your pipeline script (%s).\n" % (os.getcwd(), abs_main_dir) - msg += "If you keep your secret files in the same folder as your pipeline script but run your script from some other folder, secrets/configs will not be found\n" - msg += "Please refer to https://dlthub.com/docs/general-usage/credentials for more information\n" + if main_path: + main_dir = os.path.dirname(main_path) + abs_main_dir = os.path.abspath(main_dir) + if abs_main_dir != os.getcwd(): + # directory was specified + msg += ( + "WARNING: dlt looks for .dlt folder in your current working directory and your" + " cwd (%s) is different from directory of your pipeline script (%s).\n" + % (os.getcwd(), abs_main_dir) + ) + msg += ( + "If you keep your secret files in the same folder as your pipeline script but" + " run your script from some other folder, secrets/configs will not be found\n" + ) + msg += ( + "Please refer to https://dlthub.com/docs/general-usage/credentials for more" + " information\n" + ) return msg + def attrs(self) -> Dict[str, Any]: + attrs_ = super().attrs() + if "traces" in attrs_: + for _, traces in self.traces.items(): + for idx, trace in enumerate(traces): + # drop all values as they may contain secrets + traces[idx] = trace._replace(value=None) # type: ignore[index] + return attrs_ + class UnmatchedConfigHintResolversException(ConfigurationException): """Raised when using `@resolve_type` on a field that doesn't exist in the spec""" + def __init__(self, spec_name: str, field_names: Sequence[str]) -> None: self.field_names = field_names self.spec_name = spec_name - example = f">>> class {spec_name}(BaseConfiguration)\n" + "\n".join(f">>> {name}: Any" for name in field_names) + example = f">>> class {spec_name}(BaseConfiguration)\n" + "\n".join( + f">>> {name}: Any" for name in field_names + ) msg = ( - f"The config spec {spec_name} has dynamic type resolvers for fields: {field_names} " - "but these fields are not defined in the spec.\n" - "When using @resolve_type() decorator, Add the fields with 'Any' or another common type hint, example:\n" - f"\n{example}" + f"The config spec {spec_name} has dynamic type resolvers for fields: {field_names} but" + " these fields are not defined in the spec.\nWhen using @resolve_type() decorator, Add" + f" the fields with 'Any' or another common type hint, example:\n\n{example}" ) super().__init__(msg) class FinalConfigFieldException(ConfigurationException): """rises when field was annotated as final ie Final[str] and the value is modified by config provider""" + def __init__(self, spec_name: str, field: str) -> None: - super().__init__(f"Field {field} in spec {spec_name} is final but is being changed by a config provider") + super().__init__( + f"Field {field} in spec {spec_name} is final but is being changed by a config provider" + ) class ConfigValueCannotBeCoercedException(ConfigurationValueError): @@ -90,7 +123,9 @@ def __init__(self, field_name: str, field_value: Any, hint: type) -> None: self.field_name = field_name self.field_value = field_value self.hint = hint - super().__init__('Configured value for field %s cannot be coerced into type %s' % (field_name, str(hint))) + super().__init__( + "Configured value for field %s 
cannot be coerced into type %s" % (field_name, str(hint)) + ) # class ConfigIntegrityException(ConfigurationException): @@ -116,7 +151,9 @@ class ConfigFieldMissingTypeHintException(ConfigurationException): def __init__(self, field_name: str, spec: Type[Any]) -> None: self.field_name = field_name self.typ_ = spec - super().__init__(f"Field {field_name} on configspec {spec} does not provide required type hint") + super().__init__( + f"Field {field_name} on configspec {spec} does not provide required type hint" + ) class ConfigFieldTypeHintNotSupported(ConfigurationException): @@ -125,25 +162,39 @@ class ConfigFieldTypeHintNotSupported(ConfigurationException): def __init__(self, field_name: str, spec: Type[Any], typ_: Type[Any]) -> None: self.field_name = field_name self.typ_ = spec - super().__init__(f"Field {field_name} on configspec {spec} has hint with unsupported type {typ_}") + super().__init__( + f"Field {field_name} on configspec {spec} has hint with unsupported type {typ_}" + ) class ValueNotSecretException(ConfigurationException): def __init__(self, provider_name: str, key: str) -> None: self.provider_name = provider_name self.key = key - super().__init__(f"Provider {provider_name} cannot hold secret values but key {key} with secret value is present") + super().__init__( + f"Provider {provider_name} cannot hold secret values but key {key} with secret value is" + " present" + ) class InvalidNativeValue(ConfigurationException): - def __init__(self, spec: Type[Any], native_value_type: Type[Any], embedded_sections: Tuple[str, ...], inner_exception: Exception) -> None: + def __init__( + self, + spec: Type[Any], + native_value_type: Type[Any], + embedded_sections: Tuple[str, ...], + inner_exception: Exception, + ) -> None: self.spec = spec self.native_value_type = native_value_type self.embedded_sections = embedded_sections self.inner_exception = inner_exception inner_msg = f" {self.inner_exception}" if inner_exception is not ValueError else "" super().__init__( - f"{spec.__name__} cannot parse the configuration value provided. The value is of type {native_value_type.__name__} and comes from the {embedded_sections} section(s).{inner_msg}") + f"{spec.__name__} cannot parse the configuration value provided. The value is of type" + f" {native_value_type.__name__} and comes from the" + f" {embedded_sections} section(s).{inner_msg}" + ) class ContainerInjectableContextMangled(ContainerException): @@ -151,7 +202,10 @@ def __init__(self, spec: Type[Any], existing_config: Any, expected_config: Any) self.spec = spec self.existing_config = existing_config self.expected_config = expected_config - super().__init__(f"When restoring context {spec.__name__}, instance {expected_config} was expected, instead instance {existing_config} was found.") + super().__init__( + f"When restoring context {spec.__name__}, instance {expected_config} was expected," + f" instead instance {existing_config} was found." 
+ ) class ContextDefaultCannotBeCreated(ContainerException, KeyError): @@ -163,4 +217,6 @@ def __init__(self, spec: Type[Any]) -> None: class DuplicateConfigProviderException(ConfigProviderException): def __init__(self, provider_name: str) -> None: self.provider_name = provider_name - super().__init__(f"Provider with name {provider_name} already present in ConfigProvidersContext") + super().__init__( + f"Provider with name {provider_name} already present in ConfigProvidersContext" + ) diff --git a/dlt/common/configuration/inject.py b/dlt/common/configuration/inject.py index 1880727a0f..a22f299ae8 100644 --- a/dlt/common/configuration/inject.py +++ b/dlt/common/configuration/inject.py @@ -1,5 +1,4 @@ import inspect -import threading from functools import wraps from typing import Callable, Dict, Type, Any, Optional, Tuple, TypeVar, overload from inspect import Signature, Parameter @@ -15,7 +14,6 @@ _ORIGINAL_ARGS = "_dlt_orig_args" # keep a registry of all the decorated functions _FUNC_SPECS: Dict[int, Type[BaseConfiguration]] = {} -_RESOLVE_LOCK = threading.Lock() TConfiguration = TypeVar("TConfiguration", bound=BaseConfiguration) @@ -32,9 +30,9 @@ def with_config( sections: Tuple[str, ...] = (), sections_merge_style: ConfigSectionContext.TMergeFunc = ConfigSectionContext.prefer_incoming, auto_pipeline_section: bool = False, - include_defaults: bool = True -) -> TFun: - ... + include_defaults: bool = True, + accept_partial: bool = False, +) -> TFun: ... @overload @@ -45,9 +43,9 @@ def with_config( sections: Tuple[str, ...] = (), sections_merge_style: ConfigSectionContext.TMergeFunc = ConfigSectionContext.prefer_incoming, auto_pipeline_section: bool = False, - include_defaults: bool = True -) -> Callable[[TFun], TFun]: - ... + include_defaults: bool = True, + accept_partial: bool = False, +) -> Callable[[TFun], TFun]: ... def with_config( @@ -57,8 +55,10 @@ def with_config( sections: Tuple[str, ...] = (), sections_merge_style: ConfigSectionContext.TMergeFunc = ConfigSectionContext.prefer_incoming, auto_pipeline_section: bool = False, - include_defaults: bool = True -) -> Callable[[TFun], TFun]: + include_defaults: bool = True, + accept_partial: bool = False, + initial_config: Optional[BaseConfiguration] = None, +) -> Callable[[TFun], TFun]: """Injects values into decorated function arguments following the specification in `spec` or by deriving one from function's signature. The synthesized spec contains the arguments marked with `dlt.secrets.value` and `dlt.config.value` which are required to be injected at runtime. @@ -83,7 +83,9 @@ def with_config( def decorator(f: TFun) -> TFun: SPEC: Type[BaseConfiguration] = None sig: Signature = inspect.signature(f) - kwargs_arg = next((p for p in sig.parameters.values() if p.kind == Parameter.VAR_KEYWORD), None) + kwargs_arg = next( + (p for p in sig.parameters.values() if p.kind == Parameter.VAR_KEYWORD), None + ) spec_arg: Parameter = None pipeline_name_arg: Parameter = None @@ -107,7 +109,6 @@ def decorator(f: TFun) -> TFun: pipeline_name_arg = p pipeline_name_arg_default = None if p.default == Parameter.empty else p.default - @wraps(f) def _wrap(*args: Any, **kwargs: Any) -> Any: # bind parameters to signature @@ -119,7 +120,7 @@ def _wrap(*args: Any, **kwargs: Any) -> Any: else: # if section derivation function was provided then call it if section_f: - curr_sections: Tuple[str, ...] = (section_f(bound_args.arguments), ) + curr_sections: Tuple[str, ...] 
= (section_f(bound_args.arguments),) # sections may be a string elif isinstance(sections, str): curr_sections = (sections,) @@ -127,19 +128,30 @@ def _wrap(*args: Any, **kwargs: Any) -> Any: curr_sections = sections # if one of arguments is spec the use it as initial value - if spec_arg: + if initial_config: + config = initial_config + elif spec_arg: config = bound_args.arguments.get(spec_arg.name, None) # resolve SPEC, also provide section_context with pipeline_name if pipeline_name_arg: - curr_pipeline_name = bound_args.arguments.get(pipeline_name_arg.name, pipeline_name_arg_default) + curr_pipeline_name = bound_args.arguments.get( + pipeline_name_arg.name, pipeline_name_arg_default + ) else: curr_pipeline_name = None - section_context = ConfigSectionContext(pipeline_name=curr_pipeline_name, sections=curr_sections, merge_style=sections_merge_style) - # this may be called from many threads so make sure context is not mangled - with _RESOLVE_LOCK: - with inject_section(section_context): - # print(f"RESOLVE CONF in inject: {f.__name__}: {section_context.sections} vs {sections}") - config = resolve_configuration(config or SPEC(), explicit_value=bound_args.arguments) + section_context = ConfigSectionContext( + pipeline_name=curr_pipeline_name, + sections=curr_sections, + merge_style=sections_merge_style, + ) + # this may be called from many threads so section_context is thread affine + with inject_section(section_context): + # print(f"RESOLVE CONF in inject: {f.__name__}: {section_context.sections} vs {sections}") + config = resolve_configuration( + config or SPEC(), + explicit_value=bound_args.arguments, + accept_partial=accept_partial, + ) resolved_params = dict(config) # overwrite or add resolved params for p in sig.parameters.values(): @@ -169,14 +181,17 @@ def _wrap(*args: Any, **kwargs: Any) -> Any: return decorator if not callable(func): - raise ValueError("First parameter to the with_config must be callable ie. by using it as function decorator") + raise ValueError( + "First parameter to the with_config must be callable ie. by using it as function" + " decorator" + ) # We're called as @with_config without parens. return decorator(func) def last_config(**kwargs: Any) -> Any: - """Get configuration instance used to inject function arguments """ + """Get configuration instance used to inject function arguments""" return kwargs[_LAST_DLT_CONFIG] diff --git a/dlt/common/configuration/paths.py b/dlt/common/configuration/paths.py index f773a779f8..89494ba6bd 100644 --- a/dlt/common/configuration/paths.py +++ b/dlt/common/configuration/paths.py @@ -27,17 +27,17 @@ def make_dlt_settings_path(path: str) -> str: def get_dlt_data_dir() -> str: - """ Gets default directory where pipelines' data will be stored - 1. in user home directory: ~/.dlt/ - 2. if current user is root: in /var/dlt/ - 3. if current user does not have a home directory: in /tmp/dlt/ - 4. if DLT_DATA_DIR is set in env then it is used + """Gets default directory where pipelines' data will be stored + 1. in user home directory: ~/.dlt/ + 2. if current user is root: in /var/dlt/ + 3. if current user does not have a home directory: in /tmp/dlt/ + 4. 
if DLT_DATA_DIR is set in env then it is used """ if "DLT_DATA_DIR" in os.environ: return os.environ["DLT_DATA_DIR"] - # getuid not available on Windows - if hasattr(os, "getuid") and os.geteuid() == 0: + # geteuid not available on Windows + if hasattr(os, "geteuid") and os.geteuid() == 0: # we are root so use standard /var return os.path.join("/var", "dlt") @@ -49,5 +49,6 @@ def get_dlt_data_dir() -> str: # if home directory is available use ~/.dlt/pipelines return os.path.join(home, DOT_DLT) + def _get_user_home_dir() -> str: return os.path.expanduser("~") diff --git a/dlt/common/configuration/providers/__init__.py b/dlt/common/configuration/providers/__init__.py index 76268c14b1..3f5bc20cdc 100644 --- a/dlt/common/configuration/providers/__init__.py +++ b/dlt/common/configuration/providers/__init__.py @@ -1,7 +1,15 @@ from .provider import ConfigProvider from .environ import EnvironProvider from .dictionary import DictionaryProvider -from .toml import SecretsTomlProvider, ConfigTomlProvider, TomlFileProvider, CONFIG_TOML, SECRETS_TOML, StringTomlProvider, SECRETS_TOML_KEY +from .toml import ( + SecretsTomlProvider, + ConfigTomlProvider, + TomlFileProvider, + CONFIG_TOML, + SECRETS_TOML, + StringTomlProvider, + SECRETS_TOML_KEY, +) from .google_secrets import GoogleSecretsProvider from .context import ContextProvider diff --git a/dlt/common/configuration/providers/airflow.py b/dlt/common/configuration/providers/airflow.py index 3c3149adb1..99edf258d2 100644 --- a/dlt/common/configuration/providers/airflow.py +++ b/dlt/common/configuration/providers/airflow.py @@ -10,12 +10,13 @@ def __init__(self, only_secrets: bool = False, only_toml_fragments: bool = False @property def name(self) -> str: - return 'Airflow Secrets TOML Provider' + return "Airflow Secrets TOML Provider" def _look_vault(self, full_key: str, hint: type) -> str: """Get Airflow Variable with given `full_key`, return None if not found""" from airflow.models import Variable + with contextlib.redirect_stdout(io.StringIO()), contextlib.redirect_stderr(io.StringIO()): return Variable.get(full_key, default_var=None) # type: ignore diff --git a/dlt/common/configuration/providers/context.py b/dlt/common/configuration/providers/context.py index 84e26923a3..c6c1aac644 100644 --- a/dlt/common/configuration/providers/context.py +++ b/dlt/common/configuration/providers/context.py @@ -8,7 +8,6 @@ class ContextProvider(ConfigProvider): - NAME: ClassVar[str] = "Injectable Context" def __init__(self) -> None: @@ -18,7 +17,9 @@ def __init__(self) -> None: def name(self) -> str: return ContextProvider.NAME - def get_value(self, key: str, hint: Type[Any], pipeline_name: str = None, *sections: str) -> Tuple[Optional[Any], str]: + def get_value( + self, key: str, hint: Type[Any], pipeline_name: str = None, *sections: str + ) -> Tuple[Optional[Any], str]: assert sections == () # only context is a valid hint diff --git a/dlt/common/configuration/providers/dictionary.py b/dlt/common/configuration/providers/dictionary.py index 40a51eeb72..dffe5f0c71 100644 --- a/dlt/common/configuration/providers/dictionary.py +++ b/dlt/common/configuration/providers/dictionary.py @@ -7,7 +7,6 @@ class DictionaryProvider(ConfigProvider): - NAME: ClassVar[str] = "Dictionary Provider" def __init__(self) -> None: @@ -17,14 +16,16 @@ def __init__(self) -> None: def name(self) -> str: return self.NAME - def get_value(self, key: str, hint: Type[Any], pipeline_name: str, *sections: str) -> Tuple[Optional[Any], str]: + def get_value( + self, key: str, hint: Type[Any], 
pipeline_name: str, *sections: str + ) -> Tuple[Optional[Any], str]: full_path = sections + (key,) if pipeline_name: - full_path = (pipeline_name, ) + full_path + full_path = (pipeline_name,) + full_path full_key = get_key_name(key, "__", pipeline_name, *sections) node = self._values try: - for k in full_path: + for k in full_path: if not isinstance(node, dict): raise KeyError(k) node = node[k] diff --git a/dlt/common/configuration/providers/environ.py b/dlt/common/configuration/providers/environ.py index 7406a1207b..f83ea9a24d 100644 --- a/dlt/common/configuration/providers/environ.py +++ b/dlt/common/configuration/providers/environ.py @@ -8,8 +8,8 @@ SECRET_STORAGE_PATH: str = "/run/secrets/%s" -class EnvironProvider(ConfigProvider): +class EnvironProvider(ConfigProvider): @staticmethod def get_key_name(key: str, *sections: str) -> str: return get_key_name(key, "__", *sections).upper() @@ -18,7 +18,9 @@ def get_key_name(key: str, *sections: str) -> str: def name(self) -> str: return "Environment Variables" - def get_value(self, key: str, hint: Type[Any], pipeline_name: str, *sections: str) -> Tuple[Optional[Any], str]: + def get_value( + self, key: str, hint: Type[Any], pipeline_name: str, *sections: str + ) -> Tuple[Optional[Any], str]: # apply section to the key key = self.get_key_name(key, pipeline_name, *sections) if hint is TSecretValue: diff --git a/dlt/common/configuration/providers/google_secrets.py b/dlt/common/configuration/providers/google_secrets.py index ccf891a575..e6da3da0a8 100644 --- a/dlt/common/configuration/providers/google_secrets.py +++ b/dlt/common/configuration/providers/google_secrets.py @@ -9,7 +9,12 @@ class GoogleSecretsProvider(VaultTomlProvider): - def __init__(self, credentials: GcpServiceAccountCredentials, only_secrets: bool = True, only_toml_fragments: bool = True) -> None: + def __init__( + self, + credentials: GcpServiceAccountCredentials, + only_secrets: bool = True, + only_toml_fragments: bool = True, + ) -> None: self.credentials = credentials super().__init__(only_secrets, only_toml_fragments) @@ -26,7 +31,11 @@ def _look_vault(self, full_key: str, hint: type) -> str: from googleapiclient.discovery import build from googleapiclient.errors import HttpError except ModuleNotFoundError: - raise MissingDependencyException("GoogleSecretsProvider", ["google-api-python-client"], "We need google-api-python-client to build client for secretmanager v1") + raise MissingDependencyException( + "GoogleSecretsProvider", + ["google-api-python-client"], + "We need google-api-python-client to build client for secretmanager v1", + ) from dlt.common import logger resource_name = f"projects/{self.credentials.project_id}/secrets/{full_key}/versions/latest" @@ -42,10 +51,17 @@ def _look_vault(self, full_key: str, hint: type) -> str: # logger.warning(f"{self.credentials.client_email} has roles/secretmanager.secretAccessor role but {full_key} not found in Google Secrets: {error_doc['message']}[{error_doc['status']}]") return None elif error.resp.status == 403: - logger.warning(f"{self.credentials.client_email} does not have roles/secretmanager.secretAccessor role. It also does not have read permission to {full_key} or the key is not found in Google Secrets: {error_doc['message']}[{error_doc['status']}]") + logger.warning( + f"{self.credentials.client_email} does not have" + " roles/secretmanager.secretAccessor role. 
It also does not have read" + f" permission to {full_key} or the key is not found in Google Secrets:" + f" {error_doc['message']}[{error_doc['status']}]" + ) return None elif error.resp.status == 400: - logger.warning(f"Unable to read {full_key} : {error_doc['message']}[{error_doc['status']}]") + logger.warning( + f"Unable to read {full_key} : {error_doc['message']}[{error_doc['status']}]" + ) return None raise @@ -68,4 +84,4 @@ def _look_vault(self, full_key: str, hint: type) -> str: # has_required_role = True # break # if not has_required_role: - # print("no secrets read access") \ No newline at end of file + # print("no secrets read access") diff --git a/dlt/common/configuration/providers/provider.py b/dlt/common/configuration/providers/provider.py index c6bfea5dc3..405a42bcf0 100644 --- a/dlt/common/configuration/providers/provider.py +++ b/dlt/common/configuration/providers/provider.py @@ -5,9 +5,10 @@ class ConfigProvider(abc.ABC): - @abc.abstractmethod - def get_value(self, key: str, hint: Type[Any], pipeline_name: str, *sections: str) -> Tuple[Optional[Any], str]: + def get_value( + self, key: str, hint: Type[Any], pipeline_name: str, *sections: str + ) -> Tuple[Optional[Any], str]: pass def set_value(self, key: str, value: Any, pipeline_name: str, *sections: str) -> None: diff --git a/dlt/common/configuration/providers/toml.py b/dlt/common/configuration/providers/toml.py index 9e8b2a0059..7c856e8c27 100644 --- a/dlt/common/configuration/providers/toml.py +++ b/dlt/common/configuration/providers/toml.py @@ -19,7 +19,7 @@ CONFIG_TOML = "config.toml" SECRETS_TOML = "secrets.toml" -SECRETS_TOML_KEY = 'dlt_secrets_toml' +SECRETS_TOML_KEY = "dlt_secrets_toml" class BaseTomlProvider(ConfigProvider): @@ -30,10 +30,12 @@ def __init__(self, toml_document: TOMLContainer) -> None: def get_key_name(key: str, *sections: str) -> str: return get_key_name(key, ".", *sections) - def get_value(self, key: str, hint: Type[Any], pipeline_name: str, *sections: str) -> Tuple[Optional[Any], str]: + def get_value( + self, key: str, hint: Type[Any], pipeline_name: str, *sections: str + ) -> Tuple[Optional[Any], str]: full_path = sections + (key,) if pipeline_name: - full_path = (pipeline_name, ) + full_path + full_path = (pipeline_name,) + full_path full_key = self.get_key_name(key, pipeline_name, *sections) node: Union[TOMLContainer, TOMLItem] = self._toml try: @@ -48,7 +50,7 @@ def get_value(self, key: str, hint: Type[Any], pipeline_name: str, *sections: st def set_value(self, key: str, value: Any, pipeline_name: str, *sections: str) -> None: if pipeline_name: - sections = (pipeline_name, ) + sections + sections = (pipeline_name,) + sections if isinstance(value, TOMLContainer): if key is None: @@ -70,10 +72,14 @@ def set_value(self, key: str, value: Any, pipeline_name: str, *sections: str) -> if k not in master: master[k] = tomlkit.table() master = master[k] # type: ignore - if isinstance(value, dict) and isinstance(master.get(key), dict): - update_dict_nested(master[key], value) # type: ignore - else: - master[key] = value + if isinstance(value, dict): + # remove none values, TODO: we need recursive None removal + value = {k: v for k, v in value.items() if v is not None} + # if target is also dict then merge recursively + if isinstance(master.get(key), dict): + update_dict_nested(master[key], value) # type: ignore + return + master[key] = value @property def supports_sections(self) -> bool: @@ -85,7 +91,6 @@ def is_empty(self) -> bool: class StringTomlProvider(BaseTomlProvider): - def __init__(self, 
toml_string: str) -> None: super().__init__(StringTomlProvider.loads(toml_string)) @@ -141,12 +146,13 @@ def __init__(self, only_secrets: bool, only_toml_fragments: bool) -> None: super().__init__(tomlkit.document()) self._update_from_vault(SECRETS_TOML_KEY, None, AnyType, None, ()) - def get_value(self, key: str, hint: type, pipeline_name: str, *sections: str) -> Tuple[Optional[Any], str]: + def get_value( + self, key: str, hint: type, pipeline_name: str, *sections: str + ) -> Tuple[Optional[Any], str]: full_key = self.get_key_name(key, pipeline_name, *sections) value, _ = super().get_value(key, hint, pipeline_name, *sections) if value is None: - # only secrets hints are handled if self.only_secrets and not is_secret_hint(hint) and hint is not AnyType: return None, full_key @@ -156,7 +162,6 @@ def get_value(self, key: str, hint: type, pipeline_name: str, *sections: str) -> lookup_fk = self.get_key_name(SECRETS_TOML_KEY, pipeline_name) self._update_from_vault(lookup_fk, "", AnyType, pipeline_name, ()) - # generate auxiliary paths to get from vault for known_section in [known_sections.SOURCES, known_sections.DESTINATION]: @@ -164,7 +169,9 @@ def _look_at_idx(idx: int, full_path: Tuple[str, ...], pipeline_name: str) -> No lookup_key = full_path[idx] lookup_sections = full_path[:idx] lookup_fk = self.get_key_name(lookup_key, *lookup_sections) - self._update_from_vault(lookup_fk, lookup_key, AnyType, pipeline_name, lookup_sections) + self._update_from_vault( + lookup_fk, lookup_key, AnyType, pipeline_name, lookup_sections + ) def _lookup_paths(pipeline_name_: str, known_section_: str) -> None: with contextlib.suppress(ValueError): @@ -180,7 +187,9 @@ def _lookup_paths(pipeline_name_: str, known_section_: str) -> None: # first query the shortest paths so the longer paths can override it _lookup_paths(None, known_section) # check sources and sources. if pipeline_name: - _lookup_paths(pipeline_name, known_section) # check .sources and .sources. + _lookup_paths( + pipeline_name, known_section + ) # check .sources and .sources. value, _ = super().get_value(key, hint, pipeline_name, *sections) # skip checking the exact path if we check only toml fragments @@ -203,7 +212,9 @@ def supports_secrets(self) -> bool: def _look_vault(self, full_key: str, hint: type) -> str: pass - def _update_from_vault(self, full_key: str, key: str, hint: type, pipeline_name: str, sections: Tuple[str, ...]) -> None: + def _update_from_vault( + self, full_key: str, key: str, hint: type, pipeline_name: str, sections: Tuple[str, ...] + ) -> None: if full_key in self._vault_lookups: return # print(f"tries '{key}' {pipeline_name} | {sections} at '{full_key}'") @@ -216,8 +227,11 @@ def _update_from_vault(self, full_key: str, key: str, hint: type, pipeline_name: def is_empty(self) -> bool: return False + class TomlFileProvider(BaseTomlProvider): - def __init__(self, file_name: str, project_dir: str = None, add_global_config: bool = False) -> None: + def __init__( + self, file_name: str, project_dir: str = None, add_global_config: bool = False + ) -> None: """Creates config provider from a `toml` file The provider loads the `toml` file with specified name and from specified folder. 
If `add_global_config` flags is specified, @@ -236,7 +250,9 @@ def __init__(self, file_name: str, project_dir: str = None, add_global_config: b toml_document = self._read_toml_file(file_name, project_dir, add_global_config) super().__init__(toml_document) - def _read_toml_file(self, file_name: str, project_dir: str = None, add_global_config: bool = False) -> tomlkit.TOMLDocument: + def _read_toml_file( + self, file_name: str, project_dir: str = None, add_global_config: bool = False + ) -> tomlkit.TOMLDocument: self._file_name = file_name self._toml_path = os.path.join(project_dir or get_dlt_settings_dir(), file_name) self._add_global_config = add_global_config @@ -254,7 +270,9 @@ def global_config_path() -> str: return get_dlt_data_dir() def write_toml(self) -> None: - assert not self._add_global_config, "Will not write configs when `add_global_config` flag was set" + assert ( + not self._add_global_config + ), "Will not write configs when `add_global_config` flag was set" with open(self._toml_path, "w", encoding="utf-8") as f: tomlkit.dump(self._toml, f) @@ -269,7 +287,6 @@ def _read_toml(toml_path: str) -> tomlkit.TOMLDocument: class ConfigTomlProvider(TomlFileProvider): - def __init__(self, project_dir: str = None, add_global_config: bool = False) -> None: super().__init__(CONFIG_TOML, project_dir=project_dir, add_global_config=add_global_config) @@ -287,7 +304,6 @@ def is_writable(self) -> bool: class SecretsTomlProvider(TomlFileProvider): - def __init__(self, project_dir: str = None, add_global_config: bool = False) -> None: super().__init__(SECRETS_TOML, project_dir=project_dir, add_global_config=add_global_config) @@ -305,7 +321,9 @@ def is_writable(self) -> bool: class TomlProviderReadException(ConfigProviderException): - def __init__(self, provider_name: str, file_name: str, full_path: str, toml_exception: str) -> None: + def __init__( + self, provider_name: str, file_name: str, full_path: str, toml_exception: str + ) -> None: self.file_name = file_name self.full_path = full_path msg = f"A problem encountered when loading {provider_name} from {full_path}:\n" diff --git a/dlt/common/configuration/resolve.py b/dlt/common/configuration/resolve.py index 8e1af2831e..db69cd9572 100644 --- a/dlt/common/configuration/resolve.py +++ b/dlt/common/configuration/resolve.py @@ -3,22 +3,49 @@ from typing import Any, Dict, ContextManager, List, Optional, Sequence, Tuple, Type, TypeVar from dlt.common.configuration.providers.provider import ConfigProvider -from dlt.common.typing import AnyType, StrAny, TSecretValue, get_all_types_of_class_in_union, is_final_type, is_optional_type, is_union - -from dlt.common.configuration.specs.base_configuration import BaseConfiguration, CredentialsConfiguration, is_secret_hint, extract_inner_hint, is_context_inner_hint, is_base_configuration_inner_hint, is_valid_hint +from dlt.common.typing import ( + AnyType, + StrAny, + TSecretValue, + get_all_types_of_class_in_union, + is_final_type, + is_optional_type, + is_union_type, +) + +from dlt.common.configuration.specs.base_configuration import ( + BaseConfiguration, + CredentialsConfiguration, + is_secret_hint, + extract_inner_hint, + is_context_inner_hint, + is_base_configuration_inner_hint, + is_valid_hint, +) from dlt.common.configuration.specs.config_section_context import ConfigSectionContext from dlt.common.configuration.specs.exceptions import NativeValueError from dlt.common.configuration.specs.config_providers_context import ConfigProvidersContext from dlt.common.configuration.container import Container 
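The resolve.py hunks that follow only re-wrap the resolve_configuration entry point (keyword-only sections, explicit_value, accept_partial). A hedged usage sketch, assuming dlt's environment-variable naming convention (uppercased section and key joined with double underscores); the spec, section and values are illustrative, not taken from the diff:

    import os

    from dlt.common.configuration.resolve import resolve_configuration
    from dlt.common.configuration.specs import BaseConfiguration, configspec

    @configspec
    class MyApiConfiguration(BaseConfiguration):
        api_url: str = None      # required: resolution fails if no provider supplies it
        timeout: float = 30.0    # has a default, so it is optional

    # the environment provider is queried using <SECTION>__<KEY>, uppercased
    os.environ["MY_SECTION__API_URL"] = "https://api.example.com"
    cfg = resolve_configuration(MyApiConfiguration(), sections=("my_section",))
    assert cfg.api_url == "https://api.example.com" and cfg.timeout == 30.0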
from dlt.common.configuration.utils import log_traces, deserialize_value from dlt.common.configuration.exceptions import ( - FinalConfigFieldException, LookupTrace, ConfigFieldMissingException, ConfigurationWrongTypeException, - ValueNotSecretException, InvalidNativeValue, UnmatchedConfigHintResolversException) + LookupTrace, + ConfigFieldMissingException, + ConfigurationWrongTypeException, + ValueNotSecretException, + InvalidNativeValue, + UnmatchedConfigHintResolversException, +) TConfiguration = TypeVar("TConfiguration", bound=BaseConfiguration) -def resolve_configuration(config: TConfiguration, *, sections: Tuple[str, ...] = (), explicit_value: Any = None, accept_partial: bool = False) -> TConfiguration: +def resolve_configuration( + config: TConfiguration, + *, + sections: Tuple[str, ...] = (), + explicit_value: Any = None, + accept_partial: bool = False +) -> TConfiguration: if not isinstance(config, BaseConfiguration): raise ConfigurationWrongTypeException(type(config)) @@ -26,7 +53,9 @@ def resolve_configuration(config: TConfiguration, *, sections: Tuple[str, ...] = # allows, for example, to store connection string or service.json in their native form in single env variable or under single vault key if config.__section__ and explicit_value is None: initial_hint = TSecretValue if isinstance(config, CredentialsConfiguration) else AnyType - explicit_value, traces = _resolve_single_value(config.__section__, initial_hint, AnyType, None, sections, ()) + explicit_value, traces = _resolve_single_value( + config.__section__, initial_hint, AnyType, None, sections, () + ) if isinstance(explicit_value, C_Mapping): # mappings cannot be used as explicit values, we want to enumerate mappings and request the fields' values one by one explicit_value = None @@ -41,7 +70,7 @@ def initialize_credentials(hint: Any, initial_value: Any) -> CredentialsConfigur or a dictionary corresponding to credential's fields. In case of union of credentials, the first configuration in the union fully resolved by initial value will be instantiated.""" # use passed credentials as initial value. initial value may resolve credentials - if is_union(hint): + if is_union_type(hint): specs_in_union = get_all_types_of_class_in_union(hint, CredentialsConfiguration) assert len(specs_in_union) > 0 first_credentials: CredentialsConfiguration = None @@ -62,7 +91,9 @@ def initialize_credentials(hint: Any, initial_value: Any) -> CredentialsConfigur return hint(initial_value) # type: ignore -def inject_section(section_context: ConfigSectionContext, merge_existing: bool = True) -> ContextManager[ConfigSectionContext]: +def inject_section( + section_context: ConfigSectionContext, merge_existing: bool = True +) -> ContextManager[ConfigSectionContext]: """Context manager that sets section specified in `section_context` to be used during configuration resolution. Optionally merges the context already in the container with the one provided Args: @@ -83,9 +114,14 @@ def inject_section(section_context: ConfigSectionContext, merge_existing: bool = return container.injectable_context(section_context) -def _maybe_parse_native_value(config: TConfiguration, explicit_value: Any, embedded_sections: Tuple[str, ...]) -> Any: + +def _maybe_parse_native_value( + config: TConfiguration, explicit_value: Any, embedded_sections: Tuple[str, ...] +) -> Any: # use initial value to resolve the whole configuration. 
if explicit value is a mapping it will be applied field by field later - if explicit_value and (not isinstance(explicit_value, C_Mapping) or isinstance(explicit_value, BaseConfiguration)): + if explicit_value and ( + not isinstance(explicit_value, C_Mapping) or isinstance(explicit_value, BaseConfiguration) + ): try: config.parse_native_representation(explicit_value) except ValueError as v_err: @@ -97,13 +133,14 @@ def _maybe_parse_native_value(config: TConfiguration, explicit_value: Any, embed explicit_value = None return explicit_value + def _resolve_configuration( - config: TConfiguration, - explicit_sections: Tuple[str, ...], - embedded_sections: Tuple[str, ...], - explicit_value: Any, - accept_partial: bool - ) -> TConfiguration: + config: TConfiguration, + explicit_sections: Tuple[str, ...], + embedded_sections: Tuple[str, ...], + explicit_value: Any, + accept_partial: bool, +) -> TConfiguration: # do not resolve twice if config.is_resolved(): return config @@ -114,7 +151,9 @@ def _resolve_configuration( explicit_value = _maybe_parse_native_value(config, explicit_value, embedded_sections) # if native representation didn't fully resolve the config, we try to resolve field by field if not config.is_resolved(): - _resolve_config_fields(config, explicit_value, explicit_sections, embedded_sections, accept_partial) + _resolve_config_fields( + config, explicit_value, explicit_sections, embedded_sections, accept_partial + ) # full configuration was resolved config.resolve() except ConfigFieldMissingException as cm_ex: @@ -134,13 +173,12 @@ def _resolve_configuration( def _resolve_config_fields( - config: BaseConfiguration, - explicit_values: StrAny, - explicit_sections: Tuple[str, ...], - embedded_sections: Tuple[str, ...], - accept_partial: bool - ) -> None: - + config: BaseConfiguration, + explicit_values: StrAny, + explicit_sections: Tuple[str, ...], + embedded_sections: Tuple[str, ...], + accept_partial: bool, +) -> None: fields = config.get_resolvable_fields() unresolved_fields: Dict[str, Sequence[LookupTrace]] = {} @@ -164,9 +202,13 @@ def _resolve_config_fields( # if hint is union of configurations, any of them must be resolved specs_in_union: List[Type[BaseConfiguration]] = [] current_value = None - if is_union(hint): + if is_union_type(hint): # if union contains a type of explicit value which is not a valid hint, return it as current value - if explicit_value and not is_valid_hint(type(explicit_value)) and get_all_types_of_class_in_union(hint, type(explicit_value)): + if ( + explicit_value + and not is_valid_hint(type(explicit_value)) + and get_all_types_of_class_in_union(hint, type(explicit_value)) + ): current_value, traces = explicit_value, [] else: specs_in_union = get_all_types_of_class_in_union(hint, BaseConfiguration) @@ -184,7 +226,7 @@ def _resolve_config_fields( config.__section__, explicit_sections, embedded_sections, - accept_partial + accept_partial, ) break except ConfigFieldMissingException as cfm_ex: @@ -205,7 +247,7 @@ def _resolve_config_fields( config.__section__, explicit_sections, embedded_sections, - accept_partial + accept_partial, ) # check if hint optional @@ -215,9 +257,9 @@ def _resolve_config_fields( unresolved_fields[key] = traces # set resolved value in config if default_value != current_value: - if is_final_type(hint): - raise FinalConfigFieldException(type(config).__name__, key) - setattr(config, key, current_value) + if not is_final_type(hint): + # ignore final types + setattr(config, key, current_value) # Check for dynamic hint resolvers which have 
no corresponding fields unmatched_hint_resolvers: List[str] = [] @@ -233,17 +275,16 @@ def _resolve_config_fields( def _resolve_config_field( - key: str, - hint: Type[Any], - default_value: Any, - explicit_value: Any, - config: BaseConfiguration, - config_sections: str, - explicit_sections: Tuple[str, ...], - embedded_sections: Tuple[str, ...], - accept_partial: bool - ) -> Tuple[Any, List[LookupTrace]]: - + key: str, + hint: Type[Any], + default_value: Any, + explicit_value: Any, + config: BaseConfiguration, + config_sections: str, + explicit_sections: Tuple[str, ...], + embedded_sections: Tuple[str, ...], + accept_partial: bool, +) -> Tuple[Any, List[LookupTrace]]: inner_hint = extract_inner_hint(hint) if explicit_value is not None: @@ -251,7 +292,9 @@ def _resolve_config_field( traces: List[LookupTrace] = [] else: # resolve key value via active providers passing the original hint ie. to preserve TSecretValue - value, traces = _resolve_single_value(key, hint, inner_hint, config_sections, explicit_sections, embedded_sections) + value, traces = _resolve_single_value( + key, hint, inner_hint, config_sections, explicit_sections, embedded_sections + ) log_traces(config, key, hint, value, default_value, traces) # contexts must be resolved as a whole if is_context_inner_hint(inner_hint): @@ -280,23 +323,44 @@ def _resolve_config_field( # only config with sections may look for initial values if embedded_config.__section__ and value is None: # config section becomes the key if the key does not start with, otherwise it keeps its original value - initial_key, initial_embedded = _apply_embedded_sections_to_config_sections(embedded_config.__section__, embedded_sections + (key,)) + initial_key, initial_embedded = _apply_embedded_sections_to_config_sections( + embedded_config.__section__, embedded_sections + (key,) + ) # it must be a secret value is config is credentials - initial_hint = TSecretValue if isinstance(embedded_config, CredentialsConfiguration) else AnyType - value, initial_traces = _resolve_single_value(initial_key, initial_hint, AnyType, None, explicit_sections, initial_embedded) + initial_hint = ( + TSecretValue + if isinstance(embedded_config, CredentialsConfiguration) + else AnyType + ) + value, initial_traces = _resolve_single_value( + initial_key, initial_hint, AnyType, None, explicit_sections, initial_embedded + ) if isinstance(value, C_Mapping): # mappings are not passed as initials value = None else: traces.extend(initial_traces) - log_traces(config, initial_key, type(embedded_config), value, default_value, initial_traces) + log_traces( + config, + initial_key, + type(embedded_config), + value, + default_value, + initial_traces, + ) # check if hint optional is_optional = is_optional_type(hint) # accept partial becomes True if type if optional so we do not fail on optional configs that do not resolve fully accept_partial = accept_partial or is_optional # create new instance and pass value from the provider as initial, add key to sections - value = _resolve_configuration(embedded_config, explicit_sections, embedded_sections + (key,), default_value if value is None else value, accept_partial) + value = _resolve_configuration( + embedded_config, + explicit_sections, + embedded_sections + (key,), + default_value if value is None else value, + accept_partial, + ) if value.is_partial() and is_optional: # do not return partially resolved optional embeds value = None @@ -311,14 +375,13 @@ def _resolve_config_field( def _resolve_single_value( - key: str, - hint: Type[Any], - inner_hint: 
Type[Any], - config_section: str, - explicit_sections: Tuple[str, ...], - embedded_sections: Tuple[str, ...] - ) -> Tuple[Optional[Any], List[LookupTrace]]: - + key: str, + hint: Type[Any], + inner_hint: Type[Any], + config_section: str, + explicit_sections: Tuple[str, ...], + embedded_sections: Tuple[str, ...], +) -> Tuple[Optional[Any], List[LookupTrace]]: traces: List[LookupTrace] = [] value = None @@ -335,7 +398,9 @@ def _resolve_single_value( return value, traces # resolve a field of the config - config_section, embedded_sections = _apply_embedded_sections_to_config_sections(config_section, embedded_sections) + config_section, embedded_sections = _apply_embedded_sections_to_config_sections( + config_section, embedded_sections + ) providers = providers_context.providers # get additional sections to look in from container sections_context = container[ConfigSectionContext] @@ -356,7 +421,7 @@ def look_sections(pipeline_name: str = None) -> Any: config_section, # if explicit sections are provided, ignore the injected context explicit_sections or sections_context.sections, - embedded_sections + embedded_sections, ) traces.extend(provider_traces) if value is not None: @@ -382,7 +447,7 @@ def resolve_single_provider_value( pipeline_name: str = None, config_section: str = None, explicit_sections: Tuple[str, ...] = (), - embedded_sections: Tuple[str, ...] = () + embedded_sections: Tuple[str, ...] = (), ) -> Tuple[Optional[Any], List[LookupTrace]]: traces: List[LookupTrace] = [] @@ -429,7 +494,9 @@ def resolve_single_provider_value( return value, traces -def _apply_embedded_sections_to_config_sections(config_section: str, embedded_sections: Tuple[str, ...]) -> Tuple[str, Tuple[str, ...]]: +def _apply_embedded_sections_to_config_sections( + config_section: str, embedded_sections: Tuple[str, ...] 
+) -> Tuple[str, Tuple[str, ...]]: # for the configurations that have __section__ (config_section) defined and are embedded in other configurations, # the innermost embedded section replaces config_section if embedded_sections: diff --git a/dlt/common/configuration/specs/__init__.py b/dlt/common/configuration/specs/__init__.py index 2a033b6bbd..9acf14bde3 100644 --- a/dlt/common/configuration/specs/__init__.py +++ b/dlt/common/configuration/specs/__init__.py @@ -1,8 +1,22 @@ from .run_configuration import RunConfiguration -from .base_configuration import BaseConfiguration, CredentialsConfiguration, CredentialsWithDefault, ContainerInjectableContext, extract_inner_hint, is_base_configuration_inner_hint, configspec +from .base_configuration import ( + BaseConfiguration, + CredentialsConfiguration, + CredentialsWithDefault, + ContainerInjectableContext, + extract_inner_hint, + is_base_configuration_inner_hint, + configspec, +) from .config_section_context import ConfigSectionContext -from .gcp_credentials import GcpServiceAccountCredentialsWithoutDefaults, GcpServiceAccountCredentials, GcpOAuthCredentialsWithoutDefaults, GcpOAuthCredentials, GcpCredentials +from .gcp_credentials import ( + GcpServiceAccountCredentialsWithoutDefaults, + GcpServiceAccountCredentials, + GcpOAuthCredentialsWithoutDefaults, + GcpOAuthCredentials, + GcpCredentials, +) from .connection_string_credentials import ConnectionStringCredentials from .api_credentials import OAuth2Credentials from .aws_credentials import AwsCredentials, AwsCredentialsWithoutDefaults @@ -10,17 +24,33 @@ # backward compatibility for service account credentials -from .gcp_credentials import GcpServiceAccountCredentialsWithoutDefaults as GcpClientCredentials, GcpServiceAccountCredentials as GcpClientCredentialsWithDefault +from .gcp_credentials import ( + GcpServiceAccountCredentialsWithoutDefaults as GcpClientCredentials, + GcpServiceAccountCredentials as GcpClientCredentialsWithDefault, +) __all__ = [ "RunConfiguration", - "BaseConfiguration", "CredentialsConfiguration", "CredentialsWithDefault", "ContainerInjectableContext", "extract_inner_hint", "is_base_configuration_inner_hint", "configspec", + "BaseConfiguration", + "CredentialsConfiguration", + "CredentialsWithDefault", + "ContainerInjectableContext", + "extract_inner_hint", + "is_base_configuration_inner_hint", + "configspec", "ConfigSectionContext", - "GcpServiceAccountCredentialsWithoutDefaults", "GcpServiceAccountCredentials", "GcpOAuthCredentialsWithoutDefaults", "GcpOAuthCredentials", "GcpCredentials", + "GcpServiceAccountCredentialsWithoutDefaults", + "GcpServiceAccountCredentials", + "GcpOAuthCredentialsWithoutDefaults", + "GcpOAuthCredentials", + "GcpCredentials", "ConnectionStringCredentials", "OAuth2Credentials", - "AwsCredentials", "AwsCredentialsWithoutDefaults", - "AzureCredentials", "AzureCredentialsWithoutDefaults", - "GcpClientCredentials", "GcpClientCredentialsWithDefault", + "AwsCredentials", + "AwsCredentialsWithoutDefaults", + "AzureCredentials", + "AzureCredentialsWithoutDefaults", + "GcpClientCredentials", + "GcpClientCredentialsWithDefault", ] diff --git a/dlt/common/configuration/specs/api_credentials.py b/dlt/common/configuration/specs/api_credentials.py index 6a06a42713..fd7ae8cb09 100644 --- a/dlt/common/configuration/specs/api_credentials.py +++ b/dlt/common/configuration/specs/api_credentials.py @@ -17,7 +17,6 @@ class OAuth2Credentials(CredentialsConfiguration): # add refresh_token when generating config samples __config_gen_annotations__: 
ClassVar[List[str]] = ["refresh_token"] - def auth(self, scopes: Union[str, List[str]] = None, redirect_url: str = None) -> None: """Authorizes the client using the available credentials @@ -44,4 +43,3 @@ def add_scopes(self, scopes: Union[str, List[str]]) -> None: self.scopes += [scopes] elif scopes: self.scopes = list(set(self.scopes + scopes)) - diff --git a/dlt/common/configuration/specs/aws_credentials.py b/dlt/common/configuration/specs/aws_credentials.py index 8c4aabc4ee..f6df1d8cce 100644 --- a/dlt/common/configuration/specs/aws_credentials.py +++ b/dlt/common/configuration/specs/aws_credentials.py @@ -2,7 +2,11 @@ from dlt.common.exceptions import MissingDependencyException from dlt.common.typing import TSecretStrValue, DictStrAny -from dlt.common.configuration.specs import CredentialsConfiguration, CredentialsWithDefault, configspec +from dlt.common.configuration.specs import ( + CredentialsConfiguration, + CredentialsWithDefault, + configspec, +) from dlt.common.configuration.specs.exceptions import InvalidBoto3Session from dlt import version @@ -37,7 +41,6 @@ def to_native_representation(self) -> Dict[str, Optional[str]]: @configspec class AwsCredentials(AwsCredentialsWithoutDefaults, CredentialsWithDefault): - def on_partial(self) -> None: # Try get default credentials session = self._to_botocore_session() @@ -48,31 +51,34 @@ def _to_botocore_session(self) -> Any: try: import botocore.session except ModuleNotFoundError: - raise MissingDependencyException(self.__class__.__name__, [f"{version.DLT_PKG_NAME}[s3]"]) + raise MissingDependencyException( + self.__class__.__name__, [f"{version.DLT_PKG_NAME}[s3]"] + ) # taken from boto3 Session session = botocore.session.get_session() if self.profile_name is not None: - session.set_config_variable('profile', self.profile_name) + session.set_config_variable("profile", self.profile_name) if self.aws_access_key_id or self.aws_secret_access_key or self.aws_session_token: session.set_credentials( self.aws_access_key_id, self.aws_secret_access_key, self.aws_session_token ) if self.region_name is not None: - session.set_config_variable('region', self.region_name) + session.set_config_variable("region", self.region_name) return session def _from_session(self, session: Any) -> Any: """Sets the credentials properties from botocore or boto3 `session` and return session's credentials if found""" import botocore.session + if not isinstance(session, botocore.session.Session): # assume this is boto3 session session = session._session # NOTE: we do not set profile name from boto3 session # we either pass it explicitly in `_to_session` so we know it is identical # this is what boto3 does: return self._session.profile or 'default' which is obviously wrong (returning default when there's no session) - self.region_name = session.get_config_variable('region') + self.region_name = session.get_config_variable("region") default = session.get_credentials() if not default: return None diff --git a/dlt/common/configuration/specs/azure_credentials.py b/dlt/common/configuration/specs/azure_credentials.py index 49393a6343..f7cac78dca 100644 --- a/dlt/common/configuration/specs/azure_credentials.py +++ b/dlt/common/configuration/specs/azure_credentials.py @@ -3,7 +3,11 @@ from dlt.common import pendulum from dlt.common.exceptions import MissingDependencyException from dlt.common.typing import TSecretStrValue -from dlt.common.configuration.specs import CredentialsConfiguration, CredentialsWithDefault, configspec +from dlt.common.configuration.specs import ( + 
CredentialsConfiguration, + CredentialsWithDefault, + configspec, +) from dlt.common.configuration.specs.exceptions import InvalidBoto3Session from dlt import version @@ -30,12 +34,13 @@ def to_adlfs_credentials(self) -> Dict[str, Any]: def create_sas_token(self) -> None: from azure.storage.blob import generate_account_sas, ResourceTypes + self.azure_storage_sas_token = generate_account_sas( # type: ignore[assignment] account_name=self.azure_storage_account_name, account_key=self.azure_storage_account_key, resource_types=ResourceTypes(container=True, object=True), permission=self.azure_sas_token_permissions, - expiry=pendulum.now().add(days=1) + expiry=pendulum.now().add(days=1), ) def on_partial(self) -> None: @@ -50,6 +55,7 @@ def on_partial(self) -> None: class AzureCredentials(AzureCredentialsWithoutDefaults, CredentialsWithDefault): def on_partial(self) -> None: from azure.identity import DefaultAzureCredential + if not self.azure_storage_account_key and not self.azure_storage_sas_token: self._set_default_credentials(DefaultAzureCredential()) if self.azure_storage_account_name: @@ -60,5 +66,5 @@ def on_partial(self) -> None: def to_adlfs_credentials(self) -> Dict[str, Any]: base_kwargs = super().to_adlfs_credentials() if self.has_default_credentials(): - base_kwargs['anon'] = False + base_kwargs["anon"] = False return base_kwargs diff --git a/dlt/common/configuration/specs/base_configuration.py b/dlt/common/configuration/specs/base_configuration.py index 08940ffe31..84f59fa894 100644 --- a/dlt/common/configuration/specs/base_configuration.py +++ b/dlt/common/configuration/specs/base_configuration.py @@ -3,7 +3,22 @@ import contextlib import dataclasses from collections.abc import Mapping as C_Mapping -from typing import Callable, List, Optional, Union, Any, Dict, Iterator, MutableMapping, Type, TYPE_CHECKING, get_args, get_origin, overload, ClassVar, TypeVar +from typing import ( + Callable, + List, + Optional, + Union, + Any, + Dict, + Iterator, + MutableMapping, + Type, + TYPE_CHECKING, + overload, + ClassVar, + TypeVar, +) +from typing_extensions import get_args, get_origin from functools import wraps if TYPE_CHECKING: @@ -11,9 +26,17 @@ else: TDtcField = dataclasses.Field -from dlt.common.typing import TAnyClass, TSecretValue, extract_inner_type, is_optional_type, is_union +from dlt.common.typing import ( + TAnyClass, + extract_inner_type, + is_optional_type, + is_union_type, +) from dlt.common.data_types import py_type_to_sc_type -from dlt.common.configuration.exceptions import ConfigFieldMissingTypeHintException, ConfigFieldTypeHintNotSupported +from dlt.common.configuration.exceptions import ( + ConfigFieldMissingTypeHintException, + ConfigFieldTypeHintNotSupported, +) # forward class declaration @@ -35,21 +58,21 @@ def is_credentials_inner_hint(inner_hint: Type[Any]) -> bool: def get_config_if_union_hint(hint: Type[Any]) -> Type[Any]: - if is_union(hint): + if is_union_type(hint): return next((t for t in get_args(hint) if is_base_configuration_inner_hint(t)), None) return None def is_valid_hint(hint: Type[Any]) -> bool: + if get_origin(hint) is ClassVar: + # class vars are skipped by dataclass + return True hint = extract_inner_type(hint) hint = get_config_if_union_hint(hint) or hint hint = get_origin(hint) or hint if hint is Any: return True - if hint is ClassVar: - # class vars are skipped by dataclass - return True if is_base_configuration_inner_hint(hint): return True with contextlib.suppress(TypeError): @@ -68,7 +91,7 @@ def extract_inner_hint(hint: Type[Any], 
preserve_new_types: bool = False) -> Typ def is_secret_hint(hint: Type[Any]) -> bool: - is_secret = False + is_secret = False if hasattr(hint, "__name__"): is_secret = hint.__name__ == "TSecretValue" if not is_secret: @@ -82,16 +105,16 @@ def is_secret_hint(hint: Type[Any]) -> bool: @overload -def configspec(cls: Type[TAnyClass]) -> Type[TAnyClass]: - ... +def configspec(cls: Type[TAnyClass]) -> Type[TAnyClass]: ... @overload -def configspec(cls: None = ...) -> Callable[[Type[TAnyClass]], Type[TAnyClass]]: - ... +def configspec(cls: None = ...) -> Callable[[Type[TAnyClass]], Type[TAnyClass]]: ... -def configspec(cls: Optional[Type[Any]] = None) -> Union[Type[TAnyClass], Callable[[Type[TAnyClass]], Type[TAnyClass]]]: +def configspec( + cls: Optional[Type[Any]] = None, +) -> Union[Type[TAnyClass], Callable[[Type[TAnyClass]], Type[TAnyClass]]]: """Converts (via derivation) any decorated class to a Python dataclass that may be used as a spec to resolve configurations In comparison the Python dataclass, a spec implements full dictionary interface for its attributes, allows instance creation from ie. strings @@ -99,6 +122,7 @@ def configspec(cls: Optional[Type[Any]] = None) -> Union[Type[TAnyClass], Callab more information. """ + def wrap(cls: Type[TAnyClass]) -> Type[TAnyClass]: cls.__hint_resolvers__ = {} # type: ignore[attr-defined] is_context = issubclass(cls, _F_ContainerInjectableContext) @@ -106,8 +130,11 @@ def wrap(cls: Type[TAnyClass]) -> Type[TAnyClass]: with contextlib.suppress(NameError): if not issubclass(cls, BaseConfiguration): # keep the original module and keep defaults for fields listed in annotations - fields = {"__module__": cls.__module__, "__annotations__": getattr(cls, "__annotations__", {})} - for key in fields['__annotations__'].keys(): # type: ignore[union-attr] + fields = { + "__module__": cls.__module__, + "__annotations__": getattr(cls, "__annotations__", {}), + } + for key in fields["__annotations__"].keys(): # type: ignore[union-attr] if key in cls.__dict__: fields[key] = cls.__dict__[key] cls = type(cls.__name__, (cls, _F_BaseConfiguration), fields) @@ -129,7 +156,9 @@ def wrap(cls: Type[TAnyClass]) -> Type[TAnyClass]: except NameError: # Dealing with BaseConfiguration itself before it is defined continue - if not att_name.startswith(("__", "_abc_")) and not isinstance(att_value, (staticmethod, classmethod, property)): + if not att_name.startswith(("__", "_abc_")) and not isinstance( + att_value, (staticmethod, classmethod, property) + ): if att_name not in cls.__annotations__: raise ConfigFieldMissingTypeHintException(att_name, cls) hint = cls.__annotations__[att_name] @@ -142,8 +171,8 @@ def wrap(cls: Type[TAnyClass]) -> Type[TAnyClass]: # blocking mutable defaults def default_factory(att_value=att_value): # type: ignore[no-untyped-def] return att_value.copy() - setattr(cls, att_name, dataclasses.field(default_factory=default_factory)) + setattr(cls, att_name, dataclasses.field(default_factory=default_factory)) # We don't want to overwrite user's __init__ method # Create dataclass init only when not defined in the class @@ -168,12 +197,11 @@ def default_factory(att_value=att_value): # type: ignore[no-untyped-def] @configspec class BaseConfiguration(MutableMapping[str, Any]): - - __is_resolved__: bool = dataclasses.field(default = False, init=False, repr=False) + __is_resolved__: bool = dataclasses.field(default=False, init=False, repr=False) """True when all config fields were resolved and have a specified value type""" - __section__: str = 
dataclasses.field(default = None, init=False, repr=False) + __section__: str = dataclasses.field(default=None, init=False, repr=False) """Obligatory section used by config providers when searching for keys, always present in the search path""" - __exception__: Exception = dataclasses.field(default = None, init=False, repr=False) + __exception__: Exception = dataclasses.field(default=None, init=False, repr=False) """Holds the exception that prevented the full resolution""" __config_gen_annotations__: ClassVar[List[str]] = [] """Additional annotations for config generator, currently holds a list of fields of interest that have defaults""" @@ -181,7 +209,6 @@ class BaseConfiguration(MutableMapping[str, Any]): """Typing for dataclass fields""" __hint_resolvers__: ClassVar[Dict[str, Callable[["BaseConfiguration"], Type[Any]]]] = {} - def parse_native_representation(self, native_value: Any) -> None: """Initialize the configuration fields by parsing the `native_value` which should be a native representation of the configuration or credentials, for example database connection string or JSON serialized GCP service credentials file. @@ -212,7 +239,7 @@ def _get_resolvable_dataclass_fields(cls) -> Iterator[TDtcField]: # Sort dynamic type hint fields last because they depend on other values yield from sorted( (f for f in cls.__dataclass_fields__.values() if cls.__is_valid_field(f)), - key=lambda f: f.name in cls.__hint_resolvers__ + key=lambda f: f.name in cls.__hint_resolvers__, ) @classmethod @@ -229,7 +256,9 @@ def is_partial(self) -> bool: return False # check if all resolvable fields have value return any( - field for field, hint in self.get_resolvable_fields().items() if getattr(self, field) is None and not is_optional_type(hint) + field + for field, hint in self.get_resolvable_fields().items() + if getattr(self, field) is None and not is_optional_type(hint) ) def resolve(self) -> None: @@ -265,7 +294,10 @@ def __delitem__(self, __key: str) -> None: def __iter__(self) -> Iterator[str]: """Iterator or valid key names""" - return map(lambda field: field.name, filter(lambda val: self.__is_valid_field(val), self.__dataclass_fields__.values())) + return map( + lambda field: field.name, + filter(lambda val: self.__is_valid_field(val), self.__dataclass_fields__.values()), + ) def __len__(self) -> int: return sum(1 for _ in self.__iter__()) @@ -280,7 +312,9 @@ def update(self, other: Any = (), /, **kwds: Any) -> None: # helper functions def __has_attr(self, __key: str) -> bool: - return __key in self.__dataclass_fields__ and self.__is_valid_field(self.__dataclass_fields__[__key]) + return __key in self.__dataclass_fields__ and self.__is_valid_field( + self.__dataclass_fields__[__key] + ) @staticmethod def __is_valid_field(field: TDtcField) -> bool: @@ -335,7 +369,7 @@ def to_native_credentials(self) -> Any: return self.to_native_representation() def __str__(self) -> str: - """Get string representation of credentials to be displayed, with all secret parts removed """ + """Get string representation of credentials to be displayed, with all secret parts removed""" return super().__str__() @@ -360,6 +394,8 @@ class ContainerInjectableContext(BaseConfiguration): can_create_default: ClassVar[bool] = True """If True, `Container` is allowed to create default context instance, if none exists""" + global_affinity: ClassVar[bool] = False + """If True, `Container` will create context that will be visible in any thread. 
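Several of the base_configuration.py hunks above only re-wrap the MutableMapping plumbing (__iter__, update, __has_attr) without changing behaviour. A small illustrative check of that dict-like interface, with a hypothetical spec that is not part of the diff:

    from dlt.common.configuration.specs import BaseConfiguration, configspec

    @configspec
    class ConnectionConfiguration(BaseConfiguration):
        host: str = "localhost"
        port: int = 5432

    cfg = ConnectionConfiguration()
    assert set(cfg) == {"host", "port"}   # __iter__ yields the resolvable field names
    cfg.update({"port": 6543})            # MutableMapping update goes through __setitem__
    assert dict(cfg) == {"host": "localhost", "port": 6543}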
If False, per thread context is created""" def add_extras(self) -> None: """Called right after context was added to the container. Benefits mostly the config provider injection context which adds extra providers using the initial ones.""" @@ -372,11 +408,15 @@ def add_extras(self) -> None: TSpec = TypeVar("TSpec", bound=BaseConfiguration) THintResolver = Callable[[TSpec], Type[Any]] + def resolve_type(field_name: str) -> Callable[[THintResolver[TSpec]], THintResolver[TSpec]]: def decorator(func: THintResolver[TSpec]) -> THintResolver[TSpec]: func.__hint_for_field__ = field_name # type: ignore[attr-defined] + @wraps(func) def wrapper(self: TSpec) -> Type[Any]: return func(self) + return wrapper + return decorator diff --git a/dlt/common/configuration/specs/config_providers_context.py b/dlt/common/configuration/specs/config_providers_context.py index 062714245b..860e7414de 100644 --- a/dlt/common/configuration/specs/config_providers_context.py +++ b/dlt/common/configuration/specs/config_providers_context.py @@ -1,10 +1,23 @@ import contextlib import io -from typing import List +from typing import ClassVar, List + from dlt.common.configuration.exceptions import DuplicateConfigProviderException -from dlt.common.configuration.providers import ConfigProvider, EnvironProvider, ContextProvider, SecretsTomlProvider, ConfigTomlProvider, GoogleSecretsProvider +from dlt.common.configuration.providers import ( + ConfigProvider, + EnvironProvider, + ContextProvider, + SecretsTomlProvider, + ConfigTomlProvider, + GoogleSecretsProvider, +) from dlt.common.configuration.specs.base_configuration import ContainerInjectableContext -from dlt.common.configuration.specs import GcpServiceAccountCredentials, BaseConfiguration, configspec, known_sections +from dlt.common.configuration.specs import ( + GcpServiceAccountCredentials, + BaseConfiguration, + configspec, + known_sections, +) from dlt.common.runtime.exec_info import is_airflow_installed @@ -21,6 +34,9 @@ class ConfigProvidersConfiguration(BaseConfiguration): @configspec class ConfigProvidersContext(ContainerInjectableContext): """Injectable list of providers used by the configuration `resolve` module""" + + global_affinity: ClassVar[bool] = True + providers: List[ConfigProvider] context_provider: ConfigProvider @@ -70,27 +86,36 @@ def _initial_providers() -> List[ConfigProvider]: providers = [ EnvironProvider(), SecretsTomlProvider(add_global_config=True), - ConfigTomlProvider(add_global_config=True) + ConfigTomlProvider(add_global_config=True), ] return providers def _extra_providers() -> List[ConfigProvider]: from dlt.common.configuration.resolve import resolve_configuration + providers_config = resolve_configuration(ConfigProvidersConfiguration()) extra_providers = [] if providers_config.enable_airflow_secrets: extra_providers.extend(_airflow_providers()) if providers_config.enable_google_secrets: - extra_providers.append(_google_secrets_provider(only_toml_fragments=providers_config.only_toml_fragments)) + extra_providers.append( + _google_secrets_provider(only_toml_fragments=providers_config.only_toml_fragments) + ) return extra_providers -def _google_secrets_provider(only_secrets: bool = True, only_toml_fragments: bool = True) -> ConfigProvider: +def _google_secrets_provider( + only_secrets: bool = True, only_toml_fragments: bool = True +) -> ConfigProvider: from dlt.common.configuration.resolve import resolve_configuration - c = resolve_configuration(GcpServiceAccountCredentials(), sections=(known_sections.PROVIDERS, "google_secrets")) - return 
GoogleSecretsProvider(c, only_secrets=only_secrets, only_toml_fragments=only_toml_fragments) + c = resolve_configuration( + GcpServiceAccountCredentials(), sections=(known_sections.PROVIDERS, "google_secrets") + ) + return GoogleSecretsProvider( + c, only_secrets=only_secrets, only_toml_fragments=only_toml_fragments + ) def _airflow_providers() -> List[ConfigProvider]: @@ -112,10 +137,12 @@ def _airflow_providers() -> List[ConfigProvider]: # hide stdio. airflow typically dumps tons of warnings and deprecations to stdout and stderr with contextlib.redirect_stdout(io.StringIO()), contextlib.redirect_stderr(io.StringIO()): # try to get dlt secrets variable. many broken Airflow installations break here. in that case do not create - from airflow.models import Variable, TaskInstance # noqa + from airflow.models import Variable, TaskInstance # noqa from dlt.common.configuration.providers.airflow import AirflowSecretsTomlProvider + # probe if Airflow variable containing all secrets is present from dlt.common.configuration.providers.toml import SECRETS_TOML_KEY + secrets_toml_var = Variable.get(SECRETS_TOML_KEY, default_var=None) # providers can be returned - mind that AirflowSecretsTomlProvider() requests the variable above immediately @@ -123,13 +150,18 @@ def _airflow_providers() -> List[ConfigProvider]: # check if we are in task context and provide more info from airflow.operators.python import get_current_context # noqa + ti: TaskInstance = get_current_context()["ti"] # type: ignore # log outside of stderr/out redirect if secrets_toml_var is None: - message = f"Airflow variable '{SECRETS_TOML_KEY}' was not found. " + \ - "This Airflow variable is a recommended place to hold the content of secrets.toml." + \ - "If you do not use Airflow variables to hold dlt configuration or use variables with other names you can ignore this warning." + message = ( + f"Airflow variable '{SECRETS_TOML_KEY}' was not found. " + + "This Airflow variable is a recommended place to hold the content of" + " secrets.toml." + + "If you do not use Airflow variables to hold dlt configuration or use variables" + " with other names you can ignore this warning." 
+ ) ti.log.warning(message) except Exception: diff --git a/dlt/common/configuration/specs/config_section_context.py b/dlt/common/configuration/specs/config_section_context.py index 753eb3b439..a656a2b0fe 100644 --- a/dlt/common/configuration/specs/config_section_context.py +++ b/dlt/common/configuration/specs/config_section_context.py @@ -3,9 +3,9 @@ from dlt.common.configuration.specs.base_configuration import ContainerInjectableContext, configspec + @configspec class ConfigSectionContext(ContainerInjectableContext): - TMergeFunc = Callable[["ConfigSectionContext", "ConfigSectionContext"], None] pipeline_name: Optional[str] @@ -13,7 +13,6 @@ class ConfigSectionContext(ContainerInjectableContext): merge_style: TMergeFunc = None source_state_key: str = None - def merge(self, existing: "ConfigSectionContext") -> None: """Merges existing context into incoming using a merge style function""" merge_style_f = self.merge_style or self.prefer_incoming @@ -40,27 +39,44 @@ def prefer_incoming(incoming: "ConfigSectionContext", existing: "ConfigSectionCo @staticmethod def prefer_existing(incoming: "ConfigSectionContext", existing: "ConfigSectionContext") -> None: """Prefer existing section context when merging this context before injecting""" - incoming.pipeline_name = existing.pipeline_name or incoming.pipeline_name - incoming.sections = existing.sections or incoming.sections - incoming.source_state_key = existing.source_state_key or incoming.source_state_key + incoming.pipeline_name = existing.pipeline_name or incoming.pipeline_name + incoming.sections = existing.sections or incoming.sections + incoming.source_state_key = existing.source_state_key or incoming.source_state_key @staticmethod - def resource_merge_style(incoming: "ConfigSectionContext", existing: "ConfigSectionContext") -> None: + def resource_merge_style( + incoming: "ConfigSectionContext", existing: "ConfigSectionContext" + ) -> None: """If top level section is same and there are 3 sections it replaces second element (source module) from existing and keeps the 3rd element (name)""" incoming.pipeline_name = incoming.pipeline_name or existing.pipeline_name - if len(incoming.sections) == 3 == len(existing.sections) and incoming.sections[0] == existing.sections[0]: + if ( + len(incoming.sections) == 3 == len(existing.sections) + and incoming.sections[0] == existing.sections[0] + ): # existing does not have middle section then keep incoming # standalone resources do not emit existing to not overwrite each other - incoming.sections = (incoming.sections[0], existing.sections[1] or incoming.sections[1], incoming.sections[2]) + incoming.sections = ( + incoming.sections[0], + existing.sections[1] or incoming.sections[1], + incoming.sections[2], + ) incoming.source_state_key = existing.source_state_key or incoming.source_state_key else: incoming.sections = incoming.sections or existing.sections incoming.source_state_key = incoming.source_state_key or existing.source_state_key def __str__(self) -> str: - return super().__str__() + f": {self.pipeline_name} {self.sections}@{self.merge_style} state['{self.source_state_key}']" + return ( + super().__str__() + + f": {self.pipeline_name} {self.sections}@{self.merge_style} state['{self.source_state_key}']" + ) if TYPE_CHECKING: # provide __init__ signature when type checking - def __init__(self, pipeline_name:str = None, sections: Tuple[str, ...] = (), merge_style: TMergeFunc = None, source_state_key: str = None) -> None: - ... 
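The config_section_context.py diff above is formatting only; the merge styles it defines are unchanged. A standalone sketch (plain dicts and a free function, not dlt's API) of the prefer_existing rule: values already set on the existing context win, empty ones fall back to the incoming context:

    from typing import Any, Dict

    def prefer_existing(incoming: Dict[str, Any], existing: Dict[str, Any]) -> None:
        # an existing value wins when truthy, otherwise keep what the incoming context had
        for field in ("pipeline_name", "sections", "source_state_key"):
            incoming[field] = existing.get(field) or incoming.get(field)

    incoming = {"pipeline_name": None, "sections": ("sources", "my_source"), "source_state_key": None}
    existing = {"pipeline_name": "my_pipeline", "sections": (), "source_state_key": None}
    prefer_existing(incoming, existing)
    assert incoming["pipeline_name"] == "my_pipeline"        # taken from existing
    assert incoming["sections"] == ("sources", "my_source")  # existing sections empty, incoming kept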
+ def __init__( + self, + pipeline_name: str = None, + sections: Tuple[str, ...] = (), + merge_style: TMergeFunc = None, + source_state_key: str = None, + ) -> None: ... diff --git a/dlt/common/configuration/specs/connection_string_credentials.py b/dlt/common/configuration/specs/connection_string_credentials.py index 386535122b..e7b0e5f900 100644 --- a/dlt/common/configuration/specs/connection_string_credentials.py +++ b/dlt/common/configuration/specs/connection_string_credentials.py @@ -24,9 +24,7 @@ def parse_native_representation(self, native_value: Any) -> None: try: url = make_url(native_value) # update only values that are not None - self.update( - {k: v for k,v in url._asdict().items() if v is not None} - ) + self.update({k: v for k, v in url._asdict().items() if v is not None}) if self.query is not None: self.query = dict(self.query) except Exception: @@ -40,7 +38,15 @@ def to_native_representation(self) -> str: return self.to_url().render_as_string(hide_password=False) def to_url(self) -> URL: - return URL.create(self.drivername, self.username, self.password, self.host, self.port, self.database, self.query) + return URL.create( + self.drivername, + self.username, + self.password, + self.host, + self.port, + self.database, + self.query, + ) def __str__(self) -> str: return self.to_url().render_as_string(hide_password=True) diff --git a/dlt/common/configuration/specs/exceptions.py b/dlt/common/configuration/specs/exceptions.py index 054d21c78c..7a0b283630 100644 --- a/dlt/common/configuration/specs/exceptions.py +++ b/dlt/common/configuration/specs/exceptions.py @@ -9,7 +9,10 @@ class SpecException(ConfigurationException): class OAuth2ScopesRequired(SpecException): def __init__(self, spec: type) -> None: self.spec = spec - super().__init__("Scopes are required to retrieve refresh_token. Use 'openid' scope for a token without any permissions to resources.") + super().__init__( + "Scopes are required to retrieve refresh_token. Use 'openid' scope for a token without" + " any permissions to resources." + ) class NativeValueError(SpecException, ValueError): @@ -22,29 +25,46 @@ def __init__(self, spec: Type[Any], native_value: str, msg: str) -> None: class InvalidConnectionString(NativeValueError): def __init__(self, spec: Type[Any], native_value: str, driver: str): driver = driver or "driver" - msg = f"The expected representation for {spec.__name__} is a standard database connection string with the following format: {driver}://username:password@host:port/database." + msg = ( + f"The expected representation for {spec.__name__} is a standard database connection" + f" string with the following format: {driver}://username:password@host:port/database." + ) super().__init__(spec, native_value, msg) class InvalidGoogleNativeCredentialsType(NativeValueError): def __init__(self, spec: Type[Any], native_value: Any): - msg = f"Credentials {spec.__name__} accept a string with serialized credentials json file or an instance of Credentials object from google.* namespace. The value passed is of type {type(native_value)}" + msg = ( + f"Credentials {spec.__name__} accept a string with serialized credentials json file or" + " an instance of Credentials object from google.* namespace. 
The value passed is of" + f" type {type(native_value)}" + ) super().__init__(spec, native_value, msg) class InvalidGoogleServicesJson(NativeValueError): def __init__(self, spec: Type[Any], native_value: Any): - msg = f"The expected representation for {spec.__name__} is a string with serialized service account credentials, where at least 'project_id', 'private_key' and 'client_email` keys are present" + msg = ( + f"The expected representation for {spec.__name__} is a string with serialized service" + " account credentials, where at least 'project_id', 'private_key' and 'client_email`" + " keys are present" + ) super().__init__(spec, native_value, msg) class InvalidGoogleOauth2Json(NativeValueError): def __init__(self, spec: Type[Any], native_value: Any): - msg = f"The expected representation for {spec.__name__} is a string with serialized oauth2 user info and may be wrapped in 'install'/'web' node - depending of oauth2 app type." + msg = ( + f"The expected representation for {spec.__name__} is a string with serialized oauth2" + " user info and may be wrapped in 'install'/'web' node - depending of oauth2 app type." + ) super().__init__(spec, native_value, msg) class InvalidBoto3Session(NativeValueError): def __init__(self, spec: Type[Any], native_value: Any): - msg = f"The expected representation for {spec.__name__} is and instance of boto3.Session containing credentials" + msg = ( + f"The expected representation for {spec.__name__} is and instance of boto3.Session" + " containing credentials" + ) super().__init__(spec, native_value, msg) diff --git a/dlt/common/configuration/specs/gcp_credentials.py b/dlt/common/configuration/specs/gcp_credentials.py index f96c1d44f5..f00ec62651 100644 --- a/dlt/common/configuration/specs/gcp_credentials.py +++ b/dlt/common/configuration/specs/gcp_credentials.py @@ -1,13 +1,22 @@ import sys from typing import Any, Final, List, Tuple, Union -from deprecated import deprecated from dlt.common import json, pendulum from dlt.common.configuration.specs.api_credentials import OAuth2Credentials -from dlt.common.configuration.specs.exceptions import InvalidGoogleNativeCredentialsType, InvalidGoogleOauth2Json, InvalidGoogleServicesJson, NativeValueError, OAuth2ScopesRequired +from dlt.common.configuration.specs.exceptions import ( + InvalidGoogleNativeCredentialsType, + InvalidGoogleOauth2Json, + InvalidGoogleServicesJson, + NativeValueError, + OAuth2ScopesRequired, +) from dlt.common.exceptions import MissingDependencyException from dlt.common.typing import DictStrAny, TSecretValue, StrAny -from dlt.common.configuration.specs.base_configuration import CredentialsConfiguration, CredentialsWithDefault, configspec +from dlt.common.configuration.specs.base_configuration import ( + CredentialsConfiguration, + CredentialsWithDefault, + configspec, +) from dlt.common.utils import is_interactive @@ -18,7 +27,9 @@ class GcpCredentials(CredentialsConfiguration): project_id: str = None - location: str = "US" # DEPRECATED! and present only for backward compatibility. please set bigquery location in BigQuery configuration + location: str = ( # DEPRECATED! and present only for backward compatibility. 
please set bigquery location in BigQuery configuration + "US" + ) def parse_native_representation(self, native_value: Any) -> None: if not isinstance(native_value, str): @@ -49,12 +60,13 @@ def parse_native_representation(self, native_value: Any) -> None: service_dict: DictStrAny = None try: from google.oauth2.service_account import Credentials as ServiceAccountCredentials + if isinstance(native_value, ServiceAccountCredentials): # extract credentials service_dict = { "project_id": native_value.project_id, "client_email": native_value.service_account_email, - "private_key": native_value # keep native credentials in private key + "private_key": native_value, # keep native credentials in private key } self.__is_resolved__ = True except ImportError: @@ -76,14 +88,10 @@ def on_resolved(self) -> None: # must end with new line, otherwise won't be parsed by Crypto self.private_key = TSecretValue(self.private_key + "\n") - @deprecated(reason="Use 'to_native_credentials' method instead") - def to_service_account_credentials(self) -> Any: - return self.to_native_credentials() - def to_native_credentials(self) -> Any: """Returns google.oauth2.service_account.Credentials""" - from google.oauth2.service_account import Credentials as ServiceAccountCredentials + if isinstance(self.private_key, ServiceAccountCredentials): # private key holds the native instance if it was passed to parse_native_representation return self.private_key @@ -105,6 +113,7 @@ def parse_native_representation(self, native_value: Any) -> None: oauth_dict: DictStrAny = None try: from google.oauth2.credentials import Credentials as GoogleOAuth2Credentials + if isinstance(native_value, GoogleOAuth2Credentials): # extract credentials, project id may not be present oauth_dict = { @@ -113,7 +122,7 @@ def parse_native_representation(self, native_value: Any) -> None: "client_secret": native_value.client_secret, "refresh_token": native_value.refresh_token, "scopes": native_value.scopes, - "token": native_value.token + "token": native_value.token, } # if token is present, we are logged in self.__is_resolved__ = native_value.token is not None @@ -141,8 +150,12 @@ def auth(self, scopes: Union[str, List[str]] = None, redirect_url: str = None) - self.add_scopes(scopes) if not self.scopes: raise OAuth2ScopesRequired(self.__class__) - assert sys.stdin.isatty() or is_interactive(), "Must have a tty or interactive mode for web flow" - self.refresh_token, self.token = self._get_refresh_token(redirect_url or "http://localhost") + assert ( + sys.stdin.isatty() or is_interactive() + ), "Must have a tty or interactive mode for web flow" + self.refresh_token, self.token = self._get_refresh_token( + redirect_url or "http://localhost" + ) else: # if scopes or redirect_url: # logger.warning("Please note that scopes and redirect_url are ignored when getting access token") @@ -164,11 +177,10 @@ def _get_access_token(self) -> TSecretValue: raise MissingDependencyException("GcpOAuthCredentials", ["requests_oauthlib"]) google = OAuth2Session(client_id=self.client_id, scope=self.scopes) - extra = { - "client_id": self.client_id, - "client_secret": self.client_secret - } - token = google.refresh_token(token_url=self.token_uri, refresh_token=self.refresh_token, **extra)["access_token"] + extra = {"client_id": self.client_id, "client_secret": self.client_secret} + token = google.refresh_token( + token_url=self.token_uri, refresh_token=self.refresh_token, **extra + )["access_token"] return TSecretValue(token) def _get_refresh_token(self, redirect_url: str) -> 
Tuple[TSecretValue, TSecretValue]: @@ -191,9 +203,7 @@ def to_native_credentials(self) -> Any: return credentials def _installed_dict(self, redirect_url: str = "http://localhost") -> StrAny: - installed_dict = { - self.client_type: self._info_dict() - } + installed_dict = {self.client_type: self._info_dict()} if redirect_url: installed_dict[self.client_type]["redirect_uris"] = [redirect_url] @@ -211,13 +221,13 @@ def __str__(self) -> str: @configspec class GcpDefaultCredentials(CredentialsWithDefault, GcpCredentials): - _LAST_FAILED_DEFAULT: float = 0.0 def parse_native_representation(self, native_value: Any) -> None: """Accepts google credentials as native value""" try: from google.auth.credentials import Credentials as GoogleCredentials + if isinstance(native_value, GoogleCredentials): self.project_id = self.project_id or native_value.quota_project_id self._set_default_credentials(native_value) @@ -226,11 +236,12 @@ def parse_native_representation(self, native_value: Any) -> None: return except ImportError: pass - raise NativeValueError(self.__class__, native_value, "Default Google Credentials not present") + raise NativeValueError( + self.__class__, native_value, "Default Google Credentials not present" + ) @staticmethod def _get_default_credentials(retry_timeout_s: float = 600.0) -> Tuple[Any, str]: - now = pendulum.now().timestamp() if now - GcpDefaultCredentials._LAST_FAILED_DEFAULT < retry_timeout_s: return None, None @@ -268,7 +279,9 @@ def to_native_credentials(self) -> Any: @configspec -class GcpServiceAccountCredentials(GcpDefaultCredentials, GcpServiceAccountCredentialsWithoutDefaults): +class GcpServiceAccountCredentials( + GcpDefaultCredentials, GcpServiceAccountCredentialsWithoutDefaults +): def parse_native_representation(self, native_value: Any) -> None: try: GcpDefaultCredentials.parse_native_representation(self, native_value) diff --git a/dlt/common/configuration/specs/known_sections.py b/dlt/common/configuration/specs/known_sections.py index 31ca0ff7ff..97ba85ffd6 100644 --- a/dlt/common/configuration/specs/known_sections.py +++ b/dlt/common/configuration/specs/known_sections.py @@ -19,8 +19,8 @@ DATA_WRITER = "data_writer" """default section holding BufferedDataWriter settings""" -DBT_PACKAGE_RUNNER = "dbt_package_runner" +DBT_PACKAGE_RUNNER = "dbt_package_runner" """dbt package runner configuration (DBTRunnerConfiguration)""" -DBT_CLOUD = "dbt_cloud" -"""dbt cloud helpers configuration (DBTCloudConfiguration)""" \ No newline at end of file +DBT_CLOUD = "dbt_cloud" +"""dbt cloud helpers configuration (DBTCloudConfiguration)""" diff --git a/dlt/common/configuration/specs/run_configuration.py b/dlt/common/configuration/specs/run_configuration.py index 2ec3648dbe..54ce46ceba 100644 --- a/dlt/common/configuration/specs/run_configuration.py +++ b/dlt/common/configuration/specs/run_configuration.py @@ -16,7 +16,7 @@ class RunConfiguration(BaseConfiguration): slack_incoming_hook: Optional[TSecretStrValue] = None dlthub_telemetry: bool = True # enable or disable dlthub telemetry dlthub_telemetry_segment_write_key: str = "a1F2gc6cNYw2plyAt02sZouZcsRjG7TD" - log_format: str = '{asctime}|[{levelname:<21}]|{process}|{name}|{filename}|{funcName}:{lineno}|{message}' + log_format: str = "{asctime}|[{levelname:<21}]|{process}|{thread}|{name}|{filename}|{funcName}:{lineno}|{message}" log_level: str = "WARNING" request_timeout: float = 60 """Timeout for http requests""" @@ -27,6 +27,8 @@ class RunConfiguration(BaseConfiguration): request_max_retry_delay: float = 300 """Maximum delay 
between http request retries""" config_files_storage_path: str = "/run/config/" + """Platform connection""" + dlthub_dsn: Optional[TSecretStrValue] = None __section__ = "runtime" @@ -38,7 +40,9 @@ def on_resolved(self) -> None: # it may be obfuscated base64 value # TODO: that needs to be removed ASAP try: - self.slack_incoming_hook = TSecretStrValue(reveal_pseudo_secret(self.slack_incoming_hook, b"dlt-runtime-2022")) + self.slack_incoming_hook = TSecretStrValue( + reveal_pseudo_secret(self.slack_incoming_hook, b"dlt-runtime-2022") + ) except binascii.Error: # just keep the original value pass diff --git a/dlt/common/configuration/utils.py b/dlt/common/configuration/utils.py index 4841c8e3fa..5a7330447b 100644 --- a/dlt/common/configuration/utils.py +++ b/dlt/common/configuration/utils.py @@ -10,7 +10,10 @@ from dlt.common.data_types import coerce_value, py_type_to_sc_type from dlt.common.configuration.providers import EnvironProvider from dlt.common.configuration.exceptions import ConfigValueCannotBeCoercedException, LookupTrace -from dlt.common.configuration.specs.base_configuration import BaseConfiguration, is_base_configuration_inner_hint +from dlt.common.configuration.specs.base_configuration import ( + BaseConfiguration, + is_base_configuration_inner_hint, +) class ResolvedValueTrace(NamedTuple): @@ -111,40 +114,56 @@ def auto_cast(value: str) -> Any: return value - -def log_traces(config: Optional[BaseConfiguration], key: str, hint: Type[Any], value: Any, default_value: Any, traces: Sequence[LookupTrace]) -> None: +def log_traces( + config: Optional[BaseConfiguration], + key: str, + hint: Type[Any], + value: Any, + default_value: Any, + traces: Sequence[LookupTrace], +) -> None: from dlt.common import logger # if logger.is_logging() and logger.log_level() == "DEBUG" and config: # logger.debug(f"Field {key} with type {hint} in {type(config).__name__} {'NOT RESOLVED' if value is None else 'RESOLVED'}") - # print(f"Field {key} with type {hint} in {type(config).__name__} {'NOT RESOLVED' if value is None else 'RESOLVED'}") - # for tr in traces: - # # print(str(tr)) - # logger.debug(str(tr)) + # print(f"Field {key} with type {hint} in {type(config).__name__} {'NOT RESOLVED' if value is None else 'RESOLVED'}") + # for tr in traces: + # # print(str(tr)) + # logger.debug(str(tr)) # store all traces with resolved values resolved_trace = next((trace for trace in traces if trace.value is not None), None) if resolved_trace is not None: path = f'{".".join(resolved_trace.sections)}.{key}' - _RESOLVED_TRACES[path] = ResolvedValueTrace(key, resolved_trace.value, default_value, hint, resolved_trace.sections, resolved_trace.provider, config) + _RESOLVED_TRACES[path] = ResolvedValueTrace( + key, + resolved_trace.value, + default_value, + hint, + resolved_trace.sections, + resolved_trace.provider, + config, + ) def get_resolved_traces() -> Dict[str, ResolvedValueTrace]: return _RESOLVED_TRACES -def add_config_to_env(config: BaseConfiguration, sections: Tuple[str, ...] = ()) -> None: +def add_config_to_env(config: BaseConfiguration, sections: Tuple[str, ...] = ()) -> None: """Writes values in configuration back into environment using the naming convention of EnvironProvider. Will descend recursively if embedded BaseConfiguration instances are found""" if config.__section__: - sections += (config.__section__, ) + sections += (config.__section__,) return add_config_dict_to_env(dict(config), sections, overwrite_keys=True) -def add_config_dict_to_env(dict_: Mapping[str, Any], sections: Tuple[str, ...] 
= (), overwrite_keys: bool = False) -> None: +def add_config_dict_to_env( + dict_: Mapping[str, Any], sections: Tuple[str, ...] = (), overwrite_keys: bool = False +) -> None: """Writes values in dict_ back into environment using the naming convention of EnvironProvider. Applies `sections` if specified. Does not overwrite existing keys by default""" for k, v in dict_.items(): if isinstance(v, BaseConfiguration): if not v.__section__: - embedded_sections = sections + (k, ) + embedded_sections = sections + (k,) else: embedded_sections = sections add_config_to_env(v, embedded_sections) diff --git a/dlt/common/data_types/__init__.py b/dlt/common/data_types/__init__.py index 83e123f124..672aeddea4 100644 --- a/dlt/common/data_types/__init__.py +++ b/dlt/common/data_types/__init__.py @@ -1,6 +1,4 @@ from dlt.common.data_types.type_helpers import coerce_value, py_type_to_sc_type from dlt.common.data_types.typing import TDataType, DATA_TYPES -__all__ = [ - "coerce_value", "py_type_to_sc_type", "TDataType", "DATA_TYPES" -] +__all__ = ["coerce_value", "py_type_to_sc_type", "TDataType", "DATA_TYPES"] diff --git a/dlt/common/data_types/type_helpers.py b/dlt/common/data_types/type_helpers.py index f42f81b06f..9e1cd2278d 100644 --- a/dlt/common/data_types/type_helpers.py +++ b/dlt/common/data_types/type_helpers.py @@ -10,7 +10,12 @@ from dlt.common.json._simplejson import custom_encode as json_custom_encode from dlt.common.arithmetics import InvalidOperation from dlt.common.data_types.typing import TDataType -from dlt.common.time import ensure_pendulum_datetime, parse_iso_like_datetime, ensure_pendulum_date, ensure_pendulum_time +from dlt.common.time import ( + ensure_pendulum_datetime, + parse_iso_like_datetime, + ensure_pendulum_date, + ensure_pendulum_time, +) from dlt.common.utils import map_nested_in_place, str2bool @@ -93,7 +98,7 @@ def coerce_value(to_type: TDataType, from_type: TDataType, value: Any) -> Any: return map_nested_in_place(custom_pua_remove, value) # Make sure we use enum value instead of the object itself # This check is faster than `isinstance(value, Enum)` for non-enum types - if hasattr(value, 'value'): + if hasattr(value, "value"): if to_type == "text": return str(value.value) elif to_type == "bigint": @@ -120,7 +125,7 @@ def coerce_value(to_type: TDataType, from_type: TDataType, value: Any) -> Any: except binascii.Error: raise ValueError(value) if from_type == "bigint": - return value.to_bytes((value.bit_length() + 7) // 8, 'little') + return value.to_bytes((value.bit_length() + 7) // 8, "little") if to_type == "bigint": if from_type in ["wei", "decimal", "double"]: diff --git a/dlt/common/data_types/typing.py b/dlt/common/data_types/typing.py index c32e6a38c9..d061b28df0 100644 --- a/dlt/common/data_types/typing.py +++ b/dlt/common/data_types/typing.py @@ -1,5 +1,17 @@ from typing import Literal, Set, get_args -TDataType = Literal["text", "double", "bool", "timestamp", "bigint", "binary", "complex", "decimal", "wei", "date", "time"] +TDataType = Literal[ + "text", + "double", + "bool", + "timestamp", + "bigint", + "binary", + "complex", + "decimal", + "wei", + "date", + "time", +] DATA_TYPES: Set[TDataType] = set(get_args(TDataType)) diff --git a/dlt/common/data_writers/__init__.py b/dlt/common/data_writers/__init__.py index 5865466b8f..04c5d04328 100644 --- a/dlt/common/data_writers/__init__.py +++ b/dlt/common/data_writers/__init__.py @@ -1,8 +1,18 @@ -from dlt.common.data_writers.writers import DataWriter, TLoaderFileFormat -from dlt.common.data_writers.buffered import 
BufferedDataWriter -from dlt.common.data_writers.escape import escape_redshift_literal, escape_redshift_identifier, escape_bigquery_identifier +from dlt.common.data_writers.writers import DataWriter, DataWriterMetrics, TLoaderFileFormat +from dlt.common.data_writers.buffered import BufferedDataWriter, new_file_id +from dlt.common.data_writers.escape import ( + escape_redshift_literal, + escape_redshift_identifier, + escape_bigquery_identifier, +) __all__ = [ - "DataWriter", "TLoaderFileFormat", "BufferedDataWriter", - "escape_redshift_literal", "escape_redshift_identifier", "escape_bigquery_identifier" + "DataWriter", + "DataWriterMetrics", + "TLoaderFileFormat", + "BufferedDataWriter", + "new_file_id", + "escape_redshift_literal", + "escape_redshift_identifier", + "escape_bigquery_identifier", ] diff --git a/dlt/common/data_writers/buffered.py b/dlt/common/data_writers/buffered.py index 5c93e22bc6..24935d73ac 100644 --- a/dlt/common/data_writers/buffered.py +++ b/dlt/common/data_writers/buffered.py @@ -1,23 +1,30 @@ import gzip -from functools import reduce +import time from typing import List, IO, Any, Optional, Type, TypeVar, Generic -from dlt.common.utils import uniq_id from dlt.common.typing import TDataItem, TDataItems from dlt.common.data_writers import TLoaderFileFormat -from dlt.common.data_writers.exceptions import BufferedDataWriterClosed, DestinationCapabilitiesRequired, InvalidFileNameTemplateException -from dlt.common.data_writers.writers import DataWriter +from dlt.common.data_writers.exceptions import ( + BufferedDataWriterClosed, + DestinationCapabilitiesRequired, + InvalidFileNameTemplateException, +) +from dlt.common.data_writers.writers import DataWriter, DataWriterMetrics from dlt.common.schema.typing import TTableSchemaColumns from dlt.common.configuration import with_config, known_sections, configspec from dlt.common.configuration.specs import BaseConfiguration from dlt.common.destination import DestinationCapabilitiesContext - +from dlt.common.utils import uniq_id TWriter = TypeVar("TWriter", bound=DataWriter) -class BufferedDataWriter(Generic[TWriter]): +def new_file_id() -> str: + """Creates new file id which is globally unique within table_name scope""" + return uniq_id(5) + +class BufferedDataWriter(Generic[TWriter]): @configspec class BufferedDataWriterConfiguration(BaseConfiguration): buffer_max_items: int = 5000 @@ -28,7 +35,6 @@ class BufferedDataWriterConfiguration(BaseConfiguration): __section__ = known_sections.DATA_WRITER - @with_config(spec=BufferedDataWriterConfiguration) def __init__( self, @@ -48,13 +54,17 @@ def __init__( self._caps = _caps # validate if template has correct placeholders self.file_name_template = file_name_template - self.closed_files: List[str] = [] # all fully processed files + self.closed_files: List[DataWriterMetrics] = [] # all fully processed files # buffered items must be less than max items in file self.buffer_max_items = min(buffer_max_items, file_max_items or buffer_max_items) self.file_max_bytes = file_max_bytes self.file_max_items = file_max_items # the open function is either gzip.open or open - self.open = gzip.open if self._file_format_spec.supports_compression and not disable_compression else open + self.open = ( + gzip.open + if self._file_format_spec.supports_compression and not disable_compression + else open + ) self._current_columns: TTableSchemaColumns = None self._file_name: str = None @@ -62,40 +72,53 @@ def __init__( self._buffered_items_count: int = 0 self._writer: TWriter = None self._file: IO[Any] = None 
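With this hunk `closed_files` starts collecting `DataWriterMetrics` tuples instead of plain file paths (the class itself is added to `writers.py` further down in this diff). A minimal sketch of how such per-file metrics combine under the `__add__` defined there; the file names and numbers below are invented for illustration:

    from dlt.common.data_writers.writers import DataWriterMetrics, EMPTY_DATA_WRITER_METRICS

    # two hypothetical files closed by the same buffered writer
    m1 = DataWriterMetrics("items.1.jsonl", items_count=100, file_size=2048, created=1.0, last_modified=2.0)
    m2 = DataWriterMetrics("items.2.jsonl", items_count=50, file_size=1024, created=3.0, last_modified=4.0)

    # __add__ sums counts and sizes, keeps the earliest created / latest last_modified and drops the path
    total = sum([m1, m2], EMPTY_DATA_WRITER_METRICS)
    assert (total.items_count, total.file_size) == (150, 3072)
    assert (total.created, total.last_modified) == (1.0, 4.0)
    assert total.file_path == ""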
+ self._created: float = None + self._last_modified: float = None self._closed = False try: self._rotate_file() except TypeError: raise InvalidFileNameTemplateException(file_name_template) - def write_data_item(self, item: TDataItems, columns: TTableSchemaColumns) -> None: - self._ensure_open() + def write_data_item(self, item: TDataItems, columns: TTableSchemaColumns) -> int: + if self._closed: + self._rotate_file() + self._closed = False # rotate file if columns changed and writer does not allow for that # as the only allowed change is to add new column (no updates/deletes), we detect the change by comparing lengths - if self._writer and not self._writer.data_format().supports_schema_changes and len(columns) != len(self._current_columns): + if ( + self._writer + and not self._writer.data_format().supports_schema_changes + and len(columns) != len(self._current_columns) + ): assert len(columns) > len(self._current_columns) self._rotate_file() # until the first chunk is written we can change the columns schema freely if columns is not None: self._current_columns = dict(columns) + + new_rows_count: int if isinstance(item, List): # items coming in single list will be written together, not matter how many are there self._buffered_items.extend(item) # update row count, if item supports "num_rows" it will be used to count items if len(item) > 0 and hasattr(item[0], "num_rows"): - self._buffered_items_count += sum(tbl.num_rows for tbl in item) + new_rows_count = sum(tbl.num_rows for tbl in item) else: - self._buffered_items_count += len(item) + new_rows_count = len(item) else: self._buffered_items.append(item) # update row count, if item supports "num_rows" it will be used to count items if hasattr(item, "num_rows"): - self._buffered_items_count += item.num_rows + new_rows_count = item.num_rows else: - self._buffered_items_count += 1 + new_rows_count = 1 + self._buffered_items_count += new_rows_count # flush if max buffer exceeded if self._buffered_items_count >= self.buffer_max_items: self._flush_items() + # set last modification date + self._last_modified = time.time() # rotate the file if max_bytes exceeded if self._file: # rotate on max file size @@ -104,11 +127,43 @@ def write_data_item(self, item: TDataItems, columns: TTableSchemaColumns) -> Non # rotate on max items elif self.file_max_items and self._writer.items_count >= self.file_max_items: self._rotate_file() + return new_rows_count - def write_empty_file(self, columns: TTableSchemaColumns) -> None: + def write_empty_file(self, columns: TTableSchemaColumns) -> DataWriterMetrics: + """Writes empty file: only header and footer without actual items. Closed the + empty file and returns metrics. Mind that header and footer will be written.""" + self._rotate_file() if columns is not None: self._current_columns = dict(columns) - self._flush_items(allow_empty_file=True) + self._last_modified = time.time() + return self._rotate_file(allow_empty_file=True) + + def import_file(self, file_path: str, metrics: DataWriterMetrics) -> DataWriterMetrics: + """Import a file from `file_path` into items storage under a new file name. Does not check + the imported file format. Uses counts from `metrics` as a base. Logically closes the imported file + + The preferred import method is a hard link to avoid copying the data. If current filesystem does not + support it, a regular copy is used. + """ + # TODO: we should separate file storage from other storages. 
this creates circular deps + from dlt.common.storages import FileStorage + + self._rotate_file() + FileStorage.link_hard_with_fallback(file_path, self._file_name) + self._last_modified = time.time() + metrics = metrics._replace( + file_path=self._file_name, + created=self._created, + last_modified=self._last_modified or self._created, + ) + self.closed_files.append(metrics) + # reset current file + self._file_name = None + self._last_modified = None + self._created = None + # get ready for a next one + self._rotate_file() + return metrics def close(self) -> None: self._ensure_open() @@ -125,9 +180,13 @@ def __enter__(self) -> "BufferedDataWriter[TWriter]": def __exit__(self, exc_type: Type[BaseException], exc_val: BaseException, exc_tb: Any) -> None: self.close() - def _rotate_file(self) -> None: - self._flush_and_close_file() - self._file_name = self.file_name_template % uniq_id(5) + "." + self._file_format_spec.file_extension + def _rotate_file(self, allow_empty_file: bool = False) -> DataWriterMetrics: + metrics = self._flush_and_close_file(allow_empty_file) + self._file_name = ( + self.file_name_template % new_file_id() + "." + self._file_format_spec.file_extension + ) + self._created = time.time() + return metrics def _flush_items(self, allow_empty_file: bool = False) -> None: if self._buffered_items_count > 0 or allow_empty_file: @@ -135,9 +194,9 @@ def _flush_items(self, allow_empty_file: bool = False) -> None: if not self._writer: # create new writer and write header if self._file_format_spec.is_binary_format: - self._file = self.open(self._file_name, "wb") # type: ignore + self._file = self.open(self._file_name, "wb") # type: ignore else: - self._file = self.open(self._file_name, "wt", encoding="utf-8") # type: ignore + self._file = self.open(self._file_name, "wt", encoding="utf-8") # type: ignore self._writer = DataWriter.from_file_format(self.file_format, self._file, caps=self._caps) # type: ignore[assignment] self._writer.write_header(self._current_columns) # write buffer @@ -147,18 +206,31 @@ def _flush_items(self, allow_empty_file: bool = False) -> None: self._buffered_items.clear() self._buffered_items_count = 0 - def _flush_and_close_file(self) -> None: + def _flush_and_close_file(self, allow_empty_file: bool = False) -> DataWriterMetrics: # if any buffered items exist, flush them - self._flush_items() + self._flush_items(allow_empty_file) # if writer exists then close it - if self._writer: - # write the footer of a file - self._writer.write_footer() - self._file.close() - # add file written to the list so we can commit all the files later - self.closed_files.append(self._file_name) - self._writer = None - self._file = None + if not self._writer: + return None + # write the footer of a file + self._writer.write_footer() + self._file.flush() + # add file written to the list so we can commit all the files later + metrics = DataWriterMetrics( + self._file_name, + self._writer.items_count, + self._file.tell(), + self._created, + self._last_modified, + ) + self.closed_files.append(metrics) + self._file.close() + self._writer = None + self._file = None + self._file_name = None + self._created = None + self._last_modified = None + return metrics def _ensure_open(self) -> None: if self._closed: diff --git a/dlt/common/data_writers/escape.py b/dlt/common/data_writers/escape.py index 0656a69634..5bf8f29ccb 100644 --- a/dlt/common/data_writers/escape.py +++ b/dlt/common/data_writers/escape.py @@ -8,14 +8,19 @@ # use regex to escape characters in single pass SQL_ESCAPE_DICT = {"'": 
"''", "\\": "\\\\", "\n": "\\n", "\r": "\\r"} + def _make_sql_escape_re(escape_dict: Dict[str, str]) -> re.Pattern: # type: ignore[type-arg] - return re.compile("|".join([re.escape(k) for k in sorted(escape_dict, key=len, reverse=True)]), flags=re.DOTALL) + return re.compile( + "|".join([re.escape(k) for k in sorted(escape_dict, key=len, reverse=True)]), + flags=re.DOTALL, + ) SQL_ESCAPE_RE = _make_sql_escape_re(SQL_ESCAPE_DICT) + def _escape_extended( - v: str, prefix:str = "E'", escape_dict: Dict[str, str] = None, escape_re: re.Pattern = None # type: ignore[type-arg] + v: str, prefix: str = "E'", escape_dict: Dict[str, str] = None, escape_re: re.Pattern = None # type: ignore[type-arg] ) -> str: escape_dict = escape_dict or SQL_ESCAPE_DICT escape_re = escape_re or SQL_ESCAPE_RE @@ -33,7 +38,7 @@ def escape_redshift_literal(v: Any) -> Any: if isinstance(v, (datetime, date, time)): return f"'{v.isoformat()}'" if isinstance(v, (list, dict)): - return "json_parse(%s)" % _escape_extended(json.dumps(v), prefix='\'') + return "json_parse(%s)" % _escape_extended(json.dumps(v), prefix="'") if v is None: return "NULL" @@ -74,21 +79,26 @@ def escape_duckdb_literal(v: Any) -> Any: MS_SQL_ESCAPE_DICT = { "'": "''", - '\n': "' + CHAR(10) + N'", - '\r': "' + CHAR(13) + N'", - '\t': "' + CHAR(9) + N'", + "\n": "' + CHAR(10) + N'", + "\r": "' + CHAR(13) + N'", + "\t": "' + CHAR(9) + N'", } MS_SQL_ESCAPE_RE = _make_sql_escape_re(MS_SQL_ESCAPE_DICT) + def escape_mssql_literal(v: Any) -> Any: if isinstance(v, str): - return _escape_extended(v, prefix="N'", escape_dict=MS_SQL_ESCAPE_DICT, escape_re=MS_SQL_ESCAPE_RE) + return _escape_extended( + v, prefix="N'", escape_dict=MS_SQL_ESCAPE_DICT, escape_re=MS_SQL_ESCAPE_RE + ) if isinstance(v, (datetime, date, time)): return f"'{v.isoformat()}'" if isinstance(v, (list, dict)): - return _escape_extended(json.dumps(v), prefix="N'", escape_dict=MS_SQL_ESCAPE_DICT, escape_re=MS_SQL_ESCAPE_RE) + return _escape_extended( + json.dumps(v), prefix="N'", escape_dict=MS_SQL_ESCAPE_DICT, escape_re=MS_SQL_ESCAPE_RE + ) if isinstance(v, bytes): - base_64_string = base64.b64encode(v).decode('ascii') + base_64_string = base64.b64encode(v).decode("ascii") return f"""CAST('' AS XML).value('xs:base64Binary("{base_64_string}")', 'VARBINARY(MAX)')""" if isinstance(v, bool): return str(int(v)) @@ -107,7 +117,7 @@ def escape_redshift_identifier(v: str) -> str: def escape_bigquery_identifier(v: str) -> str: # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical - return "`" + v.replace("\\", "\\\\").replace("`","\\`") + "`" + return "`" + v.replace("\\", "\\\\").replace("`", "\\`") + "`" def escape_snowflake_identifier(v: str) -> str: diff --git a/dlt/common/data_writers/exceptions.py b/dlt/common/data_writers/exceptions.py index a86bd9440e..d3a073cf4e 100644 --- a/dlt/common/data_writers/exceptions.py +++ b/dlt/common/data_writers/exceptions.py @@ -9,7 +9,10 @@ class DataWriterException(DltException): class InvalidFileNameTemplateException(DataWriterException, ValueError): def __init__(self, file_name_template: str): self.file_name_template = file_name_template - super().__init__(f"Wrong file name template {file_name_template}. File name template must contain exactly one %s formatter") + super().__init__( + f"Wrong file name template {file_name_template}. 
File name template must contain" + " exactly one %s formatter" + ) class BufferedDataWriterClosed(DataWriterException): @@ -21,4 +24,6 @@ def __init__(self, file_name: str): class DestinationCapabilitiesRequired(DataWriterException, ValueError): def __init__(self, file_format: TLoaderFileFormat): self.file_format = file_format - super().__init__(f"Writer for {file_format} requires destination capabilities which were not provided.") + super().__init__( + f"Writer for {file_format} requires destination capabilities which were not provided." + ) diff --git a/dlt/common/data_writers/writers.py b/dlt/common/data_writers/writers.py index 401f6aafd2..0f9ff09259 100644 --- a/dlt/common/data_writers/writers.py +++ b/dlt/common/data_writers/writers.py @@ -1,6 +1,18 @@ import abc from dataclasses import dataclass -from typing import IO, TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Type, Union +from typing import ( + IO, + TYPE_CHECKING, + Any, + Dict, + List, + Optional, + Sequence, + Tuple, + Type, + NamedTuple, + overload, +) from dlt.common import json from dlt.common.configuration import configspec, known_sections, with_config @@ -23,6 +35,28 @@ class TFileFormatSpec: supports_compression: bool = False +class DataWriterMetrics(NamedTuple): + file_path: str + items_count: int + file_size: int + created: float + last_modified: float + + def __add__(self, other: Tuple[object, ...], /) -> Tuple[object, ...]: + if isinstance(other, DataWriterMetrics): + return DataWriterMetrics( + "", # path is not known + self.items_count + other.items_count, + self.file_size + other.file_size, + min(self.created, other.created), + max(self.last_modified, other.last_modified), + ) + return NotImplemented + + +EMPTY_DATA_WRITER_METRICS = DataWriterMetrics("", 0, 0, 2**32, 0.0) + + class DataWriter(abc.ABC): def __init__(self, f: IO[Any], caps: DestinationCapabilitiesContext = None) -> None: self._f = f @@ -45,18 +79,21 @@ def write_all(self, columns_schema: TTableSchemaColumns, rows: Sequence[Any]) -> self.write_data(rows) self.write_footer() - @classmethod @abc.abstractmethod def data_format(cls) -> TFileFormatSpec: pass @classmethod - def from_file_format(cls, file_format: TLoaderFileFormat, f: IO[Any], caps: DestinationCapabilitiesContext = None) -> "DataWriter": + def from_file_format( + cls, file_format: TLoaderFileFormat, f: IO[Any], caps: DestinationCapabilitiesContext = None + ) -> "DataWriter": return cls.class_factory(file_format)(f, caps) @classmethod - def from_destination_capabilities(cls, caps: DestinationCapabilitiesContext, f: IO[Any]) -> "DataWriter": + def from_destination_capabilities( + cls, caps: DestinationCapabilitiesContext, f: IO[Any] + ) -> "DataWriter": return cls.class_factory(caps.preferred_loader_file_format)(f, caps) @classmethod @@ -74,13 +111,12 @@ def class_factory(file_format: TLoaderFileFormat) -> Type["DataWriter"]: elif file_format == "parquet": return ParquetDataWriter # type: ignore elif file_format == "arrow": - return ArrowWriter # type: ignore + return ArrowWriter # type: ignore else: raise ValueError(file_format) class JsonlWriter(DataWriter): - def write_header(self, columns_schema: TTableSchemaColumns) -> None: pass @@ -105,7 +141,6 @@ def data_format(cls) -> TFileFormatSpec: class JsonlListPUAEncodeWriter(JsonlWriter): - def write_data(self, rows: Sequence[Any]) -> None: # skip JsonlWriter when calling super super(JsonlWriter, self).write_data(rows) @@ -126,7 +161,6 @@ def data_format(cls) -> TFileFormatSpec: class InsertValuesWriter(DataWriter): - def 
__init__(self, f: IO[Any], caps: DestinationCapabilitiesContext = None) -> None: super().__init__(f, caps) self._chunks_written = 0 @@ -148,7 +182,7 @@ def write_data(self, rows: Sequence[Any]) -> None: def write_row(row: StrAny) -> None: output = ["NULL"] * len(self._headers_lookup) - for n,v in row.items(): + for n, v in row.items(): output[self._headers_lookup[n]] = self._caps.escape_literal(v) self._f.write("(") self._f.write(",".join(output)) @@ -194,19 +228,20 @@ class ParquetDataWriterConfiguration(BaseConfiguration): __section__: str = known_sections.DATA_WRITER -class ParquetDataWriter(DataWriter): +class ParquetDataWriter(DataWriter): @with_config(spec=ParquetDataWriterConfiguration) - def __init__(self, - f: IO[Any], - caps: DestinationCapabilitiesContext = None, - *, - flavor: str = "spark", - version: str = "2.4", - data_page_size: int = 1024 * 1024, - timestamp_timezone: str = "UTC", - row_group_size: Optional[int] = None - ) -> None: + def __init__( + self, + f: IO[Any], + caps: DestinationCapabilitiesContext = None, + *, + flavor: str = "spark", + version: str = "2.4", + data_page_size: int = 1024 * 1024, + timestamp_timezone: str = "UTC", + row_group_size: Optional[int] = None, + ) -> None: super().__init__(f, caps) from dlt.common.libs.pyarrow import pyarrow @@ -220,25 +255,36 @@ def __init__(self, self.parquet_row_group_size = row_group_size def _create_writer(self, schema: "pa.Schema") -> "pa.parquet.ParquetWriter": - from dlt.common.libs.pyarrow import pyarrow, get_py_arrow_datatype - return pyarrow.parquet.ParquetWriter(self._f, schema, flavor=self.parquet_flavor, version=self.parquet_version, data_page_size=self.parquet_data_page_size) + from dlt.common.libs.pyarrow import pyarrow + + return pyarrow.parquet.ParquetWriter( + self._f, + schema, + flavor=self.parquet_flavor, + version=self.parquet_version, + data_page_size=self.parquet_data_page_size, + ) def write_header(self, columns_schema: TTableSchemaColumns) -> None: from dlt.common.libs.pyarrow import pyarrow, get_py_arrow_datatype # build schema self.schema = pyarrow.schema( - [pyarrow.field( - name, - get_py_arrow_datatype(schema_item, self._caps, self.timestamp_timezone), - nullable=schema_item.get("nullable", True) - ) for name, schema_item in columns_schema.items()] + [ + pyarrow.field( + name, + get_py_arrow_datatype(schema_item, self._caps, self.timestamp_timezone), + nullable=schema_item.get("nullable", True), + ) + for name, schema_item in columns_schema.items() + ] ) # find row items that are of the complex type (could be abstracted out for use in other writers?) 
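The reflowed `write_header` above builds the Arrow schema one `pyarrow.field` per dlt column. A rough, self-contained sketch of that shape, substituting a toy type map for dlt's internal `get_py_arrow_datatype` helper (the column dict is made up):

    import pyarrow as pa

    # toy mapping only; the real writer derives types from the destination capabilities
    TYPE_MAP = {"bigint": pa.int64(), "text": pa.string(), "double": pa.float64(), "bool": pa.bool_()}

    columns_schema = {
        "id": {"data_type": "bigint", "nullable": False},
        "name": {"data_type": "text", "nullable": True},
    }
    schema = pa.schema(
        [
            pa.field(name, TYPE_MAP[col["data_type"]], nullable=col.get("nullable", True))
            for name, col in columns_schema.items()
        ]
    )
    # field order and nullability mirror the dlt table schema, as in write_header above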
- self.complex_indices = [i for i, field in columns_schema.items() if field["data_type"] == "complex"] + self.complex_indices = [ + i for i, field in columns_schema.items() if field["data_type"] == "complex" + ] self.writer = self._create_writer(self.schema) - def write_data(self, rows: Sequence[Any]) -> None: super().write_data(rows) from dlt.common.libs.pyarrow import pyarrow @@ -257,19 +303,26 @@ def write_footer(self) -> None: self.writer.close() self.writer = None - @classmethod def data_format(cls) -> TFileFormatSpec: - return TFileFormatSpec("parquet", "parquet", True, False, requires_destination_capabilities=True, supports_compression=False) + return TFileFormatSpec( + "parquet", + "parquet", + True, + False, + requires_destination_capabilities=True, + supports_compression=False, + ) class ArrowWriter(ParquetDataWriter): def write_header(self, columns_schema: TTableSchemaColumns) -> None: # Schema will be written as-is from the arrow table - pass + self._column_schema = columns_schema def write_data(self, rows: Sequence[Any]) -> None: from dlt.common.libs.pyarrow import pyarrow + rows = list(rows) if not rows: return @@ -285,6 +338,11 @@ def write_data(self, rows: Sequence[Any]) -> None: # count rows that got written self.items_count += row.num_rows + def write_footer(self) -> None: + if not self.writer: + raise NotImplementedError("Arrow Writer does not support writing empty files") + return super().write_footer() + @classmethod def data_format(cls) -> TFileFormatSpec: return TFileFormatSpec( diff --git a/dlt/common/destination/__init__.py b/dlt/common/destination/__init__.py index 88b5d5ef06..00f129c69c 100644 --- a/dlt/common/destination/__init__.py +++ b/dlt/common/destination/__init__.py @@ -1,10 +1,15 @@ -from dlt.common.destination.capabilities import DestinationCapabilitiesContext, TLoaderFileFormat, ALL_SUPPORTED_FILE_FORMATS -from dlt.common.destination.reference import DestinationReference, TDestinationReferenceArg +from dlt.common.destination.capabilities import ( + DestinationCapabilitiesContext, + TLoaderFileFormat, + ALL_SUPPORTED_FILE_FORMATS, +) +from dlt.common.destination.reference import TDestinationReferenceArg, Destination, TDestination __all__ = [ "DestinationCapabilitiesContext", "TLoaderFileFormat", "ALL_SUPPORTED_FILE_FORMATS", - "DestinationReference", "TDestinationReferenceArg", + "Destination", + "TDestination", ] diff --git a/dlt/common/destination/capabilities.py b/dlt/common/destination/capabilities.py index 06504ee590..2596b2bf99 100644 --- a/dlt/common/destination/capabilities.py +++ b/dlt/common/destination/capabilities.py @@ -14,17 +14,22 @@ # puae-jsonl - internal extract -> normalize format bases on jsonl # insert_values - insert SQL statements # sql - any sql statement -TLoaderFileFormat = Literal["jsonl", "puae-jsonl", "insert_values", "sql", "parquet", "reference", "arrow"] +TLoaderFileFormat = Literal[ + "jsonl", "puae-jsonl", "insert_values", "sql", "parquet", "reference", "arrow" +] ALL_SUPPORTED_FILE_FORMATS: Set[TLoaderFileFormat] = set(get_args(TLoaderFileFormat)) # file formats used internally by dlt INTERNAL_LOADER_FILE_FORMATS: Set[TLoaderFileFormat] = {"puae-jsonl", "sql", "reference", "arrow"} # file formats that may be chosen by the user -EXTERNAL_LOADER_FILE_FORMATS: Set[TLoaderFileFormat] = set(get_args(TLoaderFileFormat)) - INTERNAL_LOADER_FILE_FORMATS +EXTERNAL_LOADER_FILE_FORMATS: Set[TLoaderFileFormat] = ( + set(get_args(TLoaderFileFormat)) - INTERNAL_LOADER_FILE_FORMATS +) @configspec class 
DestinationCapabilitiesContext(ContainerInjectableContext): """Injectable destination capabilities required for many Pipeline stages ie. normalize""" + preferred_loader_file_format: TLoaderFileFormat supported_loader_file_formats: List[TLoaderFileFormat] preferred_staging_file_format: Optional[TLoaderFileFormat] @@ -52,7 +57,9 @@ class DestinationCapabilitiesContext(ContainerInjectableContext): can_create_default: ClassVar[bool] = False @staticmethod - def generic_capabilities(preferred_loader_file_format: TLoaderFileFormat = None) -> "DestinationCapabilitiesContext": + def generic_capabilities( + preferred_loader_file_format: TLoaderFileFormat = None, + ) -> "DestinationCapabilitiesContext": caps = DestinationCapabilitiesContext() caps.preferred_loader_file_format = preferred_loader_file_format caps.supported_loader_file_formats = ["jsonl", "insert_values", "parquet"] diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index 13172b41e9..07b8871a85 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -1,18 +1,40 @@ -from abc import ABC, abstractmethod, abstractproperty +from abc import ABC, abstractmethod from importlib import import_module -from types import TracebackType, ModuleType -from typing import ClassVar, Final, Optional, NamedTuple, Literal, Sequence, Iterable, Type, Protocol, Union, TYPE_CHECKING, cast, List, ContextManager, Dict, Any +from types import TracebackType +from typing import ( + ClassVar, + Optional, + NamedTuple, + Literal, + Sequence, + Iterable, + Type, + Union, + TYPE_CHECKING, + List, + ContextManager, + Dict, + Any, + TypeVar, + Generic, + Final, +) from contextlib import contextmanager import datetime # noqa: 251 from copy import deepcopy +import inspect from dlt.common import logger -from dlt.common.exceptions import IdentifierTooLongException, InvalidDestinationReference, UnknownDestinationModule +from dlt.common.exceptions import ( + IdentifierTooLongException, + InvalidDestinationReference, + UnknownDestinationModule, +) from dlt.common.schema import Schema, TTableSchema, TSchemaTables from dlt.common.schema.typing import TWriteDisposition from dlt.common.schema.exceptions import InvalidDatasetName from dlt.common.schema.utils import get_write_disposition, get_table_format -from dlt.common.configuration import configspec +from dlt.common.configuration import configspec, with_config, resolve_configuration, known_sections from dlt.common.configuration.specs import BaseConfiguration, CredentialsConfiguration from dlt.common.configuration.accessors import config from dlt.common.destination.capabilities import DestinationCapabilitiesContext @@ -23,7 +45,10 @@ from dlt.common.utils import get_module_name from dlt.common.configuration.specs import GcpCredentials, AwsCredentialsWithoutDefaults + TLoaderReplaceStrategy = Literal["truncate-and-insert", "insert-from-staging", "staging-optimized"] +TDestinationConfig = TypeVar("TDestinationConfig", bound="DestinationClientConfiguration") +TDestinationClient = TypeVar("TDestinationClient", bound="JobClientBase") class StorageSchemaInfo(NamedTuple): @@ -34,6 +59,7 @@ class StorageSchemaInfo(NamedTuple): inserted_at: datetime.datetime schema: str + class StateInfo(NamedTuple): version: int engine_version: int @@ -42,10 +68,15 @@ class StateInfo(NamedTuple): created_at: datetime.datetime dlt_load_id: str = None + @configspec class DestinationClientConfiguration(BaseConfiguration): - destination_name: str = None # which destination to load data to 
+ destination_type: Final[str] = None # which destination to load data to credentials: Optional[CredentialsConfiguration] + destination_name: Optional[str] = ( + None # name of the destination, if not set, destination_type is used + ) + environment: Optional[str] = None def fingerprint(self) -> str: """Returns a destination fingerprint which is a hash of selected configuration fields. ie. host in case of connection string""" @@ -55,17 +86,25 @@ def __str__(self) -> str: """Return displayable destination location""" return str(self.credentials) + def on_resolved(self) -> None: + self.destination_name = self.destination_name or self.destination_type + if TYPE_CHECKING: - def __init__(self, destination_name: str = None, credentials: Optional[CredentialsConfiguration] = None -) -> None: - ... + + def __init__( + self, + *, + credentials: Optional[CredentialsConfiguration] = None, + destination_name: str = None, + environment: str = None, + ) -> None: ... @configspec class DestinationClientDwhConfiguration(DestinationClientConfiguration): """Configuration of a destination that supports datasets/schemas""" - dataset_name: Final[str] = None + dataset_name: Final[str] = None # dataset must be final so it is not configurable """dataset name in the destination to load data to, for schemas that are not default schema, it is used as dataset prefix""" default_schema_name: Optional[str] = None """name of default schema to be used to name effective dataset to load data to""" @@ -75,7 +114,7 @@ class DestinationClientDwhConfiguration(DestinationClientConfiguration): def normalize_dataset_name(self, schema: Schema) -> str: """Builds full db dataset (schema) name out of configured dataset name and schema name: {dataset_name}_{schema.name}. The resulting name is normalized. - If default schema name is None or equals schema.name, the schema suffix is skipped. + If default schema name is None or equals schema.name, the schema suffix is skipped. """ if not schema.name: raise ValueError("schema_name is None or empty") @@ -83,59 +122,75 @@ def normalize_dataset_name(self, schema: Schema) -> str: # if default schema is None then suffix is not added if self.default_schema_name is not None and schema.name != self.default_schema_name: # also normalize schema name. schema name is Python identifier and here convention may be different - return schema.naming.normalize_table_identifier((self.dataset_name or "") + "_" + schema.name) + return schema.naming.normalize_table_identifier( + (self.dataset_name or "") + "_" + schema.name + ) - return self.dataset_name if not self.dataset_name else schema.naming.normalize_table_identifier(self.dataset_name) + return ( + self.dataset_name + if not self.dataset_name + else schema.naming.normalize_table_identifier(self.dataset_name) + ) if TYPE_CHECKING: + def __init__( self, - destination_name: str = None, + *, credentials: Optional[CredentialsConfiguration] = None, dataset_name: str = None, default_schema_name: Optional[str] = None, - ) -> None: - ... + destination_name: str = None, + environment: str = None, + ) -> None: ... + @configspec class DestinationClientStagingConfiguration(DestinationClientDwhConfiguration): """Configuration of a staging destination, able to store files with desired `layout` at `bucket_url`. - Also supports datasets and can act as standalone destination. + Also supports datasets and can act as standalone destination. 
""" + as_staging: bool = False bucket_url: str = None # layout of the destination files layout: str = "{table_name}/{load_id}.{file_id}.{ext}" if TYPE_CHECKING: + def __init__( self, - destination_name: str = None, + *, credentials: Union[AwsCredentialsWithoutDefaults, GcpCredentials] = None, dataset_name: str = None, default_schema_name: Optional[str] = None, as_staging: bool = False, bucket_url: str = None, - layout: str = None - ) -> None: - ... + layout: str = None, + destination_name: str = None, + environment: str = None, + ) -> None: ... + @configspec class DestinationClientDwhWithStagingConfiguration(DestinationClientDwhConfiguration): """Configuration of a destination that can take data from staging destination""" + staging_config: Optional[DestinationClientStagingConfiguration] = None """configuration of the staging, if present, injected at runtime""" if TYPE_CHECKING: + def __init__( self, - destination_name: str = None, + *, credentials: Optional[CredentialsConfiguration] = None, dataset_name: str = None, default_schema_name: Optional[str] = None, - staging_config: Optional[DestinationClientStagingConfiguration] = None - ) -> None: - ... + staging_config: Optional[DestinationClientStagingConfiguration] = None, + destination_name: str = None, + environment: str = None, + ) -> None: ... TLoadJobState = Literal["running", "failed", "retry", "completed"] @@ -144,14 +199,15 @@ def __init__( class LoadJob: """Represents a job that loads a single file - Each job starts in "running" state and ends in one of terminal states: "retry", "failed" or "completed". - Each job is uniquely identified by a file name. The file is guaranteed to exist in "running" state. In terminal state, the file may not be present. - In "running" state, the loader component periodically gets the state via `status()` method. When terminal state is reached, load job is discarded and not called again. - `exception` method is called to get error information in "failed" and "retry" states. + Each job starts in "running" state and ends in one of terminal states: "retry", "failed" or "completed". + Each job is uniquely identified by a file name. The file is guaranteed to exist in "running" state. In terminal state, the file may not be present. + In "running" state, the loader component periodically gets the state via `status()` method. When terminal state is reached, load job is discarded and not called again. + `exception` method is called to get error information in "failed" and "retry" states. - The `__init__` method is responsible to put the Job in "running" state. It may raise `LoadClientTerminalException` and `LoadClientTransientException` to - immediately transition job into "failed" or "retry" state respectively. + The `__init__` method is responsible to put the Job in "running" state. It may raise `LoadClientTerminalException` and `LoadClientTransientException` to + immediately transition job into "failed" or "retry" state respectively. 
""" + def __init__(self, file_name: str) -> None: """ File name is also a job id (or job id is deterministically derived) so it must be globally unique @@ -171,7 +227,7 @@ def file_name(self) -> str: return self._file_name def job_id(self) -> str: - """The job id that is derived from the file name""" + """The job id that is derived from the file name and does not changes during job lifecycle""" return self._parsed_file_name.job_id() def job_file_info(self) -> ParsedLoadJobFileName: @@ -194,12 +250,12 @@ def new_file_path(self) -> str: class FollowupJob: """Adds a trait that allows to create a followup job""" + def create_followup_jobs(self, next_state: str) -> List[NewLoadJob]: return [] class JobClientBase(ABC): - capabilities: ClassVar[DestinationCapabilitiesContext] = None def __init__(self, schema: Schema, config: DestinationClientConfiguration) -> None: @@ -208,8 +264,7 @@ def __init__(self, schema: Schema, config: DestinationClientConfiguration) -> No @abstractmethod def initialize_storage(self, truncate_tables: Iterable[str] = None) -> None: - """Prepares storage to be used ie. creates database schema or file system folder. Truncates requested tables. - """ + """Prepares storage to be used ie. creates database schema or file system folder. Truncates requested tables.""" pass @abstractmethod @@ -222,7 +277,9 @@ def drop_storage(self) -> None: """Brings storage back into not initialized state. Typically data in storage is destroyed.""" pass - def update_stored_schema(self, only_tables: Iterable[str] = None, expected_update: TSchemaTables = None) -> Optional[TSchemaTables]: + def update_stored_schema( + self, only_tables: Iterable[str] = None, expected_update: TSchemaTables = None + ) -> Optional[TSchemaTables]: """Updates storage to the current schema. Implementations should not assume that `expected_update` is the exact difference between destination state and the self.schema. 
This is only the case if @@ -250,7 +307,9 @@ def restore_file_load(self, file_path: str) -> LoadJob: def should_truncate_table_before_load(self, table: TTableSchema) -> bool: return table["write_disposition"] == "replace" - def create_table_chain_completed_followup_jobs(self, table_chain: Sequence[TTableSchema]) -> List[NewLoadJob]: + def create_table_chain_completed_followup_jobs( + self, table_chain: Sequence[TTableSchema] + ) -> List[NewLoadJob]: """Creates a list of followup jobs that should be executed after a table chain is completed""" return [] @@ -264,7 +323,9 @@ def __enter__(self) -> "JobClientBase": pass @abstractmethod - def __exit__(self, exc_type: Type[BaseException], exc_val: BaseException, exc_tb: TracebackType) -> None: + def __exit__( + self, exc_type: Type[BaseException], exc_val: BaseException, exc_tb: TracebackType + ) -> None: pass def _verify_schema(self) -> None: @@ -277,17 +338,27 @@ def _verify_schema(self) -> None: for table in self.schema.data_tables(): table_name = table["name"] if len(table_name) > self.capabilities.max_identifier_length: - raise IdentifierTooLongException(self.config.destination_name, "table", table_name, self.capabilities.max_identifier_length) + raise IdentifierTooLongException( + self.config.destination_type, + "table", + table_name, + self.capabilities.max_identifier_length, + ) for column_name, column in dict(table["columns"]).items(): if len(column_name) > self.capabilities.max_column_identifier_length: raise IdentifierTooLongException( - self.config.destination_name, + self.config.destination_type, "column", f"{table_name}.{column_name}", - self.capabilities.max_column_identifier_length + self.capabilities.max_column_identifier_length, ) if not is_complete_column(column): - logger.warning(f"A column {column_name} in table {table_name} in schema {self.schema.name} is incomplete. It was not bound to the data during normalizations stage and its data type is unknown. Did you add this column manually in code ie. as a merge key?") + logger.warning( + f"A column {column_name} in table {table_name} in schema" + f" {self.schema.name} is incomplete. It was not bound to the data during" + " normalizations stage and its data type is unknown. Did you add this" + " column manually in code ie. as a merge key?" 
+ ) def get_load_table(self, table_name: str, prepare_for_staging: bool = False) -> TTableSchema: if table_name not in self.schema.tables: @@ -306,7 +377,6 @@ def get_load_table(self, table_name: str, prepare_for_staging: bool = False) -> class WithStateSync(ABC): - @abstractmethod def get_stored_schema(self) -> Optional[StorageSchemaInfo]: """Retrieves newest schema from destination storage""" @@ -330,73 +400,160 @@ def should_load_data_to_staging_dataset(self, table: TTableSchema) -> bool: return False @abstractmethod - def with_staging_dataset(self)-> ContextManager["JobClientBase"]: + def with_staging_dataset(self) -> ContextManager["JobClientBase"]: """Executes job client methods on staging dataset""" return self # type: ignore -class SupportsStagingDestination(): + +class SupportsStagingDestination: """Adds capability to support a staging destination for the load""" - def should_load_data_to_staging_dataset_on_staging_destination(self, table: TTableSchema) -> bool: + def should_load_data_to_staging_dataset_on_staging_destination( + self, table: TTableSchema + ) -> bool: return False def should_truncate_table_before_load_on_staging_destination(self, table: TTableSchema) -> bool: # the default is to truncate the tables on the staging destination... return True -TDestinationReferenceArg = Union["DestinationReference", ModuleType, None, str] +TDestinationReferenceArg = Union[str, "Destination", None] + + +class Destination(ABC, Generic[TDestinationConfig, TDestinationClient]): + """A destination factory that can be partially pre-configured + with credentials and other config params. + """ -class DestinationReference(Protocol): - __name__: str - """Name of the destination""" + config_params: Optional[Dict[str, Any]] = None + def __init__(self, **kwargs: Any) -> None: + # Create initial unresolved destination config + # Argument defaults are filtered out here because we only want arguments passed explicitly + # to supersede config from the environment or pipeline args + sig = inspect.signature(self.__class__.__init__) + params = sig.parameters + self.config_params = { + k: v for k, v in kwargs.items() if k not in params or v != params[k].default + } + + @property + @abstractmethod + def spec(self) -> Type[TDestinationConfig]: + """A spec of destination configuration that also contains destination credentials""" + ... + + @abstractmethod def capabilities(self) -> DestinationCapabilitiesContext: """Destination capabilities ie. supported loader file formats, identifier name lengths, naming conventions, escape function etc.""" + ... - def client(self, schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> "JobClientBase": - """A job client responsible for starting and resuming load jobs""" + @property + def destination_name(self) -> str: + """The destination name will either be explicitly set while creating the destination or will be taken from the type""" + return self.config_params.get("destination_name") or self.to_name(self.destination_type) - def spec(self) -> Type[DestinationClientConfiguration]: - """A spec of destination configuration that also contains destination credentials""" + @property + def destination_type(self) -> str: + full_path = self.__class__.__module__ + "." 
+ self.__class__.__qualname__ + return Destination.normalize_type(full_path) + + @property + def destination_description(self) -> str: + return f"{self.destination_name}({self.destination_type})" + + @property + @abstractmethod + def client_class(self) -> Type[TDestinationClient]: + """A job client class responsible for starting and resuming load jobs""" + ... + + def configuration(self, initial_config: TDestinationConfig) -> TDestinationConfig: + """Get a fully resolved destination config from the initial config""" + config = resolve_configuration( + initial_config, + sections=(known_sections.DESTINATION, self.destination_name), + # Already populated values will supersede resolved env config + explicit_value=self.config_params, + ) + return config + + @staticmethod + def to_name(ref: TDestinationReferenceArg) -> str: + if ref is None: + raise InvalidDestinationReference(ref) + if isinstance(ref, str): + return ref.rsplit(".", 1)[-1] + return ref.destination_name + + @staticmethod + def normalize_type(destination_type: str) -> str: + """Normalizes destination type string into a canonical form. Assumes that type names without dots correspond to build in destinations.""" + if "." not in destination_type: + destination_type = "dlt.destinations." + destination_type + # the next two lines shorten the dlt internal destination paths to dlt.destinations. + name = Destination.to_name(destination_type) + destination_type = destination_type.replace( + f"dlt.destinations.impl.{name}.factory.", "dlt.destinations." + ) + return destination_type @staticmethod - def from_name(destination: TDestinationReferenceArg) -> "DestinationReference": - if destination is None: + def from_reference( + ref: TDestinationReferenceArg, + credentials: Optional[CredentialsConfiguration] = None, + destination_name: Optional[str] = None, + environment: Optional[str] = None, + **kwargs: Any, + ) -> Optional["Destination[DestinationClientConfiguration, JobClientBase]"]: + """Instantiate destination from str reference. + The ref can be a destination name or import path pointing to a destination class (e.g. `dlt.destinations.postgres`) + """ + # if we only get a name but no ref, we assume that the name is the destination_type + if ref is None and destination_name is not None: + ref = destination_name + if ref is None: return None + if isinstance(ref, Destination): + if credentials or destination_name or environment: + logger.warning( + "Cannot override credentials, destination_name or environment when passing a" + " Destination instance, these values will be ignored." + ) + return ref + if not isinstance(ref, str): + raise InvalidDestinationReference(ref) + try: + module_path, attr_name = Destination.normalize_type(ref).rsplit(".", 1) + dest_module = import_module(module_path) + except ModuleNotFoundError as e: + raise UnknownDestinationModule(ref) from e - # if destination is a str, get destination reference by dynamically importing module - if isinstance(destination, str): - try: - if "." in destination: - # this is full module name - destination_ref = cast(DestinationReference, import_module(destination)) - else: - # from known location - destination_ref = cast(DestinationReference, import_module(f"dlt.destinations.{destination}")) - except ImportError: - if "." 
in destination: - raise UnknownDestinationModule(destination) - else: - # allow local external module imported without dot - try: - destination_ref = cast(DestinationReference, import_module(destination)) - except ImportError: - raise UnknownDestinationModule(destination) - else: - destination_ref = cast(DestinationReference, destination) - - # make sure the reference is correct try: - c = destination_ref.spec() - c.credentials - except Exception: - raise InvalidDestinationReference(destination) + factory: Type[Destination[DestinationClientConfiguration, JobClientBase]] = getattr( + dest_module, attr_name + ) + except AttributeError as e: + raise UnknownDestinationModule(ref) from e + if credentials: + kwargs["credentials"] = credentials + if destination_name: + kwargs["destination_name"] = destination_name + if environment: + kwargs["environment"] = environment + try: + dest = factory(**kwargs) + dest.spec + except Exception as e: + raise InvalidDestinationReference(ref) from e + return dest - return destination_ref + def client( + self, schema: Schema, initial_config: TDestinationConfig = config.value + ) -> TDestinationClient: + """Returns a configured instance of the destination's job client""" + return self.client_class(schema, self.configuration(initial_config)) - @staticmethod - def to_name(destination: TDestinationReferenceArg) -> str: - if isinstance(destination, ModuleType): - return get_module_name(destination) - return destination.split(".")[-1] # type: ignore + +TDestination = Destination[DestinationClientConfiguration, JobClientBase] diff --git a/dlt/common/exceptions.py b/dlt/common/exceptions.py index aa987f6766..c14a743f33 100644 --- a/dlt/common/exceptions.py +++ b/dlt/common/exceptions.py @@ -1,4 +1,27 @@ -from typing import Any, AnyStr, List, Sequence, Optional, Iterable +from typing import Any, AnyStr, Dict, List, Sequence, Optional, Iterable, TypedDict + + +class ExceptionTrace(TypedDict, total=False): + """Exception trace. NOTE: we intend to change it with an extended line by line trace with code snippets""" + + message: str + exception_type: str + docstring: str + stack_trace: List[str] + is_terminal: bool + """Says if exception is terminal if happened to a job during load step""" + exception_attrs: Dict[str, Any] + """Public attributes of an exception deriving from DltException (not starting with _)""" + load_id: str + """Load id if found in exception attributes""" + pipeline_name: str + """Pipeline name if found in exception attributes or in the active pipeline (Container)""" + source_name: str + """Source name if found in exception attributes or in Container""" + resource_name: str + """Resource name if found in exception attributes""" + job_id: str + """Job id if found in exception attributes""" class DltException(Exception): @@ -6,10 +29,22 @@ def __reduce__(self) -> Any: """Enables exceptions with parametrized constructor to be pickled""" return type(self).__new__, (type(self), *self.args), self.__dict__ + def attrs(self) -> Dict[str, Any]: + """Returns "public" attributes of the DltException""" + return { + k: v + for k, v in vars(self).items() + if not k.startswith("_") and not callable(v) and not hasattr(self.__class__, k) + } + + class UnsupportedProcessStartMethodException(DltException): def __init__(self, method: str) -> None: self.method = method - super().__init__(f"Process pool supports only fork start method, {method} not supported. 
Switch the pool type to threading") + super().__init__( + f"Process pool supports only fork start method, {method} not supported. Switch the pool" + " type to threading" + ) class CannotInstallDependencies(DltException): @@ -20,7 +55,10 @@ def __init__(self, dependencies: Sequence[str], interpreter: str, output: AnyStr str_output = output.decode("utf-8") else: str_output = output - super().__init__(f"Cannot install dependencies {', '.join(dependencies)} with {interpreter} and pip:\n{str_output}\n") + super().__init__( + f"Cannot install dependencies {', '.join(dependencies)} with {interpreter} and" + f" pip:\n{str_output}\n" + ) class VenvNotFound(DltException): @@ -49,6 +87,7 @@ class TerminalValueError(ValueError, TerminalException): class SignalReceivedException(KeyboardInterrupt, TerminalException): """Raises when signal comes. Derives from `BaseException` to not be caught in regular exception handlers.""" + def __init__(self, signal_code: int) -> None: self.signal_code = signal_code super().__init__(f"Signal {signal_code} received") @@ -87,7 +126,7 @@ def _get_msg(self, appendix: str) -> str: return msg def _to_pip_install(self) -> str: - return "\n".join([f"pip install \"{d}\"" for d in self.dependencies]) + return "\n".join([f'pip install "{d}"' for d in self.dependencies]) class SystemConfigurationException(DltException): @@ -132,11 +171,13 @@ def __init__(self, destination: str) -> None: self.destination = destination super().__init__(f"Destination {destination} does not support loading via staging.") + class DestinationLoadingWithoutStagingNotSupported(DestinationTerminalException): def __init__(self, destination: str) -> None: self.destination = destination super().__init__(f"Destination {destination} does not support loading without staging.") + class DestinationNoStagingMode(DestinationTerminalException): def __init__(self, destination: str) -> None: self.destination = destination @@ -144,7 +185,9 @@ def __init__(self, destination: str) -> None: class DestinationIncompatibleLoaderFileFormatException(DestinationTerminalException): - def __init__(self, destination: str, staging: str, file_format: str, supported_formats: Iterable[str]) -> None: + def __init__( + self, destination: str, staging: str, file_format: str, supported_formats: Iterable[str] + ) -> None: self.destination = destination self.staging = staging self.file_format = file_format @@ -152,21 +195,41 @@ def __init__(self, destination: str, staging: str, file_format: str, supported_f supported_formats_str = ", ".join(supported_formats) if self.staging: if not supported_formats: - msg = f"Staging {staging} cannot be used with destination {destination} because they have no file formats in common." + msg = ( + f"Staging {staging} cannot be used with destination {destination} because they" + " have no file formats in common." + ) else: - msg = f"Unsupported file format {file_format} for destination {destination} in combination with staging destination {staging}. Supported formats: {supported_formats_str}" + msg = ( + f"Unsupported file format {file_format} for destination {destination} in" + f" combination with staging destination {staging}. Supported formats:" + f" {supported_formats_str}" + ) else: - msg = f"Unsupported file format {file_format} destination {destination}. Supported formats: {supported_formats_str}. Check the staging option in the dlt.pipeline for additional formats." + msg = ( + f"Unsupported file format {file_format} destination {destination}. Supported" + f" formats: {supported_formats_str}. 
Check the staging option in the dlt.pipeline" + " for additional formats." + ) super().__init__(msg) class IdentifierTooLongException(DestinationTerminalException): - def __init__(self, destination_name: str, identifier_type: str, identifier_name: str, max_identifier_length: int) -> None: + def __init__( + self, + destination_name: str, + identifier_type: str, + identifier_name: str, + max_identifier_length: int, + ) -> None: self.destination_name = destination_name self.identifier_type = identifier_type self.identifier_name = identifier_name self.max_identifier_length = max_identifier_length - super().__init__(f"The length of {identifier_type} {identifier_name} exceeds {max_identifier_length} allowed for {destination_name}") + super().__init__( + f"The length of {identifier_type} {identifier_name} exceeds" + f" {max_identifier_length} allowed for {destination_name}" + ) class DestinationHasFailedJobs(DestinationTerminalException): @@ -174,7 +237,9 @@ def __init__(self, destination_name: str, load_id: str, failed_jobs: List[Any]) self.destination_name = destination_name self.load_id = load_id self.failed_jobs = failed_jobs - super().__init__(f"Destination {destination_name} has failed jobs in load package {load_id}") + super().__init__( + f"Destination {destination_name} has failed jobs in load package {load_id}" + ) class PipelineException(DltException): @@ -187,21 +252,37 @@ def __init__(self, pipeline_name: str, msg: str) -> None: class PipelineStateNotAvailable(PipelineException): def __init__(self, source_state_key: Optional[str] = None) -> None: if source_state_key: - msg = f"The source {source_state_key} requested the access to pipeline state but no pipeline is active right now." + msg = ( + f"The source {source_state_key} requested the access to pipeline state but no" + " pipeline is active right now." + ) else: - msg = "The resource you called requested the access to pipeline state but no pipeline is active right now." - msg += " Call dlt.pipeline(...) before you call the @dlt.source or @dlt.resource decorated function." + msg = ( + "The resource you called requested the access to pipeline state but no pipeline is" + " active right now." + ) + msg += ( + " Call dlt.pipeline(...) before you call the @dlt.source or @dlt.resource decorated" + " function." + ) self.source_state_key = source_state_key super().__init__(None, msg) class ResourceNameNotAvailable(PipelineException): def __init__(self) -> None: - super().__init__(None, - "A resource state was requested but no active extract pipe context was found. Resource state may be only requested from @dlt.resource decorated function or with explicit resource name.") + super().__init__( + None, + "A resource state was requested but no active extract pipe context was found. Resource" + " state may be only requested from @dlt.resource decorated function or with explicit" + " resource name.", + ) class SourceSectionNotAvailable(PipelineException): def __init__(self) -> None: - msg = "Access to state was requested without source section active. State should be requested from within the @dlt.source and @dlt.resource decorated function." + msg = ( + "Access to state was requested without source section active. State should be requested" + " from within the @dlt.source and @dlt.resource decorated function." 
+ ) super().__init__(None, msg) diff --git a/dlt/common/git.py b/dlt/common/git.py index 602e889a36..c4f83a7398 100644 --- a/dlt/common/git.py +++ b/dlt/common/git.py @@ -15,6 +15,7 @@ else: Repo = Any + @contextmanager def git_custom_key_command(private_key: Optional[str]) -> Iterator[str]: if private_key: @@ -24,7 +25,9 @@ def git_custom_key_command(private_key: Optional[str]) -> Iterator[str]: try: # permissions so SSH does not complain os.chmod(key_file, 0o600) - yield 'ssh -o "StrictHostKeyChecking accept-new" -i "%s"' % key_file.replace("\\", "\\\\") + yield 'ssh -o "StrictHostKeyChecking accept-new" -i "%s"' % key_file.replace( + "\\", "\\\\" + ) finally: os.remove(key_file) else: @@ -46,6 +49,7 @@ def is_dirty(repo: Repo) -> bool: status: str = repo.git.status("--short") return len(status.strip()) > 0 + # def is_dirty(repo: Repo) -> bool: # # get branch status # status: str = repo.git.status("--short", "--branch") @@ -53,7 +57,9 @@ def is_dirty(repo: Repo) -> bool: # return len(status.splitlines()) > 1 -def ensure_remote_head(repo_path: str, branch: Optional[str] = None, with_git_command: Optional[str] = None) -> None: +def ensure_remote_head( + repo_path: str, branch: Optional[str] = None, with_git_command: Optional[str] = None +) -> None: from git import Repo, RepositoryDirtyError # update remotes and check if heads are same. ignores locally modified files @@ -70,7 +76,12 @@ def ensure_remote_head(repo_path: str, branch: Optional[str] = None, with_git_co raise RepositoryDirtyError(repo, status) -def clone_repo(repository_url: str, clone_path: str, branch: Optional[str] = None, with_git_command: Optional[str] = None) -> Repo: +def clone_repo( + repository_url: str, + clone_path: str, + branch: Optional[str] = None, + with_git_command: Optional[str] = None, +) -> Repo: from git import Repo repo = Repo.clone_from(repository_url, clone_path, env=dict(GIT_SSH_COMMAND=with_git_command)) @@ -79,7 +90,13 @@ def clone_repo(repository_url: str, clone_path: str, branch: Optional[str] = Non return repo -def force_clone_repo(repo_url: str, repo_storage: FileStorage, repo_name: str, branch: Optional[str] = None, with_git_command: Optional[str] = None) -> None: +def force_clone_repo( + repo_url: str, + repo_storage: FileStorage, + repo_name: str, + branch: Optional[str] = None, + with_git_command: Optional[str] = None, +) -> None: """Deletes the working directory repo_storage.root/repo_name and clones the `repo_url` into it. Will checkout `branch` if provided""" try: # delete repo folder @@ -89,7 +106,7 @@ def force_clone_repo(repo_url: str, repo_storage: FileStorage, repo_name: str, b repo_url, repo_storage.make_full_path(repo_name), branch=branch, - with_git_command=with_git_command + with_git_command=with_git_command, ).close() except Exception: # delete folder so we start clean next time @@ -98,7 +115,12 @@ def force_clone_repo(repo_url: str, repo_storage: FileStorage, repo_name: str, b raise -def get_fresh_repo_files(repo_location: str, working_dir: str = None, branch: Optional[str] = None, with_git_command: Optional[str] = None) -> FileStorage: +def get_fresh_repo_files( + repo_location: str, + working_dir: str = None, + branch: Optional[str] = None, + with_git_command: Optional[str] = None, +) -> FileStorage: """Returns a file storage leading to the newest repository files. 
If `repo_location` is url, file will be checked out into `working_dir/repo_name`""" from git import GitError @@ -113,7 +135,13 @@ def get_fresh_repo_files(repo_location: str, working_dir: str = None, branch: Op try: ensure_remote_head(repo_path, branch=branch, with_git_command=with_git_command) except GitError: - force_clone_repo(repo_location, FileStorage(working_dir, makedirs=True), repo_name, branch=branch, with_git_command=with_git_command) + force_clone_repo( + repo_location, + FileStorage(working_dir, makedirs=True), + repo_name, + branch=branch, + with_git_command=with_git_command, + ) return FileStorage(repo_path) diff --git a/dlt/common/json/__init__.py b/dlt/common/json/__init__.py index edb48643ef..e9b52cc382 100644 --- a/dlt/common/json/__init__.py +++ b/dlt/common/json/__init__.py @@ -24,38 +24,29 @@ class SupportsJson(Protocol): _impl_name: str """Implementation name""" - def dump(self, obj: Any, fp: IO[bytes], sort_keys: bool = False, pretty:bool = False) -> None: - ... + def dump( + self, obj: Any, fp: IO[bytes], sort_keys: bool = False, pretty: bool = False + ) -> None: ... - def typed_dump(self, obj: Any, fp: IO[bytes], pretty:bool = False) -> None: - ... + def typed_dump(self, obj: Any, fp: IO[bytes], pretty: bool = False) -> None: ... - def typed_dumps(self, obj: Any, sort_keys: bool = False, pretty: bool = False) -> str: - ... + def typed_dumps(self, obj: Any, sort_keys: bool = False, pretty: bool = False) -> str: ... - def typed_loads(self, s: str) -> Any: - ... + def typed_loads(self, s: str) -> Any: ... - def typed_dumpb(self, obj: Any, sort_keys: bool = False, pretty: bool = False) -> bytes: - ... + def typed_dumpb(self, obj: Any, sort_keys: bool = False, pretty: bool = False) -> bytes: ... - def typed_loadb(self, s: Union[bytes, bytearray, memoryview]) -> Any: - ... + def typed_loadb(self, s: Union[bytes, bytearray, memoryview]) -> Any: ... - def dumps(self, obj: Any, sort_keys: bool = False, pretty:bool = False) -> str: - ... + def dumps(self, obj: Any, sort_keys: bool = False, pretty: bool = False) -> str: ... - def dumpb(self, obj: Any, sort_keys: bool = False, pretty:bool = False) -> bytes: - ... + def dumpb(self, obj: Any, sort_keys: bool = False, pretty: bool = False) -> bytes: ... - def load(self, fp: Union[IO[bytes], IO[str]]) -> Any: - ... + def load(self, fp: Union[IO[bytes], IO[str]]) -> Any: ... - def loads(self, s: str) -> Any: - ... + def loads(self, s: str) -> Any: ... - def loadb(self, s: Union[bytes, bytearray, memoryview]) -> Any: - ... + def loadb(self, s: Union[bytes, bytearray, memoryview]) -> Any: ... 
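# A minimal usage sketch of the module-level `json` facade that implements the protocol above.
# It assumes `dlt` is importable; the facade binds to orjson when available and falls back to
# simplejson (or honors DLT_USE_JSON=simplejson), so `_impl_name` reports which one was picked.
# Values are illustrative only.
from dlt.common.json import json

doc = {"b": 1, "a": "two"}
print(json._impl_name)                       # "orjson" or "simplejson", depending on the environment
compact = json.dumps(doc, sort_keys=True)    # compact JSON string with sorted keys
assert json.loads(compact) == doc
pretty_bytes = json.dumpb(doc, pretty=True)  # bytes, indented with 2 spaces
assert json.loadb(pretty_bytes) == doc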
def custom_encode(obj: Any) -> str: @@ -74,10 +65,10 @@ def custom_encode(obj: Any) -> str: elif isinstance(obj, HexBytes): return obj.hex() elif isinstance(obj, bytes): - return base64.b64encode(obj).decode('ascii') - elif hasattr(obj, 'asdict'): + return base64.b64encode(obj).decode("ascii") + elif hasattr(obj, "asdict"): return obj.asdict() # type: ignore - elif hasattr(obj, '_asdict'): + elif hasattr(obj, "_asdict"): return obj._asdict() # type: ignore elif PydanticBaseModel and isinstance(obj, PydanticBaseModel): return obj.dict() # type: ignore[return-value] @@ -89,23 +80,24 @@ def custom_encode(obj: Any) -> str: # use PUA range to encode additional types -_DECIMAL = '\uF026' -_DATETIME = '\uF027' -_DATE = '\uF028' -_UUIDT = '\uF029' -_HEXBYTES = '\uF02A' -_B64BYTES = '\uF02B' -_WEI = '\uF02C' -_TIME = '\uF02D' +_DECIMAL = "\uf026" +_DATETIME = "\uf027" +_DATE = "\uf028" +_UUIDT = "\uf029" +_HEXBYTES = "\uf02a" +_B64BYTES = "\uf02b" +_WEI = "\uf02c" +_TIME = "\uf02d" def _datetime_decoder(obj: str) -> datetime: - if obj.endswith('Z'): + if obj.endswith("Z"): # Backwards compatibility for data encoded with previous dlt version # fromisoformat does not support Z suffix (until py3.11) - obj = obj[:-1] + '+00:00' + obj = obj[:-1] + "+00:00" return pendulum.DateTime.fromisoformat(obj) # type: ignore[attr-defined, no-any-return] + # define decoder for each prefix DECODERS: List[Callable[[Any], Any]] = [ Decimal, @@ -139,10 +131,10 @@ def custom_pua_encode(obj: Any) -> str: elif isinstance(obj, HexBytes): return _HEXBYTES + obj.hex() elif isinstance(obj, bytes): - return _B64BYTES + base64.b64encode(obj).decode('ascii') - elif hasattr(obj, 'asdict'): + return _B64BYTES + base64.b64encode(obj).decode("ascii") + elif hasattr(obj, "asdict"): return obj.asdict() # type: ignore - elif hasattr(obj, '_asdict'): + elif hasattr(obj, "_asdict"): return obj._asdict() # type: ignore elif dataclasses.is_dataclass(obj): return dataclasses.asdict(obj) # type: ignore @@ -158,7 +150,7 @@ def custom_pua_decode(obj: Any) -> Any: if isinstance(obj, str) and len(obj) > 1: c = ord(obj[0]) - 0xF026 # decode only the PUA space defined in DECODERS - if c >=0 and c <= PUA_CHARACTER_MAX: + if c >= 0 and c <= PUA_CHARACTER_MAX: return DECODERS[c](obj[1:]) return obj @@ -176,22 +168,30 @@ def custom_pua_remove(obj: Any) -> Any: if isinstance(obj, str) and len(obj) > 1: c = ord(obj[0]) - 0xF026 # decode only the PUA space defined in DECODERS - if c >=0 and c <= PUA_CHARACTER_MAX: + if c >= 0 and c <= PUA_CHARACTER_MAX: return obj[1:] return obj +def may_have_pua(line: bytes) -> bool: + """Checks if bytes string contains pua marker""" + return b"\xef\x80" in line + + # pick the right impl json: SupportsJson = None if os.environ.get("DLT_USE_JSON") == "simplejson": from dlt.common.json import _simplejson as _json_d + json = _json_d # type: ignore[assignment] else: try: from dlt.common.json import _orjson as _json_or + json = _json_or # type: ignore[assignment] except ImportError: from dlt.common.json import _simplejson as _json_simple + json = _json_simple # type: ignore[assignment] @@ -202,5 +202,5 @@ def custom_pua_remove(obj: Any) -> Any: "custom_pua_decode", "custom_pua_decode_nested", "custom_pua_remove", - "SupportsJson" + "SupportsJson", ] diff --git a/dlt/common/json/_orjson.py b/dlt/common/json/_orjson.py index ada91cbb1b..d2d960e6ce 100644 --- a/dlt/common/json/_orjson.py +++ b/dlt/common/json/_orjson.py @@ -7,7 +7,9 @@ _impl_name = "orjson" -def _dumps(obj: Any, sort_keys: bool, pretty:bool, default:AnyFun = 
custom_encode, options: int = 0) -> bytes: +def _dumps( + obj: Any, sort_keys: bool, pretty: bool, default: AnyFun = custom_encode, options: int = 0 +) -> bytes: options = options | orjson.OPT_UTC_Z | orjson.OPT_NON_STR_KEYS if pretty: options |= orjson.OPT_INDENT_2 @@ -16,11 +18,11 @@ def _dumps(obj: Any, sort_keys: bool, pretty:bool, default:AnyFun = custom_encod return orjson.dumps(obj, default=default, option=options) -def dump(obj: Any, fp: IO[bytes], sort_keys: bool = False, pretty:bool = False) -> None: +def dump(obj: Any, fp: IO[bytes], sort_keys: bool = False, pretty: bool = False) -> None: fp.write(_dumps(obj, sort_keys, pretty)) -def typed_dump(obj: Any, fp: IO[bytes], pretty:bool = False) -> None: +def typed_dump(obj: Any, fp: IO[bytes], pretty: bool = False) -> None: fp.write(typed_dumpb(obj, pretty=pretty)) @@ -29,7 +31,7 @@ def typed_dumpb(obj: Any, sort_keys: bool = False, pretty: bool = False) -> byte def typed_dumps(obj: Any, sort_keys: bool = False, pretty: bool = False) -> str: - return typed_dumpb(obj, sort_keys, pretty).decode('utf-8') + return typed_dumpb(obj, sort_keys, pretty).decode("utf-8") def typed_loads(s: str) -> Any: @@ -40,11 +42,11 @@ def typed_loadb(s: Union[bytes, bytearray, memoryview]) -> Any: return custom_pua_decode_nested(loadb(s)) -def dumps(obj: Any, sort_keys: bool = False, pretty:bool = False) -> str: +def dumps(obj: Any, sort_keys: bool = False, pretty: bool = False) -> str: return _dumps(obj, sort_keys, pretty).decode("utf-8") -def dumpb(obj: Any, sort_keys: bool = False, pretty:bool = False) -> bytes: +def dumpb(obj: Any, sort_keys: bool = False, pretty: bool = False) -> bytes: return _dumps(obj, sort_keys, pretty) diff --git a/dlt/common/json/_simplejson.py b/dlt/common/json/_simplejson.py index c670717527..10ee17e2f6 100644 --- a/dlt/common/json/_simplejson.py +++ b/dlt/common/json/_simplejson.py @@ -15,7 +15,7 @@ _impl_name = "simplejson" -def dump(obj: Any, fp: IO[bytes], sort_keys: bool = False, pretty:bool = False) -> None: +def dump(obj: Any, fp: IO[bytes], sort_keys: bool = False, pretty: bool = False) -> None: if pretty: indent = 2 else: @@ -28,13 +28,13 @@ def dump(obj: Any, fp: IO[bytes], sort_keys: bool = False, pretty:bool = False) default=custom_encode, encoding=None, ensure_ascii=False, - separators=(',', ':'), + separators=(",", ":"), sort_keys=sort_keys, - indent=indent + indent=indent, ) -def typed_dump(obj: Any, fp: IO[bytes], pretty:bool = False) -> None: +def typed_dump(obj: Any, fp: IO[bytes], pretty: bool = False) -> None: if pretty: indent = 2 else: @@ -47,10 +47,11 @@ def typed_dump(obj: Any, fp: IO[bytes], pretty:bool = False) -> None: default=custom_pua_encode, encoding=None, ensure_ascii=False, - separators=(',', ':'), - indent=indent + separators=(",", ":"), + indent=indent, ) + def typed_dumps(obj: Any, sort_keys: bool = False, pretty: bool = False) -> str: indent = 2 if pretty else None return simplejson.dumps( @@ -59,8 +60,8 @@ def typed_dumps(obj: Any, sort_keys: bool = False, pretty: bool = False) -> str: default=custom_pua_encode, encoding=None, ensure_ascii=False, - separators=(',', ':'), - indent=indent + separators=(",", ":"), + indent=indent, ) @@ -69,14 +70,14 @@ def typed_loads(s: str) -> Any: def typed_dumpb(obj: Any, sort_keys: bool = False, pretty: bool = False) -> bytes: - return typed_dumps(obj, sort_keys, pretty).encode('utf-8') + return typed_dumps(obj, sort_keys, pretty).encode("utf-8") def typed_loadb(s: Union[bytes, bytearray, memoryview]) -> Any: return custom_pua_decode_nested(loadb(s)) 
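# A short, hedged sketch of the typed round trip shared by both implementations: on dump,
# values that plain JSON cannot carry (Decimal, bytes, ...) are tagged with private-use-area
# characters (see custom_pua_encode above) and the typed load restores the Python types.
# may_have_pua() lets callers skip the decode pass when no marker bytes are present.
from decimal import Decimal

from dlt.common.json import json, may_have_pua

row = {"amount": Decimal("10.99"), "payload": b"\x00\x01"}
raw = json.typed_dumpb(row)                 # bytes; encoded values carry PUA prefixes
assert may_have_pua(raw)                    # the marker bytes b"\xef\x80" are present
restored = json.typed_loadb(raw)
assert restored["amount"] == Decimal("10.99") and restored["payload"] == b"\x00\x01"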
-def dumps(obj: Any, sort_keys: bool = False, pretty:bool = False) -> str: +def dumps(obj: Any, sort_keys: bool = False, pretty: bool = False) -> str: if pretty: indent = 2 else: @@ -87,13 +88,13 @@ def dumps(obj: Any, sort_keys: bool = False, pretty:bool = False) -> str: default=custom_encode, encoding=None, ensure_ascii=False, - separators=(',', ':'), + separators=(",", ":"), sort_keys=sort_keys, - indent=indent + indent=indent, ) -def dumpb(obj: Any, sort_keys: bool = False, pretty:bool = False) -> bytes: +def dumpb(obj: Any, sort_keys: bool = False, pretty: bool = False) -> bytes: return dumps(obj, sort_keys, pretty).encode("utf-8") diff --git a/dlt/common/libs/pyarrow.py b/dlt/common/libs/pyarrow.py index fb2f5c2e72..cb6d8b371b 100644 --- a/dlt/common/libs/pyarrow.py +++ b/dlt/common/libs/pyarrow.py @@ -1,24 +1,30 @@ from typing import Any, Tuple, Optional, Union, Callable, Iterable, Iterator, Sequence, Tuple +from copy import copy + from dlt import version from dlt.common.exceptions import MissingDependencyException -from dlt.common.schema.typing import TTableSchemaColumns +from dlt.common.schema.typing import DLT_NAME_PREFIX, TTableSchemaColumns from dlt.common.destination.capabilities import DestinationCapabilitiesContext -from dlt.common.schema.typing import TColumnType, TColumnSchemaBase -from dlt.common.data_types import TDataType -from dlt.common.typing import TFileOrPath +from dlt.common.schema.typing import TColumnType +from dlt.common.typing import StrStr, TFileOrPath +from dlt.common.normalizers.naming import NamingConvention try: import pyarrow import pyarrow.parquet except ModuleNotFoundError: - raise MissingDependencyException("DLT parquet Helpers", [f"{version.DLT_PKG_NAME}[parquet]"], "DLT Helpers for for parquet.") + raise MissingDependencyException( + "DLT parquet Helpers", [f"{version.DLT_PKG_NAME}[parquet]"], "DLT Helpers for parquet."
+ ) TAnyArrowItem = Union[pyarrow.Table, pyarrow.RecordBatch] -def get_py_arrow_datatype(column: TColumnType, caps: DestinationCapabilitiesContext, tz: str) -> Any: +def get_py_arrow_datatype( + column: TColumnType, caps: DestinationCapabilitiesContext, tz: str +) -> Any: column_type = column["data_type"] if column_type == "text": return pyarrow.string() @@ -37,7 +43,11 @@ def get_py_arrow_datatype(column: TColumnType, caps: DestinationCapabilitiesCont return pyarrow.string() elif column_type == "decimal": precision, scale = column.get("precision"), column.get("scale") - precision_tuple = (precision, scale) if precision is not None and scale is not None else caps.decimal_precision + precision_tuple = ( + (precision, scale) + if precision is not None and scale is not None + else caps.decimal_precision + ) return get_py_arrow_numeric(precision_tuple) elif column_type == "wei": return get_py_arrow_numeric(caps.wei_precision) @@ -91,8 +101,7 @@ def get_pyarrow_int(precision: Optional[int]) -> Any: def _get_column_type_from_py_arrow(dtype: pyarrow.DataType) -> TColumnType: - """Returns (data_type, precision, scale) tuple from pyarrow.DataType - """ + """Returns (data_type, precision, scale) tuple from pyarrow.DataType""" if pyarrow.types.is_string(dtype) or pyarrow.types.is_large_string(dtype): return dict(data_type="text") elif pyarrow.types.is_floating(dtype): @@ -124,7 +133,7 @@ def _get_column_type_from_py_arrow(dtype: pyarrow.DataType) -> TColumnType: return dict(data_type="time", precision=precision) elif pyarrow.types.is_integer(dtype): result: TColumnType = dict(data_type="bigint") - if dtype.bit_width != 64: # 64bit is a default bigint + if dtype.bit_width != 64: # 64bit is a default bigint result["precision"] = dtype.bit_width return result elif pyarrow.types.is_fixed_size_binary(dtype): @@ -140,23 +149,133 @@ def _get_column_type_from_py_arrow(dtype: pyarrow.DataType) -> TColumnType: def remove_null_columns(item: TAnyArrowItem) -> TAnyArrowItem: - """Remove all columns of datatype pyarrow.null() from the table or record batch - """ + """Remove all columns of datatype pyarrow.null() from the table or record batch""" + return remove_columns( + item, [field.name for field in item.schema if pyarrow.types.is_null(field.type)] + ) + + +def remove_columns(item: TAnyArrowItem, columns: Sequence[str]) -> TAnyArrowItem: + """Remove `columns` from Arrow `item`""" + if not columns: + return item + if isinstance(item, pyarrow.Table): - return item.drop([field.name for field in item.schema if pyarrow.types.is_null(field.type)]) + return item.drop(columns) elif isinstance(item, pyarrow.RecordBatch): - null_idx = [i for i, col in enumerate(item.columns) if pyarrow.types.is_null(col.type)] - new_schema = item.schema - for i in reversed(null_idx): - new_schema = new_schema.remove(i) + # NOTE: select is available in pyarrow 12 an up + return item.select([n for n in item.schema.names if n not in columns]) # reverse selection + else: + raise ValueError(item) + + +def append_column(item: TAnyArrowItem, name: str, data: Any) -> TAnyArrowItem: + """Appends new column to Table or RecordBatch""" + if isinstance(item, pyarrow.Table): + return item.append_column(name, data) + elif isinstance(item, pyarrow.RecordBatch): + new_field = pyarrow.field(name, data.type) return pyarrow.RecordBatch.from_arrays( - [col for i, col in enumerate(item.columns) if i not in null_idx], - schema=new_schema + item.columns + [data], schema=item.schema.append(new_field) ) else: raise ValueError(item) +def rename_columns(item: 
TAnyArrowItem, new_column_names: Sequence[str]) -> TAnyArrowItem: + """Rename arrow columns on Table or RecordBatch, returns same data but with renamed schema""" + + if list(item.schema.names) == list(new_column_names): + # No need to rename + return item + + if isinstance(item, pyarrow.Table): + return item.rename_columns(new_column_names) + elif isinstance(item, pyarrow.RecordBatch): + new_fields = [ + field.with_name(new_name) for new_name, field in zip(new_column_names, item.schema) + ] + return pyarrow.RecordBatch.from_arrays(item.columns, schema=pyarrow.schema(new_fields)) + else: + raise TypeError(f"Unsupported data item type {type(item)}") + + +def normalize_py_arrow_schema( + item: TAnyArrowItem, + columns: TTableSchemaColumns, + naming: NamingConvention, + caps: DestinationCapabilitiesContext, +) -> TAnyArrowItem: + """Normalize arrow `item` schema according to the `columns`. + + 1. arrow schema field names will be normalized according to `naming` + 2. arrows columns will be reordered according to `columns` + 3. empty columns will be inserted if they are missing, types will be generated using `caps` + """ + rename_mapping = get_normalized_arrow_fields_mapping(item, naming) + rev_mapping = {v: k for k, v in rename_mapping.items()} + dlt_table_prefix = naming.normalize_table_identifier(DLT_NAME_PREFIX) + + # remove all columns that are dlt columns but are not present in arrow schema. we do not want to add such columns + # that should happen in the normalizer + columns = { + name: column + for name, column in columns.items() + if not name.startswith(dlt_table_prefix) or name in rev_mapping + } + + # check if nothing to rename + if list(rename_mapping.keys()) == list(rename_mapping.values()): + # check if nothing to reorder + if list(rename_mapping.keys())[: len(columns)] == list(columns.keys()): + return item + + schema = item.schema + new_fields = [] + new_columns = [] + + for column_name, column in columns.items(): + # get original field name + field_name = rev_mapping.pop(column_name, column_name) + if field_name in rename_mapping: + idx = schema.get_field_index(field_name) + # use renamed field + new_fields.append(schema.field(idx).with_name(column_name)) + new_columns.append(item.column(idx)) + else: + # column does not exist in pyarrow. create empty field and column + new_field = pyarrow.field( + column_name, + get_py_arrow_datatype(column, caps, "UTC"), + nullable=column.get("nullable", True), + ) + new_fields.append(new_field) + new_columns.append(pyarrow.nulls(item.num_rows, type=new_field.type)) + + # add the remaining columns + for column_name, field_name in rev_mapping.items(): + idx = schema.get_field_index(field_name) + # use renamed field + new_fields.append(schema.field(idx).with_name(column_name)) + new_columns.append(item.column(idx)) + + # create desired type + return item.__class__.from_arrays(new_columns, schema=pyarrow.schema(new_fields)) + + +def get_normalized_arrow_fields_mapping(item: TAnyArrowItem, naming: NamingConvention) -> StrStr: + """Normalizes schema field names and returns mapping from original to normalized name. 
Raises on name clashes""" + norm_f = naming.normalize_identifier + name_mapping = {n.name: norm_f(n.name) for n in item.schema} + # verify if names uniquely normalize + normalized_names = set(name_mapping.values()) + if len(name_mapping) != len(normalized_names): + raise NameNormalizationClash( + f"Arrow schema fields normalized from {list(name_mapping.keys())} to" + f" {list(normalized_names)}" + ) + return name_mapping + def py_arrow_to_table_schema_columns(schema: pyarrow.Schema) -> TTableSchemaColumns: """Convert a PyArrow schema to a table schema columns dict. @@ -194,7 +313,8 @@ def is_arrow_item(item: Any) -> bool: return isinstance(item, (pyarrow.Table, pyarrow.RecordBatch)) -TNewColumns = Sequence[Tuple[pyarrow.Field, Callable[[pyarrow.Table], Iterable[Any]]]] +TNewColumns = Sequence[Tuple[int, pyarrow.Field, Callable[[pyarrow.Table], Iterable[Any]]]] +"""Sequence of tuples: (field index, field, generating function)""" def pq_stream_with_new_columns( @@ -206,7 +326,7 @@ def pq_stream_with_new_columns( Args: parquet_file: path or file object to parquet file - columns: list of columns to add in the form of (`pyarrow.Field`, column_value_callback) + columns: list of columns to add in the form of (insertion index, `pyarrow.Field`, column_value_callback) The callback should accept a `pyarrow.Table` and return an array of values for the column. row_groups_per_read: number of row groups to read at a time. Defaults to 1. @@ -217,7 +337,18 @@ def pq_stream_with_new_columns( n_groups = reader.num_row_groups # Iterate through n row groups at a time for i in range(0, n_groups, row_groups_per_read): - tbl: pyarrow.Table = reader.read_row_groups(range(i, min(i + row_groups_per_read, n_groups))) - for col in columns: - tbl = tbl.append_column(col[0], col[1](tbl)) + tbl: pyarrow.Table = reader.read_row_groups( + range(i, min(i + row_groups_per_read, n_groups)) + ) + for idx, field, gen_ in columns: + if idx == -1: + tbl = tbl.append_column(field, gen_(tbl)) + else: + tbl = tbl.add_column(idx, field, gen_(tbl)) yield tbl + + +class NameNormalizationClash(ValueError): + def __init__(self, reason: str) -> None: + msg = f"Arrow column name clash after input data normalization. 
{reason}" + super().__init__(msg) diff --git a/dlt/common/libs/pydantic.py b/dlt/common/libs/pydantic.py index c66d67f1f7..58829f0592 100644 --- a/dlt/common/libs/pydantic.py +++ b/dlt/common/libs/pydantic.py @@ -1,38 +1,104 @@ -from typing import Type, Union, get_type_hints, get_args, Any +from __future__ import annotations as _annotations +import inspect +from copy import copy +from typing import ( + Dict, + Generic, + Set, + TypedDict, + List, + Type, + Union, + TypeVar, + Any, +) +from typing_extensions import Annotated, get_args, get_origin from dlt.common.exceptions import MissingDependencyException -from dlt.common.schema.typing import TTableSchemaColumns -from dlt.common.data_types import py_type_to_sc_type, TDataType -from dlt.common.typing import is_optional_type, extract_inner_type, is_list_generic_type, is_dict_generic_type, is_union +from dlt.common.schema import DataValidationError +from dlt.common.schema.typing import TSchemaEvolutionMode, TTableSchemaColumns +from dlt.common.data_types import py_type_to_sc_type +from dlt.common.typing import ( + TDataItem, + TDataItems, + extract_union_types, + is_annotated, + is_optional_type, + extract_inner_type, + is_list_generic_type, + is_dict_generic_type, + is_union_type, +) try: - from pydantic import BaseModel, Field, Json + from pydantic import BaseModel, ValidationError, Json, create_model except ImportError: - raise MissingDependencyException("DLT pydantic Helpers", ["pydantic"], "DLT Helpers for for pydantic.") + raise MissingDependencyException( + "dlt Pydantic helpers", ["pydantic"], "Both Pydantic 1.x and 2.x are supported" + ) +_PYDANTIC_2 = False +try: + from pydantic import PydanticDeprecatedSince20 + + _PYDANTIC_2 = True + # hide deprecation warning + import warnings + + warnings.simplefilter("ignore", category=PydanticDeprecatedSince20) +except ImportError: + pass + +_TPydanticModel = TypeVar("_TPydanticModel", bound=BaseModel) + + +class ListModel(BaseModel, Generic[_TPydanticModel]): + items: List[_TPydanticModel] + + +class DltConfig(TypedDict, total=False): + """dlt configuration that can be attached to Pydantic model + + Example below removes `nested` field from the resulting dlt schema. + >>> class ItemModel(BaseModel): + >>> b: bool + >>> nested: Dict[str, Any] + >>> dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True} + """ + + skip_complex_types: bool + """If True, columns of complex types (`dict`, `list`, `BaseModel`) will be excluded from dlt schema generated from the model""" -def pydantic_to_table_schema_columns(model: Union[BaseModel, Type[BaseModel]], skip_complex_types: bool = False) -> TTableSchemaColumns: + +def pydantic_to_table_schema_columns( + model: Union[BaseModel, Type[BaseModel]] +) -> TTableSchemaColumns: """Convert a pydantic model to a table schema columns dict + See also DltConfig for more control over how the schema is created + Args: model: The pydantic model to convert. Can be a class or an instance. - skip_complex_types: If True, columns of complex types (`dict`, `list`, `BaseModel`) will be excluded from the result. 
+ Returns: TTableSchemaColumns: table schema columns dict """ + skip_complex_types = False + if hasattr(model, "dlt_config"): + skip_complex_types = model.dlt_config.get("skip_complex_types", False) + result: TTableSchemaColumns = {} - fields = model.__fields__ - for field_name, field in fields.items(): + for field_name, field in model.__fields__.items(): # type: ignore[union-attr] annotation = field.annotation - if inner_annotation := getattr(annotation, 'inner_type', None): + if inner_annotation := getattr(annotation, "inner_type", None): # This applies to pydantic.Json fields, the inner type is the type after json parsing # (In pydantic 2 the outer annotation is the final type) annotation = inner_annotation nullable = is_optional_type(annotation) - if is_union(annotation): + if is_union_type(annotation): inner_type = get_args(annotation)[0] else: inner_type = extract_inner_type(annotation) @@ -49,8 +115,13 @@ def pydantic_to_table_schema_columns(model: Union[BaseModel, Type[BaseModel]], s inner_type = dict name = field.alias or field_name - data_type = py_type_to_sc_type(inner_type) - if data_type == 'complex' and skip_complex_types: + try: + data_type = py_type_to_sc_type(inner_type) + except TypeError: + # try to coerce unknown type to text + data_type = "text" + + if data_type == "complex" and skip_complex_types: continue result[name] = { @@ -60,3 +131,266 @@ def pydantic_to_table_schema_columns(model: Union[BaseModel, Type[BaseModel]], s } return result + + +def column_mode_to_extra(column_mode: TSchemaEvolutionMode) -> str: + extra = "forbid" + if column_mode == "evolve": + extra = "allow" + elif column_mode == "discard_value": + extra = "ignore" + return extra + + +def extra_to_column_mode(extra: str) -> TSchemaEvolutionMode: + if extra == "forbid": + return "freeze" + if extra == "allow": + return "evolve" + return "discard_value" + + +def get_extra_from_model(model: Type[BaseModel]) -> str: + default_extra = "ignore" + if _PYDANTIC_2: + default_extra = model.model_config.get("extra", default_extra) + else: + default_extra = str(model.Config.extra) or default_extra # type: ignore[attr-defined] + return default_extra + + +def apply_schema_contract_to_model( + model: Type[_TPydanticModel], + column_mode: TSchemaEvolutionMode, + data_mode: TSchemaEvolutionMode = "freeze", +) -> Type[_TPydanticModel]: + """Configures or re-creates `model` so it behaves according to `column_mode` and `data_mode` settings. + + `column_mode` sets the model behavior when unknown field is found. + `data_mode` sets model behavior when known field does not validate. currently `evolve` and `freeze` are supported here. + + `discard_row` is implemented in `validate_item`. + """ + if data_mode == "evolve": + # create a lenient model that accepts any data + model = create_model(model.__name__ + "Any", **{n: (Any, None) for n in model.__fields__}) # type: ignore[call-overload, attr-defined] + elif data_mode == "discard_value": + raise NotImplementedError( + "data_mode is discard_value. Cannot discard defined fields with validation errors using" + " Pydantic models." 
+ ) + + extra = column_mode_to_extra(column_mode) + + if extra == get_extra_from_model(model): + # no need to change the model + return model + + if _PYDANTIC_2: + config = copy(model.model_config) + config["extra"] = extra # type: ignore[typeddict-item] + else: + from pydantic.config import prepare_config + + config = copy(model.Config) # type: ignore[attr-defined] + config.extra = extra # type: ignore[attr-defined] + prepare_config(config, model.Config.__name__) # type: ignore[attr-defined] + + _child_models: Dict[int, Type[BaseModel]] = {} + + def _process_annotation(t_: Type[Any]) -> Type[Any]: + """Recursively recreates models with applied schema contract""" + if is_annotated(t_): + a_t, *a_m = get_args(t_) + return Annotated[_process_annotation(a_t), a_m] # type: ignore + elif is_list_generic_type(t_): + l_t: Type[Any] = get_args(t_)[0] + try: + return get_origin(t_)[_process_annotation(l_t)] # type: ignore[no-any-return] + except TypeError: + # this is Python3.8 fallback. it does not support indexers on types + return List[_process_annotation(l_t)] # type: ignore + elif is_dict_generic_type(t_): + k_t: Type[Any] + v_t: Type[Any] + k_t, v_t = get_args(t_) + try: + return get_origin(t_)[k_t, _process_annotation(v_t)] # type: ignore[no-any-return] + except TypeError: + # this is Python3.8 fallback. it does not support indexers on types + return Dict[k_t, _process_annotation(v_t)] # type: ignore + elif is_union_type(t_): + u_t_s = tuple(_process_annotation(u_t) for u_t in extract_union_types(t_)) + return Union[u_t_s] # type: ignore[return-value] + elif inspect.isclass(t_) and issubclass(t_, BaseModel): + # types must be same before and after processing + if id(t_) in _child_models: + return _child_models[id(t_)] + else: + _child_models[id(t_)] = child_model = apply_schema_contract_to_model( + t_, column_mode, data_mode + ) + return child_model + return t_ + + def _rebuild_annotated(f: Any) -> Type[Any]: + if hasattr(f, "rebuild_annotation"): + return f.rebuild_annotation() # type: ignore[no-any-return] + else: + return f.annotation # type: ignore[no-any-return] + + new_model: Type[_TPydanticModel] = create_model( # type: ignore[call-overload] + model.__name__ + "Extra" + extra.title(), + __config__=config, + **{n: (_process_annotation(_rebuild_annotated(f)), f) for n, f in model.__fields__.items()}, # type: ignore[attr-defined] + ) + # pass dlt config along + dlt_config = getattr(model, "dlt_config", None) + if dlt_config: + new_model.dlt_config = dlt_config # type: ignore[attr-defined] + return new_model + + +def create_list_model( + model: Type[_TPydanticModel], data_mode: TSchemaEvolutionMode = "freeze" +) -> Type[ListModel[_TPydanticModel]]: + """Creates a model from `model` for validating list of items in batch according to `data_mode` + + Currently only freeze is supported. See comments in the code + """ + # TODO: use LenientList to create list model that automatically discards invalid items + # https://github.com/pydantic/pydantic/issues/2274 and https://gist.github.com/dmontagu/7f0cef76e5e0e04198dd608ad7219573 + return create_model( + "List" + __name__, items=(List[model], ...) 
# type: ignore[return-value,valid-type] + ) + + +def validate_items( + table_name: str, + list_model: Type[ListModel[_TPydanticModel]], + items: List[TDataItem], + column_mode: TSchemaEvolutionMode, + data_mode: TSchemaEvolutionMode, +) -> List[_TPydanticModel]: + """Validates list of `item` with `list_model` and returns parsed Pydantic models + + `list_model` should be created with `create_list_model` and have `items` field which this function returns. + """ + try: + return list_model(items=items).items + except ValidationError as e: + deleted: Set[int] = set() + for err in e.errors(): + # TODO: we can get rid of most of the code if we use LenientList as explained above + if len(err["loc"]) >= 2: + err_idx = int(err["loc"][1]) + if err_idx in deleted: + # already dropped + continue + err_item = items[err_idx - len(deleted)] + else: + # top level error which means misalignment of list model and items + raise DataValidationError( + None, + table_name, + str(err["loc"]), + "columns", + "freeze", + list_model, + {"columns": "freeze"}, + items, + ) from e + # raise on freeze + if err["type"] == "extra_forbidden": + if column_mode == "freeze": + raise DataValidationError( + None, + table_name, + str(err["loc"]), + "columns", + "freeze", + list_model, + {"columns": "freeze"}, + err_item, + ) from e + elif column_mode == "discard_row": + # pop at the right index + items.pop(err_idx - len(deleted)) + # store original index so we do not pop again + deleted.add(err_idx) + else: + raise NotImplementedError( + f"{column_mode} column mode not implemented for Pydantic validation" + ) + else: + if data_mode == "freeze": + raise DataValidationError( + None, + table_name, + str(err["loc"]), + "data_type", + "freeze", + list_model, + {"data_type": "freeze"}, + err_item, + ) from e + elif data_mode == "discard_row": + items.pop(err_idx - len(deleted)) + deleted.add(err_idx) + else: + raise NotImplementedError( + f"{column_mode} column mode not implemented for Pydantic validation" + ) + + # validate again with error items removed + return validate_items(table_name, list_model, items, column_mode, data_mode) + + +def validate_item( + table_name: str, + model: Type[_TPydanticModel], + item: TDataItems, + column_mode: TSchemaEvolutionMode, + data_mode: TSchemaEvolutionMode, +) -> _TPydanticModel: + """Validates `item` against model `model` and returns an instance of it""" + try: + return model.parse_obj(item) + except ValidationError as e: + for err in e.errors(): + # raise on freeze + if err["type"] == "extra_forbidden": + if column_mode == "freeze": + raise DataValidationError( + None, + table_name, + str(err["loc"]), + "columns", + "freeze", + model, + {"columns": "freeze"}, + item, + ) from e + elif column_mode == "discard_row": + return None + raise NotImplementedError( + f"{column_mode} column mode not implemented for Pydantic validation" + ) + else: + if data_mode == "freeze": + raise DataValidationError( + None, + table_name, + str(err["loc"]), + "data_type", + "freeze", + model, + {"data_type": "freeze"}, + item, + ) from e + elif data_mode == "discard_row": + return None + raise NotImplementedError( + f"{data_mode} data mode not implemented for Pydantic validation" + ) + raise AssertionError("unreachable") diff --git a/dlt/common/managed_thread_pool.py b/dlt/common/managed_thread_pool.py new file mode 100644 index 0000000000..ea2a0e6b47 --- /dev/null +++ b/dlt/common/managed_thread_pool.py @@ -0,0 +1,27 @@ +from typing import Optional + +import atexit +from concurrent.futures import ThreadPoolExecutor 
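# A brief, illustrative sketch of how the pool defined below is meant to be used: the executor
# is created lazily on first access to `.thread_pool` (which also registers an atexit shutdown
# hook), so code paths that never submit work never start any threads.
from dlt.common.managed_thread_pool import ManagedThreadPool

pool = ManagedThreadPool(max_workers=4)
future = pool.thread_pool.submit(sum, (1, 2, 3))  # first access creates the executor
assert future.result() == 6
pool.stop()                                       # safe to call even if the pool was never started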
+ + +class ManagedThreadPool: + def __init__(self, max_workers: int = 1) -> None: + self._max_workers = max_workers + self._thread_pool: Optional[ThreadPoolExecutor] = None + + def _create_thread_pool(self) -> None: + assert not self._thread_pool, "Thread pool already created" + self._thread_pool = ThreadPoolExecutor(self._max_workers) + # flush pool on exit + atexit.register(self.stop) + + @property + def thread_pool(self) -> ThreadPoolExecutor: + if not self._thread_pool: + self._create_thread_pool() + return self._thread_pool + + def stop(self, wait: bool = True) -> None: + if self._thread_pool: + self._thread_pool.shutdown(wait=wait) + self._thread_pool = None diff --git a/dlt/common/normalizers/__init__.py b/dlt/common/normalizers/__init__.py index e106419df9..2ff41d4c12 100644 --- a/dlt/common/normalizers/__init__.py +++ b/dlt/common/normalizers/__init__.py @@ -4,6 +4,8 @@ __all__ = [ "NormalizersConfiguration", - "TJSONNormalizer", "TNormalizersConfig", - "explicit_normalizers", "import_normalizers" + "TJSONNormalizer", + "TNormalizersConfig", + "explicit_normalizers", + "import_normalizers", ] diff --git a/dlt/common/normalizers/configuration.py b/dlt/common/normalizers/configuration.py index 2c13367abd..6957417f9d 100644 --- a/dlt/common/normalizers/configuration.py +++ b/dlt/common/normalizers/configuration.py @@ -24,5 +24,5 @@ def on_resolved(self) -> None: self.naming = self.destination_capabilities.naming_convention if TYPE_CHECKING: - def __init__(self, naming: str = None, json_normalizer: TJSONNormalizer = None) -> None: - ... + + def __init__(self, naming: str = None, json_normalizer: TJSONNormalizer = None) -> None: ... diff --git a/dlt/common/normalizers/exceptions.py b/dlt/common/normalizers/exceptions.py index b8ad4baed3..248aecc7fe 100644 --- a/dlt/common/normalizers/exceptions.py +++ b/dlt/common/normalizers/exceptions.py @@ -9,4 +9,7 @@ class InvalidJsonNormalizer(NormalizerException): def __init__(self, required_normalizer: str, present_normalizer: str) -> None: self.required_normalizer = required_normalizer self.present_normalizer = present_normalizer - super().__init__(f"Operation requires {required_normalizer} normalizer while {present_normalizer} normalizer is present") + super().__init__( + f"Operation requires {required_normalizer} normalizer while" + f" {present_normalizer} normalizer is present" + ) diff --git a/dlt/common/normalizers/json/__init__.py b/dlt/common/normalizers/json/__init__.py index e1c5c3b846..a13bab15f4 100644 --- a/dlt/common/normalizers/json/__init__.py +++ b/dlt/common/normalizers/json/__init__.py @@ -1,7 +1,8 @@ import abc -from typing import Any, Generic, Type, Iterator, Tuple, Protocol, TYPE_CHECKING, TypeVar +from typing import Any, Generic, Type, Generator, Tuple, Protocol, TYPE_CHECKING, TypeVar from dlt.common.typing import DictStrAny, TDataItem, StrAny + if TYPE_CHECKING: from dlt.common.schema import Schema else: @@ -10,19 +11,21 @@ # type definitions for json normalization function # iterator of form ((table_name, parent_table), dict) must be returned from normalization function -TNormalizedRowIterator = Iterator[Tuple[Tuple[str, str], StrAny]] +TNormalizedRowIterator = Generator[Tuple[Tuple[str, str], StrAny], bool, None] # type var for data item normalizer config TNormalizerConfig = TypeVar("TNormalizerConfig", bound=Any) -class DataItemNormalizer(abc.ABC, Generic[TNormalizerConfig]): +class DataItemNormalizer(abc.ABC, Generic[TNormalizerConfig]): @abc.abstractmethod def __init__(self, schema: Schema) -> None: pass 
@abc.abstractmethod - def normalize_data_item(self, item: TDataItem, load_id: str, table_name: str) -> TNormalizedRowIterator: + def normalize_data_item( + self, item: TDataItem, load_id: str, table_name: str + ) -> TNormalizedRowIterator: pass @abc.abstractmethod diff --git a/dlt/common/normalizers/json/relational.py b/dlt/common/normalizers/json/relational.py index 98b34e298d..e33bf2ab35 100644 --- a/dlt/common/normalizers/json/relational.py +++ b/dlt/common/normalizers/json/relational.py @@ -9,11 +9,16 @@ from dlt.common.schema.typing import TColumnSchema, TColumnName, TSimpleRegex from dlt.common.schema.utils import column_name_validator from dlt.common.utils import digest128, update_dict_nested -from dlt.common.normalizers.json import TNormalizedRowIterator, wrap_in_dict, DataItemNormalizer as DataItemNormalizerBase +from dlt.common.normalizers.json import ( + TNormalizedRowIterator, + wrap_in_dict, + DataItemNormalizer as DataItemNormalizerBase, +) from dlt.common.validation import validate_dict EMPTY_KEY_IDENTIFIER = "_empty" # replace empty keys with this + class TDataItemRow(TypedDict, total=False): _dlt_id: str # unique id of current row @@ -48,6 +53,8 @@ class DataItemNormalizer(DataItemNormalizerBase[RelationalNormalizerConfig]): _skip_primary_key: Dict[str, bool] def __init__(self, schema: Schema) -> None: + """This item normalizer works with nested dictionaries. It flattens dictionaries and descends into lists. + It yields row dictionaries at each nesting level.""" self.schema = schema self._reset() @@ -62,7 +69,7 @@ def _reset(self) -> None: # for those paths the complex nested objects should be left in place def _is_complex_type(self, table_name: str, field_name: str, _r_lvl: int) -> bool: # turn everything at the recursion level into complex type - max_nesting = self.max_nesting + max_nesting = self.max_nesting schema = self.schema assert _r_lvl <= max_nesting @@ -81,14 +88,9 @@ def _is_complex_type(self, table_name: str, field_name: str, _r_lvl: int) -> boo data_type = column["data_type"] return data_type == "complex" - def _flatten( - self, - table: str, - dict_row: TDataItemRow, - _r_lvl: int + self, table: str, dict_row: TDataItemRow, _r_lvl: int ) -> Tuple[TDataItemRow, Dict[Tuple[str, ...], Sequence[Any]]]: - out_rec_row: DictStrAny = {} out_rec_list: Dict[Tuple[str, ...], Sequence[Any]] = {} schema_naming = self.schema.naming @@ -102,7 +104,9 @@ def norm_row_dicts(dict_row: StrAny, __r_lvl: int, path: Tuple[str, ...] 
= ()) - norm_k = EMPTY_KEY_IDENTIFIER # if norm_k != k: # print(f"{k} -> {norm_k}") - child_name = norm_k if path == () else schema_naming.shorten_fragments(*path, norm_k) + child_name = ( + norm_k if path == () else schema_naming.shorten_fragments(*path, norm_k) + ) # for lists and dicts we must check if type is possibly complex if isinstance(v, (dict, list)): if not self._is_complex_type(table, child_name, __r_lvl): @@ -129,7 +133,6 @@ def _get_child_row_hash(parent_row_id: str, child_table: str, list_idx: int) -> # and all child tables must be lists return digest128(f"{parent_row_id}_{child_table}_{list_idx}", DLT_ID_LENGTH_BYTES) - @staticmethod def _link_row(row: TDataItemRowChild, parent_row_id: str, list_idx: int) -> TDataItemRowChild: assert parent_row_id @@ -142,7 +145,9 @@ def _link_row(row: TDataItemRowChild, parent_row_id: str, list_idx: int) -> TDat def _extend_row(extend: DictStrAny, row: TDataItemRow) -> None: row.update(extend) # type: ignore - def _add_row_id(self, table: str, row: TDataItemRow, parent_row_id: str, pos: int, _r_lvl: int) -> str: + def _add_row_id( + self, table: str, row: TDataItemRow, parent_row_id: str, pos: int, _r_lvl: int + ) -> str: # row_id is always random, no matter if primary_key is present or not row_id = generate_dlt_id() if _r_lvl > 0: @@ -181,19 +186,22 @@ def _normalize_list( ident_path: Tuple[str, ...], parent_path: Tuple[str, ...], parent_row_id: Optional[str] = None, - _r_lvl: int = 0 + _r_lvl: int = 0, ) -> TNormalizedRowIterator: - v: TDataItemRowChild = None table = self.schema.naming.shorten_fragments(*parent_path, *ident_path) for idx, v in enumerate(seq): # yield child table row if isinstance(v, dict): - yield from self._normalize_row(v, extend, ident_path, parent_path, parent_row_id, idx, _r_lvl) + yield from self._normalize_row( + v, extend, ident_path, parent_path, parent_row_id, idx, _r_lvl + ) elif isinstance(v, list): # to normalize lists of lists, we must create a tracking intermediary table by creating a mock row - yield from self._normalize_row({"list": v}, extend, ident_path, parent_path, parent_row_id, idx, _r_lvl + 1) + yield from self._normalize_row( + {"list": v}, extend, ident_path, parent_path, parent_row_id, idx, _r_lvl + 1 + ) else: # list of simple types child_row_hash = DataItemNormalizer._get_child_row_hash(parent_row_id, table, idx) @@ -211,9 +219,8 @@ def _normalize_row( parent_path: Tuple[str, ...] 
= (), parent_row_id: Optional[str] = None, pos: Optional[int] = None, - _r_lvl: int = 0 + _r_lvl: int = 0, ) -> TNormalizedRowIterator: - schema = self.schema table = schema.naming.shorten_fragments(*parent_path, *ident_path) @@ -227,18 +234,24 @@ def _normalize_row( row_id = self._add_row_id(table, flattened_row, parent_row_id, pos, _r_lvl) # find fields to propagate to child tables in config - extend.update(self._get_propagated_values(table, flattened_row, _r_lvl )) + extend.update(self._get_propagated_values(table, flattened_row, _r_lvl)) # yield parent table first - yield (table, schema.naming.shorten_fragments(*parent_path)), flattened_row + should_descend = yield (table, schema.naming.shorten_fragments(*parent_path)), flattened_row + if should_descend is False: + return # normalize and yield lists for list_path, list_content in lists.items(): - yield from self._normalize_list(list_content, extend, list_path, parent_path + ident_path, row_id, _r_lvl + 1) + yield from self._normalize_list( + list_content, extend, list_path, parent_path + ident_path, row_id, _r_lvl + 1 + ) def extend_schema(self) -> None: # validate config - config = cast(RelationalNormalizerConfig, self.schema._normalizers_config["json"].get("config") or {}) + config = cast( + RelationalNormalizerConfig, self.schema._normalizers_config["json"].get("config") or {} + ) DataItemNormalizer._validate_normalizer_config(self.schema, config) # quick check to see if hints are applied @@ -249,12 +262,15 @@ def extend_schema(self) -> None: self.schema.merge_hints( { "not_null": [ - TSimpleRegex("_dlt_id"), TSimpleRegex("_dlt_root_id"), TSimpleRegex("_dlt_parent_id"), - TSimpleRegex("_dlt_list_idx"), TSimpleRegex("_dlt_load_id") - ], + TSimpleRegex("_dlt_id"), + TSimpleRegex("_dlt_root_id"), + TSimpleRegex("_dlt_parent_id"), + TSimpleRegex("_dlt_list_idx"), + TSimpleRegex("_dlt_load_id"), + ], "foreign_key": [TSimpleRegex("_dlt_parent_id")], "root_key": [TSimpleRegex("_dlt_root_id")], - "unique": [TSimpleRegex("_dlt_id")] + "unique": [TSimpleRegex("_dlt_id")], } ) @@ -264,15 +280,15 @@ def extend_schema(self) -> None: def extend_table(self, table_name: str) -> None: # if the table has a merge w_d, add propagation info to normalizer table = self.schema.tables.get(table_name) - if not table.get("parent") and table["write_disposition"] == "merge": - DataItemNormalizer.update_normalizer_config(self.schema, {"propagation": { - "tables": { - table_name: { - "_dlt_id": TColumnName("_dlt_root_id") - } - }}}) - - def normalize_data_item(self, item: TDataItem, load_id: str, table_name: str) -> TNormalizedRowIterator: + if not table.get("parent") and table.get("write_disposition") == "merge": + DataItemNormalizer.update_normalizer_config( + self.schema, + {"propagation": {"tables": {table_name: {"_dlt_id": TColumnName("_dlt_root_id")}}}}, + ) + + def normalize_data_item( + self, item: TDataItem, load_id: str, table_name: str + ) -> TNormalizedRowIterator: # wrap items that are not dictionaries in dictionary, otherwise they cannot be processed by the JSON normalizer if not isinstance(item, dict): item = wrap_in_dict(item) @@ -280,7 +296,11 @@ def normalize_data_item(self, item: TDataItem, load_id: str, table_name: str) -> row = cast(TDataItemRowRoot, item) # identify load id if loaded data must be processed after loading incrementally row["_dlt_load_id"] = load_id - yield from self._normalize_row(cast(TDataItemRowChild, row), {}, (self.schema.naming.normalize_table_identifier(table_name),)) + yield from self._normalize_row( + 
cast(TDataItemRowChild, row), + {}, + (self.schema.naming.normalize_table_identifier(table_name),), + ) @classmethod def ensure_this_normalizer(cls, norm_config: TJSONNormalizer) -> None: @@ -307,4 +327,9 @@ def get_normalizer_config(cls, schema: Schema) -> RelationalNormalizerConfig: @staticmethod def _validate_normalizer_config(schema: Schema, config: RelationalNormalizerConfig) -> None: - validate_dict(RelationalNormalizerConfig, config, "./normalizers/json/config", validator_f=column_name_validator(schema.naming)) + validate_dict( + RelationalNormalizerConfig, + config, + "./normalizers/json/config", + validator_f=column_name_validator(schema.naming), + ) diff --git a/dlt/common/normalizers/naming/__init__.py b/dlt/common/normalizers/naming/__init__.py index c8c08ddd63..967fb9643e 100644 --- a/dlt/common/normalizers/naming/__init__.py +++ b/dlt/common/normalizers/naming/__init__.py @@ -1,6 +1,3 @@ from .naming import SupportsNamingConvention, NamingConvention -__all__ = [ - 'SupportsNamingConvention', "NamingConvention" -] - +__all__ = ["SupportsNamingConvention", "NamingConvention"] diff --git a/dlt/common/normalizers/naming/direct.py b/dlt/common/normalizers/naming/direct.py index 3a973106fe..09403d9e53 100644 --- a/dlt/common/normalizers/naming/direct.py +++ b/dlt/common/normalizers/naming/direct.py @@ -17,4 +17,4 @@ def make_path(self, *identifiers: Any) -> str: return self.PATH_SEPARATOR.join(filter(lambda x: x.strip(), identifiers)) def break_path(self, path: str) -> Sequence[str]: - return [ident for ident in path.split(self.PATH_SEPARATOR) if ident.strip()] \ No newline at end of file + return [ident for ident in path.split(self.PATH_SEPARATOR) if ident.strip()] diff --git a/dlt/common/normalizers/naming/duck_case.py b/dlt/common/normalizers/naming/duck_case.py index 200c0bbdad..063482a799 100644 --- a/dlt/common/normalizers/naming/duck_case.py +++ b/dlt/common/normalizers/naming/duck_case.py @@ -5,8 +5,7 @@ class NamingConvention(SnakeCaseNamingConvention): - - _CLEANUP_TABLE = str.maketrans("\n\r\"", "___") + _CLEANUP_TABLE = str.maketrans('\n\r"', "___") _RE_LEADING_DIGITS = None # do not remove leading digits @staticmethod @@ -18,7 +17,5 @@ def _normalize_identifier(identifier: str, max_length: int) -> str: # shorten identifier return NamingConvention.shorten_identifier( - NamingConvention._RE_UNDERSCORES.sub("_", normalized_ident), - identifier, - max_length + NamingConvention._RE_UNDERSCORES.sub("_", normalized_ident), identifier, max_length ) diff --git a/dlt/common/normalizers/naming/exceptions.py b/dlt/common/normalizers/naming/exceptions.py index b76362962e..572fc7e0d0 100644 --- a/dlt/common/normalizers/naming/exceptions.py +++ b/dlt/common/normalizers/naming/exceptions.py @@ -1,4 +1,3 @@ - from dlt.common.exceptions import DltException @@ -19,5 +18,8 @@ def __init__(self, naming_module: str) -> None: class InvalidNamingModule(NormalizersException): def __init__(self, naming_module: str) -> None: self.naming_module = naming_module - msg = f"Naming module {naming_module} does not implement required SupportsNamingConvention protocol" + msg = ( + f"Naming module {naming_module} does not implement required SupportsNamingConvention" + " protocol" + ) super().__init__(msg) diff --git a/dlt/common/normalizers/naming/naming.py b/dlt/common/normalizers/naming/naming.py index 80130bace6..fccb147981 100644 --- a/dlt/common/normalizers/naming/naming.py +++ b/dlt/common/normalizers/naming/naming.py @@ -7,7 +7,6 @@ class NamingConvention(ABC): - _TR_TABLE = bytes.maketrans(b"/+", 
b"ab") _DEFAULT_COLLISION_PROB = 0.001 @@ -46,7 +45,9 @@ def normalize_path(self, path: str) -> str: def normalize_tables_path(self, path: str) -> str: """Breaks path of table identifiers, normalizes components, reconstitutes and shortens the path""" - normalized_idents = [self.normalize_table_identifier(ident) for ident in self.break_path(path)] + normalized_idents = [ + self.normalize_table_identifier(ident) for ident in self.break_path(path) + ] # shorten the whole path return self.shorten_identifier(self.make_path(*normalized_idents), path, self.max_length) @@ -59,7 +60,12 @@ def shorten_fragments(self, *normalized_idents: str) -> str: @staticmethod @lru_cache(maxsize=None) - def shorten_identifier(normalized_ident: str, identifier: str, max_length: int, collision_prob: float = _DEFAULT_COLLISION_PROB) -> str: + def shorten_identifier( + normalized_ident: str, + identifier: str, + max_length: int, + collision_prob: float = _DEFAULT_COLLISION_PROB, + ) -> str: """Shortens the `name` to `max_length` and adds a tag to it to make it unique. Tag may be placed in the middle or at the end""" if max_length and len(normalized_ident) > max_length: # use original identifier to compute tag @@ -72,9 +78,14 @@ def shorten_identifier(normalized_ident: str, identifier: str, max_length: int, def _compute_tag(identifier: str, collision_prob: float) -> str: # assume that shake_128 has perfect collision resistance 2^N/2 then collision prob is 1/resistance: prob = 1/2^N/2, solving for prob # take into account that we are case insensitive in base64 so we need ~1.5x more bits (2+1) - tl_bytes = int(((2+1)*math.log2(1/(collision_prob)) // 8) + 1) - tag = base64.b64encode(hashlib.shake_128(identifier.encode("utf-8")).digest(tl_bytes) - ).rstrip(b"=").translate(NamingConvention._TR_TABLE).lower().decode("ascii") + tl_bytes = int(((2 + 1) * math.log2(1 / (collision_prob)) // 8) + 1) + tag = ( + base64.b64encode(hashlib.shake_128(identifier.encode("utf-8")).digest(tl_bytes)) + .rstrip(b"=") + .translate(NamingConvention._TR_TABLE) + .lower() + .decode("ascii") + ) return tag @staticmethod @@ -82,7 +93,11 @@ def _trim_and_tag(identifier: str, tag: str, max_length: int) -> str: assert len(tag) <= max_length remaining_length = max_length - len(tag) remaining_overflow = remaining_length % 2 - identifier = identifier[:remaining_length // 2 + remaining_overflow] + tag + identifier[len(identifier) - remaining_length // 2:] + identifier = ( + identifier[: remaining_length // 2 + remaining_overflow] + + tag + + identifier[len(identifier) - remaining_length // 2 :] + ) assert len(identifier) == max_length return identifier diff --git a/dlt/common/normalizers/naming/snake_case.py b/dlt/common/normalizers/naming/snake_case.py index 12aa887d6e..b3c65e9b8d 100644 --- a/dlt/common/normalizers/naming/snake_case.py +++ b/dlt/common/normalizers/naming/snake_case.py @@ -6,7 +6,6 @@ class NamingConvention(BaseNamingConvention): - _RE_UNDERSCORES = re.compile("__+") _RE_LEADING_DIGITS = re.compile(r"^\d+") # _RE_ENDING_UNDERSCORES = re.compile(r"_+$") @@ -41,16 +40,14 @@ def _normalize_identifier(identifier: str, max_length: int) -> str: # shorten identifier return NamingConvention.shorten_identifier( - NamingConvention._to_snake_case(normalized_ident), - identifier, - max_length + NamingConvention._to_snake_case(normalized_ident), identifier, max_length ) @classmethod def _to_snake_case(cls, identifier: str) -> str: # then convert to snake case - identifier = cls._SNAKE_CASE_BREAK_1.sub(r'\1_\2', identifier) - identifier = 
cls._SNAKE_CASE_BREAK_2.sub(r'\1_\2', identifier).lower() + identifier = cls._SNAKE_CASE_BREAK_1.sub(r"\1_\2", identifier) + identifier = cls._SNAKE_CASE_BREAK_2.sub(r"\1_\2", identifier).lower() # leading digits will be prefixed (if regex is defined) if cls._RE_LEADING_DIGITS and cls._RE_LEADING_DIGITS.match(identifier): @@ -63,4 +60,4 @@ def _to_snake_case(cls, identifier: str) -> str: # identifier = cls._RE_ENDING_UNDERSCORES.sub("x", identifier) # replace consecutive underscores with single one to prevent name clashes with PATH_SEPARATOR - return cls._RE_UNDERSCORES.sub("_", stripped_ident) \ No newline at end of file + return cls._RE_UNDERSCORES.sub("_", stripped_ident) diff --git a/dlt/common/normalizers/typing.py b/dlt/common/normalizers/typing.py index 93920fda1b..599426259f 100644 --- a/dlt/common/normalizers/typing.py +++ b/dlt/common/normalizers/typing.py @@ -11,4 +11,4 @@ class TJSONNormalizer(TypedDict, total=False): class TNormalizersConfig(TypedDict, total=False): names: str detections: Optional[List[str]] - json: TJSONNormalizer \ No newline at end of file + json: TJSONNormalizer diff --git a/dlt/common/normalizers/utils.py b/dlt/common/normalizers/utils.py index aabaa39017..dde78edede 100644 --- a/dlt/common/normalizers/utils.py +++ b/dlt/common/normalizers/utils.py @@ -14,10 +14,10 @@ DEFAULT_NAMING_MODULE = "dlt.common.normalizers.naming.snake_case" DLT_ID_LENGTH_BYTES = 10 + @with_config(spec=NormalizersConfiguration) def explicit_normalizers( - naming: str = dlt.config.value , - json_normalizer: TJSONNormalizer = dlt.config.value + naming: str = dlt.config.value, json_normalizer: TJSONNormalizer = dlt.config.value ) -> TNormalizersConfig: """Gets explicitly configured normalizers - via config or destination caps. May return None as naming or normalizer""" return {"names": naming, "json": json_normalizer} @@ -26,15 +26,17 @@ def explicit_normalizers( @with_config def import_normalizers( normalizers_config: TNormalizersConfig, - destination_capabilities: DestinationCapabilitiesContext = None + destination_capabilities: DestinationCapabilitiesContext = None, ) -> Tuple[TNormalizersConfig, NamingConvention, Type[DataItemNormalizer[Any]]]: """Imports the normalizers specified in `normalizers_config` or taken from defaults. Returns the updated config and imported modules. - `destination_capabilities` are used to get max length of the identifier. + `destination_capabilities` are used to get max length of the identifier. """ # add defaults to normalizer_config normalizers_config["names"] = names = normalizers_config["names"] or "snake_case" - normalizers_config["json"] = item_normalizer = normalizers_config["json"] or {"module": "dlt.common.normalizers.json.relational"} + normalizers_config["json"] = item_normalizer = normalizers_config["json"] or { + "module": "dlt.common.normalizers.json.relational" + } try: if "." in names: # TODO: bump schema engine version and migrate schema. 
also change the name in TNormalizersConfig from names to naming @@ -44,19 +46,28 @@ def import_normalizers( naming_module = cast(SupportsNamingConvention, import_module(names)) else: # from known location - naming_module = cast(SupportsNamingConvention, import_module(f"dlt.common.normalizers.naming.{names}")) + naming_module = cast( + SupportsNamingConvention, import_module(f"dlt.common.normalizers.naming.{names}") + ) except ImportError: raise UnknownNamingModule(names) if not hasattr(naming_module, "NamingConvention"): raise InvalidNamingModule(names) # get max identifier length if destination_capabilities: - max_length = min(destination_capabilities.max_identifier_length, destination_capabilities.max_column_identifier_length) + max_length = min( + destination_capabilities.max_identifier_length, + destination_capabilities.max_column_identifier_length, + ) else: max_length = None json_module = cast(SupportsDataItemNormalizer, import_module(item_normalizer["module"])) - return normalizers_config, naming_module.NamingConvention(max_length), json_module.DataItemNormalizer + return ( + normalizers_config, + naming_module.NamingConvention(max_length), + json_module.DataItemNormalizer, + ) def generate_dlt_ids(n_ids: int) -> List[str]: diff --git a/dlt/common/pendulum.py b/dlt/common/pendulum.py index 3d1c784488..cad5e099eb 100644 --- a/dlt/common/pendulum.py +++ b/dlt/common/pendulum.py @@ -2,7 +2,7 @@ import pendulum # noqa: I251 # force UTC as the local timezone to prevent local dates to be written to dbs -pendulum.set_local_timezone(pendulum.timezone('UTC')) +pendulum.set_local_timezone(pendulum.timezone("UTC")) def __utcnow() -> pendulum.DateTime: diff --git a/dlt/common/pipeline.py b/dlt/common/pipeline.py index aeb0bdc68a..6b7b308b44 100644 --- a/dlt/common/pipeline.py +++ b/dlt/common/pipeline.py @@ -1,11 +1,27 @@ +from abc import ABC, abstractmethod import os import datetime # noqa: 251 import humanize import contextlib -from typing import Any, Callable, ClassVar, Dict, List, NamedTuple, Optional, Protocol, Sequence, TYPE_CHECKING, Tuple, TypedDict +from typing import ( + Any, + Callable, + ClassVar, + Dict, + Generic, + List, + NamedTuple, + Optional, + Protocol, + Sequence, + TYPE_CHECKING, + Tuple, + TypeVar, + TypedDict, + Mapping, +) from typing_extensions import NotRequired -from dlt.common import pendulum, logger from dlt.common.configuration import configspec from dlt.common.configuration import known_sections from dlt.common.configuration.container import Container @@ -14,15 +30,135 @@ from dlt.common.configuration.specs.config_section_context import ConfigSectionContext from dlt.common.configuration.paths import get_dlt_data_dir from dlt.common.configuration.specs import RunConfiguration -from dlt.common.destination import DestinationReference, TDestinationReferenceArg -from dlt.common.exceptions import DestinationHasFailedJobs, PipelineStateNotAvailable, ResourceNameNotAvailable, SourceSectionNotAvailable +from dlt.common.destination import TDestinationReferenceArg, TDestination +from dlt.common.exceptions import ( + DestinationHasFailedJobs, + PipelineStateNotAvailable, + SourceSectionNotAvailable, +) from dlt.common.schema import Schema -from dlt.common.schema.typing import TColumnNames, TColumnSchema, TWriteDisposition +from dlt.common.schema.typing import TColumnNames, TColumnSchema, TWriteDisposition, TSchemaContract from dlt.common.source import get_current_pipe_name from dlt.common.storages.load_storage import LoadPackageInfo -from dlt.common.typing import DictStrAny, 
REPattern +from dlt.common.time import ensure_pendulum_datetime, precise_time +from dlt.common.typing import DictStrAny, REPattern, StrAny, SupportsHumanize from dlt.common.jsonpath import delete_matches, TAnyJsonPath -from dlt.common.data_writers.writers import TLoaderFileFormat +from dlt.common.data_writers.writers import DataWriterMetrics, TLoaderFileFormat +from dlt.common.utils import RowCounts, merge_row_counts + + +class _StepInfo(NamedTuple): + pipeline: "SupportsPipeline" + loads_ids: List[str] + """ids of the loaded packages""" + load_packages: List[LoadPackageInfo] + """Information on loaded packages""" + first_run: bool + started_at: datetime.datetime + finished_at: datetime.datetime + + +class StepMetrics(TypedDict): + """Metrics for particular package processed in particular pipeline step""" + + started_at: datetime.datetime + """Start of package processing""" + finished_at: datetime.datetime + """End of package processing""" + + +TStepMetricsCo = TypeVar("TStepMetricsCo", bound=StepMetrics, covariant=True) + + +class StepInfo(SupportsHumanize, Generic[TStepMetricsCo]): + pipeline: "SupportsPipeline" + metrics: Dict[str, List[TStepMetricsCo]] + """Metrics per load id. If many sources with the same name were extracted, there will be more than 1 element in the list""" + loads_ids: List[str] + """ids of the loaded packages""" + load_packages: List[LoadPackageInfo] + """Information on loaded packages""" + first_run: bool + + @property + def started_at(self) -> datetime.datetime: + """Returns the earliest start date of all collected metrics""" + if not self.metrics: + return None + try: + return min(m["started_at"] for l_m in self.metrics.values() for m in l_m) + except ValueError: + return None + + @property + def finished_at(self) -> datetime.datetime: + """Returns the latest end date of all collected metrics""" + if not self.metrics: + return None + try: + return max(m["finished_at"] for l_m in self.metrics.values() for m in l_m) + except ValueError: + return None + + def asdict(self) -> DictStrAny: + # to be mixed with NamedTuple + d: DictStrAny = self._asdict() # type: ignore + d["pipeline"] = {"pipeline_name": self.pipeline.pipeline_name} + d["load_packages"] = [package.asdict() for package in self.load_packages] + if self.metrics: + d["started_at"] = self.started_at + d["finished_at"] = self.finished_at + return d + + def __str__(self) -> str: + return self.asstr(verbosity=0) + + @staticmethod + def _load_packages_asstr(load_packages: List[LoadPackageInfo], verbosity: int) -> str: + msg: str = "" + for load_package in load_packages: + cstr = ( + load_package.state.upper() + if load_package.completed_at + else f"{load_package.state.upper()} and NOT YET LOADED to the destination" + ) + # now enumerate all complete loads if we have any failed packages + # complete but failed job will not raise any exceptions + failed_jobs = load_package.jobs["failed_jobs"] + jobs_str = "no failed jobs" if not failed_jobs else f"{len(failed_jobs)} FAILED job(s)!" 
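
A minimal, standalone sketch of the started_at/finished_at aggregation that the new StepInfo base class performs over its per-load-id metrics (earliest start across all collected metrics, None when nothing was collected). The load ids and timestamps below are invented for illustration and nothing in this sketch is part of the patch itself:

from datetime import datetime, timedelta
from typing import Dict, List, Optional

def earliest_start(metrics: Dict[str, List[dict]]) -> Optional[datetime]:
    # mirrors StepInfo.started_at: min over the metrics of all load ids, None if empty
    try:
        return min(m["started_at"] for per_load in metrics.values() for m in per_load)
    except ValueError:
        return None

t0 = datetime(2023, 1, 1, 12, 0, 0)
metrics = {
    "1700000000.001": [{"started_at": t0, "finished_at": t0 + timedelta(seconds=5)}],
    "1700000000.002": [{"started_at": t0 + timedelta(seconds=2), "finished_at": t0 + timedelta(seconds=9)}],
}
assert earliest_start(metrics) == t0
assert earliest_start({}) is None
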
+ msg += f"\nLoad package {load_package.load_id} is {cstr} and contains {jobs_str}" + if verbosity > 0: + for failed_job in failed_jobs: + msg += ( + f"\n\t[{failed_job.job_file_info.job_id()}]: {failed_job.failed_message}\n" + ) + if verbosity > 1: + msg += "\nPackage details:\n" + msg += load_package.asstr() + "\n" + return msg + + @staticmethod + def job_metrics_asdict( + job_metrics: Dict[str, DataWriterMetrics], key_name: str = "job_id", extend: StrAny = None + ) -> List[DictStrAny]: + jobs = [] + for job_id, metrics in job_metrics.items(): + d = metrics._asdict() + if extend: + d.update(extend) + d[key_name] = job_id + jobs.append(d) + return jobs + + def _astuple(self) -> _StepInfo: + return _StepInfo( + self.pipeline, + self.loads_ids, + self.load_packages, + self.first_run, + self.started_at, + self.finished_at, + ) class ExtractDataInfo(TypedDict): @@ -30,31 +166,140 @@ class ExtractDataInfo(TypedDict): data_type: str -class ExtractInfo(NamedTuple): - """A tuple holding information on extracted data items. Returned by pipeline `extract` method.""" +class ExtractMetrics(StepMetrics): + schema_name: str + job_metrics: Dict[str, DataWriterMetrics] + """Metrics collected per job id during writing of job file""" + table_metrics: Dict[str, DataWriterMetrics] + """Job metrics aggregated by table""" + resource_metrics: Dict[str, DataWriterMetrics] + """Job metrics aggregated by resource""" + dag: List[Tuple[str, str]] + """A resource dag where elements of the list are graph edges""" + hints: Dict[str, Dict[str, Any]] + """Hints passed to the resources""" + + +class _ExtractInfo(NamedTuple): + """NamedTuple cannot be part of the derivation chain so we must re-declare all fields to use it as mixin later""" + pipeline: "SupportsPipeline" + metrics: Dict[str, List[ExtractMetrics]] extract_data_info: List[ExtractDataInfo] + loads_ids: List[str] + """ids of the loaded packages""" + load_packages: List[LoadPackageInfo] + """Information on loaded packages""" + first_run: bool + + +class ExtractInfo(StepInfo[ExtractMetrics], _ExtractInfo): # type: ignore[misc] + """A tuple holding information on extracted data items. 
Returned by pipeline `extract` method.""" def asdict(self) -> DictStrAny: - return {} + """A dictionary representation of ExtractInfo that can be loaded with `dlt`""" + d = super().asdict() + d.pop("extract_data_info") + # transform metrics + d.pop("metrics") + load_metrics: Dict[str, List[Any]] = { + "job_metrics": [], + "table_metrics": [], + "resource_metrics": [], + "dag": [], + "hints": [], + } + for load_id, metrics_list in self.metrics.items(): + for idx, metrics in enumerate(metrics_list): + extend = {"load_id": load_id, "extract_idx": idx} + load_metrics["job_metrics"].extend( + self.job_metrics_asdict(metrics["job_metrics"], extend=extend) + ) + load_metrics["table_metrics"].extend( + self.job_metrics_asdict( + metrics["table_metrics"], key_name="table_name", extend=extend + ) + ) + load_metrics["resource_metrics"].extend( + self.job_metrics_asdict( + metrics["resource_metrics"], key_name="resource_name", extend=extend + ) + ) + load_metrics["dag"].extend( + [ + {**extend, "parent_name": edge[0], "resource_name": edge[1]} + for edge in metrics["dag"] + ] + ) + load_metrics["hints"].extend( + [ + {**extend, "resource_name": name, **hints} + for name, hints in metrics["hints"].items() + ] + ) + d.update(load_metrics) + return d def asstr(self, verbosity: int = 0) -> str: - return "" + return self._load_packages_asstr(self.load_packages, verbosity) - def __str__(self) -> str: - return self.asstr(verbosity=0) + +# reveal_type(ExtractInfo) + + +class NormalizeMetrics(StepMetrics): + job_metrics: Dict[str, DataWriterMetrics] + """Metrics collected per job id during writing of job file""" + table_metrics: Dict[str, DataWriterMetrics] + """Job metrics aggregated by table""" + + +class _NormalizeInfo(NamedTuple): + pipeline: "SupportsPipeline" + metrics: Dict[str, List[NormalizeMetrics]] + loads_ids: List[str] + """ids of the loaded packages""" + load_packages: List[LoadPackageInfo] + """Information on loaded packages""" + first_run: bool -class NormalizeInfo(NamedTuple): +class NormalizeInfo(StepInfo[NormalizeMetrics], _NormalizeInfo): # type: ignore[misc] """A tuple holding information on normalized data items. 
Returned by pipeline `normalize` method.""" - row_counts: Dict[str, int] = {} + @property + def row_counts(self) -> RowCounts: + if not self.metrics: + return {} + counts: RowCounts = {} + for metrics in self.metrics.values(): + assert len(metrics) == 1, "Cannot deal with more than 1 normalize metric per load_id" + merge_row_counts( + counts, {t: m.items_count for t, m in metrics[0]["table_metrics"].items()} + ) + return counts def asdict(self) -> DictStrAny: """A dictionary representation of NormalizeInfo that can be loaded with `dlt`""" - d = self._asdict() - # list representation creates a nice table - d["row_counts"] = [(k, v) for k, v in self.row_counts.items()] + d = super().asdict() + # transform metrics + d.pop("metrics") + load_metrics: Dict[str, List[Any]] = { + "job_metrics": [], + "table_metrics": [], + } + for load_id, metrics_list in self.metrics.items(): + for idx, metrics in enumerate(metrics_list): + extend = {"load_id": load_id, "extract_idx": idx} + load_metrics["job_metrics"].extend( + self.job_metrics_asdict(metrics["job_metrics"], extend=extend) + ) + load_metrics["table_metrics"].extend( + self.job_metrics_asdict( + metrics["table_metrics"], key_name="table_name", extend=extend + ) + ) + d.update(load_metrics) return d def asstr(self, verbosity: int = 0) -> str: @@ -64,17 +309,22 @@ def asstr(self, verbosity: int = 0) -> str: msg += f"- {key}: {value} row(s)\n" else: msg = "No data found to normalize" + msg += self._load_packages_asstr(self.load_packages, verbosity) return msg - def __str__(self) -> str: - return self.asstr(verbosity=0) +class LoadMetrics(StepMetrics): + pass -class LoadInfo(NamedTuple): - """A tuple holding the information on recently loaded packages. Returned by pipeline `run` and `load` methods""" + +class _LoadInfo(NamedTuple): pipeline: "SupportsPipeline" - destination_name: str + metrics: Dict[str, List[LoadMetrics]] + destination_type: str destination_displayable_credentials: str + destination_name: str + environment: str + staging_type: str staging_name: str staging_displayable_credentials: str destination_fingerprint: str @@ -83,43 +333,39 @@ class LoadInfo(NamedTuple): """ids of the loaded packages""" load_packages: List[LoadPackageInfo] """Information on loaded packages""" - started_at: datetime.datetime first_run: bool + +class LoadInfo(StepInfo[LoadMetrics], _LoadInfo): # type: ignore[misc] + """A tuple holding the information on recently loaded packages. 
Returned by pipeline `run` and `load` methods""" + def asdict(self) -> DictStrAny: """A dictionary representation of LoadInfo that can be loaded with `dlt`""" - d = self._asdict() - d["pipeline"] = { - "pipeline_name": self.pipeline.pipeline_name - } - d["load_packages"] = [package.asdict() for package in self.load_packages] - return d + return super().asdict() def asstr(self, verbosity: int = 0) -> str: - msg = f"Pipeline {self.pipeline.pipeline_name} completed in " + msg = f"Pipeline {self.pipeline.pipeline_name} load step completed in " if self.started_at: - elapsed = pendulum.now() - self.started_at + elapsed = self.finished_at - self.started_at msg += humanize.precisedelta(elapsed) else: msg += "---" - msg += f"\n{len(self.loads_ids)} load package(s) were loaded to destination {self.destination_name} and into dataset {self.dataset_name}\n" + msg += ( + f"\n{len(self.loads_ids)} load package(s) were loaded to destination" + f" {self.destination_name} and into dataset {self.dataset_name}\n" + ) if self.staging_name: - msg += f"The {self.staging_name} staging destination used {self.staging_displayable_credentials} location to stage data\n" + msg += ( + f"The {self.staging_name} staging destination used" + f" {self.staging_displayable_credentials} location to stage data\n" + ) + + msg += ( + f"The {self.destination_name} destination used" + f" {self.destination_displayable_credentials} location to store data" + ) + msg += self._load_packages_asstr(self.load_packages, verbosity) - msg += f"The {self.destination_name} destination used {self.destination_displayable_credentials} location to store data" - for load_package in self.load_packages: - cstr = load_package.state.upper() if load_package.completed_at else "NOT COMPLETED" - # now enumerate all complete loads if we have any failed packages - # complete but failed job will not raise any exceptions - failed_jobs = load_package.jobs["failed_jobs"] - jobs_str = "no failed jobs" if not failed_jobs else f"{len(failed_jobs)} FAILED job(s)!" 
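
As a side note, a tiny self-contained example (assuming the humanize package imported above is installed) of how the elapsed time reported by LoadInfo.asstr is formatted now that it is derived from finished_at - started_at rather than the current wall clock; the timestamps here are invented:

import datetime
import humanize

started_at = datetime.datetime(2023, 1, 1, 12, 0, 0)
finished_at = datetime.datetime(2023, 1, 1, 12, 2, 30)
# same call as in LoadInfo.asstr: humanize.precisedelta(elapsed)
print(humanize.precisedelta(finished_at - started_at))  # e.g. "2 minutes and 30 seconds"
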
- msg += f"\nLoad package {load_package.load_id} is {cstr} and contains {jobs_str}" - if verbosity > 0: - for failed_job in failed_jobs: - msg += f"\n\t[{failed_job.job_file_info.job_id()}]: {failed_job.failed_message}\n" - if verbosity > 1: - msg += "\nPackage details:\n" - msg += load_package.asstr() + "\n" return msg @property @@ -135,31 +381,90 @@ def raise_on_failed_jobs(self) -> None: for load_package in self.load_packages: failed_jobs = load_package.jobs["failed_jobs"] if len(failed_jobs): - raise DestinationHasFailedJobs(self.destination_name, load_package.load_id, failed_jobs) + raise DestinationHasFailedJobs( + self.destination_name, load_package.load_id, failed_jobs + ) def __str__(self) -> str: return self.asstr(verbosity=1) + +TStepMetrics = TypeVar("TStepMetrics", bound=StepMetrics, covariant=False) +TStepInfo = TypeVar("TStepInfo", bound=StepInfo[StepMetrics]) + + +class WithStepInfo(ABC, Generic[TStepMetrics, TStepInfo]): + """Implemented by classes that generate StepInfo with metrics and package infos""" + + _current_load_id: str + _load_id_metrics: Dict[str, List[TStepMetrics]] + _current_load_started: float + """Completed load ids metrics""" + + def __init__(self) -> None: + self._load_id_metrics = {} + self._current_load_id = None + self._current_load_started = None + + def _step_info_start_load_id(self, load_id: str) -> None: + self._current_load_id = load_id + self._current_load_started = precise_time() + self._load_id_metrics.setdefault(load_id, []) + + def _step_info_complete_load_id(self, load_id: str, metrics: TStepMetrics) -> None: + assert self._current_load_id == load_id, ( + f"Current load id mismatch {self._current_load_id} != {load_id} when completing step" + " info" + ) + metrics["started_at"] = ensure_pendulum_datetime(self._current_load_started) + metrics["finished_at"] = ensure_pendulum_datetime(precise_time()) + self._load_id_metrics[load_id].append(metrics) + self._current_load_id = None + self._current_load_started = None + + def _step_info_metrics(self, load_id: str) -> List[TStepMetrics]: + return self._load_id_metrics[load_id] + + @property + def current_load_id(self) -> str: + """Returns currently processing load id""" + return self._current_load_id + + @abstractmethod + def get_step_info( + self, + pipeline: "SupportsPipeline", + ) -> TStepInfo: + """Returns and instance of StepInfo with metrics and package infos""" + pass + + class TPipelineLocalState(TypedDict, total=False): first_run: bool """Indicates a first run of the pipeline, where run ends with successful loading of data""" _last_extracted_at: datetime.datetime - """Timestamp indicating when the state was synced with the destination. 
Lack of timestamp means not synced state.""" + """Timestamp indicating when the state was synced with the destination.""" + _last_extracted_hash: str + """Hash of state that was recently synced with destination""" class TPipelineState(TypedDict, total=False): """Schema for a pipeline state that is stored within the pipeline working directory""" + pipeline_name: str dataset_name: str default_schema_name: Optional[str] """Name of the first schema added to the pipeline to which all the resources without schemas will be added""" schema_names: Optional[List[str]] """All the schemas present within the pipeline working directory""" - destination: Optional[str] - staging: Optional[str] + destination_name: Optional[str] + destination_type: Optional[str] + staging_name: Optional[str] + staging_type: Optional[str] # properties starting with _ are not automatically applied to pipeline object when state is restored _state_version: int + _version_hash: str _state_engine_version: int _local: TPipelineLocalState """A section of state that is not synchronized with the destination and does not participate in change merging and version control""" @@ -173,11 +478,12 @@ class TSourceState(TPipelineState): class SupportsPipeline(Protocol): """A protocol with core pipeline operations that lets high level abstractions ie. sources to access pipeline methods and properties""" + pipeline_name: str """Name of the pipeline""" default_schema_name: str """Name of the default schema""" - destination: DestinationReference + destination: TDestination """The destination reference which is ModuleType. `destination.__name__` returns the name string""" dataset_name: str """Name of the dataset to which pipeline will be loaded to""" @@ -194,6 +500,10 @@ class SupportsPipeline(Protocol): def state(self) -> TPipelineState: """Returns dictionary with pipeline state""" + @property + def schemas(self) -> Mapping[str, Schema]: + """Mapping of all pipeline schemas""" + def set_local_state_val(self, key: str, value: Any) -> None: """Sets value in local state. Local state is not synchronized with destination.""" @@ -212,9 +522,9 @@ def run( columns: Sequence[TColumnSchema] = None, primary_key: TColumnNames = None, schema: Schema = None, - loader_file_format: TLoaderFileFormat = None - ) -> LoadInfo: - ... + loader_file_format: TLoaderFileFormat = None, + schema_contract: TSchemaContract = None, + ) -> LoadInfo: ... def _set_context(self, is_active: bool) -> None: """Called when pipeline context activated or deactivate""" @@ -234,9 +544,9 @@ def __call__( write_disposition: TWriteDisposition = None, columns: Sequence[TColumnSchema] = None, schema: Schema = None, - loader_file_format: TLoaderFileFormat = None - ) -> LoadInfo: - ... + loader_file_format: TLoaderFileFormat = None, + schema_contract: TSchemaContract = None, + ) -> LoadInfo: ... @configspec @@ -244,12 +554,16 @@ class PipelineContext(ContainerInjectableContext): _deferred_pipeline: Callable[[], SupportsPipeline] _pipeline: SupportsPipeline - can_create_default: ClassVar[bool] = False + can_create_default: ClassVar[bool] = True def pipeline(self) -> SupportsPipeline: """Creates or returns exiting pipeline""" if not self._pipeline: # delayed pipeline creation + assert self._deferred_pipeline is not None, ( + "Deferred pipeline creation function not provided to PipelineContext. Are you" + " calling dlt.pipeline() from another thread?" 
+ ) self.activate(self._deferred_pipeline()) return self._pipeline @@ -269,7 +583,7 @@ def deactivate(self) -> None: self._pipeline._set_context(False) self._pipeline = None - def __init__(self, deferred_pipeline: Callable[..., SupportsPipeline]) -> None: + def __init__(self, deferred_pipeline: Callable[..., SupportsPipeline] = None) -> None: """Initialize the context with a function returning the Pipeline object to allow creation on first use""" self._deferred_pipeline = deferred_pipeline @@ -281,17 +595,19 @@ class StateInjectableContext(ContainerInjectableContext): can_create_default: ClassVar[bool] = False if TYPE_CHECKING: - def __init__(self, state: TPipelineState = None) -> None: - ... + + def __init__(self, state: TPipelineState = None) -> None: ... -def pipeline_state(container: Container, initial_default: TPipelineState = None) -> Tuple[TPipelineState, bool]: +def pipeline_state( + container: Container, initial_default: TPipelineState = None +) -> Tuple[TPipelineState, bool]: """Gets value of the state from context or active pipeline, if none found returns `initial_default` - Injected state is called "writable": it is injected by the `Pipeline` class and all the changes will be persisted. - The state coming from pipeline context or `initial_default` is called "read only" and all the changes to it will be discarded + Injected state is called "writable": it is injected by the `Pipeline` class and all the changes will be persisted. + The state coming from pipeline context or `initial_default` is called "read only" and all the changes to it will be discarded - Returns tuple (state, writable) + Returns tuple (state, writable) """ try: # get injected state if present. injected state is typically "managed" so changes will be persisted @@ -363,7 +679,9 @@ def source_state() -> DictStrAny: _last_full_state: TPipelineState = None -def _delete_source_state_keys(key: TAnyJsonPath, source_state_: Optional[DictStrAny] = None, /) -> None: +def _delete_source_state_keys( + key: TAnyJsonPath, source_state_: Optional[DictStrAny] = None, / +) -> None: """Remove one or more key from the source state. The `key` can be any number of keys and/or json paths to be removed. """ @@ -371,7 +689,9 @@ def _delete_source_state_keys(key: TAnyJsonPath, source_state_: Optional[DictStr delete_matches(key, state_) -def resource_state(resource_name: str = None, source_state_: Optional[DictStrAny] = None, /) -> DictStrAny: +def resource_state( + resource_name: str = None, source_state_: Optional[DictStrAny] = None, / +) -> DictStrAny: """Returns a dictionary with the resource-scoped state. Resource-scoped state is visible only to resource requesting the access. Dlt state is preserved across pipeline runs and may be used to implement incremental loads. Note that this function accepts the resource name as optional argument. 
There are rare cases when `dlt` is not able to resolve resource name due to requesting function @@ -419,9 +739,7 @@ def resource_state(resource_name: str = None, source_state_: Optional[DictStrAny # backtrace to find the shallowest resource if not resource_name: resource_name = get_current_pipe_name() - if not resource_name: - raise ResourceNameNotAvailable() - return state_.setdefault('resources', {}).setdefault(resource_name, {}) # type: ignore + return state_.setdefault("resources", {}).setdefault(resource_name, {}) # type: ignore def reset_resource_state(resource_name: str, source_state_: Optional[DictStrAny] = None, /) -> None: @@ -436,7 +754,9 @@ def reset_resource_state(resource_name: str, source_state_: Optional[DictStrAny] state_["resources"].pop(resource_name) -def _get_matching_resources(pattern: REPattern, source_state_: Optional[DictStrAny] = None, /) -> List[str]: +def _get_matching_resources( + pattern: REPattern, source_state_: Optional[DictStrAny] = None, / +) -> List[str]: """Get all resource names in state matching the regex pattern""" state_ = source_state() if source_state_ is None else source_state_ if "resources" not in state_: @@ -445,10 +765,10 @@ def _get_matching_resources(pattern: REPattern, source_state_: Optional[DictStrA def get_dlt_pipelines_dir() -> str: - """ Gets default directory where pipelines' data will be stored - 1. in user home directory ~/.dlt/pipelines/ - 2. if current user is root in /var/dlt/pipelines - 3. if current user does not have a home directory in /tmp/dlt/pipelines + """Gets default directory where pipelines' data will be stored + 1. in user home directory ~/.dlt/pipelines/ + 2. if current user is root in /var/dlt/pipelines + 3. if current user does not have a home directory in /tmp/dlt/pipelines """ return os.path.join(get_dlt_data_dir(), "pipelines") diff --git a/dlt/common/reflection/function_visitor.py b/dlt/common/reflection/function_visitor.py index 3b89403745..6cb6016a7f 100644 --- a/dlt/common/reflection/function_visitor.py +++ b/dlt/common/reflection/function_visitor.py @@ -2,6 +2,7 @@ from ast import NodeVisitor from typing import Any + class FunctionVisitor(NodeVisitor): def __init__(self, source: str): self.source = source diff --git a/dlt/common/reflection/spec.py b/dlt/common/reflection/spec.py index 58a75fb53e..0a486088c8 100644 --- a/dlt/common/reflection/spec.py +++ b/dlt/common/reflection/spec.py @@ -15,7 +15,9 @@ def _get_spec_name_from_f(f: AnyFun) -> str: - func_name = get_callable_name(f, "__qualname__").replace(".", "") # func qual name contains position in the module, separated by dots + func_name = get_callable_name(f, "__qualname__").replace( + ".", "" + ) # func qual name contains position in the module, separated by dots def _first_up(s: str) -> str: return s[0].upper() + s[1:] @@ -23,7 +25,9 @@ def _first_up(s: str) -> str: return "".join(map(_first_up, _SLEEPING_CAT_SPLIT.findall(func_name))) + "Configuration" -def spec_from_signature(f: AnyFun, sig: Signature, include_defaults: bool = True) -> Type[BaseConfiguration]: +def spec_from_signature( + f: AnyFun, sig: Signature, include_defaults: bool = True +) -> Type[BaseConfiguration]: name = _get_spec_name_from_f(f) module = inspect.getmodule(f) @@ -60,7 +64,10 @@ def dlt_config_literal_to_type(arg_name: str) -> AnyType: for p in sig.parameters.values(): # skip *args and **kwargs, skip typical method params - if p.kind not in (Parameter.VAR_KEYWORD, Parameter.VAR_POSITIONAL) and p.name not in ["self", "cls"]: + if p.kind not in (Parameter.VAR_KEYWORD, 
Parameter.VAR_POSITIONAL) and p.name not in [ + "self", + "cls", + ]: field_type = AnyType if p.annotation == Parameter.empty else p.annotation # only valid hints and parameters with defaults are eligible if is_valid_hint(field_type) and p.default != Parameter.empty: diff --git a/dlt/common/reflection/utils.py b/dlt/common/reflection/utils.py index c9c1ad92ed..9bd3cb6775 100644 --- a/dlt/common/reflection/utils.py +++ b/dlt/common/reflection/utils.py @@ -68,12 +68,16 @@ def creates_func_def_name_node(func_def: ast.FunctionDef, source_lines: Sequence """Recreate function name as a ast.Name with known source code location""" func_name = ast.Name(func_def.name) func_name.lineno = func_name.end_lineno = func_def.lineno - func_name.col_offset = source_lines[func_name.lineno - 1].index(func_def.name) # find where function name starts + func_name.col_offset = source_lines[func_name.lineno - 1].index( + func_def.name + ) # find where function name starts func_name.end_col_offset = func_name.col_offset + len(func_def.name) return func_name -def rewrite_python_script(source_script_lines: List[str], transformed_nodes: List[Tuple[ast.AST, ast.AST]]) -> List[str]: +def rewrite_python_script( + source_script_lines: List[str], transformed_nodes: List[Tuple[ast.AST, ast.AST]] +) -> List[str]: """Replaces all the nodes present in `transformed_nodes` in the `script_lines`. The `transformed_nodes` is a tuple where the first element is must be a node with full location information created out of `script_lines`""" script_lines: List[str] = [] @@ -87,12 +91,12 @@ def rewrite_python_script(source_script_lines: List[str], transformed_nodes: Lis if last_offset >= 0: script_lines.append(source_script_lines[last_line][last_offset:]) # add all new lines from previous line to current - script_lines.extend(source_script_lines[last_line+1:node.lineno-1]) + script_lines.extend(source_script_lines[last_line + 1 : node.lineno - 1]) # add trailing characters until node in current line starts - script_lines.append(source_script_lines[node.lineno-1][:node.col_offset]) + script_lines.append(source_script_lines[node.lineno - 1][: node.col_offset]) elif last_offset >= 0: # no line change, add the characters from the end of previous node to the current - script_lines.append(source_script_lines[last_line][last_offset:node.col_offset]) + script_lines.append(source_script_lines[last_line][last_offset : node.col_offset]) # replace node value script_lines.append(astunparse.unparse(t_value).strip()) @@ -102,7 +106,7 @@ def rewrite_python_script(source_script_lines: List[str], transformed_nodes: Lis # add all that was missing if last_offset >= 0: script_lines.append(source_script_lines[last_line][last_offset:]) - script_lines.extend(source_script_lines[last_line+1:]) + script_lines.extend(source_script_lines[last_line + 1 :]) return script_lines diff --git a/dlt/common/runners/__init__.py b/dlt/common/runners/__init__.py index 9af668ce87..2c5916eaec 100644 --- a/dlt/common/runners/__init__.py +++ b/dlt/common/runners/__init__.py @@ -5,8 +5,12 @@ __all__ = [ - "run_pool", "NullExecutor", - "Runnable", "workermethod", "TExecutor", + "run_pool", + "NullExecutor", + "Runnable", + "workermethod", + "TExecutor", "TRunMetrics", - "Venv", "VenvNotFound" + "Venv", + "VenvNotFound", ] diff --git a/dlt/common/runners/configuration.py b/dlt/common/runners/configuration.py index 3231f83807..c5de2353f4 100644 --- a/dlt/common/runners/configuration.py +++ b/dlt/common/runners/configuration.py @@ -8,14 +8,21 @@ @configspec class 
PoolRunnerConfiguration(BaseConfiguration): - pool_type: TPoolType = None # type of pool to run, must be set in derived configs - workers: Optional[int] = None # how many threads/processes in the pool - run_sleep: float = 0.1 # how long to sleep between runs with workload, seconds + pool_type: TPoolType = None + """type of pool to run, must be set in derived configs""" + start_method: Optional[str] = None + """start method for the pool (typically process). None is system default""" + workers: Optional[int] = None + """# how many threads/processes in the pool""" + run_sleep: float = 0.1 + """how long to sleep between runs with workload, seconds""" if TYPE_CHECKING: + def __init__( self, pool_type: TPoolType = None, - workers: int = None - ) -> None: - ... + start_method: str = None, + workers: int = None, + run_sleep: float = 0.1, + ) -> None: ... diff --git a/dlt/common/runners/pool_runner.py b/dlt/common/runners/pool_runner.py index 7b83d68e0f..491c74cd18 100644 --- a/dlt/common/runners/pool_runner.py +++ b/dlt/common/runners/pool_runner.py @@ -5,6 +5,7 @@ from typing_extensions import ParamSpec from dlt.common import logger, sleep +from dlt.common.configuration.container import Container from dlt.common.runtime import init from dlt.common.runners.runnable import Runnable, TExecutor from dlt.common.runners.configuration import PoolRunnerConfiguration @@ -38,28 +39,35 @@ def submit(self, fn: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> Futur def create_pool(config: PoolRunnerConfiguration) -> Executor: if config.pool_type == "process": # if not fork method, provide initializer for logs and configuration - if multiprocessing.get_start_method() != "fork" and init._INITIALIZED: + start_method = config.start_method or multiprocessing.get_start_method() + if start_method != "fork" and init._INITIALIZED: return ProcessPoolExecutor( max_workers=config.workers, initializer=init.initialize_runtime, initargs=(init._RUN_CONFIGURATION,), - mp_context=multiprocessing.get_context() - ) + mp_context=multiprocessing.get_context(method=start_method), + ) else: return ProcessPoolExecutor( - max_workers=config.workers, - mp_context=multiprocessing.get_context() + max_workers=config.workers, mp_context=multiprocessing.get_context() ) elif config.pool_type == "thread": - return ThreadPoolExecutor(max_workers=config.workers) + return ThreadPoolExecutor( + max_workers=config.workers, thread_name_prefix=Container.thread_pool_prefix() + ) # no pool - single threaded return NullExecutor() -def run_pool(config: PoolRunnerConfiguration, run_f: Union[Runnable[TExecutor], Callable[[TExecutor], TRunMetrics]]) -> int: +def run_pool( + config: PoolRunnerConfiguration, + run_f: Union[Runnable[TExecutor], Callable[[TExecutor], TRunMetrics]], +) -> int: # validate the run function if not isinstance(run_f, Runnable) and not callable(run_f): - raise ValueError(run_f, "Pool runner entry point must be a function f(pool: TPool) or Runnable") + raise ValueError( + run_f, "Pool runner entry point must be a function f(pool: TPool) or Runnable" + ) # start pool pool = create_pool(config) diff --git a/dlt/common/runners/runnable.py b/dlt/common/runners/runnable.py index c2d9989bb5..fe795fe73e 100644 --- a/dlt/common/runners/runnable.py +++ b/dlt/common/runners/runnable.py @@ -20,7 +20,9 @@ class Runnable(ABC, Generic[TExecutor]): # use weak reference container, once other references are dropped the referenced object is garbage collected RUNNING: TWeakValueDictionary = WeakValueDictionary({}) - def __new__(cls: 
Type["Runnable[TExecutor]"], *args: Any, **kwargs: Any) -> "Runnable[TExecutor]": + def __new__( + cls: Type["Runnable[TExecutor]"], *args: Any, **kwargs: Any + ) -> "Runnable[TExecutor]": """Registers Runnable instance as running for a time when context is active. Used with `~workermethod` decorator to pass a class instance to decorator function that must be static thus avoiding pickling such instance. @@ -50,6 +52,7 @@ def workermethod(f: TFun) -> TFun: Returns: TFun: wrapped worker function """ + @wraps(f) def _wrap(rid: Union[int, Runnable[TExecutor]], *args: Any, **kwargs: Any) -> Any: if isinstance(rid, int): @@ -95,4 +98,3 @@ def _wrap(rid: Union[int, Runnable[TExecutor]], *args: Any, **kwargs: Any) -> An # return f(config, *args, **kwargs) # return _wrap # type: ignore - diff --git a/dlt/common/runners/stdout.py b/dlt/common/runners/stdout.py index a9f4ab1438..8ddfb45ee4 100644 --- a/dlt/common/runners/stdout.py +++ b/dlt/common/runners/stdout.py @@ -26,14 +26,16 @@ def exec_to_stdout(f: AnyFun) -> Iterator[Any]: def iter_stdout(venv: Venv, command: str, *script_args: Any) -> Iterator[str]: # start a process in virtual environment, assume that text comes from stdout - with venv.start_command(command, *script_args, stdout=PIPE, stderr=PIPE, bufsize=1, text=True) as process: + with venv.start_command( + command, *script_args, stdout=PIPE, stderr=PIPE, bufsize=1, text=True + ) as process: exit_code: int = None line = "" stderr: List[str] = [] def _r_stderr() -> None: nonlocal stderr - for line in iter(process.stderr.readline, ''): + for line in iter(process.stderr.readline, ""): stderr.append(line) # read stderr with a thread, selectors do not work on windows @@ -41,7 +43,7 @@ def _r_stderr() -> None: t.start() # read stdout with - for line in iter(process.stdout.readline, ''): + for line in iter(process.stdout.readline, ""): if line.endswith("\n"): yield line[:-1] else: @@ -57,9 +59,11 @@ def _r_stderr() -> None: raise CalledProcessError(exit_code, command, output=line, stderr="".join(stderr)) -def iter_stdout_with_result(venv: Venv, command: str, *script_args: Any) -> Generator[str, None, Any]: +def iter_stdout_with_result( + venv: Venv, command: str, *script_args: Any +) -> Generator[str, None, Any]: """Yields stdout lines coming from remote process and returns the last result decoded with decode_obj. 
In case of exit code != 0 if exception is decoded - it will be raised, otherwise CalledProcessError is raised""" + it will be raised, otherwise CalledProcessError is raised""" last_result: Any = None try: for line in iter_stdout(venv, command, *script_args): diff --git a/dlt/common/runners/synth_pickle.py b/dlt/common/runners/synth_pickle.py index 420e89a74a..ba0c87f28d 100644 --- a/dlt/common/runners/synth_pickle.py +++ b/dlt/common/runners/synth_pickle.py @@ -15,6 +15,7 @@ def __init__(*args: Any, **kwargs: Any) -> None: class SynthesizingUnpickler(pickle.Unpickler): """Unpickler that synthesizes missing types instead of raising""" + def find_class(self, module: str, name: str) -> Any: if module not in sys.modules: module_obj = sys.modules[__name__] @@ -24,7 +25,7 @@ def find_class(self, module: str, name: str) -> Any: return getattr(module_obj, name) except Exception: # synthesize type - t = type(name, (MissingUnpickledType, ), {"__module__": module}) + t = type(name, (MissingUnpickledType,), {"__module__": module}) setattr(module_obj, name, t) return t diff --git a/dlt/common/runners/venv.py b/dlt/common/runners/venv.py index d81e7384b4..9a92b30326 100644 --- a/dlt/common/runners/venv.py +++ b/dlt/common/runners/venv.py @@ -19,7 +19,7 @@ def post_setup(self, context: types.SimpleNamespace) -> None: self.context = context -class Venv(): +class Venv: """Creates and wraps the Python Virtual Environment to allow for code execution""" def __init__(self, context: types.SimpleNamespace, current: bool = False) -> None: @@ -59,6 +59,7 @@ def restore_current(cls) -> "Venv": venv = cls.restore(os.environ["VIRTUAL_ENV"], current=True) except KeyError: import sys + # do not set bin path because it is not known context = types.SimpleNamespace(bin_path="", env_exe=sys.executable) venv = cls(context, current=True) @@ -69,7 +70,9 @@ def __enter__(self) -> "Venv": raise NotImplementedError("Context manager does not work with current venv") return self - def __exit__(self, exc_type: Type[BaseException], exc_val: BaseException, exc_tb: types.TracebackType) -> None: + def __exit__( + self, exc_type: Type[BaseException], exc_val: BaseException, exc_tb: types.TracebackType + ) -> None: self.delete_environment() def delete_environment(self) -> None: @@ -80,7 +83,9 @@ def delete_environment(self) -> None: if self.context.env_dir and os.path.isdir(self.context.env_dir): shutil.rmtree(self.context.env_dir) - def start_command(self, entry_point: str, *script_args: Any, **popen_kwargs: Any) -> "subprocess.Popen[str]": + def start_command( + self, entry_point: str, *script_args: Any, **popen_kwargs: Any + ) -> "subprocess.Popen[str]": command = os.path.join(self.context.bin_path, entry_point) cmd = [command, *script_args] return subprocess.Popen(cmd, **popen_kwargs) @@ -120,7 +125,6 @@ def _install_deps(context: types.SimpleNamespace, dependencies: List[str]) -> No except subprocess.CalledProcessError as exc: raise CannotInstallDependencies(dependencies, context.env_exe, exc.output) - @staticmethod def is_virtual_env() -> bool: """Checks if we are running in virtual environment""" diff --git a/dlt/common/runtime/collector.py b/dlt/common/runtime/collector.py index 5e7143241e..eec379564c 100644 --- a/dlt/common/runtime/collector.py +++ b/dlt/common/runtime/collector.py @@ -4,7 +4,20 @@ import time from abc import ABC, abstractmethod from collections import defaultdict -from typing import Any, ContextManager, Dict, Type, TYPE_CHECKING, DefaultDict, NamedTuple, Optional, Union, TextIO, TypeVar +from typing import ( + 
Any, + ContextManager, + Dict, + Type, + TYPE_CHECKING, + DefaultDict, + NamedTuple, + Optional, + Union, + TextIO, + TypeVar, +) + if TYPE_CHECKING: from tqdm import tqdm import enlighten @@ -20,11 +33,12 @@ class Collector(ABC): - step: str @abstractmethod - def update(self, name: str, inc: int = 1, total: int = None, message: str = None, label: str = None) -> None: + def update( + self, name: str, inc: int = 1, total: int = None, message: str = None, label: str = None + ) -> None: """Creates or updates a counter This function updates a counter `name` with a value `inc`. If counter does not exist, it is created with optional total value of `total`. @@ -65,7 +79,9 @@ def __exit__(self, exc_type: Type[BaseException], exc_val: BaseException, exc_tb class NullCollector(Collector): """A default counter that does not count anything.""" - def update(self, name: str, inc: int = 1, total: int = None, message: str = None, label: str = None) -> None: + def update( + self, name: str, inc: int = 1, total: int = None, message: str = None, label: str = None + ) -> None: pass def _start(self, step: str) -> None: @@ -81,7 +97,9 @@ class DictCollector(Collector): def __init__(self) -> None: self.counters: DefaultDict[str, int] = None - def update(self, name: str, inc: int = 1, total: int = None, message: str = None, label: str = None) -> None: + def update( + self, name: str, inc: int = 1, total: int = None, message: str = None, label: str = None + ) -> None: assert not label, "labels not supported in dict collector" self.counters[name] += inc @@ -103,7 +121,13 @@ class CounterInfo(NamedTuple): start_time: float total: Optional[int] - def __init__(self, log_period: float = 1.0, logger: Union[logging.Logger, TextIO] = sys.stdout, log_level: int = logging.INFO, dump_system_stats: bool = True) -> None: + def __init__( + self, + log_period: float = 1.0, + logger: Union[logging.Logger, TextIO] = sys.stdout, + log_level: int = logging.INFO, + dump_system_stats: bool = True, + ) -> None: """ Collector writing to a `logger` every `log_period` seconds. The logger can be a Python logger instance, text stream, or None that will attach `dlt` logger @@ -123,12 +147,19 @@ def __init__(self, log_period: float = 1.0, logger: Union[logging.Logger, TextIO try: import psutil except ImportError: - self._log(logging.WARNING, "psutil dependency is not installed and mem stats will not be available. add psutil to your environment or pass dump_system_stats argument as False to disable warning.") + self._log( + logging.WARNING, + "psutil dependency is not installed and mem stats will not be available. 
add" + " psutil to your environment or pass dump_system_stats argument as False to" + " disable warning.", + ) dump_system_stats = False self.dump_system_stats = dump_system_stats self.last_log_time: float = None - def update(self, name: str, inc: int = 1, total: int = None, message: str = None, label: str = None) -> None: + def update( + self, name: str, inc: int = 1, total: int = None, message: str = None, label: str = None + ) -> None: counter_key = f"{name}_{label}" if label else name if counter_key not in self.counters: @@ -169,7 +200,10 @@ def dump_counters(self) -> None: items_per_second_str = f"{items_per_second:.2f}/s" message = f"[{self.messages[name]}]" if self.messages[name] is not None else "" - counter_line = f"{info.description}: {progress} {percentage} | Time: {elapsed_time_str} | Rate: {items_per_second_str} {message}" + counter_line = ( + f"{info.description}: {progress} {percentage} | Time: {elapsed_time_str} | Rate:" + f" {items_per_second_str} {message}" + ) log_lines.append(counter_line.strip()) if self.dump_system_stats: @@ -177,10 +211,13 @@ def dump_counters(self) -> None: process = psutil.Process(os.getpid()) mem_info = process.memory_info() - current_mem = mem_info.rss / (1024 ** 2) # Convert to MB + current_mem = mem_info.rss / (1024**2) # Convert to MB mem_percent = psutil.virtual_memory().percent cpu_percent = process.cpu_percent() - log_lines.append(f"Memory usage: {current_mem:.2f} MB ({mem_percent:.2f}%) | CPU usage: {cpu_percent:.2f}%") + log_lines.append( + f"Memory usage: {current_mem:.2f} MB ({mem_percent:.2f}%) | CPU usage:" + f" {cpu_percent:.2f}%" + ) log_lines.append("") log_message = "\n".join(log_lines) @@ -218,12 +255,16 @@ def __init__(self, single_bar: bool = False, **tqdm_kwargs: Any) -> None: global tqdm from tqdm import tqdm except ModuleNotFoundError: - raise MissingDependencyException("TqdmCollector", ["tqdm"], "We need tqdm to display progress bars.") + raise MissingDependencyException( + "TqdmCollector", ["tqdm"], "We need tqdm to display progress bars." 
+ ) self.single_bar = single_bar self._bars: Dict[str, tqdm[None]] = {} self.tqdm_kwargs = tqdm_kwargs or {} - def update(self, name: str, inc: int = 1, total: int = None, message: str = None, label: str = "") -> None: + def update( + self, name: str, inc: int = 1, total: int = None, message: str = None, label: str = "" + ) -> None: key = f"{name}_{label}" bar = self._bars.get(key) if bar is None: @@ -263,13 +304,19 @@ def __init__(self, single_bar: bool = True, **alive_kwargs: Any) -> None: from alive_progress import alive_bar except ModuleNotFoundError: - raise MissingDependencyException("AliveCollector", ["alive-progress"], "We need alive-progress to display progress bars.") + raise MissingDependencyException( + "AliveCollector", + ["alive-progress"], + "We need alive-progress to display progress bars.", + ) self.single_bar = single_bar self._bars: Dict[str, Any] = {} self._bars_contexts: Dict[str, ContextManager[Any]] = {} self.alive_kwargs = alive_kwargs or {} - def update(self, name: str, inc: int = 1, total: int = None, message: str = None, label: str = "") -> None: + def update( + self, name: str, inc: int = 1, total: int = None, message: str = None, label: str = "" + ) -> None: key = f"{name}_{label}" bar = self._bars.get(key) if bar is None: @@ -313,13 +360,23 @@ def __init__(self, single_bar: bool = False, **enlighten_kwargs: Any) -> None: global enlighten import enlighten - from enlighten import Counter as EnlCounter, StatusBar as EnlStatusBar, Manager as EnlManager + from enlighten import ( + Counter as EnlCounter, + StatusBar as EnlStatusBar, + Manager as EnlManager, + ) except ModuleNotFoundError: - raise MissingDependencyException("EnlightenCollector", ["enlighten"], "We need enlighten to display progress bars with a space for log messages.") + raise MissingDependencyException( + "EnlightenCollector", + ["enlighten"], + "We need enlighten to display progress bars with a space for log messages.", + ) self.single_bar = single_bar self.enlighten_kwargs = enlighten_kwargs - def update(self, name: str, inc: int = 1, total: int = None, message: str = None, label: str = "") -> None: + def update( + self, name: str, inc: int = 1, total: int = None, message: str = None, label: str = "" + ) -> None: key = f"{name}_{label}" bar = self._bars.get(key) if bar is None: @@ -328,7 +385,9 @@ def update(self, name: str, inc: int = 1, total: int = None, message: str = Non if len(self._bars) > 0 and self.single_bar: # do not add any more counters return - bar = self._manager.counter(desc=name, total=total, leave=True, force=True, **self.enlighten_kwargs) + bar = self._manager.counter( + desc=name, total=total, leave=True, force=True, **self.enlighten_kwargs + ) bar.refresh() self._bars[key] = bar bar.update(inc) @@ -336,7 +395,9 @@ def update(self, name: str, inc: int = 1, total: int = None, message: str = Non def _start(self, step: str) -> None: self._bars = {} self._manager = enlighten.get_manager(enabled=True) - self._status = self._manager.status_bar(leave=True, justify=enlighten.Justify.CENTER, fill="=") + self._status = self._manager.status_bar( + leave=True, justify=enlighten.Justify.CENTER, fill="=" + ) self._status.update(step) def _stop(self) -> None: @@ -352,4 +413,4 @@ def _stop(self) -> None: self._status = None -NULL_COLLECTOR = NullCollector() +NULL_COLLECTOR = NullCollector() diff --git a/dlt/common/runtime/exec_info.py b/dlt/common/runtime/exec_info.py index ecb8376aa7..3aa19c83ab 100644 --- a/dlt/common/runtime/exec_info.py +++ b/dlt/common/runtime/exec_info.py @@ -1,13 +1,16 
@@ import io import os import contextlib +import sys +import multiprocessing +import platform +from dlt.common.runtime.typing import TExecutionContext, TVersion, TExecInfoNames from dlt.common.typing import StrStr, StrAny, Literal, List from dlt.common.utils import filter_env_vars -from dlt.version import __version__ +from dlt.version import __version__, DLT_PKG_NAME -TExecInfoNames = Literal["kubernetes", "docker", "codespaces", "github_actions", "airflow", "notebook", "colab","aws_lambda","gcp_cloud_function"] # if one of these environment variables is set, we assume to be running in CI env CI_ENVIRONMENT_TELL = [ "bamboo.buildKey", @@ -100,7 +103,7 @@ def is_running_in_airflow_task() -> bool: from airflow.operators.python import get_current_context context = get_current_context() - return context is not None and 'ti' in context + return context is not None and "ti" in context except Exception: return False @@ -163,4 +166,16 @@ def is_aws_lambda() -> bool: def is_gcp_cloud_function() -> bool: "Return True if the process is running in the serverless platform GCP Cloud Functions" - return os.environ.get("FUNCTION_NAME") is not None \ No newline at end of file + return os.environ.get("FUNCTION_NAME") is not None + + +def get_execution_context() -> TExecutionContext: + "Get execution context information" + return TExecutionContext( + ci_run=in_continuous_integration(), + python=sys.version.split(" ")[0], + cpu=multiprocessing.cpu_count(), + exec_info=exec_info_names(), + os=TVersion(name=platform.system(), version=platform.release()), + library=TVersion(name=DLT_PKG_NAME, version=__version__), + ) diff --git a/dlt/common/runtime/json_logging.py b/dlt/common/runtime/json_logging.py index bfff063dab..042236a093 100644 --- a/dlt/common/runtime/json_logging.py +++ b/dlt/common/runtime/json_logging.py @@ -1,4 +1,3 @@ - import logging from datetime import datetime # noqa: I251 import traceback @@ -8,7 +7,7 @@ from dlt.common.json import json from dlt.common.typing import DictStrAny, StrAny -EMPTY_VALUE = '-' +EMPTY_VALUE = "-" JSON_SERIALIZER = lambda log: json.dumps(log) COMPONENT_ID = EMPTY_VALUE COMPONENT_NAME = EMPTY_VALUE @@ -17,15 +16,35 @@ # The list contains all the attributes listed in # http://docs.python.org/library/logging.html#logrecord-attributes RECORD_ATTR_SKIP_LIST = [ - 'asctime', 'created', 'exc_info', 'exc_text', 'filename', 'args', - 'funcName', 'id', 'levelname', 'levelno', 'lineno', 'module', 'msg', - 'msecs', 'msecs', 'message', 'name', 'pathname', 'process', - 'processName', 'relativeCreated', 'thread', 'threadName', 'extra', + "asctime", + "created", + "exc_info", + "exc_text", + "filename", + "args", + "funcName", + "id", + "levelname", + "levelno", + "lineno", + "module", + "msg", + "msecs", + "msecs", + "message", + "name", + "pathname", + "process", + "processName", + "relativeCreated", + "thread", + "threadName", + "extra", # Also exclude legacy 'props' - 'props', + "props", ] -RECORD_ATTR_SKIP_LIST.append('stack_info') +RECORD_ATTR_SKIP_LIST.append("stack_info") EASY_TYPES = (str, bool, dict, float, int, list, type(None)) _default_formatter: Type[logging.Formatter] = None @@ -34,10 +53,10 @@ def config_root_logger() -> None: """ - You must call this if you are using root logger. - Make all root logger' handlers produce JSON format - & remove duplicate handlers for request instrumentation logging. - Please made sure that you call this after you called "logging.basicConfig() or logging.getLogger() + You must call this if you are using root logger. 
+ Make all root logger' handlers produce JSON format + & remove duplicate handlers for request instrumentation logging. + Please made sure that you call this after you called "logging.basicConfig() or logging.getLogger() """ global _default_formatter update_formatter_for_loggers([logging.root], _default_formatter) @@ -54,7 +73,9 @@ def init(custom_formatter: Type[logging.Formatter] = None) -> None: if custom_formatter: if not issubclass(custom_formatter, logging.Formatter): - raise ValueError('custom_formatter is not subclass of logging.Formatter', custom_formatter) + raise ValueError( + "custom_formatter is not subclass of logging.Formatter", custom_formatter + ) _default_formatter = custom_formatter if custom_formatter else JSONLogFormatter logging._defaultFormatter = _default_formatter() # type: ignore @@ -66,8 +87,9 @@ def init(custom_formatter: Type[logging.Formatter] = None) -> None: class BaseJSONFormatter(logging.Formatter): """ - Base class for JSON formatters + Base class for JSON formatters """ + base_object_common: DictStrAny = {} def __init__(self, *args: Any, **kw: Any) -> None: @@ -98,7 +120,7 @@ def _get_extra_fields(self, record: logging.LogRecord) -> StrAny: fields: DictStrAny = {} if record.args: - fields['msg'] = record.msg + fields["msg"] = record.msg for key, value in record.__dict__.items(): if key not in RECORD_ATTR_SKIP_LIST: @@ -108,15 +130,14 @@ def _get_extra_fields(self, record: logging.LogRecord) -> StrAny: fields[key] = repr(value) # Always add 'props' to the root of the log, assumes props is a dict - if hasattr(record, 'props') and isinstance(record.props, dict): + if hasattr(record, "props") and isinstance(record.props, dict): fields.update(record.props) return fields - def _sanitize_log_msg(record: logging.LogRecord) -> str: - return record.getMessage().replace('\n', '_').replace('\r', '_').replace('\t', '_') + return record.getMessage().replace("\n", "_").replace("\r", "_").replace("\t", "_") class JSONLogFormatter(BaseJSONFormatter): @@ -130,25 +151,27 @@ def get_exc_fields(self, record: logging.LogRecord) -> StrAny: else: exc_info = record.exc_text return { - 'exc_info': exc_info, - 'filename': record.filename, + "exc_info": exc_info, + "filename": record.filename, } @classmethod def format_exception(cls, exc_info: Any) -> str: - return ''.join(traceback.format_exception(*exc_info)) if exc_info else '' + return "".join(traceback.format_exception(*exc_info)) if exc_info else "" def _format_log_object(self, record: logging.LogRecord) -> DictStrAny: json_log_object = super(JSONLogFormatter, self)._format_log_object(record) - json_log_object.update({ - "msg": _sanitize_log_msg(record), - "type": "log", - "logger": record.name, - "thread": record.threadName, - "level": record.levelname, - "module": record.module, - "line_no": record.lineno, - }) + json_log_object.update( + { + "msg": _sanitize_log_msg(record), + "type": "log", + "logger": record.name, + "thread": record.threadName, + "level": record.levelname, + "module": record.module, + "line_no": record.lineno, + } + ) if record.exc_info or record.exc_text: json_log_object.update(self.get_exc_fields(record)) @@ -156,7 +179,9 @@ def _format_log_object(self, record: logging.LogRecord) -> DictStrAny: return json_log_object -def update_formatter_for_loggers(loggers_iter: List[Logger], formatter: Type[logging.Formatter]) -> None: +def update_formatter_for_loggers( + loggers_iter: List[Logger], formatter: Type[logging.Formatter] +) -> None: """ :param formatter: :param loggers_iter: @@ -174,6 +199,12 @@ def 
epoch_nano_second(datetime_: datetime) -> int: def iso_time_format(datetime_: datetime) -> str: - return '%04d-%02d-%02dT%02d:%02d:%02d.%03dZ' % ( - datetime_.year, datetime_.month, datetime_.day, datetime_.hour, datetime_.minute, datetime_.second, - int(datetime_.microsecond / 1000)) + return "%04d-%02d-%02dT%02d:%02d:%02d.%03dZ" % ( + datetime_.year, + datetime_.month, + datetime_.day, + datetime_.hour, + datetime_.minute, + datetime_.second, + int(datetime_.microsecond / 1000), + ) diff --git a/dlt/common/runtime/logger.py b/dlt/common/runtime/logger.py index f833d36608..9dd8ce4e3a 100644 --- a/dlt/common/runtime/logger.py +++ b/dlt/common/runtime/logger.py @@ -14,12 +14,12 @@ class LogMethod(Protocol): - def __call__(self, msg: str, *args: Any, **kwds: Any) -> None: - ... + def __call__(self, msg: str, *args: Any, **kwds: Any) -> None: ... def __getattr__(name: str) -> LogMethod: """Forwards log method calls (debug, info, error etc.) to LOGGER""" + def wrapper(msg: str, *args: Any, **kwargs: Any) -> None: if LOGGER: # skip stack frames when displaying log so the original logging frame is displayed @@ -28,6 +28,7 @@ def wrapper(msg: str, *args: Any, **kwargs: Any) -> None: # exception has one more frame stacklevel = 3 getattr(LOGGER, name)(msg, *args, **kwargs, stacklevel=stacklevel) + return wrapper @@ -50,11 +51,8 @@ def init_logging(config: RunConfiguration) -> None: version = dlt_version_info(config.pipeline_name) LOGGER = _init_logging( - DLT_LOGGER_NAME, - config.log_level, - config.log_format, - config.pipeline_name, - version) + DLT_LOGGER_NAME, config.log_level, config.log_format, config.pipeline_name, version + ) def is_logging() -> bool: @@ -84,7 +82,9 @@ def format(self, record: LogRecord) -> str: # noqa: A003 return s -def _init_logging(logger_name: str, level: str, fmt: str, component: str, version: StrStr) -> Logger: +def _init_logging( + logger_name: str, level: str, fmt: str, component: str, version: StrStr +) -> Logger: if logger_name == "root": logging.basicConfig(level=level) handler = logging.getLogger().handlers[0] @@ -120,6 +120,6 @@ def _format_log_object(self, record: LogRecord) -> Any: if logger_name == "root": json_logging.config_root_logger() else: - handler.setFormatter(_MetricsFormatter(fmt=fmt, style='{')) + handler.setFormatter(_MetricsFormatter(fmt=fmt, style="{")) return logger diff --git a/dlt/common/runtime/prometheus.py b/dlt/common/runtime/prometheus.py index 0634670a5a..1b233ffa9b 100644 --- a/dlt/common/runtime/prometheus.py +++ b/dlt/common/runtime/prometheus.py @@ -23,7 +23,9 @@ def get_metrics_from_prometheus(gauges: Iterable[MetricWrapperBase]) -> StrAny: name = g._name if g._is_parent(): # for gauges containing many label values, enumerate all - metrics.update(get_metrics_from_prometheus([g.labels(*label) for label in g._metrics.keys()])) + metrics.update( + get_metrics_from_prometheus([g.labels(*label) for label in g._metrics.keys()]) + ) continue # for gauges with labels: add the label to the name and enumerate samples if g._labelvalues: diff --git a/dlt/common/runtime/segment.py b/dlt/common/runtime/segment.py index b8d533cccb..e302767fcc 100644 --- a/dlt/common/runtime/segment.py +++ b/dlt/common/runtime/segment.py @@ -2,41 +2,41 @@ # several code fragments come from https://github.com/RasaHQ/rasa/blob/main/rasa/telemetry.py import os -import sys -import multiprocessing + import atexit import base64 import requests -import platform from concurrent.futures import ThreadPoolExecutor from typing import Literal, Optional from 
dlt.common.configuration.paths import get_dlt_data_dir from dlt.common.runtime import logger +from dlt.common.managed_thread_pool import ManagedThreadPool from dlt.common.configuration.specs import RunConfiguration -from dlt.common.runtime.exec_info import exec_info_names, in_continuous_integration +from dlt.common.runtime.exec_info import get_execution_context, TExecutionContext from dlt.common.typing import DictStrAny, StrAny from dlt.common.utils import uniq_id -from dlt.version import __version__, DLT_PKG_NAME +from dlt.version import __version__ TEventCategory = Literal["pipeline", "command", "helper"] -_THREAD_POOL: ThreadPoolExecutor = None +_THREAD_POOL: ManagedThreadPool = ManagedThreadPool(1) _SESSION: requests.Session = None _WRITE_KEY: str = None _SEGMENT_REQUEST_TIMEOUT = (1.0, 1.0) # short connect & send timeouts _SEGMENT_ENDPOINT = "https://api.segment.io/v1/track" -_SEGMENT_CONTEXT: DictStrAny = None +_SEGMENT_CONTEXT: TExecutionContext = None def init_segment(config: RunConfiguration) -> None: - assert config.dlthub_telemetry_segment_write_key, "dlthub_telemetry_segment_write_key not present in RunConfiguration" + assert ( + config.dlthub_telemetry_segment_write_key + ), "dlthub_telemetry_segment_write_key not present in RunConfiguration" # create thread pool to send telemetry to segment - global _THREAD_POOL, _WRITE_KEY, _SESSION - if not _THREAD_POOL: - _THREAD_POOL = ThreadPoolExecutor(1) + global _WRITE_KEY, _SESSION + if not _SESSION: _SESSION = requests.Session() # flush pool on exit atexit.register(_at_exit_cleanup) @@ -51,11 +51,7 @@ def disable_segment() -> None: _at_exit_cleanup() -def track( - event_category: TEventCategory, - event_name: str, - properties: DictStrAny -) -> None: +def track(event_category: TEventCategory, event_name: str, properties: DictStrAny) -> None: """Tracks a telemetry event. The segment event name will be created as "{event_category}_{event_name} @@ -68,10 +64,7 @@ def track( if properties is None: properties = {} - properties.update({ - "event_category": event_category, - "event_name": event_name - }) + properties.update({"event_category": event_category, "event_name": event_name}) try: _send_event(f"{event_category}_{event_name}", properties, _default_context_fields()) @@ -86,10 +79,9 @@ def before_send(event: DictStrAny) -> Optional[DictStrAny]: def _at_exit_cleanup() -> None: - global _THREAD_POOL, _SESSION, _WRITE_KEY, _SEGMENT_CONTEXT - if _THREAD_POOL: - _THREAD_POOL.shutdown(wait=True) - _THREAD_POOL = None + global _SESSION, _WRITE_KEY, _SEGMENT_CONTEXT + if _SESSION: + _THREAD_POOL.stop(True) _SESSION.close() _SESSION = None _WRITE_KEY = None @@ -127,11 +119,7 @@ def get_anonymous_id() -> str: return anonymous_id -def _segment_request_payload( - event_name: str, - properties: StrAny, - context: StrAny -) -> DictStrAny: +def _segment_request_payload(event_name: str, properties: StrAny, context: StrAny) -> DictStrAny: """Compose a valid payload for the segment API. Args: @@ -150,7 +138,7 @@ def _segment_request_payload( } -def _default_context_fields() -> DictStrAny: +def _default_context_fields() -> TExecutionContext: """Return a dictionary that contains the default context values. 
Return: @@ -161,14 +149,7 @@ def _default_context_fields() -> DictStrAny: if not _SEGMENT_CONTEXT: # Make sure to update the example in docs/docs/telemetry/telemetry.mdx # if you change / add context - _SEGMENT_CONTEXT = { - "os": {"name": platform.system(), "version": platform.release()}, - "ci_run": in_continuous_integration(), - "python": sys.version.split(" ")[0], - "library": {"name": DLT_PKG_NAME, "version": __version__}, - "cpu": multiprocessing.cpu_count(), - "exec_info": exec_info_names() - } + _SEGMENT_CONTEXT = get_execution_context() # avoid returning the cached dict --> caller could modify the dictionary... # usually we would use `lru_cache`, but that doesn't return a dict copy and @@ -176,11 +157,7 @@ def _default_context_fields() -> DictStrAny: return _SEGMENT_CONTEXT.copy() -def _send_event( - event_name: str, - properties: StrAny, - context: StrAny -) -> None: +def _send_event(event_name: str, properties: StrAny, context: StrAny) -> None: """Report the contents segment of an event to the /track Segment endpoint. Args: @@ -205,7 +182,9 @@ def _send_event( def _future_send() -> None: # import time # start_ts = time.time() - resp = _SESSION.post(_SEGMENT_ENDPOINT, headers=headers, json=payload, timeout=_SEGMENT_REQUEST_TIMEOUT) + resp = _SESSION.post( + _SEGMENT_ENDPOINT, headers=headers, json=payload, timeout=_SEGMENT_REQUEST_TIMEOUT + ) # print(f"SENDING TO Segment done {resp.status_code} {time.time() - start_ts} {base64.b64decode(_WRITE_KEY)}") # handle different failure cases if resp.status_code != 200: @@ -216,8 +195,6 @@ def _future_send() -> None: else: data = resp.json() if not data.get("success"): - logger.debug( - f"Segment telemetry request returned a failure. Response: {data}" - ) + logger.debug(f"Segment telemetry request returned a failure. Response: {data}") - _THREAD_POOL.submit(_future_send) + _THREAD_POOL.thread_pool.submit(_future_send) diff --git a/dlt/common/runtime/sentry.py b/dlt/common/runtime/sentry.py index 8bc70e46cf..7ea45affc0 100644 --- a/dlt/common/runtime/sentry.py +++ b/dlt/common/runtime/sentry.py @@ -8,7 +8,11 @@ from sentry_sdk.transport import HttpTransport from sentry_sdk.integrations.logging import LoggingIntegration except ModuleNotFoundError: - raise MissingDependencyException("sentry telemetry", ["sentry-sdk"], "Please install sentry-sdk if you have `sentry_dsn` set in your RuntimeConfiguration") + raise MissingDependencyException( + "sentry telemetry", + ["sentry-sdk"], + "Please install sentry-sdk if you have `sentry_dsn` set in your RuntimeConfiguration", + ) from dlt.common.typing import DictStrAny, Any, StrAny from dlt.common.configuration.specs import RunConfiguration @@ -27,10 +31,10 @@ def init_sentry(config: RunConfiguration) -> None: before_send=before_send, traces_sample_rate=1.0, # disable tornado, boto3, sql alchemy etc. 
- auto_enabling_integrations = False, + auto_enabling_integrations=False, integrations=[_get_sentry_log_level(config)], release=release, - transport=_SentryHttpTransport + transport=_SentryHttpTransport, ) # add version tags for k, v in version.items(): @@ -58,12 +62,11 @@ def before_send(event: DictStrAny, _unused_hint: Optional[StrAny] = None) -> Opt class _SentryHttpTransport(HttpTransport): - timeout: float = 0 def _get_pool_options(self, *a: Any, **kw: Any) -> DictStrAny: rv = HttpTransport._get_pool_options(self, *a, **kw) - rv['timeout'] = self.timeout + rv["timeout"] = self.timeout return rv @@ -71,6 +74,6 @@ def _get_sentry_log_level(config: RunConfiguration) -> LoggingIntegration: log_level = logging._nameToLevel[config.log_level] event_level = logging.WARNING if log_level <= logging.WARNING else log_level return LoggingIntegration( - level=logging.INFO, # Capture info and above as breadcrumbs - event_level=event_level # Send errors as events + level=logging.INFO, # Capture info and above as breadcrumbs + event_level=event_level, # Send errors as events ) diff --git a/dlt/common/runtime/slack.py b/dlt/common/runtime/slack.py index ce5e90b300..15da89f333 100644 --- a/dlt/common/runtime/slack.py +++ b/dlt/common/runtime/slack.py @@ -4,13 +4,10 @@ def send_slack_message(incoming_hook: str, message: str, is_markdown: bool = True) -> None: """Sends a `message` to Slack `incoming_hook`, by default formatted as markdown.""" - r = requests.post(incoming_hook, - data = json.dumps({ - "text": message, - "mrkdwn": is_markdown - } - ).encode("utf-8"), - headers={'Content-Type': 'application/json;charset=utf-8'} + r = requests.post( + incoming_hook, + data=json.dumps({"text": message, "mrkdwn": is_markdown}).encode("utf-8"), + headers={"Content-Type": "application/json;charset=utf-8"}, ) if r.status_code >= 400: logger.warning(f"Could not post the notification to slack: {r.status_code}") diff --git a/dlt/common/runtime/telemetry.py b/dlt/common/runtime/telemetry.py index 86b3355985..e03bc04d79 100644 --- a/dlt/common/runtime/telemetry.py +++ b/dlt/common/runtime/telemetry.py @@ -21,6 +21,7 @@ def start_telemetry(config: RunConfiguration) -> None: if config.sentry_dsn: # may raise if sentry is not installed from dlt.common.runtime.sentry import init_sentry + init_sentry(config) if config.dlthub_telemetry: @@ -36,6 +37,7 @@ def stop_telemetry() -> None: try: from dlt.common.runtime.sentry import disable_sentry + disable_sentry() except ImportError: pass @@ -49,14 +51,18 @@ def is_telemetry_started() -> bool: return _TELEMETRY_STARTED -def with_telemetry(category: TEventCategory, command: str, track_before: bool, *args: str) -> Callable[[TFun], TFun]: +def with_telemetry( + category: TEventCategory, command: str, track_before: bool, *args: str +) -> Callable[[TFun], TFun]: """Adds telemetry to f: TFun and add optional f *args values to `properties` of telemetry event""" + def decorator(f: TFun) -> TFun: sig: inspect.Signature = inspect.signature(f) + def _wrap(*f_args: Any, **f_kwargs: Any) -> Any: # look for additional arguments bound_args = sig.bind(*f_args, **f_kwargs) - props = {p:bound_args.arguments[p] for p in args if p in bound_args.arguments} + props = {p: bound_args.arguments[p] for p in args if p in bound_args.arguments} start_ts = time.time() def _track(success: bool) -> None: @@ -88,4 +94,5 @@ def _track(success: bool) -> None: raise return _wrap # type: ignore - return decorator \ No newline at end of file + + return decorator diff --git a/dlt/common/runtime/typing.py 
b/dlt/common/runtime/typing.py new file mode 100644 index 0000000000..eb167e9002 --- /dev/null +++ b/dlt/common/runtime/typing.py @@ -0,0 +1,36 @@ +from typing import ( + List, + Literal, + TypedDict, +) + + +TExecInfoNames = Literal[ + "kubernetes", + "docker", + "codespaces", + "github_actions", + "airflow", + "notebook", + "colab", + "aws_lambda", + "gcp_cloud_function", +] + + +class TVersion(TypedDict): + """TypeDict representing a library version""" + + name: str + version: str + + +class TExecutionContext(TypedDict): + """TypeDict representing the runtime context info""" + + ci_run: bool + python: str + cpu: int + exec_info: List[TExecInfoNames] + library: TVersion + os: TVersion diff --git a/dlt/common/schema/__init__.py b/dlt/common/schema/__init__.py index 1a3b4db223..9cb5e2ab76 100644 --- a/dlt/common/schema/__init__.py +++ b/dlt/common/schema/__init__.py @@ -1,9 +1,32 @@ -from dlt.common.schema.typing import TSchemaUpdate, TSchemaTables, TTableSchema, TStoredSchema, TTableSchemaColumns, TColumnHint, TColumnSchema, TColumnSchemaBase +from dlt.common.schema.typing import ( + TSchemaContractDict, + TSchemaUpdate, + TSchemaTables, + TTableSchema, + TStoredSchema, + TTableSchemaColumns, + TColumnHint, + TColumnSchema, + TColumnSchemaBase, +) from dlt.common.schema.typing import COLUMN_HINTS -from dlt.common.schema.schema import Schema +from dlt.common.schema.schema import Schema, DEFAULT_SCHEMA_CONTRACT_MODE +from dlt.common.schema.exceptions import DataValidationError from dlt.common.schema.utils import verify_schema_hash __all__ = [ - "TSchemaUpdate", "TSchemaTables", "TTableSchema", "TStoredSchema", "TTableSchemaColumns", "TColumnHint", - "TColumnSchema", "TColumnSchemaBase", "COLUMN_HINTS", "Schema", "verify_schema_hash" + "TSchemaUpdate", + "TSchemaTables", + "TTableSchema", + "TStoredSchema", + "TTableSchemaColumns", + "TColumnHint", + "TColumnSchema", + "TColumnSchemaBase", + "COLUMN_HINTS", + "Schema", + "verify_schema_hash", + "TSchemaContractDict", + "DEFAULT_SCHEMA_CONTRACT_MODE", + "DataValidationError", ] diff --git a/dlt/common/schema/detections.py b/dlt/common/schema/detections.py index 207c934091..30b23706af 100644 --- a/dlt/common/schema/detections.py +++ b/dlt/common/schema/detections.py @@ -43,7 +43,7 @@ def is_iso_date(t: Type[Any], v: Any) -> Optional[TDataType]: if not v: return None # don't cast iso timestamps as dates - if is_iso_timestamp(t,v): + if is_iso_timestamp(t, v): return None # strict autodetection of iso timestamps try: diff --git a/dlt/common/schema/exceptions.py b/dlt/common/schema/exceptions.py index 5f638a111d..7f73bcbf36 100644 --- a/dlt/common/schema/exceptions.py +++ b/dlt/common/schema/exceptions.py @@ -2,6 +2,11 @@ from dlt.common.exceptions import DltException from dlt.common.data_types import TDataType +from dlt.common.schema.typing import ( + TSchemaContractDict, + TSchemaContractEntities, + TSchemaEvolutionMode, +) class SchemaException(DltException): @@ -13,28 +18,41 @@ class InvalidSchemaName(ValueError, SchemaException): def __init__(self, name: str) -> None: self.name = name - super().__init__(f"{name} is an invalid schema/source name. The source or schema name must be a valid Python identifier ie. a snake case function name and have maximum {self.MAXIMUM_SCHEMA_NAME_LENGTH} characters. Ideally should contain only small letters, numbers and underscores.") + super().__init__( + f"{name} is an invalid schema/source name. The source or schema name must be a valid" + " Python identifier ie. 
a snake case function name and have maximum" + f" {self.MAXIMUM_SCHEMA_NAME_LENGTH} characters. Ideally should contain only small" + " letters, numbers and underscores." + ) -# class InvalidDatasetName(ValueError, SchemaException): -# def __init__(self, name: str, normalized_name: str) -> None: -# self.name = name -# super().__init__(f"{name} is an invalid dataset name. The dataset name must conform to wide range of destinations and ideally should contain only small letters, numbers and underscores. Try {normalized_name} instead as suggested by current naming module.") - class InvalidDatasetName(ValueError, SchemaException): def __init__(self, destination_name: str) -> None: self.destination_name = destination_name - super().__init__(f"Destination {destination_name} does not accept empty datasets. Please pass the dataset name to the destination configuration ie. via dlt pipeline.") + super().__init__( + f"Destination {destination_name} does not accept empty datasets. Please pass the" + " dataset name to the destination configuration ie. via dlt pipeline." + ) class CannotCoerceColumnException(SchemaException): - def __init__(self, table_name: str, column_name: str, from_type: TDataType, to_type: TDataType, coerced_value: Any) -> None: + def __init__( + self, + table_name: str, + column_name: str, + from_type: TDataType, + to_type: TDataType, + coerced_value: Any, + ) -> None: self.table_name = table_name self.column_name = column_name self.from_type = from_type self.to_type = to_type self.coerced_value = coerced_value - super().__init__(f"Cannot coerce type in table {table_name} column {column_name} existing type {from_type} coerced type {to_type} value: {coerced_value}") + super().__init__( + f"Cannot coerce type in table {table_name} column {column_name} existing type" + f" {from_type} coerced type {to_type} value: {coerced_value}" + ) class TablePropertiesConflictException(SchemaException): @@ -43,19 +61,27 @@ def __init__(self, table_name: str, prop_name: str, val1: str, val2: str): self.prop_name = prop_name self.val1 = val1 self.val2 = val2 - super().__init__(f"Cannot merge partial tables for {table_name} due to property {prop_name}: {val1} != {val2}") + super().__init__( + f"Cannot merge partial tables for {table_name} due to property {prop_name}: {val1} !=" + f" {val2}" + ) class ParentTableNotFoundException(SchemaException): def __init__(self, table_name: str, parent_table_name: str, explanation: str = "") -> None: self.table_name = table_name self.parent_table_name = parent_table_name - super().__init__(f"Parent table {parent_table_name} for {table_name} was not found in the schema.{explanation}") + super().__init__( + f"Parent table {parent_table_name} for {table_name} was not found in the" + f" schema.{explanation}" + ) class CannotCoerceNullException(SchemaException): def __init__(self, table_name: str, column_name: str) -> None: - super().__init__(f"Cannot coerce NULL in table {table_name} column {column_name} which is not nullable") + super().__init__( + f"Cannot coerce NULL in table {table_name} column {column_name} which is not nullable" + ) class SchemaCorruptedException(SchemaException): @@ -63,14 +89,64 @@ class SchemaCorruptedException(SchemaException): class SchemaEngineNoUpgradePathException(SchemaException): - def __init__(self, schema_name: str, init_engine: int, from_engine: int, to_engine: int) -> None: + def __init__( + self, schema_name: str, init_engine: int, from_engine: int, to_engine: int + ) -> None: self.schema_name = schema_name self.init_engine = 
init_engine self.from_engine = from_engine self.to_engine = to_engine - super().__init__(f"No engine upgrade path in schema {schema_name} from {init_engine} to {to_engine}, stopped at {from_engine}") + super().__init__( + f"No engine upgrade path in schema {schema_name} from {init_engine} to {to_engine}," + f" stopped at {from_engine}" + ) + + +class DataValidationError(SchemaException): + def __init__( + self, + schema_name: str, + table_name: str, + column_name: str, + schema_entity: TSchemaContractEntities, + contract_mode: TSchemaEvolutionMode, + table_schema: Any, + schema_contract: TSchemaContractDict, + data_item: Any = None, + extended_info: str = None, + ) -> None: + """Raised when `data_item` violates `contract_mode` on a `schema_entity` as defined by `table_schema` + + Schema, table and column names are given as a context and full `schema_contract` and causing `data_item` as an evidence. + """ + msg = "" + if schema_name: + msg = f"Schema: {schema_name} " + msg += f"Table: {table_name} " + if column_name: + msg += f"Column: {column_name}" + msg = ( + "In " + + msg + + f" . Contract on {schema_entity} with mode {contract_mode} is violated. " + + (extended_info or "") + ) + super().__init__(msg) + self.schema_name = schema_name + self.table_name = table_name + self.column_name = column_name + + # violated contract + self.schema_entity = schema_entity + self.contract_mode = contract_mode + + # some evidence + self.table_schema = table_schema + self.schema_contract = schema_contract + self.data_item = data_item + class UnknownTableException(SchemaException): def __init__(self, table_name: str) -> None: self.table_name = table_name - super().__init__(f"Trying to access unknown table {table_name}.") \ No newline at end of file + super().__init__(f"Trying to access unknown table {table_name}.") diff --git a/dlt/common/schema/schema.py b/dlt/common/schema/schema.py index 77a5ae8e8e..e95699b91e 100644 --- a/dlt/common/schema/schema.py +++ b/dlt/common/schema/schema.py @@ -1,20 +1,61 @@ import yaml from copy import copy, deepcopy -from typing import ClassVar, Dict, List, Mapping, Optional, Sequence, Tuple, Any, cast +from typing import ClassVar, Dict, List, Mapping, Optional, Sequence, Tuple, Any, cast, Literal from dlt.common import json from dlt.common.utils import extend_list_deduplicated -from dlt.common.typing import DictStrAny, StrAny, REPattern, SupportsVariant, VARIANT_FIELD_FORMAT, TDataItem +from dlt.common.typing import ( + DictStrAny, + StrAny, + REPattern, + SupportsVariant, + VARIANT_FIELD_FORMAT, + TDataItem, +) from dlt.common.normalizers import TNormalizersConfig, explicit_normalizers, import_normalizers from dlt.common.normalizers.naming import NamingConvention from dlt.common.normalizers.json import DataItemNormalizer, TNormalizedRowIterator from dlt.common.schema import utils from dlt.common.data_types import py_type_to_sc_type, coerce_value, TDataType -from dlt.common.schema.typing import (COLUMN_HINTS, SCHEMA_ENGINE_VERSION, LOADS_TABLE_NAME, VERSION_TABLE_NAME, STATE_TABLE_NAME, TPartialTableSchema, TSchemaSettings, TSimpleRegex, TStoredSchema, - TSchemaTables, TTableSchema, TTableSchemaColumns, TColumnSchema, TColumnProp, TColumnHint, TTypeDetections) -from dlt.common.schema.exceptions import (CannotCoerceColumnException, CannotCoerceNullException, InvalidSchemaName, - ParentTableNotFoundException, SchemaCorruptedException) +from dlt.common.schema.typing import ( + COLUMN_HINTS, + DLT_NAME_PREFIX, + SCHEMA_ENGINE_VERSION, + LOADS_TABLE_NAME, + VERSION_TABLE_NAME, + 
STATE_TABLE_NAME, + TPartialTableSchema, + TSchemaContractEntities, + TSchemaEvolutionMode, + TSchemaSettings, + TSimpleRegex, + TStoredSchema, + TSchemaTables, + TTableSchema, + TTableSchemaColumns, + TColumnSchema, + TColumnProp, + TColumnHint, + TTypeDetections, + TSchemaContractDict, + TSchemaContract, +) +from dlt.common.schema.exceptions import ( + CannotCoerceColumnException, + CannotCoerceNullException, + InvalidSchemaName, + ParentTableNotFoundException, + SchemaCorruptedException, +) from dlt.common.validation import validate_dict +from dlt.common.schema.exceptions import DataValidationError + + +DEFAULT_SCHEMA_CONTRACT_MODE: TSchemaContractDict = { + "tables": "evolve", + "columns": "evolve", + "data_type": "evolve", +} class Schema: @@ -32,15 +73,15 @@ class Schema: state_table_name: str """Normalized name of the dlt state table""" - _schema_name: str _dlt_tables_prefix: str _stored_version: int # version at load/creation time _stored_version_hash: str # version hash at load/creation time + _stored_previous_hashes: Optional[List[str]] # list of ancestor hashes of the schema _imported_version_hash: str # version hash of recently imported schema _schema_description: str # optional schema description _schema_tables: TSchemaTables - _settings: TSchemaSettings # schema settings to hold default hints, preferred types and other settings + _settings: TSchemaSettings # schema settings to hold default hints, preferred types and other settings # list of preferred types: map regex on columns into types _compiled_preferred_types: List[Tuple[REPattern, TDataType]] @@ -60,7 +101,7 @@ def __init__(self, name: str, normalizers: TNormalizersConfig = None) -> None: self._reset_schema(name, normalizers) @classmethod - def from_dict(cls, d: DictStrAny) -> "Schema": + def from_dict(cls, d: DictStrAny, bump_version: bool = True) -> "Schema": # upgrade engine if needed stored_schema = utils.migrate_schema(d, d["engine_version"], cls.ENGINE_VERSION) # verify schema @@ -69,21 +110,25 @@ def from_dict(cls, d: DictStrAny) -> "Schema": stored_schema = utils.apply_defaults(stored_schema) # bump version if modified - utils.bump_version_if_modified(stored_schema) + if bump_version: + utils.bump_version_if_modified(stored_schema) return cls.from_stored_schema(stored_schema) @classmethod def from_stored_schema(cls, stored_schema: TStoredSchema) -> "Schema": # create new instance from dict - self: Schema = cls(stored_schema["name"], normalizers=stored_schema.get("normalizers", None)) + self: Schema = cls( + stored_schema["name"], normalizers=stored_schema.get("normalizers", None) + ) self._from_stored_schema(stored_schema) return self def replace_schema_content(self, schema: "Schema") -> None: self._reset_schema(schema.name, schema._normalizers_config) - self._from_stored_schema(schema.to_dict()) + # do not bump version so hash from `schema` is preserved + self._from_stored_schema(schema.to_dict(bump_version=False)) - def to_dict(self, remove_defaults: bool = False) -> TStoredSchema: + def to_dict(self, remove_defaults: bool = False, bump_version: bool = True) -> TStoredSchema: stored_schema: TStoredSchema = { "version": self._stored_version, "version_hash": self._stored_version_hash, @@ -91,7 +136,8 @@ def to_dict(self, remove_defaults: bool = False) -> TStoredSchema: "name": self._schema_name, "tables": self._schema_tables, "settings": self._settings, - "normalizers": self._normalizers_config + "normalizers": self._normalizers_config, + "previous_hashes": self._stored_previous_hashes, } if 
self._imported_version_hash and not remove_defaults: stored_schema["imported_version_hash"] = self._imported_version_hash @@ -99,13 +145,16 @@ def to_dict(self, remove_defaults: bool = False) -> TStoredSchema: stored_schema["description"] = self._schema_description # bump version if modified - utils.bump_version_if_modified(stored_schema) + if bump_version: + utils.bump_version_if_modified(stored_schema) # remove defaults after bumping version if remove_defaults: utils.remove_defaults(stored_schema) return stored_schema - def normalize_data_item(self, item: TDataItem, load_id: str, table_name: str) -> TNormalizedRowIterator: + def normalize_data_item( + self, item: TDataItem, load_id: str, table_name: str + ) -> TNormalizedRowIterator: return self.data_item_normalizer.normalize_data_item(item, load_id, table_name) def filter_row(self, table_name: str, row: StrAny) -> StrAny: @@ -122,7 +171,9 @@ def filter_row(self, table_name: str, row: StrAny) -> StrAny: # most of the schema do not use them return row - def _exclude(path: str, excludes: Sequence[REPattern], includes: Sequence[REPattern]) -> bool: + def _exclude( + path: str, excludes: Sequence[REPattern], includes: Sequence[REPattern] + ) -> bool: is_included = False is_excluded = any(exclude.search(path) for exclude in excludes) if is_excluded: @@ -151,16 +202,18 @@ def _exclude(path: str, excludes: Sequence[REPattern], includes: Sequence[REPatt break return row - def coerce_row(self, table_name: str, parent_table: str, row: StrAny) -> Tuple[DictStrAny, TPartialTableSchema]: + def coerce_row( + self, table_name: str, parent_table: str, row: StrAny + ) -> Tuple[DictStrAny, TPartialTableSchema]: """Fits values of fields present in `row` into a schema of `table_name`. Will coerce values into data types and infer new tables and column schemas. - Method expects that field names in row are already normalized. - * if table schema for `table_name` does not exist, new table is created - * if column schema for a field in `row` does not exist, it is inferred from data - * if incomplete column schema (no data type) exists, column is inferred from data and existing hints are applied - * fields with None value are removed + Method expects that field names in row are already normalized. 
+ * if table schema for `table_name` does not exist, new table is created + * if column schema for a field in `row` does not exist, it is inferred from data + * if incomplete column schema (no data type) exists, column is inferred from data and existing hints are applied + * fields with None value are removed - Returns tuple with row with coerced values and a partial table containing just the newly added columns or None if no changes were detected + Returns tuple with row with coerced values and a partial table containing just the newly added columns or None if no changes were detected """ # get existing or create a new table updated_table_partial: TPartialTableSchema = None @@ -176,7 +229,9 @@ def coerce_row(self, table_name: str, parent_table: str, row: StrAny) -> Tuple[D # just check if column is nullable if it exists self._coerce_null_value(table_columns, table_name, col_name) else: - new_col_name, new_col_def, new_v = self._coerce_non_null_value(table_columns, table_name, col_name, v) + new_col_name, new_col_def, new_v = self._coerce_non_null_value( + table_columns, table_name, col_name, v + ) new_row[new_col_name] = new_v if new_col_def: if not updated_table_partial: @@ -187,17 +242,166 @@ def coerce_row(self, table_name: str, parent_table: str, row: StrAny) -> Tuple[D return new_row, updated_table_partial + def apply_schema_contract( + self, + schema_contract: TSchemaContractDict, + partial_table: TPartialTableSchema, + data_item: TDataItem = None, + raise_on_freeze: bool = True, + ) -> Tuple[ + TPartialTableSchema, List[Tuple[TSchemaContractEntities, str, TSchemaEvolutionMode]] + ]: + """ + Checks if `schema_contract` allows for the `partial_table` to update the schema. It applies the contract dropping + the affected columns or the whole `partial_table`. It generates and returns a set of filters that should be applied to incoming data in order to modify it + so it conforms to the contract. `data_item` is provided only as evidence in case DataValidationError is raised. + + Example `schema_contract`: + { + "tables": "freeze", + "columns": "evolve", + "data_type": "discard_row" + } + + Settings for table affects new tables, settings for column affects new columns and settings for data_type affects new variant columns. Each setting can be set to one of: + * evolve: allow all changes + * freeze: allow no change and fail the load + * discard_row: allow no schema change and filter out the row + * discard_value: allow no schema change and filter out the value but load the rest of the row + + Returns a tuple where a first element is modified partial table and the second is a list of filters. The modified partial may be None in case the + whole table is not allowed. + Each filter is a tuple of (table|columns, entity name, freeze | discard_row | discard_value). 
+ Note: by default `freeze` immediately raises DataValidationError which is convenient in most use cases + + """ + # default settings allow all evolutions, skip all else + if schema_contract == DEFAULT_SCHEMA_CONTRACT_MODE: + return partial_table, [] + + assert partial_table + table_name = partial_table["name"] + existing_table: TTableSchema = self._schema_tables.get(table_name, None) + + # table is new when not yet exist or + is_new_table = not existing_table or self.is_new_table(table_name) + # check case where we have a new table + if is_new_table and schema_contract["tables"] != "evolve": + if raise_on_freeze and schema_contract["tables"] == "freeze": + raise DataValidationError( + self.name, + table_name, + None, + "tables", + "freeze", + None, + schema_contract, + data_item, + f"Trying to add table {table_name} but new tables are frozen.", + ) + # filter tables with name below + return None, [("tables", table_name, schema_contract["tables"])] + + column_mode, data_mode = schema_contract["columns"], schema_contract["data_type"] + # allow to add new columns when table is new or if columns are allowed to evolve once + if is_new_table or existing_table.get("x-normalizer", {}).get("evolve-columns-once", False): # type: ignore[attr-defined] + column_mode = "evolve" + + # check if we should filter any columns, partial table below contains only new columns + filters: List[Tuple[TSchemaContractEntities, str, TSchemaEvolutionMode]] = [] + for column_name, column in list(partial_table["columns"].items()): + # dlt cols may always be added + if column_name.startswith(self._dlt_tables_prefix): + continue + is_variant = column.get("variant", False) + # new column and contract prohibits that + if column_mode != "evolve" and not is_variant: + if raise_on_freeze and column_mode == "freeze": + raise DataValidationError( + self.name, + table_name, + column_name, + "columns", + "freeze", + existing_table, + schema_contract, + data_item, + f"Trying to add column {column_name} to table {table_name} but columns are" + " frozen.", + ) + # filter column with name below + filters.append(("columns", column_name, column_mode)) + # pop the column + partial_table["columns"].pop(column_name) + + # variant (data type evolution) and contract prohibits that + if data_mode != "evolve" and is_variant: + if raise_on_freeze and data_mode == "freeze": + raise DataValidationError( + self.name, + table_name, + column_name, + "data_type", + "freeze", + existing_table, + schema_contract, + data_item, + f"Trying to create new variant column {column_name} to table" + f" {table_name} but data_types are frozen.", + ) + # filter column with name below + filters.append(("columns", column_name, data_mode)) + # pop the column + partial_table["columns"].pop(column_name) + + return partial_table, filters + + @staticmethod + def expand_schema_contract_settings( + settings: TSchemaContract, default: TSchemaContractDict = None + ) -> TSchemaContractDict: + """Expand partial or shorthand settings into full settings dictionary using `default` for unset entities""" + if isinstance(settings, str): + settings = TSchemaContractDict(tables=settings, columns=settings, data_type=settings) + return cast( + TSchemaContractDict, {**(default or DEFAULT_SCHEMA_CONTRACT_MODE), **(settings or {})} + ) + + def resolve_contract_settings_for_table( + self, table_name: str, new_table_schema: TTableSchema = None + ) -> TSchemaContractDict: + """Resolve the exact applicable schema contract settings for the table `table_name`. 
`new_table_schema` is added to the tree during the resolution.""" + + settings: TSchemaContract = {} + if not table_name.startswith(self._dlt_tables_prefix): + if new_table_schema: + tables = copy(self._schema_tables) + tables[table_name] = new_table_schema + else: + tables = self._schema_tables + # find root table + try: + table = utils.get_top_level_table(tables, table_name) + settings = table["schema_contract"] + except KeyError: + settings = self._settings.get("schema_contract", {}) + + # expand settings, empty settings will expand into default settings + return Schema.expand_schema_contract_settings(settings) + def update_table(self, partial_table: TPartialTableSchema) -> TPartialTableSchema: - """Update table in this schema""" table_name = partial_table["name"] parent_table_name = partial_table.get("parent") # check if parent table present if parent_table_name is not None: if self._schema_tables.get(parent_table_name) is None: raise ParentTableNotFoundException( - table_name, parent_table_name, - f" This may be due to misconfigured excludes filter that fully deletes content of the {parent_table_name}. Add includes that will preserve the parent table." - ) + table_name, + parent_table_name, + " This may be due to misconfigured excludes filter that fully deletes content" + f" of the {parent_table_name}. Add includes that will preserve the parent" + " table.", + ) table = self._schema_tables.get(table_name) if table is None: # add the whole new table to SchemaTables @@ -209,20 +413,17 @@ def update_table(self, partial_table: TPartialTableSchema) -> TPartialTableSchem self.data_item_normalizer.extend_table(table_name) return partial_table - def update_schema(self, schema: "Schema") -> None: """Updates this schema from an incoming schema""" # update all tables for table in schema.tables.values(): self.update_table(table) - # update normalizer config nondestructively - self.data_item_normalizer.update_normalizer_config(self, self.data_item_normalizer.get_normalizer_config(schema)) - self.update_normalizers() + # pass normalizer config + self._configure_normalizers(schema._normalizers_config) # update and compile settings self._settings = deepcopy(schema.settings) self._compile_settings() - def bump_version(self) -> Tuple[int, str]: """Computes schema hash in order to check if schema content was modified. In such case the schema ``stored_version`` and ``stored_version_hash`` are updated. 
@@ -231,9 +432,10 @@ def bump_version(self) -> Tuple[int, str]: Returns: Tuple[int, str]: Current (``stored_version``, ``stored_version_hash``) tuple """ - version = utils.bump_version_if_modified(self.to_dict()) - self._stored_version, self._stored_version_hash = version - return version + self._stored_version, self._stored_version_hash, _, _ = utils.bump_version_if_modified( + self.to_dict(bump_version=False) + ) + return self._stored_version, self._stored_version_hash def filter_row_with_hint(self, table_name: str, hint_type: TColumnHint, row: StrAny) -> StrAny: rv_row: DictStrAny = {} @@ -255,7 +457,12 @@ def filter_row_with_hint(self, table_name: str, hint_type: TColumnHint, row: Str def merge_hints(self, new_hints: Mapping[TColumnHint, Sequence[TSimpleRegex]]) -> None: # validate regexes - validate_dict(TSchemaSettings, {"default_hints": new_hints}, ".", validator_f=utils.simple_regex_validator) + validate_dict( + TSchemaSettings, + {"default_hints": new_hints}, + ".", + validator_f=utils.simple_regex_validator, + ) # prepare hints to be added default_hints = self._settings.setdefault("default_hints", {}) # add `new_hints` to existing hints @@ -269,12 +476,12 @@ def merge_hints(self, new_hints: Mapping[TColumnHint, Sequence[TSimpleRegex]]) - def normalize_table_identifiers(self, table: TTableSchema) -> TTableSchema: """Normalizes all table and column names in `table` schema according to current schema naming convention and returns - new normalized TTableSchema instance. + new normalized TTableSchema instance. - Naming convention like snake_case may produce name clashes with the column names. Clashing column schemas are merged - where the column that is defined later in the dictionary overrides earlier column. + Naming convention like snake_case may produce name clashes with the column names. Clashing column schemas are merged + where the column that is defined later in the dictionary overrides earlier column. - Note that resource name is not normalized. + Note that resource name is not normalized. """ # normalize all identifiers in table according to name normalizer of the schema @@ -290,13 +497,20 @@ def normalize_table_identifiers(self, table: TTableSchema) -> TTableSchema: # re-index columns as the name changed, if name space was reduced then # some columns now clash with each other. so make sure that we merge columns that are already there if new_col_name in new_columns: - new_columns[new_col_name] = utils.merge_columns(new_columns[new_col_name], c, merge_defaults=False) + new_columns[new_col_name] = utils.merge_columns( + new_columns[new_col_name], c, merge_defaults=False + ) else: new_columns[new_col_name] = c table["columns"] = new_columns return table - def get_new_table_columns(self, table_name: str, exiting_columns: TTableSchemaColumns, include_incomplete: bool = False) -> List[TColumnSchema]: + def get_new_table_columns( + self, + table_name: str, + exiting_columns: TTableSchemaColumns, + include_incomplete: bool = False, + ) -> List[TColumnSchema]: """Gets new columns to be added to `exiting_columns` to bring them up to date with `table_name` schema. 
Optionally includes incomplete columns (without data type)""" diff_c: List[TColumnSchema] = [] s_t = self.get_table_columns(table_name, include_incomplete=include_incomplete) @@ -308,24 +522,49 @@ def get_new_table_columns(self, table_name: str, exiting_columns: TTableSchemaCo def get_table(self, table_name: str) -> TTableSchema: return self._schema_tables[table_name] - def get_table_columns(self, table_name: str, include_incomplete: bool = False) -> TTableSchemaColumns: - """Gets columns of `table_name`. Optionally includes incomplete columns """ + def get_table_columns( + self, table_name: str, include_incomplete: bool = False + ) -> TTableSchemaColumns: + """Gets columns of `table_name`. Optionally includes incomplete columns""" if include_incomplete: return self._schema_tables[table_name]["columns"] else: - return {k:v for k, v in self._schema_tables[table_name]["columns"].items() if utils.is_complete_column(v)} + return { + k: v + for k, v in self._schema_tables[table_name]["columns"].items() + if utils.is_complete_column(v) + } def data_tables(self, include_incomplete: bool = False) -> List[TTableSchema]: """Gets list of all tables, that hold the loaded data. Excludes dlt tables. Excludes incomplete tables (ie. without columns)""" - return [t for t in self._schema_tables.values() if not t["name"].startswith(self._dlt_tables_prefix) and (len(t["columns"]) > 0 or include_incomplete)] + return [ + t + for t in self._schema_tables.values() + if not t["name"].startswith(self._dlt_tables_prefix) + and ( + include_incomplete or len(self.get_table_columns(t["name"], include_incomplete)) > 0 + ) + ] def dlt_tables(self) -> List[TTableSchema]: """Gets dlt tables""" - return [t for t in self._schema_tables.values() if t["name"].startswith(self._dlt_tables_prefix)] + return [ + t for t in self._schema_tables.values() if t["name"].startswith(self._dlt_tables_prefix) + ] def get_preferred_type(self, col_name: str) -> Optional[TDataType]: return next((m[1] for m in self._compiled_preferred_types if m[0].search(col_name)), None) + def is_new_table(self, table_name: str) -> bool: + """Returns true if this table does not exist OR is incomplete (has only incomplete columns) and therefore new""" + return (table_name not in self.tables) or ( + not [ + c + for c in self.tables[table_name]["columns"].values() + if utils.is_complete_column(c) + ] + ) + @property def version(self) -> int: """Version of the schema content that takes into account changes from the time of schema loading/creation. 
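The shorthand expansion implemented by Schema.expand_schema_contract_settings above can be pictured in isolation. A minimal sketch in plain Python, assuming a simple dict in place of TSchemaContractDict; expand_contract is a hypothetical standalone helper that only mirrors the merge with DEFAULT_SCHEMA_CONTRACT_MODE from this diff:

from typing import Dict, Optional, Union

# simplified stand-in for TSchemaContractDict; values are TSchemaEvolutionMode strings
TContract = Dict[str, str]

DEFAULT_MODE: TContract = {"tables": "evolve", "columns": "evolve", "data_type": "evolve"}


def expand_contract(settings: Union[str, TContract, None], default: Optional[TContract] = None) -> TContract:
    # a bare string such as "freeze" applies the same evolution mode to all three entities
    if isinstance(settings, str):
        settings = {"tables": settings, "columns": settings, "data_type": settings}
    # entities left unset fall back to `default`, which itself defaults to evolving everything
    return {**(default or DEFAULT_MODE), **(settings or {})}


assert expand_contract("freeze") == {"tables": "freeze", "columns": "freeze", "data_type": "freeze"}
assert expand_contract({"columns": "discard_row"}) == {
    "tables": "evolve",
    "columns": "discard_row",
    "data_type": "evolve",
}

With the expanded dict in hand, apply_schema_contract either raises DataValidationError when the affected entity is set to "freeze", or returns (entity, name, mode) filters so the normalizer can discard the offending row or value.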
@@ -350,6 +589,11 @@ def version_hash(self) -> str: """Current version hash of the schema, recomputed from the actual content""" return utils.bump_version_if_modified(self.to_dict())[1] + @property + def previous_hashes(self) -> Sequence[str]: + """List of ancestor version hashes of the schema, most recent first""" + return utils.bump_version_if_modified(self.to_dict())[3] + @property def stored_version_hash(self) -> str: """Version hash of the schema content from the time of schema loading/creation.""" @@ -376,9 +620,15 @@ def to_pretty_yaml(self, remove_defaults: bool = True) -> str: d = self.to_dict(remove_defaults=remove_defaults) return yaml.dump(d, allow_unicode=True, default_flow_style=False, sort_keys=False) - def clone(self, update_normalizers: bool = False) -> "Schema": - """Make a deep copy of the schema, possibly updating normalizers and identifiers in the schema if `update_normalizers` is True""" + def clone(self, with_name: str = None, update_normalizers: bool = False) -> "Schema": + """Make a deep copy of the schema, optionally changing the name, and updating normalizers and identifiers in the schema if `update_normalizers` is True + + Note that changing the name will break the previous version chain + """ d = deepcopy(self.to_dict()) + if with_name is not None: + d["name"] = with_name + d["previous_hashes"] = [] schema = Schema.from_dict(d) # type: ignore # update normalizers and possibly all schema identifiers if update_normalizers: @@ -393,6 +643,12 @@ def update_normalizers(self) -> None: normalizers["json"] = normalizers["json"] or self._normalizers_config["json"] self._configure_normalizers(normalizers) + def set_schema_contract(self, settings: TSchemaContract) -> None: + if not settings: + self._settings.pop("schema_contract", None) + else: + self._settings["schema_contract"] = settings + def add_type_detection(self, detection: TTypeDetections) -> None: """Add type auto detection to the schema.""" if detection not in self.settings["detections"]: @@ -405,11 +661,13 @@ def remove_type_detection(self, detection: TTypeDetections) -> None: self.settings["detections"].remove(detection) self._compile_settings() - def _infer_column(self, k: str, v: Any, data_type: TDataType = None, is_variant: bool = False) -> TColumnSchema: - column_schema = TColumnSchema( + def _infer_column( + self, k: str, v: Any, data_type: TDataType = None, is_variant: bool = False + ) -> TColumnSchema: + column_schema = TColumnSchema( name=k, data_type=data_type or self._infer_column_type(v, k), - nullable=not self._infer_hint("not_null", v, k) + nullable=not self._infer_hint("not_null", v, k), ) for hint in COLUMN_HINTS: column_prop = utils.hint_to_column_prop(hint) @@ -421,14 +679,23 @@ def _infer_column(self, k: str, v: Any, data_type: TDataType = None, is_variant: column_schema["variant"] = is_variant return column_schema - def _coerce_null_value(self, table_columns: TTableSchemaColumns, table_name: str, col_name: str) -> None: + def _coerce_null_value( + self, table_columns: TTableSchemaColumns, table_name: str, col_name: str + ) -> None: """Raises when column is explicitly not nullable""" if col_name in table_columns: existing_column = table_columns[col_name] if not existing_column.get("nullable", True): raise CannotCoerceNullException(table_name, col_name) - def _coerce_non_null_value(self, table_columns: TTableSchemaColumns, table_name: str, col_name: str, v: Any, is_variant: bool = False) -> Tuple[str, TColumnSchema, Any]: + def _coerce_non_null_value( + self, + table_columns: 
TTableSchemaColumns, + table_name: str, + col_name: str, + v: Any, + is_variant: bool = False, + ) -> Tuple[str, TColumnSchema, Any]: new_column: TColumnSchema = None existing_column = table_columns.get(col_name) # if column exist but is incomplete then keep it as new column @@ -437,7 +704,11 @@ def _coerce_non_null_value(self, table_columns: TTableSchemaColumns, table_name: existing_column = None # infer type or get it from existing table - col_type = existing_column["data_type"] if existing_column else self._infer_column_type(v, col_name, skip_preferred=is_variant) + col_type = ( + existing_column["data_type"] + if existing_column + else self._infer_column_type(v, col_name, skip_preferred=is_variant) + ) # get data type of value py_type = py_type_to_sc_type(type(v)) # and coerce type if inference changed the python type @@ -446,12 +717,18 @@ def _coerce_non_null_value(self, table_columns: TTableSchemaColumns, table_name: except (ValueError, SyntaxError): if is_variant: # this is final call: we cannot generate any more auto-variants - raise CannotCoerceColumnException(table_name, col_name, py_type, table_columns[col_name]["data_type"], v) + raise CannotCoerceColumnException( + table_name, col_name, py_type, table_columns[col_name]["data_type"], v + ) # otherwise we must create variant extension to the table # pass final=True so no more auto-variants can be created recursively # TODO: generate callback so dlt user can decide what to do - variant_col_name = self.naming.shorten_fragments(col_name, VARIANT_FIELD_FORMAT % py_type) - return self._coerce_non_null_value(table_columns, table_name, variant_col_name, v, is_variant=True) + variant_col_name = self.naming.shorten_fragments( + col_name, VARIANT_FIELD_FORMAT % py_type + ) + return self._coerce_non_null_value( + table_columns, table_name, variant_col_name, v, is_variant=True + ) # if coerced value is variant, then extract variant value # note: checking runtime protocols with isinstance(coerced_v, SupportsVariant): is extremely slow so we check if callable as every variant is callable @@ -459,11 +736,17 @@ def _coerce_non_null_value(self, table_columns: TTableSchemaColumns, table_name: coerced_v = coerced_v() if isinstance(coerced_v, tuple): # variant recovered so call recursively with variant column name and variant value - variant_col_name = self.naming.shorten_fragments(col_name, VARIANT_FIELD_FORMAT % coerced_v[0]) - return self._coerce_non_null_value(table_columns, table_name, variant_col_name, coerced_v[1], is_variant=True) + variant_col_name = self.naming.shorten_fragments( + col_name, VARIANT_FIELD_FORMAT % coerced_v[0] + ) + return self._coerce_non_null_value( + table_columns, table_name, variant_col_name, coerced_v[1], is_variant=True + ) if not existing_column: - inferred_column = self._infer_column(col_name, v, data_type=col_type, is_variant=is_variant) + inferred_column = self._infer_column( + col_name, v, data_type=col_type, is_variant=is_variant + ) # if there's incomplete new_column then merge it with inferred column if new_column: # use all values present in incomplete column to override inferred column - also the defaults @@ -493,8 +776,12 @@ def _infer_hint(self, hint_type: TColumnHint, _: Any, col_name: str) -> bool: return False def _add_standard_tables(self) -> None: - self._schema_tables[self.version_table_name] = self.normalize_table_identifiers(utils.version_table()) - self._schema_tables[self.loads_table_name] = self.normalize_table_identifiers(utils.load_table()) + self._schema_tables[self.version_table_name] = 
self.normalize_table_identifiers( + utils.version_table() + ) + self._schema_tables[self.loads_table_name] = self.normalize_table_identifiers( + utils.load_table() + ) def _add_standard_hints(self) -> None: default_hints = utils.standard_hints() @@ -506,18 +793,20 @@ def _add_standard_hints(self) -> None: def _configure_normalizers(self, normalizers: TNormalizersConfig) -> None: # import desired modules - self._normalizers_config, naming_module, item_normalizer_class = import_normalizers(normalizers) + self._normalizers_config, naming_module, item_normalizer_class = import_normalizers( + normalizers + ) # print(f"{self.name}: {type(self.naming)} {type(naming_module)}") if self.naming and type(self.naming) is not type(naming_module): self.naming = naming_module for table in self._schema_tables.values(): self.normalize_table_identifiers(table) # re-index the table names - self._schema_tables = {t["name"]:t for t in self._schema_tables.values()} + self._schema_tables = {t["name"]: t for t in self._schema_tables.values()} # name normalization functions self.naming = naming_module - self._dlt_tables_prefix = self.naming.normalize_table_identifier("_dlt") + self._dlt_tables_prefix = self.naming.normalize_table_identifier(DLT_NAME_PREFIX) self.version_table_name = self.naming.normalize_table_identifier(VERSION_TABLE_NAME) self.loads_table_name = self.naming.normalize_table_identifier(LOADS_TABLE_NAME) self.state_table_name = self.naming.normalize_table_identifier(STATE_TABLE_NAME) @@ -532,6 +821,7 @@ def _reset_schema(self, name: str, normalizers: TNormalizersConfig = None) -> No self._stored_version_hash: str = None self._imported_version_hash: str = None self._schema_description: str = None + self._stored_previous_hashes: List[str] = [] self._settings: TSchemaSettings = {} self._compiled_preferred_types: List[Tuple[REPattern, TDataType]] = [] @@ -570,6 +860,7 @@ def _from_stored_schema(self, stored_schema: TStoredSchema) -> None: self._imported_version_hash = stored_schema.get("imported_version_hash") self._schema_description = stored_schema.get("description") self._settings = stored_schema.get("settings") or {} + self._stored_previous_hashes = stored_schema.get("previous_hashes") self._compile_settings() def _set_schema_name(self, name: str) -> None: @@ -590,9 +881,13 @@ def _compile_settings(self) -> None: for table in self._schema_tables.values(): if "filters" in table: if "excludes" in table["filters"]: - self._compiled_excludes[table["name"]] = list(map(utils.compile_simple_regex, table["filters"]["excludes"])) + self._compiled_excludes[table["name"]] = list( + map(utils.compile_simple_regex, table["filters"]["excludes"]) + ) if "includes" in table["filters"]: - self._compiled_includes[table["name"]] = list(map(utils.compile_simple_regex, table["filters"]["includes"])) + self._compiled_includes[table["name"]] = list( + map(utils.compile_simple_regex, table["filters"]["includes"]) + ) # look for auto-detections in settings and then normalizer self._type_detections = self._settings.get("detections") or self._normalizers_config.get("detections") or [] # type: ignore diff --git a/dlt/common/schema/typing.py b/dlt/common/schema/typing.py index ac17f0ae9f..9a27cbe4bb 100644 --- a/dlt/common/schema/typing.py +++ b/dlt/common/schema/typing.py @@ -1,4 +1,18 @@ -from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Set, Type, TypedDict, NewType, Union, get_args +from typing import ( + Any, + Callable, + Dict, + List, + Literal, + Optional, + Sequence, + Set, + Type, + TypedDict, 
+ NewType, + Union, + get_args, +) from typing_extensions import Never from dlt.common.data_types import TDataType @@ -11,27 +25,63 @@ # current version of schema engine -SCHEMA_ENGINE_VERSION = 6 +SCHEMA_ENGINE_VERSION = 8 # dlt tables VERSION_TABLE_NAME = "_dlt_version" LOADS_TABLE_NAME = "_dlt_loads" STATE_TABLE_NAME = "_dlt_pipeline_state" - -TColumnProp = Literal["name", "data_type", "nullable", "partition", "cluster", "primary_key", "foreign_key", "sort", "unique", "merge_key", "root_key"] +DLT_NAME_PREFIX = "_dlt" + +TColumnProp = Literal[ + "name", + "data_type", + "nullable", + "partition", + "cluster", + "primary_key", + "foreign_key", + "sort", + "unique", + "merge_key", + "root_key", +] """Known properties and hints of the column""" # TODO: merge TColumnHint with TColumnProp -TColumnHint = Literal["not_null", "partition", "cluster", "primary_key", "foreign_key", "sort", "unique", "root_key", "merge_key"] +TColumnHint = Literal[ + "not_null", + "partition", + "cluster", + "primary_key", + "foreign_key", + "sort", + "unique", + "root_key", + "merge_key", +] """Known hints of a column used to declare hint regexes.""" TWriteDisposition = Literal["skip", "append", "replace", "merge"] TTableFormat = Literal["iceberg"] -TTypeDetections = Literal["timestamp", "iso_timestamp", "iso_date", "large_integer", "hexbytes_to_text", "wei_to_double"] +TTypeDetections = Literal[ + "timestamp", "iso_timestamp", "iso_date", "large_integer", "hexbytes_to_text", "wei_to_double" +] TTypeDetectionFunc = Callable[[Type[Any], Any], Optional[TDataType]] TColumnNames = Union[str, Sequence[str]] """A string representing a column name or a list of""" COLUMN_PROPS: Set[TColumnProp] = set(get_args(TColumnProp)) -COLUMN_HINTS: Set[TColumnHint] = set(["partition", "cluster", "primary_key", "foreign_key", "sort", "unique", "merge_key", "root_key"]) +COLUMN_HINTS: Set[TColumnHint] = set( + [ + "partition", + "cluster", + "primary_key", + "foreign_key", + "sort", + "unique", + "merge_key", + "root_key", + ] +) WRITE_DISPOSITIONS: Set[TWriteDisposition] = set(get_args(TWriteDisposition)) @@ -43,12 +93,14 @@ class TColumnType(TypedDict, total=False): class TColumnSchemaBase(TColumnType, total=False): """TypedDict that defines basic properties of a column: name, data type and nullable""" + name: Optional[str] nullable: Optional[bool] class TColumnSchema(TColumnSchemaBase, total=False): """TypedDict that defines additional column hints""" + description: Optional[str] partition: Optional[bool] cluster: Optional[bool] @@ -65,23 +117,48 @@ class TColumnSchema(TColumnSchemaBase, total=False): """A mapping from column name to column schema, typically part of a table schema""" -TAnySchemaColumns = Union[TTableSchemaColumns, Sequence[TColumnSchema], _PydanticBaseModel, Type[_PydanticBaseModel]] +TAnySchemaColumns = Union[ + TTableSchemaColumns, Sequence[TColumnSchema], _PydanticBaseModel, Type[_PydanticBaseModel] +] TSimpleRegex = NewType("TSimpleRegex", str) TColumnName = NewType("TColumnName", str) SIMPLE_REGEX_PREFIX = "re:" +TSchemaEvolutionMode = Literal["evolve", "discard_value", "freeze", "discard_row"] +TSchemaContractEntities = Literal["tables", "columns", "data_type"] + + +class TSchemaContractDict(TypedDict, total=False): + """TypedDict defining the schema update settings""" + + tables: Optional[TSchemaEvolutionMode] + columns: Optional[TSchemaEvolutionMode] + data_type: Optional[TSchemaEvolutionMode] + + +TSchemaContract = Union[TSchemaEvolutionMode, TSchemaContractDict] + class TRowFilters(TypedDict, total=True): 
excludes: Optional[List[TSimpleRegex]] includes: Optional[List[TSimpleRegex]] +class NormalizerInfo(TypedDict, total=True): + new_table: bool + + +# TypedDict that defines properties of a table + + class TTableSchema(TypedDict, total=False): """TypedDict that defines properties of a table""" + name: Optional[str] description: Optional[str] write_disposition: Optional[TWriteDisposition] + schema_contract: Optional[TSchemaContract] table_sealed: Optional[bool] parent: Optional[str] filters: Optional[TRowFilters] @@ -97,8 +174,9 @@ class TPartialTableSchema(TTableSchema): TSchemaTables = Dict[str, TTableSchema] TSchemaUpdate = Dict[str, List[TPartialTableSchema]] + class TSchemaSettings(TypedDict, total=False): - schema_sealed: Optional[bool] + schema_contract: Optional[TSchemaContract] detections: Optional[List[TTypeDetections]] default_hints: Optional[Dict[TColumnHint, List[TSimpleRegex]]] preferred_types: Optional[Dict[TSimpleRegex, TDataType]] @@ -106,8 +184,10 @@ class TSchemaSettings(TypedDict, total=False): class TStoredSchema(TypedDict, total=False): """TypeDict defining the schema representation in storage""" + version: int version_hash: str + previous_hashes: List[str] imported_version_hash: Optional[str] engine_version: int name: str diff --git a/dlt/common/schema/utils.py b/dlt/common/schema/utils.py index f2075ce85d..dc243f50dd 100644 --- a/dlt/common/schema/utils.py +++ b/dlt/common/schema/utils.py @@ -7,18 +7,46 @@ from dlt.common import json from dlt.common.data_types import TDataType -from dlt.common.exceptions import DictValidationException, MissingDependencyException +from dlt.common.exceptions import DictValidationException from dlt.common.normalizers import explicit_normalizers from dlt.common.normalizers.naming import NamingConvention from dlt.common.normalizers.naming.snake_case import NamingConvention as SnakeCase -from dlt.common.typing import DictStrAny, REPattern, is_dict_generic_type -from dlt.common.validation import TCustomValidator, validate_dict, validate_dict_ignoring_xkeys +from dlt.common.typing import DictStrAny, REPattern +from dlt.common.validation import TCustomValidator, validate_dict_ignoring_xkeys from dlt.common.schema import detections -from dlt.common.schema.typing import (COLUMN_HINTS, SCHEMA_ENGINE_VERSION, LOADS_TABLE_NAME, SIMPLE_REGEX_PREFIX, VERSION_TABLE_NAME, TColumnName, TPartialTableSchema, TSchemaTables, TSchemaUpdate, - TSimpleRegex, TStoredSchema, TTableSchema, TTableSchemaColumns, TColumnSchemaBase, TColumnSchema, TColumnProp, TTableFormat, - TColumnHint, TTypeDetectionFunc, TTypeDetections, TWriteDisposition) -from dlt.common.schema.exceptions import (CannotCoerceColumnException, ParentTableNotFoundException, SchemaEngineNoUpgradePathException, SchemaException, - TablePropertiesConflictException, InvalidSchemaName, UnknownTableException) +from dlt.common.schema.typing import ( + COLUMN_HINTS, + SCHEMA_ENGINE_VERSION, + LOADS_TABLE_NAME, + SIMPLE_REGEX_PREFIX, + VERSION_TABLE_NAME, + TColumnName, + TPartialTableSchema, + TSchemaTables, + TSchemaUpdate, + TSimpleRegex, + TStoredSchema, + TTableSchema, + TTableSchemaColumns, + TColumnSchemaBase, + TColumnSchema, + TColumnProp, + TTableFormat, + TColumnHint, + TTypeDetectionFunc, + TTypeDetections, + TWriteDisposition, + TSchemaContract, +) +from dlt.common.schema.exceptions import ( + CannotCoerceColumnException, + ParentTableNotFoundException, + SchemaEngineNoUpgradePathException, + SchemaException, + TablePropertiesConflictException, + InvalidSchemaName, + UnknownTableException, +) 
from dlt.common.normalizers.utils import import_normalizers from dlt.common.schema.typing import TAnySchemaColumns @@ -30,7 +58,11 @@ def is_valid_schema_name(name: str) -> bool: """Schema name must be a valid python identifier and have max len of 64""" - return name is not None and name.isidentifier() and len(name) <= InvalidSchemaName.MAXIMUM_SCHEMA_NAME_LENGTH + return ( + name is not None + and name.isidentifier() + and len(name) <= InvalidSchemaName.MAXIMUM_SCHEMA_NAME_LENGTH + ) def normalize_schema_name(name: str) -> str: @@ -42,7 +74,7 @@ def normalize_schema_name(name: str) -> str: def apply_defaults(stored_schema: TStoredSchema) -> TStoredSchema: """Applies default hint values to `stored_schema` in place - Updates only complete column hints, incomplete columns are preserved intact + Updates only complete column hints, incomplete columns are preserved intact """ for table_name, table in stored_schema["tables"].items(): # overwrite name @@ -51,8 +83,8 @@ def apply_defaults(stored_schema: TStoredSchema) -> TStoredSchema: if table.get("parent") is None: if table.get("write_disposition") is None: table["write_disposition"] = DEFAULT_WRITE_DISPOSITION - if table.get('resource') is None: - table['resource'] = table_name + if table.get("resource") is None: + table["resource"] = table_name for column_name in table["columns"]: # add default hints to tables column = table["columns"][column_name] @@ -66,13 +98,13 @@ def apply_defaults(stored_schema: TStoredSchema) -> TStoredSchema: def remove_defaults(stored_schema: TStoredSchema) -> TStoredSchema: """Removes default values from `stored_schema` in place, returns the input for chaining - Default values are removed from table schemas and complete column schemas. Incomplete columns are preserved intact. + Default values are removed from table schemas and complete column schemas. Incomplete columns are preserved intact. 
""" clean_tables = deepcopy(stored_schema["tables"]) for table_name, t in clean_tables.items(): del t["name"] - if t.get('resource') == table_name: - del t['resource'] + if t.get("resource") == table_name: + del t["resource"] for c in t["columns"].values(): # remove defaults only on complete columns # if is_complete_column(c): @@ -110,19 +142,20 @@ def remove_column_defaults(column_schema: TColumnSchema) -> TColumnSchema: def add_column_defaults(column: TColumnSchemaBase) -> TColumnSchema: """Adds default boolean hints to column""" return { - **{ - "nullable": True, - "partition": False, - "cluster": False, - "unique": False, - "sort": False, - "primary_key": False, - "foreign_key": False, - "root_key": False, - "merge_key": False - }, - **column - } + **{ + "nullable": True, + "partition": False, + "cluster": False, + "unique": False, + "sort": False, + "primary_key": False, + "foreign_key": False, + "root_key": False, + "merge_key": False, + }, + **column, + } + # def add_complete_column_defaults(column: TColumnSchemaBase) -> TColumnSchema: # """Adds default hints to `column` if it is completed, otherwise preserves `column` content intact @@ -134,8 +167,8 @@ def add_column_defaults(column: TColumnSchemaBase) -> TColumnSchema: # return copy(column) # type: ignore -def bump_version_if_modified(stored_schema: TStoredSchema) -> Tuple[int, str]: - # if any change to schema document is detected then bump version and write new hash +def bump_version_if_modified(stored_schema: TStoredSchema) -> Tuple[int, str, str, Sequence[str]]: + """Bumps the `stored_schema` version and version hash if content modified, returns (new version, new hash, old hash, 10 last hashes) tuple""" hash_ = generate_version_hash(stored_schema) previous_hash = stored_schema.get("version_hash") if not previous_hash: @@ -143,19 +176,25 @@ def bump_version_if_modified(stored_schema: TStoredSchema) -> Tuple[int, str]: pass elif hash_ != previous_hash: stored_schema["version"] += 1 + # unshift previous hash to previous_hashes and limit array to 10 entries + if previous_hash not in stored_schema["previous_hashes"]: + stored_schema["previous_hashes"].insert(0, previous_hash) + stored_schema["previous_hashes"] = stored_schema["previous_hashes"][:10] + stored_schema["version_hash"] = hash_ - return stored_schema["version"], hash_ + return stored_schema["version"], hash_, previous_hash, stored_schema["previous_hashes"] def generate_version_hash(stored_schema: TStoredSchema) -> str: # generates hash out of stored schema content, excluding the hash itself and version - schema_copy = deepcopy(stored_schema) + schema_copy = copy(stored_schema) schema_copy.pop("version") schema_copy.pop("version_hash", None) schema_copy.pop("imported_version_hash", None) + schema_copy.pop("previous_hashes", None) # ignore order of elements when computing the hash - content = json.dumps(schema_copy, sort_keys=True) - h = hashlib.sha3_256(content.encode("utf-8")) + content = json.dumpb(schema_copy, sort_keys=True) + h = hashlib.sha3_256(content) # additionally check column order table_names = sorted((schema_copy.get("tables") or {}).keys()) if table_names: @@ -165,10 +204,12 @@ def generate_version_hash(stored_schema: TStoredSchema) -> str: # add column names to hash in order for cn in (t.get("columns") or {}).keys(): h.update(cn.encode("utf-8")) - return base64.b64encode(h.digest()).decode('ascii') + return base64.b64encode(h.digest()).decode("ascii") -def verify_schema_hash(loaded_schema_dict: DictStrAny, verifies_if_not_migrated: bool = False) -> bool: 
+def verify_schema_hash( + loaded_schema_dict: DictStrAny, verifies_if_not_migrated: bool = False +) -> bool: # generates content hash and compares with existing engine_version: str = loaded_schema_dict.get("engine_version") # if upgrade is needed, the hash cannot be compared @@ -184,16 +225,32 @@ def simple_regex_validator(path: str, pk: str, pv: Any, t: Any) -> bool: # custom validator on type TSimpleRegex if t is TSimpleRegex: if not isinstance(pv, str): - raise DictValidationException(f"In {path}: field {pk} value {pv} has invalid type {type(pv).__name__} while str is expected", path, pk, pv) + raise DictValidationException( + f"In {path}: field {pk} value {pv} has invalid type {type(pv).__name__} while str" + " is expected", + path, + pk, + pv, + ) if pv.startswith(SIMPLE_REGEX_PREFIX): # check if regex try: re.compile(pv[3:]) except Exception as e: - raise DictValidationException(f"In {path}: field {pk} value {pv[3:]} does not compile as regex: {str(e)}", path, pk, pv) + raise DictValidationException( + f"In {path}: field {pk} value {pv[3:]} does not compile as regex: {str(e)}", + path, + pk, + pv, + ) else: if RE_NON_ALPHANUMERIC_UNDERSCORE.match(pv): - raise DictValidationException(f"In {path}: field {pk} value {pv} looks like a regex, please prefix with re:", path, pk, pv) + raise DictValidationException( + f"In {path}: field {pk} value {pv} looks like a regex, please prefix with re:", + path, + pk, + pv, + ) # we know how to validate that type return True else: @@ -202,16 +259,25 @@ def simple_regex_validator(path: str, pk: str, pv: Any, t: Any) -> bool: def column_name_validator(naming: NamingConvention) -> TCustomValidator: - def validator(path: str, pk: str, pv: Any, t: Any) -> bool: if t is TColumnName: if not isinstance(pv, str): - raise DictValidationException(f"In {path}: field {pk} value {pv} has invalid type {type(pv).__name__} while str is expected", path, pk, pv) + raise DictValidationException( + f"In {path}: field {pk} value {pv} has invalid type {type(pv).__name__} while" + " str is expected", + path, + pk, + pv, + ) try: if naming.normalize_path(pv) != pv: - raise DictValidationException(f"In {path}: field {pk}: {pv} is not a valid column name", path, pk, pv) + raise DictValidationException( + f"In {path}: field {pk}: {pv} is not a valid column name", path, pk, pv + ) except ValueError: - raise DictValidationException(f"In {path}: field {pk}: {pv} is not a valid column name", path, pk, pv) + raise DictValidationException( + f"In {path}: field {pk}: {pv} is not a valid column name", path, pk, pv + ) return True else: return False @@ -233,7 +299,7 @@ def compile_simple_regex(r: TSimpleRegex) -> REPattern: def compile_simple_regexes(r: Iterable[TSimpleRegex]) -> REPattern: """Compile multiple patterns as one""" - pattern = '|'.join(f"({_prepare_simple_regex(p)})" for p in r) + pattern = "|".join(f"({_prepare_simple_regex(p)})" for p in r) if not pattern: # Don't create an empty pattern that matches everything raise ValueError("Cannot create a regex pattern from empty sequence") return re.compile(pattern) @@ -242,10 +308,7 @@ def compile_simple_regexes(r: Iterable[TSimpleRegex]) -> REPattern: def validate_stored_schema(stored_schema: TStoredSchema) -> None: # use lambda to verify only non extra fields validate_dict_ignoring_xkeys( - spec=TStoredSchema, - doc=stored_schema, - path=".", - validator_f=simple_regex_validator + spec=TStoredSchema, doc=stored_schema, path=".", validator_f=simple_regex_validator ) # check child parent relationships for table_name, table in 
stored_schema["tables"].items(): @@ -269,12 +332,8 @@ def migrate_schema(schema_dict: DictStrAny, from_engine: int, to_engine: int) -> # add default normalizers and root hash propagation current["normalizers"], _, _ = import_normalizers(explicit_normalizers()) current["normalizers"]["json"]["config"] = { - "propagation": { - "root": { - "_dlt_id": "_dlt_root_id" - } - } - } + "propagation": {"root": {"_dlt_id": "_dlt_root_id"}} + } # move settings, convert strings to simple regexes d_h: Dict[TColumnHint, List[TSimpleRegex]] = schema_dict.pop("hints", {}) for h_k, h_l in d_h.items(): @@ -311,8 +370,8 @@ def migrate_filters(group: str, filters: List[str]) -> None: # existing filter were always defined at the root table. find this table and move filters for f in filters: # skip initial ^ - root = f[1:f.find("__")] - path = f[f.find("__") + 2:] + root = f[1 : f.find("__")] + path = f[f.find("__") + 2 :] t = current["tables"].get(root) if t is None: # must add new table to hold filters @@ -340,10 +399,24 @@ def migrate_filters(group: str, filters: List[str]) -> None: # replace loads table schema_dict["tables"][LOADS_TABLE_NAME] = load_table() from_engine = 6 + if from_engine == 6 and to_engine > 6: + # migrate from sealed properties to schema evolution settings + schema_dict["settings"].pop("schema_sealed", None) + schema_dict["settings"]["schema_contract"] = {} + for table in schema_dict["tables"].values(): + table.pop("table_sealed", None) + if not table.get("parent"): + table["schema_contract"] = {} + from_engine = 7 + if from_engine == 7 and to_engine > 7: + schema_dict["previous_hashes"] = [] + from_engine = 8 schema_dict["engine_version"] = from_engine if from_engine != to_engine: - raise SchemaEngineNoUpgradePathException(schema_dict["name"], schema_dict["engine_version"], from_engine, to_engine) + raise SchemaEngineNoUpgradePathException( + schema_dict["name"], schema_dict["engine_version"], from_engine, to_engine + ) return cast(TStoredSchema, schema_dict) @@ -371,10 +444,12 @@ def compare_complete_columns(a: TColumnSchema, b: TColumnSchema) -> bool: return a["data_type"] == b["data_type"] and a["name"] == b["name"] -def merge_columns(col_a: TColumnSchema, col_b: TColumnSchema, merge_defaults: bool = True) -> TColumnSchema: +def merge_columns( + col_a: TColumnSchema, col_b: TColumnSchema, merge_defaults: bool = True +) -> TColumnSchema: """Merges `col_b` into `col_a`. if `merge_defaults` is True, only hints from `col_b` that are not default in `col_a` will be set. 
- Modifies col_a in place and returns it + Modifies col_a in place and returns it """ col_b_clean = col_b if merge_defaults else remove_column_defaults(copy(col_b)) for n, v in col_b_clean.items(): @@ -396,7 +471,9 @@ def diff_tables(tab_a: TTableSchema, tab_b: TPartialTableSchema) -> TPartialTabl table_name = tab_a["name"] # check if table properties can be merged if tab_a.get("parent") != tab_b.get("parent"): - raise TablePropertiesConflictException(table_name, "parent", tab_a.get("parent"), tab_b.get("parent")) + raise TablePropertiesConflictException( + table_name, "parent", tab_a.get("parent"), tab_b.get("parent") + ) # get new columns, changes in the column data type or other properties are not allowed tab_a_columns = tab_a["columns"] @@ -408,7 +485,13 @@ def diff_tables(tab_a: TTableSchema, tab_b: TPartialTableSchema) -> TPartialTabl if is_complete_column(col_a) and is_complete_column(col_b): if not compare_complete_columns(tab_a_columns[col_b_name], col_b): # attempt to update to incompatible columns - raise CannotCoerceColumnException(table_name, col_b_name, col_b["data_type"], tab_a_columns[col_b_name]["data_type"], None) + raise CannotCoerceColumnException( + table_name, + col_b_name, + col_b["data_type"], + tab_a_columns[col_b_name]["data_type"], + None, + ) # all other properties can change merged_column = merge_columns(copy(col_a), col_b) if merged_column != col_a: @@ -419,24 +502,24 @@ def diff_tables(tab_a: TTableSchema, tab_b: TPartialTableSchema) -> TPartialTabl # return partial table containing only name and properties that differ (column, filters etc.) partial_table: TPartialTableSchema = { "name": table_name, - "columns": {} if new_columns is None else {c["name"]: c for c in new_columns} + "columns": {} if new_columns is None else {c["name"]: c for c in new_columns}, } for k, v in tab_b.items(): if k in ["columns", None]: continue existing_v = tab_a.get(k) if existing_v != v: - # print(f"{k} ==? {v} ==? {existing_v}") partial_table[k] = v # type: ignore # this should not really happen - if tab_a.get('parent') is not None and (resource := tab_b.get('resource')): - raise TablePropertiesConflictException(table_name, "resource", resource, tab_a.get('parent')) + if tab_a.get("parent") is not None and (resource := tab_b.get("resource")): + raise TablePropertiesConflictException( + table_name, "resource", resource, tab_a.get("parent") + ) return partial_table - # def compare_tables(tab_a: TTableSchema, tab_b: TTableSchema) -> bool: # try: # table_name = tab_a["name"] @@ -452,14 +535,16 @@ def diff_tables(tab_a: TTableSchema, tab_b: TPartialTableSchema) -> TPartialTabl def merge_tables(table: TTableSchema, partial_table: TPartialTableSchema) -> TPartialTableSchema: """Merges "partial_table" into "table". `table` is merged in place. Returns the diff partial table. - `table` and `partial_table` names must be identical. A table diff is generated and applied to `table`: - * new columns are added, updated columns are replaced from diff - * table hints are added or replaced from diff - * nothing gets deleted + `table` and `partial_table` names must be identical. 
A table diff is generated and applied to `table`: + * new columns are added, updated columns are replaced from diff + * table hints are added or replaced from diff + * nothing gets deleted """ if table["name"] != partial_table["name"]: - raise TablePropertiesConflictException(table["name"], "name", table["name"], partial_table["name"]) + raise TablePropertiesConflictException( + table["name"], "name", table["name"], partial_table["name"] + ) diff_table = diff_tables(table, partial_table) # add new columns when all checks passed table["columns"].update(diff_table["columns"]) @@ -476,10 +561,16 @@ def hint_to_column_prop(h: TColumnHint) -> TColumnProp: return h -def get_columns_names_with_prop(table: TTableSchema, column_prop: Union[TColumnProp, str], include_incomplete: bool = False) -> List[str]: +def get_columns_names_with_prop( + table: TTableSchema, column_prop: Union[TColumnProp, str], include_incomplete: bool = False +) -> List[str]: # column_prop: TColumnProp = hint_to_column_prop(hint_type) # default = column_prop != "nullable" # default is true, only for nullable false - return [c["name"] for c in table["columns"].values() if bool(c.get(column_prop, False)) is True and (include_incomplete or is_complete_column(c))] + return [ + c["name"] + for c in table["columns"].values() + if bool(c.get(column_prop, False)) is True and (include_incomplete or is_complete_column(c)) + ] def merge_schema_updates(schema_updates: Sequence[TSchemaUpdate]) -> TSchemaTables: @@ -493,7 +584,9 @@ def merge_schema_updates(schema_updates: Sequence[TSchemaUpdate]) -> TSchemaTabl return aggregated_update -def get_inherited_table_hint(tables: TSchemaTables, table_name: str, table_hint_name: str, allow_none: bool = False) -> Any: +def get_inherited_table_hint( + tables: TSchemaTables, table_name: str, table_hint_name: str, allow_none: bool = False +) -> Any: table = tables.get(table_name, {}) hint = table.get(table_hint_name) if hint: @@ -506,16 +599,23 @@ def get_inherited_table_hint(tables: TSchemaTables, table_name: str, table_hint_ if allow_none: return None - raise ValueError(f"No table hint '{table_hint_name} found in the chain of tables for '{table_name}'.") + raise ValueError( + f"No table hint '{table_hint_name} found in the chain of tables for '{table_name}'." + ) def get_write_disposition(tables: TSchemaTables, table_name: str) -> TWriteDisposition: """Returns table hint of a table if present. 
If not, looks up into parent table""" - return cast(TWriteDisposition, get_inherited_table_hint(tables, table_name, "write_disposition", allow_none=False)) + return cast( + TWriteDisposition, + get_inherited_table_hint(tables, table_name, "write_disposition", allow_none=False), + ) def get_table_format(tables: TSchemaTables, table_name: str) -> TTableFormat: - return cast(TTableFormat, get_inherited_table_hint(tables, table_name, "table_format", allow_none=True)) + return cast( + TTableFormat, get_inherited_table_hint(tables, table_name, "table_format", allow_none=True) + ) def table_schema_has_type(table: TTableSchema, _typ: TDataType) -> bool: @@ -525,7 +625,10 @@ def table_schema_has_type(table: TTableSchema, _typ: TDataType) -> bool: def table_schema_has_type_with_precision(table: TTableSchema, _typ: TDataType) -> bool: """Checks if `table` schema contains column with type _typ and precision set""" - return any(c.get("data_type") == _typ and c.get("precision") is not None for c in table["columns"].values()) + return any( + c.get("data_type") == _typ and c.get("precision") is not None + for c in table["columns"].values() + ) def get_top_level_table(tables: TSchemaTables, table_name: str) -> TTableSchema: @@ -552,54 +655,38 @@ def _child(t: TTableSchema) -> None: return chain -def group_tables_by_resource(tables: TSchemaTables, pattern: Optional[REPattern] = None) -> Dict[str, List[TTableSchema]]: +def group_tables_by_resource( + tables: TSchemaTables, pattern: Optional[REPattern] = None +) -> Dict[str, List[TTableSchema]]: """Create a dict of resources and their associated tables and descendant tables If `pattern` is supplied, the result is filtered to only resource names matching the pattern. """ result: Dict[str, List[TTableSchema]] = {} for table in tables.values(): - resource = table.get('resource') + resource = table.get("resource") if resource and (pattern is None or pattern.match(resource)): resource_tables = result.setdefault(resource, []) - resource_tables.extend(get_child_tables(tables, table['name'])) + resource_tables.extend(get_child_tables(tables, table["name"])) return result def version_table() -> TTableSchema: # NOTE: always add new columns at the end of the table so we have identical layout # after an update of existing tables (always at the end) - table = new_table(VERSION_TABLE_NAME, columns=[ + table = new_table( + VERSION_TABLE_NAME, + columns=[ { "name": "version", "data_type": "bigint", "nullable": False, }, - { - "name": "engine_version", - "data_type": "bigint", - "nullable": False - }, - { - "name": "inserted_at", - "data_type": "timestamp", - "nullable": False - }, - { - "name": "schema_name", - "data_type": "text", - "nullable": False - }, - { - "name": "version_hash", - "data_type": "text", - "nullable": False - }, - { - "name": "schema", - "data_type": "text", - "nullable": False - } - ] + {"name": "engine_version", "data_type": "bigint", "nullable": False}, + {"name": "inserted_at", "data_type": "timestamp", "nullable": False}, + {"name": "schema_name", "data_type": "text", "nullable": False}, + {"name": "version_hash", "data_type": "text", "nullable": False}, + {"name": "schema", "data_type": "text", "nullable": False}, + ], ) table["write_disposition"] = "skip" table["description"] = "Created by DLT. 
Tracks schema updates" @@ -609,33 +696,19 @@ def version_table() -> TTableSchema: def load_table() -> TTableSchema: # NOTE: always add new columns at the end of the table so we have identical layout # after an update of existing tables (always at the end) - table = new_table(LOADS_TABLE_NAME, columns=[ - { - "name": "load_id", - "data_type": "text", - "nullable": False - }, - { - "name": "schema_name", - "data_type": "text", - "nullable": True - }, - { - "name": "status", - "data_type": "bigint", - "nullable": False - }, - { - "name": "inserted_at", - "data_type": "timestamp", - "nullable": False - }, + table = new_table( + LOADS_TABLE_NAME, + columns=[ + {"name": "load_id", "data_type": "text", "nullable": False}, + {"name": "schema_name", "data_type": "text", "nullable": True}, + {"name": "status", "data_type": "bigint", "nullable": False}, + {"name": "inserted_at", "data_type": "timestamp", "nullable": False}, { "name": "schema_version_hash", "data_type": "text", "nullable": True, }, - ] + ], ) table["write_disposition"] = "skip" table["description"] = "Created by DLT. Tracks completed loads" @@ -649,21 +722,24 @@ def new_table( columns: Sequence[TColumnSchema] = None, validate_schema: bool = False, resource: str = None, - table_format: TTableFormat = None + schema_contract: TSchemaContract = None, + table_format: TTableFormat = None, ) -> TTableSchema: - table: TTableSchema = { "name": table_name, - "columns": {} if columns is None else {c["name"]: c for c in columns} + "columns": {} if columns is None else {c["name"]: c for c in columns}, } if parent_table_name: table["parent"] = parent_table_name assert write_disposition is None assert resource is None + assert schema_contract is None else: # set write disposition only for root tables table["write_disposition"] = write_disposition or DEFAULT_WRITE_DISPOSITION table["resource"] = resource or table_name + if schema_contract is not None: + table["schema_contract"] = schema_contract if table_format: table["table_format"] = table_format if validate_schema: @@ -676,11 +752,13 @@ def new_table( return table -def new_column(column_name: str, data_type: TDataType = None, nullable: bool = True, validate_schema: bool = False) -> TColumnSchema: - column: TColumnSchema = { - "name": column_name, - "nullable": nullable - } +def new_column( + column_name: str, + data_type: TDataType = None, + nullable: bool = True, + validate_schema: bool = False, +) -> TColumnSchema: + column: TColumnSchema = {"name": column_name, "nullable": nullable} if data_type: column["data_type"] = data_type if validate_schema: diff --git a/dlt/common/source.py b/dlt/common/source.py index a75c2dd948..249d54b4c5 100644 --- a/dlt/common/source.py +++ b/dlt/common/source.py @@ -10,6 +10,7 @@ class SourceInfo(NamedTuple): """Runtime information on the source/resource""" + SPEC: Type[BaseConfiguration] f: AnyFun module: ModuleType @@ -44,4 +45,4 @@ def _get_source_for_inner_function(f: AnyFun) -> Optional[SourceInfo]: # find source function parts = get_callable_name(f, "__qualname__").split(".") parent_fun = ".".join(parts[:-2]) - return _SOURCES.get(parent_fun) \ No newline at end of file + return _SOURCES.get(parent_fun) diff --git a/dlt/common/storages/__init__.py b/dlt/common/storages/__init__.py index c18b8ab04d..e5feeaba57 100644 --- a/dlt/common/storages/__init__.py +++ b/dlt/common/storages/__init__.py @@ -3,9 +3,22 @@ from .schema_storage import SchemaStorage from .live_schema_storage import LiveSchemaStorage from .normalize_storage import NormalizeStorage -from 
.load_storage import LoadStorage +from .load_package import ( + ParsedLoadJobFileName, + LoadJobInfo, + LoadPackageInfo, + PackageStorage, + TJobState, +) from .data_item_storage import DataItemStorage -from .configuration import LoadStorageConfiguration, NormalizeStorageConfiguration, SchemaStorageConfiguration, TSchemaFileFormat, FilesystemConfiguration +from .load_storage import LoadStorage +from .configuration import ( + LoadStorageConfiguration, + NormalizeStorageConfiguration, + SchemaStorageConfiguration, + TSchemaFileFormat, + FilesystemConfiguration, +) from .fsspec_filesystem import fsspec_from_config, fsspec_filesystem @@ -17,6 +30,16 @@ "NormalizeStorage", "LoadStorage", "DataItemStorage", - "LoadStorageConfiguration", "NormalizeStorageConfiguration", "SchemaStorageConfiguration", "TSchemaFileFormat", "FilesystemConfiguration", - "fsspec_from_config", "fsspec_filesystem", + "LoadStorageConfiguration", + "NormalizeStorageConfiguration", + "SchemaStorageConfiguration", + "TSchemaFileFormat", + "FilesystemConfiguration", + "ParsedLoadJobFileName", + "LoadJobInfo", + "LoadPackageInfo", + "PackageStorage", + "TJobState", + "fsspec_from_config", + "fsspec_filesystem", ] diff --git a/dlt/common/storages/configuration.py b/dlt/common/storages/configuration.py index 699465ce4a..83e7e88189 100644 --- a/dlt/common/storages/configuration.py +++ b/dlt/common/storages/configuration.py @@ -4,7 +4,14 @@ from dlt.common.configuration.specs import BaseConfiguration, configspec, CredentialsConfiguration from dlt.common.configuration import configspec, resolve_type -from dlt.common.configuration.specs import GcpServiceAccountCredentials, AwsCredentials, GcpOAuthCredentials, AzureCredentials, AzureCredentialsWithoutDefaults, BaseConfiguration +from dlt.common.configuration.specs import ( + GcpServiceAccountCredentials, + AwsCredentials, + GcpOAuthCredentials, + AzureCredentials, + AzureCredentialsWithoutDefaults, + BaseConfiguration, +) from dlt.common.utils import digest128 from dlt.common.configuration.exceptions import ConfigurationValueError @@ -18,11 +25,18 @@ class SchemaStorageConfiguration(BaseConfiguration): import_schema_path: Optional[str] = None # path from which to import a schema into storage export_schema_path: Optional[str] = None # path to which export schema from storage external_schema_format: TSchemaFileFormat = "yaml" # format in which to expect external schema - external_schema_format_remove_defaults: bool = True # remove default values when exporting schema + external_schema_format_remove_defaults: bool = ( + True # remove default values when exporting schema + ) if TYPE_CHECKING: - def __init__(self, schema_volume_path: str = None, import_schema_path: str = None, export_schema_path: str = None) -> None: - ... + + def __init__( + self, + schema_volume_path: str = None, + import_schema_path: str = None, + export_schema_path: str = None, + ) -> None: ... @configspec @@ -30,33 +44,43 @@ class NormalizeStorageConfiguration(BaseConfiguration): normalize_volume_path: str = None # path to volume where normalized loader files will be stored if TYPE_CHECKING: - def __init__(self, normalize_volume_path: str = None) -> None: - ... + + def __init__(self, normalize_volume_path: str = None) -> None: ... 
@configspec class LoadStorageConfiguration(BaseConfiguration): - load_volume_path: str = None # path to volume where files to be loaded to analytical storage are stored - delete_completed_jobs: bool = False # if set to true the folder with completed jobs will be deleted + load_volume_path: str = ( + None # path to volume where files to be loaded to analytical storage are stored + ) + delete_completed_jobs: bool = ( + False # if set to true the folder with completed jobs will be deleted + ) if TYPE_CHECKING: - def __init__(self, load_volume_path: str = None, delete_completed_jobs: bool = None) -> None: - ... + + def __init__( + self, load_volume_path: str = None, delete_completed_jobs: bool = None + ) -> None: ... -FileSystemCredentials = Union[AwsCredentials, GcpServiceAccountCredentials, AzureCredentials, GcpOAuthCredentials] +FileSystemCredentials = Union[ + AwsCredentials, GcpServiceAccountCredentials, AzureCredentials, GcpOAuthCredentials +] + @configspec class FilesystemConfiguration(BaseConfiguration): """A configuration defining filesystem location and access credentials. - When configuration is resolved, `bucket_url` is used to extract a protocol and request corresponding credentials class. - * s3 - * gs, gcs - * az, abfs, adl - * file, memory - * gdrive + When configuration is resolved, `bucket_url` is used to extract a protocol and request corresponding credentials class. + * s3 + * gs, gcs + * az, abfs, adl + * file, memory + * gdrive """ + PROTOCOL_CREDENTIALS: ClassVar[Dict[str, Any]] = { "gs": Union[GcpServiceAccountCredentials, GcpOAuthCredentials], "gcs": Union[GcpServiceAccountCredentials, GcpOAuthCredentials], @@ -84,13 +108,16 @@ def protocol(self) -> str: def on_resolved(self) -> None: url = urlparse(self.bucket_url) if not url.path and not url.netloc: - raise ConfigurationValueError("File path or netloc missing. Field bucket_url of FilesystemClientConfiguration must contain valid url with a path or host:password component.") + raise ConfigurationValueError( + "File path or netloc missing. Field bucket_url of FilesystemClientConfiguration" + " must contain valid url with a path or host:password component." + ) # this is just a path in local file system if url.path == self.bucket_url: url = url._replace(scheme="file") self.bucket_url = url.geturl() - @resolve_type('credentials') + @resolve_type("credentials") def resolve_credentials_type(self) -> Type[CredentialsConfiguration]: # use known credentials or empty credentials for unknown protocol return self.PROTOCOL_CREDENTIALS.get(self.protocol) or Optional[CredentialsConfiguration] # type: ignore[return-value] @@ -113,9 +140,5 @@ def __str__(self) -> str: return self.bucket_url if TYPE_CHECKING: - def __init__( - self, - bucket_url: str, - credentials: FileSystemCredentials = None - ) -> None: - ... + + def __init__(self, bucket_url: str, credentials: FileSystemCredentials = None) -> None: ... 
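A minimal, standalone sketch of the idea behind the protocol-based credentials dispatch that FilesystemConfiguration implements above via PROTOCOL_CREDENTIALS, on_resolved() and resolve_credentials_type(): the scheme of `bucket_url` selects the credentials type, and a bare local path is treated as the "file" protocol. The names below (_Creds, _AwsCreds, _GcpCreds, _protocol, _credentials_type_for) are illustrative stand-ins, not the real dlt classes.

from typing import Dict, Type
from urllib.parse import urlparse


class _Creds:
    """Stand-in for a generic credentials configuration."""


class _AwsCreds(_Creds):
    """Stand-in for AWS credentials."""


class _GcpCreds(_Creds):
    """Stand-in for GCP credentials."""


# protocol (URL scheme) -> credentials type, analogous to PROTOCOL_CREDENTIALS
_PROTOCOL_CREDENTIALS: Dict[str, Type[_Creds]] = {
    "s3": _AwsCreds,
    "gs": _GcpCreds,
    "gcs": _GcpCreds,
}


def _protocol(bucket_url: str) -> str:
    # a plain local path parses with an empty scheme; treat it as the "file" protocol,
    # analogous to on_resolved() rewriting such paths to a file:// url
    return urlparse(bucket_url).scheme or "file"


def _credentials_type_for(bucket_url: str) -> Type[_Creds]:
    # unknown protocols fall back to the generic credentials type,
    # analogous to resolve_credentials_type()
    return _PROTOCOL_CREDENTIALS.get(_protocol(bucket_url), _Creds)


if __name__ == "__main__":
    assert _credentials_type_for("s3://bucket/data") is _AwsCreds
    assert _credentials_type_for("gs://bucket/data") is _GcpCreds
    assert _credentials_type_for("/var/dlt/local_files") is _Creds  # local path -> "file"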
diff --git a/dlt/common/storages/data_item_storage.py b/dlt/common/storages/data_item_storage.py
index 8de95a6f60..816c6bc494 100644
--- a/dlt/common/storages/data_item_storage.py
+++ b/dlt/common/storages/data_item_storage.py
@@ -1,10 +1,12 @@
-from typing import Dict, Any, List, Generic
+from pathlib import Path
+from typing import Dict, Any, List, Sequence
 from abc import ABC, abstractmethod
 
 from dlt.common import logger
+from dlt.common.destination import TLoaderFileFormat
 from dlt.common.schema import TTableSchemaColumns
-from dlt.common.typing import TDataItems
-from dlt.common.data_writers import TLoaderFileFormat, BufferedDataWriter, DataWriter
+from dlt.common.typing import StrAny, TDataItems
+from dlt.common.data_writers import BufferedDataWriter, DataWriter, DataWriterMetrics
 
 
 class DataItemStorage(ABC):
@@ -13,7 +15,9 @@ def __init__(self, load_file_type: TLoaderFileFormat, *args: Any) -> None:
         self.buffered_writers: Dict[str, BufferedDataWriter[DataWriter]] = {}
         super().__init__(*args)
 
-    def get_writer(self, load_id: str, schema_name: str, table_name: str) -> BufferedDataWriter[DataWriter]:
+    def _get_writer(
+        self, load_id: str, schema_name: str, table_name: str
+    ) -> BufferedDataWriter[DataWriter]:
         # unique writer id
         writer_id = f"{load_id}.{schema_name}.{table_name}"
         writer = self.buffered_writers.get(writer_id, None)
@@ -24,30 +28,90 @@ def get_writer(self, load_id: str, schema_name: str, table_name: str) -> Buffere
             self.buffered_writers[writer_id] = writer
         return writer
 
-    def write_data_item(self, load_id: str, schema_name: str, table_name: str, item: TDataItems, columns: TTableSchemaColumns) -> None:
-        writer = self.get_writer(load_id, schema_name, table_name)
+    def write_data_item(
+        self,
+        load_id: str,
+        schema_name: str,
+        table_name: str,
+        item: TDataItems,
+        columns: TTableSchemaColumns,
+    ) -> int:
+        writer = self._get_writer(load_id, schema_name, table_name)
         # write item(s)
-        writer.write_data_item(item, columns)
+        return writer.write_data_item(item, columns)
 
-    def write_empty_file(self, load_id: str, schema_name: str, table_name: str, columns: TTableSchemaColumns) -> None:
-        writer = self.get_writer(load_id, schema_name, table_name)
-        writer.write_empty_file(columns)
+    def write_empty_items_file(
+        self, load_id: str, schema_name: str, table_name: str, columns: TTableSchemaColumns
+    ) -> DataWriterMetrics:
+        """Writes an empty file: only the header and footer, without actual items. Closes the
+        empty file and returns metrics. Mind that the header and footer will be written."""
+        writer = self._get_writer(load_id, schema_name, table_name)
+        return writer.write_empty_file(columns)
 
-    def close_writers(self, extract_id: str) -> None:
+    def import_items_file(
+        self,
+        load_id: str,
+        schema_name: str,
+        table_name: str,
+        file_path: str,
+        metrics: DataWriterMetrics,
+    ) -> DataWriterMetrics:
+        """Import a file from `file_path` into items storage under a new file name. Does not check
+        the imported file format. Uses counts from `metrics` as a base. Logically closes the imported file.
+
+        The preferred import method is a hard link to avoid copying the data. If the current filesystem does not
+        support it, a regular copy is used.
+ """ + writer = self._get_writer(load_id, schema_name, table_name) + return writer.import_file(file_path, metrics) + + def close_writers(self, load_id: str) -> None: # flush and close all files for name, writer in self.buffered_writers.items(): - if name.startswith(extract_id): - logger.debug(f"Closing writer for {name} with file {writer._file} and actual name {writer._file_name}") + if name.startswith(load_id) and not writer.closed: + logger.debug( + f"Closing writer for {name} with file {writer._file} and actual name" + f" {writer._file_name}" + ) writer.close() - def closed_files(self) -> List[str]: - files: List[str] = [] - for writer in self.buffered_writers.values(): - files.extend(writer.closed_files) + def closed_files(self, load_id: str) -> List[DataWriterMetrics]: + """Return metrics for all fully processed (closed) files""" + files: List[DataWriterMetrics] = [] + for name, writer in self.buffered_writers.items(): + if name.startswith(load_id): + files.extend(writer.closed_files) return files + def remove_closed_files(self, load_id: str) -> None: + """Remove metrics for closed files in a given `load_id`""" + for name, writer in self.buffered_writers.items(): + if name.startswith(load_id): + writer.closed_files.clear() + + def _write_temp_job_file( + self, + load_id: str, + table_name: str, + table: TTableSchemaColumns, + file_id: str, + rows: Sequence[StrAny], + ) -> str: + """Writes new file into new packages "new_jobs". Intended for testing""" + file_name = ( + self._get_data_item_path_template(load_id, None, table_name) % file_id + + "." + + self.loader_file_format + ) + format_spec = DataWriter.data_format_from_file_format(self.loader_file_format) + mode = "wb" if format_spec.is_binary_format else "w" + with self.storage.open_file(file_name, mode=mode) as f: # type: ignore[attr-defined] + writer = DataWriter.from_file_format(self.loader_file_format, f) + writer.write_all(table, rows) + return Path(file_name).name + @abstractmethod def _get_data_item_path_template(self, load_id: str, schema_name: str, table_name: str) -> str: - # note: use %s for file id to create required template format + """Returns a file template for item writer. 
note: use %s for file id to create required template format""" pass diff --git a/dlt/common/storages/exceptions.py b/dlt/common/storages/exceptions.py index 3203191cd8..22d6dfaf79 100644 --- a/dlt/common/storages/exceptions.py +++ b/dlt/common/storages/exceptions.py @@ -1,8 +1,8 @@ import semver from typing import Iterable -from dlt.common.exceptions import DltException -from dlt.common.data_writers import TLoaderFileFormat +from dlt.common.exceptions import DltException, TerminalValueError +from dlt.common.destination import TLoaderFileFormat class StorageException(DltException): @@ -11,32 +11,69 @@ def __init__(self, msg: str) -> None: class NoMigrationPathException(StorageException): - def __init__(self, storage_path: str, initial_version: semver.VersionInfo, migrated_version: semver.VersionInfo, target_version: semver.VersionInfo) -> None: + def __init__( + self, + storage_path: str, + initial_version: semver.VersionInfo, + migrated_version: semver.VersionInfo, + target_version: semver.VersionInfo, + ) -> None: self.storage_path = storage_path self.initial_version = initial_version self.migrated_version = migrated_version self.target_version = target_version - super().__init__(f"Could not find migration path for {storage_path} from v {initial_version} to {target_version}, stopped at {migrated_version}") + super().__init__( + f"Could not find migration path for {storage_path} from v {initial_version} to" + f" {target_version}, stopped at {migrated_version}" + ) class WrongStorageVersionException(StorageException): - def __init__(self, storage_path: str, initial_version: semver.VersionInfo, target_version: semver.VersionInfo) -> None: + def __init__( + self, + storage_path: str, + initial_version: semver.VersionInfo, + target_version: semver.VersionInfo, + ) -> None: self.storage_path = storage_path self.initial_version = initial_version self.target_version = target_version - super().__init__(f"Expected storage {storage_path} with v {target_version} but found {initial_version}") + super().__init__( + f"Expected storage {storage_path} with v {target_version} but found {initial_version}" + ) + + +class StorageMigrationError(StorageException): + def __init__( + self, + storage_path: str, + from_version: semver.VersionInfo, + target_version: semver.VersionInfo, + info: str, + ) -> None: + self.storage_path = storage_path + self.from_version = from_version + self.target_version = target_version + super().__init__( + f"Storage {storage_path} with target v {target_version} at {from_version}: " + info + ) class LoadStorageException(StorageException): pass -class JobWithUnsupportedWriterException(LoadStorageException): - def __init__(self, load_id: str, expected_file_formats: Iterable[TLoaderFileFormat], wrong_job: str) -> None: +class JobWithUnsupportedWriterException(LoadStorageException, TerminalValueError): + def __init__( + self, load_id: str, expected_file_formats: Iterable[TLoaderFileFormat], wrong_job: str + ) -> None: self.load_id = load_id self.expected_file_formats = expected_file_formats self.wrong_job = wrong_job - super().__init__(f"Job {wrong_job} for load id {load_id} requires loader file format that is not one of {expected_file_formats}") + super().__init__( + f"Job {wrong_job} for load id {load_id} requires loader file format that is not one of" + f" {expected_file_formats}" + ) class LoadPackageNotFound(LoadStorageException, FileNotFoundError): @@ -51,12 +88,22 @@ class SchemaStorageException(StorageException): class InStorageSchemaModified(SchemaStorageException): def 
__init__(self, schema_name: str, storage_path: str) -> None: - msg = f"Schema {schema_name} in {storage_path} was externally modified. This is not allowed as that would prevent correct version tracking. Use import/export capabilities of dlt to provide external changes." + msg = ( + f"Schema {schema_name} in {storage_path} was externally modified. This is not allowed" + " as that would prevent correct version tracking. Use import/export capabilities of" + " dlt to provide external changes." + ) super().__init__(msg) class SchemaNotFoundError(SchemaStorageException, FileNotFoundError, KeyError): - def __init__(self, schema_name: str, storage_path: str, import_path: str = None, import_format: str = None) -> None: + def __init__( + self, + schema_name: str, + storage_path: str, + import_path: str = None, + import_format: str = None, + ) -> None: msg = f"Schema {schema_name} in {storage_path} could not be found." if import_path: msg += f"Import from {import_path} and format {import_format} failed." @@ -65,4 +112,7 @@ def __init__(self, schema_name: str, storage_path: str, import_path: str = None, class UnexpectedSchemaName(SchemaStorageException, ValueError): def __init__(self, schema_name: str, storage_path: str, stored_name: str) -> None: - super().__init__(f"A schema file name '{schema_name}' in {storage_path} does not correspond to the name of schema in the file {stored_name}") + super().__init__( + f"A schema file name '{schema_name}' in {storage_path} does not correspond to the name" + f" of schema in the file {stored_name}" + ) diff --git a/dlt/common/storages/file_storage.py b/dlt/common/storages/file_storage.py index 3c5a391200..7fe62a9728 100644 --- a/dlt/common/storages/file_storage.py +++ b/dlt/common/storages/file_storage.py @@ -14,11 +14,9 @@ FILE_COMPONENT_INVALID_CHARACTERS = re.compile(r"[.%{}]") + class FileStorage: - def __init__(self, - storage_path: str, - file_type: str = "t", - makedirs: bool = False) -> None: + def __init__(self, storage_path: str, file_type: str = "t", makedirs: bool = False) -> None: # make it absolute path self.storage_path = os.path.realpath(storage_path) # os.path.join(, '') self.file_type = file_type @@ -31,7 +29,9 @@ def save(self, relative_path: str, data: Any) -> str: @staticmethod def save_atomic(storage_path: str, relative_path: str, data: Any, file_type: str = "t") -> str: mode = "w" + file_type - with tempfile.NamedTemporaryFile(dir=storage_path, mode=mode, delete=False, encoding=encoding_for_mode(mode)) as f: + with tempfile.NamedTemporaryFile( + dir=storage_path, mode=mode, delete=False, encoding=encoding_for_mode(mode) + ) as f: tmp_path = f.name f.write(data) try: @@ -93,7 +93,9 @@ def delete(self, relative_path: str) -> None: else: raise FileNotFoundError(file_path) - def delete_folder(self, relative_path: str, recursively: bool = False, delete_ro: bool = False) -> None: + def delete_folder( + self, relative_path: str, recursively: bool = False, delete_ro: bool = False + ) -> None: folder_path = self.make_full_path(relative_path) if os.path.isdir(folder_path): if recursively: @@ -116,7 +118,9 @@ def open_file(self, relative_path: str, mode: str = "r") -> IO[Any]: def open_temp(self, delete: bool = False, mode: str = "w", file_type: str = None) -> IO[Any]: mode = mode + file_type or self.file_type - return tempfile.NamedTemporaryFile(dir=self.storage_path, mode=mode, delete=delete, encoding=encoding_for_mode(mode)) + return tempfile.NamedTemporaryFile( + dir=self.storage_path, mode=mode, delete=delete, encoding=encoding_for_mode(mode) + 
) def has_file(self, relative_path: str) -> bool: return os.path.isfile(self.make_full_path(relative_path)) @@ -137,7 +141,9 @@ def list_folder_files(self, relative_path: str, to_root: bool = True) -> List[st scan_path = self.make_full_path(relative_path) if to_root: # list files in relative path, returning paths relative to storage root - return [os.path.join(relative_path, e.name) for e in os.scandir(scan_path) if e.is_file()] + return [ + os.path.join(relative_path, e.name) for e in os.scandir(scan_path) if e.is_file() + ] else: # or to the folder return [e.name for e in os.scandir(scan_path) if e.is_file()] @@ -147,7 +153,9 @@ def list_folder_dirs(self, relative_path: str, to_root: bool = True) -> List[str scan_path = self.make_full_path(relative_path) if to_root: # list folders in relative path, returning paths relative to storage root - return [os.path.join(relative_path, e.name) for e in os.scandir(scan_path) if e.is_dir()] + return [ + os.path.join(relative_path, e.name) for e in os.scandir(scan_path) if e.is_dir() + ] else: # or to the folder return [e.name for e in os.scandir(scan_path) if e.is_dir()] @@ -157,15 +165,11 @@ def create_folder(self, relative_path: str, exists_ok: bool = False) -> None: def link_hard(self, from_relative_path: str, to_relative_path: str) -> None: # note: some interesting stuff on links https://lightrun.com/answers/conan-io-conan-research-investigate-symlinks-and-hard-links - os.link( - self.make_full_path(from_relative_path), - self.make_full_path(to_relative_path) - ) + os.link(self.make_full_path(from_relative_path), self.make_full_path(to_relative_path)) @staticmethod def link_hard_with_fallback(external_file_path: str, to_file_path: str) -> None: - """Try to create a hardlink and fallback to copying when filesystem doesn't support links - """ + """Try to create a hardlink and fallback to copying when filesystem doesn't support links""" try: os.link(external_file_path, to_file_path) except OSError as ex: @@ -184,10 +188,7 @@ def atomic_rename(self, from_relative_path: str, to_relative_path: str) -> None: 3. 
All buckets mapped with FUSE are not atomic """ - os.rename( - self.make_full_path(from_relative_path), - self.make_full_path(to_relative_path) - ) + os.rename(self.make_full_path(from_relative_path), self.make_full_path(to_relative_path)) def rename_tree(self, from_relative_path: str, to_relative_path: str) -> None: """Renames a tree using os.rename if possible making it atomic @@ -226,7 +227,9 @@ def rename_tree_files(self, from_relative_path: str, to_relative_path: str) -> N if not os.listdir(root): os.rmdir(root) - def atomic_import(self, external_file_path: str, to_folder: str, new_file_name: Optional[str] = None) -> str: + def atomic_import( + self, external_file_path: str, to_folder: str, new_file_name: Optional[str] = None + ) -> str: """Moves a file at `external_file_path` into the `to_folder` effectively importing file into storage Args: @@ -239,7 +242,9 @@ def atomic_import(self, external_file_path: str, to_folder: str, new_file_name: """ new_file_name = new_file_name or os.path.basename(external_file_path) dest_file_path = os.path.join(self.make_full_path(to_folder), new_file_name) - return self.to_relative_path(FileStorage.move_atomic_to_file(external_file_path, dest_file_path)) + return self.to_relative_path( + FileStorage.move_atomic_to_file(external_file_path, dest_file_path) + ) def in_storage(self, path: str) -> bool: assert path is not None @@ -281,10 +286,16 @@ def get_file_name_from_file_path(file_path: str) -> str: @staticmethod def validate_file_name_component(name: str) -> None: # Universal platform bans several characters allowed in POSIX ie. | < \ or "COM1" :) - pathvalidate.validate_filename(name, platform="Universal") + try: + pathvalidate.validate_filename(name, platform="Universal") + except pathvalidate.error.ValidationError as val_ex: + if val_ex.reason != pathvalidate.ErrorReason.INVALID_LENGTH: + raise # component cannot contain "." if FILE_COMPONENT_INVALID_CHARACTERS.search(name): - raise pathvalidate.error.InvalidCharError(description="Component name cannot contain the following characters: . % { }") + raise pathvalidate.error.InvalidCharError( + description="Component name cannot contain the following characters: . 
% { }" + ) @staticmethod def rmtree_del_ro(action: AnyFun, name: str, exc: Any) -> Any: @@ -311,7 +322,6 @@ def open_zipsafe_ro(path: str, mode: str = "r", **kwargs: Any) -> IO[Any]: except (gzip.BadGzipFile, OSError): return open(path, origmode, encoding=encoding, **kwargs) - @staticmethod def is_gzipped(path: str) -> bool: """Checks if file under path is gzipped by reading a header""" diff --git a/dlt/common/storages/fsspec_filesystem.py b/dlt/common/storages/fsspec_filesystem.py index c084fcc12e..18c1837e00 100644 --- a/dlt/common/storages/fsspec_filesystem.py +++ b/dlt/common/storages/fsspec_filesystem.py @@ -13,7 +13,12 @@ from dlt.common.exceptions import MissingDependencyException from dlt.common.time import ensure_pendulum_datetime from dlt.common.typing import DictStrAny -from dlt.common.configuration.specs import CredentialsWithDefault, GcpCredentials, AwsCredentials, AzureCredentials +from dlt.common.configuration.specs import ( + CredentialsWithDefault, + GcpCredentials, + AwsCredentials, + AzureCredentials, +) from dlt.common.storages.configuration import FileSystemCredentials, FilesystemConfiguration from dlt import version @@ -21,6 +26,7 @@ class FileItem(TypedDict, total=False): """A DataItem representing a file""" + file_url: str file_name: str mime_type: str @@ -45,31 +51,32 @@ class FileItem(TypedDict, total=False): MTIME_DISPATCH["abfs"] = MTIME_DISPATCH["az"] -def fsspec_filesystem(protocol: str, credentials: FileSystemCredentials = None) -> Tuple[AbstractFileSystem, str]: +def fsspec_filesystem( + protocol: str, credentials: FileSystemCredentials = None +) -> Tuple[AbstractFileSystem, str]: """Instantiates an authenticated fsspec `FileSystem` for a given `protocol` and credentials. - Please supply credentials instance corresponding to the protocol. The `protocol` is just the code name of the filesystem ie: - * s3 - * az, abfs - * gcs, gs + Please supply credentials instance corresponding to the protocol. The `protocol` is just the code name of the filesystem ie: + * s3 + * az, abfs + * gcs, gs - also see filesystem_from_config + also see filesystem_from_config """ return fsspec_from_config(FilesystemConfiguration(protocol, credentials)) - def fsspec_from_config(config: FilesystemConfiguration) -> Tuple[AbstractFileSystem, str]: """Instantiates an authenticated fsspec `FileSystem` from `config` argument. 
- Authenticates following filesystems: - * s3 - * az, abfs - * gcs, gs + Authenticates following filesystems: + * s3 + * az, abfs + * gcs, gs - All other filesystems are not authenticated + All other filesystems are not authenticated - Returns: (fsspec filesystem, normalized url) + Returns: (fsspec filesystem, normalized url) """ proto = config.protocol @@ -78,14 +85,17 @@ def fsspec_from_config(config: FilesystemConfiguration) -> Tuple[AbstractFileSys fs_kwargs.update(cast(AwsCredentials, config.credentials).to_s3fs_credentials()) elif proto in ["az", "abfs", "adl", "azure"]: fs_kwargs.update(cast(AzureCredentials, config.credentials).to_adlfs_credentials()) - elif proto in ['gcs', 'gs']: + elif proto in ["gcs", "gs"]: assert isinstance(config.credentials, GcpCredentials) # Default credentials are handled by gcsfs - if isinstance(config.credentials, CredentialsWithDefault) and config.credentials.has_default_credentials(): - fs_kwargs['token'] = None + if ( + isinstance(config.credentials, CredentialsWithDefault) + and config.credentials.has_default_credentials() + ): + fs_kwargs["token"] = None else: - fs_kwargs['token'] = dict(config.credentials) - fs_kwargs['project'] = config.credentials.project_id + fs_kwargs["token"] = dict(config.credentials) + fs_kwargs["project"] = config.credentials.project_id try: return url_to_fs(config.bucket_url, use_listings_cache=False, **fs_kwargs) # type: ignore[no-any-return] except ModuleNotFoundError as e: @@ -93,11 +103,12 @@ def fsspec_from_config(config: FilesystemConfiguration) -> Tuple[AbstractFileSys class FileItemDict(DictStrAny): - """A FileItem dictionary with additional methods to get fsspec filesystem, open and read files. - """ + """A FileItem dictionary with additional methods to get fsspec filesystem, open and read files.""" def __init__( - self, mapping: FileItem, credentials: Optional[Union[FileSystemCredentials, AbstractFileSystem]] = None + self, + mapping: FileItem, + credentials: Optional[Union[FileSystemCredentials, AbstractFileSystem]] = None, ): """Create a dictionary with the filesystem client. @@ -141,9 +152,7 @@ def open(self, mode: str = "rb", **kwargs: Any) -> IO[Any]: # noqa: A003 if "t" in mode: text_kwargs = { - k: kwargs.pop(k) - for k in ["encoding", "errors", "newline"] - if k in kwargs + k: kwargs.pop(k) for k in ["encoding", "errors", "newline"] if k in kwargs } return io.TextIOWrapper( bytes_io, @@ -191,6 +200,7 @@ def glob_files( Iterable[FileItem]: The list of files. """ import os + bucket_url_parsed = urlparse(bucket_url) # if this is file path without scheme if not bucket_url_parsed.scheme or (os.path.isabs(bucket_url) and "\\" in bucket_url): @@ -198,13 +208,16 @@ def glob_files( bucket_url = pathlib.Path(bucket_url).absolute().as_uri() bucket_url_parsed = urlparse(bucket_url) - bucket_path = bucket_url_parsed._replace(scheme='').geturl() + bucket_path = bucket_url_parsed._replace(scheme="").geturl() bucket_path = bucket_path[2:] if bucket_path.startswith("//") else bucket_path filter_url = posixpath.join(bucket_path, file_glob) glob_result = fs_client.glob(filter_url, detail=True) if isinstance(glob_result, list): - raise NotImplementedError("Cannot request details when using fsspec.glob. For ADSL (Azure) please use version 2023.9.0 or later") + raise NotImplementedError( + "Cannot request details when using fsspec.glob. 
For ADSL (Azure) please use version" + " 2023.9.0 or later" + ) for file, md in glob_result.items(): if md["type"] != "file": diff --git a/dlt/common/storages/live_schema_storage.py b/dlt/common/storages/live_schema_storage.py index c482d5e7ea..e3fd07cf72 100644 --- a/dlt/common/storages/live_schema_storage.py +++ b/dlt/common/storages/live_schema_storage.py @@ -1,4 +1,4 @@ -from typing import Dict +from typing import Dict, List from dlt.common.schema.schema import Schema from dlt.common.configuration.accessors import config @@ -7,8 +7,9 @@ class LiveSchemaStorage(SchemaStorage): - - def __init__(self, config: SchemaStorageConfiguration = config.value, makedirs: bool = False) -> None: + def __init__( + self, config: SchemaStorageConfiguration = config.value, makedirs: bool = False + ) -> None: self.live_schemas: Dict[str, Schema] = {} super().__init__(config, makedirs) @@ -18,7 +19,7 @@ def __getitem__(self, name: str) -> Schema: else: # return new schema instance schema = super().load_schema(name) - self._update_live_schema(schema) + self.update_live_schema(schema) return schema @@ -30,7 +31,7 @@ def load_schema(self, name: str) -> Schema: def save_schema(self, schema: Schema) -> str: rv = super().save_schema(schema) # update the live schema with schema being saved, if no live schema exist, create one to be available for a getter - self._update_live_schema(schema) + self.update_live_schema(schema) return rv def remove_schema(self, name: str) -> None: @@ -54,12 +55,18 @@ def commit_live_schema(self, name: str) -> Schema: self._save_schema(live_schema) return live_schema - def _update_live_schema(self, schema: Schema, can_create_new: bool = True) -> None: + def update_live_schema(self, schema: Schema, can_create_new: bool = True) -> None: + """Will update live schema content without writing to storage. 
Optionally allows to create a new live schema"""
         live_schema = self.live_schemas.get(schema.name)
         if live_schema:
-            # replace content without replacing instance
-            # print(f"live schema {live_schema} updated in place")
-            live_schema.replace_schema_content(schema)
+            if id(live_schema) != id(schema):
+                # replace content without replacing instance
+                # print(f"live schema {live_schema} updated in place")
+                live_schema.replace_schema_content(schema)
         elif can_create_new:
             # print(f"live schema {schema.name} created from schema")
             self.live_schemas[schema.name] = schema
+
+    def list_schemas(self) -> List[str]:
+        names = list(set(super().list_schemas()) | set(self.live_schemas.keys()))
+        return names
diff --git a/dlt/common/storages/load_package.py b/dlt/common/storages/load_package.py
new file mode 100644
index 0000000000..2860364cd0
--- /dev/null
+++ b/dlt/common/storages/load_package.py
@@ -0,0 +1,505 @@
+import contextlib
+import os
+from copy import deepcopy
+import datetime  # noqa: 251
+import humanize
+from pathlib import Path
+from pendulum.datetime import DateTime
+from typing import (
+    ClassVar,
+    Dict,
+    List,
+    NamedTuple,
+    Literal,
+    Optional,
+    Sequence,
+    Set,
+    get_args,
+    cast,
+)
+
+from dlt.common import pendulum, json
+from dlt.common.data_writers import DataWriter, new_file_id
+from dlt.common.destination import TLoaderFileFormat
+from dlt.common.exceptions import TerminalValueError
+from dlt.common.schema import Schema, TSchemaTables
+from dlt.common.schema.typing import TStoredSchema, TTableSchemaColumns
+from dlt.common.storages import FileStorage
+from dlt.common.storages.exceptions import LoadPackageNotFound
+from dlt.common.typing import DictStrAny, StrAny, SupportsHumanize
+from dlt.common.utils import flatten_list_or_items
+
+# folders to manage load jobs in a single load package
+TJobState = Literal["new_jobs", "failed_jobs", "started_jobs", "completed_jobs"]
+WORKING_FOLDERS: Set[TJobState] = set(get_args(TJobState))
+TLoadPackageState = Literal["new", "extracted", "normalized", "loaded", "aborted"]
+
+
+class ParsedLoadJobFileName(NamedTuple):
+    """Represents a file name of a job in load package. The file name contains name of a table, number of times the job was retried, extension
+ The job id does not contain retry count and is immutable during loading of the data + """ + + table_name: str + file_id: str + retry_count: int + file_format: TLoaderFileFormat + + def job_id(self) -> str: + """Unique identifier of the job""" + return f"{self.table_name}.{self.file_id}.{self.file_format}" + + def file_name(self) -> str: + """A name of the file with the data to be loaded""" + return f"{self.table_name}.{self.file_id}.{int(self.retry_count)}.{self.file_format}" + + def with_retry(self) -> "ParsedLoadJobFileName": + """Returns a job with increased retry count""" + return self._replace(retry_count=self.retry_count + 1) + + @staticmethod + def parse(file_name: str) -> "ParsedLoadJobFileName": + p = Path(file_name) + parts = p.name.split(".") + if len(parts) != 4: + raise TerminalValueError(parts) + + return ParsedLoadJobFileName( + parts[0], parts[1], int(parts[2]), cast(TLoaderFileFormat, parts[3]) + ) + + @staticmethod + def new_file_id() -> str: + return new_file_id() + + def __str__(self) -> str: + return self.job_id() + + +class LoadJobInfo(NamedTuple): + state: TJobState + file_path: str + file_size: int + created_at: datetime.datetime + elapsed: float + job_file_info: ParsedLoadJobFileName + failed_message: str + + def asdict(self) -> DictStrAny: + d = self._asdict() + # flatten + del d["job_file_info"] + d.update(self.job_file_info._asdict()) + return d + + def asstr(self, verbosity: int = 0) -> str: + failed_msg = ( + "The job FAILED TERMINALLY and cannot be restarted." if self.failed_message else "" + ) + elapsed_msg = ( + humanize.precisedelta(pendulum.duration(seconds=self.elapsed)) + if self.elapsed + else "---" + ) + msg = ( + f"Job: {self.job_file_info.job_id()}, table: {self.job_file_info.table_name} in" + f" {self.state}. " + ) + msg += ( + f"File type: {self.job_file_info.file_format}, size:" + f" {humanize.naturalsize(self.file_size, binary=True, gnu=True)}. " + ) + msg += f"Started on: {self.created_at} and completed in {elapsed_msg}." + if failed_msg: + msg += "\nThe job FAILED TERMINALLY and cannot be restarted." 
+ if verbosity > 0: + msg += "\n" + self.failed_message + return msg + + def __str__(self) -> str: + return self.asstr(verbosity=0) + + +class _LoadPackageInfo(NamedTuple): + load_id: str + package_path: str + state: TLoadPackageState + schema: Schema + schema_update: TSchemaTables + completed_at: datetime.datetime + jobs: Dict[TJobState, List[LoadJobInfo]] + + +class LoadPackageInfo(SupportsHumanize, _LoadPackageInfo): + @property + def schema_name(self) -> str: + return self.schema.name + + @property + def schema_hash(self) -> str: + return self.schema.stored_version_hash + + def asdict(self) -> DictStrAny: + d = self._asdict() + # job as list + d["jobs"] = [job.asdict() for job in flatten_list_or_items(iter(self.jobs.values()))] # type: ignore + d["schema_hash"] = self.schema_hash + d["schema_name"] = self.schema_name + # flatten update into list of columns + tables: List[DictStrAny] = deepcopy(list(self.schema_update.values())) # type: ignore + for table in tables: + table.pop("filters", None) + columns: List[DictStrAny] = [] + table["schema_name"] = self.schema_name + table["load_id"] = self.load_id + for column in table["columns"].values(): + column["table_name"] = table["name"] + column["schema_name"] = self.schema_name + column["load_id"] = self.load_id + columns.append(column) + table["columns"] = columns + d.pop("schema_update") + d.pop("schema") + d["tables"] = tables + + return d + + def asstr(self, verbosity: int = 0) -> str: + completed_msg = ( + f"The package was {self.state.upper()} at {self.completed_at}" + if self.completed_at + else "The package is NOT YET LOADED to the destination" + ) + msg = ( + f"The package with load id {self.load_id} for schema {self.schema_name} is in" + f" {self.state.upper()} state. It updated schema for {len(self.schema_update)} tables." + f" {completed_msg}.\n" + ) + msg += "Jobs details:\n" + msg += "\n".join(job.asstr(verbosity) for job in flatten_list_or_items(iter(self.jobs.values()))) # type: ignore + return msg + + def __str__(self) -> str: + return self.asstr(verbosity=0) + + +class PackageStorage: + NEW_JOBS_FOLDER: ClassVar[TJobState] = "new_jobs" + FAILED_JOBS_FOLDER: ClassVar[TJobState] = "failed_jobs" + STARTED_JOBS_FOLDER: ClassVar[TJobState] = "started_jobs" + COMPLETED_JOBS_FOLDER: ClassVar[TJobState] = "completed_jobs" + + SCHEMA_FILE_NAME: ClassVar[str] = "schema.json" + SCHEMA_UPDATES_FILE_NAME = ( # updates to the tables in schema created by normalizer + "schema_updates.json" + ) + APPLIED_SCHEMA_UPDATES_FILE_NAME = ( + "applied_" + "schema_updates.json" + ) # updates applied to the destination + PACKAGE_COMPLETED_FILE_NAME = ( # completed package marker file, currently only to store data with os.stat + "package_completed.json" + ) + + def __init__(self, storage: FileStorage, initial_state: TLoadPackageState) -> None: + """Creates storage that manages load packages with root at `storage` and initial package state `initial_state`""" + self.storage = storage + self.initial_state = initial_state + + # + # List jobs + # + + def get_package_path(self, load_id: str) -> str: + return load_id + + def get_job_folder_path(self, load_id: str, folder: TJobState) -> str: + return os.path.join(self.get_package_path(load_id), folder) + + def get_job_file_path(self, load_id: str, folder: TJobState, file_name: str) -> str: + return os.path.join(self.get_job_folder_path(load_id, folder), file_name) + + def list_packages(self) -> Sequence[str]: + """Lists all load ids in storage, earliest first + + NOTE: Load ids are sorted alphabetically. 
This class does not store package creation time separately. + """ + loads = self.storage.list_folder_dirs(".", to_root=False) + # start from the oldest packages + return sorted(loads) + + def list_new_jobs(self, load_id: str) -> Sequence[str]: + new_jobs = self.storage.list_folder_files( + self.get_job_folder_path(load_id, PackageStorage.NEW_JOBS_FOLDER) + ) + return new_jobs + + def list_started_jobs(self, load_id: str) -> Sequence[str]: + return self.storage.list_folder_files( + self.get_job_folder_path(load_id, PackageStorage.STARTED_JOBS_FOLDER) + ) + + def list_failed_jobs(self, load_id: str) -> Sequence[str]: + return self.storage.list_folder_files( + self.get_job_folder_path(load_id, PackageStorage.FAILED_JOBS_FOLDER) + ) + + def list_jobs_for_table(self, load_id: str, table_name: str) -> Sequence[LoadJobInfo]: + return [ + job for job in self.list_all_jobs(load_id) if job.job_file_info.table_name == table_name + ] + + def list_all_jobs(self, load_id: str) -> Sequence[LoadJobInfo]: + info = self.get_load_package_info(load_id) + return [job for job in flatten_list_or_items(iter(info.jobs.values()))] # type: ignore + + def list_failed_jobs_infos(self, load_id: str) -> Sequence[LoadJobInfo]: + """List all failed jobs and associated error messages for a load package with `load_id`""" + failed_jobs: List[LoadJobInfo] = [] + package_path = self.get_package_path(load_id) + package_created_at = pendulum.from_timestamp( + os.path.getmtime( + self.storage.make_full_path( + os.path.join(package_path, PackageStorage.PACKAGE_COMPLETED_FILE_NAME) + ) + ) + ) + for file in self.list_failed_jobs(load_id): + if not file.endswith(".exception"): + failed_jobs.append( + self._read_job_file_info("failed_jobs", file, package_created_at) + ) + return failed_jobs + + # + # Move jobs + # + + def import_job( + self, load_id: str, job_file_path: str, job_state: TJobState = "new_jobs" + ) -> None: + """Adds new job by moving the `job_file_path` into `new_jobs` of package `load_id`""" + self.storage.atomic_import(job_file_path, self.get_job_folder_path(load_id, job_state)) + + def start_job(self, load_id: str, file_name: str) -> str: + return self._move_job( + load_id, PackageStorage.NEW_JOBS_FOLDER, PackageStorage.STARTED_JOBS_FOLDER, file_name + ) + + def fail_job(self, load_id: str, file_name: str, failed_message: Optional[str]) -> str: + # save the exception to failed jobs + if failed_message: + self.storage.save( + self.get_job_file_path( + load_id, PackageStorage.FAILED_JOBS_FOLDER, file_name + ".exception" + ), + failed_message, + ) + # move to failed jobs + return self._move_job( + load_id, + PackageStorage.STARTED_JOBS_FOLDER, + PackageStorage.FAILED_JOBS_FOLDER, + file_name, + ) + + def retry_job(self, load_id: str, file_name: str) -> str: + # when retrying job we must increase the retry count + source_fn = ParsedLoadJobFileName.parse(file_name) + dest_fn = source_fn.with_retry() + # move it directly to new file name + return self._move_job( + load_id, + PackageStorage.STARTED_JOBS_FOLDER, + PackageStorage.NEW_JOBS_FOLDER, + file_name, + dest_fn.file_name(), + ) + + def complete_job(self, load_id: str, file_name: str) -> str: + return self._move_job( + load_id, + PackageStorage.STARTED_JOBS_FOLDER, + PackageStorage.COMPLETED_JOBS_FOLDER, + file_name, + ) + + # + # Create and drop entities + # + + def create_package(self, load_id: str) -> None: + self.storage.create_folder(load_id) + # create processing directories + self.storage.create_folder(os.path.join(load_id, PackageStorage.NEW_JOBS_FOLDER)) + 
self.storage.create_folder(os.path.join(load_id, PackageStorage.COMPLETED_JOBS_FOLDER)) + self.storage.create_folder(os.path.join(load_id, PackageStorage.FAILED_JOBS_FOLDER)) + self.storage.create_folder(os.path.join(load_id, PackageStorage.STARTED_JOBS_FOLDER)) + + def complete_loading_package(self, load_id: str, load_state: TLoadPackageState) -> str: + """Completes loading the package by writing a marker file with `load_state`. Returns the path to the completed package""" + load_path = self.get_package_path(load_id) + # save marker file + self.storage.save( + os.path.join(load_path, PackageStorage.PACKAGE_COMPLETED_FILE_NAME), load_state + ) + return load_path + + def remove_completed_jobs(self, load_id: str) -> None: + """Deletes completed jobs. If package has failed jobs, nothing gets deleted.""" + has_failed_jobs = len(self.list_failed_jobs(load_id)) > 0 + # delete completed jobs + if not has_failed_jobs: + self.storage.delete_folder( + self.get_job_folder_path(load_id, PackageStorage.COMPLETED_JOBS_FOLDER), + recursively=True, + ) + + def delete_package(self, load_id: str) -> None: + package_path = self.get_package_path(load_id) + if not self.storage.has_folder(package_path): + raise LoadPackageNotFound(load_id) + self.storage.delete_folder(package_path, recursively=True) + + def load_schema(self, load_id: str) -> Schema: + return Schema.from_dict(self._load_schema(load_id)) + + def schema_name(self, load_id: str) -> str: + """Gets schema name associated with the package""" + schema_dict: TStoredSchema = self._load_schema(load_id) # type: ignore[assignment] + return schema_dict["name"] + + def save_schema(self, load_id: str, schema: Schema) -> str: + # save a schema to a load package + dump = json.dumps(schema.to_dict()) + return self.storage.save(os.path.join(load_id, PackageStorage.SCHEMA_FILE_NAME), dump) + + def save_schema_updates(self, load_id: str, schema_update: TSchemaTables) -> None: + with self.storage.open_file( + os.path.join(load_id, PackageStorage.SCHEMA_UPDATES_FILE_NAME), mode="wb" + ) as f: + json.dump(schema_update, f) + + # + # Get package info + # + + def get_load_package_info(self, load_id: str) -> LoadPackageInfo: + """Gets information on normalized/completed package with given load_id, all jobs and their statuses.""" + package_path = self.get_package_path(load_id) + if not self.storage.has_folder(package_path): + raise LoadPackageNotFound(load_id) + + package_created_at: DateTime = None + package_state = self.initial_state + applied_update: TSchemaTables = {} + + # check if package completed + completed_file_path = os.path.join(package_path, PackageStorage.PACKAGE_COMPLETED_FILE_NAME) + if self.storage.has_file(completed_file_path): + package_created_at = pendulum.from_timestamp( + os.path.getmtime(self.storage.make_full_path(completed_file_path)) + ) + package_state = self.storage.load(completed_file_path) + + # check if schema updates applied + applied_schema_update_file = os.path.join( + package_path, PackageStorage.APPLIED_SCHEMA_UPDATES_FILE_NAME + ) + if self.storage.has_file(applied_schema_update_file): + applied_update = json.loads(self.storage.load(applied_schema_update_file)) + schema = Schema.from_dict(self._load_schema(load_id)) + + # read jobs with all statuses + all_jobs: Dict[TJobState, List[LoadJobInfo]] = {} + for state in WORKING_FOLDERS: + jobs: List[LoadJobInfo] = [] + with contextlib.suppress(FileNotFoundError): + # we ignore if load package lacks one of working folders.
completed_jobs may be deleted on archiving + for file in self.storage.list_folder_files(os.path.join(package_path, state)): + if not file.endswith(".exception"): + jobs.append(self._read_job_file_info(state, file, package_created_at)) + all_jobs[state] = jobs + + return LoadPackageInfo( + load_id, + self.storage.make_full_path(package_path), + package_state, + schema, + applied_update, + package_created_at, + all_jobs, + ) + + def _read_job_file_info(self, state: TJobState, file: str, now: DateTime = None) -> LoadJobInfo: + try: + failed_message = self.storage.load(file + ".exception") + except FileNotFoundError: + failed_message = None + full_path = self.storage.make_full_path(file) + st = os.stat(full_path) + return LoadJobInfo( + state, + full_path, + st.st_size, + pendulum.from_timestamp(st.st_mtime), + PackageStorage._job_elapsed_time_seconds(full_path, now.timestamp() if now else None), + ParsedLoadJobFileName.parse(file), + failed_message, + ) + + def _move_job( + self, + load_id: str, + source_folder: TJobState, + dest_folder: TJobState, + file_name: str, + new_file_name: str = None, + ) -> str: + # ensure we move file names, not paths + assert file_name == FileStorage.get_file_name_from_file_path(file_name) + load_path = self.get_package_path(load_id) + dest_path = os.path.join(load_path, dest_folder, new_file_name or file_name) + self.storage.atomic_rename(os.path.join(load_path, source_folder, file_name), dest_path) + # print(f"{join(load_path, source_folder, file_name)} -> {dest_path}") + return self.storage.make_full_path(dest_path) + + def _load_schema(self, load_id: str) -> DictStrAny: + schema_path = os.path.join(load_id, PackageStorage.SCHEMA_FILE_NAME) + return json.loads(self.storage.load(schema_path)) # type: ignore[no-any-return] + + @staticmethod + def build_job_file_name( + table_name: str, + file_id: str, + retry_count: int = 0, + validate_components: bool = True, + loader_file_format: TLoaderFileFormat = None, + ) -> str: + if validate_components: + FileStorage.validate_file_name_component(table_name) + fn = f"{table_name}.{file_id}.{int(retry_count)}" + if loader_file_format: + format_spec = DataWriter.data_format_from_file_format(loader_file_format) + return fn + f".{format_spec.file_extension}" + return fn + + @staticmethod + def is_package_partially_loaded(package_info: LoadPackageInfo) -> bool: + """Checks if package is partially loaded - has jobs that are not new.""" + if package_info.state == "normalized": + pending_jobs: Sequence[TJobState] = ["new_jobs"] + else: + pending_jobs = ["completed_jobs", "failed_jobs"] + return ( + sum( + len(package_info.jobs[job_state]) + for job_state in WORKING_FOLDERS + if job_state not in pending_jobs + ) + > 0 + ) + + @staticmethod + def _job_elapsed_time_seconds(file_path: str, now_ts: float = None) -> float: + return (now_ts or pendulum.now().timestamp()) - os.path.getmtime(file_path) diff --git a/dlt/common/storages/load_storage.py b/dlt/common/storages/load_storage.py index d8eee9b8d6..a83502cb9b 100644 --- a/dlt/common/storages/load_storage.py +++ b/dlt/common/storages/load_storage.py @@ -1,143 +1,32 @@ -import contextlib -from copy import deepcopy -import os -import datetime # noqa: 251 -import humanize from os.path import join -from pathlib import Path -from pendulum.datetime import DateTime -from typing import Dict, Iterable, List, NamedTuple, Literal, Optional, Sequence, Set, get_args, cast +from typing import Iterable, Optional, Sequence -from dlt.common import json, pendulum +from dlt.common import json from 
dlt.common.configuration import known_sections from dlt.common.configuration.inject import with_config -from dlt.common.typing import DictStrAny, StrAny -from dlt.common.storages.file_storage import FileStorage -from dlt.common.data_writers import DataWriter from dlt.common.destination import ALL_SUPPORTED_FILE_FORMATS, TLoaderFileFormat from dlt.common.configuration.accessors import config from dlt.common.exceptions import TerminalValueError -from dlt.common.schema import Schema, TSchemaTables, TTableSchemaColumns +from dlt.common.schema import TSchemaTables +from dlt.common.storages.file_storage import FileStorage from dlt.common.storages.configuration import LoadStorageConfiguration from dlt.common.storages.versioned_storage import VersionedStorage from dlt.common.storages.data_item_storage import DataItemStorage +from dlt.common.storages.load_package import ( + LoadJobInfo, + LoadPackageInfo, + PackageStorage, + ParsedLoadJobFileName, + TJobState, +) from dlt.common.storages.exceptions import JobWithUnsupportedWriterException, LoadPackageNotFound -from dlt.common.utils import flatten_list_or_items - - -# folders to manage load jobs in a single load package -TJobState = Literal["new_jobs", "failed_jobs", "started_jobs", "completed_jobs"] -WORKING_FOLDERS: Set[TJobState] = set(get_args(TJobState)) -TLoadPackageState = Literal["normalized", "loaded", "aborted"] - - -class ParsedLoadJobFileName(NamedTuple): - table_name: str - file_id: str - retry_count: int - file_format: TLoaderFileFormat - - def job_id(self) -> str: - return f"{self.table_name}.{self.file_id}.{int(self.retry_count)}.{self.file_format}" - - @staticmethod - def parse(file_name: str) -> "ParsedLoadJobFileName": - p = Path(file_name) - parts = p.name.split(".") - if len(parts) != 4: - raise TerminalValueError(parts) - - return ParsedLoadJobFileName(parts[0], parts[1], int(parts[2]), cast(TLoaderFileFormat, parts[3])) - - -class LoadJobInfo(NamedTuple): - state: TJobState - file_path: str - file_size: int - created_at: datetime.datetime - elapsed: float - job_file_info: ParsedLoadJobFileName - failed_message: str - - def asdict(self) -> DictStrAny: - d = self._asdict() - # flatten - del d["job_file_info"] - d.update(self.job_file_info._asdict()) - return d - - def asstr(self, verbosity: int = 0) -> str: - failed_msg = "The job FAILED TERMINALLY and cannot be restarted." if self.failed_message else "" - elapsed_msg = humanize.precisedelta(pendulum.duration(seconds=self.elapsed)) if self.elapsed else "---" - msg = f"Job: {self.job_file_info.job_id()}, table: {self.job_file_info.table_name} in {self.state}. " - msg += f"File type: {self.job_file_info.file_format}, size: {humanize.naturalsize(self.file_size, binary=True, gnu=True)}. " - msg += f"Started on: {self.created_at} and completed in {elapsed_msg}." - if failed_msg: - msg += "\nThe job FAILED TERMINALLY and cannot be restarted." 
- if verbosity > 0: - msg += "\n" + self.failed_message - return msg - - def __str__(self) -> str: - return self.asstr(verbosity=0) - - -class LoadPackageInfo(NamedTuple): - load_id: str - package_path: str - state: TLoadPackageState - schema_name: str - schema_update: TSchemaTables - completed_at: datetime.datetime - jobs: Dict[TJobState, List[LoadJobInfo]] - - def asdict(self) -> DictStrAny: - d = self._asdict() - # job as list - d["jobs"] = [job.asdict() for job in flatten_list_or_items(iter(self.jobs.values()))] # type: ignore - # flatten update into list of columns - tables: List[DictStrAny] = deepcopy(list(self.schema_update.values())) # type: ignore - for table in tables: - table.pop("filters", None) - columns: List[DictStrAny] = [] - table["schema_name"] = self.schema_name - table["load_id"] = self.load_id - for column in table["columns"].values(): - column["table_name"] = table["name"] - column["schema_name"] = self.schema_name - column["load_id"] = self.load_id - columns.append(column) - table["columns"] = columns - d.pop("schema_update") - d["tables"] = tables - return d - - def asstr(self, verbosity: int = 0) -> str: - completed_msg = f"The package was {self.state.upper()} at {self.completed_at}" if self.completed_at else "The package is being PROCESSED" - msg = f"The package with load id {self.load_id} for schema {self.schema_name} is in {self.state} state. It updated schema for {len(self.schema_update)} tables. {completed_msg}.\n" - msg += "Jobs details:\n" - msg += "\n".join(job.asstr(verbosity) for job in flatten_list_or_items(iter(self.jobs.values()))) # type: ignore - return msg - - def __str__(self) -> str: - return self.asstr(verbosity=0) class LoadStorage(DataItemStorage, VersionedStorage): - STORAGE_VERSION = "1.0.0" NORMALIZED_FOLDER = "normalized" # folder within the volume where load packages are stored LOADED_FOLDER = "loaded" # folder to keep the loads that were completely processed - - NEW_JOBS_FOLDER: TJobState = "new_jobs" - FAILED_JOBS_FOLDER: TJobState = "failed_jobs" - STARTED_JOBS_FOLDER: TJobState = "started_jobs" - COMPLETED_JOBS_FOLDER: TJobState = "completed_jobs" - - SCHEMA_UPDATES_FILE_NAME = "schema_updates.json" # updates to the tables in schema created by normalizer - APPLIED_SCHEMA_UPDATES_FILE_NAME = "applied_" + "schema_updates.json" # updates applied to the destination - SCHEMA_FILE_NAME = "schema.json" # package schema - PACKAGE_COMPLETED_FILE_NAME = "package_completed.json" # completed package marker file, currently only to store data with os.stat + NEW_PACKAGES_FOLDER = "new" # folder where new packages are created ALL_SUPPORTED_FILE_FORMATS = ALL_SUPPORTED_FILE_FORMATS @@ -147,7 +36,7 @@ def __init__( is_owner: bool, preferred_file_format: TLoaderFileFormat, supported_file_formats: Iterable[TLoaderFileFormat], - config: LoadStorageConfiguration = config.value + config: LoadStorageConfiguration = config.value, ) -> None: if not LoadStorage.ALL_SUPPORTED_FILE_FORMATS.issuperset(supported_file_formats): raise TerminalValueError(supported_file_formats) @@ -158,145 +47,73 @@ def __init__( super().__init__( preferred_file_format, LoadStorage.STORAGE_VERSION, - is_owner, FileStorage(config.load_volume_path, "t", makedirs=is_owner) + is_owner, + FileStorage(config.load_volume_path, "t", makedirs=is_owner), ) if is_owner: self.initialize_storage() + # create package storages + self.new_packages = PackageStorage( + FileStorage(join(config.load_volume_path, LoadStorage.NEW_PACKAGES_FOLDER)), "new" + ) + self.normalized_packages = PackageStorage( + 
FileStorage(join(config.load_volume_path, LoadStorage.NORMALIZED_FOLDER)), "normalized" + ) + self.loaded_packages = PackageStorage( + FileStorage(join(config.load_volume_path, LoadStorage.LOADED_FOLDER)), "loaded" + ) def initialize_storage(self) -> None: - self.storage.create_folder(LoadStorage.LOADED_FOLDER, exists_ok=True) + self.storage.create_folder(LoadStorage.NEW_PACKAGES_FOLDER, exists_ok=True) self.storage.create_folder(LoadStorage.NORMALIZED_FOLDER, exists_ok=True) - - def create_temp_load_package(self, load_id: str) -> None: - # delete previous version - if self.storage.has_folder(load_id): - self.storage.delete_folder(load_id, recursively=True) - self.storage.create_folder(load_id) - # create processing directories - self.storage.create_folder(join(load_id, LoadStorage.NEW_JOBS_FOLDER)) - self.storage.create_folder(join(load_id, LoadStorage.COMPLETED_JOBS_FOLDER)) - self.storage.create_folder(join(load_id, LoadStorage.FAILED_JOBS_FOLDER)) - self.storage.create_folder(join(load_id, LoadStorage.STARTED_JOBS_FOLDER)) + self.storage.create_folder(LoadStorage.LOADED_FOLDER, exists_ok=True) def _get_data_item_path_template(self, load_id: str, _: str, table_name: str) -> str: - file_name = self.build_job_file_name(table_name, "%s", with_extension=False) - return self.storage.make_full_path(join(load_id, LoadStorage.NEW_JOBS_FOLDER, file_name)) - - def write_temp_job_file(self, load_id: str, table_name: str, table: TTableSchemaColumns, file_id: str, rows: Sequence[StrAny]) -> str: - file_name = self._get_data_item_path_template(load_id, None, table_name) % file_id + "." + self.loader_file_format - format_spec = DataWriter.data_format_from_file_format(self.loader_file_format) - mode = "wb" if format_spec.is_binary_format else "w" - with self.storage.open_file(file_name, mode=mode) as f: - writer = DataWriter.from_file_format(self.loader_file_format, f) - writer.write_all(table, rows) - return Path(file_name).name - - def load_package_schema(self, load_id: str) -> Schema: - # load schema from a load package to be processed - schema_path = join(self.get_normalized_package_path(load_id), LoadStorage.SCHEMA_FILE_NAME) - return self._load_schema(schema_path) - - def load_temp_schema(self, load_id: str) -> Schema: - # load schema from a temporary load package - schema_path = join(load_id, LoadStorage.SCHEMA_FILE_NAME) - return self._load_schema(schema_path) - - def save_temp_schema(self, schema: Schema, load_id: str) -> str: - # save a schema to a temporary load package - dump = json.dumps(schema.to_dict()) - return self.storage.save(join(load_id, LoadStorage.SCHEMA_FILE_NAME), dump) - - def save_temp_schema_updates(self, load_id: str, schema_update: TSchemaTables) -> None: - with self.storage.open_file(join(load_id, LoadStorage.SCHEMA_UPDATES_FILE_NAME), mode="wb") as f: - json.dump(schema_update, f) - - def commit_temp_load_package(self, load_id: str) -> None: - self.storage.rename_tree(load_id, self.get_normalized_package_path(load_id)) - - def list_normalized_packages(self) -> Sequence[str]: - """Lists all packages that are normalized and will be loaded or are currently loaded""" - loads = self.storage.list_folder_dirs(LoadStorage.NORMALIZED_FOLDER, to_root=False) - # start from the oldest packages - return sorted(loads) - - def list_completed_packages(self) -> Sequence[str]: - """List packages that are completely loaded""" - loads = self.storage.list_folder_dirs(LoadStorage.LOADED_FOLDER, to_root=False) - # start from the oldest packages - return sorted(loads) + # implements 
DataItemStorage._get_data_item_path_template + file_name = PackageStorage.build_job_file_name(table_name, "%s") + file_path = self.new_packages.get_job_file_path( + load_id, PackageStorage.NEW_JOBS_FOLDER, file_name + ) + return self.new_packages.storage.make_full_path(file_path) def list_new_jobs(self, load_id: str) -> Sequence[str]: - new_jobs = self.storage.list_folder_files(self._get_job_folder_path(load_id, LoadStorage.NEW_JOBS_FOLDER)) - # make sure all jobs have supported writers - wrong_job = next((j for j in new_jobs if LoadStorage.parse_job_file_name(j).file_format not in self.supported_file_formats), None) + """Lists all jobs in new jobs folder of normalized package storage and checks if file formats are supported""" + new_jobs = self.normalized_packages.list_new_jobs(load_id) + # # make sure all jobs have supported writers + wrong_job = next( + ( + j + for j in new_jobs + if ParsedLoadJobFileName.parse(j).file_format not in self.supported_file_formats + ), + None, + ) if wrong_job is not None: raise JobWithUnsupportedWriterException(load_id, self.supported_file_formats, wrong_job) return new_jobs - def list_started_jobs(self, load_id: str) -> Sequence[str]: - return self.storage.list_folder_files(self._get_job_folder_path(load_id, LoadStorage.STARTED_JOBS_FOLDER)) - - def list_failed_jobs(self, load_id: str) -> Sequence[str]: - return self.storage.list_folder_files(self._get_job_folder_path(load_id, LoadStorage.FAILED_JOBS_FOLDER)) - - def list_jobs_for_table(self, load_id: str, table_name: str) -> Sequence[LoadJobInfo]: - return [job for job in self.list_all_jobs(load_id) if job.job_file_info.table_name == table_name] + def commit_new_load_package(self, load_id: str) -> None: + self.storage.rename_tree( + self.get_new_package_path(load_id), self.get_normalized_package_path(load_id) + ) - def list_all_jobs(self, load_id: str) -> Sequence[LoadJobInfo]: - info = self.get_load_package_info(load_id) - return [job for job in flatten_list_or_items(iter(info.jobs.values()))] # type: ignore + def list_normalized_packages(self) -> Sequence[str]: + """Lists all packages that are normalized and will be loaded or are currently loaded""" + return self.normalized_packages.list_packages() - def list_completed_failed_jobs(self, load_id: str) -> Sequence[str]: - return self.storage.list_folder_files(self._get_job_folder_completed_path(load_id, LoadStorage.FAILED_JOBS_FOLDER)) + def list_loaded_packages(self) -> Sequence[str]: + """List packages that are completely loaded""" + return self.loaded_packages.list_packages() - def list_failed_jobs_in_completed_package(self, load_id: str) -> Sequence[LoadJobInfo]: + def list_failed_jobs_in_loaded_package(self, load_id: str) -> Sequence[LoadJobInfo]: """List all failed jobs and associated error messages for a completed load package with `load_id`""" - failed_jobs: List[LoadJobInfo] = [] - package_path = self.get_completed_package_path(load_id) - package_created_at = pendulum.from_timestamp( - os.path.getmtime(self.storage.make_full_path(join(package_path, LoadStorage.PACKAGE_COMPLETED_FILE_NAME))) - ) - for file in self.list_completed_failed_jobs(load_id): - if not file.endswith(".exception"): - failed_jobs.append(self._read_job_file_info("failed_jobs", file, package_created_at)) - return failed_jobs - - def get_load_package_info(self, load_id: str) -> LoadPackageInfo: - """Gets information on normalized/completed package with given load_id, all jobs and their statuses.""" - # check if package is completed or in process - package_created_at: DateTime = 
None - package_state: TLoadPackageState = "normalized" - package_path = self.get_normalized_package_path(load_id) - applied_update: TSchemaTables = {} - if not self.storage.has_folder(package_path): - package_path = self.get_completed_package_path(load_id) - if not self.storage.has_folder(package_path): - raise LoadPackageNotFound(load_id) - completed_file_path = self.storage.make_full_path(join(package_path, LoadStorage.PACKAGE_COMPLETED_FILE_NAME)) - package_created_at = pendulum.from_timestamp(os.path.getmtime(completed_file_path)) - package_state = self.storage.load(completed_file_path) - applied_schema_update_file = join(package_path, LoadStorage.APPLIED_SCHEMA_UPDATES_FILE_NAME) - if self.storage.has_file(applied_schema_update_file): - applied_update = json.loads(self.storage.load(applied_schema_update_file)) - schema = self._load_schema(join(package_path, LoadStorage.SCHEMA_FILE_NAME)) - # read jobs with all statuses - all_jobs: Dict[TJobState, List[LoadJobInfo]] = {} - for state in WORKING_FOLDERS: - jobs: List[LoadJobInfo] = [] - with contextlib.suppress(FileNotFoundError): - # we ignore if load package lacks one of working folders. completed_jobs may be deleted on archiving - for file in self.storage.list_folder_files(join(package_path, state)): - if not file.endswith(".exception"): - jobs.append(self._read_job_file_info(state, file, package_created_at)) - all_jobs[state] = jobs - - return LoadPackageInfo(load_id, self.storage.make_full_path(package_path), package_state, schema.name, applied_update, package_created_at, all_jobs) + return self.loaded_packages.list_failed_jobs_infos(load_id) def begin_schema_update(self, load_id: str) -> Optional[TSchemaTables]: package_path = self.get_normalized_package_path(load_id) if not self.storage.has_folder(package_path): raise FileNotFoundError(package_path) - schema_update_file = join(package_path, LoadStorage.SCHEMA_UPDATES_FILE_NAME) + schema_update_file = join(package_path, PackageStorage.SCHEMA_UPDATES_FILE_NAME) if self.storage.has_file(schema_update_file): schema_update: TSchemaTables = json.loads(self.storage.load(schema_update_file)) return schema_update @@ -306,148 +123,64 @@ def begin_schema_update(self, load_id: str) -> Optional[TSchemaTables]: def commit_schema_update(self, load_id: str, applied_update: TSchemaTables) -> None: """Marks schema update as processed and stores the update that was applied at the destination""" load_path = self.get_normalized_package_path(load_id) - schema_update_file = join(load_path, LoadStorage.SCHEMA_UPDATES_FILE_NAME) - processed_schema_update_file = join(load_path, LoadStorage.APPLIED_SCHEMA_UPDATES_FILE_NAME) + schema_update_file = join(load_path, PackageStorage.SCHEMA_UPDATES_FILE_NAME) + processed_schema_update_file = join( + load_path, PackageStorage.APPLIED_SCHEMA_UPDATES_FILE_NAME + ) # delete initial schema update self.storage.delete(schema_update_file) # save applied update self.storage.save(processed_schema_update_file, json.dumps(applied_update)) - def add_new_job(self, load_id: str, job_file_path: str, job_state: TJobState = "new_jobs") -> None: + def import_new_job( + self, load_id: str, job_file_path: str, job_state: TJobState = "new_jobs" + ) -> None: """Adds new job by moving the `job_file_path` into `new_jobs` of package `load_id`""" - self.storage.atomic_import(job_file_path, self._get_job_folder_path(load_id, job_state)) - - def atomic_import(self, external_file_path: str, to_folder: str) -> str: - """Copies or links a file at `external_file_path` into the `to_folder` 
effectively importing file into storage""" - # LoadStorage.parse_job_file_name - return self.storage.to_relative_path(FileStorage.move_atomic_to_folder(external_file_path, self.storage.make_full_path(to_folder))) - - def start_job(self, load_id: str, file_name: str) -> str: - return self._move_job(load_id, LoadStorage.NEW_JOBS_FOLDER, LoadStorage.STARTED_JOBS_FOLDER, file_name) - - def fail_job(self, load_id: str, file_name: str, failed_message: Optional[str]) -> str: - # save the exception to failed jobs - if failed_message: - self.storage.save( - self._get_job_file_path(load_id, LoadStorage.FAILED_JOBS_FOLDER, file_name + ".exception"), - failed_message - ) - # move to failed jobs - return self._move_job(load_id, LoadStorage.STARTED_JOBS_FOLDER, LoadStorage.FAILED_JOBS_FOLDER, file_name) - - def retry_job(self, load_id: str, file_name: str) -> str: - # when retrying job we must increase the retry count - source_fn = ParsedLoadJobFileName.parse(file_name) - dest_fn = ParsedLoadJobFileName(source_fn.table_name, source_fn.file_id, source_fn.retry_count + 1, source_fn.file_format) - # move it directly to new file name - return self._move_job(load_id, LoadStorage.STARTED_JOBS_FOLDER, LoadStorage.NEW_JOBS_FOLDER, file_name, dest_fn.job_id()) - - def complete_job(self, load_id: str, file_name: str) -> str: - return self._move_job(load_id, LoadStorage.STARTED_JOBS_FOLDER, LoadStorage.COMPLETED_JOBS_FOLDER, file_name) + # TODO: use normalize storage and add file type checks + return self.normalized_packages.import_job(load_id, job_file_path, job_state) + + # def atomic_import(self, external_file_path: str, to_folder: str) -> str: + # """Copies or links a file at `external_file_path` into the `to_folder` effectively importing file into storage""" + # # LoadStorage.parse_job_file_name + # return self.storage.to_relative_path( + # FileStorage.move_atomic_to_folder( + # external_file_path, self.storage.make_full_path(to_folder) + # ) + # ) def complete_load_package(self, load_id: str, aborted: bool) -> None: - load_path = self.get_normalized_package_path(load_id) - has_failed_jobs = len(self.list_failed_jobs(load_id)) > 0 - # delete completed jobs - if self.config.delete_completed_jobs and not has_failed_jobs: - self.storage.delete_folder( - self._get_job_folder_path(load_id, LoadStorage.COMPLETED_JOBS_FOLDER), - recursively=True) - # save marker file - completed_state: TLoadPackageState = "aborted" if aborted else "loaded" - self.storage.save(join(load_path, LoadStorage.PACKAGE_COMPLETED_FILE_NAME), completed_state) + self.normalized_packages.complete_loading_package( + load_id, "aborted" if aborted else "loaded" + ) # move to completed - completed_path = self.get_completed_package_path(load_id) - self.storage.rename_tree(load_path, completed_path) + completed_path = self.get_loaded_package_path(load_id) + self.storage.rename_tree(self.get_normalized_package_path(load_id), completed_path) - def delete_completed_package(self, load_id: str) -> None: - package_path = self.get_completed_package_path(load_id) - if not self.storage.has_folder(package_path): - raise LoadPackageNotFound(load_id) - self.storage.delete_folder(package_path, recursively=True) + def maybe_remove_completed_jobs(self, load_id: str) -> None: + """Deletes completed jobs if delete_completed_jobs config flag is set. 
If package has failed jobs, nothing gets deleted.""" + if self.config.delete_completed_jobs: + self.loaded_packages.remove_completed_jobs(load_id) + + def delete_loaded_package(self, load_id: str) -> None: + self.loaded_packages.delete_package(load_id) def wipe_normalized_packages(self) -> None: self.storage.delete_folder(self.NORMALIZED_FOLDER, recursively=True) - def get_normalized_package_path(self, load_id: str) -> str: - return join(LoadStorage.NORMALIZED_FOLDER, load_id) - - def get_completed_package_path(self, load_id: str) -> str: - return join(LoadStorage.LOADED_FOLDER, load_id) - - def job_elapsed_time_seconds(self, file_path: str, now_ts: float = None) -> float: - return (now_ts or pendulum.now().timestamp()) - os.path.getmtime(file_path) - - def _save_schema(self, schema: Schema, load_id: str) -> str: - dump = json.dumps(schema.to_dict()) - schema_path = join(self.get_normalized_package_path(load_id), LoadStorage.SCHEMA_FILE_NAME) - return self.storage.save(schema_path, dump) - - def _load_schema(self, schema_path: str) -> Schema: - stored_schema: DictStrAny = json.loads(self.storage.load(schema_path)) - return Schema.from_dict(stored_schema) - - def _move_job(self, load_id: str, source_folder: TJobState, dest_folder: TJobState, file_name: str, new_file_name: str = None) -> str: - # ensure we move file names, not paths - assert file_name == FileStorage.get_file_name_from_file_path(file_name) - load_path = self.get_normalized_package_path(load_id) - dest_path = join(load_path, dest_folder, new_file_name or file_name) - self.storage.atomic_rename(join(load_path, source_folder, file_name), dest_path) - # print(f"{join(load_path, source_folder, file_name)} -> {dest_path}") - return self.storage.make_full_path(dest_path) - - def _get_job_folder_path(self, load_id: str, folder: TJobState) -> str: - return join(self.get_normalized_package_path(load_id), folder) - - def _get_job_file_path(self, load_id: str, folder: TJobState, file_name: str) -> str: - return join(self._get_job_folder_path(load_id, folder), file_name) - - def _get_job_folder_completed_path(self, load_id: str, folder: TJobState) -> str: - return join(self.get_completed_package_path(load_id), folder) + def get_new_package_path(self, load_id: str) -> str: + return join(LoadStorage.NEW_PACKAGES_FOLDER, self.new_packages.get_package_path(load_id)) - def _read_job_file_info(self, state: TJobState, file: str, now: DateTime = None) -> LoadJobInfo: - try: - failed_message = self.storage.load(file + ".exception") - except FileNotFoundError: - failed_message = None - full_path = self.storage.make_full_path(file) - st = os.stat(full_path) - return LoadJobInfo( - state, - full_path, - st.st_size, - pendulum.from_timestamp(st.st_mtime), - self.job_elapsed_time_seconds(full_path, now.timestamp() if now else None), - self.parse_job_file_name(file), - failed_message + def get_normalized_package_path(self, load_id: str) -> str: + return join( + LoadStorage.NORMALIZED_FOLDER, self.normalized_packages.get_package_path(load_id) ) - def build_job_file_name(self, table_name: str, file_id: str, retry_count: int = 0, validate_components: bool = True, with_extension: bool = True) -> str: - if validate_components: - FileStorage.validate_file_name_component(table_name) - # FileStorage.validate_file_name_component(file_id) - fn = f"{table_name}.{file_id}.{int(retry_count)}" - if with_extension: - format_spec = DataWriter.data_format_from_file_format(self.loader_file_format) - return fn + f".{format_spec.file_extension}" - return fn + def 
get_loaded_package_path(self, load_id: str) -> str: + return join(LoadStorage.LOADED_FOLDER, self.loaded_packages.get_package_path(load_id)) - @staticmethod - def is_package_partially_loaded(package_info: LoadPackageInfo) -> bool: - """Checks if package is partially loaded - has jobs that are not new.""" - if package_info.state == "normalized": - pending_jobs: Sequence[TJobState] = ["new_jobs"] - else: - pending_jobs = ["completed_jobs", "failed_jobs"] - return sum(len(package_info.jobs[job_state]) for job_state in WORKING_FOLDERS if job_state not in pending_jobs) > 0 - - @staticmethod - def parse_job_file_name(file_name: str) -> ParsedLoadJobFileName: - p = Path(file_name) - parts = p.name.split(".") - # verify we know the extension - ext: TLoaderFileFormat = parts[-1] # type: ignore - if ext not in LoadStorage.ALL_SUPPORTED_FILE_FORMATS: - raise TerminalValueError(ext) - - return ParsedLoadJobFileName.parse(file_name) + def get_load_package_info(self, load_id: str) -> LoadPackageInfo: + """Gets information on normalized OR loaded package with given load_id, all jobs and their statuses.""" + try: + return self.loaded_packages.get_load_package_info(load_id) + except LoadPackageNotFound: + return self.normalized_packages.get_load_package_info(load_id) diff --git a/dlt/common/storages/normalize_storage.py b/dlt/common/storages/normalize_storage.py index 44e6fe2f1c..8a247c2021 100644 --- a/dlt/common/storages/normalize_storage.py +++ b/dlt/common/storages/normalize_storage.py @@ -1,62 +1,71 @@ -from typing import ClassVar, Sequence, NamedTuple, Union -from itertools import groupby -from pathlib import Path +import os +import glob +import semver +from typing import ClassVar, Sequence + +from semver import VersionInfo from dlt.common.configuration import with_config, known_sections from dlt.common.configuration.accessors import config +from dlt.common.storages.exceptions import StorageMigrationError +from dlt.common.storages.versioned_storage import VersionedStorage from dlt.common.storages.file_storage import FileStorage +from dlt.common.storages.load_package import PackageStorage from dlt.common.storages.configuration import NormalizeStorageConfiguration -from dlt.common.storages.versioned_storage import VersionedStorage -from dlt.common.destination import TLoaderFileFormat, ALL_SUPPORTED_FILE_FORMATS -from dlt.common.exceptions import TerminalValueError - -class TParsedNormalizeFileName(NamedTuple): - schema_name: str - table_name: str - file_id: str - file_format: TLoaderFileFormat +from dlt.common.utils import set_working_dir class NormalizeStorage(VersionedStorage): - - STORAGE_VERSION: ClassVar[str] = "1.0.0" - EXTRACTED_FOLDER: ClassVar[str] = "extracted" # folder within the volume where extracted files to be normalized are stored + STORAGE_VERSION: ClassVar[str] = "1.0.1" + EXTRACTED_FOLDER: ClassVar[str] = ( + "extracted" # folder within the volume where extracted files to be normalized are stored + ) @with_config(spec=NormalizeStorageConfiguration, sections=(known_sections.NORMALIZE,)) - def __init__(self, is_owner: bool, config: NormalizeStorageConfiguration = config.value) -> None: - super().__init__(NormalizeStorage.STORAGE_VERSION, is_owner, FileStorage(config.normalize_volume_path, "t", makedirs=is_owner)) + def __init__( + self, is_owner: bool, config: NormalizeStorageConfiguration = config.value + ) -> None: + super().__init__( + NormalizeStorage.STORAGE_VERSION, + is_owner, + FileStorage(config.normalize_volume_path, "t", makedirs=is_owner), + ) self.config = config if 
is_owner: self.initialize_storage() + self.extracted_packages = PackageStorage( + FileStorage(os.path.join(self.storage.storage_path, NormalizeStorage.EXTRACTED_FOLDER)), + "extracted", + ) def initialize_storage(self) -> None: self.storage.create_folder(NormalizeStorage.EXTRACTED_FOLDER, exists_ok=True) def list_files_to_normalize_sorted(self) -> Sequence[str]: - return sorted(self.storage.list_folder_files(NormalizeStorage.EXTRACTED_FOLDER)) - - def group_by_schema(self, files: Sequence[str]) -> "groupby[str, str]": - return groupby(files, NormalizeStorage.get_schema_name) - - @staticmethod - def get_schema_name(file_name: str) -> str: - return NormalizeStorage.parse_normalize_file_name(file_name).schema_name - - @staticmethod - def build_extracted_file_stem(schema_name: str, table_name: str, file_id: str) -> str: - # builds file name with the extracted data to be passed to normalize - return f"{schema_name}.{table_name}.{file_id}" - - @staticmethod - def parse_normalize_file_name(file_name: str) -> TParsedNormalizeFileName: - # parse extracted file name and returns (events found, load id, schema_name) - file_name_p: Path = Path(file_name) - parts = file_name_p.name.split(".") - ext = parts[-1] - if ext not in ALL_SUPPORTED_FILE_FORMATS: - raise TerminalValueError(f"File format {ext} not supported. Filename: {file_name}") - return TParsedNormalizeFileName(*parts) # type: ignore[arg-type] - - def delete_extracted_files(self, files: Sequence[str]) -> None: - for file_name in files: - self.storage.delete(file_name) + """Gets all data files in extracted packages storage. This method is compatible with current and all past storages""" + root_dir = os.path.join(self.storage.storage_path, NormalizeStorage.EXTRACTED_FOLDER) + with set_working_dir(root_dir): + files = glob.glob("**/*", recursive=True) + # return all files that are not schema files + return sorted( + [ + file + for file in files + if not file.endswith(PackageStorage.SCHEMA_FILE_NAME) and os.path.isfile(file) + ] + ) + + def migrate_storage(self, from_version: VersionInfo, to_version: VersionInfo) -> None: + if from_version == "1.0.0" and from_version < to_version: + # get files in storage + if len(self.list_files_to_normalize_sorted()) > 0: + raise StorageMigrationError( + self.storage.storage_path, + from_version, + to_version, + f"There are extracted files in {NormalizeStorage.EXTRACTED_FOLDER} folder." + " Storage will not migrate automatically due to possible data loss. 
Delete the" + " files or normalize it with dlt 0.3.x", + ) + from_version = semver.VersionInfo.parse("1.0.1") + self._save_version(from_version) diff --git a/dlt/common/storages/schema_storage.py b/dlt/common/storages/schema_storage.py index a9fee71531..a43b8a1f9b 100644 --- a/dlt/common/storages/schema_storage.py +++ b/dlt/common/storages/schema_storage.py @@ -4,21 +4,30 @@ from dlt.common import json, logger from dlt.common.configuration import with_config from dlt.common.configuration.accessors import config -from dlt.common.storages.configuration import SchemaStorageConfiguration, TSchemaFileFormat, SchemaFileExtensions +from dlt.common.storages.configuration import ( + SchemaStorageConfiguration, + TSchemaFileFormat, + SchemaFileExtensions, +) from dlt.common.storages.file_storage import FileStorage from dlt.common.schema import Schema, verify_schema_hash from dlt.common.typing import DictStrAny -from dlt.common.storages.exceptions import InStorageSchemaModified, SchemaNotFoundError, UnexpectedSchemaName +from dlt.common.storages.exceptions import ( + InStorageSchemaModified, + SchemaNotFoundError, + UnexpectedSchemaName, +) class SchemaStorage(Mapping[str, Schema]): - SCHEMA_FILE_NAME = "schema.%s" NAMED_SCHEMA_FILE_PATTERN = f"%s.{SCHEMA_FILE_NAME}" @with_config(spec=SchemaStorageConfiguration, sections=("schema",)) - def __init__(self, config: SchemaStorageConfiguration = config.value, makedirs: bool = False) -> None: + def __init__( + self, config: SchemaStorageConfiguration = config.value, makedirs: bool = False + ) -> None: self.config = config self.storage = FileStorage(config.schema_volume_path, makedirs=makedirs) @@ -97,7 +106,11 @@ def _maybe_import_schema(self, name: str, storage_schema: DictStrAny = None) -> # if schema was imported, overwrite storage schema rv_schema._imported_version_hash = rv_schema.version_hash self._save_schema(rv_schema) - logger.info(f"Schema {name} not present in {self.storage.storage_path} and got imported with version {rv_schema.stored_version} and imported hash {rv_schema._imported_version_hash}") + logger.info( + f"Schema {name} not present in {self.storage.storage_path} and got imported" + f" with version {rv_schema.stored_version} and imported hash" + f" {rv_schema._imported_version_hash}" + ) else: # import schema when imported schema was modified from the last import sc = Schema.from_dict(storage_schema) @@ -108,14 +121,23 @@ def _maybe_import_schema(self, name: str, storage_schema: DictStrAny = None) -> rv_schema._imported_version_hash = rv_schema.version_hash # if schema was imported, overwrite storage schema self._save_schema(rv_schema) - logger.info(f"Schema {name} was present in {self.storage.storage_path} but is overwritten with imported schema version {rv_schema.stored_version} and imported hash {rv_schema._imported_version_hash}") + logger.info( + f"Schema {name} was present in {self.storage.storage_path} but is" + f" overwritten with imported schema version {rv_schema.stored_version} and" + f" imported hash {rv_schema._imported_version_hash}" + ) else: # use storage schema as nothing changed rv_schema = sc except FileNotFoundError: # no schema to import -> skip silently and return the original if storage_schema is None: - raise SchemaNotFoundError(name, self.config.schema_volume_path, self.config.import_schema_path, self.config.external_schema_format) + raise SchemaNotFoundError( + name, + self.config.schema_volume_path, + self.config.import_schema_path, + self.config.external_schema_format, + ) rv_schema = 
Schema.from_dict(storage_schema) assert rv_schema is not None @@ -124,20 +146,29 @@ def _maybe_import_schema(self, name: str, storage_schema: DictStrAny = None) -> def _load_import_schema(self, name: str) -> DictStrAny: import_storage = FileStorage(self.config.import_schema_path, makedirs=False) schema_file = self._file_name_in_store(name, self.config.external_schema_format) - return self._parse_schema_str(import_storage.load(schema_file), self.config.external_schema_format) + return self._parse_schema_str( + import_storage.load(schema_file), self.config.external_schema_format + ) def _export_schema(self, schema: Schema, export_path: str) -> None: if self.config.external_schema_format == "json": - exported_schema_s = schema.to_pretty_json(remove_defaults=self.config.external_schema_format_remove_defaults) + exported_schema_s = schema.to_pretty_json( + remove_defaults=self.config.external_schema_format_remove_defaults + ) elif self.config.external_schema_format == "yaml": - exported_schema_s = schema.to_pretty_yaml(remove_defaults=self.config.external_schema_format_remove_defaults) + exported_schema_s = schema.to_pretty_yaml( + remove_defaults=self.config.external_schema_format_remove_defaults + ) else: raise ValueError(self.config.external_schema_format) export_storage = FileStorage(export_path, makedirs=True) schema_file = self._file_name_in_store(schema.name, self.config.external_schema_format) export_storage.save(schema_file, exported_schema_s) - logger.info(f"Schema {schema.name} exported to {export_path} with version {schema.stored_version} as {self.config.external_schema_format}") + logger.info( + f"Schema {schema.name} exported to {export_path} with version" + f" {schema.stored_version} as {self.config.external_schema_format}" + ) def _save_schema(self, schema: Schema) -> str: # save a schema to schema store @@ -145,7 +176,9 @@ def _save_schema(self, schema: Schema) -> str: return self.storage.save(schema_file, schema.to_pretty_json(remove_defaults=False)) @staticmethod - def load_schema_file(path: str, name: str, extensions: Tuple[TSchemaFileFormat, ...]=SchemaFileExtensions) -> Schema: + def load_schema_file( + path: str, name: str, extensions: Tuple[TSchemaFileFormat, ...] = SchemaFileExtensions + ) -> Schema: storage = FileStorage(path) for extension in extensions: file = SchemaStorage._file_name_in_store(name, extension) diff --git a/dlt/common/storages/transactional_file.py b/dlt/common/storages/transactional_file.py index 9a10c812e2..e5ee220904 100644 --- a/dlt/common/storages/transactional_file.py +++ b/dlt/common/storages/transactional_file.py @@ -34,6 +34,7 @@ def lock_id(k: int = 4) -> str: class Heartbeat(Timer): """A thread designed to periodically execute a fn.""" + daemon = True def run(self) -> None: @@ -60,7 +61,9 @@ def __init__(self, path: str, fs: fsspec.AbstractFileSystem) -> None: parsed_path = Path(path) if not parsed_path.is_absolute(): - raise ValueError(f"{path} is not absolute. Please pass only absolute paths to TransactionalFile") + raise ValueError( + f"{path} is not absolute. Please pass only absolute paths to TransactionalFile" + ) self.path = path if proto == "file": # standardize path separator to POSIX. fsspec always uses POSIX. Windows may use either. 
@@ -102,7 +105,7 @@ def _sync_locks(self) -> t.List[str]: # Purge stale locks mtime = self.extract_mtime(lock) if now - mtime > timedelta(seconds=TransactionalFile.LOCK_TTL_SECONDS): - try: # Janitors can race, so we ignore errors + try: # Janitors can race, so we ignore errors self._fs.rm(name) except OSError: pass @@ -110,7 +113,10 @@ def _sync_locks(self) -> t.List[str]: # The name is timestamp + random suffix and is time sortable output.append(name) if not output: - raise RuntimeError(f"When syncing locks for path {self.path} and lock {self.lock_path} no lock file was found") + raise RuntimeError( + f"When syncing locks for path {self.path} and lock {self.lock_path} no lock file" + " was found" + ) return output def read(self) -> t.Optional[bytes]: @@ -136,7 +142,9 @@ def rollback(self) -> None: elif self._fs.isfile(self.path): self._fs.rm(self.path) - def acquire_lock(self, blocking: bool = True, timeout: float = -1, jitter_mean: float = 0) -> bool: + def acquire_lock( + self, blocking: bool = True, timeout: float = -1, jitter_mean: float = 0 + ) -> bool: """Acquires a lock on a path. Mimics the stdlib's `threading.Lock` interface. Acquire a lock, blocking or non-blocking. diff --git a/dlt/common/storages/versioned_storage.py b/dlt/common/storages/versioned_storage.py index c87f2a52b9..8e9a3eb88d 100644 --- a/dlt/common/storages/versioned_storage.py +++ b/dlt/common/storages/versioned_storage.py @@ -7,10 +7,11 @@ class VersionedStorage: - VERSION_FILE = ".version" - def __init__(self, version: Union[semver.VersionInfo, str], is_owner: bool, storage: FileStorage) -> None: + def __init__( + self, version: Union[semver.VersionInfo, str], is_owner: bool, storage: FileStorage + ) -> None: if isinstance(version, str): version = semver.VersionInfo.parse(version) self.storage = storage @@ -20,24 +21,34 @@ def __init__(self, version: Union[semver.VersionInfo, str], is_owner: bool, stor if existing_version != version: if existing_version > version: # version cannot be downgraded - raise NoMigrationPathException(storage.storage_path, existing_version, existing_version, version) + raise NoMigrationPathException( + storage.storage_path, existing_version, existing_version, version + ) if is_owner: # only owner can migrate storage self.migrate_storage(existing_version, version) # storage should be migrated to desired version migrated_version = self._load_version() if version != migrated_version: - raise NoMigrationPathException(storage.storage_path, existing_version, migrated_version, version) + raise NoMigrationPathException( + storage.storage_path, existing_version, migrated_version, version + ) else: # we cannot use storage and we must wait for owner to upgrade it - raise WrongStorageVersionException(storage.storage_path, existing_version, version) + raise WrongStorageVersionException( + storage.storage_path, existing_version, version + ) else: if is_owner: self._save_version(version) else: - raise WrongStorageVersionException(storage.storage_path, semver.VersionInfo.parse("0.0.0"), version) + raise WrongStorageVersionException( + storage.storage_path, semver.VersionInfo.parse("0.0.0"), version + ) - def migrate_storage(self, from_version: semver.VersionInfo, to_version: semver.VersionInfo) -> None: + def migrate_storage( + self, from_version: semver.VersionInfo, to_version: semver.VersionInfo + ) -> None: # migration example: # # semver lib supports comparing both to string and other semvers # if from_version == "1.0.0" and from_version < to_version: diff --git a/dlt/common/time.py 
b/dlt/common/time.py index f57ccce71d..ed390c28bf 100644 --- a/dlt/common/time.py +++ b/dlt/common/time.py @@ -1,5 +1,5 @@ import contextlib -from typing import Any, Optional, Union, overload, TypeVar # noqa +from typing import Any, Optional, Union, overload, TypeVar, Callable # noqa import datetime # noqa: I251 from dlt.common.pendulum import pendulum, timedelta @@ -12,12 +12,28 @@ FUTURE_TIMESTAMP: float = 9999999999.0 DAY_DURATION_SEC: float = 24 * 60 * 60.0 +precise_time: Callable[[], float] = None +"""A precise timer using win_precise_time library on windows and time.time on other systems""" -def timestamp_within(timestamp: float, min_exclusive: Optional[float], max_inclusive: Optional[float]) -> bool: +try: + import win_precise_time as wpt + + precise_time = wpt.time +except ImportError: + from time import time as _built_in_time + + precise_time = _built_in_time + + +def timestamp_within( + timestamp: float, min_exclusive: Optional[float], max_inclusive: Optional[float] +) -> bool: """ check if timestamp within range uniformly treating none and range inclusiveness """ - return timestamp > (min_exclusive or PAST_TIMESTAMP) and timestamp <= (max_inclusive or FUTURE_TIMESTAMP) + return timestamp > (min_exclusive or PAST_TIMESTAMP) and timestamp <= ( + max_inclusive or FUTURE_TIMESTAMP + ) def timestamp_before(timestamp: float, max_inclusive: Optional[float]) -> bool: @@ -122,7 +138,9 @@ def ensure_pendulum_time(value: Union[str, datetime.time]) -> pendulum.Time: raise TypeError(f"Cannot coerce {value} to a pendulum.Time object.") -def _datetime_from_ts_or_iso(value: Union[int, float, str]) -> Union[pendulum.DateTime, pendulum.Date, pendulum.Time]: +def _datetime_from_ts_or_iso( + value: Union[int, float, str] +) -> Union[pendulum.DateTime, pendulum.Date, pendulum.Time]: if isinstance(value, (int, float)): return pendulum.from_timestamp(value) try: @@ -150,7 +168,8 @@ def to_seconds(td: Optional[TimedeltaSeconds]) -> Optional[float]: T = TypeVar("T", bound=Union[pendulum.DateTime, pendulum.Time]) + def reduce_pendulum_datetime_precision(value: T, microsecond_precision: int) -> T: if microsecond_precision >= 6: return value - return value.replace(microsecond=value.microsecond // 10**(6 - microsecond_precision) * 10**(6 - microsecond_precision)) # type: ignore + return value.replace(microsecond=value.microsecond // 10 ** (6 - microsecond_precision) * 10 ** (6 - microsecond_precision)) # type: ignore diff --git a/dlt/common/typing.py b/dlt/common/typing.py index b2bd03f7e6..b6a27a98a7 100644 --- a/dlt/common/typing.py +++ b/dlt/common/typing.py @@ -3,18 +3,41 @@ import inspect import os from re import Pattern as _REPattern -from typing import Callable, Dict, Any, Final, Literal, List, Mapping, NewType, Optional, Tuple, Type, TypeVar, Generic, Protocol, TYPE_CHECKING, Union, runtime_checkable, get_args, get_origin, IO -from typing_extensions import TypeAlias, ParamSpec, Concatenate +from typing import ( + ForwardRef, + Callable, + ClassVar, + Dict, + Any, + Final, + Literal, + List, + Mapping, + NewType, + Optional, + Tuple, + Type, + TypeVar, + Generic, + Protocol, + TYPE_CHECKING, + Union, + runtime_checkable, + IO, +) +from typing_extensions import TypeAlias, ParamSpec, Concatenate, Annotated, get_args, get_origin from dlt.common.pendulum import timedelta, pendulum if TYPE_CHECKING: from _typeshed import StrOrBytesPath from typing import _TypedDict + REPattern = _REPattern[str] else: StrOrBytesPath = Any from typing import _TypedDictMeta as _TypedDict + REPattern = _REPattern AnyType: 
TypeAlias = Any @@ -47,15 +70,16 @@ VARIANT_FIELD_FORMAT = "v_%s" TFileOrPath = Union[str, os.PathLike, IO[Any]] + @runtime_checkable class SupportsVariant(Protocol, Generic[TVariantBase]): """Defines variant type protocol that should be recognized by normalizers - Variant types behave like TVariantBase type (ie. Decimal) but also implement the protocol below that is used to extract the variant value from it. - See `Wei` type declaration which returns Decimal or str for values greater than supported by destination warehouse. + Variant types behave like TVariantBase type (ie. Decimal) but also implement the protocol below that is used to extract the variant value from it. + See `Wei` type declaration which returns Decimal or str for values greater than supported by destination warehouse. """ - def __call__(self) -> Union[TVariantBase, TVariantRV]: - ... + + def __call__(self) -> Union[TVariantBase, TVariantRV]: ... class SupportsHumanize(Protocol): @@ -68,32 +92,71 @@ def asstr(self, verbosity: int = 0) -> str: ... +def extract_type_if_modifier(t: Type[Any]) -> Type[Any]: + if get_origin(t) in (Final, ClassVar, Annotated): + t = get_args(t)[0] + if m_t := extract_type_if_modifier(t): + return m_t + else: + return t + return None + + +def is_union_type(hint: Type[Any]) -> bool: + if get_origin(hint) is Union: + return True + if hint := extract_type_if_modifier(hint): + return is_union_type(hint) + return False + + def is_optional_type(t: Type[Any]) -> bool: - return get_origin(t) is Union and type(None) in get_args(t) + if get_origin(t) is Union: + return type(None) in get_args(t) + if t := extract_type_if_modifier(t): + return is_optional_type(t) + return False def is_final_type(t: Type[Any]) -> bool: return get_origin(t) is Final -def extract_optional_type(t: Type[Any]) -> Any: - return get_args(t)[0] +def extract_union_types(t: Type[Any], no_none: bool = False) -> List[Any]: + if no_none: + return [arg for arg in get_args(t) if arg is not type(None)] # noqa: E721 + return list(get_args(t)) def is_literal_type(hint: Type[Any]) -> bool: - return get_origin(hint) is Literal - - -def is_union(hint: Type[Any]) -> bool: - return get_origin(hint) is Union + if get_origin(hint) is Literal: + return True + if hint := extract_type_if_modifier(hint): + return is_literal_type(hint) + return False def is_newtype_type(t: Type[Any]) -> bool: - return hasattr(t, "__supertype__") + if hasattr(t, "__supertype__"): + return True + if t := extract_type_if_modifier(t): + return is_newtype_type(t) + return False def is_typeddict(t: Type[Any]) -> bool: - return isinstance(t, _TypedDict) + if isinstance(t, _TypedDict): + return True + if t := extract_type_if_modifier(t): + return is_typeddict(t) + return False + + +def is_annotated(ann_type: Any) -> bool: + try: + return issubclass(get_origin(ann_type), Annotated) # type: ignore[arg-type] + except TypeError: + return False def is_list_generic_type(t: Type[Any]) -> bool: @@ -120,12 +183,13 @@ def extract_inner_type(hint: Type[Any], preserve_new_types: bool = False) -> Typ Returns: Type[Any]: Inner type if hint was Literal, Optional or NewType, otherwise hint """ + if maybe_modified := extract_type_if_modifier(hint): + return extract_inner_type(maybe_modified, preserve_new_types) + if is_optional_type(hint): + return extract_inner_type(get_args(hint)[0], preserve_new_types) if is_literal_type(hint): # assume that all literals are of the same type - return extract_inner_type(type(get_args(hint)[0]), preserve_new_types) - if is_optional_type(hint) or 
is_final_type(hint): - # extract specialization type and call recursively - return extract_inner_type(get_args(hint)[0], preserve_new_types) + return type(get_args(hint)[0]) if is_newtype_type(hint) and not preserve_new_types: # descend into supertypes of NewType return extract_inner_type(hint.__supertype__, preserve_new_types) @@ -134,10 +198,16 @@ def extract_inner_type(hint: Type[Any], preserve_new_types: bool = False) -> Typ def get_all_types_of_class_in_union(hint: Type[Any], cls: Type[TAny]) -> List[Type[TAny]]: # hint is an Union that contains classes, return all classes that are a subclass or superclass of cls - return [t for t in get_args(hint) if inspect.isclass(t) and (issubclass(t, cls) or issubclass(cls, t))] + return [ + t + for t in get_args(hint) + if inspect.isclass(t) and (issubclass(t, cls) or issubclass(cls, t)) + ] -def get_generic_type_argument_from_instance(instance: Any, sample_value: Optional[Any]) -> Type[Any]: +def get_generic_type_argument_from_instance( + instance: Any, sample_value: Optional[Any] +) -> Type[Any]: """Infers type argument of a Generic class from an `instance` of that class using optional `sample_value` of the argument type Inference depends on the presence of __orig_class__ attribute in instance, if not present - sample_Value will be used @@ -160,7 +230,10 @@ def get_generic_type_argument_from_instance(instance: Any, sample_value: Optiona TInputArgs = ParamSpec("TInputArgs") TReturnVal = TypeVar("TReturnVal") -def copy_sig(wrapper: Callable[TInputArgs, Any]) -> Callable[[Callable[..., TReturnVal]], Callable[TInputArgs, TReturnVal]]: + +def copy_sig( + wrapper: Callable[TInputArgs, Any] +) -> Callable[[Callable[..., TReturnVal]], Callable[TInputArgs, TReturnVal]]: """Copies docstring and signature from wrapper to func but keeps the func return value type""" def decorator(func: Callable[..., TReturnVal]) -> Callable[TInputArgs, TReturnVal]: diff --git a/dlt/common/utils.py b/dlt/common/utils.py index 0214bc037a..72fee608a8 100644 --- a/dlt/common/utils.py +++ b/dlt/common/utils.py @@ -8,11 +8,28 @@ from functools import wraps from os import environ from types import ModuleType +import traceback import zlib -from typing import Any, ContextManager, Dict, Iterator, Optional, Sequence, Set, Tuple, TypeVar, Mapping, List, Union, Counter, Iterable +from typing import ( + Any, + ContextManager, + Dict, + Iterator, + Optional, + Sequence, + Set, + Tuple, + TypeVar, + Mapping, + List, + Union, + Counter, + Iterable, +) from collections.abc import Mapping as C_Mapping +from dlt.common.exceptions import DltException, ExceptionTrace, TerminalException from dlt.common.typing import AnyFun, StrAny, DictStrAny, StrStr, TAny, TFun @@ -23,11 +40,12 @@ TValue = TypeVar("TValue") # row counts -TRowCount = Dict[str, int] +RowCounts = Dict[str, int] + def chunks(seq: Sequence[T], n: int) -> Iterator[Sequence[T]]: for i in range(0, len(seq), n): - yield seq[i:i + n] + yield seq[i : i + n] def uniq_id(len_: int = 16) -> str: @@ -37,7 +55,7 @@ def uniq_id(len_: int = 16) -> str: def uniq_id_base64(len_: int = 16) -> str: """Returns a base64 encoded crypto-grade string of random bytes with desired len_""" - return base64.b64encode(secrets.token_bytes(len_)).decode('ascii').rstrip("=") + return base64.b64encode(secrets.token_bytes(len_)).decode("ascii").rstrip("=") def many_uniq_ids_base64(n_ids: int, len_: int = 16) -> List[str]: @@ -46,34 +64,41 @@ def many_uniq_ids_base64(n_ids: int, len_: int = 16) -> List[str]: """ random_bytes = secrets.token_bytes(n_ids * len_) 
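A quick illustrative sketch (not part of the patch) of the reworked typing helpers added above in dlt/common/typing.py; it assumes only that is_optional_type and is_union_type are importable from that module and that Annotated comes from typing_extensions:

# illustrative sketch: the helpers now unwrap Annotated/Final/ClassVar before inspecting the hint
from typing import Optional, Union
from typing_extensions import Annotated

from dlt.common.typing import is_optional_type, is_union_type

assert is_optional_type(Optional[str])                     # plain Optional
assert is_optional_type(Annotated[Optional[str], "meta"])  # Optional wrapped in Annotated
assert is_union_type(Union[int, str])                      # any Union, not only Optional
assert not is_optional_type(Annotated[str, "meta"])        # Annotated alone is not Optional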
encode = base64.b64encode - return [encode(random_bytes[i:i+len_]).decode('ascii').rstrip("=") for i in range(0, n_ids * len_, len_)] + return [ + encode(random_bytes[i : i + len_]).decode("ascii").rstrip("=") + for i in range(0, n_ids * len_, len_) + ] def digest128(v: str, len_: int = 15) -> str: """Returns a base64 encoded shake128 hash of str `v` with digest of length `len_` (default: 15 bytes = 20 characters length)""" - return base64.b64encode(hashlib.shake_128(v.encode("utf-8")).digest(len_)).decode('ascii').rstrip("=") + return ( + base64.b64encode(hashlib.shake_128(v.encode("utf-8")).digest(len_)) + .decode("ascii") + .rstrip("=") + ) def digest128b(v: bytes, len_: int = 15) -> str: """Returns a base64 encoded shake128 hash of bytes `v` with digest of length `len_` (default: 15 bytes = 20 characters length)""" - enc_v = base64.b64encode(hashlib.shake_128(v).digest(len_)).decode('ascii') + enc_v = base64.b64encode(hashlib.shake_128(v).digest(len_)).decode("ascii") return enc_v.rstrip("=") def digest256(v: str) -> str: digest = hashlib.sha3_256(v.encode("utf-8")).digest() - return base64.b64encode(digest).decode('ascii') + return base64.b64encode(digest).decode("ascii") def str2bool(v: str) -> bool: if isinstance(v, bool): return v - if v.lower() in ('yes', 'true', 't', 'y', '1'): + if v.lower() in ("yes", "true", "t", "y", "1"): return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): + elif v.lower() in ("no", "false", "f", "n", "0"): return False else: - raise ValueError('Boolean value expected.') + raise ValueError("Boolean value expected.") # def flatten_list_of_dicts(dicts: Sequence[StrAny]) -> StrAny: @@ -96,7 +121,7 @@ def flatten_list_of_str_or_dicts(seq: Sequence[Union[StrAny, str]]) -> DictStrAn o: DictStrAny = {} for e in seq: if isinstance(e, dict): - for k,v in e.items(): + for k, v in e.items(): if k in o: raise KeyError(f"Cannot flatten with duplicate key {k}") o[k] = v @@ -177,7 +202,9 @@ def concat_strings_with_limit(strings: List[str], separator: str, limit: int) -> sep_len = len(separator) for i in range(1, len(strings)): - if current_length + len(strings[i]) + sep_len > limit: # accounts for the length of separator + if ( + current_length + len(strings[i]) + sep_len > limit + ): # accounts for the length of separator yield separator.join(strings[start:i]) start = i current_length = len(strings[i]) @@ -187,7 +214,9 @@ def concat_strings_with_limit(strings: List[str], separator: str, limit: int) -> yield separator.join(strings[start:]) -def graph_edges_to_nodes(edges: Sequence[Tuple[TAny, TAny]], directed: bool = True) -> Dict[TAny, Set[TAny]]: +def graph_edges_to_nodes( + edges: Sequence[Tuple[TAny, TAny]], directed: bool = True +) -> Dict[TAny, Set[TAny]]: """Converts a directed graph represented as a sequence of edges to a graph represented as a mapping from nodes a set of connected nodes. Isolated nodes are represented as edges to itself. If `directed` is `False`, each edge is duplicated but going in opposite direction. @@ -221,7 +250,6 @@ def dfs(node: TAny, current_component: Set[TAny]) -> None: for neighbor in undag[node]: dfs(neighbor, current_component) - for node in undag: if node not in visited: component: Set[TAny] = set() @@ -245,6 +273,7 @@ def update_dict_with_prune(dest: DictStrAny, update: StrAny) -> None: def update_dict_nested(dst: TDict, src: StrAny) -> TDict: + """Merges `src` into `dst` key wise. Does not recur into lists. 
Values in `src` overwrite `dst` if both keys exist.""" # based on https://github.com/clarketm/mergedeep/blob/master/mergedeep/mergedeep.py def _is_recursive_merge(a: StrAny, b: StrAny) -> bool: @@ -301,9 +330,10 @@ def is_interactive() -> bool: bool: True if interactive (e.g., REPL, IPython, Jupyter Notebook), False if running as a script. """ import __main__ as main + # When running as a script, the __main__ module has a __file__ attribute. # In an interactive environment, the __file__ attribute is absent. - return not hasattr(main, '__file__') + return not hasattr(main, "__file__") def dict_remove_nones_in_place(d: Dict[Any, Any]) -> Dict[Any, Any]: @@ -331,7 +361,6 @@ def custom_environ(env: StrStr) -> Iterator[None]: def with_custom_environ(f: TFun) -> TFun: - @wraps(f) def _wrap(*args: Any, **kwargs: Any) -> Any: saved_environ = os.environ.copy() @@ -404,11 +433,20 @@ def is_inner_callable(f: AnyFun) -> bool: def obfuscate_pseudo_secret(pseudo_secret: str, pseudo_key: bytes) -> str: - return base64.b64encode(bytes([_a ^ _b for _a, _b in zip(pseudo_secret.encode("utf-8"), pseudo_key*250)])).decode() + return base64.b64encode( + bytes([_a ^ _b for _a, _b in zip(pseudo_secret.encode("utf-8"), pseudo_key * 250)]) + ).decode() def reveal_pseudo_secret(obfuscated_secret: str, pseudo_key: bytes) -> str: - return bytes([_a ^ _b for _a, _b in zip(base64.b64decode(obfuscated_secret.encode("ascii"), validate=True), pseudo_key*250)]).decode("utf-8") + return bytes( + [ + _a ^ _b + for _a, _b in zip( + base64.b64decode(obfuscated_secret.encode("ascii"), validate=True), pseudo_key * 250 + ) + ] + ).decode("utf-8") def get_module_name(m: ModuleType) -> str: @@ -428,7 +466,7 @@ def derives_from_class_of_name(o: object, name: str) -> bool: def compressed_b64encode(value: bytes) -> str: """Compress and b64 encode the given bytestring""" - return base64.b64encode(zlib.compress(value, level=9)).decode('ascii') + return base64.b64encode(zlib.compress(value, level=9)).decode("ascii") def compressed_b64decode(value: str) -> bytes: @@ -441,15 +479,15 @@ def identity(x: TAny) -> TAny: return x -def increase_row_count(row_counts: TRowCount, table_name: str, count: int) -> None: - row_counts[table_name] = row_counts.get(table_name, 0) + count +def increase_row_count(row_counts: RowCounts, counter_name: str, count: int) -> None: + row_counts[counter_name] = row_counts.get(counter_name, 0) + count -def merge_row_count(row_counts_1: TRowCount, row_counts_2: TRowCount) -> None: +def merge_row_counts(row_counts_1: RowCounts, row_counts_2: RowCounts) -> None: """merges row counts_2 into row_counts_1""" - keys = set(row_counts_1.keys()) | set(row_counts_2.keys()) - for key in keys: - row_counts_1[key] = row_counts_1.get(key, 0) + row_counts_2.get(key, 0) + # only keys present in row_counts_2 are modified + for counter_name in row_counts_2.keys(): + row_counts_1[counter_name] = row_counts_1.get(counter_name, 0) + row_counts_2[counter_name] def extend_list_deduplicated(original_list: List[Any], extending_list: Iterable[Any]) -> List[Any]: @@ -474,3 +512,66 @@ def maybe_context(manager: ContextManager[TAny]) -> Iterator[TAny]: def without_none(d: Mapping[TKey, Optional[TValue]]) -> Mapping[TKey, TValue]: """Return a new dict with all `None` values removed""" return {k: v for k, v in d.items() if v is not None} + + +def get_full_class_name(obj: Any) -> str: + cls = obj.__class__ + module = cls.__module__ + # exclude 'builtins' for built-in types.
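A minimal usage sketch (illustrative only, not part of the patch) of the renamed row-count helpers above; note that merge_row_counts now only touches counters that appear in its second argument:

# illustrative sketch: RowCounts helpers after the rename from TRowCount/merge_row_count
from dlt.common.utils import RowCounts, increase_row_count, merge_row_counts

counts: RowCounts = {}
increase_row_count(counts, "users", 10)
increase_row_count(counts, "users", 5)   # counters accumulate: users == 15
merge_row_counts(counts, {"orders": 3})  # only "orders" is modified, "users" is left untouched
assert counts == {"users": 15, "orders": 3}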
+ if module is None or module == "builtins": + return cls.__name__ # type: ignore[no-any-return] + return module + "." + cls.__name__ # type: ignore[no-any-return] + + +def get_exception_trace(exc: BaseException) -> ExceptionTrace: + """Get exception trace and additional information for DltException(s)""" + trace: ExceptionTrace = {"message": str(exc), "exception_type": get_full_class_name(exc)} + if exc.__traceback__: + tb_extract = traceback.extract_tb(exc.__traceback__) + trace["stack_trace"] = traceback.format_list(tb_extract) + trace["is_terminal"] = isinstance(exc, TerminalException) + + # get attrs and other props + if isinstance(exc, DltException): + if exc.__doc__: + trace["docstring"] = exc.__doc__ + attrs = exc.attrs() + str_attrs = {} + for k, v in attrs.items(): + if v is None: + continue + try: + from dlt.common.json import json + + # must be json serializable, other attrs are skipped + if not isinstance(v, str): + json.dumps(v) + str_attrs[k] = v + except Exception: + continue + # extract special attrs + if k in ["load_id", "pipeline_name", "source_name", "resource_name", "job_id"]: + trace[k] = v # type: ignore[literal-required] + + trace["exception_attrs"] = str_attrs + return trace + + +def get_exception_trace_chain( + exc: BaseException, traces: List[ExceptionTrace] = None, seen: Set[int] = None +) -> List[ExceptionTrace]: + """Get traces for exception chain. The function will recursively visit all __cause__ and __context__ exceptions. The top level + exception trace is first on the list + """ + traces = traces or [] + seen = seen or set() + # prevent cycles + if id(exc) in seen: + return traces + seen.add(id(exc)) + traces.append(get_exception_trace(exc)) + if exc.__cause__: + return get_exception_trace_chain(exc.__cause__, traces, seen) + elif exc.__context__: + return get_exception_trace_chain(exc.__context__, traces, seen) + return traces diff --git a/dlt/common/validation.py b/dlt/common/validation.py index f1900c1b0e..c43f8df420 100644 --- a/dlt/common/validation.py +++ b/dlt/common/validation.py @@ -1,15 +1,32 @@ import functools -from typing import Callable, Any, Type, get_type_hints, get_args +from typing import Callable, Any, Type +from typing_extensions import get_type_hints, get_args from dlt.common.exceptions import DictValidationException -from dlt.common.typing import StrAny, extract_optional_type, is_literal_type, is_optional_type, is_typeddict, is_list_generic_type, is_dict_generic_type, _TypedDict +from dlt.common.typing import ( + StrAny, + is_literal_type, + is_optional_type, + extract_union_types, + is_union_type, + is_typeddict, + is_list_generic_type, + is_dict_generic_type, + _TypedDict, +) TFilterFunc = Callable[[str], bool] TCustomValidator = Callable[[str, str, Any, Any], bool] -def validate_dict(spec: Type[_TypedDict], doc: StrAny, path: str, filter_f: TFilterFunc = None, validator_f: TCustomValidator = None) -> None: +def validate_dict( + spec: Type[_TypedDict], + doc: StrAny, + path: str, + filter_f: TFilterFunc = None, + validator_f: TCustomValidator = None, +) -> None: """Validate the `doc` dictionary based on the given typed dictionary specification `spec`. Args: @@ -22,6 +39,8 @@ def validate_dict(spec: Type[_TypedDict], doc: StrAny, path: str, filter_f: TFil validator_f (TCustomValidator, optional): A function to perform additional validation for types not covered by this function. It should return `True` if the validation passes. Defaults to a function that rejects all such types. 
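For orientation only (not part of the patch), a sketch of how the new exception-trace helpers in dlt/common/utils.py above can be used; the exact set of trace fields is defined by ExceptionTrace in dlt.common.exceptions:

# illustrative sketch: collecting traces for a chained exception, top-level exception first
from dlt.common.utils import get_exception_trace_chain

try:
    try:
        raise ValueError("inner failure")
    except ValueError as inner:
        raise RuntimeError("outer failure") from inner
except RuntimeError as exc:
    traces = get_exception_trace_chain(exc)
    assert traces[0]["exception_type"] == "RuntimeError"  # the exception passed in
    assert traces[1]["exception_type"] == "ValueError"    # its __cause__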
+ filter_required (TFilterFunc, optional): A function to filter out required fields, useful + for testing historic versions of dict that might not have certain fields yet. Raises: DictValidationException: If there are missing required fields, unexpected fields, @@ -42,49 +61,111 @@ def validate_dict(spec: Type[_TypedDict], doc: StrAny, path: str, filter_f: TFil # check missing props missing = set(required_props.keys()).difference(props.keys()) if len(missing): - raise DictValidationException(f"In {path}: following required fields are missing {missing}", path) + raise DictValidationException( + f"In {path}: following required fields are missing {missing}", path + ) # check unknown props unexpected = set(props.keys()).difference(allowed_props.keys()) if len(unexpected): - raise DictValidationException(f"In {path}: following fields are unexpected {unexpected}", path) + raise DictValidationException( + f"In {path}: following fields are unexpected {unexpected}", path + ) def verify_prop(pk: str, pv: Any, t: Any) -> None: - if is_optional_type(t): - t = extract_optional_type(t) - - if is_literal_type(t): + # covers none in optional and union types + if is_optional_type(t) and pv is None: + pass + elif is_union_type(t): + # pass if value actually is none + union_types = extract_union_types(t, no_none=True) + # this is the case for optional fields + if len(union_types) == 1: + verify_prop(pk, pv, union_types[0]) + else: + has_passed = False + for ut in union_types: + try: + verify_prop(pk, pv, ut) + has_passed = True + except DictValidationException: + pass + if not has_passed: + type_names = [ + str(get_args(ut)) if is_literal_type(ut) else ut.__name__ + for ut in union_types + ] + raise DictValidationException( + f"In {path}: field {pk} value {pv} has invalid type {type(pv).__name__}."
+ f" One of these types expected: {', '.join(type_names)}.", + path, + pk, + pv, + ) + elif is_literal_type(t): a_l = get_args(t) if pv not in a_l: - raise DictValidationException(f"In {path}: field {pk} value {pv} not in allowed {a_l}", path, pk, pv) + raise DictValidationException( + f"In {path}: field {pk} value {pv} not in allowed {a_l}", path, pk, pv + ) elif t in [int, bool, str, float]: if not isinstance(pv, t): - raise DictValidationException(f"In {path}: field {pk} value {pv} has invalid type {type(pv).__name__} while {t.__name__} is expected", path, pk, pv) + raise DictValidationException( + f"In {path}: field {pk} value {pv} has invalid type {type(pv).__name__} while" + f" {t.__name__} is expected", + path, + pk, + pv, + ) elif is_typeddict(t): if not isinstance(pv, dict): - raise DictValidationException(f"In {path}: field {pk} value {pv} has invalid type {type(pv).__name__} while dict is expected", path, pk, pv) + raise DictValidationException( + f"In {path}: field {pk} value {pv} has invalid type {type(pv).__name__} while" + " dict is expected", + path, + pk, + pv, + ) validate_dict(t, pv, path + "/" + pk, filter_f, validator_f) elif is_list_generic_type(t): if not isinstance(pv, list): - raise DictValidationException(f"In {path}: field {pk} value {pv} has invalid type {type(pv).__name__} while list is expected", path, pk, pv) + raise DictValidationException( + f"In {path}: field {pk} value {pv} has invalid type {type(pv).__name__} while" + " list is expected", + path, + pk, + pv, + ) # get list element type from generic and process each list element l_t = get_args(t)[0] for i, l_v in enumerate(pv): verify_prop(pk + f"[{i}]", l_v, l_t) elif is_dict_generic_type(t): if not isinstance(pv, dict): - raise DictValidationException(f"In {path}: field {pk} value {pv} has invalid type {type(pv).__name__} while dict is expected", path, pk, pv) + raise DictValidationException( + f"In {path}: field {pk} value {pv} has invalid type {type(pv).__name__} while" + " dict is expected", + path, + pk, + pv, + ) # get dict key and value type from generic and process each k: v of the dict _, d_v_t = get_args(t) for d_k, d_v in pv.items(): if not isinstance(d_k, str): - raise DictValidationException(f"In {path}: field {pk} key {d_k} must be a string", path, pk, d_k) + raise DictValidationException( + f"In {path}: field {pk} key {d_k} must be a string", path, pk, d_k + ) verify_prop(pk + f"[{d_k}]", d_v, d_v_t) elif t is Any: # pass everything with any type pass else: if not validator_f(path, pk, pv, t): - raise DictValidationException(f"In {path}: field {pk} has expected type {t.__name__} which lacks validator", path, pk) + raise DictValidationException( + f"In {path}: field {pk} has expected type {t.__name__} which lacks validator", + path, + pk, + ) # check allowed props for pk, pv in props.items(): @@ -92,6 +173,5 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None: validate_dict_ignoring_xkeys = functools.partial( - validate_dict, - filter_f=lambda k: not k.startswith("x-") -) \ No newline at end of file + validate_dict, filter_f=lambda k: not k.startswith("x-") +) diff --git a/dlt/common/warnings.py b/dlt/common/warnings.py new file mode 100644 index 0000000000..9c62c69bf8 --- /dev/null +++ b/dlt/common/warnings.py @@ -0,0 +1,151 @@ +import functools +import warnings +import semver +import typing +import typing_extensions + +from dlt.version import __version__ + +VersionString = typing.Union[str, semver.VersionInfo] + + +class DltDeprecationWarning(DeprecationWarning): + """A dlt specific 
deprecation warning. + + This warning is raised when using deprecated functionality in dlt. It provides information on when the + deprecation was introduced and the expected version in which the corresponding functionality will be removed. + + Attributes: + message: Description of the warning. + since: Version in which the deprecation was introduced. + expected_due: Version in which the corresponding functionality is expected to be removed. + """ + + def __init__( + self, + message: str, + *args: typing.Any, + since: VersionString, + expected_due: VersionString = None, + ) -> None: + super().__init__(message, *args) + self.message = message.rstrip(".") + self.since = ( + since if isinstance(since, semver.VersionInfo) else semver.parse_version_info(since) + ) + if expected_due: + expected_due = ( + expected_due + if isinstance(expected_due, semver.VersionInfo) + else semver.parse_version_info(expected_due) + ) + self.expected_due = expected_due if expected_due is not None else self.since.bump_minor() + + def __str__(self) -> str: + message = ( + f"{self.message}. Deprecated in dlt {self.since} to be removed in {self.expected_due}." + ) + return message + + +class Dlt04DeprecationWarning(DltDeprecationWarning): + V04 = semver.parse_version_info("0.4.0") + + def __init__(self, message: str, *args: typing.Any, expected_due: VersionString = None) -> None: + super().__init__( + message, *args, since=Dlt04DeprecationWarning.V04, expected_due=expected_due + ) + + +# show dlt deprecations once +warnings.simplefilter("once", DltDeprecationWarning) + +if typing.TYPE_CHECKING or hasattr(typing_extensions, "deprecated"): + deprecated = typing_extensions.deprecated +else: + # ported from typing_extensions so versions older than 4.5.x may still be used + _T = typing.TypeVar("_T") + + def deprecated( + __msg: str, + *, + category: typing.Optional[typing.Type[Warning]] = DeprecationWarning, + stacklevel: int = 1, + ) -> typing.Callable[[_T], _T]: + """Indicate that a class, function or overload is deprecated. + + Usage: + + @deprecated("Use B instead") + class A: + pass + + @deprecated("Use g instead") + def f(): + pass + + @overload + @deprecated("int support is deprecated") + def g(x: int) -> int: ... + @overload + def g(x: str) -> int: ... + + When this decorator is applied to an object, the type checker + will generate a diagnostic on usage of the deprecated object. + + The warning specified by ``category`` will be emitted on use + of deprecated objects. For functions, that happens on calls; + for classes, on instantiation. If the ``category`` is ``None``, + no warning is emitted. The ``stacklevel`` determines where the + warning is emitted. If it is ``1`` (the default), the warning + is emitted at the direct caller of the deprecated object; if it + is higher, it is emitted further up the stack. + + The decorator sets the ``__deprecated__`` + attribute on the decorated object to the deprecation message + passed to the decorator. If applied to an overload, the decorator + must be after the ``@overload`` decorator for the attribute to + exist on the overload as returned by ``get_overloads()``. + + See PEP 702 for details. 
+ + """ + + def decorator(__arg: _T) -> _T: + if category is None: + __arg.__deprecated__ = __msg + return __arg + elif isinstance(__arg, type): + original_new = __arg.__new__ + has_init = __arg.__init__ is not object.__init__ + + @functools.wraps(original_new) + def __new__(cls, *args, **kwargs): + warnings.warn(__msg, category=category, stacklevel=stacklevel + 1) + if original_new is not object.__new__: + return original_new(cls, *args, **kwargs) + # Mirrors a similar check in object.__new__. + elif not has_init and (args or kwargs): + raise TypeError(f"{cls.__name__}() takes no arguments") + else: + return original_new(cls) + + __arg.__new__ = staticmethod(__new__) + __arg.__deprecated__ = __new__.__deprecated__ = __msg + return __arg + elif callable(__arg): + + @functools.wraps(__arg) + def wrapper(*args, **kwargs): + warnings.warn(__msg, category=category, stacklevel=stacklevel + 1) + return __arg(*args, **kwargs) + + __arg.__deprecated__ = wrapper.__deprecated__ = __msg + return wrapper + else: + raise TypeError( + "@deprecated decorator with non-None category must be applied to " + f"a class or callable, not {__arg!r}" + ) + + return decorator diff --git a/dlt/common/wei.py b/dlt/common/wei.py index 218e5eee3a..b6816bc6f3 100644 --- a/dlt/common/wei.py +++ b/dlt/common/wei.py @@ -11,8 +11,7 @@ WEI_SCALE_POW = 10**18 -class Wei(Decimal,SupportsVariant[Decimal]): - +class Wei(Decimal, SupportsVariant[Decimal]): ctx = default_context(decimal.getcontext().copy(), EVM_DECIMAL_PRECISION) @classmethod @@ -29,11 +28,13 @@ def from_int256(cls, value: int, decimals: int = 0) -> "Wei": def __call__(self) -> Union["Wei", TVariantRV]: # TODO: this should look into DestinationCapabilitiesContext to get maximum Decimal value. # this is BigQuery BIGDECIMAL max - if self > 578960446186580977117854925043439539266 or self < -578960446186580977117854925043439539267: - return ("str", str(self)) + if ( + self > 578960446186580977117854925043439539266 + or self < -578960446186580977117854925043439539267 + ): + return ("str", str(self)) else: return self - def __repr__(self) -> str: return f"Wei('{str(self)}')" diff --git a/dlt/destinations/__init__.py b/dlt/destinations/__init__.py index e69de29bb2..980c4ce7f2 100644 --- a/dlt/destinations/__init__.py +++ b/dlt/destinations/__init__.py @@ -0,0 +1,28 @@ +from dlt.destinations.impl.postgres.factory import postgres +from dlt.destinations.impl.snowflake.factory import snowflake +from dlt.destinations.impl.filesystem.factory import filesystem +from dlt.destinations.impl.duckdb.factory import duckdb +from dlt.destinations.impl.dummy.factory import dummy +from dlt.destinations.impl.mssql.factory import mssql +from dlt.destinations.impl.bigquery.factory import bigquery +from dlt.destinations.impl.athena.factory import athena +from dlt.destinations.impl.redshift.factory import redshift +from dlt.destinations.impl.qdrant.factory import qdrant +from dlt.destinations.impl.motherduck.factory import motherduck +from dlt.destinations.impl.weaviate.factory import weaviate + + +__all__ = [ + "postgres", + "snowflake", + "filesystem", + "duckdb", + "dummy", + "mssql", + "bigquery", + "athena", + "redshift", + "qdrant", + "motherduck", + "weaviate", +] diff --git a/dlt/destinations/exceptions.py b/dlt/destinations/exceptions.py index 5c20f081f1..cc4d4fd836 100644 --- a/dlt/destinations/exceptions.py +++ b/dlt/destinations/exceptions.py @@ -1,5 +1,10 @@ from typing import Sequence -from dlt.common.exceptions import DestinationTerminalException, DestinationTransientException, 
DestinationUndefinedEntity, DestinationException +from dlt.common.exceptions import ( + DestinationTerminalException, + DestinationTransientException, + DestinationUndefinedEntity, + DestinationException, +) from dlt.common.destination.reference import TLoadJobState @@ -25,32 +30,49 @@ def __init__(self, dbapi_exception: Exception) -> None: class DestinationConnectionError(DestinationTransientException): - def __init__(self, client_type: str, dataset_name: str, reason: str, inner_exc: Exception) -> None: + def __init__( + self, client_type: str, dataset_name: str, reason: str, inner_exc: Exception + ) -> None: self.client_type = client_type self.dataset_name = dataset_name self.inner_exc = inner_exc - super().__init__(f"Connection with {client_type} to dataset name {dataset_name} failed. Please check if you configured the credentials at all and provided the right credentials values. You can be also denied access or your internet connection may be down. The actual reason given is: {reason}") + super().__init__( + f"Connection with {client_type} to dataset name {dataset_name} failed. Please check if" + " you configured the credentials at all and provided the right credentials values. You" + " can be also denied access or your internet connection may be down. The actual reason" + f" given is: {reason}" + ) + class LoadClientNotConnected(DestinationTransientException): def __init__(self, client_type: str, dataset_name: str) -> None: self.client_type = client_type self.dataset_name = dataset_name - super().__init__(f"Connection with {client_type} to dataset {dataset_name} is closed. Open the connection with 'client.open_connection' or with the 'with client:' statement") + super().__init__( + f"Connection with {client_type} to dataset {dataset_name} is closed. Open the" + " connection with 'client.open_connection' or with the 'with client:' statement" + ) class DestinationSchemaWillNotUpdate(DestinationTerminalException): def __init__(self, table_name: str, columns: Sequence[str], msg: str) -> None: self.table_name = table_name self.columns = columns - super().__init__(f"Schema for table {table_name} column(s) {columns} will not update: {msg}") + super().__init__( + f"Schema for table {table_name} column(s) {columns} will not update: {msg}" + ) class DestinationSchemaTampered(DestinationTerminalException): def __init__(self, schema_name: str, version_hash: str, stored_version_hash: str) -> None: self.version_hash = version_hash self.stored_version_hash = stored_version_hash - super().__init__(f"Schema {schema_name} content was changed - by a loader or by destination code - from the moment it was retrieved by load package. " - f"Such schema cannot reliably be updated or saved. Current version hash: {version_hash} != stored version hash {stored_version_hash}") + super().__init__( + f"Schema {schema_name} content was changed - by a loader or by destination code - from" + " the moment it was retrieved by load package. Such schema cannot reliably be updated" + f" or saved. 
Current version hash: {version_hash} != stored version hash" + f" {stored_version_hash}" + ) class LoadJobNotExistsException(DestinationTerminalException): @@ -60,7 +82,9 @@ def __init__(self, job_id: str) -> None: class LoadJobTerminalException(DestinationTerminalException): def __init__(self, file_path: str, message: str) -> None: - super().__init__(f"Job with id/file name {file_path} encountered unrecoverable problem: {message}") + super().__init__( + f"Job with id/file name {file_path} encountered unrecoverable problem: {message}" + ) class LoadJobInvalidStateTransitionException(DestinationTerminalException): @@ -72,17 +96,28 @@ def __init__(self, from_state: TLoadJobState, to_state: TLoadJobState) -> None: class LoadJobFileTooBig(DestinationTerminalException): def __init__(self, file_name: str, max_size: int) -> None: - super().__init__(f"File {file_name} exceeds {max_size} and cannot be loaded. Split the file and try again.") + super().__init__( + f"File {file_name} exceeds {max_size} and cannot be loaded. Split the file and try" + " again." + ) class MergeDispositionException(DestinationTerminalException): - def __init__(self, dataset_name: str, staging_dataset_name: str, tables: Sequence[str], reason: str) -> None: + def __init__( + self, dataset_name: str, staging_dataset_name: str, tables: Sequence[str], reason: str + ) -> None: self.dataset_name = dataset_name self.staging_dataset_name = staging_dataset_name self.tables = tables self.reason = reason - msg = f"Merge sql job for dataset name {dataset_name}, staging dataset name {staging_dataset_name} COULD NOT BE GENERATED. Merge will not be performed. " - msg += f"Data for the following tables ({tables}) is loaded to staging dataset. You may need to write your own materialization. The reason is:\n" + msg = ( + f"Merge sql job for dataset name {dataset_name}, staging dataset name" + f" {staging_dataset_name} COULD NOT BE GENERATED. Merge will not be performed. " + ) + msg += ( + f"Data for the following tables ({tables}) is loaded to staging dataset. You may need" + " to write your own materialization. 
The reason is:\n" + ) msg += reason super().__init__(msg) diff --git a/dlt/destinations/filesystem/__init__.py b/dlt/destinations/filesystem/__init__.py deleted file mode 100644 index 3dc6c62480..0000000000 --- a/dlt/destinations/filesystem/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from typing import Type - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config -from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import JobClientBase, DestinationClientDwhWithStagingConfiguration - -from dlt.destinations.filesystem.configuration import FilesystemDestinationClientConfiguration - - -@with_config(spec=FilesystemDestinationClientConfiguration, sections=(known_sections.DESTINATION, "filesystem",)) -def _configure(config: FilesystemDestinationClientConfiguration = config.value) -> FilesystemDestinationClientConfiguration: - return config - - -def capabilities() -> DestinationCapabilitiesContext: - return DestinationCapabilitiesContext.generic_capabilities("jsonl") - - -def client(schema: Schema, initial_config: DestinationClientDwhWithStagingConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.filesystem.filesystem import FilesystemClient - - return FilesystemClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[FilesystemDestinationClientConfiguration]: - return FilesystemDestinationClientConfiguration diff --git a/dlt/destinations/impl/__init__.py b/dlt/destinations/impl/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/dlt/destinations/athena/__init__.py b/dlt/destinations/impl/athena/__init__.py similarity index 55% rename from dlt/destinations/athena/__init__.py rename to dlt/destinations/impl/athena/__init__.py index 1fd7f14d57..9f0b829819 100644 --- a/dlt/destinations/athena/__init__.py +++ b/dlt/destinations/impl/athena/__init__.py @@ -1,18 +1,7 @@ -from typing import Type - from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config -from dlt.common.schema.schema import Schema from dlt.common.data_writers.escape import escape_athena_identifier from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE -from dlt.destinations.athena.configuration import AthenaClientConfiguration -from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration - -@with_config(spec=AthenaClientConfiguration, sections=(known_sections.DESTINATION, "athena",)) -def _configure(config: AthenaClientConfiguration = config.value) -> AthenaClientConfiguration: - return config def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() @@ -37,15 +26,3 @@ def capabilities() -> DestinationCapabilitiesContext: caps.timestamp_precision = 3 caps.supports_truncate_command = False return caps - - -def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.athena.athena import AthenaClient - return AthenaClient(schema, _configure(initial_config)) # type: ignore - - -def 
spec() -> Type[DestinationClientConfiguration]: - return AthenaClientConfiguration - - diff --git a/dlt/destinations/athena/athena.py b/dlt/destinations/impl/athena/athena.py similarity index 80% rename from dlt/destinations/athena/athena.py rename to dlt/destinations/impl/athena/athena.py index 44d020c127..4837f0dbdf 100644 --- a/dlt/destinations/athena/athena.py +++ b/dlt/destinations/impl/athena/athena.py @@ -1,4 +1,17 @@ -from typing import Optional, ClassVar, Iterator, Any, AnyStr, Sequence, Tuple, List, Dict, Callable, Iterable, Type +from typing import ( + Optional, + ClassVar, + Iterator, + Any, + AnyStr, + Sequence, + Tuple, + List, + Dict, + Callable, + Iterable, + Type, +) from copy import deepcopy import re @@ -10,7 +23,12 @@ from pyathena import connect from pyathena.connection import Connection from pyathena.error import OperationalError, DatabaseError, ProgrammingError, IntegrityError, Error -from pyathena.formatter import DefaultParameterFormatter, _DEFAULT_FORMATTERS, Formatter, _format_date +from pyathena.formatter import ( + DefaultParameterFormatter, + _DEFAULT_FORMATTERS, + Formatter, + _format_date, +) from dlt.common import logger from dlt.common.utils import without_none @@ -26,12 +44,22 @@ from dlt.destinations.sql_jobs import SqlStagingCopyJob from dlt.destinations.typing import DBApi, DBTransaction -from dlt.destinations.exceptions import DatabaseTerminalException, DatabaseTransientException, DatabaseUndefinedRelation, LoadJobTerminalException -from dlt.destinations.athena import capabilities -from dlt.destinations.sql_client import SqlClientBase, DBApiCursorImpl, raise_database_error, raise_open_connection_error +from dlt.destinations.exceptions import ( + DatabaseTerminalException, + DatabaseTransientException, + DatabaseUndefinedRelation, + LoadJobTerminalException, +) +from dlt.destinations.impl.athena import capabilities +from dlt.destinations.sql_client import ( + SqlClientBase, + DBApiCursorImpl, + raise_database_error, + raise_open_connection_error, +) from dlt.destinations.typing import DBApiCursor from dlt.destinations.job_client_impl import SqlJobClientWithStaging -from dlt.destinations.athena.configuration import AthenaClientConfiguration +from dlt.destinations.impl.athena.configuration import AthenaClientConfiguration from dlt.destinations.type_mapping import TypeMapper from dlt.destinations import path_utils @@ -46,13 +74,10 @@ class AthenaTypeMapper(TypeMapper): "timestamp": "timestamp", "bigint": "bigint", "binary": "binary", - "time": "string" + "time": "string", } - sct_to_dbt = { - "decimal": "decimal(%i,%i)", - "wei": "decimal(%i,%i)" - } + sct_to_dbt = {"decimal": "decimal(%i,%i)", "wei": "decimal(%i,%i)"} dbt_to_sct = { "varchar": "text", @@ -72,7 +97,9 @@ class AthenaTypeMapper(TypeMapper): def __init__(self, capabilities: DestinationCapabilitiesContext): super().__init__(capabilities) - def to_db_integer_type(self, precision: Optional[int], table_format: TTableFormat = None) -> str: + def to_db_integer_type( + self, precision: Optional[int], table_format: TTableFormat = None + ) -> str: if precision is None: return "bigint" if precision <= 8: @@ -83,7 +110,9 @@ def to_db_integer_type(self, precision: Optional[int], table_format: TTableForma return "int" return "bigint" - def from_db_type(self, db_type: str, precision: Optional[int], scale: Optional[int]) -> TColumnType: + def from_db_type( + self, db_type: str, precision: Optional[int], scale: Optional[int] + ) -> TColumnType: for key, val in self.dbt_to_sct.items(): if 
db_type.startswith(key): return without_none(dict(data_type=val, precision=precision, scale=scale)) # type: ignore[return-value] @@ -101,7 +130,6 @@ def _format_pendulum_datetime(formatter: Formatter, escaper: Callable[[str], str class DLTAthenaFormatter(DefaultParameterFormatter): - _INSTANCE: ClassVar["DLTAthenaFormatter"] = None def __new__(cls: Type["DLTAthenaFormatter"]) -> "DLTAthenaFormatter": @@ -109,7 +137,6 @@ def __new__(cls: Type["DLTAthenaFormatter"]) -> "DLTAthenaFormatter": return cls._INSTANCE return super().__new__(cls) - def __init__(self) -> None: if DLTAthenaFormatter._INSTANCE: return @@ -118,9 +145,7 @@ def __init__(self) -> None: formatters[datetime] = _format_pendulum_datetime formatters[Date] = _format_date - super(DefaultParameterFormatter, self).__init__( - mappings=formatters, default=None - ) + super(DefaultParameterFormatter, self).__init__(mappings=formatters, default=None) DLTAthenaFormatter._INSTANCE = self @@ -138,13 +163,14 @@ def exception(self) -> str: # this part of code should be never reached raise NotImplementedError() + class DoNothingFollowupJob(DoNothingJob, FollowupJob): """The second most lazy class of dlt""" + pass class AthenaSQLClient(SqlClientBase[Connection]): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() dbapi: ClassVar[DBApi] = pyathena @@ -161,7 +187,8 @@ def open_connection(self) -> Connection: schema_name=self.dataset_name, s3_staging_dir=self.config.query_result_bucket, work_group=self.config.athena_work_group, - **native_credentials) + **native_credentials, + ) return self._conn def close_connection(self) -> None: @@ -195,18 +222,24 @@ def drop_dataset(self) -> None: self.execute_sql(f"DROP DATABASE {self.fully_qualified_ddl_dataset_name()} CASCADE;") def fully_qualified_dataset_name(self, escape: bool = True) -> str: - return self.capabilities.escape_identifier(self.dataset_name) if escape else self.dataset_name + return ( + self.capabilities.escape_identifier(self.dataset_name) if escape else self.dataset_name + ) def drop_tables(self, *tables: str) -> None: if not tables: return - statements = [f"DROP TABLE IF EXISTS {self.make_qualified_ddl_table_name(table)};" for table in tables] + statements = [ + f"DROP TABLE IF EXISTS {self.make_qualified_ddl_table_name(table)};" for table in tables + ] self.execute_fragments(statements) @contextmanager @raise_database_error def begin_transaction(self) -> Iterator[DBTransaction]: - logger.warning("Athena does not support transactions! Each SQL statement is auto-committed separately.") + logger.warning( + "Athena does not support transactions! Each SQL statement is auto-committed separately." + ) yield self @raise_database_error @@ -235,7 +268,9 @@ def _make_database_exception(ex: Exception) -> Exception: return DatabaseTransientException(ex) return ex - def execute_sql(self, sql: AnyStr, *args: Any, **kwargs: Any) -> Optional[Sequence[Sequence[Any]]]: + def execute_sql( + self, sql: AnyStr, *args: Any, **kwargs: Any + ) -> Optional[Sequence[Sequence[Any]]]: with self.execute_query(sql, *args, **kwargs) as curr: if curr.description is None: return None @@ -244,13 +279,17 @@ def execute_sql(self, sql: AnyStr, *args: Any, **kwargs: Any) -> Optional[Sequen return f @staticmethod - def _convert_to_old_pyformat(new_style_string: str, args: Tuple[Any, ...]) -> Tuple[str, Dict[str, Any]]: + def _convert_to_old_pyformat( + new_style_string: str, args: Tuple[Any, ...] 
+ ) -> Tuple[str, Dict[str, Any]]: # create a list of keys - keys = ["arg"+str(i) for i, _ in enumerate(args)] + keys = ["arg" + str(i) for i, _ in enumerate(args)] # create an old style string and replace placeholders - old_style_string, count = re.subn(r"%s", lambda _: "%(" + keys.pop(0) + ")s", new_style_string) + old_style_string, count = re.subn( + r"%s", lambda _: "%(" + keys.pop(0) + ")s", new_style_string + ) # create a dictionary mapping keys to args - mapping = dict(zip(["arg"+str(i) for i, _ in enumerate(args)], args)) + mapping = dict(zip(["arg" + str(i) for i, _ in enumerate(args)], args)) # raise if there is a mismatch between args and string if count != len(args): raise DatabaseTransientException(OperationalError()) @@ -285,19 +324,17 @@ def has_dataset(self) -> bool: class AthenaClient(SqlJobClientWithStaging, SupportsStagingDestination): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() def __init__(self, schema: Schema, config: AthenaClientConfiguration) -> None: # verify if staging layout is valid for Athena # this will raise if the table prefix is not properly defined # we actually that {table_name} is first, no {schema_name} is allowed - self.table_prefix_layout = path_utils.get_table_prefix_layout(config.staging_config.layout, []) - - sql_client = AthenaSQLClient( - config.normalize_dataset_name(schema), - config + self.table_prefix_layout = path_utils.get_table_prefix_layout( + config.staging_config.layout, [] ) + + sql_client = AthenaSQLClient(config.normalize_dataset_name(schema), config) super().__init__(schema, config, sql_client) self.sql_client: AthenaSQLClient = sql_client # type: ignore self.config: AthenaClientConfiguration = config @@ -308,14 +345,19 @@ def initialize_storage(self, truncate_tables: Iterable[str] = None) -> None: truncate_tables = [] super().initialize_storage(truncate_tables) - def _from_db_type(self, hive_t: str, precision: Optional[int], scale: Optional[int]) -> TColumnType: + def _from_db_type( + self, hive_t: str, precision: Optional[int], scale: Optional[int] + ) -> TColumnType: return self.type_mapper.from_db_type(hive_t, precision, scale) def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = None) -> str: - return f"{self.sql_client.escape_ddl_identifier(c['name'])} {self.type_mapper.to_db_type(c, table_format)}" - - def _get_table_update_sql(self, table_name: str, new_columns: Sequence[TColumnSchema], generate_alter: bool) -> List[str]: + return ( + f"{self.sql_client.escape_ddl_identifier(c['name'])} {self.type_mapper.to_db_type(c, table_format)}" + ) + def _get_table_update_sql( + self, table_name: str, new_columns: Sequence[TColumnSchema], generate_alter: bool + ) -> List[str]: bucket = self.config.staging_config.bucket_url dataset = self.sql_client.dataset_name @@ -325,7 +367,9 @@ def _get_table_update_sql(self, table_name: str, new_columns: Sequence[TColumnSc # or if we are in iceberg mode, we create iceberg tables for all tables table = self.get_load_table(table_name, self.in_staging_mode) is_iceberg = self._is_iceberg_table(table) or table.get("write_disposition", None) == "skip" - columns = ", ".join([self._get_column_def_sql(c, table.get("table_format")) for c in new_columns]) + columns = ", ".join( + [self._get_column_def_sql(c, table.get("table_format")) for c in new_columns] + ) # this will fail if the table prefix is not properly defined table_prefix = self.table_prefix_layout.format(table_name=table_name) @@ -354,21 +398,32 @@ def start_file_load(self, table: TTableSchema, 
file_path: str, load_id: str) -> if table_schema_has_type(table, "time"): raise LoadJobTerminalException( file_path, - "Athena cannot load TIME columns from parquet tables. Please convert `datetime.time` objects in your data to `str` or `datetime.datetime`." + "Athena cannot load TIME columns from parquet tables. Please convert" + " `datetime.time` objects in your data to `str` or `datetime.datetime`.", ) job = super().start_file_load(table, file_path, load_id) if not job: - job = DoNothingFollowupJob(file_path) if self._is_iceberg_table(self.get_load_table(table["name"])) else DoNothingJob(file_path) + job = ( + DoNothingFollowupJob(file_path) + if self._is_iceberg_table(self.get_load_table(table["name"])) + else DoNothingJob(file_path) + ) return job def _create_append_followup_jobs(self, table_chain: Sequence[TTableSchema]) -> List[NewLoadJob]: if self._is_iceberg_table(self.get_load_table(table_chain[0]["name"])): - return [SqlStagingCopyJob.from_table_chain(table_chain, self.sql_client, {"replace": False})] + return [ + SqlStagingCopyJob.from_table_chain(table_chain, self.sql_client, {"replace": False}) + ] return super()._create_append_followup_jobs(table_chain) - def _create_replace_followup_jobs(self, table_chain: Sequence[TTableSchema]) -> List[NewLoadJob]: + def _create_replace_followup_jobs( + self, table_chain: Sequence[TTableSchema] + ) -> List[NewLoadJob]: if self._is_iceberg_table(self.get_load_table(table_chain[0]["name"])): - return [SqlStagingCopyJob.from_table_chain(table_chain, self.sql_client, {"replace": True})] + return [ + SqlStagingCopyJob.from_table_chain(table_chain, self.sql_client, {"replace": True}) + ] return super()._create_replace_followup_jobs(table_chain) def _create_merge_followup_jobs(self, table_chain: Sequence[TTableSchema]) -> List[NewLoadJob]: @@ -388,18 +443,22 @@ def should_load_data_to_staging_dataset(self, table: TTableSchema) -> bool: def should_truncate_table_before_load_on_staging_destination(self, table: TTableSchema) -> bool: # on athena we only truncate replace tables that are not iceberg table = self.get_load_table(table["name"]) - if table["write_disposition"] == "replace" and not self._is_iceberg_table(self.get_load_table(table["name"])): + if table["write_disposition"] == "replace" and not self._is_iceberg_table( + self.get_load_table(table["name"]) + ): return True return False - def should_load_data_to_staging_dataset_on_staging_destination(self, table: TTableSchema) -> bool: + def should_load_data_to_staging_dataset_on_staging_destination( + self, table: TTableSchema + ) -> bool: """iceberg table data goes into staging on staging destination""" return self._is_iceberg_table(self.get_load_table(table["name"])) def get_load_table(self, table_name: str, staging: bool = False) -> TTableSchema: table = super().get_load_table(table_name, staging) if self.config.force_iceberg: - table["table_format"] ="iceberg" + table["table_format"] = "iceberg" if staging and table.get("table_format", None) == "iceberg": table.pop("table_format") return table diff --git a/dlt/destinations/athena/configuration.py b/dlt/destinations/impl/athena/configuration.py similarity index 52% rename from dlt/destinations/athena/configuration.py rename to dlt/destinations/impl/athena/configuration.py index 5dd1341c34..6b985f284a 100644 --- a/dlt/destinations/athena/configuration.py +++ b/dlt/destinations/impl/athena/configuration.py @@ -1,13 +1,13 @@ -from typing import ClassVar, Final, List, Optional +from typing import ClassVar, Final, List, Optional, TYPE_CHECKING 
from dlt.common.configuration import configspec from dlt.common.destination.reference import DestinationClientDwhWithStagingConfiguration -from dlt.common.configuration.specs import AwsCredentials +from dlt.common.configuration.specs import AwsCredentials @configspec class AthenaClientConfiguration(DestinationClientDwhWithStagingConfiguration): - destination_name: Final[str] = "athena" # type: ignore[misc] + destination_type: Final[str] = "athena" # type: ignore[misc] query_result_bucket: str = None credentials: AwsCredentials = None athena_work_group: Optional[str] = None @@ -23,3 +23,19 @@ def __str__(self) -> str: return str(self.staging_config.credentials) else: return "[no staging set]" + + if TYPE_CHECKING: + + def __init__( + self, + *, + credentials: Optional[AwsCredentials] = None, + dataset_name: str = None, + default_schema_name: Optional[str] = None, + athena_work_group: Optional[str] = None, + aws_data_catalog: Optional[str] = None, + supports_truncate_command: bool = False, + force_iceberg: Optional[bool] = False, + destination_name: str = None, + environment: str = None, + ) -> None: ... diff --git a/dlt/destinations/impl/athena/factory.py b/dlt/destinations/impl/athena/factory.py new file mode 100644 index 0000000000..5b37607cca --- /dev/null +++ b/dlt/destinations/impl/athena/factory.py @@ -0,0 +1,56 @@ +import typing as t + +from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.destinations.impl.athena.configuration import AthenaClientConfiguration +from dlt.common.configuration.specs import AwsCredentials +from dlt.destinations.impl.athena import capabilities + +if t.TYPE_CHECKING: + from dlt.destinations.impl.athena.athena import AthenaClient + + +class athena(Destination[AthenaClientConfiguration, "AthenaClient"]): + spec = AthenaClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["AthenaClient"]: + from dlt.destinations.impl.athena.athena import AthenaClient + + return AthenaClient + + def __init__( + self, + query_result_bucket: t.Optional[str] = None, + credentials: t.Union[AwsCredentials, t.Dict[str, t.Any], t.Any] = None, + athena_work_group: t.Optional[str] = None, + aws_data_catalog: t.Optional[str] = "awsdatacatalog", + force_iceberg: bool = False, + destination_name: t.Optional[str] = None, + environment: t.Optional[str] = None, + **kwargs: t.Any, + ) -> None: + """Configure the Athena destination to use in a pipeline. + + All arguments provided here supersede other configuration sources such as environment variables and dlt config files. + + Args: + query_result_bucket: S3 bucket to store query results in + credentials: AWS credentials to connect to the Athena database. 
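For orientation only (not part of the patch): a sketch of how a factory instance like the athena class above can be passed straight to a pipeline; the bucket, work group, and dataset names are placeholders and the staging setup mirrors the usual Athena-over-S3 arrangement:

# illustrative sketch: using the athena destination factory in a pipeline (placeholder values)
import dlt
from dlt.destinations import athena

pipeline = dlt.pipeline(
    pipeline_name="athena_demo",
    destination=athena(
        query_result_bucket="s3://my-query-results",  # placeholder bucket
        athena_work_group="primary",                  # placeholder work group
    ),
    staging="filesystem",  # Athena reads data staged on S3 via the filesystem destination
    dataset_name="demo_data",
)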
+ athena_work_group: Athena work group to use + aws_data_catalog: Athena data catalog to use + force_iceberg: Force iceberg tables + **kwargs: Additional arguments passed to the destination config + """ + super().__init__( + query_result_bucket=query_result_bucket, + credentials=credentials, + athena_work_group=athena_work_group, + aws_data_catalog=aws_data_catalog, + force_iceberg=force_iceberg, + destination_name=destination_name, + environment=environment, + **kwargs, + ) diff --git a/dlt/destinations/bigquery/README.md b/dlt/destinations/impl/bigquery/README.md similarity index 100% rename from dlt/destinations/bigquery/README.md rename to dlt/destinations/impl/bigquery/README.md diff --git a/dlt/destinations/bigquery/__init__.py b/dlt/destinations/impl/bigquery/__init__.py similarity index 50% rename from dlt/destinations/bigquery/__init__.py rename to dlt/destinations/impl/bigquery/__init__.py index 3d97e9a929..1304bd72bb 100644 --- a/dlt/destinations/bigquery/__init__.py +++ b/dlt/destinations/impl/bigquery/__init__.py @@ -1,20 +1,7 @@ -from typing import Type from dlt.common.data_writers.escape import escape_bigquery_identifier - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE -from dlt.destinations.bigquery.configuration import BigQueryClientConfiguration - - -@with_config(spec=BigQueryClientConfiguration, sections=(known_sections.DESTINATION, "bigquery",)) -def _configure(config: BigQueryClientConfiguration = config.value) -> BigQueryClientConfiguration: - return config - def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() @@ -35,14 +22,3 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supports_ddl_transactions = False return caps - - -def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.bigquery.bigquery import BigQueryClient - - return BigQueryClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[DestinationClientConfiguration]: - return BigQueryClientConfiguration \ No newline at end of file diff --git a/dlt/destinations/bigquery/bigquery.py b/dlt/destinations/impl/bigquery/bigquery.py similarity index 75% rename from dlt/destinations/bigquery/bigquery.py rename to dlt/destinations/impl/bigquery/bigquery.py index 9cc7591f57..fa4f5f0419 100644 --- a/dlt/destinations/bigquery/bigquery.py +++ b/dlt/destinations/impl/bigquery/bigquery.py @@ -7,7 +7,13 @@ from dlt.common import json, logger from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import FollowupJob, NewLoadJob, TLoadJobState, LoadJob, SupportsStagingDestination +from dlt.common.destination.reference import ( + FollowupJob, + NewLoadJob, + TLoadJobState, + LoadJob, + SupportsStagingDestination, +) from dlt.common.data_types import TDataType from dlt.common.storages.file_storage import FileStorage from dlt.common.schema import TColumnSchema, Schema, TTableSchemaColumns @@ -15,11 +21,16 @@ from dlt.common.schema.exceptions import 
UnknownTableException from dlt.destinations.job_client_impl import SqlJobClientWithStaging -from dlt.destinations.exceptions import DestinationSchemaWillNotUpdate, DestinationTransientException, LoadJobNotExistsException, LoadJobTerminalException - -from dlt.destinations.bigquery import capabilities -from dlt.destinations.bigquery.configuration import BigQueryClientConfiguration -from dlt.destinations.bigquery.sql_client import BigQuerySqlClient, BQ_TERMINAL_REASONS +from dlt.destinations.exceptions import ( + DestinationSchemaWillNotUpdate, + DestinationTransientException, + LoadJobNotExistsException, + LoadJobTerminalException, +) + +from dlt.destinations.impl.bigquery import capabilities +from dlt.destinations.impl.bigquery.configuration import BigQueryClientConfiguration +from dlt.destinations.impl.bigquery.sql_client import BigQuerySqlClient, BQ_TERMINAL_REASONS from dlt.destinations.sql_jobs import SqlMergeJob, SqlStagingCopyJob, SqlJobParams from dlt.destinations.job_impl import NewReferenceJob from dlt.destinations.sql_client import SqlClientBase @@ -62,7 +73,9 @@ class BigQueryTypeMapper(TypeMapper): "TIME": "time", } - def from_db_type(self, db_type: str, precision: Optional[int], scale: Optional[int]) -> TColumnType: + def from_db_type( + self, db_type: str, precision: Optional[int], scale: Optional[int] + ) -> TColumnType: if db_type == "BIGNUMERIC": if precision is None: # biggest numeric possible return dict(data_type="wei") @@ -75,7 +88,7 @@ def __init__( file_name: str, bq_load_job: bigquery.LoadJob, http_timeout: float, - retry_deadline: float + retry_deadline: float, ) -> None: self.bq_load_job = bq_load_job self.default_retry = bigquery.DEFAULT_RETRY.with_deadline(retry_deadline) @@ -95,7 +108,10 @@ def state(self) -> TLoadJobState: # the job permanently failed for the reason above return "failed" elif reason in ["internalError"]: - logger.warning(f"Got reason {reason} for job {self.file_name}, job considered still running. ({self.bq_load_job.error_result})") + logger.warning( + f"Got reason {reason} for job {self.file_name}, job considered still" + f" running. 
({self.bq_load_job.error_result})" + ) # status of the job could not be obtained, job still running return "running" else: @@ -104,17 +120,19 @@ def state(self) -> TLoadJobState: else: return "running" - def job_id(self) -> str: - return BigQueryLoadJob.get_job_id_from_file_path(super().job_id()) + def bigquery_job_id(self) -> str: + return BigQueryLoadJob.get_job_id_from_file_path(super().file_name()) def exception(self) -> str: - exception: str = json.dumps({ - "error_result": self.bq_load_job.error_result, - "errors": self.bq_load_job.errors, - "job_start": self.bq_load_job.started, - "job_end": self.bq_load_job.ended, - "job_id": self.bq_load_job.job_id - }) + exception: str = json.dumps( + { + "error_result": self.bq_load_job.error_result, + "errors": self.bq_load_job.errors, + "job_start": self.bq_load_job.started, + "job_end": self.bq_load_job.ended, + "job_id": self.bq_load_job.job_id, + } + ) return exception @staticmethod @@ -123,19 +141,32 @@ def get_job_id_from_file_path(file_path: str) -> str: class BigQueryMergeJob(SqlMergeJob): - @classmethod - def gen_key_table_clauses(cls, root_table_name: str, staging_root_table_name: str, key_clauses: Sequence[str], for_delete: bool) -> List[str]: + def gen_key_table_clauses( + cls, + root_table_name: str, + staging_root_table_name: str, + key_clauses: Sequence[str], + for_delete: bool, + ) -> List[str]: # generate several clauses: BigQuery does not support OR nor unions sql: List[str] = [] for clause in key_clauses: - sql.append(f"FROM {root_table_name} AS d WHERE EXISTS (SELECT 1 FROM {staging_root_table_name} AS s WHERE {clause.format(d='d', s='s')})") + sql.append( + f"FROM {root_table_name} AS d WHERE EXISTS (SELECT 1 FROM" + f" {staging_root_table_name} AS s WHERE {clause.format(d='d', s='s')})" + ) return sql -class BigqueryStagingCopyJob(SqlStagingCopyJob): +class BigqueryStagingCopyJob(SqlStagingCopyJob): @classmethod - def generate_sql(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClientBase[Any], params: Optional[SqlJobParams] = None) -> List[str]: + def generate_sql( + cls, + table_chain: Sequence[TTableSchema], + sql_client: SqlClientBase[Any], + params: Optional[SqlJobParams] = None, + ) -> List[str]: sql: List[str] = [] for table in table_chain: with sql_client.with_staging_dataset(staging=True): @@ -147,8 +178,8 @@ def generate_sql(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClient sql.append(f"CREATE TABLE {table_name} CLONE {staging_table_name};") return sql -class BigQueryClient(SqlJobClientWithStaging, SupportsStagingDestination): +class BigQueryClient(SqlJobClientWithStaging, SupportsStagingDestination): capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() def __init__(self, schema: Schema, config: BigQueryClientConfiguration) -> None: @@ -157,7 +188,7 @@ def __init__(self, schema: Schema, config: BigQueryClientConfiguration) -> None: config.credentials, config.get_location(), config.http_timeout, - config.retry_deadline + config.retry_deadline, ) super().__init__(schema, config, sql_client) self.config: BigQueryClientConfiguration = config @@ -167,7 +198,9 @@ def __init__(self, schema: Schema, config: BigQueryClientConfiguration) -> None: def _create_merge_followup_jobs(self, table_chain: Sequence[TTableSchema]) -> List[NewLoadJob]: return [BigQueryMergeJob.from_table_chain(table_chain, self.sql_client)] - def _create_replace_followup_jobs(self, table_chain: Sequence[TTableSchema]) -> List[NewLoadJob]: + def _create_replace_followup_jobs( + self, table_chain: 
Sequence[TTableSchema] + ) -> List[NewLoadJob]: if self.config.replace_strategy == "staging-optimized": return [BigqueryStagingCopyJob.from_table_chain(table_chain, self.sql_client)] return super()._create_replace_followup_jobs(table_chain) @@ -190,7 +223,7 @@ def restore_file_load(self, file_path: str) -> LoadJob: FileStorage.get_file_name_from_file_path(file_path), self._retrieve_load_job(file_path), self.config.http_timeout, - self.config.retry_deadline + self.config.retry_deadline, ) except api_core_exceptions.GoogleAPICallError as gace: reason = BigQuerySqlClient._get_reason_from_errors(gace) @@ -211,7 +244,7 @@ def start_file_load(self, table: TTableSchema, file_path: str, load_id: str) -> FileStorage.get_file_name_from_file_path(file_path), self._create_load_job(table, file_path), self.config.http_timeout, - self.config.retry_deadline + self.config.retry_deadline, ) except api_core_exceptions.GoogleAPICallError as gace: reason = BigQuerySqlClient._get_reason_from_errors(gace) @@ -228,17 +261,31 @@ def start_file_load(self, table: TTableSchema, file_path: str, load_id: str) -> raise DestinationTransientException(gace) return job - def _get_table_update_sql(self, table_name: str, new_columns: Sequence[TColumnSchema], generate_alter: bool, separate_alters: bool = False) -> List[str]: + def _get_table_update_sql( + self, + table_name: str, + new_columns: Sequence[TColumnSchema], + generate_alter: bool, + separate_alters: bool = False, + ) -> List[str]: sql = super()._get_table_update_sql(table_name, new_columns, generate_alter) canonical_name = self.sql_client.make_qualified_table_name(table_name) - cluster_list = [self.capabilities.escape_identifier(c["name"]) for c in new_columns if c.get("cluster")] - partition_list = [self.capabilities.escape_identifier(c["name"]) for c in new_columns if c.get("partition")] + cluster_list = [ + self.capabilities.escape_identifier(c["name"]) for c in new_columns if c.get("cluster") + ] + partition_list = [ + self.capabilities.escape_identifier(c["name"]) + for c in new_columns + if c.get("partition") + ] # partition by must be added first if len(partition_list) > 0: if len(partition_list) > 1: - raise DestinationSchemaWillNotUpdate(canonical_name, partition_list, "Partition requested for more than one column") + raise DestinationSchemaWillNotUpdate( + canonical_name, partition_list, "Partition requested for more than one column" + ) else: sql[0] = sql[0] + f"\nPARTITION BY DATE({partition_list[0]})" if len(cluster_list) > 0: @@ -248,7 +295,9 @@ def _get_table_update_sql(self, table_name: str, new_columns: Sequence[TColumnSc def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = None) -> str: name = self.capabilities.escape_identifier(c["name"]) - return f"{name} {self.type_mapper.to_db_type(c, table_format)} {self._gen_not_null(c.get('nullable', True))}" + return ( + f"{name} {self.type_mapper.to_db_type(c, table_format)} {self._gen_not_null(c.get('nullable', True))}" + ) def get_storage_table(self, table_name: str) -> Tuple[bool, TTableSchemaColumns]: schema_table: TTableSchemaColumns = {} @@ -256,7 +305,7 @@ def get_storage_table(self, table_name: str) -> Tuple[bool, TTableSchemaColumns] table = self.sql_client.native_connection.get_table( self.sql_client.make_qualified_table_name(table_name, escape=False), retry=self.sql_client._default_retry, - timeout=self.config.http_timeout + timeout=self.config.http_timeout, ) partition_field = table.time_partitioning.field if table.time_partitioning else None for c in table.schema: @@ 
-269,7 +318,7 @@ def get_storage_table(self, table_name: str) -> Tuple[bool, TTableSchemaColumns] "foreign_key": False, "cluster": c.name in (table.clustering_fields or []), "partition": c.name == partition_field, - **self._from_db_type(c.field_type, c.precision, c.scale) + **self._from_db_type(c.field_type, c.precision, c.scale), } schema_table[c.name] = schema_c return True, schema_table @@ -293,7 +342,10 @@ def _create_load_job(self, table: TTableSchema, file_path: str) -> bigquery.Load if ext == "parquet": # if table contains complex types, we cannot load with parquet if table_schema_has_type(table, "complex"): - raise LoadJobTerminalException(file_path, "Bigquery cannot load into JSON data type from parquet. Use jsonl instead.") + raise LoadJobTerminalException( + file_path, + "Bigquery cannot load into JSON data type from parquet. Use jsonl instead.", + ) source_format = bigquery.SourceFormat.PARQUET # parquet needs NUMERIC type autodetection decimal_target_types = ["NUMERIC", "BIGNUMERIC"] @@ -306,29 +358,32 @@ def _create_load_job(self, table: TTableSchema, file_path: str) -> bigquery.Load source_format=source_format, decimal_target_types=decimal_target_types, ignore_unknown_values=False, - max_bad_records=0) + max_bad_records=0, + ) if bucket_path: return self.sql_client.native_connection.load_table_from_uri( - bucket_path, - self.sql_client.make_qualified_table_name(table_name, escape=False), - job_id=job_id, - job_config=job_config, - timeout=self.config.file_upload_timeout - ) + bucket_path, + self.sql_client.make_qualified_table_name(table_name, escape=False), + job_id=job_id, + job_config=job_config, + timeout=self.config.file_upload_timeout, + ) with open(file_path, "rb") as f: return self.sql_client.native_connection.load_table_from_file( - f, - self.sql_client.make_qualified_table_name(table_name, escape=False), - job_id=job_id, - job_config=job_config, - timeout=self.config.file_upload_timeout - ) + f, + self.sql_client.make_qualified_table_name(table_name, escape=False), + job_id=job_id, + job_config=job_config, + timeout=self.config.file_upload_timeout, + ) def _retrieve_load_job(self, file_path: str) -> bigquery.LoadJob: job_id = BigQueryLoadJob.get_job_id_from_file_path(file_path) return cast(bigquery.LoadJob, self.sql_client.native_connection.get_job(job_id)) - def _from_db_type(self, bq_t: str, precision: Optional[int], scale: Optional[int]) -> TColumnType: + def _from_db_type( + self, bq_t: str, precision: Optional[int], scale: Optional[int] + ) -> TColumnType: return self.type_mapper.from_db_type(bq_t, precision, scale) diff --git a/dlt/destinations/bigquery/configuration.py b/dlt/destinations/impl/bigquery/configuration.py similarity index 72% rename from dlt/destinations/bigquery/configuration.py rename to dlt/destinations/impl/bigquery/configuration.py index 146e137475..bf41d38aff 100644 --- a/dlt/destinations/bigquery/configuration.py +++ b/dlt/destinations/impl/bigquery/configuration.py @@ -1,5 +1,5 @@ import warnings -from typing import TYPE_CHECKING, ClassVar, List, Optional +from typing import TYPE_CHECKING, ClassVar, List, Optional, Final from dlt.common.configuration import configspec from dlt.common.configuration.specs import GcpServiceAccountCredentials @@ -10,13 +10,15 @@ @configspec class BigQueryClientConfiguration(DestinationClientDwhWithStagingConfiguration): - destination_name: str = "bigquery" + destination_type: Final[str] = "bigquery" # type: ignore credentials: GcpServiceAccountCredentials = None location: str = "US" http_timeout: float = 
15.0 # connection timeout for http request to BigQuery api file_upload_timeout: float = 30 * 60.0 # a timeout for file upload when loading local files - retry_deadline: float = 60.0 # how long to retry the operation in case of error, the backoff 60s + retry_deadline: float = ( + 60.0 # how long to retry the operation in case of error, the backoff 60s + ) __config_gen_annotations__: ClassVar[List[str]] = ["location"] @@ -25,7 +27,10 @@ def get_location(self) -> str: return self.location # default was changed in credentials, emit deprecation message if self.credentials.location != "US": - warnings.warn("Setting BigQuery location in the credentials is deprecated. Please set the location directly in bigquery section ie. destinations.bigquery.location='EU'") + warnings.warn( + "Setting BigQuery location in the credentials is deprecated. Please set the" + " location directly in bigquery section ie. destinations.bigquery.location='EU'" + ) return self.credentials.location def fingerprint(self) -> str: @@ -35,16 +40,17 @@ def fingerprint(self) -> str: return "" if TYPE_CHECKING: + def __init__( self, - destination_name: str = None, + *, credentials: Optional[GcpServiceAccountCredentials] = None, dataset_name: str = None, default_schema_name: Optional[str] = None, location: str = "US", http_timeout: float = 15.0, file_upload_timeout: float = 30 * 60.0, - retry_deadline: float = 60.0 - ) -> None: - ... - + retry_deadline: float = 60.0, + destination_name: str = None, + environment: str = None, + ) -> None: ... diff --git a/dlt/destinations/impl/bigquery/factory.py b/dlt/destinations/impl/bigquery/factory.py new file mode 100644 index 0000000000..fc92c3c087 --- /dev/null +++ b/dlt/destinations/impl/bigquery/factory.py @@ -0,0 +1,38 @@ +import typing as t + +from dlt.destinations.impl.bigquery.configuration import BigQueryClientConfiguration +from dlt.common.configuration.specs import GcpServiceAccountCredentials +from dlt.destinations.impl.bigquery import capabilities +from dlt.common.destination import Destination, DestinationCapabilitiesContext + +if t.TYPE_CHECKING: + from dlt.destinations.impl.bigquery.bigquery import BigQueryClient + + +class bigquery(Destination[BigQueryClientConfiguration, "BigQueryClient"]): + spec = BigQueryClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["BigQueryClient"]: + from dlt.destinations.impl.bigquery.bigquery import BigQueryClient + + return BigQueryClient + + def __init__( + self, + credentials: t.Optional[GcpServiceAccountCredentials] = None, + location: t.Optional[str] = None, + destination_name: t.Optional[str] = None, + environment: t.Optional[str] = None, + **kwargs: t.Any, + ) -> None: + super().__init__( + credentials=credentials, + location=location, + destination_name=destination_name, + environment=environment, + **kwargs, + ) diff --git a/dlt/destinations/bigquery/sql_client.py b/dlt/destinations/impl/bigquery/sql_client.py similarity index 82% rename from dlt/destinations/bigquery/sql_client.py rename to dlt/destinations/impl/bigquery/sql_client.py index 3d6eb19833..cf5d2ecbd4 100644 --- a/dlt/destinations/bigquery/sql_client.py +++ b/dlt/destinations/impl/bigquery/sql_client.py @@ -1,4 +1,3 @@ - from contextlib import contextmanager from typing import Any, AnyStr, ClassVar, Iterator, List, Optional, Sequence, Type @@ -14,19 +13,37 @@ from dlt.common.typing import StrAny from dlt.destinations.typing import DBApi, DBApiCursor, DBTransaction, 
DataFrame -from dlt.destinations.exceptions import DatabaseTerminalException, DatabaseTransientException, DatabaseUndefinedRelation -from dlt.destinations.sql_client import DBApiCursorImpl, SqlClientBase, raise_database_error, raise_open_connection_error - -from dlt.destinations.bigquery import capabilities +from dlt.destinations.exceptions import ( + DatabaseTerminalException, + DatabaseTransientException, + DatabaseUndefinedRelation, +) +from dlt.destinations.sql_client import ( + DBApiCursorImpl, + SqlClientBase, + raise_database_error, + raise_open_connection_error, +) + +from dlt.destinations.impl.bigquery import capabilities # terminal reasons as returned in BQ gRPC error response # https://cloud.google.com/bigquery/docs/error-messages -BQ_TERMINAL_REASONS = ["billingTierLimitExceeded", "duplicate", "invalid", "notFound", "notImplemented", "stopped", "tableUnavailable"] +BQ_TERMINAL_REASONS = [ + "billingTierLimitExceeded", + "duplicate", + "invalid", + "notFound", + "notImplemented", + "stopped", + "tableUnavailable", +] # invalidQuery is an transient error -> must be fixed by programmer class BigQueryDBApiCursorImpl(DBApiCursorImpl): """Use native BigQuery data frame support if available""" + native_cursor: BQDbApiCursor # type: ignore def df(self, chunk_size: int = None, **kwargs: Any) -> DataFrame: @@ -43,7 +60,6 @@ def df(self, chunk_size: int = None, **kwargs: Any) -> DataFrame: class BigQuerySqlClient(SqlClientBase[bigquery.Client], DBTransaction): - dbapi: ClassVar[DBApi] = bq_dbapi capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() @@ -53,7 +69,7 @@ def __init__( credentials: GcpServiceAccountCredentialsWithoutDefaults, location: str = "US", http_timeout: float = 15.0, - retry_deadline: float = 60.0 + retry_deadline: float = 60.0, ) -> None: self._client: bigquery.Client = None self.credentials: GcpServiceAccountCredentialsWithoutDefaults = credentials @@ -62,16 +78,17 @@ def __init__( super().__init__(credentials.project_id, dataset_name) self._default_retry = bigquery.DEFAULT_RETRY.with_deadline(retry_deadline) - self._default_query = bigquery.QueryJobConfig(default_dataset=self.fully_qualified_dataset_name(escape=False)) + self._default_query = bigquery.QueryJobConfig( + default_dataset=self.fully_qualified_dataset_name(escape=False) + ) self._session_query: bigquery.QueryJobConfig = None - @raise_open_connection_error def open_connection(self) -> bigquery.Client: self._client = bigquery.Client( self.credentials.project_id, credentials=self.credentials.to_native_credentials(), - location=self.location + location=self.location, ) # patch the client query so our defaults are used @@ -81,7 +98,7 @@ def query_patch( query: str, retry: Any = self._default_retry, timeout: Any = self.http_timeout, - **kwargs: Any + **kwargs: Any, ) -> Any: return query_orig(query, retry=retry, timeout=timeout, **kwargs) @@ -105,8 +122,8 @@ def begin_transaction(self) -> Iterator[DBTransaction]: "BEGIN TRANSACTION;", job_config=bigquery.QueryJobConfig( create_session=True, - default_dataset=self.fully_qualified_dataset_name(escape=False) - ) + default_dataset=self.fully_qualified_dataset_name(escape=False), + ), ) self._session_query = bigquery.QueryJobConfig( create_session=False, @@ -115,7 +132,7 @@ def begin_transaction(self) -> Iterator[DBTransaction]: bigquery.query.ConnectionProperty( key="session_id", value=job.session_info.session_id ) - ] + ], ) try: job.result() @@ -124,7 +141,9 @@ def begin_transaction(self) -> Iterator[DBTransaction]: self._session_query = None 
raise else: - raise dbapi_exceptions.ProgrammingError("Nested transactions not supported on BigQuery") + raise dbapi_exceptions.ProgrammingError( + "Nested transactions not supported on BigQuery" + ) yield self self.commit_transaction() except Exception: @@ -150,7 +169,11 @@ def native_connection(self) -> bigquery.Client: def has_dataset(self) -> bool: try: - self._client.get_dataset(self.fully_qualified_dataset_name(escape=False), retry=self._default_retry, timeout=self.http_timeout) + self._client.get_dataset( + self.fully_qualified_dataset_name(escape=False), + retry=self._default_retry, + timeout=self.http_timeout, + ) return True except gcp_exceptions.NotFound: return False @@ -160,7 +183,7 @@ def create_dataset(self) -> None: self.fully_qualified_dataset_name(escape=False), exists_ok=False, retry=self._default_retry, - timeout=self.http_timeout + timeout=self.http_timeout, ) def drop_dataset(self) -> None: @@ -169,10 +192,12 @@ def drop_dataset(self) -> None: not_found_ok=True, delete_contents=True, retry=self._default_retry, - timeout=self.http_timeout + timeout=self.http_timeout, ) - def execute_sql(self, sql: AnyStr, *args: Any, **kwargs: Any) -> Optional[Sequence[Sequence[Any]]]: + def execute_sql( + self, sql: AnyStr, *args: Any, **kwargs: Any + ) -> Optional[Sequence[Sequence[Any]]]: with self.execute_query(sql, *args, **kwargs) as curr: if not curr.description: return None @@ -187,7 +212,7 @@ def execute_sql(self, sql: AnyStr, *args: Any, **kwargs: Any) -> Optional[Sequen @contextmanager @raise_database_error - def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DBApiCursor]: + def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DBApiCursor]: conn: DbApiConnection = None curr: DBApiCursor = None db_args = args if args else kwargs if kwargs else None @@ -226,11 +251,17 @@ def _make_database_exception(cls, ex: Exception) -> Exception: return DatabaseUndefinedRelation(ex) if reason == "invalidQuery" and "was not found" in str(ex) and "Dataset" in str(ex): return DatabaseUndefinedRelation(ex) - if reason == "invalidQuery" and "Not found" in str(ex) and ("Dataset" in str(ex) or "Table" in str(ex)): + if ( + reason == "invalidQuery" + and "Not found" in str(ex) + and ("Dataset" in str(ex) or "Table" in str(ex)) + ): return DatabaseUndefinedRelation(ex) if reason == "accessDenied" and "Dataset" in str(ex) and "not exist" in str(ex): return DatabaseUndefinedRelation(ex) - if reason == "invalidQuery" and ("Unrecognized name" in str(ex) or "cannot be null" in str(ex)): + if reason == "invalidQuery" and ( + "Unrecognized name" in str(ex) or "cannot be null" in str(ex) + ): # unknown column, inserting NULL into required field return DatabaseTerminalException(ex) if reason in BQ_TERMINAL_REASONS: @@ -253,4 +284,7 @@ def is_dbapi_exception(ex: Exception) -> bool: class TransactionsNotImplementedError(NotImplementedError): def __init__(self) -> None: - super().__init__("BigQuery does not support transaction management. Instead you may wrap your SQL script in BEGIN TRANSACTION; ... COMMIT TRANSACTION;") + super().__init__( + "BigQuery does not support transaction management. Instead you may wrap your SQL script" + " in BEGIN TRANSACTION; ... 
COMMIT TRANSACTION;" + ) diff --git a/dlt/destinations/duckdb/__init__.py b/dlt/destinations/impl/duckdb/__init__.py similarity index 54% rename from dlt/destinations/duckdb/__init__.py rename to dlt/destinations/impl/duckdb/__init__.py index d9882cc0eb..5cbc8dea53 100644 --- a/dlt/destinations/duckdb/__init__.py +++ b/dlt/destinations/impl/duckdb/__init__.py @@ -1,20 +1,7 @@ -from typing import Type - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config from dlt.common.data_writers.escape import escape_postgres_identifier, escape_duckdb_literal from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE -from dlt.destinations.duckdb.configuration import DuckDbClientConfiguration - - -@with_config(spec=DuckDbClientConfiguration, sections=(known_sections.DESTINATION, "duckdb",)) -def _configure(config: DuckDbClientConfiguration = config.value) -> DuckDbClientConfiguration: - return config - def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() @@ -37,14 +24,3 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supports_truncate_command = False return caps - - -def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.duckdb.duck import DuckDbClient - - return DuckDbClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[DestinationClientConfiguration]: - return DuckDbClientConfiguration diff --git a/dlt/destinations/duckdb/configuration.py b/dlt/destinations/impl/duckdb/configuration.py similarity index 86% rename from dlt/destinations/duckdb/configuration.py rename to dlt/destinations/impl/duckdb/configuration.py index 82ee325ed3..8cb88c43b5 100644 --- a/dlt/destinations/duckdb/configuration.py +++ b/dlt/destinations/impl/duckdb/configuration.py @@ -7,7 +7,10 @@ from dlt.common.configuration import configspec from dlt.common.configuration.specs import ConnectionStringCredentials from dlt.common.configuration.specs.exceptions import InvalidConnectionString -from dlt.common.destination.reference import DestinationClientDwhWithStagingConfiguration, DestinationClientStagingConfiguration +from dlt.common.destination.reference import ( + DestinationClientDwhWithStagingConfiguration, + DestinationClientStagingConfiguration, +) from dlt.common.typing import TSecretValue DUCK_DB_NAME = "%s.duckdb" @@ -25,6 +28,7 @@ class DuckDbBaseCredentials(ConnectionStringCredentials): read_only: bool = False # open database read/write def borrow_conn(self, read_only: bool) -> Any: + # TODO: Can this be done in sql client instead? 
import duckdb if not hasattr(self, "_conn_lock"): @@ -58,6 +62,7 @@ def parse_native_representation(self, native_value: Any) -> None: try: # check if database was passed as explicit connection import duckdb + if isinstance(native_value, duckdb.DuckDBPyConnection): self._conn = native_value self._conn_owner = False @@ -95,6 +100,13 @@ class DuckDbCredentials(DuckDbBaseCredentials): __config_gen_annotations__: ClassVar[List[str]] = [] + def is_partial(self) -> bool: + partial = super().is_partial() + if partial: + return True + # Wait until pipeline context is set up before resolving + return self.database == ":pipeline:" + def on_resolved(self) -> None: # do not set any paths for external database if self.database == ":external:": @@ -126,8 +138,9 @@ def _path_in_pipeline(self, rel_path: str) -> str: if context.is_active(): # pipeline is active, get the working directory return os.path.join(context.pipeline().working_dir, rel_path) - return None - + raise RuntimeError( + "Attempting to use special duckdb database :pipeline: outside of pipeline context." + ) def _path_to_pipeline(self, abspath: str) -> None: from dlt.common.configuration.container import Container @@ -164,7 +177,11 @@ def _path_from_pipeline(self, default_path: str) -> Tuple[str, bool]: pipeline_path = pipeline.get_local_state_val(LOCAL_STATE_KEY) # make sure that path exists if not os.path.exists(pipeline_path): - logger.warning(f"Duckdb attached to pipeline {pipeline.pipeline_name} in path {os.path.relpath(pipeline_path)} was deleted. Attaching to duckdb database '{default_path}' in current folder.") + logger.warning( + f"Duckdb attached to pipeline {pipeline.pipeline_name} in path" + f" {os.path.relpath(pipeline_path)} was deleted. Attaching to duckdb" + f" database '{default_path}' in current folder." + ) else: return pipeline_path, False except KeyError: @@ -173,13 +190,18 @@ def _path_from_pipeline(self, default_path: str) -> Tuple[str, bool]: return default_path, True + def _conn_str(self) -> str: + return self.database + @configspec class DuckDbClientConfiguration(DestinationClientDwhWithStagingConfiguration): - destination_name: Final[str] = "duckdb" # type: ignore + destination_type: Final[str] = "duckdb" # type: ignore credentials: DuckDbCredentials - create_indexes: bool = False # should unique indexes be created, this slows loading down massively + create_indexes: bool = ( + False # should unique indexes be created, this slows loading down massively + ) if TYPE_CHECKING: try: @@ -189,11 +211,12 @@ class DuckDbClientConfiguration(DestinationClientDwhWithStagingConfiguration): def __init__( self, - destination_name: str = None, + *, credentials: Union[DuckDbCredentials, str, DuckDBPyConnection] = None, dataset_name: str = None, default_schema_name: Optional[str] = None, create_indexes: bool = False, - staging_config: Optional[DestinationClientStagingConfiguration] = None - ) -> None: - ... + staging_config: Optional[DestinationClientStagingConfiguration] = None, + destination_name: str = None, + environment: str = None, + ) -> None: ... 
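Editor's note (illustrative, not part of the diff): the duckdb configuration above defers resolution of the special `:pipeline:` database location until a pipeline context is active (`is_partial` returns True for `:pipeline:`, and `_path_in_pipeline` now raises outside a pipeline context). A minimal usage sketch follows; it assumes the `duckdb` factory introduced later in this diff (`dlt/destinations/impl/duckdb/factory.py`) is re-exported as `dlt.destinations.duckdb`, which is not shown in these hunks.

import dlt
from dlt.destinations import duckdb  # assumed re-export of the factory class added in this PR

# ":pipeline:" is resolved only once a pipeline context exists; the database
# file is then created in the pipeline's working directory.
pipeline = dlt.pipeline(
    pipeline_name="duckdb_pipeline_db",
    destination=duckdb(credentials=":pipeline:"),
    dataset_name="demo",
)
pipeline.run([{"id": 1}, {"id": 2}], table_name="items")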
diff --git a/dlt/destinations/duckdb/duck.py b/dlt/destinations/impl/duckdb/duck.py similarity index 76% rename from dlt/destinations/duckdb/duck.py rename to dlt/destinations/impl/duckdb/duck.py index 4a2e54f2b6..735a4ce7e3 100644 --- a/dlt/destinations/duckdb/duck.py +++ b/dlt/destinations/impl/duckdb/duck.py @@ -12,15 +12,13 @@ from dlt.destinations.insert_job_client import InsertValuesJobClient -from dlt.destinations.duckdb import capabilities -from dlt.destinations.duckdb.sql_client import DuckDbSqlClient -from dlt.destinations.duckdb.configuration import DuckDbClientConfiguration +from dlt.destinations.impl.duckdb import capabilities +from dlt.destinations.impl.duckdb.sql_client import DuckDbSqlClient +from dlt.destinations.impl.duckdb.configuration import DuckDbClientConfiguration from dlt.destinations.type_mapping import TypeMapper -HINT_TO_POSTGRES_ATTR: Dict[TColumnHint, str] = { - "unique": "UNIQUE" -} +HINT_TO_POSTGRES_ATTR: Dict[TColumnHint, str] = {"unique": "UNIQUE"} # duckdb cannot load PARQUET to the same table in parallel. so serialize it per table PARQUET_TABLE_LOCK = threading.Lock() @@ -38,7 +36,7 @@ class DuckDbTypeMapper(TypeMapper): "timestamp": "TIMESTAMP WITH TIME ZONE", "bigint": "BIGINT", "binary": "BLOB", - "time": "TIME" + "time": "TIME", } sct_to_dbt = { @@ -69,7 +67,9 @@ class DuckDbTypeMapper(TypeMapper): "TIMESTAMP_NS": "timestamp", } - def to_db_integer_type(self, precision: Optional[int], table_format: TTableFormat = None) -> str: + def to_db_integer_type( + self, precision: Optional[int], table_format: TTableFormat = None + ) -> str: if precision is None: return "BIGINT" # Precision is number of bits @@ -83,7 +83,9 @@ def to_db_integer_type(self, precision: Optional[int], table_format: TTableForma return "BIGINT" return "HUGEINT" - def to_db_datetime_type(self, precision: Optional[int], table_format: TTableFormat = None) -> str: + def to_db_datetime_type( + self, precision: Optional[int], table_format: TTableFormat = None + ) -> str: if precision is None or precision == 6: return super().to_db_datetime_type(precision, table_format) if precision == 0: @@ -92,9 +94,13 @@ def to_db_datetime_type(self, precision: Optional[int], table_format: TTableForm return "TIMESTAMP_MS" if precision == 9: return "TIMESTAMP_NS" - raise TerminalValueError(f"timestamp {precision} cannot be mapped into duckdb TIMESTAMP typ") + raise TerminalValueError( + f"timestamp {precision} cannot be mapped into duckdb TIMESTAMP typ" + ) - def from_db_type(self, db_type: str, precision: Optional[int], scale: Optional[int]) -> TColumnType: + def from_db_type( + self, db_type: str, precision: Optional[int], scale: Optional[int] + ) -> TColumnType: # duckdb provides the types with scale and precision db_type = db_type.split("(")[0].upper() if db_type == "DECIMAL": @@ -114,7 +120,9 @@ def __init__(self, table_name: str, file_path: str, sql_client: DuckDbSqlClient) # lock when creating a new lock with PARQUET_TABLE_LOCK: # create or get lock per table name - lock: threading.Lock = TABLES_LOCKS.setdefault(qualified_table_name, threading.Lock()) + lock: threading.Lock = TABLES_LOCKS.setdefault( + qualified_table_name, threading.Lock() + ) elif file_path.endswith("jsonl"): # NOTE: loading JSON does not work in practice on duckdb: the missing keys fail the load instead of being interpreted as NULL source_format = "JSON" # newline delimited, compression auto @@ -125,8 +133,10 @@ def __init__(self, table_name: str, file_path: str, sql_client: DuckDbSqlClient) with maybe_context(lock): with 
sql_client.begin_transaction(): - sql_client.execute_sql(f"COPY {qualified_table_name} FROM '{file_path}' ( FORMAT {source_format} {options});") - + sql_client.execute_sql( + f"COPY {qualified_table_name} FROM '{file_path}' ( FORMAT" + f" {source_format} {options});" + ) def state(self) -> TLoadJobState: return "completed" @@ -134,15 +144,12 @@ def state(self) -> TLoadJobState: def exception(self) -> str: raise NotImplementedError() -class DuckDbClient(InsertValuesJobClient): +class DuckDbClient(InsertValuesJobClient): capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() def __init__(self, schema: Schema, config: DuckDbClientConfiguration) -> None: - sql_client = DuckDbSqlClient( - config.normalize_dataset_name(schema), - config.credentials - ) + sql_client = DuckDbSqlClient(config.normalize_dataset_name(schema), config.credentials) super().__init__(schema, config, sql_client) self.config: DuckDbClientConfiguration = config self.sql_client: DuckDbSqlClient = sql_client # type: ignore @@ -156,9 +163,17 @@ def start_file_load(self, table: TTableSchema, file_path: str, load_id: str) -> return job def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = None) -> str: - hints_str = " ".join(self.active_hints.get(h, "") for h in self.active_hints.keys() if c.get(h, False) is True) + hints_str = " ".join( + self.active_hints.get(h, "") + for h in self.active_hints.keys() + if c.get(h, False) is True + ) column_name = self.capabilities.escape_identifier(c["name"]) - return f"{column_name} {self.type_mapper.to_db_type(c)} {hints_str} {self._gen_not_null(c.get('nullable', True))}" + return ( + f"{column_name} {self.type_mapper.to_db_type(c)} {hints_str} {self._gen_not_null(c.get('nullable', True))}" + ) - def _from_db_type(self, pq_t: str, precision: Optional[int], scale: Optional[int]) -> TColumnType: + def _from_db_type( + self, pq_t: str, precision: Optional[int], scale: Optional[int] + ) -> TColumnType: return self.type_mapper.from_db_type(pq_t, precision, scale) diff --git a/dlt/destinations/impl/duckdb/factory.py b/dlt/destinations/impl/duckdb/factory.py new file mode 100644 index 0000000000..6a0152df26 --- /dev/null +++ b/dlt/destinations/impl/duckdb/factory.py @@ -0,0 +1,51 @@ +import typing as t + +from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.destinations.impl.duckdb.configuration import DuckDbCredentials, DuckDbClientConfiguration +from dlt.destinations.impl.duckdb import capabilities + +if t.TYPE_CHECKING: + from duckdb import DuckDBPyConnection + from dlt.destinations.impl.duckdb.duck import DuckDbClient + + +class duckdb(Destination[DuckDbClientConfiguration, "DuckDbClient"]): + spec = DuckDbClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["DuckDbClient"]: + from dlt.destinations.impl.duckdb.duck import DuckDbClient + + return DuckDbClient + + def __init__( + self, + credentials: t.Union[ + DuckDbCredentials, t.Dict[str, t.Any], str, "DuckDBPyConnection" + ] = None, + create_indexes: bool = False, + destination_name: t.Optional[str] = None, + environment: t.Optional[str] = None, + **kwargs: t.Any, + ) -> None: + """Configure the DuckDB destination to use in a pipeline. + + All arguments provided here supersede other configuration sources such as environment variables and dlt config files. + + Args: + credentials: Credentials to connect to the duckdb database. 
Can be an instance of `DuckDbCredentials` or + a path to a database file. Use `:memory:` to create an in-memory database or :pipeline: to create a duckdb + in the working folder of the pipeline + create_indexes: Should unique indexes be created, defaults to False + **kwargs: Additional arguments passed to the destination config + """ + super().__init__( + credentials=credentials, + create_indexes=create_indexes, + destination_name=destination_name, + environment=environment, + **kwargs, + ) diff --git a/dlt/destinations/duckdb/sql_client.py b/dlt/destinations/impl/duckdb/sql_client.py similarity index 83% rename from dlt/destinations/duckdb/sql_client.py rename to dlt/destinations/impl/duckdb/sql_client.py index cd2160f676..2863d4943e 100644 --- a/dlt/destinations/duckdb/sql_client.py +++ b/dlt/destinations/impl/duckdb/sql_client.py @@ -4,16 +4,26 @@ from typing import Any, AnyStr, ClassVar, Iterator, Optional, Sequence from dlt.common.destination import DestinationCapabilitiesContext -from dlt.destinations.exceptions import DatabaseTerminalException, DatabaseTransientException, DatabaseUndefinedRelation +from dlt.destinations.exceptions import ( + DatabaseTerminalException, + DatabaseTransientException, + DatabaseUndefinedRelation, +) from dlt.destinations.typing import DBApi, DBApiCursor, DBTransaction, DataFrame -from dlt.destinations.sql_client import SqlClientBase, DBApiCursorImpl, raise_database_error, raise_open_connection_error +from dlt.destinations.sql_client import ( + SqlClientBase, + DBApiCursorImpl, + raise_database_error, + raise_open_connection_error, +) -from dlt.destinations.duckdb import capabilities -from dlt.destinations.duckdb.configuration import DuckDbBaseCredentials +from dlt.destinations.impl.duckdb import capabilities +from dlt.destinations.impl.duckdb.configuration import DuckDbBaseCredentials class DuckDBDBApiCursorImpl(DBApiCursorImpl): """Use native BigQuery data frame support if available""" + native_cursor: duckdb.DuckDBPyConnection # type: ignore vector_size: ClassVar[int] = 2048 @@ -21,7 +31,9 @@ def df(self, chunk_size: int = None, **kwargs: Any) -> DataFrame: if chunk_size is None: return self.native_cursor.df(**kwargs) else: - multiple = chunk_size // self.vector_size + (0 if self.vector_size % chunk_size == 0 else 1) + multiple = chunk_size // self.vector_size + ( + 0 if self.vector_size % chunk_size == 0 else 1 + ) df = self.native_cursor.fetch_df_chunk(multiple, **kwargs) if df.shape[0] == 0: return None @@ -30,7 +42,6 @@ def df(self, chunk_size: int = None, **kwargs: Any) -> DataFrame: class DuckDbSqlClient(SqlClientBase[duckdb.DuckDBPyConnection], DBTransaction): - dbapi: ClassVar[DBApi] = duckdb capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() @@ -44,11 +55,11 @@ def open_connection(self) -> duckdb.DuckDBPyConnection: self._conn = self.credentials.borrow_conn(read_only=self.credentials.read_only) # TODO: apply config settings from credentials self._conn.execute("PRAGMA enable_checkpoint_on_shutdown;") - config={ + config = { "search_path": self.fully_qualified_dataset_name(), "TimeZone": "UTC", - "checkpoint_threshold": "1gb" - } + "checkpoint_threshold": "1gb", + } if config: for k, v in config.items(): try: @@ -91,7 +102,9 @@ def rollback_transaction(self) -> None: def native_connection(self) -> duckdb.DuckDBPyConnection: return self._conn - def execute_sql(self, sql: AnyStr, *args: Any, **kwargs: Any) -> Optional[Sequence[Sequence[Any]]]: + def execute_sql( + self, sql: AnyStr, *args: Any, **kwargs: Any + ) -> 
Optional[Sequence[Sequence[Any]]]: with self.execute_query(sql, *args, **kwargs) as curr: if curr.description is None: return None @@ -130,7 +143,9 @@ def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DB # return None def fully_qualified_dataset_name(self, escape: bool = True) -> str: - return self.capabilities.escape_identifier(self.dataset_name) if escape else self.dataset_name + return ( + self.capabilities.escape_identifier(self.dataset_name) if escape else self.dataset_name + ) @classmethod def _make_database_exception(cls, ex: Exception) -> Exception: @@ -144,7 +159,15 @@ def _make_database_exception(cls, ex: Exception) -> Exception: raise DatabaseUndefinedRelation(ex) # duckdb raises TypeError on malformed query parameters return DatabaseTransientException(duckdb.ProgrammingError(ex)) - elif isinstance(ex, (duckdb.OperationalError, duckdb.InternalError, duckdb.SyntaxException, duckdb.ParserException)): + elif isinstance( + ex, + ( + duckdb.OperationalError, + duckdb.InternalError, + duckdb.SyntaxException, + duckdb.ParserException, + ), + ): term = cls._maybe_make_terminal_exception_from_data_error(ex) if term: return term diff --git a/dlt/destinations/dummy/__init__.py b/dlt/destinations/impl/dummy/__init__.py similarity index 52% rename from dlt/destinations/dummy/__init__.py rename to dlt/destinations/impl/dummy/__init__.py index 7131f0109a..a3152b8d77 100644 --- a/dlt/destinations/dummy/__init__.py +++ b/dlt/destinations/impl/dummy/__init__.py @@ -1,15 +1,17 @@ -from typing import Type - -from dlt.common.schema.schema import Schema from dlt.common.configuration import with_config, known_sections from dlt.common.configuration.accessors import config from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration -from dlt.destinations.dummy.configuration import DummyClientConfiguration +from dlt.destinations.impl.dummy.configuration import DummyClientConfiguration -@with_config(spec=DummyClientConfiguration, sections=(known_sections.DESTINATION, "dummy",)) +@with_config( + spec=DummyClientConfiguration, + sections=( + known_sections.DESTINATION, + "dummy", + ), +) def _configure(config: DummyClientConfiguration = config.value) -> DummyClientConfiguration: return config @@ -20,7 +22,7 @@ def capabilities() -> DestinationCapabilitiesContext: caps.preferred_loader_file_format = config.loader_file_format caps.supported_loader_file_formats = [config.loader_file_format] caps.preferred_staging_file_format = None - caps.supported_staging_file_formats = [] + caps.supported_staging_file_formats = [config.loader_file_format] caps.max_identifier_length = 127 caps.max_column_identifier_length = 127 caps.max_query_length = 8 * 1024 * 1024 @@ -30,14 +32,3 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supports_ddl_transactions = False return caps - - -def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.dummy.dummy import DummyClient - - return DummyClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[DestinationClientConfiguration]: - return DummyClientConfiguration diff --git a/dlt/destinations/dummy/configuration.py b/dlt/destinations/impl/dummy/configuration.py similarity index 79% rename from 
dlt/destinations/dummy/configuration.py rename to dlt/destinations/impl/dummy/configuration.py index 1a8072300c..82dc797126 100644 --- a/dlt/destinations/dummy/configuration.py +++ b/dlt/destinations/impl/dummy/configuration.py @@ -1,20 +1,22 @@ -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Optional, Final from dlt.common.configuration import configspec from dlt.common.destination import TLoaderFileFormat -from dlt.common.destination.reference import DestinationClientConfiguration, CredentialsConfiguration +from dlt.common.destination.reference import ( + DestinationClientConfiguration, + CredentialsConfiguration, +) @configspec class DummyClientCredentials(CredentialsConfiguration): - def __str__(self) -> str: return "/dev/null" @configspec class DummyClientConfiguration(DestinationClientConfiguration): - destination_name: str = "dummy" + destination_type: Final[str] = "dummy" # type: ignore loader_file_format: TLoaderFileFormat = "jsonl" fail_schema_update: bool = False fail_prob: float = 0.0 @@ -28,9 +30,10 @@ class DummyClientConfiguration(DestinationClientConfiguration): credentials: DummyClientCredentials = None if TYPE_CHECKING: + def __init__( self, - destination_name: str = None, + *, credentials: Optional[CredentialsConfiguration] = None, loader_file_format: TLoaderFileFormat = None, fail_schema_update: bool = None, @@ -40,5 +43,6 @@ def __init__( exception_prob: float = None, timeout: float = None, fail_in_init: bool = None, - ) -> None: - ... + destination_name: str = None, + environment: str = None, + ) -> None: ... diff --git a/dlt/destinations/dummy/dummy.py b/dlt/destinations/impl/dummy/dummy.py similarity index 79% rename from dlt/destinations/dummy/dummy.py rename to dlt/destinations/impl/dummy/dummy.py index 92827405ca..367db11e82 100644 --- a/dlt/destinations/dummy/dummy.py +++ b/dlt/destinations/impl/dummy/dummy.py @@ -8,13 +8,24 @@ from dlt.common.schema.typing import TWriteDisposition from dlt.common.storages import FileStorage from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import FollowupJob, NewLoadJob, TLoadJobState, LoadJob, JobClientBase - -from dlt.destinations.exceptions import (LoadJobNotExistsException, LoadJobInvalidStateTransitionException, - DestinationTerminalException, DestinationTransientException) - -from dlt.destinations.dummy import capabilities -from dlt.destinations.dummy.configuration import DummyClientConfiguration +from dlt.common.destination.reference import ( + FollowupJob, + NewLoadJob, + SupportsStagingDestination, + TLoadJobState, + LoadJob, + JobClientBase, +) + +from dlt.destinations.exceptions import ( + LoadJobNotExistsException, + LoadJobInvalidStateTransitionException, + DestinationTerminalException, + DestinationTransientException, +) + +from dlt.destinations.impl.dummy import capabilities +from dlt.destinations.impl.dummy.configuration import DummyClientConfiguration class LoadDummyJob(LoadJob, FollowupJob): @@ -31,7 +42,6 @@ def __init__(self, file_name: str, config: DummyClientConfiguration) -> None: if s == "retry": raise DestinationTransientException(self._exception) - def state(self) -> TLoadJobState: # this should poll the server for a job status, here we simulate various outcomes if self._status == "running": @@ -72,7 +82,7 @@ def retry(self) -> None: JOBS: Dict[str, LoadDummyJob] = {} -class DummyClient(JobClientBase): +class DummyClient(JobClientBase, SupportsStagingDestination): """dummy client storing jobs in memory""" 
capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() @@ -90,10 +100,14 @@ def is_storage_initialized(self) -> bool: def drop_storage(self) -> None: pass - def update_stored_schema(self, only_tables: Iterable[str] = None, expected_update: TSchemaTables = None) -> Optional[TSchemaTables]: + def update_stored_schema( + self, only_tables: Iterable[str] = None, expected_update: TSchemaTables = None + ) -> Optional[TSchemaTables]: applied_update = super().update_stored_schema(only_tables, expected_update) if self.config.fail_schema_update: - raise DestinationTransientException("Raise on schema update due to fail_schema_update config flag") + raise DestinationTransientException( + "Raise on schema update due to fail_schema_update config flag" + ) return applied_update def start_file_load(self, table: TTableSchema, file_path: str, load_id: str) -> LoadJob: @@ -115,7 +129,9 @@ def restore_file_load(self, file_path: str) -> LoadJob: raise LoadJobNotExistsException(job_id) return JOBS[job_id] - def create_table_chain_completed_followup_jobs(self, table_chain: Sequence[TTableSchema]) -> List[NewLoadJob]: + def create_table_chain_completed_followup_jobs( + self, table_chain: Sequence[TTableSchema] + ) -> List[NewLoadJob]: """Creates a list of followup jobs that should be executed after a table chain is completed""" return [] @@ -125,11 +141,10 @@ def complete_load(self, load_id: str) -> None: def __enter__(self) -> "DummyClient": return self - def __exit__(self, exc_type: Type[BaseException], exc_val: BaseException, exc_tb: TracebackType) -> None: + def __exit__( + self, exc_type: Type[BaseException], exc_val: BaseException, exc_tb: TracebackType + ) -> None: pass def _create_job(self, job_id: str) -> LoadDummyJob: - return LoadDummyJob( - job_id, - config=self.config - ) + return LoadDummyJob(job_id, config=self.config) diff --git a/dlt/destinations/impl/dummy/factory.py b/dlt/destinations/impl/dummy/factory.py new file mode 100644 index 0000000000..1c848cf22d --- /dev/null +++ b/dlt/destinations/impl/dummy/factory.py @@ -0,0 +1,39 @@ +import typing as t + +from dlt.common.destination import Destination, DestinationCapabilitiesContext + +from dlt.destinations.impl.dummy.configuration import ( + DummyClientConfiguration, + DummyClientCredentials, +) +from dlt.destinations.impl.dummy import capabilities + +if t.TYPE_CHECKING: + from dlt.destinations.impl.dummy.dummy import DummyClient + + +class dummy(Destination[DummyClientConfiguration, "DummyClient"]): + spec = DummyClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["DummyClient"]: + from dlt.destinations.impl.dummy.dummy import DummyClient + + return DummyClient + + def __init__( + self, + credentials: DummyClientCredentials = None, + destination_name: t.Optional[str] = None, + environment: t.Optional[str] = None, + **kwargs: t.Any, + ) -> None: + super().__init__( + credentials=credentials, + destination_name=destination_name, + environment=environment, + **kwargs, + ) diff --git a/dlt/destinations/impl/filesystem/__init__.py b/dlt/destinations/impl/filesystem/__init__.py new file mode 100644 index 0000000000..12e83216cf --- /dev/null +++ b/dlt/destinations/impl/filesystem/__init__.py @@ -0,0 +1,5 @@ +from dlt.common.destination import DestinationCapabilitiesContext + + +def capabilities() -> DestinationCapabilitiesContext: + return DestinationCapabilitiesContext.generic_capabilities("jsonl") diff --git 
a/dlt/destinations/filesystem/configuration.py b/dlt/destinations/impl/filesystem/configuration.py similarity index 69% rename from dlt/destinations/filesystem/configuration.py rename to dlt/destinations/impl/filesystem/configuration.py index 174dfafb1a..93e5537aab 100644 --- a/dlt/destinations/filesystem/configuration.py +++ b/dlt/destinations/impl/filesystem/configuration.py @@ -3,26 +3,31 @@ from typing import Final, Type, Optional, Any, TYPE_CHECKING from dlt.common.configuration import configspec, resolve_type -from dlt.common.destination.reference import CredentialsConfiguration, DestinationClientStagingConfiguration +from dlt.common.destination.reference import ( + CredentialsConfiguration, + DestinationClientStagingConfiguration, +) from dlt.common.storages import FilesystemConfiguration @configspec -class FilesystemDestinationClientConfiguration(FilesystemConfiguration, DestinationClientStagingConfiguration): # type: ignore[misc] - destination_name: Final[str] = "filesystem" # type: ignore +class FilesystemDestinationClientConfiguration(FilesystemConfiguration, DestinationClientStagingConfiguration): # type: ignore[misc] + destination_type: Final[str] = "filesystem" # type: ignore - @resolve_type('credentials') + @resolve_type("credentials") def resolve_credentials_type(self) -> Type[CredentialsConfiguration]: # use known credentials or empty credentials for unknown protocol return self.PROTOCOL_CREDENTIALS.get(self.protocol) or Optional[CredentialsConfiguration] # type: ignore[return-value] if TYPE_CHECKING: + def __init__( self, - destination_name: str = None, + *, credentials: Optional[Any] = None, dataset_name: str = None, default_schema_name: Optional[str] = None, bucket_url: str = None, - ) -> None: - ... + destination_name: str = None, + environment: str = None, + ) -> None: ... diff --git a/dlt/destinations/impl/filesystem/factory.py b/dlt/destinations/impl/filesystem/factory.py new file mode 100644 index 0000000000..029a5bdda5 --- /dev/null +++ b/dlt/destinations/impl/filesystem/factory.py @@ -0,0 +1,57 @@ +import typing as t + +from dlt.destinations.impl.filesystem.configuration import FilesystemDestinationClientConfiguration +from dlt.destinations.impl.filesystem import capabilities +from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.common.storages.configuration import FileSystemCredentials + +if t.TYPE_CHECKING: + from dlt.destinations.impl.filesystem.filesystem import FilesystemClient + + +class filesystem(Destination[FilesystemDestinationClientConfiguration, "FilesystemClient"]): + spec = FilesystemDestinationClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["FilesystemClient"]: + from dlt.destinations.impl.filesystem.filesystem import FilesystemClient + + return FilesystemClient + + def __init__( + self, + bucket_url: str = None, + credentials: t.Union[FileSystemCredentials, t.Dict[str, t.Any], t.Any] = None, + destination_name: t.Optional[str] = None, + environment: t.Optional[str] = None, + **kwargs: t.Any, + ) -> None: + """Configure the filesystem destination to use in a pipeline and load data to local or remote filesystem. + + All arguments provided here supersede other configuration sources such as environment variables and dlt config files. 
+ + The `bucket_url` determines the protocol to be used: + + - Local folder: `file:///path/to/directory` + - AWS S3 (and S3 compatible storages): `s3://bucket-name + - Azure Blob Storage: `az://container-name + - Google Cloud Storage: `gs://bucket-name + - Memory fs: `memory://m` + + Args: + bucket_url: The fsspec compatible bucket url to use for the destination. + credentials: Credentials to connect to the filesystem. The type of credentials should correspond to + the bucket protocol. For example, for AWS S3, the credentials should be an instance of `AwsCredentials`. + A dictionary with the credentials parameters can also be provided. + **kwargs: Additional arguments passed to the destination config + """ + super().__init__( + bucket_url=bucket_url, + credentials=credentials, + destination_name=destination_name, + environment=environment, + **kwargs, + ) diff --git a/dlt/destinations/filesystem/filesystem.py b/dlt/destinations/impl/filesystem/filesystem.py similarity index 68% rename from dlt/destinations/filesystem/filesystem.py rename to dlt/destinations/impl/filesystem/filesystem.py index 766f384024..5885f8a1ec 100644 --- a/dlt/destinations/filesystem/filesystem.py +++ b/dlt/destinations/impl/filesystem/filesystem.py @@ -7,51 +7,67 @@ from dlt.common import logger from dlt.common.schema import Schema, TSchemaTables, TTableSchema -from dlt.common.storages import FileStorage, LoadStorage, fsspec_from_config +from dlt.common.storages import FileStorage, ParsedLoadJobFileName, fsspec_from_config from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import NewLoadJob, TLoadJobState, LoadJob, JobClientBase, FollowupJob, WithStagingDataset +from dlt.common.destination.reference import ( + NewLoadJob, + TLoadJobState, + LoadJob, + JobClientBase, + FollowupJob, + WithStagingDataset, +) from dlt.destinations.job_impl import EmptyLoadJob -from dlt.destinations.filesystem import capabilities -from dlt.destinations.filesystem.configuration import FilesystemDestinationClientConfiguration +from dlt.destinations.impl.filesystem import capabilities +from dlt.destinations.impl.filesystem.configuration import FilesystemDestinationClientConfiguration from dlt.destinations.job_impl import NewReferenceJob from dlt.destinations import path_utils class LoadFilesystemJob(LoadJob): def __init__( - self, - local_path: str, - dataset_path: str, - *, - config: FilesystemDestinationClientConfiguration, - schema_name: str, - load_id: str + self, + local_path: str, + dataset_path: str, + *, + config: FilesystemDestinationClientConfiguration, + schema_name: str, + load_id: str, ) -> None: file_name = FileStorage.get_file_name_from_file_path(local_path) self.config = config self.dataset_path = dataset_path - self.destination_file_name = LoadFilesystemJob.make_destination_filename(config.layout, file_name, schema_name, load_id) + self.destination_file_name = LoadFilesystemJob.make_destination_filename( + config.layout, file_name, schema_name, load_id + ) super().__init__(file_name) fs_client, _ = fsspec_from_config(config) - self.destination_file_name = LoadFilesystemJob.make_destination_filename(config.layout, file_name, schema_name, load_id) + self.destination_file_name = LoadFilesystemJob.make_destination_filename( + config.layout, file_name, schema_name, load_id + ) item = self.make_remote_path() - logger.info("PUT file {item}") fs_client.put_file(local_path, item) @staticmethod - def make_destination_filename(layout: str, file_name: str, schema_name: str, load_id: 
str) -> str: - job_info = LoadStorage.parse_job_file_name(file_name) - return path_utils.create_path(layout, - schema_name=schema_name, - table_name=job_info.table_name, - load_id=load_id, - file_id=job_info.file_id, - ext=job_info.file_format) + def make_destination_filename( + layout: str, file_name: str, schema_name: str, load_id: str + ) -> str: + job_info = ParsedLoadJobFileName.parse(file_name) + return path_utils.create_path( + layout, + schema_name=schema_name, + table_name=job_info.table_name, + load_id=load_id, + file_id=job_info.file_id, + ext=job_info.file_format, + ) def make_remote_path(self) -> str: - return f"{self.config.protocol}://{posixpath.join(self.dataset_path, self.destination_file_name)}" + return ( + f"{self.config.protocol}://{posixpath.join(self.dataset_path, self.destination_file_name)}" + ) def state(self) -> TLoadJobState: return "completed" @@ -64,7 +80,9 @@ class FollowupFilesystemJob(FollowupJob, LoadFilesystemJob): def create_followup_jobs(self, next_state: str) -> List[NewLoadJob]: jobs = super().create_followup_jobs(next_state) if next_state == "completed": - ref_job = NewReferenceJob(file_name=self.file_name(), status="running", remote_path=self.make_remote_path()) + ref_job = NewReferenceJob( + file_name=self.file_name(), status="running", remote_path=self.make_remote_path() + ) jobs.append(ref_job) return jobs @@ -93,12 +111,13 @@ def drop_storage(self) -> None: def dataset_path(self) -> str: return posixpath.join(self.fs_path, self._dataset_path) - @contextmanager def with_staging_dataset(self) -> Iterator["FilesystemClient"]: current_dataset_path = self._dataset_path try: - self._dataset_path = self.schema.naming.normalize_table_identifier(current_dataset_path + "_staging") + self._dataset_path = self.schema.naming.normalize_table_identifier( + current_dataset_path + "_staging" + ) yield self finally: # restore previous dataset name @@ -113,9 +132,11 @@ def initialize_storage(self, truncate_tables: Iterable[str] = None) -> None: # print(f"TRUNCATE {truncated_dirs}") truncate_prefixes: Set[str] = set() for table in truncate_tables: - table_prefix = self.table_prefix_layout.format(schema_name=self.schema.name, table_name=table) + table_prefix = self.table_prefix_layout.format( + schema_name=self.schema.name, table_name=table + ) truncate_prefixes.add(posixpath.join(self.dataset_path, table_prefix)) - # print(f"TRUNCATE PREFIXES {truncate_prefixes}") + # print(f"TRUNCATE PREFIXES {truncate_prefixes} on {truncate_tables}") for truncate_dir in truncated_dirs: # get files in truncate dirs @@ -124,20 +145,31 @@ def initialize_storage(self, truncate_tables: Iterable[str] = None) -> None: logger.info(f"Will truncate tables in {truncate_dir}") try: all_files = self.fs_client.ls(truncate_dir, detail=False, refresh=True) - logger.info(f"Found {len(all_files)} CANDIDATE files in {truncate_dir}") + # logger.debug(f"Found {len(all_files)} CANDIDATE files in {truncate_dir}") # print(f"in truncate dir {truncate_dir}: {all_files}") for item in all_files: # check every file against all the prefixes for search_prefix in truncate_prefixes: if item.startswith(search_prefix): # NOTE: deleting in chunks on s3 does not raise on access denied, file non existing and probably other errors - # logger.info(f"DEL {item}") # print(f"DEL {item}") - self.fs_client.rm(item) + try: + # NOTE: must use rm_file to get errors on delete + self.fs_client.rm_file(item) + except NotImplementedError: + # not all filesystem implement the above + self.fs_client.rm(item) + if 
self.fs_client.exists(item): + raise FileExistsError(item) except FileNotFoundError: - logger.info(f"Directory or path to truncate tables {truncate_dir} does not exist but it should be created previously!") - - def update_stored_schema(self, only_tables: Iterable[str] = None, expected_update: TSchemaTables = None) -> TSchemaTables: + logger.info( + f"Directory or path to truncate tables {truncate_dir} does not exist but it" + " should be created previously!" + ) + + def update_stored_schema( + self, only_tables: Iterable[str] = None, expected_update: TSchemaTables = None + ) -> TSchemaTables: # create destination dirs for all tables dirs_to_create = self._get_table_dirs(only_tables or self.schema.tables.keys()) for directory in dirs_to_create: @@ -148,7 +180,9 @@ def _get_table_dirs(self, table_names: Iterable[str]) -> Set[str]: """Gets unique directories where table data is stored.""" table_dirs: Set[str] = set() for table_name in table_names: - table_prefix = self.table_prefix_layout.format(schema_name=self.schema.name, table_name=table_name) + table_prefix = self.table_prefix_layout.format( + schema_name=self.schema.name, table_name=table_name + ) destination_dir = posixpath.join(self.dataset_path, table_prefix) # extract the path component table_dirs.add(os.path.dirname(destination_dir)) @@ -164,7 +198,7 @@ def start_file_load(self, table: TTableSchema, file_path: str, load_id: str) -> self.dataset_path, config=self.config, schema_name=self.schema.name, - load_id=load_id + load_id=load_id, ) def restore_file_load(self, file_path: str) -> LoadJob: @@ -179,7 +213,9 @@ def complete_load(self, load_id: str) -> None: def __enter__(self) -> "FilesystemClient": return self - def __exit__(self, exc_type: Type[BaseException], exc_val: BaseException, exc_tb: TracebackType) -> None: + def __exit__( + self, exc_type: Type[BaseException], exc_val: BaseException, exc_tb: TracebackType + ) -> None: pass def should_load_data_to_staging_dataset(self, table: TTableSchema) -> bool: diff --git a/dlt/destinations/motherduck/__init__.py b/dlt/destinations/impl/motherduck/__init__.py similarity index 51% rename from dlt/destinations/motherduck/__init__.py rename to dlt/destinations/impl/motherduck/__init__.py index eae67eaa74..74c0e36ef3 100644 --- a/dlt/destinations/motherduck/__init__.py +++ b/dlt/destinations/impl/motherduck/__init__.py @@ -1,20 +1,7 @@ -from typing import Type - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config from dlt.common.data_writers.escape import escape_postgres_identifier, escape_duckdb_literal from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE -from dlt.destinations.motherduck.configuration import MotherDuckClientConfiguration - - -@with_config(spec=MotherDuckClientConfiguration, sections=(known_sections.DESTINATION, "motherduck",)) -def _configure(config: MotherDuckClientConfiguration = config.value) -> MotherDuckClientConfiguration: - return config - def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() @@ -35,14 +22,3 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supports_truncate_command = False return caps - - -def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: 
- # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.motherduck.motherduck import MotherDuckClient - - return MotherDuckClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[DestinationClientConfiguration]: - return MotherDuckClientConfiguration diff --git a/dlt/destinations/motherduck/configuration.py b/dlt/destinations/impl/motherduck/configuration.py similarity index 65% rename from dlt/destinations/motherduck/configuration.py rename to dlt/destinations/impl/motherduck/configuration.py index 18d480c945..f4ab571e5c 100644 --- a/dlt/destinations/motherduck/configuration.py +++ b/dlt/destinations/impl/motherduck/configuration.py @@ -1,4 +1,4 @@ -from typing import Any, ClassVar, Final, List +from typing import Any, ClassVar, Final, List, TYPE_CHECKING, Optional from dlt.common.configuration import configspec from dlt.common.destination.reference import DestinationClientDwhWithStagingConfiguration @@ -7,7 +7,7 @@ from dlt.common.utils import digest128 from dlt.common.configuration.exceptions import ConfigurationValueError -from dlt.destinations.duckdb.configuration import DuckDbBaseCredentials +from dlt.destinations.impl.duckdb.configuration import DuckDbBaseCredentials MOTHERDUCK_DRIVERNAME = "md" @@ -31,11 +31,13 @@ def _token_to_password(self) -> None: def borrow_conn(self, read_only: bool) -> Any: from duckdb import HTTPException, InvalidInputException + try: return super().borrow_conn(read_only) except (InvalidInputException, HTTPException) as ext_ex: - if 'Failed to download extension' in str(ext_ex) and "motherduck" in str(ext_ex): + if "Failed to download extension" in str(ext_ex) and "motherduck" in str(ext_ex): from importlib.metadata import version as pkg_version + raise MotherduckLocalVersionNotSupported(pkg_version("duckdb")) from ext_ex raise @@ -47,15 +49,20 @@ def parse_native_representation(self, native_value: Any) -> None: def on_resolved(self) -> None: self._token_to_password() if self.drivername == MOTHERDUCK_DRIVERNAME and not self.password: - raise ConfigurationValueError("Motherduck schema 'md' was specified without corresponding token or password. The required format of connection string is: md:///?token=") + raise ConfigurationValueError( + "Motherduck schema 'md' was specified without corresponding token or password. The" + " required format of connection string is: md:///?token=" + ) @configspec class MotherDuckClientConfiguration(DestinationClientDwhWithStagingConfiguration): - destination_name: Final[str] = "motherduck" # type: ignore + destination_type: Final[str] = "motherduck" # type: ignore credentials: MotherDuckCredentials - create_indexes: bool = False # should unique indexes be created, this slows loading down massively + create_indexes: bool = ( + False # should unique indexes be created, this slows loading down massively + ) def fingerprint(self) -> str: """Returns a fingerprint of user access token""" @@ -63,8 +70,24 @@ def fingerprint(self) -> str: return digest128(self.credentials.password) return "" + if TYPE_CHECKING: + + def __init__( + self, + *, + credentials: Optional[MotherDuckCredentials] = None, + dataset_name: str = None, + default_schema_name: Optional[str] = None, + create_indexes: Optional[bool] = None, + destination_name: str = None, + environment: str = None, + ) -> None: ... 
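
A minimal usage sketch for the configuration above, reached through the `motherduck` factory added later in this changeset; the database name and token below are hypothetical placeholders, and real values would normally come from secrets.toml or environment variables:

    import dlt
    # factory class defined in dlt/destinations/impl/motherduck/factory.py (added in this changeset)
    from dlt.destinations.impl.motherduck.factory import motherduck

    # hypothetical "md" connection string carrying a database name and a service token
    md = motherduck(
        credentials="md:///dlt_data?token=my_service_token",
        create_indexes=False,
    )

    # the factory instance can be passed directly as a pipeline destination
    pipeline = dlt.pipeline(pipeline_name="chess", destination=md, dataset_name="games")
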
+ class MotherduckLocalVersionNotSupported(DestinationTerminalException): def __init__(self, duckdb_version: str) -> None: self.duckdb_version = duckdb_version - super().__init__(f"Looks like your local duckdb version ({duckdb_version}) is not supported by Motherduck") + super().__init__( + f"Looks like your local duckdb version ({duckdb_version}) is not supported by" + " Motherduck" + ) diff --git a/dlt/destinations/impl/motherduck/factory.py b/dlt/destinations/impl/motherduck/factory.py new file mode 100644 index 0000000000..5e35f69d75 --- /dev/null +++ b/dlt/destinations/impl/motherduck/factory.py @@ -0,0 +1,53 @@ +import typing as t + +from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.destinations.impl.motherduck.configuration import ( + MotherDuckCredentials, + MotherDuckClientConfiguration, +) +from dlt.destinations.impl.motherduck import capabilities + +if t.TYPE_CHECKING: + from duckdb import DuckDBPyConnection + from dlt.destinations.impl.motherduck.motherduck import MotherDuckClient + + +class motherduck(Destination[MotherDuckClientConfiguration, "MotherDuckClient"]): + spec = MotherDuckClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["MotherDuckClient"]: + from dlt.destinations.impl.motherduck.motherduck import MotherDuckClient + + return MotherDuckClient + + def __init__( + self, + credentials: t.Union[ + MotherDuckCredentials, str, t.Dict[str, t.Any], "DuckDBPyConnection" + ] = None, + create_indexes: bool = False, + destination_name: t.Optional[str] = None, + environment: t.Optional[str] = None, + **kwargs: t.Any, + ) -> None: + """Configure the MotherDuck destination to use in a pipeline. + + All arguments provided here supersede other configuration sources such as environment variables and dlt config files. + + Args: + credentials: Credentials to connect to the MotherDuck database. 
Can be an instance of `MotherDuckCredentials` or + a connection string in the format `md:///?token=` + create_indexes: Should unique indexes be created + **kwargs: Additional arguments passed to the destination config + """ + super().__init__( + credentials=credentials, + create_indexes=create_indexes, + destination_name=destination_name, + environment=environment, + **kwargs, + ) diff --git a/dlt/destinations/motherduck/motherduck.py b/dlt/destinations/impl/motherduck/motherduck.py similarity index 57% rename from dlt/destinations/motherduck/motherduck.py rename to dlt/destinations/impl/motherduck/motherduck.py index 93c0ed163b..c695d9715e 100644 --- a/dlt/destinations/motherduck/motherduck.py +++ b/dlt/destinations/impl/motherduck/motherduck.py @@ -4,21 +4,17 @@ from dlt.common.schema import Schema -from dlt.destinations.duckdb.duck import DuckDbClient -from dlt.destinations.motherduck import capabilities -from dlt.destinations.motherduck.sql_client import MotherDuckSqlClient -from dlt.destinations.motherduck.configuration import MotherDuckClientConfiguration +from dlt.destinations.impl.duckdb.duck import DuckDbClient +from dlt.destinations.impl.motherduck import capabilities +from dlt.destinations.impl.motherduck.sql_client import MotherDuckSqlClient +from dlt.destinations.impl.motherduck.configuration import MotherDuckClientConfiguration class MotherDuckClient(DuckDbClient): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() def __init__(self, schema: Schema, config: MotherDuckClientConfiguration) -> None: super().__init__(schema, config) # type: ignore - sql_client = MotherDuckSqlClient( - config.normalize_dataset_name(schema), - config.credentials - ) + sql_client = MotherDuckSqlClient(config.normalize_dataset_name(schema), config.credentials) self.config: MotherDuckClientConfiguration = config # type: ignore self.sql_client: MotherDuckSqlClient = sql_client diff --git a/dlt/destinations/impl/motherduck/sql_client.py b/dlt/destinations/impl/motherduck/sql_client.py new file mode 100644 index 0000000000..7990f90947 --- /dev/null +++ b/dlt/destinations/impl/motherduck/sql_client.py @@ -0,0 +1,41 @@ +import duckdb + +from contextlib import contextmanager +from typing import Any, AnyStr, ClassVar, Iterator, Optional, Sequence +from dlt.common.destination import DestinationCapabilitiesContext + +from dlt.destinations.exceptions import ( + DatabaseTerminalException, + DatabaseTransientException, + DatabaseUndefinedRelation, +) +from dlt.destinations.typing import DBApi, DBApiCursor, DBTransaction, DataFrame +from dlt.destinations.sql_client import ( + SqlClientBase, + DBApiCursorImpl, + raise_database_error, + raise_open_connection_error, +) + +from dlt.destinations.impl.duckdb.sql_client import DuckDbSqlClient, DuckDBDBApiCursorImpl +from dlt.destinations.impl.motherduck import capabilities +from dlt.destinations.impl.motherduck.configuration import MotherDuckCredentials + + +class MotherDuckSqlClient(DuckDbSqlClient): + capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() + + def __init__(self, dataset_name: str, credentials: MotherDuckCredentials) -> None: + super().__init__(dataset_name, credentials) + self.database_name = credentials.database + + def fully_qualified_dataset_name(self, escape: bool = True) -> str: + database_name = ( + self.capabilities.escape_identifier(self.database_name) + if escape + else self.database_name + ) + dataset_name = ( + self.capabilities.escape_identifier(self.dataset_name) if escape else self.dataset_name + ) + 
return f"{database_name}.{dataset_name}" diff --git a/dlt/destinations/postgres/README.md b/dlt/destinations/impl/mssql/README.md similarity index 93% rename from dlt/destinations/postgres/README.md rename to dlt/destinations/impl/mssql/README.md index 4348265088..2d2393eea7 100644 --- a/dlt/destinations/postgres/README.md +++ b/dlt/destinations/impl/mssql/README.md @@ -1,5 +1,5 @@ # loader account setup 1. Create new database `CREATE DATABASE dlt_data` -2. Create new user, set password `CREATE USER loader WITH PASSWORD 'loader';` +2. Create new user, set password `CREATE USER loader WITH PASSWORD = 'loader';` 3. Set as database owner (we could set lower permission) `ALTER DATABASE dlt_data OWNER TO loader` diff --git a/dlt/destinations/mssql/__init__.py b/dlt/destinations/impl/mssql/__init__.py similarity index 55% rename from dlt/destinations/mssql/__init__.py rename to dlt/destinations/impl/mssql/__init__.py index 56051a324e..e9d9fe24fd 100644 --- a/dlt/destinations/mssql/__init__.py +++ b/dlt/destinations/impl/mssql/__init__.py @@ -1,21 +1,8 @@ -from typing import Type - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config from dlt.common.data_writers.escape import escape_postgres_identifier, escape_mssql_literal from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE from dlt.common.wei import EVM_DECIMAL_PRECISION -from dlt.destinations.mssql.configuration import MsSqlClientConfiguration - - -@with_config(spec=MsSqlClientConfiguration, sections=(known_sections.DESTINATION, "mssql",)) -def _configure(config: MsSqlClientConfiguration = config.value) -> MsSqlClientConfiguration: - return config - def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() @@ -32,21 +19,10 @@ def capabilities() -> DestinationCapabilitiesContext: caps.max_column_identifier_length = 128 caps.max_query_length = 4 * 1024 * 64 * 1024 caps.is_max_query_length_in_bytes = True - caps.max_text_data_type_length = 2 ** 30 - 1 + caps.max_text_data_type_length = 2**30 - 1 caps.is_max_text_data_type_length_in_bytes = False caps.supports_ddl_transactions = True caps.max_rows_per_insert = 1000 caps.timestamp_precision = 7 return caps - - -def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.mssql.mssql import MsSqlClient - - return MsSqlClient(schema, _configure(initial_config)) # type: ignore[arg-type] - - -def spec() -> Type[DestinationClientConfiguration]: - return MsSqlClientConfiguration diff --git a/dlt/destinations/mssql/configuration.py b/dlt/destinations/impl/mssql/configuration.py similarity index 59% rename from dlt/destinations/mssql/configuration.py rename to dlt/destinations/impl/mssql/configuration.py index 17f10ddff0..f33aca4b82 100644 --- a/dlt/destinations/mssql/configuration.py +++ b/dlt/destinations/impl/mssql/configuration.py @@ -1,4 +1,4 @@ -from typing import Final, ClassVar, Any, List, Optional +from typing import Final, ClassVar, Any, List, Optional, TYPE_CHECKING from sqlalchemy.engine import URL from dlt.common.configuration import configspec @@ -10,6 +10,9 @@ from 
dlt.common.destination.reference import DestinationClientDwhWithStagingConfiguration +SUPPORTED_DRIVERS = ["ODBC Driver 18 for SQL Server", "ODBC Driver 17 for SQL Server"] + + @configspec class MsSqlCredentials(ConnectionStringCredentials): drivername: Final[str] = "mssql" # type: ignore @@ -17,13 +20,21 @@ class MsSqlCredentials(ConnectionStringCredentials): host: str port: int = 1433 connect_timeout: int = 15 - odbc_driver: str = None + driver: str = None __config_gen_annotations__: ClassVar[List[str]] = ["port", "connect_timeout"] def parse_native_representation(self, native_value: Any) -> None: # TODO: Support ODBC connection string or sqlalchemy URL super().parse_native_representation(native_value) + if self.query is not None: + self.query = {k.lower(): v for k, v in self.query.items()} # Make case-insensitive. + if "driver" in self.query and self.query.get("driver") not in SUPPORTED_DRIVERS: + raise SystemConfigurationException( + f"""The specified driver "{self.query.get('driver')}" is not supported.""" + f" Choose one of the supported drivers: {', '.join(SUPPORTED_DRIVERS)}." + ) + self.driver = self.query.get("driver", self.driver) self.connect_timeout = int(self.query.get("connect_timeout", self.connect_timeout)) if not self.is_partial(): self.resolve() @@ -37,43 +48,42 @@ def to_url(self) -> URL: return url def on_partial(self) -> None: - self.odbc_driver = self._get_odbc_driver() + self.driver = self._get_driver() if not self.is_partial(): self.resolve() - def _get_odbc_driver(self) -> str: - if self.odbc_driver: - return self.odbc_driver + def _get_driver(self) -> str: + if self.driver: + return self.driver # Pick a default driver if available - supported_drivers = ['ODBC Driver 18 for SQL Server', 'ODBC Driver 17 for SQL Server'] import pyodbc + available_drivers = pyodbc.drivers() - for driver in supported_drivers: - if driver in available_drivers: - return driver + for d in SUPPORTED_DRIVERS: + if d in available_drivers: + return d docs_url = "https://learn.microsoft.com/en-us/sql/connect/odbc/download-odbc-driver-for-sql-server?view=sql-server-ver16" raise SystemConfigurationException( - f"No supported ODBC driver found for MS SQL Server. " - f"See {docs_url} for information on how to install the '{supported_drivers[0]}' on your platform." + f"No supported ODBC driver found for MS SQL Server. See {docs_url} for information on" + f" how to install the '{SUPPORTED_DRIVERS[0]}' on your platform." 
) def to_odbc_dsn(self) -> str: params = { - "DRIVER": self.odbc_driver, + "DRIVER": self.driver, "SERVER": f"{self.host},{self.port}", "DATABASE": self.database, "UID": self.username, "PWD": self.password, } - if self.query: - params.update(self.query) + if self.query is not None: + params.update({k.upper(): v for k, v in self.query.items()}) return ";".join([f"{k}={v}" for k, v in params.items()]) - @configspec class MsSqlClientConfiguration(DestinationClientDwhWithStagingConfiguration): - destination_name: Final[str] = "mssql" # type: ignore + destination_type: Final[str] = "mssql" # type: ignore credentials: MsSqlCredentials create_indexes: bool = False @@ -83,3 +93,16 @@ def fingerprint(self) -> str: if self.credentials and self.credentials.host: return digest128(self.credentials.host) return "" + + if TYPE_CHECKING: + + def __init__( + self, + *, + credentials: Optional[MsSqlCredentials] = None, + dataset_name: str = None, + default_schema_name: Optional[str] = None, + create_indexes: Optional[bool] = None, + destination_name: str = None, + environment: str = None, + ) -> None: ... diff --git a/dlt/destinations/impl/mssql/factory.py b/dlt/destinations/impl/mssql/factory.py new file mode 100644 index 0000000000..2e19d7c2a8 --- /dev/null +++ b/dlt/destinations/impl/mssql/factory.py @@ -0,0 +1,48 @@ +import typing as t + +from dlt.common.destination import Destination, DestinationCapabilitiesContext + +from dlt.destinations.impl.mssql.configuration import MsSqlCredentials, MsSqlClientConfiguration +from dlt.destinations.impl.mssql import capabilities + +if t.TYPE_CHECKING: + from dlt.destinations.impl.mssql.mssql import MsSqlClient + + +class mssql(Destination[MsSqlClientConfiguration, "MsSqlClient"]): + spec = MsSqlClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["MsSqlClient"]: + from dlt.destinations.impl.mssql.mssql import MsSqlClient + + return MsSqlClient + + def __init__( + self, + credentials: t.Union[MsSqlCredentials, t.Dict[str, t.Any], str] = None, + create_indexes: bool = True, + destination_name: t.Optional[str] = None, + environment: t.Optional[str] = None, + **kwargs: t.Any, + ) -> None: + """Configure the MsSql destination to use in a pipeline. + + All arguments provided here supersede other configuration sources such as environment variables and dlt config files. + + Args: + credentials: Credentials to connect to the mssql database. 
Can be an instance of `MsSqlCredentials` or + a connection string in the format `mssql://user:password@host:port/database` + create_indexes: Should unique indexes be created + **kwargs: Additional arguments passed to the destination config + """ + super().__init__( + credentials=credentials, + create_indexes=create_indexes, + destination_name=destination_name, + environment=environment, + **kwargs, + ) diff --git a/dlt/destinations/mssql/mssql.py b/dlt/destinations/impl/mssql/mssql.py similarity index 67% rename from dlt/destinations/mssql/mssql.py rename to dlt/destinations/impl/mssql/mssql.py index cd999441ff..7561003fb4 100644 --- a/dlt/destinations/mssql/mssql.py +++ b/dlt/destinations/impl/mssql/mssql.py @@ -12,16 +12,14 @@ from dlt.destinations.insert_job_client import InsertValuesJobClient -from dlt.destinations.mssql import capabilities -from dlt.destinations.mssql.sql_client import PyOdbcMsSqlClient -from dlt.destinations.mssql.configuration import MsSqlClientConfiguration +from dlt.destinations.impl.mssql import capabilities +from dlt.destinations.impl.mssql.sql_client import PyOdbcMsSqlClient +from dlt.destinations.impl.mssql.configuration import MsSqlClientConfiguration from dlt.destinations.sql_client import SqlClientBase from dlt.destinations.type_mapping import TypeMapper -HINT_TO_MSSQL_ATTR: Dict[TColumnHint, str] = { - "unique": "UNIQUE" -} +HINT_TO_MSSQL_ATTR: Dict[TColumnHint, str] = {"unique": "UNIQUE"} class MsSqlTypeMapper(TypeMapper): @@ -44,7 +42,7 @@ class MsSqlTypeMapper(TypeMapper): "binary": "varbinary(%i)", "decimal": "decimal(%i,%i)", "time": "time(%i)", - "wei": "decimal(%i,%i)" + "wei": "decimal(%i,%i)", } dbt_to_sct = { @@ -62,7 +60,9 @@ class MsSqlTypeMapper(TypeMapper): "int": "bigint", } - def to_db_integer_type(self, precision: Optional[int], table_format: TTableFormat = None) -> str: + def to_db_integer_type( + self, precision: Optional[int], table_format: TTableFormat = None + ) -> str: if precision is None: return "bigint" if precision <= 8: @@ -73,7 +73,9 @@ def to_db_integer_type(self, precision: Optional[int], table_format: TTableForma return "int" return "bigint" - def from_db_type(self, db_type: str, precision: Optional[int], scale: Optional[int]) -> TColumnType: + def from_db_type( + self, db_type: str, precision: Optional[int], scale: Optional[int] + ) -> TColumnType: if db_type == "numeric": if (precision, scale) == self.capabilities.wei_precision: return dict(data_type="wei") @@ -81,9 +83,13 @@ def from_db_type(self, db_type: str, precision: Optional[int], scale: Optional[i class MsSqlStagingCopyJob(SqlStagingCopyJob): - @classmethod - def generate_sql(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClientBase[Any], params: Optional[SqlJobParams] = None) -> List[str]: + def generate_sql( + cls, + table_chain: Sequence[TTableSchema], + sql_client: SqlClientBase[Any], + params: Optional[SqlJobParams] = None, + ) -> List[str]: sql: List[str] = [] for table in table_chain: with sql_client.with_staging_dataset(staging=True): @@ -92,7 +98,10 @@ def generate_sql(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClient # drop destination table sql.append(f"DROP TABLE IF EXISTS {table_name};") # moving staging table to destination schema - sql.append(f"ALTER SCHEMA {sql_client.fully_qualified_dataset_name()} TRANSFER {staging_table_name};") + sql.append( + f"ALTER SCHEMA {sql_client.fully_qualified_dataset_name()} TRANSFER" + f" {staging_table_name};" + ) # recreate staging table sql.append(f"SELECT * INTO {staging_table_name} FROM 
{table_name} WHERE 1 = 0;") return sql @@ -100,13 +109,24 @@ def generate_sql(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClient class MsSqlMergeJob(SqlMergeJob): @classmethod - def gen_key_table_clauses(cls, root_table_name: str, staging_root_table_name: str, key_clauses: Sequence[str], for_delete: bool) -> List[str]: - """Generate sql clauses that may be used to select or delete rows in root table of destination dataset - """ + def gen_key_table_clauses( + cls, + root_table_name: str, + staging_root_table_name: str, + key_clauses: Sequence[str], + for_delete: bool, + ) -> List[str]: + """Generate sql clauses that may be used to select or delete rows in root table of destination dataset""" if for_delete: # MS SQL doesn't support alias in DELETE FROM - return [f"FROM {root_table_name} WHERE EXISTS (SELECT 1 FROM {staging_root_table_name} WHERE {' OR '.join([c.format(d=root_table_name,s=staging_root_table_name) for c in key_clauses])})"] - return SqlMergeJob.gen_key_table_clauses(root_table_name, staging_root_table_name, key_clauses, for_delete) + return [ + f"FROM {root_table_name} WHERE EXISTS (SELECT 1 FROM" + f" {staging_root_table_name} WHERE" + f" {' OR '.join([c.format(d=root_table_name,s=staging_root_table_name) for c in key_clauses])})" + ] + return SqlMergeJob.gen_key_table_clauses( + root_table_name, staging_root_table_name, key_clauses, for_delete + ) @classmethod def _to_temp_table(cls, select_sql: str, temp_table_name: str) -> str: @@ -115,18 +135,14 @@ def _to_temp_table(cls, select_sql: str, temp_table_name: str) -> str: @classmethod def _new_temp_table_name(cls, name_prefix: str) -> str: name = SqlMergeJob._new_temp_table_name(name_prefix) - return '#' + name + return "#" + name class MsSqlClient(InsertValuesJobClient): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() def __init__(self, schema: Schema, config: MsSqlClientConfiguration) -> None: - sql_client = PyOdbcMsSqlClient( - config.normalize_dataset_name(schema), - config.credentials - ) + sql_client = PyOdbcMsSqlClient(config.normalize_dataset_name(schema), config.credentials) super().__init__(schema, config, sql_client) self.config: MsSqlClientConfiguration = config self.sql_client = sql_client @@ -136,9 +152,13 @@ def __init__(self, schema: Schema, config: MsSqlClientConfiguration) -> None: def _create_merge_followup_jobs(self, table_chain: Sequence[TTableSchema]) -> List[NewLoadJob]: return [MsSqlMergeJob.from_table_chain(table_chain, self.sql_client)] - def _make_add_column_sql(self, new_columns: Sequence[TColumnSchema], table_format: TTableFormat = None) -> List[str]: + def _make_add_column_sql( + self, new_columns: Sequence[TColumnSchema], table_format: TTableFormat = None + ) -> List[str]: # Override because mssql requires multiple columns in a single ADD COLUMN clause - return ["ADD \n" + ",\n".join(self._get_column_def_sql(c, table_format) for c in new_columns)] + return [ + "ADD \n" + ",\n".join(self._get_column_def_sql(c, table_format) for c in new_columns) + ] def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = None) -> str: sc_type = c["data_type"] @@ -148,14 +168,22 @@ def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = Non else: db_type = self.type_mapper.to_db_type(c) - hints_str = " ".join(self.active_hints.get(h, "") for h in self.active_hints.keys() if c.get(h, False) is True) + hints_str = " ".join( + self.active_hints.get(h, "") + for h in self.active_hints.keys() + if c.get(h, False) is True + ) column_name = 
self.capabilities.escape_identifier(c["name"]) return f"{column_name} {db_type} {hints_str} {self._gen_not_null(c['nullable'])}" - def _create_replace_followup_jobs(self, table_chain: Sequence[TTableSchema]) -> List[NewLoadJob]: + def _create_replace_followup_jobs( + self, table_chain: Sequence[TTableSchema] + ) -> List[NewLoadJob]: if self.config.replace_strategy == "staging-optimized": return [MsSqlStagingCopyJob.from_table_chain(table_chain, self.sql_client)] return super()._create_replace_followup_jobs(table_chain) - def _from_db_type(self, pq_t: str, precision: Optional[int], scale: Optional[int]) -> TColumnType: + def _from_db_type( + self, pq_t: str, precision: Optional[int], scale: Optional[int] + ) -> TColumnType: return self.type_mapper.from_db_type(pq_t, precision, scale) diff --git a/dlt/destinations/mssql/sql_client.py b/dlt/destinations/impl/mssql/sql_client.py similarity index 82% rename from dlt/destinations/mssql/sql_client.py rename to dlt/destinations/impl/mssql/sql_client.py index 4dd983a334..427518feeb 100644 --- a/dlt/destinations/mssql/sql_client.py +++ b/dlt/destinations/impl/mssql/sql_client.py @@ -9,24 +9,39 @@ from contextlib import contextmanager from typing import Any, AnyStr, ClassVar, Iterator, Optional, Sequence -from dlt.destinations.exceptions import DatabaseTerminalException, DatabaseTransientException, DatabaseUndefinedRelation +from dlt.destinations.exceptions import ( + DatabaseTerminalException, + DatabaseTransientException, + DatabaseUndefinedRelation, +) from dlt.destinations.typing import DBApi, DBApiCursor, DBTransaction -from dlt.destinations.sql_client import DBApiCursorImpl, SqlClientBase, raise_database_error, raise_open_connection_error +from dlt.destinations.sql_client import ( + DBApiCursorImpl, + SqlClientBase, + raise_database_error, + raise_open_connection_error, +) -from dlt.destinations.mssql.configuration import MsSqlCredentials -from dlt.destinations.mssql import capabilities +from dlt.destinations.impl.mssql.configuration import MsSqlCredentials +from dlt.destinations.impl.mssql import capabilities def handle_datetimeoffset(dto_value: bytes) -> datetime: # ref: https://github.com/mkleehammer/pyodbc/issues/134#issuecomment-281739794 tup = struct.unpack("<6hI2h", dto_value) # e.g., (2017, 3, 16, 10, 35, 18, 500000000, -6, 0) return datetime( - tup[0], tup[1], tup[2], tup[3], tup[4], tup[5], tup[6] // 1000, timezone(timedelta(hours=tup[7], minutes=tup[8])) + tup[0], + tup[1], + tup[2], + tup[3], + tup[4], + tup[5], + tup[6] // 1000, + timezone(timedelta(hours=tup[7], minutes=tup[8])), ) class PyOdbcMsSqlClient(SqlClientBase[pyodbc.Connection], DBTransaction): - dbapi: ClassVar[DBApi] = pyodbc capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() @@ -79,13 +94,15 @@ def drop_dataset(self) -> None: # MS Sql doesn't support DROP ... 
CASCADE, drop tables in the schema first # Drop all views rows = self.execute_sql( - "SELECT table_name FROM information_schema.views WHERE table_schema = %s;", self.dataset_name + "SELECT table_name FROM information_schema.views WHERE table_schema = %s;", + self.dataset_name, ) view_names = [row[0] for row in rows] self._drop_views(*view_names) # Drop all tables rows = self.execute_sql( - "SELECT table_name FROM information_schema.tables WHERE table_schema = %s;", self.dataset_name + "SELECT table_name FROM information_schema.tables WHERE table_schema = %s;", + self.dataset_name, ) table_names = [row[0] for row in rows] self.drop_tables(*table_names) @@ -95,10 +112,14 @@ def drop_dataset(self) -> None: def _drop_views(self, *tables: str) -> None: if not tables: return - statements = [f"DROP VIEW IF EXISTS {self.make_qualified_table_name(table)};" for table in tables] + statements = [ + f"DROP VIEW IF EXISTS {self.make_qualified_table_name(table)};" for table in tables + ] self.execute_fragments(statements) - def execute_sql(self, sql: AnyStr, *args: Any, **kwargs: Any) -> Optional[Sequence[Sequence[Any]]]: + def execute_sql( + self, sql: AnyStr, *args: Any, **kwargs: Any + ) -> Optional[Sequence[Sequence[Any]]]: with self.execute_query(sql, *args, **kwargs) as curr: if curr.description is None: return None @@ -126,7 +147,9 @@ def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DB raise outer def fully_qualified_dataset_name(self, escape: bool = True) -> str: - return self.capabilities.escape_identifier(self.dataset_name) if escape else self.dataset_name + return ( + self.capabilities.escape_identifier(self.dataset_name) if escape else self.dataset_name + ) @classmethod def _make_database_exception(cls, ex: Exception) -> Exception: diff --git a/dlt/destinations/mssql/README.md b/dlt/destinations/impl/postgres/README.md similarity index 100% rename from dlt/destinations/mssql/README.md rename to dlt/destinations/impl/postgres/README.md diff --git a/dlt/destinations/postgres/__init__.py b/dlt/destinations/impl/postgres/__init__.py similarity index 55% rename from dlt/destinations/postgres/__init__.py rename to dlt/destinations/impl/postgres/__init__.py index e8904c075f..43e6af1996 100644 --- a/dlt/destinations/postgres/__init__.py +++ b/dlt/destinations/impl/postgres/__init__.py @@ -1,21 +1,9 @@ -from typing import Type - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config from dlt.common.data_writers.escape import escape_postgres_identifier, escape_postgres_literal from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE from dlt.common.wei import EVM_DECIMAL_PRECISION -from dlt.destinations.postgres.configuration import PostgresClientConfiguration - - -@with_config(spec=PostgresClientConfiguration, sections=(known_sections.DESTINATION, "postgres",)) -def _configure(config: PostgresClientConfiguration = config.value) -> PostgresClientConfiguration: - return config - def capabilities() -> DestinationCapabilitiesContext: # https://www.postgresql.org/docs/current/limits.html @@ -27,7 +15,7 @@ def capabilities() -> DestinationCapabilitiesContext: caps.escape_identifier = escape_postgres_identifier caps.escape_literal = escape_postgres_literal caps.decimal_precision = 
(DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) - caps.wei_precision = (2*EVM_DECIMAL_PRECISION, EVM_DECIMAL_PRECISION) + caps.wei_precision = (2 * EVM_DECIMAL_PRECISION, EVM_DECIMAL_PRECISION) caps.max_identifier_length = 63 caps.max_column_identifier_length = 63 caps.max_query_length = 32 * 1024 * 1024 @@ -37,14 +25,3 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supports_ddl_transactions = True return caps - - -def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.postgres.postgres import PostgresClient - - return PostgresClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[DestinationClientConfiguration]: - return PostgresClientConfiguration diff --git a/dlt/destinations/postgres/configuration.py b/dlt/destinations/impl/postgres/configuration.py similarity index 91% rename from dlt/destinations/postgres/configuration.py rename to dlt/destinations/impl/postgres/configuration.py index 4204ce1c38..1433d9f641 100644 --- a/dlt/destinations/postgres/configuration.py +++ b/dlt/destinations/impl/postgres/configuration.py @@ -36,7 +36,7 @@ def to_url(self) -> URL: @configspec class PostgresClientConfiguration(DestinationClientDwhWithStagingConfiguration): - destination_name: Final[str] = "postgres" # type: ignore + destination_type: Final[str] = "postgres" # type: ignore credentials: PostgresCredentials create_indexes: bool = True @@ -48,12 +48,14 @@ def fingerprint(self) -> str: return "" if TYPE_CHECKING: + def __init__( self, - destination_name: str = None, + *, credentials: PostgresCredentials = None, dataset_name: str = None, default_schema_name: str = None, - create_indexes: bool = True - ) -> None: - ... + create_indexes: bool = True, + destination_name: str = None, + environment: str = None, + ) -> None: ... diff --git a/dlt/destinations/impl/postgres/factory.py b/dlt/destinations/impl/postgres/factory.py new file mode 100644 index 0000000000..68d72f890a --- /dev/null +++ b/dlt/destinations/impl/postgres/factory.py @@ -0,0 +1,51 @@ +import typing as t + +from dlt.common.destination import Destination, DestinationCapabilitiesContext + +from dlt.destinations.impl.postgres.configuration import ( + PostgresCredentials, + PostgresClientConfiguration, +) +from dlt.destinations.impl.postgres import capabilities + +if t.TYPE_CHECKING: + from dlt.destinations.impl.postgres.postgres import PostgresClient + + +class postgres(Destination[PostgresClientConfiguration, "PostgresClient"]): + spec = PostgresClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["PostgresClient"]: + from dlt.destinations.impl.postgres.postgres import PostgresClient + + return PostgresClient + + def __init__( + self, + credentials: t.Union[PostgresCredentials, t.Dict[str, t.Any], str] = None, + create_indexes: bool = True, + destination_name: t.Optional[str] = None, + environment: t.Optional[str] = None, + **kwargs: t.Any, + ) -> None: + """Configure the Postgres destination to use in a pipeline. + + All arguments provided here supersede other configuration sources such as environment variables and dlt config files. + + Args: + credentials: Credentials to connect to the postgres database. 
Can be an instance of `PostgresCredentials` or + a connection string in the format `postgres://user:password@host:port/database` + create_indexes: Should unique indexes be created + **kwargs: Additional arguments passed to the destination config + """ + super().__init__( + credentials=credentials, + create_indexes=create_indexes, + destination_name=destination_name, + environment=environment, + **kwargs, + ) diff --git a/dlt/destinations/postgres/postgres.py b/dlt/destinations/impl/postgres/postgres.py similarity index 70% rename from dlt/destinations/postgres/postgres.py rename to dlt/destinations/impl/postgres/postgres.py index 2812d1d4c4..f8fa3e341a 100644 --- a/dlt/destinations/postgres/postgres.py +++ b/dlt/destinations/impl/postgres/postgres.py @@ -11,16 +11,15 @@ from dlt.destinations.insert_job_client import InsertValuesJobClient -from dlt.destinations.postgres import capabilities -from dlt.destinations.postgres.sql_client import Psycopg2SqlClient -from dlt.destinations.postgres.configuration import PostgresClientConfiguration +from dlt.destinations.impl.postgres import capabilities +from dlt.destinations.impl.postgres.sql_client import Psycopg2SqlClient +from dlt.destinations.impl.postgres.configuration import PostgresClientConfiguration from dlt.destinations.sql_client import SqlClientBase from dlt.destinations.type_mapping import TypeMapper -HINT_TO_POSTGRES_ATTR: Dict[TColumnHint, str] = { - "unique": "UNIQUE" -} +HINT_TO_POSTGRES_ATTR: Dict[TColumnHint, str] = {"unique": "UNIQUE"} + class PostgresTypeMapper(TypeMapper): sct_to_unbound_dbt = { @@ -40,7 +39,7 @@ class PostgresTypeMapper(TypeMapper): "timestamp": "timestamp (%i) with time zone", "decimal": "numeric(%i,%i)", "time": "time (%i) without time zone", - "wei": "numeric(%i,%i)" + "wei": "numeric(%i,%i)", } dbt_to_sct = { @@ -59,7 +58,9 @@ class PostgresTypeMapper(TypeMapper): "integer": "bigint", } - def to_db_integer_type(self, precision: Optional[int], table_format: TTableFormat = None) -> str: + def to_db_integer_type( + self, precision: Optional[int], table_format: TTableFormat = None + ) -> str: if precision is None: return "bigint" # Precision is number of bits @@ -69,7 +70,9 @@ def to_db_integer_type(self, precision: Optional[int], table_format: TTableForma return "integer" return "bigint" - def from_db_type(self, db_type: str, precision: Optional[int] = None, scale: Optional[int] = None) -> TColumnType: + def from_db_type( + self, db_type: str, precision: Optional[int] = None, scale: Optional[int] = None + ) -> TColumnType: if db_type == "numeric": if (precision, scale) == self.capabilities.wei_precision: return dict(data_type="wei") @@ -77,9 +80,13 @@ def from_db_type(self, db_type: str, precision: Optional[int] = None, scale: Opt class PostgresStagingCopyJob(SqlStagingCopyJob): - @classmethod - def generate_sql(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClientBase[Any], params: Optional[SqlJobParams] = None) -> List[str]: + def generate_sql( + cls, + table_chain: Sequence[TTableSchema], + sql_client: SqlClientBase[Any], + params: Optional[SqlJobParams] = None, + ) -> List[str]: sql: List[str] = [] for table in table_chain: with sql_client.with_staging_dataset(staging=True): @@ -88,21 +95,20 @@ def generate_sql(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClient # drop destination table sql.append(f"DROP TABLE IF EXISTS {table_name};") # moving staging table to destination schema - sql.append(f"ALTER TABLE {staging_table_name} SET SCHEMA {sql_client.fully_qualified_dataset_name()};") + 
sql.append( + f"ALTER TABLE {staging_table_name} SET SCHEMA" + f" {sql_client.fully_qualified_dataset_name()};" + ) # recreate staging table sql.append(f"CREATE TABLE {staging_table_name} (like {table_name} including all);") return sql class PostgresClient(InsertValuesJobClient): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() def __init__(self, schema: Schema, config: PostgresClientConfiguration) -> None: - sql_client = Psycopg2SqlClient( - config.normalize_dataset_name(schema), - config.credentials - ) + sql_client = Psycopg2SqlClient(config.normalize_dataset_name(schema), config.credentials) super().__init__(schema, config, sql_client) self.config: PostgresClientConfiguration = config self.sql_client = sql_client @@ -110,14 +116,24 @@ def __init__(self, schema: Schema, config: PostgresClientConfiguration) -> None: self.type_mapper = PostgresTypeMapper(self.capabilities) def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = None) -> str: - hints_str = " ".join(self.active_hints.get(h, "") for h in self.active_hints.keys() if c.get(h, False) is True) + hints_str = " ".join( + self.active_hints.get(h, "") + for h in self.active_hints.keys() + if c.get(h, False) is True + ) column_name = self.capabilities.escape_identifier(c["name"]) - return f"{column_name} {self.type_mapper.to_db_type(c)} {hints_str} {self._gen_not_null(c.get('nullable', True))}" + return ( + f"{column_name} {self.type_mapper.to_db_type(c)} {hints_str} {self._gen_not_null(c.get('nullable', True))}" + ) - def _create_replace_followup_jobs(self, table_chain: Sequence[TTableSchema]) -> List[NewLoadJob]: + def _create_replace_followup_jobs( + self, table_chain: Sequence[TTableSchema] + ) -> List[NewLoadJob]: if self.config.replace_strategy == "staging-optimized": return [PostgresStagingCopyJob.from_table_chain(table_chain, self.sql_client)] return super()._create_replace_followup_jobs(table_chain) - def _from_db_type(self, pq_t: str, precision: Optional[int], scale: Optional[int]) -> TColumnType: + def _from_db_type( + self, pq_t: str, precision: Optional[int], scale: Optional[int] + ) -> TColumnType: return self.type_mapper.from_db_type(pq_t, precision, scale) diff --git a/dlt/destinations/postgres/sql_client.py b/dlt/destinations/impl/postgres/sql_client.py similarity index 72% rename from dlt/destinations/postgres/sql_client.py rename to dlt/destinations/impl/postgres/sql_client.py index 079a0ae477..366ed243ef 100644 --- a/dlt/destinations/postgres/sql_client.py +++ b/dlt/destinations/impl/postgres/sql_client.py @@ -12,16 +12,24 @@ from contextlib import contextmanager from typing import Any, AnyStr, ClassVar, Iterator, Optional, Sequence -from dlt.destinations.exceptions import DatabaseTerminalException, DatabaseTransientException, DatabaseUndefinedRelation +from dlt.destinations.exceptions import ( + DatabaseTerminalException, + DatabaseTransientException, + DatabaseUndefinedRelation, +) from dlt.destinations.typing import DBApi, DBApiCursor, DBTransaction -from dlt.destinations.sql_client import DBApiCursorImpl, SqlClientBase, raise_database_error, raise_open_connection_error +from dlt.destinations.sql_client import ( + DBApiCursorImpl, + SqlClientBase, + raise_database_error, + raise_open_connection_error, +) -from dlt.destinations.postgres.configuration import PostgresCredentials -from dlt.destinations.postgres import capabilities +from dlt.destinations.impl.postgres.configuration import PostgresCredentials +from dlt.destinations.impl.postgres import capabilities class 
Psycopg2SqlClient(SqlClientBase["psycopg2.connection"], DBTransaction): - dbapi: ClassVar[DBApi] = psycopg2 capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() @@ -32,9 +40,9 @@ def __init__(self, dataset_name: str, credentials: PostgresCredentials) -> None: def open_connection(self) -> "psycopg2.connection": self._conn = psycopg2.connect( - dsn=self.credentials.to_native_representation(), - options=f"-c search_path={self.fully_qualified_dataset_name()},public" - ) + dsn=self.credentials.to_native_representation(), + options=f"-c search_path={self.fully_qualified_dataset_name()},public", + ) # we'll provide explicit transactions see _reset self._reset_connection() return self._conn @@ -70,7 +78,9 @@ def native_connection(self) -> "psycopg2.connection": return self._conn # @raise_database_error - def execute_sql(self, sql: AnyStr, *args: Any, **kwargs: Any) -> Optional[Sequence[Sequence[Any]]]: + def execute_sql( + self, sql: AnyStr, *args: Any, **kwargs: Any + ) -> Optional[Sequence[Sequence[Any]]]: with self.execute_query(sql, *args, **kwargs) as curr: if curr.description is None: return None @@ -95,13 +105,17 @@ def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DB self.open_connection() raise outer - def execute_fragments(self, fragments: Sequence[AnyStr], *args: Any, **kwargs: Any) -> Optional[Sequence[Sequence[Any]]]: + def execute_fragments( + self, fragments: Sequence[AnyStr], *args: Any, **kwargs: Any + ) -> Optional[Sequence[Sequence[Any]]]: # compose the statements using psycopg2 library - composed = Composed(sql if isinstance(sql, Composable) else SQL(sql) for sql in fragments) + composed = Composed(sql if isinstance(sql, Composable) else SQL(sql) for sql in fragments) return self.execute_sql(composed, *args, **kwargs) def fully_qualified_dataset_name(self, escape: bool = True) -> str: - return self.capabilities.escape_identifier(self.dataset_name) if escape else self.dataset_name + return ( + self.capabilities.escape_identifier(self.dataset_name) if escape else self.dataset_name + ) def _reset_connection(self) -> None: # self._conn.autocommit = True @@ -112,13 +126,23 @@ def _reset_connection(self) -> None: def _make_database_exception(cls, ex: Exception) -> Exception: if isinstance(ex, (psycopg2.errors.UndefinedTable, psycopg2.errors.InvalidSchemaName)): raise DatabaseUndefinedRelation(ex) - if isinstance(ex, (psycopg2.OperationalError, psycopg2.InternalError, psycopg2.errors.SyntaxError, psycopg2.errors.UndefinedFunction)): + if isinstance( + ex, + ( + psycopg2.OperationalError, + psycopg2.InternalError, + psycopg2.errors.SyntaxError, + psycopg2.errors.UndefinedFunction, + ), + ): term = cls._maybe_make_terminal_exception_from_data_error(ex) if term: return term else: return DatabaseTransientException(ex) - elif isinstance(ex, (psycopg2.DataError, psycopg2.ProgrammingError, psycopg2.IntegrityError)): + elif isinstance( + ex, (psycopg2.DataError, psycopg2.ProgrammingError, psycopg2.IntegrityError) + ): return DatabaseTerminalException(ex) elif isinstance(ex, TypeError): # psycopg2 raises TypeError on malformed query parameters @@ -129,7 +153,9 @@ def _make_database_exception(cls, ex: Exception) -> Exception: return ex @staticmethod - def _maybe_make_terminal_exception_from_data_error(pg_ex: psycopg2.DataError) -> Optional[Exception]: + def _maybe_make_terminal_exception_from_data_error( + pg_ex: psycopg2.DataError, + ) -> Optional[Exception]: return None @staticmethod diff --git a/dlt/destinations/impl/qdrant/__init__.py 
b/dlt/destinations/impl/qdrant/__init__.py new file mode 100644 index 0000000000..1a2c466b14 --- /dev/null +++ b/dlt/destinations/impl/qdrant/__init__.py @@ -0,0 +1,18 @@ +from dlt.common.destination import DestinationCapabilitiesContext +from dlt.destinations.impl.qdrant.qdrant_adapter import qdrant_adapter + + +def capabilities() -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext() + caps.preferred_loader_file_format = "jsonl" + caps.supported_loader_file_formats = ["jsonl"] + + caps.max_identifier_length = 200 + caps.max_column_identifier_length = 1024 + caps.max_query_length = 8 * 1024 * 1024 + caps.is_max_query_length_in_bytes = False + caps.max_text_data_type_length = 8 * 1024 * 1024 + caps.is_max_text_data_type_length_in_bytes = False + caps.supports_ddl_transactions = False + + return caps diff --git a/dlt/destinations/qdrant/configuration.py b/dlt/destinations/impl/qdrant/configuration.py similarity index 89% rename from dlt/destinations/qdrant/configuration.py rename to dlt/destinations/impl/qdrant/configuration.py index dc252e3b31..23637dee33 100644 --- a/dlt/destinations/qdrant/configuration.py +++ b/dlt/destinations/impl/qdrant/configuration.py @@ -1,7 +1,10 @@ from typing import Optional, Final from dlt.common.configuration import configspec -from dlt.common.configuration.specs.base_configuration import BaseConfiguration, CredentialsConfiguration +from dlt.common.configuration.specs.base_configuration import ( + BaseConfiguration, + CredentialsConfiguration, +) from dlt.common.destination.reference import DestinationClientDwhConfiguration @@ -44,12 +47,12 @@ class QdrantClientOptions(BaseConfiguration): @configspec class QdrantClientConfiguration(DestinationClientDwhConfiguration): - destination_name: Final[str] = "qdrant" # type: ignore + destination_type: Final[str] = "qdrant" # type: ignore # character for the dataset separator dataset_separator: str = "_" - # make it optional do empty dataset is allowed - dataset_name: Optional[str] = None # type: ignore + # make it optional so empty dataset is allowed + dataset_name: Final[Optional[str]] = None # type: ignore[misc] # Batch size for generating embeddings embedding_batch_size: int = 32 diff --git a/dlt/destinations/impl/qdrant/factory.py b/dlt/destinations/impl/qdrant/factory.py new file mode 100644 index 0000000000..df9cd64871 --- /dev/null +++ b/dlt/destinations/impl/qdrant/factory.py @@ -0,0 +1,36 @@ +import typing as t + +from dlt.common.destination import Destination, DestinationCapabilitiesContext + +from dlt.destinations.impl.qdrant.configuration import QdrantCredentials, QdrantClientConfiguration +from dlt.destinations.impl.qdrant import capabilities + +if t.TYPE_CHECKING: + from dlt.destinations.impl.qdrant.qdrant_client import QdrantClient + + +class qdrant(Destination[QdrantClientConfiguration, "QdrantClient"]): + spec = QdrantClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["QdrantClient"]: + from dlt.destinations.impl.qdrant.qdrant_client import QdrantClient + + return QdrantClient + + def __init__( + self, + credentials: t.Union[QdrantCredentials, t.Dict[str, t.Any]] = None, + destination_name: t.Optional[str] = None, + environment: t.Optional[str] = None, + **kwargs: t.Any, + ) -> None: + super().__init__( + credentials=credentials, + destination_name=destination_name, + environment=environment, + **kwargs, + ) diff --git a/dlt/destinations/qdrant/qdrant_adapter.py 
b/dlt/destinations/impl/qdrant/qdrant_adapter.py similarity index 89% rename from dlt/destinations/qdrant/qdrant_adapter.py rename to dlt/destinations/impl/qdrant/qdrant_adapter.py index ac51bd5f42..243cbd6c5b 100644 --- a/dlt/destinations/qdrant/qdrant_adapter.py +++ b/dlt/destinations/impl/qdrant/qdrant_adapter.py @@ -1,11 +1,11 @@ from typing import Any from dlt.common.schema.typing import TColumnNames, TTableSchemaColumns -from dlt.extract.decorators import resource as make_resource -from dlt.extract.source import DltResource +from dlt.extract import DltResource, resource as make_resource VECTORIZE_HINT = "x-qdrant-embed" + def qdrant_adapter( data: Any, embed: TColumnNames = None, @@ -48,8 +48,7 @@ def qdrant_adapter( embed = [embed] if not isinstance(embed, list): raise ValueError( - "embed must be a list of column names or a single " - "column name as a string" + "embed must be a list of column names or a single column name as a string" ) for column_name in embed: @@ -59,8 +58,7 @@ def qdrant_adapter( } if not column_hints: - raise ValueError( - "A value for 'embed' must be specified.") + raise ValueError("A value for 'embed' must be specified.") else: resource.apply_hints(columns=column_hints) diff --git a/dlt/destinations/qdrant/qdrant_client.py b/dlt/destinations/impl/qdrant/qdrant_client.py similarity index 72% rename from dlt/destinations/qdrant/qdrant_client.py rename to dlt/destinations/impl/qdrant/qdrant_client.py index cba87e9528..2df3023d86 100644 --- a/dlt/destinations/qdrant/qdrant_client.py +++ b/dlt/destinations/impl/qdrant/qdrant_client.py @@ -11,14 +11,15 @@ from dlt.destinations.job_impl import EmptyLoadJob from dlt.destinations.job_client_impl import StorageSchemaInfo, StateInfo -from dlt.destinations.qdrant import capabilities -from dlt.destinations.qdrant.configuration import QdrantClientConfiguration -from dlt.destinations.qdrant.qdrant_adapter import VECTORIZE_HINT +from dlt.destinations.impl.qdrant import capabilities +from dlt.destinations.impl.qdrant.configuration import QdrantClientConfiguration +from dlt.destinations.impl.qdrant.qdrant_adapter import VECTORIZE_HINT from qdrant_client import QdrantClient as QC, models from qdrant_client.qdrant_fastembed import uuid from qdrant_client.http.exceptions import UnexpectedResponse + class LoadQdrantJob(LoadJob): def __init__( self, @@ -32,8 +33,7 @@ def __init__( super().__init__(file_name) self.db_client = db_client self.collection_name = collection_name - self.embedding_fields = get_columns_names_with_prop( - table_schema, VECTORIZE_HINT) + self.embedding_fields = get_columns_names_with_prop(table_schema, VECTORIZE_HINT) self.unique_identifiers = self._list_unique_identifiers(table_schema) self.config = client_config @@ -42,17 +42,24 @@ def __init__( for line in f: data = json.loads(line) - point_id = self._generate_uuid( - data, self.unique_identifiers, self.collection_name) if self.unique_identifiers else uuid.uuid4() + point_id = ( + self._generate_uuid(data, self.unique_identifiers, self.collection_name) + if self.unique_identifiers + else uuid.uuid4() + ) embedding_doc = self._get_embedding_doc(data) payloads.append(data) ids.append(point_id) docs.append(embedding_doc) - embedding_model = db_client._get_or_init_model( - db_client.embedding_model_name) - embeddings = list(embedding_model.embed( - docs, batch_size=self.config.embedding_batch_size, parallel=self.config.embedding_parallelism)) + embedding_model = db_client._get_or_init_model(db_client.embedding_model_name) + embeddings = list( + 
embedding_model.embed( + docs, + batch_size=self.config.embedding_batch_size, + parallel=self.config.embedding_parallelism, + ) + ) vector_name = db_client.get_vector_field_name() embeddings = [{vector_name: embedding.tolist()} for embedding in embeddings] assert len(embeddings) == len(payloads) == len(ids) @@ -81,13 +88,14 @@ def _list_unique_identifiers(self, table_schema: TTableSchema) -> Sequence[str]: Sequence[str]: A list of unique column identifiers. """ if table_schema.get("write_disposition") == "merge": - primary_keys = get_columns_names_with_prop( - table_schema, "primary_key") + primary_keys = get_columns_names_with_prop(table_schema, "primary_key") if primary_keys: return primary_keys return get_columns_names_with_prop(table_schema, "unique") - def _upload_data(self, ids: Iterable[Any], vectors: Iterable[Any], payloads: Iterable[Any]) -> None: + def _upload_data( + self, ids: Iterable[Any], vectors: Iterable[Any], payloads: Iterable[Any] + ) -> None: """Uploads data to a Qdrant instance in a batch. Supports retries and parallelism. Args: @@ -96,7 +104,14 @@ def _upload_data(self, ids: Iterable[Any], vectors: Iterable[Any], payloads: Ite payloads (Iterable[Any]): Payloads to be uploaded to the collection """ self.db_client.upload_collection( - self.collection_name, ids=ids, payload=payloads, vectors=vectors, parallel=self.config.upload_parallelism, batch_size=self.config.upload_batch_size, max_retries=self.config.upload_max_retries) + self.collection_name, + ids=ids, + payload=payloads, + vectors=vectors, + parallel=self.config.upload_parallelism, + batch_size=self.config.upload_batch_size, + max_retries=self.config.upload_max_retries, + ) def _generate_uuid( self, data: Dict[str, Any], unique_identifiers: Sequence[str], collection_name: str @@ -120,12 +135,19 @@ def state(self) -> TLoadJobState: def exception(self) -> str: raise NotImplementedError() + class QdrantClient(JobClientBase, WithStateSync): - """Qdrant Destination Handler - """ + """Qdrant Destination Handler""" + capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() state_properties: ClassVar[List[str]] = [ - "version", "engine_version", "pipeline_name", "state", "created_at", "_dlt_load_id"] + "version", + "engine_version", + "pipeline_name", + "state", + "created_at", + "_dlt_load_id", + ] def __init__(self, schema: Schema, config: QdrantClientConfiguration) -> None: super().__init__(schema, config) @@ -167,11 +189,13 @@ def _make_qualified_collection_name(self, table_name: str) -> str: str: The dataset name and table name concatenated with a separator if dataset name is present. """ dataset_separator = self.config.dataset_separator - return f"{self.dataset_name}{dataset_separator}{table_name}" if self.dataset_name else table_name + return ( + f"{self.dataset_name}{dataset_separator}{table_name}" + if self.dataset_name + else table_name + ) - def _create_collection( - self, full_collection_name: str - ) -> None: + def _create_collection(self, full_collection_name: str) -> None: """Creates a collection in Qdrant. Args: @@ -188,7 +212,8 @@ def _create_collection( vectors_config = self.db_client.get_fastembed_vector_params() self.db_client.create_collection( - collection_name=full_collection_name, vectors_config=vectors_config) + collection_name=full_collection_name, vectors_config=vectors_config + ) def _create_point(self, obj: Dict[str, Any], collection_name: str) -> None: """Inserts a point into a Qdrant collection without a vector. 
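As a side note on the naming scheme used by `_make_qualified_collection_name` above, a standalone sketch of the same rule (function name and values invented for illustration):

```python
from typing import Optional

# Minimal sketch of the collection naming rule: dataset name and table name are
# joined with the configured separator, and the bare table name is used when no
# dataset name is set.
def qualified_collection_name(
    dataset_name: Optional[str], table_name: str, separator: str = "_"
) -> str:
    return f"{dataset_name}{separator}{table_name}" if dataset_name else table_name


assert qualified_collection_name("demo_dataset", "products") == "demo_dataset_products"
assert qualified_collection_name(None, "products") == "products"
```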
@@ -197,12 +222,16 @@ def _create_point(self, obj: Dict[str, Any], collection_name: str) -> None: obj (Dict[str, Any]): The arbitrary data to be inserted as payload. collection_name (str): The name of the collection to insert the point into. """ - self.db_client.upsert(collection_name, points=[ - models.PointStruct( - id=str(uuid.uuid4()), - payload=obj, - vector={}, - )]) + self.db_client.upsert( + collection_name, + points=[ + models.PointStruct( + id=str(uuid.uuid4()), + payload=obj, + vector={}, + ) + ], + ) def drop_storage(self) -> None: """Drop the dataset from the Qdrant instance. @@ -213,8 +242,7 @@ def drop_storage(self) -> None: If dataset name was not provided, it deletes all the tables in the current schema """ collections = self.db_client.get_collections().collections - collection_name_list = [collection.name - for collection in collections] + collection_name_list = [collection.name for collection in collections] if self.dataset_name: prefix = f"{self.dataset_name}{self.config.dataset_separator}" @@ -234,8 +262,7 @@ def initialize_storage(self, truncate_tables: Iterable[str] = None) -> None: self._create_sentinel_collection() elif truncate_tables: for table_name in truncate_tables: - qualified_table_name = self._make_qualified_collection_name( - table_name=table_name) + qualified_table_name = self._make_qualified_collection_name(table_name=table_name) if self._collection_exists(qualified_table_name): continue @@ -257,48 +284,64 @@ def update_stored_schema( self, only_tables: Iterable[str] = None, expected_update: TSchemaTables = None ) -> Optional[TSchemaTables]: applied_update: TSchemaTables = {} - schema_info = self.get_stored_schema_by_hash( - self.schema.stored_version_hash) + schema_info = self.get_stored_schema_by_hash(self.schema.stored_version_hash) if schema_info is None: logger.info( f"Schema with hash {self.schema.stored_version_hash} " - f"not found in the storage. upgrading" + "not found in the storage. 
upgrading" ) self._execute_schema_update(only_tables) else: logger.info( f"Schema with hash {self.schema.stored_version_hash} " f"inserted at {schema_info.inserted_at} found " - f"in storage, no upgrade required" + "in storage, no upgrade required" ) return applied_update def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: """Loads compressed state from destination storage - By finding a load id that was completed + By finding a load id that was completed """ limit = 10 offset = None while True: try: scroll_table_name = self._make_qualified_collection_name( - self.schema.state_table_name) - state_records, offset = self.db_client.scroll(scroll_table_name, with_payload=self.state_properties, scroll_filter=models.Filter(must=[ - models.FieldCondition( - key="pipeline_name", match=models.MatchValue(value=pipeline_name)) - ]), limit=limit, offset=offset) + self.schema.state_table_name + ) + state_records, offset = self.db_client.scroll( + scroll_table_name, + with_payload=self.state_properties, + scroll_filter=models.Filter( + must=[ + models.FieldCondition( + key="pipeline_name", match=models.MatchValue(value=pipeline_name) + ) + ] + ), + limit=limit, + offset=offset, + ) if len(state_records) == 0: return None for state_record in state_records: state = state_record.payload load_id = state["_dlt_load_id"] scroll_table_name = self._make_qualified_collection_name( - self.schema.loads_table_name) - load_records = self.db_client.count(scroll_table_name, exact=True, count_filter=models.Filter( - must=[models.FieldCondition( - key="load_id", match=models.MatchValue(value=load_id) - )] - )) + self.schema.loads_table_name + ) + load_records = self.db_client.count( + scroll_table_name, + exact=True, + count_filter=models.Filter( + must=[ + models.FieldCondition( + key="load_id", match=models.MatchValue(value=load_id) + ) + ] + ), + ) if load_records.count > 0: state["dlt_load_id"] = state.pop("_dlt_load_id") return StateInfo(**state) @@ -308,14 +351,20 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: def get_stored_schema(self) -> Optional[StorageSchemaInfo]: """Retrieves newest schema from destination storage""" try: - scroll_table_name = self._make_qualified_collection_name( - self.schema.version_table_name) - response = self.db_client.scroll(scroll_table_name, with_payload=True, scroll_filter=models.Filter( - must=[models.FieldCondition( - key="schema_name", - match=models.MatchValue(value=self.schema.name), - )] - ), limit=1) + scroll_table_name = self._make_qualified_collection_name(self.schema.version_table_name) + response = self.db_client.scroll( + scroll_table_name, + with_payload=True, + scroll_filter=models.Filter( + must=[ + models.FieldCondition( + key="schema_name", + match=models.MatchValue(value=self.schema.name), + ) + ] + ), + limit=1, + ) record = response[0][0].payload return StorageSchemaInfo(**record) except Exception: @@ -323,23 +372,25 @@ def get_stored_schema(self) -> Optional[StorageSchemaInfo]: def get_stored_schema_by_hash(self, schema_hash: str) -> Optional[StorageSchemaInfo]: try: - scroll_table_name = self._make_qualified_collection_name( - self.schema.version_table_name) - response = self.db_client.scroll(scroll_table_name, with_payload=True, scroll_filter=models.Filter( - must=[ - models.FieldCondition( - key="version_hash", match=models.MatchValue(value=schema_hash)) - ] - - ), limit=1) + scroll_table_name = self._make_qualified_collection_name(self.schema.version_table_name) + response = self.db_client.scroll( + 
scroll_table_name, + with_payload=True, + scroll_filter=models.Filter( + must=[ + models.FieldCondition( + key="version_hash", match=models.MatchValue(value=schema_hash) + ) + ] + ), + limit=1, + ) record = response[0][0].payload return StorageSchemaInfo(**record) except Exception: return None - def start_file_load( - self, table: TTableSchema, file_path: str, load_id: str - ) -> LoadJob: + def start_file_load(self, table: TTableSchema, file_path: str, load_id: str) -> LoadJob: return LoadQdrantJob( table, file_path, @@ -358,8 +409,7 @@ def complete_load(self, load_id: str) -> None: "status": 0, "inserted_at": str(pendulum.now()), } - loads_table_name = self._make_qualified_collection_name( - self.schema.loads_table_name) + loads_table_name = self._make_qualified_collection_name(self.schema.loads_table_name) self._create_point(properties, loads_table_name) def __enter__(self) -> "QdrantClient": @@ -383,8 +433,7 @@ def _update_schema_in_storage(self, schema: Schema) -> None: "inserted_at": str(pendulum.now()), "schema": schema_str, } - version_table_name = self._make_qualified_collection_name( - self.schema.version_table_name) + version_table_name = self._make_qualified_collection_name(self.schema.version_table_name) self._create_point(properties, version_table_name) def _execute_schema_update(self, only_tables: Iterable[str]) -> None: @@ -399,11 +448,14 @@ def _execute_schema_update(self, only_tables: Iterable[str]) -> None: def _collection_exists(self, table_name: str, qualify_table_name: bool = True) -> bool: try: - table_name = self._make_qualified_collection_name( - table_name) if qualify_table_name else table_name + table_name = ( + self._make_qualified_collection_name(table_name) + if qualify_table_name + else table_name + ) self.db_client.get_collection(table_name) return True except UnexpectedResponse as e: if e.status_code == 404: return False - raise e \ No newline at end of file + raise e diff --git a/dlt/destinations/redshift/README.md b/dlt/destinations/impl/redshift/README.md similarity index 100% rename from dlt/destinations/redshift/README.md rename to dlt/destinations/impl/redshift/README.md diff --git a/dlt/destinations/redshift/__init__.py b/dlt/destinations/impl/redshift/__init__.py similarity index 52% rename from dlt/destinations/redshift/__init__.py rename to dlt/destinations/impl/redshift/__init__.py index 96741e86cd..8a8cae84b4 100644 --- a/dlt/destinations/redshift/__init__.py +++ b/dlt/destinations/impl/redshift/__init__.py @@ -1,20 +1,7 @@ -from typing import Type - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config from dlt.common.data_writers.escape import escape_redshift_identifier, escape_redshift_literal from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE -from dlt.destinations.redshift.configuration import RedshiftClientConfiguration - - -@with_config(spec=RedshiftClientConfiguration, sections=(known_sections.DESTINATION, "redshift",)) -def _configure(config: RedshiftClientConfiguration = config.value) -> RedshiftClientConfiguration: - return config - def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() @@ -36,14 +23,3 @@ def capabilities() -> DestinationCapabilitiesContext: caps.alter_add_multi_column = False return 
caps - - -def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.redshift.redshift import RedshiftClient - - return RedshiftClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[DestinationClientConfiguration]: - return RedshiftClientConfiguration diff --git a/dlt/destinations/redshift/configuration.py b/dlt/destinations/impl/redshift/configuration.py similarity index 52% rename from dlt/destinations/redshift/configuration.py rename to dlt/destinations/impl/redshift/configuration.py index 7cb13b996f..2a6ade4a4f 100644 --- a/dlt/destinations/redshift/configuration.py +++ b/dlt/destinations/impl/redshift/configuration.py @@ -1,10 +1,13 @@ -from typing import Final, Optional +from typing import Final, Optional, TYPE_CHECKING from dlt.common.typing import TSecretValue from dlt.common.configuration import configspec from dlt.common.utils import digest128 -from dlt.destinations.postgres.configuration import PostgresCredentials, PostgresClientConfiguration +from dlt.destinations.impl.postgres.configuration import ( + PostgresCredentials, + PostgresClientConfiguration, +) @configspec @@ -17,7 +20,7 @@ class RedshiftCredentials(PostgresCredentials): @configspec class RedshiftClientConfiguration(PostgresClientConfiguration): - destination_name: Final[str] = "redshift" # type: ignore + destination_type: Final[str] = "redshift" # type: ignore credentials: RedshiftCredentials staging_iam_role: Optional[str] = None @@ -26,3 +29,17 @@ def fingerprint(self) -> str: if self.credentials and self.credentials.host: return digest128(self.credentials.host) return "" + + if TYPE_CHECKING: + + def __init__( + self, + *, + destination_type: str = None, + credentials: PostgresCredentials = None, + dataset_name: str = None, + default_schema_name: str = None, + staging_iam_role: str = None, + destination_name: str = None, + environment: str = None, + ) -> None: ... diff --git a/dlt/destinations/impl/redshift/factory.py b/dlt/destinations/impl/redshift/factory.py new file mode 100644 index 0000000000..d80ef9dcad --- /dev/null +++ b/dlt/destinations/impl/redshift/factory.py @@ -0,0 +1,54 @@ +import typing as t + +from dlt.common.destination import Destination, DestinationCapabilitiesContext + +from dlt.destinations.impl.redshift.configuration import ( + RedshiftCredentials, + RedshiftClientConfiguration, +) +from dlt.destinations.impl.redshift import capabilities + +if t.TYPE_CHECKING: + from dlt.destinations.impl.redshift.redshift import RedshiftClient + + +class redshift(Destination[RedshiftClientConfiguration, "RedshiftClient"]): + spec = RedshiftClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["RedshiftClient"]: + from dlt.destinations.impl.redshift.redshift import RedshiftClient + + return RedshiftClient + + def __init__( + self, + credentials: t.Union[RedshiftCredentials, t.Dict[str, t.Any], str] = None, + create_indexes: bool = True, + staging_iam_role: t.Optional[str] = None, + destination_name: t.Optional[str] = None, + environment: t.Optional[str] = None, + **kwargs: t.Any, + ) -> None: + """Configure the Redshift destination to use in a pipeline. + + All arguments provided here supersede other configuration sources such as environment variables and dlt config files. 
+ + Args: + credentials: Credentials to connect to the redshift database. Can be an instance of `RedshiftCredentials` or + a connection string in the format `redshift://user:password@host:port/database` + create_indexes: Should unique indexes be created + staging_iam_role: IAM role to use for staging data in S3 + **kwargs: Additional arguments passed to the destination config + """ + super().__init__( + credentials=credentials, + create_indexes=create_indexes, + staging_iam_role=staging_iam_role, + destination_name=destination_name, + environment=environment, + **kwargs, + ) diff --git a/dlt/destinations/redshift/redshift.py b/dlt/destinations/impl/redshift/redshift.py similarity index 67% rename from dlt/destinations/redshift/redshift.py rename to dlt/destinations/impl/redshift/redshift.py index 888f27ae7c..eaa1968133 100644 --- a/dlt/destinations/redshift/redshift.py +++ b/dlt/destinations/impl/redshift/redshift.py @@ -1,20 +1,27 @@ import platform import os -from dlt.destinations.postgres.sql_client import Psycopg2SqlClient +from dlt.destinations.impl.postgres.sql_client import Psycopg2SqlClient from dlt.common.schema.utils import table_schema_has_type, table_schema_has_type_with_precision + if platform.python_implementation() == "PyPy": import psycopg2cffi as psycopg2 + # from psycopg2cffi.sql import SQL, Composed else: import psycopg2 + # from psycopg2.sql import SQL, Composed from typing import ClassVar, Dict, List, Optional, Sequence, Any from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import NewLoadJob, CredentialsConfiguration, SupportsStagingDestination +from dlt.common.destination.reference import ( + NewLoadJob, + CredentialsConfiguration, + SupportsStagingDestination, +) from dlt.common.data_types import TDataType from dlt.common.schema import TColumnSchema, TColumnHint, Schema from dlt.common.schema.typing import TTableSchema, TColumnType, TTableFormat @@ -25,8 +32,8 @@ from dlt.destinations.exceptions import DatabaseTerminalException, LoadJobTerminalException from dlt.destinations.job_client_impl import CopyRemoteFileLoadJob, LoadJob -from dlt.destinations.redshift import capabilities -from dlt.destinations.redshift.configuration import RedshiftClientConfiguration +from dlt.destinations.impl.redshift import capabilities +from dlt.destinations.impl.redshift.configuration import RedshiftClientConfiguration from dlt.destinations.job_impl import NewReferenceJob from dlt.destinations.sql_client import SqlClientBase from dlt.destinations.type_mapping import TypeMapper @@ -36,7 +43,7 @@ "cluster": "DISTKEY", # it is better to not enforce constraints in redshift # "primary_key": "PRIMARY KEY", - "sort": "SORTKEY" + "sort": "SORTKEY", } @@ -50,7 +57,7 @@ class RedshiftTypeMapper(TypeMapper): "timestamp": "timestamp with time zone", "bigint": "bigint", "binary": "varbinary", - "time": "time without time zone" + "time": "time without time zone", } sct_to_dbt = { @@ -76,7 +83,9 @@ class RedshiftTypeMapper(TypeMapper): "integer": "bigint", } - def to_db_integer_type(self, precision: Optional[int], table_format: TTableFormat = None) -> str: + def to_db_integer_type( + self, precision: Optional[int], table_format: TTableFormat = None + ) -> str: if precision is None: return "bigint" if precision <= 16: @@ -85,7 +94,9 @@ def to_db_integer_type(self, precision: Optional[int], table_format: TTableForma return "integer" return "bigint" - def from_db_type(self, db_type: str, precision: Optional[int], scale: Optional[int]) -> TColumnType: 
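A hypothetical configuration sketch for the Redshift factory defined above; the connection string follows the format mentioned in the docstring, and the role ARN and names are placeholders:

```python
# Illustrative only: passing credentials and an IAM role for staging directly
# to the redshift factory.
import dlt
from dlt.destinations.impl.redshift.factory import redshift

pipeline = dlt.pipeline(
    pipeline_name="redshift_factory_demo",
    destination=redshift(
        credentials="redshift://loader:secret@example-cluster.eu-central-1.redshift.amazonaws.com:5439/analytics",
        staging_iam_role="arn:aws:iam::123456789012:role/dlt-staging",
    ),
    dataset_name="demo_dataset",
)
```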
+ def from_db_type( + self, db_type: str, precision: Optional[int], scale: Optional[int] + ) -> TColumnType: if db_type == "numeric": if (precision, scale) == self.capabilities.wei_precision: return dict(data_type="wei") @@ -93,11 +104,12 @@ def from_db_type(self, db_type: str, precision: Optional[int], scale: Optional[i class RedshiftSqlClient(Psycopg2SqlClient): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() @staticmethod - def _maybe_make_terminal_exception_from_data_error(pg_ex: psycopg2.DataError) -> Optional[Exception]: + def _maybe_make_terminal_exception_from_data_error( + pg_ex: psycopg2.DataError, + ) -> Optional[Exception]: if "Cannot insert a NULL value into column" in pg_ex.pgerror: # NULL violations is internal error, probably a redshift thing return DatabaseTerminalException(pg_ex) @@ -107,26 +119,33 @@ def _maybe_make_terminal_exception_from_data_error(pg_ex: psycopg2.DataError) -> return DatabaseTerminalException(pg_ex) return None -class RedshiftCopyFileLoadJob(CopyRemoteFileLoadJob): - def __init__(self, table: TTableSchema, - file_path: str, - sql_client: SqlClientBase[Any], - staging_credentials: Optional[CredentialsConfiguration] = None, - staging_iam_role: str = None) -> None: +class RedshiftCopyFileLoadJob(CopyRemoteFileLoadJob): + def __init__( + self, + table: TTableSchema, + file_path: str, + sql_client: SqlClientBase[Any], + staging_credentials: Optional[CredentialsConfiguration] = None, + staging_iam_role: str = None, + ) -> None: self._staging_iam_role = staging_iam_role super().__init__(table, file_path, sql_client, staging_credentials) def execute(self, table: TTableSchema, bucket_path: str) -> None: - # we assume s3 credentials where provided for the staging credentials = "" if self._staging_iam_role: credentials = f"IAM_ROLE '{self._staging_iam_role}'" - elif self._staging_credentials and isinstance(self._staging_credentials, AwsCredentialsWithoutDefaults): + elif self._staging_credentials and isinstance( + self._staging_credentials, AwsCredentialsWithoutDefaults + ): aws_access_key = self._staging_credentials.aws_access_key_id aws_secret_key = self._staging_credentials.aws_secret_access_key - credentials = f"CREDENTIALS 'aws_access_key_id={aws_access_key};aws_secret_access_key={aws_secret_key}'" + credentials = ( + "CREDENTIALS" + f" 'aws_access_key_id={aws_access_key};aws_secret_access_key={aws_secret_key}'" + ) table_name = table["name"] # get format @@ -137,11 +156,17 @@ def execute(self, table: TTableSchema, bucket_path: str) -> None: if table_schema_has_type(table, "time"): raise LoadJobTerminalException( self.file_name(), - f"Redshift cannot load TIME columns from {ext} files. Switch to direct INSERT file format or convert `datetime.time` objects in your data to `str` or `datetime.datetime`" + f"Redshift cannot load TIME columns from {ext} files. Switch to direct INSERT file" + " format or convert `datetime.time` objects in your data to `str` or" + " `datetime.datetime`", ) if ext == "jsonl": if table_schema_has_type(table, "binary"): - raise LoadJobTerminalException(self.file_name(), "Redshift cannot load VARBYTE columns from json files. Switch to parquet to load binaries.") + raise LoadJobTerminalException( + self.file_name(), + "Redshift cannot load VARBYTE columns from json files. 
Switch to parquet to" + " load binaries.", + ) file_type = "FORMAT AS JSON 'auto'" dateformat = "dateformat 'auto' timeformat 'auto'" compression = "GZIP" @@ -149,7 +174,8 @@ def execute(self, table: TTableSchema, bucket_path: str) -> None: if table_schema_has_type_with_precision(table, "binary"): raise LoadJobTerminalException( self.file_name(), - f"Redshift cannot load fixed width VARBYTE columns from {ext} files. Switch to direct INSERT file format or use binary columns without precision." + f"Redshift cannot load fixed width VARBYTE columns from {ext} files. Switch to" + " direct INSERT file format or use binary columns without precision.", ) file_type = "PARQUET" # if table contains complex types then SUPER field will be used. @@ -174,28 +200,36 @@ def exception(self) -> str: # this part of code should be never reached raise NotImplementedError() -class RedshiftMergeJob(SqlMergeJob): +class RedshiftMergeJob(SqlMergeJob): @classmethod - def gen_key_table_clauses(cls, root_table_name: str, staging_root_table_name: str, key_clauses: Sequence[str], for_delete: bool) -> List[str]: + def gen_key_table_clauses( + cls, + root_table_name: str, + staging_root_table_name: str, + key_clauses: Sequence[str], + for_delete: bool, + ) -> List[str]: """Generate sql clauses that may be used to select or delete rows in root table of destination dataset - A list of clauses may be returned for engines that do not support OR in subqueries. Like BigQuery + A list of clauses may be returned for engines that do not support OR in subqueries. Like BigQuery """ if for_delete: - return [f"FROM {root_table_name} WHERE EXISTS (SELECT 1 FROM {staging_root_table_name} WHERE {' OR '.join([c.format(d=root_table_name,s=staging_root_table_name) for c in key_clauses])})"] - return SqlMergeJob.gen_key_table_clauses(root_table_name, staging_root_table_name, key_clauses, for_delete) + return [ + f"FROM {root_table_name} WHERE EXISTS (SELECT 1 FROM" + f" {staging_root_table_name} WHERE" + f" {' OR '.join([c.format(d=root_table_name,s=staging_root_table_name) for c in key_clauses])})" + ] + return SqlMergeJob.gen_key_table_clauses( + root_table_name, staging_root_table_name, key_clauses, for_delete + ) class RedshiftClient(InsertValuesJobClient, SupportsStagingDestination): - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() def __init__(self, schema: Schema, config: RedshiftClientConfiguration) -> None: - sql_client = RedshiftSqlClient ( - config.normalize_dataset_name(schema), - config.credentials - ) + sql_client = RedshiftSqlClient(config.normalize_dataset_name(schema), config.credentials) super().__init__(schema, config, sql_client) self.sql_client = sql_client self.config: RedshiftClientConfiguration = config @@ -205,17 +239,33 @@ def _create_merge_followup_jobs(self, table_chain: Sequence[TTableSchema]) -> Li return [RedshiftMergeJob.from_table_chain(table_chain, self.sql_client)] def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = None) -> str: - hints_str = " ".join(HINT_TO_REDSHIFT_ATTR.get(h, "") for h in HINT_TO_REDSHIFT_ATTR.keys() if c.get(h, False) is True) + hints_str = " ".join( + HINT_TO_REDSHIFT_ATTR.get(h, "") + for h in HINT_TO_REDSHIFT_ATTR.keys() + if c.get(h, False) is True + ) column_name = self.capabilities.escape_identifier(c["name"]) - return f"{column_name} {self.type_mapper.to_db_type(c)} {hints_str} {self._gen_not_null(c.get('nullable', True))}" + return ( + f"{column_name} {self.type_mapper.to_db_type(c)} {hints_str} 
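To make the credentials branch in `RedshiftCopyFileLoadJob.execute` above easier to follow, here is a minimal standalone sketch of the same selection idea (helper name and arguments are invented; the real job checks the staging credentials type rather than bare key presence):

```python
from typing import Optional

# Sketch of the COPY credentials clause selection: an IAM role wins over
# explicit keys, and an empty clause is produced when neither is configured.
def copy_credentials_clause(
    staging_iam_role: Optional[str],
    aws_access_key: Optional[str],
    aws_secret_key: Optional[str],
) -> str:
    if staging_iam_role:
        return f"IAM_ROLE '{staging_iam_role}'"
    if aws_access_key and aws_secret_key:
        return (
            "CREDENTIALS"
            f" 'aws_access_key_id={aws_access_key};aws_secret_access_key={aws_secret_key}'"
        )
    return ""
```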
{self._gen_not_null(c.get('nullable', True))}" + ) def start_file_load(self, table: TTableSchema, file_path: str, load_id: str) -> LoadJob: """Starts SqlLoadJob for files ending with .sql or returns None to let derived classes to handle their specific jobs""" job = super().start_file_load(table, file_path, load_id) if not job: - assert NewReferenceJob.is_reference_job(file_path), "Redshift must use staging to load files" - job = RedshiftCopyFileLoadJob(table, file_path, self.sql_client, staging_credentials=self.config.staging_config.credentials, staging_iam_role=self.config.staging_iam_role) + assert NewReferenceJob.is_reference_job( + file_path + ), "Redshift must use staging to load files" + job = RedshiftCopyFileLoadJob( + table, + file_path, + self.sql_client, + staging_credentials=self.config.staging_config.credentials, + staging_iam_role=self.config.staging_iam_role, + ) return job - def _from_db_type(self, pq_t: str, precision: Optional[int], scale: Optional[int]) -> TColumnType: + def _from_db_type( + self, pq_t: str, precision: Optional[int], scale: Optional[int] + ) -> TColumnType: return self.type_mapper.from_db_type(pq_t, precision, scale) diff --git a/dlt/destinations/snowflake/__init__.py b/dlt/destinations/impl/snowflake/__init__.py similarity index 52% rename from dlt/destinations/snowflake/__init__.py rename to dlt/destinations/impl/snowflake/__init__.py index 5d32bc41fd..d6bebd3fdd 100644 --- a/dlt/destinations/snowflake/__init__.py +++ b/dlt/destinations/impl/snowflake/__init__.py @@ -1,21 +1,8 @@ -from typing import Type from dlt.common.data_writers.escape import escape_bigquery_identifier - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration from dlt.common.data_writers.escape import escape_snowflake_identifier from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE -from dlt.destinations.snowflake.configuration import SnowflakeClientConfiguration - - -@with_config(spec=SnowflakeClientConfiguration, sections=(known_sections.DESTINATION, "snowflake",)) -def _configure(config: SnowflakeClientConfiguration = config.value) -> SnowflakeClientConfiguration: - return config - def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() @@ -35,14 +22,3 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supports_ddl_transactions = True caps.alter_add_multi_column = True return caps - - -def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.snowflake.snowflake import SnowflakeClient - - return SnowflakeClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[DestinationClientConfiguration]: - return SnowflakeClientConfiguration diff --git a/dlt/destinations/snowflake/configuration.py b/dlt/destinations/impl/snowflake/configuration.py similarity index 70% rename from dlt/destinations/snowflake/configuration.py rename to dlt/destinations/impl/snowflake/configuration.py index 4d9aaa7b54..01f5ca6e03 100644 --- a/dlt/destinations/snowflake/configuration.py +++ b/dlt/destinations/impl/snowflake/configuration.py @@ -1,7 +1,7 @@ 
import base64 import binascii -from typing import Final, Optional, Any, Dict, ClassVar, List +from typing import Final, Optional, Any, Dict, ClassVar, List, TYPE_CHECKING from sqlalchemy.engine import URL @@ -16,8 +16,7 @@ def _read_private_key(private_key: str, password: Optional[str] = None) -> bytes: - """Load an encrypted or unencrypted private key from string. - """ + """Load an encrypted or unencrypted private key from string.""" try: from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives.asymmetric import rsa @@ -25,7 +24,10 @@ def _read_private_key(private_key: str, password: Optional[str] = None) -> bytes from cryptography.hazmat.primitives import serialization from cryptography.hazmat.primitives.asymmetric.types import PrivateKeyTypes except ModuleNotFoundError as e: - raise MissingDependencyException("SnowflakeCredentials with private key", dependencies=[f"{version.DLT_PKG_NAME}[snowflake]"]) from e + raise MissingDependencyException( + "SnowflakeCredentials with private key", + dependencies=[f"{version.DLT_PKG_NAME}[snowflake]"], + ) from e try: # load key from base64-encoded DER key @@ -45,7 +47,7 @@ def _read_private_key(private_key: str, password: Optional[str] = None) -> bytes return pkey.private_bytes( encoding=serialization.Encoding.DER, format=serialization.PrivateFormat.PKCS8, - encryption_algorithm=serialization.NoEncryption() + encryption_algorithm=serialization.NoEncryption(), ) @@ -65,24 +67,35 @@ class SnowflakeCredentials(ConnectionStringCredentials): def parse_native_representation(self, native_value: Any) -> None: super().parse_native_representation(native_value) - self.warehouse = self.query.get('warehouse') - self.role = self.query.get('role') - self.private_key = self.query.get('private_key') # type: ignore - self.private_key_passphrase = self.query.get('private_key_passphrase') # type: ignore + self.warehouse = self.query.get("warehouse") + self.role = self.query.get("role") + self.private_key = self.query.get("private_key") # type: ignore + self.private_key_passphrase = self.query.get("private_key_passphrase") # type: ignore if not self.is_partial() and (self.password or self.private_key): self.resolve() def on_resolved(self) -> None: if not self.password and not self.private_key: - raise ConfigurationValueError("Please specify password or private_key. SnowflakeCredentials supports password and private key authentication and one of those must be specified.") + raise ConfigurationValueError( + "Please specify password or private_key. SnowflakeCredentials supports password and" + " private key authentication and one of those must be specified." 
+ ) def to_url(self) -> URL: query = dict(self.query or {}) - if self.warehouse and 'warehouse' not in query: - query['warehouse'] = self.warehouse - if self.role and 'role' not in query: - query['role'] = self.role - return URL.create(self.drivername, self.username, self.password, self.host, self.port, self.database, query) + if self.warehouse and "warehouse" not in query: + query["warehouse"] = self.warehouse + if self.role and "role" not in query: + query["role"] = self.role + return URL.create( + self.drivername, + self.username, + self.password, + self.host, + self.port, + self.database, + query, + ) def to_connector_params(self) -> Dict[str, Any]: private_key: Optional[bytes] = None @@ -105,7 +118,7 @@ def to_connector_params(self) -> Dict[str, Any]: @configspec class SnowflakeClientConfiguration(DestinationClientDwhWithStagingConfiguration): - destination_name: Final[str] = "snowflake" # type: ignore[misc] + destination_type: Final[str] = "snowflake" # type: ignore[misc] credentials: SnowflakeCredentials stage_name: Optional[str] = None @@ -118,3 +131,18 @@ def fingerprint(self) -> str: if self.credentials and self.credentials.host: return digest128(self.credentials.host) return "" + + if TYPE_CHECKING: + + def __init__( + self, + *, + destination_type: str = None, + credentials: SnowflakeCredentials = None, + dataset_name: str = None, + default_schema_name: str = None, + stage_name: str = None, + keep_staged_files: bool = True, + destination_name: str = None, + environment: str = None, + ) -> None: ... diff --git a/dlt/destinations/impl/snowflake/factory.py b/dlt/destinations/impl/snowflake/factory.py new file mode 100644 index 0000000000..c4459232b7 --- /dev/null +++ b/dlt/destinations/impl/snowflake/factory.py @@ -0,0 +1,52 @@ +import typing as t + +from dlt.destinations.impl.snowflake.configuration import ( + SnowflakeCredentials, + SnowflakeClientConfiguration, +) +from dlt.destinations.impl.snowflake import capabilities +from dlt.common.destination import Destination, DestinationCapabilitiesContext + +if t.TYPE_CHECKING: + from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient + + +class snowflake(Destination[SnowflakeClientConfiguration, "SnowflakeClient"]): + spec = SnowflakeClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["SnowflakeClient"]: + from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient + + return SnowflakeClient + + def __init__( + self, + credentials: t.Union[SnowflakeCredentials, t.Dict[str, t.Any], str] = None, + stage_name: t.Optional[str] = None, + keep_staged_files: bool = True, + destination_name: t.Optional[str] = None, + environment: t.Optional[str] = None, + **kwargs: t.Any, + ) -> None: + """Configure the Snowflake destination to use in a pipeline. + + All arguments provided here supersede other configuration sources such as environment variables and dlt config files. + + Args: + credentials: Credentials to connect to the snowflake database. Can be an instance of `SnowflakeCredentials` or + a connection string in the format `snowflake://user:password@host:port/database` + stage_name: Name of an existing stage to use for loading data. 
Default uses implicit stage per table + keep_staged_files: Whether to delete or keep staged files after loading + """ + super().__init__( + credentials=credentials, + stage_name=stage_name, + keep_staged_files=keep_staged_files, + destination_name=destination_name, + environment=environment, + **kwargs, + ) diff --git a/dlt/destinations/snowflake/snowflake.py b/dlt/destinations/impl/snowflake/snowflake.py similarity index 67% rename from dlt/destinations/snowflake/snowflake.py rename to dlt/destinations/impl/snowflake/snowflake.py index f433ec7e7d..67df78c138 100644 --- a/dlt/destinations/snowflake/snowflake.py +++ b/dlt/destinations/impl/snowflake/snowflake.py @@ -2,8 +2,18 @@ from urllib.parse import urlparse, urlunparse from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import FollowupJob, NewLoadJob, TLoadJobState, LoadJob, CredentialsConfiguration, SupportsStagingDestination -from dlt.common.configuration.specs import AwsCredentialsWithoutDefaults, AzureCredentialsWithoutDefaults +from dlt.common.destination.reference import ( + FollowupJob, + NewLoadJob, + TLoadJobState, + LoadJob, + CredentialsConfiguration, + SupportsStagingDestination, +) +from dlt.common.configuration.specs import ( + AwsCredentialsWithoutDefaults, + AzureCredentialsWithoutDefaults, +) from dlt.common.data_types import TDataType from dlt.common.storages.file_storage import FileStorage from dlt.common.schema import TColumnSchema, Schema, TTableSchemaColumns @@ -14,11 +24,11 @@ from dlt.destinations.job_impl import EmptyLoadJob from dlt.destinations.exceptions import LoadJobTerminalException -from dlt.destinations.snowflake import capabilities -from dlt.destinations.snowflake.configuration import SnowflakeClientConfiguration -from dlt.destinations.snowflake.sql_client import SnowflakeSqlClient +from dlt.destinations.impl.snowflake import capabilities +from dlt.destinations.impl.snowflake.configuration import SnowflakeClientConfiguration +from dlt.destinations.impl.snowflake.sql_client import SnowflakeSqlClient from dlt.destinations.sql_jobs import SqlStagingCopyJob, SqlJobParams -from dlt.destinations.snowflake.sql_client import SnowflakeSqlClient +from dlt.destinations.impl.snowflake.sql_client import SnowflakeSqlClient from dlt.destinations.job_impl import NewReferenceJob from dlt.destinations.sql_client import SqlClientBase from dlt.destinations.type_mapping import TypeMapper @@ -54,23 +64,31 @@ class SnowflakeTypeMapper(TypeMapper): "TIMESTAMP_TZ": "timestamp", "BINARY": "binary", "VARIANT": "complex", - "TIME": "time" + "TIME": "time", } - def from_db_type(self, db_type: str, precision: Optional[int] = None, scale: Optional[int] = None) -> TColumnType: + def from_db_type( + self, db_type: str, precision: Optional[int] = None, scale: Optional[int] = None + ) -> TColumnType: if db_type == "NUMBER": if precision == self.BIGINT_PRECISION and scale == 0: - return dict(data_type='bigint') + return dict(data_type="bigint") elif (precision, scale) == self.capabilities.wei_precision: - return dict(data_type='wei') - return dict(data_type='decimal', precision=precision, scale=scale) + return dict(data_type="wei") + return dict(data_type="decimal", precision=precision, scale=scale) return super().from_db_type(db_type, precision, scale) class SnowflakeLoadJob(LoadJob, FollowupJob): def __init__( - self, file_path: str, table_name: str, load_id: str, client: SnowflakeSqlClient, - stage_name: Optional[str] = None, keep_staged_files: bool = True, staging_credentials: 
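For the Snowflake factory above, a hypothetical configuration sketch; the connection string uses the `warehouse` and `role` query parameters consumed by `SnowflakeCredentials.parse_native_representation`, and all values are placeholders:

```python
# Illustrative only: configuring the snowflake destination with an explicit
# stage and automatic cleanup of staged files.
import dlt
from dlt.destinations.impl.snowflake.factory import snowflake

pipeline = dlt.pipeline(
    pipeline_name="snowflake_factory_demo",
    destination=snowflake(
        credentials="snowflake://loader:secret@my_account/analytics?warehouse=COMPUTE_WH&role=LOADER",
        stage_name="dlt_stage",
        keep_staged_files=False,
    ),
    dataset_name="demo_dataset",
)
```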
Optional[CredentialsConfiguration] = None + self, + file_path: str, + table_name: str, + load_id: str, + client: SnowflakeSqlClient, + stage_name: Optional[str] = None, + keep_staged_files: bool = True, + staging_credentials: Optional[CredentialsConfiguration] = None, ) -> None: file_name = FileStorage.get_file_name_from_file_path(file_path) super().__init__(file_name) @@ -78,8 +96,14 @@ def __init__( qualified_table_name = client.make_qualified_table_name(table_name) # extract and prepare some vars - bucket_path = NewReferenceJob.resolve_reference(file_path) if NewReferenceJob.is_reference_job(file_path) else "" - file_name = FileStorage.get_file_name_from_file_path(bucket_path) if bucket_path else file_name + bucket_path = ( + NewReferenceJob.resolve_reference(file_path) + if NewReferenceJob.is_reference_job(file_path) + else "" + ) + file_name = ( + FileStorage.get_file_name_from_file_path(bucket_path) if bucket_path else file_name + ) from_clause = "" credentials_clause = "" files_clause = "" @@ -93,10 +117,18 @@ def __init__( from_clause = f"FROM '@{stage_name}'" files_clause = f"FILES = ('{bucket_url.path.lstrip('/')}')" # referencing an staged files via a bucket URL requires explicit AWS credentials - elif bucket_scheme == "s3" and staging_credentials and isinstance(staging_credentials, AwsCredentialsWithoutDefaults): + elif ( + bucket_scheme == "s3" + and staging_credentials + and isinstance(staging_credentials, AwsCredentialsWithoutDefaults) + ): credentials_clause = f"""CREDENTIALS=(AWS_KEY_ID='{staging_credentials.aws_access_key_id}' AWS_SECRET_KEY='{staging_credentials.aws_secret_access_key}')""" from_clause = f"FROM '{bucket_path}'" - elif bucket_scheme in ["az", "abfs"] and staging_credentials and isinstance(staging_credentials, AzureCredentialsWithoutDefaults): + elif ( + bucket_scheme in ["az", "abfs"] + and staging_credentials + and isinstance(staging_credentials, AzureCredentialsWithoutDefaults) + ): # Explicit azure credentials are needed to load from bucket without a named stage credentials_clause = f"CREDENTIALS=(AZURE_SAS_TOKEN='?{staging_credentials.azure_storage_sas_token}')" # Converts an az:/// to azure://.blob.core.windows.net// @@ -106,7 +138,7 @@ def __init__( bucket_url._replace( scheme="azure", netloc=f"{staging_credentials.azure_storage_account_name}.blob.core.windows.net", - path=_path + path=_path, ) ) from_clause = f"FROM '{bucket_path}'" @@ -115,14 +147,19 @@ def __init__( bucket_path = bucket_path.replace("gs://", "gcs://") if not stage_name: # when loading from bucket stage must be given - raise LoadJobTerminalException(file_path, f"Cannot load from bucket path {bucket_path} without a stage name. See https://dlthub.com/docs/dlt-ecosystem/destinations/snowflake for instructions on setting up the `stage_name`") + raise LoadJobTerminalException( + file_path, + f"Cannot load from bucket path {bucket_path} without a stage name. 
See" + " https://dlthub.com/docs/dlt-ecosystem/destinations/snowflake for" + " instructions on setting up the `stage_name`", + ) from_clause = f"FROM @{stage_name}/" files_clause = f"FILES = ('{urlparse(bucket_path).path.lstrip('/')}')" else: # this means we have a local file if not stage_name: # Use implicit table stage by default: "SCHEMA_NAME"."%TABLE_NAME" - stage_name = client.make_qualified_table_name('%'+table_name) + stage_name = client.make_qualified_table_name("%" + table_name) stage_file_path = f'@{stage_name}/"{load_id}"/{file_name}' from_clause = f"FROM {stage_file_path}" @@ -134,19 +171,19 @@ def __init__( with client.begin_transaction(): # PUT and COPY in one tx if local file, otherwise only copy if not bucket_path: - client.execute_sql(f'PUT file://{file_path} @{stage_name}/"{load_id}" OVERWRITE = TRUE, AUTO_COMPRESS = FALSE') - client.execute_sql( - f"""COPY INTO {qualified_table_name} + client.execute_sql( + f'PUT file://{file_path} @{stage_name}/"{load_id}" OVERWRITE = TRUE,' + " AUTO_COMPRESS = FALSE" + ) + client.execute_sql(f"""COPY INTO {qualified_table_name} {from_clause} {files_clause} {credentials_clause} FILE_FORMAT = {source_format} MATCH_BY_COLUMN_NAME='CASE_INSENSITIVE' - """ - ) + """) if stage_file_path and not keep_staged_files: - client.execute_sql(f'REMOVE {stage_file_path}') - + client.execute_sql(f"REMOVE {stage_file_path}") def state(self) -> TLoadJobState: return "completed" @@ -154,10 +191,15 @@ def state(self) -> TLoadJobState: def exception(self) -> str: raise NotImplementedError() -class SnowflakeStagingCopyJob(SqlStagingCopyJob): +class SnowflakeStagingCopyJob(SqlStagingCopyJob): @classmethod - def generate_sql(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClientBase[Any], params: Optional[SqlJobParams] = None) -> List[str]: + def generate_sql( + cls, + table_chain: Sequence[TTableSchema], + sql_client: SqlClientBase[Any], + params: Optional[SqlJobParams] = None, + ) -> List[str]: sql: List[str] = [] for table in table_chain: with sql_client.with_staging_dataset(staging=True): @@ -173,10 +215,7 @@ class SnowflakeClient(SqlJobClientWithStaging, SupportsStagingDestination): capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() def __init__(self, schema: Schema, config: SnowflakeClientConfiguration) -> None: - sql_client = SnowflakeSqlClient( - config.normalize_dataset_name(schema), - config.credentials - ) + sql_client = SnowflakeSqlClient(config.normalize_dataset_name(schema), config.credentials) super().__init__(schema, config, sql_client) self.config: SnowflakeClientConfiguration = config self.sql_client: SnowflakeSqlClient = sql_client # type: ignore @@ -188,43 +227,64 @@ def start_file_load(self, table: TTableSchema, file_path: str, load_id: str) -> if not job: job = SnowflakeLoadJob( file_path, - table['name'], + table["name"], load_id, self.sql_client, stage_name=self.config.stage_name, keep_staged_files=self.config.keep_staged_files, - staging_credentials=self.config.staging_config.credentials if self.config.staging_config else None + staging_credentials=( + self.config.staging_config.credentials if self.config.staging_config else None + ), ) return job def restore_file_load(self, file_path: str) -> LoadJob: return EmptyLoadJob.from_file_path(file_path, "completed") - def _make_add_column_sql(self, new_columns: Sequence[TColumnSchema], table_format: TTableFormat = None) -> List[str]: + def _make_add_column_sql( + self, new_columns: Sequence[TColumnSchema], table_format: TTableFormat = None + ) -> List[str]: # 
Override because snowflake requires multiple columns in a single ADD COLUMN clause - return ["ADD COLUMN\n" + ",\n".join(self._get_column_def_sql(c, table_format) for c in new_columns)] - - def _create_replace_followup_jobs(self, table_chain: Sequence[TTableSchema]) -> List[NewLoadJob]: + return [ + "ADD COLUMN\n" + + ",\n".join(self._get_column_def_sql(c, table_format) for c in new_columns) + ] + + def _create_replace_followup_jobs( + self, table_chain: Sequence[TTableSchema] + ) -> List[NewLoadJob]: if self.config.replace_strategy == "staging-optimized": return [SnowflakeStagingCopyJob.from_table_chain(table_chain, self.sql_client)] return super()._create_replace_followup_jobs(table_chain) - def _get_table_update_sql(self, table_name: str, new_columns: Sequence[TColumnSchema], generate_alter: bool, separate_alters: bool = False) -> List[str]: + def _get_table_update_sql( + self, + table_name: str, + new_columns: Sequence[TColumnSchema], + generate_alter: bool, + separate_alters: bool = False, + ) -> List[str]: sql = super()._get_table_update_sql(table_name, new_columns, generate_alter) - cluster_list = [self.capabilities.escape_identifier(c['name']) for c in new_columns if c.get('cluster')] + cluster_list = [ + self.capabilities.escape_identifier(c["name"]) for c in new_columns if c.get("cluster") + ] if cluster_list: sql[0] = sql[0] + "\nCLUSTER BY (" + ",".join(cluster_list) + ")" return sql - def _from_db_type(self, bq_t: str, precision: Optional[int], scale: Optional[int]) -> TColumnType: + def _from_db_type( + self, bq_t: str, precision: Optional[int], scale: Optional[int] + ) -> TColumnType: return self.type_mapper.from_db_type(bq_t, precision, scale) def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = None) -> str: name = self.capabilities.escape_identifier(c["name"]) - return f"{name} {self.type_mapper.to_db_type(c)} {self._gen_not_null(c.get('nullable', True))}" + return ( + f"{name} {self.type_mapper.to_db_type(c)} {self._gen_not_null(c.get('nullable', True))}" + ) def get_storage_table(self, table_name: str) -> Tuple[bool, TTableSchemaColumns]: table_name = table_name.upper() # All snowflake tables are uppercased in information schema diff --git a/dlt/destinations/snowflake/sql_client.py b/dlt/destinations/impl/snowflake/sql_client.py similarity index 85% rename from dlt/destinations/snowflake/sql_client.py rename to dlt/destinations/impl/snowflake/sql_client.py index 40cdc990a0..ba932277df 100644 --- a/dlt/destinations/snowflake/sql_client.py +++ b/dlt/destinations/impl/snowflake/sql_client.py @@ -4,11 +4,21 @@ import snowflake.connector as snowflake_lib from dlt.common.destination import DestinationCapabilitiesContext -from dlt.destinations.exceptions import DatabaseTerminalException, DatabaseTransientException, DatabaseUndefinedRelation -from dlt.destinations.sql_client import DBApiCursorImpl, SqlClientBase, raise_database_error, raise_open_connection_error +from dlt.destinations.exceptions import ( + DatabaseTerminalException, + DatabaseTransientException, + DatabaseUndefinedRelation, +) +from dlt.destinations.sql_client import ( + DBApiCursorImpl, + SqlClientBase, + raise_database_error, + raise_open_connection_error, +) from dlt.destinations.typing import DBApi, DBApiCursor, DBTransaction, DataFrame -from dlt.destinations.snowflake.configuration import SnowflakeCredentials -from dlt.destinations.snowflake import capabilities +from dlt.destinations.impl.snowflake.configuration import SnowflakeCredentials +from dlt.destinations.impl.snowflake 
import capabilities + class SnowflakeCursorImpl(DBApiCursorImpl): native_cursor: snowflake_lib.cursor.SnowflakeCursor # type: ignore[assignment] @@ -20,7 +30,6 @@ def df(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]: class SnowflakeSqlClient(SqlClientBase[snowflake_lib.SnowflakeConnection], DBTransaction): - dbapi: ClassVar[DBApi] = snowflake_lib capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() @@ -36,8 +45,7 @@ def open_connection(self) -> snowflake_lib.SnowflakeConnection: if "timezone" not in conn_params: conn_params["timezone"] = "UTC" self._conn = snowflake_lib.connect( - schema=self.fully_qualified_dataset_name(), - **conn_params + schema=self.fully_qualified_dataset_name(), **conn_params ) return self._conn @@ -77,7 +85,9 @@ def drop_tables(self, *tables: str) -> None: with suppress(DatabaseUndefinedRelation): super().drop_tables(*tables) - def execute_sql(self, sql: AnyStr, *args: Any, **kwargs: Any) -> Optional[Sequence[Sequence[Any]]]: + def execute_sql( + self, sql: AnyStr, *args: Any, **kwargs: Any + ) -> Optional[Sequence[Sequence[Any]]]: with self.execute_query(sql, *args, **kwargs) as curr: if curr.description is None: return None @@ -115,7 +125,7 @@ def _reset_connection(self) -> None: @classmethod def _make_database_exception(cls, ex: Exception) -> Exception: if isinstance(ex, snowflake_lib.errors.ProgrammingError): - if ex.sqlstate == 'P0000' and ex.errno == 100132: + if ex.sqlstate == "P0000" and ex.errno == 100132: # Error in a multi statement execution. These don't show the original error codes msg = str(ex) if "NULL result in a non-nullable column" in msg: @@ -124,11 +134,11 @@ def _make_database_exception(cls, ex: Exception) -> Exception: return DatabaseUndefinedRelation(ex) else: return DatabaseTransientException(ex) - if ex.sqlstate in {'42S02', '02000'}: + if ex.sqlstate in {"42S02", "02000"}: return DatabaseUndefinedRelation(ex) - elif ex.sqlstate == '22023': # Adding non-nullable no-default column + elif ex.sqlstate == "22023": # Adding non-nullable no-default column return DatabaseTerminalException(ex) - elif ex.sqlstate == '42000' and ex.errno == 904: # Invalid identifier + elif ex.sqlstate == "42000" and ex.errno == 904: # Invalid identifier return DatabaseTerminalException(ex) elif ex.sqlstate == "22000": return DatabaseTerminalException(ex) @@ -152,7 +162,9 @@ def _make_database_exception(cls, ex: Exception) -> Exception: return ex @staticmethod - def _maybe_make_terminal_exception_from_data_error(snowflake_ex: snowflake_lib.DatabaseError) -> Optional[Exception]: + def _maybe_make_terminal_exception_from_data_error( + snowflake_ex: snowflake_lib.DatabaseError, + ) -> Optional[Exception]: return None @staticmethod diff --git a/dlt/destinations/weaviate/README.md b/dlt/destinations/impl/weaviate/README.md similarity index 100% rename from dlt/destinations/weaviate/README.md rename to dlt/destinations/impl/weaviate/README.md diff --git a/dlt/destinations/impl/weaviate/__init__.py b/dlt/destinations/impl/weaviate/__init__.py new file mode 100644 index 0000000000..143e0260d2 --- /dev/null +++ b/dlt/destinations/impl/weaviate/__init__.py @@ -0,0 +1,19 @@ +from dlt.common.destination import DestinationCapabilitiesContext +from dlt.destinations.impl.weaviate.weaviate_adapter import weaviate_adapter + + +def capabilities() -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext() + caps.preferred_loader_file_format = "jsonl" + caps.supported_loader_file_formats = ["jsonl"] + + caps.max_identifier_length 
= 200 + caps.max_column_identifier_length = 1024 + caps.max_query_length = 8 * 1024 * 1024 + caps.is_max_query_length_in_bytes = False + caps.max_text_data_type_length = 8 * 1024 * 1024 + caps.is_max_text_data_type_length_in_bytes = False + caps.supports_ddl_transactions = False + caps.naming_convention = "dlt.destinations.impl.weaviate.naming" + + return caps diff --git a/dlt/destinations/weaviate/ci_naming.py b/dlt/destinations/impl/weaviate/ci_naming.py similarity index 99% rename from dlt/destinations/weaviate/ci_naming.py rename to dlt/destinations/impl/weaviate/ci_naming.py index 3b1c068133..cc8936f42d 100644 --- a/dlt/destinations/weaviate/ci_naming.py +++ b/dlt/destinations/impl/weaviate/ci_naming.py @@ -1,5 +1,6 @@ from .naming import NamingConvention as WeaviateNamingConvention + class NamingConvention(WeaviateNamingConvention): def _lowercase_property(self, identifier: str) -> str: """Lowercase the whole property to become case insensitive""" diff --git a/dlt/destinations/weaviate/configuration.py b/dlt/destinations/impl/weaviate/configuration.py similarity index 52% rename from dlt/destinations/weaviate/configuration.py rename to dlt/destinations/impl/weaviate/configuration.py index 054e8bef25..5014e69163 100644 --- a/dlt/destinations/weaviate/configuration.py +++ b/dlt/destinations/impl/weaviate/configuration.py @@ -1,4 +1,4 @@ -from typing import Dict, Literal, Optional, Final +from typing import Dict, Literal, Optional, Final, TYPE_CHECKING from dataclasses import field from urllib.parse import urlparse @@ -24,9 +24,9 @@ def __str__(self) -> str: @configspec class WeaviateClientConfiguration(DestinationClientDwhConfiguration): - destination_name: Final[str] = "weaviate" # type: ignore - # make it optional do empty dataset is allowed - dataset_name: Optional[str] = None # type: ignore + destination_type: Final[str] = "weaviate" # type: ignore + # make it optional so empty dataset is allowed + dataset_name: Optional[str] = None # type: ignore[misc] batch_size: int = 100 batch_workers: int = 1 @@ -34,20 +34,22 @@ class WeaviateClientConfiguration(DestinationClientDwhConfiguration): batch_retries: int = 5 conn_timeout: float = 10.0 - read_timeout: float = 3*60.0 + read_timeout: float = 3 * 60.0 startup_period: int = 5 dataset_separator: str = "_" credentials: WeaviateCredentials vectorizer: str = "text2vec-openai" - module_config: Dict[str, Dict[str, str]] = field(default_factory=lambda: { - "text2vec-openai": { - "model": "ada", - "modelVersion": "002", - "type": "text", + module_config: Dict[str, Dict[str, str]] = field( + default_factory=lambda: { + "text2vec-openai": { + "model": "ada", + "modelVersion": "002", + "type": "text", + } } - }) + ) def fingerprint(self) -> str: """Returns a fingerprint of host part of a connection string""" @@ -56,3 +58,26 @@ def fingerprint(self) -> str: hostname = urlparse(self.credentials.url).hostname return digest128(hostname) return "" + + if TYPE_CHECKING: + + def __init__( + self, + *, + destination_type: str = None, + credentials: WeaviateCredentials = None, + name: str = None, + environment: str = None, + dataset_name: str = None, + default_schema_name: str = None, + batch_size: int = None, + batch_workers: int = None, + batch_consistency: TWeaviateBatchConsistency = None, + batch_retries: int = None, + conn_timeout: float = None, + read_timeout: float = None, + startup_period: int = None, + dataset_separator: str = None, + vectorizer: str = None, + module_config: Dict[str, Dict[str, str]] = None, + ) -> None: ... 
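Because `WeaviateClientConfiguration` above is a configspec, its fields can also be supplied through dlt's configuration providers rather than the factory; a hedged sketch using environment variables (the exact variable names follow dlt's section__field convention and are an assumption here):

```python
# Illustrative only: overriding a few Weaviate batch settings via environment
# variables before the pipeline resolves its configuration.
import os

os.environ["DESTINATION__WEAVIATE__BATCH_SIZE"] = "200"
os.environ["DESTINATION__WEAVIATE__BATCH_WORKERS"] = "2"
os.environ["DESTINATION__WEAVIATE__CREDENTIALS__URL"] = "http://localhost:8080"
```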
diff --git a/dlt/destinations/impl/weaviate/exceptions.py b/dlt/destinations/impl/weaviate/exceptions.py new file mode 100644 index 0000000000..bff1b4cacc --- /dev/null +++ b/dlt/destinations/impl/weaviate/exceptions.py @@ -0,0 +1,16 @@ +from dlt.common.exceptions import DestinationException, DestinationTerminalException + + +class WeaviateBatchError(DestinationException): + pass + + +class PropertyNameConflict(DestinationTerminalException): + def __init__(self) -> None: + super().__init__( + "Your data contains items with identical property names when compared case insensitive." + " Weaviate cannot handle such data. Please clean up your data before loading or change" + " to case insensitive naming convention. See" + " https://dlthub.com/docs/dlt-ecosystem/destinations/weaviate#names-normalization for" + " details." + ) diff --git a/dlt/destinations/impl/weaviate/factory.py b/dlt/destinations/impl/weaviate/factory.py new file mode 100644 index 0000000000..0449e6cdd5 --- /dev/null +++ b/dlt/destinations/impl/weaviate/factory.py @@ -0,0 +1,53 @@ +import typing as t + +from dlt.common.destination import Destination, DestinationCapabilitiesContext + +from dlt.destinations.impl.weaviate.configuration import ( + WeaviateCredentials, + WeaviateClientConfiguration, +) +from dlt.destinations.impl.weaviate import capabilities + +if t.TYPE_CHECKING: + from dlt.destinations.impl.weaviate.weaviate_client import WeaviateClient + + +class weaviate(Destination[WeaviateClientConfiguration, "WeaviateClient"]): + spec = WeaviateClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["WeaviateClient"]: + from dlt.destinations.impl.weaviate.weaviate_client import WeaviateClient + + return WeaviateClient + + def __init__( + self, + credentials: t.Union[WeaviateCredentials, t.Dict[str, t.Any]] = None, + vectorizer: str = None, + module_config: t.Dict[str, t.Dict[str, str]] = None, + destination_name: t.Optional[str] = None, + environment: t.Optional[str] = None, + **kwargs: t.Any, + ) -> None: + """Configure the Weaviate destination to use in a pipeline. + + All destination config parameters can be provided as arguments here and will supersede other config sources (such as dlt config files and environment variables). 
+ + Args: + credentials: Weaviate credentials containing URL, API key and optional headers + vectorizer: The name of the Weaviate vectorizer to use + module_config: The configuration for the Weaviate modules + **kwargs: Additional arguments forwarded to the destination config + """ + super().__init__( + credentials=credentials, + vectorizer=vectorizer, + module_config=module_config, + destination_name=destination_name, + environment=environment, + **kwargs, + ) diff --git a/dlt/destinations/weaviate/naming.py b/dlt/destinations/impl/weaviate/naming.py similarity index 87% rename from dlt/destinations/weaviate/naming.py rename to dlt/destinations/impl/weaviate/naming.py index cf01983b90..f5c94c872f 100644 --- a/dlt/destinations/weaviate/naming.py +++ b/dlt/destinations/impl/weaviate/naming.py @@ -7,11 +7,7 @@ class NamingConvention(SnakeCaseNamingConvention): """Normalizes identifiers according to Weaviate documentation: https://weaviate.io/developers/weaviate/config-refs/schema#class""" - RESERVED_PROPERTIES = { - "id": "__id", - "_id": "___id", - "_additional": "__additional" - } + RESERVED_PROPERTIES = {"id": "__id", "_id": "___id", "_additional": "__additional"} _RE_UNDERSCORES = re.compile("([^_])__+") _STARTS_DIGIT = re.compile("^[0-9]") _STARTS_NON_LETTER = re.compile("^[0-9_]") @@ -19,7 +15,7 @@ class NamingConvention(SnakeCaseNamingConvention): def normalize_identifier(self, identifier: str) -> str: """Normalizes Weaviate property name by removing not allowed characters, replacing them by _ and contracting multiple _ into single one - and lowercasing the first character. + and lowercasing the first character. """ identifier = BaseNamingConvention.normalize_identifier(self, identifier) @@ -34,12 +30,15 @@ def normalize_identifier(self, identifier: str) -> str: def normalize_table_identifier(self, identifier: str) -> str: """Creates Weaviate class name. 
Runs property normalization and then creates capitalized case name by splitting on _ - https://weaviate.io/developers/weaviate/configuration/schema-configuration#create-a-class + https://weaviate.io/developers/weaviate/configuration/schema-configuration#create-a-class """ identifier = BaseNamingConvention.normalize_identifier(self, identifier) norm_identifier = self._base_normalize(identifier) # norm_identifier = norm_identifier.strip("_") - norm_identifier = "".join(s[1:2].upper() + s[2:] if s and s[0] == "_" else s for s in self._SPLIT_UNDERSCORE_NON_CAP.split(norm_identifier)) + norm_identifier = "".join( + s[1:2].upper() + s[2:] if s and s[0] == "_" else s + for s in self._SPLIT_UNDERSCORE_NON_CAP.split(norm_identifier) + ) norm_identifier = norm_identifier[0].upper() + norm_identifier[1:] if self._STARTS_NON_LETTER.match(norm_identifier): norm_identifier = "C" + norm_identifier diff --git a/dlt/destinations/weaviate/weaviate_adapter.py b/dlt/destinations/impl/weaviate/weaviate_adapter.py similarity index 92% rename from dlt/destinations/weaviate/weaviate_adapter.py rename to dlt/destinations/impl/weaviate/weaviate_adapter.py index 6829197273..2d5161d9e9 100644 --- a/dlt/destinations/weaviate/weaviate_adapter.py +++ b/dlt/destinations/impl/weaviate/weaviate_adapter.py @@ -1,8 +1,7 @@ from typing import Dict, Any, Literal, Set, get_args from dlt.common.schema.typing import TColumnNames, TTableSchemaColumns -from dlt.extract.decorators import resource as make_resource -from dlt.extract.source import DltResource +from dlt.extract import DltResource, resource as make_resource TTokenizationTMethod = Literal["word", "lowercase", "whitespace", "field"] TOKENIZATION_METHODS: Set[TTokenizationTMethod] = set(get_args(TTokenizationTMethod)) @@ -70,8 +69,7 @@ def weaviate_adapter( vectorize = [vectorize] if not isinstance(vectorize, list): raise ValueError( - "vectorize must be a list of column names or a single " - "column name as a string" + "vectorize must be a list of column names or a single column name as a string" ) # create weaviate-specific vectorize hints for column_name in vectorize: @@ -84,7 +82,10 @@ def weaviate_adapter( for column_name, method in tokenization.items(): if method not in TOKENIZATION_METHODS: allowed_methods = ", ".join(TOKENIZATION_METHODS) - raise ValueError(f"Tokenization type {method} for column {column_name} is invalid. Allowed methods are: {allowed_methods}") + raise ValueError( + f"Tokenization type {method} for column {column_name} is invalid. 
Allowed" + f" methods are: {allowed_methods}" + ) if column_name in column_hints: column_hints[column_name][TOKENIZATION_HINT] = method # type: ignore else: diff --git a/dlt/destinations/weaviate/weaviate_client.py b/dlt/destinations/impl/weaviate/weaviate_client.py similarity index 88% rename from dlt/destinations/weaviate/weaviate_client.py rename to dlt/destinations/impl/weaviate/weaviate_client.py index d47f08ab59..eb096d0a26 100644 --- a/dlt/destinations/weaviate/weaviate_client.py +++ b/dlt/destinations/impl/weaviate/weaviate_client.py @@ -31,23 +31,17 @@ from dlt.common.schema.typing import TColumnSchema, TColumnType from dlt.common.schema.utils import get_columns_names_with_prop from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import ( - TLoadJobState, - LoadJob, - JobClientBase, - WithStateSync - -) +from dlt.common.destination.reference import TLoadJobState, LoadJob, JobClientBase, WithStateSync from dlt.common.data_types import TDataType from dlt.common.storages import FileStorage -from dlt.destinations.weaviate.weaviate_adapter import VECTORIZE_HINT, TOKENIZATION_HINT +from dlt.destinations.impl.weaviate.weaviate_adapter import VECTORIZE_HINT, TOKENIZATION_HINT from dlt.destinations.job_impl import EmptyLoadJob from dlt.destinations.job_client_impl import StorageSchemaInfo, StateInfo -from dlt.destinations.weaviate import capabilities -from dlt.destinations.weaviate.configuration import WeaviateClientConfiguration -from dlt.destinations.weaviate.exceptions import PropertyNameConflict, WeaviateBatchError +from dlt.destinations.impl.weaviate import capabilities +from dlt.destinations.impl.weaviate.configuration import WeaviateClientConfiguration +from dlt.destinations.impl.weaviate.exceptions import PropertyNameConflict, WeaviateBatchError from dlt.destinations.type_mapping import TypeMapper @@ -55,7 +49,7 @@ "vectorizer": "none", "vectorIndexConfig": { "skip": True, - } + }, } @@ -105,7 +99,9 @@ def _wrap(self: JobClientBase, *args: Any, **kwargs: Any) -> Any: if status_ex.status_code == 403: raise DestinationTerminalException(status_ex) if status_ex.status_code == 422: - if "conflict for property" in str(status_ex) or "none vectorizer module" in str(status_ex): + if "conflict for property" in str(status_ex) or "none vectorizer module" in str( + status_ex + ): raise PropertyNameConflict() raise DestinationTerminalException(status_ex) # looks like there are no more terminal exception @@ -133,9 +129,7 @@ def _wrap(*args: Any, **kwargs: Any) -> Any: ) if "conflict for property" in message: raise PropertyNameConflict() - raise DestinationTransientException( - f"Batch failed {errors} AND WILL BE RETRIED" - ) + raise DestinationTransientException(f"Batch failed {errors} AND WILL BE RETRIED") except Exception: raise DestinationTransientException("Batch failed AND WILL BE RETRIED") @@ -194,9 +188,7 @@ def check_batch_result(results: List[StrAny]) -> None: weaviate_error_retries=weaviate.WeaviateErrorRetryConf( self.client_config.batch_retries ), - consistency_level=weaviate.ConsistencyLevel[ - self.client_config.batch_consistency - ], + consistency_level=weaviate.ConsistencyLevel[self.client_config.batch_consistency], num_workers=self.client_config.batch_workers, callback=check_batch_result, ) as batch: @@ -210,9 +202,7 @@ def check_batch_result(results: List[StrAny]) -> None: if key in data: data[key] = str(ensure_pendulum_datetime(data[key])) if self.unique_identifiers: - uuid = self.generate_uuid( - data, 
self.unique_identifiers, self.class_name - ) + uuid = self.generate_uuid(data, self.unique_identifiers, self.class_name) else: uuid = None @@ -242,7 +232,14 @@ class WeaviateClient(JobClientBase, WithStateSync): """Weaviate client implementation.""" capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - state_properties: ClassVar[List[str]] = ["version", "engine_version", "pipeline_name", "state", "created_at", "_dlt_load_id"] + state_properties: ClassVar[List[str]] = [ + "version", + "engine_version", + "pipeline_name", + "state", + "created_at", + "_dlt_load_id", + ] def __init__(self, schema: Schema, config: WeaviateClientConfiguration) -> None: super().__init__(schema, config) @@ -266,7 +263,11 @@ def sentinel_class(self) -> str: @staticmethod def create_db_client(config: WeaviateClientConfiguration) -> weaviate.Client: - auth_client_secret: weaviate.AuthApiKey = weaviate.AuthApiKey(api_key=config.credentials.api_key) if config.credentials.api_key else None + auth_client_secret: weaviate.AuthApiKey = ( + weaviate.AuthApiKey(api_key=config.credentials.api_key) + if config.credentials.api_key + else None + ) return weaviate.Client( url=config.credentials.url, timeout_config=(config.conn_timeout, config.read_timeout), @@ -314,9 +315,7 @@ def create_class( self.db_client.schema.create_class(updated_schema) - def create_class_property( - self, class_name: str, prop_schema: Dict[str, Any] - ) -> None: + def create_class_property(self, class_name: str, prop_schema: Dict[str, Any]) -> None: """Create a Weaviate class property. Args: @@ -434,14 +433,14 @@ def update_stored_schema( if schema_info is None: logger.info( f"Schema with hash {self.schema.stored_version_hash} " - f"not found in the storage. upgrading" + "not found in the storage. upgrading" ) self._execute_schema_update(only_tables) else: logger.info( f"Schema with hash {self.schema.stored_version_hash} " f"inserted at {schema_info.inserted_at} found " - f"in storage, no upgrade required" + "in storage, no upgrade required" ) return applied_update @@ -450,12 +449,8 @@ def _execute_schema_update(self, only_tables: Iterable[str]) -> None: for table_name in only_tables or self.schema.tables: exists, existing_columns = self.get_storage_table(table_name) # TODO: detect columns where vectorization was added or removed and modify it. 
currently we ignore change of hints - new_columns = self.schema.get_new_table_columns( - table_name, existing_columns - ) - logger.info( - f"Found {len(new_columns)} updates for {table_name} in {self.schema.name}" - ) + new_columns = self.schema.get_new_table_columns(table_name, existing_columns) + logger.info(f"Found {len(new_columns)} updates for {table_name} in {self.schema.name}") if len(new_columns) > 0: if exists: for column in new_columns: @@ -493,26 +488,33 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: stepsize = 10 offset = 0 while True: - state_records = self.get_records(self.schema.state_table_name, - sort={ - "path": ["created_at"], - "order": "desc" - }, where={ + state_records = self.get_records( + self.schema.state_table_name, + sort={"path": ["created_at"], "order": "desc"}, + where={ "path": ["pipeline_name"], "operator": "Equal", "valueString": pipeline_name, - }, limit=stepsize, offset=offset, properties=self.state_properties) + }, + limit=stepsize, + offset=offset, + properties=self.state_properties, + ) offset += stepsize if len(state_records) == 0: return None for state in state_records: load_id = state["_dlt_load_id"] - load_records = self.get_records(self.schema.loads_table_name, - where={ + load_records = self.get_records( + self.schema.loads_table_name, + where={ "path": ["load_id"], "operator": "Equal", "valueString": load_id, - }, limit=1, properties=["load_id", "status"]) + }, + limit=1, + properties=["load_id", "status"], + ) # if there is a load for this state which was successful, return the state if len(load_records): state["dlt_load_id"] = state.pop("_dlt_load_id") @@ -532,33 +534,45 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]: def get_stored_schema(self) -> Optional[StorageSchemaInfo]: """Retrieves newest schema from destination storage""" try: - record = self.get_records(self.schema.version_table_name, sort={ - "path": ["inserted_at"], - "order": "desc" - }, where={ + record = self.get_records( + self.schema.version_table_name, + sort={"path": ["inserted_at"], "order": "desc"}, + where={ "path": ["schema_name"], "operator": "Equal", "valueString": self.schema.name, }, - limit=1)[0] + limit=1, + )[0] return StorageSchemaInfo(**record) except IndexError: return None def get_stored_schema_by_hash(self, schema_hash: str) -> Optional[StorageSchemaInfo]: try: - record = self.get_records(self.schema.version_table_name, where={ + record = self.get_records( + self.schema.version_table_name, + where={ "path": ["version_hash"], "operator": "Equal", "valueString": schema_hash, - }, limit=1)[0] + }, + limit=1, + )[0] return StorageSchemaInfo(**record) except IndexError: return None @wrap_weaviate_error - def get_records(self, table_name: str, where: Dict[str, Any] = None, sort: Dict[str, Any] = None, limit: int = 0, offset: int = 0, properties: List[str] = None) -> List[Dict[str, Any]]: - + def get_records( + self, + table_name: str, + where: Dict[str, Any] = None, + sort: Dict[str, Any] = None, + limit: int = 0, + offset: int = 0, + properties: List[str] = None, + ) -> List[Dict[str, Any]]: # fail if schema does not exist? 
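# (Illustrative note, not part of the patch.) get_records builds a Weaviate GraphQL `Get`
# query against the qualified class name; the response["data"]["Get"][full_class_name]
# unwrapping further down assumes a payload shaped roughly like
#   {"data": {"Get": {"<qualified class name>": [{"<property>": ..., ...}]}}}
# where the class name and properties here are examples only.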
self.get_class_schema(table_name) @@ -578,7 +592,7 @@ def get_records(self, table_name: str, where: Dict[str, Any] = None, sort: Dict[ response = query.do() full_class_name = self.make_qualified_class_name(table_name) records = response["data"]["Get"][full_class_name] - return cast(List[Dict[str, Any]],records) + return cast(List[Dict[str, Any]], records) def make_weaviate_class_schema(self, table_name: str) -> Dict[str, Any]: """Creates a Weaviate class schema from a table schema.""" @@ -631,9 +645,7 @@ def _make_property_schema(self, column_name: str, column: TColumnSchema) -> Dict **extra_kv, } - def start_file_load( - self, table: TTableSchema, file_path: str, load_id: str - ) -> LoadJob: + def start_file_load(self, table: TTableSchema, file_path: str, load_id: str) -> LoadJob: return LoadWeaviateJob( self.schema, table, @@ -656,7 +668,6 @@ def complete_load(self, load_id: str) -> None: } self.create_object(properties, self.schema.loads_table_name) - def __enter__(self) -> "WeaviateClient": return self @@ -680,5 +691,7 @@ def _update_schema_in_storage(self, schema: Schema) -> None: } self.create_object(properties, self.schema.version_table_name) - def _from_db_type(self, wt_t: str, precision: Optional[int], scale: Optional[int]) -> TColumnType: + def _from_db_type( + self, wt_t: str, precision: Optional[int], scale: Optional[int] + ) -> TColumnType: return self.type_mapper.from_db_type(wt_t, precision, scale) diff --git a/dlt/destinations/insert_job_client.py b/dlt/destinations/insert_job_client.py index d5759db6c2..678ba43bcc 100644 --- a/dlt/destinations/insert_job_client.py +++ b/dlt/destinations/insert_job_client.py @@ -18,7 +18,9 @@ def __init__(self, table_name: str, file_path: str, sql_client: SqlClientBase[An self._sql_client = sql_client # insert file content immediately with self._sql_client.begin_transaction(): - for fragments in self._insert(sql_client.make_qualified_table_name(table_name), file_path): + for fragments in self._insert( + sql_client.make_qualified_table_name(table_name), file_path + ): self._sql_client.execute_fragments(fragments) def state(self) -> TLoadJobState: @@ -90,7 +92,6 @@ def _insert(self, qualified_table_name: str, file_path: str) -> Iterator[List[st class InsertValuesJobClient(SqlJobClientWithStaging): - def restore_file_load(self, file_path: str) -> LoadJob: """Returns a completed SqlLoadJob or InsertValuesJob diff --git a/dlt/destinations/job_client_impl.py b/dlt/destinations/job_client_impl.py index 7dabf278c2..ac68cfea8a 100644 --- a/dlt/destinations/job_client_impl.py +++ b/dlt/destinations/job_client_impl.py @@ -6,18 +6,56 @@ from copy import copy import datetime # noqa: 251 from types import TracebackType -from typing import Any, ClassVar, List, NamedTuple, Optional, Sequence, Tuple, Type, Iterable, Iterator, ContextManager, cast +from typing import ( + Any, + ClassVar, + List, + NamedTuple, + Optional, + Sequence, + Tuple, + Type, + Iterable, + Iterator, + ContextManager, + cast, +) import zlib import re from dlt.common import json, pendulum, logger from dlt.common.data_types import TDataType -from dlt.common.schema.typing import COLUMN_HINTS, TColumnType, TColumnSchemaBase, TTableSchema, TWriteDisposition, TTableFormat +from dlt.common.schema.typing import ( + COLUMN_HINTS, + TColumnType, + TColumnSchemaBase, + TTableSchema, + TWriteDisposition, + TTableFormat, +) from dlt.common.storages import FileStorage from dlt.common.schema import TColumnSchema, Schema, TTableSchemaColumns, TSchemaTables -from dlt.common.destination.reference import 
StateInfo, StorageSchemaInfo,WithStateSync, DestinationClientConfiguration, DestinationClientDwhConfiguration, DestinationClientDwhWithStagingConfiguration, NewLoadJob, WithStagingDataset, TLoadJobState, LoadJob, JobClientBase, FollowupJob, CredentialsConfiguration +from dlt.common.destination.reference import ( + StateInfo, + StorageSchemaInfo, + WithStateSync, + DestinationClientConfiguration, + DestinationClientDwhConfiguration, + DestinationClientDwhWithStagingConfiguration, + NewLoadJob, + WithStagingDataset, + TLoadJobState, + LoadJob, + JobClientBase, + FollowupJob, + CredentialsConfiguration, +) from dlt.common.utils import concat_strings_with_limit -from dlt.destinations.exceptions import DatabaseUndefinedRelation, DestinationSchemaTampered, DestinationSchemaWillNotUpdate +from dlt.destinations.exceptions import ( + DatabaseUndefinedRelation, + DestinationSchemaTampered, + DestinationSchemaWillNotUpdate, +) from dlt.destinations.job_impl import EmptyLoadJobWithoutFollowup, NewReferenceJob from dlt.destinations.sql_jobs import SqlMergeJob, SqlStagingCopyJob from dlt.common.schema.typing import LOADS_TABLE_NAME, VERSION_TABLE_NAME @@ -26,11 +64,8 @@ from dlt.destinations.sql_client import SqlClientBase # this should suffice for now -DDL_COMMANDS = [ - "ALTER", - "CREATE", - "DROP" -] +DDL_COMMANDS = ["ALTER", "CREATE", "DROP"] + class SqlLoadJob(LoadJob): """A job executing sql statement, without followup trait""" @@ -42,7 +77,10 @@ def __init__(self, file_path: str, sql_client: SqlClientBase[Any]) -> None: sql = f.read() # if we detect ddl transactions, only execute transaction if supported by client - if not self._string_containts_ddl_queries(sql) or sql_client.capabilities.supports_ddl_transactions: + if ( + not self._string_containts_ddl_queries(sql) + or sql_client.capabilities.supports_ddl_transactions + ): # with sql_client.begin_transaction(): sql_client.execute_sql(sql) else: @@ -68,7 +106,13 @@ def is_sql_job(file_path: str) -> bool: class CopyRemoteFileLoadJob(LoadJob, FollowupJob): - def __init__(self, table: TTableSchema, file_path: str, sql_client: SqlClientBase[Any], staging_credentials: Optional[CredentialsConfiguration] = None) -> None: + def __init__( + self, + table: TTableSchema, + file_path: str, + sql_client: SqlClientBase[Any], + staging_credentials: Optional[CredentialsConfiguration] = None, + ) -> None: super().__init__(FileStorage.get_file_name_from_file_path(file_path)) self._sql_client = sql_client self._staging_credentials = staging_credentials @@ -85,13 +129,35 @@ def state(self) -> TLoadJobState: class SqlJobClientBase(JobClientBase, WithStateSync): - - _VERSION_TABLE_SCHEMA_COLUMNS: ClassVar[Tuple[str, ...]] = ('version_hash', 'schema_name', 'version', 'engine_version', 'inserted_at', 'schema') - _STATE_TABLE_COLUMNS: ClassVar[Tuple[str, ...]] = ('version', 'engine_version', 'pipeline_name', 'state', 'created_at', '_dlt_load_id') - - def __init__(self, schema: Schema, config: DestinationClientConfiguration, sql_client: SqlClientBase[TNativeConn]) -> None: - self.version_table_schema_columns = ", ".join(sql_client.escape_column_name(col) for col in self._VERSION_TABLE_SCHEMA_COLUMNS) - self.state_table_columns = ", ".join(sql_client.escape_column_name(col) for col in self._STATE_TABLE_COLUMNS) + _VERSION_TABLE_SCHEMA_COLUMNS: ClassVar[Tuple[str, ...]] = ( + "version_hash", + "schema_name", + "version", + "engine_version", + "inserted_at", + "schema", + ) + _STATE_TABLE_COLUMNS: ClassVar[Tuple[str, ...]] = ( + "version", + "engine_version", + 
"pipeline_name", + "state", + "created_at", + "_dlt_load_id", + ) + + def __init__( + self, + schema: Schema, + config: DestinationClientConfiguration, + sql_client: SqlClientBase[TNativeConn], + ) -> None: + self.version_table_schema_columns = ", ".join( + sql_client.escape_column_name(col) for col in self._VERSION_TABLE_SCHEMA_COLUMNS + ) + self.state_table_columns = ", ".join( + sql_client.escape_column_name(col) for col in self._STATE_TABLE_COLUMNS + ) super().__init__(schema, config) self.sql_client = sql_client @@ -112,17 +178,25 @@ def initialize_storage(self, truncate_tables: Iterable[str] = None) -> None: def is_storage_initialized(self) -> bool: return self.sql_client.has_dataset() - def update_stored_schema(self, only_tables: Iterable[str] = None, expected_update: TSchemaTables = None) -> Optional[TSchemaTables]: + def update_stored_schema( + self, only_tables: Iterable[str] = None, expected_update: TSchemaTables = None + ) -> Optional[TSchemaTables]: super().update_stored_schema(only_tables, expected_update) applied_update: TSchemaTables = {} schema_info = self.get_stored_schema_by_hash(self.schema.stored_version_hash) if schema_info is None: - logger.info(f"Schema with hash {self.schema.stored_version_hash} not found in the storage. upgrading") + logger.info( + f"Schema with hash {self.schema.stored_version_hash} not found in the storage." + " upgrading" + ) with self.maybe_ddl_transaction(): applied_update = self._execute_schema_update_sql(only_tables) else: - logger.info(f"Schema with hash {self.schema.stored_version_hash} inserted at {schema_info.inserted_at} found in storage, no upgrade required") + logger.info( + f"Schema with hash {self.schema.stored_version_hash} inserted at" + f" {schema_info.inserted_at} found in storage, no upgrade required" + ) return applied_update def drop_tables(self, *tables: str, replace_schema: bool = True) -> None: @@ -141,7 +215,10 @@ def maybe_ddl_transaction(self) -> Iterator[None]: yield def should_truncate_table_before_load(self, table: TTableSchema) -> bool: - return table["write_disposition"] == "replace" and self.config.replace_strategy == "truncate-and-insert" + return ( + table["write_disposition"] == "replace" + and self.config.replace_strategy == "truncate-and-insert" + ) def _create_append_followup_jobs(self, table_chain: Sequence[TTableSchema]) -> List[NewLoadJob]: return [] @@ -149,13 +226,19 @@ def _create_append_followup_jobs(self, table_chain: Sequence[TTableSchema]) -> L def _create_merge_followup_jobs(self, table_chain: Sequence[TTableSchema]) -> List[NewLoadJob]: return [SqlMergeJob.from_table_chain(table_chain, self.sql_client)] - def _create_replace_followup_jobs(self, table_chain: Sequence[TTableSchema]) -> List[NewLoadJob]: + def _create_replace_followup_jobs( + self, table_chain: Sequence[TTableSchema] + ) -> List[NewLoadJob]: jobs: List[NewLoadJob] = [] if self.config.replace_strategy in ["insert-from-staging", "staging-optimized"]: - jobs.append(SqlStagingCopyJob.from_table_chain(table_chain, self.sql_client, {"replace": True})) + jobs.append( + SqlStagingCopyJob.from_table_chain(table_chain, self.sql_client, {"replace": True}) + ) return jobs - def create_table_chain_completed_followup_jobs(self, table_chain: Sequence[TTableSchema]) -> List[NewLoadJob]: + def create_table_chain_completed_followup_jobs( + self, table_chain: Sequence[TTableSchema] + ) -> List[NewLoadJob]: """Creates a list of followup jobs for merge write disposition and staging replace strategies""" jobs = 
super().create_table_chain_completed_followup_jobs(table_chain) write_disposition = table_chain[0]["write_disposition"] @@ -194,19 +277,25 @@ def complete_load(self, load_id: str) -> None: name = self.sql_client.make_qualified_table_name(self.schema.loads_table_name) now_ts = pendulum.now() self.sql_client.execute_sql( - f"INSERT INTO {name}(load_id, schema_name, status, inserted_at, schema_version_hash) VALUES(%s, %s, %s, %s, %s);", - load_id, self.schema.name, 0, now_ts, self.schema.version_hash + f"INSERT INTO {name}(load_id, schema_name, status, inserted_at, schema_version_hash)" + " VALUES(%s, %s, %s, %s, %s);", + load_id, + self.schema.name, + 0, + now_ts, + self.schema.version_hash, ) def __enter__(self) -> "SqlJobClientBase": self.sql_client.open_connection() return self - def __exit__(self, exc_type: Type[BaseException], exc_val: BaseException, exc_tb: TracebackType) -> None: + def __exit__( + self, exc_type: Type[BaseException], exc_val: BaseException, exc_tb: TracebackType + ) -> None: self.sql_client.close_connection() def get_storage_table(self, table_name: str) -> Tuple[bool, TTableSchemaColumns]: - def _null_to_bool(v: str) -> bool: if v == "NO": return False @@ -217,7 +306,9 @@ def _null_to_bool(v: str) -> bool: fields = ["column_name", "data_type", "is_nullable"] if self.capabilities.schema_supports_numeric_precision: fields += ["numeric_precision", "numeric_scale"] - db_params = self.sql_client.make_qualified_table_name(table_name, escape=False).split(".", 3) + db_params = self.sql_client.make_qualified_table_name(table_name, escape=False).split( + ".", 3 + ) query = f""" SELECT {",".join(fields)} FROM INFORMATION_SCHEMA.COLUMNS @@ -234,29 +325,40 @@ def _null_to_bool(v: str) -> bool: return False, schema_table # TODO: pull more data to infer indexes, PK and uniques attributes/constraints for c in rows: - numeric_precision = c[3] if self.capabilities.schema_supports_numeric_precision else None + numeric_precision = ( + c[3] if self.capabilities.schema_supports_numeric_precision else None + ) numeric_scale = c[4] if self.capabilities.schema_supports_numeric_precision else None schema_c: TColumnSchemaBase = { "name": c[0], "nullable": _null_to_bool(c[2]), - **self._from_db_type(c[1], numeric_precision, numeric_scale) + **self._from_db_type(c[1], numeric_precision, numeric_scale), } schema_table[c[0]] = schema_c # type: ignore return True, schema_table @abstractmethod - def _from_db_type(self, db_type: str, precision: Optional[int], scale: Optional[int]) -> TColumnType: + def _from_db_type( + self, db_type: str, precision: Optional[int], scale: Optional[int] + ) -> TColumnType: pass def get_stored_schema(self) -> StorageSchemaInfo: name = self.sql_client.make_qualified_table_name(self.schema.version_table_name) - query = f"SELECT {self.version_table_schema_columns} FROM {name} WHERE schema_name = %s ORDER BY inserted_at DESC;" + query = ( + f"SELECT {self.version_table_schema_columns} FROM {name} WHERE schema_name = %s ORDER" + " BY inserted_at DESC;" + ) return self._row_to_schema_info(query, self.schema.name) def get_stored_state(self, pipeline_name: str) -> StateInfo: state_table = self.sql_client.make_qualified_table_name(self.schema.state_table_name) loads_table = self.sql_client.make_qualified_table_name(self.schema.loads_table_name) - query = f"SELECT {self.state_table_columns} FROM {state_table} AS s JOIN {loads_table} AS l ON l.load_id = s._dlt_load_id WHERE pipeline_name = %s AND l.status = 0 ORDER BY created_at DESC" + query = ( + f"SELECT 
{self.state_table_columns} FROM {state_table} AS s JOIN {loads_table} AS l ON" + " l.load_id = s._dlt_load_id WHERE pipeline_name = %s AND l.status = 0 ORDER BY" + " created_at DESC" + ) with self.sql_client.execute_query(query, pipeline_name) as cur: row = cur.fetchone() if not row: @@ -281,12 +383,16 @@ def _execute_schema_update_sql(self, only_tables: Iterable[str]) -> TSchemaTable sql_scripts, schema_update = self._build_schema_update_sql(only_tables) # stay within max query size when doing DDL. some db backends use bytes not characters so decrease limit by half # assuming that most of the characters in DDL encode into single bytes - for sql_fragment in concat_strings_with_limit(sql_scripts, "\n", self.capabilities.max_query_length // 2): + for sql_fragment in concat_strings_with_limit( + sql_scripts, "\n", self.capabilities.max_query_length // 2 + ): self.sql_client.execute_sql(sql_fragment) self._update_schema_in_storage(self.schema) return schema_update - def _build_schema_update_sql(self, only_tables: Iterable[str]) -> Tuple[List[str], TSchemaTables]: + def _build_schema_update_sql( + self, only_tables: Iterable[str] + ) -> Tuple[List[str], TSchemaTables]: """Generates CREATE/ALTER sql for tables that differ between the destination and in client's Schema. This method compares all or `only_tables` defined in self.schema to the respective tables in the destination. It detects only new tables and new columns. @@ -318,11 +424,15 @@ def _build_schema_update_sql(self, only_tables: Iterable[str]) -> Tuple[List[str return sql_updates, schema_update - def _make_add_column_sql(self, new_columns: Sequence[TColumnSchema], table_format: TTableFormat = None) -> List[str]: + def _make_add_column_sql( + self, new_columns: Sequence[TColumnSchema], table_format: TTableFormat = None + ) -> List[str]: """Make one or more ADD COLUMN sql clauses to be joined in ALTER TABLE statement(s)""" return [f"ADD COLUMN {self._get_column_def_sql(c, table_format)}" for c in new_columns] - def _get_table_update_sql(self, table_name: str, new_columns: Sequence[TColumnSchema], generate_alter: bool) -> List[str]: + def _get_table_update_sql( + self, table_name: str, new_columns: Sequence[TColumnSchema], generate_alter: bool + ) -> List[str]: # build sql canonical_name = self.sql_client.make_qualified_table_name(table_name) table = self.get_load_table(table_name) @@ -342,20 +452,32 @@ def _get_table_update_sql(self, table_name: str, new_columns: Sequence[TColumnSc sql_result.append(sql_base + column_sql.join(add_column_statements)) else: # build ALTER as separate statement for each column (redshift limitation) - sql_result.extend([sql_base + col_statement for col_statement in add_column_statements]) + sql_result.extend( + [sql_base + col_statement for col_statement in add_column_statements] + ) # scan columns to get hints if generate_alter: # no hints may be specified on added columns for hint in COLUMN_HINTS: if any(c.get(hint, False) is True for c in new_columns): - hint_columns = [self.capabilities.escape_identifier(c["name"]) for c in new_columns if c.get(hint, False)] + hint_columns = [ + self.capabilities.escape_identifier(c["name"]) + for c in new_columns + if c.get(hint, False) + ] if hint == "not_null": - logger.warning(f"Column(s) {hint_columns} with NOT NULL are being added to existing table {canonical_name}." - " If there's data in the table the operation will fail.") + logger.warning( + f"Column(s) {hint_columns} with NOT NULL are being added to existing" + f" table {canonical_name}. 
If there's data in the table the operation" + " will fail." + ) else: - logger.warning(f"Column(s) {hint_columns} with hint {hint} are being added to existing table {canonical_name}." - " Several hint types may not be added to existing tables.") + logger.warning( + f"Column(s) {hint_columns} with hint {hint} are being added to existing" + f" table {canonical_name}. Several hint types may not be added to" + " existing tables." + ) return sql_result @abstractmethod @@ -366,14 +488,16 @@ def _get_column_def_sql(self, c: TColumnSchema, table_format: TTableFormat = Non def _gen_not_null(v: bool) -> str: return "NOT NULL" if not v else "" - def _create_table_update(self, table_name: str, storage_columns: TTableSchemaColumns) -> Sequence[TColumnSchema]: + def _create_table_update( + self, table_name: str, storage_columns: TTableSchemaColumns + ) -> Sequence[TColumnSchema]: # compare table with stored schema and produce delta updates = self.schema.get_new_table_columns(table_name, storage_columns) logger.info(f"Found {len(updates)} updates for {table_name} in {self.schema.name}") return updates def _row_to_schema_info(self, query: str, *args: Any) -> StorageSchemaInfo: - row: Tuple[Any,...] = None + row: Tuple[Any, ...] = None # if there's no dataset/schema return none info with contextlib.suppress(DatabaseUndefinedRelation): with self.sql_client.execute_query(query, *args) as cur: @@ -401,9 +525,7 @@ def _replace_schema_in_storage(self, schema: Schema) -> None: Save the given schema in storage and remove all previous versions with the same name """ name = self.sql_client.make_qualified_table_name(self.schema.version_table_name) - self.sql_client.execute_sql( - f"DELETE FROM {name} WHERE schema_name = %s;", schema.name - ) + self.sql_client.execute_sql(f"DELETE FROM {name} WHERE schema_name = %s;", schema.name) self._update_schema_in_storage(schema) def _update_schema_in_storage(self, schema: Schema) -> None: @@ -425,16 +547,22 @@ def _commit_schema_update(self, schema: Schema, schema_str: str) -> None: name = self.sql_client.make_qualified_table_name(self.schema.version_table_name) # values = schema.version_hash, schema.name, schema.version, schema.ENGINE_VERSION, str(now_ts), schema_str self.sql_client.execute_sql( - f"INSERT INTO {name}({self.version_table_schema_columns}) VALUES (%s, %s, %s, %s, %s, %s);", schema.stored_version_hash, schema.name, schema.version, schema.ENGINE_VERSION, now_ts, schema_str + f"INSERT INTO {name}({self.version_table_schema_columns}) VALUES (%s, %s, %s, %s, %s," + " %s);", + schema.stored_version_hash, + schema.name, + schema.version, + schema.ENGINE_VERSION, + now_ts, + schema_str, ) class SqlJobClientWithStaging(SqlJobClientBase, WithStagingDataset): - in_staging_mode: bool = False @contextlib.contextmanager - def with_staging_dataset(self)-> Iterator["SqlJobClientBase"]: + def with_staging_dataset(self) -> Iterator["SqlJobClientBase"]: try: with self.sql_client.with_staging_dataset(True): self.in_staging_mode = True @@ -445,7 +573,8 @@ def with_staging_dataset(self)-> Iterator["SqlJobClientBase"]: def should_load_data_to_staging_dataset(self, table: TTableSchema) -> bool: if table["write_disposition"] == "merge": return True - elif table["write_disposition"] == "replace" and (self.config.replace_strategy in ["insert-from-staging", "staging-optimized"]): + elif table["write_disposition"] == "replace" and ( + self.config.replace_strategy in ["insert-from-staging", "staging-optimized"] + ): return True return False - diff --git a/dlt/destinations/job_impl.py 
b/dlt/destinations/job_impl.py index fb3ba48b6d..7a6b98544c 100644 --- a/dlt/destinations/job_impl.py +++ b/dlt/destinations/job_impl.py @@ -6,6 +6,7 @@ from dlt.common.destination.reference import NewLoadJob, FollowupJob, TLoadJobState, LoadJob from dlt.common.storages.load_storage import ParsedLoadJobFileName + class EmptyLoadJobWithoutFollowup(LoadJob): def __init__(self, file_name: str, status: TLoadJobState, exception: str = None) -> None: self._status = status @@ -13,7 +14,9 @@ def __init__(self, file_name: str, status: TLoadJobState, exception: str = None) super().__init__(file_name) @classmethod - def from_file_path(cls, file_path: str, status: TLoadJobState, message: str = None) -> "EmptyLoadJobWithoutFollowup": + def from_file_path( + cls, file_path: str, status: TLoadJobState, message: str = None + ) -> "EmptyLoadJobWithoutFollowup": return cls(FileStorage.get_file_name_from_file_path(file_path), status, exception=message) def state(self) -> TLoadJobState: @@ -38,9 +41,11 @@ def new_file_path(self) -> str: """Path to a newly created temporary job file""" return self._new_file_path -class NewReferenceJob(NewLoadJobImpl): - def __init__(self, file_name: str, status: TLoadJobState, exception: str = None, remote_path: str = None) -> None: +class NewReferenceJob(NewLoadJobImpl): + def __init__( + self, file_name: str, status: TLoadJobState, exception: str = None, remote_path: str = None + ) -> None: file_name = os.path.splitext(file_name)[0] + ".reference" super().__init__(file_name, status, exception) self._remote_path = remote_path diff --git a/dlt/destinations/motherduck/sql_client.py b/dlt/destinations/motherduck/sql_client.py deleted file mode 100644 index 2fc664a2e8..0000000000 --- a/dlt/destinations/motherduck/sql_client.py +++ /dev/null @@ -1,27 +0,0 @@ -import duckdb - -from contextlib import contextmanager -from typing import Any, AnyStr, ClassVar, Iterator, Optional, Sequence -from dlt.common.destination import DestinationCapabilitiesContext - -from dlt.destinations.exceptions import DatabaseTerminalException, DatabaseTransientException, DatabaseUndefinedRelation -from dlt.destinations.typing import DBApi, DBApiCursor, DBTransaction, DataFrame -from dlt.destinations.sql_client import SqlClientBase, DBApiCursorImpl, raise_database_error, raise_open_connection_error - -from dlt.destinations.duckdb.sql_client import DuckDbSqlClient, DuckDBDBApiCursorImpl -from dlt.destinations.motherduck import capabilities -from dlt.destinations.motherduck.configuration import MotherDuckCredentials - - -class MotherDuckSqlClient(DuckDbSqlClient): - - capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() - - def __init__(self, dataset_name: str, credentials: MotherDuckCredentials) -> None: - super().__init__(dataset_name, credentials) - self.database_name = credentials.database - - def fully_qualified_dataset_name(self, escape: bool = True) -> str: - database_name = self.capabilities.escape_identifier(self.database_name) if escape else self.database_name - dataset_name = self.capabilities.escape_identifier(self.dataset_name) if escape else self.dataset_name - return f"{database_name}.{dataset_name}" diff --git a/dlt/destinations/path_utils.py b/dlt/destinations/path_utils.py index a6cf634452..047cb274e0 100644 --- a/dlt/destinations/path_utils.py +++ b/dlt/destinations/path_utils.py @@ -7,18 +7,9 @@ from dlt.destinations.exceptions import InvalidFilesystemLayout, CantExtractTablePrefix # TODO: ensure layout only has supported placeholders -SUPPORTED_PLACEHOLDERS = { - 
"schema_name", - "table_name", - "load_id", - "file_id", - "ext", - "curr_date" -} +SUPPORTED_PLACEHOLDERS = {"schema_name", "table_name", "load_id", "file_id", "ext", "curr_date"} -SUPPORTED_TABLE_NAME_PREFIX_PLACEHOLDERS = ( - "schema_name", -) +SUPPORTED_TABLE_NAME_PREFIX_PLACEHOLDERS = ("schema_name",) def check_layout(layout: str) -> List[str]: @@ -28,11 +19,14 @@ def check_layout(layout: str) -> List[str]: raise InvalidFilesystemLayout(invalid_placeholders) return placeholders + def get_placeholders(layout: str) -> List[str]: - return re.findall(r'\{(.*?)\}', layout) + return re.findall(r"\{(.*?)\}", layout) -def create_path(layout: str, schema_name: str, table_name: str, load_id: str, file_id: str, ext: str) -> str: +def create_path( + layout: str, schema_name: str, table_name: str, load_id: str, file_id: str, ext: str +) -> str: """create a filepath from the layout and our default params""" placeholders = check_layout(layout) path = layout.format( @@ -41,7 +35,7 @@ def create_path(layout: str, schema_name: str, table_name: str, load_id: str, fi load_id=load_id, file_id=file_id, ext=ext, - curr_date=str(pendulum.today()) + curr_date=str(pendulum.today()), ) # if extension is not defined, we append it at the end if "ext" not in placeholders: @@ -51,11 +45,11 @@ def create_path(layout: str, schema_name: str, table_name: str, load_id: str, fi def get_table_prefix_layout( layout: str, - supported_prefix_placeholders: Sequence[str] = SUPPORTED_TABLE_NAME_PREFIX_PLACEHOLDERS + supported_prefix_placeholders: Sequence[str] = SUPPORTED_TABLE_NAME_PREFIX_PLACEHOLDERS, ) -> str: """get layout fragment that defines positions of the table, cutting other placeholders - allowed `supported_prefix_placeholders` that may appear before table. + allowed `supported_prefix_placeholders` that may appear before table. """ placeholders = get_placeholders(layout) @@ -67,14 +61,20 @@ def get_table_prefix_layout( # fail if any other prefix is defined before table_name if [p for p in placeholders[:table_name_index] if p not in supported_prefix_placeholders]: if len(supported_prefix_placeholders) == 0: - details = "No other placeholders are allowed before {table_name} but you have %s present. " % placeholders[:table_name_index] + details = ( + "No other placeholders are allowed before {table_name} but you have %s present. " + % placeholders[:table_name_index] + ) else: - details = "Only %s are allowed before {table_name} but you have %s present. " % (supported_prefix_placeholders, placeholders[:table_name_index]) + details = "Only %s are allowed before {table_name} but you have %s present. " % ( + supported_prefix_placeholders, + placeholders[:table_name_index], + ) raise CantExtractTablePrefix(layout, details) # we include the char after the table_name here, this should be a separator not a new placeholder # this is to prevent selecting tables that have the same starting name - prefix = layout[:layout.index("{table_name}") + 13] + prefix = layout[: layout.index("{table_name}") + 13] if prefix[-1] == "{": raise CantExtractTablePrefix(layout, "A separator is required after a {table_name}. 
") diff --git a/dlt/destinations/qdrant/__init__.py b/dlt/destinations/qdrant/__init__.py deleted file mode 100644 index 7a8619ffcd..0000000000 --- a/dlt/destinations/qdrant/__init__.py +++ /dev/null @@ -1,53 +0,0 @@ -from typing import Type - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config -from dlt.common.destination.reference import ( - JobClientBase, - DestinationClientConfiguration, -) -from dlt.common.destination import DestinationCapabilitiesContext -from dlt.destinations.qdrant.qdrant_adapter import qdrant_adapter - -from dlt.destinations.qdrant.configuration import QdrantClientConfiguration - - -@with_config( - spec=QdrantClientConfiguration, - sections=( - known_sections.DESTINATION, - "qdrant", - ), -) -def _configure( - config: QdrantClientConfiguration = config.value, -) -> QdrantClientConfiguration: - return config - - -def capabilities() -> DestinationCapabilitiesContext: - caps = DestinationCapabilitiesContext() - caps.preferred_loader_file_format = "jsonl" - caps.supported_loader_file_formats = ["jsonl"] - - caps.max_identifier_length = 200 - caps.max_column_identifier_length = 1024 - caps.max_query_length = 8 * 1024 * 1024 - caps.is_max_query_length_in_bytes = False - caps.max_text_data_type_length = 8 * 1024 * 1024 - caps.is_max_text_data_type_length_in_bytes = False - caps.supports_ddl_transactions = False - - return caps - - -def client( - schema: Schema, initial_config: DestinationClientConfiguration = config.value -) -> JobClientBase: - from dlt.destinations.qdrant.qdrant_client import QdrantClient - return QdrantClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[QdrantClientConfiguration]: - return QdrantClientConfiguration diff --git a/dlt/destinations/sql_client.py b/dlt/destinations/sql_client.py index 68af420085..1e5f7031a5 100644 --- a/dlt/destinations/sql_client.py +++ b/dlt/destinations/sql_client.py @@ -3,7 +3,19 @@ from functools import wraps import inspect from types import TracebackType -from typing import Any, ClassVar, ContextManager, Generic, Iterator, Optional, Sequence, Tuple, Type, AnyStr, List +from typing import ( + Any, + ClassVar, + ContextManager, + Generic, + Iterator, + Optional, + Sequence, + Tuple, + Type, + AnyStr, + List, +) from dlt.common.typing import TFun from dlt.common.destination import DestinationCapabilitiesContext @@ -13,7 +25,6 @@ class SqlClientBase(ABC, Generic[TNativeConn]): - dbapi: ClassVar[DBApi] = None capabilities: ClassVar[DestinationCapabilitiesContext] = None @@ -45,7 +56,9 @@ def __enter__(self) -> "SqlClientBase[TNativeConn]": self.open_connection() return self - def __exit__(self, exc_type: Type[BaseException], exc_val: BaseException, exc_tb: TracebackType) -> None: + def __exit__( + self, exc_type: Type[BaseException], exc_val: BaseException, exc_tb: TracebackType + ) -> None: self.close_connection() @property @@ -78,20 +91,27 @@ def truncate_tables(self, *tables: str) -> None: def drop_tables(self, *tables: str) -> None: if not tables: return - statements = [f"DROP TABLE IF EXISTS {self.make_qualified_table_name(table)};" for table in tables] + statements = [ + f"DROP TABLE IF EXISTS {self.make_qualified_table_name(table)};" for table in tables + ] self.execute_fragments(statements) @abstractmethod - def execute_sql(self, sql: AnyStr, *args: Any, **kwargs: Any) -> Optional[Sequence[Sequence[Any]]]: + def execute_sql( + self, sql: AnyStr, *args: Any, **kwargs: Any + 
) -> Optional[Sequence[Sequence[Any]]]: pass @abstractmethod - def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> ContextManager[DBApiCursor]: + def execute_query( + self, query: AnyStr, *args: Any, **kwargs: Any + ) -> ContextManager[DBApiCursor]: pass - def execute_fragments(self, fragments: Sequence[AnyStr], *args: Any, **kwargs: Any) -> Optional[Sequence[Sequence[Any]]]: - """Executes several SQL fragments as efficiently as possible to prevent data copying. Default implementation just joins the strings and executes them together. - """ + def execute_fragments( + self, fragments: Sequence[AnyStr], *args: Any, **kwargs: Any + ) -> Optional[Sequence[Sequence[Any]]]: + """Executes several SQL fragments as efficiently as possible to prevent data copying. Default implementation just joins the strings and executes them together.""" return self.execute_sql("".join(fragments), *args, **kwargs) # type: ignore @abstractmethod @@ -109,7 +129,9 @@ def escape_column_name(self, column_name: str, escape: bool = True) -> str: return column_name @contextmanager - def with_alternative_dataset_name(self, dataset_name: str) -> Iterator["SqlClientBase[TNativeConn]"]: + def with_alternative_dataset_name( + self, dataset_name: str + ) -> Iterator["SqlClientBase[TNativeConn]"]: """Sets the `dataset_name` as the default dataset during the lifetime of the context. Does not modify any search paths in the existing connection.""" current_dataset_name = self.dataset_name try: @@ -119,7 +141,9 @@ def with_alternative_dataset_name(self, dataset_name: str) -> Iterator["SqlClien # restore previous dataset name self.dataset_name = current_dataset_name - def with_staging_dataset(self, staging: bool = False)-> ContextManager["SqlClientBase[TNativeConn]"]: + def with_staging_dataset( + self, staging: bool = False + ) -> ContextManager["SqlClientBase[TNativeConn]"]: dataset_name = self.dataset_name if staging: dataset_name = SqlClientBase.make_staging_dataset_name(dataset_name) @@ -127,7 +151,7 @@ def with_staging_dataset(self, staging: bool = False)-> ContextManager["SqlClien def _ensure_native_conn(self) -> None: if not self.native_connection: - raise LoadClientNotConnected(type(self).__name__ , self.dataset_name) + raise LoadClientNotConnected(type(self).__name__, self.dataset_name) @staticmethod @abstractmethod @@ -156,6 +180,7 @@ def _truncate_table_sql(self, qualified_table_name: str) -> str: class DBApiCursorImpl(DBApiCursor): """A DBApi Cursor wrapper with dataframes reading functionality""" + def __init__(self, curr: DBApiCursor) -> None: self.native_cursor = curr @@ -187,7 +212,6 @@ def df(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]: def raise_database_error(f: TFun) -> TFun: - @wraps(f) def _wrap_gen(self: SqlClientBase[Any], *args: Any, **kwargs: Any) -> Any: try: @@ -211,7 +235,6 @@ def _wrap(self: SqlClientBase[Any], *args: Any, **kwargs: Any) -> Any: def raise_open_connection_error(f: TFun) -> TFun: - @wraps(f) def _wrap(self: SqlClientBase[Any], *args: Any, **kwargs: Any) -> Any: try: diff --git a/dlt/destinations/sql_jobs.py b/dlt/destinations/sql_jobs.py index 4e8393ed74..d97a098669 100644 --- a/dlt/destinations/sql_jobs.py +++ b/dlt/destinations/sql_jobs.py @@ -11,67 +11,106 @@ from dlt.destinations.job_impl import NewLoadJobImpl from dlt.destinations.sql_client import SqlClientBase + class SqlJobParams(TypedDict): replace: Optional[bool] -DEFAULTS: SqlJobParams = { - "replace": False -} + +DEFAULTS: SqlJobParams = {"replace": False} + class 
SqlBaseJob(NewLoadJobImpl): """Sql base job for jobs that rely on the whole tablechain""" + failed_text: str = "" @classmethod - def from_table_chain(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClientBase[Any], params: Optional[SqlJobParams] = None) -> NewLoadJobImpl: + def from_table_chain( + cls, + table_chain: Sequence[TTableSchema], + sql_client: SqlClientBase[Any], + params: Optional[SqlJobParams] = None, + ) -> NewLoadJobImpl: """Generates a list of sql statements, that will be executed by the sql client when the job is executed in the loader. The `table_chain` contains a list schemas of a tables with parent-child relationship, ordered by the ancestry (the root of the tree is first on the list). """ params = cast(SqlJobParams, {**DEFAULTS, **(params or {})}) # type: ignore top_table = table_chain[0] - file_info = ParsedLoadJobFileName(top_table["name"], uniq_id()[:10], 0, "sql") + file_info = ParsedLoadJobFileName( + top_table["name"], ParsedLoadJobFileName.new_file_id(), 0, "sql" + ) try: # Remove line breaks from multiline statements and write one SQL statement per line in output file # to support clients that need to execute one statement at a time (i.e. snowflake) - sql = [' '.join(stmt.splitlines()) for stmt in cls.generate_sql(table_chain, sql_client, params)] - job = cls(file_info.job_id(), "running") + sql = [ + " ".join(stmt.splitlines()) + for stmt in cls.generate_sql(table_chain, sql_client, params) + ] + job = cls(file_info.file_name(), "running") job._save_text_file("\n".join(sql)) except Exception: # return failed job - tables_str = yaml.dump(table_chain, allow_unicode=True, default_flow_style=False, sort_keys=False) - job = cls(file_info.job_id(), "failed", pretty_format_exception()) + tables_str = yaml.dump( + table_chain, allow_unicode=True, default_flow_style=False, sort_keys=False + ) + job = cls(file_info.file_name(), "failed", pretty_format_exception()) job._save_text_file("\n".join([cls.failed_text, tables_str])) return job @classmethod - def generate_sql(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClientBase[Any], params: Optional[SqlJobParams] = None) -> List[str]: + def generate_sql( + cls, + table_chain: Sequence[TTableSchema], + sql_client: SqlClientBase[Any], + params: Optional[SqlJobParams] = None, + ) -> List[str]: pass class SqlStagingCopyJob(SqlBaseJob): """Generates a list of sql statements that copy the data from staging dataset into destination dataset.""" + failed_text: str = "Tried to generate a staging copy sql job for the following tables:" @classmethod - def generate_sql(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClientBase[Any], params: Optional[SqlJobParams] = None) -> List[str]: + def generate_sql( + cls, + table_chain: Sequence[TTableSchema], + sql_client: SqlClientBase[Any], + params: Optional[SqlJobParams] = None, + ) -> List[str]: sql: List[str] = [] for table in table_chain: with sql_client.with_staging_dataset(staging=True): staging_table_name = sql_client.make_qualified_table_name(table["name"]) table_name = sql_client.make_qualified_table_name(table["name"]) - columns = ", ".join(map(sql_client.capabilities.escape_identifier, get_columns_names_with_prop(table, "name"))) + columns = ", ".join( + map( + sql_client.capabilities.escape_identifier, + get_columns_names_with_prop(table, "name"), + ) + ) if params["replace"]: sql.append(sql_client._truncate_table_sql(table_name)) - sql.append(f"INSERT INTO {table_name}({columns}) SELECT {columns} FROM {staging_table_name};") + sql.append( + f"INSERT 
INTO {table_name}({columns}) SELECT {columns} FROM {staging_table_name};" + ) return sql + class SqlMergeJob(SqlBaseJob): """Generates a list of sql statements that merge the data from staging dataset into destination dataset.""" + failed_text: str = "Tried to generate a merge sql job for the following tables:" @classmethod - def generate_sql(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClientBase[Any], params: Optional[SqlJobParams] = None) -> List[str]: + def generate_sql( + cls, + table_chain: Sequence[TTableSchema], + sql_client: SqlClientBase[Any], + params: Optional[SqlJobParams] = None, + ) -> List[str]: """Generates a list of sql statements that merge the data in staging dataset with the data in destination dataset. The `table_chain` contains a list schemas of a tables with parent-child relationship, ordered by the ancestry (the root of the tree is first on the list). @@ -84,29 +123,46 @@ def generate_sql(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClient return cls.gen_merge_sql(table_chain, sql_client) @classmethod - def _gen_key_table_clauses(cls, primary_keys: Sequence[str], merge_keys: Sequence[str])-> List[str]: + def _gen_key_table_clauses( + cls, primary_keys: Sequence[str], merge_keys: Sequence[str] + ) -> List[str]: """Generate sql clauses to select rows to delete via merge and primary key. Return select all clause if no keys defined.""" clauses: List[str] = [] if primary_keys or merge_keys: if primary_keys: - clauses.append(" AND ".join(["%s.%s = %s.%s" % ("{d}", c, "{s}", c) for c in primary_keys])) + clauses.append( + " AND ".join(["%s.%s = %s.%s" % ("{d}", c, "{s}", c) for c in primary_keys]) + ) if merge_keys: - clauses.append(" AND ".join(["%s.%s = %s.%s" % ("{d}", c, "{s}", c) for c in merge_keys])) + clauses.append( + " AND ".join(["%s.%s = %s.%s" % ("{d}", c, "{s}", c) for c in merge_keys]) + ) return clauses or ["1=1"] @classmethod - def gen_key_table_clauses(cls, root_table_name: str, staging_root_table_name: str, key_clauses: Sequence[str], for_delete: bool) -> List[str]: + def gen_key_table_clauses( + cls, + root_table_name: str, + staging_root_table_name: str, + key_clauses: Sequence[str], + for_delete: bool, + ) -> List[str]: """Generate sql clauses that may be used to select or delete rows in root table of destination dataset - A list of clauses may be returned for engines that do not support OR in subqueries. Like BigQuery + A list of clauses may be returned for engines that do not support OR in subqueries. Like BigQuery """ - return [f"FROM {root_table_name} as d WHERE EXISTS (SELECT 1 FROM {staging_root_table_name} as s WHERE {' OR '.join([c.format(d='d',s='s') for c in key_clauses])})"] + return [ + f"FROM {root_table_name} as d WHERE EXISTS (SELECT 1 FROM {staging_root_table_name} as" + f" s WHERE {' OR '.join([c.format(d='d',s='s') for c in key_clauses])})" + ] @classmethod - def gen_delete_temp_table_sql(cls, unique_column: str, key_table_clauses: Sequence[str]) -> Tuple[List[str], str]: + def gen_delete_temp_table_sql( + cls, unique_column: str, key_table_clauses: Sequence[str] + ) -> Tuple[List[str], str]: """Generate sql that creates delete temp table and inserts `unique_column` from root table for all records to delete. May return several statements. - Returns temp table name for cases where special names are required like SQLServer. + Returns temp table name for cases where special names are required like SQLServer. 
""" sql: List[str] = [] temp_table_name = cls._new_temp_table_name("delete") @@ -117,7 +173,9 @@ def gen_delete_temp_table_sql(cls, unique_column: str, key_table_clauses: Sequen return sql, temp_table_name @classmethod - def gen_insert_temp_table_sql(cls, staging_root_table_name: str, primary_keys: Sequence[str], unique_column: str) -> Tuple[List[str], str]: + def gen_insert_temp_table_sql( + cls, staging_root_table_name: str, primary_keys: Sequence[str], unique_column: str + ) -> Tuple[List[str], str]: temp_table_name = cls._new_temp_table_name("insert") select_statement = f""" SELECT {unique_column} @@ -146,7 +204,9 @@ def _to_temp_table(cls, select_sql: str, temp_table_name: str) -> str: return f"CREATE TEMP TABLE {temp_table_name} AS {select_sql};" @classmethod - def gen_merge_sql(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClientBase[Any]) -> List[str]: + def gen_merge_sql( + cls, table_chain: Sequence[TTableSchema], sql_client: SqlClientBase[Any] + ) -> List[str]: sql: List[str] = [] root_table = table_chain[0] @@ -155,22 +215,35 @@ def gen_merge_sql(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClien with sql_client.with_staging_dataset(staging=True): staging_root_table_name = sql_client.make_qualified_table_name(root_table["name"]) # get merge and primary keys from top level - primary_keys = list(map(sql_client.capabilities.escape_identifier, get_columns_names_with_prop(root_table, "primary_key"))) - merge_keys = list(map(sql_client.capabilities.escape_identifier, get_columns_names_with_prop(root_table, "merge_key"))) + primary_keys = list( + map( + sql_client.capabilities.escape_identifier, + get_columns_names_with_prop(root_table, "primary_key"), + ) + ) + merge_keys = list( + map( + sql_client.capabilities.escape_identifier, + get_columns_names_with_prop(root_table, "merge_key"), + ) + ) key_clauses = cls._gen_key_table_clauses(primary_keys, merge_keys) unique_column: str = None root_key_column: str = None insert_temp_table_name: str = None - if len(table_chain) == 1: - key_table_clauses = cls.gen_key_table_clauses(root_table_name, staging_root_table_name, key_clauses, for_delete=True) + key_table_clauses = cls.gen_key_table_clauses( + root_table_name, staging_root_table_name, key_clauses, for_delete=True + ) # if no child tables, just delete data from top table for clause in key_table_clauses: sql.append(f"DELETE {clause};") else: - key_table_clauses = cls.gen_key_table_clauses(root_table_name, staging_root_table_name, key_clauses, for_delete=False) + key_table_clauses = cls.gen_key_table_clauses( + root_table_name, staging_root_table_name, key_clauses, for_delete=False + ) # use unique hint to create temp table with all identifiers to delete unique_columns = get_columns_names_with_prop(root_table, "unique") if not unique_columns: @@ -178,15 +251,21 @@ def gen_merge_sql(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClien sql_client.fully_qualified_dataset_name(), staging_root_table_name, [t["name"] for t in table_chain], - f"There is no unique column (ie _dlt_id) in top table {root_table['name']} so it is not possible to link child tables to it." 
+ f"There is no unique column (ie _dlt_id) in top table {root_table['name']} so" + " it is not possible to link child tables to it.", ) # get first unique column unique_column = sql_client.capabilities.escape_identifier(unique_columns[0]) # create temp table with unique identifier - create_delete_temp_table_sql, delete_temp_table_name = cls.gen_delete_temp_table_sql(unique_column, key_table_clauses) + create_delete_temp_table_sql, delete_temp_table_name = cls.gen_delete_temp_table_sql( + unique_column, key_table_clauses + ) sql.extend(create_delete_temp_table_sql) # delete top table - sql.append(f"DELETE FROM {root_table_name} WHERE {unique_column} IN (SELECT * FROM {delete_temp_table_name});") + sql.append( + f"DELETE FROM {root_table_name} WHERE {unique_column} IN (SELECT * FROM" + f" {delete_temp_table_name});" + ) # delete other tables for table in table_chain[1:]: table_name = sql_client.make_qualified_table_name(table["name"]) @@ -196,13 +275,22 @@ def gen_merge_sql(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClien sql_client.fully_qualified_dataset_name(), staging_root_table_name, [t["name"] for t in table_chain], - f"There is no root foreign key (ie _dlt_root_id) in child table {table['name']} so it is not possible to refer to top level table {root_table['name']} unique column {unique_column}" + "There is no root foreign key (ie _dlt_root_id) in child table" + f" {table['name']} so it is not possible to refer to top level table" + f" {root_table['name']} unique column {unique_column}", ) root_key_column = sql_client.capabilities.escape_identifier(root_key_columns[0]) - sql.append(f"DELETE FROM {table_name} WHERE {root_key_column} IN (SELECT * FROM {delete_temp_table_name});") + sql.append( + f"DELETE FROM {table_name} WHERE {root_key_column} IN (SELECT * FROM" + f" {delete_temp_table_name});" + ) # create temp table used to deduplicate, only when we have primary keys if primary_keys: - create_insert_temp_table_sql, insert_temp_table_name = cls.gen_insert_temp_table_sql(staging_root_table_name, primary_keys, unique_column) + create_insert_temp_table_sql, insert_temp_table_name = ( + cls.gen_insert_temp_table_sql( + staging_root_table_name, primary_keys, unique_column + ) + ) sql.extend(create_insert_temp_table_sql) # insert from staging to dataset, truncate staging table @@ -210,8 +298,15 @@ def gen_merge_sql(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClien table_name = sql_client.make_qualified_table_name(table["name"]) with sql_client.with_staging_dataset(staging=True): staging_table_name = sql_client.make_qualified_table_name(table["name"]) - columns = ", ".join(map(sql_client.capabilities.escape_identifier, get_columns_names_with_prop(table, "name"))) - insert_sql = f"INSERT INTO {table_name}({columns}) SELECT {columns} FROM {staging_table_name}" + columns = ", ".join( + map( + sql_client.capabilities.escape_identifier, + get_columns_names_with_prop(table, "name"), + ) + ) + insert_sql = ( + f"INSERT INTO {table_name}({columns}) SELECT {columns} FROM {staging_table_name}" + ) if len(primary_keys) > 0: if len(table_chain) == 1: insert_sql = f"""INSERT INTO {table_name}({columns}) @@ -222,11 +317,13 @@ def gen_merge_sql(cls, table_chain: Sequence[TTableSchema], sql_client: SqlClien """ else: uniq_column = unique_column if table.get("parent") is None else root_key_column - insert_sql += f" WHERE {uniq_column} IN (SELECT * FROM {insert_temp_table_name});" + insert_sql += ( + f" WHERE {uniq_column} IN (SELECT * FROM {insert_temp_table_name});" + ) if 
insert_sql.strip()[-1] != ";": insert_sql += ";" sql.append(insert_sql) # -- DELETE FROM {staging_table_name} WHERE 1=1; - return sql \ No newline at end of file + return sql diff --git a/dlt/destinations/type_mapping.py b/dlt/destinations/type_mapping.py index 3f09524bbf..765fd6dbed 100644 --- a/dlt/destinations/type_mapping.py +++ b/dlt/destinations/type_mapping.py @@ -20,11 +20,15 @@ class TypeMapper: def __init__(self, capabilities: DestinationCapabilitiesContext) -> None: self.capabilities = capabilities - def to_db_integer_type(self, precision: Optional[int], table_format: TTableFormat = None) -> str: + def to_db_integer_type( + self, precision: Optional[int], table_format: TTableFormat = None + ) -> str: # Override in subclass if db supports other integer types (e.g. smallint, integer, tinyint, etc.) return self.sct_to_unbound_dbt["bigint"] - def to_db_datetime_type(self, precision: Optional[int], table_format: TTableFormat = None) -> str: + def to_db_datetime_type( + self, precision: Optional[int], table_format: TTableFormat = None + ) -> str: # Override in subclass if db supports other timestamp types (e.g. with different time resolutions) return None @@ -54,7 +58,9 @@ def to_db_type(self, column: TColumnSchema, table_format: TTableFormat = None) - return self.sct_to_unbound_dbt[sc_t] return self.sct_to_dbt[sc_t] % precision_tuple - def precision_tuple_or_default(self, data_type: TDataType, precision: Optional[int], scale: Optional[int]) -> Optional[Tuple[int, ...]]: + def precision_tuple_or_default( + self, data_type: TDataType, precision: Optional[int], scale: Optional[int] + ) -> Optional[Tuple[int, ...]]: if data_type in ("timestamp", "time"): if precision is None: return None # Use default which is usually the max @@ -66,30 +72,38 @@ def precision_tuple_or_default(self, data_type: TDataType, precision: Optional[i if precision is None: return None elif scale is None: - return (precision, ) + return (precision,) return (precision, scale) - def decimal_precision(self, precision: Optional[int] = None, scale: Optional[int] = None) -> Optional[Tuple[int, int]]: + def decimal_precision( + self, precision: Optional[int] = None, scale: Optional[int] = None + ) -> Optional[Tuple[int, int]]: defaults = self.capabilities.decimal_precision if not defaults: return None default_precision, default_scale = defaults return ( - precision if precision is not None else default_precision, scale if scale is not None else default_scale + precision if precision is not None else default_precision, + scale if scale is not None else default_scale, ) - def wei_precision(self, precision: Optional[int] = None, scale: Optional[int] = None) -> Optional[Tuple[int, int]]: + def wei_precision( + self, precision: Optional[int] = None, scale: Optional[int] = None + ) -> Optional[Tuple[int, int]]: defaults = self.capabilities.wei_precision if not defaults: return None default_precision, default_scale = defaults return ( - precision if precision is not None else default_precision, scale if scale is not None else default_scale + precision if precision is not None else default_precision, + scale if scale is not None else default_scale, ) - def from_db_type(self, db_type: str, precision: Optional[int], scale: Optional[int]) -> TColumnType: - return without_none(dict( # type: ignore[return-value] - data_type=self.dbt_to_sct.get(db_type, "text"), - precision=precision, - scale=scale - )) + def from_db_type( + self, db_type: str, precision: Optional[int], scale: Optional[int] + ) -> TColumnType: + return without_none( + 
dict( # type: ignore[return-value] + data_type=self.dbt_to_sct.get(db_type, "text"), precision=precision, scale=scale + ) + ) diff --git a/dlt/destinations/typing.py b/dlt/destinations/typing.py index 7edf69d2ea..99ffed01fd 100644 --- a/dlt/destinations/typing.py +++ b/dlt/destinations/typing.py @@ -1,4 +1,5 @@ from typing import Any, AnyStr, List, Type, Optional, Protocol, Tuple, TypeVar + try: from pandas import DataFrame except ImportError: @@ -7,12 +8,11 @@ # native connection TNativeConn = TypeVar("TNativeConn", bound=Any) + class DBTransaction(Protocol): - def commit_transaction(self) -> None: - ... + def commit_transaction(self) -> None: ... - def rollback_transaction(self) -> None: - ... + def rollback_transaction(self) -> None: ... class DBApi(Protocol): @@ -23,21 +23,17 @@ class DBApi(Protocol): class DBApiCursor(Protocol): """Protocol for DBAPI cursor""" + description: Tuple[Any, ...] native_cursor: "DBApiCursor" """Cursor implementation native to current destination""" - def execute(self, query: AnyStr, *args: Any, **kwargs: Any) -> None: - ... - def fetchall(self) -> List[Tuple[Any, ...]]: - ... - def fetchmany(self, size: int = ...) -> List[Tuple[Any, ...]]: - ... - def fetchone(self) -> Optional[Tuple[Any, ...]]: - ... - def close(self) -> None: - ... + def execute(self, query: AnyStr, *args: Any, **kwargs: Any) -> None: ... + def fetchall(self) -> List[Tuple[Any, ...]]: ... + def fetchmany(self, size: int = ...) -> List[Tuple[Any, ...]]: ... + def fetchone(self) -> Optional[Tuple[Any, ...]]: ... + def close(self) -> None: ... def df(self, chunk_size: int = None, **kwargs: None) -> Optional[DataFrame]: """Fetches the results as data frame. For large queries the results may be chunked @@ -54,4 +50,3 @@ def df(self, chunk_size: int = None, **kwargs: None) -> Optional[DataFrame]: Optional[DataFrame]: A data frame with query results. If chunk_size > 0, None will be returned if there is no more data in results """ ... 
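Illustrative sketch (not taken from this changeset) of a class that satisfies the DBApiCursor protocol above by delegating to an underlying DB-API cursor; the wrapped cursor, the chunking strategy and the pandas-based df() are assumptions made for the example:

from typing import Any, AnyStr, List, Optional, Tuple
from pandas import DataFrame

class WrappedCursor:
    """Hypothetical adapter that fulfills the DBApiCursor protocol."""

    def __init__(self, native_cursor: Any) -> None:
        self.native_cursor = native_cursor
        self.description: Tuple[Any, ...] = ()

    def execute(self, query: AnyStr, *args: Any, **kwargs: Any) -> None:
        self.native_cursor.execute(query, *args, **kwargs)
        self.description = self.native_cursor.description

    def fetchall(self) -> List[Tuple[Any, ...]]:
        return self.native_cursor.fetchall()

    def fetchmany(self, size: int = 1000) -> List[Tuple[Any, ...]]:
        return self.native_cursor.fetchmany(size)

    def fetchone(self) -> Optional[Tuple[Any, ...]]:
        return self.native_cursor.fetchone()

    def close(self) -> None:
        self.native_cursor.close()

    def df(self, chunk_size: int = None, **kwargs: None) -> Optional[DataFrame]:
        # fetch all rows, or a single chunk when chunk_size is given, as the docstring above describes
        rows = self.fetchall() if chunk_size is None else self.fetchmany(chunk_size)
        if not rows:
            return None
        columns = [c[0] for c in self.description]
        return DataFrame(rows, columns=columns)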
- diff --git a/dlt/destinations/weaviate/__init__.py b/dlt/destinations/weaviate/__init__.py deleted file mode 100644 index ebd87aea0c..0000000000 --- a/dlt/destinations/weaviate/__init__.py +++ /dev/null @@ -1,55 +0,0 @@ -from typing import Type - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config -from dlt.common.destination.reference import ( - JobClientBase, - DestinationClientConfiguration, -) -from dlt.common.destination import DestinationCapabilitiesContext - -from dlt.destinations.weaviate.weaviate_adapter import weaviate_adapter -from dlt.destinations.weaviate.configuration import WeaviateClientConfiguration - - -@with_config( - spec=WeaviateClientConfiguration, - sections=( - known_sections.DESTINATION, - "weaviate", - ), -) -def _configure( - config: WeaviateClientConfiguration = config.value, -) -> WeaviateClientConfiguration: - return config - - -def capabilities() -> DestinationCapabilitiesContext: - caps = DestinationCapabilitiesContext() - caps.preferred_loader_file_format = "jsonl" - caps.supported_loader_file_formats = ["jsonl"] - - caps.max_identifier_length = 200 - caps.max_column_identifier_length = 1024 - caps.max_query_length = 8 * 1024 * 1024 - caps.is_max_query_length_in_bytes = False - caps.max_text_data_type_length = 8 * 1024 * 1024 - caps.is_max_text_data_type_length_in_bytes = False - caps.supports_ddl_transactions = False - caps.naming_convention = "dlt.destinations.weaviate.naming" - - return caps - - -def client( - schema: Schema, initial_config: DestinationClientConfiguration = config.value -) -> JobClientBase: - from dlt.destinations.weaviate.weaviate_client import WeaviateClient - - return WeaviateClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[WeaviateClientConfiguration]: - return WeaviateClientConfiguration diff --git a/dlt/destinations/weaviate/exceptions.py b/dlt/destinations/weaviate/exceptions.py deleted file mode 100644 index adec0fee1e..0000000000 --- a/dlt/destinations/weaviate/exceptions.py +++ /dev/null @@ -1,12 +0,0 @@ -from dlt.common.exceptions import DestinationException, DestinationTerminalException - - -class WeaviateBatchError(DestinationException): - pass - - -class PropertyNameConflict(DestinationTerminalException): - def __init__(self) -> None: - super().__init__("Your data contains items with identical property names when compared case insensitive. Weaviate cannot handle such data." - " Please clean up your data before loading or change to case insensitive naming convention." 
- " See https://dlthub.com/docs/dlt-ecosystem/destinations/weaviate#names-normalization for details.") diff --git a/dlt/extract/__init__.py b/dlt/extract/__init__.py index e69de29bb2..9dcffdacb9 100644 --- a/dlt/extract/__init__.py +++ b/dlt/extract/__init__.py @@ -0,0 +1,17 @@ +from dlt.extract.resource import DltResource, with_table_name +from dlt.extract.source import DltSource +from dlt.extract.decorators import source, resource, transformer, defer +from dlt.extract.incremental import Incremental +from dlt.extract.wrappers import wrap_additional_type + +__all__ = [ + "DltResource", + "DltSource", + "with_table_name", + "source", + "resource", + "transformer", + "defer", + "Incremental", + "wrap_additional_type", +] diff --git a/dlt/extract/decorators.py b/dlt/extract/decorators.py index dbc5f2fa82..cf7426e683 100644 --- a/dlt/extract/decorators.py +++ b/dlt/extract/decorators.py @@ -2,7 +2,21 @@ import inspect from types import ModuleType from functools import wraps -from typing import TYPE_CHECKING, Any, Callable, ClassVar, Iterator, List, Literal, Optional, Tuple, Type, Union, cast, overload +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Iterator, + List, + Literal, + Optional, + Tuple, + Type, + Union, + cast, + overload, +) from typing_extensions import TypeVar from dlt.common.configuration import with_config, get_fun_spec, known_sections, configspec @@ -15,29 +29,54 @@ from dlt.common.pipeline import PipelineContext from dlt.common.source import _SOURCES, SourceInfo from dlt.common.schema.schema import Schema -from dlt.common.schema.typing import TColumnNames, TTableSchemaColumns, TWriteDisposition, TAnySchemaColumns, TTableFormat -from dlt.extract.utils import ensure_table_schema_columns_hint, simulate_func_call, wrap_compat_transformer, wrap_resource_gen +from dlt.common.schema.typing import ( + TColumnNames, + TTableSchemaColumns, + TWriteDisposition, + TAnySchemaColumns, + TSchemaContract, + TTableFormat, +) +from dlt.extract.utils import ( + ensure_table_schema_columns_hint, + simulate_func_call, + wrap_compat_transformer, + wrap_resource_gen, +) from dlt.common.storages.exceptions import SchemaNotFoundError from dlt.common.storages.schema_storage import SchemaStorage from dlt.common.typing import AnyFun, ParamSpec, Concatenate, TDataItem, TDataItems from dlt.common.utils import get_callable_name, get_module_name, is_inner_callable -from dlt.extract.exceptions import DynamicNameNotStandaloneResource, InvalidTransformerDataTypeGeneratorFunctionRequired, ResourceFunctionExpected, ResourceInnerCallableConfigWrapDisallowed, SourceDataIsNone, SourceIsAClassTypeError, ExplicitSourceNameInvalid, SourceNotAFunction, SourceSchemaNotAvailable +from dlt.extract.exceptions import ( + DynamicNameNotStandaloneResource, + InvalidTransformerDataTypeGeneratorFunctionRequired, + ResourceFunctionExpected, + ResourceInnerCallableConfigWrapDisallowed, + SourceDataIsNone, + SourceIsAClassTypeError, + ExplicitSourceNameInvalid, + SourceNotAFunction, + SourceSchemaNotAvailable, +) from dlt.extract.incremental import IncrementalResourceWrapper from dlt.extract.typing import TTableHintTemplate -from dlt.extract.source import DltResource, DltSource, TUnboundDltResource +from dlt.extract.source import DltSource +from dlt.extract.resource import DltResource, TUnboundDltResource @configspec class SourceSchemaInjectableContext(ContainerInjectableContext): """A context containing the source schema, present when decorated function is executed""" + schema: Schema can_create_default: 
ClassVar[bool] = False if TYPE_CHECKING: - def __init__(self, schema: Schema = None) -> None: - ... + + def __init__(self, schema: Schema = None) -> None: ... + TSourceFunParams = ParamSpec("TSourceFunParams") TResourceFunParams = ParamSpec("TResourceFunParams") @@ -53,10 +92,11 @@ def source( max_table_nesting: int = None, root_key: bool = False, schema: Schema = None, + schema_contract: TSchemaContract = None, spec: Type[BaseConfiguration] = None, - _impl_cls: Type[TDltSourceImpl] = DltSource # type: ignore[assignment] -) -> Callable[TSourceFunParams, TDltSourceImpl]: - ... + _impl_cls: Type[TDltSourceImpl] = DltSource, # type: ignore[assignment] +) -> Callable[TSourceFunParams, DltSource]: ... + @overload def source( @@ -67,10 +107,11 @@ def source( max_table_nesting: int = None, root_key: bool = False, schema: Schema = None, + schema_contract: TSchemaContract = None, spec: Type[BaseConfiguration] = None, - _impl_cls: Type[TDltSourceImpl] = DltSource # type: ignore[assignment] -) -> Callable[[Callable[TSourceFunParams, Any]], Callable[TSourceFunParams, TDltSourceImpl]]: - ... + _impl_cls: Type[TDltSourceImpl] = DltSource, # type: ignore[assignment] +) -> Callable[[Callable[TSourceFunParams, Any]], Callable[TSourceFunParams, TDltSourceImpl]]: ... + def source( func: Optional[AnyFun] = None, @@ -80,8 +121,9 @@ def source( max_table_nesting: int = None, root_key: bool = False, schema: Schema = None, + schema_contract: TSchemaContract = None, spec: Type[BaseConfiguration] = None, - _impl_cls: Type[TDltSourceImpl] = DltSource # type: ignore[assignment] + _impl_cls: Type[TDltSourceImpl] = DltSource, # type: ignore[assignment] ) -> Any: """A decorator that transforms a function returning one or more `dlt resources` into a `dlt source` in order to load it with `dlt`. @@ -115,6 +157,8 @@ def source( schema (Schema, optional): An explicit `Schema` instance to be associated with the source. If not present, `dlt` creates a new `Schema` object with provided `name`. If such `Schema` already exists in the same folder as the module containing the decorated function, such schema will be loaded from file. + schema_contract (TSchemaContract, optional): Schema contract settings that will be applied to this resource. + spec (Type[BaseConfiguration], optional): A specification of configuration and secret values required by the source. 
_impl_cls (Type[TDltSourceImpl], optional): A custom implementation of DltSource, may be also used to providing just a typing stub @@ -122,9 +166,10 @@ def source( Returns: `DltSource` instance """ - if name and schema: - raise ArgumentsOverloadException("'name' has no effect when `schema` argument is present", source.__name__) + raise ArgumentsOverloadException( + "'name' has no effect when `schema` argument is present", source.__name__ + ) def decorator(f: Callable[TSourceFunParams, Any]) -> Callable[TSourceFunParams, TDltSourceImpl]: nonlocal schema, name @@ -145,9 +190,6 @@ def decorator(f: Callable[TSourceFunParams, Any]) -> Callable[TSourceFunParams, if name and name != schema.name: raise ExplicitSourceNameInvalid(name, schema.name) - # the name of the source must be identical to the name of the schema - name = schema.name - # wrap source extraction function in configuration with section func_module = inspect.getmodule(f) source_section = section or _get_source_section_name(func_module) @@ -162,24 +204,30 @@ def _wrap(*args: Any, **kwargs: Any) -> TDltSourceImpl: # configurations will be accessed in this section in the source proxy = Container()[PipelineContext] pipeline_name = None if not proxy.is_active() else proxy.pipeline().pipeline_name - with inject_section(ConfigSectionContext(pipeline_name=pipeline_name, sections=source_sections, source_state_key=name)): + with inject_section( + ConfigSectionContext( + pipeline_name=pipeline_name, + sections=source_sections, + source_state_key=schema.name, + ) + ): rv = conf_f(*args, **kwargs) if rv is None: - raise SourceDataIsNone(name) + raise SourceDataIsNone(schema.name) # if generator, consume it immediately if inspect.isgenerator(rv): rv = list(rv) # convert to source - s = _impl_cls.from_data(name, source_section, schema.clone(update_normalizers=True), rv) + s = _impl_cls.from_data(schema.clone(update_normalizers=True), source_section, rv) # apply hints if max_table_nesting is not None: s.max_table_nesting = max_table_nesting + s.schema_contract = schema_contract # enable root propagation s.root_key = root_key return s - # get spec for wrapped function SPEC = get_fun_spec(conf_f) # store the source information @@ -206,11 +254,12 @@ def resource( columns: TTableHintTemplate[TAnySchemaColumns] = None, primary_key: TTableHintTemplate[TColumnNames] = None, merge_key: TTableHintTemplate[TColumnNames] = None, + schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, selected: bool = True, - spec: Type[BaseConfiguration] = None -) -> DltResource: - ... + spec: Type[BaseConfiguration] = None, +) -> DltResource: ... + @overload def resource( @@ -222,11 +271,12 @@ def resource( columns: TTableHintTemplate[TAnySchemaColumns] = None, primary_key: TTableHintTemplate[TColumnNames] = None, merge_key: TTableHintTemplate[TColumnNames] = None, + schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, selected: bool = True, - spec: Type[BaseConfiguration] = None -) -> Callable[[Callable[TResourceFunParams, Any]], DltResource]: - ... + spec: Type[BaseConfiguration] = None, +) -> Callable[[Callable[TResourceFunParams, Any]], DltResource]: ... 
+ @overload def resource( @@ -238,12 +288,12 @@ def resource( columns: TTableHintTemplate[TAnySchemaColumns] = None, primary_key: TTableHintTemplate[TColumnNames] = None, merge_key: TTableHintTemplate[TColumnNames] = None, + schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, - standalone: Literal[True] = True -) -> Callable[[Callable[TResourceFunParams, Any]], Callable[TResourceFunParams, DltResource]]: - ... + standalone: Literal[True] = True, +) -> Callable[[Callable[TResourceFunParams, Any]], Callable[TResourceFunParams, DltResource]]: ... @overload @@ -256,11 +306,11 @@ def resource( columns: TTableHintTemplate[TAnySchemaColumns] = None, primary_key: TTableHintTemplate[TColumnNames] = None, merge_key: TTableHintTemplate[TColumnNames] = None, + schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, selected: bool = True, - spec: Type[BaseConfiguration] = None -) -> DltResource: - ... + spec: Type[BaseConfiguration] = None, +) -> DltResource: ... def resource( @@ -272,6 +322,7 @@ def resource( columns: TTableHintTemplate[TAnySchemaColumns] = None, primary_key: TTableHintTemplate[TColumnNames] = None, merge_key: TTableHintTemplate[TColumnNames] = None, + schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, @@ -322,6 +373,7 @@ def resource( merge_key (str | Sequence[str]): A column name or a list of column names that define a merge key. Typically used with "merge" write disposition to remove overlapping data ranges ie. to keep a single record for a given day. This argument also accepts a callable that is used to dynamically create tables for stream-like resources yielding many datatypes. + schema_contract (TSchemaContract, optional): Schema contract settings that will be applied to all resources of this source (if not overridden in the resource itself) table_format (Literal["iceberg"], optional): Defines the storage format of the table. Currently only "iceberg" is supported on Athena, other destinations ignore this hint. selected (bool, optional): When `True` `dlt pipeline` will extract and load this resource, if `False`, the resource will be ignored. @@ -339,23 +391,38 @@ def resource( Returns: DltResource instance which may be loaded, iterated or combined with other resources into a pipeline. 
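Illustrative usage (hypothetical resource, not taken from this changeset) of the new schema_contract hint accepted by the resource decorator; the contract keys and modes shown here follow the dlt schema-contract documentation and are an assumption of the example:

import dlt

@dlt.resource(
    name="users",
    write_disposition="merge",
    primary_key="id",
    # allow new tables, freeze the column set, and drop rows whose values
    # cannot be coerced to the existing data types
    schema_contract={"tables": "evolve", "columns": "freeze", "data_type": "discard_row"},
)
def users():
    yield {"id": 1, "name": "alice"}
    yield {"id": 2, "name": "bob"}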
""" - def make_resource(_name: str, _section: str, _data: Any, incremental: IncrementalResourceWrapper = None) -> DltResource: + + def make_resource( + _name: str, _section: str, _data: Any, incremental: IncrementalResourceWrapper = None + ) -> DltResource: table_template = DltResource.new_table_template( table_name, write_disposition=write_disposition, columns=columns, primary_key=primary_key, merge_key=merge_key, - table_format=table_format + schema_contract=schema_contract, + table_format=table_format, + ) + return DltResource.from_data( + _data, + _name, + _section, + table_template, + selected, + cast(DltResource, data_from), + incremental=incremental, ) - return DltResource.from_data(_data, _name, _section, table_template, selected, cast(DltResource, data_from), incremental=incremental) - - def decorator(f: Callable[TResourceFunParams, Any]) -> Callable[TResourceFunParams, DltResource]: + def decorator( + f: Callable[TResourceFunParams, Any] + ) -> Callable[TResourceFunParams, DltResource]: if not callable(f): if data_from: # raise more descriptive exception if we construct transformer - raise InvalidTransformerDataTypeGeneratorFunctionRequired(name or "", f, type(f)) + raise InvalidTransformerDataTypeGeneratorFunctionRequired( + name or "", f, type(f) + ) raise ResourceFunctionExpected(name or "", f, type(f)) if not standalone and callable(name): raise DynamicNameNotStandaloneResource(get_callable_name(f)) @@ -381,7 +448,10 @@ def decorator(f: Callable[TResourceFunParams, Any]) -> Callable[TResourceFunPara # for autogenerated spec do not include defaults conf_f = with_config( incr_f, - spec=spec, sections=resource_sections, sections_merge_style=ConfigSectionContext.resource_merge_style, include_defaults=spec is not None + spec=spec, + sections=resource_sections, + sections_merge_style=ConfigSectionContext.resource_merge_style, + include_defaults=spec is not None, ) is_inner_resource = is_inner_callable(f) if conf_f != incr_f and is_inner_resource and not standalone: @@ -402,13 +472,21 @@ def decorator(f: Callable[TResourceFunParams, Any]) -> Callable[TResourceFunPara @wraps(conf_f) def _wrap(*args: Any, **kwargs: Any) -> DltResource: _, mod_sig, bound_args = simulate_func_call(conf_f, skip_args, *args, **kwargs) - actual_resource_name = name(bound_args.arguments) if callable(name) else resource_name - r = make_resource(actual_resource_name, source_section, compat_wrapper(actual_resource_name, conf_f, sig, *args, **kwargs), incremental) + actual_resource_name = ( + name(bound_args.arguments) if callable(name) else resource_name + ) + r = make_resource( + actual_resource_name, + source_section, + compat_wrapper(actual_resource_name, conf_f, sig, *args, **kwargs), + incremental, + ) # consider transformer arguments bound r._args_bound = True # keep explicit args passed r._set_explicit_args(conf_f, mod_sig, *args, **kwargs) return r + return _wrap else: return make_resource(resource_name, source_section, conf_f, incremental) @@ -443,9 +521,9 @@ def transformer( primary_key: TTableHintTemplate[TColumnNames] = None, merge_key: TTableHintTemplate[TColumnNames] = None, selected: bool = True, - spec: Type[BaseConfiguration] = None -) -> Callable[[Callable[Concatenate[TDataItem, TResourceFunParams], Any]], DltResource]: - ... + spec: Type[BaseConfiguration] = None, +) -> Callable[[Callable[Concatenate[TDataItem, TResourceFunParams], Any]], DltResource]: ... 
+ @overload def transformer( @@ -460,9 +538,12 @@ def transformer( merge_key: TTableHintTemplate[TColumnNames] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, - standalone: Literal[True] = True -) -> Callable[[Callable[Concatenate[TDataItem, TResourceFunParams], Any]], Callable[TResourceFunParams, DltResource]]: - ... + standalone: Literal[True] = True, +) -> Callable[ + [Callable[Concatenate[TDataItem, TResourceFunParams], Any]], + Callable[TResourceFunParams, DltResource], +]: ... + @overload def transformer( @@ -476,9 +557,9 @@ def transformer( primary_key: TTableHintTemplate[TColumnNames] = None, merge_key: TTableHintTemplate[TColumnNames] = None, selected: bool = True, - spec: Type[BaseConfiguration] = None -) -> DltResource: - ... + spec: Type[BaseConfiguration] = None, +) -> DltResource: ... + @overload def transformer( @@ -493,9 +574,9 @@ def transformer( merge_key: TTableHintTemplate[TColumnNames] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, - standalone: Literal[True] = True -) -> Callable[TResourceFunParams, DltResource]: - ... + standalone: Literal[True] = True, +) -> Callable[TResourceFunParams, DltResource]: ... + def transformer( f: Optional[Callable[Concatenate[TDataItem, TResourceFunParams], Any]] = None, @@ -509,7 +590,7 @@ def transformer( merge_key: TTableHintTemplate[TColumnNames] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, - standalone: bool = False + standalone: bool = False, ) -> Any: """A form of `dlt resource` that takes input from other resources via `data_from` argument in order to enrich or transform the data. @@ -566,7 +647,10 @@ def transformer( standalone (bool, optional): Returns a wrapped decorated function that creates DltResource instance. Must be called before use. Cannot be part of a source. """ if isinstance(f, DltResource): - raise ValueError("Please pass `data_from=` argument as keyword argument. The only positional argument to transformer is the decorated function") + raise ValueError( + "Please pass `data_from=` argument as keyword argument. The only positional argument to" + " transformer is the decorated function" + ) return resource( # type: ignore f, @@ -579,7 +663,7 @@ def transformer( selected=selected, spec=spec, standalone=standalone, - data_from=data_from + data_from=data_from, ) @@ -618,12 +702,14 @@ def get_source_schema() -> Schema: TDeferredFunParams = ParamSpec("TDeferredFunParams") -def defer(f: Callable[TDeferredFunParams, TBoundItems]) -> Callable[TDeferredFunParams, TDeferred[TBoundItems]]: - +def defer( + f: Callable[TDeferredFunParams, TBoundItems] +) -> Callable[TDeferredFunParams, TDeferred[TBoundItems]]: @wraps(f) def _wrap(*args: Any, **kwargs: Any) -> TDeferred[TBoundItems]: def _curry() -> TBoundItems: return f(*args, **kwargs) + return _curry return _wrap diff --git a/dlt/extract/exceptions.py b/dlt/extract/exceptions.py index e540a2468f..8e7d0dddf8 100644 --- a/dlt/extract/exceptions.py +++ b/dlt/extract/exceptions.py @@ -42,7 +42,11 @@ def __init__(self, pipe_name: str, has_parent: bool) -> None: self.pipe_name = pipe_name self.has_parent = has_parent if has_parent: - msg = f"A pipe created from transformer {pipe_name} is unbound or its parent is unbound or empty. Provide a resource in `data_from` argument or bind resources with | operator." + msg = ( + f"A pipe created from transformer {pipe_name} is unbound or its parent is unbound" + " or empty. Provide a resource in `data_from` argument or bind resources with |" + " operator." 
+ ) else: msg = "Pipe is empty and does not have a resource at its head" super().__init__(pipe_name, msg) @@ -52,21 +56,41 @@ class InvalidStepFunctionArguments(PipeException): def __init__(self, pipe_name: str, func_name: str, sig: Signature, call_error: str) -> None: self.func_name = func_name self.sig = sig - super().__init__(pipe_name, f"Unable to call {func_name}: {call_error}. The mapping/filtering function {func_name} requires first argument to take data item and optional second argument named 'meta', but the signature is {sig}") + super().__init__( + pipe_name, + f"Unable to call {func_name}: {call_error}. The mapping/filtering function" + f" {func_name} requires first argument to take data item and optional second argument" + f" named 'meta', but the signature is {sig}", + ) class ResourceExtractionError(PipeException): def __init__(self, pipe_name: str, gen: Any, msg: str, kind: str) -> None: self.msg = msg self.kind = kind - self.func_name = gen.__name__ if isgenerator(gen) else get_callable_name(gen) if callable(gen) else str(gen) - super().__init__(pipe_name, f"extraction of resource {pipe_name} in {kind} {self.func_name} caused an exception: {msg}") + self.func_name = ( + gen.__name__ + if isgenerator(gen) + else get_callable_name(gen) if callable(gen) else str(gen) + ) + super().__init__( + pipe_name, + f"extraction of resource {pipe_name} in {kind} {self.func_name} caused an exception:" + f" {msg}", + ) class PipeGenInvalid(PipeException): def __init__(self, pipe_name: str, gen: Any) -> None: - msg = "A pipe generator element must be an Iterator (ie. list or generator function). Generator element is typically created from a `data` argument to pipeline.run or extract method." - msg += "dlt will evaluate functions that were passed as data argument. If you passed a function the returned data type is not iterable. " + msg = ( + "A pipe generator element must be an Iterator (ie. list or generator function)." + " Generator element is typically created from a `data` argument to pipeline.run or" + " extract method." + ) + msg += ( + "dlt will evaluate functions that were passed as data argument. If you passed a" + " function the returned data type is not iterable. " + ) type_name = str(type(gen)) msg += f" Generator type is {type_name}." if "DltSource" in type_name: @@ -79,13 +103,21 @@ def __init__(self, pipe_name: str, gen: Any) -> None: class ResourceNameMissing(DltResourceException): def __init__(self) -> None: - super().__init__(None, """Resource name is missing. If you create a resource directly from data ie. from a list you must pass the name explicitly in `name` argument. - Please note that for resources created from functions or generators, the name is the function name by default.""") + super().__init__( + None, + """Resource name is missing. If you create a resource directly from data ie. from a list you must pass the name explicitly in `name` argument. 
+ Please note that for resources created from functions or generators, the name is the function name by default.""", + ) class DynamicNameNotStandaloneResource(DltResourceException): def __init__(self, resource_name: str) -> None: - super().__init__(resource_name, "You must set the resource as standalone to be able to dynamically set its name based on call arguments") + super().__init__( + resource_name, + "You must set the resource as standalone to be able to dynamically set its name based" + " on call arguments", + ) + # class DependentResourceIsNotCallable(DltResourceException): # def __init__(self, resource_name: str) -> None: @@ -93,42 +125,81 @@ def __init__(self, resource_name: str) -> None: class ResourceNotFoundError(DltResourceException, KeyError): - def __init__(self, resource_name: str, context: str) -> None: - self.resource_name = resource_name - super().__init__(resource_name, f"Resource with a name {resource_name} could not be found. {context}") + def __init__(self, resource_name: str, context: str) -> None: + self.resource_name = resource_name + super().__init__( + resource_name, f"Resource with a name {resource_name} could not be found. {context}" + ) class InvalidResourceDataType(DltResourceException): def __init__(self, resource_name: str, item: Any, _typ: Type[Any], msg: str) -> None: self.item = item self._typ = _typ - super().__init__(resource_name, f"Cannot create resource {resource_name} from specified data. If you want to process just one data item, enclose it in a list. " + msg) + super().__init__( + resource_name, + f"Cannot create resource {resource_name} from specified data. If you want to process" + " just one data item, enclose it in a list. " + + msg, + ) class InvalidResourceDataTypeAsync(InvalidResourceDataType): - def __init__(self, resource_name: str, item: Any,_typ: Type[Any]) -> None: - super().__init__(resource_name, item, _typ, "Async iterators and generators are not valid resources. Please use standard iterators and generators that yield Awaitables instead (for example by yielding from async function without await") + def __init__(self, resource_name: str, item: Any, _typ: Type[Any]) -> None: + super().__init__( + resource_name, + item, + _typ, + "Async iterators and generators are not valid resources. Please use standard iterators" + " and generators that yield Awaitables instead (for example by yielding from async" + " function without await", + ) class InvalidResourceDataTypeBasic(InvalidResourceDataType): - def __init__(self, resource_name: str, item: Any,_typ: Type[Any]) -> None: - super().__init__(resource_name, item, _typ, f"Resources cannot be strings or dictionaries but {_typ.__name__} was provided. Please pass your data in a list or as a function yielding items. If you want to process just one data item, enclose it in a list.") + def __init__(self, resource_name: str, item: Any, _typ: Type[Any]) -> None: + super().__init__( + resource_name, + item, + _typ, + f"Resources cannot be strings or dictionaries but {_typ.__name__} was provided. Please" + " pass your data in a list or as a function yielding items. 
If you want to process" + " just one data item, enclose it in a list.", + ) class InvalidResourceDataTypeFunctionNotAGenerator(InvalidResourceDataType): - def __init__(self, resource_name: str, item: Any,_typ: Type[Any]) -> None: - super().__init__(resource_name, item, _typ, "Please make sure that function decorated with @dlt.resource uses 'yield' to return the data.") + def __init__(self, resource_name: str, item: Any, _typ: Type[Any]) -> None: + super().__init__( + resource_name, + item, + _typ, + "Please make sure that function decorated with @dlt.resource uses 'yield' to return the" + " data.", + ) class InvalidResourceDataTypeMultiplePipes(InvalidResourceDataType): - def __init__(self, resource_name: str, item: Any,_typ: Type[Any]) -> None: - super().__init__(resource_name, item, _typ, "Resources with multiple parallel data pipes are not yet supported. This problem most often happens when you are creating a source with @dlt.source decorator that has several resources with the same name.") + def __init__(self, resource_name: str, item: Any, _typ: Type[Any]) -> None: + super().__init__( + resource_name, + item, + _typ, + "Resources with multiple parallel data pipes are not yet supported. This problem most" + " often happens when you are creating a source with @dlt.source decorator that has" + " several resources with the same name.", + ) class InvalidTransformerDataTypeGeneratorFunctionRequired(InvalidResourceDataType): def __init__(self, resource_name: str, item: Any, _typ: Type[Any]) -> None: - super().__init__(resource_name, item, _typ, - "Transformer must be a function decorated with @dlt.transformer that takes data item as its first argument. Only first argument may be 'positional only'.") + super().__init__( + resource_name, + item, + _typ, + "Transformer must be a function decorated with @dlt.transformer that takes data item as" + " its first argument. Only first argument may be 'positional only'.", + ) class InvalidTransformerGeneratorFunction(DltResourceException): @@ -150,29 +221,57 @@ def __init__(self, resource_name: str, func_name: str, sig: Signature, code: int class ResourceInnerCallableConfigWrapDisallowed(DltResourceException): def __init__(self, resource_name: str, section: str) -> None: self.section = section - msg = f"Resource {resource_name} in section {section} is defined over an inner function and requests config/secrets in its arguments. Requesting secret and config values via 'dlt.secrets.values' or 'dlt.config.value' is disallowed for resources that are inner functions. Use the dlt.source to get the required configuration and pass them explicitly to your source." + msg = ( + f"Resource {resource_name} in section {section} is defined over an inner function and" + " requests config/secrets in its arguments. Requesting secret and config values via" + " 'dlt.secrets.values' or 'dlt.config.value' is disallowed for resources that are" + " inner functions. Use the dlt.source to get the required configuration and pass them" + " explicitly to your source." + ) super().__init__(resource_name, msg) class InvalidResourceDataTypeIsNone(InvalidResourceDataType): def __init__(self, resource_name: str, item: Any, _typ: Type[Any]) -> None: - super().__init__(resource_name, item, _typ, "Resource data missing. Did you forget the return statement in @dlt.resource decorated function?") + super().__init__( + resource_name, + item, + _typ, + "Resource data missing. 
Did you forget the return statement in @dlt.resource decorated" + " function?", + ) class ResourceFunctionExpected(InvalidResourceDataType): def __init__(self, resource_name: str, item: Any, _typ: Type[Any]) -> None: - super().__init__(resource_name, item, _typ, f"Expected function or callable as first parameter to resource {resource_name} but {_typ.__name__} found. Please decorate a function with @dlt.resource") + super().__init__( + resource_name, + item, + _typ, + f"Expected function or callable as first parameter to resource {resource_name} but" + f" {_typ.__name__} found. Please decorate a function with @dlt.resource", + ) class InvalidParentResourceDataType(InvalidResourceDataType): - def __init__(self, resource_name: str, item: Any,_typ: Type[Any]) -> None: - super().__init__(resource_name, item, _typ, f"A parent resource of {resource_name} is of type {_typ.__name__}. Did you forget to use '@dlt.resource` decorator or `resource` function?") + def __init__(self, resource_name: str, item: Any, _typ: Type[Any]) -> None: + super().__init__( + resource_name, + item, + _typ, + f"A parent resource of {resource_name} is of type {_typ.__name__}. Did you forget to" + " use '@dlt.resource` decorator or `resource` function?", + ) class InvalidParentResourceIsAFunction(DltResourceException): def __init__(self, resource_name: str, func_name: str) -> None: self.func_name = func_name - super().__init__(resource_name, f"A data source {func_name} of a transformer {resource_name} is an undecorated function. Please decorate it with '@dlt.resource' or pass to 'resource' function.") + super().__init__( + resource_name, + f"A data source {func_name} of a transformer {resource_name} is an undecorated" + " function. Please decorate it with '@dlt.resource' or pass to 'resource' function.", + ) class DeletingResourcesNotSupported(DltResourceException): @@ -181,10 +280,16 @@ def __init__(self, source_name: str, resource_name: str) -> None: class ParametrizedResourceUnbound(DltResourceException): - def __init__(self, resource_name: str, func_name: str, sig: Signature, kind: str, error: str) -> None: + def __init__( + self, resource_name: str, func_name: str, sig: Signature, kind: str, error: str + ) -> None: self.func_name = func_name self.sig = sig - msg = f"The {kind} {resource_name} is parametrized and expects following arguments: {sig}. Did you forget to bind the {func_name} function? For example from `source.{resource_name}.bind(...)" + msg = ( + f"The {kind} {resource_name} is parametrized and expects following arguments: {sig}." + f" Did you forget to bind the {func_name} function? For example from" + f" `source.{resource_name}.bind(...)" + ) if error: msg += f" .Details: {error}" super().__init__(resource_name, msg) @@ -197,7 +302,9 @@ def __init__(self, resource_name: str, msg: str) -> None: class TableNameMissing(DltSourceException): def __init__(self) -> None: - super().__init__("""Table name is missing in table template. Please provide a string or a function that takes a data item as an argument""") + super().__init__( + """Table name is missing in table template. 
Please provide a string or a function that takes a data item as an argument""" + ) class InconsistentTableTemplate(DltSourceException): @@ -208,29 +315,43 @@ def __init__(self, reason: str) -> None: class DataItemRequiredForDynamicTableHints(DltResourceException): def __init__(self, resource_name: str) -> None: - super().__init__(resource_name, f"""An instance of resource's data required to generate table schema in resource {resource_name}. - One of table hints for that resource (typically table name) is a function and hint is computed separately for each instance of data extracted from that resource.""") + super().__init__( + resource_name, + f"""An instance of resource's data required to generate table schema in resource {resource_name}. + One of table hints for that resource (typically table name) is a function and hint is computed separately for each instance of data extracted from that resource.""", + ) class SourceDataIsNone(DltSourceException): def __init__(self, source_name: str) -> None: self.source_name = source_name - super().__init__(f"No data returned or yielded from source function {source_name}. Did you forget the return statement?") + super().__init__( + f"No data returned or yielded from source function {source_name}. Did you forget the" + " return statement?" + ) class SourceExhausted(DltSourceException): def __init__(self, source_name: str) -> None: self.source_name = source_name - super().__init__(f"Source {source_name} is exhausted or has active iterator. You can iterate or pass the source to dlt pipeline only once.") + super().__init__( + f"Source {source_name} is exhausted or has active iterator. You can iterate or pass the" + " source to dlt pipeline only once." + ) class ResourcesNotFoundError(DltSourceException): - def __init__(self, source_name: str, available_resources: Set[str], requested_resources: Set[str]) -> None: + def __init__( + self, source_name: str, available_resources: Set[str], requested_resources: Set[str] + ) -> None: self.source_name = source_name self.available_resources = available_resources self.requested_resources = requested_resources self.not_found_resources = requested_resources.difference(available_resources) - msg = f"The following resources could not be found in source {source_name}: {self.not_found_resources}. Available resources are: {available_resources}" + msg = ( + f"The following resources could not be found in source {source_name}:" + f" {self.not_found_resources}. Available resources are: {available_resources}" + ) super().__init__(msg) @@ -239,36 +360,48 @@ def __init__(self, source_name: str, item: Any, _typ: Type[Any]) -> None: self.source_name = source_name self.item = item self.typ = _typ - super().__init__(f"First parameter to the source {source_name} must be a function or callable but is {_typ.__name__}. Please decorate a function with @dlt.source") + super().__init__( + f"First parameter to the source {source_name} must be a function or callable but is" + f" {_typ.__name__}. Please decorate a function with @dlt.source" + ) class SourceIsAClassTypeError(DltSourceException): - def __init__(self, source_name: str, _typ: Type[Any]) -> None: + def __init__(self, source_name: str, _typ: Type[Any]) -> None: self.source_name = source_name self.typ = _typ - super().__init__(f"First parameter to the source {source_name} is a class {_typ.__name__}. Do not decorate classes with @dlt.source. 
Instead implement __call__ in your class and pass instance of such class to dlt.source() directly") + super().__init__( + f"First parameter to the source {source_name} is a class {_typ.__name__}. Do not" + " decorate classes with @dlt.source. Instead implement __call__ in your class and pass" + " instance of such class to dlt.source() directly" + ) class SourceSchemaNotAvailable(DltSourceException): def __init__(self) -> None: - super().__init__("Current source schema is available only when called from a function decorated with dlt.source or dlt.resource") + super().__init__( + "Current source schema is available only when called from a function decorated with" + " dlt.source or dlt.resource" + ) class ExplicitSourceNameInvalid(DltSourceException): def __init__(self, source_name: str, schema_name: str) -> None: self.source_name = source_name self.schema_name = schema_name - super().__init__(f"Your explicit source name {source_name} is not a valid schema name. Please use a valid schema name ie. '{schema_name}'.") + super().__init__( + f"Your explicit source name {source_name} is not a valid schema name. Please use a" + f" valid schema name ie. '{schema_name}'." + ) class IncrementalUnboundError(DltResourceException): def __init__(self, cursor_path: str) -> None: - super().__init__("", f"The incremental definition with cursor path {cursor_path} is used without being bound to the resource. This most often happens when you create dynamic resource from a generator function that uses incremental. See https://dlthub.com/docs/general-usage/incremental-loading#incremental-loading-with-last-value for an example.") - - -class ValidationError(ValueError, DltException): - def __init__(self, validator: ValidateItem, data_item: TDataItems, original_exception: Exception) ->None: - self.original_exception = original_exception - self.validator = validator - self.data_item = data_item - super().__init__(f"Extracted data item could not be validated with {validator}. Original message: {original_exception}") + super().__init__( + "", + f"The incremental definition with cursor path {cursor_path} is used without being bound" + " to the resource. This most often happens when you create dynamic resource from a" + " generator function that uses incremental. 
See" + " https://dlthub.com/docs/general-usage/incremental-loading#incremental-loading-with-last-value" + " for an example.", + ) diff --git a/dlt/extract/extract.py b/dlt/extract/extract.py index 3f71943579..9ff3cf872c 100644 --- a/dlt/extract/extract.py +++ b/dlt/extract/extract.py @@ -1,351 +1,372 @@ import contextlib -import os -from typing import ClassVar, List, Set, Dict, Type, Any, Sequence, Optional -from collections import defaultdict +from collections.abc import Sequence as C_Sequence +from datetime import datetime # noqa: 251 +import itertools +from typing import List, Set, Dict, Optional, Set, Any +import yaml from dlt.common.configuration.container import Container from dlt.common.configuration.resolve import inject_section -from dlt.common.configuration.specs.config_section_context import ConfigSectionContext -from dlt.common.pipeline import reset_resource_state +from dlt.common.configuration.specs import ConfigSectionContext, known_sections from dlt.common.data_writers import TLoaderFileFormat -from dlt.common.exceptions import MissingDependencyException - +from dlt.common.data_writers.writers import EMPTY_DATA_WRITER_METRICS +from dlt.common.pipeline import ( + ExtractDataInfo, + ExtractInfo, + ExtractMetrics, + SupportsPipeline, + WithStepInfo, + reset_resource_state, +) from dlt.common.runtime import signals from dlt.common.runtime.collector import Collector, NULL_COLLECTOR -from dlt.common.utils import uniq_id -from dlt.common.typing import TDataItems, TDataItem -from dlt.common.schema import Schema, utils, TSchemaUpdate -from dlt.common.schema.typing import TColumnSchema, TTableSchemaColumns -from dlt.common.storages import NormalizeStorageConfiguration, NormalizeStorage, DataItemStorage, FileStorage -from dlt.common.configuration.specs import known_sections +from dlt.common.schema import Schema, utils +from dlt.common.schema.typing import ( + TAnySchemaColumns, + TColumnNames, + TSchemaContract, + TWriteDisposition, +) +from dlt.common.storages import NormalizeStorageConfiguration, LoadPackageInfo, SchemaStorage +from dlt.common.storages.load_package import ParsedLoadJobFileName +from dlt.common.utils import get_callable_name, get_full_class_name from dlt.extract.decorators import SourceSchemaInjectableContext from dlt.extract.exceptions import DataItemRequiredForDynamicTableHints from dlt.extract.pipe import PipeIterator -from dlt.extract.source import DltResource, DltSource -from dlt.extract.typing import TableNameMeta -try: - from dlt.common.libs import pyarrow - from dlt.common.libs.pyarrow import pyarrow as pa -except MissingDependencyException: - pyarrow = None -try: - import pandas as pd -except ModuleNotFoundError: - pd = None - - -class ExtractorItemStorage(DataItemStorage): - load_file_type: TLoaderFileFormat - - def __init__(self, storage: FileStorage, extract_folder: str="extract") -> None: - # data item storage with jsonl with pua encoding - super().__init__(self.load_file_type) - self.extract_folder = extract_folder - self.storage = storage - - - def _get_data_item_path_template(self, load_id: str, schema_name: str, table_name: str) -> str: - template = NormalizeStorage.build_extracted_file_stem(schema_name, table_name, "%s") - return self.storage.make_full_path(os.path.join(self._get_extract_path(load_id), template)) - - def _get_extract_path(self, extract_id: str) -> str: - return os.path.join(self.extract_folder, extract_id) - - -class JsonLExtractorStorage(ExtractorItemStorage): - load_file_type: TLoaderFileFormat = "puae-jsonl" - - -class 
ArrowExtractorStorage(ExtractorItemStorage): - load_file_type: TLoaderFileFormat = "arrow" - - -class ExtractorStorage(NormalizeStorage): - EXTRACT_FOLDER: ClassVar[str] = "extract" - - """Wrapper around multiple extractor storages with different file formats""" - def __init__(self, C: NormalizeStorageConfiguration) -> None: - super().__init__(True, C) - self._item_storages: Dict[TLoaderFileFormat, ExtractorItemStorage] = { - "puae-jsonl": JsonLExtractorStorage(self.storage, extract_folder=self.EXTRACT_FOLDER), - "arrow": ArrowExtractorStorage(self.storage, extract_folder=self.EXTRACT_FOLDER) - } - - def _get_extract_path(self, extract_id: str) -> str: - return os.path.join(self.EXTRACT_FOLDER, extract_id) - - def create_extract_id(self) -> str: - extract_id = uniq_id() - self.storage.create_folder(self._get_extract_path(extract_id)) - return extract_id - - def get_storage(self, loader_file_format: TLoaderFileFormat) -> ExtractorItemStorage: - return self._item_storages[loader_file_format] +from dlt.extract.source import DltSource +from dlt.extract.resource import DltResource +from dlt.extract.storage import ExtractStorage +from dlt.extract.extractors import JsonLExtractor, ArrowExtractor, Extractor + + +def data_to_sources( + data: Any, + pipeline: SupportsPipeline, + schema: Schema = None, + table_name: str = None, + parent_table_name: str = None, + write_disposition: TWriteDisposition = None, + columns: TAnySchemaColumns = None, + primary_key: TColumnNames = None, + schema_contract: TSchemaContract = None, +) -> List[DltSource]: + """Creates a list of sources for data items present in `data` and applies specified hints to all resources. + + `data` may be a DltSource, DltResource, a list of those or any other data type accepted by pipeline.run + """ + + def apply_hint_args(resource: DltResource) -> None: + resource.apply_hints( + table_name, + parent_table_name, + write_disposition, + columns, + primary_key, + schema_contract=schema_contract, + ) - def close_writers(self, extract_id: str) -> None: - for storage in self._item_storages.values(): - storage.close_writers(extract_id) + def apply_settings(source_: DltSource) -> None: + # apply schema contract settings + if schema_contract: + source_.schema_contract = schema_contract + + def choose_schema() -> Schema: + """Except of explicitly passed schema, use a clone that will get discarded if extraction fails""" + if schema: + schema_ = schema + elif pipeline.default_schema_name: + schema_ = pipeline.schemas[pipeline.default_schema_name].clone() + else: + schema_ = pipeline._make_schema_with_default_name() + return schema_ + + effective_schema = choose_schema() + + # a list of sources or a list of resources may be passed as data + sources: List[DltSource] = [] + resources: List[DltResource] = [] + + def append_data(data_item: Any) -> None: + if isinstance(data_item, DltSource): + # if schema is explicit then override source schema + if schema: + data_item.schema = schema + sources.append(data_item) + elif isinstance(data_item, DltResource): + # do not set section to prevent source that represent a standalone resource + # to overwrite other standalone resources (ie. 
parents) in that source + sources.append(DltSource(effective_schema, "", [data_item])) + else: + # iterator/iterable/generator + # create resource first without table template + resources.append( + DltResource.from_data(data_item, name=table_name, section=pipeline.pipeline_name) + ) - def commit_extract_files(self, extract_id: str, with_delete: bool = True) -> None: - extract_path = self._get_extract_path(extract_id) - for file in self.storage.list_folder_files(extract_path, to_root=False): - from_file = os.path.join(extract_path, file) - to_file = os.path.join(NormalizeStorage.EXTRACTED_FOLDER, file) - if with_delete: - self.storage.atomic_rename(from_file, to_file) - else: - # create hardlink which will act as a copy - self.storage.link_hard(from_file, to_file) - if with_delete: - self.storage.delete_folder(extract_path, recursively=True) + if isinstance(data, C_Sequence) and len(data) > 0: + # if first element is source or resource + if isinstance(data[0], (DltResource, DltSource)): + for item in data: + append_data(item) + else: + append_data(data) + else: + append_data(data) + + # add all the appended resources in one source + if resources: + sources.append(DltSource(effective_schema, pipeline.pipeline_name, resources)) + + # apply hints and settings + for source in sources: + apply_settings(source) + for resource in source.selected_resources.values(): + apply_hint_args(resource) + + return sources + + +def describe_extract_data(data: Any) -> List[ExtractDataInfo]: + """Extract source and resource names from data passed to extract""" + data_info: List[ExtractDataInfo] = [] + + def add_item(item: Any) -> bool: + if isinstance(item, (DltResource, DltSource)): + # record names of sources/resources + data_info.append( + { + "name": item.name, + "data_type": "resource" if isinstance(item, DltResource) else "source", + } + ) + return False + else: + # skip None + if data is not None: + # any other data type does not have a name - just type + data_info.append({"name": "", "data_type": type(item).__name__}) + return True - def write_data_item(self, file_format: TLoaderFileFormat, load_id: str, schema_name: str, table_name: str, item: TDataItems, columns: TTableSchemaColumns) -> None: - self.get_storage(file_format).write_data_item(load_id, schema_name, table_name, item, columns) + item: Any = data + if isinstance(data, C_Sequence) and len(data) > 0: + for item in data: + # add_item returns True if non named item was returned. 
in that case we break + if add_item(item): + break + return data_info + add_item(item) + return data_info -class Extractor: - file_format: TLoaderFileFormat - dynamic_tables: TSchemaUpdate +class Extract(WithStepInfo[ExtractMetrics, ExtractInfo]): def __init__( - self, - extract_id: str, - storage: ExtractorStorage, - schema: Schema, - resources_with_items: Set[str], - dynamic_tables: TSchemaUpdate, - collector: Collector = NULL_COLLECTOR + self, + schema_storage: SchemaStorage, + normalize_storage_config: NormalizeStorageConfiguration, + collector: Collector = NULL_COLLECTOR, + original_data: Any = None, ) -> None: - self._storage = storage - self.schema = schema - self.dynamic_tables = dynamic_tables + """optionally saves originally extracted `original_data` to generate extract info""" self.collector = collector - self.resources_with_items = resources_with_items - self.extract_id = extract_id - - @property - def storage(self) -> ExtractorItemStorage: - return self._storage.get_storage(self.file_format) - - @staticmethod - def item_format(items: TDataItems) -> Optional[TLoaderFileFormat]: - """Detect the loader file format of the data items based on type. - Currently this is either 'arrow' or 'puae-jsonl' - - Returns: - The loader file format or `None` if if can't be detected. - """ - for item in items if isinstance(items, list) else [items]: - # Assume all items in list are the same type - if (pyarrow and pyarrow.is_arrow_item(item)) or (pd and isinstance(item, pd.DataFrame)): - return "arrow" - return "puae-jsonl" - return None # Empty list is unknown format - - def write_table(self, resource: DltResource, items: TDataItems, meta: Any) -> None: - if isinstance(meta, TableNameMeta): - table_name = meta.table_name - self._write_static_table(resource, table_name, items) - self._write_item(table_name, resource.name, items) - else: - if resource._table_name_hint_fun: - if isinstance(items, list): - for item in items: - self._write_dynamic_table(resource, item) - else: - self._write_dynamic_table(resource, items) - else: - # write item belonging to table with static name - table_name = resource.table_name # type: ignore[assignment] - self._write_static_table(resource, table_name, items) - self._write_item(table_name, resource.name, items) - - def write_empty_file(self, table_name: str) -> None: - table_name = self.schema.naming.normalize_table_identifier(table_name) - self.storage.write_empty_file(self.extract_id, self.schema.name, table_name, None) - - def _write_item(self, table_name: str, resource_name: str, items: TDataItems, columns: TTableSchemaColumns = None) -> None: - # normalize table name before writing so the name match the name in schema - # note: normalize function should be cached so there's almost no penalty on frequent calling - # note: column schema is not required for jsonl writer used here - table_name = self.schema.naming.normalize_identifier(table_name) - self.collector.update(table_name) - self.resources_with_items.add(resource_name) - self.storage.write_data_item(self.extract_id, self.schema.name, table_name, items, columns) - - def _write_dynamic_table(self, resource: DltResource, item: TDataItem) -> None: - table_name = resource._table_name_hint_fun(item) - existing_table = self.dynamic_tables.get(table_name) - if existing_table is None: - self.dynamic_tables[table_name] = [resource.compute_table_schema(item)] - else: - # quick check if deep table merge is required - if resource._table_has_other_dynamic_hints: - new_table = resource.compute_table_schema(item) - # 
this merges into existing table in place - utils.merge_tables(existing_table[0], new_table) - else: - # if there are no other dynamic hints besides name then we just leave the existing partial table - pass - # write to storage with inferred table name - self._write_item(table_name, resource.name, item) - - def _write_static_table(self, resource: DltResource, table_name: str, items: TDataItems) -> None: - existing_table = self.dynamic_tables.get(table_name) - if existing_table is None: - static_table = resource.compute_table_schema() - static_table["name"] = table_name - self.dynamic_tables[table_name] = [static_table] - - -class JsonLExtractor(Extractor): - file_format = "puae-jsonl" - - -class ArrowExtractor(Extractor): - file_format = "arrow" - - def _rename_columns(self, items: List[TDataItem], new_column_names: List[str]) -> List[TDataItem]: - """Rename arrow columns to normalized schema column names""" - if not items: - return items - if items[0].schema.names == new_column_names: - # No need to rename - return items - if isinstance(items[0], pyarrow.pyarrow.Table): - return [item.rename_columns(new_column_names) for item in items] - elif isinstance(items[0], pyarrow.pyarrow.RecordBatch): - # Convert the batches to table -> rename -> then back to batches - return pa.Table.from_batches(items).rename_columns(new_column_names).to_batches() # type: ignore[no-any-return] - else: - raise TypeError(f"Unsupported data item type {type(items[0])}") - - def write_table(self, resource: DltResource, items: TDataItems, meta: Any) -> None: - items = [ - # 2. Remove null-type columns from the table(s) as they can't be loaded - pyarrow.remove_null_columns(tbl) for tbl in ( - # 1. Convert pandas frame(s) to arrow Table - pyarrow.pyarrow.Table.from_pandas(item) if (pd and isinstance(item, pd.DataFrame)) else item - for item in (items if isinstance(items, list) else [items]) + self.schema_storage = schema_storage + self.extract_storage = ExtractStorage(normalize_storage_config) + self.original_data: Any = original_data + super().__init__() + + def _compute_metrics(self, load_id: str, source: DltSource) -> ExtractMetrics: + # map by job id + job_metrics = { + ParsedLoadJobFileName.parse(m.file_path): m + for m in self.extract_storage.closed_files(load_id) + } + # aggregate by table name + table_metrics = { + table_name: sum(map(lambda pair: pair[1], metrics), EMPTY_DATA_WRITER_METRICS) + for table_name, metrics in itertools.groupby( + job_metrics.items(), lambda pair: pair[0].table_name ) - ] - super().write_table(resource, items, meta) - - def _write_item(self, table_name: str, resource_name: str, items: TDataItems, columns: TTableSchemaColumns = None) -> None: - # Note: `items` is always a list here due to the conversion in `write_table` - new_columns = list(self.dynamic_tables[table_name][0]["columns"].keys()) - super()._write_item(table_name, resource_name, self._rename_columns(items, new_columns), self.dynamic_tables[table_name][0]["columns"]) - - def _write_static_table(self, resource: DltResource, table_name: str, items: TDataItems) -> None: - existing_table = self.dynamic_tables.get(table_name) - if existing_table is not None: - return - static_table = resource.compute_table_schema() - if isinstance(items, list): - item = items[0] - else: - item = items - # Merge the columns to include primary_key and other hints that may be set on the resource - arrow_columns = pyarrow.py_arrow_to_table_schema_columns(item.schema) - for key, value in static_table["columns"].items(): - arrow_columns[key] = 
utils.merge_columns(value, arrow_columns.get(key, {})) - static_table["columns"] = arrow_columns - static_table["name"] = table_name - self.dynamic_tables[table_name] = [self.schema.normalize_table_identifiers(static_table)] - - -def extract( - extract_id: str, - source: DltSource, - storage: ExtractorStorage, - collector: Collector = NULL_COLLECTOR, - *, - max_parallel_items: int = None, - workers: int = None, - futures_poll_interval: float = None -) -> TSchemaUpdate: - dynamic_tables: TSchemaUpdate = {} - schema = source.schema - resources_with_items: Set[str] = set() - extractors: Dict[TLoaderFileFormat, Extractor] = { - "puae-jsonl": JsonLExtractor( - extract_id, storage, schema, resources_with_items, dynamic_tables, collector=collector - ), - "arrow": ArrowExtractor( - extract_id, storage, schema, resources_with_items, dynamic_tables, collector=collector - ) - } - last_item_format: Optional[TLoaderFileFormat] = None - - with collector(f"Extract {source.name}"): - # yield from all selected pipes - with PipeIterator.from_pipes(source.resources.selected_pipes, max_parallel_items=max_parallel_items, workers=workers, futures_poll_interval=futures_poll_interval) as pipes: - left_gens = total_gens = len(pipes._sources) - collector.update("Resources", 0, total_gens) - for pipe_item in pipes: - - curr_gens = len(pipes._sources) - if left_gens > curr_gens: - delta = left_gens - curr_gens - left_gens -= delta - collector.update("Resources", delta) - - signals.raise_if_signalled() - - resource = source.resources[pipe_item.pipe.name] - # Fallback to last item's format or default (puae-jsonl) if the current item is an empty list - item_format = Extractor.item_format(pipe_item.item) or last_item_format or "puae-jsonl" - extractors[item_format].write_table(resource, pipe_item.item, pipe_item.meta) - last_item_format = item_format - - # find defined resources that did not yield any pipeitems and create empty jobs for them - data_tables = {t["name"]: t for t in schema.data_tables()} - tables_by_resources = utils.group_tables_by_resource(data_tables) - for resource in source.resources.selected.values(): - if resource.write_disposition != "replace" or resource.name in resources_with_items: + } + # aggregate by resource name + resource_metrics = { + resource_name: sum(map(lambda pair: pair[1], metrics), EMPTY_DATA_WRITER_METRICS) + for resource_name, metrics in itertools.groupby( + table_metrics.items(), lambda pair: source.schema.get_table(pair[0])["resource"] + ) + } + # collect resource hints + clean_hints: Dict[str, Dict[str, Any]] = {} + for resource in source.selected_resources.values(): + # cleanup the hints + hints = clean_hints[resource.name] = {} + resource_hints = resource._hints or resource.compute_table_schema() + + for name, hint in resource_hints.items(): + if hint is None or name in ["validator"]: + continue + if name == "incremental": + # represent incremental as dictionary (it derives from BaseConfiguration) + hints[name] = dict(hint) # type: ignore[call-overload] + continue + if name == "original_columns": + # this is original type of the columns ie. 
Pydantic model + hints[name] = get_full_class_name(hint) continue - if resource.name not in tables_by_resources: + if callable(hint): + hints[name] = get_callable_name(hint) continue - for table in tables_by_resources[resource.name]: - # we only need to write empty files for the top tables - if not table.get("parent", None): - extractors[last_item_format or "puae-jsonl"].write_empty_file(table["name"]) - - if left_gens > 0: - # go to 100% - collector.update("Resources", left_gens) - - # flush all buffered writers - storage.close_writers(extract_id) - - # returns set of partial tables - return dynamic_tables - - -def extract_with_schema( - storage: ExtractorStorage, - source: DltSource, - schema: Schema, - collector: Collector, - max_parallel_items: int, - workers: int -) -> str: - # generate extract_id to be able to commit all the sources together later - extract_id = storage.create_extract_id() - with Container().injectable_context(SourceSchemaInjectableContext(schema)): - # inject the config section with the current source name - with inject_section(ConfigSectionContext(sections=(known_sections.SOURCES, source.section, source.name), source_state_key=source.name)): - # reset resource states, the `extracted` list contains all the explicit resources and all their parents - for resource in source.resources.extracted.values(): - with contextlib.suppress(DataItemRequiredForDynamicTableHints): - if resource.write_disposition == "replace": - reset_resource_state(resource.name) - - extractor = extract(extract_id, source, storage, collector, max_parallel_items=max_parallel_items, workers=workers) - # iterate over all items in the pipeline and update the schema if dynamic table hints were present - for _, partials in extractor.items(): - for partial in partials: - schema.update_table(schema.normalize_table_identifiers(partial)) - - return extract_id + if name == "columns": + if hint: + hints[name] = yaml.dump( + hint, allow_unicode=True, default_flow_style=False, sort_keys=False + ) + continue + hints[name] = hint + + return { + "started_at": None, + "finished_at": None, + "schema_name": source.schema.name, + "job_metrics": {job.job_id(): metrics for job, metrics in job_metrics.items()}, + "table_metrics": table_metrics, + "resource_metrics": resource_metrics, + "dag": source.resources.selected_dag, + "hints": clean_hints, + } + + def _extract_single_source( + self, + load_id: str, + source: DltSource, + *, + max_parallel_items: int = None, + workers: int = None, + futures_poll_interval: float = None, + ) -> None: + schema = source.schema + collector = self.collector + resources_with_items: Set[str] = set() + extractors: Dict[TLoaderFileFormat, Extractor] = { + "puae-jsonl": JsonLExtractor( + load_id, self.extract_storage, schema, resources_with_items, collector=collector + ), + "arrow": ArrowExtractor( + load_id, self.extract_storage, schema, resources_with_items, collector=collector + ), + } + last_item_format: Optional[TLoaderFileFormat] = None + + with collector(f"Extract {source.name}"): + self._step_info_start_load_id(load_id) + # yield from all selected pipes + with PipeIterator.from_pipes( + source.resources.selected_pipes, + max_parallel_items=max_parallel_items, + workers=workers, + futures_poll_interval=futures_poll_interval, + ) as pipes: + left_gens = total_gens = len(pipes._sources) + collector.update("Resources", 0, total_gens) + for pipe_item in pipes: + curr_gens = len(pipes._sources) + if left_gens > curr_gens: + delta = left_gens - curr_gens + left_gens -= delta + 
collector.update("Resources", delta) + + signals.raise_if_signalled() + + resource = source.resources[pipe_item.pipe.name] + # Fallback to last item's format or default (puae-jsonl) if the current item is an empty list + item_format = ( + Extractor.item_format(pipe_item.item) or last_item_format or "puae-jsonl" + ) + extractors[item_format].write_items(resource, pipe_item.item, pipe_item.meta) + last_item_format = item_format + + # find defined resources that did not yield any pipeitems and create empty jobs for them + # NOTE: do not include incomplete tables. those tables have never seen data so we do not need to reset them + data_tables = {t["name"]: t for t in schema.data_tables(include_incomplete=False)} + tables_by_resources = utils.group_tables_by_resource(data_tables) + for resource in source.resources.selected.values(): + if ( + resource.write_disposition != "replace" + or resource.name in resources_with_items + ): + continue + if resource.name not in tables_by_resources: + continue + for table in tables_by_resources[resource.name]: + # we only need to write empty files for the top tables + if not table.get("parent", None): + extractors["puae-jsonl"].write_empty_items_file(table["name"]) + + if left_gens > 0: + # go to 100% + collector.update("Resources", left_gens) + + # flush all buffered writers + self.extract_storage.close_writers(load_id) + # gather metrics + self._step_info_complete_load_id(load_id, self._compute_metrics(load_id, source)) + # remove the metrics of files processed in this extract run + # NOTE: there may be more than one extract run per load id: ie. the resource and then dlt state + self.extract_storage.remove_closed_files(load_id) + + def extract( + self, + source: DltSource, + max_parallel_items: int, + workers: int, + ) -> str: + # generate load package to be able to commit all the sources together later + load_id = self.extract_storage.create_load_package(source.discover_schema()) + with Container().injectable_context(SourceSchemaInjectableContext(source.schema)): + # inject the config section with the current source name + with inject_section( + ConfigSectionContext( + sections=(known_sections.SOURCES, source.section, source.name), + source_state_key=source.name, + ) + ): + # reset resource states, the `extracted` list contains all the explicit resources and all their parents + for resource in source.resources.extracted.values(): + with contextlib.suppress(DataItemRequiredForDynamicTableHints): + if resource.write_disposition == "replace": + reset_resource_state(resource.name) + + self._extract_single_source( + load_id, + source, + max_parallel_items=max_parallel_items, + workers=workers, + ) + return load_id + + def commit_packages(self) -> None: + """Commits all extracted packages to normalize storage""" + # commit load packages + for load_id, metrics in self._load_id_metrics.items(): + self.extract_storage.commit_new_load_package( + load_id, self.schema_storage[metrics[0]["schema_name"]] + ) + # all load ids got processed, cleanup empty folder + self.extract_storage.delete_empty_extract_folder() + + def get_step_info(self, pipeline: SupportsPipeline) -> ExtractInfo: + load_ids = list(self._load_id_metrics.keys()) + load_packages: List[LoadPackageInfo] = [] + metrics: Dict[str, List[ExtractMetrics]] = {} + for load_id in self._load_id_metrics.keys(): + load_package = self.extract_storage.get_load_package_info(load_id) + load_packages.append(load_package) + metrics[load_id] = self._step_info_metrics(load_id) + return ExtractInfo( + pipeline, + metrics, + 
describe_extract_data(self.original_data), + load_ids, + load_packages, + pipeline.first_run, + ) diff --git a/dlt/extract/extractors.py b/dlt/extract/extractors.py new file mode 100644 index 0000000000..bc32893677 --- /dev/null +++ b/dlt/extract/extractors.py @@ -0,0 +1,293 @@ +from copy import copy +from typing import Set, Dict, Any, Optional, Set + +from dlt.common.configuration.inject import with_config +from dlt.common.configuration.specs import BaseConfiguration, configspec +from dlt.common.destination.capabilities import DestinationCapabilitiesContext +from dlt.common.data_writers import TLoaderFileFormat +from dlt.common.exceptions import MissingDependencyException + +from dlt.common.runtime.collector import Collector, NULL_COLLECTOR +from dlt.common.utils import update_dict_nested +from dlt.common.typing import TDataItems, TDataItem +from dlt.common.schema import Schema, utils +from dlt.common.schema.typing import ( + TSchemaContractDict, + TSchemaEvolutionMode, + TTableSchema, + TTableSchemaColumns, + TPartialTableSchema, +) + +from dlt.extract.resource import DltResource +from dlt.extract.typing import TableNameMeta +from dlt.extract.storage import ExtractStorage, ExtractorItemStorage + +try: + from dlt.common.libs import pyarrow + from dlt.common.libs.pyarrow import pyarrow as pa, TAnyArrowItem +except MissingDependencyException: + pyarrow = None + +try: + import pandas as pd +except ModuleNotFoundError: + pd = None + + +class Extractor: + file_format: TLoaderFileFormat + + @configspec + class ExtractorConfiguration(BaseConfiguration): + _caps: Optional[DestinationCapabilitiesContext] = None + + @with_config(spec=ExtractorConfiguration) + def __init__( + self, + load_id: str, + storage: ExtractStorage, + schema: Schema, + resources_with_items: Set[str], + collector: Collector = NULL_COLLECTOR, + *, + _caps: DestinationCapabilitiesContext = None, + ) -> None: + self.schema = schema + self.naming = schema.naming + self.collector = collector + self.resources_with_items = resources_with_items + self.load_id = load_id + self._table_contracts: Dict[str, TSchemaContractDict] = {} + self._filtered_tables: Set[str] = set() + self._filtered_columns: Dict[str, Dict[str, TSchemaEvolutionMode]] = {} + self._storage = storage + self._caps = _caps or DestinationCapabilitiesContext.generic_capabilities() + + @property + def storage(self) -> ExtractorItemStorage: + return self._storage.get_storage(self.file_format) + + @staticmethod + def item_format(items: TDataItems) -> Optional[TLoaderFileFormat]: + """Detect the loader file format of the data items based on type. + Currently this is either 'arrow' or 'puae-jsonl' + + Returns: + The loader file format or `None` if if can't be detected. 
+        """
+        for item in items if isinstance(items, list) else [items]:
+            # Assume all items in list are the same type
+            if (pyarrow and pyarrow.is_arrow_item(item)) or (pd and isinstance(item, pd.DataFrame)):
+                return "arrow"
+            return "puae-jsonl"
+        return None  # Empty list is unknown format
+
+    def write_items(self, resource: DltResource, items: TDataItems, meta: Any) -> None:
+        """Write `items` to `resource`, optionally computing table schemas and revalidating/filtering data"""
+        if table_name := self._get_static_table_name(resource, meta):
+            # write item belonging to table with static name
+            self._write_to_static_table(resource, table_name, items)
+        else:
+            # table has name or other hints depending on data items
+            self._write_to_dynamic_table(resource, items)
+
+    def write_empty_items_file(self, table_name: str) -> None:
+        table_name = self.naming.normalize_table_identifier(table_name)
+        self.storage.write_empty_items_file(self.load_id, self.schema.name, table_name, None)
+
+    def _get_static_table_name(self, resource: DltResource, meta: Any) -> Optional[str]:
+        if resource._table_name_hint_fun:
+            return None
+        if isinstance(meta, TableNameMeta):
+            table_name = meta.table_name
+        else:
+            table_name = resource.table_name  # type: ignore[assignment]
+        return self.naming.normalize_table_identifier(table_name)
+
+    def _get_dynamic_table_name(self, resource: DltResource, item: TDataItem) -> str:
+        return self.naming.normalize_table_identifier(resource._table_name_hint_fun(item))
+
+    def _write_item(
+        self,
+        table_name: str,
+        resource_name: str,
+        items: TDataItems,
+        columns: TTableSchemaColumns = None,
+    ) -> None:
+        new_rows_count = self.storage.write_data_item(
+            self.load_id, self.schema.name, table_name, items, columns
+        )
+        self.collector.update(table_name, inc=new_rows_count)
+        if new_rows_count > 0:
+            self.resources_with_items.add(resource_name)
+
+    def _write_to_dynamic_table(self, resource: DltResource, items: TDataItems) -> None:
+        if not isinstance(items, list):
+            items = [items]
+
+        for item in items:
+            table_name = self._get_dynamic_table_name(resource, item)
+            if table_name in self._filtered_tables:
+                continue
+            if table_name not in self._table_contracts or resource._table_has_other_dynamic_hints:
+                item = self._compute_and_update_table(resource, table_name, item)
+            # write to storage with inferred table name
+            if table_name not in self._filtered_tables:
+                self._write_item(table_name, resource.name, item)
+
+    def _write_to_static_table(
+        self, resource: DltResource, table_name: str, items: TDataItems
+    ) -> None:
+        if table_name not in self._table_contracts:
+            items = self._compute_and_update_table(resource, table_name, items)
+        if table_name not in self._filtered_tables:
+            self._write_item(table_name, resource.name, items)
+
+    def _compute_table(self, resource: DltResource, items: TDataItems) -> TTableSchema:
+        """Computes a schema for a new or dynamic table and normalizes identifiers"""
+        return self.schema.normalize_table_identifiers(resource.compute_table_schema(items))
+
+    def _compute_and_update_table(
+        self, resource: DltResource, table_name: str, items: TDataItems
+    ) -> TDataItems:
+        """
+        Computes a new table schema and applies schema contract checks; if the table or any of its columns is filtered out by the contract, it will not be created and no items should be written for it
+        """
+        computed_table = self._compute_table(resource, items)
+        # overwrite table name (if coming from meta)
+        computed_table["name"] = table_name
+        # get or compute contract
+        schema_contract = self._table_contracts.setdefault(
+            table_name,
self.schema.resolve_contract_settings_for_table(table_name, computed_table) + ) + + # this is a new table so allow evolve once + if schema_contract["columns"] != "evolve" and self.schema.is_new_table(table_name): + computed_table["x-normalizer"] = {"evolve-columns-once": True} # type: ignore[typeddict-unknown-key] + existing_table = self.schema._schema_tables.get(table_name, None) + if existing_table: + diff_table = utils.diff_tables(existing_table, computed_table) + else: + diff_table = computed_table + + # apply contracts + diff_table, filters = self.schema.apply_schema_contract( + schema_contract, diff_table, data_item=items + ) + + # merge with schema table + if diff_table: + self.schema.update_table(diff_table) + + # process filters + if filters: + for entity, name, mode in filters: + if entity == "tables": + self._filtered_tables.add(name) + elif entity == "columns": + filtered_columns = self._filtered_columns.setdefault(table_name, {}) + filtered_columns[name] = mode + return items + + +class JsonLExtractor(Extractor): + file_format = "puae-jsonl" + + +class ArrowExtractor(Extractor): + file_format = "arrow" + + def write_items(self, resource: DltResource, items: TDataItems, meta: Any) -> None: + static_table_name = self._get_static_table_name(resource, meta) + items = [ + # 3. remove columns and rows in data contract filters + # 2. Remove null-type columns from the table(s) as they can't be loaded + self._apply_contract_filters( + pyarrow.remove_null_columns(tbl), resource, static_table_name + ) + for tbl in ( + ( + # 1. Convert pandas frame(s) to arrow Table + pa.Table.from_pandas(item) + if (pd and isinstance(item, pd.DataFrame)) + else item + ) + for item in (items if isinstance(items, list) else [items]) + ) + ] + super().write_items(resource, items, meta) + + def _apply_contract_filters( + self, item: "TAnyArrowItem", resource: DltResource, static_table_name: Optional[str] + ) -> "TAnyArrowItem": + """Removes the columns (discard value) or rows (discard rows) as indicated by contract filters.""" + # convert arrow schema names into normalized names + rename_mapping = pyarrow.get_normalized_arrow_fields_mapping(item, self.naming) + # find matching columns and delete by original name + table_name = static_table_name or self._get_dynamic_table_name(resource, item) + filtered_columns = self._filtered_columns.get(table_name) + if filtered_columns: + # remove rows where columns have non null values + # create a mask where rows will be False if any of the specified columns are non-null + mask = None + rev_mapping = {v: k for k, v in rename_mapping.items()} + for column in [ + name for name, mode in filtered_columns.items() if mode == "discard_row" + ]: + is_null = pyarrow.pyarrow.compute.is_null(item[rev_mapping[column]]) + mask = is_null if mask is None else pyarrow.pyarrow.compute.and_(mask, is_null) + # filter the table using the mask + if mask is not None: + item = item.filter(mask) + + # remove value actually removes the whole columns from the table + # NOTE: filtered columns has normalized column names so we need to go through mapping + removed_columns = [ + name + for name in rename_mapping + if filtered_columns.get(rename_mapping[name]) is not None + ] + if removed_columns: + item = pyarrow.remove_columns(item, removed_columns) + + return item + + def _write_item( + self, + table_name: str, + resource_name: str, + items: TDataItems, + columns: TTableSchemaColumns = None, + ) -> None: + columns = columns or self.schema.tables[table_name]["columns"] + # Note: `items` is always a 
list here due to the conversion in `write_table` + items = [ + pyarrow.normalize_py_arrow_schema(item, columns, self.naming, self._caps) + for item in items + ] + super()._write_item(table_name, resource_name, items, columns) + + def _compute_table(self, resource: DltResource, items: TDataItems) -> TPartialTableSchema: + items = items[0] + computed_table = super()._compute_table(resource, items) + + # Merge the columns to include primary_key and other hints that may be set on the resource + arrow_table = copy(computed_table) + arrow_table["columns"] = pyarrow.py_arrow_to_table_schema_columns(items.schema) + # normalize arrow table before merging + arrow_table = self.schema.normalize_table_identifiers(arrow_table) + # we must override the columns to preserve the order in arrow table + arrow_table["columns"] = update_dict_nested( + arrow_table["columns"], computed_table["columns"] + ) + + return arrow_table + + def _compute_and_update_table( + self, resource: DltResource, table_name: str, items: TDataItems + ) -> TDataItems: + items = super()._compute_and_update_table(resource, table_name, items) + # filter data item as filters could be updated in compute table + items = [self._apply_contract_filters(item, resource, table_name) for item in items] + return items diff --git a/dlt/extract/schema.py b/dlt/extract/hints.py similarity index 53% rename from dlt/extract/schema.py rename to dlt/extract/hints.py index c1dfd1f7f5..437dbbc6bd 100644 --- a/dlt/extract/schema.py +++ b/dlt/extract/hints.py @@ -1,21 +1,35 @@ from copy import copy, deepcopy -from collections.abc import Mapping as C_Mapping from typing import List, TypedDict, cast, Any from dlt.common.schema.utils import DEFAULT_WRITE_DISPOSITION, merge_columns, new_column, new_table -from dlt.common.schema.typing import TColumnNames, TColumnProp, TColumnSchema, TPartialTableSchema, TTableSchemaColumns, TWriteDisposition, TAnySchemaColumns, TTableFormat +from dlt.common.schema.typing import ( + TColumnNames, + TColumnProp, + TColumnSchema, + TPartialTableSchema, + TTableSchema, + TTableSchemaColumns, + TWriteDisposition, + TAnySchemaColumns, + TTableFormat, + TSchemaContract, +) from dlt.common.typing import TDataItem from dlt.common.utils import update_dict_nested from dlt.common.validation import validate_dict_ignoring_xkeys from dlt.extract.incremental import Incremental from dlt.extract.typing import TFunHintTemplate, TTableHintTemplate, ValidateItem -from dlt.extract.exceptions import DataItemRequiredForDynamicTableHints, InconsistentTableTemplate, TableNameMissing +from dlt.extract.exceptions import ( + DataItemRequiredForDynamicTableHints, + InconsistentTableTemplate, + TableNameMissing, +) from dlt.extract.utils import ensure_table_schema_columns, ensure_table_schema_columns_hint -from dlt.extract.validation import get_column_validator +from dlt.extract.validation import create_item_validator -class TTableSchemaTemplate(TypedDict, total=False): +class TResourceHints(TypedDict, total=False): name: TTableHintTemplate[str] # description: TTableHintTemplate[str] write_disposition: TTableHintTemplate[TWriteDisposition] @@ -25,17 +39,19 @@ class TTableSchemaTemplate(TypedDict, total=False): primary_key: TTableHintTemplate[TColumnNames] merge_key: TTableHintTemplate[TColumnNames] incremental: Incremental[Any] + schema_contract: TTableHintTemplate[TSchemaContract] validator: ValidateItem + original_columns: TTableHintTemplate[TAnySchemaColumns] -class DltResourceSchema: - def __init__(self, table_schema_template: TTableSchemaTemplate = None): 
+class DltResourceHints: + def __init__(self, table_schema_template: TResourceHints = None): self.__qualname__ = self.__name__ = self.name self._table_name_hint_fun: TFunHintTemplate[str] = None self._table_has_other_dynamic_hints: bool = False - self._table_schema_template: TTableSchemaTemplate = None + self._hints: TResourceHints = None if table_schema_template: - self.set_template(table_schema_template) + self.set_hints(table_schema_template) @property def name(self) -> str: @@ -47,7 +63,7 @@ def table_name(self) -> TTableHintTemplate[str]: if self._table_name_hint_fun: return self._table_name_hint_fun # get table name or default name - return self._table_schema_template.get("name") or self.name if self._table_schema_template else self.name + return self._hints.get("name") or self.name if self._hints else self.name @table_name.setter def table_name(self, value: TTableHintTemplate[str]) -> None: @@ -55,9 +71,9 @@ def table_name(self, value: TTableHintTemplate[str]) -> None: @property def write_disposition(self) -> TTableHintTemplate[TWriteDisposition]: - if self._table_schema_template is None or self._table_schema_template.get("write_disposition") is None: + if self._hints is None or self._hints.get("write_disposition") is None: return DEFAULT_WRITE_DISPOSITION - return self._table_schema_template.get("write_disposition") + return self._hints.get("write_disposition") @write_disposition.setter def write_disposition(self, value: TTableHintTemplate[TWriteDisposition]) -> None: @@ -66,32 +82,34 @@ def write_disposition(self, value: TTableHintTemplate[TWriteDisposition]) -> Non @property def columns(self) -> TTableHintTemplate[TTableSchemaColumns]: """Gets columns schema that can be modified in place""" - if self._table_schema_template is None: + if self._hints is None: return None - return self._table_schema_template.get("columns") + return self._hints.get("columns") - def compute_table_schema(self, item: TDataItem = None) -> TPartialTableSchema: + @property + def schema_contract(self) -> TTableHintTemplate[TSchemaContract]: + return self._hints.get("schema_contract") + + def compute_table_schema(self, item: TDataItem = None) -> TTableSchema: """Computes the table schema based on hints and column definitions passed during resource creation. 
`item` parameter is used to resolve table hints based on data"""
-        if not self._table_schema_template:
+        if not self._hints:
             return new_table(self.name, resource=self.name)
         # resolve a copy of a held template
-        table_template = copy(self._table_schema_template)
+        table_template = copy(self._hints)
         if "name" not in table_template:
             table_template["name"] = self.name
-        table_template["columns"] = copy(self._table_schema_template["columns"])
+        table_template["columns"] = copy(self._hints["columns"])

         # if table template present and has dynamic hints, the data item must be provided
         if self._table_name_hint_fun and item is None:
             raise DataItemRequiredForDynamicTableHints(self.name)
         # resolve
-        resolved_template: TTableSchemaTemplate = {k: self._resolve_hint(item, v) for k, v in table_template.items()}  # type: ignore
-        resolved_template.pop("incremental", None)
-        resolved_template.pop("validator", None)
+        resolved_template: TResourceHints = {k: self._resolve_hint(item, v) for k, v in table_template.items() if k not in ["incremental", "validator", "original_columns"]}  # type: ignore
         table_schema = self._merge_keys(resolved_template)
         table_schema["resource"] = self.name
         validate_dict_ignoring_xkeys(
-            spec=TPartialTableSchema,
+            spec=TTableSchema,
             doc=table_schema,
             path=f"new_table/{self.name}",
         )
@@ -105,27 +123,36 @@ def apply_hints(
         columns: TTableHintTemplate[TAnySchemaColumns] = None,
         primary_key: TTableHintTemplate[TColumnNames] = None,
         merge_key: TTableHintTemplate[TColumnNames] = None,
-        incremental: Incremental[Any] = None
+        incremental: Incremental[Any] = None,
+        schema_contract: TTableHintTemplate[TSchemaContract] = None,
     ) -> None:
         """Creates or modifies existing table schema by setting provided hints. Accepts both static and dynamic hints based on data.

-           This method accepts the same table hints arguments as `dlt.resource` decorator with the following additions.
-           Skip the argument or pass None to leave the existing hint.
-           Pass empty value (for particular type ie "" for a string) to remove hint
+        This method accepts the same table hints arguments as the `dlt.resource` decorator, with the following additions.
+        Skip the argument or pass None to leave the existing hint.
+        Pass an empty value (for the particular type, ie. "" for a string) to remove the hint

-           parent_table_name (str, optional): A name of parent table if foreign relation is defined. Please note that if you use merge you must define `root_key` columns explicitly
-           incremental (Incremental, optional): Enables the incremental loading for a resource.
+        parent_table_name (str, optional): A name of the parent table if a foreign relation is defined. Please note that if you use merge you must define `root_key` columns explicitly
+        incremental (Incremental, optional): Enables incremental loading for a resource.

-           Please note that for efficient incremental loading, the resource must be aware of the Incremental by accepting it as one if its arguments and then using is to skip already loaded data.
-           In non-aware resources, `dlt` will filter out the loaded values, however the resource will yield all the values again.
+        Please note that for efficient incremental loading, the resource must be aware of the Incremental by accepting it as one of its arguments and then using it to skip already loaded data.
+        In non-aware resources, `dlt` will filter out the loaded values, however the resource will yield all the values again.
""" t = None - if not self._table_schema_template: + if not self._hints: # if there's no template yet, create and set new one - t = self.new_table_template(table_name, parent_table_name, write_disposition, columns, primary_key, merge_key) + t = self.new_table_template( + table_name, + parent_table_name, + write_disposition, + columns, + primary_key, + merge_key, + schema_contract, + ) else: # set single hints - t = deepcopy(self._table_schema_template) + t = self._clone_hints(self._hints) if table_name is not None: if table_name: t["name"] = table_name @@ -139,7 +166,8 @@ def apply_hints( if write_disposition: t["write_disposition"] = write_disposition if columns is not None: - t['validator'] = get_column_validator(columns) + # keep original columns: ie in case it is a Pydantic model + t["original_columns"] = columns # if callable then override existing if callable(columns) or callable(t["columns"]): t["columns"] = ensure_table_schema_columns_hint(columns) @@ -151,7 +179,6 @@ def apply_hints( else: # set to empty columns t["columns"] = ensure_table_schema_columns(columns) - if primary_key is not None: if primary_key: t["primary_key"] = primary_key @@ -162,30 +189,56 @@ def apply_hints( t["merge_key"] = merge_key else: t.pop("merge_key", None) + if schema_contract is not None: + if schema_contract: + t["schema_contract"] = schema_contract + else: + t.pop("schema_contract", None) + # recreate validator if columns definition or contract changed + if schema_contract is not None or columns is not None: + t["validator"], schema_contract = create_item_validator( + t.get("original_columns"), t.get("schema_contract") + ) + if schema_contract is not None: + t["schema_contract"] = schema_contract # set properties that cannot be passed to new_table_template - t["incremental"] = incremental - self.set_template(t) + if incremental is not None: + if incremental is Incremental.EMPTY: + t["incremental"] = None + else: + t["incremental"] = incremental + self.set_hints(t) - def set_template(self, table_schema_template: TTableSchemaTemplate) -> None: - DltResourceSchema.validate_dynamic_hints(table_schema_template) + def set_hints(self, hints_template: TResourceHints) -> None: + DltResourceHints.validate_dynamic_hints(hints_template) # if "name" is callable in the template then the table schema requires actual data item to be inferred - name_hint = table_schema_template.get("name") + name_hint = hints_template.get("name") if callable(name_hint): self._table_name_hint_fun = name_hint else: self._table_name_hint_fun = None # check if any other hints in the table template should be inferred from data - self._table_has_other_dynamic_hints = any(callable(v) for k, v in table_schema_template.items() if k != "name") - self._table_schema_template = table_schema_template + self._table_has_other_dynamic_hints = any( + callable(v) for k, v in hints_template.items() if k != "name" + ) + self._hints = hints_template + + @staticmethod + def _clone_hints(hints_template: TResourceHints) -> TResourceHints: + t_ = copy(hints_template) + t_["columns"] = deepcopy(hints_template["columns"]) + if "schema_contract" in hints_template: + t_["schema_contract"] = deepcopy(hints_template["schema_contract"]) + return t_ @staticmethod def _resolve_hint(item: TDataItem, hint: TTableHintTemplate[Any]) -> Any: - """Calls each dynamic hint passing a data item""" - if callable(hint): - return hint(item) - else: - return hint + """Calls each dynamic hint passing a data item""" + if callable(hint): + return hint(item) + else: + return hint 
@staticmethod def _merge_key(hint: TColumnProp, keys: TColumnNames, partial: TPartialTableSchema) -> None: @@ -199,15 +252,15 @@ def _merge_key(hint: TColumnProp, keys: TColumnNames, partial: TPartialTableSche partial["columns"][key][hint] = True @staticmethod - def _merge_keys(t_: TTableSchemaTemplate) -> TPartialTableSchema: + def _merge_keys(t_: TResourceHints) -> TPartialTableSchema: """Merges resolved keys into columns""" partial = cast(TPartialTableSchema, t_) # assert not callable(t_["merge_key"]) # assert not callable(t_["primary_key"]) if "primary_key" in t_: - DltResourceSchema._merge_key("primary_key", t_.pop("primary_key"), partial) # type: ignore + DltResourceHints._merge_key("primary_key", t_.pop("primary_key"), partial) # type: ignore if "merge_key" in t_: - DltResourceSchema._merge_key("merge_key", t_.pop("merge_key"), partial) # type: ignore + DltResourceHints._merge_key("merge_key", t_.pop("merge_key"), partial) # type: ignore return partial @@ -219,21 +272,29 @@ def new_table_template( columns: TTableHintTemplate[TAnySchemaColumns] = None, primary_key: TTableHintTemplate[TColumnNames] = None, merge_key: TTableHintTemplate[TColumnNames] = None, - table_format: TTableHintTemplate[TTableFormat] = None - ) -> TTableSchemaTemplate: + schema_contract: TTableHintTemplate[TSchemaContract] = None, + table_format: TTableHintTemplate[TTableFormat] = None, + ) -> TResourceHints: + validator, schema_contract = create_item_validator(columns, schema_contract) + clean_columns = columns if columns is not None: - validator = get_column_validator(columns) - columns = ensure_table_schema_columns_hint(columns) - if not callable(columns): - columns = columns.values() # type: ignore - else: - validator = None + clean_columns = ensure_table_schema_columns_hint(columns) + if not callable(clean_columns): + clean_columns = clean_columns.values() # type: ignore # create a table schema template where hints can be functions taking TDataItem - new_template: TTableSchemaTemplate = new_table( - table_name, parent_table_name, write_disposition=write_disposition, columns=columns, table_format=table_format # type: ignore + new_template: TResourceHints = new_table( + table_name, # type: ignore + parent_table_name, # type: ignore + write_disposition=write_disposition, # type: ignore + columns=clean_columns, # type: ignore + schema_contract=schema_contract, # type: ignore + table_format=table_format, # type: ignore ) if not table_name: new_template.pop("name") + # remember original columns + if columns is not None: + new_template["original_columns"] = columns # always remove resource new_template.pop("resource", None) # type: ignore if primary_key: @@ -242,12 +303,18 @@ def new_table_template( new_template["merge_key"] = merge_key if validator: new_template["validator"] = validator - DltResourceSchema.validate_dynamic_hints(new_template) + DltResourceHints.validate_dynamic_hints(new_template) return new_template @staticmethod - def validate_dynamic_hints(template: TTableSchemaTemplate) -> None: + def validate_dynamic_hints(template: TResourceHints) -> None: table_name = template.get("name") # if any of the hints is a function then name must be as well - if any(callable(v) for k, v in template.items() if k not in ["name", "incremental", "validator"]) and not callable(table_name): - raise InconsistentTableTemplate(f"Table name {table_name} must be a function if any other table hint is a function") + if any( + callable(v) + for k, v in template.items() + if k not in ["name", "incremental", "validator", 
"original_columns"] + ) and not callable(table_name): + raise InconsistentTableTemplate( + f"Table name {table_name} must be a function if any other table hint is a function" + ) diff --git a/dlt/extract/incremental/__init__.py b/dlt/extract/incremental/__init__.py index 6d042aa15d..39c9866fe3 100644 --- a/dlt/extract/incremental/__init__.py +++ b/dlt/extract/incremental/__init__.py @@ -1,5 +1,6 @@ import os -from typing import Generic, Any, Optional, get_args, get_origin, Type, Dict +from typing import Generic, ClassVar, Any, Optional, Type, Dict +from typing_extensions import get_origin, get_args import inspect from functools import wraps @@ -12,19 +13,38 @@ from dlt.common.exceptions import MissingDependencyException from dlt.common import pendulum, logger from dlt.common.jsonpath import compile_path -from dlt.common.typing import TDataItem, TDataItems, TFun, extract_inner_type, get_generic_type_argument_from_instance, is_optional_type +from dlt.common.typing import ( + TDataItem, + TDataItems, + TFun, + extract_inner_type, + get_generic_type_argument_from_instance, + is_optional_type, +) from dlt.common.schema.typing import TColumnNames from dlt.common.configuration import configspec, ConfigurationValueError from dlt.common.configuration.specs import BaseConfiguration from dlt.common.pipeline import resource_state -from dlt.common.data_types.type_helpers import coerce_from_date_types, coerce_value, py_type_to_sc_type +from dlt.common.data_types.type_helpers import ( + coerce_from_date_types, + coerce_value, + py_type_to_sc_type, +) from dlt.extract.exceptions import IncrementalUnboundError -from dlt.extract.incremental.exceptions import IncrementalCursorPathMissing, IncrementalPrimaryKeyMissing +from dlt.extract.incremental.exceptions import ( + IncrementalCursorPathMissing, + IncrementalPrimaryKeyMissing, +) from dlt.extract.incremental.typing import IncrementalColumnState, TCursorValue, LastValueFunc from dlt.extract.pipe import Pipe from dlt.extract.typing import SupportsPipe, TTableHintTemplate, ItemTransform -from dlt.extract.incremental.transform import JsonIncremental, ArrowIncremental, IncrementalTransform +from dlt.extract.incremental.transform import ( + JsonIncremental, + ArrowIncremental, + IncrementalTransform, +) + try: from dlt.common.libs.pyarrow import is_arrow_item except MissingDependencyException: @@ -69,19 +89,24 @@ class Incremental(ItemTransform[TDataItem], BaseConfiguration, Generic[TCursorVa The values passed explicitly to Incremental will be ignored. 
Note that if logical "end date" is present then also "end_value" will be set which means that resource state is not used and exactly this range of date will be loaded """ + + # this is config/dataclass so declare members cursor_path: str = None # TODO: Support typevar here initial_value: Optional[Any] = None end_value: Optional[Any] = None + # incremental acting as empty + EMPTY: ClassVar["Incremental[Any]"] = None + def __init__( - self, - cursor_path: str = dlt.config.value, - initial_value: Optional[TCursorValue]=None, - last_value_func: Optional[LastValueFunc[TCursorValue]]=max, - primary_key: Optional[TTableHintTemplate[TColumnNames]] = None, - end_value: Optional[TCursorValue] = None, - allow_external_schedulers: bool = False + self, + cursor_path: str = dlt.config.value, + initial_value: Optional[TCursorValue] = None, + last_value_func: Optional[LastValueFunc[TCursorValue]] = max, + primary_key: Optional[TTableHintTemplate[TColumnNames]] = None, + end_value: Optional[TCursorValue] = None, + allow_external_schedulers: bool = False, ) -> None: # make sure that path is valid if cursor_path: @@ -118,12 +143,13 @@ def _make_transforms(self) -> None: self.end_value, self._cached_state, self.last_value_func, - self.primary_key + self.primary_key, ) - @classmethod - def from_existing_state(cls, resource_name: str, cursor_path: str) -> "Incremental[TCursorValue]": + def from_existing_state( + cls, resource_name: str, cursor_path: str + ) -> "Incremental[TCursorValue]": """Create Incremental instance from existing state.""" state = Incremental._get_state(resource_name, cursor_path) i = cls(cursor_path, state["initial_value"]) @@ -139,7 +165,7 @@ def copy(self) -> "Incremental[TCursorValue]": last_value_func=self.last_value_func, primary_key=self.primary_key, end_value=self.end_value, - allow_external_schedulers=self.allow_external_schedulers + allow_external_schedulers=self.allow_external_schedulers, ) def merge(self, other: "Incremental[TCursorValue]") -> "Incremental[TCursorValue]": @@ -154,15 +180,17 @@ def merge(self, other: "Incremental[TCursorValue]") -> "Incremental[TCursorValue """ kwargs = dict(self, last_value_func=self.last_value_func, primary_key=self.primary_key) for key, value in dict( - other, - last_value_func=other.last_value_func, primary_key=other.primary_key).items(): + other, last_value_func=other.last_value_func, primary_key=other.primary_key + ).items(): if value is not None: kwargs[key] = value # preserve Generic param information if hasattr(self, "__orig_class__"): constructor = self.__orig_class__ else: - constructor = other.__orig_class__ if hasattr(other, "__orig_class__") else other.__class__ + constructor = ( + other.__orig_class__ if hasattr(other, "__orig_class__") else other.__class__ + ) constructor = extract_inner_type(constructor) return constructor(**kwargs) # type: ignore @@ -170,17 +198,28 @@ def on_resolved(self) -> None: compile_path(self.cursor_path) if self.end_value is not None and self.initial_value is None: raise ConfigurationValueError( - "Incremental 'end_value' was specified without 'initial_value'. 'initial_value' is required when using 'end_value'." + "Incremental 'end_value' was specified without 'initial_value'. 'initial_value' is" + " required when using 'end_value'." 
) # Ensure end value is "higher" than initial value - if self.end_value is not None and self.last_value_func([self.end_value, self.initial_value]) != self.end_value: + if ( + self.end_value is not None + and self.last_value_func([self.end_value, self.initial_value]) != self.end_value + ): if self.last_value_func in (min, max): - adject = 'higher' if self.last_value_func is max else 'lower' - msg = f"Incremental 'initial_value' ({self.initial_value}) is {adject} than 'end_value` ({self.end_value}). 'end_value' must be {adject} than 'initial_value'" + adject = "higher" if self.last_value_func is max else "lower" + msg = ( + f"Incremental 'initial_value' ({self.initial_value}) is {adject} than" + f" 'end_value` ({self.end_value}). 'end_value' must be {adject} than" + " 'initial_value'" + ) else: msg = ( - f"Incremental 'initial_value' ({self.initial_value}) is greater than 'end_value' ({self.end_value}) as determined by the custom 'last_value_func'. " - f"The result of '{self.last_value_func.__name__}([end_value, initial_value])' must equal 'end_value'" + f"Incremental 'initial_value' ({self.initial_value}) is greater than" + f" 'end_value' ({self.end_value}) as determined by the custom" + " 'last_value_func'. The result of" + f" '{self.last_value_func.__name__}([end_value, initial_value])' must equal" + " 'end_value'" ) raise ConfigurationValueError(msg) @@ -202,9 +241,9 @@ def get_state(self) -> IncrementalColumnState: if self.end_value is not None: # End value uses mock state. We don't want to write it. return { - 'initial_value': self.initial_value, - 'last_value': self.initial_value, - 'unique_hashes': [] + "initial_value": self.initial_value, + "last_value": self.initial_value, + "unique_hashes": [], } if not self.resource_name: @@ -217,23 +256,27 @@ def get_state(self) -> IncrementalColumnState: { "initial_value": self.initial_value, "last_value": self.initial_value, - 'unique_hashes': [] + "unique_hashes": [], } ) return self._cached_state @staticmethod def _get_state(resource_name: str, cursor_path: str) -> IncrementalColumnState: - state: IncrementalColumnState = resource_state(resource_name).setdefault('incremental', {}).setdefault(cursor_path, {}) + state: IncrementalColumnState = ( + resource_state(resource_name).setdefault("incremental", {}).setdefault(cursor_path, {}) + ) # if state params is empty return state @property def last_value(self) -> Optional[TCursorValue]: s = self.get_state() - return s['last_value'] # type: ignore + return s["last_value"] # type: ignore - def _transform_item(self, transformer: IncrementalTransform, row: TDataItem) -> Optional[TDataItem]: + def _transform_item( + self, transformer: IncrementalTransform, row: TDataItem + ) -> Optional[TDataItem]: row, start_out_of_range, end_out_of_range = transformer(row) self.start_out_of_range = start_out_of_range self.end_out_of_range = end_out_of_range @@ -245,8 +288,8 @@ def get_incremental_value_type(self) -> Type[Any]: def _join_external_scheduler(self) -> None: """Detects existence of external scheduler from which `start_value` and `end_value` are taken. Detects Airflow and environment variables. - The logical "start date" coming from external scheduler will set the `initial_value` in incremental. if additionally logical "end date" is - present then also "end_value" will be set which means that resource state is not used and exactly this range of date will be loaded + The logical "start date" coming from external scheduler will set the `initial_value` in incremental. 
if additionally logical "end date" is + present then also "end_value" will be set which means that resource state is not used and exactly this range of date will be loaded """ # fit the pendulum into incremental type param_type = self.get_incremental_value_type() @@ -255,14 +298,22 @@ def _join_external_scheduler(self) -> None: if param_type is not Any: data_type = py_type_to_sc_type(param_type) except Exception as ex: - logger.warning(f"Specified Incremental last value type {param_type} is not supported. Please use DateTime, Date, float, int or str to join external schedulers.({ex})") + logger.warning( + f"Specified Incremental last value type {param_type} is not supported. Please use" + f" DateTime, Date, float, int or str to join external schedulers.({ex})" + ) if param_type is Any: - logger.warning("Could not find the last value type of Incremental class participating in external schedule. " - "Please add typing when declaring incremental argument in your resource or pass initial_value from which the type can be inferred.") + logger.warning( + "Could not find the last value type of Incremental class participating in external" + " schedule. Please add typing when declaring incremental argument in your resource" + " or pass initial_value from which the type can be inferred." + ) return - def _ensure_airflow_end_date(start_date: pendulum.DateTime, end_date: pendulum.DateTime) -> Optional[pendulum.DateTime]: + def _ensure_airflow_end_date( + start_date: pendulum.DateTime, end_date: pendulum.DateTime + ) -> Optional[pendulum.DateTime]: """if end_date is in the future or same as start date (manual run), set it to None so dlt state is used for incremental loading""" now = pendulum.now() if end_date is None or end_date > now or start_date == end_date: @@ -272,6 +323,7 @@ def _ensure_airflow_end_date(start_date: pendulum.DateTime, end_date: pendulum.D try: # we can move it to separate module when we have more of those from airflow.operators.python import get_current_context # noqa + context = get_current_context() start_date = context["data_interval_start"] end_date = _ensure_airflow_end_date(start_date, context["data_interval_end"]) @@ -280,10 +332,17 @@ def _ensure_airflow_end_date(start_date: pendulum.DateTime, end_date: pendulum.D self.end_value = coerce_from_date_types(data_type, end_date) else: self.end_value = None - logger.info(f"Found Airflow scheduler: initial value: {self.initial_value} from data_interval_start {context['data_interval_start']}, end value: {self.end_value} from data_interval_end {context['data_interval_end']}") + logger.info( + f"Found Airflow scheduler: initial value: {self.initial_value} from" + f" data_interval_start {context['data_interval_start']}, end value:" + f" {self.end_value} from data_interval_end {context['data_interval_end']}" + ) return except TypeError as te: - logger.warning(f"Could not coerce Airflow execution dates into the last value type {param_type}. ({te})") + logger.warning( + f"Could not coerce Airflow execution dates into the last value type {param_type}." 
+ f" ({te})" + ) except Exception: pass @@ -306,24 +365,30 @@ def bind(self, pipe: SupportsPipe) -> "Incremental[TCursorValue]": self._join_external_scheduler() # set initial value from last value, in case of a new state those are equal self.start_value = self.last_value - logger.info(f"Bind incremental on {self.resource_name} with initial_value: {self.initial_value}, start_value: {self.start_value}, end_value: {self.end_value}") + logger.info( + f"Bind incremental on {self.resource_name} with initial_value: {self.initial_value}," + f" start_value: {self.start_value}, end_value: {self.end_value}" + ) # cache state self._cached_state = self.get_state() self._make_transforms() return self def __str__(self) -> str: - return f"Incremental at {id(self)} for resource {self.resource_name} with cursor path: {self.cursor_path} initial {self.initial_value} lv_func {self.last_value_func}" + return ( + f"Incremental at {id(self)} for resource {self.resource_name} with cursor path:" + f" {self.cursor_path} initial {self.initial_value} lv_func {self.last_value_func}" + ) def _get_transformer(self, items: TDataItems) -> IncrementalTransform: # Assume list is all of the same type for item in items if isinstance(items, list) else [items]: if is_arrow_item(item): - return self._transformers['arrow'] + return self._transformers["arrow"] elif pd is not None and isinstance(item, pd.DataFrame): - return self._transformers['arrow'] - return self._transformers['json'] - return self._transformers['json'] + return self._transformers["arrow"] + return self._transformers["json"] + return self._transformers["json"] def __call__(self, rows: TDataItems, meta: Any = None) -> Optional[TDataItems]: if rows is None: @@ -333,10 +398,17 @@ def __call__(self, rows: TDataItems, meta: Any = None) -> Optional[TDataItems]: transformer.primary_key = self.primary_key if isinstance(rows, list): - return [item for item in (self._transform_item(transformer, row) for row in rows) if item is not None] + return [ + item + for item in (self._transform_item(transformer, row) for row in rows) + if item is not None + ] return self._transform_item(transformer, rows) +Incremental.EMPTY = Incremental[Any]("") + + class IncrementalResourceWrapper(ItemTransform[TDataItem]): _incremental: Optional[Incremental[Any]] = None """Keeps the injectable incremental""" @@ -366,14 +438,15 @@ def get_incremental_arg(sig: inspect.Signature) -> Optional[inspect.Parameter]: for p in sig.parameters.values(): annotation = extract_inner_type(p.annotation) annotation = get_origin(annotation) or annotation - if (inspect.isclass(annotation) and issubclass(annotation, Incremental)) or isinstance(p.default, Incremental): + if (inspect.isclass(annotation) and issubclass(annotation, Incremental)) or isinstance( + p.default, Incremental + ): incremental_param = p break return incremental_param def wrap(self, sig: inspect.Signature, func: TFun) -> TFun: - """Wrap the callable to inject an `Incremental` object configured for the resource. 
- """ + """Wrap the callable to inject an `Incremental` object configured for the resource.""" incremental_param = self.get_incremental_arg(sig) assert incremental_param, "Please use `should_wrap` to decide if to call this function" @@ -386,16 +459,16 @@ def _wrap(*args: Any, **kwargs: Any) -> Any: if p.name in bound_args.arguments: explicit_value = bound_args.arguments[p.name] - if isinstance(explicit_value, Incremental): + if explicit_value is Incremental.EMPTY or p.default is Incremental.EMPTY: + # drop incremental + pass + elif isinstance(explicit_value, Incremental): # Explicit Incremental instance is merged with default # allowing e.g. to only update initial_value/end_value but keeping default cursor_path if isinstance(p.default, Incremental): new_incremental = p.default.merge(explicit_value) else: new_incremental = explicit_value.copy() - elif explicit_value is None: - # new_incremental not set! - pass elif isinstance(p.default, Incremental): # Passing only initial value explicitly updates the default instance new_incremental = p.default.copy() @@ -407,9 +480,16 @@ def _wrap(*args: Any, **kwargs: Any) -> Any: if is_optional_type(p.annotation): bound_args.arguments[p.name] = None # Remove partial spec return func(*bound_args.args, **bound_args.kwargs) - raise ValueError(f"{p.name} Incremental argument has no default. Please wrap its typing in Optional[] to allow no incremental") + raise ValueError( + f"{p.name} Incremental argument has no default. Please wrap its typing in" + " Optional[] to allow no incremental" + ) # pass Generic information from annotation to new_incremental - if not hasattr(new_incremental, "__orig_class__") and p.annotation and get_args(p.annotation): + if ( + not hasattr(new_incremental, "__orig_class__") + and p.annotation + and get_args(p.annotation) + ): new_incremental.__orig_class__ = p.annotation # type: ignore # set the incremental only if not yet set or if it was passed explicitly diff --git a/dlt/extract/incremental/exceptions.py b/dlt/extract/incremental/exceptions.py index 8de5623c78..e318a028dc 100644 --- a/dlt/extract/incremental/exceptions.py +++ b/dlt/extract/incremental/exceptions.py @@ -3,10 +3,13 @@ class IncrementalCursorPathMissing(PipeException): - def __init__(self, pipe_name: str, json_path: str, item: TDataItem, msg: str=None) -> None: + def __init__(self, pipe_name: str, json_path: str, item: TDataItem, msg: str = None) -> None: self.json_path = json_path self.item = item - msg = msg or f"Cursor element with JSON path {json_path} was not found in extracted data item. All data items must contain this path. Use the same names of fields as in your JSON document - if those are different from the names you see in database." + msg = ( + msg + or f"Cursor element with JSON path {json_path} was not found in extracted data item. All data items must contain this path. Use the same names of fields as in your JSON document - if those are different from the names you see in database." + ) super().__init__(pipe_name, msg) @@ -14,5 +17,9 @@ class IncrementalPrimaryKeyMissing(PipeException): def __init__(self, pipe_name: str, primary_key_column: str, item: TDataItem) -> None: self.primary_key_column = primary_key_column self.item = item - msg = f"Primary key column {primary_key_column} was not found in extracted data item. All data items must contain this column. Use the same names of fields as in your JSON document." + msg = ( + f"Primary key column {primary_key_column} was not found in extracted data item. 
All" + " data items must contain this column. Use the same names of fields as in your JSON" + " document." + ) super().__init__(pipe_name, msg) diff --git a/dlt/extract/incremental/transform.py b/dlt/extract/incremental/transform.py index af45736da4..adf0c33ad3 100644 --- a/dlt/extract/incremental/transform.py +++ b/dlt/extract/incremental/transform.py @@ -17,15 +17,21 @@ from dlt.common import pendulum from dlt.common.typing import TDataItem, TDataItems from dlt.common.jsonpath import TJsonPath, find_values, JSONPathFields, compile_path -from dlt.extract.incremental.exceptions import IncrementalCursorPathMissing, IncrementalPrimaryKeyMissing +from dlt.extract.incremental.exceptions import ( + IncrementalCursorPathMissing, + IncrementalPrimaryKeyMissing, +) from dlt.extract.incremental.typing import IncrementalColumnState, TCursorValue, LastValueFunc from dlt.extract.utils import resolve_column_value from dlt.extract.typing import TTableHintTemplate from dlt.common.schema.typing import TColumnNames + try: + from dlt.common.libs import pyarrow from dlt.common.libs.pyarrow import pyarrow as pa, TAnyArrowItem except MissingDependencyException: pa = None + pyarrow = None class IncrementalTransform: @@ -50,24 +56,26 @@ def __init__( # compile jsonpath self._compiled_cursor_path = compile_path(cursor_path) # for simple column name we'll fallback to search in dict - if isinstance(self._compiled_cursor_path, JSONPathFields) and len(self._compiled_cursor_path.fields) == 1 and self._compiled_cursor_path.fields[0] != "*": + if ( + isinstance(self._compiled_cursor_path, JSONPathFields) + and len(self._compiled_cursor_path.fields) == 1 + and self._compiled_cursor_path.fields[0] != "*" + ): self.cursor_path = self._compiled_cursor_path.fields[0] self._compiled_cursor_path = None def __call__( self, row: TDataItem, - ) -> Tuple[bool, bool, bool]: - ... + ) -> Tuple[bool, bool, bool]: ... class JsonIncremental(IncrementalTransform): - def unique_value( self, row: TDataItem, primary_key: Optional[TTableHintTemplate[TColumnNames]], - resource_name: str + resource_name: str, ) -> str: try: if primary_key: @@ -82,7 +90,7 @@ def unique_value( def find_cursor_value(self, row: TDataItem) -> Any: """Finds value in row at cursor defined by self.cursor_path. 
- Will use compiled JSONPath if present, otherwise it reverts to column search if row is dict + Will use compiled JSONPath if present, otherwise it reverts to column search if row is dict """ row_value: Any = None if self._compiled_cursor_path: @@ -117,33 +125,36 @@ def __call__( if isinstance(row_value, datetime): row_value = pendulum.instance(row_value) - last_value = self.incremental_state['last_value'] + last_value = self.incremental_state["last_value"] # Check whether end_value has been reached # Filter end value ranges exclusively, so in case of "max" function we remove values >= end_value if self.end_value is not None and ( - self.last_value_func((row_value, self.end_value)) != self.end_value or self.last_value_func((row_value, )) == self.end_value + self.last_value_func((row_value, self.end_value)) != self.end_value + or self.last_value_func((row_value,)) == self.end_value ): end_out_of_range = True return None, start_out_of_range, end_out_of_range - check_values = (row_value,) + ((last_value, ) if last_value is not None else ()) + check_values = (row_value,) + ((last_value,) if last_value is not None else ()) new_value = self.last_value_func(check_values) if last_value == new_value: - processed_row_value = self.last_value_func((row_value, )) + processed_row_value = self.last_value_func((row_value,)) # we store row id for all records with the current "last_value" in state and use it to deduplicate if processed_row_value == last_value: unique_value = self.unique_value(row, self.primary_key, self.resource_name) # if unique value exists then use it to deduplicate if unique_value: - if unique_value in self.incremental_state['unique_hashes']: + if unique_value in self.incremental_state["unique_hashes"]: return None, start_out_of_range, end_out_of_range # add new hash only if the record row id is same as current last value - self.incremental_state['unique_hashes'].append(unique_value) + self.incremental_state["unique_hashes"].append(unique_value) return row, start_out_of_range, end_out_of_range # skip the record that is not a last_value or new_value: that record was already processed - check_values = (row_value,) + ((self.start_value,) if self.start_value is not None else ()) + check_values = (row_value,) + ( + (self.start_value,) if self.start_value is not None else () + ) new_value = self.last_value_func(check_values) # Include rows == start_value but exclude "lower" if new_value == self.start_value and processed_row_value != self.start_value: @@ -164,10 +175,7 @@ class ArrowIncremental(IncrementalTransform): _dlt_index = "_dlt_index" def unique_values( - self, - item: "TAnyArrowItem", - unique_columns: List[str], - resource_name: str + self, item: "TAnyArrowItem", unique_columns: List[str], resource_name: str ) -> List[Tuple[int, str]]: if not unique_columns: return [] @@ -178,28 +186,13 @@ def unique_values( (index, digest128(json.dumps(row, sort_keys=True))) for index, row in zip(indices, rows) ] - def _deduplicate(self, tbl: "pa.Table", unique_columns: Optional[List[str]], aggregate: str, cursor_path: str) -> "pa.Table": + def _deduplicate( + self, tbl: "pa.Table", unique_columns: Optional[List[str]], aggregate: str, cursor_path: str + ) -> "pa.Table": """Creates unique index if necessary.""" # create unique index if necessary if self._dlt_index not in tbl.schema.names: - tbl = tbl.append_column(self._dlt_index, pa.array(np.arange(tbl.num_rows))) - # code below deduplicates groups that include the cursor column in the group id. 
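The deduplication in `JsonIncremental.__call__` keeps hashes only for rows that tie with the current last value, so reruns starting from that value do not emit them twice. A condensed, pure-Python sketch of the idea (the hash function and the hard-coded "max" semantics are simplifications):

state = {"last_value": None, "unique_hashes": []}

def accept(row: dict, cursor: str = "updated_at") -> bool:
    value = row[cursor]
    last = state["last_value"]
    if last is not None and value == last:
        # row ties with the current last value: deduplicate by a row hash
        h = hash(frozenset(row.items()))
        if h in state["unique_hashes"]:
            return False
        state["unique_hashes"].append(h)
        return True
    if last is not None and value < last:
        # older than anything seen before: already processed
        return False
    # value advances the last value: remember it and start a fresh hash list
    state["last_value"] = value
    state["unique_hashes"] = [hash(frozenset(row.items()))]
    return True

rows = [{"id": 1, "updated_at": 2}, {"id": 2, "updated_at": 2}, {"id": 1, "updated_at": 2}]
print([r["id"] for r in rows if accept(r)])  # -> [1, 2]; the repeated id=1 row is dropped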
that was just artifact of - # json incremental and there's no need to duplicate it here - - # if unique_columns is None: - # return tbl - # group_cols = unique_columns + [cursor_path] - # try: - # tbl = tbl.filter( - # pa.compute.is_in( - # tbl[self._dlt_index], - # tbl.group_by(group_cols).aggregate( - # [(self._dlt_index, "one"), (cursor_path, aggregate)] - # )[f'{self._dlt_index}_one'] - # ) - # ) - # except KeyError as e: - # raise IncrementalPrimaryKeyMissing(self.resource_name, unique_columns[0], tbl) from e + tbl = pyarrow.append_column(tbl, self._dlt_index, pa.array(np.arange(tbl.num_rows))) return tbl def __call__( @@ -225,7 +218,7 @@ def __call__( if isinstance(primary_key, str): self._dlt_index = primary_key elif primary_key is None: - unique_columns = tbl.column_names + unique_columns = tbl.schema.names else: # deduplicating is disabled unique_columns = None @@ -233,7 +226,7 @@ def __call__( if not tbl: # row is None or empty arrow table return tbl, start_out_of_range, end_out_of_range - last_value = self.incremental_state['last_value'] + last_value = self.incremental_state["last_value"] if self.last_value_func is max: compute = pa.compute.max @@ -248,8 +241,9 @@ def __call__( last_value_compare = pa.compute.less_equal new_value_compare = pa.compute.less else: - raise NotImplementedError("Only min or max last_value_func is supported for arrow tables") - + raise NotImplementedError( + "Only min or max last_value_func is supported for arrow tables" + ) # TODO: Json path support. For now assume the cursor_path is a column name cursor_path = self.cursor_path @@ -264,8 +258,12 @@ def __call__( row_value = pendulum.from_timestamp(orig_row_value.cast(pa.int64()).as_py() / 1000) except KeyError as e: raise IncrementalCursorPathMissing( - self.resource_name, cursor_path, tbl, - f"Column name {cursor_path} was not found in the arrow table. Not nested JSON paths are not supported for arrow tables and dataframes, the incremental cursor_path must be a column name." + self.resource_name, + cursor_path, + tbl, + f"Column name {cursor_path} was not found in the arrow table. 
Not nested JSON paths" + " are not supported for arrow tables and dataframes, the incremental cursor_path" + " must be a column name.", ) from e # If end_value is provided, filter to include table rows that are "less" than end_value @@ -288,31 +286,52 @@ def __call__( eq_rows = tbl.filter(pa.compute.equal(tbl[cursor_path], last_value)) # compute index, unique hash mapping unique_values = self.unique_values(eq_rows, unique_columns, self.resource_name) - unique_values = [(i, uq_val) for i, uq_val in unique_values if uq_val in self.incremental_state['unique_hashes']] + unique_values = [ + (i, uq_val) + for i, uq_val in unique_values + if uq_val in self.incremental_state["unique_hashes"] + ] remove_idx = pa.array(i for i, _ in unique_values) # Filter the table tbl = tbl.filter(pa.compute.invert(pa.compute.is_in(tbl[self._dlt_index], remove_idx))) - if new_value_compare(row_value, last_value).as_py() and row_value != last_value: # Last value has changed - self.incremental_state['last_value'] = row_value + if ( + new_value_compare(row_value, last_value).as_py() and row_value != last_value + ): # Last value has changed + self.incremental_state["last_value"] = row_value # Compute unique hashes for all rows equal to row value - self.incremental_state['unique_hashes'] = [uq_val for _, uq_val in self.unique_values( - tbl.filter(pa.compute.equal(tbl[cursor_path], row_value)), unique_columns, self.resource_name - )] + self.incremental_state["unique_hashes"] = [ + uq_val + for _, uq_val in self.unique_values( + tbl.filter(pa.compute.equal(tbl[cursor_path], row_value)), + unique_columns, + self.resource_name, + ) + ] else: # last value is unchanged, add the hashes - self.incremental_state['unique_hashes'] = list(set(self.incremental_state['unique_hashes'] + [uq_val for _, uq_val in unique_values])) + self.incremental_state["unique_hashes"] = list( + set( + self.incremental_state["unique_hashes"] + + [uq_val for _, uq_val in unique_values] + ) + ) else: tbl = self._deduplicate(tbl, unique_columns, aggregate, cursor_path) - self.incremental_state['last_value'] = row_value - self.incremental_state['unique_hashes'] = [uq_val for _, uq_val in self.unique_values( - tbl.filter(pa.compute.equal(tbl[cursor_path], row_value)), unique_columns, self.resource_name - )] + self.incremental_state["last_value"] = row_value + self.incremental_state["unique_hashes"] = [ + uq_val + for _, uq_val in self.unique_values( + tbl.filter(pa.compute.equal(tbl[cursor_path], row_value)), + unique_columns, + self.resource_name, + ) + ] if len(tbl) == 0: return None, start_out_of_range, end_out_of_range try: - tbl = tbl.drop(["_dlt_index"]) + tbl = pyarrow.remove_columns(tbl, ["_dlt_index"]) except KeyError: pass if is_pandas: diff --git a/dlt/extract/incremental/typing.py b/dlt/extract/incremental/typing.py index 03f36121be..9cec97d34d 100644 --- a/dlt/extract/incremental/typing.py +++ b/dlt/extract/incremental/typing.py @@ -4,6 +4,7 @@ TCursorValue = TypeVar("TCursorValue", bound=Any) LastValueFunc = Callable[[Sequence[TCursorValue]], Any] + class IncrementalColumnState(TypedDict): initial_value: Optional[Any] last_value: Optional[Any] diff --git a/dlt/extract/pipe.py b/dlt/extract/pipe.py index 24fe3203aa..6f02f882bc 100644 --- a/dlt/extract/pipe.py +++ b/dlt/extract/pipe.py @@ -6,7 +6,24 @@ from concurrent.futures import ThreadPoolExecutor from copy import copy from threading import Thread -from typing import Any, Dict, Optional, Sequence, Union, Callable, Iterable, Iterator, List, NamedTuple, Awaitable, Tuple, Type, TYPE_CHECKING, 
Literal +from typing import ( + Any, + AsyncIterator, + Dict, + Optional, + Sequence, + Union, + Callable, + Iterable, + Iterator, + List, + NamedTuple, + Awaitable, + Tuple, + Type, + TYPE_CHECKING, + Literal, +) from dlt.common import sleep from dlt.common.configuration import configspec @@ -18,11 +35,27 @@ from dlt.common.typing import AnyFun, AnyType, TDataItems from dlt.common.utils import get_callable_name -from dlt.extract.exceptions import (CreatePipeException, DltSourceException, ExtractorException, InvalidStepFunctionArguments, - InvalidResourceDataTypeFunctionNotAGenerator, InvalidTransformerGeneratorFunction, ParametrizedResourceUnbound, - PipeException, PipeGenInvalid, PipeItemProcessingError, PipeNotBoundToData, ResourceExtractionError) +from dlt.extract.exceptions import ( + CreatePipeException, + DltSourceException, + ExtractorException, + InvalidStepFunctionArguments, + InvalidResourceDataTypeFunctionNotAGenerator, + InvalidTransformerGeneratorFunction, + ParametrizedResourceUnbound, + PipeException, + PipeGenInvalid, + PipeItemProcessingError, + PipeNotBoundToData, + ResourceExtractionError, +) from dlt.extract.typing import DataItemWithMeta, ItemTransform, SupportsPipe, TPipedDataItems -from dlt.extract.utils import check_compat_transformer, simulate_func_call, wrap_compat_transformer, wrap_resource_gen +from dlt.extract.utils import ( + check_compat_transformer, + simulate_func_call, + wrap_compat_transformer, + wrap_resource_gen, +) if TYPE_CHECKING: TItemFuture = Future[Union[TDataItems, DataItemWithMeta]] @@ -61,6 +94,7 @@ class SourcePipeItem(NamedTuple): # pipeline step may be iterator of data items or mapping function that returns data item or another iterator from dlt.common.typing import TDataItem + TPipeStep = Union[ Iterable[TPipedDataItems], Iterator[TPipedDataItems], @@ -115,7 +149,12 @@ def __init__(self, name: str, steps: List[TPipeStep] = None, parent: "Pipe" = No self.append_step(step) @classmethod - def from_data(cls, name: str, gen: Union[Iterable[TPipedDataItems], Iterator[TPipedDataItems], AnyFun], parent: "Pipe" = None) -> "Pipe": + def from_data( + cls, + name: str, + gen: Union[Iterable[TPipedDataItems], Iterator[TPipedDataItems], AnyFun], + parent: "Pipe" = None, + ) -> "Pipe": return cls(name, [gen], parent=parent) @property @@ -150,7 +189,7 @@ def steps(self) -> List[TPipeStep]: def find(self, *step_type: AnyType) -> int: """Finds a step with object of type `step_type`""" - return next((i for i,v in enumerate(self._steps) if isinstance(v, step_type)), -1) + return next((i for i, v in enumerate(self._steps) if isinstance(v, step_type)), -1) def __getitem__(self, i: int) -> TPipeStep: return self._steps[i] @@ -188,7 +227,11 @@ def insert_step(self, step: TPipeStep, index: int) -> "Pipe": return self.append_step(step) if index == 0: if not self.has_parent: - raise CreatePipeException(self.name, "You cannot insert a step before head of the resource that is not a transformer") + raise CreatePipeException( + self.name, + "You cannot insert a step before head of the resource that is not a" + " transformer", + ) step = self._wrap_transform_step_meta(index, step) # actually insert in the list self._steps.insert(index, step) @@ -200,7 +243,10 @@ def insert_step(self, step: TPipeStep, index: int) -> "Pipe": def remove_step(self, index: int) -> None: """Removes steps at a given index. 
Gen step cannot be removed""" if index == self._gen_idx: - raise CreatePipeException(self.name, f"Step at index {index} holds a data generator for this pipe and cannot be removed") + raise CreatePipeException( + self.name, + f"Step at index {index} holds a data generator for this pipe and cannot be removed", + ) self._steps.pop(index) if index < self._gen_idx: self._gen_idx -= 1 @@ -241,7 +287,13 @@ def ensure_gen_bound(self) -> None: sig.bind() except TypeError as ex: callable_name = get_callable_name(head) - raise ParametrizedResourceUnbound(self.name, callable_name, sig.replace(parameters=list(sig.parameters.values())[1:]), "resource", str(ex)) + raise ParametrizedResourceUnbound( + self.name, + callable_name, + sig.replace(parameters=list(sig.parameters.values())[1:]), + "resource", + str(ex), + ) def evaluate_gen(self) -> None: """Lazily evaluate gen of the pipe when creating PipeIterator. Allows creating multiple use pipes from generator functions and lists""" @@ -255,7 +307,13 @@ def evaluate_gen(self) -> None: # must be parameter-less callable or parameters must have defaults self.replace_gen(gen()) # type: ignore except TypeError as ex: - raise ParametrizedResourceUnbound(self.name, get_callable_name(gen), inspect.signature(gen), "resource", str(ex)) + raise ParametrizedResourceUnbound( + self.name, + get_callable_name(gen), + inspect.signature(gen), + "resource", + str(ex), + ) # otherwise it must be an iterator if isinstance(gen, Iterable): self.replace_gen(iter(gen)) @@ -309,18 +367,28 @@ def _wrap_gen(self, *args: Any, **kwargs: Any) -> Any: def _verify_head_step(self, step: TPipeStep) -> None: # first element must be Iterable, Iterator or Callable in resource pipe if not isinstance(step, (Iterable, Iterator)) and not callable(step): - raise CreatePipeException(self.name, "A head of a resource pipe must be Iterable, Iterator or a Callable") + raise CreatePipeException( + self.name, "A head of a resource pipe must be Iterable, Iterator or a Callable" + ) def _wrap_transform_step_meta(self, step_no: int, step: TPipeStep) -> TPipeStep: # step must be a callable: a transformer or a transformation if isinstance(step, (Iterable, Iterator)) and not callable(step): if self.has_parent: - raise CreatePipeException(self.name, "Iterable or Iterator cannot be a step in transformer pipe") + raise CreatePipeException( + self.name, "Iterable or Iterator cannot be a step in transformer pipe" + ) else: - raise CreatePipeException(self.name, "Iterable or Iterator can only be a first step in resource pipe") + raise CreatePipeException( + self.name, "Iterable or Iterator can only be a first step in resource pipe" + ) if not callable(step): - raise CreatePipeException(self.name, "Pipe step must be a callable taking one data item as argument and optional second meta argument") + raise CreatePipeException( + self.name, + "Pipe step must be a callable taking one data item as argument and optional second" + " meta argument", + ) else: # check the signature sig = inspect.signature(step) @@ -344,8 +412,13 @@ def _partial(*args: Any, **kwargs: Any) -> Any: # del kwargs["meta"] return orig_step(*args, **kwargs) - meta_arg = inspect.Parameter("meta", inspect._ParameterKind.KEYWORD_ONLY, default=None) - kwargs_arg = next((p for p in sig.parameters.values() if p.kind == inspect.Parameter.VAR_KEYWORD), None) + meta_arg = inspect.Parameter( + "meta", inspect._ParameterKind.KEYWORD_ONLY, default=None + ) + kwargs_arg = next( + (p for p in sig.parameters.values() if p.kind == inspect.Parameter.VAR_KEYWORD), + 
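The wrapping above makes every pipe step callable with a `meta` keyword even if the user function never declared one; a standalone sketch of that signature check with `inspect` (the helper name is invented):

import inspect

def ensure_meta(step):
    sig = inspect.signature(step)
    accepts_meta = "meta" in sig.parameters or any(
        p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()
    )
    if accepts_meta:
        return step

    # swallow the meta keyword for callables that do not take it
    def _partial(item, meta=None):
        return step(item)

    return _partial

double = ensure_meta(lambda item: item * 2)
print(double(21, meta={"table": "events"}))  # -> 42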
None, + ) if kwargs_arg: # pass meta in variadic new_sig = sig @@ -358,7 +431,6 @@ def _partial(*args: Any, **kwargs: Any) -> Any: self._ensure_transform_step(step_no, step) return step - def _ensure_transform_step(self, step_no: int, step: TPipeStep) -> None: """Verifies that `step` is a valid callable to be a transform step of the pipeline""" assert callable(step), f"{step} must be callable" @@ -375,7 +447,13 @@ def _ensure_transform_step(self, step_no: int, step: TPipeStep) -> None: raise InvalidTransformerGeneratorFunction(self.name, callable_name, sig, code=1) else: # show the sig without first argument - raise ParametrizedResourceUnbound(self.name, callable_name, sig.replace(parameters=list(sig.parameters.values())[1:]), "transformer", str(ty_ex)) + raise ParametrizedResourceUnbound( + self.name, + callable_name, + sig.replace(parameters=list(sig.parameters.values())[1:]), + "transformer", + str(ty_ex), + ) else: raise InvalidStepFunctionArguments(self.name, callable_name, sig, str(ty_ex)) @@ -405,7 +483,6 @@ def __repr__(self) -> str: class PipeIterator(Iterator[PipeItem]): - @configspec class PipeIteratorConfiguration(BaseConfiguration): max_parallel_items: int = 20 @@ -416,7 +493,13 @@ class PipeIteratorConfiguration(BaseConfiguration): __section__ = "extract" - def __init__(self, max_parallel_items: int, workers: int, futures_poll_interval: float, next_item_mode: TPipeNextItemMode) -> None: + def __init__( + self, + max_parallel_items: int, + workers: int, + futures_poll_interval: float, + next_item_mode: TPipeNextItemMode, + ) -> None: self.max_parallel_items = max_parallel_items self.workers = workers self.futures_poll_interval = futures_poll_interval @@ -432,7 +515,15 @@ def __init__(self, max_parallel_items: int, workers: int, futures_poll_interval: @classmethod @with_config(spec=PipeIteratorConfiguration) - def from_pipe(cls, pipe: Pipe, *, max_parallel_items: int = 20, workers: int = 5, futures_poll_interval: float = 0.01, next_item_mode: TPipeNextItemMode = "fifo") -> "PipeIterator": + def from_pipe( + cls, + pipe: Pipe, + *, + max_parallel_items: int = 20, + workers: int = 5, + futures_poll_interval: float = 0.01, + next_item_mode: TPipeNextItemMode = "fifo", + ) -> "PipeIterator": # join all dependent pipes if pipe.parent: pipe = pipe.full_pipe() @@ -460,15 +551,13 @@ def from_pipes( workers: int = 5, futures_poll_interval: float = 0.01, copy_on_fork: bool = False, - next_item_mode: TPipeNextItemMode = "fifo" + next_item_mode: TPipeNextItemMode = "fifo", ) -> "PipeIterator": - # print(f"max_parallel_items: {max_parallel_items} workers: {workers}") extract = cls(max_parallel_items, workers, futures_poll_interval, next_item_mode) # clone all pipes before iterating (recursively) as we will fork them (this add steps) and evaluate gens pipes, _ = PipeIterator.clone_pipes(pipes) - def _fork_pipeline(pipe: Pipe) -> None: if pipe.parent: # fork the parent pipe @@ -524,7 +613,9 @@ def __next__(self) -> PipeItem: # if item is iterator, then add it as a new source if isinstance(item, Iterator): # print(f"adding iterable {item}") - self._sources.append(SourcePipeItem(item, pipe_item.step, pipe_item.pipe, pipe_item.meta)) + self._sources.append( + SourcePipeItem(item, pipe_item.step, pipe_item.pipe, pipe_item.meta) + ) pipe_item = None continue @@ -550,9 +641,13 @@ def __next__(self) -> PipeItem: # if we are at the end of the pipe then yield element if pipe_item.step == len(pipe_item.pipe) - 1: # must be resolved - if isinstance(item, (Iterator, Awaitable)) or callable(item): + if 
isinstance(item, (Iterator, Awaitable, AsyncIterator)) or callable(item): raise PipeItemProcessingError( - pipe_item.pipe.name, f"Pipe item at step {pipe_item.step} was not fully evaluated and is of type {type(pipe_item.item).__name__}. This is internal error or you are yielding something weird from resources ie. functions or awaitables.") + pipe_item.pipe.name, + f"Pipe item at step {pipe_item.step} was not fully evaluated and is of type" + f" {type(pipe_item.item).__name__}. This is internal error or you are" + " yielding something weird from resources ie. functions or awaitables.", + ) # mypy not able to figure out that item was resolved return pipe_item # type: ignore @@ -567,14 +662,23 @@ def __next__(self) -> PipeItem: next_item = next_item.data except TypeError as ty_ex: assert callable(step) - raise InvalidStepFunctionArguments(pipe_item.pipe.name, get_callable_name(step), inspect.signature(step), str(ty_ex)) + raise InvalidStepFunctionArguments( + pipe_item.pipe.name, + get_callable_name(step), + inspect.signature(step), + str(ty_ex), + ) except (PipelineException, ExtractorException, DltSourceException, PipeException): raise except Exception as ex: - raise ResourceExtractionError(pipe_item.pipe.name, step, str(ex), "transform") from ex + raise ResourceExtractionError( + pipe_item.pipe.name, step, str(ex), "transform" + ) from ex # create next pipe item if a value was returned. A None means that item was consumed/filtered out and should not be further processed if next_item is not None: - pipe_item = ResolvablePipeItem(next_item, pipe_item.step + 1, pipe_item.pipe, next_meta) + pipe_item = ResolvablePipeItem( + next_item, pipe_item.step + 1, pipe_item.pipe, next_meta + ) else: pipe_item = None @@ -622,7 +726,7 @@ def start_background_loop(loop: asyncio.AbstractEventLoop) -> None: target=start_background_loop, args=(self._async_pool,), daemon=True, - name="DltFuturesThread" + name=Container.thread_pool_prefix() + "futures", ) self._async_pool_thread.start() @@ -634,13 +738,17 @@ def _ensure_thread_pool(self) -> ThreadPoolExecutor: if self._thread_pool: return self._thread_pool - self._thread_pool = ThreadPoolExecutor(self.workers) + self._thread_pool = ThreadPoolExecutor( + self.workers, thread_name_prefix=Container.thread_pool_prefix() + "threads" + ) return self._thread_pool def __enter__(self) -> "PipeIterator": return self - def __exit__(self, exc_type: Type[BaseException], exc_val: BaseException, exc_tb: types.TracebackType) -> None: + def __exit__( + self, exc_type: Type[BaseException], exc_val: BaseException, exc_tb: types.TracebackType + ) -> None: self.close() def _next_future(self) -> int: @@ -665,7 +773,9 @@ def _resolve_futures(self) -> ResolvablePipeItem: if future.exception(): ex = future.exception() - if isinstance(ex, (PipelineException, ExtractorException, DltSourceException, PipeException)): + if isinstance( + ex, (PipelineException, ExtractorException, DltSourceException, PipeException) + ): raise ex raise ResourceExtractionError(pipe.name, future, str(ex), "future") from ex @@ -754,7 +864,9 @@ def _get_source_item_round_robin(self) -> ResolvablePipeItem: raise ResourceExtractionError(pipe.name, gen, str(ex), "generator") from ex @staticmethod - def clone_pipes(pipes: Sequence[Pipe], existing_cloned_pairs: Dict[int, Pipe] = None) -> Tuple[List[Pipe], Dict[int, Pipe]]: + def clone_pipes( + pipes: Sequence[Pipe], existing_cloned_pairs: Dict[int, Pipe] = None + ) -> Tuple[List[Pipe], Dict[int, Pipe]]: """This will clone pipes and fix the parent/dependent 
references""" cloned_pipes = [p._clone() for p in pipes if id(p) not in (existing_cloned_pairs or {})] cloned_pairs = {id(p): c for p, c in zip(pipes, cloned_pipes)} @@ -784,6 +896,7 @@ def clone_pipes(pipes: Sequence[Pipe], existing_cloned_pairs: Dict[int, Pipe] = class ManagedPipeIterator(PipeIterator): """A version of the pipe iterator that gets closed automatically on an exception in _next_""" + _ctx: List[ContainerInjectableContext] = None _container: Container = None diff --git a/dlt/extract/resource.py b/dlt/extract/resource.py new file mode 100644 index 0000000000..93c23e05a8 --- /dev/null +++ b/dlt/extract/resource.py @@ -0,0 +1,565 @@ +from copy import deepcopy +import inspect +from typing import ( + AsyncIterable, + AsyncIterator, + ClassVar, + Callable, + Iterable, + Iterator, + Union, + Any, + Optional, +) + +from dlt.common.configuration.resolve import inject_section +from dlt.common.configuration.specs import known_sections +from dlt.common.configuration.specs.config_section_context import ConfigSectionContext +from dlt.common.typing import AnyFun, DictStrAny, StrAny, TDataItem, TDataItems, NoneType +from dlt.common.configuration.container import Container +from dlt.common.pipeline import ( + PipelineContext, + StateInjectableContext, + resource_state, + pipeline_state, +) +from dlt.common.utils import flatten_list_or_items, get_callable_name, uniq_id + +from dlt.extract.typing import ( + DataItemWithMeta, + ItemTransformFunc, + ItemTransformFunctionWithMeta, + TableNameMeta, + FilterItem, + MapItem, + YieldMapItem, + ValidateItem, +) +from dlt.extract.pipe import Pipe, ManagedPipeIterator, TPipeStep +from dlt.extract.hints import DltResourceHints, TResourceHints +from dlt.extract.incremental import Incremental, IncrementalResourceWrapper +from dlt.extract.exceptions import ( + InvalidTransformerDataTypeGeneratorFunctionRequired, + InvalidParentResourceDataType, + InvalidParentResourceIsAFunction, + InvalidResourceDataType, + InvalidResourceDataTypeIsNone, + InvalidTransformerGeneratorFunction, + InvalidResourceDataTypeAsync, + InvalidResourceDataTypeBasic, + InvalidResourceDataTypeMultiplePipes, + ParametrizedResourceUnbound, + ResourceNameMissing, + ResourceNotATransformer, +) +from dlt.extract.wrappers import wrap_additional_type + + +def with_table_name(item: TDataItems, table_name: str) -> DataItemWithMeta: + """Marks `item` to be dispatched to table `table_name` when yielded from resource function.""" + return DataItemWithMeta(TableNameMeta(table_name), item) + + +class DltResource(Iterable[TDataItem], DltResourceHints): + """Implements dlt resource. 
Contains a data pipe that wraps a generating item and table schema that can be adjusted""" + + Empty: ClassVar["DltResource"] = None + source_name: str + """Name of the source that contains this instance of the source, set when added to DltResourcesDict""" + section: str + """A config section name""" + + def __init__( + self, + pipe: Pipe, + hints: TResourceHints, + selected: bool, + incremental: IncrementalResourceWrapper = None, + section: str = None, + args_bound: bool = False, + ) -> None: + self.section = section + self.selected = selected + self._pipe = pipe + self._args_bound = args_bound + self._explicit_args: DictStrAny = None + if incremental and not self.incremental: + self.add_step(incremental) + self.source_name = None + super().__init__(hints) + + @classmethod + def from_data( + cls, + data: Any, + name: str = None, + section: str = None, + hints: TResourceHints = None, + selected: bool = True, + data_from: Union["DltResource", Pipe] = None, + incremental: IncrementalResourceWrapper = None, + ) -> "DltResource": + if data is None: + raise InvalidResourceDataTypeIsNone(name, data, NoneType) # type: ignore + + if isinstance(data, DltResource): + return data + + if isinstance(data, Pipe): + return cls(data, hints, selected, incremental=incremental, section=section) + + if callable(data): + name = name or get_callable_name(data) + + # if generator, take name from it + if inspect.isgenerator(data): + name = name or get_callable_name(data) # type: ignore + + # name is mandatory + if not name: + raise ResourceNameMissing() + + # wrap additional types + data = wrap_additional_type(data) + + # several iterable types are not allowed and must be excluded right away + if isinstance(data, (AsyncIterator, AsyncIterable)): + raise InvalidResourceDataTypeAsync(name, data, type(data)) + if isinstance(data, (str, dict)): + raise InvalidResourceDataTypeBasic(name, data, type(data)) + + # check if depends_on is a valid resource + parent_pipe: Pipe = None + if data_from is not None: + DltResource._ensure_valid_transformer_resource(name, data) + parent_pipe = DltResource._get_parent_pipe(name, data_from) + + # create resource from iterator, iterable or generator function + if isinstance(data, (Iterable, Iterator)) or callable(data): + pipe = Pipe.from_data(name, data, parent=parent_pipe) + return cls( + pipe, + hints, + selected, + incremental=incremental, + section=section, + args_bound=not callable(data), + ) + else: + # some other data type that is not supported + raise InvalidResourceDataType( + name, data, type(data), f"The data type of supplied type is {type(data).__name__}" + ) + + @property + def name(self) -> str: + """Resource name inherited from the pipe""" + return self._pipe.name + + def with_name(self, new_name: str) -> "DltResource": + """Clones the resource with a new name. 
Such resource keeps separate state and loads data to `new_name` table by default.""" + return self._clone(new_name=new_name, with_parent=True) + + @property + def is_transformer(self) -> bool: + """Checks if the resource is a transformer that takes data from another resource""" + return self._pipe.has_parent + + @property + def requires_args(self) -> bool: + """Checks if resource has unbound arguments""" + try: + self._pipe.ensure_gen_bound() + return False + except (TypeError, ParametrizedResourceUnbound): + return True + + @property + def incremental(self) -> IncrementalResourceWrapper: + """Gets incremental transform if it is in the pipe""" + incremental: IncrementalResourceWrapper = None + step_no = self._pipe.find(IncrementalResourceWrapper, Incremental) + if step_no >= 0: + incremental = self._pipe.steps[step_no] # type: ignore + return incremental + + @property + def validator(self) -> Optional[ValidateItem]: + """Gets validator transform if it is in the pipe""" + validator: ValidateItem = None + step_no = self._pipe.find(ValidateItem) + if step_no >= 0: + validator = self._pipe.steps[step_no] # type: ignore[assignment] + return validator + + @validator.setter + def validator(self, validator: Optional[ValidateItem]) -> None: + """Add/remove or replace the validator in pipe""" + step_no = self._pipe.find(ValidateItem) + if step_no >= 0: + self._pipe.remove_step(step_no) + if validator: + self.add_step(validator, insert_at=step_no if step_no >= 0 else None) + + def pipe_data_from(self, data_from: Union["DltResource", Pipe]) -> None: + """Replaces the parent in the transformer resource pipe from which the data is piped.""" + if self.is_transformer: + DltResource._ensure_valid_transformer_resource(self.name, self._pipe.gen) + else: + raise ResourceNotATransformer( + self.name, "Cannot pipe data into resource that is not a transformer." + ) + parent_pipe = self._get_parent_pipe(self.name, data_from) + self._pipe.parent = parent_pipe + + def add_pipe(self, data: Any) -> None: + """Creates additional pipe for the resource from the specified data""" + # TODO: (1) self resource cannot be a transformer (2) if data is resource both self must and it must be selected/unselected + cannot be tranformer + raise InvalidResourceDataTypeMultiplePipes(self.name, data, type(data)) + + def select_tables(self, *table_names: Iterable[str]) -> "DltResource": + """For resources that dynamically dispatch data to several tables allows to select tables that will receive data, effectively filtering out other data items. + + Both `with_table_name` marker and data-based (function) table name hints are supported. + """ + + def _filter(item: TDataItem, meta: Any = None) -> bool: + is_in_meta = isinstance(meta, TableNameMeta) and meta.table_name in table_names + is_in_dyn = self._table_name_hint_fun and self._table_name_hint_fun(item) in table_names + return is_in_meta or is_in_dyn + + # add filtering function at the end of pipe + self.add_filter(_filter) + return self + + def add_map( + self, item_map: ItemTransformFunc[TDataItem], insert_at: int = None + ) -> "DltResource": # noqa: A003 + """Adds mapping function defined in `item_map` to the resource pipe at position `inserted_at` + + `item_map` receives single data items, `dlt` will enumerate any lists of data items automatically + + Args: + item_map (ItemTransformFunc[TDataItem]): A function taking a single data item and optional meta argument. Returns transformed data item. + insert_at (int, optional): At which step in pipe to insert the mapping. 
Defaults to None which inserts after last step + + Returns: + "DltResource": returns self + """ + if insert_at is None: + self._pipe.append_step(MapItem(item_map)) + else: + self._pipe.insert_step(MapItem(item_map), insert_at) + return self + + def add_yield_map( + self, item_map: ItemTransformFunc[Iterator[TDataItem]], insert_at: int = None + ) -> "DltResource": # noqa: A003 + """Adds generating function defined in `item_map` to the resource pipe at position `inserted_at` + + `item_map` receives single data items, `dlt` will enumerate any lists of data items automatically. It may yield 0 or more data items and be used to + ie. pivot an item into sequence of rows. + + Args: + item_map (ItemTransformFunc[Iterator[TDataItem]]): A function taking a single data item and optional meta argument. Yields 0 or more data items. + insert_at (int, optional): At which step in pipe to insert the generator. Defaults to None which inserts after last step + + Returns: + "DltResource": returns self + """ + if insert_at is None: + self._pipe.append_step(YieldMapItem(item_map)) + else: + self._pipe.insert_step(YieldMapItem(item_map), insert_at) + return self + + def add_filter( + self, item_filter: ItemTransformFunc[bool], insert_at: int = None + ) -> "DltResource": # noqa: A003 + """Adds filter defined in `item_filter` to the resource pipe at position `inserted_at` + + `item_filter` receives single data items, `dlt` will enumerate any lists of data items automatically + + Args: + item_filter (ItemTransformFunc[bool]): A function taking a single data item and optional meta argument. Returns bool. If True, item is kept + insert_at (int, optional): At which step in pipe to insert the filter. Defaults to None which inserts after last step + Returns: + "DltResource": returns self + """ + if insert_at is None: + self._pipe.append_step(FilterItem(item_filter)) + else: + self._pipe.insert_step(FilterItem(item_filter), insert_at) + return self + + def add_limit(self, max_items: int) -> "DltResource": # noqa: A003 + """Adds a limit `max_items` to the resource pipe + + This mutates the encapsulated generator to stop after `max_items` items are yielded. This is useful for testing and debugging. It is + a no-op for transformers. Those should be limited by their input data. 
+ + Args: + max_items (int): The maximum number of items to yield + Returns: + "DltResource": returns self + """ + + def _gen_wrap(gen: TPipeStep) -> TPipeStep: + """Wrap a generator to take the first `max_items` records""" + nonlocal max_items + count = 0 + if inspect.isfunction(gen): + gen = gen() + try: + for i in gen: # type: ignore # TODO: help me fix this later + yield i + count += 1 + if count == max_items: + return + finally: + if inspect.isgenerator(gen): + gen.close() + return + + # transformers should be limited by their input, so we only limit non-transformers + if not self.is_transformer: + self._pipe.replace_gen(_gen_wrap(self._pipe.gen)) + return self + + def add_step( + self, item_transform: ItemTransformFunctionWithMeta[TDataItems], insert_at: int = None + ) -> "DltResource": # noqa: A003 + if insert_at is None: + self._pipe.append_step(item_transform) + else: + self._pipe.insert_step(item_transform, insert_at) + return self + + def set_hints(self, table_schema_template: TResourceHints) -> None: + super().set_hints(table_schema_template) + incremental = self.incremental + # try to late assign incremental + if table_schema_template.get("incremental") is not None: + if incremental: + incremental._incremental = table_schema_template["incremental"] + else: + # if there's no wrapper add incremental as a transform + incremental = table_schema_template["incremental"] # type: ignore + self.add_step(incremental) + + if incremental: + primary_key = table_schema_template.get("primary_key", incremental.primary_key) + if primary_key is not None: + incremental.primary_key = primary_key + + if table_schema_template.get("validator") is not None: + self.validator = table_schema_template["validator"] + + def bind(self, *args: Any, **kwargs: Any) -> "DltResource": + """Binds the parametrized resource to passed arguments. Modifies resource pipe in place. Does not evaluate generators or iterators.""" + if self._args_bound: + raise TypeError(f"Parametrized resource {self.name} is not callable") + orig_gen = self._pipe.gen + gen = self._pipe.bind_gen(*args, **kwargs) + if isinstance(gen, DltResource): + # the resource returned resource: update in place + old_pipe = self._pipe + self.__dict__.clear() + self.__dict__.update(gen.__dict__) + # keep old pipe instance + self._pipe = old_pipe + self._pipe.__dict__.clear() + # write props from new pipe instance + self._pipe.__dict__.update(gen._pipe.__dict__) + elif isinstance(gen, Pipe): + # the resource returned pipe: just replace pipe + self._pipe.__dict__.clear() + # write props from new pipe instance + self._pipe.__dict__.update(gen.__dict__) + else: + self._args_bound = True + self._set_explicit_args(orig_gen, None, *args, **kwargs) # type: ignore + return self + + @property + def explicit_args(self) -> StrAny: + """Returns a dictionary of arguments used to parametrize the resource. Does not include defaults and injected args.""" + if not self._args_bound: + raise TypeError(f"Resource {self.name} is not yet parametrized") + return self._explicit_args + + @property + def state(self) -> StrAny: + """Gets resource-scoped state from the active pipeline. PipelineStateNotAvailable is raised if pipeline context is not available""" + with inject_section(self._get_config_section_context()): + return resource_state(self.name) + + def __call__(self, *args: Any, **kwargs: Any) -> "DltResource": + """Binds the parametrized resources to passed arguments. Creates and returns a bound resource. 
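A quick usage sketch of the step helpers defined above (`add_map`, `add_filter`, `add_limit`), assuming the public `@dlt.resource` decorator; the resource and field names are invented:

import dlt

@dlt.resource
def numbers():
    for i in range(100):
        yield {"value": i}

limited = (
    numbers()
    .add_map(lambda item: {**item, "squared": item["value"] ** 2})
    .add_filter(lambda item: item["value"] % 2 == 0)
    .add_limit(5)  # stops the underlying generator after 5 items, before the filter runs
)
print(list(limited))  # only the even items among the first 5 generated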
Generators and iterators are not evaluated.""" + if self._args_bound: + raise TypeError(f"Parametrized resource {self.name} is not callable") + r = self._clone() + return r.bind(*args, **kwargs) + + def __or__(self, transform: Union["DltResource", AnyFun]) -> "DltResource": + """Allows to pipe data from across resources and transform functions with | operator""" + # print(f"{resource.name} | {self.name} -> {resource.name}[{resource.is_transformer}]") + if isinstance(transform, DltResource): + transform.pipe_data_from(self) + # return transformed resource for chaining + return transform + else: + # map or yield map + if inspect.isgeneratorfunction(inspect.unwrap(transform)): + return self.add_yield_map(transform) + else: + return self.add_map(transform) + + def __iter__(self) -> Iterator[TDataItem]: + """Opens iterator that yields the data items from the resources in the same order as in Pipeline class. + + A read-only state is provided, initialized from active pipeline state. The state is discarded after the iterator is closed. + """ + # use the same state dict when opening iterator and when iterator is iterated + container = Container() + state, _ = pipeline_state(container, {}) + state_context = StateInjectableContext(state=state) + section_context = self._get_config_section_context() + + # managed pipe iterator will set the context on each call to __next__ + with inject_section(section_context), Container().injectable_context(state_context): + pipe_iterator: ManagedPipeIterator = ManagedPipeIterator.from_pipes([self._pipe]) # type: ignore + + pipe_iterator.set_context([state_context, section_context]) + _iter = map(lambda item: item.item, pipe_iterator) + return flatten_list_or_items(_iter) + + def _set_explicit_args( + self, f: AnyFun, sig: inspect.Signature = None, *args: Any, **kwargs: Any + ) -> None: + try: + sig = sig or inspect.signature(f) + self._explicit_args = sig.bind_partial(*args, **kwargs).arguments + except Exception: + pass + + def _clone(self, new_name: str = None, with_parent: bool = False) -> "DltResource": + """Creates a deep copy of a current resource, optionally renaming the resource. 
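The `__or__` overload above is what enables the pipe syntax between resources, transformers and plain transform functions; a hedged sketch using the public decorators (names and data invented):

import dlt

@dlt.resource
def user_ids():
    yield from [1, 2, 3]

@dlt.transformer
def user_details(user_id):
    # stand-in for a per-item enrichment call
    yield {"user_id": user_id, "name": f"user-{user_id}"}

# pipe the resource into the transformer; a plain function would become a map step instead
enriched = user_ids() | user_details
print(list(enriched))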
The clone will not be part of the source""" + pipe = self._pipe + if self._pipe and not self._pipe.is_empty: + pipe = pipe._clone(new_name=new_name, with_parent=with_parent) + # incremental and parent are already in the pipe (if any) + return DltResource( + pipe, + deepcopy(self._hints), + selected=self.selected, + section=self.section, + ) + + def _get_config_section_context(self) -> ConfigSectionContext: + container = Container() + proxy = container[PipelineContext] + pipeline = None if not proxy.is_active() else proxy.pipeline() + if pipeline: + pipeline_name = pipeline.pipeline_name + else: + pipeline_name = None + if pipeline: + default_schema_name = pipeline.default_schema_name + else: + default_schema_name = None + if not default_schema_name and pipeline_name: + default_schema_name = pipeline._make_schema_with_default_name().name + return ConfigSectionContext( + pipeline_name=pipeline_name, + # do not emit middle config section to not overwrite the resource section + # only sources emit middle config section + sections=( + known_sections.SOURCES, + "", + self.source_name or default_schema_name or self.name, + ), + source_state_key=self.source_name or default_schema_name or self.section or uniq_id(), + ) + + def __str__(self) -> str: + info = f"DltResource [{self.name}]" + if self.section: + info += f" in section [{self.section}]" + if self.source_name: + info += f" added to source [{self.source_name}]:" + else: + info += ":" + + if self.is_transformer: + info += ( + "\nThis resource is a transformer and takes data items from" + f" {self._pipe.parent.name}" + ) + else: + if self._pipe.is_data_bound: + if self.requires_args: + head_sig = inspect.signature(self._pipe.gen) # type: ignore + info += ( + "\nThis resource is parametrized and takes the following arguments" + f" {head_sig}. You must call this resource before loading." + ) + else: + info += ( + "\nIf you want to see the data items in the resource you must iterate it or" + " convert to list ie. list(resource). Note that, like any iterator, you can" + " iterate the resource only once." 
+ ) + else: + info += "\nThis resource is not bound to the data" + info += f"\nInstance: info: (data pipe id:{id(self._pipe)}) at {id(self)}" + return info + + @staticmethod + def _ensure_valid_transformer_resource(name: str, data: Any) -> None: + # resource must be a callable with single argument + if callable(data): + valid_code = DltResource.validate_transformer_generator_function(data) + if valid_code != 0: + raise InvalidTransformerGeneratorFunction( + name, get_callable_name(data), inspect.signature(data), valid_code + ) + else: + raise InvalidTransformerDataTypeGeneratorFunctionRequired(name, data, type(data)) + + @staticmethod + def _get_parent_pipe(name: str, data_from: Union["DltResource", Pipe]) -> Pipe: + # parent resource + if isinstance(data_from, Pipe): + return data_from + elif isinstance(data_from, DltResource): + return data_from._pipe + else: + # if this is generator function provide nicer exception + if callable(data_from): + raise InvalidParentResourceIsAFunction(name, get_callable_name(data_from)) + else: + raise InvalidParentResourceDataType(name, data_from, type(data_from)) + + @staticmethod + def validate_transformer_generator_function(f: AnyFun) -> int: + sig = inspect.signature(f) + if len(sig.parameters) == 0: + return 1 + # transformer may take only one positional only argument + pos_only_len = sum(1 for p in sig.parameters.values() if p.kind == p.POSITIONAL_ONLY) + if pos_only_len > 1: + return 2 + first_ar = next(iter(sig.parameters.values())) + # and pos only must be first + if pos_only_len == 1 and first_ar.kind != first_ar.POSITIONAL_ONLY: + return 2 + # first arg must be positional or kw_pos + if first_ar.kind not in (first_ar.POSITIONAL_ONLY, first_ar.POSITIONAL_OR_KEYWORD): + return 3 + return 0 + + +# produce Empty resource singleton +DltResource.Empty = DltResource(Pipe(None), None, False) +TUnboundDltResource = Callable[..., DltResource] diff --git a/dlt/extract/source.py b/dlt/extract/source.py index d36cb4b121..b1f59f7bda 100644 --- a/dlt/extract/source.py +++ b/dlt/extract/source.py @@ -1,502 +1,37 @@ import warnings import contextlib -from copy import copy, deepcopy +from copy import copy import makefun import inspect -from typing import AsyncIterable, AsyncIterator, ClassVar, Callable, Dict, Iterable, Iterator, List, Sequence, Tuple, Union, Any, Optional +from typing import Dict, Iterable, Iterator, List, Sequence, Tuple, Any from typing_extensions import Self from dlt.common.configuration.resolve import inject_section from dlt.common.configuration.specs import known_sections from dlt.common.configuration.specs.config_section_context import ConfigSectionContext -from dlt.common.normalizers.json.relational import DataItemNormalizer as RelationalNormalizer, RelationalNormalizerConfigPropagation +from dlt.common.normalizers.json.relational import DataItemNormalizer as RelationalNormalizer from dlt.common.schema import Schema -from dlt.common.schema.typing import TColumnName -from dlt.common.typing import AnyFun, DictStrAny, StrAny, TDataItem, TDataItems, NoneType +from dlt.common.schema.typing import TColumnName, TSchemaContract +from dlt.common.typing import StrAny, TDataItem from dlt.common.configuration.container import Container -from dlt.common.pipeline import PipelineContext, StateInjectableContext, SupportsPipelineRun, resource_state, source_state, pipeline_state -from dlt.common.utils import graph_find_scc_nodes, flatten_list_or_items, get_callable_name, graph_edges_to_nodes, multi_context_manager, uniq_id - -from dlt.extract.typing import 
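The return codes of `validate_transformer_generator_function` above translate into a few simple signature rules for transformer functions; a short sketch of signatures that would pass or fail (illustrative only):

# passes (code 0): the data item is the first positional argument
def ok(item, /, chunk_size=100):
    yield item

def also_ok(item, meta=None):
    yield item

# fails with code 1: no parameters at all
def no_args():
    yield None

# fails with code 2: more than one positional-only parameter
def too_many_pos_only(item, other, /):
    yield item

# fails with code 3: the first argument is keyword-only
def keyword_only_first(*, item):
    yield item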
(DataItemWithMeta, ItemTransformFunc, ItemTransformFunctionWithMeta, TDecompositionStrategy, TableNameMeta, - FilterItem, MapItem, YieldMapItem, ValidateItem) -from dlt.extract.pipe import Pipe, ManagedPipeIterator, TPipeStep -from dlt.extract.schema import DltResourceSchema, TTableSchemaTemplate -from dlt.extract.incremental import Incremental, IncrementalResourceWrapper +from dlt.common.pipeline import ( + PipelineContext, + StateInjectableContext, + SupportsPipelineRun, + source_state, + pipeline_state, +) +from dlt.common.utils import graph_find_scc_nodes, flatten_list_or_items, graph_edges_to_nodes + +from dlt.extract.typing import TDecompositionStrategy +from dlt.extract.pipe import Pipe, ManagedPipeIterator +from dlt.extract.hints import DltResourceHints +from dlt.extract.resource import DltResource from dlt.extract.exceptions import ( - InvalidTransformerDataTypeGeneratorFunctionRequired, InvalidParentResourceDataType, InvalidParentResourceIsAFunction, InvalidResourceDataType, InvalidResourceDataTypeIsNone, InvalidTransformerGeneratorFunction, - DataItemRequiredForDynamicTableHints, InvalidResourceDataTypeAsync, InvalidResourceDataTypeBasic, - InvalidResourceDataTypeMultiplePipes, ParametrizedResourceUnbound, ResourceNameMissing, ResourceNotATransformer, ResourcesNotFoundError, DeletingResourcesNotSupported) -from dlt.extract.wrappers import wrap_additional_type - - -def with_table_name(item: TDataItems, table_name: str) -> DataItemWithMeta: - """Marks `item` to be dispatched to table `table_name` when yielded from resource function.""" - return DataItemWithMeta(TableNameMeta(table_name), item) - - -class DltResource(Iterable[TDataItem], DltResourceSchema): - """Implements dlt resource. Contains a data pipe that wraps a generating item and table schema that can be adjusted""" - Empty: ClassVar["DltResource"] = None - source_name: str - """Name of the source that contains this instance of the source, set when added to DltResourcesDict""" - section: str - """A config section name""" - - def __init__( - self, - pipe: Pipe, - table_schema_template: TTableSchemaTemplate, - selected: bool, - incremental: IncrementalResourceWrapper = None, - section: str = None, - args_bound: bool = False - ) -> None: - self.section = section - self.selected = selected - self._pipe = pipe - self._args_bound = args_bound - self._explicit_args: DictStrAny = None - if incremental and not self.incremental: - self.add_step(incremental) - self.source_name = None - super().__init__(table_schema_template) - - @classmethod - def from_data( - cls, - data: Any, - name: str = None, - section: str = None, - table_schema_template: TTableSchemaTemplate = None, - selected: bool = True, - data_from: Union["DltResource", Pipe] = None, - incremental: IncrementalResourceWrapper = None - ) -> "DltResource": - if data is None: - raise InvalidResourceDataTypeIsNone(name, data, NoneType) # type: ignore - - if isinstance(data, DltResource): - return data - - if isinstance(data, Pipe): - return cls(data, table_schema_template, selected, incremental=incremental, section=section) - - if callable(data): - name = name or get_callable_name(data) - - # if generator, take name from it - if inspect.isgenerator(data): - name = name or get_callable_name(data) # type: ignore - - # name is mandatory - if not name: - raise ResourceNameMissing() - - # wrap additional types - data = wrap_additional_type(data) - - # several iterable types are not allowed and must be excluded right away - if isinstance(data, (AsyncIterator, AsyncIterable)): - 
raise InvalidResourceDataTypeAsync(name, data, type(data)) - if isinstance(data, (str, dict)): - raise InvalidResourceDataTypeBasic(name, data, type(data)) - - # check if depends_on is a valid resource - parent_pipe: Pipe = None - if data_from is not None: - DltResource._ensure_valid_transformer_resource(name, data) - parent_pipe = DltResource._get_parent_pipe(name, data_from) - - # create resource from iterator, iterable or generator function - if isinstance(data, (Iterable, Iterator)) or callable(data): - pipe = Pipe.from_data(name, data, parent=parent_pipe) - return cls(pipe, table_schema_template, selected, incremental=incremental, section=section, args_bound=not callable(data)) - else: - # some other data type that is not supported - raise InvalidResourceDataType(name, data, type(data), f"The data type of supplied type is {type(data).__name__}") - - @property - def name(self) -> str: - """Resource name inherited from the pipe""" - return self._pipe.name - - def with_name(self, new_name: str) -> "DltResource": - """Clones the resource with a new name. Such resource keeps separate state and loads data to `new_name` table by default.""" - return self._clone(new_name=new_name, with_parent=True) - - @property - def is_transformer(self) -> bool: - """Checks if the resource is a transformer that takes data from another resource""" - return self._pipe.has_parent - - @property - def requires_args(self) -> bool: - """Checks if resource has unbound arguments""" - try: - self._pipe.ensure_gen_bound() - return False - except (TypeError, ParametrizedResourceUnbound): - return True - - @property - def incremental(self) -> IncrementalResourceWrapper: - """Gets incremental transform if it is in the pipe""" - incremental: IncrementalResourceWrapper = None - step_no = self._pipe.find(IncrementalResourceWrapper, Incremental) - if step_no >= 0: - incremental = self._pipe.steps[step_no] # type: ignore - return incremental - - @property - def validator(self) -> Optional[ValidateItem]: - """Gets validator transform if it is in the pipe""" - validator: ValidateItem = None - step_no = self._pipe.find(ValidateItem) - if step_no >= 0: - validator = self._pipe.steps[step_no] # type: ignore[assignment] - return validator - - @validator.setter - def validator(self, validator: Optional[ValidateItem]) -> None: - """Add/remove or replace the validator in pipe""" - step_no = self._pipe.find(ValidateItem) - if step_no >= 0: - self._pipe.remove_step(step_no) - if validator: - self.add_step(validator, insert_at=step_no if step_no >= 0 else None) - - def pipe_data_from(self, data_from: Union["DltResource", Pipe]) -> None: - """Replaces the parent in the transformer resource pipe from which the data is piped.""" - if self.is_transformer: - DltResource._ensure_valid_transformer_resource(self.name, self._pipe.gen) - else: - raise ResourceNotATransformer(self.name, "Cannot pipe data into resource that is not a transformer.") - parent_pipe = self._get_parent_pipe(self.name, data_from) - self._pipe.parent = parent_pipe - - def add_pipe(self, data: Any) -> None: - """Creates additional pipe for the resource from the specified data""" - # TODO: (1) self resource cannot be a transformer (2) if data is resource both self must and it must be selected/unselected + cannot be tranformer - raise InvalidResourceDataTypeMultiplePipes(self.name, data, type(data)) - - def select_tables(self, *table_names: Iterable[str]) -> "DltResource": - """For resources that dynamically dispatch data to several tables allows to select tables that will 
receive data, effectively filtering out other data items. - - Both `with_table_name` marker and data-based (function) table name hints are supported. - """ - def _filter(item: TDataItem, meta: Any = None) -> bool: - is_in_meta = isinstance(meta, TableNameMeta) and meta.table_name in table_names - is_in_dyn = self._table_name_hint_fun and self._table_name_hint_fun(item) in table_names - return is_in_meta or is_in_dyn - - # add filtering function at the end of pipe - self.add_filter(_filter) - return self - - def add_map(self, item_map: ItemTransformFunc[TDataItem], insert_at: int = None) -> "DltResource": # noqa: A003 - """Adds mapping function defined in `item_map` to the resource pipe at position `inserted_at` - - `item_map` receives single data items, `dlt` will enumerate any lists of data items automatically - - Args: - item_map (ItemTransformFunc[TDataItem]): A function taking a single data item and optional meta argument. Returns transformed data item. - insert_at (int, optional): At which step in pipe to insert the mapping. Defaults to None which inserts after last step - - Returns: - "DltResource": returns self - """ - if insert_at is None: - self._pipe.append_step(MapItem(item_map)) - else: - self._pipe.insert_step(MapItem(item_map), insert_at) - return self - - def add_yield_map(self, item_map: ItemTransformFunc[Iterator[TDataItem]], insert_at: int = None) -> "DltResource": # noqa: A003 - """Adds generating function defined in `item_map` to the resource pipe at position `inserted_at` - - `item_map` receives single data items, `dlt` will enumerate any lists of data items automatically. It may yield 0 or more data items and be used to - ie. pivot an item into sequence of rows. - - Args: - item_map (ItemTransformFunc[Iterator[TDataItem]]): A function taking a single data item and optional meta argument. Yields 0 or more data items. - insert_at (int, optional): At which step in pipe to insert the generator. Defaults to None which inserts after last step - - Returns: - "DltResource": returns self - """ - if insert_at is None: - self._pipe.append_step(YieldMapItem(item_map)) - else: - self._pipe.insert_step(YieldMapItem(item_map), insert_at) - return self - - def add_filter(self, item_filter: ItemTransformFunc[bool], insert_at: int = None) -> "DltResource": # noqa: A003 - """Adds filter defined in `item_filter` to the resource pipe at position `inserted_at` - - `item_filter` receives single data items, `dlt` will enumerate any lists of data items automatically - - Args: - item_filter (ItemTransformFunc[bool]): A function taking a single data item and optional meta argument. Returns bool. If True, item is kept - insert_at (int, optional): At which step in pipe to insert the filter. Defaults to None which inserts after last step - Returns: - "DltResource": returns self - """ - if insert_at is None: - self._pipe.append_step(FilterItem(item_filter)) - else: - self._pipe.insert_step(FilterItem(item_filter), insert_at) - return self - - def add_limit(self, max_items: int) -> "DltResource": # noqa: A003 - """Adds a limit `max_items` to the resource pipe - - This mutates the encapsulated generator to stop after `max_items` items are yielded. This is useful for testing and debugging. It is - a no-op for transformers. Those should be limited by their input data. 
- - Args: - max_items (int): The maximum number of items to yield - Returns: - "DltResource": returns self - """ - def _gen_wrap(gen: TPipeStep) -> TPipeStep: - """Wrap a generator to take the first `max_items` records""" - nonlocal max_items - count = 0 - if inspect.isfunction(gen): - gen = gen() - try: - for i in gen: # type: ignore # TODO: help me fix this later - yield i - count += 1 - if count == max_items: - return - finally: - if inspect.isgenerator(gen): - gen.close() - return - # transformers should be limited by their input, so we only limit non-transformers - if not self.is_transformer: - self._pipe.replace_gen(_gen_wrap(self._pipe.gen)) - return self - - def add_step(self, item_transform: ItemTransformFunctionWithMeta[TDataItems], insert_at: int = None) -> "DltResource": # noqa: A003 - if insert_at is None: - self._pipe.append_step(item_transform) - else: - self._pipe.insert_step(item_transform, insert_at) - return self - - def set_template(self, table_schema_template: TTableSchemaTemplate) -> None: - super().set_template(table_schema_template) - incremental = self.incremental - # try to late assign incremental - if table_schema_template.get("incremental") is not None: - if incremental: - incremental._incremental = table_schema_template["incremental"] - else: - # if there's no wrapper add incremental as a transform - incremental = table_schema_template["incremental"] # type: ignore - self.add_step(incremental) - - if incremental: - primary_key = table_schema_template.get("primary_key", incremental.primary_key) - if primary_key is not None: - incremental.primary_key = primary_key - - if table_schema_template.get('validator') is not None: - self.validator = table_schema_template['validator'] - - def bind(self, *args: Any, **kwargs: Any) -> "DltResource": - """Binds the parametrized resource to passed arguments. Modifies resource pipe in place. Does not evaluate generators or iterators.""" - if self._args_bound: - raise TypeError(f"Parametrized resource {self.name} is not callable") - orig_gen = self._pipe.gen - gen = self._pipe.bind_gen(*args, **kwargs) - if isinstance(gen, DltResource): - # the resource returned resource: update in place - old_pipe = self._pipe - self.__dict__.clear() - self.__dict__.update(gen.__dict__) - # keep old pipe instance - self._pipe = old_pipe - self._pipe.__dict__.clear() - # write props from new pipe instance - self._pipe.__dict__.update(gen._pipe.__dict__) - elif isinstance(gen, Pipe): - # the resource returned pipe: just replace pipe - self._pipe.__dict__.clear() - # write props from new pipe instance - self._pipe.__dict__.update(gen.__dict__) - else: - self._args_bound = True - self._set_explicit_args(orig_gen, None, *args, **kwargs) # type: ignore - return self - - @property - def explicit_args(self) -> StrAny: - """Returns a dictionary of arguments used to parametrize the resource. Does not include defaults and injected args.""" - if not self._args_bound: - raise TypeError(f"Resource {self.name} is not yet parametrized") - return self._explicit_args - - @property - def state(self) -> StrAny: - """Gets resource-scoped state from the active pipeline. PipelineStateNotAvailable is raised if pipeline context is not available""" - with inject_section(self._get_config_section_context()): - return resource_state(self.name) - - def __call__(self, *args: Any, **kwargs: Any) -> "DltResource": - """Binds the parametrized resources to passed arguments. Creates and returns a bound resource. 
Generators and iterators are not evaluated.""" - if self._args_bound: - raise TypeError(f"Parametrized resource {self.name} is not callable") - r = self._clone() - return r.bind(*args, **kwargs) - - def __or__(self, transform: Union["DltResource", AnyFun]) -> "DltResource": - """Allows to pipe data from across resources and transform functions with | operator""" - # print(f"{resource.name} | {self.name} -> {resource.name}[{resource.is_transformer}]") - if isinstance(transform, DltResource): - transform.pipe_data_from(self) - # return transformed resource for chaining - return transform - else: - # map or yield map - if inspect.isgeneratorfunction(inspect.unwrap(transform)): - return self.add_yield_map(transform) - else: - return self.add_map(transform) - - def __iter__(self) -> Iterator[TDataItem]: - """Opens iterator that yields the data items from the resources in the same order as in Pipeline class. - - A read-only state is provided, initialized from active pipeline state. The state is discarded after the iterator is closed. - """ - # use the same state dict when opening iterator and when iterator is iterated - container = Container() - state, _ = pipeline_state(container, {}) - state_context = StateInjectableContext(state=state) - section_context = self._get_config_section_context() - - # managed pipe iterator will set the context on each call to __next__ - with inject_section(section_context), Container().injectable_context(state_context): - pipe_iterator: ManagedPipeIterator = ManagedPipeIterator.from_pipes([self._pipe]) # type: ignore - - pipe_iterator.set_context([state_context, section_context]) - _iter = map(lambda item: item.item, pipe_iterator) - return flatten_list_or_items(_iter) - - def _set_explicit_args(self, f: AnyFun, sig: inspect.Signature = None, *args: Any, **kwargs: Any) -> None: - try: - sig = sig or inspect.signature(f) - self._explicit_args = sig.bind_partial(*args, **kwargs).arguments - except Exception: - pass - - def _clone(self, new_name: str = None, with_parent: bool = False) -> "DltResource": - """Creates a deep copy of a current resource, optionally renaming the resource. 
The clone will not be part of the source - """ - pipe = self._pipe - if self._pipe and not self._pipe.is_empty: - pipe = pipe._clone(new_name=new_name, with_parent=with_parent) - # incremental and parent are already in the pipe (if any) - return DltResource( - pipe, - deepcopy(self._table_schema_template), - selected=self.selected, - section=self.section - ) - - def _get_config_section_context(self) -> ConfigSectionContext: - container = Container() - proxy = container[PipelineContext] - pipeline = None if not proxy.is_active() else proxy.pipeline() - if pipeline: - pipeline_name = pipeline.pipeline_name - else: - pipeline_name = None - if pipeline: - default_schema_name = pipeline.default_schema_name - else: - default_schema_name = None - if not default_schema_name and pipeline_name: - default_schema_name = pipeline._make_schema_with_default_name().name - return ConfigSectionContext( - pipeline_name=pipeline_name, - # do not emit middle config section to not overwrite the resource section - # only sources emit middle config section - sections=(known_sections.SOURCES, "", self.source_name or default_schema_name or self.name), - source_state_key=self.source_name or default_schema_name or self.section or uniq_id() - ) - - def __str__(self) -> str: - info = f"DltResource [{self.name}]" - if self.section: - info += f" in section [{self.section}]" - if self.source_name: - info += f" added to source [{self.source_name}]:" - else: - info += ":" - - if self.is_transformer: - info += f"\nThis resource is a transformer and takes data items from {self._pipe.parent.name}" - else: - if self._pipe.is_data_bound: - if self.requires_args: - head_sig = inspect.signature(self._pipe.gen) # type: ignore - info += f"\nThis resource is parametrized and takes the following arguments {head_sig}. You must call this resource before loading." - else: - info += "\nIf you want to see the data items in the resource you must iterate it or convert to list ie. list(resource). Note that, like any iterator, you can iterate the resource only once." 
- else: - info += "\nThis resource is not bound to the data" - info += f"\nInstance: info: (data pipe id:{id(self._pipe)}) at {id(self)}" - return info - - @staticmethod - def _ensure_valid_transformer_resource(name: str, data: Any) -> None: - # resource must be a callable with single argument - if callable(data): - valid_code = DltResource.validate_transformer_generator_function(data) - if valid_code != 0: - raise InvalidTransformerGeneratorFunction(name, get_callable_name(data), inspect.signature(data), valid_code) - else: - raise InvalidTransformerDataTypeGeneratorFunctionRequired(name, data, type(data)) - - @staticmethod - def _get_parent_pipe(name: str, data_from: Union["DltResource", Pipe]) -> Pipe: - # parent resource - if isinstance(data_from, Pipe): - return data_from - elif isinstance(data_from, DltResource): - return data_from._pipe - else: - # if this is generator function provide nicer exception - if callable(data_from): - raise InvalidParentResourceIsAFunction(name, get_callable_name(data_from)) - else: - raise InvalidParentResourceDataType(name, data_from, type(data_from)) - - @staticmethod - def validate_transformer_generator_function(f: AnyFun) -> int: - sig = inspect.signature(f) - if len(sig.parameters) == 0: - return 1 - # transformer may take only one positional only argument - pos_only_len = sum(1 for p in sig.parameters.values() if p.kind == p.POSITIONAL_ONLY) - if pos_only_len > 1: - return 2 - first_ar = next(iter(sig.parameters.values())) - # and pos only must be first - if pos_only_len == 1 and first_ar.kind != first_ar.POSITIONAL_ONLY: - return 2 - # first arg must be positional or kw_pos - if first_ar.kind not in (first_ar.POSITIONAL_ONLY, first_ar.POSITIONAL_OR_KEYWORD): - return 3 - return 0 - - -# produce Empty resource singleton -DltResource.Empty = DltResource(Pipe(None), None, False) -TUnboundDltResource = Callable[..., DltResource] + DataItemRequiredForDynamicTableHints, + ResourcesNotFoundError, + DeletingResourcesNotSupported, +) class DltResourceDict(Dict[str, DltResource]): @@ -513,7 +48,7 @@ def __init__(self, source_name: str, source_section: str) -> None: @property def selected(self) -> Dict[str, DltResource]: """Returns a subset of all resources that will be extracted and loaded to the destination.""" - return {k:v for k,v in self.items() if v.selected} + return {k: v for k, v in self.items() if v.selected} @property def extracted(self) -> Dict[str, DltResource]: @@ -529,9 +64,8 @@ def extracted(self) -> Dict[str, DltResource]: resource = self[pipe.name] except KeyError: # resource for pipe not found: return mock resource - mock_template = DltResourceSchema.new_table_template( - pipe.name, - write_disposition=resource.write_disposition + mock_template = DltResourceHints.new_table_template( + pipe.name, write_disposition=resource.write_disposition ) resource = DltResource(pipe, mock_template, False, section=resource.section) resource.source_name = resource.source_name @@ -572,7 +106,9 @@ def select(self, *resource_names: str) -> Dict[str, DltResource]: for name in resource_names: if name not in self: # if any key is missing, display the full info - raise ResourcesNotFoundError(self.source_name, set(self.keys()), set(resource_names)) + raise ResourcesNotFoundError( + self.source_name, set(self.keys()), set(resource_names) + ) # set the selected flags for resource in self.values(): self[resource.name].selected = resource.name in resource_names @@ -606,7 +142,10 @@ def _clone_new_pipes(self, resource_names: Sequence[str]) -> None: def 
__setitem__(self, resource_name: str, resource: DltResource) -> None: if resource_name != resource.name: - raise ValueError(f"The index name {resource_name} does not correspond to resource name {resource.name}") + raise ValueError( + f"The index name {resource_name} does not correspond to resource name" + f" {resource.name}" + ) pipe_id = id(resource._pipe) # make shallow copy of the resource resource = copy(resource) @@ -642,22 +181,20 @@ class DltSource(Iterable[TDataItem]): * You can use a `run` method to load the data with a default instance of dlt pipeline. * You can get source read only state for the currently active Pipeline instance """ - def __init__(self, name: str, section: str, schema: Schema, resources: Sequence[DltResource] = None) -> None: - self.name = name + + def __init__( + self, schema: Schema, section: str, resources: Sequence[DltResource] = None + ) -> None: self.section = section """Tells if iterator associated with a source is exhausted""" self._schema = schema self._resources: DltResourceDict = DltResourceDict(self.name, self.section) - if self.name != schema.name: - # raise ValueError(f"Schema name {schema.name} differs from source name {name}! The explicit source name argument is deprecated and will be soon removed.") - warnings.warn(f"Schema name {schema.name} differs from source name {name}! The explicit source name argument is deprecated and will be soon removed.") - if resources: self.resources.add(*resources) @classmethod - def from_data(cls, name: str, section: str, schema: Schema, data: Any) -> Self: + def from_data(cls, schema: Schema, section: str, data: Any) -> Self: """Converts any `data` supported by `dlt` `run` method into `dlt source` with a name `section`.`name` and `schema` schema.""" # creates source from various forms of data if isinstance(data, DltSource): @@ -669,10 +206,13 @@ def from_data(cls, name: str, section: str, schema: Schema, data: Any) -> Self: else: resources = [DltResource.from_data(data)] - return cls(name, section, schema, resources) + return cls(schema, section, resources) - # TODO: 4 properties below must go somewhere else ie. into RelationalSchema which is Schema + Relational normalizer. + @property + def name(self) -> str: + return self._schema.name + # TODO: 4 properties below must go somewhere else ie. into RelationalSchema which is Schema + Relational normalizer. @property def max_table_nesting(self) -> int: """A schema hint that sets the maximum depth of nested table above which the remaining nodes are loaded as structs or JSON.""" @@ -682,6 +222,14 @@ def max_table_nesting(self) -> int: def max_table_nesting(self, value: int) -> None: RelationalNormalizer.update_normalizer_config(self._schema, {"max_nesting": value}) + @property + def schema_contract(self) -> TSchemaContract: + return self.schema.settings["schema_contract"] + + @schema_contract.setter + def schema_contract(self, settings: TSchemaContract) -> None: + self.schema.set_schema_contract(settings) + @property def exhausted(self) -> bool: """check all selected pipes wether one of them has started. if so, the source is exhausted.""" @@ -696,19 +244,24 @@ def exhausted(self) -> bool: def root_key(self) -> bool: """Enables merging on all resources by propagating root foreign key to child tables. 
This option is most useful if you plan to change write disposition of a resource to disable/enable merge""" config = RelationalNormalizer.get_normalizer_config(self._schema).get("propagation") - return config is not None and "root" in config and "_dlt_id" in config["root"] and config["root"]["_dlt_id"] == "_dlt_root_id" + return ( + config is not None + and "root" in config + and "_dlt_id" in config["root"] + and config["root"]["_dlt_id"] == "_dlt_root_id" + ) @root_key.setter def root_key(self, value: bool) -> None: if value is True: - RelationalNormalizer.update_normalizer_config(self._schema, - {"propagation": { - "root": { - "_dlt_id": TColumnName("_dlt_root_id") - }}}) + RelationalNormalizer.update_normalizer_config( + self._schema, {"propagation": {"root": {"_dlt_id": TColumnName("_dlt_root_id")}}} + ) else: if self.root_key: - propagation_config = RelationalNormalizer.get_normalizer_config(self._schema)["propagation"] + propagation_config = RelationalNormalizer.get_normalizer_config(self._schema)[ + "propagation" + ] propagation_config["root"].pop("_dlt_id") # type: ignore @property @@ -751,8 +304,8 @@ def with_resources(self, *resource_names: str) -> "DltSource": def decompose(self, strategy: TDecompositionStrategy) -> List["DltSource"]: """Decomposes source into a list of sources with a given strategy. - "none" will return source as is - "scc" will decompose the dag of selected pipes and their parent into strongly connected components + "none" will return source as is + "scc" will decompose the dag of selected pipes and their parent into strongly connected components """ if strategy == "none": return [self] @@ -783,7 +336,9 @@ def add_limit(self, max_items: int) -> "DltSource": # noqa: A003 @property def run(self) -> SupportsPipelineRun: """A convenience method that will call `run` run on the currently active `dlt` pipeline. If pipeline instance is not found, one with default settings will be created.""" - self_run: SupportsPipelineRun = makefun.partial(Container()[PipelineContext].pipeline().run, *(), data=self) + self_run: SupportsPipelineRun = makefun.partial( + Container()[PipelineContext].pipeline().run, *(), data=self + ) return self_run @property @@ -792,17 +347,22 @@ def state(self) -> StrAny: with inject_section(self._get_config_section_context()): return source_state() - def clone(self) -> "DltSource": - """Creates a deep copy of the source where copies of schema, resources and pipes are created""" + def clone(self, with_name: str = None) -> "DltSource": + """Creates a deep copy of the source where copies of schema, resources and pipes are created. + + If `with_name` is provided, a schema is cloned with a changed name + """ # mind that resources and pipes are cloned when added to the DltResourcesDict in the source constructor - return DltSource(self.name, self.section, self.schema.clone(), list(self._resources.values())) + return DltSource( + self.schema.clone(with_name=with_name), self.section, list(self._resources.values()) + ) def __iter__(self) -> Iterator[TDataItem]: """Opens iterator that yields the data items from all the resources within the source in the same order as in Pipeline class. - A read-only state is provided, initialized from active pipeline state. The state is discarded after the iterator is closed. + A read-only state is provided, initialized from active pipeline state. The state is discarded after the iterator is closed. - A source config section is injected to allow secrets/config injection as during regular extraction. 
+ A source config section is injected to allow secrets/config injection as during regular extraction. """ # use the same state dict when opening iterator and when iterator is iterated mock_state, _ = pipeline_state(Container(), {}) @@ -822,14 +382,16 @@ def _get_config_section_context(self) -> ConfigSectionContext: return ConfigSectionContext( pipeline_name=pipeline_name, sections=(known_sections.SOURCES, self.section, self.name), - source_state_key=self.name + source_state_key=self.name, ) def __getattr__(self, resource_name: str) -> DltResource: try: return self._resources[resource_name] except KeyError: - raise AttributeError(f"Resource with name {resource_name} not found in source {self.name}") + raise AttributeError( + f"Resource with name {resource_name} not found in source {self.name}" + ) def __setattr__(self, name: str, value: Any) -> None: if isinstance(value, DltResource): @@ -838,17 +400,29 @@ def __setattr__(self, name: str, value: Any) -> None: super().__setattr__(name, value) def __str__(self) -> str: - info = f"DltSource {self.name} section {self.section} contains {len(self.resources)} resource(s) of which {len(self.selected_resources)} are selected" + info = ( + f"DltSource {self.name} section {self.section} contains" + f" {len(self.resources)} resource(s) of which {len(self.selected_resources)} are" + " selected" + ) for r in self.resources.values(): selected_info = "selected" if r.selected else "not selected" if r.is_transformer: - info += f"\ntransformer {r.name} is {selected_info} and takes data from {r._pipe.parent.name}" + info += ( + f"\ntransformer {r.name} is {selected_info} and takes data from" + f" {r._pipe.parent.name}" + ) else: info += f"\nresource {r.name} is {selected_info}" if self.exhausted: - info += "\nSource is already iterated and cannot be used again ie. to display or load data." + info += ( + "\nSource is already iterated and cannot be used again ie. to display or load data." + ) else: - info += "\nIf you want to see the data items in this source you must iterate it or convert to list ie. list(source)." + info += ( + "\nIf you want to see the data items in this source you must iterate it or convert" + " to list ie. list(source)." + ) info += " Note that, like any iterator, you can iterate the source only once." 
info += f"\ninstance id: {id(self)}" return info diff --git a/dlt/extract/storage.py b/dlt/extract/storage.py new file mode 100644 index 0000000000..251d7a5ce9 --- /dev/null +++ b/dlt/extract/storage.py @@ -0,0 +1,132 @@ +import os +from typing import Dict, List + +from dlt.common.data_writers import TLoaderFileFormat +from dlt.common.data_writers.writers import DataWriterMetrics +from dlt.common.schema import Schema +from dlt.common.schema.typing import TTableSchemaColumns +from dlt.common.storages import ( + NormalizeStorageConfiguration, + NormalizeStorage, + DataItemStorage, + FileStorage, + PackageStorage, + LoadPackageInfo, +) +from dlt.common.storages.exceptions import LoadPackageNotFound +from dlt.common.typing import TDataItems +from dlt.common.time import precise_time +from dlt.common.utils import uniq_id + + +class ExtractorItemStorage(DataItemStorage): + load_file_type: TLoaderFileFormat + + def __init__(self, package_storage: PackageStorage) -> None: + """Data item storage using `storage` to manage load packages""" + super().__init__(self.load_file_type) + self.package_storage = package_storage + + def _get_data_item_path_template(self, load_id: str, _: str, table_name: str) -> str: + file_name = PackageStorage.build_job_file_name(table_name, "%s") + file_path = self.package_storage.get_job_file_path( + load_id, PackageStorage.NEW_JOBS_FOLDER, file_name + ) + return self.package_storage.storage.make_full_path(file_path) + + +class JsonLExtractorStorage(ExtractorItemStorage): + load_file_type: TLoaderFileFormat = "puae-jsonl" + + +class ArrowExtractorStorage(ExtractorItemStorage): + load_file_type: TLoaderFileFormat = "arrow" + + +class ExtractStorage(NormalizeStorage): + """Wrapper around multiple extractor storages with different file formats""" + + def __init__(self, config: NormalizeStorageConfiguration) -> None: + super().__init__(True, config) + # always create new packages in an unique folder for each instance so + # extracts are isolated ie. if they fail + self.new_packages_folder = uniq_id(8) + self.storage.create_folder(self.new_packages_folder, exists_ok=True) + self.new_packages = PackageStorage( + FileStorage(os.path.join(self.storage.storage_path, self.new_packages_folder)), "new" + ) + self._item_storages: Dict[TLoaderFileFormat, ExtractorItemStorage] = { + "puae-jsonl": JsonLExtractorStorage(self.new_packages), + "arrow": ArrowExtractorStorage(self.new_packages), + } + + def create_load_package(self, schema: Schema, reuse_exiting_package: bool = True) -> str: + """Creates a new load package for given `schema` or returns if such package already exists. 
+ + You can prevent reuse of the existing package by setting `reuse_exiting_package` to False + """ + load_id: str = None + if reuse_exiting_package: + # look for existing package with the same schema name + # TODO: we may cache this mapping but fallback to files is required if pipeline restarts + load_ids = self.new_packages.list_packages() + for load_id in load_ids: + if self.new_packages.schema_name(load_id) == schema.name: + break + load_id = None + if not load_id: + load_id = str(precise_time()) + self.new_packages.create_package(load_id) + # always save schema + self.new_packages.save_schema(load_id, schema) + return load_id + + def get_storage(self, loader_file_format: TLoaderFileFormat) -> ExtractorItemStorage: + return self._item_storages[loader_file_format] + + def close_writers(self, load_id: str) -> None: + for storage in self._item_storages.values(): + storage.close_writers(load_id) + + def closed_files(self, load_id: str) -> List[DataWriterMetrics]: + files = [] + for storage in self._item_storages.values(): + files.extend(storage.closed_files(load_id)) + return files + + def remove_closed_files(self, load_id: str) -> None: + for storage in self._item_storages.values(): + storage.remove_closed_files(load_id) + + def commit_new_load_package(self, load_id: str, schema: Schema) -> None: + self.new_packages.save_schema(load_id, schema) + self.storage.rename_tree( + os.path.join(self.new_packages_folder, self.new_packages.get_package_path(load_id)), + os.path.join( + NormalizeStorage.EXTRACTED_FOLDER, self.new_packages.get_package_path(load_id) + ), + ) + + def delete_empty_extract_folder(self) -> None: + """Deletes temporary extract folder if empty""" + self.storage.delete_folder(self.new_packages_folder, recursively=False) + + def get_load_package_info(self, load_id: str) -> LoadPackageInfo: + """Returns information on temp and extracted packages""" + try: + return self.new_packages.get_load_package_info(load_id) + except LoadPackageNotFound: + return self.extracted_packages.get_load_package_info(load_id) + + def write_data_item( + self, + file_format: TLoaderFileFormat, + load_id: str, + schema_name: str, + table_name: str, + item: TDataItems, + columns: TTableSchemaColumns, + ) -> None: + self.get_storage(file_format).write_data_item( + load_id, schema_name, table_name, item, columns + ) diff --git a/dlt/extract/typing.py b/dlt/extract/typing.py index ad4e23b84f..e0096a255f 100644 --- a/dlt/extract/typing.py +++ b/dlt/extract/typing.py @@ -1,6 +1,17 @@ import inspect from abc import ABC, abstractmethod -from typing import Any, Callable, Generic, Iterator, Literal, Optional, Protocol, TypeVar, Union, Awaitable +from typing import ( + Any, + Callable, + Generic, + Iterator, + Literal, + Optional, + Protocol, + TypeVar, + Union, + Awaitable, +) from dlt.common.typing import TAny, TDataItem, TDataItems @@ -37,10 +48,12 @@ def __init__(self, table_name: str) -> None: class SupportsPipe(Protocol): """A protocol with the core Pipe properties and operations""" + name: str """Pipe name which is inherited by a resource""" parent: "SupportsPipe" """A parent of the current pipe""" + @property def has_parent(self) -> bool: """Checks if pipe is connected to parent pipe from which it takes data items. 
Connected pipes are created from transformer resources""" @@ -51,6 +64,7 @@ def has_parent(self) -> bool: ItemTransformFunctionNoMeta = Callable[[TDataItem], TAny] ItemTransformFunc = Union[ItemTransformFunctionWithMeta[TAny], ItemTransformFunctionNoMeta[TAny]] + class ItemTransform(ABC, Generic[TAny]): _f_meta: ItemTransformFunctionWithMeta[TAny] = None _f: ItemTransformFunctionNoMeta[TAny] = None @@ -114,7 +128,7 @@ def __call__(self, item: TDataItems, meta: Any = None) -> Optional[TDataItems]: class YieldMapItem(ItemTransform[Iterator[TDataItem]]): - # mypy needs those to type correctly + # mypy needs those to type correctly _f_meta: ItemTransformFunctionWithMeta[TDataItem] _f: ItemTransformFunctionNoMeta[TDataItem] @@ -138,3 +152,9 @@ class ValidateItem(ItemTransform[TDataItem]): Subclass should implement the `__call__` method to either return the data item(s) or raise `extract.exceptions.ValidationError`. See `PydanticValidator` for possible implementation. """ + + table_name: str + + def bind(self, pipe: SupportsPipe) -> ItemTransform[TDataItem]: + self.table_name = pipe.name + return self diff --git a/dlt/extract/utils.py b/dlt/extract/utils.py index 3bd9f56a74..1db18ff47e 100644 --- a/dlt/extract/utils.py +++ b/dlt/extract/utils.py @@ -8,8 +8,11 @@ from dlt.common.schema.typing import TColumnNames, TAnySchemaColumns, TTableSchemaColumns from dlt.common.typing import AnyFun, DictStrAny, TDataItem, TDataItems from dlt.common.utils import get_callable_name -from dlt.extract.exceptions import InvalidResourceDataTypeFunctionNotAGenerator, InvalidStepFunctionArguments +from dlt.extract.exceptions import ( + InvalidResourceDataTypeFunctionNotAGenerator, + InvalidStepFunctionArguments, +) from dlt.extract.typing import TTableHintTemplate, TDataItem, TFunHintTemplate, SupportsPipe try: @@ -18,7 +21,9 @@ pydantic = None -def resolve_column_value(column_hint: TTableHintTemplate[TColumnNames], item: TDataItem) -> Union[Any, List[Any]]: +def resolve_column_value( + column_hint: TTableHintTemplate[TColumnNames], item: TDataItem +) -> Union[Any, List[Any]]: """Extract values from the data item given a column hint. Returns either a single value or list of values when hint is a composite. """ @@ -42,7 +47,7 @@ def ensure_table_schema_columns(columns: TAnySchemaColumns) -> TTableSchemaColum return columns elif isinstance(columns, Sequence): # Assume list of columns - return {col['name']: col for col in columns} + return {col["name"]: col for col in columns} elif pydantic is not None and ( isinstance(columns, pydantic.BaseModel) or issubclass(columns, pydantic.BaseModel) ): @@ -51,13 +56,19 @@ def ensure_table_schema_columns(columns: TAnySchemaColumns) -> TTableSchemaColum raise ValueError(f"Unsupported columns type: {type(columns)}") -def ensure_table_schema_columns_hint(columns: TTableHintTemplate[TAnySchemaColumns]) -> TTableHintTemplate[TTableSchemaColumns]: +def ensure_table_schema_columns_hint( + columns: TTableHintTemplate[TAnySchemaColumns], +) -> TTableHintTemplate[TTableSchemaColumns]: """Convert column schema hint to a hint returning `TTableSchemaColumns`. A callable hint is wrapped in another function which converts the original result. 
""" if callable(columns) and not isinstance(columns, type): + def wrapper(item: TDataItem) -> TTableSchemaColumns: - return ensure_table_schema_columns(cast(TFunHintTemplate[TAnySchemaColumns], columns)(item)) + return ensure_table_schema_columns( + cast(TFunHintTemplate[TAnySchemaColumns], columns)(item) + ) + return wrapper return ensure_table_schema_columns(columns) @@ -70,10 +81,12 @@ def reset_pipe_state(pipe: SupportsPipe, source_state_: Optional[DictStrAny] = N reset_resource_state(pipe.name, source_state_) -def simulate_func_call(f: Union[Any, AnyFun], args_to_skip: int, *args: Any, **kwargs: Any) -> Tuple[inspect.Signature, inspect.Signature, inspect.BoundArguments]: +def simulate_func_call( + f: Union[Any, AnyFun], args_to_skip: int, *args: Any, **kwargs: Any +) -> Tuple[inspect.Signature, inspect.Signature, inspect.BoundArguments]: """Simulates a call to a resource or transformer function before it will be wrapped for later execution in the pipe - Returns a tuple with a `f` signature, modified signature in case of transformers and bound arguments + Returns a tuple with a `f` signature, modified signature in case of transformers and bound arguments """ if not callable(f): # just provoke a call to raise default exception @@ -100,11 +113,15 @@ def check_compat_transformer(name: str, f: AnyFun, sig: inspect.Signature) -> in meta_arg = next((p for p in sig.parameters.values() if p.name == "meta"), None) if meta_arg is not None: if meta_arg.kind not in (meta_arg.KEYWORD_ONLY, meta_arg.POSITIONAL_OR_KEYWORD): - raise InvalidStepFunctionArguments(name, callable_name, sig, "'meta' cannot be pos only argument '") + raise InvalidStepFunctionArguments( + name, callable_name, sig, "'meta' cannot be pos only argument '" + ) return meta_arg -def wrap_compat_transformer(name: str, f: AnyFun, sig: inspect.Signature, *args: Any, **kwargs: Any) -> AnyFun: +def wrap_compat_transformer( + name: str, f: AnyFun, sig: inspect.Signature, *args: Any, **kwargs: Any +) -> AnyFun: """Creates a compatible wrapper over transformer function. A pure transformer function expects data item in first argument and one keyword argument called `meta`""" check_compat_transformer(name, f, sig) if len(sig.parameters) == 2 and "meta" in sig.parameters: @@ -121,7 +138,9 @@ def _tx_partial(item: TDataItems, meta: Any = None) -> Any: return makefun.wraps(f, new_sig=inspect.signature(_tx_partial))(_tx_partial) # type: ignore -def wrap_resource_gen(name: str, f: AnyFun, sig: inspect.Signature, *args: Any, **kwargs: Any) -> AnyFun: +def wrap_resource_gen( + name: str, f: AnyFun, sig: inspect.Signature, *args: Any, **kwargs: Any +) -> AnyFun: """Wraps a generator or generator function so it is evaluated on extraction""" if inspect.isgeneratorfunction(inspect.unwrap(f)) or inspect.isgenerator(f): # always wrap generators and generator functions. evaluate only at runtime! 
diff --git a/dlt/extract/validation.py b/dlt/extract/validation.py index c8e30d0eb2..72b70c5661 100644 --- a/dlt/extract/validation.py +++ b/dlt/extract/validation.py @@ -1,13 +1,13 @@ -from typing import Optional, Protocol, TypeVar, Generic, Type, Union, Any, List +from typing import Optional, Tuple, TypeVar, Generic, Type, Union, Any, List +from dlt.common.schema.schema import Schema try: - from pydantic import BaseModel as PydanticBaseModel, ValidationError as PydanticValidationError, create_model + from pydantic import BaseModel as PydanticBaseModel except ModuleNotFoundError: - PydanticBaseModel = None # type: ignore[misc] + PydanticBaseModel = Any # type: ignore[misc, assignment] -from dlt.extract.exceptions import ValidationError from dlt.common.typing import TDataItems -from dlt.common.schema.typing import TAnySchemaColumns +from dlt.common.schema.typing import TAnySchemaColumns, TSchemaContract, TSchemaEvolutionMode from dlt.extract.typing import TTableHintTemplate, ValidateItem @@ -16,31 +16,73 @@ class PydanticValidator(ValidateItem, Generic[_TPydanticModel]): model: Type[_TPydanticModel] - def __init__(self, model: Type[_TPydanticModel]) -> None: - self.model = model - # Create a model for validating list of items in batch - self.list_model = create_model( - "List" + model.__name__, - items=(List[model], ...) # type: ignore[valid-type] - ) + def __init__( + self, + model: Type[_TPydanticModel], + column_mode: TSchemaEvolutionMode, + data_mode: TSchemaEvolutionMode, + ) -> None: + from dlt.common.libs.pydantic import apply_schema_contract_to_model, create_list_model + + self.column_mode: TSchemaEvolutionMode = column_mode + self.data_mode: TSchemaEvolutionMode = data_mode + self.model = apply_schema_contract_to_model(model, column_mode, data_mode) + self.list_model = create_list_model(self.model, data_mode) - def __call__(self, item: TDataItems, meta: Any = None) -> Union[_TPydanticModel, List[_TPydanticModel]]: + def __call__( + self, item: TDataItems, meta: Any = None + ) -> Union[_TPydanticModel, List[_TPydanticModel]]: """Validate a data item against the pydantic model""" if item is None: return None - try: - if isinstance(item, list): - return self.list_model(items=item).items # type: ignore[attr-defined, no-any-return] - return self.model.parse_obj(item) - except PydanticValidationError as e: - raise ValidationError(self, item, e) from e + + from dlt.common.libs.pydantic import validate_item, validate_items + + if isinstance(item, list): + return validate_items( + self.table_name, self.list_model, item, self.column_mode, self.data_mode + ) + return validate_item(self.table_name, self.model, item, self.column_mode, self.data_mode) def __str__(self, *args: Any, **kwargs: Any) -> str: return f"PydanticValidator(model={self.model.__qualname__})" -def get_column_validator(columns: TTableHintTemplate[TAnySchemaColumns]) -> Optional[ValidateItem]: - if PydanticBaseModel is not None and isinstance(columns, type) and issubclass(columns, PydanticBaseModel): - return PydanticValidator(columns) - return None +def create_item_validator( + columns: TTableHintTemplate[TAnySchemaColumns], + schema_contract: TTableHintTemplate[TSchemaContract] = None, +) -> Tuple[Optional[ValidateItem], TTableHintTemplate[TSchemaContract]]: + """Creates item validator for a `columns` definition and a `schema_contract` + + Returns a tuple (validator, schema contract). If validator could not be created, returns None at first position. 
+ If schema_contract was not specified a default schema contract for given validator will be returned + """ + if ( + PydanticBaseModel is not None + and isinstance(columns, type) + and issubclass(columns, PydanticBaseModel) + ): + assert not callable( + schema_contract + ), "schema_contract cannot be dynamic for Pydantic item validator" + + from dlt.common.libs.pydantic import extra_to_column_mode, get_extra_from_model + + # freeze the columns if we have a fully defined table and no other explicit contract + expanded_schema_contract = Schema.expand_schema_contract_settings( + schema_contract, + # corresponds to default Pydantic behavior + default={ + "tables": "evolve", + "columns": extra_to_column_mode(get_extra_from_model(columns)), + "data_type": "freeze", + }, + ) + return ( + PydanticValidator( + columns, expanded_schema_contract["columns"], expanded_schema_contract["data_type"] + ), + schema_contract or expanded_schema_contract, + ) + return None, schema_contract diff --git a/dlt/extract/wrappers.py b/dlt/extract/wrappers.py index e8e295f245..7ffb6b4fc6 100644 --- a/dlt/extract/wrappers.py +++ b/dlt/extract/wrappers.py @@ -22,4 +22,4 @@ def wrap_additional_type(data: Any) -> Any: if isinstance(data, (PandaFrame, ArrowTable, ArrowRecords)): return [data] - return data \ No newline at end of file + return data diff --git a/dlt/helpers/airflow_helper.py b/dlt/helpers/airflow_helper.py index 2a9c76cc76..c72118cfc9 100644 --- a/dlt/helpers/airflow_helper.py +++ b/dlt/helpers/airflow_helper.py @@ -1,7 +1,13 @@ import os from tempfile import gettempdir from typing import Any, Callable, List, Literal, Optional, Sequence, Tuple -from tenacity import retry_if_exception, wait_exponential, stop_after_attempt, Retrying, RetryCallState +from tenacity import ( + retry_if_exception, + wait_exponential, + stop_after_attempt, + Retrying, + RetryCallState, +) from dlt.common import pendulum from dlt.common.exceptions import MissingDependencyException @@ -25,7 +31,7 @@ from dlt.common.configuration.specs.config_providers_context import ConfigProvidersContext from dlt.common.runtime.collector import NULL_COLLECTOR -from dlt.extract.source import DltSource +from dlt.extract import DltSource from dlt.pipeline.helpers import retry_load from dlt.pipeline.pipeline import Pipeline from dlt.pipeline.progress import log @@ -33,7 +39,9 @@ DEFAULT_RETRY_NO_RETRY = Retrying(stop=stop_after_attempt(1), reraise=True) -DEFAULT_RETRY_BACKOFF = Retrying(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1.5, min=4, max=10), reraise=True) +DEFAULT_RETRY_BACKOFF = Retrying( + stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1.5, min=4, max=10), reraise=True +) class PipelineTasksGroup(TaskGroup): @@ -50,13 +58,13 @@ def __init__( log_progress_period: float = 30.0, buffer_max_items: int = 1000, retry_policy: Retrying = DEFAULT_RETRY_NO_RETRY, - retry_pipeline_steps: Sequence[TPipelineStep] = ("load", ), + retry_pipeline_steps: Sequence[TPipelineStep] = ("load",), fail_task_if_any_job_failed: bool = True, abort_task_if_any_job_failed: bool = False, wipe_local_data: bool = True, save_load_info: bool = False, save_trace_info: bool = False, - **kwargs: Any + **kwargs: Any, ) -> None: """Creates a task group to which you can add pipeline runs @@ -103,7 +111,7 @@ def __init__( self.save_trace_info = save_trace_info # reload providers so config.toml in dags folder is included - dags_folder = conf.get('core', 'dags_folder') + dags_folder = conf.get("core", "dags_folder") # set the dlt project folder to dags 
os.environ["DLT_PROJECT_DIR"] = dags_folder @@ -129,7 +137,7 @@ def add_run( decompose: Literal["none", "serialize"] = "none", table_name: str = None, write_disposition: TWriteDisposition = None, - **kwargs: Any + **kwargs: Any, ) -> List[PythonOperator]: """Creates a task or a group of tasks to run `data` with `pipeline` @@ -153,7 +161,10 @@ def add_run( # make sure that pipeline was created after dag was initialized if not pipeline.pipelines_dir.startswith(os.environ["DLT_DATA_DIR"]): - raise ValueError("Please create your Pipeline instance after AirflowTasks are created. The dlt pipelines directory is not set correctly") + raise ValueError( + "Please create your Pipeline instance after AirflowTasks are created. The dlt" + " pipelines directory is not set correctly" + ) def task_name(pipeline: Pipeline, data: Any) -> str: task_name = pipeline.pipeline_name @@ -165,11 +176,9 @@ def task_name(pipeline: Pipeline, data: Any) -> str: return task_name with self: - # use factory function to make test, in order to parametrize it. passing arguments to task function (_run) is serializing them and # running template engine on them def make_task(pipeline: Pipeline, data: Any) -> PythonOperator: - def _run() -> None: # activate pipeline pipeline.activate() @@ -182,7 +191,10 @@ def _run() -> None: logger.LOGGER = ti.log # set global number of buffered items - if dlt.config.get("data_writer.buffer_max_items") is None and self.buffer_max_items > 0: + if ( + dlt.config.get("data_writer.buffer_max_items") is None + and self.buffer_max_items > 0 + ): dlt.config["data_writer.buffer_max_items"] = self.buffer_max_items logger.info(f"Set data_writer.buffer_max_items to {self.buffer_max_items}") @@ -192,24 +204,36 @@ def _run() -> None: logger.info("Set load.abort_task_if_any_job_failed to True") if self.log_progress_period > 0 and task_pipeline.collector == NULL_COLLECTOR: - task_pipeline.collector = log(log_period=self.log_progress_period, logger=logger.LOGGER) + task_pipeline.collector = log( + log_period=self.log_progress_period, logger=logger.LOGGER + ) logger.info(f"Enabled log progress with period {self.log_progress_period}") logger.info(f"Pipeline data in {task_pipeline.working_dir}") def log_after_attempt(retry_state: RetryCallState) -> None: if not retry_state.retry_object.stop(retry_state): - logger.error("Retrying pipeline run due to exception: %s", retry_state.outcome.exception()) + logger.error( + "Retrying pipeline run due to exception: %s", + retry_state.outcome.exception(), + ) try: # retry with given policy on selected pipeline steps for attempt in self.retry_policy.copy( - retry=retry_if_exception(retry_load(retry_on_pipeline_steps=self.retry_pipeline_steps)), - after=log_after_attempt + retry=retry_if_exception( + retry_load(retry_on_pipeline_steps=self.retry_pipeline_steps) + ), + after=log_after_attempt, ): with attempt: - logger.info("Running the pipeline, attempt=%s" % attempt.retry_state.attempt_number) - load_info = task_pipeline.run(data, table_name=table_name, write_disposition=write_disposition) + logger.info( + "Running the pipeline, attempt=%s" + % attempt.retry_state.attempt_number + ) + load_info = task_pipeline.run( + data, table_name=table_name, write_disposition=write_disposition + ) logger.info(str(load_info)) # save load and trace if self.save_load_info: @@ -217,7 +241,9 @@ def log_after_attempt(retry_state: RetryCallState) -> None: task_pipeline.run([load_info], table_name="_load_info") if self.save_trace_info: logger.info("Saving the trace in the destination") - 
task_pipeline.run([task_pipeline.last_trace], table_name="_trace") + task_pipeline.run( + [task_pipeline.last_trace], table_name="_trace" + ) # raise on failed jobs if requested if self.fail_task_if_any_job_failed: load_info.raise_on_failed_jobs() @@ -228,9 +254,7 @@ def log_after_attempt(retry_state: RetryCallState) -> None: task_pipeline._wipe_working_folder() return PythonOperator( - task_id=task_name(pipeline, data), - python_callable=_run, - **kwargs + task_id=task_name(pipeline, data), python_callable=_run, **kwargs ) if decompose == "none": @@ -263,6 +287,7 @@ def airflow_get_execution_dates() -> Tuple[pendulum.DateTime, Optional[pendulum. # prefer logging to task logger try: from airflow.operators.python import get_current_context # noqa + context = get_current_context() return context["data_interval_start"], context["data_interval_end"] except Exception: diff --git a/dlt/helpers/dbt/__init__.py b/dlt/helpers/dbt/__init__.py index c6107b2873..b555bcd3a9 100644 --- a/dlt/helpers/dbt/__init__.py +++ b/dlt/helpers/dbt/__init__.py @@ -22,19 +22,21 @@ def _default_profile_name(credentials: DestinationClientDwhConfiguration) -> str: - profile_name = credentials.destination_name + profile_name = credentials.destination_type # in case of credentials with default add default to the profile name if isinstance(credentials.credentials, CredentialsWithDefault): if credentials.credentials.has_default_credentials(): profile_name += "_default" - elif profile_name == 'snowflake': - if getattr(credentials.credentials, 'private_key', None): + elif profile_name == "snowflake": + if getattr(credentials.credentials, "private_key", None): # snowflake with private key is a separate profile - profile_name += '_pkey' + profile_name += "_pkey" return profile_name -def _create_dbt_deps(destination_names: List[str], dbt_version: str = DEFAULT_DBT_VERSION) -> List[str]: +def _create_dbt_deps( + destination_types: List[str], dbt_version: str = DEFAULT_DBT_VERSION +) -> List[str]: if dbt_version: # if parses as version use "==" operator with contextlib.suppress(ValueError): @@ -44,7 +46,7 @@ def _create_dbt_deps(destination_names: List[str], dbt_version: str = DEFAULT_DB dbt_version = "" # add version only to the core package. 
the other packages versions must be resolved by pip - all_packages = ["core" + dbt_version] + destination_names + all_packages = ["core" + dbt_version] + destination_types for idx, package in enumerate(all_packages): package = "dbt-" + DBT_DESTINATION_MAP.get(package, package) # verify package @@ -54,22 +56,27 @@ def _create_dbt_deps(destination_names: List[str], dbt_version: str = DEFAULT_DB dlt_requirement = get_installed_requirement_string() # get additional requirements additional_deps: List[str] = [] - if "duckdb" in destination_names or "motherduck" in destination_names: + if "duckdb" in destination_types or "motherduck" in destination_types: from importlib.metadata import version as pkg_version + # force locally installed duckdb additional_deps = ["duckdb" + "==" + pkg_version("duckdb")] return all_packages + additional_deps + [dlt_requirement] -def restore_venv(venv_dir: str, destination_names: List[str], dbt_version: str = DEFAULT_DBT_VERSION) -> Venv: +def restore_venv( + venv_dir: str, destination_types: List[str], dbt_version: str = DEFAULT_DBT_VERSION +) -> Venv: venv = Venv.restore(venv_dir) - venv.add_dependencies(_create_dbt_deps(destination_names, dbt_version)) + venv.add_dependencies(_create_dbt_deps(destination_types, dbt_version)) return venv -def create_venv(venv_dir: str, destination_names: List[str], dbt_version: str = DEFAULT_DBT_VERSION) -> Venv: - return Venv.create(venv_dir, _create_dbt_deps(destination_names, dbt_version)) +def create_venv( + venv_dir: str, destination_types: List[str], dbt_version: str = DEFAULT_DBT_VERSION +) -> Venv: + return Venv.create(venv_dir, _create_dbt_deps(destination_types, dbt_version)) def package_runner( @@ -79,7 +86,7 @@ def package_runner( package_location: str, package_repository_branch: str = None, package_repository_ssh_key: TSecretValue = TSecretValue(""), # noqa - auto_full_refresh_when_out_of_sync: bool = None + auto_full_refresh_when_out_of_sync: bool = None, ) -> DBTPackageRunner: default_profile_name = _default_profile_name(destination_configuration) return create_runner( @@ -90,5 +97,5 @@ def package_runner( package_repository_branch=package_repository_branch, package_repository_ssh_key=package_repository_ssh_key, package_profile_name=default_profile_name, - auto_full_refresh_when_out_of_sync=auto_full_refresh_when_out_of_sync + auto_full_refresh_when_out_of_sync=auto_full_refresh_when_out_of_sync, ) diff --git a/dlt/helpers/dbt/configuration.py b/dlt/helpers/dbt/configuration.py index d21266196e..4cd3f3a0f4 100644 --- a/dlt/helpers/dbt/configuration.py +++ b/dlt/helpers/dbt/configuration.py @@ -10,7 +10,9 @@ class DBTRunnerConfiguration(BaseConfiguration): package_location: str = None package_repository_branch: Optional[str] = None - package_repository_ssh_key: TSecretValue = TSecretValue("") # the default is empty value which will disable custom SSH KEY + package_repository_ssh_key: TSecretValue = TSecretValue( + "" + ) # the default is empty value which will disable custom SSH KEY package_profiles_dir: Optional[str] = None package_profile_name: Optional[str] = None auto_full_refresh_when_out_of_sync: bool = True diff --git a/dlt/helpers/dbt/dbt_utils.py b/dlt/helpers/dbt/dbt_utils.py index 06154eb58c..b4097e4434 100644 --- a/dlt/helpers/dbt/dbt_utils.py +++ b/dlt/helpers/dbt/dbt_utils.py @@ -7,12 +7,17 @@ from dlt.common.exceptions import MissingDependencyException from dlt.common.typing import StrAny -from dlt.helpers.dbt.exceptions import DBTProcessingError, DBTNodeResult, IncrementalSchemaOutOfSyncError +from 
dlt.helpers.dbt.exceptions import ( + DBTProcessingError, + DBTNodeResult, + IncrementalSchemaOutOfSyncError, +) try: # block disabling root logger import logbook.compat - logbook.compat.redirect_logging = lambda : None + + logbook.compat.redirect_logging = lambda: None # can only import DBT after redirect is disabled # https://stackoverflow.com/questions/48619517/call-a-click-command-from-code @@ -78,9 +83,12 @@ def set_path_wrapper(self: dbt.logger.LogManager, path: str) -> None: def is_incremental_schema_out_of_sync_error(error: Any) -> bool: - def _check_single_item(error_: dbt_results.RunResult) -> bool: - return error_.status == dbt_results.RunStatus.Error and "The source and target schemas on this incremental model are out of sync" in error_.message + return ( + error_.status == dbt_results.RunStatus.Error + and "The source and target schemas on this incremental model are out of sync" + in error_.message + ) if isinstance(error, dbt_results.RunResult): return _check_single_item(error) @@ -102,18 +110,20 @@ def parse_dbt_execution_results(results: Any) -> Sequence[DBTNodeResult]: return None return [ - DBTNodeResult(res.node.name, res.message, res.execution_time, str(res.status)) for res in results if isinstance(res, dbt_results.NodeResult) - ] + DBTNodeResult(res.node.name, res.message, res.execution_time, str(res.status)) + for res in results + if isinstance(res, dbt_results.NodeResult) + ] def run_dbt_command( - package_path: str, - command: str, - profiles_dir: str, - profile_name: Optional[str] = None, - global_args: Sequence[str] = None, - command_args: Sequence[str] = None, - package_vars: StrAny = None + package_path: str, + command: str, + profiles_dir: str, + profile_name: Optional[str] = None, + global_args: Sequence[str] = None, + command_args: Sequence[str] = None, + package_vars: StrAny = None, ) -> Union[Sequence[DBTNodeResult], dbt_results.ExecutionResult]: args = ["--profiles-dir", profiles_dir] # add profile name if provided @@ -133,7 +143,7 @@ def run_dbt_command( success: bool = None # dbt uses logbook which does not run on python 10. 
below is a hack that allows that warnings.filterwarnings("ignore", category=DeprecationWarning, module="logbook") - runner_args = (global_args or []) + [command] + args # type: ignore + runner_args = (global_args or []) + [command] + args # type: ignore with dbt.logger.log_manager.applicationbound(): try: @@ -177,8 +187,16 @@ def init_logging_and_run_dbt_command( profiles_dir: str, profile_name: Optional[str] = None, command_args: Sequence[str] = None, - package_vars: StrAny = None + package_vars: StrAny = None, ) -> Union[Sequence[DBTNodeResult], dbt_results.ExecutionResult]: # initialize dbt logging, returns global parameters to dbt command dbt_global_args = initialize_dbt_logging(log_level, is_json_logging) - return run_dbt_command(package_path, command, profiles_dir, profile_name, dbt_global_args, command_args, package_vars) + return run_dbt_command( + package_path, + command, + profiles_dir, + profile_name, + dbt_global_args, + command_args, + package_vars, + ) diff --git a/dlt/helpers/dbt/exceptions.py b/dlt/helpers/dbt/exceptions.py index 3a9d6f9c80..545b01868a 100644 --- a/dlt/helpers/dbt/exceptions.py +++ b/dlt/helpers/dbt/exceptions.py @@ -23,7 +23,9 @@ class DBTNodeResult(NamedTuple): class DBTProcessingError(DBTRunnerException): - def __init__(self, command: str, run_results: Sequence[DBTNodeResult], dbt_results: Any) -> None: + def __init__( + self, command: str, run_results: Sequence[DBTNodeResult], dbt_results: Any + ) -> None: self.command = command self.run_results = run_results # the results from DBT may be anything diff --git a/dlt/helpers/dbt/runner.py b/dlt/helpers/dbt/runner.py index 381260536c..388b81b2ee 100644 --- a/dlt/helpers/dbt/runner.py +++ b/dlt/helpers/dbt/runner.py @@ -17,7 +17,12 @@ from dlt.common.utils import with_custom_environ from dlt.helpers.dbt.configuration import DBTRunnerConfiguration -from dlt.helpers.dbt.exceptions import IncrementalSchemaOutOfSyncError, PrerequisitesException, DBTNodeResult, DBTProcessingError +from dlt.helpers.dbt.exceptions import ( + IncrementalSchemaOutOfSyncError, + PrerequisitesException, + DBTNodeResult, + DBTProcessingError, +) from dlt.common.runtime.telemetry import with_telemetry @@ -31,12 +36,13 @@ class DBTPackageRunner: passed via DBTRunnerConfiguration instance """ - def __init__(self, + def __init__( + self, venv: Venv, credentials: DestinationClientDwhConfiguration, working_dir: str, source_dataset_name: str, - config: DBTRunnerConfiguration + config: DBTRunnerConfiguration, ) -> None: self.venv = venv self.credentials = credentials @@ -62,7 +68,9 @@ def _setup_location(self) -> None: self.cloned_package_name = url.name self.package_path = os.path.join(self.working_dir, self.cloned_package_name) - def _get_package_vars(self, additional_vars: StrAny = None, destination_dataset_name: str = None) -> StrAny: + def _get_package_vars( + self, additional_vars: StrAny = None, destination_dataset_name: str = None + ) -> StrAny: if self.config.package_additional_vars: package_vars = dict(self.config.package_additional_vars) else: @@ -82,7 +90,9 @@ def _log_dbt_run_results(self, results: Sequence[DBTNodeResult]) -> None: if res.status == "error": logger.error(f"Model {res.model_name} error! 
Error: {res.message}") else: - logger.info(f"Model {res.model_name} {res.status} in {res.time} seconds with {res.message}") + logger.info( + f"Model {res.model_name} {res.status} in {res.time} seconds with {res.message}" + ) def ensure_newest_package(self) -> None: """Clones or brings the dbt package at `package_location` up to date.""" @@ -90,19 +100,37 @@ def ensure_newest_package(self) -> None: with git_custom_key_command(self.config.package_repository_ssh_key) as ssh_command: try: - ensure_remote_head(self.package_path, branch=self.config.package_repository_branch, with_git_command=ssh_command) + ensure_remote_head( + self.package_path, + branch=self.config.package_repository_branch, + with_git_command=ssh_command, + ) except GitError as err: # cleanup package folder logger.info(f"Package will be cloned due to {type(err).__name__}:{str(err)}") - logger.info(f"Will clone {self.config.package_location} head {self.config.package_repository_branch} into {self.package_path}") - force_clone_repo(self.config.package_location, self.repo_storage, self.cloned_package_name, self.config.package_repository_branch, with_git_command=ssh_command) + logger.info( + f"Will clone {self.config.package_location} head" + f" {self.config.package_repository_branch} into {self.package_path}" + ) + force_clone_repo( + self.config.package_location, + self.repo_storage, + self.cloned_package_name, + self.config.package_repository_branch, + with_git_command=ssh_command, + ) @with_custom_environ - def _run_dbt_command(self, command: str, command_args: Sequence[str] = None, package_vars: StrAny = None) -> Sequence[DBTNodeResult]: - logger.info(f"Exec dbt command: {command} {command_args} {package_vars} on profile {self.config.package_profile_name}") + def _run_dbt_command( + self, command: str, command_args: Sequence[str] = None, package_vars: StrAny = None + ) -> Sequence[DBTNodeResult]: + logger.info( + f"Exec dbt command: {command} {command_args} {package_vars} on profile" + f" {self.config.package_profile_name}" + ) # write credentials to environ to pass them to dbt, add DLT__ prefix if self.credentials: - add_config_to_env(self.credentials, ("dlt", )) + add_config_to_env(self.credentials, ("dlt",)) args = [ self.config.runtime.log_level, is_json_logging(self.config.runtime.log_format), @@ -111,7 +139,7 @@ def _run_dbt_command(self, command: str, command_args: Sequence[str] = None, pac self.config.package_profiles_dir, self.config.package_profile_name, command_args, - package_vars + package_vars, ] script = f""" from functools import partial @@ -134,7 +162,12 @@ def _run_dbt_command(self, command: str, command_args: Sequence[str] = None, pac print(cpe.stderr) raise - def run(self, cmd_params: Sequence[str] = ("--fail-fast", ), additional_vars: StrAny = None, destination_dataset_name: str = None) -> Sequence[DBTNodeResult]: + def run( + self, + cmd_params: Sequence[str] = ("--fail-fast",), + additional_vars: StrAny = None, + destination_dataset_name: str = None, + ) -> Sequence[DBTNodeResult]: """Runs `dbt` package Executes `dbt run` on previously cloned package. @@ -151,12 +184,15 @@ def run(self, cmd_params: Sequence[str] = ("--fail-fast", ), additional_vars: St DBTProcessingError: `run` command failed. 
Contains a list of models with their execution statuses and error messages """ return self._run_dbt_command( - "run", - cmd_params, - self._get_package_vars(additional_vars, destination_dataset_name) + "run", cmd_params, self._get_package_vars(additional_vars, destination_dataset_name) ) - def test(self, cmd_params: Sequence[str] = None, additional_vars: StrAny = None, destination_dataset_name: str = None) -> Sequence[DBTNodeResult]: + def test( + self, + cmd_params: Sequence[str] = None, + additional_vars: StrAny = None, + destination_dataset_name: str = None, + ) -> Sequence[DBTNodeResult]: """Tests `dbt` package Executes `dbt test` on previously cloned package. @@ -173,12 +209,12 @@ def test(self, cmd_params: Sequence[str] = None, additional_vars: StrAny = None, DBTProcessingError: `test` command failed. Contains a list of models with their execution statuses and error messages """ return self._run_dbt_command( - "test", - cmd_params, - self._get_package_vars(additional_vars, destination_dataset_name) + "test", cmd_params, self._get_package_vars(additional_vars, destination_dataset_name) ) - def _run_db_steps(self, run_params: Sequence[str], package_vars: StrAny, source_tests_selector: str) -> Sequence[DBTNodeResult]: + def _run_db_steps( + self, run_params: Sequence[str], package_vars: StrAny, source_tests_selector: str + ) -> Sequence[DBTNodeResult]: if self.repo_storage: # make sure we use package from the remote head self.ensure_newest_package() @@ -209,8 +245,9 @@ def _run_db_steps(self, run_params: Sequence[str], package_vars: StrAny, source_ else: raise - def run_all(self, - run_params: Sequence[str] = ("--fail-fast", ), + def run_all( + self, + run_params: Sequence[str] = ("--fail-fast",), additional_vars: StrAny = None, source_tests_selector: str = None, destination_dataset_name: str = None, @@ -244,7 +281,7 @@ def run_all(self, results = self._run_db_steps( run_params, self._get_package_vars(additional_vars, destination_dataset_name), - source_tests_selector + source_tests_selector, ) self._log_dbt_run_results(results) return results @@ -270,7 +307,7 @@ def create_runner( package_profiles_dir: str = None, package_profile_name: str = None, auto_full_refresh_when_out_of_sync: bool = None, - config: DBTRunnerConfiguration = None + config: DBTRunnerConfiguration = None, ) -> DBTPackageRunner: """Creates a Python wrapper over `dbt` package present at specified location, that allows to control it (ie. run and test) from Python code. 
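Editorial aside, not part of the patch: the runner reformatted above keeps its schema-drift handling (`IncrementalSchemaOutOfSyncError`, `auto_full_refresh_when_out_of_sync`). Below is a minimal sketch of that pattern, assuming `runner` is a `DBTPackageRunner`; the exact place where the exception is caught inside `_run_db_steps` may differ from this simplification.

from dlt.helpers.dbt.exceptions import IncrementalSchemaOutOfSyncError

def run_with_auto_full_refresh(runner, additional_vars=None):
    # first attempt: plain incremental run, same defaults as DBTPackageRunner.run()
    try:
        return runner.run(("--fail-fast",), additional_vars=additional_vars)
    except IncrementalSchemaOutOfSyncError:
        # incremental models drifted from the destination schema:
        # retry once with a full refresh instead of failing the whole load
        return runner.run(("--fail-fast", "--full-refresh"), additional_vars=additional_vars)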
diff --git a/dlt/helpers/dbt_cloud/client.py b/dlt/helpers/dbt_cloud/client.py index 8851aaa168..67d315f0d1 100644 --- a/dlt/helpers/dbt_cloud/client.py +++ b/dlt/helpers/dbt_cloud/client.py @@ -41,9 +41,7 @@ def __init__( self.accounts_url = f"accounts/{self.account_id}" def get_endpoint(self, endpoint: str) -> Any: - response = requests.get( - f"{self.base_api_url}/{endpoint}", headers=self._headers - ) + response = requests.get(f"{self.base_api_url}/{endpoint}", headers=self._headers) results = response.json() return results @@ -103,16 +101,15 @@ def trigger_job_run( """ if not (self.account_id and job_id): raise InvalidCredentialsException( - f"account_id and job_id are required, got account_id: {self.account_id} and job_id: {job_id}" + f"account_id and job_id are required, got account_id: {self.account_id} and job_id:" + f" {job_id}" ) json_body = {} if data: json_body.update(data) - response = self.post_endpoint( - f"{self.accounts_url}/jobs/{job_id}/run", json_body=json_body - ) + response = self.post_endpoint(f"{self.accounts_url}/jobs/{job_id}/run", json_body=json_body) return int(response["data"]["id"]) def get_run_status(self, run_id: Union[int, str]) -> Dict[Any, Any]: @@ -136,7 +133,8 @@ def get_run_status(self, run_id: Union[int, str]) -> Dict[Any, Any]: """ if not (self.account_id and run_id): raise InvalidCredentialsException( - f"account_id and run_id are required, got account_id: {self.account_id} and run_id: {run_id}." + f"account_id and run_id are required, got account_id: {self.account_id} and run_id:" + f" {run_id}." ) response = self.get_endpoint(f"{self.accounts_url}/runs/{run_id}") diff --git a/dlt/helpers/streamlit_helper.py b/dlt/helpers/streamlit_helper.py index 7921e4e2e1..d3e194b18d 100644 --- a/dlt/helpers/streamlit_helper.py +++ b/dlt/helpers/streamlit_helper.py @@ -16,9 +16,14 @@ try: import streamlit as st + # from streamlit import SECRETS_FILE_LOC, secrets except ModuleNotFoundError: - raise MissingDependencyException("DLT Streamlit Helpers", ["streamlit"], "DLT Helpers for Streamlit should be run within a streamlit app.") + raise MissingDependencyException( + "DLT Streamlit Helpers", + ["streamlit"], + "DLT Helpers for Streamlit should be run within a streamlit app.", + ) # use right caching function to disable deprecation message @@ -120,7 +125,7 @@ def _query_data_live(query: str, schema_name: str = None) -> pd.DataFrame: schema_names = ", ".join(sorted(pipeline.schema_names)) st.markdown(f""" * pipeline name: **{pipeline.pipeline_name}** - * destination: **{str(credentials)}** in **{pipeline.destination.__name__}** + * destination: **{str(credentials)}** in **{pipeline.destination.destination_description}** * dataset name: **{pipeline.dataset_name}** * default schema name: **{pipeline.default_schema_name}** * all schema names: **{schema_names}** @@ -129,11 +134,17 @@ def _query_data_live(query: str, schema_name: str = None) -> pd.DataFrame: st.header("Last load info") col1, col2, col3 = st.columns(3) loads_df = _query_data_live( - f"SELECT load_id, inserted_at FROM {pipeline.default_schema.loads_table_name} WHERE status = 0 ORDER BY inserted_at DESC LIMIT 101 " + f"SELECT load_id, inserted_at FROM {pipeline.default_schema.loads_table_name} WHERE" + " status = 0 ORDER BY inserted_at DESC LIMIT 101 " ) loads_no = loads_df.shape[0] if loads_df.shape[0] > 0: - rel_time = humanize.naturaldelta(pendulum.now() - pendulum.from_timestamp(loads_df.iloc[0, 1].timestamp())) + " ago" + rel_time = ( + humanize.naturaldelta( + pendulum.now() - 
pendulum.from_timestamp(loads_df.iloc[0, 1].timestamp()) + ) + + " ago" + ) last_load_id = loads_df.iloc[0, 0] if loads_no > 100: loads_no = "> " + str(loads_no) @@ -154,7 +165,10 @@ def _query_data_live(query: str, schema_name: str = None) -> pd.DataFrame: if "parent" in table: continue table_name = table["name"] - query_parts.append(f"SELECT '{table_name}' as table_name, COUNT(1) As rows_count FROM {table_name} WHERE _dlt_load_id = '{selected_load_id}'") + query_parts.append( + f"SELECT '{table_name}' as table_name, COUNT(1) As rows_count FROM" + f" {table_name} WHERE _dlt_load_id = '{selected_load_id}'" + ) query_parts.append("UNION ALL") query_parts.pop() rows_counts_df = _query_data("\n".join(query_parts)) @@ -167,8 +181,9 @@ def _query_data_live(query: str, schema_name: str = None) -> pd.DataFrame: st.header("Schema updates") schemas_df = _query_data_live( - f"SELECT schema_name, inserted_at, version, version_hash FROM {pipeline.default_schema.version_table_name} ORDER BY inserted_at DESC LIMIT 101 " - ) + "SELECT schema_name, inserted_at, version, version_hash FROM" + f" {pipeline.default_schema.version_table_name} ORDER BY inserted_at DESC LIMIT 101 " + ) st.markdown("**100 recent schema updates**") st.dataframe(schemas_df) @@ -188,14 +203,19 @@ def _query_data_live(query: str, schema_name: str = None) -> pd.DataFrame: col2.metric("Remote state version", remote_state_version) if remote_state_version != local_state["_state_version"]: - st.warning("Looks like that local state is not yet synchronized or synchronization is disabled") + st.warning( + "Looks like that local state is not yet synchronized or synchronization is disabled" + ) except CannotRestorePipelineException as restore_ex: st.error("Seems like the pipeline does not exist. Did you run it at least once?") st.exception(restore_ex) except ConfigFieldMissingException as cf_ex: - st.error("Pipeline credentials/configuration is missing. This most often happen when you run the streamlit app from different folder than the `.dlt` with `toml` files resides.") + st.error( + "Pipeline credentials/configuration is missing. This most often happen when you run the" + " streamlit app from different folder than the `.dlt` with `toml` files resides." + ) st.text(str(cf_ex)) except Exception as ex: @@ -203,8 +223,13 @@ def _query_data_live(query: str, schema_name: str = None) -> pd.DataFrame: st.exception(ex) - -def write_data_explorer_page(pipeline: Pipeline, schema_name: str = None, show_dlt_tables: bool = False, example_query: str = "", show_charts: bool = True) -> None: +def write_data_explorer_page( + pipeline: Pipeline, + schema_name: str = None, + show_dlt_tables: bool = False, + example_query: str = "", + show_charts: bool = True, +) -> None: """Writes Streamlit app page with a schema and live data preview. 
#### Args: @@ -253,21 +278,29 @@ def _query_data(query: str, chunk_size: int = None) -> pd.DataFrame: if "write_disposition" in table: table_hints.append("write disposition: **%s**" % table["write_disposition"]) columns = table["columns"] - primary_keys: Iterator[str] = flatten_list_or_items([ - col_name for col_name in columns.keys() + primary_keys: Iterator[str] = flatten_list_or_items( + [ + col_name + for col_name in columns.keys() if not col_name.startswith("_") and not columns[col_name].get("primary_key") is None - ]) + ] + ) table_hints.append("primary key(s): **%s**" % ", ".join(primary_keys)) - merge_keys = flatten_list_or_items([ - col_name for col_name in columns.keys() + merge_keys = flatten_list_or_items( + [ + col_name + for col_name in columns.keys() if not col_name.startswith("_") and not columns[col_name].get("merge_key") is None - ]) + ] + ) table_hints.append("merge key(s): **%s**" % ", ".join(merge_keys)) st.markdown(" | ".join(table_hints)) # table schema contains various hints (like clustering or partition options) that we do not want to show in basic view - essentials_f = lambda c: {k:v for k, v in c.items() if k in ["name", "data_type", "nullable"]} + essentials_f = lambda c: { + k: v for k, v in c.items() if k in ["name", "data_type", "nullable"] + } st.table(map(essentials_f, table["columns"].values())) # add a button that when pressed will show the full content of a table @@ -302,7 +335,6 @@ def _query_data(query: str, chunk_size: int = None) -> pd.DataFrame: # try barchart st.bar_chart(df) if df.dtypes.shape[0] == 2 and show_charts: - # try to import altair charts try: import altair as alt @@ -310,13 +342,17 @@ def _query_data(query: str, chunk_size: int = None) -> pd.DataFrame: raise MissingDependencyException( "DLT Streamlit Helpers", ["altair"], - "DLT Helpers for Streamlit should be run within a streamlit app." + "DLT Helpers for Streamlit should be run within a streamlit" + " app.", ) # try altair - bar_chart = alt.Chart(df).mark_bar().encode( - x=f'{df.columns[1]}:Q', - y=alt.Y(f'{df.columns[0]}:N', sort='-x') + bar_chart = ( + alt.Chart(df) + .mark_bar() + .encode( + x=f"{df.columns[1]}:Q", y=alt.Y(f"{df.columns[0]}:N", sort="-x") + ) ) st.altair_chart(bar_chart, use_container_width=True) except Exception as ex: diff --git a/dlt/load/configuration.py b/dlt/load/configuration.py index f62c3a4dda..0a84e3c331 100644 --- a/dlt/load/configuration.py +++ b/dlt/load/configuration.py @@ -20,11 +20,11 @@ def on_resolved(self) -> None: self.pool_type = "none" if self.workers == 1 else "thread" if TYPE_CHECKING: + def __init__( self, pool_type: TPoolType = "thread", workers: int = None, raise_on_failed_jobs: bool = False, - _load_storage_config: LoadStorageConfiguration = None - ) -> None: - ... + _load_storage_config: LoadStorageConfiguration = None, + ) -> None: ... diff --git a/dlt/load/exceptions.py b/dlt/load/exceptions.py index 93d4ef76e1..8a704660ce 100644 --- a/dlt/load/exceptions.py +++ b/dlt/load/exceptions.py @@ -12,7 +12,10 @@ def __init__(self, load_id: str, job_id: str, failed_message: str) -> None: self.load_id = load_id self.job_id = job_id self.failed_message = failed_message - super().__init__(f"Job for {job_id} failed terminally in load {load_id} with message {failed_message}. The package is aborted and cannot be retried.") + super().__init__( + f"Job for {job_id} failed terminally in load {load_id} with message {failed_message}." + " The package is aborted and cannot be retried." 
+ ) class LoadClientJobRetry(DestinationTransientException): @@ -21,15 +24,23 @@ def __init__(self, load_id: str, job_id: str, retry_count: int, max_retry_count: self.job_id = job_id self.retry_count = retry_count self.max_retry_count = max_retry_count - super().__init__(f"Job for {job_id} had {retry_count} retries which a multiple of {max_retry_count}. Exiting retry loop. You can still rerun the load package to retry this job.") + super().__init__( + f"Job for {job_id} had {retry_count} retries which a multiple of {max_retry_count}." + " Exiting retry loop. You can still rerun the load package to retry this job." + ) class LoadClientUnsupportedFileFormats(DestinationTerminalException): - def __init__(self, file_format: str, supported_file_format: Sequence[str], file_path: str) -> None: + def __init__( + self, file_format: str, supported_file_format: Sequence[str], file_path: str + ) -> None: self.file_format = file_format self.supported_types = supported_file_format self.file_path = file_path - super().__init__(f"Loader does not support writer {file_format} in file {file_path}. Supported writers: {supported_file_format}") + super().__init__( + f"Loader does not support writer {file_format} in file {file_path}. Supported writers:" + f" {supported_file_format}" + ) class LoadClientUnsupportedWriteDisposition(DestinationTerminalException): @@ -37,4 +48,7 @@ def __init__(self, table_name: str, write_disposition: str, file_name: str) -> N self.table_name = table_name self.write_disposition = write_disposition self.file_name = file_name - super().__init__(f"Loader does not support {write_disposition} in table {table_name} when loading file {file_name}") + super().__init__( + f"Loader does not support {write_disposition} in table {table_name} when loading file" + f" {file_name}" + ) diff --git a/dlt/load/load.py b/dlt/load/load.py index beae130789..b0b52d61d6 100644 --- a/dlt/load/load.py +++ b/dlt/load/load.py @@ -1,5 +1,4 @@ import contextlib -from copy import copy from functools import reduce import datetime # noqa: 251 from typing import Dict, List, Optional, Tuple, Set, Iterator, Iterable, Callable @@ -9,38 +8,58 @@ from dlt.common import sleep, logger from dlt.common.configuration import with_config, known_sections from dlt.common.configuration.accessors import config -from dlt.common.pipeline import LoadInfo, SupportsPipeline +from dlt.common.pipeline import LoadInfo, LoadMetrics, SupportsPipeline, WithStepInfo from dlt.common.schema.utils import get_child_tables, get_top_level_table from dlt.common.storages.load_storage import LoadPackageInfo, ParsedLoadJobFileName, TJobState -from dlt.common.typing import StrAny from dlt.common.runners import TRunMetrics, Runnable, workermethod, NullExecutor from dlt.common.runtime.collector import Collector, NULL_COLLECTOR from dlt.common.runtime.logger import pretty_format_exception -from dlt.common.exceptions import TerminalValueError, DestinationTerminalException, DestinationTransientException +from dlt.common.exceptions import ( + TerminalValueError, + DestinationTerminalException, + DestinationTransientException, +) from dlt.common.schema import Schema, TSchemaTables from dlt.common.schema.typing import TTableSchema, TWriteDisposition from dlt.common.storages import LoadStorage -from dlt.common.destination.reference import DestinationClientDwhConfiguration, FollowupJob, JobClientBase, WithStagingDataset, DestinationReference, LoadJob, NewLoadJob, TLoadJobState, DestinationClientConfiguration, SupportsStagingDestination +from 
dlt.common.destination.reference import ( + DestinationClientDwhConfiguration, + FollowupJob, + JobClientBase, + WithStagingDataset, + Destination, + LoadJob, + NewLoadJob, + TLoadJobState, + DestinationClientConfiguration, + SupportsStagingDestination, + TDestination, +) from dlt.destinations.job_impl import EmptyLoadJob from dlt.load.configuration import LoaderConfiguration -from dlt.load.exceptions import LoadClientJobFailed, LoadClientJobRetry, LoadClientUnsupportedWriteDisposition, LoadClientUnsupportedFileFormats +from dlt.load.exceptions import ( + LoadClientJobFailed, + LoadClientJobRetry, + LoadClientUnsupportedWriteDisposition, + LoadClientUnsupportedFileFormats, +) -class Load(Runnable[Executor]): +class Load(Runnable[Executor], WithStepInfo[LoadMetrics, LoadInfo]): pool: Executor @with_config(spec=LoaderConfiguration, sections=(known_sections.LOAD,)) def __init__( self, - destination: DestinationReference, - staging_destination: DestinationReference = None, + destination: TDestination, + staging_destination: TDestination = None, collector: Collector = NULL_COLLECTOR, is_storage_owner: bool = False, config: LoaderConfiguration = config.value, initial_client_config: DestinationClientConfiguration = config.value, - initial_staging_client_config: DestinationClientConfiguration = config.value + initial_staging_client_config: DestinationClientConfiguration = config.value, ) -> None: self.config = config self.collector = collector @@ -51,21 +70,23 @@ def __init__( self.staging_destination = staging_destination self.pool = NullExecutor() self.load_storage: LoadStorage = self.create_storage(is_storage_owner) - self._processed_load_ids: Dict[str, str] = {} - """Load ids to dataset name""" - + self._loaded_packages: List[LoadPackageInfo] = [] + super().__init__() def create_storage(self, is_storage_owner: bool) -> LoadStorage: supported_file_formats = self.capabilities.supported_loader_file_formats if self.staging_destination: - supported_file_formats = self.staging_destination.capabilities().supported_loader_file_formats + ["reference"] + supported_file_formats = ( + self.staging_destination.capabilities().supported_loader_file_formats + + ["reference"] + ) if isinstance(self.get_destination_client(Schema("test")), WithStagingDataset): supported_file_formats += ["sql"] load_storage = LoadStorage( is_storage_owner, self.capabilities.preferred_loader_file_format, supported_file_formats, - config=self.config._load_storage_config + config=self.config._load_storage_config, ) return load_storage @@ -76,10 +97,16 @@ def get_staging_destination_client(self, schema: Schema) -> JobClientBase: return self.staging_destination.client(schema, self.initial_staging_client_config) def is_staging_destination_job(self, file_path: str) -> bool: - return self.staging_destination is not None and os.path.splitext(file_path)[1][1:] in self.staging_destination.capabilities().supported_loader_file_formats + return ( + self.staging_destination is not None + and os.path.splitext(file_path)[1][1:] + in self.staging_destination.capabilities().supported_loader_file_formats + ) @contextlib.contextmanager - def maybe_with_staging_dataset(self, job_client: JobClientBase, use_staging: bool) -> Iterator[None]: + def maybe_with_staging_dataset( + self, job_client: JobClientBase, use_staging: bool + ) -> Iterator[None]: """Executes job client methods in context of staging dataset if `table` has `write_disposition` that requires it""" if isinstance(job_client, WithStagingDataset) and use_staging: with 
job_client.with_staging_dataset(): @@ -89,29 +116,51 @@ def maybe_with_staging_dataset(self, job_client: JobClientBase, use_staging: boo @staticmethod @workermethod - def w_spool_job(self: "Load", file_path: str, load_id: str, schema: Schema) -> Optional[LoadJob]: + def w_spool_job( + self: "Load", file_path: str, load_id: str, schema: Schema + ) -> Optional[LoadJob]: job: LoadJob = None try: is_staging_destination_job = self.is_staging_destination_job(file_path) job_client = self.get_destination_client(schema) # if we have a staging destination and the file is not a reference, send to staging - with (self.get_staging_destination_client(schema) if is_staging_destination_job else job_client) as client: - job_info = self.load_storage.parse_job_file_name(file_path) + with ( + self.get_staging_destination_client(schema) + if is_staging_destination_job + else job_client + ) as client: + job_info = ParsedLoadJobFileName.parse(file_path) if job_info.file_format not in self.load_storage.supported_file_formats: - raise LoadClientUnsupportedFileFormats(job_info.file_format, self.capabilities.supported_loader_file_formats, file_path) + raise LoadClientUnsupportedFileFormats( + job_info.file_format, + self.capabilities.supported_loader_file_formats, + file_path, + ) logger.info(f"Will load file {file_path} with table name {job_info.table_name}") table = client.get_load_table(job_info.table_name) if table["write_disposition"] not in ["append", "replace", "merge"]: - raise LoadClientUnsupportedWriteDisposition(job_info.table_name, table["write_disposition"], file_path) + raise LoadClientUnsupportedWriteDisposition( + job_info.table_name, table["write_disposition"], file_path + ) if is_staging_destination_job: - use_staging_dataset = isinstance(job_client, SupportsStagingDestination) and job_client.should_load_data_to_staging_dataset_on_staging_destination(table) + use_staging_dataset = isinstance( + job_client, SupportsStagingDestination + ) and job_client.should_load_data_to_staging_dataset_on_staging_destination( + table + ) else: - use_staging_dataset = isinstance(job_client, WithStagingDataset) and job_client.should_load_data_to_staging_dataset(table) + use_staging_dataset = isinstance( + job_client, WithStagingDataset + ) and job_client.should_load_data_to_staging_dataset(table) with self.maybe_with_staging_dataset(client, use_staging_dataset): - job = client.start_file_load(table, self.load_storage.storage.make_full_path(file_path), load_id) + job = client.start_file_load( + table, + self.load_storage.normalized_packages.storage.make_full_path(file_path), + load_id, + ) except (DestinationTerminalException, TerminalValueError): # if job irreversibly cannot be started, mark it as failed logger.exception(f"Terminal problem when adding job {file_path}") @@ -120,16 +169,12 @@ def w_spool_job(self: "Load", file_path: str, load_id: str, schema: Schema) -> O # return no job so file stays in new jobs (root) folder logger.exception(f"Temporary problem when adding job {file_path}") job = EmptyLoadJob.from_file_path(file_path, "retry", pretty_format_exception()) - self.load_storage.start_job(load_id, job.file_name()) + self.load_storage.normalized_packages.start_job(load_id, job.file_name()) return job def spool_new_jobs(self, load_id: str, schema: Schema) -> Tuple[int, List[LoadJob]]: - # TODO: validate file type, combine files, finalize etc., this is client specific, jsonl for single table - # can just be combined, insert_values must be finalized and then combined # use thread based pool as jobs processing 
is mostly I/O and we do not want to pickle jobs - # TODO: combine files by providing a list of files pertaining to same table into job, so job must be - # extended to accept a list - load_files = self.load_storage.list_new_jobs(load_id)[:self.config.workers] + load_files = self.load_storage.list_new_jobs(load_id)[: self.config.workers] file_count = len(load_files) if file_count == 0: logger.info(f"No new jobs found in {load_id}") @@ -142,11 +187,13 @@ def spool_new_jobs(self, load_id: str, schema: Schema) -> Tuple[int, List[LoadJo # remove None jobs and check the rest return file_count, [job for job in jobs if job is not None] - def retrieve_jobs(self, client: JobClientBase, load_id: str, staging_client: JobClientBase = None) -> Tuple[int, List[LoadJob]]: + def retrieve_jobs( + self, client: JobClientBase, load_id: str, staging_client: JobClientBase = None + ) -> Tuple[int, List[LoadJob]]: jobs: List[LoadJob] = [] # list all files that were started but not yet completed - started_jobs = self.load_storage.list_started_jobs(load_id) + started_jobs = self.load_storage.normalized_packages.list_started_jobs(load_id) logger.info(f"Found {len(started_jobs)} that are already started and should be continued") if len(started_jobs) == 0: @@ -169,30 +216,47 @@ def retrieve_jobs(self, client: JobClientBase, load_id: str, staging_client: Job return len(jobs), jobs def get_new_jobs_info(self, load_id: str) -> List[ParsedLoadJobFileName]: - return [LoadStorage.parse_job_file_name(job_file) for job_file in self.load_storage.list_new_jobs(load_id)] + return [ + ParsedLoadJobFileName.parse(job_file) + for job_file in self.load_storage.list_new_jobs(load_id) + ] - def get_completed_table_chain(self, load_id: str, schema: Schema, top_merged_table: TTableSchema, being_completed_job_id: str = None) -> List[TTableSchema]: + def get_completed_table_chain( + self, + load_id: str, + schema: Schema, + top_merged_table: TTableSchema, + being_completed_job_id: str = None, + ) -> List[TTableSchema]: """Gets a table chain starting from the `top_merged_table` containing only tables with completed/failed jobs. 
None is returned if there's any job that is not completed - Optionally `being_completed_job_id` can be passed that is considered to be completed before job itself moves in storage + Optionally `being_completed_job_id` can be passed that is considered to be completed before job itself moves in storage """ # returns ordered list of tables from parent to child leaf tables table_chain: List[TTableSchema] = [] # make sure all the jobs for the table chain is completed for table in get_child_tables(schema.tables, top_merged_table["name"]): - table_jobs = self.load_storage.list_jobs_for_table(load_id, table["name"]) + table_jobs = self.load_storage.normalized_packages.list_jobs_for_table( + load_id, table["name"] + ) # all jobs must be completed in order for merge to be created - if any(job.state not in ("failed_jobs", "completed_jobs") and job.job_file_info.job_id() != being_completed_job_id for job in table_jobs): + if any( + job.state not in ("failed_jobs", "completed_jobs") + and job.job_file_info.job_id() != being_completed_job_id + for job in table_jobs + ): return None # if there are no jobs for the table, skip it, unless the write disposition is replace, as we need to create and clear the child tables if not table_jobs and top_merged_table["write_disposition"] != "replace": - continue + continue table_chain.append(table) # there must be at least table assert len(table_chain) > 0 return table_chain - def create_followup_jobs(self, load_id: str, state: TLoadJobState, starting_job: LoadJob, schema: Schema) -> List[NewLoadJob]: + def create_followup_jobs( + self, load_id: str, state: TLoadJobState, starting_job: LoadJob, schema: Schema + ) -> List[NewLoadJob]: jobs: List[NewLoadJob] = [] if isinstance(starting_job, FollowupJob): # check for merge jobs only for jobs executing on the destination, the staging destination jobs must be excluded @@ -200,10 +264,16 @@ def create_followup_jobs(self, load_id: str, state: TLoadJobState, starting_job: starting_job_file_name = starting_job.file_name() if state == "completed" and not self.is_staging_destination_job(starting_job_file_name): client = self.destination.client(schema, self.initial_client_config) - top_job_table = get_top_level_table(schema.tables, starting_job.job_file_info().table_name) + top_job_table = get_top_level_table( + schema.tables, starting_job.job_file_info().table_name + ) # if all tables of chain completed, create follow up jobs - if table_chain := self.get_completed_table_chain(load_id, schema, top_job_table, starting_job.job_file_info().job_id()): - if follow_up_jobs := client.create_table_chain_completed_followup_jobs(table_chain): + if table_chain := self.get_completed_table_chain( + load_id, schema, top_job_table, starting_job.job_file_info().job_id() + ): + if follow_up_jobs := client.create_table_chain_completed_followup_jobs( + table_chain + ): jobs = jobs + follow_up_jobs jobs = jobs + starting_job.create_followup_jobs(state) return jobs @@ -222,35 +292,51 @@ def complete_jobs(self, load_id: str, jobs: List[LoadJob], schema: Schema) -> Li elif state == "failed": # try to get exception message from job failed_message = job.exception() - self.load_storage.fail_job(load_id, job.file_name(), failed_message) - logger.error(f"Job for {job.job_id()} failed terminally in load {load_id} with message {failed_message}") + self.load_storage.normalized_packages.fail_job( + load_id, job.file_name(), failed_message + ) + logger.error( + f"Job for {job.job_id()} failed terminally in load {load_id} with message" + f" {failed_message}" + ) 
elif state == "retry": # try to get exception message from job retry_message = job.exception() # move back to new folder to try again - self.load_storage.retry_job(load_id, job.file_name()) - logger.warning(f"Job for {job.job_id()} retried in load {load_id} with message {retry_message}") + self.load_storage.normalized_packages.retry_job(load_id, job.file_name()) + logger.warning( + f"Job for {job.job_id()} retried in load {load_id} with message {retry_message}" + ) elif state == "completed": # create followup jobs followup_jobs = self.create_followup_jobs(load_id, state, job, schema) for followup_job in followup_jobs: # running should be moved into "new jobs", other statuses into started - folder: TJobState = "new_jobs" if followup_job.state() == "running" else "started_jobs" + folder: TJobState = ( + "new_jobs" if followup_job.state() == "running" else "started_jobs" + ) # save all created jobs - self.load_storage.add_new_job(load_id, followup_job.new_file_path(), job_state=folder) - logger.info(f"Job {job.job_id()} CREATED a new FOLLOWUP JOB {followup_job.new_file_path()} placed in {folder}") + self.load_storage.normalized_packages.import_job( + load_id, followup_job.new_file_path(), job_state=folder + ) + logger.info( + f"Job {job.job_id()} CREATED a new FOLLOWUP JOB" + f" {followup_job.new_file_path()} placed in {folder}" + ) # if followup job is not "running" place it in current queue to be finalized if not followup_job.state() == "running": remaining_jobs.append(followup_job) # move to completed folder after followup jobs are created # in case of exception when creating followup job, the loader will retry operation and try to complete again - self.load_storage.complete_job(load_id, job.file_name()) + self.load_storage.normalized_packages.complete_job(load_id, job.file_name()) logger.info(f"Job for {job.job_id()} completed in load {load_id}") if state in ["failed", "completed"]: self.collector.update("Jobs") if state == "failed": - self.collector.update("Jobs", 1, message="WARNING: Some of the jobs failed!", label="Failed") + self.collector.update( + "Jobs", 1, message="WARNING: Some of the jobs failed!", label="Failed" + ) return remaining_jobs @@ -259,17 +345,20 @@ def complete_package(self, load_id: str, schema: Schema, aborted: bool = False) if not aborted: with self.get_destination_client(schema) as job_client: job_client.complete_load(load_id) - # TODO: Load must provide a clear interface to get last loads and metrics - # TODO: get more info ie. was package aborted, schema name etc. 
- if isinstance(job_client.config, DestinationClientDwhConfiguration): - self._processed_load_ids[load_id] = job_client.config.normalize_dataset_name(schema) - else: - self._processed_load_ids[load_id] = None self.load_storage.complete_load_package(load_id, aborted) - logger.info(f"All jobs completed, archiving package {load_id} with aborted set to {aborted}") + # collect package info + self._loaded_packages.append(self.load_storage.get_load_package_info(load_id)) + self._step_info_complete_load_id(load_id, metrics={"started_at": None, "finished_at": None}) + # delete jobs only now + self.load_storage.maybe_remove_completed_jobs(load_id) + logger.info( + f"All jobs completed, archiving package {load_id} with aborted set to {aborted}" + ) @staticmethod - def _get_table_chain_tables_with_filter(schema: Schema, f: Callable[[TTableSchema], bool], tables_with_jobs: Iterable[str]) -> Set[str]: + def _get_table_chain_tables_with_filter( + schema: Schema, f: Callable[[TTableSchema], bool], tables_with_jobs: Iterable[str] + ) -> Set[str]: """Get all jobs for tables with given write disposition and resolve the table chain""" result: Set[str] = set() for table_name in tables_with_jobs: @@ -280,54 +369,110 @@ def _get_table_chain_tables_with_filter(schema: Schema, f: Callable[[TTableSchem # only add tables for tables that have jobs unless the disposition is replace # TODO: this is a (formerly used) hack to make test_merge_on_keys_in_schema, # we should change that test - if not table["name"] in tables_with_jobs and top_job_table["write_disposition"] != "replace": + if ( + not table["name"] in tables_with_jobs + and top_job_table["write_disposition"] != "replace" + ): continue result.add(table["name"]) return result @staticmethod - def _init_dataset_and_update_schema(job_client: JobClientBase, expected_update: TSchemaTables, update_tables: Iterable[str], truncate_tables: Iterable[str] = None, staging_info: bool = False) -> TSchemaTables: + def _init_dataset_and_update_schema( + job_client: JobClientBase, + expected_update: TSchemaTables, + update_tables: Iterable[str], + truncate_tables: Iterable[str] = None, + staging_info: bool = False, + ) -> TSchemaTables: staging_text = "for staging dataset" if staging_info else "" - logger.info(f"Client for {job_client.config.destination_name} will start initialize storage {staging_text}") + logger.info( + f"Client for {job_client.config.destination_type} will start initialize storage" + f" {staging_text}" + ) job_client.initialize_storage() - logger.info(f"Client for {job_client.config.destination_name} will update schema to package schema {staging_text}") - applied_update = job_client.update_stored_schema(only_tables=update_tables, expected_update=expected_update) - logger.info(f"Client for {job_client.config.destination_name} will truncate tables {staging_text}") + logger.info( + f"Client for {job_client.config.destination_type} will update schema to package schema" + f" {staging_text}" + ) + applied_update = job_client.update_stored_schema( + only_tables=update_tables, expected_update=expected_update + ) + logger.info( + f"Client for {job_client.config.destination_type} will truncate tables {staging_text}" + ) job_client.initialize_storage(truncate_tables=truncate_tables) return applied_update - - def _init_client(self, job_client: JobClientBase, schema: Schema, expected_update: TSchemaTables, load_id: str, truncate_filter: Callable[[TTableSchema], bool], truncate_staging_filter: Callable[[TTableSchema], bool]) -> TSchemaTables: - + def _init_client( + self, 
+ job_client: JobClientBase, + schema: Schema, + expected_update: TSchemaTables, + load_id: str, + truncate_filter: Callable[[TTableSchema], bool], + truncate_staging_filter: Callable[[TTableSchema], bool], + ) -> TSchemaTables: tables_with_jobs = set(job.table_name for job in self.get_new_jobs_info(load_id)) dlt_tables = set(t["name"] for t in schema.dlt_tables()) # update the default dataset - truncate_tables = self._get_table_chain_tables_with_filter(schema, truncate_filter, tables_with_jobs) - applied_update = self._init_dataset_and_update_schema(job_client, expected_update, tables_with_jobs | dlt_tables, truncate_tables) + truncate_tables = self._get_table_chain_tables_with_filter( + schema, truncate_filter, tables_with_jobs + ) + applied_update = self._init_dataset_and_update_schema( + job_client, expected_update, tables_with_jobs | dlt_tables, truncate_tables + ) # update the staging dataset if client supports this if isinstance(job_client, WithStagingDataset): - if staging_tables := self._get_table_chain_tables_with_filter(schema, truncate_staging_filter, tables_with_jobs): + if staging_tables := self._get_table_chain_tables_with_filter( + schema, truncate_staging_filter, tables_with_jobs + ): with job_client.with_staging_dataset(): - self._init_dataset_and_update_schema(job_client, expected_update, staging_tables | {schema.version_table_name}, staging_tables, staging_info=True) + self._init_dataset_and_update_schema( + job_client, + expected_update, + staging_tables | {schema.version_table_name}, + staging_tables, + staging_info=True, + ) return applied_update - def load_single_package(self, load_id: str, schema: Schema) -> None: # initialize analytical storage ie. create dataset required by passed schema with self.get_destination_client(schema) as job_client: - if (expected_update := self.load_storage.begin_schema_update(load_id)) is not None: - # init job client - applied_update = self._init_client(job_client, schema, expected_update, load_id, job_client.should_truncate_table_before_load, job_client.should_load_data_to_staging_dataset if isinstance(job_client, WithStagingDataset) else None) + applied_update = self._init_client( + job_client, + schema, + expected_update, + load_id, + job_client.should_truncate_table_before_load, + ( + job_client.should_load_data_to_staging_dataset + if isinstance(job_client, WithStagingDataset) + else None + ), + ) # init staging client - if self.staging_destination and isinstance(job_client, SupportsStagingDestination): + if self.staging_destination: + assert isinstance(job_client, SupportsStagingDestination), ( + f"Job client for destination {self.destination.destination_type} does not" + " implement SupportsStagingDestination" + ) with self.get_staging_destination_client(schema) as staging_client: - self._init_client(staging_client, schema, expected_update, load_id, job_client.should_truncate_table_before_load_on_staging_destination, job_client.should_load_data_to_staging_dataset_on_staging_destination) + self._init_client( + staging_client, + schema, + expected_update, + load_id, + job_client.should_truncate_table_before_load_on_staging_destination, + job_client.should_load_data_to_staging_dataset_on_staging_destination, + ) self.load_storage.commit_schema_update(load_id, applied_update) @@ -346,31 +491,44 @@ def load_single_package(self, load_id: str, schema: Schema) -> None: self.complete_package(load_id, schema, False) return # update counter we only care about the jobs that are scheduled to be loaded - package_info = 
self.load_storage.get_load_package_info(load_id) + package_info = self.load_storage.normalized_packages.get_load_package_info(load_id) total_jobs = reduce(lambda p, c: p + len(c), package_info.jobs.values(), 0) no_failed_jobs = len(package_info.jobs["failed_jobs"]) no_completed_jobs = len(package_info.jobs["completed_jobs"]) + no_failed_jobs self.collector.update("Jobs", no_completed_jobs, total_jobs) if no_failed_jobs > 0: - self.collector.update("Jobs", no_failed_jobs, message="WARNING: Some of the jobs failed!", label="Failed") + self.collector.update( + "Jobs", no_failed_jobs, message="WARNING: Some of the jobs failed!", label="Failed" + ) # loop until all jobs are processed while True: try: remaining_jobs = self.complete_jobs(load_id, jobs, schema) if len(remaining_jobs) == 0: # get package status - package_info = self.load_storage.get_load_package_info(load_id) + package_info = self.load_storage.normalized_packages.get_load_package_info( + load_id + ) # possibly raise on failed jobs if self.config.raise_on_failed_jobs: if package_info.jobs["failed_jobs"]: failed_job = package_info.jobs["failed_jobs"][0] - raise LoadClientJobFailed(load_id, failed_job.job_file_info.job_id(), failed_job.failed_message) + raise LoadClientJobFailed( + load_id, + failed_job.job_file_info.job_id(), + failed_job.failed_message, + ) # possibly raise on too many retires if self.config.raise_on_max_retries: for new_job in package_info.jobs["new_jobs"]: r_c = new_job.job_file_info.retry_count if r_c > 0 and r_c % self.config.raise_on_max_retries == 0: - raise LoadClientJobRetry(load_id, new_job.job_file_info.job_id(), r_c, self.config.raise_on_max_retries) + raise LoadClientJobRetry( + load_id, + new_job.job_file_info.job_id(), + r_c, + self.config.raise_on_max_retries, + ) break # process remaining jobs again jobs = remaining_jobs @@ -395,36 +553,56 @@ def run(self, pool: Optional[Executor]) -> TRunMetrics: # load the schema from the package load_id = loads[0] logger.info(f"Loading schema from load package in {load_id}") - schema = self.load_storage.load_package_schema(load_id) + schema = self.load_storage.normalized_packages.load_schema(load_id) logger.info(f"Loaded schema name {schema.name} and version {schema.stored_version}") # get top load id and mark as being processed - # TODO: another place where tracing must be refactored - self._processed_load_ids[load_id] = None with self.collector(f"Load {schema.name} in {load_id}"): + # the same load id may be processed across multiple runs + if not self.current_load_id: + self._step_info_start_load_id(load_id) self.load_single_package(load_id, schema) return TRunMetrics(False, len(self.load_storage.list_normalized_packages())) - def get_load_info(self, pipeline: SupportsPipeline, started_at: datetime.datetime = None) -> LoadInfo: + def get_step_info( + self, + pipeline: SupportsPipeline, + ) -> LoadInfo: # TODO: LoadInfo should hold many datasets - load_ids = list(self._processed_load_ids.keys()) - load_packages: List[LoadPackageInfo] = [] + load_ids = list(self._load_id_metrics.keys()) + metrics: Dict[str, List[LoadMetrics]] = {} # get load packages and dataset_name from the last package _dataset_name: str = None - for load_id, _dataset_name in self._processed_load_ids.items(): - load_packages.append(self.load_storage.get_load_package_info(load_id)) + for load_package in self._loaded_packages: + # TODO: each load id may have a separate dataset so construct a list of datasets here + if isinstance(self.initial_client_config, DestinationClientDwhConfiguration): + 
_dataset_name = self.initial_client_config.normalize_dataset_name( + load_package.schema + ) + metrics[load_package.load_id] = self._step_info_metrics(load_package.load_id) return LoadInfo( pipeline, - self.initial_client_config.destination_name, + metrics, + Destination.normalize_type(self.initial_client_config.destination_type), str(self.initial_client_config), - self.initial_staging_client_config.destination_name if self.initial_staging_client_config else None, + self.initial_client_config.destination_name, + self.initial_client_config.environment, + ( + Destination.normalize_type(self.initial_staging_client_config.destination_type) + if self.initial_staging_client_config + else None + ), + ( + self.initial_staging_client_config.destination_name + if self.initial_staging_client_config + else None + ), str(self.initial_staging_client_config) if self.initial_staging_client_config else None, self.initial_client_config.fingerprint(), _dataset_name, list(load_ids), - load_packages, - started_at, - pipeline.first_run + self._loaded_packages, + pipeline.first_run, ) diff --git a/dlt/normalize/__init__.py b/dlt/normalize/__init__.py index 25d6a4afd3..b2fba68797 100644 --- a/dlt/normalize/__init__.py +++ b/dlt/normalize/__init__.py @@ -1,3 +1,3 @@ from .normalize import Normalize -__all__ = ['Normalize'] +__all__ = ["Normalize"] diff --git a/dlt/normalize/configuration.py b/dlt/normalize/configuration.py index f34f8b6fdc..3949a07fa8 100644 --- a/dlt/normalize/configuration.py +++ b/dlt/normalize/configuration.py @@ -1,10 +1,15 @@ -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING from dlt.common.configuration import configspec from dlt.common.configuration.specs import BaseConfiguration from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.runners.configuration import PoolRunnerConfiguration, TPoolType -from dlt.common.storages import LoadStorageConfiguration, NormalizeStorageConfiguration, SchemaStorageConfiguration +from dlt.common.storages import ( + LoadStorageConfiguration, + NormalizeStorageConfiguration, + SchemaStorageConfiguration, +) + @configspec class ItemsNormalizerConfiguration(BaseConfiguration): @@ -14,8 +19,8 @@ class ItemsNormalizerConfiguration(BaseConfiguration): """When true, items to be normalized will have `_dlt_load_id` column added with the current load ID.""" if TYPE_CHECKING: - def __init__(self, add_dlt_id: bool = None, add_dlt_load_id: bool = None) -> None: - ... + + def __init__(self, add_dlt_id: bool = None, add_dlt_load_id: bool = None) -> None: ... 
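Editorial aside, not part of the patch: `ItemsNormalizerConfiguration` above exposes two flags that control which `_dlt_*` columns the normalizer injects. A sketch of setting them programmatically, assuming only the constructor stub shown in this hunk; how such an instance is wired into a running pipeline is not shown here.

from dlt.normalize.configuration import ItemsNormalizerConfiguration

# keep the `_dlt_load_id` column on normalized items but do not add `_dlt_id`
json_normalizer_cfg = ItemsNormalizerConfiguration(add_dlt_id=False, add_dlt_load_id=True)

The hunk below wires the defaults: json items get both columns, parquet/arrow items get neither.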
@configspec @@ -26,20 +31,24 @@ class NormalizeConfiguration(PoolRunnerConfiguration): _normalize_storage_config: NormalizeStorageConfiguration _load_storage_config: LoadStorageConfiguration - json_normalizer: ItemsNormalizerConfiguration = ItemsNormalizerConfiguration(add_dlt_id=True, add_dlt_load_id=True) + json_normalizer: ItemsNormalizerConfiguration = ItemsNormalizerConfiguration( + add_dlt_id=True, add_dlt_load_id=True + ) - parquet_normalizer: ItemsNormalizerConfiguration = ItemsNormalizerConfiguration(add_dlt_id=False, add_dlt_load_id=False) + parquet_normalizer: ItemsNormalizerConfiguration = ItemsNormalizerConfiguration( + add_dlt_id=False, add_dlt_load_id=False + ) def on_resolved(self) -> None: self.pool_type = "none" if self.workers == 1 else "process" if TYPE_CHECKING: + def __init__( self, pool_type: TPoolType = "process", workers: int = None, _schema_storage_config: SchemaStorageConfiguration = None, _normalize_storage_config: NormalizeStorageConfiguration = None, - _load_storage_config: LoadStorageConfiguration = None - ) -> None: - ... + _load_storage_config: LoadStorageConfiguration = None, + ) -> None: ... diff --git a/dlt/normalize/exceptions.py b/dlt/normalize/exceptions.py index e69de29bb2..a172196899 100644 --- a/dlt/normalize/exceptions.py +++ b/dlt/normalize/exceptions.py @@ -0,0 +1,16 @@ +from dlt.common.exceptions import DltException + + +class NormalizeException(DltException): + def __init__(self, msg: str) -> None: + super().__init__(msg) + + +class NormalizeJobFailed(NormalizeException): + def __init__(self, load_id: str, job_id: str, failed_message: str) -> None: + self.load_id = load_id + self.job_id = job_id + self.failed_message = failed_message + super().__init__( + f"Job for {job_id} failed terminally in load {load_id} with message {failed_message}." 
+ ) diff --git a/dlt/normalize/items_normalizers.py b/dlt/normalize/items_normalizers.py index b9bd5468dc..2167250036 100644 --- a/dlt/normalize/items_normalizers.py +++ b/dlt/normalize/items_normalizers.py @@ -1,20 +1,23 @@ -import os -from typing import List, Dict, Tuple, Protocol, Any -from pathlib import Path +from typing import List, Dict, Set, Any from abc import abstractmethod from dlt.common import json, logger -from dlt.common.json import custom_pua_decode +from dlt.common.data_writers import DataWriterMetrics +from dlt.common.json import custom_pua_decode, may_have_pua from dlt.common.runtime import signals -from dlt.common.schema.typing import TTableSchemaColumns -from dlt.common.storages import NormalizeStorage, LoadStorage, NormalizeStorageConfiguration, FileStorage -from dlt.common.typing import TDataItem +from dlt.common.schema.typing import TSchemaEvolutionMode, TTableSchemaColumns, TSchemaContractDict +from dlt.common.storages import ( + NormalizeStorage, + LoadStorage, +) +from dlt.common.storages.load_package import ParsedLoadJobFileName +from dlt.common.typing import DictStrAny, TDataItem from dlt.common.schema import TSchemaUpdate, Schema -from dlt.common.utils import TRowCount, merge_row_count, increase_row_count -from dlt.normalize.configuration import NormalizeConfiguration from dlt.common.exceptions import MissingDependencyException from dlt.common.normalizers.utils import generate_dlt_ids +from dlt.normalize.configuration import NormalizeConfiguration + try: from dlt.common.libs import pyarrow from dlt.common.libs.pyarrow import pyarrow as pa @@ -30,7 +33,7 @@ def __init__( normalize_storage: NormalizeStorage, schema: Schema, load_id: str, - config: NormalizeConfiguration + config: NormalizeConfiguration, ) -> None: self.load_storage = load_storage self.normalize_storage = normalize_storage @@ -39,82 +42,171 @@ def __init__( self.config = config @abstractmethod - def __call__(self, extracted_items_file: str, root_table_name: str) -> Tuple[List[TSchemaUpdate], int, TRowCount]: - ... + def __call__(self, extracted_items_file: str, root_table_name: str) -> List[TSchemaUpdate]: ... 
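Editorial aside, not part of the patch: after this change an `ItemsNormalizer` subclass only returns the list of schema updates it produced; item and row counts are no longer threaded through the return value. A deliberately trivial subclass for illustration only (the real implementations follow below):

from typing import List

from dlt.common.schema import TSchemaUpdate

class NoOpItemsNormalizer(ItemsNormalizer):
    def __call__(self, extracted_items_file: str, root_table_name: str) -> List[TSchemaUpdate]:
        # a real normalizer (JsonLItemsNormalizer, ParquetItemsNormalizer below) reads the
        # extracted file, coerces rows into the schema and writes them via self.load_storage,
        # collecting a TSchemaUpdate for every partial table it generated
        return []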
class JsonLItemsNormalizer(ItemsNormalizer): - def _normalize_chunk(self, root_table_name: str, items: List[TDataItem]) -> Tuple[TSchemaUpdate, int, TRowCount]: - column_schemas: Dict[ - str, TTableSchemaColumns - ] = {} # quick access to column schema for writers below + def __init__( + self, + load_storage: LoadStorage, + normalize_storage: NormalizeStorage, + schema: Schema, + load_id: str, + config: NormalizeConfiguration, + ) -> None: + super().__init__(load_storage, normalize_storage, schema, load_id, config) + self._table_contracts: Dict[str, TSchemaContractDict] = {} + self._filtered_tables: Set[str] = set() + self._filtered_tables_columns: Dict[str, Dict[str, TSchemaEvolutionMode]] = {} + # quick access to column schema for writers below + self._column_schemas: Dict[str, TTableSchemaColumns] = {} + + def _filter_columns( + self, filtered_columns: Dict[str, TSchemaEvolutionMode], row: DictStrAny + ) -> DictStrAny: + for name, mode in filtered_columns.items(): + if name in row: + if mode == "discard_row": + return None + elif mode == "discard_value": + row.pop(name) + return row + + def _normalize_chunk( + self, root_table_name: str, items: List[TDataItem], may_have_pua: bool + ) -> TSchemaUpdate: + column_schemas = self._column_schemas schema_update: TSchemaUpdate = {} schema = self.schema schema_name = schema.name - items_count = 0 - row_counts: TRowCount = {} + normalize_data_fun = self.schema.normalize_data_item for item in items: - for (table_name, parent_table), row in self.schema.normalize_data_item( - item, self.load_id, root_table_name - ): - # filter row, may eliminate some or all fields - row = schema.filter_row(table_name, row) - # do not process empty rows - if row: + items_gen = normalize_data_fun(item, self.load_id, root_table_name) + try: + should_descend: bool = None + # use send to prevent descending into child rows when row was discarded + while row_info := items_gen.send(should_descend): + should_descend = True + (table_name, parent_table), row = row_info + + # rows belonging to filtered out tables are skipped + if table_name in self._filtered_tables: + # stop descending into further rows + should_descend = False + continue + + # filter row, may eliminate some or all fields + row = schema.filter_row(table_name, row) + # do not process empty rows + if not row: + should_descend = False + continue + + # filter columns or full rows if schema contract said so + # do it before schema inference in `coerce_row` to not trigger costly migration code + filtered_columns = self._filtered_tables_columns.get(table_name, None) + if filtered_columns: + row = self._filter_columns(filtered_columns, row) # type: ignore[arg-type] + # if whole row got dropped + if not row: + should_descend = False + continue + # decode pua types - for k, v in row.items(): - row[k] = custom_pua_decode(v) # type: ignore + if may_have_pua: + for k, v in row.items(): + row[k] = custom_pua_decode(v) # type: ignore + # coerce row of values into schema table, generating partial table with new columns if any - row, partial_table = schema.coerce_row( - table_name, parent_table, row - ) - # theres a new table or new columns in existing table + row, partial_table = schema.coerce_row(table_name, parent_table, row) + + # if we detect a migration, check schema contract if partial_table: + schema_contract = self._table_contracts.setdefault( + table_name, + schema.resolve_contract_settings_for_table( + parent_table or table_name + ), # parent_table, if present, exists in the schema + ) + partial_table, filters = 
schema.apply_schema_contract( + schema_contract, partial_table, data_item=row + ) + if filters: + for entity, name, mode in filters: + if entity == "tables": + self._filtered_tables.add(name) + elif entity == "columns": + filtered_columns = self._filtered_tables_columns.setdefault( + table_name, {} + ) + filtered_columns[name] = mode + + if partial_table is None: + # discard migration and row + should_descend = False + continue + # theres a new table or new columns in existing table # update schema and save the change schema.update_table(partial_table) table_updates = schema_update.setdefault(table_name, []) table_updates.append(partial_table) + # update our columns - column_schemas[table_name] = schema.get_table_columns( - table_name - ) + column_schemas[table_name] = schema.get_table_columns(table_name) + + # apply new filters + if filtered_columns and filters: + row = self._filter_columns(filtered_columns, row) + # do not continue if new filters skipped the full row + if not row: + should_descend = False + continue + # get current columns schema columns = column_schemas.get(table_name) if not columns: columns = schema.get_table_columns(table_name) column_schemas[table_name] = columns # store row - # TODO: it is possible to write to single file from many processes using this: https://gitlab.com/warsaw/flufl.lock + # TODO: store all rows for particular items all together after item is fully completed + # will be useful if we implement bad data sending to a table self.load_storage.write_data_item( self.load_id, schema_name, table_name, row, columns ) - # count total items - items_count += 1 - increase_row_count(row_counts, table_name, 1) + except StopIteration: + pass signals.raise_if_signalled() - return schema_update, items_count, row_counts + return schema_update def __call__( self, extracted_items_file: str, root_table_name: str, - ) -> Tuple[List[TSchemaUpdate], int, TRowCount]: + ) -> List[TSchemaUpdate]: schema_updates: List[TSchemaUpdate] = [] - row_counts: TRowCount = {} - with self.normalize_storage.storage.open_file(extracted_items_file) as f: + with self.normalize_storage.extracted_packages.storage.open_file( + extracted_items_file, "rb" + ) as f: # enumerate jsonl file line by line - items_count = 0 + line: bytes = None for line_no, line in enumerate(f): - items: List[TDataItem] = json.loads(line) - partial_update, items_count, r_counts = self._normalize_chunk(root_table_name, items) + items: List[TDataItem] = json.loadb(line) + partial_update = self._normalize_chunk(root_table_name, items, may_have_pua(line)) schema_updates.append(partial_update) - merge_row_count(row_counts, r_counts) + logger.debug(f"Processed {line_no} lines from file {extracted_items_file}") + if line is None and root_table_name in self.schema.tables: + self.load_storage.write_empty_items_file( + self.load_id, + self.schema.name, + root_table_name, + self.schema.get_table_columns(root_table_name), + ) logger.debug( - f"Processed {line_no} items from file {extracted_items_file}, items {items_count}" + f"No lines in file {extracted_items_file}, written empty load job file" ) - return schema_updates, items_count, row_counts + return schema_updates class ParquetItemsNormalizer(ItemsNormalizer): @@ -122,46 +214,83 @@ class ParquetItemsNormalizer(ItemsNormalizer): def _write_with_dlt_columns( self, extracted_items_file: str, root_table_name: str, add_load_id: bool, add_dlt_id: bool - ) -> Tuple[List[TSchemaUpdate], int]: + ) -> List[TSchemaUpdate]: new_columns: List[Any] = [] schema = self.schema load_id = 
self.load_id schema_update: TSchemaUpdate = {} if add_load_id: - table_update = schema.update_table({"name": root_table_name, "columns": {"_dlt_load_id": {"name": "_dlt_load_id", "data_type": "text", "nullable": False}}}) + table_update = schema.update_table( + { + "name": root_table_name, + "columns": { + "_dlt_load_id": { + "name": "_dlt_load_id", + "data_type": "text", + "nullable": False, + } + }, + } + ) table_updates = schema_update.setdefault(root_table_name, []) table_updates.append(table_update) load_id_type = pa.dictionary(pa.int8(), pa.string()) - new_columns.append(( - pa.field("_dlt_load_id", load_id_type, nullable=False), - lambda batch: pa.array([load_id] * batch.num_rows, type=load_id_type) - )) + new_columns.append( + ( + -1, + pa.field("_dlt_load_id", load_id_type, nullable=False), + lambda batch: pa.array([load_id] * batch.num_rows, type=load_id_type), + ) + ) if add_dlt_id: - table_update = schema.update_table({"name": root_table_name, "columns": {"_dlt_id": {"name": "_dlt_id", "data_type": "text", "nullable": False}}}) + table_update = schema.update_table( + { + "name": root_table_name, + "columns": { + "_dlt_id": {"name": "_dlt_id", "data_type": "text", "nullable": False} + }, + } + ) table_updates = schema_update.setdefault(root_table_name, []) table_updates.append(table_update) - new_columns.append(( - pa.field("_dlt_id", pyarrow.pyarrow.string(), nullable=False), - lambda batch: pa.array(generate_dlt_ids(batch.num_rows)) - )) + new_columns.append( + ( + -1, + pa.field("_dlt_id", pyarrow.pyarrow.string(), nullable=False), + lambda batch: pa.array(generate_dlt_ids(batch.num_rows)), + ) + ) items_count = 0 as_py = self.load_storage.loader_file_format != "arrow" - with self.normalize_storage.storage.open_file(extracted_items_file, "rb") as f: - for batch in pyarrow.pq_stream_with_new_columns(f, new_columns, row_groups_per_read=self.REWRITE_ROW_GROUPS): + with self.normalize_storage.extracted_packages.storage.open_file( + extracted_items_file, "rb" + ) as f: + for batch in pyarrow.pq_stream_with_new_columns( + f, new_columns, row_groups_per_read=self.REWRITE_ROW_GROUPS + ): items_count += batch.num_rows if as_py: # Write python rows to jsonl, insert-values, etc... 
storage - self.load_storage.write_data_item( - load_id, schema.name, root_table_name, batch.to_pylist(), schema.get_table_columns(root_table_name) - ) - else: - self.load_storage.write_data_item( - load_id, schema.name, root_table_name, batch, schema.get_table_columns(root_table_name) - ) - return [schema_update], items_count + batch = batch.to_pylist() + self.load_storage.write_data_item( + load_id, + schema.name, + root_table_name, + batch, + schema.get_table_columns(root_table_name), + ) + if items_count == 0: + self.load_storage.write_empty_items_file( + load_id, + schema.name, + root_table_name, + self.schema.get_table_columns(root_table_name), + ) + + return [schema_update] def _fix_schema_precisions(self, root_table_name: str) -> List[TSchemaUpdate]: """Reduce precision of timestamp columns if needed, according to destination caps""" @@ -177,37 +306,36 @@ def _fix_schema_precisions(self, root_table_name: str) -> List[TSchemaUpdate]: if not new_cols: return [] - return [{root_table_name: [schema.update_table({ - "name": root_table_name, - "columns": new_cols - })]}] + return [ + {root_table_name: [schema.update_table({"name": root_table_name, "columns": new_cols})]} + ] - def __call__( - self, extracted_items_file: str, root_table_name: str - ) -> Tuple[List[TSchemaUpdate], int, TRowCount]: + def __call__(self, extracted_items_file: str, root_table_name: str) -> List[TSchemaUpdate]: base_schema_update = self._fix_schema_precisions(root_table_name) - import pyarrow as pa add_dlt_id = self.config.parquet_normalizer.add_dlt_id add_dlt_load_id = self.config.parquet_normalizer.add_dlt_load_id if add_dlt_id or add_dlt_load_id or self.load_storage.loader_file_format != "arrow": - schema_update, items_count = self._write_with_dlt_columns( - extracted_items_file, - root_table_name, - add_dlt_load_id, - add_dlt_id + schema_update = self._write_with_dlt_columns( + extracted_items_file, root_table_name, add_dlt_load_id, add_dlt_id ) - return base_schema_update + schema_update, items_count, {root_table_name: items_count} + return base_schema_update + schema_update from dlt.common.libs.pyarrow import get_row_count - with self.normalize_storage.storage.open_file(extracted_items_file, "rb") as f: - items_count = get_row_count(f) - target_folder = self.load_storage.storage.make_full_path(os.path.join(self.load_id, LoadStorage.NEW_JOBS_FOLDER)) - parts = NormalizeStorage.parse_normalize_file_name(extracted_items_file) - new_file_name = self.load_storage.build_job_file_name(parts.table_name, parts.file_id, with_extension=True) - FileStorage.link_hard_with_fallback( - self.normalize_storage.storage.make_full_path(extracted_items_file), - os.path.join(target_folder, new_file_name) + + with self.normalize_storage.extracted_packages.storage.open_file( + extracted_items_file, "rb" + ) as f: + file_metrics = DataWriterMetrics(extracted_items_file, get_row_count(f), f.tell(), 0, 0) + + parts = ParsedLoadJobFileName.parse(extracted_items_file) + self.load_storage.import_items_file( + self.load_id, + self.schema.name, + parts.table_name, + self.normalize_storage.extracted_packages.storage.make_full_path(extracted_items_file), + file_metrics, ) - return base_schema_update, items_count, {root_table_name: items_count} + + return base_schema_update diff --git a/dlt/normalize/normalize.py b/dlt/normalize/normalize.py index d60ea05965..0a3c6784c7 100644 --- a/dlt/normalize/normalize.py +++ b/dlt/normalize/normalize.py @@ -1,69 +1,98 @@ import os -from typing import Callable, List, Dict, Sequence, Tuple, Set, Optional 
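The `while row_info := items_gen.send(should_descend)` loop in `JsonLItemsNormalizer._normalize_chunk` above relies on the data item normalizer being a generator that accepts a value back from its consumer: sending a falsy value tells it not to descend into the child rows of an item that was just filtered out. A minimal, self-contained sketch of that send-based protocol, with an invented `normalize_item` helper and nested-list layout standing in for dlt's real relational normalizer:

```python
from typing import Any, Dict, Generator, List, Tuple

Row = Dict[str, Any]


def normalize_item(item: Row, table: str) -> Generator[Tuple[str, Row], bool, None]:
    """Yield (table, row) pairs; descend into child lists only when the consumer sends a truthy value."""
    children: List[Tuple[str, List[Row]]] = [
        (f"{table}__{key}", value) for key, value in item.items() if isinstance(value, list)
    ]
    row = {key: value for key, value in item.items() if not isinstance(value, list)}
    should_descend = yield (table, row)
    if not should_descend:
        return
    for child_table, child_rows in children:
        for child in child_rows:
            yield from normalize_item(child, child_table)


gen = normalize_item({"id": 1, "bad": True, "items": [{"x": 1}]}, "orders")
should_descend = None
while True:
    try:
        table, row = gen.send(should_descend)
    except StopIteration:
        break
    # pretend a contract filter drops rows flagged "bad": do not descend into their children
    should_descend = not row.get("bad", False)
    print(table, row, "descend into children:", should_descend)
```

The same pattern lets the normalizer above short-circuit an entire row subtree when a schema contract discards the parent row, without ever materializing the child rows.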
+import datetime # noqa: 251 +import itertools +from typing import Callable, List, Dict, NamedTuple, Sequence, Tuple, Set, Optional from concurrent.futures import Future, Executor -from dlt.common import pendulum, json, logger, sleep +from dlt.common import logger, sleep from dlt.common.configuration import with_config, known_sections from dlt.common.configuration.accessors import config from dlt.common.configuration.container import Container +from dlt.common.data_writers import DataWriterMetrics +from dlt.common.data_writers.writers import EMPTY_DATA_WRITER_METRICS from dlt.common.destination import TLoaderFileFormat from dlt.common.runners import TRunMetrics, Runnable, NullExecutor from dlt.common.runtime import signals from dlt.common.runtime.collector import Collector, NULL_COLLECTOR from dlt.common.schema.typing import TStoredSchema from dlt.common.schema.utils import merge_schema_updates -from dlt.common.storages.exceptions import SchemaNotFoundError -from dlt.common.storages import NormalizeStorage, SchemaStorage, LoadStorage, LoadStorageConfiguration, NormalizeStorageConfiguration -from dlt.common.typing import TDataItem +from dlt.common.storages import ( + NormalizeStorage, + SchemaStorage, + LoadStorage, + LoadStorageConfiguration, + NormalizeStorageConfiguration, + ParsedLoadJobFileName, +) from dlt.common.schema import TSchemaUpdate, Schema from dlt.common.schema.exceptions import CannotCoerceColumnException -from dlt.common.pipeline import NormalizeInfo -from dlt.common.utils import chunks, TRowCount, merge_row_count, increase_row_count +from dlt.common.pipeline import ( + NormalizeInfo, + NormalizeMetrics, + SupportsPipeline, + WithStepInfo, +) +from dlt.common.storages.exceptions import LoadPackageNotFound +from dlt.common.storages.load_package import LoadPackageInfo +from dlt.common.utils import chunks from dlt.normalize.configuration import NormalizeConfiguration -from dlt.normalize.items_normalizers import ParquetItemsNormalizer, JsonLItemsNormalizer, ItemsNormalizer +from dlt.normalize.exceptions import NormalizeJobFailed +from dlt.normalize.items_normalizers import ( + ParquetItemsNormalizer, + JsonLItemsNormalizer, + ItemsNormalizer, +) + + +class TWorkerRV(NamedTuple): + schema_updates: List[TSchemaUpdate] + file_metrics: List[DataWriterMetrics] + -# normalize worker wrapping function (map_parallel, map_single) return type -TMapFuncRV = Tuple[Sequence[TSchemaUpdate], TRowCount] # normalize worker wrapping function signature -TMapFuncType = Callable[[Schema, str, Sequence[str]], TMapFuncRV] # input parameters: (schema name, load_id, list of files to process) -# tuple returned by the worker -TWorkerRV = Tuple[List[TSchemaUpdate], int, List[str], TRowCount] +TMapFuncType = Callable[ + [Schema, str, Sequence[str]], TWorkerRV +] # input parameters: (schema name, load_id, list of files to process) -class Normalize(Runnable[Executor]): +class Normalize(Runnable[Executor], WithStepInfo[NormalizeMetrics, NormalizeInfo]): pool: Executor + @with_config(spec=NormalizeConfiguration, sections=(known_sections.NORMALIZE,)) - def __init__(self, collector: Collector = NULL_COLLECTOR, schema_storage: SchemaStorage = None, config: NormalizeConfiguration = config.value) -> None: + def __init__( + self, + collector: Collector = NULL_COLLECTOR, + schema_storage: SchemaStorage = None, + config: NormalizeConfiguration = config.value, + ) -> None: self.config = config self.collector = collector self.normalize_storage: NormalizeStorage = None self.pool = NullExecutor() self.load_storage: 
LoadStorage = None self.schema_storage: SchemaStorage = None - self._row_counts: TRowCount = {} # setup storages self.create_storages() # create schema storage with give type - self.schema_storage = schema_storage or SchemaStorage(self.config._schema_storage_config, makedirs=True) + self.schema_storage = schema_storage or SchemaStorage( + self.config._schema_storage_config, makedirs=True + ) + super().__init__() def create_storages(self) -> None: # pass initial normalize storage config embedded in normalize config - self.normalize_storage = NormalizeStorage(True, config=self.config._normalize_storage_config) + self.normalize_storage = NormalizeStorage( + True, config=self.config._normalize_storage_config + ) # normalize saves in preferred format but can read all supported formats - self.load_storage = LoadStorage(True, self.config.destination_capabilities.preferred_loader_file_format, LoadStorage.ALL_SUPPORTED_FILE_FORMATS, config=self.config._load_storage_config) - - @staticmethod - def load_or_create_schema(schema_storage: SchemaStorage, schema_name: str) -> Schema: - try: - schema = schema_storage.load_schema(schema_name) - schema.update_normalizers() - logger.info(f"Loaded schema with name {schema_name} with version {schema.stored_version}") - except SchemaNotFoundError: - schema = Schema(schema_name) - logger.info(f"Created new schema with name {schema_name}") - return schema + self.load_storage = LoadStorage( + True, + self.config.destination_capabilities.preferred_loader_file_format, + LoadStorage.ALL_SUPPORTED_FILE_FORMATS, + config=self.config._load_storage_config, + ) @staticmethod def w_normalize_files( @@ -76,89 +105,84 @@ def w_normalize_files( ) -> TWorkerRV: destination_caps = config.destination_capabilities schema_updates: List[TSchemaUpdate] = [] - total_items = 0 - row_counts: TRowCount = {} - load_storages: Dict[TLoaderFileFormat, LoadStorage] = {} + item_normalizers: Dict[TLoaderFileFormat, ItemsNormalizer] = {} - def _get_load_storage(file_format: TLoaderFileFormat) -> LoadStorage: + def _create_load_storage(file_format: TLoaderFileFormat) -> LoadStorage: + """Creates a load storage for particular file_format""" # TODO: capabilities.supported_*_formats can be None, it should have defaults supported_formats = destination_caps.supported_loader_file_formats or [] if file_format == "parquet": if file_format in supported_formats: - supported_formats.append("arrow") # TODO: Hack to make load storage use the correct writer + supported_formats.append( + "arrow" + ) # TODO: Hack to make load storage use the correct writer file_format = "arrow" else: # Use default storage if parquet is not supported to make normalizer fallback to read rows from the file - file_format = destination_caps.preferred_loader_file_format or destination_caps.preferred_staging_file_format + file_format = ( + destination_caps.preferred_loader_file_format + or destination_caps.preferred_staging_file_format + ) else: - file_format = destination_caps.preferred_loader_file_format or destination_caps.preferred_staging_file_format - if storage := load_storages.get(file_format): - return storage - storage = load_storages[file_format] = LoadStorage(False, file_format, supported_formats, loader_storage_config) - return storage + file_format = ( + destination_caps.preferred_loader_file_format + or destination_caps.preferred_staging_file_format + ) + return LoadStorage(False, file_format, supported_formats, loader_storage_config) # process all files with data items and write to buffered item storage with 
Container().injectable_context(destination_caps): schema = Schema.from_stored_schema(stored_schema) - load_storage = _get_load_storage(destination_caps.preferred_loader_file_format) # Default load storage, used for empty tables when no data normalize_storage = NormalizeStorage(False, normalize_storage_config) - item_normalizers: Dict[TLoaderFileFormat, ItemsNormalizer] = {} - - def _get_items_normalizer(file_format: TLoaderFileFormat) -> Tuple[ItemsNormalizer, LoadStorage]: - load_storage = _get_load_storage(file_format) + def _get_items_normalizer(file_format: TLoaderFileFormat) -> ItemsNormalizer: if file_format in item_normalizers: - return item_normalizers[file_format], load_storage + return item_normalizers[file_format] klass = ParquetItemsNormalizer if file_format == "parquet" else JsonLItemsNormalizer norm = item_normalizers[file_format] = klass( - load_storage, normalize_storage, schema, load_id, config + _create_load_storage(file_format), normalize_storage, schema, load_id, config ) - return norm, load_storage + return norm + parsed_file_name: ParsedLoadJobFileName = None try: root_tables: Set[str] = set() - populated_root_tables: Set[str] = set() for extracted_items_file in extracted_items_files: - line_no: int = 0 - parsed_file_name = NormalizeStorage.parse_normalize_file_name(extracted_items_file) + parsed_file_name = ParsedLoadJobFileName.parse(extracted_items_file) # normalize table name in case the normalization changed # NOTE: this is the best we can do, until a full lineage information is in the schema - root_table_name = schema.naming.normalize_table_identifier(parsed_file_name.table_name) + root_table_name = schema.naming.normalize_table_identifier( + parsed_file_name.table_name + ) root_tables.add(root_table_name) - logger.debug(f"Processing extracted items in {extracted_items_file} in load_id {load_id} with table name {root_table_name} and schema {schema.name}") - - file_format = parsed_file_name.file_format - normalizer, load_storage = _get_items_normalizer(file_format) - partial_updates, items_count, r_counts = normalizer(extracted_items_file, root_table_name) + normalizer = _get_items_normalizer(parsed_file_name.file_format) + logger.debug( + f"Processing extracted items in {extracted_items_file} in load_id" + f" {load_id} with table name {root_table_name} and schema {schema.name}" + ) + partial_updates = normalizer(extracted_items_file, root_table_name) schema_updates.extend(partial_updates) - total_items += items_count - merge_row_count(row_counts, r_counts) - if items_count > 0: - populated_root_tables.add(root_table_name) - logger.debug(f"Processed total {line_no + 1} lines from file {extracted_items_file}, total items {total_items}") - # make sure base tables are all covered - increase_row_count(row_counts, root_table_name, 0) - # write empty jobs for tables without items if table exists in schema - for table_name in root_tables - populated_root_tables: - if table_name not in schema.tables: - continue - logger.debug(f"Writing empty job for table {table_name}") - columns = schema.get_table_columns(table_name) - load_storage.write_empty_file(load_id, schema.name, table_name, columns) - except Exception: - logger.exception(f"Exception when processing file {extracted_items_file}, line {line_no}") - raise + logger.debug(f"Processed file {extracted_items_file}") + except Exception as exc: + job_id = parsed_file_name.job_id() if parsed_file_name else "" + raise NormalizeJobFailed(load_id, job_id, str(exc)) from exc finally: - load_storage.close_writers(load_id) - - 
logger.info(f"Processed total {total_items} items in {len(extracted_items_files)} files") + for normalizer in item_normalizers.values(): + normalizer.load_storage.close_writers(load_id) - return schema_updates, total_items, load_storage.closed_files(), row_counts + writer_metrics: List[DataWriterMetrics] = [] + for normalizer in item_normalizers.values(): + norm_metrics = normalizer.load_storage.closed_files(load_id) + writer_metrics.extend(norm_metrics) + logger.info(f"Processed all items in {len(extracted_items_files)} files") + return TWorkerRV(schema_updates, writer_metrics) def update_table(self, schema: Schema, schema_updates: List[TSchemaUpdate]) -> None: for schema_update in schema_updates: for table_name, table_updates in schema_update.items(): - logger.info(f"Updating schema for table {table_name} with {len(table_updates)} deltas") + logger.info( + f"Updating schema for table {table_name} with {len(table_updates)} deltas" + ) for partial_table in table_updates: # merge columns schema.update_table(partial_table) @@ -176,25 +200,31 @@ def group_worker_files(files: Sequence[str], no_groups: int) -> List[Sequence[st while remainder_l > 0: for idx, file in enumerate(reversed(chunk_files.pop())): chunk_files[-l_idx - idx - remainder_l].append(file) # type: ignore - remainder_l -=1 + remainder_l -= 1 l_idx = idx + 1 return chunk_files - def map_parallel(self, schema: Schema, load_id: str, files: Sequence[str]) -> TMapFuncRV: - workers: int = getattr(self.pool, '_max_workers', 1) + def map_parallel(self, schema: Schema, load_id: str, files: Sequence[str]) -> TWorkerRV: + workers: int = getattr(self.pool, "_max_workers", 1) chunk_files = self.group_worker_files(files, workers) schema_dict: TStoredSchema = schema.to_dict() - param_chunk = [( - self.config, self.normalize_storage.config, self.load_storage.config, schema_dict, load_id, files - ) for files in chunk_files] - row_counts: TRowCount = {} - + param_chunk = [ + ( + self.config, + self.normalize_storage.config, + self.load_storage.config, + schema_dict, + load_id, + files, + ) + for files in chunk_files + ] # return stats - schema_updates: List[TSchemaUpdate] = [] - + summary = TWorkerRV([], []) # push all tasks to queue tasks = [ - (self.pool.submit(Normalize.w_normalize_files, *params), params) for params in param_chunk + (self.pool.submit(Normalize.w_normalize_files, *params), params) + for params in param_chunk ] while len(tasks) > 0: @@ -203,34 +233,42 @@ def map_parallel(self, schema: Schema, load_id: str, files: Sequence[str]) -> TM for task in list(tasks): pending, params = task if pending.done(): - result: TWorkerRV = pending.result() # Exception in task (if any) is raised here + result: TWorkerRV = ( + pending.result() + ) # Exception in task (if any) is raised here try: # gather schema from all manifests, validate consistency and combine self.update_table(schema, result[0]) - schema_updates.extend(result[0]) + summary.schema_updates.extend(result.schema_updates) + summary.file_metrics.extend(result.file_metrics) # update metrics - self.collector.update("Files", len(result[2])) - self.collector.update("Items", result[1]) - # merge row counts - merge_row_count(row_counts, result[3]) + self.collector.update("Files", len(result.file_metrics)) + self.collector.update( + "Items", sum(result.file_metrics, EMPTY_DATA_WRITER_METRICS).items_count + ) except CannotCoerceColumnException as exc: # schema conflicts resulting from parallel executing - logger.warning(f"Parallel schema update conflict, retrying task ({str(exc)}") + 
logger.warning( + f"Parallel schema update conflict, retrying task ({str(exc)}" + ) # delete all files produced by the task - for file in result[2]: - os.remove(file) + for metrics in result.file_metrics: + os.remove(metrics.file_path) # schedule the task again schema_dict = schema.to_dict() # TODO: it's time for a named tuple params = params[:3] + (schema_dict,) + params[4:] - retry_pending: Future[TWorkerRV] = self.pool.submit(Normalize.w_normalize_files, *params) + retry_pending: Future[TWorkerRV] = self.pool.submit( + Normalize.w_normalize_files, *params + ) tasks.append((retry_pending, params)) # remove finished tasks tasks.remove(task) + logger.debug(f"{len(tasks)} tasks still remaining for {load_id}...") - return schema_updates, row_counts + return summary - def map_single(self, schema: Schema, load_id: str, files: Sequence[str]) -> TMapFuncRV: + def map_single(self, schema: Schema, load_id: str, files: Sequence[str]) -> TWorkerRV: result = Normalize.w_normalize_files( self.config, self.normalize_storage.config, @@ -239,74 +277,125 @@ def map_single(self, schema: Schema, load_id: str, files: Sequence[str]) -> TMap load_id, files, ) - self.update_table(schema, result[0]) - self.collector.update("Files", len(result[2])) - self.collector.update("Items", result[1]) - return result[0], result[3] - - def spool_files(self, schema_name: str, load_id: str, map_f: TMapFuncType, files: Sequence[str]) -> None: - schema = Normalize.load_or_create_schema(self.schema_storage, schema_name) + self.update_table(schema, result.schema_updates) + self.collector.update("Files", len(result.file_metrics)) + self.collector.update( + "Items", sum(result.file_metrics, EMPTY_DATA_WRITER_METRICS).items_count + ) + return result + def spool_files( + self, load_id: str, schema: Schema, map_f: TMapFuncType, files: Sequence[str] + ) -> None: # process files in parallel or in single thread, depending on map_f - schema_updates, row_counts = map_f(schema, load_id, files) - # logger.metrics("Normalize metrics", extra=get_logging_extras([self.schema_version_gauge.labels(schema_name)])) - if len(schema_updates) > 0: - logger.info(f"Saving schema {schema_name} with version {schema.version}, writing manifest files") - # schema is updated, save it to schema volume - self.schema_storage.save_schema(schema) - # save schema to temp load folder - self.load_storage.save_temp_schema(schema, load_id) + schema_updates, writer_metrics = map_f(schema, load_id, files) + # remove normalizer specific info + for table in schema.tables.values(): + table.pop("x-normalizer", None) # type: ignore[typeddict-item] + logger.info( + f"Saving schema {schema.name} with version {schema.stored_version}:{schema.version}" + ) + # schema is updated, save it to schema volume + self.schema_storage.save_schema(schema) + # save schema new package + self.load_storage.new_packages.save_schema(load_id, schema) # save schema updates even if empty - self.load_storage.save_temp_schema_updates(load_id, merge_schema_updates(schema_updates)) + self.load_storage.new_packages.save_schema_updates( + load_id, merge_schema_updates(schema_updates) + ) # files must be renamed and deleted together so do not attempt that when process is about to be terminated signals.raise_if_signalled() logger.info("Committing storage, do not kill this process") # rename temp folder to processing - self.load_storage.commit_temp_load_package(load_id) + self.load_storage.commit_new_load_package(load_id) # delete item files to complete commit - 
self.normalize_storage.delete_extracted_files(files) + self.normalize_storage.extracted_packages.delete_package(load_id) # log and update metrics - logger.info(f"Chunk {load_id} processed") - self._row_counts = row_counts + logger.info(f"Extracted package {load_id} processed") + job_metrics = {ParsedLoadJobFileName.parse(m.file_path): m for m in writer_metrics} + self._step_info_complete_load_id( + load_id, + { + "started_at": None, + "finished_at": None, + "job_metrics": {job.job_id(): metrics for job, metrics in job_metrics.items()}, + "table_metrics": { + table_name: sum(map(lambda pair: pair[1], metrics), EMPTY_DATA_WRITER_METRICS) + for table_name, metrics in itertools.groupby( + job_metrics.items(), lambda pair: pair[0].table_name + ) + }, + }, + ) - def spool_schema_files(self, load_id: str, schema_name: str, files: Sequence[str]) -> str: + def spool_schema_files(self, load_id: str, schema: Schema, files: Sequence[str]) -> str: # normalized files will go here before being atomically renamed - - self.load_storage.create_temp_load_package(load_id) - logger.info(f"Created temp load folder {load_id} on loading volume") + self.load_storage.new_packages.create_package(load_id) + logger.info(f"Created new load package {load_id} on loading volume") try: # process parallel - self.spool_files(schema_name, load_id, self.map_parallel, files) + self.spool_files( + load_id, schema.clone(update_normalizers=True), self.map_parallel, files + ) except CannotCoerceColumnException as exc: # schema conflicts resulting from parallel executing - logger.warning(f"Parallel schema update conflict, switching to single thread ({str(exc)}") + logger.warning( + f"Parallel schema update conflict, switching to single thread ({str(exc)}" + ) # start from scratch - self.load_storage.create_temp_load_package(load_id) - self.spool_files(schema_name, load_id, self.map_single, files) + self.load_storage.new_packages.delete_package(load_id) + self.load_storage.new_packages.create_package(load_id) + self.spool_files(load_id, schema.clone(update_normalizers=True), self.map_single, files) return load_id def run(self, pool: Optional[Executor]) -> TRunMetrics: # keep the pool in class instance self.pool = pool or NullExecutor() - self._row_counts = {} logger.info("Running file normalizing") - # list files and group by schema name, list must be sorted for group by to actually work - files = self.normalize_storage.list_files_to_normalize_sorted() - logger.info(f"Found {len(files)} files") - if len(files) == 0: + # list all load packages in extracted folder + load_ids = self.normalize_storage.extracted_packages.list_packages() + logger.info(f"Found {len(load_ids)} load packages") + if len(load_ids) == 0: return TRunMetrics(True, 0) - # group files by schema - for schema_name, files_iter in self.normalize_storage.group_by_schema(files): - schema_files = list(files_iter) - load_id = str(pendulum.now().timestamp()) - logger.info(f"Found {len(schema_files)} files in schema {schema_name} load_id {load_id}") - with self.collector(f"Normalize {schema_name} in {load_id}"): + for load_id in load_ids: + # read schema from package + schema = self.normalize_storage.extracted_packages.load_schema(load_id) + # read all files to normalize placed as new jobs + schema_files = self.normalize_storage.extracted_packages.list_new_jobs(load_id) + logger.info( + f"Found {len(schema_files)} files in schema {schema.name} load_id {load_id}" + ) + if len(schema_files) == 0: + # delete empty package + 
self.normalize_storage.extracted_packages.delete_package(load_id) + logger.info(f"Empty package {load_id} processed") + continue + with self.collector(f"Normalize {schema.name} in {load_id}"): self.collector.update("Files", 0, len(schema_files)) self.collector.update("Items", 0) - self.spool_schema_files(load_id, schema_name, schema_files) - # return info on still pending files (if extractor saved something in the meantime) - return TRunMetrics(False, len(self.normalize_storage.list_files_to_normalize_sorted())) + self._step_info_start_load_id(load_id) + self.spool_schema_files(load_id, schema, schema_files) + + # return info on still pending packages (if extractor saved something in the meantime) + return TRunMetrics(False, len(self.normalize_storage.extracted_packages.list_packages())) - def get_normalize_info(self) -> NormalizeInfo: - return NormalizeInfo(row_counts=self._row_counts) + def get_load_package_info(self, load_id: str) -> LoadPackageInfo: + """Returns information on extracted/normalized/completed package with given load_id, all jobs and their statuses.""" + try: + return self.load_storage.get_load_package_info(load_id) + except LoadPackageNotFound: + return self.normalize_storage.extracted_packages.get_load_package_info(load_id) + + def get_step_info( + self, + pipeline: SupportsPipeline, + ) -> NormalizeInfo: + load_ids = list(self._load_id_metrics.keys()) + load_packages: List[LoadPackageInfo] = [] + metrics: Dict[str, List[NormalizeMetrics]] = {} + for load_id in self._load_id_metrics.keys(): + load_package = self.get_load_package_info(load_id) + load_packages.append(load_package) + metrics[load_id] = self._step_info_metrics(load_id) + return NormalizeInfo(pipeline, metrics, load_ids, load_packages, pipeline.first_run) diff --git a/dlt/pipeline/__init__.py b/dlt/pipeline/__init__.py index 71c37c40ba..4101e58320 100644 --- a/dlt/pipeline/__init__.py +++ b/dlt/pipeline/__init__.py @@ -1,18 +1,19 @@ from typing import Sequence, cast, overload from dlt.common.schema import Schema -from dlt.common.schema.typing import TColumnSchema, TWriteDisposition +from dlt.common.schema.typing import TColumnSchema, TWriteDisposition, TSchemaContract from dlt.common.typing import TSecretValue, Any from dlt.common.configuration import with_config from dlt.common.configuration.container import Container from dlt.common.configuration.inject import get_orig_args, last_config -from dlt.common.destination.reference import DestinationReference, TDestinationReferenceArg +from dlt.common.destination import TLoaderFileFormat, Destination, TDestinationReferenceArg from dlt.common.pipeline import LoadInfo, PipelineContext, get_dlt_pipelines_dir from dlt.pipeline.configuration import PipelineConfiguration, ensure_correct_pipeline_kwargs from dlt.pipeline.pipeline import Pipeline from dlt.pipeline.progress import _from_name as collector_from_name, TCollectorArg, _NULL_COLLECTOR +from dlt.pipeline.warnings import credentials_argument_deprecated @overload @@ -96,7 +97,7 @@ def pipeline( full_refresh: bool = False, credentials: Any = None, progress: TCollectorArg = _NULL_COLLECTOR, - **kwargs: Any + **kwargs: Any, ) -> Pipeline: ensure_correct_pipeline_kwargs(pipeline, **kwargs) # call without arguments returns current pipeline @@ -104,6 +105,8 @@ def pipeline( # is any of the arguments different from defaults has_arguments = bool(orig_args[0]) or any(orig_args[1].values()) + credentials_argument_deprecated("pipeline", credentials, destination) + if not has_arguments: context = Container()[PipelineContext] 
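The `map_parallel` and `spool_files` changes above replace hand-maintained row counts with `DataWriterMetrics` objects that support addition, so per-file metrics can be reduced with `sum(..., EMPTY_DATA_WRITER_METRICS)` and grouped per table with `itertools.groupby`. A hedged sketch of that aggregation pattern; the metric values are invented and the positional field order (file path, item count, file size, timestamps) is inferred from the `DataWriterMetrics(...)` call in the parquet normalizer above:

```python
from itertools import groupby

from dlt.common.data_writers import DataWriterMetrics
from dlt.common.data_writers.writers import EMPTY_DATA_WRITER_METRICS

# (table_name, metrics) pairs for individual job files; keep them ordered by table
# name, because itertools.groupby only merges consecutive keys
per_file = [
    ("events", DataWriterMetrics("events.1.jsonl", 100, 2048, 0, 0)),
    ("events", DataWriterMetrics("events.2.jsonl", 50, 1024, 0, 0)),
    ("users", DataWriterMetrics("users.1.jsonl", 10, 256, 0, 0)),
]

per_table = {
    table: sum((metrics for _, metrics in group), EMPTY_DATA_WRITER_METRICS)
    for table, group in groupby(per_file, key=lambda pair: pair[0])
}
print(per_table["events"].items_count)  # combined item count across both "events" files
```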
# if pipeline instance is already active then return it, otherwise create a new one @@ -116,8 +119,13 @@ def pipeline( if not pipelines_dir: pipelines_dir = get_dlt_pipelines_dir() - destination = DestinationReference.from_name(destination or kwargs["destination_name"]) - staging = DestinationReference.from_name(staging or kwargs.get("staging_name", None)) if staging is not None else None + destination = Destination.from_reference( + destination or kwargs["destination_type"], destination_name=kwargs["destination_name"] + ) + staging = Destination.from_reference( + staging or kwargs.get("staging_type", None), + destination_name=kwargs.get("staging_name", None), + ) progress = collector_from_name(progress) # create new pipeline instance @@ -135,7 +143,8 @@ def pipeline( progress, False, last_config(**kwargs), - kwargs["runtime"]) + kwargs["runtime"], + ) # set it as current pipeline p.activate() return p @@ -149,7 +158,7 @@ def attach( full_refresh: bool = False, credentials: Any = None, progress: TCollectorArg = _NULL_COLLECTOR, - **kwargs: Any + **kwargs: Any, ) -> Pipeline: """Attaches to the working folder of `pipeline_name` in `pipelines_dir` or in default directory. Requires that valid pipeline state exists in working folder.""" ensure_correct_pipeline_kwargs(attach, **kwargs) @@ -158,7 +167,22 @@ def attach( pipelines_dir = get_dlt_pipelines_dir() progress = collector_from_name(progress) # create new pipeline instance - p = Pipeline(pipeline_name, pipelines_dir, pipeline_salt, None, None, None, credentials, None, None, full_refresh, progress, True, last_config(**kwargs), kwargs["runtime"]) + p = Pipeline( + pipeline_name, + pipelines_dir, + pipeline_salt, + None, + None, + None, + credentials, + None, + None, + full_refresh, + progress, + True, + last_config(**kwargs), + kwargs["runtime"], + ) # set it as current pipeline p.activate() return p @@ -174,7 +198,9 @@ def run( table_name: str = None, write_disposition: TWriteDisposition = None, columns: Sequence[TColumnSchema] = None, - schema: Schema = None + schema: Schema = None, + loader_file_format: TLoaderFileFormat = None, + schema_contract: TSchemaContract = None, ) -> LoadInfo: """Loads the data in `data` argument into the destination specified in `destination` and dataset specified in `dataset_name`. @@ -224,7 +250,7 @@ def run( Returns: LoadInfo: Information on loaded data including the list of package ids and failed job statuses. Please not that `dlt` will not raise if a single job terminally fails. Such information is provided via LoadInfo. 
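A hedged usage sketch for the two arguments added to `run()` above. The `tables` and `columns` contract entities appear in the normalizer changes earlier in this diff; the `data_type` key and the `evolve`/`freeze` mode names follow dlt's schema contract documentation and should be treated as assumptions here:

```python
import dlt

pipeline = dlt.pipeline(pipeline_name="contracts_demo", destination="duckdb")
info = pipeline.run(
    [{"id": 1, "name": "a"}],
    table_name="items",
    loader_file_format="parquet",
    # per-entity contract: keep evolving tables, drop values of unknown columns,
    # refuse data type changes
    schema_contract={"tables": "evolve", "columns": "discard_value", "data_type": "freeze"},
)
print(info)
```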
""" - destination = DestinationReference.from_name(destination) + destination = Destination.from_reference(destination, credentials=credentials) return pipeline().run( data, destination=destination, @@ -234,12 +260,16 @@ def run( table_name=table_name, write_disposition=write_disposition, columns=columns, - schema=schema + schema=schema, + loader_file_format=loader_file_format, + schema_contract=schema_contract, ) + # plug default tracking module -from dlt.pipeline import trace, track -trace.TRACKING_MODULE = track +from dlt.pipeline import trace, track, platform + +trace.TRACKING_MODULES = [track, platform] # setup default pipeline in the container Container()[PipelineContext] = PipelineContext(pipeline) diff --git a/dlt/pipeline/configuration.py b/dlt/pipeline/configuration.py index 3d0c70f4b1..7aa54541c0 100644 --- a/dlt/pipeline/configuration.py +++ b/dlt/pipeline/configuration.py @@ -11,7 +11,9 @@ class PipelineConfiguration(BaseConfiguration): pipeline_name: Optional[str] = None pipelines_dir: Optional[str] = None + destination_type: Optional[str] = None destination_name: Optional[str] = None + staging_type: Optional[str] = None staging_name: Optional[str] = None loader_file_format: Optional[TLoaderFileFormat] = None dataset_name: Optional[str] = None diff --git a/dlt/pipeline/dbt.py b/dlt/pipeline/dbt.py index 70bd425f12..e647e475ed 100644 --- a/dlt/pipeline/dbt.py +++ b/dlt/pipeline/dbt.py @@ -6,11 +6,19 @@ from dlt.common.typing import TSecretValue from dlt.common.schema.utils import normalize_schema_name -from dlt.helpers.dbt import create_venv as _create_venv, package_runner as _package_runner, DBTPackageRunner, DEFAULT_DBT_VERSION as _DEFAULT_DBT_VERSION, restore_venv as _restore_venv +from dlt.helpers.dbt import ( + create_venv as _create_venv, + package_runner as _package_runner, + DBTPackageRunner, + DEFAULT_DBT_VERSION as _DEFAULT_DBT_VERSION, + restore_venv as _restore_venv, +) from dlt.pipeline.pipeline import Pipeline -def get_venv(pipeline: Pipeline, venv_path: str = "dbt", dbt_version: str = _DEFAULT_DBT_VERSION) -> Venv: +def get_venv( + pipeline: Pipeline, venv_path: str = "dbt", dbt_version: str = _DEFAULT_DBT_VERSION +) -> Venv: """Creates or restores a virtual environment in which the `dbt` packages are executed. 
The recommended way to execute dbt package is to use a separate virtual environment where only the dbt-core @@ -36,18 +44,18 @@ def get_venv(pipeline: Pipeline, venv_path: str = "dbt", dbt_version: str = _DEF # try to restore existing venv with contextlib.suppress(VenvNotFound): # TODO: check dlt version in venv and update it if local version updated - return _restore_venv(venv_dir, [pipeline.destination.spec().destination_name], dbt_version) + return _restore_venv(venv_dir, [pipeline.destination.spec().destination_type], dbt_version) - return _create_venv(venv_dir, [pipeline.destination.spec().destination_name], dbt_version) + return _create_venv(venv_dir, [pipeline.destination.spec().destination_type], dbt_version) def package( - pipeline: Pipeline, - package_location: str, - package_repository_branch: str = None, - package_repository_ssh_key: TSecretValue = TSecretValue(""), # noqa - auto_full_refresh_when_out_of_sync: bool = None, - venv: Venv = None + pipeline: Pipeline, + package_location: str, + package_repository_branch: str = None, + package_repository_ssh_key: TSecretValue = TSecretValue(""), # noqa + auto_full_refresh_when_out_of_sync: bool = None, + venv: Venv = None, ) -> DBTPackageRunner: """Creates a Python wrapper over `dbt` package present at specified location, that allows to control it (ie. run and test) from Python code. @@ -70,7 +78,11 @@ def package( Returns: DBTPackageRunner: A configured and authenticated Python `dbt` wrapper """ - schema = pipeline.default_schema if pipeline.default_schema_name else Schema(normalize_schema_name(pipeline.dataset_name)) + schema = ( + pipeline.default_schema + if pipeline.default_schema_name + else Schema(normalize_schema_name(pipeline.dataset_name)) + ) job_client = pipeline._sql_job_client(schema) if not venv: venv = Venv.restore_current() @@ -81,5 +93,5 @@ def package( package_location, package_repository_branch, package_repository_ssh_key, - auto_full_refresh_when_out_of_sync + auto_full_refresh_when_out_of_sync, ) diff --git a/dlt/pipeline/exceptions.py b/dlt/pipeline/exceptions.py index 0289c07158..ac203d95a0 100644 --- a/dlt/pipeline/exceptions.py +++ b/dlt/pipeline/exceptions.py @@ -1,19 +1,29 @@ -from typing import Any +from typing import Any, Dict from dlt.common.exceptions import PipelineException -from dlt.common.pipeline import SupportsPipeline +from dlt.common.pipeline import StepInfo, StepMetrics, SupportsPipeline from dlt.pipeline.typing import TPipelineStep class InvalidPipelineName(PipelineException, ValueError): def __init__(self, pipeline_name: str, details: str) -> None: - super().__init__(pipeline_name, f"The pipeline name {pipeline_name} contains invalid characters. The pipeline name is used to create a pipeline working directory and must be a valid directory name. The actual error is: {details}") + super().__init__( + pipeline_name, + f"The pipeline name {pipeline_name} contains invalid characters. The pipeline name is" + " used to create a pipeline working directory and must be a valid directory name. 
The" + f" actual error is: {details}", + ) class PipelineConfigMissing(PipelineException): - def __init__(self, pipeline_name: str, config_elem: str, step: TPipelineStep, _help: str = None) -> None: + def __init__( + self, pipeline_name: str, config_elem: str, step: TPipelineStep, _help: str = None + ) -> None: self.config_elem = config_elem self.step = step - msg = f"Configuration element {config_elem} was not provided and {step} step cannot be executed" + msg = ( + f"Configuration element {config_elem} was not provided and {step} step cannot be" + " executed" + ) if _help: msg += f"\n{_help}\n" super().__init__(pipeline_name, msg) @@ -21,49 +31,91 @@ def __init__(self, pipeline_name: str, config_elem: str, step: TPipelineStep, _h class CannotRestorePipelineException(PipelineException): def __init__(self, pipeline_name: str, pipelines_dir: str, reason: str) -> None: - msg = f"Pipeline with name {pipeline_name} in working directory {pipelines_dir} could not be restored: {reason}" + msg = ( + f"Pipeline with name {pipeline_name} in working directory {pipelines_dir} could not be" + f" restored: {reason}" + ) super().__init__(pipeline_name, msg) class SqlClientNotAvailable(PipelineException): - def __init__(self, pipeline_name: str,destination_name: str) -> None: - super().__init__(pipeline_name, f"SQL Client not available for destination {destination_name} in pipeline {pipeline_name}") + def __init__(self, pipeline_name: str, destination_name: str) -> None: + super().__init__( + pipeline_name, + f"SQL Client not available for destination {destination_name} in pipeline" + f" {pipeline_name}", + ) class PipelineStepFailed(PipelineException): - def __init__(self, pipeline: SupportsPipeline, step: TPipelineStep, exception: BaseException, step_info: Any = None) -> None: + """Raised by run, extract, normalize and load Pipeline methods.""" + + def __init__( + self, + pipeline: SupportsPipeline, + step: TPipelineStep, + load_id: str, + exception: BaseException, + step_info: StepInfo[StepMetrics] = None, + ) -> None: self.pipeline = pipeline self.step = step + self.load_id = load_id self.exception = exception self.step_info = step_info - super().__init__(pipeline.pipeline_name, f"Pipeline execution failed at stage {step} with exception:\n\n{type(exception)}\n{exception}") + + package_str = f" when processing package {load_id}" if load_id else "" + super().__init__( + pipeline.pipeline_name, + f"Pipeline execution failed at stage {step}{package_str} with" + f" exception:\n\n{type(exception)}\n{exception}", + ) + + def attrs(self) -> Dict[str, Any]: + # remove attr that should not be published + attrs_ = super().attrs() + attrs_.pop("pipeline") + attrs_.pop("exception") + attrs_.pop("step_info") + return attrs_ class PipelineStateEngineNoUpgradePathException(PipelineException): - def __init__(self, pipeline_name: str, init_engine: int, from_engine: int, to_engine: int) -> None: + def __init__( + self, pipeline_name: str, init_engine: int, from_engine: int, to_engine: int + ) -> None: self.init_engine = init_engine self.from_engine = from_engine self.to_engine = to_engine - super().__init__(pipeline_name, f"No engine upgrade path for state in pipeline {pipeline_name} from {init_engine} to {to_engine}, stopped at {from_engine}") + super().__init__( + pipeline_name, + f"No engine upgrade path for state in pipeline {pipeline_name} from {init_engine} to" + f" {to_engine}, stopped at {from_engine}", + ) class PipelineHasPendingDataException(PipelineException): def __init__(self, pipeline_name: str, 
pipelines_dir: str) -> None: msg = ( - f" Operation failed because pipeline with name {pipeline_name} in working directory {pipelines_dir} contains pending extracted files or load packages. " - "Use `dlt pipeline sync` to reset the local state then run this operation again." + f" Operation failed because pipeline with name {pipeline_name} in working directory" + f" {pipelines_dir} contains pending extracted files or load packages. Use `dlt pipeline" + " sync` to reset the local state then run this operation again." ) super().__init__(pipeline_name, msg) + class PipelineNeverRan(PipelineException): def __init__(self, pipeline_name: str, pipelines_dir: str) -> None: msg = ( - f" Operation failed because pipeline with name {pipeline_name} in working directory {pipelines_dir} was never run or never synced with destination. " - "Use `dlt pipeline sync` to synchronize." + f" Operation failed because pipeline with name {pipeline_name} in working directory" + f" {pipelines_dir} was never run or never synced with destination. Use `dlt pipeline" + " sync` to synchronize." ) super().__init__(pipeline_name, msg) class PipelineNotActive(PipelineException): def __init__(self, pipeline_name: str) -> None: - super().__init__(pipeline_name, f"Pipeline {pipeline_name} is not active so it cannot be deactivated") + super().__init__( + pipeline_name, f"Pipeline {pipeline_name} is not active so it cannot be deactivated" + ) diff --git a/dlt/pipeline/helpers.py b/dlt/pipeline/helpers.py index ebb85f5e23..7bba5f84e7 100644 --- a/dlt/pipeline/helpers.py +++ b/dlt/pipeline/helpers.py @@ -4,19 +4,35 @@ from dlt.common.jsonpath import resolve_paths, TAnyJsonPath, compile_paths from dlt.common.exceptions import TerminalException -from dlt.common.schema.utils import group_tables_by_resource, compile_simple_regexes, compile_simple_regex +from dlt.common.schema.utils import ( + group_tables_by_resource, + compile_simple_regexes, + compile_simple_regex, +) from dlt.common.schema.typing import TSimpleRegex from dlt.common.typing import REPattern -from dlt.common.pipeline import TSourceState, reset_resource_state, _sources_state, _delete_source_state_keys, _get_matching_resources +from dlt.common.pipeline import ( + TSourceState, + reset_resource_state, + _sources_state, + _delete_source_state_keys, + _get_matching_resources, +) from dlt.common.destination.reference import WithStagingDataset from dlt.destinations.exceptions import DatabaseUndefinedRelation -from dlt.pipeline.exceptions import PipelineNeverRan, PipelineStepFailed, PipelineHasPendingDataException +from dlt.pipeline.exceptions import ( + PipelineNeverRan, + PipelineStepFailed, + PipelineHasPendingDataException, +) from dlt.pipeline.typing import TPipelineStep from dlt.pipeline import Pipeline -def retry_load(retry_on_pipeline_steps: Sequence[TPipelineStep] = ("load",)) -> Callable[[BaseException], bool]: +def retry_load( + retry_on_pipeline_steps: Sequence[TPipelineStep] = ("load",) +) -> Callable[[BaseException], bool]: """A retry strategy for Tenacity that, with default setting, will repeat `load` step for all exceptions that are not terminal Use this condition with tenacity `retry_if_exception`. Terminal exceptions are exceptions that will not go away when operations is repeated. @@ -31,12 +47,15 @@ def retry_load(retry_on_pipeline_steps: Sequence[TPipelineStep] = ("load",)) -> retry_on_pipeline_steps (Tuple[TPipelineStep, ...], optional): which pipeline steps are allowed to be repeated. 
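`retry_load` above returns a predicate for tenacity's `retry_if_exception`: only pipeline steps listed in `retry_on_pipeline_steps` are retried, and terminal exceptions never are. A usage sketch; the stop and wait settings are illustrative:

```python
from tenacity import Retrying, retry_if_exception, stop_after_attempt, wait_exponential

import dlt
from dlt.pipeline.helpers import retry_load

pipeline = dlt.pipeline(pipeline_name="retry_demo", destination="duckdb")
data = [{"id": 1}]

# repeat only when the failing step is "load" and the exception is not terminal
for attempt in Retrying(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    retry=retry_if_exception(retry_load(("load",))),
    reraise=True,
):
    with attempt:
        pipeline.run(data, table_name="items")
```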
Default: "load" """ + def _retry_load(ex: BaseException) -> bool: # do not retry in normalize or extract stages if isinstance(ex, PipelineStepFailed) and ex.step not in retry_on_pipeline_steps: return False # do not retry on terminal exceptions - if isinstance(ex, TerminalException) or (ex.__context__ is not None and isinstance(ex.__context__, TerminalException)): + if isinstance(ex, TerminalException) or ( + ex.__context__ is not None and isinstance(ex.__context__, TerminalException) + ): return False return True @@ -83,14 +102,16 @@ def __init__( resources = set(resources) resource_names = [] if drop_all: - self.resource_pattern = compile_simple_regex(TSimpleRegex('re:.*')) # Match everything + self.resource_pattern = compile_simple_regex(TSimpleRegex("re:.*")) # Match everything elif resources: self.resource_pattern = compile_simple_regexes(TSimpleRegex(r) for r in resources) else: self.resource_pattern = None if self.resource_pattern: - data_tables = {t["name"]: t for t in self.schema.data_tables()} # Don't remove _dlt tables + data_tables = { + t["name"]: t for t in self.schema.data_tables() + } # Don't remove _dlt tables resource_tables = group_tables_by_resource(data_tables, pattern=self.resource_pattern) if self.drop_tables: self.tables_to_drop = list(chain.from_iterable(resource_tables.values())) @@ -105,36 +126,50 @@ def __init__( self.drop_all = drop_all self.info: _DropInfo = dict( - tables=[t['name'] for t in self.tables_to_drop], resource_states=[], state_paths=[], + tables=[t["name"] for t in self.tables_to_drop], + resource_states=[], + state_paths=[], resource_names=resource_names, - schema_name=self.schema.name, dataset_name=self.pipeline.dataset_name, + schema_name=self.schema.name, + dataset_name=self.pipeline.dataset_name, drop_all=drop_all, resource_pattern=self.resource_pattern, - warnings=[] + warnings=[], ) if self.resource_pattern and not resource_tables: - self.info['warnings'].append( - f"Specified resource(s) {str(resources)} did not select any table(s) in schema {self.schema.name}. Possible resources are: {list(group_tables_by_resource(data_tables).keys())}" + self.info["warnings"].append( + f"Specified resource(s) {str(resources)} did not select any table(s) in schema" + f" {self.schema.name}. 
Possible resources are:" + f" {list(group_tables_by_resource(data_tables).keys())}" ) self._new_state = self._create_modified_state() @property def is_empty(self) -> bool: - return len(self.info['tables']) == 0 and len(self.info["state_paths"]) == 0 and len(self.info["resource_states"]) == 0 + return ( + len(self.info["tables"]) == 0 + and len(self.info["state_paths"]) == 0 + and len(self.info["resource_states"]) == 0 + ) def _drop_destination_tables(self) -> None: - table_names = [tbl['name'] for tbl in self.tables_to_drop] + table_names = [tbl["name"] for tbl in self.tables_to_drop] + for table_name in table_names: + assert table_name not in self.schema._schema_tables, ( + f"You are dropping table {table_name} in {self.schema.name} but it is still present" + " in the schema" + ) with self.pipeline._sql_job_client(self.schema) as client: - client.drop_tables(*table_names) + client.drop_tables(*table_names, replace_schema=True) # also delete staging but ignore if staging does not exist if isinstance(client, WithStagingDataset): with contextlib.suppress(DatabaseUndefinedRelation): with client.with_staging_dataset(): - client.drop_tables(*table_names) + client.drop_tables(*table_names, replace_schema=True) def _delete_pipeline_tables(self) -> None: for tbl in self.tables_to_drop: - del self.schema_tables[tbl['name']] + del self.schema_tables[tbl["name"]] self.schema.bump_version() def _list_state_paths(self, source_state: Dict[str, Any]) -> List[str]: @@ -149,14 +184,17 @@ def _create_modified_state(self) -> Dict[str, Any]: # drop table states if self.drop_state and self.resource_pattern: for key in _get_matching_resources(self.resource_pattern, source_state): - self.info['resource_states'].append(key) + self.info["resource_states"].append(key) reset_resource_state(key, source_state) # drop additional state paths resolved_paths = resolve_paths(self.state_paths_to_drop, source_state) if self.state_paths_to_drop and not resolved_paths: - self.info['warnings'].append(f"State paths {self.state_paths_to_drop} did not select any paths in source {source_name}") + self.info["warnings"].append( + f"State paths {self.state_paths_to_drop} did not select any paths in source" + f" {source_name}" + ) _delete_source_state_keys(resolved_paths, source_state) - self.info['state_paths'].extend(f"{source_name}.{p}" for p in resolved_paths) + self.info["state_paths"].extend(f"{source_name}.{p}" for p in resolved_paths) return state # type: ignore[return-value] def _drop_state_keys(self) -> None: @@ -166,8 +204,12 @@ def _drop_state_keys(self) -> None: state.update(self._new_state) def __call__(self) -> None: - if self.pipeline.has_pending_data: # Raise when there are pending extracted/load files to prevent conflicts - raise PipelineHasPendingDataException(self.pipeline.pipeline_name, self.pipeline.pipelines_dir) + if ( + self.pipeline.has_pending_data + ): # Raise when there are pending extracted/load files to prevent conflicts + raise PipelineHasPendingDataException( + self.pipeline.pipeline_name, self.pipeline.pipelines_dir + ) self.pipeline.sync_destination() if not self.drop_state and not self.drop_tables: @@ -176,19 +218,20 @@ def __call__(self) -> None: if self.drop_tables: self._delete_pipeline_tables() self._drop_destination_tables() - if self.drop_state: - self._drop_state_keys() if self.drop_tables: self.pipeline.schemas.save_schema(self.schema) + if self.drop_state: + self._drop_state_keys() # Send updated state to destination self.pipeline.normalize() try: 
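For context, `DropCommand` above backs the `drop` helper defined at the end of this file: after dropping tables and state it pushes the updated state with `normalize()` and `load()`, and wipes pending packages if that load fails. A hedged usage sketch with invented pipeline and resource names:

```python
import dlt
from dlt.pipeline.helpers import drop

pipeline = dlt.pipeline(pipeline_name="drop_demo", destination="duckdb")

# drop destination tables and stored state for one resource, then sync the change
drop(pipeline, resources=["my_resource"])

# or only reset the resource state, keeping the destination tables
drop(pipeline, resources=["my_resource"], state_only=True)
```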
self.pipeline.load(raise_on_failed_jobs=True) except Exception: # Clear extracted state on failure so command can run again - self.pipeline._get_load_storage().wipe_normalized_packages() + self.pipeline.drop_pending_packages() with self.pipeline.managed_state() as state: state["_local"].pop("_last_extracted_at", None) + state["_local"].pop("_last_extracted_hash", None) raise @@ -198,6 +241,6 @@ def drop( schema_name: str = None, state_paths: TAnyJsonPath = (), drop_all: bool = False, - state_only: bool = False + state_only: bool = False, ) -> None: return DropCommand(pipeline, resources, schema_name, state_paths, drop_all, state_only)() diff --git a/dlt/pipeline/mark.py b/dlt/pipeline/mark.py index 5f880d8711..3b9b3ccfc7 100644 --- a/dlt/pipeline/mark.py +++ b/dlt/pipeline/mark.py @@ -1,2 +1,2 @@ """Module with market functions that make data to be specially processed""" -from dlt.extract.source import with_table_name \ No newline at end of file +from dlt.extract import with_table_name diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index b948ad8040..73c8f076d1 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -3,40 +3,100 @@ import datetime # noqa: 251 from contextlib import contextmanager from functools import wraps -from collections.abc import Sequence as C_Sequence -from typing import Any, Callable, ClassVar, List, Iterator, Optional, Sequence, Tuple, cast, get_type_hints, ContextManager -from concurrent.futures import Executor +from typing import ( + Any, + Callable, + ClassVar, + List, + Iterator, + Optional, + Sequence, + Tuple, + cast, + get_type_hints, + ContextManager, +) from dlt import version from dlt.common import json, logger, pendulum from dlt.common.configuration import inject_section, known_sections from dlt.common.configuration.specs import RunConfiguration, CredentialsConfiguration from dlt.common.configuration.container import Container -from dlt.common.configuration.exceptions import ConfigFieldMissingException, ContextDefaultCannotBeCreated +from dlt.common.configuration.exceptions import ( + ConfigFieldMissingException, + ContextDefaultCannotBeCreated, +) from dlt.common.configuration.specs.config_section_context import ConfigSectionContext from dlt.common.configuration.resolve import initialize_credentials -from dlt.common.exceptions import (DestinationLoadingViaStagingNotSupported, DestinationLoadingWithoutStagingNotSupported, DestinationNoStagingMode, - MissingDependencyException, DestinationUndefinedEntity, DestinationIncompatibleLoaderFileFormatException) +from dlt.common.exceptions import ( + DestinationLoadingViaStagingNotSupported, + DestinationLoadingWithoutStagingNotSupported, + DestinationNoStagingMode, + MissingDependencyException, + DestinationUndefinedEntity, + DestinationIncompatibleLoaderFileFormatException, +) from dlt.common.normalizers import explicit_normalizers, import_normalizers from dlt.common.runtime import signals, initialize_runtime -from dlt.common.schema.typing import TColumnNames, TColumnSchema, TSchemaTables, TWriteDisposition, TAnySchemaColumns -from dlt.common.storages.load_storage import LoadJobInfo, LoadPackageInfo -from dlt.common.typing import TFun, TSecretValue, is_optional_type +from dlt.common.schema.typing import ( + TColumnNames, + TSchemaTables, + TWriteDisposition, + TAnySchemaColumns, + TSchemaContract, +) +from dlt.common.schema.utils import normalize_schema_name +from dlt.common.storages.exceptions import LoadPackageNotFound +from dlt.common.typing import DictStrStr, TFun, 
TSecretValue, is_optional_type from dlt.common.runners import pool_runner as runner -from dlt.common.storages import LiveSchemaStorage, NormalizeStorage, LoadStorage, SchemaStorage, FileStorage, NormalizeStorageConfiguration, SchemaStorageConfiguration, LoadStorageConfiguration -from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import (DestinationClientDwhConfiguration, WithStateSync, DestinationReference, JobClientBase, DestinationClientConfiguration, - TDestinationReferenceArg, DestinationClientStagingConfiguration, DestinationClientStagingConfiguration, - DestinationClientDwhWithStagingConfiguration) +from dlt.common.storages import ( + LiveSchemaStorage, + NormalizeStorage, + LoadStorage, + SchemaStorage, + FileStorage, + NormalizeStorageConfiguration, + SchemaStorageConfiguration, + LoadStorageConfiguration, + PackageStorage, + LoadJobInfo, + LoadPackageInfo, +) +from dlt.common.destination import DestinationCapabilitiesContext, TDestination +from dlt.common.destination.reference import ( + DestinationClientDwhConfiguration, + WithStateSync, + Destination, + JobClientBase, + DestinationClientConfiguration, + TDestinationReferenceArg, + DestinationClientStagingConfiguration, + DestinationClientStagingConfiguration, + DestinationClientDwhWithStagingConfiguration, +) from dlt.common.destination.capabilities import INTERNAL_LOADER_FILE_FORMATS -from dlt.common.pipeline import ExtractInfo, LoadInfo, NormalizeInfo, PipelineContext, SupportsPipeline, TPipelineLocalState, TPipelineState, StateInjectableContext +from dlt.common.pipeline import ( + ExtractInfo, + LoadInfo, + NormalizeInfo, + PipelineContext, + StepInfo, + TStepInfo, + SupportsPipeline, + TPipelineLocalState, + TPipelineState, + StateInjectableContext, + TStepMetrics, + WithStepInfo, +) from dlt.common.schema import Schema from dlt.common.utils import is_interactive from dlt.common.data_writers import TLoaderFileFormat +from dlt.common.warnings import deprecated, Dlt04DeprecationWarning +from dlt.extract import DltSource from dlt.extract.exceptions import SourceExhausted -from dlt.extract.extract import ExtractorStorage, extract_with_schema -from dlt.extract.source import DltResource, DltSource +from dlt.extract.extract import Extract, data_to_sources from dlt.normalize import Normalize from dlt.normalize.configuration import NormalizeConfiguration from dlt.destinations.sql_client import SqlClientBase @@ -46,16 +106,38 @@ from dlt.pipeline.configuration import PipelineConfiguration from dlt.pipeline.progress import _Collector, _NULL_COLLECTOR -from dlt.pipeline.exceptions import CannotRestorePipelineException, InvalidPipelineName, PipelineConfigMissing, PipelineNotActive, PipelineStepFailed, SqlClientNotAvailable -from dlt.pipeline.trace import PipelineTrace, PipelineStepTrace, load_trace, merge_traces, start_trace, start_trace_step, end_trace_step, end_trace, describe_extract_data +from dlt.pipeline.exceptions import ( + CannotRestorePipelineException, + InvalidPipelineName, + PipelineConfigMissing, + PipelineNotActive, + PipelineStepFailed, + SqlClientNotAvailable, +) +from dlt.pipeline.trace import ( + PipelineTrace, + PipelineStepTrace, + load_trace, + merge_traces, + start_trace, + start_trace_step, + end_trace_step, + end_trace, +) from dlt.pipeline.typing import TPipelineStep -from dlt.pipeline.state_sync import STATE_ENGINE_VERSION, load_state_from_destination, merge_state_if_changed, migrate_state, state_resource, json_encode_state, json_decode_state - -from 
dlt.common.schema.utils import normalize_schema_name +from dlt.pipeline.state_sync import ( + STATE_ENGINE_VERSION, + bump_version_if_modified, + load_state_from_destination, + migrate_state, + state_resource, + json_encode_state, + json_decode_state, +) +from dlt.pipeline.warnings import credentials_argument_deprecated def with_state_sync(may_extract_state: bool = False) -> Callable[[TFun], TFun]: - def decorator(f: TFun) -> TFun: @wraps(f) def _wrap(self: "Pipeline", *args: Any, **kwargs: Any) -> Any: @@ -74,72 +156,85 @@ def _wrap(self: "Pipeline", *args: Any, **kwargs: Any) -> Any: def with_schemas_sync(f: TFun) -> TFun: - @wraps(f) def _wrap(self: "Pipeline", *args: Any, **kwargs: Any) -> Any: for name in self._schema_storage.live_schemas: # refresh live schemas in storage or import schema path self._schema_storage.commit_live_schema(name) rv = f(self, *args, **kwargs) + # save modified live schemas + for name in self._schema_storage.live_schemas: + self._schema_storage.commit_live_schema(name) # refresh list of schemas if any new schemas are added - self.schema_names = self._schema_storage.list_schemas() + self.schema_names = self._list_schemas_sorted() return rv return _wrap # type: ignore -def with_runtime_trace(f: TFun) -> TFun: - - @wraps(f) - def _wrap(self: "Pipeline", *args: Any, **kwargs: Any) -> Any: - trace: PipelineTrace = self._trace - trace_step: PipelineStepTrace = None - step_info: Any = None - is_new_trace = self._trace is None and self.config.enable_runtime_trace - - # create a new trace if we enter a traced function and there's no current trace - if is_new_trace: - self._trace = trace = start_trace(cast(TPipelineStep, f.__name__), self) +def with_runtime_trace(send_state: bool = False) -> Callable[[TFun], TFun]: + def decorator(f: TFun) -> TFun: + @wraps(f) + def _wrap(self: "Pipeline", *args: Any, **kwargs: Any) -> Any: + trace: PipelineTrace = self._trace + trace_step: PipelineStepTrace = None + step_info: Any = None + is_new_trace = self._trace is None and self.config.enable_runtime_trace - try: - # start a trace step for wrapped function - if trace: - trace_step = start_trace_step(trace, cast(TPipelineStep, f.__name__), self) + # create a new trace if we enter a traced function and there's no current trace + if is_new_trace: + self._trace = trace = start_trace(cast(TPipelineStep, f.__name__), self) - step_info = f(self, *args, **kwargs) - return step_info - except Exception as ex: - step_info = ex # step info is an exception - raise - finally: try: - if trace_step: - # if there was a step, finish it - end_trace_step(self._trace, trace_step, self, step_info) - if is_new_trace: - assert trace is self._trace, f"Messed up trace reference {id(self._trace)} vs {id(trace)}" - end_trace(trace, self, self._pipeline_storage.storage_path) + # start a trace step for wrapped function + if trace: + trace_step = start_trace_step(trace, cast(TPipelineStep, f.__name__), self) + + step_info = f(self, *args, **kwargs) + return step_info + except Exception as ex: + step_info = ex # step info is an exception + raise finally: - # always end trace - if is_new_trace: - assert self._trace == trace, f"Messed up trace reference {id(self._trace)} vs {id(trace)}" - # if we end new trace that had only 1 step, add it to previous trace - # this way we combine several separate calls to extract, normalize, load as single trace - # the trace of "run" has many steps and will not be merged - self._last_trace = merge_traces(self._last_trace, trace) - self._trace = None + try: + if trace_step: + # if 
there was a step, finish it + self._trace = end_trace_step( + self._trace, trace_step, self, step_info, send_state + ) + if is_new_trace: + assert trace.transaction_id == self._trace.transaction_id, ( + f"Messed up trace reference {self._trace.transaction_id} vs" + f" {trace.transaction_id}" + ) + trace = end_trace( + trace, self, self._pipeline_storage.storage_path, send_state + ) + finally: + # always end trace + if is_new_trace: + assert ( + self._trace.transaction_id == trace.transaction_id + ), f"Messed up trace reference {id(self._trace)} vs {id(trace)}" + # if we end new trace that had only 1 step, add it to previous trace + # this way we combine several separate calls to extract, normalize, load as single trace + # the trace of "run" has many steps and will not be merged + self._last_trace = merge_traces(self._last_trace, trace) + self._trace = None - return _wrap # type: ignore + return _wrap # type: ignore + return decorator -def with_config_section(sections: Tuple[str, ...]) -> Callable[[TFun], TFun]: +def with_config_section(sections: Tuple[str, ...]) -> Callable[[TFun], TFun]: def decorator(f: TFun) -> TFun: - @wraps(f) def _wrap(self: "Pipeline", *args: Any, **kwargs: Any) -> Any: # add section context to the container to be used by all configuration without explicit sections resolution - with inject_section(ConfigSectionContext(pipeline_name=self.pipeline_name, sections=sections)): + with inject_section( + ConfigSectionContext(pipeline_name=self.pipeline_name, sections=sections) + ): return f(self, *args, **kwargs) return _wrap # type: ignore @@ -148,9 +243,11 @@ def _wrap(self: "Pipeline", *args: Any, **kwargs: Any) -> Any: class Pipeline(SupportsPipeline): - STATE_FILE: ClassVar[str] = "state.json" - STATE_PROPS: ClassVar[List[str]] = list(set(get_type_hints(TPipelineState).keys()) - {"sources"}) + STATE_PROPS: ClassVar[List[str]] = list( + set(get_type_hints(TPipelineState).keys()) + - {"sources", "destination_type", "destination_name", "staging_type", "staging_name"} + ) LOCAL_STATE_PROPS: ClassVar[List[str]] = list(get_type_hints(TPipelineLocalState).keys()) DEFAULT_DATASET_SUFFIX: ClassVar[str] = "_dataset" @@ -166,9 +263,9 @@ class Pipeline(SupportsPipeline): """A directory where the pipelines' working directories are created""" working_dir: str """A working directory of the pipeline""" - destination: DestinationReference = None - staging: DestinationReference = None - """The destination reference which is ModuleType. `destination.__name__` returns the name string""" + destination: TDestination = None + staging: TDestination = None + """The destination reference which is the Destination Class. 
`destination.destination_name` returns the name string""" dataset_name: str = None """Name of the dataset to which pipeline will be loaded to""" credentials: Any = None @@ -179,22 +276,22 @@ class Pipeline(SupportsPipeline): runtime_config: RunConfiguration def __init__( - self, - pipeline_name: str, - pipelines_dir: str, - pipeline_salt: TSecretValue, - destination: DestinationReference, - staging: DestinationReference, - dataset_name: str, - credentials: Any, - import_schema_path: str, - export_schema_path: str, - full_refresh: bool, - progress: _Collector, - must_attach_to_local_pipeline: bool, - config: PipelineConfiguration, - runtime: RunConfiguration, - ) -> None: + self, + pipeline_name: str, + pipelines_dir: str, + pipeline_salt: TSecretValue, + destination: TDestination, + staging: TDestination, + dataset_name: str, + credentials: Any, + import_schema_path: str, + export_schema_path: str, + full_refresh: bool, + progress: _Collector, + must_attach_to_local_pipeline: bool, + config: PipelineConfiguration, + runtime: RunConfiguration, + ) -> None: """Initializes the Pipeline class which implements `dlt` pipeline. Please use `pipeline` function in `dlt` module to create a new Pipeline instance.""" self.pipeline_salt = pipeline_salt self.config = config @@ -209,8 +306,6 @@ def __init__( self._pipeline_storage: FileStorage = None self._schema_storage: LiveSchemaStorage = None self._schema_storage_config: SchemaStorageConfiguration = None - self._normalize_storage_config: NormalizeStorageConfiguration = None - self._load_storage_config: LoadStorageConfiguration = None self._trace: PipelineTrace = None self._last_trace: PipelineTrace = None self._state_restored: bool = False @@ -220,12 +315,11 @@ def __init__( self._init_working_dir(pipeline_name, pipelines_dir) with self.managed_state() as state: - # set the pipeline properties from state + # changing the destination could be dangerous if pipeline has pending load packages + self._set_destinations(destination=destination, staging=staging) + # set the pipeline properties from state, destination and staging will not be set self._state_to_props(state) - # we overwrite the state with the values from init - # changing the destination could be dangerous if pipeline has pending load packages - self._set_destinations(destination, staging) self._set_dataset_name(dataset_name) self.credentials = credentials self._configure(import_schema_path, export_schema_path, must_attach_to_local_pipeline) @@ -249,10 +343,10 @@ def drop(self) -> "Pipeline": self.collector, False, self.config, - self.runtime_config + self.runtime_config, ) - @with_runtime_trace + @with_runtime_trace() @with_schemas_sync # this must precede with_state_sync @with_state_sync(may_extract_state=True) @with_config_section((known_sections.EXTRACT,)) @@ -267,35 +361,59 @@ def extract( primary_key: TColumnNames = None, schema: Schema = None, max_parallel_items: int = None, - workers: int = None + workers: int = None, + schema_contract: TSchemaContract = None, ) -> ExtractInfo: """Extracts the `data` and prepare it for the normalization. Does not require destination or credentials to be configured. 
See `run` method for the arguments' description.""" # create extract storage to which all the sources will be extracted - storage = ExtractorStorage(self._normalize_storage_config) - extract_ids: List[str] = [] + extract_step = Extract( + self._schema_storage, + self._normalize_storage_config(), + self.collector, + original_data=data, + ) try: with self._maybe_destination_capabilities(): # extract all sources - for source in self._data_to_sources(data, schema, table_name, parent_table_name, write_disposition, columns, primary_key): + for source in data_to_sources( + data, + self, + schema, + table_name, + parent_table_name, + write_disposition, + columns, + primary_key, + schema_contract, + ): if source.exhausted: raise SourceExhausted(source.name) - # TODO: merge infos for all the sources - extract_ids.append( - self._extract_source(storage, source, max_parallel_items, workers) + self._extract_source(extract_step, source, max_parallel_items, workers) + # extract state + if self.config.restore_from_destination: + # this will update state version hash so it will not be extracted again by with_state_sync + self._bump_version_and_extract_state( + self._container[StateInjectableContext].state, True, extract_step ) - # commit extract ids - # TODO: if we fail here we should probably wipe out the whole extract folder - for extract_id in extract_ids: - storage.commit_extract_files(extract_id) - return ExtractInfo(describe_extract_data(data)) + # commit load packages + extract_step.commit_packages() + return self._get_step_info(extract_step) except Exception as exc: - # TODO: provide metrics from extractor - raise PipelineStepFailed(self, "extract", exc, ExtractInfo(describe_extract_data(data))) from exc - - @with_runtime_trace + step_info = self._get_step_info(extract_step) + raise PipelineStepFailed( + self, + "extract", + extract_step.current_load_id, + exc, + step_info, + ) from exc + + @with_runtime_trace() @with_schemas_sync @with_config_section((known_sections.NORMALIZE,)) - def normalize(self, workers: int = 1, loader_file_format: TLoaderFileFormat = None) -> NormalizeInfo: + def normalize( + self, workers: int = 1, loader_file_format: TLoaderFileFormat = None + ) -> NormalizeInfo: """Normalizes the data prepared with `extract` method, infers the schema and creates load packages for the `load` method. 
Requires `destination` to be known.""" if is_interactive(): workers = 1 @@ -311,21 +429,32 @@ def normalize(self, workers: int = 1, loader_file_format: TLoaderFileFormat = No normalize_config = NormalizeConfiguration( workers=workers, _schema_storage_config=self._schema_storage_config, - _normalize_storage_config=self._normalize_storage_config, - _load_storage_config=self._load_storage_config + _normalize_storage_config=self._normalize_storage_config(), + _load_storage_config=self._load_storage_config(), ) # run with destination context with self._maybe_destination_capabilities(loader_file_format=loader_file_format): # shares schema storage with the pipeline so we do not need to install - normalize = Normalize(collector=self.collector, config=normalize_config, schema_storage=self._schema_storage) + normalize_step: Normalize = Normalize( + collector=self.collector, + config=normalize_config, + schema_storage=self._schema_storage, + ) try: with signals.delayed_signals(): - runner.run_pool(normalize.config, normalize) - return normalize.get_normalize_info() + runner.run_pool(normalize_step.config, normalize_step) + return self._get_step_info(normalize_step) except Exception as n_ex: - raise PipelineStepFailed(self, "normalize", n_ex, normalize.get_normalize_info()) from n_ex + step_info = self._get_step_info(normalize_step) + raise PipelineStepFailed( + self, + "normalize", + normalize_step.current_load_id, + n_ex, + step_info, + ) from n_ex - @with_runtime_trace + @with_runtime_trace(send_state=True) @with_schemas_sync @with_state_sync() @with_config_section((known_sections.LOAD,)) @@ -336,12 +465,15 @@ def load( credentials: Any = None, *, workers: int = 20, - raise_on_failed_jobs: bool = False + raise_on_failed_jobs: bool = False, ) -> LoadInfo: """Loads the packages prepared by `normalize` method into the `dataset_name` at `destination`, using provided `credentials`""" - # set destination and default dataset if provided - self._set_destinations(destination, None) + # set destination and default dataset if provided (this is the reason we have state sync here) + self._set_destinations(destination=destination, staging=None) self._set_dataset_name(dataset_name) + + credentials_argument_deprecated("pipeline.load", credentials, destination) + self.credentials = credentials or self.credentials # check if any schema is present, if not then no data was extracted @@ -355,27 +487,30 @@ def load( load_config = LoaderConfiguration( workers=workers, raise_on_failed_jobs=raise_on_failed_jobs, - _load_storage_config=self._load_storage_config + _load_storage_config=self._load_storage_config(), ) - load = Load( + load_step: Load = Load( self.destination, staging_destination=self.staging, collector=self.collector, is_storage_owner=False, config=load_config, initial_client_config=client.config, - initial_staging_client_config=staging_client.config if staging_client else None + initial_staging_client_config=staging_client.config if staging_client else None, ) try: with signals.delayed_signals(): - runner.run_pool(load.config, load) - info = self._get_load_info(load) + runner.run_pool(load_step.config, load_step) + info: LoadInfo = self._get_step_info(load_step) self.first_run = False return info except Exception as l_ex: - raise PipelineStepFailed(self, "load", l_ex, self._get_load_info(load)) from l_ex + step_info = self._get_step_info(load_step) + raise PipelineStepFailed( + self, "load", load_step.current_load_id, l_ex, step_info + ) from l_ex - @with_runtime_trace + @with_runtime_trace() 
@with_config_section(("run",)) def run( self, @@ -390,7 +525,8 @@ def run( columns: TAnySchemaColumns = None, primary_key: TColumnNames = None, schema: Schema = None, - loader_file_format: TLoaderFileFormat = None + loader_file_format: TLoaderFileFormat = None, + schema_contract: TSchemaContract = None, ) -> LoadInfo: """Loads the data from `data` argument into the destination specified in `destination` and dataset specified in `dataset_name`. @@ -440,40 +576,67 @@ def run( loader_file_format (Literal["jsonl", "insert_values", "parquet"], optional). The file format the loader will use to create the load package. Not all file_formats are compatible with all destinations. Defaults to the preferred file format of the selected destination. + schema_contract (TSchemaContract, optional): An override for the schema contract settings, this will replace the schema contract settings for all tables in the schema. Defaults to None. + Raises: PipelineStepFailed when a problem happened during `extract`, `normalize` or `load` steps. Returns: LoadInfo: Information on loaded data including the list of package ids and failed job statuses. Please note that `dlt` will not raise if a single job terminally fails. Such information is provided via LoadInfo. """ signals.raise_if_signalled() - self._set_destinations(destination, staging) + self.activate() + self._set_destinations(destination=destination, staging=staging) self._set_dataset_name(dataset_name) + credentials_argument_deprecated("pipeline.run", credentials, self.destination) + # sync state with destination - if self.config.restore_from_destination and not self.full_refresh and not self._state_restored and (self.destination or destination): + if ( + self.config.restore_from_destination + and not self.full_refresh + and not self._state_restored + and (self.destination or destination) + ): self.sync_destination(destination, staging, dataset_name) # sync only once self._state_restored = True # normalize and load pending data - if self.list_extracted_resources(): + if self.list_extracted_load_packages(): self.normalize(loader_file_format=loader_file_format) if self.list_normalized_load_packages(): # if there were any pending loads, load them and **exit** if data is not None: - logger.warn("The pipeline `run` method will now load the pending load packages. The data you passed to the run function will not be loaded. In order to do that you must run the pipeline again") + logger.warn( + "The pipeline `run` method will now load the pending load packages. The data" + " you passed to the run function will not be loaded. 
In order to do that you" + " must run the pipeline again" + ) return self.load(destination, dataset_name, credentials=credentials) # extract from the source if data is not None: - self.extract(data, table_name=table_name, write_disposition=write_disposition, columns=columns, primary_key=primary_key, schema=schema) + self.extract( + data, + table_name=table_name, + write_disposition=write_disposition, + columns=columns, + primary_key=primary_key, + schema=schema, + schema_contract=schema_contract, + ) self.normalize(loader_file_format=loader_file_format) return self.load(destination, dataset_name, credentials=credentials) else: return None @with_schemas_sync - def sync_destination(self, destination: TDestinationReferenceArg = None, staging: TDestinationReferenceArg = None, dataset_name: str = None) -> None: + def sync_destination( + self, + destination: TDestinationReferenceArg = None, + staging: TDestinationReferenceArg = None, + dataset_name: str = None, + ) -> None: """Synchronizes pipeline state with the `destination`'s state kept in `dataset_name` #### Note: @@ -485,12 +648,11 @@ def sync_destination(self, destination: TDestinationReferenceArg = None, staging Note: this method is executed by the `run` method before any operation on data. Use `restore_from_destination` configuration option to disable that behavior. """ - self._set_destinations(destination, staging) + self._set_destinations(destination=destination, staging=staging) self._set_dataset_name(dataset_name) state = self._get_state() - local_state = state.pop("_local") - merged_state: TPipelineState = None + state_changed = False try: try: restored_schemas: Sequence[Schema] = None @@ -498,32 +660,40 @@ def sync_destination(self, destination: TDestinationReferenceArg = None, staging # if remote state is newer or same # print(f'REMOTE STATE: {(remote_state or {}).get("_state_version")} >= {state["_state_version"]}') + # TODO: check if remote_state["_state_version"] is not among the 10 most recent versions; then we know the remote is newer. 
if remote_state and remote_state["_state_version"] >= state["_state_version"]: - # compare changes and updates local state - merged_state = merge_state_if_changed(state, remote_state, increase_version=False) + state_changed = remote_state["_version_hash"] != state.get("_version_hash") # print(f"MERGED STATE: {bool(merged_state)}") - if merged_state: + if state_changed: # see if state didn't change the pipeline name if state["pipeline_name"] != remote_state["pipeline_name"]: raise CannotRestorePipelineException( state["pipeline_name"], self.pipelines_dir, - f"destination state contains state for pipeline with name {remote_state['pipeline_name']}" + "destination state contains state for pipeline with name" + f" {remote_state['pipeline_name']}", ) # if state was modified force get all schemas - restored_schemas = self._get_schemas_from_destination(merged_state["schema_names"], always_download=True) + restored_schemas = self._get_schemas_from_destination( + remote_state["schema_names"], always_download=True + ) # TODO: we should probably wipe out pipeline here # if we didn't full refresh schemas, get only missing schemas if restored_schemas is None: - restored_schemas = self._get_schemas_from_destination(state["schema_names"], always_download=False) + restored_schemas = self._get_schemas_from_destination( + state["schema_names"], always_download=False + ) # commit all the changes locally - if merged_state: + if state_changed: + # use remote state as state + remote_state["_local"] = state["_local"] + state = remote_state # set the pipeline props from merged state - state["_local"] = local_state + self._state_to_props(state) # add that the state is already extracted + state["_local"]["_last_extracted_hash"] = state["_version_hash"] state["_local"]["_last_extracted_at"] = pendulum.now() - self._state_to_props(merged_state) # on merge schemas are replaced so we delete all old versions self._schema_storage.clear_storage() for schema in restored_schemas: @@ -546,17 +716,18 @@ def sync_destination(self, destination: TDestinationReferenceArg = None, staging # reset pipeline self._wipe_working_folder() state = self._get_state() - self._configure(self._schema_storage_config.export_schema_path, self._schema_storage_config.import_schema_path, False) - + self._configure( + self._schema_storage_config.export_schema_path, + self._schema_storage_config.import_schema_path, + False, + ) # write the state back - state = merged_state or state - if "_local" not in state: - state["_local"] = local_state self._props_to_state(state) + bump_version_if_modified(state) self._save_state(state) except Exception as ex: - raise PipelineStepFailed(self, "run", ex, None) from ex + raise PipelineStepFailed(self, "sync", None, ex, None) from ex def activate(self) -> None: """Activates the pipeline @@ -590,12 +761,20 @@ def deactivate(self) -> None: @property def has_data(self) -> bool: """Tells if the pipeline contains any data: schemas, extracted files, load packages or loaded packages in the destination""" - return not self.first_run or bool(self.schema_names) or len(self.list_extracted_resources()) > 0 or len(self.list_normalized_load_packages()) > 0 + return ( + not self.first_run + or bool(self.schema_names) + or len(self.list_extracted_load_packages()) > 0 + or len(self.list_normalized_load_packages()) > 0 + ) @property def has_pending_data(self) -> bool: """Tells if the pipeline contains any extracted files or pending load packages""" - return len(self.list_normalized_load_packages()) > 0 or 
len(self.list_extracted_resources()) > 0 + return ( + len(self.list_normalized_load_packages()) > 0 + or len(self.list_extracted_load_packages()) > 0 + ) @property def schemas(self) -> SchemaStorage: @@ -617,21 +796,33 @@ def last_trace(self) -> PipelineTrace: return self._last_trace return load_trace(self.working_dir) + @deprecated( + "Please use list_extracted_load_packages instead. Flat extracted storage format got dropped" + " in dlt 0.4.0", + category=Dlt04DeprecationWarning, + ) def list_extracted_resources(self) -> Sequence[str]: """Returns a list of all the files with extracted resources that will be normalized.""" return self._get_normalize_storage().list_files_to_normalize_sorted() + def list_extracted_load_packages(self) -> Sequence[str]: + """Returns a list of all load packages ids that are or will be normalized.""" + return self._get_normalize_storage().extracted_packages.list_packages() + def list_normalized_load_packages(self) -> Sequence[str]: """Returns a list of all load packages ids that are or will be loaded.""" return self._get_load_storage().list_normalized_packages() def list_completed_load_packages(self) -> Sequence[str]: """Returns a list of all load package ids that are completely loaded""" - return self._get_load_storage().list_completed_packages() + return self._get_load_storage().list_loaded_packages() def get_load_package_info(self, load_id: str) -> LoadPackageInfo: - """Returns information on normalized/completed package with given load_id, all jobs and their statuses.""" - return self._get_load_storage().get_load_package_info(load_id) + """Returns information on extracted/normalized/completed package with given load_id, all jobs and their statuses.""" + try: + return self._get_load_storage().get_load_package_info(load_id) + except LoadPackageNotFound: + return self._get_normalize_storage().extracted_packages.get_load_package_info(load_id) def list_failed_jobs_in_package(self, load_id: str) -> Sequence[LoadJobInfo]: """List all failed jobs and associated error messages for a specified `load_id`""" @@ -641,21 +832,27 @@ def drop_pending_packages(self, with_partial_loads: bool = True) -> None: """Deletes all extracted and normalized packages, including those that are partially loaded by default""" # delete normalized packages load_storage = self._get_load_storage() - for load_id in load_storage.list_normalized_packages(): - package_info = load_storage.get_load_package_info(load_id) - if LoadStorage.is_package_partially_loaded(package_info) and not with_partial_loads: + for load_id in load_storage.normalized_packages.list_packages(): + package_info = load_storage.normalized_packages.get_load_package_info(load_id) + if PackageStorage.is_package_partially_loaded(package_info) and not with_partial_loads: continue - package_path = load_storage.get_normalized_package_path(load_id) - load_storage.storage.delete_folder(package_path, recursively=True) + load_storage.normalized_packages.delete_package(load_id) # delete extracted files normalize_storage = self._get_normalize_storage() - normalize_storage.delete_extracted_files(normalize_storage.list_files_to_normalize_sorted()) + for load_id in normalize_storage.extracted_packages.list_packages(): + normalize_storage.extracted_packages.delete_package(load_id) @with_schemas_sync def sync_schema(self, schema_name: str = None, credentials: Any = None) -> TSchemaTables: """Synchronizes the schema `schema_name` with the destination. 
If no name is provided, the default schema will be synchronized.""" if not schema_name and not self.default_schema_name: - raise PipelineConfigMissing(self.pipeline_name, "default_schema_name", "load", "Pipeline contains no schemas. Please extract any data with `extract` or `run` methods.") + raise PipelineConfigMissing( + self.pipeline_name, + "default_schema_name", + "load", + "Pipeline contains no schemas. Please extract any data with `extract` or `run`" + " methods.", + ) schema = self.schemas[schema_name] if schema_name else self.default_schema client_config = self._get_destination_client_initial_config(credentials) @@ -681,19 +878,19 @@ def get_local_state_val(self, key: str) -> Any: state = self._container[StateInjectableContext].state except ContextDefaultCannotBeCreated: state = self._get_state() - return state["_local"][key] # type: ignore + return state["_local"][key] # type: ignore def sql_client(self, schema_name: str = None, credentials: Any = None) -> SqlClientBase[Any]: """Returns a sql client configured to query/change the destination and dataset that were used to load the data. - Use the client with `with` statement to manage opening and closing connection to the destination: - >>> with pipeline.sql_client() as client: - >>> with client.execute_query( - >>> "SELECT id, name, email FROM customers WHERE id = %s", 10 - >>> ) as cursor: - >>> print(cursor.fetchall()) - - The client is authenticated and defaults all queries to dataset_name used by the pipeline. You can provide alternative - `schema_name` which will be used to normalize dataset name and alternative `credentials`. + Use the client with `with` statement to manage opening and closing connection to the destination: + >>> with pipeline.sql_client() as client: + >>> with client.execute_query( + >>> "SELECT id, name, email FROM customers WHERE id = %s", 10 + >>> ) as cursor: + >>> print(cursor.fetchall()) + + The client is authenticated and defaults all queries to dataset_name used by the pipeline. You can provide alternative + `schema_name` which will be used to normalize dataset name and alternative `credentials`. """ # if not self.default_schema_name and not schema_name: # raise PipelineConfigMissing( @@ -707,12 +904,12 @@ def sql_client(self, schema_name: str = None, credentials: Any = None) -> SqlCli def destination_client(self, schema_name: str = None, credentials: Any = None) -> JobClientBase: """Get the destination job client for the configured destination - Use the client with `with` statement to manage opening and closing connection to the destination: - >>> with pipeline.destination_client() as client: - >>> client.drop_storage() # removes storage which typically wipes all data in it + Use the client with `with` statement to manage opening and closing connection to the destination: + >>> with pipeline.destination_client() as client: + >>> client.drop_storage() # removes storage which typically wipes all data in it - The client is authenticated. You can provide alternative `schema_name` which will be used to normalize dataset name and alternative `credentials`. - If no schema name is provided and no default schema is present in the pipeline, and ad hoc schema will be created and discarded after use. + The client is authenticated. You can provide alternative `schema_name` which will be used to normalize dataset name and alternative `credentials`. + If no schema name is provided and no default schema is present in the pipeline, and ad hoc schema will be created and discarded after use. 
""" schema = self._get_schema_or_create(schema_name) client_config = self._get_destination_client_initial_config(credentials) @@ -732,14 +929,27 @@ def _sql_job_client(self, schema: Schema, credentials: Any = None) -> SqlJobClie if isinstance(client, SqlJobClientBase): return client else: - raise SqlClientNotAvailable(self.pipeline_name, self.destination.__name__) + raise SqlClientNotAvailable(self.pipeline_name, self.destination.destination_name) def _get_normalize_storage(self) -> NormalizeStorage: - return NormalizeStorage(True, self._normalize_storage_config) + return NormalizeStorage(True, self._normalize_storage_config()) def _get_load_storage(self) -> LoadStorage: caps = self._get_destination_capabilities() - return LoadStorage(True, caps.preferred_loader_file_format, caps.supported_loader_file_formats, self._load_storage_config) + return LoadStorage( + True, + caps.preferred_loader_file_format, + caps.supported_loader_file_formats, + self._load_storage_config(), + ) + + def _normalize_storage_config(self) -> NormalizeStorageConfiguration: + return NormalizeStorageConfiguration( + normalize_volume_path=os.path.join(self.working_dir, "normalize") + ) + + def _load_storage_config(self) -> LoadStorageConfiguration: + return LoadStorageConfiguration(load_volume_path=os.path.join(self.working_dir, "load")) def _init_working_dir(self, pipeline_name: str, pipelines_dir: str) -> None: self.pipeline_name = pipeline_name @@ -753,21 +963,27 @@ def _init_working_dir(self, pipeline_name: str, pipelines_dir: str) -> None: if self.full_refresh: self._wipe_working_folder() - def _configure(self, import_schema_path: str, export_schema_path: str, must_attach_to_local_pipeline: bool) -> None: + def _configure( + self, import_schema_path: str, export_schema_path: str, must_attach_to_local_pipeline: bool + ) -> None: # create schema storage and folders self._schema_storage_config = SchemaStorageConfiguration( schema_volume_path=os.path.join(self.working_dir, "schemas"), import_schema_path=import_schema_path, - export_schema_path=export_schema_path + export_schema_path=export_schema_path, ) # create default configs - self._normalize_storage_config = NormalizeStorageConfiguration(normalize_volume_path=os.path.join(self.working_dir, "normalize")) - self._load_storage_config = LoadStorageConfiguration(load_volume_path=os.path.join(self.working_dir, "load"),) + self._normalize_storage_config() + self._load_storage_config() # are we running again? 
has_state = self._pipeline_storage.has_file(Pipeline.STATE_FILE) if must_attach_to_local_pipeline and not has_state: - raise CannotRestorePipelineException(self.pipeline_name, self.pipelines_dir, f"the pipeline was not found in {self.working_dir}.") + raise CannotRestorePipelineException( + self.pipeline_name, + self.pipelines_dir, + f"the pipeline was not found in {self.working_dir}.", + ) self.must_attach_to_local_pipeline = must_attach_to_local_pipeline # attach to pipeline if folder exists and contains state @@ -795,115 +1011,49 @@ def _wipe_working_folder(self) -> None: def _attach_pipeline(self) -> None: pass - def _data_to_sources(self, - data: Any, - schema: Schema, - table_name: str = None, - parent_table_name: str = None, - write_disposition: TWriteDisposition = None, - columns: TAnySchemaColumns = None, - primary_key: TColumnNames = None - ) -> List[DltSource]: - - def apply_hint_args(resource: DltResource) -> None: - # apply hints only if any of the hints is present, table_name must be always present - if table_name or parent_table_name or write_disposition or columns or primary_key: - resource.apply_hints(table_name or resource.table_name or resource.name, parent_table_name, write_disposition, columns, primary_key) - - def choose_schema() -> Schema: - """Except of explicitly passed schema, use a clone that will get discarded if extraction fails""" - if schema: - return schema - if self.default_schema_name: - return self.default_schema.clone() - return self._make_schema_with_default_name() - - effective_schema = choose_schema() - - # a list of sources or a list of resources may be passed as data - sources: List[DltSource] = [] - resources: List[DltResource] = [] - - def append_data(data_item: Any) -> None: - if isinstance(data_item, DltSource): - # if schema is explicit then override source schema - if schema: - data_item.schema = schema - # try to apply hints to resources - _resources = data_item.resources.values() - for r in _resources: - apply_hint_args(r) - sources.append(data_item) - elif isinstance(data_item, DltResource): - # apply hints - apply_hint_args(data_item) - # do not set section to prevent source that represent a standalone resource - # to overwrite other standalone resources (ie. 
parents) in that source - sources.append( - DltSource(effective_schema.name, "", effective_schema, [data_item]) - ) - else: - # iterator/iterable/generator - # create resource first without table template - resource = DltResource.from_data(data_item, name=table_name, section=self.pipeline_name) - # apply hints - apply_hint_args(resource) - resources.append(resource) - - if isinstance(data, C_Sequence) and len(data) > 0: - # if first element is source or resource - if isinstance(data[0], (DltResource, DltSource)): - for item in data: - append_data(item) - else: - append_data(data) - else: - append_data(data) - - if resources: - # add all the appended resources in one source - sources.append(DltSource(effective_schema.name, self.pipeline_name, effective_schema, resources)) - - return sources - - def _extract_source(self, storage: ExtractorStorage, source: DltSource, max_parallel_items: int, workers: int) -> str: - # discover the schema from source - source_schema = source.schema - source_schema.update_normalizers() + def _extract_source( + self, extract: Extract, source: DltSource, max_parallel_items: int, workers: int + ) -> str: + # discover the existing pipeline schema + if source.schema.name in self.schemas: + # use clone until extraction complete + pipeline_schema = self.schemas[source.schema.name].clone() + # apply all changes in the source schema to pipeline schema + # NOTE: we do not apply contracts to changes done programmatically + pipeline_schema.update_schema(source.schema) + # replace schema in the source + source.schema = pipeline_schema # extract into pipeline schema - extract_id = extract_with_schema(storage, source, source_schema, self.collector, max_parallel_items, workers) + load_id = extract.extract(source, max_parallel_items, workers) # save import with fully discovered schema - self._schema_storage.save_import_schema_if_not_exists(source_schema) + self._schema_storage.save_import_schema_if_not_exists(source.schema) - # if source schema does not exist in the pipeline - if source_schema.name not in self._schema_storage: - # create new schema - self._schema_storage.save_schema(source_schema) - - # update pipeline schema (do contract checks here) - pipeline_schema = self._schema_storage[source_schema.name] - pipeline_schema.update_schema(source_schema) + # update live schema but not update the store yet + self._schema_storage.update_live_schema(source.schema) # set as default if this is first schema in pipeline if not self.default_schema_name: # this performs additional validations as schema contains the naming module - self._set_default_schema_name(pipeline_schema) + self._set_default_schema_name(source.schema) - return extract_id + return load_id - def _get_destination_client_initial_config(self, destination: DestinationReference = None, credentials: Any = None, as_staging: bool = False) -> DestinationClientConfiguration: + def _get_destination_client_initial_config( + self, destination: TDestination = None, credentials: Any = None, as_staging: bool = False + ) -> DestinationClientConfiguration: destination = destination or self.destination if not destination: raise PipelineConfigMissing( self.pipeline_name, "destination", "load", - "Please provide `destination` argument to `pipeline`, `run` or `load` method directly or via .dlt config.toml file or environment variable." 
+ "Please provide `destination` argument to `pipeline`, `run` or `load` method" + " directly or via .dlt config.toml file or environment variable.", ) # create initial destination client config - client_spec = destination.spec() + client_spec = destination.spec # initialize explicit credentials if not as_staging: # explicit credentials passed to dlt.pipeline should not be applied to staging @@ -911,27 +1061,41 @@ def _get_destination_client_initial_config(self, destination: DestinationReferen if credentials is not None and not isinstance(credentials, CredentialsConfiguration): # use passed credentials as initial value. initial value may resolve credentials credentials = initialize_credentials( - client_spec.get_resolvable_fields()["credentials"], - credentials + client_spec.get_resolvable_fields()["credentials"], credentials ) # this client support many schemas and datasets if issubclass(client_spec, DestinationClientDwhConfiguration): if not self.dataset_name and self.full_refresh: - logger.warning("Full refresh may not work if dataset name is not set. Please set the dataset_name argument in dlt.pipeline or run method") + logger.warning( + "Full refresh may not work if dataset name is not set. Please set the" + " dataset_name argument in dlt.pipeline or run method" + ) # set default schema name to load all incoming data to a single dataset, no matter what is the current schema name - default_schema_name = None if self.config.use_single_dataset else self.default_schema_name + default_schema_name = ( + None if self.config.use_single_dataset else self.default_schema_name + ) if issubclass(client_spec, DestinationClientStagingConfiguration): - return client_spec(dataset_name=self.dataset_name, default_schema_name=default_schema_name, credentials=credentials, as_staging=as_staging) - return client_spec(dataset_name=self.dataset_name, default_schema_name=default_schema_name, credentials=credentials) + return client_spec( + dataset_name=self.dataset_name, + default_schema_name=default_schema_name, + credentials=credentials, + as_staging=as_staging, + ) + return client_spec( + dataset_name=self.dataset_name, + default_schema_name=default_schema_name, + credentials=credentials, + ) return client_spec(credentials=credentials) - def _get_destination_clients(self, + def _get_destination_clients( + self, schema: Schema, initial_config: DestinationClientConfiguration = None, - initial_staging_config: DestinationClientConfiguration = None + initial_staging_config: DestinationClientConfiguration = None, ) -> Tuple[JobClientBase, JobClientBase]: try: # resolve staging config in order to pass it to destination client config @@ -939,14 +1103,20 @@ def _get_destination_clients(self, if self.staging: if not initial_staging_config: # this is just initial config - without user configuration injected - initial_staging_config = self._get_destination_client_initial_config(self.staging, as_staging=True) + initial_staging_config = self._get_destination_client_initial_config( + self.staging, as_staging=True + ) # create the client - that will also resolve the config staging_client = self.staging.client(schema, initial_staging_config) if not initial_config: # config is not provided then get it with injected credentials initial_config = self._get_destination_client_initial_config(self.destination) # attach the staging client config to destination client config - if its type supports it - if self.staging and isinstance(initial_config, DestinationClientDwhWithStagingConfiguration) and isinstance(staging_client.config 
,DestinationClientStagingConfiguration): + if ( + self.staging + and isinstance(initial_config, DestinationClientDwhWithStagingConfiguration) + and isinstance(staging_client.config, DestinationClientStagingConfiguration) + ): initial_config.staging_config = staging_client.config # create instance with initial_config properly set client = self.destination.client(schema, initial_config) @@ -954,19 +1124,20 @@ def _get_destination_clients(self, except ModuleNotFoundError: client_spec = self.destination.spec() raise MissingDependencyException( - f"{client_spec.destination_name} destination", - [f"{version.DLT_PKG_NAME}[{client_spec.destination_name}]"], - "Dependencies for specific destinations are available as extras of dlt" + f"{client_spec.destination_type} destination", + [f"{version.DLT_PKG_NAME}[{client_spec.destination_type}]"], + "Dependencies for specific destinations are available as extras of dlt", ) def _get_destination_capabilities(self) -> DestinationCapabilitiesContext: if not self.destination: - raise PipelineConfigMissing( - self.pipeline_name, - "destination", - "normalize", - "Please provide `destination` argument to `pipeline`, `run` or `load` method directly or via .dlt config.toml file or environment variable." - ) + raise PipelineConfigMissing( + self.pipeline_name, + "destination", + "normalize", + "Please provide `destination` argument to `pipeline`, `run` or `load` method" + " directly or via .dlt config.toml file or environment variable.", + ) return self.destination.capabilities() def _get_staging_capabilities(self) -> Optional[DestinationCapabilitiesContext]: @@ -992,32 +1163,56 @@ def _set_context(self, is_active: bool) -> None: # set destination context on activation if self.destination: # inject capabilities context - self._container[DestinationCapabilitiesContext] = self._get_destination_capabilities() + self._container[DestinationCapabilitiesContext] = ( + self._get_destination_capabilities() + ) else: # remove destination context on deactivation if DestinationCapabilitiesContext in self._container: del self._container[DestinationCapabilitiesContext] - def _set_destinations(self, destination: TDestinationReferenceArg, staging: TDestinationReferenceArg) -> None: - destination_mod = DestinationReference.from_name(destination) - self.destination = destination_mod or self.destination + def _set_destinations( + self, + destination: TDestinationReferenceArg, + destination_name: Optional[str] = None, + staging: Optional[TDestinationReferenceArg] = None, + staging_name: Optional[str] = None, + ) -> None: + # destination_mod = DestinationReference.from_name(destination) + if destination: + self.destination = Destination.from_reference( + destination, destination_name=destination_name + ) - if destination and not self.destination.capabilities().supported_loader_file_formats and not staging: - logger.warning(f"The destination {destination_mod.__name__} requires the filesystem staging destination to be set, but it was not provided. Setting it to 'filesystem'.") + if ( + self.destination + and not self.destination.capabilities().supported_loader_file_formats + and not staging + ): + logger.warning( + f"The destination {self.destination.destination_name} requires the filesystem" + " staging destination to be set, but it was not provided. Setting it to" + " 'filesystem'." 
+ ) staging = "filesystem" + staging_name = "filesystem" if staging: - staging_module = DestinationReference.from_name(staging) - if staging_module and not issubclass(staging_module.spec(), DestinationClientStagingConfiguration): - raise DestinationNoStagingMode(staging_module.__name__) - self.staging = staging_module or self.staging + staging_module = Destination.from_reference(staging, destination_name=staging_name) + if staging_module and not issubclass( + staging_module.spec, DestinationClientStagingConfiguration + ): + raise DestinationNoStagingMode(staging_module.destination_name) + self.staging = staging_module with self._maybe_destination_capabilities(): # default normalizers must match the destination self._set_default_normalizers() @contextmanager - def _maybe_destination_capabilities(self, loader_file_format: TLoaderFileFormat = None) -> Iterator[DestinationCapabilitiesContext]: + def _maybe_destination_capabilities( + self, loader_file_format: TLoaderFileFormat = None + ) -> Iterator[DestinationCapabilitiesContext]: try: caps: DestinationCapabilitiesContext = None injected_caps: ContextManager[DestinationCapabilitiesContext] = None @@ -1028,9 +1223,18 @@ def _maybe_destination_capabilities(self, loader_file_format: TLoaderFileFormat caps = injected_caps.__enter__() caps.preferred_loader_file_format = self._resolve_loader_file_format( - DestinationReference.to_name(self.destination), - DestinationReference.to_name(self.staging) if self.staging else None, - destination_caps, stage_caps, loader_file_format) + self.destination.destination_name, + ( + # DestinationReference.to_name(self.destination), + self.staging.destination_name + if self.staging + else None + ), + # DestinationReference.to_name(self.staging) if self.staging else None, + destination_caps, + stage_caps, + loader_file_format, + ) caps.supported_loader_file_formats = ( destination_caps.supported_staging_file_formats if stage_caps else None ) or destination_caps.supported_loader_file_formats @@ -1041,17 +1245,21 @@ def _maybe_destination_capabilities(self, loader_file_format: TLoaderFileFormat @staticmethod def _resolve_loader_file_format( - destination: str, - staging: str, - dest_caps: DestinationCapabilitiesContext, - stage_caps: DestinationCapabilitiesContext, - file_format: TLoaderFileFormat) -> TLoaderFileFormat: - + destination: str, + staging: str, + dest_caps: DestinationCapabilitiesContext, + stage_caps: DestinationCapabilitiesContext, + file_format: TLoaderFileFormat, + ) -> TLoaderFileFormat: possible_file_formats = dest_caps.supported_loader_file_formats if stage_caps: if not dest_caps.supported_staging_file_formats: raise DestinationLoadingViaStagingNotSupported(destination) - possible_file_formats = [f for f in dest_caps.supported_staging_file_formats if f in stage_caps.supported_loader_file_formats] + possible_file_formats = [ + f + for f in dest_caps.supported_staging_file_formats + if f in stage_caps.supported_loader_file_formats + ] if not file_format: if not stage_caps: if not dest_caps.preferred_loader_file_format: @@ -1062,7 +1270,12 @@ def _resolve_loader_file_format( else: file_format = possible_file_formats[0] if len(possible_file_formats) > 0 else None if file_format not in possible_file_formats: - raise DestinationIncompatibleLoaderFileFormatException(destination, staging, file_format, set(possible_file_formats) - INTERNAL_LOADER_FILE_FORMATS) + raise DestinationIncompatibleLoaderFileFormatException( + destination, + staging, + file_format, + set(possible_file_formats) - 
INTERNAL_LOADER_FILE_FORMATS, + ) return file_format def _set_default_normalizers(self) -> None: @@ -1076,7 +1289,9 @@ def _set_dataset_name(self, new_dataset_name: str) -> None: fields = self.destination.spec().get_resolvable_fields() dataset_name_type = fields.get("dataset_name") # if dataset is required (default!) we create a default dataset name - destination_needs_dataset = dataset_name_type is not None and not is_optional_type(dataset_name_type) + destination_needs_dataset = dataset_name_type is not None and not is_optional_type( + dataset_name_type + ) # if destination is not specified - generate dataset if not self.destination or destination_needs_dataset: new_dataset_name = self.pipeline_name + self.DEFAULT_DATASET_SUFFIX @@ -1110,24 +1325,24 @@ def _inject_schema(self, schema: Schema) -> None: if not self.default_schema_name: self._set_default_schema_name(schema) - def _get_load_info(self, load: Load) -> LoadInfo: - started_at: datetime.datetime = None - if self._trace: - started_at = self._trace.started_at - return load.get_load_info(self, started_at) + def _get_step_info(self, step: WithStepInfo[TStepMetrics, TStepInfo]) -> TStepInfo: + return step.get_step_info(self) def _get_state(self) -> TPipelineState: try: state = json_decode_state(self._pipeline_storage.load(Pipeline.STATE_FILE)) - return migrate_state(self.pipeline_name, state, state["_state_engine_version"], STATE_ENGINE_VERSION) + return migrate_state( + self.pipeline_name, state, state["_state_engine_version"], STATE_ENGINE_VERSION + ) except FileNotFoundError: + # do not set the state hash, this will happen on first merge return { "_state_version": 0, "_state_engine_version": STATE_ENGINE_VERSION, - "_local": { - "first_run": True - } + "_local": {"first_run": True}, } + # state["_version_hash"] = generate_version_hash(state) + # return state def _optional_sql_job_client(self, schema_name: str) -> Optional[SqlJobClientBase]: try: @@ -1157,18 +1372,29 @@ def _restore_state_from_destination(self) -> Optional[TPipelineState]: if isinstance(job_client, WithStateSync): state = load_state_from_destination(self.pipeline_name, job_client) if state is None: - logger.info(f"The state was not found in the destination {self.destination.__name__}:{dataset_name}") + logger.info( + "The state was not found in the destination" + f" {self.destination.destination_description}:{dataset_name}" + ) else: - logger.info(f"The state was restored from the destination {self.destination.__name__}:{dataset_name}") + logger.info( + "The state was restored from the destination" + f" {self.destination.destination_description}:{dataset_name}" + ) else: state = None - logger.info(f"Destination does not support metadata storage {self.destination.__name__}:{dataset_name}") + logger.info( + "Destination does not support state sync" + f" {self.destination.destination_description}:{dataset_name}" + ) return state finally: # restore the use_single_dataset option self.config.use_single_dataset = use_single_dataset - def _get_schemas_from_destination(self, schema_names: Sequence[str], always_download: bool = False) -> Sequence[Schema]: + def _get_schemas_from_destination( + self, schema_names: Sequence[str], always_download: bool = False + ) -> Sequence[Schema]: # check which schemas are present in the pipeline and restore missing schemas restored_schemas: List[Schema] = [] for schema_name in schema_names: @@ -1177,17 +1403,27 @@ def _get_schemas_from_destination(self, schema_names: Sequence[str], always_down if not 
self._schema_storage.has_schema(schema.name) or always_download: with self._get_destination_clients(schema)[0] as job_client: if not isinstance(job_client, WithStateSync): - logger.info(f"Destination does not support metadata storage {self.destination.__name__}") + logger.info( + "Destination does not support restoring of pipeline state" + f" {self.destination.destination_name}" + ) return restored_schemas schema_info = job_client.get_stored_schema() if schema_info is None: - logger.info(f"The schema {schema.name} was not found in the destination {self.destination.__name__}:{self.dataset_name}") + logger.info( + f"The schema {schema.name} was not found in the destination" + f" {self.destination.destination_name}:{self.dataset_name}" + ) # try to import schema with contextlib.suppress(FileNotFoundError): self._schema_storage.load_schema(schema.name) else: schema = Schema.from_dict(json.loads(schema_info.schema)) - logger.info(f"The schema {schema.name} version {schema.version} hash {schema.stored_version_hash} was restored from the destination {self.destination.__name__}:{self.dataset_name}") + logger.info( + f"The schema {schema.name} version {schema.version} hash" + f" {schema.stored_version_hash} was restored from the destination" + f" {self.destination.destination_name}:{self.dataset_name}" + ) restored_schemas.append(schema) return restored_schemas @@ -1202,38 +1438,13 @@ def managed_state(self, *, extract_state: bool = False) -> Iterator[TPipelineSta backup_state = self._get_state() # restore original pipeline props self._state_to_props(backup_state) - # synchronize schema storage with initial list of schemas, note that we'll not be able to synchronize the schema content - if self._schema_storage: - # TODO: we should restore schemas backup here - for existing_schema_name in self._schema_storage.list_schemas(): - if existing_schema_name not in self.schema_names: - self._schema_storage.remove_schema(existing_schema_name) # raise original exception raise else: - self._props_to_state(state) - - backup_state = self._get_state() - # do not compare local states - local_state = state.pop("_local") - backup_state.pop("_local") - - # check if any state element was changed - merged_state = merge_state_if_changed(backup_state, state) - # extract state only when there's change in the state or state was not yet extracted AND we actually want to do it - if (merged_state or "_last_extracted_at" not in local_state) and extract_state: - # print(f'EXTRACT STATE merged: {bool(merged_state)} extracted timestamp in {"_last_extracted_at" not in local_state}') - merged_state = self._extract_state(merged_state or state) - local_state["_last_extracted_at"] = pendulum.now() - - # if state is modified and is not being extracted, mark it to be extracted next time - if not extract_state and merged_state: - local_state.pop("_last_extracted_at", None) - - # always save state locally as local_state is not compared - merged_state = merged_state or state - merged_state["_local"] = local_state - self._save_state(merged_state) + # this modifies state in place + self._bump_version_and_extract_state(state, extract_state) + # so we save modified state here + self._save_state(state) def _state_to_props(self, state: TPipelineState) -> None: """Write `state` to pipeline props.""" @@ -1243,11 +1454,34 @@ def _state_to_props(self, state: TPipelineState) -> None: for prop in Pipeline.LOCAL_STATE_PROPS: if prop in state["_local"] and not prop.startswith("_"): setattr(self, prop, state["_local"][prop]) # type: ignore - if 
"destination" in state: - self._set_destinations(DestinationReference.from_name(self.destination), DestinationReference.from_name(self.staging) if "staging" in state else None ) - - def _props_to_state(self, state: TPipelineState) -> None: - """Write pipeline props to `state`""" + # staging and destination are taken from state only if not yet set in the pipeline + if not self.destination: + self._set_destinations( + destination=state.get("destination_type"), + destination_name=state.get("destination_name"), + staging=state.get("staging_type"), + staging_name=state.get("staging_name"), + ) + else: + # issue warnings that state destination/staging got ignored + state_destination = state.get("destination_type") + if state_destination: + if self.destination.destination_type != state_destination: + logger.warning( + f"The destination {state_destination}:{state.get('destination_name')} in" + " state differs from destination" + f" {self.destination.destination_type}:{self.destination.destination_name} in" + " pipeline and will be ignored" + ) + state_staging = state.get("staging_type") + if state_staging: + logger.warning( + "The state staging destination" + f" {state_staging}:{state.get('staging_name')} is ignored" + ) + + def _props_to_state(self, state: TPipelineState) -> TPipelineState: + """Write pipeline props to `state`, returns it for chaining""" for prop in Pipeline.STATE_PROPS: if not prop.startswith("_"): state[prop] = getattr(self, prop) # type: ignore @@ -1255,23 +1489,42 @@ def _props_to_state(self, state: TPipelineState) -> None: if not prop.startswith("_"): state["_local"][prop] = getattr(self, prop) # type: ignore if self.destination: - state["destination"] = self.destination.__name__ + state["destination_type"] = self.destination.destination_type + state["destination_name"] = self.destination.destination_name if self.staging: - state["staging"] = self.staging.__name__ - state["schema_names"] = self._schema_storage.list_schemas() + state["staging_type"] = self.staging.destination_type + state["staging_name"] = self.staging.destination_name + state["schema_names"] = self._list_schemas_sorted() + return state + + def _bump_version_and_extract_state( + self, state: TPipelineState, extract_state: bool, extract: Extract = None + ) -> None: + """Merges existing state into `state` and extracts state using `storage` if extract_state is True. + + Storage will be created on demand. In that case the extracted package will be immediately committed. 
+ """ + _, hash_, _ = bump_version_if_modified(self._props_to_state(state)) + should_extract = hash_ != state["_local"].get("_last_extracted_hash") + if should_extract and extract_state: + data = state_resource(state) + extract_ = extract or Extract( + self._schema_storage, self._normalize_storage_config(), original_data=data + ) + self._extract_source(extract_, data_to_sources(data, self)[0], 1, 1) + state["_local"]["_last_extracted_at"] = pendulum.now() + state["_local"]["_last_extracted_hash"] = hash_ + # commit only if we created storage + if not extract: + extract_.commit_packages() + + def _list_schemas_sorted(self) -> List[str]: + """Lists schema names sorted to have deterministic state""" + return sorted(self._schema_storage.list_schemas()) def _save_state(self, state: TPipelineState) -> None: self._pipeline_storage.save(Pipeline.STATE_FILE, json_encode_state(state)) - def _extract_state(self, state: TPipelineState) -> TPipelineState: - # this will extract the state into current load package and update the schema with the _dlt_pipeline_state table - # note: the schema will be persisted because the schema saving decorator is over the state manager decorator for extract - state_source = DltSource(self.default_schema.name, self.pipeline_name, self.default_schema, [state_resource(state)]) - storage = ExtractorStorage(self._normalize_storage_config) - extract_id = extract_with_schema(storage, state_source, self.default_schema, _NULL_COLLECTOR, 1, 1) - storage.commit_extract_files(extract_id) - return state - def __getstate__(self) -> Any: # pickle only the SupportsPipeline protocol fields return {"pipeline_name": self.pipeline_name} diff --git a/dlt/pipeline/platform.py b/dlt/pipeline/platform.py new file mode 100644 index 0000000000..c8014d5ae7 --- /dev/null +++ b/dlt/pipeline/platform.py @@ -0,0 +1,118 @@ +"""Implements SupportsTracking""" +from typing import Any, cast, TypedDict, List +import requests +from dlt.common.managed_thread_pool import ManagedThreadPool +from urllib.parse import urljoin + +from dlt.pipeline.trace import PipelineTrace, PipelineStepTrace, TPipelineStep, SupportsPipeline +from dlt.common import json +from dlt.common.runtime import logger +from dlt.common.pipeline import LoadInfo +from dlt.common.schema.typing import TStoredSchema + +_THREAD_POOL: ManagedThreadPool = ManagedThreadPool(1) +TRACE_URL_SUFFIX = "/trace" +STATE_URL_SUFFIX = "/state" + + +class TPipelineSyncPayload(TypedDict): + pipeline_name: str + destination_name: str + destination_displayable_credentials: str + destination_fingerprint: str + dataset_name: str + schemas: List[TStoredSchema] + + +def _send_trace_to_platform(trace: PipelineTrace, pipeline: SupportsPipeline) -> None: + """ + Send the full trace after a run operation to the platform + TODO: Migrate this to open telemetry in the next iteration + """ + if not pipeline.runtime_config.dlthub_dsn: + return + + def _future_send() -> None: + try: + trace_dump = json.dumps(trace.asdict()) + url = pipeline.runtime_config.dlthub_dsn + TRACE_URL_SUFFIX + response = requests.put(url, data=trace_dump) + if response.status_code != 200: + logger.debug( + f"Failed to send trace to platform, response code: {response.status_code}" + ) + except Exception as e: + logger.debug(f"Exception while sending trace to platform: {e}") + + _THREAD_POOL.thread_pool.submit(_future_send) + + # trace_dump = json.dumps(trace.asdict(), pretty=True) + # with open(f"trace.json", "w") as f: + # f.write(trace_dump) + + +def _sync_schemas_to_platform(trace: PipelineTrace, 
pipeline: SupportsPipeline) -> None: + if not pipeline.runtime_config.dlthub_dsn: + return + + # sync only if load step was processed + load_info: LoadInfo = None + for step in trace.steps: + if step.step == "load": + load_info = cast(LoadInfo, step.step_info) + + if not load_info: + return + + payload = TPipelineSyncPayload( + pipeline_name=pipeline.pipeline_name, + destination_name=load_info.destination_name, + destination_displayable_credentials=load_info.destination_displayable_credentials, + destination_fingerprint=load_info.destination_fingerprint, + dataset_name=load_info.dataset_name, + schemas=[], + ) + + # attach all schemas + for schema_name in pipeline.schemas: + schema = pipeline.schemas[schema_name] + payload["schemas"].append(schema.to_dict()) + + def _future_send() -> None: + try: + url = pipeline.runtime_config.dlthub_dsn + STATE_URL_SUFFIX + response = requests.put(url, data=json.dumps(payload)) + if response.status_code != 200: + logger.debug( + f"Failed to send state to platform, response code: {response.status_code}" + ) + except Exception as e: + logger.debug(f"Exception while sending state to platform: {e}") + + _THREAD_POOL.thread_pool.submit(_future_send) + + +def on_start_trace(trace: PipelineTrace, step: TPipelineStep, pipeline: SupportsPipeline) -> None: + pass + + +def on_start_trace_step( + trace: PipelineTrace, step: TPipelineStep, pipeline: SupportsPipeline +) -> None: + pass + + +def on_end_trace_step( + trace: PipelineTrace, + step: PipelineStepTrace, + pipeline: SupportsPipeline, + step_info: Any, + send_state: bool, +) -> None: + if send_state: + # also sync schemas to dlthub + _sync_schemas_to_platform(trace, pipeline) + + +def on_end_trace(trace: PipelineTrace, pipeline: SupportsPipeline, send_state: bool) -> None: + _send_trace_to_platform(trace, pipeline) diff --git a/dlt/pipeline/progress.py b/dlt/pipeline/progress.py index 90fc192bb1..89eda4cac5 100644 --- a/dlt/pipeline/progress.py +++ b/dlt/pipeline/progress.py @@ -1,12 +1,18 @@ """Measure the extract, normalize and load progress""" from typing import Union, Literal -from dlt.common.runtime.collector import TqdmCollector as tqdm, LogCollector as log, EnlightenCollector as enlighten, AliveCollector as alive_progress +from dlt.common.runtime.collector import ( + TqdmCollector as tqdm, + LogCollector as log, + EnlightenCollector as enlighten, + AliveCollector as alive_progress, +) from dlt.common.runtime.collector import Collector as _Collector, NULL_COLLECTOR as _NULL_COLLECTOR TSupportedCollectors = Literal["tqdm", "enlighten", "log", "alive_progress"] TCollectorArg = Union[_Collector, TSupportedCollectors] + def _from_name(collector: TCollectorArg) -> _Collector: """Create default collector by name""" if collector is None: diff --git a/dlt/pipeline/state_sync.py b/dlt/pipeline/state_sync.py index 581ed4c2bd..fa3939969b 100644 --- a/dlt/pipeline/state_sync.py +++ b/dlt/pipeline/state_sync.py @@ -1,53 +1,38 @@ +import base64 import binascii -from typing import Any, Optional, cast -import binascii - +from copy import copy +import hashlib +from typing import Tuple, cast import pendulum import dlt - from dlt.common import json from dlt.common.pipeline import TPipelineState from dlt.common.typing import DictStrAny from dlt.common.schema.typing import STATE_TABLE_NAME, TTableSchemaColumns -from dlt.common.destination.reference import JobClientBase, WithStateSync +from dlt.common.destination.reference import WithStateSync, Destination +from dlt.common.utils import compressed_b64decode, 
compressed_b64encode -from dlt.extract.source import DltResource +from dlt.extract import DltResource from dlt.pipeline.exceptions import PipelineStateEngineNoUpgradePathException -from dlt.common.utils import compressed_b64decode, compressed_b64encode # allows to upgrade state when restored with a new version of state logic/schema -STATE_ENGINE_VERSION = 2 +STATE_ENGINE_VERSION = 4 # state table columns STATE_TABLE_COLUMNS: TTableSchemaColumns = { - "version": { - "name": "version", - "data_type": "bigint", - "nullable": False - }, - "engine_version": { - "name": "engine_version", - "data_type": "bigint", - "nullable": False - }, - "pipeline_name": { - "name": "pipeline_name", - "data_type": "text", - "nullable": False - }, - "state": { - "name": "state", + "version": {"name": "version", "data_type": "bigint", "nullable": False}, + "engine_version": {"name": "engine_version", "data_type": "bigint", "nullable": False}, + "pipeline_name": {"name": "pipeline_name", "data_type": "text", "nullable": False}, + "state": {"name": "state", "data_type": "text", "nullable": False}, + "created_at": {"name": "created_at", "data_type": "timestamp", "nullable": False}, + "version_hash": { + "name": "version_hash", "data_type": "text", - "nullable": False - }, - "created_at": { - "name": "created_at", - "data_type": "timestamp", - "nullable": False - } + "nullable": True, + }, # set to nullable so we can migrate existing tables } @@ -72,27 +57,47 @@ def decompress_state(state_str: str) -> DictStrAny: return json.typed_loadb(state_bytes) # type: ignore[no-any-return] -def merge_state_if_changed(old_state: TPipelineState, new_state: TPipelineState, increase_version: bool = True) -> Optional[TPipelineState]: - # we may want to compare hashes like we do with schemas - if json.dumps(old_state, sort_keys=True) == json.dumps(new_state, sort_keys=True): - return None - # TODO: we should probably update smarter ie. 
recursively - old_state.update(new_state) - if increase_version: - old_state["_state_version"] += 1 - return old_state +def generate_version_hash(state: TPipelineState) -> str: + # generates hash out of stored state content, excluding hash itself, version and local state + state_copy = copy(state) + state_copy.pop("_state_version", None) + state_copy.pop("_state_engine_version", None) + state_copy.pop("_version_hash", None) + state_copy.pop("_local", None) + content = json.typed_dumpb(state_copy, sort_keys=True) + h = hashlib.sha3_256(content) + return base64.b64encode(h.digest()).decode("ascii") + + +def bump_version_if_modified(state: TPipelineState) -> Tuple[int, str, str]: + """Bumps the `state` version and version hash if content modified, returns (new version, new hash, old hash) tuple""" + hash_ = generate_version_hash(state) + previous_hash = state.get("_version_hash") + if not previous_hash: + # if hash was not set, set it without bumping the version, that's the initial state + pass + elif hash_ != previous_hash: + state["_state_version"] += 1 + + state["_version_hash"] = hash_ + return state["_state_version"], hash_, previous_hash def state_resource(state: TPipelineState) -> DltResource: + state = copy(state) + state.pop("_local") state_str = compress_state(state) state_doc = { "version": state["_state_version"], "engine_version": state["_state_engine_version"], "pipeline_name": state["pipeline_name"], - "state": state_str, - "created_at": pendulum.now() + "state": state_str, + "created_at": pendulum.now(), + "version_hash": state["_version_hash"], } - return dlt.resource([state_doc], name=STATE_TABLE_NAME, write_disposition="append", columns=STATE_TABLE_COLUMNS) + return dlt.resource( + [state_doc], name=STATE_TABLE_NAME, write_disposition="append", columns=STATE_TABLE_COLUMNS + ) def load_state_from_destination(pipeline_name: str, client: WithStateSync) -> TPipelineState: @@ -104,16 +109,33 @@ def load_state_from_destination(pipeline_name: str, client: WithStateSync) -> TP return migrate_state(pipeline_name, s, s["_state_engine_version"], STATE_ENGINE_VERSION) -def migrate_state(pipeline_name: str, state: DictStrAny, from_engine: int, to_engine: int) -> TPipelineState: +def migrate_state( + pipeline_name: str, state: DictStrAny, from_engine: int, to_engine: int +) -> TPipelineState: if from_engine == to_engine: return cast(TPipelineState, state) if from_engine == 1 and to_engine > 1: state["_local"] = {} from_engine = 2 + if from_engine == 2 and to_engine > 2: + # recompute the version hash (introduced in engine 3) + state["_version_hash"] = generate_version_hash(state) # type: ignore[arg-type] + from_engine = 3 + if from_engine == 3 and to_engine > 3: + if state.get("destination"): + state["destination_type"] = state["destination"] + state["destination_name"] = Destination.to_name(state["destination"]) + del state["destination"] + if state.get("staging"): + state["staging_type"] = state["staging"] + state["staging_name"] = Destination.to_name(state["staging"]) + del state["staging"] + from_engine = 4 # check state engine - state["_state_engine_version"] = from_engine if from_engine != to_engine: - raise PipelineStateEngineNoUpgradePathException(pipeline_name, state["_state_engine_version"], from_engine, to_engine) - + raise PipelineStateEngineNoUpgradePathException( + pipeline_name, state["_state_engine_version"], from_engine, to_engine + ) + state["_state_engine_version"] = from_engine return cast(TPipelineState, state) diff --git a/dlt/pipeline/trace.py b/dlt/pipeline/trace.py index
2ba71396f6..5679884b0b 100644 --- a/dlt/pipeline/trace.py +++ b/dlt/pipeline/trace.py @@ -1,20 +1,34 @@ +import contextlib +from copy import copy import os import pickle import datetime # noqa: 251 import dataclasses -from collections.abc import Sequence as C_Sequence -from typing import Any, List, NamedTuple, Optional, Protocol, Sequence +from typing import Any, List, NamedTuple, Optional, Protocol, Sequence import humanize -from dlt.common import pendulum -from dlt.common.runtime.logger import suppress_and_warn +from dlt.common import pendulum, json from dlt.common.configuration import is_secret_hint +from dlt.common.configuration.exceptions import ContextDefaultCannotBeCreated +from dlt.common.configuration.specs.config_section_context import ConfigSectionContext from dlt.common.configuration.utils import _RESOLVED_TRACES -from dlt.common.pipeline import ExtractDataInfo, ExtractInfo, LoadInfo, NormalizeInfo, SupportsPipeline -from dlt.common.typing import DictStrAny, StrAny -from dlt.common.utils import uniq_id +from dlt.common.configuration.container import Container +from dlt.common.exceptions import ExceptionTrace, ResourceNameNotAvailable +from dlt.common.runtime.logger import suppress_and_warn +from dlt.common.runtime.exec_info import TExecutionContext, get_execution_context +from dlt.common.pipeline import ( + ExtractInfo, + LoadInfo, + NormalizeInfo, + PipelineContext, + StepInfo, + StepMetrics, + SupportsPipeline, +) +from dlt.common.source import get_current_pipe_name +from dlt.common.typing import DictStrAny, StrAny, SupportsHumanize +from dlt.common.utils import uniq_id, get_exception_trace_chain -from dlt.extract.source import DltResource, DltSource from dlt.pipeline.typing import TPipelineStep from dlt.pipeline.exceptions import PipelineStepFailed @@ -22,9 +36,11 @@ TRACE_ENGINE_VERSION = 1 TRACE_FILE_NAME = "trace.pickle" + # @dataclasses.dataclass(init=True) class SerializableResolvedValueTrace(NamedTuple): """Information on resolved secret and config values""" + key: str value: Any default_value: Any @@ -35,7 +51,7 @@ class SerializableResolvedValueTrace(NamedTuple): def asdict(self) -> StrAny: """A dictionary representation that is safe to load.""" - return {k:v for k,v in self._asdict().items() if k not in ("value", "default_value")} + return {k: v for k, v in self._asdict().items() if k not in ("value", "default_value")} def asstr(self, verbosity: int = 0) -> str: return f"{self.key}->{self.value} in {'.'.join(self.sections)} by {self.provider_name}" @@ -44,16 +60,21 @@ def __str__(self) -> str: return self.asstr(verbosity=0) -@dataclasses.dataclass(init=True) -class _PipelineStepTrace: +class _PipelineStepTrace(NamedTuple): span_id: str step: TPipelineStep started_at: datetime.datetime finished_at: datetime.datetime = None - step_info: Optional[Any] = None + step_info: Optional[StepInfo[StepMetrics]] = None """A step outcome info ie. 
LoadInfo""" step_exception: Optional[str] = None """For failing steps contains exception string""" + exception_traces: List[ExceptionTrace] = None + """For failing steps contains traces of exception chain causing it""" + + +class PipelineStepTrace(SupportsHumanize, _PipelineStepTrace): + """Trace of particular pipeline step, contains timing information, the step outcome info or exception in case of failing step with custom asdict()""" def asstr(self, verbosity: int = 0) -> str: completed_str = "FAILED" if self.step_exception else "COMPLETED" @@ -73,25 +94,39 @@ def asstr(self, verbosity: int = 0) -> str: msg += f"\nspan id: {self.span_id}" return msg - def __str__(self) -> str: - return self.asstr(verbosity=0) - - -class PipelineStepTrace(_PipelineStepTrace): - """Trace of particular pipeline step, contains timing information, the step outcome info or exception in case of failing step with custom asdict()""" def asdict(self) -> DictStrAny: """A dictionary representation of PipelineStepTrace that can be loaded with `dlt`""" - d = dataclasses.asdict(self) + d = self._asdict() if self.step_info: # name property depending on step name - generates nicer data - d[f"{self.step}_info"] = d.pop("step_info") + d[f"{self.step}_info"] = step_info_dict = d.pop("step_info").asdict() + d["step_info"] = {} + # take only the base keys + for prop in self.step_info._astuple()._asdict(): + d["step_info"][prop] = step_info_dict.pop(prop) + # replace the attributes in exception traces with json dumps + if self.exception_traces: + # do not modify original traces + d["exception_traces"] = copy(d["exception_traces"]) + traces: List[ExceptionTrace] = d["exception_traces"] + for idx in range(len(traces)): + if traces[idx].get("exception_attrs"): + # trace: ExceptionTrace + trace = traces[idx] = copy(traces[idx]) + trace["exception_attrs"] = str(trace["exception_attrs"]) # type: ignore[typeddict-item] + return d + def __str__(self) -> str: + return self.asstr(verbosity=0) + -@dataclasses.dataclass(init=True) -class PipelineTrace: +class _PipelineTrace(NamedTuple): """Pipeline runtime trace containing data on "extract", "normalize" and "load" steps and resolved config and secret values.""" + transaction_id: str + pipeline_name: str + execution_context: TExecutionContext started_at: datetime.datetime steps: List[PipelineStepTrace] """A list of steps in the trace""" @@ -100,6 +135,8 @@ class PipelineTrace: """A list of resolved config values""" engine_version: int = TRACE_ENGINE_VERSION + +class PipelineTrace(SupportsHumanize, _PipelineTrace): def asstr(self, verbosity: int = 0) -> str: last_step = self.steps[-1] completed_str = "FAILED" if last_step.step_exception else "COMPLETED" @@ -108,7 +145,10 @@ def asstr(self, verbosity: int = 0) -> str: elapsed_str = humanize.precisedelta(elapsed) else: elapsed_str = "---" - msg = f"Run started at {self.started_at} and {completed_str} in {elapsed_str} with {len(self.steps)} steps." + msg = ( + f"Run started at {self.started_at} and {completed_str} in {elapsed_str} with" + f" {len(self.steps)} steps." 
+ ) if verbosity > 0 and len(self.resolved_config_values) > 0: msg += "\nFollowing config and secret values were resolved:\n" msg += "\n".join([s.asstr(verbosity) for s in self.resolved_config_values]) @@ -123,6 +163,13 @@ def last_pipeline_step_trace(self, step_name: TPipelineStep) -> PipelineStepTrac return step return None + def asdict(self) -> DictStrAny: + """A dictionary representation of PipelineTrace that can be loaded with `dlt`""" + d = self._asdict() + # run step is the same as load step + d["steps"] = [step.asdict() for step in self.steps] # if step.step != "run" + return d + @property def last_extract_info(self) -> ExtractInfo: step_trace = self.last_pipeline_step_trace("extract") @@ -149,78 +196,117 @@ def __str__(self) -> str: class SupportsTracking(Protocol): - def on_start_trace(self, trace: PipelineTrace, step: TPipelineStep, pipeline: SupportsPipeline) -> None: - ... + def on_start_trace( + self, trace: PipelineTrace, step: TPipelineStep, pipeline: SupportsPipeline + ) -> None: ... - def on_start_trace_step(self, trace: PipelineTrace, step: TPipelineStep, pipeline: SupportsPipeline) -> None: - ... + def on_start_trace_step( + self, trace: PipelineTrace, step: TPipelineStep, pipeline: SupportsPipeline + ) -> None: ... - def on_end_trace_step(self, trace: PipelineTrace, step: PipelineStepTrace, pipeline: SupportsPipeline, step_info: Any) -> None: - ... + def on_end_trace_step( + self, + trace: PipelineTrace, + step: PipelineStepTrace, + pipeline: SupportsPipeline, + step_info: Any, + send_state: bool, + ) -> None: ... - def on_end_trace(self, trace: PipelineTrace, pipeline: SupportsPipeline) -> None: - ... + def on_end_trace( + self, trace: PipelineTrace, pipeline: SupportsPipeline, send_state: bool + ) -> None: ... -# plug in your own tracking module here -# TODO: that probably should be a list of modules / classes with all of them called -TRACKING_MODULE: SupportsTracking = None +# plug in your own tracking modules here +TRACKING_MODULES: List[SupportsTracking] = None def start_trace(step: TPipelineStep, pipeline: SupportsPipeline) -> PipelineTrace: - trace = PipelineTrace(uniq_id(), pendulum.now(), steps=[]) - with suppress_and_warn(): - TRACKING_MODULE.on_start_trace(trace, step, pipeline) + trace = PipelineTrace( + uniq_id(), + pipeline.pipeline_name, + get_execution_context(), + pendulum.now(), + steps=[], + resolved_config_values=[], + ) + for module in TRACKING_MODULES: + with suppress_and_warn(): + module.on_start_trace(trace, step, pipeline) return trace -def start_trace_step(trace: PipelineTrace, step: TPipelineStep, pipeline: SupportsPipeline) -> PipelineStepTrace: +def start_trace_step( + trace: PipelineTrace, step: TPipelineStep, pipeline: SupportsPipeline +) -> PipelineStepTrace: trace_step = PipelineStepTrace(uniq_id(), step, pendulum.now()) - with suppress_and_warn(): - TRACKING_MODULE.on_start_trace_step(trace, step, pipeline) + for module in TRACKING_MODULES: + with suppress_and_warn(): + module.on_start_trace_step(trace, step, pipeline) return trace_step -def end_trace_step(trace: PipelineTrace, step: PipelineStepTrace, pipeline: SupportsPipeline, step_info: Any) -> None: +def end_trace_step( + trace: PipelineTrace, + step: PipelineStepTrace, + pipeline: SupportsPipeline, + step_info: Any, + send_state: bool, +) -> PipelineTrace: # saves runtime trace of the pipeline if isinstance(step_info, PipelineStepFailed): + exception_traces = get_exception_traces(step_info) step_exception = str(step_info) step_info = step_info.step_info elif isinstance(step_info, 
Exception): + exception_traces = get_exception_traces(step_info) step_exception = str(step_info) if step_info.__context__: step_exception += "caused by: " + str(step_info.__context__) step_info = None else: step_info = step_info + exception_traces = None step_exception = None - step.finished_at = pendulum.now() - step.step_exception = step_exception - step.step_info = step_info - - resolved_values = map(lambda v: SerializableResolvedValueTrace( + step = step._replace( + finished_at=pendulum.now(), + step_exception=step_exception, + exception_traces=exception_traces, + step_info=step_info, + ) + resolved_values = map( + lambda v: SerializableResolvedValueTrace( v.key, v.value, v.default_value, is_secret_hint(v.hint), v.sections, v.provider_name, - str(type(v.config).__qualname__) - ) , _RESOLVED_TRACES.values()) + str(type(v.config).__qualname__), + ), + _RESOLVED_TRACES.values(), + ) - trace.resolved_config_values = list(resolved_values) + trace.resolved_config_values[:] = list(resolved_values) trace.steps.append(step) - with suppress_and_warn(): - TRACKING_MODULE.on_end_trace_step(trace, step, pipeline, step_info) + for module in TRACKING_MODULES: + with suppress_and_warn(): + module.on_end_trace_step(trace, step, pipeline, step_info, send_state) + return trace -def end_trace(trace: PipelineTrace, pipeline: SupportsPipeline, trace_path: str) -> None: - trace.finished_at = pendulum.now() +def end_trace( + trace: PipelineTrace, pipeline: SupportsPipeline, trace_path: str, send_state: bool +) -> PipelineTrace: + trace = trace._replace(finished_at=pendulum.now()) if trace_path: save_trace(trace_path, trace) - with suppress_and_warn(): - TRACKING_MODULE.on_end_trace(trace, pipeline) + for module in TRACKING_MODULES: + with suppress_and_warn(): + module.on_end_trace(trace, pipeline, send_state) + return trace def merge_traces(last_trace: PipelineTrace, new_trace: PipelineTrace) -> PipelineTrace: @@ -229,13 +315,12 @@ def merge_traces(last_trace: PipelineTrace, new_trace: PipelineTrace) -> Pipelin return new_trace last_trace.steps.extend(new_trace.steps) - # remember only last 100 steps - last_trace.steps = last_trace.steps[-100:] - # keep the finished up from previous trace - last_trace.finished_at = new_trace.finished_at - last_trace.resolved_config_values = new_trace.resolved_config_values - - return last_trace + # remember only last 100 steps and keep the finished up from previous trace + return last_trace._replace( + steps=last_trace.steps[-100:], + finished_at=new_trace.finished_at, + resolved_config_values=new_trace.resolved_config_values, + ) def save_trace(trace_path: str, trace: PipelineTrace) -> None: @@ -252,33 +337,33 @@ def load_trace(trace_path: str) -> PipelineTrace: return None -def describe_extract_data(data: Any) -> List[ExtractDataInfo]: - """Extract source and resource names from data passed to extract""" - data_info: List[ExtractDataInfo] = [] +def get_exception_traces(exc: BaseException, container: Container = None) -> List[ExceptionTrace]: + """Gets exception trace chain and extend it with data available in Container context""" + traces = get_exception_trace_chain(exc) + container = container or Container() + + # get resource name + resource_name: str = None + with contextlib.suppress(ResourceNameNotAvailable): + resource_name = get_current_pipe_name() + # get source name + source_name: str = None + with contextlib.suppress(ContextDefaultCannotBeCreated): + sections_context = container[ConfigSectionContext] + source_name = sections_context.source_state_key + # get 
pipeline name + proxy = container[PipelineContext] + if proxy.is_active(): + pipeline_name = proxy.pipeline().pipeline_name + else: + pipeline_name = None - def add_item(item: Any) -> bool: - if isinstance(item, (DltResource, DltSource)): - # record names of sources/resources - data_info.append({ - "name": item.name, - "data_type": "resource" if isinstance(item, DltResource) else "source" - }) - return False - else: - # anything else - data_info.append({ - "name": "", - "data_type": type(item).__name__ - }) - return True - - item: Any = data - if isinstance(data, C_Sequence) and len(data) > 0: - for item in data: - # add_item returns True if non named item was returned. in that case we break - if add_item(item): - break - return data_info - - add_item(item) - return data_info + # apply context to trace + for trace in traces: + # only to dlt exceptions + if "exception_attrs" in trace: + trace.setdefault("resource_name", resource_name) + trace.setdefault("pipeline_name", pipeline_name) + trace.setdefault("source_name", source_name) + + return traces diff --git a/dlt/pipeline/track.py b/dlt/pipeline/track.py index ec42bc788f..9a9deee017 100644 --- a/dlt/pipeline/track.py +++ b/dlt/pipeline/track.py @@ -9,7 +9,6 @@ from dlt.common.runtime.segment import track as dlthub_telemetry_track from dlt.common.runtime.slack import send_slack_message from dlt.common.pipeline import LoadInfo, ExtractInfo, SupportsPipeline -from dlt.common.destination import DestinationReference from dlt.pipeline.typing import TPipelineStep from dlt.pipeline.trace import PipelineTrace, PipelineStepTrace @@ -21,9 +20,10 @@ def _add_sentry_tags(span: Span, pipeline: SupportsPipeline) -> None: span.set_tag("pipeline_name", pipeline.pipeline_name) if pipeline.destination: - span.set_tag("destination", pipeline.destination.__name__) + span.set_tag("destination", pipeline.destination.destination_name) if pipeline.dataset_name: span.set_tag("dataset_name", pipeline.dataset_name) + except ImportError: # sentry is optional dependency and enabled only when RuntimeConfiguration.sentry_dsn is set pass @@ -48,7 +48,7 @@ def _get_step_elapsed(step: PipelineStepTrace) -> str: normalize_step = next((step for step in trace.steps if step.step == "normalize"), None) extract_step = next((step for step in trace.steps if step.step == "extract"), None) - message = f"""The {author}pipeline *{load_info.pipeline.pipeline_name}* just loaded *{len(load_info.loads_ids)}* load package(s) to destination *{load_info.destination_name}* and into dataset *{load_info.dataset_name}*. + message = f"""The {author}pipeline *{load_info.pipeline.pipeline_name}* just loaded *{len(load_info.loads_ids)}* load package(s) to destination *{load_info.destination_type}* and into dataset *{load_info.dataset_name}*. 
🚀 *{humanize.precisedelta(total_elapsed)}* of which {_get_step_elapsed(load_step)}{_get_step_elapsed(normalize_step)}{_get_step_elapsed(extract_step)}""" send_slack_message(incoming_hook, message) @@ -67,7 +67,9 @@ def on_start_trace(trace: PipelineTrace, step: TPipelineStep, pipeline: Supports transaction.__enter__() -def on_start_trace_step(trace: PipelineTrace, step: TPipelineStep, pipeline: SupportsPipeline) -> None: +def on_start_trace_step( + trace: PipelineTrace, step: TPipelineStep, pipeline: SupportsPipeline +) -> None: if pipeline.runtime_config.sentry_dsn: # print(f"START SENTRY SPAN {trace.transaction_id}:{trace_step.span_id} SCOPE: {Hub.current.scope}") span = Hub.current.scope.span.start_child(description=step, op=step).__enter__() @@ -75,7 +77,13 @@ def on_start_trace_step(trace: PipelineTrace, step: TPipelineStep, pipeline: Sup _add_sentry_tags(span, pipeline) -def on_end_trace_step(trace: PipelineTrace, step: PipelineStepTrace, pipeline: SupportsPipeline, step_info: Any) -> None: +def on_end_trace_step( + trace: PipelineTrace, + step: PipelineStepTrace, + pipeline: SupportsPipeline, + step_info: Any, + send_state: bool, +) -> None: if pipeline.runtime_config.sentry_dsn: # print(f"---END SENTRY SPAN {trace.transaction_id}:{step.span_id}: {step} SCOPE: {Hub.current.scope}") with contextlib.suppress(Exception): @@ -87,13 +95,15 @@ def on_end_trace_step(trace: PipelineTrace, step: PipelineStepTrace, pipeline: S props = { "elapsed": (step.finished_at - trace.started_at).total_seconds(), "success": step.step_exception is None, - "destination_name": DestinationReference.to_name(pipeline.destination) if pipeline.destination else None, + "destination_name": pipeline.destination.destination_name if pipeline.destination else None, + "destination_type": pipeline.destination.destination_type if pipeline.destination else None, "pipeline_name_hash": digest128(pipeline.pipeline_name), "dataset_name_hash": digest128(pipeline.dataset_name) if pipeline.dataset_name else None, - "default_schema_name_hash": digest128(pipeline.default_schema_name) if pipeline.default_schema_name else None, - "transaction_id": trace.transaction_id + "default_schema_name_hash": ( + digest128(pipeline.default_schema_name) if pipeline.default_schema_name else None + ), + "transaction_id": trace.transaction_id, } - # disable automatic slack messaging until we can configure messages themselves if step.step == "extract" and step_info: assert isinstance(step_info, ExtractInfo) props["extract_data"] = step_info.extract_data_info @@ -103,8 +113,8 @@ def on_end_trace_step(trace: PipelineTrace, step: PipelineStepTrace, pipeline: S dlthub_telemetry_track("pipeline", step.step, props) -def on_end_trace(trace: PipelineTrace, pipeline: SupportsPipeline) -> None: +def on_end_trace(trace: PipelineTrace, pipeline: SupportsPipeline, send_state: bool) -> None: if pipeline.runtime_config.sentry_dsn: # print(f"---END SENTRY TX: {trace.transaction_id} SCOPE: {Hub.current.scope}") with contextlib.suppress(Exception): - Hub.current.scope.span.__exit__(None, None, None) \ No newline at end of file + Hub.current.scope.span.__exit__(None, None, None) diff --git a/dlt/pipeline/typing.py b/dlt/pipeline/typing.py index 5cd2b2b503..f0192a504d 100644 --- a/dlt/pipeline/typing.py +++ b/dlt/pipeline/typing.py @@ -1,3 +1,3 @@ from typing import Literal -TPipelineStep = Literal["run", "extract", "normalize", "load"] +TPipelineStep = Literal["sync", "extract", "normalize", "load"] diff --git a/dlt/pipeline/warnings.py b/dlt/pipeline/warnings.py new 
file mode 100644 index 0000000000..87fcbc1f0c --- /dev/null +++ b/dlt/pipeline/warnings.py @@ -0,0 +1,22 @@ +import typing as t +import warnings + +from dlt.common.warnings import Dlt04DeprecationWarning +from dlt.common.destination import Destination, TDestinationReferenceArg + + +def credentials_argument_deprecated( + caller_name: str, credentials: t.Optional[t.Any], destination: TDestinationReferenceArg = None +) -> None: + if credentials is None: + return + + dest_name = Destination.to_name(destination) if destination else "postgres" + + warnings.warn( + f"The `credentials argument` to {caller_name} is deprecated and will be removed in a future" + " version. Pass the same credentials to the `destination` instance instead, e.g." + f" {caller_name}(destination=dlt.destinations.{dest_name}(credentials=...))", + Dlt04DeprecationWarning, + stacklevel=2, + ) diff --git a/dlt/reflection/names.py b/dlt/reflection/names.py index 1aee6df52b..dad7bdce92 100644 --- a/dlt/reflection/names.py +++ b/dlt/reflection/names.py @@ -18,5 +18,5 @@ ATTACH: inspect.signature(attach), RUN: inspect.signature(run), SOURCE: inspect.signature(source), - RESOURCE: inspect.signature(resource) -} \ No newline at end of file + RESOURCE: inspect.signature(resource), +} diff --git a/dlt/reflection/script_inspector.py b/dlt/reflection/script_inspector.py index 204135dcd7..d8d96804c8 100644 --- a/dlt/reflection/script_inspector.py +++ b/dlt/reflection/script_inspector.py @@ -12,7 +12,8 @@ from dlt.common.typing import DictStrAny from dlt.pipeline import Pipeline -from dlt.extract.source import DltSource, ManagedPipeIterator +from dlt.extract import DltSource +from dlt.extract.pipe import ManagedPipeIterator def patch__init__(self: Any, *args: Any, **kwargs: Any) -> None: @@ -21,6 +22,7 @@ def patch__init__(self: Any, *args: Any, **kwargs: Any) -> None: class DummyModule(ModuleType): """A dummy module from which you can import anything""" + def __getattr__(self, key: str) -> Any: if key[0].isupper(): # if imported name is capitalized, import type @@ -28,13 +30,20 @@ def __getattr__(self, key: str) -> Any: else: # otherwise import instance return SimpleNamespace() - __all__: List[Any] = [] # support wildcard imports + + __all__: List[Any] = [] # support wildcard imports def _import_module(name: str, missing_modules: Tuple[str, ...] = ()) -> ModuleType: """Module importer that ignores missing modules by importing a dummy module""" - def _try_import(name: str, _globals: Mapping[str, Any] = None, _locals: Mapping[str, Any] = None, fromlist: Sequence[str] = (), level:int = 0) -> ModuleType: + def _try_import( + name: str, + _globals: Mapping[str, Any] = None, + _locals: Mapping[str, Any] = None, + fromlist: Sequence[str] = (), + level: int = 0, + ) -> ModuleType: """This function works as follows: on ImportError it raises. This import error is then next caught in the main function body and the name is added to exceptions. 
Next time if the name is on exception list or name is a package on exception list we return DummyModule and do not reraise This excepts only the modules that bubble up ImportError up until our code so any handled import errors are not excepted @@ -62,7 +71,7 @@ def _try_import(name: str, _globals: Mapping[str, Any] = None, _locals: Mapping[ # print(f"ADD {ie.name} {ie.path} vs {name} vs {str(ie)}") if ie.name in missing_modules: raise - missing_modules += (ie.name, ) + missing_modules += (ie.name,) except MissingDependencyException as me: if isinstance(me.__context__, ImportError): if me.__context__.name is None: @@ -71,14 +80,16 @@ def _try_import(name: str, _globals: Mapping[str, Any] = None, _locals: Mapping[ # print(f"{me.__context__.name} IN :/") raise # print(f"ADD {me.__context__.name}") - missing_modules += (me.__context__.name, ) + missing_modules += (me.__context__.name,) else: raise finally: builtins.__import__ = real_import -def load_script_module(module_path:str, script_relative_path: str, ignore_missing_imports: bool = False) -> ModuleType: +def load_script_module( + module_path: str, script_relative_path: str, ignore_missing_imports: bool = False +) -> ModuleType: """Loads a module in `script_relative_path` by splitting it into a script module (file part) and package (folders). `module_path` is added to sys.path Optionally, missing imports will be ignored by importing a dummy module instead. """ @@ -110,12 +121,24 @@ def load_script_module(module_path:str, script_relative_path: str, ignore_missin sys.path.remove(sys_path) -def inspect_pipeline_script(module_path:str, script_relative_path: str, ignore_missing_imports: bool = False) -> ModuleType: +def inspect_pipeline_script( + module_path: str, script_relative_path: str, ignore_missing_imports: bool = False +) -> ModuleType: # patch entry points to pipeline, sources and resources to prevent pipeline from running - with patch.object(Pipeline, '__init__', patch__init__), patch.object(DltSource, '__init__', patch__init__), patch.object(ManagedPipeIterator, '__init__', patch__init__): - return load_script_module(module_path, script_relative_path, ignore_missing_imports=ignore_missing_imports) + with patch.object(Pipeline, "__init__", patch__init__), patch.object( + DltSource, "__init__", patch__init__ + ), patch.object(ManagedPipeIterator, "__init__", patch__init__): + return load_script_module( + module_path, script_relative_path, ignore_missing_imports=ignore_missing_imports + ) class PipelineIsRunning(DltException): def __init__(self, obj: object, args: Tuple[str, ...], kwargs: DictStrAny) -> None: - super().__init__(f"The pipeline script instantiates the pipeline on import. Did you forget to use if __name__ == 'main':? in {obj.__class__.__name__}", obj, args, kwargs) + super().__init__( + "The pipeline script instantiates the pipeline on import. Did you forget to use if" + f" __name__ == '__main__':?
in {obj.__class__.__name__}", + obj, + args, + kwargs, + ) diff --git a/dlt/reflection/script_visitor.py b/dlt/reflection/script_visitor.py index 7d4e0ea2cd..52b19fe031 100644 --- a/dlt/reflection/script_visitor.py +++ b/dlt/reflection/script_visitor.py @@ -10,7 +10,6 @@ class PipelineScriptVisitor(NodeVisitor): - def __init__(self, source: str): self.source = source self.source_lines: List[str] = ast._splitlines_no_ff(source) # type: ignore @@ -73,7 +72,9 @@ def visit_FunctionDef(self, node: ast.FunctionDef) -> Any: elif isinstance(deco, ast.Call): alias_name = astunparse.unparse(deco.func).strip() else: - raise ValueError(self.source_segment(deco), type(deco), "Unknown decorator form") + raise ValueError( + self.source_segment(deco), type(deco), "Unknown decorator form" + ) fn = self.func_aliases.get(alias_name) if fn == n.SOURCE: self.known_sources[str(node.name)] = node @@ -96,7 +97,9 @@ def visit_Call(self, node: ast.Call) -> Any: sig = n.SIGNATURES[fn] try: # bind the signature where the argument values are the corresponding ast nodes - bound_args = sig.bind(*node.args, **{str(kwd.arg):kwd.value for kwd in node.keywords}) + bound_args = sig.bind( + *node.args, **{str(kwd.arg): kwd.value for kwd in node.keywords} + ) bound_args.apply_defaults() # print(f"ALIAS: {alias_name} of {self.func_aliases.get(alias_name)} with {bound_args}") fun_calls = self.known_calls.setdefault(fn, []) diff --git a/dlt/sources/__init__.py b/dlt/sources/__init__.py index 6e418a3cb2..465467db67 100644 --- a/dlt/sources/__init__.py +++ b/dlt/sources/__init__.py @@ -1,7 +1,6 @@ """Module with built in sources and source building blocks""" -from dlt.extract.incremental import Incremental as incremental -from dlt.extract.source import DltSource, DltResource from dlt.common.typing import TDataItem, TDataItems +from dlt.extract import DltSource, DltResource, Incremental as incremental from . import credentials from . import config from . 
import filesystem diff --git a/dlt/sources/config.py b/dlt/sources/config.py index d58c210ab6..796a338c02 100644 --- a/dlt/sources/config.py +++ b/dlt/sources/config.py @@ -1,2 +1,2 @@ from dlt.common.configuration.specs import configspec -from dlt.common.configuration.inject import with_config \ No newline at end of file +from dlt.common.configuration.inject import with_config diff --git a/dlt/sources/credentials.py b/dlt/sources/credentials.py index 5815324d56..a7663a857b 100644 --- a/dlt/sources/credentials.py +++ b/dlt/sources/credentials.py @@ -1,4 +1,8 @@ -from dlt.common.configuration.specs import GcpServiceAccountCredentials, GcpOAuthCredentials, GcpCredentials +from dlt.common.configuration.specs import ( + GcpServiceAccountCredentials, + GcpOAuthCredentials, + GcpCredentials, +) from dlt.common.configuration.specs import ConnectionStringCredentials from dlt.common.configuration.specs import OAuth2Credentials from dlt.common.configuration.specs import CredentialsConfiguration, configspec @@ -16,4 +20,3 @@ "FileSystemCredentials", "FilesystemConfiguration", ] - diff --git a/dlt/sources/filesystem.py b/dlt/sources/filesystem.py index bb18a15f20..23fb6a9cf3 100644 --- a/dlt/sources/filesystem.py +++ b/dlt/sources/filesystem.py @@ -1,3 +1,8 @@ -from dlt.common.storages.fsspec_filesystem import FileItem, FileItemDict, fsspec_filesystem, glob_files +from dlt.common.storages.fsspec_filesystem import ( + FileItem, + FileItemDict, + fsspec_filesystem, + glob_files, +) __all__ = ["FileItem", "FileItemDict", "fsspec_filesystem", "glob_files"] diff --git a/dlt/sources/helpers/requests/__init__.py b/dlt/sources/helpers/requests/__init__.py index 39d286e29d..3e29a2cf52 100644 --- a/dlt/sources/helpers/requests/__init__.py +++ b/dlt/sources/helpers/requests/__init__.py @@ -1,6 +1,7 @@ from tenacity import RetryError from requests import ( - Request, Response, + Request, + Response, ConnectionError, ConnectTimeout, FileModeWarning, @@ -19,7 +20,14 @@ client = Client() get, post, put, patch, delete, options, head, request = ( - client.get, client.post, client.put, client.patch, client.delete, client.options, client.head, client.request + client.get, + client.post, + client.put, + client.patch, + client.delete, + client.options, + client.head, + client.request, ) @@ -40,8 +48,18 @@ def init(config: RunConfiguration) -> None: "request", "init", "Session", - "Request", "Response", "ConnectionError", "ConnectTimeout", "FileModeWarning", "HTTPError", "ReadTimeout", - "RequestException", "Timeout", "TooManyRedirects", "URLRequired", "ChunkedEncodingError", "RetryError" - "Client", - "RetryError" + "Request", + "Response", + "ConnectionError", + "ConnectTimeout", + "FileModeWarning", + "HTTPError", + "ReadTimeout", + "RequestException", + "Timeout", + "TooManyRedirects", + "URLRequired", + "ChunkedEncodingError", + "Client", + "RetryError", ] diff --git a/dlt/sources/helpers/requests/retry.py b/dlt/sources/helpers/requests/retry.py index 8f824e0c4f..c9a813598f 100644 --- a/dlt/sources/helpers/requests/retry.py +++ b/dlt/sources/helpers/requests/retry.py @@ -1,13 +1,32 @@ from email.utils import parsedate_tz, mktime_tz import re import time -from typing import Optional, cast, Callable, Type, Union, Sequence, Tuple, List, TYPE_CHECKING, Any, Dict +from typing import ( + Optional, + cast, + Callable, + Type, + Union, + Sequence, + Tuple, + List, + TYPE_CHECKING, + Any, + Dict, +) from threading import local from requests import Response, HTTPError, Session as BaseSession from requests.exceptions
import ConnectionError, Timeout, ChunkedEncodingError from requests.adapters import HTTPAdapter -from tenacity import Retrying, retry_if_exception_type, stop_after_attempt, RetryCallState, retry_any, wait_exponential +from tenacity import ( + Retrying, + retry_if_exception_type, + stop_after_attempt, + RetryCallState, + retry_any, + wait_exponential, +) from tenacity.retry import retry_base from dlt.sources.helpers.requests.session import Session, DEFAULT_TIMEOUT @@ -96,7 +115,7 @@ def _make_retry( backoff_factor: float, respect_retry_after_header: bool, max_delay: TimedeltaSeconds, -)-> Retrying: +) -> Retrying: retry_conds = [retry_if_status(status_codes), retry_if_exception_type(tuple(exceptions))] if condition is not None: if callable(condition): @@ -148,12 +167,15 @@ class Client: respect_retry_after_header: Whether to use the `Retry-After` response header (when available) to determine the retry delay session_attrs: Extra attributes that will be set on the session instance, e.g. `{headers: {'Authorization': 'api-key'}}` (see `requests.sessions.Session` for possible attributes) """ + _session_attrs: Dict[str, Any] @with_config(spec=RunConfiguration) def __init__( self, - request_timeout: Optional[Union[TimedeltaSeconds, Tuple[TimedeltaSeconds, TimedeltaSeconds]]] = DEFAULT_TIMEOUT, + request_timeout: Optional[ + Union[TimedeltaSeconds, Tuple[TimedeltaSeconds, TimedeltaSeconds]] + ] = DEFAULT_TIMEOUT, max_connections: int = 50, raise_for_status: bool = True, status_codes: Sequence[int] = DEFAULT_RETRY_STATUS, @@ -175,7 +197,7 @@ def __init__( condition=retry_condition, backoff_factor=request_backoff_factor, respect_retry_after_header=respect_retry_after_header, - max_delay=request_max_retry_delay + max_delay=request_max_retry_delay, ) self._session_attrs = session_attrs or {} @@ -198,29 +220,31 @@ def __init__( self.options = lambda *a, **kw: self.session.options(*a, **kw) self.request = lambda *a, **kw: self.session.request(*a, **kw) - self._config_version: int = 0 # Incrementing marker to ensure per-thread sessions are recreated on config changes + self._config_version: int = ( + 0 # Incrementing marker to ensure per-thread sessions are recreated on config changes + ) def update_from_config(self, config: RunConfiguration) -> None: """Update session/retry settings from RunConfiguration""" - self._session_kwargs['timeout'] = config.request_timeout - self._retry_kwargs['backoff_factor'] = config.request_backoff_factor - self._retry_kwargs['max_delay'] = config.request_max_retry_delay - self._retry_kwargs['max_attempts'] = config.request_max_attempts + self._session_kwargs["timeout"] = config.request_timeout + self._retry_kwargs["backoff_factor"] = config.request_backoff_factor + self._retry_kwargs["max_delay"] = config.request_max_retry_delay + self._retry_kwargs["max_attempts"] = config.request_max_attempts self._config_version += 1 def _make_session(self) -> Session: session = Session(**self._session_kwargs) # type: ignore[arg-type] for key, value in self._session_attrs.items(): setattr(session, key, value) - session.mount('http://', self._adapter) - session.mount('https://', self._adapter) + session.mount("http://", self._adapter) + session.mount("https://", self._adapter) retry = _make_retry(**self._retry_kwargs) session.request = retry.wraps(session.request) # type: ignore[method-assign] return session @property def session(self) -> Session: - session: Optional[Session] = getattr(self._local, 'session', None) + session: Optional[Session] = getattr(self._local, "session", None) 
version = self._config_version if session is not None: version = self._local.config_version diff --git a/dlt/sources/helpers/requests/session.py b/dlt/sources/helpers/requests/session.py index b12d8da73f..0a4d277848 100644 --- a/dlt/sources/helpers/requests/session.py +++ b/dlt/sources/helpers/requests/session.py @@ -15,7 +15,11 @@ def _timeout_to_seconds(timeout: TRequestTimeout) -> Optional[Union[Tuple[float, float], float]]: - return (to_seconds(timeout[0]), to_seconds(timeout[1])) if isinstance(timeout, tuple) else to_seconds(timeout) + return ( + (to_seconds(timeout[0]), to_seconds(timeout[1])) + if isinstance(timeout, tuple) + else to_seconds(timeout) + ) class Session(BaseSession): @@ -26,23 +30,28 @@ class Session(BaseSession): May be a single value or a tuple for separate (connect, read) timeout. raise_for_status: Whether to raise exception on error status codes (using `response.raise_for_status()`) """ + def __init__( self, - timeout: Optional[Union[TimedeltaSeconds, Tuple[TimedeltaSeconds, TimedeltaSeconds]]] = DEFAULT_TIMEOUT, + timeout: Optional[ + Union[TimedeltaSeconds, Tuple[TimedeltaSeconds, TimedeltaSeconds]] + ] = DEFAULT_TIMEOUT, raise_for_status: bool = True, ) -> None: super().__init__() self.timeout = _timeout_to_seconds(timeout) self.raise_for_status = raise_for_status - self.headers.update({ - "User-Agent": f"dlt/{__version__}", - }) + self.headers.update( + { + "User-Agent": f"dlt/{__version__}", + } + ) if TYPE_CHECKING: request = BaseSession.request def request(self, *args, **kwargs): # type: ignore[no-untyped-def,no-redef] - kwargs.setdefault('timeout', self.timeout) + kwargs.setdefault("timeout", self.timeout) resp = super().request(*args, **kwargs) if self.raise_for_status: resp.raise_for_status() diff --git a/dlt/sources/helpers/transform.py b/dlt/sources/helpers/transform.py index 0c2f7c5e39..1975c20586 100644 --- a/dlt/sources/helpers/transform.py +++ b/dlt/sources/helpers/transform.py @@ -5,18 +5,22 @@ def take_first(max_items: int) -> ItemTransformFunctionNoMeta[bool]: """A filter that takes only first `max_items` from a resource""" count: int = 0 + def _filter(_: TDataItem) -> bool: nonlocal count count += 1 return count <= max_items + return _filter def skip_first(max_items: int) -> ItemTransformFunctionNoMeta[bool]: """A filter that skips first `max_items` from a resource""" count: int = 0 + def _filter(_: TDataItem) -> bool: nonlocal count count += 1 return count > max_items + return _filter diff --git a/docs/examples/archive/_helpers.py b/docs/examples/archive/_helpers.py index 95913d1be1..0f490ff85f 100644 --- a/docs/examples/archive/_helpers.py +++ b/docs/examples/archive/_helpers.py @@ -10,6 +10,12 @@ } # we do not want to have this key verbatim in repo so we decode it here -_bigquery_credentials["private_key"] = bytes([_a ^ _b for _a, _b in zip(base64.b64decode(_bigquery_credentials["private_key"]), b"quickstart-sv"*150)]).decode("utf-8") +_bigquery_credentials["private_key"] = bytes( + [ + _a ^ _b + for _a, _b in zip( + base64.b64decode(_bigquery_credentials["private_key"]), b"quickstart-sv" * 150 + ) + ] +).decode("utf-8") pub_bigquery_credentials = _bigquery_credentials - diff --git a/docs/examples/archive/credentials/explicit.py b/docs/examples/archive/credentials/explicit.py index 6233140459..b1bc25fce6 100644 --- a/docs/examples/archive/credentials/explicit.py +++ b/docs/examples/archive/credentials/explicit.py @@ -4,7 +4,9 @@ @dlt.resource -def simple_data(api_url: str = dlt.config.value, api_secret: dlt.TSecretValue = dlt.secrets.value) 
-> Iterator[str]: +def simple_data( + api_url: str = dlt.config.value, api_secret: dlt.TSecretValue = dlt.secrets.value +) -> Iterator[str]: # just yield api_url and api_secret to show what was configured in the example yield api_url yield api_secret @@ -29,13 +31,17 @@ def simple_data(api_url: str = dlt.config.value, api_secret: dlt.TSecretValue = print(list(data)) # you are free to pass credentials from custom location to destination -pipeline = dlt.pipeline(destination="postgres", credentials=dlt.secrets["custom.destination.credentials"]) +pipeline = dlt.pipeline( + destination="postgres", credentials=dlt.secrets["custom.destination.credentials"] +) # see nice credentials object print(pipeline.credentials) # you can also pass credentials partially, only the password comes from the secrets or environment -pipeline = dlt.pipeline(destination="postgres", credentials="postgres://loader@localhost:5432/dlt_data") +pipeline = dlt.pipeline( + destination="postgres", credentials="postgres://loader@localhost:5432/dlt_data" +) # now lets compare it with default location for config and credentials data = simple_data() -print(list(data)) \ No newline at end of file +print(list(data)) diff --git a/docs/examples/archive/dbt_run_jaffle.py b/docs/examples/archive/dbt_run_jaffle.py index ad059dcd6d..098b35fff8 100644 --- a/docs/examples/archive/dbt_run_jaffle.py +++ b/docs/examples/archive/dbt_run_jaffle.py @@ -2,7 +2,9 @@ pipeline = dlt.pipeline(destination="duckdb", dataset_name="jaffle_jaffle") -print("create or restore virtual environment in which dbt is installed, use the newest version of dbt") +print( + "create or restore virtual environment in which dbt is installed, use the newest version of dbt" +) venv = dlt.dbt.get_venv(pipeline) print("get runner, optionally pass the venv") @@ -11,13 +13,18 @@ print("run the package (clone/pull repo, deps, seed, source tests, run)") models = dbt.run_all() for m in models: - print(f"Model {m.model_name} materialized in {m.time} with status {m.status} and message {m.message}") + print( + f"Model {m.model_name} materialized in {m.time} with status {m.status} and message" + f" {m.message}" + ) print("") print("test the model") models = dbt.test() for m in models: - print(f"Test {m.model_name} executed in {m.time} with status {m.status} and message {m.message}") + print( + f"Test {m.model_name} executed in {m.time} with status {m.status} and message {m.message}" + ) print("") print("get and display data frame with customers") diff --git a/docs/examples/archive/discord_iterator.py b/docs/examples/archive/discord_iterator.py index a3c59ed2c5..44cbe3b5b1 100644 --- a/docs/examples/archive/discord_iterator.py +++ b/docs/examples/archive/discord_iterator.py @@ -1,4 +1,3 @@ - # from dlt.common import json # from dlt.common.schema import Schema # from dlt.common.typing import DictStrAny diff --git a/docs/examples/archive/google_sheets.py b/docs/examples/archive/google_sheets.py index 93c5658233..26c3d30b54 100644 --- a/docs/examples/archive/google_sheets.py +++ b/docs/examples/archive/google_sheets.py @@ -6,5 +6,7 @@ # see example.secrets.toml to where to put credentials # "2022-05", "model_metadata" -info = google_spreadsheet("11G95oVZjieRhyGqtQMQqlqpxyvWkRXowKE8CtdLtFaU", ["named range", "Second_Copy!1:2"]) +info = google_spreadsheet( + "11G95oVZjieRhyGqtQMQqlqpxyvWkRXowKE8CtdLtFaU", ["named range", "Second_Copy!1:2"] +) print(list(info)) diff --git a/docs/examples/archive/quickstart.py b/docs/examples/archive/quickstart.py index e55e9f6049..6e49f1af7a 100644 --- 
a/docs/examples/archive/quickstart.py +++ b/docs/examples/archive/quickstart.py @@ -9,9 +9,9 @@ """ # 1. configuration: name your dataset, table, pass credentials -dataset_name = 'dlt_quickstart' -pipeline_name = 'dlt_quickstart' -table_name = 'my_json_doc' +dataset_name = "dlt_quickstart" +pipeline_name = "dlt_quickstart" +table_name = "my_json_doc" gcp_credentials_json = { "type": "service_account", @@ -24,7 +24,14 @@ destination_name = "duckdb" if destination_name == "bigquery": # we do not want to have this key verbatim in repo so we decode it here - gcp_credentials_json["private_key"] = bytes([_a ^ _b for _a, _b in zip(base64.b64decode(gcp_credentials_json["private_key"]), b"quickstart-sv"*150)]).decode("utf-8") + gcp_credentials_json["private_key"] = bytes( + [ + _a ^ _b + for _a, _b in zip( + base64.b64decode(gcp_credentials_json["private_key"]), b"quickstart-sv" * 150 + ) + ] + ).decode("utf-8") credentials: Any = gcp_credentials_json elif destination_name == "redshift": credentials = db_dsn @@ -41,20 +48,26 @@ dataset_name=dataset_name, credentials=credentials, export_schema_path=export_schema_path, - full_refresh=True + full_refresh=True, ) # 3. Pass the data to the pipeline and give it a table name. Optionally normalize and handle schema. -rows = [{"name": "Ana", "age": 30, "id": 456, "children": [{"name": "Bill", "id": 625}, - {"name": "Elli", "id": 591} - ]}, - - {"name": "Bob", "age": 30, "id": 455, "children": [{"name": "Bill", "id": 625}, - {"name": "Dave", "id": 621} - ]} - ] +rows = [ + { + "name": "Ana", + "age": 30, + "id": 456, + "children": [{"name": "Bill", "id": 625}, {"name": "Elli", "id": 591}], + }, + { + "name": "Bob", + "age": 30, + "id": 455, + "children": [{"name": "Bill", "id": 625}, {"name": "Dave", "id": 621}], + }, +] load_info = pipeline.run(rows, table_name=table_name, write_disposition="replace") diff --git a/docs/examples/archive/rasa_example.py b/docs/examples/archive/rasa_example.py index d438ce5e8b..e83e6c61f7 100644 --- a/docs/examples/archive/rasa_example.py +++ b/docs/examples/archive/rasa_example.py @@ -24,9 +24,11 @@ destination=postgres, # export_schema_path=... 
# uncomment to see the final schema in the folder you want ).run( - rasa(event_files, store_last_timestamp=True), # also store last timestamp so we have no duplicate events - credentials=credentials # if you skip this parameter, the credentials will be injected by the config providers - ) + rasa( + event_files, store_last_timestamp=True + ), # also store last timestamp so we have no duplicate events + credentials=credentials, # if you skip this parameter, the credentials will be injected by the config providers +) print(info) diff --git a/docs/examples/archive/read_table.py b/docs/examples/archive/read_table.py index 291c27bde4..6cccf0efdb 100644 --- a/docs/examples/archive/read_table.py +++ b/docs/examples/archive/read_table.py @@ -9,7 +9,9 @@ source_dsn = "redshift+redshift_connector://loader@chat-analytics.czwteevq7bpe.eu-central-1.redshift.amazonaws.com:5439/chat_analytics_rasa" # get data from table, we preserve method signature from pandas -items = query_table("blocks__transactions", source_dsn, table_schema_name="mainnet_2_ethereum", coerce_float=False) +items = query_table( + "blocks__transactions", source_dsn, table_schema_name="mainnet_2_ethereum", coerce_float=False +) # the data is also an iterator for i in items: @@ -25,5 +27,7 @@ # you can find a docker compose file that spins up required instance in tests/load/postgres # note: run the script without required env variables to see info on possible secret configurations that were tried -info = dlt.pipeline().run(items, destination=postgres, dataset_name="ethereum", table_name="transactions") +info = dlt.pipeline().run( + items, destination=postgres, dataset_name="ethereum", table_name="transactions" +) print(info) diff --git a/docs/examples/archive/restore_pipeline.py b/docs/examples/archive/restore_pipeline.py index f3c013e85b..fc1f92a4c0 100644 --- a/docs/examples/archive/restore_pipeline.py +++ b/docs/examples/archive/restore_pipeline.py @@ -18,4 +18,4 @@ # print(pipeline.list_extracted_loads()) # # just finalize -# pipeline.flush() \ No newline at end of file +# pipeline.flush() diff --git a/docs/examples/archive/singer_tap_example.py b/docs/examples/archive/singer_tap_example.py index d03182339c..a9b105fe93 100644 --- a/docs/examples/archive/singer_tap_example.py +++ b/docs/examples/archive/singer_tap_example.py @@ -11,7 +11,10 @@ # here we use context manager to automatically delete venv after example was run # the dependency is meltano version of csv tap -print("Spawning virtual environment to run singer and installing csv tap from git+https://github.com/MeltanoLabs/tap-csv.git") +print( + "Spawning virtual environment to run singer and installing csv tap from" + " git+https://github.com/MeltanoLabs/tap-csv.git" +) # WARNING: on MACOS you need to have working gcc to use tap-csv, otherwise dependency will not be installed with Venv.create(mkdtemp(), ["git+https://github.com/MeltanoLabs/tap-csv.git"]) as venv: # prep singer config for tap-csv @@ -20,13 +23,13 @@ { "entity": "annotations_202205", "path": os.path.abspath("examples/data/singer_taps/model_annotations.csv"), - "keys": [ - "message id" - ] + "keys": ["message id"], } ] } print("running tap-csv") tap_source = tap(venv, "tap-csv", csv_tap_config, "examples/data/singer_taps/csv_catalog.json") - info = dlt.pipeline("meltano_csv", destination="postgres").run(tap_source, credentials="postgres://loader@localhost:5432/dlt_data") + info = dlt.pipeline("meltano_csv", destination="postgres").run( + tap_source, credentials="postgres://loader@localhost:5432/dlt_data" + ) 
print(info) diff --git a/docs/examples/archive/singer_tap_jsonl_example.py b/docs/examples/archive/singer_tap_jsonl_example.py index fff64bdb1d..c926a9f153 100644 --- a/docs/examples/archive/singer_tap_jsonl_example.py +++ b/docs/examples/archive/singer_tap_jsonl_example.py @@ -7,11 +7,11 @@ # load hubspot schema stub - it converts all field names with `timestamp` into timestamp type -schema = SchemaStorage.load_schema_file("docs/examples/schemas/", "hubspot", ("yaml", )) +schema = SchemaStorage.load_schema_file("docs/examples/schemas/", "hubspot", ("yaml",)) p = dlt.pipeline(destination="postgres", full_refresh=True) # now load a pipeline created from jsonl resource that feeds messages into singer tap transformer pipe = jsonl_file("docs/examples/data/singer_taps/tap_hubspot.jsonl") | singer_raw_stream() # provide hubspot schema info = p.run(pipe, schema=schema, credentials="postgres://loader@localhost:5432/dlt_data") -print(info) \ No newline at end of file +print(info) diff --git a/docs/examples/archive/sources/google_sheets.py b/docs/examples/archive/sources/google_sheets.py index 8a3d6b1d1c..69855154ae 100644 --- a/docs/examples/archive/sources/google_sheets.py +++ b/docs/examples/archive/sources/google_sheets.py @@ -16,38 +16,52 @@ # TODO: consider using https://github.com/burnash/gspread for spreadsheet discovery -def _initialize_sheets(credentials: Union[GcpOAuthCredentials, GcpServiceAccountCredentials]) -> Any: +def _initialize_sheets( + credentials: Union[GcpOAuthCredentials, GcpServiceAccountCredentials] +) -> Any: # Build the service object. - service = build('sheets', 'v4', credentials=credentials.to_native_credentials()) + service = build("sheets", "v4", credentials=credentials.to_native_credentials()) return service @dlt.source -def google_spreadsheet(spreadsheet_id: str, sheet_names: Sequence[str], credentials: Union[GcpServiceAccountCredentials, GcpOAuthCredentials, str, StrAny] = dlt.secrets.value) -> Any: - +def google_spreadsheet( + spreadsheet_id: str, + sheet_names: Sequence[str], + credentials: Union[ + GcpServiceAccountCredentials, GcpOAuthCredentials, str, StrAny + ] = dlt.secrets.value, +) -> Any: sheets = _initialize_sheets(cast(GcpServiceAccountCredentials, credentials)) # import pprint # meta = sheets.spreadsheets().get(spreadsheetId=spreadsheet_id, ranges=sheet_names, includeGridData=True).execute() # pprint.pprint(meta) def get_sheet(sheet_name: str) -> Iterator[DictStrAny]: - # get list of list of typed values - result = sheets.spreadsheets().values().get( - spreadsheetId=spreadsheet_id, - range=sheet_name, - # unformatted returns typed values - valueRenderOption="UNFORMATTED_VALUE", - # will return formatted dates - dateTimeRenderOption="FORMATTED_STRING" - ).execute() + result = ( + sheets.spreadsheets() + .values() + .get( + spreadsheetId=spreadsheet_id, + range=sheet_name, + # unformatted returns typed values + valueRenderOption="UNFORMATTED_VALUE", + # will return formatted dates + dateTimeRenderOption="FORMATTED_STRING", + ) + .execute() + ) # pprint.pprint(result) - values = result.get('values') + values = result.get("values") # yield dicts assuming row 0 contains headers and following rows values and all rows have identical length for v in values[1:]: yield {h: v for h, v in zip(values[0], v)} # create resources from supplied sheet names - return [dlt.resource(get_sheet(name), name=name, write_disposition="replace") for name in sheet_names] + return [ + dlt.resource(get_sheet(name), name=name, write_disposition="replace") + for name in sheet_names + 
] diff --git a/docs/examples/archive/sources/jsonl.py b/docs/examples/archive/sources/jsonl.py index 282966d00a..5989d2054f 100644 --- a/docs/examples/archive/sources/jsonl.py +++ b/docs/examples/archive/sources/jsonl.py @@ -7,8 +7,9 @@ from dlt.common.typing import StrAny, StrOrBytesPath -def chunk_jsonl(path: StrOrBytesPath, chunk_size: int = 20) -> Union[Iterator[StrAny], Iterator[List[StrAny]]]: - +def chunk_jsonl( + path: StrOrBytesPath, chunk_size: int = 20 +) -> Union[Iterator[StrAny], Iterator[List[StrAny]]]: with open(path, "rb") as f: def _iter() -> Iterator[StrAny]: @@ -24,9 +25,13 @@ def _iter() -> Iterator[StrAny]: else: break + jsonl_file = dlt.resource(chunk_jsonl, name="jsonl", spec=BaseConfiguration) + @dlt.resource(name="jsonl") -def jsonl_files(paths: Sequence[StrOrBytesPath], chunk_size: int = 20) -> Union[Iterator[StrAny], Iterator[List[StrAny]]]: +def jsonl_files( + paths: Sequence[StrOrBytesPath], chunk_size: int = 20 +) -> Union[Iterator[StrAny], Iterator[List[StrAny]]]: for path in paths: yield from chunk_jsonl(path, chunk_size) diff --git a/docs/examples/archive/sources/rasa/__init__.py b/docs/examples/archive/sources/rasa/__init__.py index acd214368a..3a274af671 100644 --- a/docs/examples/archive/sources/rasa/__init__.py +++ b/docs/examples/archive/sources/rasa/__init__.py @@ -1 +1 @@ -from .rasa import rasa \ No newline at end of file +from .rasa import rasa diff --git a/docs/examples/archive/sources/rasa/rasa.py b/docs/examples/archive/sources/rasa/rasa.py index aa31b3c482..60643fe17e 100644 --- a/docs/examples/archive/sources/rasa/rasa.py +++ b/docs/examples/archive/sources/rasa/rasa.py @@ -3,7 +3,7 @@ import dlt from dlt.common.typing import StrAny, TDataItem, TDataItems from dlt.common.time import timestamp_within -from dlt.extract.source import DltResource +from dlt.extract.resource import DltResource @dlt.source @@ -13,7 +13,7 @@ def rasa( source_env: str = None, initial_timestamp: float = None, end_timestamp: float = None, - store_last_timestamp: bool = True + store_last_timestamp: bool = True, ) -> Any: """Transforms the base resource provided in `data_from` into a rasa tracker store raw dataset where each event type get it's own table. The resource is a stream resource and it generates tables dynamically from data. 
The source uses `rasa.schema.yaml` file to initialize the schema @@ -34,7 +34,9 @@ def rasa( def events(source_events: TDataItems) -> Iterator[TDataItem]: # recover start_timestamp from state if given if store_last_timestamp: - start_timestamp = max(initial_timestamp or 0, dlt.current.source_state().get("start_timestamp", 0)) + start_timestamp = max( + initial_timestamp or 0, dlt.current.source_state().get("start_timestamp", 0) + ) # we expect tracker store events here last_timestamp: int = None @@ -51,7 +53,7 @@ def _proc_event(source_event: TDataItem) -> Iterator[TDataItem]: event = { "sender_id": source_event["sender_id"], "timestamp": last_timestamp, - "event": event_type + "event": event_type, } if source_env: event["source"] = source_env diff --git a/docs/examples/archive/sources/singer_tap.py b/docs/examples/archive/sources/singer_tap.py index 41db2c09f5..3c733c33f1 100644 --- a/docs/examples/archive/sources/singer_tap.py +++ b/docs/examples/archive/sources/singer_tap.py @@ -12,6 +12,7 @@ FilePathOrDict = Union[StrAny, StrOrBytesPath] + class SingerMessage(TypedDict): type: str # noqa: A003 @@ -24,6 +25,7 @@ class SingerRecord(SingerMessage): class SingerState(SingerMessage): value: DictStrAny + # try: # from singer import parse_message_from_obj, Message, RecordMessage, StateMessage # except ImportError: @@ -33,7 +35,9 @@ class SingerState(SingerMessage): # pip install ../singer/singer-python # https://github.com/datamill-co/singer-runner/tree/master/singer_runner # https://techgaun.github.io/active-forks/index.html#singer-io/singer-python -def get_source_from_stream(singer_messages: Iterator[SingerMessage], state: DictStrAny = None) -> Iterator[TDataItem]: +def get_source_from_stream( + singer_messages: Iterator[SingerMessage], state: DictStrAny = None +) -> Iterator[TDataItem]: last_state = {} for msg in singer_messages: if msg["type"] == "RECORD": @@ -57,7 +61,13 @@ def singer_raw_stream(singer_messages: TDataItems, use_state: bool = True) -> It @dlt.source(spec=BaseConfiguration) # use BaseConfiguration spec to prevent injections -def tap(venv: Venv, tap_name: str, config_file: FilePathOrDict, catalog_file: FilePathOrDict, use_state: bool = True) -> Any: +def tap( + venv: Venv, + tap_name: str, + config_file: FilePathOrDict, + catalog_file: FilePathOrDict, + use_state: bool = True, +) -> Any: # TODO: generate append/replace dispositions and some table/column hints from catalog files def as_config_file(config: FilePathOrDict) -> StrOrBytesPath: @@ -87,14 +97,15 @@ def singer_messages() -> Iterator[TDataItem]: else: state_params = () # type: ignore - pipe_iterator = singer_process_pipe(venv, - tap_name, - "--config", - os.path.abspath(config_file_path), - "--catalog", - os.path.abspath(catalog_file_path), - *state_params - ) + pipe_iterator = singer_process_pipe( + venv, + tap_name, + "--config", + os.path.abspath(config_file_path), + "--catalog", + os.path.abspath(catalog_file_path), + *state_params + ) yield from get_source_from_stream(pipe_iterator, state) return singer_messages diff --git a/docs/examples/archive/sources/sql_query.py b/docs/examples/archive/sources/sql_query.py index effa8740d5..8cd60992b2 100644 --- a/docs/examples/archive/sources/sql_query.py +++ b/docs/examples/archive/sources/sql_query.py @@ -12,23 +12,30 @@ # import gracefully and produce nice exception that explains the user what to do import pandas except ImportError: - raise MissingDependencyException("SQL Query Source", ["pandas"], "SQL Query Source temporarily uses pandas as DB interface") + raise 
MissingDependencyException( + "SQL Query Source", ["pandas"], "SQL Query Source temporarily uses pandas as DB interface" + ) try: from sqlalchemy.exc import NoSuchModuleError except ImportError: - raise MissingDependencyException("SQL Query Source", ["sqlalchemy"], "SQL Query Source temporarily uses pandas as DB interface") + raise MissingDependencyException( + "SQL Query Source", + ["sqlalchemy"], + "SQL Query Source temporarily uses pandas as DB interface", + ) -def _query_data( - f: AnyFun -) -> Iterator[DictStrAny]: - +def _query_data(f: AnyFun) -> Iterator[DictStrAny]: try: items = f() except NoSuchModuleError as m_exc: if "redshift.redshift_connector" in str(m_exc): - raise MissingDependencyException("SQL Query Source", ["sqlalchemy-redshift", "redshift_connector"], "Redshift dialect support for SqlAlchemy") + raise MissingDependencyException( + "SQL Query Source", + ["sqlalchemy-redshift", "redshift_connector"], + "Redshift dialect support for SqlAlchemy", + ) raise for i in items: @@ -46,11 +53,21 @@ def query_table( coerce_float: bool = True, parse_dates: Any = None, columns: List[str] = None, - chunk_size: int = 1000 + chunk_size: int = 1000, ) -> Any: print(credentials) assert isinstance(credentials, ConnectionStringCredentials) - f = partial(pandas.read_sql_table, table_name, credentials.to_native_representation(), table_schema_name, None, coerce_float, parse_dates, columns, chunksize=chunk_size) + f = partial( + pandas.read_sql_table, + table_name, + credentials.to_native_representation(), + table_schema_name, + None, + coerce_float, + parse_dates, + columns, + chunksize=chunk_size, + ) # if resource is returned from decorator function, it will override the hints from decorator return dlt.resource(_query_data(f), name=table_name) @@ -62,8 +79,18 @@ def query_sql( coerce_float: bool = True, parse_dates: Any = None, chunk_size: int = 1000, - dtype: Any = None + dtype: Any = None, ) -> Iterator[TDataItem]: assert isinstance(credentials, ConnectionStringCredentials) - f = partial(pandas.read_sql_query, sql, credentials.to_native_representation(), None, coerce_float, None, parse_dates, chunk_size, dtype) + f = partial( + pandas.read_sql_query, + sql, + credentials.to_native_representation(), + None, + coerce_float, + None, + parse_dates, + chunk_size, + dtype, + ) yield from _query_data(f) diff --git a/docs/examples/chess/chess.py b/docs/examples/chess/chess.py index f136e49a0a..84fbf3cb07 100644 --- a/docs/examples/chess/chess.py +++ b/docs/examples/chess/chess.py @@ -10,8 +10,13 @@ @dlt.source -def chess(chess_url: str = dlt.config.value, title: str = "GM", max_players: int = 2, year: int = 2022, month: int = 10) -> Any: - +def chess( + chess_url: str = dlt.config.value, + title: str = "GM", + max_players: int = 2, + year: int = 2022, + month: int = 10, +) -> Any: def _get_data_with_retry(path: str) -> StrAny: r = client.get(f"{chess_url}{path}") return r.json() # type: ignore @@ -29,7 +34,7 @@ def players() -> Iterator[TDataItems]: @dlt.defer def players_profiles(username: Any) -> TDataItems: print(f"getting {username} profile via thread {threading.current_thread().name}") - sleep(1) # add some latency to show parallel runs + sleep(1) # add some latency to show parallel runs return _get_data_with_retry(f"player/{username}") # this resource takes data from players and returns games for the last month if not specified otherwise @@ -41,6 +46,7 @@ def players_games(username: Any) -> Iterator[TDataItems]: return players(), players_profiles, players_games + if __name__ == "__main__": 
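    # editorial note, not part of the original example: the print and assert that follow guard
    # the working directory - the script expects to be launched from docs/examples/chess so that
    # its relative .dlt/config.toml (holding the parallel run configuration mentioned below) is found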
print("You must run this from the docs/examples/chess folder") assert os.getcwd().endswith("chess") @@ -48,12 +54,7 @@ def players_games(username: Any) -> Iterator[TDataItems]: # look for parallel run configuration in `config.toml`! # mind the full_refresh: it makes the pipeline to load to a distinct dataset each time it is run and always is resetting the schema and state info = dlt.pipeline( - pipeline_name="chess_games", - destination="postgres", - dataset_name="chess", - full_refresh=True - ).run( - chess(max_players=5, month=9) - ) + pipeline_name="chess_games", destination="postgres", dataset_name="chess", full_refresh=True + ).run(chess(max_players=5, month=9)) # display where the data went print(info) diff --git a/docs/examples/chess/chess_dbt.py b/docs/examples/chess/chess_dbt.py index 4ee51f6b50..f453e53a38 100644 --- a/docs/examples/chess/chess_dbt.py +++ b/docs/examples/chess/chess_dbt.py @@ -21,4 +21,3 @@ # run all the tests tests = transforms.test() print(tests) - diff --git a/docs/examples/chess_production/chess.py b/docs/examples/chess_production/chess.py index 79b573fe43..5b767f0eb6 100644 --- a/docs/examples/chess_production/chess.py +++ b/docs/examples/chess_production/chess.py @@ -6,6 +6,7 @@ from dlt.common.typing import StrAny, TDataItems from dlt.sources.helpers.requests import client + @dlt.source def chess( chess_url: str = dlt.config.value, @@ -31,9 +32,7 @@ def players() -> Iterator[TDataItems]: @dlt.transformer(data_from=players, write_disposition="replace") @dlt.defer def players_profiles(username: Any) -> TDataItems: - print( - f"getting {username} profile via thread {threading.current_thread().name}" - ) + print(f"getting {username} profile via thread {threading.current_thread().name}") sleep(1) # add some latency to show parallel runs return _get_data_with_retry(f"player/{username}") @@ -61,6 +60,7 @@ def players_games(username: Any) -> Iterator[TDataItems]: MAX_PLAYERS = 5 + def load_data_with_retry(pipeline, data): try: for attempt in Retrying( @@ -70,9 +70,7 @@ def load_data_with_retry(pipeline, data): reraise=True, ): with attempt: - logger.info( - f"Running the pipeline, attempt={attempt.retry_state.attempt_number}" - ) + logger.info(f"Running the pipeline, attempt={attempt.retry_state.attempt_number}") load_info = pipeline.run(data) logger.info(str(load_info)) @@ -80,16 +78,12 @@ def load_data_with_retry(pipeline, data): load_info.raise_on_failed_jobs() # send notification send_slack_message( - pipeline.runtime_config.slack_incoming_hook, - "Data was successfully loaded!" + pipeline.runtime_config.slack_incoming_hook, "Data was successfully loaded!" ) except Exception: # we get here after all the failed retries # send notification - send_slack_message( - pipeline.runtime_config.slack_incoming_hook, - "Something went wrong!" 
- ) + send_slack_message(pipeline.runtime_config.slack_incoming_hook, "Something went wrong!") raise # we get here after a successful attempt @@ -98,18 +92,14 @@ def load_data_with_retry(pipeline, data): # print the information on the first load package and all jobs inside logger.info(f"First load package info: {load_info.load_packages[0]}") # print the information on the first completed job in first load package - logger.info( - f"First completed job info: {load_info.load_packages[0].jobs['completed_jobs'][0]}" - ) + logger.info(f"First completed job info: {load_info.load_packages[0].jobs['completed_jobs'][0]}") # check for schema updates: schema_updates = [p.schema_update for p in load_info.load_packages] # send notifications if there are schema updates if schema_updates: # send notification - send_slack_message( - pipeline.runtime_config.slack_incoming_hook, "Schema was updated!" - ) + send_slack_message(pipeline.runtime_config.slack_incoming_hook, "Schema was updated!") # To run simple tests with `sql_client`, such as checking table counts and # warning if there is no data, you can use the `execute_query` method @@ -160,4 +150,4 @@ def load_data_with_retry(pipeline, data): ) # get data for a few famous players data = chess(chess_url="https://api.chess.com/pub/", max_players=MAX_PLAYERS) - load_data_with_retry(pipeline, data) \ No newline at end of file + load_data_with_retry(pipeline, data) diff --git a/docs/examples/connector_x_arrow/load_arrow.py b/docs/examples/connector_x_arrow/load_arrow.py index 06ca4e17b3..b3c654cef9 100644 --- a/docs/examples/connector_x_arrow/load_arrow.py +++ b/docs/examples/connector_x_arrow/load_arrow.py @@ -3,6 +3,7 @@ import dlt from dlt.sources.credentials import ConnectionStringCredentials + def read_sql_x( conn_str: ConnectionStringCredentials = dlt.secrets.value, query: str = dlt.config.value, @@ -14,6 +15,7 @@ def read_sql_x( protocol="binary", ) + def genome_resource(): # create genome resource with merge on `upid` primary key genome = dlt.resource( diff --git a/docs/examples/incremental_loading/zendesk.py b/docs/examples/incremental_loading/zendesk.py index 6370f29811..6113f98793 100644 --- a/docs/examples/incremental_loading/zendesk.py +++ b/docs/examples/incremental_loading/zendesk.py @@ -1,16 +1,15 @@ -from typing import Iterator, Optional, Dict, Any, Tuple +from typing import Optional, Dict, Any, Tuple import dlt from dlt.common import pendulum from dlt.common.time import ensure_pendulum_datetime -from dlt.common.typing import TDataItem, TDataItems, TAnyDateTime -from dlt.extract.source import DltResource +from dlt.common.typing import TAnyDateTime from dlt.sources.helpers.requests import client @dlt.source(max_table_nesting=2) def zendesk_support( - credentials: Dict[str, str]=dlt.secrets.value, + credentials: Dict[str, str] = dlt.secrets.value, start_date: Optional[TAnyDateTime] = pendulum.datetime(year=2000, month=1, day=1), # noqa: B008 end_date: Optional[TAnyDateTime] = None, ): @@ -102,9 +101,7 @@ def get_pages( # make request and keep looping until there is no next page get_url = f"{url}{endpoint}" while get_url: - response = client.get( - get_url, headers=headers, auth=auth, params=params - ) + response = client.get(get_url, headers=headers, auth=auth, params=params) response.raise_for_status() response_json = response.json() result = response_json[data_point_name] @@ -123,4 +120,4 @@ def get_pages( ) load_info = pipeline.run(zendesk_support()) - print(load_info) \ No newline at end of file + print(load_info) diff --git 
a/docs/examples/nested_data/nested_data.py b/docs/examples/nested_data/nested_data.py index 3e4a1295c3..7f85f0522e 100644 --- a/docs/examples/nested_data/nested_data.py +++ b/docs/examples/nested_data/nested_data.py @@ -13,6 +13,7 @@ CHUNK_SIZE = 10000 + # You can limit how deep dlt goes when generating child tables. # By default, the library will descend and generate child tables # for all nested lists, without a limit. @@ -81,6 +82,7 @@ def load_documents(self) -> Iterator[TDataItem]: while docs_slice := list(islice(cursor, CHUNK_SIZE)): yield map_nested_in_place(convert_mongo_objs, docs_slice) + def convert_mongo_objs(value: Any) -> Any: if isinstance(value, (ObjectId, Decimal128)): return str(value) @@ -98,9 +100,7 @@ def convert_mongo_objs(value: Any) -> Any: destination="duckdb", dataset_name="unpacked_data", ) - source_data = mongodb_collection( - collection="movies", write_disposition="replace" - ) + source_data = mongodb_collection(collection="movies", write_disposition="replace") load_info = pipeline.run(source_data) print(load_info) @@ -114,9 +114,7 @@ def convert_mongo_objs(value: Any) -> Any: destination="duckdb", dataset_name="not_unpacked_data", ) - source_data = mongodb_collection( - collection="movies", write_disposition="replace" - ) + source_data = mongodb_collection(collection="movies", write_disposition="replace") source_data.max_table_nesting = 0 load_info = pipeline.run(source_data) print(load_info) @@ -130,9 +128,7 @@ def convert_mongo_objs(value: Any) -> Any: destination="duckdb", dataset_name="unpacked_data_without_cast", ) - source_data = mongodb_collection( - collection="movies", write_disposition="replace" - ) + source_data = mongodb_collection(collection="movies", write_disposition="replace") source_data.movies.apply_hints(columns={"cast": {"data_type": "complex"}}) load_info = pipeline.run(source_data) print(load_info) diff --git a/docs/examples/qdrant_zendesk/qdrant.py b/docs/examples/qdrant_zendesk/qdrant.py index fdda366806..fa137d8dc6 100644 --- a/docs/examples/qdrant_zendesk/qdrant.py +++ b/docs/examples/qdrant_zendesk/qdrant.py @@ -5,11 +5,12 @@ from dlt.common.time import ensure_pendulum_datetime from dlt.common.typing import TAnyDateTime from dlt.sources.helpers.requests import client -from dlt.destinations.qdrant import qdrant_adapter +from dlt.destinations.impl.qdrant import qdrant_adapter from qdrant_client import QdrantClient from dlt.common.configuration.inject import with_config + # function from: https://github.com/dlt-hub/verified-sources/tree/master/sources/zendesk @dlt.source(max_table_nesting=2) def zendesk_support( @@ -45,9 +46,7 @@ def zendesk_support( # when two events have the same timestamp @dlt.resource(primary_key="id", write_disposition="append") def tickets_data( - updated_at: dlt.sources.incremental[ - pendulum.DateTime - ] = dlt.sources.incremental( + updated_at: dlt.sources.incremental[pendulum.DateTime] = dlt.sources.incremental( "updated_at", initial_value=start_date_obj, end_value=end_date_obj, @@ -80,6 +79,7 @@ def _parse_date_or_none(value: Optional[str]) -> Optional[pendulum.DateTime]: return None return ensure_pendulum_datetime(value) + # modify dates to return datetime objects instead def _fix_date(ticket): ticket["updated_at"] = _parse_date_or_none(ticket["updated_at"]) @@ -87,6 +87,7 @@ def _fix_date(ticket): ticket["due_at"] = _parse_date_or_none(ticket["due_at"]) return ticket + # function from: https://github.com/dlt-hub/verified-sources/tree/master/sources/zendesk def get_pages( url: str, @@ -116,9 +117,7 @@ def 
get_pages( # make request and keep looping until there is no next page get_url = f"{url}{endpoint}" while get_url: - response = client.get( - get_url, headers=headers, auth=auth, params=params - ) + response = client.get(get_url, headers=headers, auth=auth, params=params) response.raise_for_status() response_json = response.json() result = response_json[data_point_name] @@ -129,6 +128,7 @@ def get_pages( if not response_json["end_of_stream"]: get_url = response_json["next_page"] + if __name__ == "__main__": # create a pipeline with an appropriate name pipeline = dlt.pipeline( @@ -141,17 +141,16 @@ def get_pages( load_info = pipeline.run( # here we use a special function to tell Qdrant which fields to embed qdrant_adapter( - zendesk_support(), # retrieve tickets data + zendesk_support(), # retrieve tickets data embed=["subject", "description"], ) ) print(load_info) - # running the Qdrant client to connect to your Qdrant database - @with_config(sections=("destination", "credentials")) + @with_config(sections=("destination", "qdrant", "credentials")) def get_qdrant_client(location=dlt.secrets.value, api_key=dlt.secrets.value): return QdrantClient( url=location, @@ -168,5 +167,5 @@ def get_qdrant_client(location=dlt.secrets.value, api_key=dlt.secrets.value): response = qdrant_client.query( "zendesk_data_content", # collection/dataset name with the 'content' suffix -> tickets content table query_text=["cancel", "cancel subscription"], # prompt to search - limit=3 # limit the number of results to the nearest 3 embeddings + limit=3, # limit the number of results to the nearest 3 embeddings ) diff --git a/docs/examples/transformers/pokemon.py b/docs/examples/transformers/pokemon.py index ce8cc0142c..97b9a98b11 100644 --- a/docs/examples/transformers/pokemon.py +++ b/docs/examples/transformers/pokemon.py @@ -45,10 +45,8 @@ def species(pokemon_details): # 2. 
send pokemon details into `species` transformer to get species details # NOTE: dlt is smart enough to get data from pokemon_list and pokemon details once - return ( - pokemon_list | pokemon, - pokemon_list | pokemon | species - ) + return (pokemon_list | pokemon, pokemon_list | pokemon | species) + if __name__ == "__main__": # build duck db pipeline @@ -58,4 +56,4 @@ def species(pokemon_details): # the pokemon_list resource does not need to be loaded load_info = pipeline.run(source("https://pokeapi.co/api/v2/pokemon")) - print(load_info) \ No newline at end of file + print(load_info) diff --git a/docs/website/docs/conftest.py b/docs/website/docs/conftest.py index b605700187..d108089342 100644 --- a/docs/website/docs/conftest.py +++ b/docs/website/docs/conftest.py @@ -7,29 +7,52 @@ from dlt.common.configuration.container import Container # patch which providers to enable -from dlt.common.configuration.providers import StringTomlProvider, EnvironProvider, SecretsTomlProvider, ConfigTomlProvider -from dlt.common.configuration.specs.config_providers_context import ConfigProvidersContext, ConfigProvidersConfiguration - -from tests.utils import patch_home_dir, autouse_test_storage, preserve_environ, duckdb_pipeline_location, wipe_pipeline +from dlt.common.configuration.providers import ( + StringTomlProvider, + EnvironProvider, + SecretsTomlProvider, + ConfigTomlProvider, +) +from dlt.common.configuration.specs.config_providers_context import ( + ConfigProvidersContext, + ConfigProvidersConfiguration, +) + +from tests.utils import ( + patch_home_dir, + autouse_test_storage, + preserve_environ, + duckdb_pipeline_location, + wipe_pipeline, +) @pytest.fixture(autouse=True) def setup_tests(request): # always set working dir to main website folder - dname = os.path.dirname(request.module.__file__) + dname = os.path.dirname(request.module.__file__) config_dir = dname + "/.dlt" # inject provider context so the original providers are restored at the end def _initial_providers(): - return [EnvironProvider(), SecretsTomlProvider(project_dir=config_dir, add_global_config=False), ConfigTomlProvider(project_dir=config_dir, add_global_config=False)] + return [ + EnvironProvider(), + SecretsTomlProvider(project_dir=config_dir, add_global_config=False), + ConfigTomlProvider(project_dir=config_dir, add_global_config=False), + ] glob_ctx = ConfigProvidersContext() glob_ctx.providers = _initial_providers() - with set_working_dir(dname), Container().injectable_context(glob_ctx), patch("dlt.common.configuration.specs.config_providers_context.ConfigProvidersContext.initial_providers", _initial_providers): + with set_working_dir(dname), Container().injectable_context(glob_ctx), patch( + "dlt.common.configuration.specs.config_providers_context.ConfigProvidersContext.initial_providers", + _initial_providers, + ): yield def pytest_configure(config): # push sentry to ci - os.environ["RUNTIME__SENTRY_DSN"] = "https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752" + os.environ["RUNTIME__SENTRY_DSN"] = ( + "https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752" + ) diff --git a/docs/website/docs/dlt-ecosystem/destinations/duckdb.md b/docs/website/docs/dlt-ecosystem/destinations/duckdb.md index 594a93cdd1..2849a3e427 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/duckdb.md +++ b/docs/website/docs/dlt-ecosystem/destinations/duckdb.md @@ -94,7 +94,14 @@ You can configure a DuckDB destination with [secret / config values](../../gener ```toml 
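# editorial sketch, not part of the original snippet - besides the relative URL on the next line,
# the special values described right after this block can also be used, for example:
# destination.duckdb.credentials="duckdb:////var/dlt/quack.duckdb"  # absolute path (four slashes); the path itself is hypothetical
# destination.duckdb.credentials=":pipeline:"                        # quack.duckdb in the pipeline working directory
# destination.duckdb.credentials=":memory:"                          # in-memory database, useful for testing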
destination.duckdb.credentials=duckdb:///_storage/test_quack.duckdb ``` +The **duckdb://** URL above creates a **relative** path to `_storage/test_quack.duckdb`. To define an **absolute** path, you need to specify four slashes, i.e. `duckdb:////_storage/test_quack.duckdb`. +A few special connection strings are supported: +* **:pipeline:** creates the database in the working directory of the pipeline with the name `quack.duckdb`. +* **:memory:** creates an in-memory database. This may be useful for testing. + + +### Additional configuration Unique indexes may be created during loading if the following config value is set: ```toml [destination.duckdb] diff --git a/docs/website/docs/dlt-ecosystem/destinations/motherduck.md b/docs/website/docs/dlt-ecosystem/destinations/motherduck.md index 97c3d02b94..7a219150ec 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/motherduck.md +++ b/docs/website/docs/dlt-ecosystem/destinations/motherduck.md @@ -98,5 +98,4 @@ We also see them. My observation is that if you write a lot of data into the database then close the connection and then open it again to write, there's a chance of such timeout. Possible **WAL** file is being written to the remote duckdb database. ### Invalid Input Error: Initialization function "motherduck_init" from file -Use `duckdb 0.8.1` - +Use `duckdb 0.8.1` or above. diff --git a/docs/website/docs/dlt-ecosystem/destinations/mssql.md b/docs/website/docs/dlt-ecosystem/destinations/mssql.md index 8086c6a223..ff66d67d55 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/mssql.md +++ b/docs/website/docs/dlt-ecosystem/destinations/mssql.md @@ -24,7 +24,8 @@ See instructions here to [install Microsoft ODBC Driver 18 for SQL Server on Win Following ODBC drivers are supported: * ODBC Driver 18 for SQL Server * ODBC Driver 17 for SQL Server -[You configure driver name explicitly](#additional-destination-options) as well. + +[You can configure the driver name explicitly](#additional-destination-options) as well. ### Create a pipeline @@ -98,7 +99,14 @@ create_indexes=true You can explicitly set the ODBC driver name: ```toml [destination.mssql.credentials] -odbc_driver="ODBC Driver 18 for SQL Server" +driver="ODBC Driver 18 for SQL Server" +``` + +When using a SQLAlchemy connection string, replace spaces with `+`: + +```toml +# keep it at the top of your toml file! before any section starts +destination.mssql.credentials="mssql://loader:@loader.database.windows.net/dlt_data?driver=ODBC+Driver+18+for+SQL+Server" ``` ### dbt support diff --git a/docs/website/docs/dlt-ecosystem/destinations/qdrant.md b/docs/website/docs/dlt-ecosystem/destinations/qdrant.md index 519375cdac..c502d1cf5b 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/qdrant.md +++ b/docs/website/docs/dlt-ecosystem/destinations/qdrant.md @@ -33,7 +33,7 @@ If no configuration options are provided, the default fallback will be `http://l ```python import dlt -from dlt.destinations.qdrant import qdrant_adapter +from dlt.destinations.impl.qdrant import qdrant_adapter movies = [ { diff --git a/docs/website/docs/dlt-ecosystem/destinations/weaviate.md b/docs/website/docs/dlt-ecosystem/destinations/weaviate.md index 8c626266a4..fe7dafc243 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/weaviate.md +++ b/docs/website/docs/dlt-ecosystem/destinations/weaviate.md @@ -252,7 +252,7 @@ it will be normalized to: so your best course of action is to clean up the data yourself before loading and use default naming convention.
Nevertheless you can configure the alternative in `config.toml`: ```toml [schema] -naming="dlt.destinations.weaviate.ci_naming" +naming="dlt.destinations.weaviate.impl.ci_naming" ``` ## Additional destination options diff --git a/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt-snippets.py b/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt-snippets.py index beb1c862cc..4cb960b19f 100644 --- a/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt-snippets.py +++ b/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt-snippets.py @@ -10,11 +10,14 @@ def run_dbt_standalone_snippet() -> None: working_dir=".", # the package below will be cloned to current dir package_location="https://github.com/dbt-labs/jaffle_shop.git", package_profiles_dir=os.path.abspath("."), # profiles.yml must be placed in this dir - package_profile_name="duckdb_dlt_dbt_test" # name of the profile + package_profile_name="duckdb_dlt_dbt_test", # name of the profile ) models = runner.run_all() # @@@DLT_SNIPPET_END run_dbt_standalone for m in models: - print(f"Model {m.model_name} materialized in {m.time} with status {m.status} and message {m.message}") + print( + f"Model {m.model_name} materialized in {m.time} with status {m.status} and message" + f" {m.message}" + ) diff --git a/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt.md b/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt.md index af2d5df469..01c16e15b4 100644 --- a/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt.md +++ b/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt.md @@ -95,7 +95,7 @@ runner = create_runner( working_dir=".", # the package below will be cloned to current dir package_location="https://github.com/dbt-labs/jaffle_shop.git", package_profiles_dir=os.path.abspath("."), # profiles.yml must be placed in this dir - package_profile_name="duckdb_dlt_dbt_test" # name of the profile + package_profile_name="duckdb_dlt_dbt_test", # name of the profile ) models = runner.run_all() diff --git a/docs/website/docs/examples/chess_production/code/chess-snippets.py b/docs/website/docs/examples/chess_production/code/chess-snippets.py index 84ef46bc12..5571dc6fac 100644 --- a/docs/website/docs/examples/chess_production/code/chess-snippets.py +++ b/docs/website/docs/examples/chess_production/code/chess-snippets.py @@ -38,9 +38,7 @@ def players() -> Iterator[TDataItems]: @dlt.transformer(data_from=players, write_disposition="replace") @dlt.defer def players_profiles(username: Any) -> TDataItems: - print( - f"getting {username} profile via thread {threading.current_thread().name}" - ) + print(f"getting {username} profile via thread {threading.current_thread().name}") sleep(1) # add some latency to show parallel runs return _get_data_with_retry(f"player/{username}") @@ -89,16 +87,12 @@ def load_data_with_retry(pipeline, data): load_info.raise_on_failed_jobs() # send notification send_slack_message( - pipeline.runtime_config.slack_incoming_hook, - "Data was successfully loaded!" + pipeline.runtime_config.slack_incoming_hook, "Data was successfully loaded!" ) except Exception: # we get here after all the failed retries # send notification - send_slack_message( - pipeline.runtime_config.slack_incoming_hook, - "Something went wrong!" 
- ) + send_slack_message(pipeline.runtime_config.slack_incoming_hook, "Something went wrong!") raise # we get here after a successful attempt @@ -116,9 +110,7 @@ def load_data_with_retry(pipeline, data): # send notifications if there are schema updates if schema_updates: # send notification - send_slack_message( - pipeline.runtime_config.slack_incoming_hook, "Schema was updated!" - ) + send_slack_message(pipeline.runtime_config.slack_incoming_hook, "Schema was updated!") # To run simple tests with `sql_client`, such as checking table counts and # warning if there is no data, you can use the `execute_query` method @@ -179,4 +171,4 @@ def load_data_with_retry(pipeline, data): data = chess(chess_url="https://api.chess.com/pub/", max_players=MAX_PLAYERS) load_data_with_retry(pipeline, data) # @@@DLT_SNIPPET_END markdown_pipeline - # @@@DLT_SNIPPET_END example \ No newline at end of file + # @@@DLT_SNIPPET_END example diff --git a/docs/website/docs/examples/chess_production/index.md b/docs/website/docs/examples/chess_production/index.md index 381fce1065..b812e47ef8 100644 --- a/docs/website/docs/examples/chess_production/index.md +++ b/docs/website/docs/examples/chess_production/index.md @@ -61,9 +61,7 @@ def chess( @dlt.transformer(data_from=players, write_disposition="replace") @dlt.defer def players_profiles(username: Any) -> TDataItems: - print( - f"getting {username} profile via thread {threading.current_thread().name}" - ) + print(f"getting {username} profile via thread {threading.current_thread().name}") sleep(1) # add some latency to show parallel runs return _get_data_with_retry(f"player/{username}") @@ -116,16 +114,12 @@ def load_data_with_retry(pipeline, data): load_info.raise_on_failed_jobs() # send notification send_slack_message( - pipeline.runtime_config.slack_incoming_hook, - "Data was successfully loaded!" + pipeline.runtime_config.slack_incoming_hook, "Data was successfully loaded!" ) except Exception: # we get here after all the failed retries # send notification - send_slack_message( - pipeline.runtime_config.slack_incoming_hook, - "Something went wrong!" - ) + send_slack_message(pipeline.runtime_config.slack_incoming_hook, "Something went wrong!") raise # we get here after a successful attempt @@ -143,9 +137,7 @@ def load_data_with_retry(pipeline, data): # send notifications if there are schema updates if schema_updates: # send notification - send_slack_message( - pipeline.runtime_config.slack_incoming_hook, "Schema was updated!" 
- ) + send_slack_message(pipeline.runtime_config.slack_incoming_hook, "Schema was updated!") # To run simple tests with `sql_client`, such as checking table counts and # warning if there is no data, you can use the `execute_query` method diff --git a/docs/website/docs/examples/incremental_loading/code/zendesk-snippets.py b/docs/website/docs/examples/incremental_loading/code/zendesk-snippets.py index 2d674407bc..ecc0420854 100644 --- a/docs/website/docs/examples/incremental_loading/code/zendesk-snippets.py +++ b/docs/website/docs/examples/incremental_loading/code/zendesk-snippets.py @@ -7,23 +7,22 @@ @skipifgithubfork def incremental_snippet() -> None: - # @@@DLT_SNIPPET_START example # @@@DLT_SNIPPET_START markdown_source - from typing import Iterator, Optional, Dict, Any, Tuple + from typing import Optional, Dict, Any, Tuple import dlt from dlt.common import pendulum from dlt.common.time import ensure_pendulum_datetime - from dlt.common.typing import TDataItem, TDataItems, TAnyDateTime - from dlt.extract.source import DltResource + from dlt.common.typing import TAnyDateTime from dlt.sources.helpers.requests import client - @dlt.source(max_table_nesting=2) def zendesk_support( - credentials: Dict[str, str]=dlt.secrets.value, - start_date: Optional[TAnyDateTime] = pendulum.datetime(year=2000, month=1, day=1), # noqa: B008 + credentials: Dict[str, str] = dlt.secrets.value, + start_date: Optional[TAnyDateTime] = pendulum.datetime( # noqa: B008 + year=2000, month=1, day=1 + ), end_date: Optional[TAnyDateTime] = None, ): """ @@ -115,9 +114,7 @@ def get_pages( # make request and keep looping until there is no next page get_url = f"{url}{endpoint}" while get_url: - response = client.get( - get_url, headers=headers, auth=auth, params=params - ) + response = client.get(get_url, headers=headers, auth=auth, params=params) response.raise_for_status() response_json = response.json() result = response_json[data_point_name] @@ -128,9 +125,8 @@ def get_pages( if not response_json["end_of_stream"]: get_url = response_json["next_page"] - # @@@DLT_SNIPPET_START markdown_pipeline - __name__ = "__main__" # @@@DLT_REMOVE + __name__ = "__main__" # @@@DLT_REMOVE if __name__ == "__main__": # create dlt pipeline pipeline = dlt.pipeline( @@ -145,4 +141,3 @@ def get_pages( # check that stuff was loaded row_counts = pipeline.last_trace.last_normalize_info.row_counts assert row_counts["ticket_events"] == 24 - diff --git a/docs/website/docs/examples/incremental_loading/index.md b/docs/website/docs/examples/incremental_loading/index.md index a4aa5e3c8e..edfbee2806 100644 --- a/docs/website/docs/examples/incremental_loading/index.md +++ b/docs/website/docs/examples/incremental_loading/index.md @@ -33,20 +33,20 @@ We'll learn: ```py -from typing import Iterator, Optional, Dict, Any, Tuple +from typing import Optional, Dict, Any, Tuple import dlt from dlt.common import pendulum from dlt.common.time import ensure_pendulum_datetime -from dlt.common.typing import TDataItem, TDataItems, TAnyDateTime -from dlt.extract.source import DltResource +from dlt.common.typing import TAnyDateTime from dlt.sources.helpers.requests import client - @dlt.source(max_table_nesting=2) def zendesk_support( - credentials: Dict[str, str]=dlt.secrets.value, - start_date: Optional[TAnyDateTime] = pendulum.datetime(year=2000, month=1, day=1), # noqa: B008 + credentials: Dict[str, str] = dlt.secrets.value, + start_date: Optional[TAnyDateTime] = pendulum.datetime( # noqa: B008 + year=2000, month=1, day=1 + ), end_date: Optional[TAnyDateTime] = None, ): 
""" diff --git a/docs/website/docs/examples/nested_data/code/nested_data-snippets.py b/docs/website/docs/examples/nested_data/code/nested_data-snippets.py index 57130fa06a..bff64167fd 100644 --- a/docs/website/docs/examples/nested_data/code/nested_data-snippets.py +++ b/docs/website/docs/examples/nested_data/code/nested_data-snippets.py @@ -108,14 +108,12 @@ def convert_mongo_objs(value: Any) -> Any: destination="duckdb", dataset_name="unpacked_data", ) - source_data = mongodb_collection( - collection="movies", write_disposition="replace" - ) + source_data = mongodb_collection(collection="movies", write_disposition="replace") load_info = pipeline.run(source_data) print(load_info) tables = pipeline.last_trace.last_normalize_info.row_counts # @@@DLT_REMOVE tables.pop("_dlt_pipeline_state") # @@@DLT_REMOVE - assert (len(tables) == 7), pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE + assert len(tables) == 7, pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE # The second method involves setting the max_table_nesting attribute directly # on the source data object. @@ -127,15 +125,13 @@ def convert_mongo_objs(value: Any) -> Any: destination="duckdb", dataset_name="not_unpacked_data", ) - source_data = mongodb_collection( - collection="movies", write_disposition="replace" - ) + source_data = mongodb_collection(collection="movies", write_disposition="replace") source_data.max_table_nesting = 0 load_info = pipeline.run(source_data) print(load_info) tables = pipeline.last_trace.last_normalize_info.row_counts # @@@DLT_REMOVE tables.pop("_dlt_pipeline_state") # @@@DLT_REMOVE - assert (len(tables) == 1), pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE + assert len(tables) == 1, pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE # The third method involves applying data type hints to specific columns in the data. 
# In this case, we tell dlt that column 'cast' (containing a list of actors) @@ -146,15 +142,13 @@ def convert_mongo_objs(value: Any) -> Any: destination="duckdb", dataset_name="unpacked_data_without_cast", ) - source_data = mongodb_collection( - collection="movies", write_disposition="replace" - ) + source_data = mongodb_collection(collection="movies", write_disposition="replace") source_data.movies.apply_hints(columns={"cast": {"data_type": "complex"}}) load_info = pipeline.run(source_data) print(load_info) tables = pipeline.last_trace.last_normalize_info.row_counts # @@@DLT_REMOVE tables.pop("_dlt_pipeline_state") # @@@DLT_REMOVE - assert (len(tables) == 6), pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE + assert len(tables) == 6, pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE # @@@DLT_SNIPPET_END nested_data_run # @@@DLT_SNIPPET_END example diff --git a/docs/website/docs/examples/nested_data/index.md b/docs/website/docs/examples/nested_data/index.md index 4687e84d39..b2b5ee2792 100644 --- a/docs/website/docs/examples/nested_data/index.md +++ b/docs/website/docs/examples/nested_data/index.md @@ -99,9 +99,7 @@ if __name__ == "__main__": destination="duckdb", dataset_name="unpacked_data", ) - source_data = mongodb_collection( - collection="movies", write_disposition="replace" - ) + source_data = mongodb_collection(collection="movies", write_disposition="replace") load_info = pipeline.run(source_data) print(load_info) @@ -115,9 +113,7 @@ if __name__ == "__main__": destination="duckdb", dataset_name="not_unpacked_data", ) - source_data = mongodb_collection( - collection="movies", write_disposition="replace" - ) + source_data = mongodb_collection(collection="movies", write_disposition="replace") source_data.max_table_nesting = 0 load_info = pipeline.run(source_data) print(load_info) @@ -131,9 +127,7 @@ if __name__ == "__main__": destination="duckdb", dataset_name="unpacked_data_without_cast", ) - source_data = mongodb_collection( - collection="movies", write_disposition="replace" - ) + source_data = mongodb_collection(collection="movies", write_disposition="replace") source_data.movies.apply_hints(columns={"cast": {"data_type": "complex"}}) load_info = pipeline.run(source_data) print(load_info) diff --git a/docs/website/docs/examples/qdrant_zendesk/code/qdrant-snippets.py b/docs/website/docs/examples/qdrant_zendesk/code/qdrant-snippets.py index 262483bef7..c6ab46a453 100644 --- a/docs/website/docs/examples/qdrant_zendesk/code/qdrant-snippets.py +++ b/docs/website/docs/examples/qdrant_zendesk/code/qdrant-snippets.py @@ -2,6 +2,7 @@ __source_name__ = "zendesk" + @skipifgithubfork def qdrant_snippet(): # @@@DLT_SNIPPET_START example @@ -13,7 +14,7 @@ def qdrant_snippet(): from dlt.common.time import ensure_pendulum_datetime from dlt.common.typing import TAnyDateTime from dlt.sources.helpers.requests import client - from dlt.destinations.qdrant import qdrant_adapter + from dlt.destinations.impl.qdrant import qdrant_adapter from qdrant_client import QdrantClient from dlt.common.configuration.inject import with_config @@ -22,7 +23,9 @@ def qdrant_snippet(): @dlt.source(max_table_nesting=2) def zendesk_support( credentials: Dict[str, str] = dlt.secrets.value, - start_date: Optional[TAnyDateTime] = pendulum.datetime(year=2000, month=1, day=1), # noqa: B008 + start_date: Optional[TAnyDateTime] = pendulum.datetime( # noqa: B008 + year=2000, month=1, day=1 + ), end_date: Optional[TAnyDateTime] = None, ): """ @@ -53,9 +56,7 @@ def zendesk_support( # when two events have the same 
timestamp @dlt.resource(primary_key="id", write_disposition="append") def tickets_data( - updated_at: dlt.sources.incremental[ - pendulum.DateTime - ] = dlt.sources.incremental( + updated_at: dlt.sources.incremental[pendulum.DateTime] = dlt.sources.incremental( "updated_at", initial_value=start_date_obj, end_value=end_date_obj, @@ -125,9 +126,7 @@ def get_pages( # make request and keep looping until there is no next page get_url = f"{url}{endpoint}" while get_url: - response = client.get( - get_url, headers=headers, auth=auth, params=params - ) + response = client.get(get_url, headers=headers, auth=auth, params=params) response.raise_for_status() response_json = response.json() result = response_json[data_point_name] @@ -152,7 +151,7 @@ def get_pages( load_info = pipeline.run( # here we use a special function to tell Qdrant which fields to embed qdrant_adapter( - zendesk_support(), # retrieve tickets data + zendesk_support(), # retrieve tickets data embed=["subject", "description"], ) ) @@ -183,10 +182,10 @@ def get_qdrant_client(location=dlt.secrets.value, api_key=dlt.secrets.value): response = qdrant_client.query( "zendesk_data_content", # collection/dataset name with the 'content' suffix -> tickets content table query_text=["cancel", "cancel subscription"], # prompt to search - limit=3 # limit the number of results to the nearest 3 embeddings + limit=3, # limit the number of results to the nearest 3 embeddings ) # @@@DLT_SNIPPET_END get_response - assert len(response) <= 3 and len(response) > 0 # @@@DLT_REMOVE + assert len(response) <= 3 and len(response) > 0 # @@@DLT_REMOVE - # @@@DLT_SNIPPET_END example \ No newline at end of file + # @@@DLT_SNIPPET_END example diff --git a/docs/website/docs/examples/qdrant_zendesk/index.md b/docs/website/docs/examples/qdrant_zendesk/index.md index c0c1e12285..99a88a694d 100644 --- a/docs/website/docs/examples/qdrant_zendesk/index.md +++ b/docs/website/docs/examples/qdrant_zendesk/index.md @@ -44,7 +44,7 @@ from dlt.common import pendulum from dlt.common.time import ensure_pendulum_datetime from dlt.common.typing import TAnyDateTime from dlt.sources.helpers.requests import client -from dlt.destinations.qdrant import qdrant_adapter +from dlt.destinations.impl.qdrant import qdrant_adapter from qdrant_client import QdrantClient from dlt.common.configuration.inject import with_config @@ -53,7 +53,9 @@ from dlt.common.configuration.inject import with_config @dlt.source(max_table_nesting=2) def zendesk_support( credentials: Dict[str, str] = dlt.secrets.value, - start_date: Optional[TAnyDateTime] = pendulum.datetime(year=2000, month=1, day=1), # noqa: B008 + start_date: Optional[TAnyDateTime] = pendulum.datetime( # noqa: B008 + year=2000, month=1, day=1 + ), end_date: Optional[TAnyDateTime] = None, ): """ @@ -84,9 +86,7 @@ def zendesk_support( # when two events have the same timestamp @dlt.resource(primary_key="id", write_disposition="append") def tickets_data( - updated_at: dlt.sources.incremental[ - pendulum.DateTime - ] = dlt.sources.incremental( + updated_at: dlt.sources.incremental[pendulum.DateTime] = dlt.sources.incremental( "updated_at", initial_value=start_date_obj, end_value=end_date_obj, @@ -129,7 +129,7 @@ if __name__ == "__main__": load_info = pipeline.run( # here we use a special function to tell Qdrant which fields to embed qdrant_adapter( - zendesk_support(), # retrieve tickets data + zendesk_support(), # retrieve tickets data embed=["subject", "description"], ) ) @@ -143,7 +143,7 @@ if __name__ == "__main__": ```py # running the Qdrant 
client to connect to your Qdrant database -@with_config(sections=("destination", "credentials")) +@with_config(sections=("destination", "qdrant", "credentials")) def get_qdrant_client(location=dlt.secrets.value, api_key=dlt.secrets.value): return QdrantClient( url=location, @@ -164,7 +164,7 @@ print(qdrant_client.get_collections()) response = qdrant_client.query( "zendesk_data_content", # collection/dataset name with the 'content' suffix -> tickets content table query_text=["cancel", "cancel subscription"], # prompt to search - limit=3 # limit the number of results to the nearest 3 embeddings + limit=3, # limit the number of results to the nearest 3 embeddings ) ``` diff --git a/docs/website/docs/examples/transformers/code/pokemon-snippets.py b/docs/website/docs/examples/transformers/code/pokemon-snippets.py index 726bcf7e2e..d8fe4f41ba 100644 --- a/docs/website/docs/examples/transformers/code/pokemon-snippets.py +++ b/docs/website/docs/examples/transformers/code/pokemon-snippets.py @@ -1,11 +1,8 @@ - def transformers_snippet() -> None: - # @@@DLT_SNIPPET_START example import dlt from dlt.sources.helpers import requests - @dlt.source(max_table_nesting=2) def source(pokemon_api_url: str): """""" @@ -49,12 +46,9 @@ def species(pokemon_details): # 2. send pokemon details into `species` transformer to get species details # NOTE: dlt is smart enough to get data from pokemon_list and pokemon details once - return ( - pokemon_list | pokemon, - pokemon_list | pokemon | species - ) + return (pokemon_list | pokemon, pokemon_list | pokemon | species) - __name__ = "__main__" # @@@DLT_REMOVE + __name__ = "__main__" # @@@DLT_REMOVE if __name__ == "__main__": # build duck db pipeline pipeline = dlt.pipeline( diff --git a/docs/website/docs/examples/transformers/index.md b/docs/website/docs/examples/transformers/index.md index dfa6513e72..7ed8fd29c3 100644 --- a/docs/website/docs/examples/transformers/index.md +++ b/docs/website/docs/examples/transformers/index.md @@ -33,7 +33,6 @@ We'll learn how to: import dlt from dlt.sources.helpers import requests - @dlt.source(max_table_nesting=2) def source(pokemon_api_url: str): """""" @@ -77,10 +76,7 @@ def source(pokemon_api_url: str): # 2. send pokemon details into `species` transformer to get species details # NOTE: dlt is smart enough to get data from pokemon_list and pokemon details once - return ( - pokemon_list | pokemon, - pokemon_list | pokemon | species - ) + return (pokemon_list | pokemon, pokemon_list | pokemon | species) if __name__ == "__main__": # build duck db pipeline diff --git a/docs/website/docs/general-usage/credentials/configuration.md b/docs/website/docs/general-usage/credentials/configuration.md index 4cb3e17468..867c47ccba 100644 --- a/docs/website/docs/general-usage/credentials/configuration.md +++ b/docs/website/docs/general-usage/credentials/configuration.md @@ -210,7 +210,7 @@ You can pass destination credentials and ignore the default lookup: pipeline = dlt.pipeline(destination="postgres", credentials=dlt.secrets["postgres_dsn"]) ``` -:::Note +:::note **dlt.config** and **dlt.secrets** can be also used as setters. 
For example: ```python dlt.config["sheet_id"] = "23029402349032049" diff --git a/docs/website/docs/general-usage/pipeline.md b/docs/website/docs/general-usage/pipeline.md index 1dfac1a259..4850027f24 100644 --- a/docs/website/docs/general-usage/pipeline.md +++ b/docs/website/docs/general-usage/pipeline.md @@ -77,7 +77,7 @@ You can inspect stored artifacts using the command [dlt pipeline info](../reference/command-line-interface.md#dlt-pipeline) and [programmatically](../walkthroughs/run-a-pipeline.md#4-inspect-a-load-process). -> 💡 A pipeline with given name looks for its working in location above - so if you have two +> 💡 A pipeline with a given name looks for its working directory in the location above - so if you have two > pipeline scripts that create a pipeline with the same name, they will see the same working folder > and share all the possible state. You may override the default location using `pipelines_dir` > argument when creating the pipeline. diff --git a/docs/website/docs/general-usage/resource.md b/docs/website/docs/general-usage/resource.md index e203b3d93a..8451ecb6c7 100644 --- a/docs/website/docs/general-usage/resource.md +++ b/docs/website/docs/general-usage/resource.md @@ -25,7 +25,7 @@ def generate_rows(): for i in range(10): yield {'id':i, 'example_string':'abc'} -@dlt.sources +@dlt.source def source_name(): return generate_rows ``` @@ -73,6 +73,9 @@ accepts following arguments: > 💡 You can mark some resource arguments as [configuration and credentials](credentials) > values so `dlt` can pass them automatically to your functions. +### Put a contract on tables, columns and data +Use the `schema_contract` argument to tell dlt how to [deal with new tables, data types and bad data types](schema-contracts.md). For example, if you set it to **freeze**, `dlt` will not allow any new tables, columns or data types to be introduced to the schema - it will raise an exception. Learn more about the available contract modes [here](schema-contracts.md#setting-up-the-contract). + ### Define a schema with Pydantic You can alternatively use a [Pydantic](https://pydantic-docs.helpmanual.io/) model to define the schema. @@ -106,25 +109,34 @@ def get_users(): The data types of the table columns are inferred from the types of the pydantic fields. These use the same type conversions as when the schema is automatically generated from the data. +Pydantic models integrate well with [schema contracts](schema-contracts.md) as data validators. + Things to note: - Fields with an `Optional` type are marked as `nullable` - Fields with a `Union` type are converted to the first (not `None`) type listed in the union. E.g. `status: Union[int, str]` results in a `bigint` column. -- `list`, `dict` and nested pydantic model fields will use the `complex` type which means they'll be stored as a JSON object in the database instead of creating child tables. You can override this by manually calling the pydantic helper with `skip_complex_types=True`, see below: +- `list`, `dict` and nested pydantic model fields will use the `complex` type which means they'll be stored as a JSON object in the database instead of creating child tables. + +You can override this by configuring the Pydantic model: ```python -from dlt.common.lib.pydantic import pydantic_to_table_schema_columns +from typing import ClassVar +from dlt.common.libs.pydantic import DltConfig -...
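# editorial comments, not part of the original snippet:
# `DltConfig` carries dlt-specific settings for a Pydantic model; attaching it as a
# `dlt_config` ClassVar (next line) is how the model opts out of `complex` columns for
# `dict`/`list`/nested-model fields. `User` is assumed to be the Pydantic model defined
# earlier in this document.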
+class UserWithNesting(User): + dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True} -@dlt.resource(name="user", columns=pydantic_to_table_schema_columns(User, skip_complex_types=True)) +@dlt.resource(name="user", columns=UserWithNesting) def get_users(): ... ``` -This omits any `dict`/`list`/`BaseModel` type fields from the schema, so dlt will fall back on the default +`"skip_complex_types"` omits any `dict`/`list`/`BaseModel` type fields from the schema, so dlt will fall back on the default behaviour of creating child tables for these fields. +We do not support `RootModel` classes that validate simple types. You can add such a validator yourself, see the [data filtering section](#filter-transform-and-pivot-data). + + ### Dispatch data to many tables You can load data to many tables from a single resource. The most common case is a stream of events @@ -301,7 +313,7 @@ assert list(r) == list(range(10)) > 💡 You cannot limit transformers. They should process all the data they receive fully to avoid > inconsistencies in generated datasets. -### Set table and adjust schema +### Set table name and adjust schema You can change the schema of a resource, be it standalone or as a part of a source. Look for method named `apply_hints` which takes the same arguments as resource decorator. Obviously you should call diff --git a/docs/website/docs/general-usage/schema-contracts.md b/docs/website/docs/general-usage/schema-contracts.md new file mode 100644 index 0000000000..764b565beb --- /dev/null +++ b/docs/website/docs/general-usage/schema-contracts.md @@ -0,0 +1,209 @@ +--- +title: 🧪 Schema and Data Contracts +description: Controlling schema evolution and validating data +keywords: [data contracts, schema, dlt schema, pydantic] +--- + +## Schema and Data Contracts + +`dlt` will evolve the schema at the destination by following the structure and data types of the extracted data. There are several modes +that you can use to control this automatic schema evolution, from the default modes where all changes to the schema are accepted to +a frozen schema that does not change at all. + +Consider this example: + +```py +@dlt.resource(schema_contract={"tables": "evolve", "columns": "freeze"}) +def items(): + ... +``` + +This resource will allow new tables (both child tables and [tables with dynamic names](resource.md#dispatch-data-to-many-tables)) to be created, but will throw an exception if data is extracted for an existing table which contains a new column. + +### Setting up the contract +You can control the following **schema entities**: +* `tables` - contract is applied when a new table is created +* `columns` - contract is applied when a new column is created on an existing table +* `data_type` - contract is applied when data cannot be coerced into the data type associated with an existing column. + +You can use **contract modes** to tell `dlt` how to apply the contract for a particular entity: +* `evolve`: No constraints on schema changes. +* `freeze`: This will raise an exception if data is encountered that does not fit the existing schema, so no data will be loaded to the destination. +* `discard_row`: This will discard any extracted row if it does not adhere to the existing schema, and this row will not be loaded to the destination. +* `discard_value`: This will discard data in an extracted row that does not adhere to the existing schema and the row will be loaded without this data. + +:::note +The default mode (**evolve**) works as follows: +1. New tables may always be created +2. 
New columns may always be appended to the existing table
+3. Data that does not coerce to the existing data type of a particular column will be sent to a [variant column](schema.md#variant-columns) created for this particular type.
+:::
+
+#### Passing schema_contract argument
+The `schema_contract` argument exists on the [dlt.source](source.md) decorator as a default for all resources in that source and on the
+[dlt.resource](resource.md) decorator as a directive for the individual resource and, as a consequence, for all tables created by this resource.
+Additionally, it exists on the `pipeline.run()` method, which will override all existing settings.
+
+The `schema_contract` argument accepts two forms:
+1. **full**: a mapping of schema entities to contract modes
+2. **shorthand**: a single contract mode (string) that will be applied to all schema entities.
+
+For example, setting `schema_contract` to *freeze* will expand to the full form:
+```python
+{"tables": "freeze", "columns": "freeze", "data_type": "freeze"}
+```
+
+You can change the contract on the **source** instance via the `schema_contract` property. For a **resource** you can use [apply_hints](resource#set-table-name-and-adjust-schema).
+
+
+#### Nuances of contract modes
+1. Contracts are applied **after names of tables and columns are normalized**.
+2. A contract defined on a resource is applied to all tables and child tables created by that resource.
+3. `discard_row` works on the table level. For example, if you have two tables in a parent-child relationship, i.e. *users* and *users__addresses*, and the contract is violated in the *users__addresses* table, the row of that table is discarded while the parent row in the *users* table will be loaded.
+
+### Use Pydantic models for data validation
+Pydantic models can be used to [define table schemas and validate incoming data](resource.md#define-a-schema-with-pydantic). You can use any model you already have. `dlt` will internally synthesize (if necessary) new models that conform with the **schema contract** on the resource.
+
+Just passing a model in the `columns` argument of the [dlt.resource](resource.md#define-a-schema-with-pydantic) decorator sets a schema contract that conforms to the default Pydantic behavior:
+```python
+{
+  "tables": "evolve",
+  "columns": "discard_value",
+  "data_type": "freeze"
+}
+```
+New tables are allowed, extra fields are ignored and invalid data raises an exception.
+
+If you pass the schema contract explicitly, the following happens to the schema entities:
+1. **tables** do not impact the Pydantic models
+2. **columns** modes are mapped into the **extra** modes of Pydantic (see below). `dlt` will apply this setting recursively if models contain other models.
+3. **data_type** supports the following modes for Pydantic: **evolve** will synthesize a lenient model that allows for any data type. This may result in variant columns upstream.
+**freeze** will re-raise `ValidationException`. **discard_row** will remove the non-validating data items.
+**discard_value** is not currently supported. We may eventually do that on Pydantic v2.
+
+`dlt` maps column contract modes into the extra fields settings as follows.
+
+Note that this works in two directions. If you use a model with such a setting explicitly configured, `dlt` sets the column contract mode accordingly. This also avoids synthesizing modified models.
+
+| column mode | pydantic extra |
+| ------------- | -------------- |
+| evolve | allow |
+| freeze | forbid |
+| discard_value | ignore |
+| discard_row | forbid |
+
+`discard_row` requires additional handling when a `ValidationError` is raised.
+
+:::tip
+Model validation is added as a [transform step](resource.md#filter-transform-and-pivot-data) to the resource. This step will convert the incoming data items into instances of the validating models. You can easily convert them back to dictionaries by using `add_map(lambda item: item.dict())` on a resource.
+:::
+
+:::note
+Pydantic models work on the **extracted** data **before names are normalized or child relationships are created**. Make sure to name model fields as in your input data and handle nested data with nested models.
+
+As a consequence, `discard_row` will drop the whole data item - even if only a nested model was affected.
+:::
+
+### Set contracts on Arrow Tables and Pandas
+All contract settings apply to [arrow tables and pandas frames](../dlt-ecosystem/verified-sources/arrow-pandas.md) as well.
+1. **tables** mode works the same - no matter what the data item type is
+2. **columns** will allow new columns, raise an exception or modify tables/frames still in the extract step to avoid rewriting parquet files.
+3. **data_type** changes to data types in tables/frames are not allowed and will result in a data type schema clash. We could allow for more modes (evolving data types in Arrow tables sounds weird but ping us on Slack if you need it.)
+
+Here's how `dlt` deals with column modes:
+1. **evolve**: new columns are allowed (the table may be reordered to put them at the end)
+2. **discard_value**: the column will be deleted
+3. **discard_row**: rows with the column present will be deleted and then the column will be deleted
+4. **freeze**: an exception is raised on a new column
+
+
+### Get context from DataValidationError in freeze mode
+When a contract is violated in freeze mode, `dlt` raises a `DataValidationError` exception. This exception gives access to the full context and passes the evidence to the caller.
+As with any other exception coming from a pipeline run, it will be re-raised via a `PipelineStepFailed` exception, which you should catch in `except`:
+
+```python
+try:
+  pipeline.run()
+except PipelineStepFailed as pip_ex:
+  if pip_ex.step == "normalize":
+    if isinstance(pip_ex.__context__.__context__, DataValidationError):
+      ...
+  if pip_ex.step == "extract":
+    if isinstance(pip_ex.__context__, DataValidationError):
+      ...
+```
+
+`DataValidationError` provides the following context:
+1. `schema_name`, `table_name` and `column_name` provide the logical "location" at which the contract was violated.
+2. `schema_entity` and `contract_mode` tell which contract was violated.
+3. `table_schema` contains the schema against which the contract was validated. May be a Pydantic model or a `dlt` `TTableSchema` instance.
+4. `schema_contract` is the full, expanded schema contract.
+5. `data_item` is the offending data item (Python dict, arrow table, pydantic model or a list thereof).
+
+
+### Contracts on new tables
+If a table is a **new table** that has not been created on the destination yet, dlt will allow the creation of new columns. For a single pipeline run, the column mode is changed (internally) to **evolve** and then reverted to the original mode. This allows initial schema inference to happen and then, on subsequent runs, the inferred contract will be applied to the new data.
+
+The following tables are considered new:
+1. Child tables inferred from the nested data
+2. 
Dynamic tables created from the data during extraction +3. Tables containing **incomplete** columns - columns without data type bound to them. + +For example such table is considered new because column **number** is incomplete (define primary key and NOT null but no data type) +```yaml + blocks: + description: Ethereum blocks + write_disposition: append + columns: + number: + nullable: false + primary_key: true + name: number +``` + +What tables are not considered new: +1. Those with columns defined by Pydantic modes + +### Code Examples + +The below code will silently ignore new subtables, allow new columns to be added to existing tables and raise an error if a variant of a column is discovered. + +```py +@dlt.resource(schema_contract={"tables": "discard_row", "columns": "evolve", "data_type": "freeze"}) +def items(): + ... +``` + +The below Code will raise on any encountered schema change. Note: You can always set a string which will be interpreted as though all keys are set to these values. + +```py +pipeline.run(my_source(), schema_contract="freeze") +``` + +The below code defines some settings on the source which can be overwritten on the resource which in turn can be overwritten by the global override on the `run` method. +Here for all resources variant columns are frozen and raise an error if encountered, on `items` new columns are allowed but `other_items` inherits the `freeze` setting from +the source, thus new columns are frozen there. New tables are allowed. + +```py +@dlt.resource(schema_contract={"columns": "evolve"}) +def items(): + ... + +@dlt.resource() +def other_items(): + ... + +@dlt.source(schema_contract={"columns": "freeze", "data_type": "freeze"}): +def source(): + return [items(), other_items()] + + +# this will use the settings defined by the decorators +pipeline.run(source()) + +# this will freeze the whole schema, regardless of the decorator settings +pipeline.run(source(), schema_contract="freeze") + +``` \ No newline at end of file diff --git a/docs/website/docs/getting-started-snippets.py b/docs/website/docs/getting-started-snippets.py index c4bd789834..618ba1a406 100644 --- a/docs/website/docs/getting-started-snippets.py +++ b/docs/website/docs/getting-started-snippets.py @@ -3,19 +3,13 @@ def start_snippet() -> None: - # @@@DLT_SNIPPET_START start import dlt - data = [ - {'id': 1, 'name': 'Alice'}, - {'id': 2, 'name': 'Bob'} - ] + data = [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}] pipeline = dlt.pipeline( - pipeline_name='quick_start', - destination='duckdb', - dataset_name='mydata' + pipeline_name="quick_start", destination="duckdb", dataset_name="mydata" ) load_info = pipeline.run(data, table_name="users") @@ -26,19 +20,18 @@ def start_snippet() -> None: def json_snippet() -> None: - # @@@DLT_SNIPPET_START json import dlt from dlt.common import json - with open("./assets/json_file.json", 'rb') as file: + with open("./assets/json_file.json", "rb") as file: data = json.load(file) pipeline = dlt.pipeline( - pipeline_name='from_json', - destination='duckdb', - dataset_name='mydata', + pipeline_name="from_json", + destination="duckdb", + dataset_name="mydata", ) # NOTE: test data that we load is just a dictionary so we enclose it in a list @@ -52,19 +45,18 @@ def json_snippet() -> None: def csv_snippet() -> None: - # @@@DLT_SNIPPET_START csv import dlt import pandas as pd owid_disasters_csv = 
"https://raw.githubusercontent.com/owid/owid-datasets/master/datasets/Natural%20disasters%20from%201900%20to%202019%20-%20EMDAT%20(2020)/Natural%20disasters%20from%201900%20to%202019%20-%20EMDAT%20(2020).csv" df = pd.read_csv(owid_disasters_csv) - data = df.to_dict(orient='records') + data = df.to_dict(orient="records") pipeline = dlt.pipeline( - pipeline_name='from_csv', - destination='duckdb', - dataset_name='mydata', + pipeline_name="from_csv", + destination="duckdb", + dataset_name="mydata", ) load_info = pipeline.run(data, table_name="natural_disasters") @@ -75,7 +67,6 @@ def csv_snippet() -> None: def api_snippet() -> None: - # @@@DLT_SNIPPET_START api import dlt from dlt.sources.helpers import requests @@ -87,9 +78,9 @@ def api_snippet() -> None: response.raise_for_status() pipeline = dlt.pipeline( - pipeline_name='from_api', - destination='duckdb', - dataset_name='github_data', + pipeline_name="from_api", + destination="duckdb", + dataset_name="github_data", ) # the response contains a list of issues load_info = pipeline.run(response.json(), table_name="issues") @@ -101,7 +92,6 @@ def api_snippet() -> None: def db_snippet() -> None: - # @@@DLT_SNIPPET_START db import dlt from sqlalchemy import create_engine @@ -112,19 +102,18 @@ def db_snippet() -> None: engine = create_engine("mysql+pymysql://rfamro@mysql-rfam-public.ebi.ac.uk:4497/Rfam") with engine.connect() as conn: # select genome table, stream data in batches of 100 elements - rows = conn.execution_options(yield_per=100).exec_driver_sql("SELECT * FROM genome LIMIT 1000") + rows = conn.execution_options(yield_per=100).exec_driver_sql( + "SELECT * FROM genome LIMIT 1000" + ) pipeline = dlt.pipeline( - pipeline_name='from_database', - destination='duckdb', - dataset_name='genome_data', + pipeline_name="from_database", + destination="duckdb", + dataset_name="genome_data", ) # here we convert the rows into dictionaries on the fly with a map function - load_info = pipeline.run( - map(lambda row: dict(row._mapping), rows), - table_name="genome" - ) + load_info = pipeline.run(map(lambda row: dict(row._mapping), rows), table_name="genome") print(load_info) # @@@DLT_SNIPPET_END db @@ -133,19 +122,15 @@ def db_snippet() -> None: def replace_snippet() -> None: - # @@@DLT_SNIPPET_START replace import dlt - data = [ - {'id': 1, 'name': 'Alice'}, - {'id': 2, 'name': 'Bob'} - ] + data = [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}] pipeline = dlt.pipeline( - pipeline_name='replace_data', - destination='duckdb', - dataset_name='mydata', + pipeline_name="replace_data", + destination="duckdb", + dataset_name="mydata", ) load_info = pipeline.run(data, table_name="users", write_disposition="replace") @@ -156,7 +141,6 @@ def replace_snippet() -> None: def incremental_snippet() -> None: - # @@@DLT_SNIPPET_START incremental import dlt from dlt.sources.helpers import requests @@ -183,11 +167,10 @@ def get_issues( break url = response.links["next"]["url"] - pipeline = dlt.pipeline( - pipeline_name='github_issues_incremental', - destination='duckdb', - dataset_name='github_data_append', + pipeline_name="github_issues_incremental", + destination="duckdb", + dataset_name="github_data_append", ) load_info = pipeline.run(get_issues) row_counts = pipeline.last_trace.last_normalize_info @@ -201,7 +184,6 @@ def get_issues( def incremental_merge_snippet() -> None: - # @@@DLT_SNIPPET_START incremental_merge import dlt from dlt.sources.helpers import requests @@ -212,7 +194,7 @@ def incremental_merge_snippet() -> None: primary_key="id", ) def 
get_issues( - updated_at = dlt.sources.incremental("updated_at", initial_value="1970-01-01T00:00:00Z") + updated_at=dlt.sources.incremental("updated_at", initial_value="1970-01-01T00:00:00Z") ): # NOTE: we read only open issues to minimize number of calls to the API. There's a limit of ~50 calls for not authenticated Github users url = f"https://api.github.com/repos/dlt-hub/dlt/issues?since={updated_at.last_value}&per_page=100&sort=updated&directions=desc&state=open" @@ -228,9 +210,9 @@ def get_issues( url = response.links["next"]["url"] pipeline = dlt.pipeline( - pipeline_name='github_issues_merge', - destination='duckdb', - dataset_name='github_data_merge', + pipeline_name="github_issues_merge", + destination="duckdb", + dataset_name="github_data_merge", ) load_info = pipeline.run(get_issues) row_counts = pipeline.last_trace.last_normalize_info @@ -244,15 +226,12 @@ def get_issues( def table_dispatch_snippet() -> None: - # @@@DLT_SNIPPET_START table_dispatch import dlt from dlt.sources.helpers import requests @dlt.resource(primary_key="id", table_name=lambda i: i["type"], write_disposition="append") - def repo_events( - last_created_at = dlt.sources.incremental("created_at") - ): + def repo_events(last_created_at=dlt.sources.incremental("created_at")): url = "https://api.github.com/repos/dlt-hub/dlt/events?per_page=100" while True: @@ -271,9 +250,9 @@ def repo_events( url = response.links["next"]["url"] pipeline = dlt.pipeline( - pipeline_name='github_events', - destination='duckdb', - dataset_name='github_events_data', + pipeline_name="github_events", + destination="duckdb", + dataset_name="github_events_data", ) load_info = pipeline.run(repo_events) row_counts = pipeline.last_trace.last_normalize_info @@ -285,15 +264,15 @@ def repo_events( assert_load_info(load_info) + def pdf_to_weaviate_snippet() -> None: # @@@DLT_SNIPPET_START pdf_to_weaviate import os import dlt - from dlt.destinations.weaviate import weaviate_adapter + from dlt.destinations.impl.weaviate import weaviate_adapter from PyPDF2 import PdfReader - @dlt.resource(selected=False) def list_files(folder_path: str): folder_path = os.path.abspath(folder_path) @@ -302,10 +281,9 @@ def list_files(folder_path: str): yield { "file_name": filename, "file_path": file_path, - "mtime": os.path.getmtime(file_path) + "mtime": os.path.getmtime(file_path), } - @dlt.transformer(primary_key="page_id", write_disposition="merge") def pdf_to_text(file_item, separate_pages: bool = False): if not separate_pages: @@ -319,10 +297,7 @@ def pdf_to_text(file_item, separate_pages: bool = False): page_item["page_id"] = file_item["file_name"] + "_" + str(page_no) yield page_item - pipeline = dlt.pipeline( - pipeline_name='pdf_to_text', - destination='weaviate' - ) + pipeline = dlt.pipeline(pipeline_name="pdf_to_text", destination="weaviate") # this constructs a simple pipeline that: (1) reads files from "invoices" folder (2) filters only those ending with ".pdf" # (3) sends them to pdf_to_text transformer with pipe (|) operator @@ -335,9 +310,7 @@ def pdf_to_text(file_item, separate_pages: bool = False): pdf_pipeline.table_name = "InvoiceText" # use weaviate_adapter to tell destination to vectorize "text" column - load_info = pipeline.run( - weaviate_adapter(pdf_pipeline, vectorize="text") - ) + load_info = pipeline.run(weaviate_adapter(pdf_pipeline, vectorize="text")) row_counts = pipeline.last_trace.last_normalize_info print(row_counts) print("------") @@ -353,4 +326,3 @@ def pdf_to_text(file_item, separate_pages: bool = False): # get text of all the 
invoices in InvoiceText class we just created above print(client.query.get("InvoiceText", ["text", "file_name", "mtime", "page_id"]).do()) # @@@DLT_SNIPPET_END pdf_to_weaviate_read - diff --git a/docs/website/docs/getting-started.md b/docs/website/docs/getting-started.md index cd3f2cc69d..d0f9f29e48 100644 --- a/docs/website/docs/getting-started.md +++ b/docs/website/docs/getting-started.md @@ -44,15 +44,10 @@ Let's load a list of Python objects (dictionaries) into `duckdb` and inspect the ```py import dlt -data = [ - {'id': 1, 'name': 'Alice'}, - {'id': 2, 'name': 'Bob'} -] +data = [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}] pipeline = dlt.pipeline( - pipeline_name='quick_start', - destination='duckdb', - dataset_name='mydata' + pipeline_name="quick_start", destination="duckdb", dataset_name="mydata" ) load_info = pipeline.run(data, table_name="users") @@ -133,13 +128,13 @@ import dlt from dlt.common import json -with open("./assets/json_file.json", 'rb') as file: +with open("./assets/json_file.json", "rb") as file: data = json.load(file) pipeline = dlt.pipeline( - pipeline_name='from_json', - destination='duckdb', - dataset_name='mydata', + pipeline_name="from_json", + destination="duckdb", + dataset_name="mydata", ) # NOTE: test data that we load is just a dictionary so we enclose it in a list @@ -164,12 +159,12 @@ import pandas as pd owid_disasters_csv = "https://raw.githubusercontent.com/owid/owid-datasets/master/datasets/Natural%20disasters%20from%201900%20to%202019%20-%20EMDAT%20(2020)/Natural%20disasters%20from%201900%20to%202019%20-%20EMDAT%20(2020).csv" df = pd.read_csv(owid_disasters_csv) -data = df.to_dict(orient='records') +data = df.to_dict(orient="records") pipeline = dlt.pipeline( - pipeline_name='from_csv', - destination='duckdb', - dataset_name='mydata', + pipeline_name="from_csv", + destination="duckdb", + dataset_name="mydata", ) load_info = pipeline.run(data, table_name="natural_disasters") @@ -192,9 +187,9 @@ response = requests.get(url) response.raise_for_status() pipeline = dlt.pipeline( - pipeline_name='from_api', - destination='duckdb', - dataset_name='github_data', + pipeline_name="from_api", + destination="duckdb", + dataset_name="github_data", ) # the response contains a list of issues load_info = pipeline.run(response.json(), table_name="issues") @@ -222,19 +217,18 @@ from sqlalchemy import create_engine engine = create_engine("mysql+pymysql://rfamro@mysql-rfam-public.ebi.ac.uk:4497/Rfam") with engine.connect() as conn: # select genome table, stream data in batches of 100 elements - rows = conn.execution_options(yield_per=100).exec_driver_sql("SELECT * FROM genome LIMIT 1000") + rows = conn.execution_options(yield_per=100).exec_driver_sql( + "SELECT * FROM genome LIMIT 1000" + ) pipeline = dlt.pipeline( - pipeline_name='from_database', - destination='duckdb', - dataset_name='genome_data', + pipeline_name="from_database", + destination="duckdb", + dataset_name="genome_data", ) # here we convert the rows into dictionaries on the fly with a map function - load_info = pipeline.run( - map(lambda row: dict(row._mapping), rows), - table_name="genome" - ) + load_info = pipeline.run(map(lambda row: dict(row._mapping), rows), table_name="genome") print(load_info) ``` @@ -267,15 +261,12 @@ One method is to tell `dlt` to replace the data in existing tables by using `wri ```py import dlt -data = [ - {'id': 1, 'name': 'Alice'}, - {'id': 2, 'name': 'Bob'} -] +data = [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}] pipeline = dlt.pipeline( - 
pipeline_name='replace_data', - destination='duckdb', - dataset_name='mydata', + pipeline_name="replace_data", + destination="duckdb", + dataset_name="mydata", ) load_info = pipeline.run(data, table_name="users", write_disposition="replace") @@ -334,11 +325,10 @@ def get_issues( break url = response.links["next"]["url"] - pipeline = dlt.pipeline( - pipeline_name='github_issues_incremental', - destination='duckdb', - dataset_name='github_data_append', + pipeline_name="github_issues_incremental", + destination="duckdb", + dataset_name="github_data_append", ) load_info = pipeline.run(get_issues) row_counts = pipeline.last_trace.last_normalize_info @@ -394,7 +384,7 @@ from dlt.sources.helpers import requests primary_key="id", ) def get_issues( - updated_at = dlt.sources.incremental("updated_at", initial_value="1970-01-01T00:00:00Z") + updated_at=dlt.sources.incremental("updated_at", initial_value="1970-01-01T00:00:00Z") ): # NOTE: we read only open issues to minimize number of calls to the API. There's a limit of ~50 calls for not authenticated Github users url = f"https://api.github.com/repos/dlt-hub/dlt/issues?since={updated_at.last_value}&per_page=100&sort=updated&directions=desc&state=open" @@ -410,9 +400,9 @@ def get_issues( url = response.links["next"]["url"] pipeline = dlt.pipeline( - pipeline_name='github_issues_merge', - destination='duckdb', - dataset_name='github_data_merge', + pipeline_name="github_issues_merge", + destination="duckdb", + dataset_name="github_data_merge", ) load_info = pipeline.run(get_issues) row_counts = pipeline.last_trace.last_normalize_info @@ -445,9 +435,7 @@ import dlt from dlt.sources.helpers import requests @dlt.resource(primary_key="id", table_name=lambda i: i["type"], write_disposition="append") -def repo_events( - last_created_at = dlt.sources.incremental("created_at") -): +def repo_events(last_created_at=dlt.sources.incremental("created_at")): url = "https://api.github.com/repos/dlt-hub/dlt/events?per_page=100" while True: @@ -466,9 +454,9 @@ def repo_events( url = response.links["next"]["url"] pipeline = dlt.pipeline( - pipeline_name='github_events', - destination='duckdb', - dataset_name='github_events_data', + pipeline_name="github_events", + destination="duckdb", + dataset_name="github_events_data", ) load_info = pipeline.run(repo_events) row_counts = pipeline.last_trace.last_normalize_info @@ -522,10 +510,9 @@ Below we extract text from PDFs and load it to [Weaviate](dlt-ecosystem/destinat import os import dlt -from dlt.destinations.weaviate import weaviate_adapter +from dlt.destinations.impl.weaviate import weaviate_adapter from PyPDF2 import PdfReader - @dlt.resource(selected=False) def list_files(folder_path: str): folder_path = os.path.abspath(folder_path) @@ -534,10 +521,9 @@ def list_files(folder_path: str): yield { "file_name": filename, "file_path": file_path, - "mtime": os.path.getmtime(file_path) + "mtime": os.path.getmtime(file_path), } - @dlt.transformer(primary_key="page_id", write_disposition="merge") def pdf_to_text(file_item, separate_pages: bool = False): if not separate_pages: @@ -551,10 +537,7 @@ def pdf_to_text(file_item, separate_pages: bool = False): page_item["page_id"] = file_item["file_name"] + "_" + str(page_no) yield page_item -pipeline = dlt.pipeline( - pipeline_name='pdf_to_text', - destination='weaviate' -) +pipeline = dlt.pipeline(pipeline_name="pdf_to_text", destination="weaviate") # this constructs a simple pipeline that: (1) reads files from "invoices" folder (2) filters only those ending with ".pdf" # (3) sends 
them to pdf_to_text transformer with pipe (|) operator @@ -567,9 +550,7 @@ pdf_pipeline = list_files("assets/invoices").add_filter( pdf_pipeline.table_name = "InvoiceText" # use weaviate_adapter to tell destination to vectorize "text" column -load_info = pipeline.run( - weaviate_adapter(pdf_pipeline, vectorize="text") -) +load_info = pipeline.run(weaviate_adapter(pdf_pipeline, vectorize="text")) row_counts = pipeline.last_trace.last_normalize_info print(row_counts) print("------") diff --git a/docs/website/docs/intro-snippets.py b/docs/website/docs/intro-snippets.py index 2924cd34de..bef758d0aa 100644 --- a/docs/website/docs/intro-snippets.py +++ b/docs/website/docs/intro-snippets.py @@ -1,25 +1,24 @@ from tests.pipeline.utils import assert_load_info -def intro_snippet() -> None: +def intro_snippet() -> None: # @@@DLT_SNIPPET_START index import dlt from dlt.sources.helpers import requests + # Create a dlt pipeline that will load # chess player data to the DuckDB destination pipeline = dlt.pipeline( - pipeline_name='chess_pipeline', - destination='duckdb', - dataset_name='player_data' + pipeline_name="chess_pipeline", destination="duckdb", dataset_name="player_data" ) # Grab some player data from Chess.com API data = [] - for player in ['magnuscarlsen', 'rpragchess']: - response = requests.get(f'https://api.chess.com/pub/player/{player}') + for player in ["magnuscarlsen", "rpragchess"]: + response = requests.get(f"https://api.chess.com/pub/player/{player}") response.raise_for_status() data.append(response.json()) # Extract, normalize, and load the data - load_info = pipeline.run(data, table_name='player') + load_info = pipeline.run(data, table_name="player") # @@@DLT_SNIPPET_END index assert_load_info(load_info) diff --git a/docs/website/docs/intro.md b/docs/website/docs/intro.md index 1ee2b594d7..8dd08b4393 100644 --- a/docs/website/docs/intro.md +++ b/docs/website/docs/intro.md @@ -25,21 +25,20 @@ There's no need to start any backends or containers. Simply import `dlt` in a Py ```py import dlt from dlt.sources.helpers import requests + # Create a dlt pipeline that will load # chess player data to the DuckDB destination pipeline = dlt.pipeline( - pipeline_name='chess_pipeline', - destination='duckdb', - dataset_name='player_data' + pipeline_name="chess_pipeline", destination="duckdb", dataset_name="player_data" ) # Grab some player data from Chess.com API data = [] -for player in ['magnuscarlsen', 'rpragchess']: - response = requests.get(f'https://api.chess.com/pub/player/{player}') +for player in ["magnuscarlsen", "rpragchess"]: + response = requests.get(f"https://api.chess.com/pub/player/{player}") response.raise_for_status() data.append(response.json()) # Extract, normalize, and load the data -load_info = pipeline.run(data, table_name='player') +load_info = pipeline.run(data, table_name="player") ``` diff --git a/docs/website/docs/reference/performance.md b/docs/website/docs/reference/performance.md index 8e9823cbd2..f1a405684f 100644 --- a/docs/website/docs/reference/performance.md +++ b/docs/website/docs/reference/performance.md @@ -136,6 +136,7 @@ PROGRESS=log python pipeline_script.py ``` ## Parallelism +You can create pipelines that extract, normalize and load data in parallel. ### Extract You can extract data concurrently if you write your pipelines to yield callables or awaitables that can be then evaluated in a thread or futures pool respectively. 
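For orientation, here is a minimal sketch of the pattern described above - a transformer whose calls are deferred to the extract thread pool. It mirrors the `list_items`/`get_details` example that the hunks below reformat; the `sleep(0.3)` delay is only an illustrative stand-in for a slow API call.

```py
import dlt
from time import sleep

@dlt.resource
def list_items(start, limit):
    yield from range(start, start + limit)

@dlt.transformer
@dlt.defer
def get_details(item_id):
    # simulate a slow API call; each deferred call is evaluated in the extract thread pool
    sleep(0.3)
    # return (do not yield) so the item is produced by the worker thread
    return {"row": item_id}

# items are fetched concurrently when the pipe is evaluated
print(list(list_items(0, 10) | get_details))
```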
@@ -160,7 +161,6 @@ def get_details(item_id): # just return the results, if you yield, generator will be evaluated in main thread return {"row": item_id} - # evaluate the pipeline and print all the items # resources are iterators and they are evaluated in the same way in the pipeline.run print(list(list_items(0, 10) | get_details)) @@ -198,7 +198,6 @@ async def a_get_details(item_id): # just return the results, if you yield, generator will be evaluated in main thread return {"row": item_id} - print(list(list_items(0, 10) | a_get_details)) ``` @@ -251,6 +250,17 @@ The default is to not parallelize normalization and to perform it in the main pr Normalization is CPU bound and can easily saturate all your cores. Never allow `dlt` to use all cores on your local machine. ::: +:::caution +The default method of spawning a process pool on Linux is **fork**. If you are using threads in your code (or libraries that use threads), +you should rather switch to **spawn**. Process forking does not respawn the threads and may destroy the critical sections in your code. Even logging +with Python loggers from multiple threads may lock the `normalize` step. Here's how you switch to **spawn**: +```toml +[normalize] +workers=3 +start_method="spawn" +``` +::: + ### Load The **load** stage uses a thread pool for parallelization. Loading is input/output bound. `dlt` avoids any processing of the content of the load package produced by the normalizer. By default loading happens in 20 threads, each loading a single file. @@ -306,16 +316,21 @@ def read_table(limit): rows = iter(range(limit)) while item_slice := list(islice(rows, 1000)): now = pendulum.now().isoformat() - yield [{"row": _id, "description": "this is row with id {_id}", "timestamp": now} for _id in item_slice] + yield [ + {"row": _id, "description": "this is row with id {_id}", "timestamp": now} + for _id in item_slice + ] # this prevents process pool to run the initialization code again if __name__ == "__main__" or "PYTEST_CURRENT_TEST" in os.environ: pipeline = dlt.pipeline("parallel_load", destination="duckdb", full_refresh=True) pipeline.extract(read_table(1000000)) + load_id = pipeline.list_extracted_load_packages()[0] + extracted_package = pipeline.get_load_package_info(load_id) # we should have 11 files (10 pieces for `table` and 1 for state) - extracted_files = pipeline.list_extracted_resources() - print(extracted_files) + extracted_jobs = extracted_package.jobs["new_jobs"] + print([str(job.job_file_info) for job in extracted_jobs]) # normalize and print counts print(pipeline.normalize(loader_file_format="jsonl")) # print jobs in load package (10 + 1 as above) @@ -359,6 +374,79 @@ the schema, that should be a problem though as long as your data does not create should be accessed serially to avoid losing details on parallel runs. +## Running several pipelines in parallel in single process +You can run several pipeline instances in parallel from a single process by placing them in +separate threads. The most straightforward way is to use `ThreadPoolExecutor` and `asyncio` to execute pipeline methods. 
+ + +```py +import asyncio +import dlt +from time import sleep +from concurrent.futures import ThreadPoolExecutor + +# create both futures and thread parallel resources + +def async_table(): + async def _gen(idx): + await asyncio.sleep(0.1) + return {"async_gen": idx} + + # just yield futures in a loop + for idx_ in range(10): + yield _gen(idx_) + +def defer_table(): + @dlt.defer + def _gen(idx): + sleep(0.1) + return {"thread_gen": idx} + + # just yield futures in a loop + for idx_ in range(5): + yield _gen(idx_) + +def _run_pipeline(pipeline, gen_): + # run the pipeline in a thread, also instantiate generators here! + # Python does not let you use generators across threads + return pipeline.run(gen_()) + +# declare pipelines in main thread then run them "async" +pipeline_1 = dlt.pipeline("pipeline_1", destination="duckdb", full_refresh=True) +pipeline_2 = dlt.pipeline("pipeline_2", destination="duckdb", full_refresh=True) + +async def _run_async(): + loop = asyncio.get_running_loop() + # from Python 3.9 you do not need explicit pool. loop.to_thread will suffice + with ThreadPoolExecutor() as executor: + results = await asyncio.gather( + loop.run_in_executor(executor, _run_pipeline, pipeline_1, async_table), + loop.run_in_executor(executor, _run_pipeline, pipeline_2, defer_table), + ) + # result contains two LoadInfo instances + results[0].raise_on_failed_jobs() + results[1].raise_on_failed_jobs() + +# load data +asyncio.run(_run_async()) +# activate pipelines before they are used +pipeline_1.activate() +# assert load_data_table_counts(pipeline_1) == {"async_table": 10} +pipeline_2.activate() +# assert load_data_table_counts(pipeline_2) == {"defer_table": 5} +``` + + +:::tip +Please note the following: +1. Do not run pipelines with the same name and working dir in parallel. State synchronization will not +work in that case. +2. When running in multiple threads and using [parallel normalize step](#normalize) , use **spawn** +process start method. +3. If you created the `Pipeline` object in the worker thread and you use it from another (ie. main thread) +call `pipeline.activate()` to inject the right context into current thread. +::: + ## Resources extraction, `fifo` vs. 
`round robin` When extracting from resources, you have two options to determine what the order of queries to your diff --git a/docs/website/docs/reference/performance_snippets/performance-snippets.py b/docs/website/docs/reference/performance_snippets/performance-snippets.py index d0c2c46acd..a6ad2f2618 100644 --- a/docs/website/docs/reference/performance_snippets/performance-snippets.py +++ b/docs/website/docs/reference/performance_snippets/performance-snippets.py @@ -1,7 +1,7 @@ from utils import parse_toml_file -def parallel_config_snippet() -> None: +def parallel_config_snippet() -> None: # @@@DLT_SNIPPET_START parallel_config import os import dlt @@ -13,16 +13,21 @@ def read_table(limit): rows = iter(range(limit)) while item_slice := list(islice(rows, 1000)): now = pendulum.now().isoformat() - yield [{"row": _id, "description": "this is row with id {_id}", "timestamp": now} for _id in item_slice] + yield [ + {"row": _id, "description": "this is row with id {_id}", "timestamp": now} + for _id in item_slice + ] # this prevents process pool to run the initialization code again if __name__ == "__main__" or "PYTEST_CURRENT_TEST" in os.environ: pipeline = dlt.pipeline("parallel_load", destination="duckdb", full_refresh=True) pipeline.extract(read_table(1000000)) + load_id = pipeline.list_extracted_load_packages()[0] + extracted_package = pipeline.get_load_package_info(load_id) # we should have 11 files (10 pieces for `table` and 1 for state) - extracted_files = pipeline.list_extracted_resources() - print(extracted_files) + extracted_jobs = extracted_package.jobs["new_jobs"] + print([str(job.job_file_info) for job in extracted_jobs]) # normalize and print counts print(pipeline.normalize(loader_file_format="jsonl")) # print jobs in load package (10 + 1 as above) @@ -31,7 +36,7 @@ def read_table(limit): print(pipeline.load()) # @@@DLT_SNIPPET_END parallel_config - assert len(extracted_files) == 11 + assert len(extracted_jobs) == 11 loaded_package = pipeline.get_load_package_info(load_id) assert len(loaded_package.jobs["completed_jobs"]) == 11 @@ -55,7 +60,6 @@ def get_details(item_id): # just return the results, if you yield, generator will be evaluated in main thread return {"row": item_id} - # evaluate the pipeline and print all the items # resources are iterators and they are evaluated in the same way in the pipeline.run print(list(list_items(0, 10) | get_details)) @@ -72,7 +76,6 @@ async def a_get_details(item_id): # just return the results, if you yield, generator will be evaluated in main thread return {"row": item_id} - print(list(list_items(0, 10) | a_get_details)) # @@@DLT_SNIPPET_END parallel_extract_awaitables @@ -88,6 +91,7 @@ def get_rows(limit): def database_cursor(): # here we yield each row returned from database separately yield from get_rows(10000) + # @@@DLT_SNIPPET_END performance_chunking # @@@DLT_SNIPPET_START performance_chunking_chunk @@ -100,14 +104,71 @@ def database_cursor_chunked(): while item_slice := list(islice(rows, 1000)): print(f"got chunk of length {len(item_slice)}") yield item_slice + # @@@DLT_SNIPPET_END performance_chunking_chunk assert len(list(database_cursor())) == 10000 assert len(list(database_cursor_chunked())) == 10000 -def test_toml_snippets() -> None: - parse_toml_file("./toml-snippets.toml") - +def parallel_pipelines_asyncio_snippet() -> None: + # @@@DLT_SNIPPET_START parallel_pipelines + import asyncio + import dlt + from time import sleep + from concurrent.futures import ThreadPoolExecutor + + # create both futures and thread parallel resources 
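+    # NOTE: async_table yields awaitables (evaluated in the futures pool), while
+    # defer_table yields dlt.defer-wrapped callables (evaluated in a thread pool)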
+ + def async_table(): + async def _gen(idx): + await asyncio.sleep(0.1) + return {"async_gen": idx} + + # just yield futures in a loop + for idx_ in range(10): + yield _gen(idx_) + + def defer_table(): + @dlt.defer + def _gen(idx): + sleep(0.1) + return {"thread_gen": idx} + + # just yield futures in a loop + for idx_ in range(5): + yield _gen(idx_) + + def _run_pipeline(pipeline, gen_): + # run the pipeline in a thread, also instantiate generators here! + # Python does not let you use generators across threads + return pipeline.run(gen_()) + + # declare pipelines in main thread then run them "async" + pipeline_1 = dlt.pipeline("pipeline_1", destination="duckdb", full_refresh=True) + pipeline_2 = dlt.pipeline("pipeline_2", destination="duckdb", full_refresh=True) + + async def _run_async(): + loop = asyncio.get_running_loop() + # from Python 3.9 you do not need explicit pool. loop.to_thread will suffice + with ThreadPoolExecutor() as executor: + results = await asyncio.gather( + loop.run_in_executor(executor, _run_pipeline, pipeline_1, async_table), + loop.run_in_executor(executor, _run_pipeline, pipeline_2, defer_table), + ) + # result contains two LoadInfo instances + results[0].raise_on_failed_jobs() + results[1].raise_on_failed_jobs() + + # load data + asyncio.run(_run_async()) + # activate pipelines before they are used + pipeline_1.activate() + # assert load_data_table_counts(pipeline_1) == {"async_table": 10} + pipeline_2.activate() + # assert load_data_table_counts(pipeline_2) == {"defer_table": 5} + # @@@DLT_SNIPPET_END parallel_pipelines +def test_toml_snippets() -> None: + parse_toml_file("./toml-snippets.toml") diff --git a/docs/website/docs/reference/tracing.md b/docs/website/docs/reference/tracing.md new file mode 100644 index 0000000000..0ad0a59912 --- /dev/null +++ b/docs/website/docs/reference/tracing.md @@ -0,0 +1,6 @@ +1. Identifiers + +2. Data Lineage + +3. 
Schema Lineage + diff --git a/docs/website/docs/utils.py b/docs/website/docs/utils.py index 36ae49ca65..ce609a61c2 100644 --- a/docs/website/docs/utils.py +++ b/docs/website/docs/utils.py @@ -3,6 +3,7 @@ DLT_MARKER = "@@@DLT_" + def parse_toml_file(filename: str) -> None: # test toml file by going snippet by snippet with open(filename, "r", encoding="utf-8") as f: @@ -17,8 +18,10 @@ def parse_toml_file(filename: str) -> None: try: tomlkit.loads(toml_snippet) except Exception as e: - print(f"Error while testing snippet bewteen: {current_marker} and {line.strip()}") + print( + f"Error while testing snippet bewteen: {current_marker} and {line.strip()}" + ) raise e current_lines = [] current_marker = line.strip() - current_lines.append(line) \ No newline at end of file + current_lines.append(line) diff --git a/docs/website/package-lock.json b/docs/website/package-lock.json index c45374f83b..577b362611 100644 --- a/docs/website/package-lock.json +++ b/docs/website/package-lock.json @@ -25,6 +25,7 @@ }, "devDependencies": { "@docusaurus/module-type-aliases": "2.4.1" + }, "engines": { "node": ">=16.14" @@ -5713,6 +5714,7 @@ "version": "16.3.1", "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.3.1.tgz", "integrity": "sha512-IPzF4w4/Rd94bA9imS68tZBaYyBWSCE47V1RGuMrB94iyTOIEwRmVL2x/4An+6mETpLrKJ5hQkB8W4kFAadeIQ==", + "dev": true, "engines": { "node": ">=12" }, @@ -11524,7 +11526,8 @@ "node_modules/toml": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/toml/-/toml-3.0.0.tgz", - "integrity": "sha512-y/mWCZinnvxjTKYhJ+pYxwD0mRLVvOtdS2Awbgxln6iEnt4rk0yBxeSBHkGJcPucRiG0e55mwWp+g/05rsrd6w==" + "integrity": "sha512-y/mWCZinnvxjTKYhJ+pYxwD0mRLVvOtdS2Awbgxln6iEnt4rk0yBxeSBHkGJcPucRiG0e55mwWp+g/05rsrd6w==", + "dev": true }, "node_modules/totalist": { "version": "3.0.1", diff --git a/docs/website/pydoc_markdown_dlt.py b/docs/website/pydoc_markdown_dlt.py index ff970ef3a2..ed30189dbc 100644 --- a/docs/website/pydoc_markdown_dlt.py +++ b/docs/website/pydoc_markdown_dlt.py @@ -22,4 +22,4 @@ def _process(self, node): c = sub(r"(\n\s*)(>>> ?)", r"\1", c) node.docstring.content = c - return super()._process(node) \ No newline at end of file + return super()._process(node) diff --git a/docs/website/sidebars.js b/docs/website/sidebars.js index 527511bedd..9febf0ee17 100644 --- a/docs/website/sidebars.js +++ b/docs/website/sidebars.js @@ -108,6 +108,7 @@ const sidebars = { 'general-usage/incremental-loading', 'general-usage/full-loading', 'general-usage/schema', + 'general-usage/schema-contracts', { type: 'category', label: 'Configuration', diff --git a/docs/website/src/css/custom.css b/docs/website/src/css/custom.css index 5f94b9a305..0f07699af7 100644 --- a/docs/website/src/css/custom.css +++ b/docs/website/src/css/custom.css @@ -39,7 +39,6 @@ --docsearch-highlight-color:#191937 !important; --docsearch-hit-color: #191937 !important; --ifm-footer-background-color: #E4E8F0; - --doc-sidebar-width: 340px !important; } diff --git a/mypy.ini b/mypy.ini index 0d7ab84da9..8a02cf80bd 100644 --- a/mypy.ini +++ b/mypy.ini @@ -102,4 +102,6 @@ ignore_missing_imports=true [mypy-connectorx] ignore_missing_imports=true [mypy-s3fs.*] +ignore_missing_imports=true +[mypy-win_precise_time] ignore_missing_imports=true \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index 9771620963..fb8752a638 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,26 +1,18 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. 
- [[package]] name = "about-time" version = "4.2.1" description = "Easily measure timing and throughput of code blocks, with beautiful human friendly representations." +category = "dev" optional = false python-versions = ">=3.7, <4" -files = [ - {file = "about-time-4.2.1.tar.gz", hash = "sha256:6a538862d33ce67d997429d14998310e1dbfda6cb7d9bbfbf799c4709847fece"}, - {file = "about_time-4.2.1-py3-none-any.whl", hash = "sha256:8bbf4c75fe13cbd3d72f49a03b02c5c7dca32169b6d49117c257e7eb3eaee341"}, -] [[package]] name = "adlfs" version = "2023.8.0" description = "Access Azure Datalake Gen1 with fsspec and dask" +category = "main" optional = true python-versions = ">=3.8" -files = [ - {file = "adlfs-2023.8.0-py3-none-any.whl", hash = "sha256:3eb248a3c2a30b419f1147bd7676d156b5219f96ef7f11d47166afd2a3bdb07e"}, - {file = "adlfs-2023.8.0.tar.gz", hash = "sha256:07e804f6df4593acfcaf01025b162e30ac13e523d3570279c98b2d91a18026d9"}, -] [package.dependencies] aiohttp = ">=3.7.0" @@ -37,12 +29,9 @@ docs = ["furo", "myst-parser", "numpydoc", "sphinx"] name = "agate" version = "1.6.3" description = "A data analysis library that is optimized for humans instead of machines." +category = "main" optional = false python-versions = "*" -files = [ - {file = "agate-1.6.3-py2.py3-none-any.whl", hash = "sha256:2d568fd68a8eb8b56c805a1299ba4bc30ca0434563be1bea309c9d1c1c8401f4"}, - {file = "agate-1.6.3.tar.gz", hash = "sha256:e0f2f813f7e12311a4cdccc97d6ba0a6781e9c1aa8eca0ab00d5931c0113a308"}, -] [package.dependencies] Babel = ">=2.0" @@ -61,12 +50,9 @@ test = ["PyICU (>=2.4.2)", "coverage (>=3.7.1)", "cssselect (>=0.9.1)", "lxml (> name = "aiobotocore" version = "2.5.2" description = "Async client for aws services using botocore and aiohttp" +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "aiobotocore-2.5.2-py3-none-any.whl", hash = "sha256:337429ffd3cc367532572d40be809a84c7b5335f3f8eca2f23e09dfaa9a9ef90"}, - {file = "aiobotocore-2.5.2.tar.gz", hash = "sha256:e7399f21570db1c287f1c0c814dd3475dfe1c8166722e2c77ce67f172cbcfa89"}, -] [package.dependencies] aiohttp = ">=3.3.1,<4.0.0" @@ -82,97 +68,9 @@ boto3 = ["boto3 (>=1.26.161,<1.26.162)"] name = "aiohttp" version = "3.8.5" description = "Async http client/server framework (asyncio)" +category = "main" optional = false python-versions = ">=3.6" -files = [ - {file = "aiohttp-3.8.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a94159871304770da4dd371f4291b20cac04e8c94f11bdea1c3478e557fbe0d8"}, - {file = "aiohttp-3.8.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:13bf85afc99ce6f9ee3567b04501f18f9f8dbbb2ea11ed1a2e079670403a7c84"}, - {file = "aiohttp-3.8.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2ce2ac5708501afc4847221a521f7e4b245abf5178cf5ddae9d5b3856ddb2f3a"}, - {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96943e5dcc37a6529d18766597c491798b7eb7a61d48878611298afc1fca946c"}, - {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ad5c3c4590bb3cc28b4382f031f3783f25ec223557124c68754a2231d989e2b"}, - {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0c413c633d0512df4dc7fd2373ec06cc6a815b7b6d6c2f208ada7e9e93a5061d"}, - {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df72ac063b97837a80d80dec8d54c241af059cc9bb42c4de68bd5b61ceb37caa"}, - {file = 
"aiohttp-3.8.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c48c5c0271149cfe467c0ff8eb941279fd6e3f65c9a388c984e0e6cf57538e14"}, - {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:368a42363c4d70ab52c2c6420a57f190ed3dfaca6a1b19afda8165ee16416a82"}, - {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7607ec3ce4993464368505888af5beb446845a014bc676d349efec0e05085905"}, - {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:0d21c684808288a98914e5aaf2a7c6a3179d4df11d249799c32d1808e79503b5"}, - {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:312fcfbacc7880a8da0ae8b6abc6cc7d752e9caa0051a53d217a650b25e9a691"}, - {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ad093e823df03bb3fd37e7dec9d4670c34f9e24aeace76808fc20a507cace825"}, - {file = "aiohttp-3.8.5-cp310-cp310-win32.whl", hash = "sha256:33279701c04351a2914e1100b62b2a7fdb9a25995c4a104259f9a5ead7ed4802"}, - {file = "aiohttp-3.8.5-cp310-cp310-win_amd64.whl", hash = "sha256:6e4a280e4b975a2e7745573e3fc9c9ba0d1194a3738ce1cbaa80626cc9b4f4df"}, - {file = "aiohttp-3.8.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ae871a964e1987a943d83d6709d20ec6103ca1eaf52f7e0d36ee1b5bebb8b9b9"}, - {file = "aiohttp-3.8.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:461908b2578955045efde733719d62f2b649c404189a09a632d245b445c9c975"}, - {file = "aiohttp-3.8.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:72a860c215e26192379f57cae5ab12b168b75db8271f111019509a1196dfc780"}, - {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc14be025665dba6202b6a71cfcdb53210cc498e50068bc088076624471f8bb9"}, - {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8af740fc2711ad85f1a5c034a435782fbd5b5f8314c9a3ef071424a8158d7f6b"}, - {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:841cd8233cbd2111a0ef0a522ce016357c5e3aff8a8ce92bcfa14cef890d698f"}, - {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ed1c46fb119f1b59304b5ec89f834f07124cd23ae5b74288e364477641060ff"}, - {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84f8ae3e09a34f35c18fa57f015cc394bd1389bce02503fb30c394d04ee6b938"}, - {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:62360cb771707cb70a6fd114b9871d20d7dd2163a0feafe43fd115cfe4fe845e"}, - {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:23fb25a9f0a1ca1f24c0a371523546366bb642397c94ab45ad3aedf2941cec6a"}, - {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b0ba0d15164eae3d878260d4c4df859bbdc6466e9e6689c344a13334f988bb53"}, - {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5d20003b635fc6ae3f96d7260281dfaf1894fc3aa24d1888a9b2628e97c241e5"}, - {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0175d745d9e85c40dcc51c8f88c74bfbaef9e7afeeeb9d03c37977270303064c"}, - {file = "aiohttp-3.8.5-cp311-cp311-win32.whl", hash = "sha256:2e1b1e51b0774408f091d268648e3d57f7260c1682e7d3a63cb00d22d71bb945"}, - {file = "aiohttp-3.8.5-cp311-cp311-win_amd64.whl", hash = "sha256:043d2299f6dfdc92f0ac5e995dfc56668e1587cea7f9aa9d8a78a1b6554e5755"}, - {file = 
"aiohttp-3.8.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cae533195e8122584ec87531d6df000ad07737eaa3c81209e85c928854d2195c"}, - {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f21e83f355643c345177a5d1d8079f9f28b5133bcd154193b799d380331d5d3"}, - {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a7a75ef35f2df54ad55dbf4b73fe1da96f370e51b10c91f08b19603c64004acc"}, - {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e2e9839e14dd5308ee773c97115f1e0a1cb1d75cbeeee9f33824fa5144c7634"}, - {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c44e65da1de4403d0576473e2344828ef9c4c6244d65cf4b75549bb46d40b8dd"}, - {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:78d847e4cde6ecc19125ccbc9bfac4a7ab37c234dd88fbb3c5c524e8e14da543"}, - {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:c7a815258e5895d8900aec4454f38dca9aed71085f227537208057853f9d13f2"}, - {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:8b929b9bd7cd7c3939f8bcfffa92fae7480bd1aa425279d51a89327d600c704d"}, - {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:5db3a5b833764280ed7618393832e0853e40f3d3e9aa128ac0ba0f8278d08649"}, - {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:a0215ce6041d501f3155dc219712bc41252d0ab76474615b9700d63d4d9292af"}, - {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:fd1ed388ea7fbed22c4968dd64bab0198de60750a25fe8c0c9d4bef5abe13824"}, - {file = "aiohttp-3.8.5-cp36-cp36m-win32.whl", hash = "sha256:6e6783bcc45f397fdebc118d772103d751b54cddf5b60fbcc958382d7dd64f3e"}, - {file = "aiohttp-3.8.5-cp36-cp36m-win_amd64.whl", hash = "sha256:b5411d82cddd212644cf9360879eb5080f0d5f7d809d03262c50dad02f01421a"}, - {file = "aiohttp-3.8.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:01d4c0c874aa4ddfb8098e85d10b5e875a70adc63db91f1ae65a4b04d3344cda"}, - {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5980a746d547a6ba173fd5ee85ce9077e72d118758db05d229044b469d9029a"}, - {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a482e6da906d5e6e653be079b29bc173a48e381600161c9932d89dfae5942ef"}, - {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80bd372b8d0715c66c974cf57fe363621a02f359f1ec81cba97366948c7fc873"}, - {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1161b345c0a444ebcf46bf0a740ba5dcf50612fd3d0528883fdc0eff578006a"}, - {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd56db019015b6acfaaf92e1ac40eb8434847d9bf88b4be4efe5bfd260aee692"}, - {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:153c2549f6c004d2754cc60603d4668899c9895b8a89397444a9c4efa282aaf4"}, - {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:4a01951fabc4ce26ab791da5f3f24dca6d9a6f24121746eb19756416ff2d881b"}, - {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bfb9162dcf01f615462b995a516ba03e769de0789de1cadc0f916265c257e5d8"}, - {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = 
"sha256:7dde0009408969a43b04c16cbbe252c4f5ef4574ac226bc8815cd7342d2028b6"}, - {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4149d34c32f9638f38f544b3977a4c24052042affa895352d3636fa8bffd030a"}, - {file = "aiohttp-3.8.5-cp37-cp37m-win32.whl", hash = "sha256:68c5a82c8779bdfc6367c967a4a1b2aa52cd3595388bf5961a62158ee8a59e22"}, - {file = "aiohttp-3.8.5-cp37-cp37m-win_amd64.whl", hash = "sha256:2cf57fb50be5f52bda004b8893e63b48530ed9f0d6c96c84620dc92fe3cd9b9d"}, - {file = "aiohttp-3.8.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:eca4bf3734c541dc4f374ad6010a68ff6c6748f00451707f39857f429ca36ced"}, - {file = "aiohttp-3.8.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1274477e4c71ce8cfe6c1ec2f806d57c015ebf84d83373676036e256bc55d690"}, - {file = "aiohttp-3.8.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:28c543e54710d6158fc6f439296c7865b29e0b616629767e685a7185fab4a6b9"}, - {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:910bec0c49637d213f5d9877105d26e0c4a4de2f8b1b29405ff37e9fc0ad52b8"}, - {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5443910d662db951b2e58eb70b0fbe6b6e2ae613477129a5805d0b66c54b6cb7"}, - {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e460be6978fc24e3df83193dc0cc4de46c9909ed92dd47d349a452ef49325b7"}, - {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb1558def481d84f03b45888473fc5a1f35747b5f334ef4e7a571bc0dfcb11f8"}, - {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34dd0c107799dcbbf7d48b53be761a013c0adf5571bf50c4ecad5643fe9cfcd0"}, - {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:aa1990247f02a54185dc0dff92a6904521172a22664c863a03ff64c42f9b5410"}, - {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0e584a10f204a617d71d359fe383406305a4b595b333721fa50b867b4a0a1548"}, - {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:a3cf433f127efa43fee6b90ea4c6edf6c4a17109d1d037d1a52abec84d8f2e42"}, - {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:c11f5b099adafb18e65c2c997d57108b5bbeaa9eeee64a84302c0978b1ec948b"}, - {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:84de26ddf621d7ac4c975dbea4c945860e08cccde492269db4e1538a6a6f3c35"}, - {file = "aiohttp-3.8.5-cp38-cp38-win32.whl", hash = "sha256:ab88bafedc57dd0aab55fa728ea10c1911f7e4d8b43e1d838a1739f33712921c"}, - {file = "aiohttp-3.8.5-cp38-cp38-win_amd64.whl", hash = "sha256:5798a9aad1879f626589f3df0f8b79b3608a92e9beab10e5fda02c8a2c60db2e"}, - {file = "aiohttp-3.8.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a6ce61195c6a19c785df04e71a4537e29eaa2c50fe745b732aa937c0c77169f3"}, - {file = "aiohttp-3.8.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:773dd01706d4db536335fcfae6ea2440a70ceb03dd3e7378f3e815b03c97ab51"}, - {file = "aiohttp-3.8.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f83a552443a526ea38d064588613aca983d0ee0038801bc93c0c916428310c28"}, - {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f7372f7341fcc16f57b2caded43e81ddd18df53320b6f9f042acad41f8e049a"}, - {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea353162f249c8097ea63c2169dd1aa55de1e8fecbe63412a9bc50816e87b761"}, - {file 
= "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d47ae48db0b2dcf70bc8a3bc72b3de86e2a590fc299fdbbb15af320d2659de"}, - {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d827176898a2b0b09694fbd1088c7a31836d1a505c243811c87ae53a3f6273c1"}, - {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3562b06567c06439d8b447037bb655ef69786c590b1de86c7ab81efe1c9c15d8"}, - {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4e874cbf8caf8959d2adf572a78bba17cb0e9d7e51bb83d86a3697b686a0ab4d"}, - {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6809a00deaf3810e38c628e9a33271892f815b853605a936e2e9e5129762356c"}, - {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:33776e945d89b29251b33a7e7d006ce86447b2cfd66db5e5ded4e5cd0340585c"}, - {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:eaeed7abfb5d64c539e2db173f63631455f1196c37d9d8d873fc316470dfbacd"}, - {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e91d635961bec2d8f19dfeb41a539eb94bd073f075ca6dae6c8dc0ee89ad6f91"}, - {file = "aiohttp-3.8.5-cp39-cp39-win32.whl", hash = "sha256:00ad4b6f185ec67f3e6562e8a1d2b69660be43070bd0ef6fcec5211154c7df67"}, - {file = "aiohttp-3.8.5-cp39-cp39-win_amd64.whl", hash = "sha256:c0a9034379a37ae42dea7ac1e048352d96286626251862e448933c0f59cbd79c"}, - {file = "aiohttp-3.8.5.tar.gz", hash = "sha256:b9552ec52cc147dbf1944ac7ac98af7602e51ea2dcd076ed194ca3c0d1c7d0bc"}, -] [package.dependencies] aiosignal = ">=1.1.2" @@ -190,12 +88,9 @@ speedups = ["Brotli", "aiodns", "cchardet"] name = "aioitertools" version = "0.11.0" description = "itertools and builtins for AsyncIO and mixed iterables" +category = "main" optional = true python-versions = ">=3.6" -files = [ - {file = "aioitertools-0.11.0-py3-none-any.whl", hash = "sha256:04b95e3dab25b449def24d7df809411c10e62aab0cbe31a50ca4e68748c43394"}, - {file = "aioitertools-0.11.0.tar.gz", hash = "sha256:42c68b8dd3a69c2bf7f2233bf7df4bb58b557bca5252ac02ed5187bbc67d6831"}, -] [package.dependencies] typing_extensions = {version = ">=4.0", markers = "python_version < \"3.10\""} @@ -204,12 +99,9 @@ typing_extensions = {version = ">=4.0", markers = "python_version < \"3.10\""} name = "aiosignal" version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, - {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, -] [package.dependencies] frozenlist = ">=1.1.0" @@ -218,12 +110,9 @@ frozenlist = ">=1.1.0" name = "alembic" version = "1.12.0" description = "A database migration tool for SQLAlchemy." 
+category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "alembic-1.12.0-py3-none-any.whl", hash = "sha256:03226222f1cf943deee6c85d9464261a6c710cd19b4fe867a3ad1f25afda610f"}, - {file = "alembic-1.12.0.tar.gz", hash = "sha256:8e7645c32e4f200675e69f0745415335eb59a3663f5feb487abfa0b30c45888b"}, -] [package.dependencies] importlib-metadata = {version = "*", markers = "python_version < \"3.9\""} @@ -239,38 +128,40 @@ tz = ["python-dateutil"] name = "alive-progress" version = "3.1.4" description = "A new kind of Progress Bar, with real-time throughput, ETA, and very cool animations!" +category = "dev" optional = false python-versions = ">=3.7, <4" -files = [ - {file = "alive-progress-3.1.4.tar.gz", hash = "sha256:74a95d8d0d42bc99d3a3725dbd06ebb852245f1b64e301a7c375b92b22663f7b"}, - {file = "alive_progress-3.1.4-py3-none-any.whl", hash = "sha256:c80ad87ce9c1054b01135a87fae69ecebbfc2107497ae87cbe6aec7e534903db"}, -] [package.dependencies] about-time = "4.2.1" grapheme = "0.6.0" +[[package]] +name = "annotated-types" +version = "0.6.0" +description = "Reusable constraint types to use with typing.Annotated" +category = "main" +optional = false +python-versions = ">=3.8" + +[package.dependencies] +typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""} + [[package]] name = "ansicon" version = "1.89.0" description = "Python wrapper for loading Jason Hood's ANSICON" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "ansicon-1.89.0-py2.py3-none-any.whl", hash = "sha256:f1def52d17f65c2c9682cf8370c03f541f410c1752d6a14029f97318e4b9dfec"}, - {file = "ansicon-1.89.0.tar.gz", hash = "sha256:e4d039def5768a47e4afec8e89e83ec3ae5a26bf00ad851f914d1240b444d2b1"}, -] [[package]] name = "anyio" version = "4.0.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" +category = "main" optional = false python-versions = ">=3.8" -files = [ - {file = "anyio-4.0.0-py3-none-any.whl", hash = "sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f"}, - {file = "anyio-4.0.0.tar.gz", hash = "sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a"}, -] [package.dependencies] exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} @@ -284,14 +175,11 @@ trio = ["trio (>=0.22)"] [[package]] name = "apache-airflow" -version = "2.7.0" +version = "2.7.2" description = "Programmatically author, schedule and monitor data pipelines" +category = "dev" optional = false python-versions = "~=3.8" -files = [ - {file = "apache-airflow-2.7.0.tar.gz", hash = "sha256:06fba3df5943b6eda5e2f033e7e45b6ea557d89909ca36e61614ea61075f9722"}, - {file = "apache_airflow-2.7.0-py3-none-any.whl", hash = "sha256:8e3cf4b3cd8583a2e76bd04827af8d34747e0cf30a28cf0e70f4f4f39ce61f6d"}, -] [package.dependencies] alembic = ">=1.6.3,<2.0" @@ -314,7 +202,7 @@ cryptography = ">=0.9.3" deprecated = ">=1.2.13" dill = ">=0.2.2" flask = ">=2.2,<2.3" -flask-appbuilder = "4.3.3" +flask-appbuilder = "4.3.6" flask-caching = ">=1.5.0" flask-login = ">=0.6.2" flask-session = ">=0.4.0" @@ -323,7 +211,7 @@ google-re2 = ">=1.0" graphviz = ">=0.12" gunicorn = ">=20.1.0" httpx = "*" -importlib-metadata = {version = ">=1.7,<5.0.0", markers = "python_version < \"3.9\""} +importlib-metadata = {version = ">=1.7", markers = "python_version < \"3.9\""} importlib-resources = {version = ">=5.2", markers = "python_version < \"3.9\""} itsdangerous = ">=2.0" jinja2 = ">=3.0.0" @@ -336,14 +224,14 @@ markdown-it-py = 
">=2.1.0" markupsafe = ">=1.1.1" marshmallow-oneofschema = ">=2.0.1" mdit-py-plugins = ">=0.3.0" -opentelemetry-api = "1.15.0" +opentelemetry-api = ">=1.15.0" opentelemetry-exporter-otlp = "*" packaging = ">=14.0" pathspec = ">=0.9.0" pendulum = ">=2.0" pluggy = ">=1.0" psutil = ">=4.2.0" -pydantic = ">=1.10.0,<2.0.0" +pydantic = ">=1.10.0" pygments = ">=2.0.1" pyjwt = ">=2.0.0" python-daemon = ">=3.0.0" @@ -354,7 +242,7 @@ rfc3339-validator = ">=0.1.4" rich = ">=12.4.4" rich-argparse = ">=1.0.0" setproctitle = ">=1.1.8" -sqlalchemy = ">=1.4,<2.0" +sqlalchemy = ">=1.4.28,<2.0" sqlalchemy-jsonfield = ">=1.0" tabulate = ">=0.7.5" tenacity = ">=6.2.0,<8.2.0 || >8.2.0" @@ -367,8 +255,8 @@ werkzeug = ">=2.0" aiobotocore = ["aiobotocore (>=2.1.1)"] airbyte = ["apache-airflow-providers-airbyte"] alibaba = ["apache-airflow-providers-alibaba"] -all = ["PyGithub (!=1.58)", "PyOpenSSL", "adal (>=1.2.7)", "aiobotocore (>=2.1.1)", "aiohttp", "aiohttp (>=3.6.3,<4)", "alibabacloud-adb20211201 (>=1.0.0)", "alibabacloud-tea-openapi (>=0.3.7)", "amqp", "analytics-python (>=1.2.9)", "apache-airflow (>=2.4.0)", "apache-airflow (>=2.7.0)", "apache-airflow-providers-airbyte", "apache-airflow-providers-alibaba", "apache-airflow-providers-amazon", "apache-airflow-providers-apache-beam", "apache-airflow-providers-apache-cassandra", "apache-airflow-providers-apache-drill", "apache-airflow-providers-apache-druid", "apache-airflow-providers-apache-flink", "apache-airflow-providers-apache-hdfs", "apache-airflow-providers-apache-hive", "apache-airflow-providers-apache-impala", "apache-airflow-providers-apache-kafka", "apache-airflow-providers-apache-kylin", "apache-airflow-providers-apache-livy", "apache-airflow-providers-apache-pig", "apache-airflow-providers-apache-pinot", "apache-airflow-providers-apache-spark", "apache-airflow-providers-apache-sqoop", "apache-airflow-providers-apprise", "apache-airflow-providers-arangodb", "apache-airflow-providers-asana", "apache-airflow-providers-atlassian-jira", "apache-airflow-providers-celery", "apache-airflow-providers-cloudant", "apache-airflow-providers-cncf-kubernetes", "apache-airflow-providers-common-sql", "apache-airflow-providers-daskexecutor", "apache-airflow-providers-databricks", "apache-airflow-providers-datadog", "apache-airflow-providers-dbt-cloud", "apache-airflow-providers-dingding", "apache-airflow-providers-discord", "apache-airflow-providers-docker", "apache-airflow-providers-elasticsearch", "apache-airflow-providers-exasol", "apache-airflow-providers-facebook", "apache-airflow-providers-ftp", "apache-airflow-providers-github", "apache-airflow-providers-google", "apache-airflow-providers-grpc", "apache-airflow-providers-hashicorp", "apache-airflow-providers-http", "apache-airflow-providers-imap", "apache-airflow-providers-influxdb", "apache-airflow-providers-jdbc", "apache-airflow-providers-jenkins", "apache-airflow-providers-microsoft-azure", "apache-airflow-providers-microsoft-mssql", "apache-airflow-providers-microsoft-psrp", "apache-airflow-providers-microsoft-winrm", "apache-airflow-providers-mongo", "apache-airflow-providers-mysql", "apache-airflow-providers-neo4j", "apache-airflow-providers-odbc", "apache-airflow-providers-openfaas", "apache-airflow-providers-openlineage", "apache-airflow-providers-opsgenie", "apache-airflow-providers-oracle", "apache-airflow-providers-pagerduty", "apache-airflow-providers-papermill", "apache-airflow-providers-plexus", "apache-airflow-providers-postgres", "apache-airflow-providers-presto", 
"apache-airflow-providers-qubole", "apache-airflow-providers-redis", "apache-airflow-providers-salesforce", "apache-airflow-providers-samba", "apache-airflow-providers-segment", "apache-airflow-providers-sendgrid", "apache-airflow-providers-sftp", "apache-airflow-providers-singularity", "apache-airflow-providers-slack", "apache-airflow-providers-smtp", "apache-airflow-providers-snowflake", "apache-airflow-providers-sqlite", "apache-airflow-providers-ssh", "apache-airflow-providers-tableau", "apache-airflow-providers-tabular", "apache-airflow-providers-telegram", "apache-airflow-providers-trino", "apache-airflow-providers-vertica", "apache-airflow-providers-zendesk", "apache-beam (>=2.47.0)", "apprise", "arrow (>=0.16.0)", "asana (>=0.10,<4.0.0)", "asgiref", "asgiref (>=3.5.2)", "atlasclient (>=0.1.2)", "atlassian-python-api (>=1.14.2)", "attrs (>=22.2)", "authlib (>=1.0.0)", "azure-batch (>=8.0.0)", "azure-cosmos (>=4.0.0)", "azure-datalake-store (>=0.0.45)", "azure-identity (>=1.3.1)", "azure-keyvault-secrets (>=4.1.0)", "azure-kusto-data (>=0.0.43,<0.1)", "azure-mgmt-containerinstance (>=1.5.0,<2.0)", "azure-mgmt-datafactory (>=1.0.0,<2.0)", "azure-mgmt-datalake-store (>=0.5.0)", "azure-mgmt-resource (>=2.2.0)", "azure-servicebus (>=7.6.1)", "azure-storage-blob (>=12.14.0)", "azure-storage-common (>=2.1.0)", "azure-storage-file (>=2.1.0)", "azure-storage-file-datalake (>=12.9.1)", "azure-synapse-spark", "bcrypt (>=2.0.0)", "blinker (>=1.1)", "boto3 (>=1.24.0)", "cassandra-driver (>=3.13.0)", "celery (>=5.2.3,<6)", "cgroupspy (>=0.2.2)", "cloudant (>=2.0)", "cloudpickle (>=1.4.1)", "confluent-kafka (>=1.8.2)", "cryptography (>=2.0.0)", "dask (>=2.9.0,!=2022.10.1,!=2023.5.0)", "databricks-sql-connector (>=2.0.0,<3.0.0)", "datadog (>=0.14.0)", "distributed (>=2.11.1,!=2023.5.0)", "dnspython (>=1.13.0)", "docker (>=5.0.3)", "elasticsearch (>7,<7.15.0)", "eventlet (>=0.33.3)", "facebook-business (>=6.0.2)", "flask-appbuilder[oauth] (==4.3.3)", "flask-bcrypt (>=0.7.1)", "flower (>=1.0.0)", "gcloud-aio-auth (>=4.0.0,<5.0.0)", "gcloud-aio-bigquery (>=6.1.2)", "gcloud-aio-storage", "gevent (>=0.13)", "google-ads (>=21.2.0)", "google-api-core (>=2.11.0)", "google-api-python-client (>=1.6.0)", "google-auth (>=1.0.0)", "google-auth (>=1.0.0,<3.0.0)", "google-auth-httplib2 (>=0.0.1)", "google-cloud-aiplatform (>=1.22.1)", "google-cloud-automl (>=2.11.0)", "google-cloud-bigquery-datatransfer (>=3.11.0)", "google-cloud-bigtable (>=2.17.0)", "google-cloud-build (>=3.13.0)", "google-cloud-compute (>=1.10.0)", "google-cloud-container (>=2.17.4)", "google-cloud-datacatalog (>=3.11.1)", "google-cloud-dataflow-client (>=0.8.2)", "google-cloud-dataform (>=0.5.0)", "google-cloud-dataplex (>=1.4.2)", "google-cloud-dataproc (>=5.4.0)", "google-cloud-dataproc-metastore (>=1.12.0)", "google-cloud-dlp (>=3.12.0)", "google-cloud-kms (>=2.15.0)", "google-cloud-language (>=2.9.0)", "google-cloud-logging (>=3.5.0)", "google-cloud-memcache (>=1.7.0)", "google-cloud-monitoring (>=2.14.1)", "google-cloud-orchestration-airflow (>=1.7.0)", "google-cloud-os-login (>=2.9.1)", "google-cloud-pubsub (>=2.15.0)", "google-cloud-redis (>=2.12.0)", "google-cloud-secret-manager (>=2.16.0)", "google-cloud-spanner (>=3.11.1)", "google-cloud-speech (>=2.18.0)", "google-cloud-storage (>=2.7.0)", "google-cloud-storage-transfer (>=1.4.1)", "google-cloud-tasks (>=2.13.0)", "google-cloud-texttospeech (>=2.14.1)", "google-cloud-translate (>=3.11.0)", "google-cloud-videointelligence (>=2.11.0)", "google-cloud-vision (>=3.4.0)", 
"google-cloud-workflows (>=1.10.0)", "greenlet (>=0.4.9)", "grpcio (>=1.15.0)", "grpcio-gcp (>=0.2.2)", "hdfs[avro,dataframe,kerberos] (>=2.0.4)", "hmsclient (>=0.1.0)", "httpx", "hvac (>=0.10)", "impyla (>=0.18.0,<1.0)", "influxdb-client (>=1.19.0)", "jaydebeapi (>=1.1.1)", "json-merge-patch (>=0.2)", "jsonpath-ng (>=1.5.3)", "kubernetes (>=21.7.0,<24)", "kubernetes-asyncio (>=18.20.1,<25)", "kylinpy (>=2.6)", "ldap3 (>=2.5.1)", "looker-sdk (>=22.2.0)", "mypy-boto3-appflow (>=1.24.0,<1.28.12)", "mypy-boto3-rds (>=1.24.0)", "mypy-boto3-redshift-data (>=1.24.0)", "mypy-boto3-s3 (>=1.24.0)", "mysqlclient (>=1.3.6)", "neo4j (>=4.2.1)", "openlineage-integration-common (>=0.28.0)", "openlineage-python (>=0.28.0)", "opentelemetry-exporter-prometheus", "opsgenie-sdk (>=2.1.5)", "oracledb (>=1.0.0)", "oss2 (>=2.14.0)", "pandas (>=0.17.1)", "pandas-gbq", "papermill[all] (>=1.2.1)", "paramiko (>=2.6.0)", "pdpyras (>=4.1.2)", "pinotdb (>0.4.7)", "plyvel", "presto-python-client (>=0.8.2)", "proto-plus (>=1.19.6)", "psycopg2-binary (>=2.8.0)", "pyarrow (>=9.0.0)", "pydruid (>=0.4.1)", "pyexasol (>=0.5.1)", "pyhive[hive] (>=0.6.0)", "pykerberos (>=1.1.13)", "pymongo (>=3.6.0)", "pymssql (>=2.1.5)", "pyodbc", "pypsrp (>=0.8.0)", "pyspark", "python-arango (>=7.3.2)", "python-dotenv (>=0.21.0)", "python-jenkins (>=1.0.0)", "python-ldap", "python-telegram-bot (>=20.0.0)", "pywinrm (>=0.4)", "qds-sdk (>=1.10.4)", "redis (>=3.2.0)", "redshift-connector (>=2.0.888)", "requests (>=2.26.0)", "requests (>=2.27,<3)", "requests-kerberos (>=0.10.0)", "requests-toolbelt", "sasl (>=0.3.1)", "scrapbook[all]", "sendgrid (>=6.0.0)", "sentry-sdk (>=0.8.0)", "simple-salesforce (>=1.0.0)", "slack-sdk (>=3.0.0)", "smbprotocol (>=1.5.0)", "snowflake-connector-python (>=2.4.1)", "snowflake-sqlalchemy (>=1.1.0)", "spython (>=0.0.56)", "sqlalchemy-bigquery (>=1.2.1)", "sqlalchemy-drill (>=1.1.0)", "sqlalchemy-redshift (>=0.8.6)", "sqlalchemy-spanner (>=1.6.2)", "sqlparse (>=0.4.2)", "sshtunnel (>=0.3.2)", "statsd (>=3.3.0)", "tableauserverclient", "thrift (>=0.9.2)", "thrift-sasl (>=0.2.0)", "trino (>=0.318.0)", "vertica-python (>=0.5.1)", "virtualenv", "watchtower (>=2.0.1,<2.1.0)", "zenpy (>=2.0.24)"] -all-dbs = ["aiohttp (>=3.6.3,<4)", "apache-airflow (>=2.4.0)", "apache-airflow-providers-apache-cassandra", "apache-airflow-providers-apache-drill", "apache-airflow-providers-apache-druid", "apache-airflow-providers-apache-hdfs", "apache-airflow-providers-apache-hive", "apache-airflow-providers-apache-impala", "apache-airflow-providers-apache-pinot", "apache-airflow-providers-arangodb", "apache-airflow-providers-cloudant", "apache-airflow-providers-common-sql (>=1.3.1)", "apache-airflow-providers-common-sql (>=1.5.0)", "apache-airflow-providers-databricks", "apache-airflow-providers-exasol", "apache-airflow-providers-influxdb", "apache-airflow-providers-microsoft-mssql", "apache-airflow-providers-mongo", "apache-airflow-providers-mysql", "apache-airflow-providers-neo4j", "apache-airflow-providers-postgres", "apache-airflow-providers-presto", "apache-airflow-providers-trino", "apache-airflow-providers-vertica", "cassandra-driver (>=3.13.0)", "cloudant (>=2.0)", "databricks-sql-connector (>=2.0.0,<3.0.0)", "dnspython (>=1.13.0)", "hdfs[avro,dataframe,kerberos] (>=2.0.4)", "hmsclient (>=0.1.0)", "impyla (>=0.18.0,<1.0)", "influxdb-client (>=1.19.0)", "mysqlclient (>=1.3.6)", "neo4j (>=4.2.1)", "pandas (>=0.17.1)", "pinotdb (>0.4.7)", "presto-python-client (>=0.8.2)", "psycopg2-binary (>=2.8.0)", "pydruid (>=0.4.1)", "pyexasol 
(>=0.5.1)", "pyhive[hive] (>=0.6.0)", "pymongo (>=3.6.0)", "pymssql (>=2.1.5)", "python-arango (>=7.3.2)", "requests (>=2.26.0)", "requests (>=2.27,<3)", "sasl (>=0.3.1)", "sqlalchemy-drill (>=1.1.0)", "thrift (>=0.9.2)", "trino (>=0.318.0)", "vertica-python (>=0.5.1)"] +all = ["PyGithub (!=1.58)", "PyOpenSSL", "adal (>=1.2.7)", "aiobotocore (>=2.1.1)", "aiohttp", "aiohttp (>=3.6.3,<4)", "alibabacloud-adb20211201 (>=1.0.0)", "alibabacloud-tea-openapi (>=0.3.7)", "amqp", "analytics-python (>=1.2.9)", "apache-airflow (>=2.4.0)", "apache-airflow (>=2.7.0)", "apache-airflow-providers-airbyte", "apache-airflow-providers-alibaba", "apache-airflow-providers-amazon", "apache-airflow-providers-apache-beam", "apache-airflow-providers-apache-cassandra", "apache-airflow-providers-apache-drill", "apache-airflow-providers-apache-druid", "apache-airflow-providers-apache-flink", "apache-airflow-providers-apache-hdfs", "apache-airflow-providers-apache-hive", "apache-airflow-providers-apache-impala", "apache-airflow-providers-apache-kafka", "apache-airflow-providers-apache-kylin", "apache-airflow-providers-apache-livy", "apache-airflow-providers-apache-pig", "apache-airflow-providers-apache-pinot", "apache-airflow-providers-apache-spark", "apache-airflow-providers-apache-sqoop", "apache-airflow-providers-apprise", "apache-airflow-providers-arangodb", "apache-airflow-providers-asana", "apache-airflow-providers-atlassian-jira", "apache-airflow-providers-celery", "apache-airflow-providers-cloudant", "apache-airflow-providers-cncf-kubernetes", "apache-airflow-providers-common-sql", "apache-airflow-providers-daskexecutor", "apache-airflow-providers-databricks", "apache-airflow-providers-datadog", "apache-airflow-providers-dbt-cloud", "apache-airflow-providers-dingding", "apache-airflow-providers-discord", "apache-airflow-providers-docker", "apache-airflow-providers-elasticsearch", "apache-airflow-providers-exasol", "apache-airflow-providers-facebook", "apache-airflow-providers-ftp", "apache-airflow-providers-github", "apache-airflow-providers-google", "apache-airflow-providers-grpc", "apache-airflow-providers-hashicorp", "apache-airflow-providers-http", "apache-airflow-providers-imap", "apache-airflow-providers-influxdb", "apache-airflow-providers-jdbc", "apache-airflow-providers-jenkins", "apache-airflow-providers-microsoft-azure", "apache-airflow-providers-microsoft-mssql", "apache-airflow-providers-microsoft-psrp", "apache-airflow-providers-microsoft-winrm", "apache-airflow-providers-mongo", "apache-airflow-providers-mysql", "apache-airflow-providers-neo4j", "apache-airflow-providers-odbc", "apache-airflow-providers-openfaas", "apache-airflow-providers-openlineage", "apache-airflow-providers-opsgenie", "apache-airflow-providers-oracle", "apache-airflow-providers-pagerduty", "apache-airflow-providers-papermill", "apache-airflow-providers-plexus", "apache-airflow-providers-postgres", "apache-airflow-providers-presto", "apache-airflow-providers-redis", "apache-airflow-providers-salesforce", "apache-airflow-providers-samba", "apache-airflow-providers-segment", "apache-airflow-providers-sendgrid", "apache-airflow-providers-sftp", "apache-airflow-providers-singularity", "apache-airflow-providers-slack", "apache-airflow-providers-smtp", "apache-airflow-providers-snowflake", "apache-airflow-providers-sqlite", "apache-airflow-providers-ssh", "apache-airflow-providers-tableau", "apache-airflow-providers-tabular", "apache-airflow-providers-telegram", "apache-airflow-providers-trino", "apache-airflow-providers-vertica", 
"apache-airflow-providers-zendesk", "apache-beam (>=2.47.0)", "apprise", "arrow (>=0.16.0)", "asana (>=0.10,<4.0.0)", "asgiref", "asgiref (>=3.5.2)", "atlasclient (>=0.1.2)", "atlassian-python-api (>=1.14.2)", "attrs (>=22.2)", "authlib (>=1.0.0)", "azure-batch (>=8.0.0)", "azure-cosmos (>=4.0.0)", "azure-datalake-store (>=0.0.45)", "azure-identity (>=1.3.1)", "azure-keyvault-secrets (>=4.1.0)", "azure-kusto-data (>=0.0.43,<0.1)", "azure-mgmt-containerinstance (>=1.5.0,<2.0)", "azure-mgmt-datafactory (>=1.0.0,<2.0)", "azure-mgmt-datalake-store (>=0.5.0)", "azure-mgmt-resource (>=2.2.0)", "azure-servicebus (>=7.6.1)", "azure-storage-blob (>=12.14.0)", "azure-storage-common (>=2.1.0)", "azure-storage-file (>=2.1.0)", "azure-storage-file-datalake (>=12.9.1)", "azure-synapse-spark", "bcrypt (>=2.0.0)", "blinker (>=1.1)", "boto3 (>=1.28.0)", "botocore (>=1.31.0)", "cassandra-driver (>=3.13.0)", "celery (>=5.3.0,!=5.3.2,!=5.3.3,<6)", "cgroupspy (>=0.2.2)", "cloudant (>=2.0)", "cloudpickle (>=1.4.1)", "confluent-kafka (>=1.8.2)", "cryptography (>=2.0.0)", "dask (>=2.9.0,!=2022.10.1,!=2023.5.0)", "databricks-sql-connector (>=2.0.0,<3.0.0)", "datadog (>=0.14.0)", "distributed (>=2.11.1,!=2023.5.0)", "dnspython (>=1.13.0)", "docker (>=5.0.3)", "elasticsearch (>8,<9)", "eventlet (>=0.33.3)", "facebook-business (>=6.0.2)", "flask-appbuilder[oauth] (==4.3.6)", "flask-bcrypt (>=0.7.1)", "flower (>=1.0.0)", "gcloud-aio-auth (>=4.0.0,<5.0.0)", "gcloud-aio-bigquery (>=6.1.2)", "gcloud-aio-storage", "gevent (>=0.13)", "google-ads (>=21.2.0)", "google-api-core (>=2.11.0)", "google-api-python-client (>=1.6.0)", "google-auth (>=1.0.0)", "google-auth (>=1.0.0,<3.0.0)", "google-auth-httplib2 (>=0.0.1)", "google-cloud-aiplatform (>=1.22.1)", "google-cloud-automl (>=2.11.0)", "google-cloud-bigquery-datatransfer (>=3.11.0)", "google-cloud-bigtable (>=2.17.0)", "google-cloud-build (>=3.13.0)", "google-cloud-compute (>=1.10.0)", "google-cloud-container (>=2.17.4)", "google-cloud-datacatalog (>=3.11.1)", "google-cloud-dataflow-client (>=0.8.2)", "google-cloud-dataform (>=0.5.0)", "google-cloud-dataplex (>=1.4.2)", "google-cloud-dataproc (>=5.4.0)", "google-cloud-dataproc-metastore (>=1.12.0)", "google-cloud-dlp (>=3.12.0)", "google-cloud-kms (>=2.15.0)", "google-cloud-language (>=2.9.0)", "google-cloud-logging (>=3.5.0)", "google-cloud-memcache (>=1.7.0)", "google-cloud-monitoring (>=2.14.1)", "google-cloud-orchestration-airflow (>=1.7.0)", "google-cloud-os-login (>=2.9.1)", "google-cloud-pubsub (>=2.15.0)", "google-cloud-redis (>=2.12.0)", "google-cloud-secret-manager (>=2.16.0)", "google-cloud-spanner (>=3.11.1)", "google-cloud-speech (>=2.18.0)", "google-cloud-storage (>=2.7.0)", "google-cloud-storage-transfer (>=1.4.1)", "google-cloud-tasks (>=2.13.0)", "google-cloud-texttospeech (>=2.14.1)", "google-cloud-translate (>=3.11.0)", "google-cloud-videointelligence (>=2.11.0)", "google-cloud-vision (>=3.4.0)", "google-cloud-workflows (>=1.10.0)", "greenlet (>=0.4.9)", "grpcio (>=1.15.0)", "grpcio-gcp (>=0.2.2)", "hdfs[avro,dataframe,kerberos] (>=2.0.4)", "hmsclient (>=0.1.0)", "httpx", "hvac (>=0.10)", "impyla (>=0.18.0,<1.0)", "influxdb-client (>=1.19.0)", "jaydebeapi (>=1.1.1)", "json-merge-patch (>=0.2)", "jsonpath-ng (>=1.5.3)", "kubernetes (>=21.7.0,<24)", "kubernetes-asyncio (>=18.20.1,<25)", "kylinpy (>=2.6)", "ldap3 (>=2.5.1)", "looker-sdk (>=22.2.0)", "mysqlclient (>=1.3.6)", "neo4j (>=4.2.1)", "openlineage-integration-common (>=0.28.0)", "openlineage-python (>=0.28.0)", "opentelemetry-exporter-prometheus", 
"opsgenie-sdk (>=2.1.5)", "oracledb (>=1.0.0)", "oss2 (>=2.14.0)", "pandas (>=0.17.1)", "pandas-gbq", "papermill[all] (>=1.2.1)", "paramiko (>=2.6.0)", "pdpyras (>=4.1.2)", "pinotdb (>0.4.7)", "plyvel", "presto-python-client (>=0.8.2)", "proto-plus (>=1.19.6)", "psycopg2-binary (>=2.8.0)", "pyarrow (>=9.0.0)", "pydruid (>=0.4.1)", "pyexasol (>=0.5.1)", "pyhive[hive-pure-sasl] (>=0.7.0)", "pykerberos (>=1.1.13)", "pymongo (>=3.6.0)", "pymssql (>=2.1.5)", "pyodbc", "pypsrp (>=0.8.0)", "pyspark", "python-arango (>=7.3.2)", "python-dotenv (>=0.21.0)", "python-jenkins (>=1.0.0)", "python-ldap", "python-telegram-bot (>=20.0.0)", "pywinrm (>=0.4)", "redis (>=4.5.2,!=4.5.5,<5.0.0)", "redshift-connector (>=2.0.888)", "requests (>=2.26.0)", "requests (>=2.27,<3)", "requests-kerberos (>=0.10.0)", "requests-toolbelt", "scrapbook[all]", "sendgrid (>=6.0.0)", "sentry-sdk (>=0.8.0)", "simple-salesforce (>=1.0.0)", "slack-sdk (>=3.0.0)", "smbprotocol (>=1.5.0)", "snowflake-connector-python (>=2.4.1)", "snowflake-sqlalchemy (>=1.1.0)", "spython (>=0.0.56)", "sqlalchemy-bigquery (>=1.2.1)", "sqlalchemy-drill (>=1.1.0)", "sqlalchemy-redshift (>=0.8.6)", "sqlalchemy-spanner (>=1.6.2)", "sqlparse (>=0.4.2)", "sshtunnel (>=0.3.2)", "statsd (>=3.3.0)", "tableauserverclient", "thrift (>=0.9.2)", "thrift-sasl (>=0.2.0)", "trino (>=0.318.0)", "vertica-python (>=0.5.1)", "virtualenv", "watchtower (>=2.0.1,<2.1.0)", "zenpy (>=2.0.24)"] +all-dbs = ["aiohttp (>=3.6.3,<4)", "apache-airflow (>=2.4.0)", "apache-airflow-providers-apache-cassandra", "apache-airflow-providers-apache-drill", "apache-airflow-providers-apache-druid", "apache-airflow-providers-apache-hdfs", "apache-airflow-providers-apache-hive", "apache-airflow-providers-apache-impala", "apache-airflow-providers-apache-pinot", "apache-airflow-providers-arangodb", "apache-airflow-providers-cloudant", "apache-airflow-providers-common-sql (>=1.3.1)", "apache-airflow-providers-common-sql (>=1.5.0)", "apache-airflow-providers-databricks", "apache-airflow-providers-exasol", "apache-airflow-providers-influxdb", "apache-airflow-providers-microsoft-mssql", "apache-airflow-providers-mongo", "apache-airflow-providers-mysql", "apache-airflow-providers-neo4j", "apache-airflow-providers-postgres", "apache-airflow-providers-presto", "apache-airflow-providers-trino", "apache-airflow-providers-vertica", "cassandra-driver (>=3.13.0)", "cloudant (>=2.0)", "databricks-sql-connector (>=2.0.0,<3.0.0)", "dnspython (>=1.13.0)", "hdfs[avro,dataframe,kerberos] (>=2.0.4)", "hmsclient (>=0.1.0)", "impyla (>=0.18.0,<1.0)", "influxdb-client (>=1.19.0)", "mysqlclient (>=1.3.6)", "neo4j (>=4.2.1)", "pandas (>=0.17.1)", "pinotdb (>0.4.7)", "presto-python-client (>=0.8.2)", "psycopg2-binary (>=2.8.0)", "pydruid (>=0.4.1)", "pyexasol (>=0.5.1)", "pyhive[hive-pure-sasl] (>=0.7.0)", "pymongo (>=3.6.0)", "pymssql (>=2.1.5)", "python-arango (>=7.3.2)", "requests (>=2.26.0)", "requests (>=2.27,<3)", "sqlalchemy-drill (>=1.1.0)", "thrift (>=0.9.2)", "trino (>=0.318.0)", "vertica-python (>=0.5.1)"] amazon = ["apache-airflow-providers-amazon"] apache-atlas = ["atlasclient (>=0.1.2)"] apache-beam = ["apache-airflow-providers-apache-beam"] @@ -396,7 +284,7 @@ atlassian-jira = ["apache-airflow-providers-atlassian-jira"] aws = ["apache-airflow-providers-amazon"] azure = ["apache-airflow-providers-microsoft-azure"] cassandra = ["apache-airflow-providers-apache-cassandra"] -celery = ["apache-airflow (>=2.4.0)", "apache-airflow-providers-celery", "celery (>=5.2.3,<6)", "flower (>=1.0.0)"] +celery = 
["apache-airflow (>=2.4.0)", "apache-airflow-providers-celery", "celery (>=5.3.0,!=5.3.2,!=5.3.3,<6)", "flower (>=1.0.0)"] cgroups = ["cgroupspy (>=0.2.2)"] cloudant = ["apache-airflow-providers-cloudant"] cncf-kubernetes = ["apache-airflow (>=2.4.0)", "apache-airflow-providers-cncf-kubernetes", "asgiref (>=3.5.2)", "cryptography (>=2.0.0)", "kubernetes (>=21.7.0,<24)", "kubernetes-asyncio (>=18.20.1,<25)"] @@ -407,13 +295,13 @@ databricks = ["apache-airflow-providers-databricks"] datadog = ["apache-airflow-providers-datadog"] dbt-cloud = ["apache-airflow-providers-dbt-cloud"] deprecated-api = ["requests (>=2.26.0)"] -devel = ["aiobotocore (>=2.1.1)", "aioresponses", "apache-airflow (>=2.4.0)", "apache-airflow-providers-common-sql", "astroid (>=2.12.3)", "aws-xray-sdk", "bcrypt (>=2.0.0)", "beautifulsoup4 (>=4.7.1)", "black", "blinker", "bowler", "cgroupspy (>=0.2.2)", "checksumdir", "click (>=8.0)", "click (>=8.0,!=8.1.4,!=8.1.5)", "coverage", "cryptography (>=2.0.0)", "docutils (<0.17.0)", "eralchemy2", "filelock", "flask-bcrypt (>=0.7.1)", "gitpython", "ipdb", "jira", "jsondiff", "jsonpath-ng (>=1.5.3)", "kubernetes (>=21.7.0,<24)", "mongomock", "moto[cloudformation,glue] (>=4.0)", "mypy (==1.2.0)", "mysqlclient (>=1.3.6)", "pandas (>=0.17.1)", "paramiko", "pipdeptree", "pre-commit", "pyarrow (>=9.0.0)", "pygithub", "pypsrp", "pytest", "pytest-asyncio", "pytest-capture-warnings", "pytest-cov", "pytest-httpx", "pytest-instafail", "pytest-mock", "pytest-rerunfailures", "pytest-timeouts", "pytest-xdist", "python-jose", "pywinrm", "qds-sdk (>=1.9.6)", "requests-mock", "rich-click (>=1.5)", "ruff (>=0.0.219)", "semver", "sphinx (>=5.2.0)", "sphinx-airflow-theme", "sphinx-argparse (>=0.1.13)", "sphinx-autoapi (>=2.0.0)", "sphinx-copybutton", "sphinx-jinja (>=2.0)", "sphinx-rtd-theme (>=0.1.6)", "sphinxcontrib-httpdomain (>=1.7.0)", "sphinxcontrib-redoc (>=1.6.0)", "sphinxcontrib-spelling (>=7.3)", "time-machine", "towncrier", "twine", "types-Deprecated", "types-Markdown", "types-PyMySQL", "types-PyYAML", "types-boto", "types-certifi", "types-croniter", "types-docutils", "types-paramiko", "types-protobuf", "types-python-dateutil", "types-python-slugify", "types-pytz", "types-redis", "types-requests", "types-setuptools", "types-tabulate", "types-termcolor", "types-toml", "wheel", "yamllint"] -devel-all = ["PyGithub (!=1.58)", "PyOpenSSL", "adal (>=1.2.7)", "aiobotocore (>=2.1.1)", "aiohttp", "aiohttp (>=3.6.3,<4)", "aioresponses", "alibabacloud-adb20211201 (>=1.0.0)", "alibabacloud-tea-openapi (>=0.3.7)", "amqp", "analytics-python (>=1.2.9)", "apache-airflow (>=2.4.0)", "apache-airflow (>=2.7.0)", "apache-airflow-providers-airbyte", "apache-airflow-providers-alibaba", "apache-airflow-providers-amazon", "apache-airflow-providers-apache-beam", "apache-airflow-providers-apache-cassandra", "apache-airflow-providers-apache-drill", "apache-airflow-providers-apache-druid", "apache-airflow-providers-apache-flink", "apache-airflow-providers-apache-hdfs", "apache-airflow-providers-apache-hive", "apache-airflow-providers-apache-impala", "apache-airflow-providers-apache-kafka", "apache-airflow-providers-apache-kylin", "apache-airflow-providers-apache-livy", "apache-airflow-providers-apache-pig", "apache-airflow-providers-apache-pinot", "apache-airflow-providers-apache-spark", "apache-airflow-providers-apache-sqoop", "apache-airflow-providers-apprise", "apache-airflow-providers-arangodb", "apache-airflow-providers-asana", "apache-airflow-providers-atlassian-jira", "apache-airflow-providers-celery", 
"apache-airflow-providers-cloudant", "apache-airflow-providers-cncf-kubernetes", "apache-airflow-providers-common-sql", "apache-airflow-providers-daskexecutor", "apache-airflow-providers-databricks", "apache-airflow-providers-datadog", "apache-airflow-providers-dbt-cloud", "apache-airflow-providers-dingding", "apache-airflow-providers-discord", "apache-airflow-providers-docker", "apache-airflow-providers-elasticsearch", "apache-airflow-providers-exasol", "apache-airflow-providers-facebook", "apache-airflow-providers-ftp", "apache-airflow-providers-github", "apache-airflow-providers-google", "apache-airflow-providers-grpc", "apache-airflow-providers-hashicorp", "apache-airflow-providers-http", "apache-airflow-providers-imap", "apache-airflow-providers-influxdb", "apache-airflow-providers-jdbc", "apache-airflow-providers-jenkins", "apache-airflow-providers-microsoft-azure", "apache-airflow-providers-microsoft-mssql", "apache-airflow-providers-microsoft-psrp", "apache-airflow-providers-microsoft-winrm", "apache-airflow-providers-mongo", "apache-airflow-providers-mysql", "apache-airflow-providers-neo4j", "apache-airflow-providers-odbc", "apache-airflow-providers-openfaas", "apache-airflow-providers-openlineage", "apache-airflow-providers-opsgenie", "apache-airflow-providers-oracle", "apache-airflow-providers-pagerduty", "apache-airflow-providers-papermill", "apache-airflow-providers-plexus", "apache-airflow-providers-postgres", "apache-airflow-providers-presto", "apache-airflow-providers-qubole", "apache-airflow-providers-redis", "apache-airflow-providers-salesforce", "apache-airflow-providers-samba", "apache-airflow-providers-segment", "apache-airflow-providers-sendgrid", "apache-airflow-providers-sftp", "apache-airflow-providers-singularity", "apache-airflow-providers-slack", "apache-airflow-providers-smtp", "apache-airflow-providers-snowflake", "apache-airflow-providers-sqlite", "apache-airflow-providers-ssh", "apache-airflow-providers-tableau", "apache-airflow-providers-tabular", "apache-airflow-providers-telegram", "apache-airflow-providers-trino", "apache-airflow-providers-vertica", "apache-airflow-providers-zendesk", "apache-beam (>=2.47.0)", "apprise", "arrow (>=0.16.0)", "asana (>=0.10,<4.0.0)", "asgiref", "asgiref (>=3.5.2)", "astroid (>=2.12.3)", "atlasclient (>=0.1.2)", "atlassian-python-api (>=1.14.2)", "attrs (>=22.2)", "authlib (>=1.0.0)", "aws-xray-sdk", "azure-batch (>=8.0.0)", "azure-cosmos (>=4.0.0)", "azure-datalake-store (>=0.0.45)", "azure-identity (>=1.3.1)", "azure-keyvault-secrets (>=4.1.0)", "azure-kusto-data (>=0.0.43,<0.1)", "azure-mgmt-containerinstance (>=1.5.0,<2.0)", "azure-mgmt-datafactory (>=1.0.0,<2.0)", "azure-mgmt-datalake-store (>=0.5.0)", "azure-mgmt-resource (>=2.2.0)", "azure-servicebus (>=7.6.1)", "azure-storage-blob (>=12.14.0)", "azure-storage-common (>=2.1.0)", "azure-storage-file (>=2.1.0)", "azure-storage-file-datalake (>=12.9.1)", "azure-synapse-spark", "bcrypt (>=2.0.0)", "beautifulsoup4 (>=4.7.1)", "black", "blinker", "blinker (>=1.1)", "boto3 (>=1.24.0)", "bowler", "cassandra-driver (>=3.13.0)", "celery (>=5.2.3,<6)", "cgroupspy (>=0.2.2)", "checksumdir", "click (>=8.0)", "click (>=8.0,!=8.1.4,!=8.1.5)", "cloudant (>=2.0)", "cloudpickle (>=1.4.1)", "confluent-kafka (>=1.8.2)", "coverage", "cryptography (>=2.0.0)", "dask (>=2.9.0,!=2022.10.1,!=2023.5.0)", "databricks-sql-connector (>=2.0.0,<3.0.0)", "datadog (>=0.14.0)", "distributed (>=2.11.1,!=2023.5.0)", "dnspython (>=1.13.0)", "docker (>=5.0.3)", "docutils (<0.17.0)", "elasticsearch 
(>7,<7.15.0)", "eralchemy2", "eventlet (>=0.33.3)", "facebook-business (>=6.0.2)", "filelock", "flask-appbuilder[oauth] (==4.3.3)", "flask-bcrypt (>=0.7.1)", "flower (>=1.0.0)", "gcloud-aio-auth (>=4.0.0,<5.0.0)", "gcloud-aio-bigquery (>=6.1.2)", "gcloud-aio-storage", "gevent (>=0.13)", "gitpython", "google-ads (>=21.2.0)", "google-api-core (>=2.11.0)", "google-api-python-client (>=1.6.0)", "google-auth (>=1.0.0)", "google-auth (>=1.0.0,<3.0.0)", "google-auth-httplib2 (>=0.0.1)", "google-cloud-aiplatform (>=1.22.1)", "google-cloud-automl (>=2.11.0)", "google-cloud-bigquery-datatransfer (>=3.11.0)", "google-cloud-bigtable (>=2.17.0)", "google-cloud-build (>=3.13.0)", "google-cloud-compute (>=1.10.0)", "google-cloud-container (>=2.17.4)", "google-cloud-datacatalog (>=3.11.1)", "google-cloud-dataflow-client (>=0.8.2)", "google-cloud-dataform (>=0.5.0)", "google-cloud-dataplex (>=1.4.2)", "google-cloud-dataproc (>=5.4.0)", "google-cloud-dataproc-metastore (>=1.12.0)", "google-cloud-dlp (>=3.12.0)", "google-cloud-kms (>=2.15.0)", "google-cloud-language (>=2.9.0)", "google-cloud-logging (>=3.5.0)", "google-cloud-memcache (>=1.7.0)", "google-cloud-monitoring (>=2.14.1)", "google-cloud-orchestration-airflow (>=1.7.0)", "google-cloud-os-login (>=2.9.1)", "google-cloud-pubsub (>=2.15.0)", "google-cloud-redis (>=2.12.0)", "google-cloud-secret-manager (>=2.16.0)", "google-cloud-spanner (>=3.11.1)", "google-cloud-speech (>=2.18.0)", "google-cloud-storage (>=2.7.0)", "google-cloud-storage-transfer (>=1.4.1)", "google-cloud-tasks (>=2.13.0)", "google-cloud-texttospeech (>=2.14.1)", "google-cloud-translate (>=3.11.0)", "google-cloud-videointelligence (>=2.11.0)", "google-cloud-vision (>=3.4.0)", "google-cloud-workflows (>=1.10.0)", "greenlet (>=0.4.9)", "grpcio (>=1.15.0)", "grpcio-gcp (>=0.2.2)", "hdfs[avro,dataframe,kerberos] (>=2.0.4)", "hmsclient (>=0.1.0)", "httpx", "hvac (>=0.10)", "impyla (>=0.18.0,<1.0)", "influxdb-client (>=1.19.0)", "ipdb", "jaydebeapi (>=1.1.1)", "jira", "json-merge-patch (>=0.2)", "jsondiff", "jsonpath-ng (>=1.5.3)", "kubernetes (>=21.7.0,<24)", "kubernetes-asyncio (>=18.20.1,<25)", "kylinpy (>=2.6)", "ldap3 (>=2.5.1)", "looker-sdk (>=22.2.0)", "mongomock", "moto[cloudformation,glue] (>=4.0)", "mypy (==1.2.0)", "mypy-boto3-appflow (>=1.24.0,<1.28.12)", "mypy-boto3-rds (>=1.24.0)", "mypy-boto3-redshift-data (>=1.24.0)", "mypy-boto3-s3 (>=1.24.0)", "mysqlclient (>=1.3.6)", "neo4j (>=4.2.1)", "openlineage-integration-common (>=0.28.0)", "openlineage-python (>=0.28.0)", "opentelemetry-exporter-prometheus", "opsgenie-sdk (>=2.1.5)", "oracledb (>=1.0.0)", "oss2 (>=2.14.0)", "pandas (>=0.17.1)", "pandas-gbq", "papermill[all] (>=1.2.1)", "paramiko", "paramiko (>=2.6.0)", "pdpyras (>=4.1.2)", "pinotdb (>0.4.7)", "pipdeptree", "plyvel", "pre-commit", "presto-python-client (>=0.8.2)", "proto-plus (>=1.19.6)", "psycopg2-binary (>=2.8.0)", "pyarrow (>=9.0.0)", "pydruid (>=0.4.1)", "pyexasol (>=0.5.1)", "pygithub", "pyhive[hive] (>=0.6.0)", "pykerberos (>=1.1.13)", "pymongo (>=3.6.0)", "pymssql (>=2.1.5)", "pyodbc", "pypsrp", "pypsrp (>=0.8.0)", "pyspark", "pytest", "pytest-asyncio", "pytest-capture-warnings", "pytest-cov", "pytest-httpx", "pytest-instafail", "pytest-mock", "pytest-rerunfailures", "pytest-timeouts", "pytest-xdist", "python-arango (>=7.3.2)", "python-dotenv (>=0.21.0)", "python-jenkins (>=1.0.0)", "python-jose", "python-ldap", "python-telegram-bot (>=20.0.0)", "pywinrm", "pywinrm (>=0.4)", "qds-sdk (>=1.10.4)", "qds-sdk (>=1.9.6)", "redis (>=3.2.0)", "redshift-connector 
(>=2.0.888)", "requests (>=2.26.0)", "requests (>=2.27,<3)", "requests-kerberos (>=0.10.0)", "requests-mock", "requests-toolbelt", "rich-click (>=1.5)", "ruff (>=0.0.219)", "sasl (>=0.3.1)", "scrapbook[all]", "semver", "sendgrid (>=6.0.0)", "sentry-sdk (>=0.8.0)", "simple-salesforce (>=1.0.0)", "slack-sdk (>=3.0.0)", "smbprotocol (>=1.5.0)", "snowflake-connector-python (>=2.4.1)", "snowflake-sqlalchemy (>=1.1.0)", "sphinx (>=5.2.0)", "sphinx-airflow-theme", "sphinx-argparse (>=0.1.13)", "sphinx-autoapi (>=2.0.0)", "sphinx-copybutton", "sphinx-jinja (>=2.0)", "sphinx-rtd-theme (>=0.1.6)", "sphinxcontrib-httpdomain (>=1.7.0)", "sphinxcontrib-redoc (>=1.6.0)", "sphinxcontrib-spelling (>=7.3)", "spython (>=0.0.56)", "sqlalchemy-bigquery (>=1.2.1)", "sqlalchemy-drill (>=1.1.0)", "sqlalchemy-redshift (>=0.8.6)", "sqlalchemy-spanner (>=1.6.2)", "sqlparse (>=0.4.2)", "sshtunnel (>=0.3.2)", "statsd (>=3.3.0)", "tableauserverclient", "thrift (>=0.9.2)", "thrift-sasl (>=0.2.0)", "time-machine", "towncrier", "trino (>=0.318.0)", "twine", "types-Deprecated", "types-Markdown", "types-PyMySQL", "types-PyYAML", "types-boto", "types-certifi", "types-croniter", "types-docutils", "types-paramiko", "types-protobuf", "types-python-dateutil", "types-python-slugify", "types-pytz", "types-redis", "types-requests", "types-setuptools", "types-tabulate", "types-termcolor", "types-toml", "vertica-python (>=0.5.1)", "virtualenv", "watchtower (>=2.0.1,<2.1.0)", "wheel", "yamllint", "zenpy (>=2.0.24)"] -devel-ci = ["PyGithub (!=1.58)", "PyOpenSSL", "adal (>=1.2.7)", "aiobotocore (>=2.1.1)", "aiohttp", "aiohttp (>=3.6.3,<4)", "aioresponses", "alibabacloud-adb20211201 (>=1.0.0)", "alibabacloud-tea-openapi (>=0.3.7)", "amqp", "analytics-python (>=1.2.9)", "apache-airflow (>=2.4.0)", "apache-airflow (>=2.7.0)", "apache-airflow-providers-airbyte", "apache-airflow-providers-alibaba", "apache-airflow-providers-amazon", "apache-airflow-providers-apache-beam", "apache-airflow-providers-apache-cassandra", "apache-airflow-providers-apache-drill", "apache-airflow-providers-apache-druid", "apache-airflow-providers-apache-flink", "apache-airflow-providers-apache-hdfs", "apache-airflow-providers-apache-hive", "apache-airflow-providers-apache-impala", "apache-airflow-providers-apache-kafka", "apache-airflow-providers-apache-kylin", "apache-airflow-providers-apache-livy", "apache-airflow-providers-apache-pig", "apache-airflow-providers-apache-pinot", "apache-airflow-providers-apache-spark", "apache-airflow-providers-apache-sqoop", "apache-airflow-providers-apprise", "apache-airflow-providers-arangodb", "apache-airflow-providers-asana", "apache-airflow-providers-atlassian-jira", "apache-airflow-providers-celery", "apache-airflow-providers-cloudant", "apache-airflow-providers-cncf-kubernetes", "apache-airflow-providers-common-sql", "apache-airflow-providers-daskexecutor", "apache-airflow-providers-databricks", "apache-airflow-providers-datadog", "apache-airflow-providers-dbt-cloud", "apache-airflow-providers-dingding", "apache-airflow-providers-discord", "apache-airflow-providers-docker", "apache-airflow-providers-elasticsearch", "apache-airflow-providers-exasol", "apache-airflow-providers-facebook", "apache-airflow-providers-ftp", "apache-airflow-providers-github", "apache-airflow-providers-google", "apache-airflow-providers-grpc", "apache-airflow-providers-hashicorp", "apache-airflow-providers-http", "apache-airflow-providers-imap", "apache-airflow-providers-influxdb", "apache-airflow-providers-jdbc", "apache-airflow-providers-jenkins", 
"apache-airflow-providers-microsoft-azure", "apache-airflow-providers-microsoft-mssql", "apache-airflow-providers-microsoft-psrp", "apache-airflow-providers-microsoft-winrm", "apache-airflow-providers-mongo", "apache-airflow-providers-mysql", "apache-airflow-providers-neo4j", "apache-airflow-providers-odbc", "apache-airflow-providers-openfaas", "apache-airflow-providers-openlineage", "apache-airflow-providers-opsgenie", "apache-airflow-providers-oracle", "apache-airflow-providers-pagerduty", "apache-airflow-providers-papermill", "apache-airflow-providers-plexus", "apache-airflow-providers-postgres", "apache-airflow-providers-presto", "apache-airflow-providers-qubole", "apache-airflow-providers-redis", "apache-airflow-providers-salesforce", "apache-airflow-providers-samba", "apache-airflow-providers-segment", "apache-airflow-providers-sendgrid", "apache-airflow-providers-sftp", "apache-airflow-providers-singularity", "apache-airflow-providers-slack", "apache-airflow-providers-smtp", "apache-airflow-providers-snowflake", "apache-airflow-providers-sqlite", "apache-airflow-providers-ssh", "apache-airflow-providers-tableau", "apache-airflow-providers-tabular", "apache-airflow-providers-telegram", "apache-airflow-providers-trino", "apache-airflow-providers-vertica", "apache-airflow-providers-zendesk", "apache-beam (>=2.47.0)", "apprise", "arrow (>=0.16.0)", "asana (>=0.10,<4.0.0)", "asgiref", "asgiref (>=3.5.2)", "astroid (>=2.12.3)", "atlasclient (>=0.1.2)", "atlassian-python-api (>=1.14.2)", "attrs (>=22.2)", "authlib (>=1.0.0)", "aws-xray-sdk", "azure-batch (>=8.0.0)", "azure-cosmos (>=4.0.0)", "azure-datalake-store (>=0.0.45)", "azure-identity (>=1.3.1)", "azure-keyvault-secrets (>=4.1.0)", "azure-kusto-data (>=0.0.43,<0.1)", "azure-mgmt-containerinstance (>=1.5.0,<2.0)", "azure-mgmt-datafactory (>=1.0.0,<2.0)", "azure-mgmt-datalake-store (>=0.5.0)", "azure-mgmt-resource (>=2.2.0)", "azure-servicebus (>=7.6.1)", "azure-storage-blob (>=12.14.0)", "azure-storage-common (>=2.1.0)", "azure-storage-file (>=2.1.0)", "azure-storage-file-datalake (>=12.9.1)", "azure-synapse-spark", "bcrypt (>=2.0.0)", "beautifulsoup4 (>=4.7.1)", "black", "blinker", "blinker (>=1.1)", "boto3 (>=1.24.0)", "bowler", "cassandra-driver (>=3.13.0)", "celery (>=5.2.3,<6)", "cgroupspy (>=0.2.2)", "checksumdir", "click (>=8.0)", "click (>=8.0,!=8.1.4,!=8.1.5)", "cloudant (>=2.0)", "cloudpickle (>=1.4.1)", "confluent-kafka (>=1.8.2)", "coverage", "cryptography (>=2.0.0)", "dask (>=2.9.0,!=2022.10.1,!=2023.5.0)", "databricks-sql-connector (>=2.0.0,<3.0.0)", "datadog (>=0.14.0)", "distributed (>=2.11.1,!=2023.5.0)", "dnspython (>=1.13.0)", "docker (>=5.0.3)", "docutils (<0.17.0)", "elasticsearch (>7,<7.15.0)", "eralchemy2", "eventlet (>=0.33.3)", "facebook-business (>=6.0.2)", "filelock", "flask-appbuilder[oauth] (==4.3.3)", "flask-bcrypt (>=0.7.1)", "flower (>=1.0.0)", "gcloud-aio-auth (>=4.0.0,<5.0.0)", "gcloud-aio-bigquery (>=6.1.2)", "gcloud-aio-storage", "gevent (>=0.13)", "gitpython", "google-ads (>=21.2.0)", "google-api-core (>=2.11.0)", "google-api-python-client (>=1.6.0)", "google-auth (>=1.0.0)", "google-auth (>=1.0.0,<3.0.0)", "google-auth-httplib2 (>=0.0.1)", "google-cloud-aiplatform (>=1.22.1)", "google-cloud-automl (>=2.11.0)", "google-cloud-bigquery-datatransfer (>=3.11.0)", "google-cloud-bigtable (>=2.17.0)", "google-cloud-build (>=3.13.0)", "google-cloud-compute (>=1.10.0)", "google-cloud-container (>=2.17.4)", "google-cloud-datacatalog (>=3.11.1)", "google-cloud-dataflow-client (>=0.8.2)", 
"google-cloud-dataform (>=0.5.0)", "google-cloud-dataplex (>=1.4.2)", "google-cloud-dataproc (>=5.4.0)", "google-cloud-dataproc-metastore (>=1.12.0)", "google-cloud-dlp (>=3.12.0)", "google-cloud-kms (>=2.15.0)", "google-cloud-language (>=2.9.0)", "google-cloud-logging (>=3.5.0)", "google-cloud-memcache (>=1.7.0)", "google-cloud-monitoring (>=2.14.1)", "google-cloud-orchestration-airflow (>=1.7.0)", "google-cloud-os-login (>=2.9.1)", "google-cloud-pubsub (>=2.15.0)", "google-cloud-redis (>=2.12.0)", "google-cloud-secret-manager (>=2.16.0)", "google-cloud-spanner (>=3.11.1)", "google-cloud-speech (>=2.18.0)", "google-cloud-storage (>=2.7.0)", "google-cloud-storage-transfer (>=1.4.1)", "google-cloud-tasks (>=2.13.0)", "google-cloud-texttospeech (>=2.14.1)", "google-cloud-translate (>=3.11.0)", "google-cloud-videointelligence (>=2.11.0)", "google-cloud-vision (>=3.4.0)", "google-cloud-workflows (>=1.10.0)", "greenlet (>=0.4.9)", "grpcio (>=1.15.0)", "grpcio-gcp (>=0.2.2)", "hdfs[avro,dataframe,kerberos] (>=2.0.4)", "hmsclient (>=0.1.0)", "httpx", "hvac (>=0.10)", "impyla (>=0.18.0,<1.0)", "influxdb-client (>=1.19.0)", "ipdb", "jaydebeapi (>=1.1.1)", "jira", "json-merge-patch (>=0.2)", "jsondiff", "jsonpath-ng (>=1.5.3)", "kubernetes (>=21.7.0,<24)", "kubernetes-asyncio (>=18.20.1,<25)", "kylinpy (>=2.6)", "ldap3 (>=2.5.1)", "looker-sdk (>=22.2.0)", "mongomock", "moto[cloudformation,glue] (>=4.0)", "mypy (==1.2.0)", "mypy-boto3-appflow (>=1.24.0,<1.28.12)", "mypy-boto3-rds (>=1.24.0)", "mypy-boto3-redshift-data (>=1.24.0)", "mypy-boto3-s3 (>=1.24.0)", "mysqlclient (>=1.3.6)", "neo4j (>=4.2.1)", "openlineage-integration-common (>=0.28.0)", "openlineage-python (>=0.28.0)", "opentelemetry-exporter-prometheus", "opsgenie-sdk (>=2.1.5)", "oracledb (>=1.0.0)", "oss2 (>=2.14.0)", "pandas (>=0.17.1)", "pandas-gbq", "papermill[all] (>=1.2.1)", "paramiko", "paramiko (>=2.6.0)", "pdpyras (>=4.1.2)", "pinotdb (>0.4.7)", "pipdeptree", "plyvel", "pre-commit", "presto-python-client (>=0.8.2)", "proto-plus (>=1.19.6)", "psycopg2-binary (>=2.8.0)", "pyarrow (>=9.0.0)", "pydruid (>=0.4.1)", "pyexasol (>=0.5.1)", "pygithub", "pyhive[hive] (>=0.6.0)", "pykerberos (>=1.1.13)", "pymongo (>=3.6.0)", "pymssql (>=2.1.5)", "pyodbc", "pypsrp", "pypsrp (>=0.8.0)", "pyspark", "pytest", "pytest-asyncio", "pytest-capture-warnings", "pytest-cov", "pytest-httpx", "pytest-instafail", "pytest-mock", "pytest-rerunfailures", "pytest-timeouts", "pytest-xdist", "python-arango (>=7.3.2)", "python-dotenv (>=0.21.0)", "python-jenkins (>=1.0.0)", "python-jose", "python-ldap", "python-telegram-bot (>=20.0.0)", "pywinrm", "pywinrm (>=0.4)", "qds-sdk (>=1.10.4)", "qds-sdk (>=1.9.6)", "redis (>=3.2.0)", "redshift-connector (>=2.0.888)", "requests (>=2.26.0)", "requests (>=2.27,<3)", "requests-kerberos (>=0.10.0)", "requests-mock", "requests-toolbelt", "rich-click (>=1.5)", "ruff (>=0.0.219)", "sasl (>=0.3.1)", "scrapbook[all]", "semver", "sendgrid (>=6.0.0)", "sentry-sdk (>=0.8.0)", "simple-salesforce (>=1.0.0)", "slack-sdk (>=3.0.0)", "smbprotocol (>=1.5.0)", "snowflake-connector-python (>=2.4.1)", "snowflake-sqlalchemy (>=1.1.0)", "sphinx (>=5.2.0)", "sphinx-airflow-theme", "sphinx-argparse (>=0.1.13)", "sphinx-autoapi (>=2.0.0)", "sphinx-copybutton", "sphinx-jinja (>=2.0)", "sphinx-rtd-theme (>=0.1.6)", "sphinxcontrib-httpdomain (>=1.7.0)", "sphinxcontrib-redoc (>=1.6.0)", "sphinxcontrib-spelling (>=7.3)", "spython (>=0.0.56)", "sqlalchemy-bigquery (>=1.2.1)", "sqlalchemy-drill (>=1.1.0)", "sqlalchemy-redshift (>=0.8.6)", 
"sqlalchemy-spanner (>=1.6.2)", "sqlparse (>=0.4.2)", "sshtunnel (>=0.3.2)", "statsd (>=3.3.0)", "tableauserverclient", "thrift (>=0.9.2)", "thrift-sasl (>=0.2.0)", "time-machine", "towncrier", "trino (>=0.318.0)", "twine", "types-Deprecated", "types-Markdown", "types-PyMySQL", "types-PyYAML", "types-boto", "types-certifi", "types-croniter", "types-docutils", "types-paramiko", "types-protobuf", "types-python-dateutil", "types-python-slugify", "types-pytz", "types-redis", "types-requests", "types-setuptools", "types-tabulate", "types-termcolor", "types-toml", "vertica-python (>=0.5.1)", "virtualenv", "watchtower (>=2.0.1,<2.1.0)", "wheel", "yamllint", "zenpy (>=2.0.24)"] -devel-hadoop = ["aiobotocore (>=2.1.1)", "aioresponses", "apache-airflow (>=2.4.0)", "apache-airflow-providers-apache-hdfs", "apache-airflow-providers-apache-hive", "apache-airflow-providers-common-sql", "apache-airflow-providers-presto", "apache-airflow-providers-trino", "astroid (>=2.12.3)", "aws-xray-sdk", "bcrypt (>=2.0.0)", "beautifulsoup4 (>=4.7.1)", "black", "blinker", "bowler", "cgroupspy (>=0.2.2)", "checksumdir", "click (>=8.0)", "click (>=8.0,!=8.1.4,!=8.1.5)", "coverage", "cryptography (>=2.0.0)", "docutils (<0.17.0)", "eralchemy2", "filelock", "flask-bcrypt (>=0.7.1)", "gitpython", "hdfs[avro,dataframe,kerberos] (>=2.0.4)", "hmsclient (>=0.1.0)", "impyla (>=0.18.0,<1.0)", "ipdb", "jira", "jsondiff", "jsonpath-ng (>=1.5.3)", "kubernetes (>=21.7.0,<24)", "mongomock", "moto[cloudformation,glue] (>=4.0)", "mypy (==1.2.0)", "mysqlclient (>=1.3.6)", "pandas (>=0.17.1)", "paramiko", "pipdeptree", "pre-commit", "presto-python-client (>=0.8.2)", "pyarrow (>=9.0.0)", "pygithub", "pyhive[hive] (>=0.6.0)", "pykerberos (>=1.1.13)", "pypsrp", "pytest", "pytest-asyncio", "pytest-capture-warnings", "pytest-cov", "pytest-httpx", "pytest-instafail", "pytest-mock", "pytest-rerunfailures", "pytest-timeouts", "pytest-xdist", "python-jose", "pywinrm", "qds-sdk (>=1.9.6)", "requests-kerberos (>=0.10.0)", "requests-mock", "rich-click (>=1.5)", "ruff (>=0.0.219)", "sasl (>=0.3.1)", "semver", "sphinx (>=5.2.0)", "sphinx-airflow-theme", "sphinx-argparse (>=0.1.13)", "sphinx-autoapi (>=2.0.0)", "sphinx-copybutton", "sphinx-jinja (>=2.0)", "sphinx-rtd-theme (>=0.1.6)", "sphinxcontrib-httpdomain (>=1.7.0)", "sphinxcontrib-redoc (>=1.6.0)", "sphinxcontrib-spelling (>=7.3)", "thrift (>=0.9.2)", "thrift-sasl (>=0.2.0)", "time-machine", "towncrier", "twine", "types-Deprecated", "types-Markdown", "types-PyMySQL", "types-PyYAML", "types-boto", "types-certifi", "types-croniter", "types-docutils", "types-paramiko", "types-protobuf", "types-python-dateutil", "types-python-slugify", "types-pytz", "types-redis", "types-requests", "types-setuptools", "types-tabulate", "types-termcolor", "types-toml", "wheel", "yamllint"] +devel = ["aiobotocore (>=2.1.1)", "aioresponses", "apache-airflow (>=2.4.0)", "apache-airflow-providers-common-sql", "astroid (>=2.12.3,<3.0)", "aws-xray-sdk", "backports.zoneinfo (>=0.2.1)", "bcrypt (>=2.0.0)", "beautifulsoup4 (>=4.7.1)", "black", "blinker", "cgroupspy (>=0.2.2)", "checksumdir", "click (>=8.0)", "click (>=8.0,!=8.1.4,!=8.1.5)", "coverage (>=7.2)", "cryptography (>=2.0.0)", "docutils (<0.17.0)", "eralchemy2", "filelock", "flask-bcrypt (>=0.7.1)", "gitpython", "ipdb", "jsonschema (>=3.0)", "kubernetes (>=21.7.0,<24)", "mongomock", "moto[glue] (>=4.0)", "mypy (==1.2.0)", "mypy-boto3-appflow (>=1.28.0)", "mypy-boto3-rds (>=1.28.0)", "mypy-boto3-redshift-data (>=1.28.0)", "mypy-boto3-s3 (>=1.28.0)", "mysqlclient 
(>=1.3.6)", "openapi-spec-validator (>=0.2.8)", "pandas (>=0.17.1)", "pipdeptree", "pre-commit", "pyarrow (>=9.0.0)", "pygithub", "pytest", "pytest-asyncio", "pytest-capture-warnings", "pytest-cov", "pytest-httpx", "pytest-instafail", "pytest-mock", "pytest-rerunfailures", "pytest-timeouts", "pytest-xdist", "pywinrm", "requests-mock", "rich-click (>=1.5)", "ruff (>=0.0.219)", "semver", "sphinx (>=5.2.0)", "sphinx-airflow-theme", "sphinx-argparse (>=0.1.13)", "sphinx-autoapi (>=2.0.0)", "sphinx-copybutton", "sphinx-jinja (>=2.0)", "sphinx-rtd-theme (>=0.1.6)", "sphinxcontrib-httpdomain (>=1.7.0)", "sphinxcontrib-redoc (>=1.6.0)", "sphinxcontrib-spelling (>=7.3)", "time-machine", "towncrier", "twine", "types-Deprecated", "types-Markdown", "types-PyMySQL", "types-PyYAML", "types-certifi", "types-croniter", "types-docutils", "types-paramiko", "types-protobuf", "types-python-dateutil", "types-python-slugify", "types-pytz", "types-redis", "types-requests", "types-setuptools", "types-tabulate", "types-termcolor", "types-toml", "wheel", "yamllint"] +devel-all = ["PyGithub (!=1.58)", "PyOpenSSL", "adal (>=1.2.7)", "aiobotocore (>=2.1.1)", "aiohttp", "aiohttp (>=3.6.3,<4)", "aioresponses", "alibabacloud-adb20211201 (>=1.0.0)", "alibabacloud-tea-openapi (>=0.3.7)", "amqp", "analytics-python (>=1.2.9)", "apache-airflow (>=2.4.0)", "apache-airflow (>=2.7.0)", "apache-airflow-providers-airbyte", "apache-airflow-providers-alibaba", "apache-airflow-providers-amazon", "apache-airflow-providers-apache-beam", "apache-airflow-providers-apache-cassandra", "apache-airflow-providers-apache-drill", "apache-airflow-providers-apache-druid", "apache-airflow-providers-apache-flink", "apache-airflow-providers-apache-hdfs", "apache-airflow-providers-apache-hive", "apache-airflow-providers-apache-impala", "apache-airflow-providers-apache-kafka", "apache-airflow-providers-apache-kylin", "apache-airflow-providers-apache-livy", "apache-airflow-providers-apache-pig", "apache-airflow-providers-apache-pinot", "apache-airflow-providers-apache-spark", "apache-airflow-providers-apache-sqoop", "apache-airflow-providers-apprise", "apache-airflow-providers-arangodb", "apache-airflow-providers-asana", "apache-airflow-providers-atlassian-jira", "apache-airflow-providers-celery", "apache-airflow-providers-cloudant", "apache-airflow-providers-cncf-kubernetes", "apache-airflow-providers-common-sql", "apache-airflow-providers-daskexecutor", "apache-airflow-providers-databricks", "apache-airflow-providers-datadog", "apache-airflow-providers-dbt-cloud", "apache-airflow-providers-dingding", "apache-airflow-providers-discord", "apache-airflow-providers-docker", "apache-airflow-providers-elasticsearch", "apache-airflow-providers-exasol", "apache-airflow-providers-facebook", "apache-airflow-providers-ftp", "apache-airflow-providers-github", "apache-airflow-providers-google", "apache-airflow-providers-grpc", "apache-airflow-providers-hashicorp", "apache-airflow-providers-http", "apache-airflow-providers-imap", "apache-airflow-providers-influxdb", "apache-airflow-providers-jdbc", "apache-airflow-providers-jenkins", "apache-airflow-providers-microsoft-azure", "apache-airflow-providers-microsoft-mssql", "apache-airflow-providers-microsoft-psrp", "apache-airflow-providers-microsoft-winrm", "apache-airflow-providers-mongo", "apache-airflow-providers-mysql", "apache-airflow-providers-neo4j", "apache-airflow-providers-odbc", "apache-airflow-providers-openfaas", "apache-airflow-providers-openlineage", "apache-airflow-providers-opsgenie", 
"apache-airflow-providers-oracle", "apache-airflow-providers-pagerduty", "apache-airflow-providers-papermill", "apache-airflow-providers-plexus", "apache-airflow-providers-postgres", "apache-airflow-providers-presto", "apache-airflow-providers-redis", "apache-airflow-providers-salesforce", "apache-airflow-providers-samba", "apache-airflow-providers-segment", "apache-airflow-providers-sendgrid", "apache-airflow-providers-sftp", "apache-airflow-providers-singularity", "apache-airflow-providers-slack", "apache-airflow-providers-smtp", "apache-airflow-providers-snowflake", "apache-airflow-providers-sqlite", "apache-airflow-providers-ssh", "apache-airflow-providers-tableau", "apache-airflow-providers-tabular", "apache-airflow-providers-telegram", "apache-airflow-providers-trino", "apache-airflow-providers-vertica", "apache-airflow-providers-zendesk", "apache-beam (>=2.47.0)", "apprise", "arrow (>=0.16.0)", "asana (>=0.10,<4.0.0)", "asgiref", "asgiref (>=3.5.2)", "astroid (>=2.12.3,<3.0)", "atlasclient (>=0.1.2)", "atlassian-python-api (>=1.14.2)", "attrs (>=22.2)", "authlib (>=1.0.0)", "aws-xray-sdk", "azure-batch (>=8.0.0)", "azure-cosmos (>=4.0.0)", "azure-datalake-store (>=0.0.45)", "azure-identity (>=1.3.1)", "azure-keyvault-secrets (>=4.1.0)", "azure-kusto-data (>=0.0.43,<0.1)", "azure-mgmt-containerinstance (>=1.5.0,<2.0)", "azure-mgmt-datafactory (>=1.0.0,<2.0)", "azure-mgmt-datalake-store (>=0.5.0)", "azure-mgmt-resource (>=2.2.0)", "azure-servicebus (>=7.6.1)", "azure-storage-blob (>=12.14.0)", "azure-storage-common (>=2.1.0)", "azure-storage-file (>=2.1.0)", "azure-storage-file-datalake (>=12.9.1)", "azure-synapse-spark", "backports.zoneinfo (>=0.2.1)", "bcrypt (>=2.0.0)", "beautifulsoup4 (>=4.7.1)", "black", "blinker", "blinker (>=1.1)", "boto3 (>=1.28.0)", "botocore (>=1.31.0)", "cassandra-driver (>=3.13.0)", "celery (>=5.3.0,!=5.3.2,!=5.3.3,<6)", "cgroupspy (>=0.2.2)", "checksumdir", "click (>=8.0)", "click (>=8.0,!=8.1.4,!=8.1.5)", "cloudant (>=2.0)", "cloudpickle (>=1.4.1)", "confluent-kafka (>=1.8.2)", "coverage (>=7.2)", "cryptography (>=2.0.0)", "dask (>=2.9.0,!=2022.10.1,!=2023.5.0)", "databricks-sql-connector (>=2.0.0,<3.0.0)", "datadog (>=0.14.0)", "distributed (>=2.11.1,!=2023.5.0)", "dnspython (>=1.13.0)", "docker (>=5.0.3)", "docutils (<0.17.0)", "elasticsearch (>8,<9)", "eralchemy2", "eventlet (>=0.33.3)", "facebook-business (>=6.0.2)", "filelock", "flask-appbuilder[oauth] (==4.3.6)", "flask-bcrypt (>=0.7.1)", "flower (>=1.0.0)", "gcloud-aio-auth (>=4.0.0,<5.0.0)", "gcloud-aio-bigquery (>=6.1.2)", "gcloud-aio-storage", "gevent (>=0.13)", "gitpython", "google-ads (>=21.2.0)", "google-api-core (>=2.11.0)", "google-api-python-client (>=1.6.0)", "google-auth (>=1.0.0)", "google-auth (>=1.0.0,<3.0.0)", "google-auth-httplib2 (>=0.0.1)", "google-cloud-aiplatform (>=1.22.1)", "google-cloud-automl (>=2.11.0)", "google-cloud-bigquery-datatransfer (>=3.11.0)", "google-cloud-bigtable (>=2.17.0)", "google-cloud-build (>=3.13.0)", "google-cloud-compute (>=1.10.0)", "google-cloud-container (>=2.17.4)", "google-cloud-datacatalog (>=3.11.1)", "google-cloud-dataflow-client (>=0.8.2)", "google-cloud-dataform (>=0.5.0)", "google-cloud-dataplex (>=1.4.2)", "google-cloud-dataproc (>=5.4.0)", "google-cloud-dataproc-metastore (>=1.12.0)", "google-cloud-dlp (>=3.12.0)", "google-cloud-kms (>=2.15.0)", "google-cloud-language (>=2.9.0)", "google-cloud-logging (>=3.5.0)", "google-cloud-memcache (>=1.7.0)", "google-cloud-monitoring (>=2.14.1)", "google-cloud-orchestration-airflow (>=1.7.0)", 
"google-cloud-os-login (>=2.9.1)", "google-cloud-pubsub (>=2.15.0)", "google-cloud-redis (>=2.12.0)", "google-cloud-secret-manager (>=2.16.0)", "google-cloud-spanner (>=3.11.1)", "google-cloud-speech (>=2.18.0)", "google-cloud-storage (>=2.7.0)", "google-cloud-storage-transfer (>=1.4.1)", "google-cloud-tasks (>=2.13.0)", "google-cloud-texttospeech (>=2.14.1)", "google-cloud-translate (>=3.11.0)", "google-cloud-videointelligence (>=2.11.0)", "google-cloud-vision (>=3.4.0)", "google-cloud-workflows (>=1.10.0)", "greenlet (>=0.4.9)", "grpcio (>=1.15.0)", "grpcio-gcp (>=0.2.2)", "hdfs[avro,dataframe,kerberos] (>=2.0.4)", "hmsclient (>=0.1.0)", "httpx", "hvac (>=0.10)", "impyla (>=0.18.0,<1.0)", "influxdb-client (>=1.19.0)", "ipdb", "jaydebeapi (>=1.1.1)", "json-merge-patch (>=0.2)", "jsonpath-ng (>=1.5.3)", "jsonschema (>=3.0)", "kubernetes (>=21.7.0,<24)", "kubernetes-asyncio (>=18.20.1,<25)", "kylinpy (>=2.6)", "ldap3 (>=2.5.1)", "looker-sdk (>=22.2.0)", "mongomock", "moto[glue] (>=4.0)", "mypy (==1.2.0)", "mypy-boto3-appflow (>=1.28.0)", "mypy-boto3-rds (>=1.28.0)", "mypy-boto3-redshift-data (>=1.28.0)", "mypy-boto3-s3 (>=1.28.0)", "mysqlclient (>=1.3.6)", "neo4j (>=4.2.1)", "openapi-spec-validator (>=0.2.8)", "openlineage-integration-common (>=0.28.0)", "openlineage-python (>=0.28.0)", "opentelemetry-exporter-prometheus", "opsgenie-sdk (>=2.1.5)", "oracledb (>=1.0.0)", "oss2 (>=2.14.0)", "pandas (>=0.17.1)", "pandas-gbq", "papermill[all] (>=1.2.1)", "paramiko (>=2.6.0)", "pdpyras (>=4.1.2)", "pinotdb (>0.4.7)", "pipdeptree", "plyvel", "pre-commit", "presto-python-client (>=0.8.2)", "proto-plus (>=1.19.6)", "psycopg2-binary (>=2.8.0)", "pyarrow (>=9.0.0)", "pydruid (>=0.4.1)", "pyexasol (>=0.5.1)", "pygithub", "pyhive[hive-pure-sasl] (>=0.7.0)", "pykerberos (>=1.1.13)", "pymongo (>=3.6.0)", "pymssql (>=2.1.5)", "pyodbc", "pypsrp (>=0.8.0)", "pyspark", "pytest", "pytest-asyncio", "pytest-capture-warnings", "pytest-cov", "pytest-httpx", "pytest-instafail", "pytest-mock", "pytest-rerunfailures", "pytest-timeouts", "pytest-xdist", "python-arango (>=7.3.2)", "python-dotenv (>=0.21.0)", "python-jenkins (>=1.0.0)", "python-ldap", "python-telegram-bot (>=20.0.0)", "pywinrm", "pywinrm (>=0.4)", "redis (>=4.5.2,!=4.5.5,<5.0.0)", "redshift-connector (>=2.0.888)", "requests (>=2.26.0)", "requests (>=2.27,<3)", "requests-kerberos (>=0.10.0)", "requests-mock", "requests-toolbelt", "rich-click (>=1.5)", "ruff (>=0.0.219)", "scrapbook[all]", "semver", "sendgrid (>=6.0.0)", "sentry-sdk (>=0.8.0)", "simple-salesforce (>=1.0.0)", "slack-sdk (>=3.0.0)", "smbprotocol (>=1.5.0)", "snowflake-connector-python (>=2.4.1)", "snowflake-sqlalchemy (>=1.1.0)", "sphinx (>=5.2.0)", "sphinx-airflow-theme", "sphinx-argparse (>=0.1.13)", "sphinx-autoapi (>=2.0.0)", "sphinx-copybutton", "sphinx-jinja (>=2.0)", "sphinx-rtd-theme (>=0.1.6)", "sphinxcontrib-httpdomain (>=1.7.0)", "sphinxcontrib-redoc (>=1.6.0)", "sphinxcontrib-spelling (>=7.3)", "spython (>=0.0.56)", "sqlalchemy-bigquery (>=1.2.1)", "sqlalchemy-drill (>=1.1.0)", "sqlalchemy-redshift (>=0.8.6)", "sqlalchemy-spanner (>=1.6.2)", "sqlparse (>=0.4.2)", "sshtunnel (>=0.3.2)", "statsd (>=3.3.0)", "tableauserverclient", "thrift (>=0.9.2)", "thrift-sasl (>=0.2.0)", "time-machine", "towncrier", "trino (>=0.318.0)", "twine", "types-Deprecated", "types-Markdown", "types-PyMySQL", "types-PyYAML", "types-certifi", "types-croniter", "types-docutils", "types-paramiko", "types-protobuf", "types-python-dateutil", "types-python-slugify", "types-pytz", "types-redis", 
"types-requests", "types-setuptools", "types-tabulate", "types-termcolor", "types-toml", "vertica-python (>=0.5.1)", "virtualenv", "watchtower (>=2.0.1,<2.1.0)", "wheel", "yamllint", "zenpy (>=2.0.24)"] +devel-ci = ["PyGithub (!=1.58)", "PyOpenSSL", "adal (>=1.2.7)", "aiobotocore (>=2.1.1)", "aiohttp", "aiohttp (>=3.6.3,<4)", "aioresponses", "alibabacloud-adb20211201 (>=1.0.0)", "alibabacloud-tea-openapi (>=0.3.7)", "amqp", "analytics-python (>=1.2.9)", "apache-airflow (>=2.4.0)", "apache-airflow (>=2.7.0)", "apache-airflow-providers-airbyte", "apache-airflow-providers-alibaba", "apache-airflow-providers-amazon", "apache-airflow-providers-apache-beam", "apache-airflow-providers-apache-cassandra", "apache-airflow-providers-apache-drill", "apache-airflow-providers-apache-druid", "apache-airflow-providers-apache-flink", "apache-airflow-providers-apache-hdfs", "apache-airflow-providers-apache-hive", "apache-airflow-providers-apache-impala", "apache-airflow-providers-apache-kafka", "apache-airflow-providers-apache-kylin", "apache-airflow-providers-apache-livy", "apache-airflow-providers-apache-pig", "apache-airflow-providers-apache-pinot", "apache-airflow-providers-apache-spark", "apache-airflow-providers-apache-sqoop", "apache-airflow-providers-apprise", "apache-airflow-providers-arangodb", "apache-airflow-providers-asana", "apache-airflow-providers-atlassian-jira", "apache-airflow-providers-celery", "apache-airflow-providers-cloudant", "apache-airflow-providers-cncf-kubernetes", "apache-airflow-providers-common-sql", "apache-airflow-providers-daskexecutor", "apache-airflow-providers-databricks", "apache-airflow-providers-datadog", "apache-airflow-providers-dbt-cloud", "apache-airflow-providers-dingding", "apache-airflow-providers-discord", "apache-airflow-providers-docker", "apache-airflow-providers-elasticsearch", "apache-airflow-providers-exasol", "apache-airflow-providers-facebook", "apache-airflow-providers-ftp", "apache-airflow-providers-github", "apache-airflow-providers-google", "apache-airflow-providers-grpc", "apache-airflow-providers-hashicorp", "apache-airflow-providers-http", "apache-airflow-providers-imap", "apache-airflow-providers-influxdb", "apache-airflow-providers-jdbc", "apache-airflow-providers-jenkins", "apache-airflow-providers-microsoft-azure", "apache-airflow-providers-microsoft-mssql", "apache-airflow-providers-microsoft-psrp", "apache-airflow-providers-microsoft-winrm", "apache-airflow-providers-mongo", "apache-airflow-providers-mysql", "apache-airflow-providers-neo4j", "apache-airflow-providers-odbc", "apache-airflow-providers-openfaas", "apache-airflow-providers-openlineage", "apache-airflow-providers-opsgenie", "apache-airflow-providers-oracle", "apache-airflow-providers-pagerduty", "apache-airflow-providers-papermill", "apache-airflow-providers-plexus", "apache-airflow-providers-postgres", "apache-airflow-providers-presto", "apache-airflow-providers-redis", "apache-airflow-providers-salesforce", "apache-airflow-providers-samba", "apache-airflow-providers-segment", "apache-airflow-providers-sendgrid", "apache-airflow-providers-sftp", "apache-airflow-providers-singularity", "apache-airflow-providers-slack", "apache-airflow-providers-smtp", "apache-airflow-providers-snowflake", "apache-airflow-providers-sqlite", "apache-airflow-providers-ssh", "apache-airflow-providers-tableau", "apache-airflow-providers-tabular", "apache-airflow-providers-telegram", "apache-airflow-providers-trino", "apache-airflow-providers-vertica", "apache-airflow-providers-zendesk", 
"apache-beam (>=2.47.0)", "apprise", "arrow (>=0.16.0)", "asana (>=0.10,<4.0.0)", "asgiref", "asgiref (>=3.5.2)", "astroid (>=2.12.3,<3.0)", "atlasclient (>=0.1.2)", "atlassian-python-api (>=1.14.2)", "attrs (>=22.2)", "authlib (>=1.0.0)", "aws-xray-sdk", "azure-batch (>=8.0.0)", "azure-cosmos (>=4.0.0)", "azure-datalake-store (>=0.0.45)", "azure-identity (>=1.3.1)", "azure-keyvault-secrets (>=4.1.0)", "azure-kusto-data (>=0.0.43,<0.1)", "azure-mgmt-containerinstance (>=1.5.0,<2.0)", "azure-mgmt-datafactory (>=1.0.0,<2.0)", "azure-mgmt-datalake-store (>=0.5.0)", "azure-mgmt-resource (>=2.2.0)", "azure-servicebus (>=7.6.1)", "azure-storage-blob (>=12.14.0)", "azure-storage-common (>=2.1.0)", "azure-storage-file (>=2.1.0)", "azure-storage-file-datalake (>=12.9.1)", "azure-synapse-spark", "backports.zoneinfo (>=0.2.1)", "bcrypt (>=2.0.0)", "beautifulsoup4 (>=4.7.1)", "black", "blinker", "blinker (>=1.1)", "boto3 (>=1.28.0)", "botocore (>=1.31.0)", "cassandra-driver (>=3.13.0)", "celery (>=5.3.0,!=5.3.2,!=5.3.3,<6)", "cgroupspy (>=0.2.2)", "checksumdir", "click (>=8.0)", "click (>=8.0,!=8.1.4,!=8.1.5)", "cloudant (>=2.0)", "cloudpickle (>=1.4.1)", "confluent-kafka (>=1.8.2)", "coverage (>=7.2)", "cryptography (>=2.0.0)", "dask (>=2.9.0,!=2022.10.1,!=2023.5.0)", "databricks-sql-connector (>=2.0.0,<3.0.0)", "datadog (>=0.14.0)", "distributed (>=2.11.1,!=2023.5.0)", "dnspython (>=1.13.0)", "docker (>=5.0.3)", "docutils (<0.17.0)", "elasticsearch (>8,<9)", "eralchemy2", "eventlet (>=0.33.3)", "facebook-business (>=6.0.2)", "filelock", "flask-appbuilder[oauth] (==4.3.6)", "flask-bcrypt (>=0.7.1)", "flower (>=1.0.0)", "gcloud-aio-auth (>=4.0.0,<5.0.0)", "gcloud-aio-bigquery (>=6.1.2)", "gcloud-aio-storage", "gevent (>=0.13)", "gitpython", "google-ads (>=21.2.0)", "google-api-core (>=2.11.0)", "google-api-python-client (>=1.6.0)", "google-auth (>=1.0.0)", "google-auth (>=1.0.0,<3.0.0)", "google-auth-httplib2 (>=0.0.1)", "google-cloud-aiplatform (>=1.22.1)", "google-cloud-automl (>=2.11.0)", "google-cloud-bigquery-datatransfer (>=3.11.0)", "google-cloud-bigtable (>=2.17.0)", "google-cloud-build (>=3.13.0)", "google-cloud-compute (>=1.10.0)", "google-cloud-container (>=2.17.4)", "google-cloud-datacatalog (>=3.11.1)", "google-cloud-dataflow-client (>=0.8.2)", "google-cloud-dataform (>=0.5.0)", "google-cloud-dataplex (>=1.4.2)", "google-cloud-dataproc (>=5.4.0)", "google-cloud-dataproc-metastore (>=1.12.0)", "google-cloud-dlp (>=3.12.0)", "google-cloud-kms (>=2.15.0)", "google-cloud-language (>=2.9.0)", "google-cloud-logging (>=3.5.0)", "google-cloud-memcache (>=1.7.0)", "google-cloud-monitoring (>=2.14.1)", "google-cloud-orchestration-airflow (>=1.7.0)", "google-cloud-os-login (>=2.9.1)", "google-cloud-pubsub (>=2.15.0)", "google-cloud-redis (>=2.12.0)", "google-cloud-secret-manager (>=2.16.0)", "google-cloud-spanner (>=3.11.1)", "google-cloud-speech (>=2.18.0)", "google-cloud-storage (>=2.7.0)", "google-cloud-storage-transfer (>=1.4.1)", "google-cloud-tasks (>=2.13.0)", "google-cloud-texttospeech (>=2.14.1)", "google-cloud-translate (>=3.11.0)", "google-cloud-videointelligence (>=2.11.0)", "google-cloud-vision (>=3.4.0)", "google-cloud-workflows (>=1.10.0)", "greenlet (>=0.4.9)", "grpcio (>=1.15.0)", "grpcio-gcp (>=0.2.2)", "hdfs[avro,dataframe,kerberos] (>=2.0.4)", "hmsclient (>=0.1.0)", "httpx", "hvac (>=0.10)", "impyla (>=0.18.0,<1.0)", "influxdb-client (>=1.19.0)", "ipdb", "jaydebeapi (>=1.1.1)", "json-merge-patch (>=0.2)", "jsonpath-ng (>=1.5.3)", "jsonschema (>=3.0)", "kubernetes (>=21.7.0,<24)", 
"kubernetes-asyncio (>=18.20.1,<25)", "kylinpy (>=2.6)", "ldap3 (>=2.5.1)", "looker-sdk (>=22.2.0)", "mongomock", "moto[glue] (>=4.0)", "mypy (==1.2.0)", "mypy-boto3-appflow (>=1.28.0)", "mypy-boto3-rds (>=1.28.0)", "mypy-boto3-redshift-data (>=1.28.0)", "mypy-boto3-s3 (>=1.28.0)", "mysqlclient (>=1.3.6)", "neo4j (>=4.2.1)", "openapi-spec-validator (>=0.2.8)", "openlineage-integration-common (>=0.28.0)", "openlineage-python (>=0.28.0)", "opentelemetry-exporter-prometheus", "opsgenie-sdk (>=2.1.5)", "oracledb (>=1.0.0)", "oss2 (>=2.14.0)", "pandas (>=0.17.1)", "pandas-gbq", "papermill[all] (>=1.2.1)", "paramiko (>=2.6.0)", "pdpyras (>=4.1.2)", "pinotdb (>0.4.7)", "pipdeptree", "plyvel", "pre-commit", "presto-python-client (>=0.8.2)", "proto-plus (>=1.19.6)", "psycopg2-binary (>=2.8.0)", "pyarrow (>=9.0.0)", "pydruid (>=0.4.1)", "pyexasol (>=0.5.1)", "pygithub", "pyhive[hive-pure-sasl] (>=0.7.0)", "pykerberos (>=1.1.13)", "pymongo (>=3.6.0)", "pymssql (>=2.1.5)", "pyodbc", "pypsrp (>=0.8.0)", "pyspark", "pytest", "pytest-asyncio", "pytest-capture-warnings", "pytest-cov", "pytest-httpx", "pytest-instafail", "pytest-mock", "pytest-rerunfailures", "pytest-timeouts", "pytest-xdist", "python-arango (>=7.3.2)", "python-dotenv (>=0.21.0)", "python-jenkins (>=1.0.0)", "python-ldap", "python-telegram-bot (>=20.0.0)", "pywinrm", "pywinrm (>=0.4)", "redis (>=4.5.2,!=4.5.5,<5.0.0)", "redshift-connector (>=2.0.888)", "requests (>=2.26.0)", "requests (>=2.27,<3)", "requests-kerberos (>=0.10.0)", "requests-mock", "requests-toolbelt", "rich-click (>=1.5)", "ruff (>=0.0.219)", "scrapbook[all]", "semver", "sendgrid (>=6.0.0)", "sentry-sdk (>=0.8.0)", "simple-salesforce (>=1.0.0)", "slack-sdk (>=3.0.0)", "smbprotocol (>=1.5.0)", "snowflake-connector-python (>=2.4.1)", "snowflake-sqlalchemy (>=1.1.0)", "sphinx (>=5.2.0)", "sphinx-airflow-theme", "sphinx-argparse (>=0.1.13)", "sphinx-autoapi (>=2.0.0)", "sphinx-copybutton", "sphinx-jinja (>=2.0)", "sphinx-rtd-theme (>=0.1.6)", "sphinxcontrib-httpdomain (>=1.7.0)", "sphinxcontrib-redoc (>=1.6.0)", "sphinxcontrib-spelling (>=7.3)", "spython (>=0.0.56)", "sqlalchemy-bigquery (>=1.2.1)", "sqlalchemy-drill (>=1.1.0)", "sqlalchemy-redshift (>=0.8.6)", "sqlalchemy-spanner (>=1.6.2)", "sqlparse (>=0.4.2)", "sshtunnel (>=0.3.2)", "statsd (>=3.3.0)", "tableauserverclient", "thrift (>=0.9.2)", "thrift-sasl (>=0.2.0)", "time-machine", "towncrier", "trino (>=0.318.0)", "twine", "types-Deprecated", "types-Markdown", "types-PyMySQL", "types-PyYAML", "types-certifi", "types-croniter", "types-docutils", "types-paramiko", "types-protobuf", "types-python-dateutil", "types-python-slugify", "types-pytz", "types-redis", "types-requests", "types-setuptools", "types-tabulate", "types-termcolor", "types-toml", "vertica-python (>=0.5.1)", "virtualenv", "watchtower (>=2.0.1,<2.1.0)", "wheel", "yamllint", "zenpy (>=2.0.24)"] +devel-hadoop = ["aiobotocore (>=2.1.1)", "aioresponses", "apache-airflow (>=2.4.0)", "apache-airflow-providers-apache-hdfs", "apache-airflow-providers-apache-hive", "apache-airflow-providers-common-sql", "apache-airflow-providers-presto", "apache-airflow-providers-trino", "astroid (>=2.12.3,<3.0)", "aws-xray-sdk", "backports.zoneinfo (>=0.2.1)", "bcrypt (>=2.0.0)", "beautifulsoup4 (>=4.7.1)", "black", "blinker", "cgroupspy (>=0.2.2)", "checksumdir", "click (>=8.0)", "click (>=8.0,!=8.1.4,!=8.1.5)", "coverage (>=7.2)", "cryptography (>=2.0.0)", "docutils (<0.17.0)", "eralchemy2", "filelock", "flask-bcrypt (>=0.7.1)", "gitpython", "hdfs[avro,dataframe,kerberos] 
(>=2.0.4)", "hmsclient (>=0.1.0)", "impyla (>=0.18.0,<1.0)", "ipdb", "jsonschema (>=3.0)", "kubernetes (>=21.7.0,<24)", "mongomock", "moto[glue] (>=4.0)", "mypy (==1.2.0)", "mypy-boto3-appflow (>=1.28.0)", "mypy-boto3-rds (>=1.28.0)", "mypy-boto3-redshift-data (>=1.28.0)", "mypy-boto3-s3 (>=1.28.0)", "mysqlclient (>=1.3.6)", "openapi-spec-validator (>=0.2.8)", "pandas (>=0.17.1)", "pipdeptree", "pre-commit", "presto-python-client (>=0.8.2)", "pyarrow (>=9.0.0)", "pygithub", "pyhive[hive-pure-sasl] (>=0.7.0)", "pykerberos (>=1.1.13)", "pytest", "pytest-asyncio", "pytest-capture-warnings", "pytest-cov", "pytest-httpx", "pytest-instafail", "pytest-mock", "pytest-rerunfailures", "pytest-timeouts", "pytest-xdist", "pywinrm", "requests-kerberos (>=0.10.0)", "requests-mock", "rich-click (>=1.5)", "ruff (>=0.0.219)", "semver", "sphinx (>=5.2.0)", "sphinx-airflow-theme", "sphinx-argparse (>=0.1.13)", "sphinx-autoapi (>=2.0.0)", "sphinx-copybutton", "sphinx-jinja (>=2.0)", "sphinx-rtd-theme (>=0.1.6)", "sphinxcontrib-httpdomain (>=1.7.0)", "sphinxcontrib-redoc (>=1.6.0)", "sphinxcontrib-spelling (>=7.3)", "thrift (>=0.9.2)", "thrift-sasl (>=0.2.0)", "time-machine", "towncrier", "twine", "types-Deprecated", "types-Markdown", "types-PyMySQL", "types-PyYAML", "types-certifi", "types-croniter", "types-docutils", "types-paramiko", "types-protobuf", "types-python-dateutil", "types-python-slugify", "types-pytz", "types-redis", "types-requests", "types-setuptools", "types-tabulate", "types-termcolor", "types-toml", "wheel", "yamllint"] dingding = ["apache-airflow-providers-dingding"] discord = ["apache-airflow-providers-discord"] -doc = ["astroid (>=2.12.3)", "checksumdir", "click (>=8.0,!=8.1.4,!=8.1.5)", "docutils (<0.17.0)", "eralchemy2", "sphinx (>=5.2.0)", "sphinx-airflow-theme", "sphinx-argparse (>=0.1.13)", "sphinx-autoapi (>=2.0.0)", "sphinx-copybutton", "sphinx-jinja (>=2.0)", "sphinx-rtd-theme (>=0.1.6)", "sphinxcontrib-httpdomain (>=1.7.0)", "sphinxcontrib-redoc (>=1.6.0)", "sphinxcontrib-spelling (>=7.3)"] +doc = ["astroid (>=2.12.3,<3.0)", "checksumdir", "click (>=8.0,!=8.1.4,!=8.1.5)", "docutils (<0.17.0)", "eralchemy2", "sphinx (>=5.2.0)", "sphinx-airflow-theme", "sphinx-argparse (>=0.1.13)", "sphinx-autoapi (>=2.0.0)", "sphinx-copybutton", "sphinx-jinja (>=2.0)", "sphinx-rtd-theme (>=0.1.6)", "sphinxcontrib-httpdomain (>=1.7.0)", "sphinxcontrib-redoc (>=1.6.0)", "sphinxcontrib-spelling (>=7.3)"] doc-gen = ["eralchemy2"] docker = ["apache-airflow-providers-docker"] druid = ["apache-airflow-providers-apache-druid"] @@ -424,9 +312,9 @@ ftp = ["apache-airflow-providers-ftp"] gcp = ["apache-airflow-providers-google"] gcp-api = ["apache-airflow-providers-google"] github = ["apache-airflow-providers-github"] -github-enterprise = ["authlib (>=1.0.0)", "flask-appbuilder[oauth] (==4.3.3)"] +github-enterprise = ["authlib (>=1.0.0)", "flask-appbuilder[oauth] (==4.3.6)"] google = ["apache-airflow-providers-google"] -google-auth = ["authlib (>=1.0.0)", "flask-appbuilder[oauth] (==4.3.3)"] +google-auth = ["authlib (>=1.0.0)", "flask-appbuilder[oauth] (==4.3.6)"] grpc = ["apache-airflow-providers-grpc"] hashicorp = ["apache-airflow-providers-hashicorp"] hdfs = ["apache-airflow-providers-apache-hdfs"] @@ -463,7 +351,6 @@ plexus = ["apache-airflow-providers-plexus"] postgres = ["apache-airflow-providers-postgres"] presto = ["apache-airflow-providers-presto"] qds = ["apache-airflow-providers-qubole"] -qubole = ["apache-airflow-providers-qubole"] rabbitmq = ["amqp"] redis = ["apache-airflow-providers-redis"] s3 
= ["apache-airflow-providers-amazon"] @@ -495,12 +382,9 @@ zendesk = ["apache-airflow-providers-zendesk"] name = "apache-airflow-providers-common-sql" version = "1.7.1" description = "Provider for Apache Airflow. Implements apache-airflow-providers-common-sql package" +category = "dev" optional = false python-versions = "~=3.8" -files = [ - {file = "apache-airflow-providers-common-sql-1.7.1.tar.gz", hash = "sha256:ba37f795d9656a87cf4661edc381b8ecfe930272c59324b59f8a158fd0971aeb"}, - {file = "apache_airflow_providers_common_sql-1.7.1-py3-none-any.whl", hash = "sha256:36da2f51b51a64765b0ed5e6a5fece8eaa3ca173dfbff803e2fe2a0afbb90944"}, -] [package.dependencies] apache-airflow = ">=2.4.0" @@ -514,12 +398,9 @@ pandas = ["pandas (>=0.17.1)"] name = "apache-airflow-providers-ftp" version = "3.5.1" description = "Provider for Apache Airflow. Implements apache-airflow-providers-ftp package" +category = "dev" optional = false python-versions = "~=3.8" -files = [ - {file = "apache-airflow-providers-ftp-3.5.1.tar.gz", hash = "sha256:dc6dc524dc7454857a0812154d7540172e36db3a87e48a4a91918ebf80898bbf"}, - {file = "apache_airflow_providers_ftp-3.5.1-py3-none-any.whl", hash = "sha256:e4ea77d6276355acfe2392c12155db7b9d51be460b7673b616dc1d8bee03c1d7"}, -] [package.dependencies] apache-airflow = ">=2.4.0" @@ -531,12 +412,9 @@ openlineage = ["apache-airflow-providers-openlineage"] name = "apache-airflow-providers-http" version = "4.5.1" description = "Provider for Apache Airflow. Implements apache-airflow-providers-http package" +category = "dev" optional = false python-versions = "~=3.8" -files = [ - {file = "apache-airflow-providers-http-4.5.1.tar.gz", hash = "sha256:ec90920ff980fc264af9811dc72c37ef272bcdb3d007c7114e12366559426460"}, - {file = "apache_airflow_providers_http-4.5.1-py3-none-any.whl", hash = "sha256:702f26938bc22684eefecd297c2b0809793f9e43b8d911d807a29f21e69da179"}, -] [package.dependencies] aiohttp = "*" @@ -549,12 +427,9 @@ requests-toolbelt = "*" name = "apache-airflow-providers-imap" version = "3.3.1" description = "Provider for Apache Airflow. Implements apache-airflow-providers-imap package" +category = "dev" optional = false python-versions = "~=3.8" -files = [ - {file = "apache-airflow-providers-imap-3.3.1.tar.gz", hash = "sha256:40bac2a75e4dfbcd7d397776d90d03938facaf2707acc6cc119a8db684e53f77"}, - {file = "apache_airflow_providers_imap-3.3.1-py3-none-any.whl", hash = "sha256:adb6ef7864a5a8e245fbbd555bb4ef1eecf5b094d6d23ca0edc5f0aded50490d"}, -] [package.dependencies] apache-airflow = ">=2.4.0" @@ -563,12 +438,9 @@ apache-airflow = ">=2.4.0" name = "apache-airflow-providers-sqlite" version = "3.4.3" description = "Provider for Apache Airflow. Implements apache-airflow-providers-sqlite package" +category = "dev" optional = false python-versions = "~=3.8" -files = [ - {file = "apache-airflow-providers-sqlite-3.4.3.tar.gz", hash = "sha256:347d2db03eaa5ea9fef414666565ffa5e849935cbc30e37237edcaa822b5ced8"}, - {file = "apache_airflow_providers_sqlite-3.4.3-py3-none-any.whl", hash = "sha256:4ffa6a50f0ea1b4e51240b657dfec3fb026c87bdfa71af908a56461df6a6f2e0"}, -] [package.dependencies] apache-airflow = ">=2.4.0" @@ -581,12 +453,9 @@ common-sql = ["apache-airflow-providers-common-sql"] name = "apispec" version = "6.3.0" description = "A pluggable API specification generator. Currently supports the OpenAPI Specification (f.k.a. the Swagger specification)." 
+category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "apispec-6.3.0-py3-none-any.whl", hash = "sha256:95a0b9355785df998bb0e9b939237a30ee4c7428fd6ef97305eae3da06b9b339"}, - {file = "apispec-6.3.0.tar.gz", hash = "sha256:6cb08d92ce73ff0b3bf46cb2ea5c00d57289b0f279fb0256a3df468182ba5344"}, -] [package.dependencies] packaging = ">=21.3" @@ -605,23 +474,17 @@ yaml = ["PyYAML (>=3.10)"] name = "appdirs" version = "1.4.4" description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" optional = false python-versions = "*" -files = [ - {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"}, - {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"}, -] [[package]] name = "argcomplete" version = "3.1.1" description = "Bash tab completion for argparse" +category = "dev" optional = false python-versions = ">=3.6" -files = [ - {file = "argcomplete-3.1.1-py3-none-any.whl", hash = "sha256:35fa893a88deea85ea7b20d241100e64516d6af6d7b0ae2bed1d263d26f70948"}, - {file = "argcomplete-3.1.1.tar.gz", hash = "sha256:6c4c563f14f01440aaffa3eae13441c5db2357b5eec639abe7c0b15334627dff"}, -] [package.extras] test = ["coverage", "mypy", "pexpect", "ruff", "wheel"] @@ -630,12 +493,9 @@ test = ["coverage", "mypy", "pexpect", "ruff", "wheel"] name = "asgiref" version = "3.7.2" description = "ASGI specs, helper code, and adapters" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "asgiref-3.7.2-py3-none-any.whl", hash = "sha256:89b2ef2247e3b562a16eef663bc0e2e703ec6468e2fa8a5cd61cd449786d4f6e"}, - {file = "asgiref-3.7.2.tar.gz", hash = "sha256:9e0ce3aa93a819ba5b45120216b23878cf6e8525eb3848653452b4192b92afed"}, -] [package.dependencies] typing-extensions = {version = ">=4", markers = "python_version < \"3.11\""} @@ -647,23 +507,17 @@ tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] name = "asn1crypto" version = "1.5.1" description = "Fast ASN.1 parser and serializer with definitions for private keys, public keys, certificates, CRL, OCSP, CMS, PKCS#3, PKCS#7, PKCS#8, PKCS#12, PKCS#5, X.509 and TSP" +category = "main" optional = true python-versions = "*" -files = [ - {file = "asn1crypto-1.5.1-py2.py3-none-any.whl", hash = "sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67"}, - {file = "asn1crypto-1.5.1.tar.gz", hash = "sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c"}, -] [[package]] name = "astatine" version = "0.3.3" description = "Some handy helper functions for Python's AST module." 
+category = "dev" optional = false python-versions = ">=3.6" -files = [ - {file = "astatine-0.3.3-py3-none-any.whl", hash = "sha256:6d8c914f01fbea252cb8f31563f2e766a9ab03c02b9bcc37d18f7d9138828401"}, - {file = "astatine-0.3.3.tar.gz", hash = "sha256:0c58a7844b5890ff16da07dbfeb187341d8324cb4378940f89d795cbebebce08"}, -] [package.dependencies] asttokens = ">=1.1" @@ -673,12 +527,9 @@ domdf-python-tools = ">=2.7.0" name = "asttokens" version = "2.3.0" description = "Annotate AST trees with source code positions" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "asttokens-2.3.0-py2.py3-none-any.whl", hash = "sha256:bef1a51bc256d349e9f94e7e40e44b705ed1162f55294220dd561d24583d9877"}, - {file = "asttokens-2.3.0.tar.gz", hash = "sha256:2552a88626aaa7f0f299f871479fc755bd4e7c11e89078965e928fb7bb9a6afe"}, -] [package.dependencies] six = ">=1.12.0" @@ -690,12 +541,9 @@ test = ["astroid", "pytest"] name = "astunparse" version = "1.6.3" description = "An AST unparser for Python" +category = "main" optional = false python-versions = "*" -files = [ - {file = "astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8"}, - {file = "astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872"}, -] [package.dependencies] six = ">=1.6.1,<2.0" @@ -705,33 +553,25 @@ wheel = ">=0.23.0,<1.0" name = "async-timeout" version = "4.0.3" description = "Timeout context manager for asyncio programs" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, - {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, -] [[package]] name = "atomicwrites" version = "1.4.1" description = "Atomic file writes." +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, -] [[package]] name = "attrs" version = "23.1.0" description = "Classes Without Boilerplate" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, - {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, -] [package.extras] cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] @@ -744,12 +584,9 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte name = "authlib" version = "1.2.1" description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients." 
+category = "main" optional = true python-versions = "*" -files = [ - {file = "Authlib-1.2.1-py2.py3-none-any.whl", hash = "sha256:c88984ea00149a90e3537c964327da930779afa4564e354edfd98410bea01911"}, - {file = "Authlib-1.2.1.tar.gz", hash = "sha256:421f7c6b468d907ca2d9afede256f068f87e34d23dd221c07d13d4c234726afb"}, -] [package.dependencies] cryptography = ">=3.2" @@ -758,12 +595,9 @@ cryptography = ">=3.2" name = "azure-core" version = "1.29.3" description = "Microsoft Azure Core Library for Python" +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "azure-core-1.29.3.tar.gz", hash = "sha256:c92700af982e71c8c73de9f4c20da8b3f03ce2c22d13066e4d416b4629c87903"}, - {file = "azure_core-1.29.3-py3-none-any.whl", hash = "sha256:f8b2910f92b66293d93bd00564924ad20ad48f4a1e150577cf18d1e7d4f9263c"}, -] [package.dependencies] requests = ">=2.18.4" @@ -777,12 +611,9 @@ aio = ["aiohttp (>=3.0)"] name = "azure-datalake-store" version = "0.0.53" description = "Azure Data Lake Store Filesystem Client Library for Python" +category = "main" optional = true python-versions = "*" -files = [ - {file = "azure-datalake-store-0.0.53.tar.gz", hash = "sha256:05b6de62ee3f2a0a6e6941e6933b792b800c3e7f6ffce2fc324bc19875757393"}, - {file = "azure_datalake_store-0.0.53-py2.py3-none-any.whl", hash = "sha256:a30c902a6e360aa47d7f69f086b426729784e71c536f330b691647a51dc42b2b"}, -] [package.dependencies] cffi = "*" @@ -793,12 +624,9 @@ requests = ">=2.20.0" name = "azure-identity" version = "1.14.0" description = "Microsoft Azure Identity Library for Python" +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "azure-identity-1.14.0.zip", hash = "sha256:72441799f8c5c89bfe21026965e266672a7c5d050c2c65119ef899dd5362e2b1"}, - {file = "azure_identity-1.14.0-py3-none-any.whl", hash = "sha256:edabf0e010eb85760e1dd19424d5e8f97ba2c9caff73a16e7b30ccbdbcce369b"}, -] [package.dependencies] azure-core = ">=1.11.0,<2.0.0" @@ -810,12 +638,9 @@ msal-extensions = ">=0.3.0,<2.0.0" name = "azure-storage-blob" version = "12.17.0" description = "Microsoft Azure Blob Storage Client Library for Python" +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "azure-storage-blob-12.17.0.zip", hash = "sha256:c14b785a17050b30fc326a315bdae6bc4a078855f4f94a4c303ad74a48dc8c63"}, - {file = "azure_storage_blob-12.17.0-py3-none-any.whl", hash = "sha256:0016e0c549a80282d7b4920c03f2f4ba35c53e6e3c7dbcd2a4a8c8eb3882c1e7"}, -] [package.dependencies] azure-core = ">=1.28.0,<2.0.0" @@ -830,12 +655,9 @@ aio = ["azure-core[aio] (>=1.28.0,<2.0.0)"] name = "babel" version = "2.12.1" description = "Internationalization utilities" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "Babel-2.12.1-py3-none-any.whl", hash = "sha256:b4246fb7677d3b98f501a39d43396d3cafdc8eadb045f4a31be01863f655c610"}, - {file = "Babel-2.12.1.tar.gz", hash = "sha256:cc2d99999cd01d44420ae725a21c9e3711b3aadc7976d6147f622d8581963455"}, -] [package.dependencies] pytz = {version = ">=2015.7", markers = "python_version < \"3.9\""} @@ -844,23 +666,17 @@ pytz = {version = ">=2015.7", markers = "python_version < \"3.9\""} name = "backoff" version = "2.2.1" description = "Function decoration for backoff and retry" +category = "dev" optional = false python-versions = ">=3.7,<4.0" -files = [ - {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, - {file = "backoff-2.2.1.tar.gz", hash = 
"sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, -] [[package]] name = "bandit" version = "1.7.5" description = "Security oriented static analyser for python code." +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "bandit-1.7.5-py3-none-any.whl", hash = "sha256:75665181dc1e0096369112541a056c59d1c5f66f9bb74a8d686c3c362b83f549"}, - {file = "bandit-1.7.5.tar.gz", hash = "sha256:bdfc739baa03b880c2d15d0431b31c658ffc348e907fe197e54e0389dd59e11e"}, -] [package.dependencies] colorama = {version = ">=0.3.9", markers = "platform_system == \"Windows\""} @@ -878,12 +694,9 @@ yaml = ["PyYAML"] name = "beautifulsoup4" version = "4.12.2" description = "Screen-scraping library" +category = "main" optional = true python-versions = ">=3.6.0" -files = [ - {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, - {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, -] [package.dependencies] soupsieve = ">1.2" @@ -896,32 +709,9 @@ lxml = ["lxml"] name = "black" version = "23.9.1" description = "The uncompromising code formatter." +category = "dev" optional = false python-versions = ">=3.8" -files = [ - {file = "black-23.9.1-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:d6bc09188020c9ac2555a498949401ab35bb6bf76d4e0f8ee251694664df6301"}, - {file = "black-23.9.1-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:13ef033794029b85dfea8032c9d3b92b42b526f1ff4bf13b2182ce4e917f5100"}, - {file = "black-23.9.1-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:75a2dc41b183d4872d3a500d2b9c9016e67ed95738a3624f4751a0cb4818fe71"}, - {file = "black-23.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13a2e4a93bb8ca74a749b6974925c27219bb3df4d42fc45e948a5d9feb5122b7"}, - {file = "black-23.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:adc3e4442eef57f99b5590b245a328aad19c99552e0bdc7f0b04db6656debd80"}, - {file = "black-23.9.1-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:8431445bf62d2a914b541da7ab3e2b4f3bc052d2ccbf157ebad18ea126efb91f"}, - {file = "black-23.9.1-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:8fc1ddcf83f996247505db6b715294eba56ea9372e107fd54963c7553f2b6dfe"}, - {file = "black-23.9.1-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:7d30ec46de88091e4316b17ae58bbbfc12b2de05e069030f6b747dfc649ad186"}, - {file = "black-23.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:031e8c69f3d3b09e1aa471a926a1eeb0b9071f80b17689a655f7885ac9325a6f"}, - {file = "black-23.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:538efb451cd50f43aba394e9ec7ad55a37598faae3348d723b59ea8e91616300"}, - {file = "black-23.9.1-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:638619a559280de0c2aa4d76f504891c9860bb8fa214267358f0a20f27c12948"}, - {file = "black-23.9.1-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:a732b82747235e0542c03bf352c126052c0fbc458d8a239a94701175b17d4855"}, - {file = "black-23.9.1-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:cf3a4d00e4cdb6734b64bf23cd4341421e8953615cba6b3670453737a72ec204"}, - {file = "black-23.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf99f3de8b3273a8317681d8194ea222f10e0133a24a7548c73ce44ea1679377"}, - {file = "black-23.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:14f04c990259576acd093871e7e9b14918eb28f1866f91968ff5524293f9c573"}, - {file = 
"black-23.9.1-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:c619f063c2d68f19b2d7270f4cf3192cb81c9ec5bc5ba02df91471d0b88c4c5c"}, - {file = "black-23.9.1-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:6a3b50e4b93f43b34a9d3ef00d9b6728b4a722c997c99ab09102fd5efdb88325"}, - {file = "black-23.9.1-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:c46767e8df1b7beefb0899c4a95fb43058fa8500b6db144f4ff3ca38eb2f6393"}, - {file = "black-23.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50254ebfa56aa46a9fdd5d651f9637485068a1adf42270148cd101cdf56e0ad9"}, - {file = "black-23.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:403397c033adbc45c2bd41747da1f7fc7eaa44efbee256b53842470d4ac5a70f"}, - {file = "black-23.9.1-py3-none-any.whl", hash = "sha256:6ccd59584cc834b6d127628713e4b6b968e5f79572da66284532525a042549f9"}, - {file = "black-23.9.1.tar.gz", hash = "sha256:24b6b3ff5c6d9ea08a8888f6977eae858e1f340d7260cf56d70a49823236b62d"}, -] [package.dependencies] click = ">=8.0.0" @@ -942,12 +732,9 @@ uvloop = ["uvloop (>=0.15.2)"] name = "blessed" version = "1.20.0" description = "Easy, practical library for making terminal apps, by providing an elegant, well-documented interface to Colors, Keyboard input, and screen Positioning capabilities." +category = "dev" optional = false python-versions = ">=2.7" -files = [ - {file = "blessed-1.20.0-py2.py3-none-any.whl", hash = "sha256:0c542922586a265e699188e52d5f5ac5ec0dd517e5a1041d90d2bbf23f906058"}, - {file = "blessed-1.20.0.tar.gz", hash = "sha256:2cdd67f8746e048f00df47a2880f4d6acbcdb399031b604e34ba8f71d5787680"}, -] [package.dependencies] jinxed = {version = ">=1.1.0", markers = "platform_system == \"Windows\""} @@ -958,23 +745,17 @@ wcwidth = ">=0.1.4" name = "blinker" version = "1.6.2" description = "Fast, simple object-to-object and broadcast signaling" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "blinker-1.6.2-py3-none-any.whl", hash = "sha256:c3d739772abb7bc2860abf5f2ec284223d9ad5c76da018234f6f50d6f31ab1f0"}, - {file = "blinker-1.6.2.tar.gz", hash = "sha256:4afd3de66ef3a9f8067559fb7a1cbe555c17dcbe15971b05d1b625c3e7abe213"}, -] [[package]] name = "boto3" version = "1.26.161" description = "The AWS SDK for Python" +category = "main" optional = true python-versions = ">= 3.7" -files = [ - {file = "boto3-1.26.161-py3-none-any.whl", hash = "sha256:f66e5c9dbe7f34383bcf64fa6070771355c11a44dd75c7f1279f2f37e1c89183"}, - {file = "boto3-1.26.161.tar.gz", hash = "sha256:662731e464d14af1035f44fc6a46b0e3112ee011ac0a5ed416d205daa3e15f25"}, -] [package.dependencies] botocore = ">=1.29.161,<1.30.0" @@ -988,12 +769,9 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] name = "boto3-stubs" version = "1.28.40" description = "Type annotations for boto3 1.28.40 generated with mypy-boto3-builder 7.18.2" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "boto3-stubs-1.28.40.tar.gz", hash = "sha256:76079a82f199087319762c931f13506e02129132e80257dab0888d3da7dc11c7"}, - {file = "boto3_stubs-1.28.40-py3-none-any.whl", hash = "sha256:bd1d1cbdcbf18902a090d4a746cdecef2a7ebe31cf9a474bbe407d57eaa79a6a"}, -] [package.dependencies] botocore-stubs = "*" @@ -1368,12 +1146,9 @@ xray = ["mypy-boto3-xray (>=1.28.0,<1.29.0)"] name = "botocore" version = "1.29.161" description = "Low-level, data-driven core of boto 3." 
+category = "main" optional = true python-versions = ">= 3.7" -files = [ - {file = "botocore-1.29.161-py3-none-any.whl", hash = "sha256:b906999dd53dda2ef0ef6f7f55fcc81a4b06b9f1c8a9f65c546e0b981f959f5f"}, - {file = "botocore-1.29.161.tar.gz", hash = "sha256:a50edd715eb510343e27849f36483804aae4b871590db4d4996aa53368dcac40"}, -] [package.dependencies] jmespath = ">=0.7.1,<2.0.0" @@ -1387,12 +1162,9 @@ crt = ["awscrt (==0.16.9)"] name = "botocore-stubs" version = "1.31.40" description = "Type annotations and code completion for botocore" +category = "main" optional = false python-versions = ">=3.7,<4.0" -files = [ - {file = "botocore_stubs-1.31.40-py3-none-any.whl", hash = "sha256:aab534d7e7949cd543bc9b2fadc1a36712033cb00e6f31e2475eefe8486d19ae"}, - {file = "botocore_stubs-1.31.40.tar.gz", hash = "sha256:2001a253daf4ae2e171e6137b9982a00a7fbfc7a53449a16856dc049e7cd5214"}, -] [package.dependencies] types-awscrt = "*" @@ -1402,34 +1174,25 @@ typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.9\""} name = "cachelib" version = "0.9.0" description = "A collection of cache libraries in the same API interface." +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "cachelib-0.9.0-py3-none-any.whl", hash = "sha256:811ceeb1209d2fe51cd2b62810bd1eccf70feba5c52641532498be5c675493b3"}, - {file = "cachelib-0.9.0.tar.gz", hash = "sha256:38222cc7c1b79a23606de5c2607f4925779e37cdcea1c2ad21b8bae94b5425a5"}, -] [[package]] name = "cachetools" version = "5.3.1" description = "Extensible memoizing collections and decorators" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "cachetools-5.3.1-py3-none-any.whl", hash = "sha256:95ef631eeaea14ba2e36f06437f36463aac3a096799e876ee55e5cdccb102590"}, - {file = "cachetools-5.3.1.tar.gz", hash = "sha256:dce83f2d9b4e1f732a8cd44af8e8fab2dbe46201467fc98b3ef8f269092bf62b"}, -] [[package]] name = "cattrs" version = "23.1.2" description = "Composable complex class support for attrs and dataclasses." +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "cattrs-23.1.2-py3-none-any.whl", hash = "sha256:b2bb14311ac17bed0d58785e5a60f022e5431aca3932e3fc5cc8ed8639de50a4"}, - {file = "cattrs-23.1.2.tar.gz", hash = "sha256:db1c821b8c537382b2c7c66678c3790091ca0275ac486c76f3c8f3920e83c657"}, -] [package.dependencies] attrs = ">=20" @@ -1449,85 +1212,17 @@ ujson = ["ujson (>=5.4.0,<6.0.0)"] name = "certifi" version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." +category = "main" optional = false python-versions = ">=3.6" -files = [ - {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"}, - {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, -] [[package]] name = "cffi" version = "1.15.1" description = "Foreign Function Interface for Python calling C code." 
+category = "main" optional = false python-versions = "*" -files = [ - {file = "cffi-1.15.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2"}, - {file = "cffi-1.15.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2"}, - {file = "cffi-1.15.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914"}, - {file = "cffi-1.15.1-cp27-cp27m-win32.whl", hash = "sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3"}, - {file = "cffi-1.15.1-cp27-cp27m-win_amd64.whl", hash = "sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e"}, - {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162"}, - {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b"}, - {file = "cffi-1.15.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21"}, - {file = "cffi-1.15.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4"}, - {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01"}, - {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e"}, - {file = "cffi-1.15.1-cp310-cp310-win32.whl", hash = "sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2"}, - {file = "cffi-1.15.1-cp310-cp310-win_amd64.whl", hash = "sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d"}, - {file = "cffi-1.15.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac"}, - {file = "cffi-1.15.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c"}, - {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef"}, - {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8"}, - {file = "cffi-1.15.1-cp311-cp311-win32.whl", hash = "sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d"}, - {file = "cffi-1.15.1-cp311-cp311-win_amd64.whl", hash = "sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104"}, - {file = "cffi-1.15.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e"}, - {file = "cffi-1.15.1-cp36-cp36m-win32.whl", hash = "sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf"}, - {file = "cffi-1.15.1-cp36-cp36m-win_amd64.whl", hash = "sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497"}, - {file = "cffi-1.15.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426"}, - {file = "cffi-1.15.1-cp37-cp37m-win32.whl", hash = "sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9"}, - {file = "cffi-1.15.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045"}, - {file = "cffi-1.15.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192"}, - {file = "cffi-1.15.1-cp38-cp38-win32.whl", hash = "sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314"}, - {file = "cffi-1.15.1-cp38-cp38-win_amd64.whl", hash = "sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5"}, - {file = "cffi-1.15.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585"}, - {file = "cffi-1.15.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27"}, - {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76"}, - {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3"}, - {file = "cffi-1.15.1-cp39-cp39-win32.whl", hash = "sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee"}, - {file = "cffi-1.15.1-cp39-cp39-win_amd64.whl", hash = "sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c"}, - {file = "cffi-1.15.1.tar.gz", hash = "sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9"}, -] [package.dependencies] pycparser = "*" @@ -1536,107 +1231,25 @@ pycparser = "*" name = "chardet" version = "5.2.0" description = "Universal encoding detector for Python 3" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"}, - {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"}, -] [[package]] name = "charset-normalizer" version = "3.2.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+category = "main" optional = false python-versions = ">=3.7.0" -files = [ - {file = "charset-normalizer-3.2.0.tar.gz", hash = "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-win32.whl", hash = "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a"}, - {file = 
"charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-win32.whl", hash = "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592"}, - {file = 
"charset_normalizer-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-win32.whl", hash = "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80"}, - {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"}, -] [[package]] name = "click" version = "8.1.7" description = "Composable command line interface toolkit" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, - {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, -] [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} @@ -1645,12 +1258,9 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "clickclick" version = "20.10.2" description = "Click utility functions" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "clickclick-20.10.2-py2.py3-none-any.whl", hash = "sha256:c8f33e6d9ec83f68416dd2136a7950125bd256ec39ccc9a85c6e280a16be2bb5"}, - {file = "clickclick-20.10.2.tar.gz", hash = "sha256:4efb13e62353e34c5eef7ed6582c4920b418d7dedc86d819e22ee089ba01802c"}, -] [package.dependencies] click = ">=4.0" @@ -1660,23 +1270,17 @@ PyYAML = ">=3.11" name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." 
+category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -files = [ - {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, - {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, -] [[package]] name = "coloredlogs" version = "15.0.1" description = "Colored terminal output for Python's logging module" +category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -files = [ - {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"}, - {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, -] [package.dependencies] humanfriendly = ">=9.1" @@ -1688,12 +1292,9 @@ cron = ["capturer (>=2.4)"] name = "colorlog" version = "4.8.0" description = "Log formatting with colors!" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "colorlog-4.8.0-py2.py3-none-any.whl", hash = "sha256:3dd15cb27e8119a24c1a7b5c93f9f3b455855e0f73993b1c25921b2f646f1dcd"}, - {file = "colorlog-4.8.0.tar.gz", hash = "sha256:59b53160c60902c405cdec28d38356e09d40686659048893e026ecbd589516b1"}, -] [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} @@ -1702,12 +1303,9 @@ colorama = {version = "*", markers = "sys_platform == \"win32\""} name = "configupdater" version = "3.1.1" description = "Parser like ConfigParser but for updating configuration files" +category = "dev" optional = false python-versions = ">=3.6" -files = [ - {file = "ConfigUpdater-3.1.1-py2.py3-none-any.whl", hash = "sha256:805986dbeba317886c7a8d348b2e34986dc9e3128cd3761ecc35decbd372b286"}, - {file = "ConfigUpdater-3.1.1.tar.gz", hash = "sha256:46f0c74d73efa723776764b43c9739f68052495dd3d734319c1d0eb58511f15b"}, -] [package.extras] testing = ["flake8", "pytest", "pytest-cov", "pytest-virtualenv", "pytest-xdist", "sphinx"] @@ -1716,37 +1314,17 @@ testing = ["flake8", "pytest", "pytest-cov", "pytest-virtualenv", "pytest-xdist" name = "connectorx" version = "0.3.1" description = "" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "connectorx-0.3.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:719750045e7c3b94c199271fbfe6aef47944768e711f27bcc606b498707e0054"}, - {file = "connectorx-0.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aed31b08acebeb3ebbe53c0df846c686e7c27c4242bff3a75b72cf517d070257"}, - {file = "connectorx-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:71d2c2678339fb01f89469bbe22e66e75cabcf727a52ed72d576fef5744ebc58"}, - {file = "connectorx-0.3.1-cp310-none-win_amd64.whl", hash = "sha256:92e576ef9610b59f8e5456c12d22e5b0752d0207f586df82701987657909888b"}, - {file = "connectorx-0.3.1-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:36c28cc59220998928e7b283eecf404e17e077dc3e525570096d0968b192cc64"}, - {file = "connectorx-0.3.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:c5173e7252f593c46787627a46561b0d949eb80ab23321e045bbf6bd5131945c"}, - {file = "connectorx-0.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c8411631750d24c12e5e296720637909b8515d5faa3b5eaf7bb86c582d02667"}, - {file = "connectorx-0.3.1-cp37-none-win_amd64.whl", hash = "sha256:0674b6389f8f2ba62155ac2f718df18f76f9de5c50d9911a5fefe7485e1c598e"}, 
- {file = "connectorx-0.3.1-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:324c5075e8aa6698db8c877cb847f0d86172784db88ac0f3e6762aa9852330f3"}, - {file = "connectorx-0.3.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:027a3880629a7b33ae0c7a80ab4fa53286957a253af2dfe34f19adfea6b79b91"}, - {file = "connectorx-0.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a666b967958fcf9fc0444a7b3603483ee23a2fe39f0da3d545ff199f376f7e4b"}, - {file = "connectorx-0.3.1-cp38-none-win_amd64.whl", hash = "sha256:3c5dedfd75cf44898c17cc84a1dd0ab6ed0fa54de0461f2d6aa4bcb2c2b0dc1d"}, - {file = "connectorx-0.3.1-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:354c4126bcd7a9efbb8879feac92e1e7b0d0712f7e98665c392af663805491f8"}, - {file = "connectorx-0.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3011e1f9a27fd2a7b12c6a45bc29f6e7577a27418a3f607adaf54b301ff09068"}, - {file = "connectorx-0.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1efb6ed547acc5837c2211e3d65d22948019d1653e7b30e522a4a4bd6d25fa8"}, - {file = "connectorx-0.3.1-cp39-none-win_amd64.whl", hash = "sha256:001b473e600b6d25af83b32674f98dccf49705a59bd6df724b5ba9beb236a0e0"}, -] [[package]] name = "connexion" version = "2.14.1" description = "Connexion - API first applications with OpenAPI/Swagger and Flask" +category = "dev" optional = false python-versions = ">=3.6" -files = [ - {file = "connexion-2.14.1-py2.py3-none-any.whl", hash = "sha256:f343717241b4c4802a694c38fee66fb1693c897fe4ea5a957fa9b3b07caf6394"}, - {file = "connexion-2.14.1.tar.gz", hash = "sha256:99aa5781e70a7b94f8ffae8cf89f309d49cdb811bbd65a8e2f2546f3b19a01e6"}, -] [package.dependencies] clickclick = ">=1.2,<21" @@ -1770,11 +1348,9 @@ tests = ["MarkupSafe (>=0.23)", "aiohttp (>=2.3.10,<4)", "aiohttp-jinja2 (>=0.14 name = "cron-descriptor" version = "1.4.0" description = "A Python library that converts cron expressions into human readable strings." +category = "main" optional = false python-versions = "*" -files = [ - {file = "cron_descriptor-1.4.0.tar.gz", hash = "sha256:b6ff4e3a988d7ca04a4ab150248e9f166fb7a5c828a85090e75bcc25aa93b4dd"}, -] [package.extras] dev = ["polib"] @@ -1783,12 +1359,9 @@ dev = ["polib"] name = "croniter" version = "1.4.1" description = "croniter provides iteration for datetime object with cron like format" +category = "dev" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "croniter-1.4.1-py2.py3-none-any.whl", hash = "sha256:9595da48af37ea06ec3a9f899738f1b2c1c13da3c38cea606ef7cd03ea421128"}, - {file = "croniter-1.4.1.tar.gz", hash = "sha256:1a6df60eacec3b7a0aa52a8f2ef251ae3dd2a7c7c8b9874e73e791636d55a361"}, -] [package.dependencies] python-dateutil = "*" @@ -1797,33 +1370,9 @@ python-dateutil = "*" name = "cryptography" version = "41.0.3" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
+category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "cryptography-41.0.3-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:652627a055cb52a84f8c448185922241dd5217443ca194d5739b44612c5e6507"}, - {file = "cryptography-41.0.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:8f09daa483aedea50d249ef98ed500569841d6498aa9c9f4b0531b9964658922"}, - {file = "cryptography-41.0.3-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fd871184321100fb400d759ad0cddddf284c4b696568204d281c902fc7b0d81"}, - {file = "cryptography-41.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84537453d57f55a50a5b6835622ee405816999a7113267739a1b4581f83535bd"}, - {file = "cryptography-41.0.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:3fb248989b6363906827284cd20cca63bb1a757e0a2864d4c1682a985e3dca47"}, - {file = "cryptography-41.0.3-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:42cb413e01a5d36da9929baa9d70ca90d90b969269e5a12d39c1e0d475010116"}, - {file = "cryptography-41.0.3-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:aeb57c421b34af8f9fe830e1955bf493a86a7996cc1338fe41b30047d16e962c"}, - {file = "cryptography-41.0.3-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6af1c6387c531cd364b72c28daa29232162010d952ceb7e5ca8e2827526aceae"}, - {file = "cryptography-41.0.3-cp37-abi3-win32.whl", hash = "sha256:0d09fb5356f975974dbcb595ad2d178305e5050656affb7890a1583f5e02a306"}, - {file = "cryptography-41.0.3-cp37-abi3-win_amd64.whl", hash = "sha256:a983e441a00a9d57a4d7c91b3116a37ae602907a7618b882c8013b5762e80574"}, - {file = "cryptography-41.0.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5259cb659aa43005eb55a0e4ff2c825ca111a0da1814202c64d28a985d33b087"}, - {file = "cryptography-41.0.3-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:67e120e9a577c64fe1f611e53b30b3e69744e5910ff3b6e97e935aeb96005858"}, - {file = "cryptography-41.0.3-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:7efe8041897fe7a50863e51b77789b657a133c75c3b094e51b5e4b5cec7bf906"}, - {file = "cryptography-41.0.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ce785cf81a7bdade534297ef9e490ddff800d956625020ab2ec2780a556c313e"}, - {file = "cryptography-41.0.3-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:57a51b89f954f216a81c9d057bf1a24e2f36e764a1ca9a501a6964eb4a6800dd"}, - {file = "cryptography-41.0.3-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:4c2f0d35703d61002a2bbdcf15548ebb701cfdd83cdc12471d2bae80878a4207"}, - {file = "cryptography-41.0.3-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:23c2d778cf829f7d0ae180600b17e9fceea3c2ef8b31a99e3c694cbbf3a24b84"}, - {file = "cryptography-41.0.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:95dd7f261bb76948b52a5330ba5202b91a26fbac13ad0e9fc8a3ac04752058c7"}, - {file = "cryptography-41.0.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:41d7aa7cdfded09b3d73a47f429c298e80796c8e825ddfadc84c8a7f12df212d"}, - {file = "cryptography-41.0.3-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d0d651aa754ef58d75cec6edfbd21259d93810b73f6ec246436a21b7841908de"}, - {file = "cryptography-41.0.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:ab8de0d091acbf778f74286f4989cf3d1528336af1b59f3e5d2ebca8b5fe49e1"}, - {file = "cryptography-41.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a74fbcdb2a0d46fe00504f571a2a540532f4c188e6ccf26f1f178480117b33c4"}, - {file = "cryptography-41.0.3.tar.gz", hash = 
"sha256:6d192741113ef5e30d89dcb5b956ef4e1578f304708701b8b73d38e3e1461f34"}, -] [package.dependencies] cffi = ">=1.12" @@ -1842,12 +1391,9 @@ test-randomorder = ["pytest-randomly"] name = "databind-core" version = "4.4.0" description = "Databind is a library inspired by jackson-databind to de-/serialize Python dataclasses. Compatible with Python 3.7 and newer." +category = "dev" optional = false python-versions = ">=3.6.3,<4.0.0" -files = [ - {file = "databind.core-4.4.0-py3-none-any.whl", hash = "sha256:3c8a4d9abc93e158af9931d8cec389ddfc0514e02aec03b397948d243db11881"}, - {file = "databind.core-4.4.0.tar.gz", hash = "sha256:715d485e934c073f819f0250bbfcaf59c1319f83427365bc7cfd4c347f87576d"}, -] [package.dependencies] Deprecated = ">=1.2.12,<2.0.0" @@ -1860,12 +1406,9 @@ typing-extensions = ">=3.10.0" name = "databind-json" version = "4.4.0" description = "De-/serialize Python dataclasses to or from JSON payloads. Compatible with Python 3.7 and newer." +category = "dev" optional = false python-versions = ">=3.6.3,<4.0.0" -files = [ - {file = "databind.json-4.4.0-py3-none-any.whl", hash = "sha256:df8874118cfba6fd0e77ec3d41a87e04e26034bd545230cab0db1fe904bf1b09"}, - {file = "databind.json-4.4.0.tar.gz", hash = "sha256:4356afdf0aeefcc053eda0888650c59cc558be2686f08a58324d675ccd023586"}, -] [package.dependencies] "databind.core" = ">=4.4.0,<5.0.0" @@ -1877,12 +1420,9 @@ typing-extensions = ">=3.10.0" name = "dbt-athena-community" version = "1.5.2" description = "The athena adapter plugin for dbt (data build tool)" +category = "main" optional = true python-versions = "*" -files = [ - {file = "dbt-athena-community-1.5.2.tar.gz", hash = "sha256:9acd333ddf33514769189a7a0b6219e13966d370098211cb1d022fa32e64671a"}, - {file = "dbt_athena_community-1.5.2-py3-none-any.whl", hash = "sha256:c9f0f8425500211a1c1deddce5aff5ed24fe08530f0ffad38e63de9c9b9f3ee6"}, -] [package.dependencies] boto3 = ">=1.26,<2.0" @@ -1896,12 +1436,9 @@ tenacity = ">=8.2,<9.0" name = "dbt-bigquery" version = "1.5.6" description = "The Bigquery adapter plugin for dbt" +category = "main" optional = true python-versions = ">=3.8" -files = [ - {file = "dbt-bigquery-1.5.6.tar.gz", hash = "sha256:4655cf2ee0acda986b80e6c5d55cae57871bef22d868dfe29d8d4a5bca98a1ba"}, - {file = "dbt_bigquery-1.5.6-py3-none-any.whl", hash = "sha256:3f37544716880cbd17b32bc0c9728a0407b5615b2cd08e1bb904a7a83c46eb6c"}, -] [package.dependencies] agate = ">=1.6.3,<1.7.0" @@ -1914,12 +1451,9 @@ google-cloud-storage = ">=2.4,<3.0" name = "dbt-core" version = "1.5.6" description = "With dbt, data analysts and engineers can build analytics the way engineers build applications." 
+category = "main" optional = false python-versions = ">=3.7.2" -files = [ - {file = "dbt-core-1.5.6.tar.gz", hash = "sha256:af3c03cd4a1fc92481362888014ca1ffed2ffef0b0e0d98463ad0f26c49ef458"}, - {file = "dbt_core-1.5.6-py3-none-any.whl", hash = "sha256:030d2179f9efbf8ccea079296d0c79278d963bb2475c0bcce9ca4bbb0d8c393c"}, -] [package.dependencies] agate = ">=1.6,<1.7.1" @@ -1949,12 +1483,9 @@ werkzeug = ">=1,<3" name = "dbt-duckdb" version = "1.5.2" description = "The duckdb adapter plugin for dbt (data build tool)" +category = "main" optional = false python-versions = "*" -files = [ - {file = "dbt-duckdb-1.5.2.tar.gz", hash = "sha256:3407216c21bf78fd128dccfcff3ec4bf260fb145e633432015bc7d0f123e8e4b"}, - {file = "dbt_duckdb-1.5.2-py3-none-any.whl", hash = "sha256:5d18254807bbc3e61daf4f360208ad886adf44b8525e1998168290fbe73a5cbb"}, -] [package.dependencies] dbt-core = ">=1.5.0,<1.6.0" @@ -1967,37 +1498,17 @@ glue = ["boto3", "mypy-boto3-glue"] name = "dbt-extractor" version = "0.4.1" description = "A tool to analyze and extract information from Jinja used in dbt projects." +category = "main" optional = false python-versions = ">=3.6.1" -files = [ - {file = "dbt_extractor-0.4.1-cp36-abi3-macosx_10_7_x86_64.whl", hash = "sha256:4dc715bd740e418d8dc1dd418fea508e79208a24cf5ab110b0092a3cbe96bf71"}, - {file = "dbt_extractor-0.4.1-cp36-abi3-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:bc9e0050e3a2f4ea9fe58e8794bc808e6709a0c688ed710fc7c5b6ef3e5623ec"}, - {file = "dbt_extractor-0.4.1-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76872cdee659075d6ce2df92dc62e59a74ba571be62acab2e297ca478b49d766"}, - {file = "dbt_extractor-0.4.1-cp36-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:81435841610be1b07806d72cd89b1956c6e2a84c360b9ceb3f949c62a546d569"}, - {file = "dbt_extractor-0.4.1-cp36-abi3-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:7c291f9f483eae4f60dd5859097d7ba51d5cb6c4725f08973ebd18cdea89d758"}, - {file = "dbt_extractor-0.4.1-cp36-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:822b1e911db230e1b9701c99896578e711232001027b518c44c32f79a46fa3f9"}, - {file = "dbt_extractor-0.4.1-cp36-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:554d27741a54599c39e5c0b7dbcab77400d83f908caba284a3e960db812e5814"}, - {file = "dbt_extractor-0.4.1-cp36-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a805d51a25317f53cbff951c79b9cf75421cf48e4b3e1dfb3e9e8de6d824b76c"}, - {file = "dbt_extractor-0.4.1-cp36-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cad90ddc708cb4182dc16fe2c87b1f088a1679877b93e641af068eb68a25d582"}, - {file = "dbt_extractor-0.4.1-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:34783d788b133f223844e280e37b3f5244f2fb60acc457aa75c2667e418d5442"}, - {file = "dbt_extractor-0.4.1-cp36-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:9da211869a1220ea55c5552c1567a3ea5233a6c52fa89ca87a22465481c37bc9"}, - {file = "dbt_extractor-0.4.1-cp36-abi3-musllinux_1_2_i686.whl", hash = "sha256:7d7c47774dc051b8c18690281a55e2e3d3320e823b17e04b06bc3ff81b1874ba"}, - {file = "dbt_extractor-0.4.1-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:037907a7c7ae0391045d81338ca77ddaef899a91d80f09958f09fe374594e19b"}, - {file = "dbt_extractor-0.4.1-cp36-abi3-win32.whl", hash = "sha256:3fe8d8e28a7bd3e0884896147269ca0202ca432d8733113386bdc84c824561bf"}, - {file = "dbt_extractor-0.4.1-cp36-abi3-win_amd64.whl", hash = 
"sha256:35265a0ae0a250623b0c2e3308b2738dc8212e40e0aa88407849e9ea090bb312"}, - {file = "dbt_extractor-0.4.1.tar.gz", hash = "sha256:75b1c665699ec0f1ffce1ba3d776f7dfce802156f22e70a7b9c8f0b4d7e80f42"}, -] [[package]] name = "dbt-postgres" version = "1.5.6" description = "The postgres adapter plugin for dbt (data build tool)" +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "dbt-postgres-1.5.6.tar.gz", hash = "sha256:b74e471dc661819a3d4bda2d11497935661ac2e25786c8a5b7314d8241b18582"}, - {file = "dbt_postgres-1.5.6-py3-none-any.whl", hash = "sha256:bc5711c9ab0ec4b57ab814b2c4e4c973554c8374b7da94b06814ac81c91f67ef"}, -] [package.dependencies] dbt-core = "1.5.6" @@ -2007,12 +1518,9 @@ psycopg2-binary = ">=2.8,<3.0" name = "dbt-redshift" version = "1.5.10" description = "The Redshift adapter plugin for dbt" +category = "main" optional = true python-versions = ">=3.8" -files = [ - {file = "dbt-redshift-1.5.10.tar.gz", hash = "sha256:2b9ae1a7d05349e208b0937cd7cc920ea427341ef96096021b18e4070e927f5c"}, - {file = "dbt_redshift-1.5.10-py3-none-any.whl", hash = "sha256:b7689b043535b6b0d217c2abfe924db2336beaae71f3f36ab9aa1e920d2bb2e0"}, -] [package.dependencies] agate = "*" @@ -2025,12 +1533,9 @@ redshift-connector = "2.0.913" name = "dbt-snowflake" version = "1.5.3" description = "The Snowflake adapter plugin for dbt" +category = "main" optional = true python-versions = ">=3.8" -files = [ - {file = "dbt-snowflake-1.5.3.tar.gz", hash = "sha256:cf42772d2c2f1e29a2a64b039c66d80a8593f52a2dd711a144d43b4175802f9a"}, - {file = "dbt_snowflake-1.5.3-py3-none-any.whl", hash = "sha256:8aaa939d834798e5bb10a3ba4f52fc32a53e6e5568d6c0e8b3ac644f099972ff"}, -] [package.dependencies] dbt-core = ">=1.5.0,<1.6.0" @@ -2040,12 +1545,9 @@ snowflake-connector-python = {version = ">=3.0,<4.0", extras = ["secure-local-st name = "decopatch" version = "1.4.10" description = "Create decorators easily in python." +category = "dev" optional = false python-versions = "*" -files = [ - {file = "decopatch-1.4.10-py2.py3-none-any.whl", hash = "sha256:e151f7f93de2b1b3fd3f3272dcc7cefd1a69f68ec1c2d8e288ecd9deb36dc5f7"}, - {file = "decopatch-1.4.10.tar.gz", hash = "sha256:957f49c93f4150182c23f8fb51d13bb3213e0f17a79e09c8cca7057598b55720"}, -] [package.dependencies] makefun = ">=1.5.0" @@ -2054,23 +1556,17 @@ makefun = ">=1.5.0" name = "decorator" version = "5.1.1" description = "Decorators for Humans" +category = "main" optional = false python-versions = ">=3.5" -files = [ - {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, - {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, -] [[package]] name = "deprecated" version = "1.2.14" description = "Python @deprecated decorator to deprecate old python classes, functions or methods." 
+category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, - {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"}, -] [package.dependencies] wrapt = ">=1.10,<2" @@ -2082,12 +1578,9 @@ dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] name = "diff-cover" version = "7.7.0" description = "Run coverage and linting reports on diffs" +category = "dev" optional = false python-versions = ">=3.7.2,<4.0.0" -files = [ - {file = "diff_cover-7.7.0-py3-none-any.whl", hash = "sha256:bf86f32ec999f9a9e79bf24969f7127ea7b4e55c3ef3cd9300feb13188c89736"}, - {file = "diff_cover-7.7.0.tar.gz", hash = "sha256:60614cf7e722cf7fb1bde497afac0b514294e1e26534449622dac4da296123fb"}, -] [package.dependencies] chardet = ">=3.0.0" @@ -2102,12 +1595,9 @@ toml = ["tomli (>=1.2.1)"] name = "dill" version = "0.3.7" description = "serialize all of Python" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "dill-0.3.7-py3-none-any.whl", hash = "sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e"}, - {file = "dill-0.3.7.tar.gz", hash = "sha256:cc1c8b182eb3013e24bd475ff2e9295af86c1a38eb1aff128dac8962a9ce3c03"}, -] [package.extras] graph = ["objgraph (>=1.7.2)"] @@ -2116,12 +1606,9 @@ graph = ["objgraph (>=1.7.2)"] name = "dnspython" version = "2.4.2" description = "DNS toolkit" +category = "dev" optional = false python-versions = ">=3.8,<4.0" -files = [ - {file = "dnspython-2.4.2-py3-none-any.whl", hash = "sha256:57c6fbaaeaaf39c891292012060beb141791735dbb4004798328fc2c467402d8"}, - {file = "dnspython-2.4.2.tar.gz", hash = "sha256:8dcfae8c7460a2f84b4072e26f1c9f4101ca20c071649cb7c34e8b6a93d58984"}, -] [package.extras] dnssec = ["cryptography (>=2.6,<42.0)"] @@ -2135,12 +1622,9 @@ wmi = ["wmi (>=1.5.1,<2.0.0)"] name = "docspec" version = "2.2.1" description = "Docspec is a JSON object specification for representing API documentation of programming languages." +category = "dev" optional = false python-versions = ">=3.7,<4.0" -files = [ - {file = "docspec-2.2.1-py3-none-any.whl", hash = "sha256:7538f750095a9688c6980ff9a4e029a823a500f64bd00b6b4bdb27951feb31cb"}, - {file = "docspec-2.2.1.tar.gz", hash = "sha256:4854e77edc0e2de40e785e57e95880f7095a05fe978f8b54cef7a269586e15ff"}, -] [package.dependencies] "databind.core" = ">=4.2.6,<5.0.0" @@ -2151,12 +1635,9 @@ Deprecated = ">=1.2.12,<2.0.0" name = "docspec-python" version = "2.2.1" description = "A parser based on lib2to3 producing docspec data from Python source code." 
+category = "dev" optional = false python-versions = ">=3.7,<4.0" -files = [ - {file = "docspec_python-2.2.1-py3-none-any.whl", hash = "sha256:76ac41d35a8face35b2d766c2e8a416fb8832359785d396f0d53bcb00f178e54"}, - {file = "docspec_python-2.2.1.tar.gz", hash = "sha256:c41b850b4d6f4de30999ea6f82c9cdb9183d9bcba45559ee9173d3dab7281559"}, -] [package.dependencies] black = ">=23.1.0,<24.0.0" @@ -2167,11 +1648,9 @@ docspec = ">=2.2.1,<3.0.0" name = "docstring-parser" version = "0.11" description = "\"Parse Python docstrings in reST, Google and Numpydoc format\"" +category = "dev" optional = false python-versions = ">=3.6" -files = [ - {file = "docstring_parser-0.11.tar.gz", hash = "sha256:93b3f8f481c7d24e37c5d9f30293c89e2933fa209421c8abd731dd3ef0715ecb"}, -] [package.extras] test = ["black", "pytest"] @@ -2180,23 +1659,17 @@ test = ["black", "pytest"] name = "docutils" version = "0.20.1" description = "Docutils -- Python Documentation Utilities" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6"}, - {file = "docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b"}, -] [[package]] name = "domdf-python-tools" version = "3.6.1" description = "Helpful functions for Python 🐍 🛠️" +category = "dev" optional = false python-versions = ">=3.6" -files = [ - {file = "domdf_python_tools-3.6.1-py3-none-any.whl", hash = "sha256:e18158460850957f18e740eb94ede56f580ddb0cb162ab9d9834ed8bbb1b6431"}, - {file = "domdf_python_tools-3.6.1.tar.gz", hash = "sha256:acc04563d23bce4d437dd08af6b9bea788328c412772a044d8ca428a7ad861be"}, -] [package.dependencies] importlib-metadata = {version = ">=3.6.0", markers = "python_version < \"3.9\""} @@ -2211,60 +1684,17 @@ dates = ["pytz (>=2019.1)"] name = "duckdb" version = "0.9.1" description = "DuckDB embedded database" +category = "main" optional = false python-versions = ">=3.7.0" -files = [ - {file = "duckdb-0.9.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6c724e105ecd78c8d86b3c03639b24e1df982392fc836705eb007e4b1b488864"}, - {file = "duckdb-0.9.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:75f12c5a3086079fb6440122565f1762ef1a610a954f2d8081014c1dd0646e1a"}, - {file = "duckdb-0.9.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:151f5410c32f8f8fe03bf23462b9604349bc0b4bd3a51049bbf5e6a482a435e8"}, - {file = "duckdb-0.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c1d066fdae22b9b711b1603541651a378017645f9fbc4adc9764b2f3c9e9e4a"}, - {file = "duckdb-0.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1de56d8b7bd7a7653428c1bd4b8948316df488626d27e9c388194f2e0d1428d4"}, - {file = "duckdb-0.9.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1fb6cd590b1bb4e31fde8efd25fedfbfa19a86fa72789fa5b31a71da0d95bce4"}, - {file = "duckdb-0.9.1-cp310-cp310-win32.whl", hash = "sha256:1039e073714d668cef9069bb02c2a6756c7969cedda0bff1332520c4462951c8"}, - {file = "duckdb-0.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:7e6ac4c28918e1d278a89ff26fd528882aa823868ed530df69d6c8a193ae4e41"}, - {file = "duckdb-0.9.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5eb750f2ee44397a61343f32ee9d9e8c8b5d053fa27ba4185d0e31507157f130"}, - {file = "duckdb-0.9.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aea2a46881d75dc069a242cb164642d7a4f792889010fb98210953ab7ff48849"}, - {file = 
"duckdb-0.9.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed3dcedfc7a9449b6d73f9a2715c730180056e0ba837123e7967be1cd3935081"}, - {file = "duckdb-0.9.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c55397bed0087ec4445b96f8d55f924680f6d40fbaa7f2e35468c54367214a5"}, - {file = "duckdb-0.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3261696130f1cfb955735647c93297b4a6241753fb0de26c05d96d50986c6347"}, - {file = "duckdb-0.9.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:64c04b1728e3e37cf93748829b5d1e028227deea75115bb5ead01c608ece44b1"}, - {file = "duckdb-0.9.1-cp311-cp311-win32.whl", hash = "sha256:12cf9fb441a32702e31534330a7b4d569083d46a91bf185e0c9415000a978789"}, - {file = "duckdb-0.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:fdfd85575ce9540e593d5d25c9d32050bd636c27786afd7b776aae0f6432b55e"}, - {file = "duckdb-0.9.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:704700a4b469e3bb1a7e85ac12e58037daaf2b555ef64a3fe2913ffef7bd585b"}, - {file = "duckdb-0.9.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf55b303b7b1a8c2165a96e609eb30484bc47481d94a5fb1e23123e728df0a74"}, - {file = "duckdb-0.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b70e23c14746904ca5de316436e43a685eb769c67fe3dbfaacbd3cce996c5045"}, - {file = "duckdb-0.9.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:77379f7f1f8b4dc98e01f8f6f8f15a0858cf456e2385e22507f3cb93348a88f9"}, - {file = "duckdb-0.9.1-cp37-cp37m-win32.whl", hash = "sha256:92c8f738489838666cae9ef41703f8b16f660bb146970d1eba8b2c06cb3afa39"}, - {file = "duckdb-0.9.1-cp37-cp37m-win_amd64.whl", hash = "sha256:08c5484ac06ab714f745526d791141f547e2f5ac92f97a0a1b37dfbb3ea1bd13"}, - {file = "duckdb-0.9.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f66d3c07c7f6938d3277294677eb7dad75165e7c57c8dd505503fc5ef10f67ad"}, - {file = "duckdb-0.9.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c38044e5f78c0c7b58e9f937dcc6c34de17e9ca6be42f9f8f1a5a239f7a847a5"}, - {file = "duckdb-0.9.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73bc0d715b79566b3ede00c367235cfcce67be0eddda06e17665c7a233d6854a"}, - {file = "duckdb-0.9.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d26622c3b4ea6a8328d95882059e3cc646cdc62d267d48d09e55988a3bba0165"}, - {file = "duckdb-0.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3367d10096ff2b7919cedddcf60d308d22d6e53e72ee2702f6e6ca03d361004a"}, - {file = "duckdb-0.9.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d88a119f1cb41911a22f08a6f084d061a8c864e28b9433435beb50a56b0d06bb"}, - {file = "duckdb-0.9.1-cp38-cp38-win32.whl", hash = "sha256:99567496e45b55c67427133dc916013e8eb20a811fc7079213f5f03b2a4f5fc0"}, - {file = "duckdb-0.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:5b3da4da73422a3235c3500b3fb541ac546adb3e35642ef1119dbcd9cc7f68b8"}, - {file = "duckdb-0.9.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eca00c0c2062c0265c6c0e78ca2f6a30611b28f3afef062036610e9fc9d4a67d"}, - {file = "duckdb-0.9.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eb5af8e89d40fc4baab1515787ea1520a6c6cf6aa40ab9f107df6c3a75686ce1"}, - {file = "duckdb-0.9.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9fae3d4f83ebcb47995f6acad7c6d57d003a9b6f0e1b31f79a3edd6feb377443"}, - {file = "duckdb-0.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16b9a7efc745bc3c5d1018c3a2f58d9e6ce49c0446819a9600fdba5f78e54c47"}, - {file = 
"duckdb-0.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b0b60167f5537772e9f5af940e69dcf50e66f5247732b8bb84a493a9af6055"}, - {file = "duckdb-0.9.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4f27f5e94c47df6c4ccddf18e3277b7464eea3db07356d2c4bf033b5c88359b8"}, - {file = "duckdb-0.9.1-cp39-cp39-win32.whl", hash = "sha256:d43cd7e6f783006b59dcc5e40fcf157d21ee3d0c8dfced35278091209e9974d7"}, - {file = "duckdb-0.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:e666795887d9cf1d6b6f6cbb9d487270680e5ff6205ebc54b2308151f13b8cff"}, - {file = "duckdb-0.9.1.tar.gz", hash = "sha256:603a878746015a3f2363a65eb48bcbec816261b6ee8d71eee53061117f6eef9d"}, -] [[package]] name = "email-validator" version = "1.3.1" description = "A robust email address syntax and deliverability validation library." +category = "dev" optional = false python-versions = ">=3.5" -files = [ - {file = "email_validator-1.3.1-py2.py3-none-any.whl", hash = "sha256:49a72f5fa6ed26be1c964f0567d931d10bf3fdeeacdf97bc26ef1cd2a44e0bda"}, - {file = "email_validator-1.3.1.tar.gz", hash = "sha256:d178c5c6fa6c6824e9b04f199cf23e79ac15756786573c190d2ad13089411ad2"}, -] [package.dependencies] dnspython = ">=1.15.0" @@ -2274,12 +1704,9 @@ idna = ">=2.0.0" name = "enlighten" version = "1.11.2" description = "Enlighten Progress Bar" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "enlighten-1.11.2-py2.py3-none-any.whl", hash = "sha256:98c9eb20e022b6a57f1c8d4f17e16760780b6881e6d658c40f52d21255ea45f3"}, - {file = "enlighten-1.11.2.tar.gz", hash = "sha256:9284861dee5a272e0e1a3758cd3f3b7180b1bd1754875da76876f2a7f46ccb61"}, -] [package.dependencies] blessed = ">=1.17.7" @@ -2289,12 +1716,9 @@ prefixed = ">=0.3.2" name = "exceptiongroup" version = "1.1.3" description = "Backport of PEP 654 (exception groups)" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"}, - {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"}, -] [package.extras] test = ["pytest (>=6)"] @@ -2303,12 +1727,9 @@ test = ["pytest (>=6)"] name = "fastembed" version = "0.1.1" description = "Fast, light, accurate library built for retrieval embedding generation" +category = "main" optional = true python-versions = ">=3.8.0,<3.12" -files = [ - {file = "fastembed-0.1.1-py3-none-any.whl", hash = "sha256:131413ae52cd72f4c8cced7a675f8269dbfd1a852abade3c815e265114bcc05a"}, - {file = "fastembed-0.1.1.tar.gz", hash = "sha256:f7e524ee4f74bb8aad16be5b687d1f77f608d40e96e292c87881dc36baf8f4c7"}, -] [package.dependencies] onnx = ">=1.11,<2.0" @@ -2321,12 +1742,9 @@ tqdm = ">=4.65,<5.0" name = "filelock" version = "3.12.3" description = "A platform independent file lock." 
+category = "main" optional = true python-versions = ">=3.8" -files = [ - {file = "filelock-3.12.3-py3-none-any.whl", hash = "sha256:f067e40ccc40f2b48395a80fcbd4728262fab54e232e090a4063ab804179efeb"}, - {file = "filelock-3.12.3.tar.gz", hash = "sha256:0ecc1dd2ec4672a10c8550a8182f1bd0c0a5088470ecd5a125e45f49472fac3d"}, -] [package.dependencies] typing-extensions = {version = ">=4.7.1", markers = "python_version < \"3.11\""} @@ -2339,12 +1757,9 @@ testing = ["covdefaults (>=2.3)", "coverage (>=7.3)", "diff-cover (>=7.7)", "pyt name = "flake8" version = "5.0.4" description = "the modular source code checker: pep8 pyflakes and co" +category = "dev" optional = false python-versions = ">=3.6.1" -files = [ - {file = "flake8-5.0.4-py2.py3-none-any.whl", hash = "sha256:7a1cf6b73744f5806ab95e526f6f0d8c01c66d7bbe349562d22dfca20610b248"}, - {file = "flake8-5.0.4.tar.gz", hash = "sha256:6fbe320aad8d6b95cec8b8e47bc933004678dc63095be98528b7bdd2a9f510db"}, -] [package.dependencies] mccabe = ">=0.7.0,<0.8.0" @@ -2355,12 +1770,9 @@ pyflakes = ">=2.5.0,<2.6.0" name = "flake8-bugbear" version = "22.12.6" description = "A plugin for flake8 finding likely bugs and design problems in your program. Contains warnings that don't belong in pyflakes and pycodestyle." +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "flake8-bugbear-22.12.6.tar.gz", hash = "sha256:4cdb2c06e229971104443ae293e75e64c6107798229202fbe4f4091427a30ac0"}, - {file = "flake8_bugbear-22.12.6-py3-none-any.whl", hash = "sha256:b69a510634f8a9c298dfda2b18a8036455e6b19ecac4fe582e4d7a0abfa50a30"}, -] [package.dependencies] attrs = ">=19.2.0" @@ -2373,12 +1785,9 @@ dev = ["coverage", "hypothesis", "hypothesmith (>=0.2)", "pre-commit", "tox"] name = "flake8-builtins" version = "1.5.3" description = "Check for python builtins being used as variables or parameters." +category = "dev" optional = false python-versions = "*" -files = [ - {file = "flake8-builtins-1.5.3.tar.gz", hash = "sha256:09998853b2405e98e61d2ff3027c47033adbdc17f9fe44ca58443d876eb00f3b"}, - {file = "flake8_builtins-1.5.3-py2.py3-none-any.whl", hash = "sha256:7706babee43879320376861897e5d1468e396a40b8918ed7bccf70e5f90b8687"}, -] [package.dependencies] flake8 = "*" @@ -2390,12 +1799,9 @@ test = ["coverage", "coveralls", "mock", "pytest", "pytest-cov"] name = "flake8-encodings" version = "0.5.0.post1" description = "A Flake8 plugin to identify incorrect use of encodings." +category = "dev" optional = false python-versions = ">=3.6" -files = [ - {file = "flake8_encodings-0.5.0.post1-py3-none-any.whl", hash = "sha256:d2fecca0e89ba09c86e5d61cf6bdb1b337f0d74746aac67bbcf0c517b4cb6cba"}, - {file = "flake8_encodings-0.5.0.post1.tar.gz", hash = "sha256:082c0163325c85b438a8106e876283b5ed3cbfc53e68d89130d70be8be4c9977"}, -] [package.dependencies] astatine = ">=0.3.1" @@ -2411,12 +1817,9 @@ classes = ["jedi (>=0.18.0)"] name = "flake8-helper" version = "0.2.1" description = "A helper library for Flake8 plugins." +category = "dev" optional = false python-versions = ">=3.6" -files = [ - {file = "flake8_helper-0.2.1-py3-none-any.whl", hash = "sha256:9123cdf351ad32ee8a51b85036052302c478122d62fb512c0773e111b3d05241"}, - {file = "flake8_helper-0.2.1.tar.gz", hash = "sha256:479f86d1c52df8e49ff876ecd3873242699f93eeece7e6675cdca9c37c9b0a16"}, -] [package.dependencies] flake8 = ">=3.8.4" @@ -2425,12 +1828,9 @@ flake8 = ">=3.8.4" name = "flake8-tidy-imports" version = "4.10.0" description = "A flake8 plugin that helps you write tidier imports." 
+category = "dev" optional = false python-versions = ">=3.8" -files = [ - {file = "flake8_tidy_imports-4.10.0-py3-none-any.whl", hash = "sha256:b0387fb2ea200441bd142309e716fb7b8f4b0937bdf5f8b7c0c118a5f5e2b8ed"}, - {file = "flake8_tidy_imports-4.10.0.tar.gz", hash = "sha256:bd6cf86465402d2b86903009b748d85a628e599e17b76e810c9857e3a2815173"}, -] [package.dependencies] flake8 = ">=3.8.0" @@ -2439,12 +1839,9 @@ flake8 = ">=3.8.0" name = "flask" version = "2.2.5" description = "A simple framework for building complex web applications." +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "Flask-2.2.5-py3-none-any.whl", hash = "sha256:58107ed83443e86067e41eff4631b058178191a355886f8e479e347fa1285fdf"}, - {file = "Flask-2.2.5.tar.gz", hash = "sha256:edee9b0a7ff26621bd5a8c10ff484ae28737a2410d99b0bb9a6850c7fb977aa0"}, -] [package.dependencies] click = ">=8.0" @@ -2459,14 +1856,11 @@ dotenv = ["python-dotenv"] [[package]] name = "flask-appbuilder" -version = "4.3.3" +version = "4.3.6" description = "Simple and rapid application development framework, built on top of Flask. includes detailed security, auto CRUD generation for your models, google charts and much more." +category = "dev" optional = false python-versions = "~=3.7" -files = [ - {file = "Flask-AppBuilder-4.3.3.tar.gz", hash = "sha256:b420379f74788e431a2763f8d3749cc37712df682dc00a45538d85d989340768"}, - {file = "Flask_AppBuilder-4.3.3-py3-none-any.whl", hash = "sha256:7eb1904d8f61297778ebf0d0b83f1d74b154534c9e84af3bb9198cfc0f51ff05"}, -] [package.dependencies] apispec = {version = ">=6.0.0,<7", extras = ["yaml"]} @@ -2494,17 +1888,15 @@ WTForms = "<4" jmespath = ["jmespath (>=0.9.5)"] oauth = ["Authlib (>=0.14,<2.0.0)"] openid = ["Flask-OpenID (>=1.2.5,<2)"] +talisman = ["flask-talisman (>=1.0.0,<2.0)"] [[package]] name = "flask-babel" version = "2.0.0" description = "Adds i18n/l10n support to Flask applications" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "Flask-Babel-2.0.0.tar.gz", hash = "sha256:f9faf45cdb2e1a32ea2ec14403587d4295108f35017a7821a2b1acb8cfd9257d"}, - {file = "Flask_Babel-2.0.0-py3-none-any.whl", hash = "sha256:e6820a052a8d344e178cdd36dd4bb8aea09b4bda3d5f9fa9f008df2c7f2f5468"}, -] [package.dependencies] Babel = ">=2.3" @@ -2519,12 +1911,9 @@ dev = ["Pallets-Sphinx-Themes", "bumpversion", "ghp-import", "pytest", "pytest-m name = "flask-caching" version = "2.0.2" description = "Adds caching support to Flask applications." 
+category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "Flask-Caching-2.0.2.tar.gz", hash = "sha256:24b60c552d59a9605cc1b6a42c56cdb39a82a28dab4532bbedb9222ae54ecb4e"}, - {file = "Flask_Caching-2.0.2-py3-none-any.whl", hash = "sha256:19571f2570e9b8dd9dd9d2f49d7cbee69c14ebe8cc001100b1eb98c379dd80ad"}, -] [package.dependencies] cachelib = ">=0.9.0,<0.10.0" @@ -2534,12 +1923,9 @@ Flask = "<3" name = "flask-jwt-extended" version = "4.5.2" description = "Extended JWT integration with Flask" +category = "dev" optional = false python-versions = ">=3.7,<4" -files = [ - {file = "Flask-JWT-Extended-4.5.2.tar.gz", hash = "sha256:ba56245ba43b71c8ae936784b867625dce8b9956faeedec2953222e57942fb0b"}, - {file = "Flask_JWT_Extended-4.5.2-py2.py3-none-any.whl", hash = "sha256:e0ef23d8c863746bd141046167073699e1a7b03c97169cbba70f05b8d9cd6b9e"}, -] [package.dependencies] Flask = ">=2.0,<3.0" @@ -2553,12 +1939,9 @@ asymmetric-crypto = ["cryptography (>=3.3.1)"] name = "flask-limiter" version = "3.5.0" description = "Rate limiting for flask applications" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "Flask-Limiter-3.5.0.tar.gz", hash = "sha256:13a3491b994c49f7cb4706587a38ca47e8162b576530472df38be68104f299c0"}, - {file = "Flask_Limiter-3.5.0-py3-none-any.whl", hash = "sha256:dbda4174f44e6cb858c6eb75e7488186f2977dd5d33d7028ba1aabf179de1bee"}, -] [package.dependencies] Flask = ">=2" @@ -2576,12 +1959,9 @@ redis = ["limits[redis]"] name = "flask-login" version = "0.6.2" description = "User authentication and session management for Flask." +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "Flask-Login-0.6.2.tar.gz", hash = "sha256:c0a7baa9fdc448cdd3dd6f0939df72eec5177b2f7abe6cb82fc934d29caac9c3"}, - {file = "Flask_Login-0.6.2-py3-none-any.whl", hash = "sha256:1ef79843f5eddd0f143c2cd994c1b05ac83c0401dc6234c143495af9a939613f"}, -] [package.dependencies] Flask = ">=1.0.4" @@ -2591,12 +1971,9 @@ Werkzeug = ">=1.0.1" name = "flask-session" version = "0.5.0" description = "Server-side session support for Flask" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "Flask-Session-0.5.0.tar.gz", hash = "sha256:190875e6aebf2953c6803d42379ef3b934bc209ef8ef006f97aecb08f5aaeb86"}, - {file = "flask_session-0.5.0-py3-none-any.whl", hash = "sha256:1619bcbc16f04f64e90f8e0b17145ba5c9700090bb1294e889956c1282d58631"}, -] [package.dependencies] cachelib = "*" @@ -2606,12 +1983,9 @@ flask = ">=2.2" name = "flask-sqlalchemy" version = "2.5.1" description = "Adds SQLAlchemy support to your Flask application." +category = "dev" optional = false python-versions = ">= 2.7, != 3.0.*, != 3.1.*, != 3.2.*, != 3.3.*" -files = [ - {file = "Flask-SQLAlchemy-2.5.1.tar.gz", hash = "sha256:2bda44b43e7cacb15d4e05ff3cc1f8bc97936cc464623424102bfc2c35e95912"}, - {file = "Flask_SQLAlchemy-2.5.1-py2.py3-none-any.whl", hash = "sha256:f12c3d4cc5cc7fdcc148b9527ea05671718c3ea45d50c7e732cceb33f574b390"}, -] [package.dependencies] Flask = ">=0.10" @@ -2621,12 +1995,9 @@ SQLAlchemy = ">=0.8.0" name = "flask-wtf" version = "1.1.1" description = "Form rendering, validation, and CSRF protection for Flask with WTForms." 
+category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "Flask-WTF-1.1.1.tar.gz", hash = "sha256:41c4244e9ae626d63bed42ae4785b90667b885b1535d5a4095e1f63060d12aa9"}, - {file = "Flask_WTF-1.1.1-py3-none-any.whl", hash = "sha256:7887d6f1ebb3e17bf648647422f0944c9a469d0fcf63e3b66fb9a83037e38b2c"}, -] [package.dependencies] Flask = "*" @@ -2640,93 +2011,25 @@ email = ["email-validator"] name = "flatbuffers" version = "23.5.26" description = "The FlatBuffers serialization format for Python" +category = "main" optional = true python-versions = "*" -files = [ - {file = "flatbuffers-23.5.26-py2.py3-none-any.whl", hash = "sha256:c0ff356da363087b915fde4b8b45bdda73432fc17cddb3c8157472eab1422ad1"}, - {file = "flatbuffers-23.5.26.tar.gz", hash = "sha256:9ea1144cac05ce5d86e2859f431c6cd5e66cd9c78c558317c7955fb8d4c78d89"}, -] [[package]] name = "frozenlist" version = "1.4.0" description = "A list-like structure which implements collections.abc.MutableSequence" +category = "main" optional = false python-versions = ">=3.8" -files = [ - {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab"}, - {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559"}, - {file = "frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9ac08e601308e41eb533f232dbf6b7e4cea762f9f84f6357136eed926c15d12c"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d081f13b095d74b67d550de04df1c756831f3b83dc9881c38985834387487f1b"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71932b597f9895f011f47f17d6428252fc728ba2ae6024e13c3398a087c2cdea"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:981b9ab5a0a3178ff413bca62526bb784249421c24ad7381e39d67981be2c326"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e41f3de4df3e80de75845d3e743b3f1c4c8613c3997a912dbf0229fc61a8b963"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6918d49b1f90821e93069682c06ffde41829c346c66b721e65a5c62b4bab0300"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e5c8764c7829343d919cc2dfc587a8db01c4f70a4ebbc49abde5d4b158b007b"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8d0edd6b1c7fb94922bf569c9b092ee187a83f03fb1a63076e7774b60f9481a8"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e29cda763f752553fa14c68fb2195150bfab22b352572cb36c43c47bedba70eb"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:0c7c1b47859ee2cac3846fde1c1dc0f15da6cec5a0e5c72d101e0f83dcb67ff9"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:901289d524fdd571be1c7be054f48b1f88ce8dddcbdf1ec698b27d4b8b9e5d62"}, - {file = "frozenlist-1.4.0-cp310-cp310-win32.whl", hash = "sha256:1a0848b52815006ea6596c395f87449f693dc419061cc21e970f139d466dc0a0"}, - {file = "frozenlist-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:b206646d176a007466358aa21d85cd8600a415c67c9bd15403336c331a10d956"}, - {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:de343e75f40e972bae1ef6090267f8260c1446a1695e77096db6cfa25e759a95"}, - {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad2a9eb6d9839ae241701d0918f54c51365a51407fd80f6b8289e2dfca977cc3"}, - {file = "frozenlist-1.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7bd3b3830247580de99c99ea2a01416dfc3c34471ca1298bccabf86d0ff4dc"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdf1847068c362f16b353163391210269e4f0569a3c166bc6a9f74ccbfc7e839"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:38461d02d66de17455072c9ba981d35f1d2a73024bee7790ac2f9e361ef1cd0c"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5a32087d720c608f42caed0ef36d2b3ea61a9d09ee59a5142d6070da9041b8f"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dd65632acaf0d47608190a71bfe46b209719bf2beb59507db08ccdbe712f969b"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261b9f5d17cac914531331ff1b1d452125bf5daa05faf73b71d935485b0c510b"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b89ac9768b82205936771f8d2eb3ce88503b1556324c9f903e7156669f521472"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:008eb8b31b3ea6896da16c38c1b136cb9fec9e249e77f6211d479db79a4eaf01"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e74b0506fa5aa5598ac6a975a12aa8928cbb58e1f5ac8360792ef15de1aa848f"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:490132667476f6781b4c9458298b0c1cddf237488abd228b0b3650e5ecba7467"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:76d4711f6f6d08551a7e9ef28c722f4a50dd0fc204c56b4bcd95c6cc05ce6fbb"}, - {file = "frozenlist-1.4.0-cp311-cp311-win32.whl", hash = "sha256:a02eb8ab2b8f200179b5f62b59757685ae9987996ae549ccf30f983f40602431"}, - {file = "frozenlist-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:515e1abc578dd3b275d6a5114030b1330ba044ffba03f94091842852f806f1c1"}, - {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f0ed05f5079c708fe74bf9027e95125334b6978bf07fd5ab923e9e55e5fbb9d3"}, - {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ca265542ca427bf97aed183c1676e2a9c66942e822b14dc6e5f42e038f92a503"}, - {file = "frozenlist-1.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:491e014f5c43656da08958808588cc6c016847b4360e327a62cb308c791bd2d9"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17ae5cd0f333f94f2e03aaf140bb762c64783935cc764ff9c82dff626089bebf"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e78fb68cf9c1a6aa4a9a12e960a5c9dfbdb89b3695197aa7064705662515de2"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5655a942f5f5d2c9ed93d72148226d75369b4f6952680211972a33e59b1dfdc"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c11b0746f5d946fecf750428a95f3e9ebe792c1ee3b1e96eeba145dc631a9672"}, - {file = 
"frozenlist-1.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e66d2a64d44d50d2543405fb183a21f76b3b5fd16f130f5c99187c3fb4e64919"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:88f7bc0fcca81f985f78dd0fa68d2c75abf8272b1f5c323ea4a01a4d7a614efc"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5833593c25ac59ede40ed4de6d67eb42928cca97f26feea219f21d0ed0959b79"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:fec520865f42e5c7f050c2a79038897b1c7d1595e907a9e08e3353293ffc948e"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:b826d97e4276750beca7c8f0f1a4938892697a6bcd8ec8217b3312dad6982781"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ceb6ec0a10c65540421e20ebd29083c50e6d1143278746a4ef6bcf6153171eb8"}, - {file = "frozenlist-1.4.0-cp38-cp38-win32.whl", hash = "sha256:2b8bcf994563466db019fab287ff390fffbfdb4f905fc77bc1c1d604b1c689cc"}, - {file = "frozenlist-1.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:a6c8097e01886188e5be3e6b14e94ab365f384736aa1fca6a0b9e35bd4a30bc7"}, - {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6c38721585f285203e4b4132a352eb3daa19121a035f3182e08e437cface44bf"}, - {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a0c6da9aee33ff0b1a451e867da0c1f47408112b3391dd43133838339e410963"}, - {file = "frozenlist-1.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93ea75c050c5bb3d98016b4ba2497851eadf0ac154d88a67d7a6816206f6fa7f"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f61e2dc5ad442c52b4887f1fdc112f97caeff4d9e6ebe78879364ac59f1663e1"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa384489fefeb62321b238e64c07ef48398fe80f9e1e6afeff22e140e0850eef"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10ff5faaa22786315ef57097a279b833ecab1a0bfb07d604c9cbb1c4cdc2ed87"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:007df07a6e3eb3e33e9a1fe6a9db7af152bbd8a185f9aaa6ece10a3529e3e1c6"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f4f399d28478d1f604c2ff9119907af9726aed73680e5ed1ca634d377abb087"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c5374b80521d3d3f2ec5572e05adc94601985cc526fb276d0c8574a6d749f1b3"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ce31ae3e19f3c902de379cf1323d90c649425b86de7bbdf82871b8a2a0615f3d"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7211ef110a9194b6042449431e08c4d80c0481e5891e58d429df5899690511c2"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:556de4430ce324c836789fa4560ca62d1591d2538b8ceb0b4f68fb7b2384a27a"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7645a8e814a3ee34a89c4a372011dcd817964ce8cb273c8ed6119d706e9613e3"}, - {file = "frozenlist-1.4.0-cp39-cp39-win32.whl", hash = "sha256:19488c57c12d4e8095a922f328df3f179c820c212940a498623ed39160bc3c2f"}, - {file = "frozenlist-1.4.0-cp39-cp39-win_amd64.whl", hash = 
"sha256:6221d84d463fb110bdd7619b69cb43878a11d51cbb9394ae3105d082d5199167"}, - {file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"}, -] [[package]] name = "fsspec" version = "2023.6.0" description = "File-system specification" +category = "main" optional = false python-versions = ">=3.8" -files = [ - {file = "fsspec-2023.6.0-py3-none-any.whl", hash = "sha256:1cbad1faef3e391fba6dc005ae9b5bdcbf43005c9167ce78c915549c352c869a"}, - {file = "fsspec-2023.6.0.tar.gz", hash = "sha256:d0b2f935446169753e7a5c5c55681c54ea91996cc67be93c39a154fb3a2742af"}, -] [package.extras] abfs = ["adlfs"] @@ -2756,22 +2059,17 @@ tqdm = ["tqdm"] name = "future" version = "0.18.3" description = "Clean single-source support for Python 3 and 2" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "future-0.18.3.tar.gz", hash = "sha256:34a17436ed1e96697a86f9de3d15a3b0be01d8bc8de9c1dffd59fb8234ed5307"}, -] [[package]] name = "gcsfs" version = "2023.6.0" description = "Convenient Filesystem interface over GCS" +category = "main" optional = true python-versions = ">=3.8" -files = [ - {file = "gcsfs-2023.6.0-py2.py3-none-any.whl", hash = "sha256:3b3c7d8eddd4ec1380f3b49fbb861ee1e974adb223564401f10884b6260d406f"}, - {file = "gcsfs-2023.6.0.tar.gz", hash = "sha256:30b14fccadb3b7f0d99b2cd03bd8507c40f3a9a7d05847edca571f642bedbdff"}, -] [package.dependencies] aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" @@ -2790,12 +2088,9 @@ gcsfuse = ["fusepy"] name = "gitdb" version = "4.0.10" description = "Git Object Database" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "gitdb-4.0.10-py3-none-any.whl", hash = "sha256:c286cf298426064079ed96a9e4a9d39e7f3e9bf15ba60701e95f5492f28415c7"}, - {file = "gitdb-4.0.10.tar.gz", hash = "sha256:6eb990b69df4e15bad899ea868dc46572c3f75339735663b81de79b06f17eb9a"}, -] [package.dependencies] smmap = ">=3.0.1,<6" @@ -2804,12 +2099,9 @@ smmap = ">=3.0.1,<6" name = "gitpython" version = "3.1.34" description = "GitPython is a Python library used to interact with Git repositories" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "GitPython-3.1.34-py3-none-any.whl", hash = "sha256:5d3802b98a3bae1c2b8ae0e1ff2e4aa16bcdf02c145da34d092324f599f01395"}, - {file = "GitPython-3.1.34.tar.gz", hash = "sha256:85f7d365d1f6bf677ae51039c1ef67ca59091c7ebd5a3509aa399d4eda02d6dd"}, -] [package.dependencies] gitdb = ">=4.0.1,<5" @@ -2818,34 +2110,28 @@ gitdb = ">=4.0.1,<5" name = "giturlparse" version = "0.11.1" description = "A Git URL parsing module (supports parsing and rewriting)" +category = "main" optional = false python-versions = ">=3.8" -files = [ - {file = "giturlparse-0.11.1-py2.py3-none-any.whl", hash = "sha256:6422f25c8ca563e1a3cb6b85862e48614be804cd1334e6d84be5630eb26b343f"}, - {file = "giturlparse-0.11.1.tar.gz", hash = "sha256:cdbe0c062096c69e00f08397826dddebc1f73bc15b793994579c13aafc70c990"}, -] [[package]] name = "google-api-core" version = "2.11.1" description = "Google API client core library" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "google-api-core-2.11.1.tar.gz", hash = "sha256:25d29e05a0058ed5f19c61c0a78b1b53adea4d9364b464d014fbda941f6d1c9a"}, - {file = "google_api_core-2.11.1-py3-none-any.whl", hash = "sha256:d92a5a92dc36dd4f4b9ee4e55528a90e432b059f93aee6ad857f9de8cc7ae94a"}, -] [package.dependencies] google-auth = ">=2.14.1,<3.0.dev0" googleapis-common-protos = 
">=1.56.2,<2.0.dev0" grpcio = [ - {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, - {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, + {version = ">=1.33.2,<2.0dev", optional = true, markers = "extra == \"grpc\""}, + {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\""}, ] grpcio-status = [ - {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, - {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, + {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "extra == \"grpc\""}, + {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\""}, ] protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0" requests = ">=2.18.0,<3.0.0.dev0" @@ -2859,15 +2145,12 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] name = "google-api-python-client" version = "2.97.0" description = "Google API Client Library for Python" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "google-api-python-client-2.97.0.tar.gz", hash = "sha256:48277291894876a1ca7ed4127e055e81f81e6343ced1b544a7200ae2c119dcd7"}, - {file = "google_api_python_client-2.97.0-py2.py3-none-any.whl", hash = "sha256:5215f4cd577753fc4192ccfbe0bb8b55d4bb5fd68fa6268ac5cf271b6305de31"}, -] [package.dependencies] -google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0.dev0" +google-api-core = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0.dev0" google-auth = ">=1.19.0,<3.0.0.dev0" google-auth-httplib2 = ">=0.1.0" httplib2 = ">=0.15.0,<1.dev0" @@ -2877,12 +2160,9 @@ uritemplate = ">=3.0.1,<5" name = "google-auth" version = "2.22.0" description = "Google Authentication Library" +category = "main" optional = false python-versions = ">=3.6" -files = [ - {file = "google-auth-2.22.0.tar.gz", hash = "sha256:164cba9af4e6e4e40c3a4f90a1a6c12ee56f14c0b4868d1ca91b32826ab334ce"}, - {file = "google_auth-2.22.0-py2.py3-none-any.whl", hash = "sha256:d61d1b40897407b574da67da1a833bdc10d5a11642566e506565d1b1a46ba873"}, -] [package.dependencies] cachetools = ">=2.0.0,<6.0" @@ -2902,12 +2182,9 @@ requests = ["requests (>=2.20.0,<3.0.0.dev0)"] name = "google-auth-httplib2" version = "0.1.0" description = "Google Authentication Library: httplib2 transport" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "google-auth-httplib2-0.1.0.tar.gz", hash = "sha256:a07c39fd632becacd3f07718dfd6021bf396978f03ad3ce4321d060015cc30ac"}, - {file = "google_auth_httplib2-0.1.0-py2.py3-none-any.whl", hash = "sha256:31e49c36c6b5643b57e82617cb3e021e3e1d2df9da63af67252c02fa9c1f4a10"}, -] [package.dependencies] google-auth = "*" @@ -2918,12 +2195,9 @@ six = "*" name = "google-auth-oauthlib" version = "1.0.0" description = "Google Authentication Library" +category = "main" optional = false python-versions = ">=3.6" -files = [ - {file = "google-auth-oauthlib-1.0.0.tar.gz", hash = "sha256:e375064964820b47221a7e1b7ee1fd77051b6323c3f9e3e19785f78ab67ecfc5"}, - {file = "google_auth_oauthlib-1.0.0-py2.py3-none-any.whl", hash = "sha256:95880ca704928c300f48194d1770cf5b1462835b6e49db61445a520f793fd5fb"}, -] [package.dependencies] google-auth = ">=2.15.0" @@ -2936,15 +2210,12 @@ tool = ["click (>=6.0.0)"] name = 
"google-cloud-bigquery" version = "3.11.4" description = "Google BigQuery API client library" +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "google-cloud-bigquery-3.11.4.tar.gz", hash = "sha256:697df117241a2283bcbb93b21e10badc14e51c9a90800d2a7e1a3e1c7d842974"}, - {file = "google_cloud_bigquery-3.11.4-py2.py3-none-any.whl", hash = "sha256:5fa7897743a0ed949ade25a0942fc9e7557d8fce307c6f8a76d1b604cf27f1b1"}, -] [package.dependencies] -google-api-core = {version = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0dev", extras = ["grpc"]} +google-api-core = {version = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev", extras = ["grpc"]} google-cloud-core = ">=1.6.0,<3.0.0dev" google-resumable-media = ">=0.6.0,<3.0dev" grpcio = [ @@ -2971,15 +2242,12 @@ tqdm = ["tqdm (>=4.7.4,<5.0.0dev)"] name = "google-cloud-core" version = "2.3.3" description = "Google Cloud API client core library" +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "google-cloud-core-2.3.3.tar.gz", hash = "sha256:37b80273c8d7eee1ae816b3a20ae43585ea50506cb0e60f3cf5be5f87f1373cb"}, - {file = "google_cloud_core-2.3.3-py2.py3-none-any.whl", hash = "sha256:fbd11cad3e98a7e5b0343dc07cb1039a5ffd7a5bb96e1f1e27cee4bda4a90863"}, -] [package.dependencies] -google-api-core = ">=1.31.6,<2.0.dev0 || >2.3.0,<3.0.0dev" +google-api-core = ">=1.31.6,<2.0.0 || >2.3.0,<3.0.0dev" google-auth = ">=1.25.0,<3.0dev" [package.extras] @@ -2989,15 +2257,12 @@ grpc = ["grpcio (>=1.38.0,<2.0dev)"] name = "google-cloud-dataproc" version = "5.4.3" description = "Google Cloud Dataproc API client library" +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "google-cloud-dataproc-5.4.3.tar.gz", hash = "sha256:d9c77c52aa5ddf52ae657736dbfb5312402933f72bab8480fc2d2afe98697402"}, - {file = "google_cloud_dataproc-5.4.3-py2.py3-none-any.whl", hash = "sha256:9cfff56cb53621cdffd0a3d6b10701e886e0a8ad54891e6c223eb67c0ff753ad"}, -] [package.dependencies] -google-api-core = {version = ">=1.34.0,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} +google-api-core = {version = ">=1.34.0,<2.0.0 || >=2.11.0,<3.0.0dev", extras = ["grpc"]} grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" proto-plus = [ {version = ">=1.22.0,<2.0.0dev", markers = "python_version < \"3.11\""}, @@ -3009,15 +2274,12 @@ protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4 name = "google-cloud-storage" version = "2.10.0" description = "Google Cloud Storage API client library" +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "google-cloud-storage-2.10.0.tar.gz", hash = "sha256:934b31ead5f3994e5360f9ff5750982c5b6b11604dc072bc452c25965e076dc7"}, - {file = "google_cloud_storage-2.10.0-py2.py3-none-any.whl", hash = "sha256:9433cf28801671de1c80434238fb1e7e4a1ba3087470e90f70c928ea77c2b9d7"}, -] [package.dependencies] -google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0dev" +google-api-core = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev" google-auth = ">=1.25.0,<3.0dev" google-cloud-core = ">=2.3.0,<3.0dev" google-resumable-media = ">=2.3.2" @@ -3030,78 +2292,9 @@ protobuf = ["protobuf (<5.0.0dev)"] name = "google-crc32c" version = "1.5.0" description = "A python wrapper of the C library 'Google CRC32C'" +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "google-crc32c-1.5.0.tar.gz", hash = "sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7"}, - {file = "google_crc32c-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:596d1f98fc70232fcb6590c439f43b350cb762fb5d61ce7b0e9db4539654cc13"}, - {file = "google_crc32c-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:be82c3c8cfb15b30f36768797a640e800513793d6ae1724aaaafe5bf86f8f346"}, - {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:461665ff58895f508e2866824a47bdee72497b091c730071f2b7575d5762ab65"}, - {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2096eddb4e7c7bdae4bd69ad364e55e07b8316653234a56552d9c988bd2d61b"}, - {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:116a7c3c616dd14a3de8c64a965828b197e5f2d121fedd2f8c5585c547e87b02"}, - {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5829b792bf5822fd0a6f6eb34c5f81dd074f01d570ed7f36aa101d6fc7a0a6e4"}, - {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:64e52e2b3970bd891309c113b54cf0e4384762c934d5ae56e283f9a0afcd953e"}, - {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:02ebb8bf46c13e36998aeaad1de9b48f4caf545e91d14041270d9dca767b780c"}, - {file = "google_crc32c-1.5.0-cp310-cp310-win32.whl", hash = "sha256:2e920d506ec85eb4ba50cd4228c2bec05642894d4c73c59b3a2fe20346bd00ee"}, - {file = "google_crc32c-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:07eb3c611ce363c51a933bf6bd7f8e3878a51d124acfc89452a75120bc436289"}, - {file = "google_crc32c-1.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:cae0274952c079886567f3f4f685bcaf5708f0a23a5f5216fdab71f81a6c0273"}, - {file = "google_crc32c-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1034d91442ead5a95b5aaef90dbfaca8633b0247d1e41621d1e9f9db88c36298"}, - {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c42c70cd1d362284289c6273adda4c6af8039a8ae12dc451dcd61cdabb8ab57"}, - {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8485b340a6a9e76c62a7dce3c98e5f102c9219f4cfbf896a00cf48caf078d438"}, - {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77e2fd3057c9d78e225fa0a2160f96b64a824de17840351b26825b0848022906"}, - {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f583edb943cf2e09c60441b910d6a20b4d9d626c75a36c8fcac01a6c96c01183"}, - {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:a1fd716e7a01f8e717490fbe2e431d2905ab8aa598b9b12f8d10abebb36b04dd"}, - {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:72218785ce41b9cfd2fc1d6a017dc1ff7acfc4c17d01053265c41a2c0cc39b8c"}, - {file = "google_crc32c-1.5.0-cp311-cp311-win32.whl", hash = "sha256:66741ef4ee08ea0b2cc3c86916ab66b6aef03768525627fd6a1b34968b4e3709"}, - {file = "google_crc32c-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:ba1eb1843304b1e5537e1fca632fa894d6f6deca8d6389636ee5b4797affb968"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:98cb4d057f285bd80d8778ebc4fde6b4d509ac3f331758fb1528b733215443ae"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd8536e902db7e365f49e7d9029283403974ccf29b13fc7028b97e2295b33556"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:19e0a019d2c4dcc5e598cd4a4bc7b008546b0358bd322537c74ad47a5386884f"}, - 
{file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02c65b9817512edc6a4ae7c7e987fea799d2e0ee40c53ec573a692bee24de876"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6ac08d24c1f16bd2bf5eca8eaf8304812f44af5cfe5062006ec676e7e1d50afc"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3359fc442a743e870f4588fcf5dcbc1bf929df1fad8fb9905cd94e5edb02e84c"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1e986b206dae4476f41bcec1faa057851f3889503a70e1bdb2378d406223994a"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:de06adc872bcd8c2a4e0dc51250e9e65ef2ca91be023b9d13ebd67c2ba552e1e"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-win32.whl", hash = "sha256:d3515f198eaa2f0ed49f8819d5732d70698c3fa37384146079b3799b97667a94"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-win_amd64.whl", hash = "sha256:67b741654b851abafb7bc625b6d1cdd520a379074e64b6a128e3b688c3c04740"}, - {file = "google_crc32c-1.5.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c02ec1c5856179f171e032a31d6f8bf84e5a75c45c33b2e20a3de353b266ebd8"}, - {file = "google_crc32c-1.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:edfedb64740750e1a3b16152620220f51d58ff1b4abceb339ca92e934775c27a"}, - {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84e6e8cd997930fc66d5bb4fde61e2b62ba19d62b7abd7a69920406f9ecca946"}, - {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a"}, - {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:998679bf62b7fb599d2878aa3ed06b9ce688b8974893e7223c60db155f26bd8d"}, - {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:83c681c526a3439b5cf94f7420471705bbf96262f49a6fe546a6db5f687a3d4a"}, - {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4c6fdd4fccbec90cc8a01fc00773fcd5fa28db683c116ee3cb35cd5da9ef6c37"}, - {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5ae44e10a8e3407dbe138984f21e536583f2bba1be9491239f942c2464ac0894"}, - {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:37933ec6e693e51a5b07505bd05de57eee12f3e8c32b07da7e73669398e6630a"}, - {file = "google_crc32c-1.5.0-cp38-cp38-win32.whl", hash = "sha256:fe70e325aa68fa4b5edf7d1a4b6f691eb04bbccac0ace68e34820d283b5f80d4"}, - {file = "google_crc32c-1.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:74dea7751d98034887dbd821b7aae3e1d36eda111d6ca36c206c44478035709c"}, - {file = "google_crc32c-1.5.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c6c777a480337ac14f38564ac88ae82d4cd238bf293f0a22295b66eb89ffced7"}, - {file = "google_crc32c-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:759ce4851a4bb15ecabae28f4d2e18983c244eddd767f560165563bf9aefbc8d"}, - {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f13cae8cc389a440def0c8c52057f37359014ccbc9dc1f0827936bcd367c6100"}, - {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e560628513ed34759456a416bf86b54b2476c59144a9138165c9a1575801d0d9"}, - {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e1674e4307fa3024fc897ca774e9c7562c957af85df55efe2988ed9056dc4e57"}, - {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:278d2ed7c16cfc075c91378c4f47924c0625f5fc84b2d50d921b18b7975bd210"}, - {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d5280312b9af0976231f9e317c20e4a61cd2f9629b7bfea6a693d1878a264ebd"}, - {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8b87e1a59c38f275c0e3676fc2ab6d59eccecfd460be267ac360cc31f7bcde96"}, - {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7c074fece789b5034b9b1404a1f8208fc2d4c6ce9decdd16e8220c5a793e6f61"}, - {file = "google_crc32c-1.5.0-cp39-cp39-win32.whl", hash = "sha256:7f57f14606cd1dd0f0de396e1e53824c371e9544a822648cd76c034d209b559c"}, - {file = "google_crc32c-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:a2355cba1f4ad8b6988a4ca3feed5bff33f6af2d7f134852cf279c2aebfde541"}, - {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f314013e7dcd5cf45ab1945d92e713eec788166262ae8deb2cfacd53def27325"}, - {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b747a674c20a67343cb61d43fdd9207ce5da6a99f629c6e2541aa0e89215bcd"}, - {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8f24ed114432de109aa9fd317278518a5af2d31ac2ea6b952b2f7782b43da091"}, - {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8667b48e7a7ef66afba2c81e1094ef526388d35b873966d8a9a447974ed9178"}, - {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:1c7abdac90433b09bad6c43a43af253e688c9cfc1c86d332aed13f9a7c7f65e2"}, - {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6f998db4e71b645350b9ac28a2167e6632c239963ca9da411523bb439c5c514d"}, - {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c99616c853bb585301df6de07ca2cadad344fd1ada6d62bb30aec05219c45d2"}, - {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ad40e31093a4af319dadf503b2467ccdc8f67c72e4bcba97f8c10cb078207b5"}, - {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd67cf24a553339d5062eff51013780a00d6f97a39ca062781d06b3a73b15462"}, - {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:398af5e3ba9cf768787eef45c803ff9614cc3e22a5b2f7d7ae116df8b11e3314"}, - {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b1f8133c9a275df5613a451e73f36c2aea4fe13c5c8997e22cf355ebd7bd0728"}, - {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ba053c5f50430a3fcfd36f75aff9caeba0440b2d076afdb79a318d6ca245f88"}, - {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:272d3892a1e1a2dbc39cc5cde96834c236d5327e2122d3aaa19f6614531bb6eb"}, - {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:635f5d4dd18758a1fbd1049a8e8d2fee4ffed124462d837d1a02a0e009c3ab31"}, - {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c672d99a345849301784604bfeaeba4db0c7aae50b95be04dd651fd2a7310b93"}, -] [package.extras] testing = ["pytest"] @@ -3110,118 
+2303,17 @@ testing = ["pytest"] name = "google-re2" version = "1.1" description = "RE2 Python bindings" +category = "dev" optional = false python-versions = "~=3.8" -files = [ - {file = "google-re2-1.1.tar.gz", hash = "sha256:d3a9467ee52b46ac77ca928f6d0cbeaccfd92f03ca0f0f65b9df6a95184f3a1c"}, - {file = "google_re2-1.1-1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:874d2e36dfa506b03d4f9c4aef1701a65304f4004c96c7edac7d8aea08fe193e"}, - {file = "google_re2-1.1-1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b66eb84850afdce09aabca40bcd6f2a0e96178a1b4990d555678edb1f59bf255"}, - {file = "google_re2-1.1-1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:c461640a07db26dc2b51f43de607b7520e7debaf4f6a000f796a3c0196ca52af"}, - {file = "google_re2-1.1-1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:7f9ba69eaee6e7a9f5ddfb919bf1a866af14a18b26a179e3fb1a6fe3d0cbf349"}, - {file = "google_re2-1.1-1-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:f95cf16739cc3ea63728366881221b119f2322b4b739b7da6522d45a68792cea"}, - {file = "google_re2-1.1-1-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:9fb56a41250191298e6a2859b0fdea1e83330c9870fe8d84e5836c506ae46e96"}, - {file = "google_re2-1.1-1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fb22ea995564d87baf4a4bfbb3ca024be913683a710f4f0dc9c94dc663afab20"}, - {file = "google_re2-1.1-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:19b3f0bfbb2a2ca58ed0aaa9356d07a5c0921383a6dbeca086b2b74472f5ee08"}, - {file = "google_re2-1.1-1-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:34fd7f97b84af7453cf05b25adfe2491ba3cef1ca548ac2907efa63d3510954d"}, - {file = "google_re2-1.1-1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e029664192d8d30f7c977706183ef483e82ca239302272df74e01d2e22897ca"}, - {file = "google_re2-1.1-1-cp310-cp310-win32.whl", hash = "sha256:41a8f222f9839d059d37efd28e4deec203502d7e39c3759d83d6a33deadf1d2e"}, - {file = "google_re2-1.1-1-cp310-cp310-win_amd64.whl", hash = "sha256:6141d569fdf72aa693f040ba05c469036587395af07ff419b9a3c009d6ffefd3"}, - {file = "google_re2-1.1-1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a2d03f6aaf22788ba13a770f0d183b8eebe55545bcbb6e4c41dcccac7ded014d"}, - {file = "google_re2-1.1-1-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:a98f15fd9c31bf80d368698447191a2e9703880b305dbf34d9a63ce634b8a557"}, - {file = "google_re2-1.1-1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:42128916cc2966623832aabbd224c88e862d1c531d6bc49ab141f565e6321a90"}, - {file = "google_re2-1.1-1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:6e27986a166903ad7000635f6faed8ab5072d687f822ac9f692c40b2470aebcf"}, - {file = "google_re2-1.1-1-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:5e9edcd743a830d0c0b2729201e42ab86fceef8f4086df65563f482e4544359e"}, - {file = "google_re2-1.1-1-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:d33145bbfd32e916f1c911cd9225be5364a36c3959742a0cc4dfc0692d6a2a5e"}, - {file = "google_re2-1.1-1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8b27cc2544b69a357ab2a749dc0c13a1b9055198c56f4c2c3b0f61d693f8e203"}, - {file = "google_re2-1.1-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3cdf8982b6def987e95b37984d0c1c878de32635dd78acde3273f730b69708c9"}, - {file = "google_re2-1.1-1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:71ac661a7365e134741fe5542f13d7ce1e6187446b96ddee4c8b7d153fc8f05a"}, 
- {file = "google_re2-1.1-1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:35a902ba31a71a3e9e114e44473624d9aa9f9b85ec981bfa91671aefe0ef1a6c"}, - {file = "google_re2-1.1-1-cp311-cp311-win32.whl", hash = "sha256:9469f26b485da2784c658e687a766c72e1a17b1e63b3ed24b5f64c3d19fbae3d"}, - {file = "google_re2-1.1-1-cp311-cp311-win_amd64.whl", hash = "sha256:07dd0780240ee431781119b46c3bbf76f5cef24a2cbb542f6a08c643e0a68d98"}, - {file = "google_re2-1.1-1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9857dc4d69b8025057c8129e98406a24d51bdaf1b96e481dbba7e69e0ec85104"}, - {file = "google_re2-1.1-1-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:a6eaaa5f200022eb0bdded5949c91454fc96e1edd6f9e9a96dd1dc32c821c00e"}, - {file = "google_re2-1.1-1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:a32bb2afe128d90b8edc20d4f7d297f7e2753206eba92937a57e5280736eac74"}, - {file = "google_re2-1.1-1-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:4f2754616c61b76ab4e5a4f39892a52a00897203b859c5abd7e3c630dd883cda"}, - {file = "google_re2-1.1-1-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:b110f3d657e8f67a43a699d327ce47095b80180ea1118e2de44cb5c7002503d9"}, - {file = "google_re2-1.1-1-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:fd62ba2853eef65e249a9c4437a9ecac568222062bc956f0c61a3d1151a6271b"}, - {file = "google_re2-1.1-1-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:23b50eb74dc3e1d480b04b987c61242df5dade50d08bc16e25eb3582b83fca80"}, - {file = "google_re2-1.1-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e1bde89855dd5ab0811187d21eec149975510c80e865c771c883524a452445e7"}, - {file = "google_re2-1.1-1-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10c6cddc720151a509beb98ab310fa0cc8bcb265f83518ebf831de2c9ff73af0"}, - {file = "google_re2-1.1-1-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9bea09c5e8401ec50b8f211bc820ec2f0ca5e744ac67431a1b39bdacbd266553"}, - {file = "google_re2-1.1-1-cp38-cp38-win32.whl", hash = "sha256:ffa51b118037518bcdf63c7649d0b4be7071982b83f48ee3bbabf24a9cb48f8a"}, - {file = "google_re2-1.1-1-cp38-cp38-win_amd64.whl", hash = "sha256:3b47715b6d43c9351957eb5092ad0fa625d04106d81f34cb8a726c53395ad474"}, - {file = "google_re2-1.1-1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:998f31bf7efbc9bb603d0c356c1c77e5331f689c71783df8e21e67bb025fc66a"}, - {file = "google_re2-1.1-1-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:0b5f0eaab859d3ba5f462c82bf37ab56e9d37e19b40b5898c731dbe4213a85f7"}, - {file = "google_re2-1.1-1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:f6d591d9c4cbc7142b729ddcc3f654d059d8ebc3bc95891198808a4785a6b4d8"}, - {file = "google_re2-1.1-1-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:3c325c2eae197b423330a04ab62e2e1cf942676cd5560907db4d63e23ce0648a"}, - {file = "google_re2-1.1-1-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:1e019e8f57955806ee843254ce454249b58800a6e872b2c8e9df2ef3459de0d5"}, - {file = "google_re2-1.1-1-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:58ebbcc7ad2b639768a6bca586357291660ea40dfac83039208e5055c357513b"}, - {file = "google_re2-1.1-1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:723f8553e7fc022294071f14fb7dfc7958c365dc7d4a71d4938ccd2df8c6eca4"}, - {file = "google_re2-1.1-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d81512b08e6787fc8ef29fea365d3fdbf957553a625550e1d96c36877ae30355"}, - {file = 
"google_re2-1.1-1-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c58601b155651cc572a23ee2860788c77581aad85d3567a55b89b0674702f34d"}, - {file = "google_re2-1.1-1-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c6c9f64b9724ec38da8e514f404ac64e9a6a5e8b1d7031c2dadd05c1f4c16fd"}, - {file = "google_re2-1.1-1-cp39-cp39-win32.whl", hash = "sha256:d1b751b9ab9f8e2ab2a36d72b909281ce65f328c9115a1685acae1a2d1afd7a4"}, - {file = "google_re2-1.1-1-cp39-cp39-win_amd64.whl", hash = "sha256:ac775c75cec7069351d201da4e0fb0cae4c1c5ebecd08fa34e1be89740c1d80b"}, - {file = "google_re2-1.1-2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5eaefe4705b75ca5f78178a50104b689e9282f868e12f119b26b4cffc0c7ee6e"}, - {file = "google_re2-1.1-2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:e35f2c8aabfaaa4ce6420b3cae86c0c29042b1b4f9937254347e9b985694a171"}, - {file = "google_re2-1.1-2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:35fd189cbaaaa39c9a6a8a00164c8d9c709bacd0c231c694936879609beff516"}, - {file = "google_re2-1.1-2-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:60475d222cebd066c80414831c8a42aa2449aab252084102ee05440896586e6a"}, - {file = "google_re2-1.1-2-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:871cb85b9b0e1784c983b5c148156b3c5314cb29ca70432dff0d163c5c08d7e5"}, - {file = "google_re2-1.1-2-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:94f4e66e34bdb8de91ec6cdf20ba4fa9fea1dfdcfb77ff1f59700d01a0243664"}, - {file = "google_re2-1.1-2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1563577e2b720d267c4cffacc0f6a2b5c8480ea966ebdb1844fbea6602c7496f"}, - {file = "google_re2-1.1-2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:49b7964532a801b96062d78c0222d155873968f823a546a3dbe63d73f25bb56f"}, - {file = "google_re2-1.1-2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2362fd70eb639a75fd0187d28b4ba7b20b3088833d8ad7ffd8693d0ba159e1c2"}, - {file = "google_re2-1.1-2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:86b80719636a4e21391e20a9adf18173ee6ae2ec956726fe2ff587417b5e8ba6"}, - {file = "google_re2-1.1-2-cp310-cp310-win32.whl", hash = "sha256:5456fba09df951fe8d1714474ed1ecda102a68ddffab0113e6c117d2e64e6f2b"}, - {file = "google_re2-1.1-2-cp310-cp310-win_amd64.whl", hash = "sha256:2ac6936a3a60d8d9de9563e90227b3aea27068f597274ca192c999a12d8baa8f"}, - {file = "google_re2-1.1-2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d5a87b436028ec9b0f02fe19d4cbc19ef30441085cdfcdf1cce8fbe5c4bd5e9a"}, - {file = "google_re2-1.1-2-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:fc0d4163de9ed2155a77e7a2d59d94c348a6bbab3cff88922fab9e0d3d24faec"}, - {file = "google_re2-1.1-2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:48b12d953bc796736e7831d67b36892fb6419a4cc44cb16521fe291e594bfe23"}, - {file = "google_re2-1.1-2-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:62c780c927cff98c1538439f0ff616f48a9b2e8837c676f53170d8ae5b9e83cb"}, - {file = "google_re2-1.1-2-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:04b2aefd768aa4edeef8b273327806c9cb0b82e90ff52eacf5d11003ac7a0db2"}, - {file = "google_re2-1.1-2-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:9c90175992346519ee7546d9af9a64541c05b6b70346b0ddc54a48aa0d3b6554"}, - {file = "google_re2-1.1-2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22ad9ad9d125249d6386a2e80efb9de7af8260b703b6be7fa0ab069c1cf56ced"}, - {file = 
"google_re2-1.1-2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f70971f6ffe5254e476e71d449089917f50ebf9cf60f9cec80975ab1693777e2"}, - {file = "google_re2-1.1-2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f267499529e64a4abed24c588f355ebe4700189d434d84a7367725f5a186e48d"}, - {file = "google_re2-1.1-2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b632eff5e4cd44545a9c0e52f2e1becd55831e25f4dd4e0d7ec8ee6ca50858c1"}, - {file = "google_re2-1.1-2-cp311-cp311-win32.whl", hash = "sha256:a42c733036e8f242ee4e5f0e27153ad4ca44ced9e4ce82f3972938ddee528db0"}, - {file = "google_re2-1.1-2-cp311-cp311-win_amd64.whl", hash = "sha256:64f8eed4ca96905d99b5286b3d14b5ca4f6a025ff3c1351626a7df2f93ad1ddd"}, - {file = "google_re2-1.1-2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5541efcca5b5faf7e0d882334a04fa479bad4e7433f94870f46272eec0672c4a"}, - {file = "google_re2-1.1-2-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:92309af35b6eb2d3b3dc57045cdd83a76370958ab3e0edd2cc4638f6d23f5b32"}, - {file = "google_re2-1.1-2-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:197cd9bcaba96d18c5bf84d0c32fca7a26c234ea83b1d3083366f4392cb99f78"}, - {file = "google_re2-1.1-2-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:1b896f171d29b541256cf26e10dccc9103ac1894683914ed88828ca6facf8dca"}, - {file = "google_re2-1.1-2-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:e022d3239b945014e916ca7120fee659b246ec26c301f9e0542f1a19b38a8744"}, - {file = "google_re2-1.1-2-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:2c73f8a9440873b68bee1198094377501065e85aaf6fcc0d2512c7589ffa06ca"}, - {file = "google_re2-1.1-2-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:901d86555bd7725506d651afaba7d71cd4abd13260aed6cfd7c641a45f76d4f6"}, - {file = "google_re2-1.1-2-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ce4710ff636701cfb56eb91c19b775d53b03749a23b7d2a5071bbbf4342a9067"}, - {file = "google_re2-1.1-2-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:76a20e5ebdf5bc5d430530197e42a2eeb562f729d3a3fb51f39168283d676e66"}, - {file = "google_re2-1.1-2-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:77c9f4d4bb1c8de9d2642d3c4b8b615858ba764df025b3b4f1310266f8def269"}, - {file = "google_re2-1.1-2-cp38-cp38-win32.whl", hash = "sha256:94bd60785bf37ef130a1613738e3c39465a67eae3f3be44bb918540d39b68da3"}, - {file = "google_re2-1.1-2-cp38-cp38-win_amd64.whl", hash = "sha256:59efeb77c0dcdbe37794c61f29c5b1f34bc06e8ec309a111ccdd29d380644d70"}, - {file = "google_re2-1.1-2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:221e38c27e1dd9ccb8e911e9c7aed6439f68ce81e7bb74001076830b0d6e931d"}, - {file = "google_re2-1.1-2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:d9145879e6c2e1b814445300b31f88a675e1f06c57564670d95a1442e8370c27"}, - {file = "google_re2-1.1-2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:c8a12f0740e2a52826bdbf95569a4b0abdf413b4012fa71e94ad25dd4715c6e5"}, - {file = "google_re2-1.1-2-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:9c9998f71466f4db7bda752aa7c348b2881ff688e361108fe500caad1d8b9cb2"}, - {file = "google_re2-1.1-2-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:0c39f69b702005963a3d3bf78743e1733ad73efd7e6e8465d76e3009e4694ceb"}, - {file = "google_re2-1.1-2-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:6d0ce762dee8d6617d0b1788a9653e805e83a23046c441d0ea65f1e27bf84114"}, - {file = 
"google_re2-1.1-2-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ecf3619d98c9b4a7844ab52552ad32597cdbc9a5bdbc7e3435391c653600d1e2"}, - {file = "google_re2-1.1-2-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9a1426a8cbd1fa004974574708d496005bd379310c4b1c7012be4bc75efde7a8"}, - {file = "google_re2-1.1-2-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a1a30626ba48b4070f3eab272d860ef1952e710b088792c4d68dddb155be6bfc"}, - {file = "google_re2-1.1-2-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1b9c1ffcfbc3095b6ff601ec2d2bf662988f6ea6763bc1c9d52bec55881f8fde"}, - {file = "google_re2-1.1-2-cp39-cp39-win32.whl", hash = "sha256:32ecf995a252c0548404c1065ba4b36f1e524f1f4a86b6367a1a6c3da3801e30"}, - {file = "google_re2-1.1-2-cp39-cp39-win_amd64.whl", hash = "sha256:e7865410f3b112a3609739283ec3f4f6f25aae827ff59c6bfdf806fd394d753e"}, -] [[package]] name = "google-resumable-media" version = "2.5.0" description = "Utilities for Google Media Downloads and Resumable Uploads" +category = "main" optional = true python-versions = ">= 3.7" -files = [ - {file = "google-resumable-media-2.5.0.tar.gz", hash = "sha256:218931e8e2b2a73a58eb354a288e03a0fd5fb1c4583261ac6e4c078666468c93"}, - {file = "google_resumable_media-2.5.0-py2.py3-none-any.whl", hash = "sha256:da1bd943e2e114a56d85d6848497ebf9be6a14d3db23e9fc57581e7c3e8170ec"}, -] [package.dependencies] google-crc32c = ">=1.0,<2.0dev" @@ -3234,12 +2326,9 @@ requests = ["requests (>=2.18.0,<3.0.0dev)"] name = "googleapis-common-protos" version = "1.60.0" description = "Common protobufs used in Google APIs" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "googleapis-common-protos-1.60.0.tar.gz", hash = "sha256:e73ebb404098db405ba95d1e1ae0aa91c3e15a71da031a2eeb6b2e23e7bc3708"}, - {file = "googleapis_common_protos-1.60.0-py2.py3-none-any.whl", hash = "sha256:69f9bbcc6acde92cab2db95ce30a70bd2b81d20b12eff3f1aabaffcbe8a93918"}, -] [package.dependencies] grpcio = {version = ">=1.44.0,<2.0.0.dev0", optional = true, markers = "extra == \"grpc\""} @@ -3252,11 +2341,9 @@ grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] name = "grapheme" version = "0.6.0" description = "Unicode grapheme helpers" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "grapheme-0.6.0.tar.gz", hash = "sha256:44c2b9f21bbe77cfb05835fec230bd435954275267fea1858013b102f8603cca"}, -] [package.extras] test = ["pytest", "sphinx", "sphinx-autobuild", "twine", "wheel"] @@ -3265,12 +2352,9 @@ test = ["pytest", "sphinx", "sphinx-autobuild", "twine", "wheel"] name = "graphviz" version = "0.20.1" description = "Simple Python interface for Graphviz" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "graphviz-0.20.1-py3-none-any.whl", hash = "sha256:587c58a223b51611c0cf461132da386edd896a029524ca61a1462b880bf97977"}, - {file = "graphviz-0.20.1.zip", hash = "sha256:8c58f14adaa3b947daf26c19bc1e98c4e0702cdc31cf99153e6f06904d492bf8"}, -] [package.extras] dev = ["flake8", "pep8-naming", "tox (>=3)", "twine", "wheel"] @@ -3281,70 +2365,9 @@ test = ["coverage", "mock (>=4)", "pytest (>=7)", "pytest-cov", "pytest-mock (>= name = "greenlet" version = "2.0.2" description = "Lightweight in-process concurrent programming" +category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" -files = [ - {file = "greenlet-2.0.2-cp27-cp27m-macosx_10_14_x86_64.whl", hash = 
"sha256:bdfea8c661e80d3c1c99ad7c3ff74e6e87184895bbaca6ee8cc61209f8b9b85d"}, - {file = "greenlet-2.0.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:9d14b83fab60d5e8abe587d51c75b252bcc21683f24699ada8fb275d7712f5a9"}, - {file = "greenlet-2.0.2-cp27-cp27m-win32.whl", hash = "sha256:6c3acb79b0bfd4fe733dff8bc62695283b57949ebcca05ae5c129eb606ff2d74"}, - {file = "greenlet-2.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:283737e0da3f08bd637b5ad058507e578dd462db259f7f6e4c5c365ba4ee9343"}, - {file = "greenlet-2.0.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d27ec7509b9c18b6d73f2f5ede2622441de812e7b1a80bbd446cb0633bd3d5ae"}, - {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:30bcf80dda7f15ac77ba5af2b961bdd9dbc77fd4ac6105cee85b0d0a5fcf74df"}, - {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26fbfce90728d82bc9e6c38ea4d038cba20b7faf8a0ca53a9c07b67318d46088"}, - {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9190f09060ea4debddd24665d6804b995a9c122ef5917ab26e1566dcc712ceeb"}, - {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d75209eed723105f9596807495d58d10b3470fa6732dd6756595e89925ce2470"}, - {file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3a51c9751078733d88e013587b108f1b7a1fb106d402fb390740f002b6f6551a"}, - {file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:76ae285c8104046b3a7f06b42f29c7b73f77683df18c49ab5af7983994c2dd91"}, - {file = "greenlet-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2d4686f195e32d36b4d7cf2d166857dbd0ee9f3d20ae349b6bf8afc8485b3645"}, - {file = "greenlet-2.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4302695ad8027363e96311df24ee28978162cdcdd2006476c43970b384a244c"}, - {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c48f54ef8e05f04d6eff74b8233f6063cb1ed960243eacc474ee73a2ea8573ca"}, - {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1846f1b999e78e13837c93c778dcfc3365902cfb8d1bdb7dd73ead37059f0d0"}, - {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a06ad5312349fec0ab944664b01d26f8d1f05009566339ac6f63f56589bc1a2"}, - {file = "greenlet-2.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:eff4eb9b7eb3e4d0cae3d28c283dc16d9bed6b193c2e1ace3ed86ce48ea8df19"}, - {file = "greenlet-2.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5454276c07d27a740c5892f4907c86327b632127dd9abec42ee62e12427ff7e3"}, - {file = "greenlet-2.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:7cafd1208fdbe93b67c7086876f061f660cfddc44f404279c1585bbf3cdc64c5"}, - {file = "greenlet-2.0.2-cp35-cp35m-macosx_10_14_x86_64.whl", hash = "sha256:910841381caba4f744a44bf81bfd573c94e10b3045ee00de0cbf436fe50673a6"}, - {file = "greenlet-2.0.2-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:18a7f18b82b52ee85322d7a7874e676f34ab319b9f8cce5de06067384aa8ff43"}, - {file = "greenlet-2.0.2-cp35-cp35m-win32.whl", hash = "sha256:03a8f4f3430c3b3ff8d10a2a86028c660355ab637cee9333d63d66b56f09d52a"}, - {file = "greenlet-2.0.2-cp35-cp35m-win_amd64.whl", hash = "sha256:4b58adb399c4d61d912c4c331984d60eb66565175cdf4a34792cd9600f21b394"}, - {file = "greenlet-2.0.2-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:703f18f3fda276b9a916f0934d2fb6d989bf0b4fb5a64825260eb9bfd52d78f0"}, - {file = 
"greenlet-2.0.2-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:32e5b64b148966d9cccc2c8d35a671409e45f195864560829f395a54226408d3"}, - {file = "greenlet-2.0.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2dd11f291565a81d71dab10b7033395b7a3a5456e637cf997a6f33ebdf06f8db"}, - {file = "greenlet-2.0.2-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0f72c9ddb8cd28532185f54cc1453f2c16fb417a08b53a855c4e6a418edd099"}, - {file = "greenlet-2.0.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd021c754b162c0fb55ad5d6b9d960db667faad0fa2ff25bb6e1301b0b6e6a75"}, - {file = "greenlet-2.0.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:3c9b12575734155d0c09d6c3e10dbd81665d5c18e1a7c6597df72fd05990c8cf"}, - {file = "greenlet-2.0.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:b9ec052b06a0524f0e35bd8790686a1da006bd911dd1ef7d50b77bfbad74e292"}, - {file = "greenlet-2.0.2-cp36-cp36m-win32.whl", hash = "sha256:dbfcfc0218093a19c252ca8eb9aee3d29cfdcb586df21049b9d777fd32c14fd9"}, - {file = "greenlet-2.0.2-cp36-cp36m-win_amd64.whl", hash = "sha256:9f35ec95538f50292f6d8f2c9c9f8a3c6540bbfec21c9e5b4b751e0a7c20864f"}, - {file = "greenlet-2.0.2-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:d5508f0b173e6aa47273bdc0a0b5ba055b59662ba7c7ee5119528f466585526b"}, - {file = "greenlet-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:f82d4d717d8ef19188687aa32b8363e96062911e63ba22a0cff7802a8e58e5f1"}, - {file = "greenlet-2.0.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9c59a2120b55788e800d82dfa99b9e156ff8f2227f07c5e3012a45a399620b7"}, - {file = "greenlet-2.0.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2780572ec463d44c1d3ae850239508dbeb9fed38e294c68d19a24d925d9223ca"}, - {file = "greenlet-2.0.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:937e9020b514ceedb9c830c55d5c9872abc90f4b5862f89c0887033ae33c6f73"}, - {file = "greenlet-2.0.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:36abbf031e1c0f79dd5d596bfaf8e921c41df2bdf54ee1eed921ce1f52999a86"}, - {file = "greenlet-2.0.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:18e98fb3de7dba1c0a852731c3070cf022d14f0d68b4c87a19cc1016f3bb8b33"}, - {file = "greenlet-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:3f6ea9bd35eb450837a3d80e77b517ea5bc56b4647f5502cd28de13675ee12f7"}, - {file = "greenlet-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:7492e2b7bd7c9b9916388d9df23fa49d9b88ac0640db0a5b4ecc2b653bf451e3"}, - {file = "greenlet-2.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b864ba53912b6c3ab6bcb2beb19f19edd01a6bfcbdfe1f37ddd1778abfe75a30"}, - {file = "greenlet-2.0.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ba2956617f1c42598a308a84c6cf021a90ff3862eddafd20c3333d50f0edb45b"}, - {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc3a569657468b6f3fb60587e48356fe512c1754ca05a564f11366ac9e306526"}, - {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8eab883b3b2a38cc1e050819ef06a7e6344d4a990d24d45bc6f2cf959045a45b"}, - {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:acd2162a36d3de67ee896c43effcd5ee3de247eb00354db411feb025aa319857"}, - {file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0bf60faf0bc2468089bdc5edd10555bab6e85152191df713e2ab1fcc86382b5a"}, - {file = 
"greenlet-2.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0ef99cdbe2b682b9ccbb964743a6aca37905fda5e0452e5ee239b1654d37f2a"}, - {file = "greenlet-2.0.2-cp38-cp38-win32.whl", hash = "sha256:b80f600eddddce72320dbbc8e3784d16bd3fb7b517e82476d8da921f27d4b249"}, - {file = "greenlet-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:4d2e11331fc0c02b6e84b0d28ece3a36e0548ee1a1ce9ddde03752d9b79bba40"}, - {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:88d9ab96491d38a5ab7c56dd7a3cc37d83336ecc564e4e8816dbed12e5aaefc8"}, - {file = "greenlet-2.0.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:561091a7be172ab497a3527602d467e2b3fbe75f9e783d8b8ce403fa414f71a6"}, - {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:971ce5e14dc5e73715755d0ca2975ac88cfdaefcaab078a284fea6cfabf866df"}, - {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be4ed120b52ae4d974aa40215fcdfde9194d63541c7ded40ee12eb4dda57b76b"}, - {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94c817e84245513926588caf1152e3b559ff794d505555211ca041f032abbb6b"}, - {file = "greenlet-2.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1a819eef4b0e0b96bb0d98d797bef17dc1b4a10e8d7446be32d1da33e095dbb8"}, - {file = "greenlet-2.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7efde645ca1cc441d6dc4b48c0f7101e8d86b54c8530141b09fd31cef5149ec9"}, - {file = "greenlet-2.0.2-cp39-cp39-win32.whl", hash = "sha256:ea9872c80c132f4663822dd2a08d404073a5a9b5ba6155bea72fb2a79d1093b5"}, - {file = "greenlet-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:db1a39669102a1d8d12b57de2bb7e2ec9066a6f2b3da35ae511ff93b01b5d564"}, - {file = "greenlet-2.0.2.tar.gz", hash = "sha256:e7c8dc13af7db097bed64a051d2dd49e9f0af495c26995c00a9ee842690d34c0"}, -] [package.extras] docs = ["Sphinx", "docutils (<0.18)"] @@ -3354,12 +2377,9 @@ test = ["objgraph", "psutil"] name = "grpc-google-iam-v1" version = "0.12.6" description = "IAM API client library" +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "grpc-google-iam-v1-0.12.6.tar.gz", hash = "sha256:2bc4b8fdf22115a65d751c9317329322602c39b7c86a289c9b72d228d960ef5f"}, - {file = "grpc_google_iam_v1-0.12.6-py2.py3-none-any.whl", hash = "sha256:5c10f3d8dc2d88678ab1a9b0cb5482735c5efee71e6c0cd59f872eef22913f5c"}, -] [package.dependencies] googleapis-common-protos = {version = ">=1.56.0,<2.0.0dev", extras = ["grpc"]} @@ -3370,55 +2390,9 @@ protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4 name = "grpcio" version = "1.57.0" description = "HTTP/2-based RPC framework" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "grpcio-1.57.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:092fa155b945015754bdf988be47793c377b52b88d546e45c6a9f9579ac7f7b6"}, - {file = "grpcio-1.57.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:2f7349786da979a94690cc5c2b804cab4e8774a3cf59be40d037c4342c906649"}, - {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:82640e57fb86ea1d71ea9ab54f7e942502cf98a429a200b2e743d8672171734f"}, - {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40b72effd4c789de94ce1be2b5f88d7b9b5f7379fe9645f198854112a6567d9a"}, - {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f708a6a17868ad8bf586598bee69abded4996b18adf26fd2d91191383b79019"}, - 
{file = "grpcio-1.57.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:60fe15288a0a65d5c1cb5b4a62b1850d07336e3ba728257a810317be14f0c527"}, - {file = "grpcio-1.57.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6907b1cf8bb29b058081d2aad677b15757a44ef2d4d8d9130271d2ad5e33efca"}, - {file = "grpcio-1.57.0-cp310-cp310-win32.whl", hash = "sha256:57b183e8b252825c4dd29114d6c13559be95387aafc10a7be645462a0fc98bbb"}, - {file = "grpcio-1.57.0-cp310-cp310-win_amd64.whl", hash = "sha256:7b400807fa749a9eb286e2cd893e501b110b4d356a218426cb9c825a0474ca56"}, - {file = "grpcio-1.57.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:c6ebecfb7a31385393203eb04ed8b6a08f5002f53df3d59e5e795edb80999652"}, - {file = "grpcio-1.57.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:00258cbe3f5188629828363ae8ff78477ce976a6f63fb2bb5e90088396faa82e"}, - {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:23e7d8849a0e58b806253fd206ac105b328171e01b8f18c7d5922274958cc87e"}, - {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5371bcd861e679d63b8274f73ac281751d34bd54eccdbfcd6aa00e692a82cd7b"}, - {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aed90d93b731929e742967e236f842a4a2174dc5db077c8f9ad2c5996f89f63e"}, - {file = "grpcio-1.57.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:fe752639919aad9ffb0dee0d87f29a6467d1ef764f13c4644d212a9a853a078d"}, - {file = "grpcio-1.57.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fada6b07ec4f0befe05218181f4b85176f11d531911b64c715d1875c4736d73a"}, - {file = "grpcio-1.57.0-cp311-cp311-win32.whl", hash = "sha256:bb396952cfa7ad2f01061fbc7dc1ad91dd9d69243bcb8110cf4e36924785a0fe"}, - {file = "grpcio-1.57.0-cp311-cp311-win_amd64.whl", hash = "sha256:e503cb45ed12b924b5b988ba9576dc9949b2f5283b8e33b21dcb6be74a7c58d0"}, - {file = "grpcio-1.57.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:fd173b4cf02b20f60860dc2ffe30115c18972d7d6d2d69df97ac38dee03be5bf"}, - {file = "grpcio-1.57.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:d7f8df114d6b4cf5a916b98389aeaf1e3132035420a88beea4e3d977e5f267a5"}, - {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:76c44efa4ede1f42a9d5b2fed1fe9377e73a109bef8675fb0728eb80b0b8e8f2"}, - {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4faea2cfdf762a664ab90589b66f416274887641ae17817de510b8178356bf73"}, - {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c60b83c43faeb6d0a9831f0351d7787a0753f5087cc6fa218d78fdf38e5acef0"}, - {file = "grpcio-1.57.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b363bbb5253e5f9c23d8a0a034dfdf1b7c9e7f12e602fc788c435171e96daccc"}, - {file = "grpcio-1.57.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:f1fb0fd4a1e9b11ac21c30c169d169ef434c6e9344ee0ab27cfa6f605f6387b2"}, - {file = "grpcio-1.57.0-cp37-cp37m-win_amd64.whl", hash = "sha256:34950353539e7d93f61c6796a007c705d663f3be41166358e3d88c45760c7d98"}, - {file = "grpcio-1.57.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:871f9999e0211f9551f368612460442a5436d9444606184652117d6a688c9f51"}, - {file = "grpcio-1.57.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:a8a8e560e8dbbdf29288872e91efd22af71e88b0e5736b0daf7773c1fecd99f0"}, - {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:2313b124e475aa9017a9844bdc5eafb2d5abdda9d456af16fc4535408c7d6da6"}, - {file = 
"grpcio-1.57.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b4098b6b638d9e0ca839a81656a2fd4bc26c9486ea707e8b1437d6f9d61c3941"}, - {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e5b58e32ae14658085c16986d11e99abd002ddbf51c8daae8a0671fffb3467f"}, - {file = "grpcio-1.57.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0f80bf37f09e1caba6a8063e56e2b87fa335add314cf2b78ebf7cb45aa7e3d06"}, - {file = "grpcio-1.57.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5b7a4ce8f862fe32b2a10b57752cf3169f5fe2915acfe7e6a1e155db3da99e79"}, - {file = "grpcio-1.57.0-cp38-cp38-win32.whl", hash = "sha256:9338bacf172e942e62e5889b6364e56657fbf8ac68062e8b25c48843e7b202bb"}, - {file = "grpcio-1.57.0-cp38-cp38-win_amd64.whl", hash = "sha256:e1cb52fa2d67d7f7fab310b600f22ce1ff04d562d46e9e0ac3e3403c2bb4cc16"}, - {file = "grpcio-1.57.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:fee387d2fab144e8a34e0e9c5ca0f45c9376b99de45628265cfa9886b1dbe62b"}, - {file = "grpcio-1.57.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:b53333627283e7241fcc217323f225c37783b5f0472316edcaa4479a213abfa6"}, - {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:f19ac6ac0a256cf77d3cc926ef0b4e64a9725cc612f97228cd5dc4bd9dbab03b"}, - {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3fdf04e402f12e1de8074458549337febb3b45f21076cc02ef4ff786aff687e"}, - {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5613a2fecc82f95d6c51d15b9a72705553aa0d7c932fad7aed7afb51dc982ee5"}, - {file = "grpcio-1.57.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:b670c2faa92124b7397b42303e4d8eb64a4cd0b7a77e35a9e865a55d61c57ef9"}, - {file = "grpcio-1.57.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7a635589201b18510ff988161b7b573f50c6a48fae9cb567657920ca82022b37"}, - {file = "grpcio-1.57.0-cp39-cp39-win32.whl", hash = "sha256:d78d8b86fcdfa1e4c21f8896614b6cc7ee01a2a758ec0c4382d662f2a62cf766"}, - {file = "grpcio-1.57.0-cp39-cp39-win_amd64.whl", hash = "sha256:20ec6fc4ad47d1b6e12deec5045ec3cd5402d9a1597f738263e98f490fe07056"}, - {file = "grpcio-1.57.0.tar.gz", hash = "sha256:4b089f7ad1eb00a104078bab8015b0ed0ebcb3b589e527ab009c53893fd4e613"}, -] [package.extras] protobuf = ["grpcio-tools (>=1.57.0)"] @@ -3427,12 +2401,9 @@ protobuf = ["grpcio-tools (>=1.57.0)"] name = "grpcio-status" version = "1.57.0" description = "Status proto mapping for gRPC" +category = "main" optional = true python-versions = ">=3.6" -files = [ - {file = "grpcio-status-1.57.0.tar.gz", hash = "sha256:b098da99df1eebe58337f8f78e50df990273ccacc1226fddeb47c590e3df9e02"}, - {file = "grpcio_status-1.57.0-py3-none-any.whl", hash = "sha256:15d6af055914ebbc4ed17e55ebfb8e6bb17a45a57fea32e6af19978fb7844690"}, -] [package.dependencies] googleapis-common-protos = ">=1.5.5" @@ -3443,55 +2414,9 @@ protobuf = ">=4.21.6" name = "grpcio-tools" version = "1.57.0" description = "Protobuf code generator for gRPC" +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "grpcio-tools-1.57.0.tar.gz", hash = "sha256:2f16130d869ce27ecd623194547b649dd657333ec7e8644cc571c645781a9b85"}, - {file = "grpcio_tools-1.57.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:4fb8a8468031f858381a576078924af364a08833d8f8f3237018252c4573a802"}, - {file = "grpcio_tools-1.57.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:35bf0dad8a3562043345236c26d0053a856fb06c04d7da652f2ded914e508ae7"}, - {file = 
"grpcio_tools-1.57.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:ec9aab2fb6783c7fc54bc28f58eb75f1ca77594e6b0fd5e5e7a8114a95169fe0"}, - {file = "grpcio_tools-1.57.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0cf5fc0a1c23f8ea34b408b72fb0e90eec0f404ad4dba98e8f6da3c9ce34e2ed"}, - {file = "grpcio_tools-1.57.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26e69d08a515554e0cfe1ec4d31568836f4b17f0ff82294f957f629388629eb9"}, - {file = "grpcio_tools-1.57.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c39a3656576b6fdaaf28abe0467f7a7231df4230c1bee132322dbc3209419e7f"}, - {file = "grpcio_tools-1.57.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f64f8ab22d27d4a5693310748d35a696061c3b5c7b8c4fb4ab3b4bc1068b6b56"}, - {file = "grpcio_tools-1.57.0-cp310-cp310-win32.whl", hash = "sha256:d2a134756f4db34759a5cc7f7e43f7eb87540b68d1cca62925593c6fb93924f7"}, - {file = "grpcio_tools-1.57.0-cp310-cp310-win_amd64.whl", hash = "sha256:9a3d60fb8d46ede26c1907c146561b3a9caa20a7aff961bc661ef8226f85a2e9"}, - {file = "grpcio_tools-1.57.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:aac98ecad8f7bd4301855669d42a5d97ef7bb34bec2b1e74c7a0641d47e313cf"}, - {file = "grpcio_tools-1.57.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:cdd020cb68b51462983b7c2dfbc3eb6ede032b8bf438d4554df0c3f08ce35c76"}, - {file = "grpcio_tools-1.57.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:f54081b08419a39221cd646363b5708857c696b3ad4784f1dcf310891e33a5f7"}, - {file = "grpcio_tools-1.57.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed85a0291fff45b67f2557fe7f117d3bc7af8b54b8619d27bf374b5c8b7e3ca2"}, - {file = "grpcio_tools-1.57.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e868cd6feb3ef07d4b35be104fe1fd0657db05259ff8f8ec5e08f4f89ca1191d"}, - {file = "grpcio_tools-1.57.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:dfb6f6120587b8e228a3cae5ee4985b5bdc18501bad05c49df61965dfc9d70a9"}, - {file = "grpcio_tools-1.57.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4a7ad7f328e28fc97c356d0f10fb10d8b5151bb65aa7cf14bf8084513f0b7306"}, - {file = "grpcio_tools-1.57.0-cp311-cp311-win32.whl", hash = "sha256:9867f2817b1a0c93c523f89ac6c9d8625548af4620a7ce438bf5a76e23327284"}, - {file = "grpcio_tools-1.57.0-cp311-cp311-win_amd64.whl", hash = "sha256:1f9e917a9f18087f6c14b4d4508fb94fca5c2f96852363a89232fb9b2124ac1f"}, - {file = "grpcio_tools-1.57.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:9f2aefa8a37bd2c4db1a3f1aca11377e2766214520fb70e67071f4ff8d8b0fa5"}, - {file = "grpcio_tools-1.57.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:850cbda0ec5d24c39e7215ede410276040692ca45d105fbbeada407fa03f0ac0"}, - {file = "grpcio_tools-1.57.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:6fa52972c9647876ea35f6dc2b51002a74ed900ec7894586cbb2fe76f64f99de"}, - {file = "grpcio_tools-1.57.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76c0eea89d7542719594e50e2283f51a072978b953e8b3e9fd7c59a2c762d4c1"}, - {file = "grpcio_tools-1.57.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3da5240211252fc70a6451fe00c143e2ab2f7bfc2445695ad2ed056b8e48d96"}, - {file = "grpcio_tools-1.57.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a0256f8786ac9e4db618a1aa492bb3472569a0946fd3ee862ffe23196323da55"}, - {file = "grpcio_tools-1.57.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:c026bdf5c1366ce88b7bbe2d8207374d675afd3fd911f60752103de3da4a41d2"}, - {file = "grpcio_tools-1.57.0-cp37-cp37m-win_amd64.whl", hash = "sha256:9053c2f655589545be08b9d6a673e92970173a4bf11a4b9f18cd6e9af626b587"}, - {file = "grpcio_tools-1.57.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:81ec4dbb696e095057b2528d11a8da04be6bbe2b967fa07d4ea9ba6354338cbf"}, - {file = "grpcio_tools-1.57.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:495e2946406963e0b9f063f76d5af0f2a19517dac2b367b5b044432ac9194296"}, - {file = "grpcio_tools-1.57.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:7b46fc6aa8eb7edd18cafcd21fd98703cb6c09e46b507de335fca7f0161dfccb"}, - {file = "grpcio_tools-1.57.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb81ff861692111fa81bd85f64584e624cb4013bd66fbce8a209b8893f5ce398"}, - {file = "grpcio_tools-1.57.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a42dc220eb5305f470855c9284f4c8e85ae59d6d742cd07946b0cbe5e9ca186"}, - {file = "grpcio_tools-1.57.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:90d10d9038ba46a595a223a34f136c9230e3d6d7abc2433dbf0e1c31939d3a8b"}, - {file = "grpcio_tools-1.57.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5bc3e6d338aefb052e19cedabe00452be46d0c10a4ed29ee77abb00402e438fe"}, - {file = "grpcio_tools-1.57.0-cp38-cp38-win32.whl", hash = "sha256:34b36217b17b5bea674a414229913e1fd80ede328be51e1b531fcc62abd393b0"}, - {file = "grpcio_tools-1.57.0-cp38-cp38-win_amd64.whl", hash = "sha256:dbde4004a0688400036342ff73e3706e8940483e2871547b1354d59e93a38277"}, - {file = "grpcio_tools-1.57.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:784574709b9690dc28696617ea69352e2132352fdfc9bc89afa8e39f99ae538e"}, - {file = "grpcio_tools-1.57.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:85ac4e62eb44428cde025fd9ab7554002315fc7880f791c553fc5a0015cc9931"}, - {file = "grpcio_tools-1.57.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:dc771d4db5701f280957bbcee91745e0686d00ed1c6aa7e05ba30a58b02d70a1"}, - {file = "grpcio_tools-1.57.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3ac06703c412f8167a9062eaf6099409967e33bf98fa5b02be4b4689b6bdf39"}, - {file = "grpcio_tools-1.57.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02d78c034109f46032c7217260066d49d41e6bcaf588fa28fa40fe2f83445347"}, - {file = "grpcio_tools-1.57.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2db25f15ed44327f2e02d0c4fe741ac966f9500e407047d8a7c7fccf2df65616"}, - {file = "grpcio_tools-1.57.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2b417c97936d94874a3ce7ed8deab910f2233e3612134507cfee4af8735c38a6"}, - {file = "grpcio_tools-1.57.0-cp39-cp39-win32.whl", hash = "sha256:f717cce5093e6b6049d9ea6d12fdf3658efdb1a80772f7737db1f8510b876df6"}, - {file = "grpcio_tools-1.57.0-cp39-cp39-win_amd64.whl", hash = "sha256:1c0e8a1a32973a5d59fbcc19232f925e5c48116e9411f788033a31c5ca5130b4"}, -] [package.dependencies] grpcio = ">=1.57.0" @@ -3502,12 +2427,9 @@ setuptools = "*" name = "gunicorn" version = "21.2.0" description = "WSGI HTTP Server for UNIX" +category = "dev" optional = false python-versions = ">=3.5" -files = [ - {file = "gunicorn-21.2.0-py3-none-any.whl", hash = "sha256:3213aa5e8c24949e792bcacfc176fef362e7aac80b76c56f6b5122bf350722f0"}, - {file = "gunicorn-21.2.0.tar.gz", hash = "sha256:88ec8bff1d634f98e61b9f65bc4bf3cd918a90806c6f5c48bc5603849ec81033"}, -] [package.dependencies] packaging = "*" @@ -3522,23 +2444,17 @@ tornado = ["tornado (>=0.2)"] name = "h11" version 
= "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, - {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, -] [[package]] name = "h2" version = "4.1.0" description = "HTTP/2 State-Machine based protocol implementation" +category = "main" optional = true python-versions = ">=3.6.1" -files = [ - {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, - {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, -] [package.dependencies] hpack = ">=4.0,<5" @@ -3548,12 +2464,9 @@ hyperframe = ">=6.0,<7" name = "hexbytes" version = "0.3.1" description = "hexbytes: Python `bytes` subclass that decodes hex, with a readable console output" +category = "main" optional = false python-versions = ">=3.7, <4" -files = [ - {file = "hexbytes-0.3.1-py3-none-any.whl", hash = "sha256:383595ad75026cf00abd570f44b368c6cdac0c6becfae5c39ff88829877f8a59"}, - {file = "hexbytes-0.3.1.tar.gz", hash = "sha256:a3fe35c6831ee8fafd048c4c086b986075fc14fd46258fa24ecb8d65745f9a9d"}, -] [package.extras] dev = ["black (>=22)", "bumpversion (>=0.5.3)", "eth-utils (>=1.0.1,<3)", "flake8 (==6.0.0)", "flake8-bugbear (==23.3.23)", "hypothesis (>=3.44.24,<=6.31.6)", "ipython", "isort (>=5.10.1)", "mypy (==0.971)", "pydocstyle (>=5.0.0)", "pytest (>=7.0.0)", "pytest-watch (>=4.1.0)", "pytest-xdist (>=2.4.0)", "sphinx (>=5.0.0)", "sphinx-rtd-theme (>=1.0.0)", "towncrier (>=21,<22)", "tox (>=4.0.0)", "twine", "wheel"] @@ -3565,12 +2478,9 @@ test = ["eth-utils (>=1.0.1,<3)", "hypothesis (>=3.44.24,<=6.31.6)", "pytest (>= name = "hologram" version = "0.0.16" description = "JSON schema generation from dataclasses" +category = "main" optional = false python-versions = "*" -files = [ - {file = "hologram-0.0.16-py3-none-any.whl", hash = "sha256:4e56bd525336bb64a18916f871977a4125b64be8aaa750233583003333cda361"}, - {file = "hologram-0.0.16.tar.gz", hash = "sha256:1c2c921b4e575361623ea0e0d0aa5aee377b1a333cc6c6a879e213ed34583e55"}, -] [package.dependencies] jsonschema = ">=3.0" @@ -3580,44 +2490,35 @@ python-dateutil = ">=2.8,<2.9" name = "hpack" version = "4.0.0" description = "Pure-Python HPACK header compression" +category = "main" optional = true python-versions = ">=3.6.1" -files = [ - {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, - {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, -] [[package]] name = "httpcore" version = "0.17.3" description = "A minimal low-level HTTP client." 
+category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "httpcore-0.17.3-py3-none-any.whl", hash = "sha256:c2789b767ddddfa2a5782e3199b2b7f6894540b17b16ec26b2c4d8e103510b87"}, - {file = "httpcore-0.17.3.tar.gz", hash = "sha256:a6f30213335e34c1ade7be6ec7c47f19f50c56db36abef1a9dfa3815b1cb3888"}, -] [package.dependencies] anyio = ">=3.0,<5.0" certifi = "*" h11 = ">=0.13,<0.15" -sniffio = "==1.*" +sniffio = ">=1.0.0,<2.0.0" [package.extras] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] [[package]] name = "httplib2" version = "0.22.0" description = "A comprehensive HTTP client library." +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc"}, - {file = "httplib2-0.22.0.tar.gz", hash = "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81"}, -] [package.dependencies] pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0.2,<3.0.3 || >3.0.3,<4", markers = "python_version > \"3.0\""} @@ -3626,12 +2527,9 @@ pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0 name = "httpx" version = "0.24.1" description = "The next generation HTTP client." +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "httpx-0.24.1-py3-none-any.whl", hash = "sha256:06781eb9ac53cde990577af654bd990a4949de37a28bdb4a230d434f3a30b9bd"}, - {file = "httpx-0.24.1.tar.gz", hash = "sha256:5853a43053df830c20f8110c5e69fe44d035d850b2dfe795e196f00fdb774bdd"}, -] [package.dependencies] certifi = "*" @@ -3642,20 +2540,17 @@ sniffio = "*" [package.extras] brotli = ["brotli", "brotlicffi"] -cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +cli = ["click (>=8.0.0,<9.0.0)", "pygments (>=2.0.0,<3.0.0)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] [[package]] name = "humanfriendly" version = "10.0" description = "Human friendly output for text interfaces using Python" +category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -files = [ - {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, - {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, -] [package.dependencies] pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_version >= \"3.8\""} @@ -3664,12 +2559,9 @@ pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_ve name = "humanize" version = "4.8.0" description = "Python humanize utilities" +category = "main" optional = false python-versions = ">=3.8" -files = [ - {file = "humanize-4.8.0-py3-none-any.whl", hash = "sha256:8bc9e2bb9315e61ec06bf690151ae35aeb65651ab091266941edf97c90836404"}, - {file = "humanize-4.8.0.tar.gz", hash = "sha256:9783373bf1eec713a770ecaa7c2d7a7902c98398009dfa3d8a2df91eec9311e8"}, -] [package.extras] tests = ["freezegun", "pytest", "pytest-cov"] @@ -3678,34 +2570,25 @@ tests = ["freezegun", "pytest", "pytest-cov"] name = "hyperframe" version = "6.0.1" description = "HTTP/2 framing layer for Python" +category = "main" optional = true python-versions = ">=3.6.1" -files = [ - {file = "hyperframe-6.0.1-py3-none-any.whl", hash = 
"sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, - {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, -] [[package]] name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" optional = false python-versions = ">=3.5" -files = [ - {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, - {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, -] [[package]] name = "importlib-metadata" version = "4.13.0" description = "Read metadata from Python packages" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "importlib_metadata-4.13.0-py3-none-any.whl", hash = "sha256:8a8a81bcf996e74fee46f0d16bd3eaa382a7eb20fd82445c3ad11f4090334116"}, - {file = "importlib_metadata-4.13.0.tar.gz", hash = "sha256:dd0173e8f150d6815e098fd354f6414b0f079af4644ddfe90c71e2fc6174346d"}, -] [package.dependencies] zipp = ">=0.5" @@ -3719,12 +2602,9 @@ testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packag name = "importlib-resources" version = "6.0.1" description = "Read resources from Python packages" +category = "main" optional = false python-versions = ">=3.8" -files = [ - {file = "importlib_resources-6.0.1-py3-none-any.whl", hash = "sha256:134832a506243891221b88b4ae1213327eea96ceb4e407a00d790bb0626f45cf"}, - {file = "importlib_resources-6.0.1.tar.gz", hash = "sha256:4359457e42708462b9626a04657c6208ad799ceb41e5c58c57ffa0e6a098a5d4"}, -] [package.dependencies] zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} @@ -3737,59 +2617,58 @@ testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", name = "inflection" version = "0.5.1" description = "A port of Ruby on Rails inflector to Python" +category = "dev" optional = false python-versions = ">=3.5" -files = [ - {file = "inflection-0.5.1-py2.py3-none-any.whl", hash = "sha256:f38b2b640938a4f35ade69ac3d053042959b62a0f1076a5bbaa1b9526605a8a2"}, - {file = "inflection-0.5.1.tar.gz", hash = "sha256:1a29730d366e996aaacffb2f1f1cb9593dc38e2ddd30c91250c6dde09ea9b417"}, -] [[package]] name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, - {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, -] [[package]] name = "isodate" version = "0.6.1" description = "An ISO 8601 date/time/duration parser and formatter" +category = "main" optional = false python-versions = "*" -files = [ - {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, - {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, -] [package.dependencies] six = "*" +[[package]] +name = "isort" +version = "5.12.0" +description = "A Python utility / library to sort Python imports." 
+category = "dev" +optional = false +python-versions = ">=3.8.0" + +[package.extras] +colors = ["colorama (>=0.4.3)"] +pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"] +plugins = ["setuptools"] +requirements-deprecated-finder = ["pip-api", "pipreqs"] + [[package]] name = "itsdangerous" version = "2.1.2" description = "Safely pass data to untrusted environments and back." +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "itsdangerous-2.1.2-py3-none-any.whl", hash = "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44"}, - {file = "itsdangerous-2.1.2.tar.gz", hash = "sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a"}, -] [[package]] name = "jaraco-classes" version = "3.3.0" description = "Utility functions for Python class constructs" +category = "main" optional = true python-versions = ">=3.8" -files = [ - {file = "jaraco.classes-3.3.0-py3-none-any.whl", hash = "sha256:10afa92b6743f25c0cf5f37c6bb6e18e2c5bb84a16527ccfc0040ea377e7aaeb"}, - {file = "jaraco.classes-3.3.0.tar.gz", hash = "sha256:c063dd08e89217cee02c8d5e5ec560f2c8ce6cdc2fcdc2e68f7b2e5547ed3621"}, -] [package.dependencies] more-itertools = "*" @@ -3802,12 +2681,9 @@ testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", name = "jeepney" version = "0.8.0" description = "Low-level, pure Python DBus protocol wrapper." +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "jeepney-0.8.0-py3-none-any.whl", hash = "sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755"}, - {file = "jeepney-0.8.0.tar.gz", hash = "sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806"}, -] [package.extras] test = ["async-timeout", "pytest", "pytest-asyncio (>=0.17)", "pytest-trio", "testpath", "trio"] @@ -3817,12 +2693,9 @@ trio = ["async_generator", "trio"] name = "jinja2" version = "3.1.2" description = "A very fast and expressive template engine." 
+category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, - {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, -] [package.dependencies] MarkupSafe = ">=2.0" @@ -3834,12 +2707,9 @@ i18n = ["Babel (>=2.7)"] name = "jinxed" version = "1.2.0" description = "Jinxed Terminal Library" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "jinxed-1.2.0-py2.py3-none-any.whl", hash = "sha256:cfc2b2e4e3b4326954d546ba6d6b9a7a796ddcb0aef8d03161d005177eb0d48b"}, - {file = "jinxed-1.2.0.tar.gz", hash = "sha256:032acda92d5c57cd216033cbbd53de731e6ed50deb63eb4781336ca55f72cda5"}, -] [package.dependencies] ansicon = {version = "*", markers = "platform_system == \"Windows\""} @@ -3848,24 +2718,17 @@ ansicon = {version = "*", markers = "platform_system == \"Windows\""} name = "jmespath" version = "1.0.1" description = "JSON Matching Expressions" +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, - {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, -] [[package]] name = "jsonpath-ng" version = "1.5.3" description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming." +category = "main" optional = false python-versions = "*" -files = [ - {file = "jsonpath-ng-1.5.3.tar.gz", hash = "sha256:a273b182a82c1256daab86a313b937059261b5c5f8c4fa3fc38b882b344dd567"}, - {file = "jsonpath_ng-1.5.3-py2-none-any.whl", hash = "sha256:f75b95dbecb8a0f3b86fd2ead21c2b022c3f5770957492b9b6196ecccfeb10aa"}, - {file = "jsonpath_ng-1.5.3-py3-none-any.whl", hash = "sha256:292a93569d74029ba75ac2dc3d3630fc0e17b2df26119a165fa1d498ca47bf65"}, -] [package.dependencies] decorator = "*" @@ -3876,12 +2739,9 @@ six = "*" name = "jsonschema" version = "4.19.0" description = "An implementation of JSON Schema validation for Python" +category = "main" optional = false python-versions = ">=3.8" -files = [ - {file = "jsonschema-4.19.0-py3-none-any.whl", hash = "sha256:043dc26a3845ff09d20e4420d6012a9c91c9aa8999fa184e7efcfeccb41e32cb"}, - {file = "jsonschema-4.19.0.tar.gz", hash = "sha256:6e1e7569ac13be8139b2dd2c21a55d350066ee3f80df06c608b398cdc6f30e8f"}, -] [package.dependencies] attrs = ">=22.2.0" @@ -3899,12 +2759,9 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- name = "jsonschema-specifications" version = "2023.7.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" +category = "main" optional = false python-versions = ">=3.8" -files = [ - {file = "jsonschema_specifications-2023.7.1-py3-none-any.whl", hash = "sha256:05adf340b659828a004220a9613be00fa3f223f2b82002e273dee62fd50524b1"}, - {file = "jsonschema_specifications-2023.7.1.tar.gz", hash = "sha256:c91a50404e88a1f6ba40636778e2ee08f6e24c5613fe4c53ac24578a5a7f72bb"}, -] [package.dependencies] importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} @@ -3914,12 +2771,9 @@ referencing = ">=0.28.0" name = "keyring" version = "24.2.0" description = "Store and access your passwords safely." 
+category = "main" optional = true python-versions = ">=3.8" -files = [ - {file = "keyring-24.2.0-py3-none-any.whl", hash = "sha256:4901caaf597bfd3bbd78c9a0c7c4c29fcd8310dab2cffefe749e916b6527acd6"}, - {file = "keyring-24.2.0.tar.gz", hash = "sha256:ca0746a19ec421219f4d713f848fa297a661a8a8c1504867e55bfb5e09091509"}, -] [package.dependencies] importlib-metadata = {version = ">=4.11.4", markers = "python_version < \"3.12\""} @@ -3938,57 +2792,17 @@ testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", name = "lazy-object-proxy" version = "1.9.0" description = "A fast and thorough lazy object proxy." +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "lazy-object-proxy-1.9.0.tar.gz", hash = "sha256:659fb5809fa4629b8a1ac5106f669cfc7bef26fbb389dda53b3e010d1ac4ebae"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b40387277b0ed2d0602b8293b94d7257e17d1479e257b4de114ea11a8cb7f2d7"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8c6cfb338b133fbdbc5cfaa10fe3c6aeea827db80c978dbd13bc9dd8526b7d4"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:721532711daa7db0d8b779b0bb0318fa87af1c10d7fe5e52ef30f8eff254d0cd"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:66a3de4a3ec06cd8af3f61b8e1ec67614fbb7c995d02fa224813cb7afefee701"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1aa3de4088c89a1b69f8ec0dcc169aa725b0ff017899ac568fe44ddc1396df46"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-win32.whl", hash = "sha256:f0705c376533ed2a9e5e97aacdbfe04cecd71e0aa84c7c0595d02ef93b6e4455"}, - {file = "lazy_object_proxy-1.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:ea806fd4c37bf7e7ad82537b0757999264d5f70c45468447bb2b91afdbe73a6e"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:946d27deaff6cf8452ed0dba83ba38839a87f4f7a9732e8f9fd4107b21e6ff07"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79a31b086e7e68b24b99b23d57723ef7e2c6d81ed21007b6281ebcd1688acb0a"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f699ac1c768270c9e384e4cbd268d6e67aebcfae6cd623b4d7c3bfde5a35db59"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bfb38f9ffb53b942f2b5954e0f610f1e721ccebe9cce9025a38c8ccf4a5183a4"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:189bbd5d41ae7a498397287c408617fe5c48633e7755287b21d741f7db2706a9"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-win32.whl", hash = "sha256:81fc4d08b062b535d95c9ea70dbe8a335c45c04029878e62d744bdced5141586"}, - {file = "lazy_object_proxy-1.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:f2457189d8257dd41ae9b434ba33298aec198e30adf2dcdaaa3a28b9994f6adb"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d9e25ef10a39e8afe59a5c348a4dbf29b4868ab76269f81ce1674494e2565a6e"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cbf9b082426036e19c6924a9ce90c740a9861e2bdc27a4834fd0a910742ac1e8"}, - {file = 
"lazy_object_proxy-1.9.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f5fa4a61ce2438267163891961cfd5e32ec97a2c444e5b842d574251ade27d2"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:8fa02eaab317b1e9e03f69aab1f91e120e7899b392c4fc19807a8278a07a97e8"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e7c21c95cae3c05c14aafffe2865bbd5e377cfc1348c4f7751d9dc9a48ca4bda"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-win32.whl", hash = "sha256:f12ad7126ae0c98d601a7ee504c1122bcef553d1d5e0c3bfa77b16b3968d2734"}, - {file = "lazy_object_proxy-1.9.0-cp37-cp37m-win_amd64.whl", hash = "sha256:edd20c5a55acb67c7ed471fa2b5fb66cb17f61430b7a6b9c3b4a1e40293b1671"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2d0daa332786cf3bb49e10dc6a17a52f6a8f9601b4cf5c295a4f85854d61de63"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cd077f3d04a58e83d04b20e334f678c2b0ff9879b9375ed107d5d07ff160171"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:660c94ea760b3ce47d1855a30984c78327500493d396eac4dfd8bd82041b22be"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:212774e4dfa851e74d393a2370871e174d7ff0ebc980907723bb67d25c8a7c30"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f0117049dd1d5635bbff65444496c90e0baa48ea405125c088e93d9cf4525b11"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-win32.whl", hash = "sha256:0a891e4e41b54fd5b8313b96399f8b0e173bbbfc03c7631f01efbe29bb0bcf82"}, - {file = "lazy_object_proxy-1.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:9990d8e71b9f6488e91ad25f322898c136b008d87bf852ff65391b004da5e17b"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9e7551208b2aded9c1447453ee366f1c4070602b3d932ace044715d89666899b"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f83ac4d83ef0ab017683d715ed356e30dd48a93746309c8f3517e1287523ef4"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7322c3d6f1766d4ef1e51a465f47955f1e8123caee67dd641e67d539a534d006"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:18b78ec83edbbeb69efdc0e9c1cb41a3b1b1ed11ddd8ded602464c3fc6020494"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:09763491ce220c0299688940f8dc2c5d05fd1f45af1e42e636b2e8b2303e4382"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-win32.whl", hash = "sha256:9090d8e53235aa280fc9239a86ae3ea8ac58eff66a705fa6aa2ec4968b95c821"}, - {file = "lazy_object_proxy-1.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:db1c1722726f47e10e0b5fdbf15ac3b8adb58c091d12b3ab713965795036985f"}, -] [[package]] name = "leather" version = "0.3.4" description = "Python charting for 80% of humans." 
+category = "main" optional = false python-versions = "*" -files = [ - {file = "leather-0.3.4-py2.py3-none-any.whl", hash = "sha256:5e741daee96e9f1e9e06081b8c8a10c4ac199301a0564cdd99b09df15b4603d2"}, - {file = "leather-0.3.4.tar.gz", hash = "sha256:b43e21c8fa46b2679de8449f4d953c06418666dc058ce41055ee8a8d3bb40918"}, -] [package.dependencies] six = ">=1.6.1" @@ -3997,12 +2811,9 @@ six = ">=1.6.1" name = "limits" version = "3.6.0" description = "Rate limiting utilities" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "limits-3.6.0-py3-none-any.whl", hash = "sha256:32fe29a398352c71bc43d53773117d47e22c5ea4200aef28d3f5fdee10334cd7"}, - {file = "limits-3.6.0.tar.gz", hash = "sha256:57a9c69fd37ad1e4fa3886dff8d035227e1f6af87f47e9118627e72cf1ced3bf"}, -] [package.dependencies] deprecated = ">=1.2" @@ -4026,12 +2837,9 @@ rediscluster = ["redis (>=4.2.0,!=4.5.2,!=4.5.3)"] name = "linkify-it-py" version = "2.0.2" description = "Links recognition library with FULL unicode support." +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "linkify-it-py-2.0.2.tar.gz", hash = "sha256:19f3060727842c254c808e99d465c80c49d2c7306788140987a1a7a29b0d6ad2"}, - {file = "linkify_it_py-2.0.2-py3-none-any.whl", hash = "sha256:a3a24428f6c96f27370d7fe61d2ac0be09017be5190d68d8658233171f1b6541"}, -] [package.dependencies] uc-micro-py = "*" @@ -4046,142 +2854,36 @@ test = ["coverage", "pytest", "pytest-cov"] name = "lockfile" version = "0.12.2" description = "Platform-independent file locking module" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "lockfile-0.12.2-py2.py3-none-any.whl", hash = "sha256:6c3cb24f344923d30b2785d5ad75182c8ea7ac1b6171b08657258ec7429d50fa"}, - {file = "lockfile-0.12.2.tar.gz", hash = "sha256:6aed02de03cba24efabcd600b30540140634fc06cfa603822d508d5361e9f799"}, -] [[package]] name = "logbook" version = "1.5.3" description = "A logging replacement for Python" +category = "main" optional = false python-versions = "*" -files = [ - {file = "Logbook-1.5.3-cp27-cp27m-win32.whl", hash = "sha256:56ee54c11df3377314cedcd6507638f015b4b88c0238c2e01b5eb44fd3a6ad1b"}, - {file = "Logbook-1.5.3-cp27-cp27m-win_amd64.whl", hash = "sha256:2dc85f1510533fddb481e97677bb7bca913560862734c0b3b289bfed04f78c92"}, - {file = "Logbook-1.5.3-cp35-cp35m-win32.whl", hash = "sha256:94e2e11ff3c2304b0d09a36c6208e5ae756eb948b210e5cbd63cd8d27f911542"}, - {file = "Logbook-1.5.3-cp35-cp35m-win_amd64.whl", hash = "sha256:97fee1bd9605f76335b169430ed65e15e457a844b2121bd1d90a08cf7e30aba0"}, - {file = "Logbook-1.5.3-cp36-cp36m-win32.whl", hash = "sha256:7c533eb728b3d220b1b5414ba4635292d149d79f74f6973b4aa744c850ca944a"}, - {file = "Logbook-1.5.3-cp36-cp36m-win_amd64.whl", hash = "sha256:e18f7422214b1cf0240c56f884fd9c9b4ff9d0da2eabca9abccba56df7222f66"}, - {file = "Logbook-1.5.3-cp37-cp37m-win32.whl", hash = "sha256:8f76a2e7b1f72595f753228732f81ce342caf03babc3fed6bbdcf366f2f20f18"}, - {file = "Logbook-1.5.3-cp37-cp37m-win_amd64.whl", hash = "sha256:0cf2cdbfb65a03b5987d19109dacad13417809dcf697f66e1a7084fb21744ea9"}, - {file = "Logbook-1.5.3.tar.gz", hash = "sha256:66f454ada0f56eae43066f604a222b09893f98c1adc18df169710761b8f32fe8"}, -] [package.extras] -all = ["Jinja2", "brotli", "cython", "execnet (>=1.0.9)", "mock", "pytest", "pytest-cov (<2.6)", "pyzmq", "redis", "sqlalchemy"] +all = ["Jinja2", "brotli", "cython", "execnet (>=1.0.9)", "pytest (>4.0)", "pytest-cov (>=2.6)", "pyzmq", "redis", "sqlalchemy"] compression = ["brotli"] -dev = ["cython", "mock", 
"pytest", "pytest-cov (<2.6)"] +dev = ["cython", "pytest (>4.0)", "pytest-cov (>=2.6)"] execnet = ["execnet (>=1.0.9)"] jinja = ["Jinja2"] redis = ["redis"] sqlalchemy = ["sqlalchemy"] -test = ["mock", "pytest", "pytest-cov (<2.6)"] +test = ["pytest (>4.0)", "pytest-cov (>=2.6)"] zmq = ["pyzmq"] [[package]] name = "lxml" version = "4.9.3" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." +category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" -files = [ - {file = "lxml-4.9.3-cp27-cp27m-macosx_11_0_x86_64.whl", hash = "sha256:b0a545b46b526d418eb91754565ba5b63b1c0b12f9bd2f808c852d9b4b2f9b5c"}, - {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:075b731ddd9e7f68ad24c635374211376aa05a281673ede86cbe1d1b3455279d"}, - {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1e224d5755dba2f4a9498e150c43792392ac9b5380aa1b845f98a1618c94eeef"}, - {file = "lxml-4.9.3-cp27-cp27m-win32.whl", hash = "sha256:2c74524e179f2ad6d2a4f7caf70e2d96639c0954c943ad601a9e146c76408ed7"}, - {file = "lxml-4.9.3-cp27-cp27m-win_amd64.whl", hash = "sha256:4f1026bc732b6a7f96369f7bfe1a4f2290fb34dce00d8644bc3036fb351a4ca1"}, - {file = "lxml-4.9.3-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0781a98ff5e6586926293e59480b64ddd46282953203c76ae15dbbbf302e8bb"}, - {file = "lxml-4.9.3-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cef2502e7e8a96fe5ad686d60b49e1ab03e438bd9123987994528febd569868e"}, - {file = "lxml-4.9.3-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b86164d2cff4d3aaa1f04a14685cbc072efd0b4f99ca5708b2ad1b9b5988a991"}, - {file = "lxml-4.9.3-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:42871176e7896d5d45138f6d28751053c711ed4d48d8e30b498da155af39aebd"}, - {file = "lxml-4.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ae8b9c6deb1e634ba4f1930eb67ef6e6bf6a44b6eb5ad605642b2d6d5ed9ce3c"}, - {file = "lxml-4.9.3-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:411007c0d88188d9f621b11d252cce90c4a2d1a49db6c068e3c16422f306eab8"}, - {file = "lxml-4.9.3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:cd47b4a0d41d2afa3e58e5bf1f62069255aa2fd6ff5ee41604418ca925911d76"}, - {file = "lxml-4.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e2cb47860da1f7e9a5256254b74ae331687b9672dfa780eed355c4c9c3dbd23"}, - {file = "lxml-4.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1247694b26342a7bf47c02e513d32225ededd18045264d40758abeb3c838a51f"}, - {file = "lxml-4.9.3-cp310-cp310-win32.whl", hash = "sha256:cdb650fc86227eba20de1a29d4b2c1bfe139dc75a0669270033cb2ea3d391b85"}, - {file = "lxml-4.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:97047f0d25cd4bcae81f9ec9dc290ca3e15927c192df17331b53bebe0e3ff96d"}, - {file = "lxml-4.9.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:1f447ea5429b54f9582d4b955f5f1985f278ce5cf169f72eea8afd9502973dd5"}, - {file = "lxml-4.9.3-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:57d6ba0ca2b0c462f339640d22882acc711de224d769edf29962b09f77129cbf"}, - {file = "lxml-4.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:9767e79108424fb6c3edf8f81e6730666a50feb01a328f4a016464a5893f835a"}, - {file = "lxml-4.9.3-cp311-cp311-manylinux_2_28_aarch64.whl", hash = 
"sha256:71c52db65e4b56b8ddc5bb89fb2e66c558ed9d1a74a45ceb7dcb20c191c3df2f"}, - {file = "lxml-4.9.3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d73d8ecf8ecf10a3bd007f2192725a34bd62898e8da27eb9d32a58084f93962b"}, - {file = "lxml-4.9.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0a3d3487f07c1d7f150894c238299934a2a074ef590b583103a45002035be120"}, - {file = "lxml-4.9.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e28c51fa0ce5674be9f560c6761c1b441631901993f76700b1b30ca6c8378d6"}, - {file = "lxml-4.9.3-cp311-cp311-win32.whl", hash = "sha256:0bfd0767c5c1de2551a120673b72e5d4b628737cb05414f03c3277bf9bed3305"}, - {file = "lxml-4.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:25f32acefac14ef7bd53e4218fe93b804ef6f6b92ffdb4322bb6d49d94cad2bc"}, - {file = "lxml-4.9.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:d3ff32724f98fbbbfa9f49d82852b159e9784d6094983d9a8b7f2ddaebb063d4"}, - {file = "lxml-4.9.3-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:48d6ed886b343d11493129e019da91d4039826794a3e3027321c56d9e71505be"}, - {file = "lxml-4.9.3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9a92d3faef50658dd2c5470af249985782bf754c4e18e15afb67d3ab06233f13"}, - {file = "lxml-4.9.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b4e4bc18382088514ebde9328da057775055940a1f2e18f6ad2d78aa0f3ec5b9"}, - {file = "lxml-4.9.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fc9b106a1bf918db68619fdcd6d5ad4f972fdd19c01d19bdb6bf63f3589a9ec5"}, - {file = "lxml-4.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:d37017287a7adb6ab77e1c5bee9bcf9660f90ff445042b790402a654d2ad81d8"}, - {file = "lxml-4.9.3-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:56dc1f1ebccc656d1b3ed288f11e27172a01503fc016bcabdcbc0978b19352b7"}, - {file = "lxml-4.9.3-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:578695735c5a3f51569810dfebd05dd6f888147a34f0f98d4bb27e92b76e05c2"}, - {file = "lxml-4.9.3-cp35-cp35m-win32.whl", hash = "sha256:704f61ba8c1283c71b16135caf697557f5ecf3e74d9e453233e4771d68a1f42d"}, - {file = "lxml-4.9.3-cp35-cp35m-win_amd64.whl", hash = "sha256:c41bfca0bd3532d53d16fd34d20806d5c2b1ace22a2f2e4c0008570bf2c58833"}, - {file = "lxml-4.9.3-cp36-cp36m-macosx_11_0_x86_64.whl", hash = "sha256:64f479d719dc9f4c813ad9bb6b28f8390360660b73b2e4beb4cb0ae7104f1c12"}, - {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:dd708cf4ee4408cf46a48b108fb9427bfa00b9b85812a9262b5c668af2533ea5"}, - {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c31c7462abdf8f2ac0577d9f05279727e698f97ecbb02f17939ea99ae8daa98"}, - {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e3cd95e10c2610c360154afdc2f1480aea394f4a4f1ea0a5eacce49640c9b190"}, - {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:4930be26af26ac545c3dffb662521d4e6268352866956672231887d18f0eaab2"}, - {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4aec80cde9197340bc353d2768e2a75f5f60bacda2bab72ab1dc499589b3878c"}, - {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:14e019fd83b831b2e61baed40cab76222139926b1fb5ed0e79225bc0cae14584"}, - {file = "lxml-4.9.3-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0c0850c8b02c298d3c7006b23e98249515ac57430e16a166873fc47a5d549287"}, - {file = "lxml-4.9.3-cp36-cp36m-musllinux_1_1_x86_64.whl", 
hash = "sha256:aca086dc5f9ef98c512bac8efea4483eb84abbf926eaeedf7b91479feb092458"}, - {file = "lxml-4.9.3-cp36-cp36m-win32.whl", hash = "sha256:50baa9c1c47efcaef189f31e3d00d697c6d4afda5c3cde0302d063492ff9b477"}, - {file = "lxml-4.9.3-cp36-cp36m-win_amd64.whl", hash = "sha256:bef4e656f7d98aaa3486d2627e7d2df1157d7e88e7efd43a65aa5dd4714916cf"}, - {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:46f409a2d60f634fe550f7133ed30ad5321ae2e6630f13657fb9479506b00601"}, - {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:4c28a9144688aef80d6ea666c809b4b0e50010a2aca784c97f5e6bf143d9f129"}, - {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:141f1d1a9b663c679dc524af3ea1773e618907e96075262726c7612c02b149a4"}, - {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:53ace1c1fd5a74ef662f844a0413446c0629d151055340e9893da958a374f70d"}, - {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:17a753023436a18e27dd7769e798ce302963c236bc4114ceee5b25c18c52c693"}, - {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7d298a1bd60c067ea75d9f684f5f3992c9d6766fadbc0bcedd39750bf344c2f4"}, - {file = "lxml-4.9.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:081d32421db5df44c41b7f08a334a090a545c54ba977e47fd7cc2deece78809a"}, - {file = "lxml-4.9.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:23eed6d7b1a3336ad92d8e39d4bfe09073c31bfe502f20ca5116b2a334f8ec02"}, - {file = "lxml-4.9.3-cp37-cp37m-win32.whl", hash = "sha256:1509dd12b773c02acd154582088820893109f6ca27ef7291b003d0e81666109f"}, - {file = "lxml-4.9.3-cp37-cp37m-win_amd64.whl", hash = "sha256:120fa9349a24c7043854c53cae8cec227e1f79195a7493e09e0c12e29f918e52"}, - {file = "lxml-4.9.3-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:4d2d1edbca80b510443f51afd8496be95529db04a509bc8faee49c7b0fb6d2cc"}, - {file = "lxml-4.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8d7e43bd40f65f7d97ad8ef5c9b1778943d02f04febef12def25f7583d19baac"}, - {file = "lxml-4.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:71d66ee82e7417828af6ecd7db817913cb0cf9d4e61aa0ac1fde0583d84358db"}, - {file = "lxml-4.9.3-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:6fc3c450eaa0b56f815c7b62f2b7fba7266c4779adcf1cece9e6deb1de7305ce"}, - {file = "lxml-4.9.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:65299ea57d82fb91c7f019300d24050c4ddeb7c5a190e076b5f48a2b43d19c42"}, - {file = "lxml-4.9.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:eadfbbbfb41b44034a4c757fd5d70baccd43296fb894dba0295606a7cf3124aa"}, - {file = "lxml-4.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3e9bdd30efde2b9ccfa9cb5768ba04fe71b018a25ea093379c857c9dad262c40"}, - {file = "lxml-4.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fcdd00edfd0a3001e0181eab3e63bd5c74ad3e67152c84f93f13769a40e073a7"}, - {file = "lxml-4.9.3-cp38-cp38-win32.whl", hash = "sha256:57aba1bbdf450b726d58b2aea5fe47c7875f5afb2c4a23784ed78f19a0462574"}, - {file = "lxml-4.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:92af161ecbdb2883c4593d5ed4815ea71b31fafd7fd05789b23100d081ecac96"}, - {file = "lxml-4.9.3-cp39-cp39-macosx_11_0_x86_64.whl", hash = 
"sha256:9bb6ad405121241e99a86efff22d3ef469024ce22875a7ae045896ad23ba2340"}, - {file = "lxml-4.9.3-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:8ed74706b26ad100433da4b9d807eae371efaa266ffc3e9191ea436087a9d6a7"}, - {file = "lxml-4.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:fbf521479bcac1e25a663df882c46a641a9bff6b56dc8b0fafaebd2f66fb231b"}, - {file = "lxml-4.9.3-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:303bf1edce6ced16bf67a18a1cf8339d0db79577eec5d9a6d4a80f0fb10aa2da"}, - {file = "lxml-4.9.3-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:5515edd2a6d1a5a70bfcdee23b42ec33425e405c5b351478ab7dc9347228f96e"}, - {file = "lxml-4.9.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:690dafd0b187ed38583a648076865d8c229661ed20e48f2335d68e2cf7dc829d"}, - {file = "lxml-4.9.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b6420a005548ad52154c8ceab4a1290ff78d757f9e5cbc68f8c77089acd3c432"}, - {file = "lxml-4.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bb3bb49c7a6ad9d981d734ef7c7193bc349ac338776a0360cc671eaee89bcf69"}, - {file = "lxml-4.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d27be7405547d1f958b60837dc4c1007da90b8b23f54ba1f8b728c78fdb19d50"}, - {file = "lxml-4.9.3-cp39-cp39-win32.whl", hash = "sha256:8df133a2ea5e74eef5e8fc6f19b9e085f758768a16e9877a60aec455ed2609b2"}, - {file = "lxml-4.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:4dd9a263e845a72eacb60d12401e37c616438ea2e5442885f65082c276dfb2b2"}, - {file = "lxml-4.9.3-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6689a3d7fd13dc687e9102a27e98ef33730ac4fe37795d5036d18b4d527abd35"}, - {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:f6bdac493b949141b733c5345b6ba8f87a226029cbabc7e9e121a413e49441e0"}, - {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:05186a0f1346ae12553d66df1cfce6f251589fea3ad3da4f3ef4e34b2d58c6a3"}, - {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c2006f5c8d28dee289f7020f721354362fa304acbaaf9745751ac4006650254b"}, - {file = "lxml-4.9.3-pp38-pypy38_pp73-macosx_11_0_x86_64.whl", hash = "sha256:5c245b783db29c4e4fbbbfc9c5a78be496c9fea25517f90606aa1f6b2b3d5f7b"}, - {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:4fb960a632a49f2f089d522f70496640fdf1218f1243889da3822e0a9f5f3ba7"}, - {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:50670615eaf97227d5dc60de2dc99fb134a7130d310d783314e7724bf163f75d"}, - {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9719fe17307a9e814580af1f5c6e05ca593b12fb7e44fe62450a5384dbf61b4b"}, - {file = "lxml-4.9.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:3331bece23c9ee066e0fb3f96c61322b9e0f54d775fccefff4c38ca488de283a"}, - {file = "lxml-4.9.3-pp39-pypy39_pp73-macosx_11_0_x86_64.whl", hash = "sha256:ed667f49b11360951e201453fc3967344d0d0263aa415e1619e85ae7fd17b4e0"}, - {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:8b77946fd508cbf0fccd8e400a7f71d4ac0e1595812e66025bac475a8e811694"}, - {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = 
"sha256:e4da8ca0c0c0aea88fd46be8e44bd49716772358d648cce45fe387f7b92374a7"}, - {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:fe4bda6bd4340caa6e5cf95e73f8fea5c4bfc55763dd42f1b50a94c1b4a2fbd4"}, - {file = "lxml-4.9.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:f3df3db1d336b9356dd3112eae5f5c2b8b377f3bc826848567f10bfddfee77e9"}, - {file = "lxml-4.9.3.tar.gz", hash = "sha256:48628bd53a426c9eb9bc066a923acaa0878d1e86129fd5359aee99285f4eed9c"}, -] [package.extras] cssselect = ["cssselect (>=0.7)"] @@ -4193,23 +2895,17 @@ source = ["Cython (>=0.29.35)"] name = "makefun" version = "1.15.1" description = "Small library to dynamically create python functions." +category = "main" optional = false python-versions = "*" -files = [ - {file = "makefun-1.15.1-py2.py3-none-any.whl", hash = "sha256:a63cfc7b47a539c76d97bd4fdb833c7d0461e759fd1225f580cb4be6200294d4"}, - {file = "makefun-1.15.1.tar.gz", hash = "sha256:40b0f118b6ded0d8d78c78f1eb679b8b6b2462e3c1b3e05fb1b2da8cd46b48a5"}, -] [[package]] name = "mako" version = "1.2.4" description = "A super-fast templating language that borrows the best ideas from the existing templating languages." +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "Mako-1.2.4-py3-none-any.whl", hash = "sha256:c97c79c018b9165ac9922ae4f32da095ffd3c4e6872b45eded42926deea46818"}, - {file = "Mako-1.2.4.tar.gz", hash = "sha256:d60a3903dc3bb01a18ad6a89cdbe2e4eadc69c0bc8ef1e3773ba53d44c3f7a34"}, -] [package.dependencies] MarkupSafe = ">=0.9.2" @@ -4223,12 +2919,9 @@ testing = ["pytest"] name = "markdown" version = "3.4.4" description = "Python implementation of John Gruber's Markdown." +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "Markdown-3.4.4-py3-none-any.whl", hash = "sha256:a4c1b65c0957b4bd9e7d86ddc7b3c9868fb9670660f6f99f6d1bca8954d5a941"}, - {file = "Markdown-3.4.4.tar.gz", hash = "sha256:225c6123522495d4119a90b3a3ba31a1e87a70369e03f14799ea9c0d7183a3d6"}, -] [package.dependencies] importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""} @@ -4241,12 +2934,9 @@ testing = ["coverage", "pyyaml"] name = "markdown-it-py" version = "3.0.0" description = "Python port of markdown-it. Markdown parsing, done right!" +category = "dev" optional = false python-versions = ">=3.8" -files = [ - {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, - {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, -] [package.dependencies] mdurl = ">=0.1,<1.0" @@ -4265,71 +2955,17 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] name = "markupsafe" version = "2.1.3" description = "Safely add untrusted strings to HTML/XML markup." 
+category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, - {file = 
"MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"}, - {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, -] [[package]] name = "marshmallow" version = "3.20.1" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." +category = "dev" optional = false python-versions = ">=3.8" -files = [ - {file = "marshmallow-3.20.1-py3-none-any.whl", hash = "sha256:684939db93e80ad3561392f47be0230743131560a41c5110684c16e21ade0a5c"}, - {file = "marshmallow-3.20.1.tar.gz", hash = "sha256:5d2371bbe42000f2b3fb5eaa065224df7d8f8597bc19a1bbfa5bfe7fba8da889"}, -] [package.dependencies] packaging = ">=17.0" @@ -4344,12 +2980,9 @@ tests = ["pytest", "pytz", "simplejson"] name = "marshmallow-oneofschema" version = "3.0.1" description = "marshmallow multiplexing schema" +category = "dev" optional = false python-versions = ">=3.6" -files = [ - {file = "marshmallow-oneofschema-3.0.1.tar.gz", hash = "sha256:62cd2099b29188c92493c2940ee79d1bf2f2619a71721664e5a98ec2faa58237"}, - {file = "marshmallow_oneofschema-3.0.1-py2.py3-none-any.whl", hash = "sha256:bd29410a9f2f7457a2b428286e2a80ef76b8ddc3701527dc1f935a88914b02f2"}, -] [package.dependencies] marshmallow = ">=3.0.0,<4.0.0" @@ -4363,12 +2996,9 @@ tests = ["mock", "pytest"] name = "marshmallow-sqlalchemy" version = "0.26.1" description = "SQLAlchemy integration with the marshmallow (de)serialization library" +category = "dev" optional = false python-versions = ">=3.6" -files = [ - {file = "marshmallow-sqlalchemy-0.26.1.tar.gz", hash = "sha256:d8525f74de51554b5c8491effe036f60629a426229befa33ff614c8569a16a73"}, - {file = "marshmallow_sqlalchemy-0.26.1-py2.py3-none-any.whl", hash = "sha256:ba7493eeb8669a3bf00d8f906b657feaa87a740ae9e4ecf829cfd6ddf763d276"}, -] [package.dependencies] marshmallow = ">=3.0.0" @@ -4384,12 +3014,9 @@ tests = ["pytest", "pytest-lazy-fixture"] name = "mashumaro" version = "3.6" description = "Fast serialization library on top of dataclasses" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "mashumaro-3.6-py3-none-any.whl", hash = "sha256:77403e3e2ecd0a7d0e22d472c08e33282460e48726eabe356c5163efbdf9c7ee"}, - {file = "mashumaro-3.6.tar.gz", hash = "sha256:ceb3de53029219bbbb0385ca600b59348dcd14e0c68523986c6d51889ad338f5"}, -] [package.dependencies] msgpack = {version = ">=0.5.6", optional = true, markers = "extra == \"msgpack\""} @@ -4405,23 +3032,17 @@ yaml = ["pyyaml (>=3.13)"] name = "mccabe" version = "0.7.0" description = "McCabe checker, plugin for flake8" +category = "dev" optional = false python-versions = ">=3.6" -files = [ - {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, - {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, -] [[package]] name = "mdit-py-plugins" version = 
"0.4.0" description = "Collection of plugins for markdown-it-py" +category = "dev" optional = false python-versions = ">=3.8" -files = [ - {file = "mdit_py_plugins-0.4.0-py3-none-any.whl", hash = "sha256:b51b3bb70691f57f974e257e367107857a93b36f322a9e6d44ca5bf28ec2def9"}, - {file = "mdit_py_plugins-0.4.0.tar.gz", hash = "sha256:d8ab27e9aed6c38aa716819fedfde15ca275715955f8a185a8e1cf90fb1d2c1b"}, -] [package.dependencies] markdown-it-py = ">=1.0.0,<4.0.0" @@ -4435,22 +3056,17 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] name = "mdurl" version = "0.1.2" description = "Markdown URL utilities" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, - {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, -] [[package]] name = "minimal-snowplow-tracker" version = "0.0.2" description = "A minimal snowplow event tracker for Python. Add analytics to your Python and Django apps, webapps and games" +category = "main" optional = false python-versions = "*" -files = [ - {file = "minimal-snowplow-tracker-0.0.2.tar.gz", hash = "sha256:acabf7572db0e7f5cbf6983d495eef54081f71be392330eb3aadb9ccb39daaa4"}, -] [package.dependencies] requests = ">=2.2.1,<3.0" @@ -4460,23 +3076,17 @@ six = ">=1.9.0,<2.0" name = "more-itertools" version = "10.1.0" description = "More routines for operating on iterables, beyond itertools" +category = "main" optional = true python-versions = ">=3.8" -files = [ - {file = "more-itertools-10.1.0.tar.gz", hash = "sha256:626c369fa0eb37bac0291bce8259b332fd59ac792fa5497b59837309cd5b114a"}, - {file = "more_itertools-10.1.0-py3-none-any.whl", hash = "sha256:64e0735fcfdc6f3464ea133afe8ea4483b1c5fe3a3d69852e6503b43a0b222e6"}, -] [[package]] name = "mpmath" version = "1.3.0" description = "Python library for arbitrary-precision floating-point arithmetic" +category = "main" optional = true python-versions = "*" -files = [ - {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, - {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, -] [package.extras] develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] @@ -4488,12 +3098,9 @@ tests = ["pytest (>=4.6)"] name = "msal" version = "1.23.0" description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect." +category = "main" optional = true python-versions = "*" -files = [ - {file = "msal-1.23.0-py2.py3-none-any.whl", hash = "sha256:3342e0837a047007f9d479e814b559c3219767453d57920dc40a31986862048b"}, - {file = "msal-1.23.0.tar.gz", hash = "sha256:25c9a33acf84301f93d1fdbe9f1a9c60cd38af0d5fffdbfa378138fc7bc1e86b"}, -] [package.dependencies] cryptography = ">=0.6,<44" @@ -4507,12 +3114,9 @@ broker = ["pymsalruntime (>=0.13.2,<0.14)"] name = "msal-extensions" version = "1.0.0" description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism." 
+category = "main" optional = true python-versions = "*" -files = [ - {file = "msal-extensions-1.0.0.tar.gz", hash = "sha256:c676aba56b0cce3783de1b5c5ecfe828db998167875126ca4b47dc6436451354"}, - {file = "msal_extensions-1.0.0-py2.py3-none-any.whl", hash = "sha256:91e3db9620b822d0ed2b4d1850056a0f133cba04455e62f11612e40f5502f2ee"}, -] [package.dependencies] msal = ">=0.4.1,<2.0.0" @@ -4525,192 +3129,25 @@ portalocker = [ name = "msgpack" version = "1.0.5" description = "MessagePack serializer" +category = "main" optional = false python-versions = "*" -files = [ - {file = "msgpack-1.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:525228efd79bb831cf6830a732e2e80bc1b05436b086d4264814b4b2955b2fa9"}, - {file = "msgpack-1.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4f8d8b3bf1ff2672567d6b5c725a1b347fe838b912772aa8ae2bf70338d5a198"}, - {file = "msgpack-1.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cdc793c50be3f01106245a61b739328f7dccc2c648b501e237f0699fe1395b81"}, - {file = "msgpack-1.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cb47c21a8a65b165ce29f2bec852790cbc04936f502966768e4aae9fa763cb7"}, - {file = "msgpack-1.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e42b9594cc3bf4d838d67d6ed62b9e59e201862a25e9a157019e171fbe672dd3"}, - {file = "msgpack-1.0.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:55b56a24893105dc52c1253649b60f475f36b3aa0fc66115bffafb624d7cb30b"}, - {file = "msgpack-1.0.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:1967f6129fc50a43bfe0951c35acbb729be89a55d849fab7686004da85103f1c"}, - {file = "msgpack-1.0.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:20a97bf595a232c3ee6d57ddaadd5453d174a52594bf9c21d10407e2a2d9b3bd"}, - {file = "msgpack-1.0.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d25dd59bbbbb996eacf7be6b4ad082ed7eacc4e8f3d2df1ba43822da9bfa122a"}, - {file = "msgpack-1.0.5-cp310-cp310-win32.whl", hash = "sha256:382b2c77589331f2cb80b67cc058c00f225e19827dbc818d700f61513ab47bea"}, - {file = "msgpack-1.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:4867aa2df9e2a5fa5f76d7d5565d25ec76e84c106b55509e78c1ede0f152659a"}, - {file = "msgpack-1.0.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9f5ae84c5c8a857ec44dc180a8b0cc08238e021f57abdf51a8182e915e6299f0"}, - {file = "msgpack-1.0.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9e6ca5d5699bcd89ae605c150aee83b5321f2115695e741b99618f4856c50898"}, - {file = "msgpack-1.0.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5494ea30d517a3576749cad32fa27f7585c65f5f38309c88c6d137877fa28a5a"}, - {file = "msgpack-1.0.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ab2f3331cb1b54165976a9d976cb251a83183631c88076613c6c780f0d6e45a"}, - {file = "msgpack-1.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28592e20bbb1620848256ebc105fc420436af59515793ed27d5c77a217477705"}, - {file = "msgpack-1.0.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fe5c63197c55bce6385d9aee16c4d0641684628f63ace85f73571e65ad1c1e8d"}, - {file = "msgpack-1.0.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ed40e926fa2f297e8a653c954b732f125ef97bdd4c889f243182299de27e2aa9"}, - {file = "msgpack-1.0.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b2de4c1c0538dcb7010902a2b97f4e00fc4ddf2c8cda9749af0e594d3b7fa3d7"}, - {file = 
"msgpack-1.0.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bf22a83f973b50f9d38e55c6aade04c41ddda19b00c4ebc558930d78eecc64ed"}, - {file = "msgpack-1.0.5-cp311-cp311-win32.whl", hash = "sha256:c396e2cc213d12ce017b686e0f53497f94f8ba2b24799c25d913d46c08ec422c"}, - {file = "msgpack-1.0.5-cp311-cp311-win_amd64.whl", hash = "sha256:6c4c68d87497f66f96d50142a2b73b97972130d93677ce930718f68828b382e2"}, - {file = "msgpack-1.0.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a2b031c2e9b9af485d5e3c4520f4220d74f4d222a5b8dc8c1a3ab9448ca79c57"}, - {file = "msgpack-1.0.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f837b93669ce4336e24d08286c38761132bc7ab29782727f8557e1eb21b2080"}, - {file = "msgpack-1.0.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1d46dfe3832660f53b13b925d4e0fa1432b00f5f7210eb3ad3bb9a13c6204a6"}, - {file = "msgpack-1.0.5-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:366c9a7b9057e1547f4ad51d8facad8b406bab69c7d72c0eb6f529cf76d4b85f"}, - {file = "msgpack-1.0.5-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:4c075728a1095efd0634a7dccb06204919a2f67d1893b6aa8e00497258bf926c"}, - {file = "msgpack-1.0.5-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:f933bbda5a3ee63b8834179096923b094b76f0c7a73c1cfe8f07ad608c58844b"}, - {file = "msgpack-1.0.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:36961b0568c36027c76e2ae3ca1132e35123dcec0706c4b7992683cc26c1320c"}, - {file = "msgpack-1.0.5-cp36-cp36m-win32.whl", hash = "sha256:b5ef2f015b95f912c2fcab19c36814963b5463f1fb9049846994b007962743e9"}, - {file = "msgpack-1.0.5-cp36-cp36m-win_amd64.whl", hash = "sha256:288e32b47e67f7b171f86b030e527e302c91bd3f40fd9033483f2cacc37f327a"}, - {file = "msgpack-1.0.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:137850656634abddfb88236008339fdaba3178f4751b28f270d2ebe77a563b6c"}, - {file = "msgpack-1.0.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c05a4a96585525916b109bb85f8cb6511db1c6f5b9d9cbcbc940dc6b4be944b"}, - {file = "msgpack-1.0.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56a62ec00b636583e5cb6ad313bbed36bb7ead5fa3a3e38938503142c72cba4f"}, - {file = "msgpack-1.0.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef8108f8dedf204bb7b42994abf93882da1159728a2d4c5e82012edd92c9da9f"}, - {file = "msgpack-1.0.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1835c84d65f46900920b3708f5ba829fb19b1096c1800ad60bae8418652a951d"}, - {file = "msgpack-1.0.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:e57916ef1bd0fee4f21c4600e9d1da352d8816b52a599c46460e93a6e9f17086"}, - {file = "msgpack-1.0.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:17358523b85973e5f242ad74aa4712b7ee560715562554aa2134d96e7aa4cbbf"}, - {file = "msgpack-1.0.5-cp37-cp37m-win32.whl", hash = "sha256:cb5aaa8c17760909ec6cb15e744c3ebc2ca8918e727216e79607b7bbce9c8f77"}, - {file = "msgpack-1.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:ab31e908d8424d55601ad7075e471b7d0140d4d3dd3272daf39c5c19d936bd82"}, - {file = "msgpack-1.0.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b72d0698f86e8d9ddf9442bdedec15b71df3598199ba33322d9711a19f08145c"}, - {file = "msgpack-1.0.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:379026812e49258016dd84ad79ac8446922234d498058ae1d415f04b522d5b2d"}, - {file = "msgpack-1.0.5-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:332360ff25469c346a1c5e47cbe2a725517919892eda5cfaffe6046656f0b7bb"}, - {file = "msgpack-1.0.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:476a8fe8fae289fdf273d6d2a6cb6e35b5a58541693e8f9f019bfe990a51e4ba"}, - {file = "msgpack-1.0.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9985b214f33311df47e274eb788a5893a761d025e2b92c723ba4c63936b69b1"}, - {file = "msgpack-1.0.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:48296af57cdb1d885843afd73c4656be5c76c0c6328db3440c9601a98f303d87"}, - {file = "msgpack-1.0.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:addab7e2e1fcc04bd08e4eb631c2a90960c340e40dfc4a5e24d2ff0d5a3b3edb"}, - {file = "msgpack-1.0.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:916723458c25dfb77ff07f4c66aed34e47503b2eb3188b3adbec8d8aa6e00f48"}, - {file = "msgpack-1.0.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:821c7e677cc6acf0fd3f7ac664c98803827ae6de594a9f99563e48c5a2f27eb0"}, - {file = "msgpack-1.0.5-cp38-cp38-win32.whl", hash = "sha256:1c0f7c47f0087ffda62961d425e4407961a7ffd2aa004c81b9c07d9269512f6e"}, - {file = "msgpack-1.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:bae7de2026cbfe3782c8b78b0db9cbfc5455e079f1937cb0ab8d133496ac55e1"}, - {file = "msgpack-1.0.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:20c784e66b613c7f16f632e7b5e8a1651aa5702463d61394671ba07b2fc9e025"}, - {file = "msgpack-1.0.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:266fa4202c0eb94d26822d9bfd7af25d1e2c088927fe8de9033d929dd5ba24c5"}, - {file = "msgpack-1.0.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:18334484eafc2b1aa47a6d42427da7fa8f2ab3d60b674120bce7a895a0a85bdd"}, - {file = "msgpack-1.0.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57e1f3528bd95cc44684beda696f74d3aaa8a5e58c816214b9046512240ef437"}, - {file = "msgpack-1.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:586d0d636f9a628ddc6a17bfd45aa5b5efaf1606d2b60fa5d87b8986326e933f"}, - {file = "msgpack-1.0.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a740fa0e4087a734455f0fc3abf5e746004c9da72fbd541e9b113013c8dc3282"}, - {file = "msgpack-1.0.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3055b0455e45810820db1f29d900bf39466df96ddca11dfa6d074fa47054376d"}, - {file = "msgpack-1.0.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a61215eac016f391129a013c9e46f3ab308db5f5ec9f25811e811f96962599a8"}, - {file = "msgpack-1.0.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:362d9655cd369b08fda06b6657a303eb7172d5279997abe094512e919cf74b11"}, - {file = "msgpack-1.0.5-cp39-cp39-win32.whl", hash = "sha256:ac9dd47af78cae935901a9a500104e2dea2e253207c924cc95de149606dc43cc"}, - {file = "msgpack-1.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:06f5174b5f8ed0ed919da0e62cbd4ffde676a374aba4020034da05fab67b9164"}, - {file = "msgpack-1.0.5.tar.gz", hash = "sha256:c075544284eadc5cddc70f4757331d99dcbc16b2bbd4849d15f8aae4cf36d31c"}, -] [[package]] name = "multidict" version = "6.0.4" description = "multidict implementation" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"}, - {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"}, - 
{file = "multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7"}, - {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b"}, - {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea20853c6dbbb53ed34cb4d080382169b6f4554d394015f1bef35e881bf83547"}, - {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16d232d4e5396c2efbbf4f6d4df89bfa905eb0d4dc5b3549d872ab898451f569"}, - {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36c63aaa167f6c6b04ef2c85704e93af16c11d20de1d133e39de6a0e84582a93"}, - {file = "multidict-6.0.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:64bdf1086b6043bf519869678f5f2757f473dee970d7abf6da91ec00acb9cb98"}, - {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:43644e38f42e3af682690876cff722d301ac585c5b9e1eacc013b7a3f7b696a0"}, - {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7582a1d1030e15422262de9f58711774e02fa80df0d1578995c76214f6954988"}, - {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ddff9c4e225a63a5afab9dd15590432c22e8057e1a9a13d28ed128ecf047bbdc"}, - {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ee2a1ece51b9b9e7752e742cfb661d2a29e7bcdba2d27e66e28a99f1890e4fa0"}, - {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a2e4369eb3d47d2034032a26c7a80fcb21a2cb22e1173d761a162f11e562caa5"}, - {file = "multidict-6.0.4-cp310-cp310-win32.whl", hash = "sha256:574b7eae1ab267e5f8285f0fe881f17efe4b98c39a40858247720935b893bba8"}, - {file = "multidict-6.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:4dcbb0906e38440fa3e325df2359ac6cb043df8e58c965bb45f4e406ecb162cc"}, - {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0dfad7a5a1e39c53ed00d2dd0c2e36aed4650936dc18fd9a1826a5ae1cad6f03"}, - {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:64da238a09d6039e3bd39bb3aee9c21a5e34f28bfa5aa22518581f910ff94af3"}, - {file = "multidict-6.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff959bee35038c4624250473988b24f846cbeb2c6639de3602c073f10410ceba"}, - {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01a3a55bd90018c9c080fbb0b9f4891db37d148a0a18722b42f94694f8b6d4c9"}, - {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5cb09abb18c1ea940fb99360ea0396f34d46566f157122c92dfa069d3e0e982"}, - {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:666daae833559deb2d609afa4490b85830ab0dfca811a98b70a205621a6109fe"}, - {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11bdf3f5e1518b24530b8241529d2050014c884cf18b6fc69c0c2b30ca248710"}, - {file = "multidict-6.0.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d18748f2d30f94f498e852c67d61261c643b349b9d2a581131725595c45ec6c"}, - {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:458f37be2d9e4c95e2d8866a851663cbc76e865b78395090786f6cd9b3bbf4f4"}, - {file = 
"multidict-6.0.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b1a2eeedcead3a41694130495593a559a668f382eee0727352b9a41e1c45759a"}, - {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7d6ae9d593ef8641544d6263c7fa6408cc90370c8cb2bbb65f8d43e5b0351d9c"}, - {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5979b5632c3e3534e42ca6ff856bb24b2e3071b37861c2c727ce220d80eee9ed"}, - {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dcfe792765fab89c365123c81046ad4103fcabbc4f56d1c1997e6715e8015461"}, - {file = "multidict-6.0.4-cp311-cp311-win32.whl", hash = "sha256:3601a3cece3819534b11d4efc1eb76047488fddd0c85a3948099d5da4d504636"}, - {file = "multidict-6.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:81a4f0b34bd92df3da93315c6a59034df95866014ac08535fc819f043bfd51f0"}, - {file = "multidict-6.0.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:67040058f37a2a51ed8ea8f6b0e6ee5bd78ca67f169ce6122f3e2ec80dfe9b78"}, - {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:853888594621e6604c978ce2a0444a1e6e70c8d253ab65ba11657659dcc9100f"}, - {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:39ff62e7d0f26c248b15e364517a72932a611a9b75f35b45be078d81bdb86603"}, - {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af048912e045a2dc732847d33821a9d84ba553f5c5f028adbd364dd4765092ac"}, - {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e8b901e607795ec06c9e42530788c45ac21ef3aaa11dbd0c69de543bfb79a9"}, - {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62501642008a8b9871ddfccbf83e4222cf8ac0d5aeedf73da36153ef2ec222d2"}, - {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:99b76c052e9f1bc0721f7541e5e8c05db3941eb9ebe7b8553c625ef88d6eefde"}, - {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:509eac6cf09c794aa27bcacfd4d62c885cce62bef7b2c3e8b2e49d365b5003fe"}, - {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:21a12c4eb6ddc9952c415f24eef97e3e55ba3af61f67c7bc388dcdec1404a067"}, - {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:5cad9430ab3e2e4fa4a2ef4450f548768400a2ac635841bc2a56a2052cdbeb87"}, - {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab55edc2e84460694295f401215f4a58597f8f7c9466faec545093045476327d"}, - {file = "multidict-6.0.4-cp37-cp37m-win32.whl", hash = "sha256:5a4dcf02b908c3b8b17a45fb0f15b695bf117a67b76b7ad18b73cf8e92608775"}, - {file = "multidict-6.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6ed5f161328b7df384d71b07317f4d8656434e34591f20552c7bcef27b0ab88e"}, - {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5fc1b16f586f049820c5c5b17bb4ee7583092fa0d1c4e28b5239181ff9532e0c"}, - {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1502e24330eb681bdaa3eb70d6358e818e8e8f908a22a1851dfd4e15bc2f8161"}, - {file = "multidict-6.0.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b692f419760c0e65d060959df05f2a531945af31fda0c8a3b3195d4efd06de11"}, - {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45e1ecb0379bfaab5eef059f50115b54571acfbe422a14f668fc8c27ba410e7e"}, - {file = 
"multidict-6.0.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ddd3915998d93fbcd2566ddf9cf62cdb35c9e093075f862935573d265cf8f65d"}, - {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59d43b61c59d82f2effb39a93c48b845efe23a3852d201ed2d24ba830d0b4cf2"}, - {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc8e1d0c705233c5dd0c5e6460fbad7827d5d36f310a0fadfd45cc3029762258"}, - {file = "multidict-6.0.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6aa0418fcc838522256761b3415822626f866758ee0bc6632c9486b179d0b52"}, - {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6748717bb10339c4760c1e63da040f5f29f5ed6e59d76daee30305894069a660"}, - {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4d1a3d7ef5e96b1c9e92f973e43aa5e5b96c659c9bc3124acbbd81b0b9c8a951"}, - {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4372381634485bec7e46718edc71528024fcdc6f835baefe517b34a33c731d60"}, - {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:fc35cb4676846ef752816d5be2193a1e8367b4c1397b74a565a9d0389c433a1d"}, - {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b9d9e4e2b37daddb5c23ea33a3417901fa7c7b3dee2d855f63ee67a0b21e5b1"}, - {file = "multidict-6.0.4-cp38-cp38-win32.whl", hash = "sha256:e41b7e2b59679edfa309e8db64fdf22399eec4b0b24694e1b2104fb789207779"}, - {file = "multidict-6.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:d6c254ba6e45d8e72739281ebc46ea5eb5f101234f3ce171f0e9f5cc86991480"}, - {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:16ab77bbeb596e14212e7bab8429f24c1579234a3a462105cda4a66904998664"}, - {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc779e9e6f7fda81b3f9aa58e3a6091d49ad528b11ed19f6621408806204ad35"}, - {file = "multidict-6.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ceef517eca3e03c1cceb22030a3e39cb399ac86bff4e426d4fc6ae49052cc60"}, - {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:281af09f488903fde97923c7744bb001a9b23b039a909460d0f14edc7bf59706"}, - {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52f2dffc8acaba9a2f27174c41c9e57f60b907bb9f096b36b1a1f3be71c6284d"}, - {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b41156839806aecb3641f3208c0dafd3ac7775b9c4c422d82ee2a45c34ba81ca"}, - {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3fc56f88cc98ef8139255cf8cd63eb2c586531e43310ff859d6bb3a6b51f1"}, - {file = "multidict-6.0.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8316a77808c501004802f9beebde51c9f857054a0c871bd6da8280e718444449"}, - {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f70b98cd94886b49d91170ef23ec5c0e8ebb6f242d734ed7ed677b24d50c82cf"}, - {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bf6774e60d67a9efe02b3616fee22441d86fab4c6d335f9d2051d19d90a40063"}, - {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:e69924bfcdda39b722ef4d9aa762b2dd38e4632b3641b1d9a57ca9cd18f2f83a"}, - {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = 
"sha256:6b181d8c23da913d4ff585afd1155a0e1194c0b50c54fcfe286f70cdaf2b7176"}, - {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:52509b5be062d9eafc8170e53026fbc54cf3b32759a23d07fd935fb04fc22d95"}, - {file = "multidict-6.0.4-cp39-cp39-win32.whl", hash = "sha256:27c523fbfbdfd19c6867af7346332b62b586eed663887392cff78d614f9ec313"}, - {file = "multidict-6.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2"}, - {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, -] [[package]] name = "mypy" version = "1.6.1" description = "Optional static typing for Python" +category = "dev" optional = false python-versions = ">=3.8" -files = [ - {file = "mypy-1.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e5012e5cc2ac628177eaac0e83d622b2dd499e28253d4107a08ecc59ede3fc2c"}, - {file = "mypy-1.6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d8fbb68711905f8912e5af474ca8b78d077447d8f3918997fecbf26943ff3cbb"}, - {file = "mypy-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21a1ad938fee7d2d96ca666c77b7c494c3c5bd88dff792220e1afbebb2925b5e"}, - {file = "mypy-1.6.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b96ae2c1279d1065413965c607712006205a9ac541895004a1e0d4f281f2ff9f"}, - {file = "mypy-1.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:40b1844d2e8b232ed92e50a4bd11c48d2daa351f9deee6c194b83bf03e418b0c"}, - {file = "mypy-1.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:81af8adaa5e3099469e7623436881eff6b3b06db5ef75e6f5b6d4871263547e5"}, - {file = "mypy-1.6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8c223fa57cb154c7eab5156856c231c3f5eace1e0bed9b32a24696b7ba3c3245"}, - {file = "mypy-1.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8032e00ce71c3ceb93eeba63963b864bf635a18f6c0c12da6c13c450eedb183"}, - {file = "mypy-1.6.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4c46b51de523817a0045b150ed11b56f9fff55f12b9edd0f3ed35b15a2809de0"}, - {file = "mypy-1.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:19f905bcfd9e167159b3d63ecd8cb5e696151c3e59a1742e79bc3bcb540c42c7"}, - {file = "mypy-1.6.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:82e469518d3e9a321912955cc702d418773a2fd1e91c651280a1bda10622f02f"}, - {file = "mypy-1.6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d4473c22cc296425bbbce7e9429588e76e05bc7342da359d6520b6427bf76660"}, - {file = "mypy-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59a0d7d24dfb26729e0a068639a6ce3500e31d6655df8557156c51c1cb874ce7"}, - {file = "mypy-1.6.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cfd13d47b29ed3bbaafaff7d8b21e90d827631afda134836962011acb5904b71"}, - {file = "mypy-1.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:eb4f18589d196a4cbe5290b435d135dee96567e07c2b2d43b5c4621b6501531a"}, - {file = "mypy-1.6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:41697773aa0bf53ff917aa077e2cde7aa50254f28750f9b88884acea38a16169"}, - {file = "mypy-1.6.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7274b0c57737bd3476d2229c6389b2ec9eefeb090bbaf77777e9d6b1b5a9d143"}, - {file = "mypy-1.6.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbaf4662e498c8c2e352da5f5bca5ab29d378895fa2d980630656178bd607c46"}, - {file = "mypy-1.6.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:bb8ccb4724f7d8601938571bf3f24da0da791fe2db7be3d9e79849cb64e0ae85"}, - {file = 
"mypy-1.6.1-cp38-cp38-win_amd64.whl", hash = "sha256:68351911e85145f582b5aa6cd9ad666c8958bcae897a1bfda8f4940472463c45"}, - {file = "mypy-1.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:49ae115da099dcc0922a7a895c1eec82c1518109ea5c162ed50e3b3594c71208"}, - {file = "mypy-1.6.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8b27958f8c76bed8edaa63da0739d76e4e9ad4ed325c814f9b3851425582a3cd"}, - {file = "mypy-1.6.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:925cd6a3b7b55dfba252b7c4561892311c5358c6b5a601847015a1ad4eb7d332"}, - {file = "mypy-1.6.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8f57e6b6927a49550da3d122f0cb983d400f843a8a82e65b3b380d3d7259468f"}, - {file = "mypy-1.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:a43ef1c8ddfdb9575691720b6352761f3f53d85f1b57d7745701041053deff30"}, - {file = "mypy-1.6.1-py3-none-any.whl", hash = "sha256:4cbe68ef919c28ea561165206a2dcb68591c50f3bcf777932323bc208d949cf1"}, - {file = "mypy-1.6.1.tar.gz", hash = "sha256:4d01c00d09a0be62a4ca3f933e315455bde83f37f892ba4b08ce92f3cf44bcc1"}, -] [package.dependencies] mypy-extensions = ">=1.0.0" @@ -4726,12 +3163,9 @@ reports = ["lxml"] name = "mypy-boto3-athena" version = "1.28.36" description = "Type annotations for boto3.Athena 1.28.36 service generated with mypy-boto3-builder 7.18.0" +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "mypy-boto3-athena-1.28.36.tar.gz", hash = "sha256:a76df6aace3dc1d91b3f74640d617cd1b4802e5f348a22db2f16dfce0b01ee26"}, - {file = "mypy_boto3_athena-1.28.36-py3-none-any.whl", hash = "sha256:b79b77df6ba30c55ff2f1f8b36de410f537c8c978d892e958b4c5e165797915a"}, -] [package.dependencies] typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} @@ -4740,12 +3174,9 @@ typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} name = "mypy-boto3-glue" version = "1.28.36" description = "Type annotations for boto3.Glue 1.28.36 service generated with mypy-boto3-builder 7.18.0" +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "mypy-boto3-glue-1.28.36.tar.gz", hash = "sha256:161771252bb6a220a0bfd8e6ad71da8548599c611f95fe8a94846f4a3386d2ae"}, - {file = "mypy_boto3_glue-1.28.36-py3-none-any.whl", hash = "sha256:73bc14616ac65a5c02adea5efba7bbbcf8207cd0c0e3237c13d351ebc916338d"}, -] [package.dependencies] typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} @@ -4754,12 +3185,9 @@ typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} name = "mypy-boto3-lakeformation" version = "1.28.36" description = "Type annotations for boto3.LakeFormation 1.28.36 service generated with mypy-boto3-builder 7.18.0" +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "mypy-boto3-lakeformation-1.28.36.tar.gz", hash = "sha256:9327cf0d28a09abf5bd90ae946ce7420b32a3b979a1a3554ac93716c3dceacb0"}, - {file = "mypy_boto3_lakeformation-1.28.36-py3-none-any.whl", hash = "sha256:9525a8ab3d69632d4ec83eb565ff7fdfa1181fbdf032bcff4a20d4f8a0350688"}, -] [package.dependencies] typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} @@ -4768,12 +3196,9 @@ typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} name = "mypy-boto3-sts" version = "1.28.37" description = "Type annotations for boto3.STS 1.28.37 service generated with mypy-boto3-builder 7.18.2" +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = 
"mypy-boto3-sts-1.28.37.tar.gz", hash = "sha256:54d64ca695ab90a51c68ac1e67ff9eae7ec69f926649e320a3b90ed1ec841a95"}, - {file = "mypy_boto3_sts-1.28.37-py3-none-any.whl", hash = "sha256:24106ff30ecfe7ad0538657bbd00b6009418a5382b323cac46e0e26c1f5d50fb"}, -] [package.dependencies] typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} @@ -4782,23 +3207,17 @@ typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." +category = "dev" optional = false python-versions = ">=3.5" -files = [ - {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, - {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, -] [[package]] name = "natsort" version = "8.4.0" description = "Simple yet flexible natural sorting in Python." +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "natsort-8.4.0-py3-none-any.whl", hash = "sha256:4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c"}, - {file = "natsort-8.4.0.tar.gz", hash = "sha256:45312c4a0e5507593da193dedd04abb1469253b601ecaf63445ad80f0a1ea581"}, -] [package.extras] fast = ["fastnumbers (>=2.0.0)"] @@ -4808,12 +3227,9 @@ icu = ["PyICU (>=1.0.0)"] name = "networkx" version = "2.8.8" description = "Python package for creating and manipulating graphs and networks" +category = "main" optional = false python-versions = ">=3.8" -files = [ - {file = "networkx-2.8.8-py3-none-any.whl", hash = "sha256:e435dfa75b1d7195c7b8378c3859f0445cd88c6b0375c181ed66823a9ceb7524"}, - {file = "networkx-2.8.8.tar.gz", hash = "sha256:230d388117af870fce5647a3c52401fcf753e94720e6ea6b4197a5355648885e"}, -] [package.extras] default = ["matplotlib (>=3.4)", "numpy (>=1.19)", "pandas (>=1.3)", "scipy (>=1.8)"] @@ -4826,34 +3242,25 @@ test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] name = "nr-date" version = "2.1.0" description = "" +category = "dev" optional = false python-versions = ">=3.6,<4.0" -files = [ - {file = "nr_date-2.1.0-py3-none-any.whl", hash = "sha256:bd672a9dfbdcf7c4b9289fea6750c42490eaee08036a72059dcc78cb236ed568"}, - {file = "nr_date-2.1.0.tar.gz", hash = "sha256:0643aea13bcdc2a8bc56af9d5e6a89ef244c9744a1ef00cdc735902ba7f7d2e6"}, -] [[package]] name = "nr-stream" version = "1.1.5" description = "" +category = "dev" optional = false python-versions = ">=3.6,<4.0" -files = [ - {file = "nr_stream-1.1.5-py3-none-any.whl", hash = "sha256:47e12150b331ad2cb729cfd9d2abd281c9949809729ba461c6aa87dd9927b2d4"}, - {file = "nr_stream-1.1.5.tar.gz", hash = "sha256:eb0216c6bfc61a46d4568dba3b588502c610ec8ddef4ac98f3932a2bd7264f65"}, -] [[package]] name = "nr-util" version = "0.8.12" description = "General purpose Python utility library." 
+category = "dev" optional = false python-versions = ">=3.7,<4.0" -files = [ - {file = "nr.util-0.8.12-py3-none-any.whl", hash = "sha256:91da02ac9795eb8e015372275c1efe54bac9051231ee9b0e7e6f96b0b4e7d2bb"}, - {file = "nr.util-0.8.12.tar.gz", hash = "sha256:a4549c2033d99d2f0379b3f3d233fd2a8ade286bbf0b3ad0cc7cea16022214f4"}, -] [package.dependencies] deprecated = ">=1.2.0,<2.0.0" @@ -4863,90 +3270,25 @@ typing-extensions = ">=3.0.0" name = "numpy" version = "1.24.4" description = "Fundamental package for array computing in Python" +category = "main" optional = false python-versions = ">=3.8" -files = [ - {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"}, - {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"}, - {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4"}, - {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6"}, - {file = "numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc"}, - {file = "numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e"}, - {file = "numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810"}, - {file = "numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254"}, - {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7"}, - {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5"}, - {file = "numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d"}, - {file = "numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694"}, - {file = "numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61"}, - {file = "numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f"}, - {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e"}, - {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc"}, - {file = "numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2"}, - {file = "numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706"}, - {file = "numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400"}, - {file = "numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f"}, - 
{file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9"}, - {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d"}, - {file = "numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835"}, - {file = "numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8"}, - {file = "numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef"}, - {file = "numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a"}, - {file = "numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2"}, - {file = "numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463"}, -] [[package]] name = "numpy" version = "1.26.1" description = "Fundamental package for array computing in Python" +category = "main" optional = false python-versions = "<3.13,>=3.9" -files = [ - {file = "numpy-1.26.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:82e871307a6331b5f09efda3c22e03c095d957f04bf6bc1804f30048d0e5e7af"}, - {file = "numpy-1.26.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cdd9ec98f0063d93baeb01aad472a1a0840dee302842a2746a7a8e92968f9575"}, - {file = "numpy-1.26.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d78f269e0c4fd365fc2992c00353e4530d274ba68f15e968d8bc3c69ce5f5244"}, - {file = "numpy-1.26.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ab9163ca8aeb7fd32fe93866490654d2f7dda4e61bc6297bf72ce07fdc02f67"}, - {file = "numpy-1.26.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:78ca54b2f9daffa5f323f34cdf21e1d9779a54073f0018a3094ab907938331a2"}, - {file = "numpy-1.26.1-cp310-cp310-win32.whl", hash = "sha256:d1cfc92db6af1fd37a7bb58e55c8383b4aa1ba23d012bdbba26b4bcca45ac297"}, - {file = "numpy-1.26.1-cp310-cp310-win_amd64.whl", hash = "sha256:d2984cb6caaf05294b8466966627e80bf6c7afd273279077679cb010acb0e5ab"}, - {file = "numpy-1.26.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cd7837b2b734ca72959a1caf3309457a318c934abef7a43a14bb984e574bbb9a"}, - {file = "numpy-1.26.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1c59c046c31a43310ad0199d6299e59f57a289e22f0f36951ced1c9eac3665b9"}, - {file = "numpy-1.26.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d58e8c51a7cf43090d124d5073bc29ab2755822181fcad978b12e144e5e5a4b3"}, - {file = "numpy-1.26.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6081aed64714a18c72b168a9276095ef9155dd7888b9e74b5987808f0dd0a974"}, - {file = "numpy-1.26.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:97e5d6a9f0702c2863aaabf19f0d1b6c2628fbe476438ce0b5ce06e83085064c"}, - {file = "numpy-1.26.1-cp311-cp311-win32.whl", hash = "sha256:b9d45d1dbb9de84894cc50efece5b09939752a2d75aab3a8b0cef6f3a35ecd6b"}, - {file = "numpy-1.26.1-cp311-cp311-win_amd64.whl", hash = "sha256:3649d566e2fc067597125428db15d60eb42a4e0897fc48d28cb75dc2e0454e53"}, - {file = "numpy-1.26.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:1d1bd82d539607951cac963388534da3b7ea0e18b149a53cf883d8f699178c0f"}, - {file = "numpy-1.26.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:afd5ced4e5a96dac6725daeb5242a35494243f2239244fad10a90ce58b071d24"}, - {file = "numpy-1.26.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a03fb25610ef560a6201ff06df4f8105292ba56e7cdd196ea350d123fc32e24e"}, - {file = "numpy-1.26.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dcfaf015b79d1f9f9c9fd0731a907407dc3e45769262d657d754c3a028586124"}, - {file = "numpy-1.26.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e509cbc488c735b43b5ffea175235cec24bbc57b227ef1acc691725beb230d1c"}, - {file = "numpy-1.26.1-cp312-cp312-win32.whl", hash = "sha256:af22f3d8e228d84d1c0c44c1fbdeb80f97a15a0abe4f080960393a00db733b66"}, - {file = "numpy-1.26.1-cp312-cp312-win_amd64.whl", hash = "sha256:9f42284ebf91bdf32fafac29d29d4c07e5e9d1af862ea73686581773ef9e73a7"}, - {file = "numpy-1.26.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bb894accfd16b867d8643fc2ba6c8617c78ba2828051e9a69511644ce86ce83e"}, - {file = "numpy-1.26.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e44ccb93f30c75dfc0c3aa3ce38f33486a75ec9abadabd4e59f114994a9c4617"}, - {file = "numpy-1.26.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9696aa2e35cc41e398a6d42d147cf326f8f9d81befcb399bc1ed7ffea339b64e"}, - {file = "numpy-1.26.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a5b411040beead47a228bde3b2241100454a6abde9df139ed087bd73fc0a4908"}, - {file = "numpy-1.26.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1e11668d6f756ca5ef534b5be8653d16c5352cbb210a5c2a79ff288e937010d5"}, - {file = "numpy-1.26.1-cp39-cp39-win32.whl", hash = "sha256:d1d2c6b7dd618c41e202c59c1413ef9b2c8e8a15f5039e344af64195459e3104"}, - {file = "numpy-1.26.1-cp39-cp39-win_amd64.whl", hash = "sha256:59227c981d43425ca5e5c01094d59eb14e8772ce6975d4b2fc1e106a833d5ae2"}, - {file = "numpy-1.26.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:06934e1a22c54636a059215d6da99e23286424f316fddd979f5071093b648668"}, - {file = "numpy-1.26.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76ff661a867d9272cd2a99eed002470f46dbe0943a5ffd140f49be84f68ffc42"}, - {file = "numpy-1.26.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:6965888d65d2848e8768824ca8288db0a81263c1efccec881cb35a0d805fcd2f"}, - {file = "numpy-1.26.1.tar.gz", hash = "sha256:c8c6c72d4a9f831f328efb1312642a1cafafaa88981d9ab76368d50d07d93cbe"}, -] [[package]] name = "oauthlib" version = "3.2.2" description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +category = "main" optional = false python-versions = ">=3.6" -files = [ - {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, - {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, -] [package.extras] rsa = ["cryptography (>=3.0.0)"] @@ -4957,35 +3299,9 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] name = "onnx" version = "1.15.0" description = "Open Neural Network Exchange" +category = "main" optional = true python-versions = ">=3.8" -files = [ - {file = "onnx-1.15.0-cp310-cp310-macosx_10_12_universal2.whl", hash = "sha256:51cacb6aafba308aaf462252ced562111f6991cdc7bc57a6c554c3519453a8ff"}, - {file = "onnx-1.15.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = 
"sha256:0aee26b6f7f7da7e840de75ad9195a77a147d0662c94eaa6483be13ba468ffc1"}, - {file = "onnx-1.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:baf6ef6c93b3b843edb97a8d5b3d229a1301984f3f8dee859c29634d2083e6f9"}, - {file = "onnx-1.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96ed899fe6000edc05bb2828863d3841cfddd5a7cf04c1a771f112e94de75d9f"}, - {file = "onnx-1.15.0-cp310-cp310-win32.whl", hash = "sha256:f1ad3d77fc2f4b4296f0ac2c8cadd8c1dcf765fc586b737462d3a0fe8f7c696a"}, - {file = "onnx-1.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:ca4ebc4f47109bfb12c8c9e83dd99ec5c9f07d2e5f05976356c6ccdce3552010"}, - {file = "onnx-1.15.0-cp311-cp311-macosx_10_12_universal2.whl", hash = "sha256:233ffdb5ca8cc2d960b10965a763910c0830b64b450376da59207f454701f343"}, - {file = "onnx-1.15.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:51fa79c9ea9af033638ec51f9177b8e76c55fad65bb83ea96ee88fafade18ee7"}, - {file = "onnx-1.15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f277d4861729f5253a51fa41ce91bfec1c4574ee41b5637056b43500917295ce"}, - {file = "onnx-1.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8a7c94d2ebead8f739fdb70d1ce5a71726f4e17b3e5b8ad64455ea1b2801a85"}, - {file = "onnx-1.15.0-cp311-cp311-win32.whl", hash = "sha256:17dcfb86a8c6bdc3971443c29b023dd9c90ff1d15d8baecee0747a6b7f74e650"}, - {file = "onnx-1.15.0-cp311-cp311-win_amd64.whl", hash = "sha256:60a3e28747e305cd2e766e6a53a0a6d952cf9e72005ec6023ce5e07666676a4e"}, - {file = "onnx-1.15.0-cp38-cp38-macosx_10_12_universal2.whl", hash = "sha256:6b5c798d9e0907eaf319e3d3e7c89a2ed9a854bcb83da5fefb6d4c12d5e90721"}, - {file = "onnx-1.15.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:a4f774ff50092fe19bd8f46b2c9b27b1d30fbd700c22abde48a478142d464322"}, - {file = "onnx-1.15.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2b0e7f3938f2d994c34616bfb8b4b1cebbc4a0398483344fe5e9f2fe95175e6"}, - {file = "onnx-1.15.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49cebebd0020a4b12c1dd0909d426631212ef28606d7e4d49463d36abe7639ad"}, - {file = "onnx-1.15.0-cp38-cp38-win32.whl", hash = "sha256:1fdf8a3ff75abc2b32c83bf27fb7c18d6b976c9c537263fadd82b9560fe186fa"}, - {file = "onnx-1.15.0-cp38-cp38-win_amd64.whl", hash = "sha256:763e55c26e8de3a2dce008d55ae81b27fa8fb4acbb01a29b9f3c01f200c4d676"}, - {file = "onnx-1.15.0-cp39-cp39-macosx_10_12_universal2.whl", hash = "sha256:b2d5e802837629fc9c86f19448d19dd04d206578328bce202aeb3d4bedab43c4"}, - {file = "onnx-1.15.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:9a9cfbb5e5d5d88f89d0dfc9df5fb858899db874e1d5ed21e76c481f3cafc90d"}, - {file = "onnx-1.15.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f472bbe5cb670a0a4a4db08f41fde69b187a009d0cb628f964840d3f83524e9"}, - {file = "onnx-1.15.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bf2de9bef64792e5b8080c678023ac7d2b9e05d79a3e17e92cf6a4a624831d2"}, - {file = "onnx-1.15.0-cp39-cp39-win32.whl", hash = "sha256:ef4d9eb44b111e69e4534f3233fc2c13d1e26920d24ae4359d513bd54694bc6d"}, - {file = "onnx-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:95d7a3e2d79d371e272e39ae3f7547e0b116d0c7f774a4004e97febe6c93507f"}, - {file = "onnx-1.15.0.tar.gz", hash = "sha256:b18461a7d38f286618ca2a6e78062a2a9c634ce498e631e708a8041b00094825"}, -] [package.dependencies] numpy = "*" @@ -4998,34 +3314,9 @@ reference = ["Pillow", "google-re2"] name = 
"onnxruntime" version = "1.16.1" description = "ONNX Runtime is a runtime accelerator for Machine Learning models" +category = "main" optional = true python-versions = "*" -files = [ - {file = "onnxruntime-1.16.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:28b2c7f444b4119950b69370801cd66067f403d19cbaf2a444735d7c269cce4a"}, - {file = "onnxruntime-1.16.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c24e04f33e7899f6aebb03ed51e51d346c1f906b05c5569d58ac9a12d38a2f58"}, - {file = "onnxruntime-1.16.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fa93b166f2d97063dc9f33c5118c5729a4a5dd5617296b6dbef42f9047b3e81"}, - {file = "onnxruntime-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:042dd9201b3016ee18f8f8bc4609baf11ff34ca1ff489c0a46bcd30919bf883d"}, - {file = "onnxruntime-1.16.1-cp310-cp310-win32.whl", hash = "sha256:c20aa0591f305012f1b21aad607ed96917c86ae7aede4a4dd95824b3d124ceb7"}, - {file = "onnxruntime-1.16.1-cp310-cp310-win_amd64.whl", hash = "sha256:5581873e578917bea76d6434ee7337e28195d03488dcf72d161d08e9398c6249"}, - {file = "onnxruntime-1.16.1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:ef8c0c8abf5f309aa1caf35941380839dc5f7a2fa53da533be4a3f254993f120"}, - {file = "onnxruntime-1.16.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e680380bea35a137cbc3efd67a17486e96972901192ad3026ee79c8d8fe264f7"}, - {file = "onnxruntime-1.16.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e62cc38ce1a669013d0a596d984762dc9c67c56f60ecfeee0d5ad36da5863f6"}, - {file = "onnxruntime-1.16.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:025c7a4d57bd2e63b8a0f84ad3df53e419e3df1cc72d63184f2aae807b17c13c"}, - {file = "onnxruntime-1.16.1-cp311-cp311-win32.whl", hash = "sha256:9ad074057fa8d028df248b5668514088cb0937b6ac5954073b7fb9b2891ffc8c"}, - {file = "onnxruntime-1.16.1-cp311-cp311-win_amd64.whl", hash = "sha256:d5e43a3478bffc01f817ecf826de7b25a2ca1bca8547d70888594ab80a77ad24"}, - {file = "onnxruntime-1.16.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:3aef4d70b0930e29a8943eab248cd1565664458d3a62b2276bd11181f28fd0a3"}, - {file = "onnxruntime-1.16.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:55a7b843a57c8ca0c8ff169428137958146081d5d76f1a6dd444c4ffcd37c3c2"}, - {file = "onnxruntime-1.16.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62c631af1941bf3b5f7d063d24c04aacce8cff0794e157c497e315e89ac5ad7b"}, - {file = "onnxruntime-1.16.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5671f296c3d5c233f601e97a10ab5a1dd8e65ba35c7b7b0c253332aba9dff330"}, - {file = "onnxruntime-1.16.1-cp38-cp38-win32.whl", hash = "sha256:eb3802305023dd05e16848d4e22b41f8147247894309c0c27122aaa08793b3d2"}, - {file = "onnxruntime-1.16.1-cp38-cp38-win_amd64.whl", hash = "sha256:fecfb07443d09d271b1487f401fbdf1ba0c829af6fd4fe8f6af25f71190e7eb9"}, - {file = "onnxruntime-1.16.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:de3e12094234db6545c67adbf801874b4eb91e9f299bda34c62967ef0050960f"}, - {file = "onnxruntime-1.16.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ff723c2a5621b5e7103f3be84d5aae1e03a20621e72219dddceae81f65f240af"}, - {file = "onnxruntime-1.16.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14a7fb3073aaf6b462e3d7fb433320f7700558a8892e5021780522dc4574292a"}, - {file = "onnxruntime-1.16.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:963159f1f699b0454cd72fcef3276c8a1aab9389a7b301bcd8e320fb9d9e8597"}, - {file = "onnxruntime-1.16.1-cp39-cp39-win32.whl", hash = "sha256:85771adb75190db9364b25ddec353ebf07635b83eb94b64ed014f1f6d57a3857"}, - {file = "onnxruntime-1.16.1-cp39-cp39-win_amd64.whl", hash = "sha256:d32d2b30799c1f950123c60ae8390818381fd5f88bdf3627eeca10071c155dc5"}, -] [package.dependencies] coloredlogs = "*" @@ -5039,12 +3330,9 @@ sympy = "*" name = "opentelemetry-api" version = "1.15.0" description = "OpenTelemetry Python API" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "opentelemetry_api-1.15.0-py3-none-any.whl", hash = "sha256:e6c2d2e42140fd396e96edf75a7ceb11073f4efb4db87565a431cc9d0f93f2e0"}, - {file = "opentelemetry_api-1.15.0.tar.gz", hash = "sha256:79ab791b4aaad27acc3dc3ba01596db5b5aac2ef75c70622c6038051d6c2cded"}, -] [package.dependencies] deprecated = ">=1.2.6" @@ -5054,12 +3342,9 @@ setuptools = ">=16.0" name = "opentelemetry-exporter-otlp" version = "1.15.0" description = "OpenTelemetry Collector Exporters" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "opentelemetry_exporter_otlp-1.15.0-py3-none-any.whl", hash = "sha256:79f22748b6a54808a0448093dfa189c8490e729f67c134d4c992533d9393b33e"}, - {file = "opentelemetry_exporter_otlp-1.15.0.tar.gz", hash = "sha256:4f7c49751d9720e2e726e13b0bb958ccade4e29122c305d92c033da432c8d2c5"}, -] [package.dependencies] opentelemetry-exporter-otlp-proto-grpc = "1.15.0" @@ -5069,12 +3354,9 @@ opentelemetry-exporter-otlp-proto-http = "1.15.0" name = "opentelemetry-exporter-otlp-proto-grpc" version = "1.15.0" description = "OpenTelemetry Collector Protobuf over gRPC Exporter" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "opentelemetry_exporter_otlp_proto_grpc-1.15.0-py3-none-any.whl", hash = "sha256:c2a5492ba7d140109968135d641d06ce3c5bd73c50665f787526065d57d7fd1d"}, - {file = "opentelemetry_exporter_otlp_proto_grpc-1.15.0.tar.gz", hash = "sha256:844f2a4bb9bcda34e4eb6fe36765e5031aacb36dc60ed88c90fc246942ea26e7"}, -] [package.dependencies] backoff = {version = ">=1.10.0,<3.0.0", markers = "python_version >= \"3.7\""} @@ -5091,12 +3373,9 @@ test = ["pytest-grpc"] name = "opentelemetry-exporter-otlp-proto-http" version = "1.15.0" description = "OpenTelemetry Collector Protobuf over HTTP Exporter" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "opentelemetry_exporter_otlp_proto_http-1.15.0-py3-none-any.whl", hash = "sha256:3ec2a02196c8a54bf5cbf7fe623a5238625638e83b6047a983bdf96e2bbb74c0"}, - {file = "opentelemetry_exporter_otlp_proto_http-1.15.0.tar.gz", hash = "sha256:11b2c814249a49b22f6cca7a06b05701f561d577b747f3660dfd67b6eb9daf9c"}, -] [package.dependencies] backoff = {version = ">=1.10.0,<3.0.0", markers = "python_version >= \"3.7\""} @@ -5113,12 +3392,9 @@ test = ["responses (==0.22.0)"] name = "opentelemetry-proto" version = "1.15.0" description = "OpenTelemetry Python Proto" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "opentelemetry_proto-1.15.0-py3-none-any.whl", hash = "sha256:044b6d044b4d10530f250856f933442b8753a17f94ae37c207607f733fb9a844"}, - {file = "opentelemetry_proto-1.15.0.tar.gz", hash = "sha256:9c4008e40ac8cab359daac283fbe7002c5c29c77ea2674ad5626a249e64e0101"}, -] [package.dependencies] protobuf = ">=3.19,<5.0" @@ -5127,12 +3403,9 @@ protobuf = ">=3.19,<5.0" name = "opentelemetry-sdk" version = "1.15.0" description = "OpenTelemetry Python SDK" +category = "dev" optional = 
false python-versions = ">=3.7" -files = [ - {file = "opentelemetry_sdk-1.15.0-py3-none-any.whl", hash = "sha256:555c533e9837766119bbccc7a80458c9971d853a6f1da683a2246cd5e53b4645"}, - {file = "opentelemetry_sdk-1.15.0.tar.gz", hash = "sha256:98dbffcfeebcbff12c0c974292d6ea603180a145904cf838b1fe4d5c99078425"}, -] [package.dependencies] opentelemetry-api = "1.15.0" @@ -5144,23 +3417,17 @@ typing-extensions = ">=3.7.4" name = "opentelemetry-semantic-conventions" version = "0.36b0" description = "OpenTelemetry Semantic Conventions" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "opentelemetry_semantic_conventions-0.36b0-py3-none-any.whl", hash = "sha256:adc05635e87b9d3e007c9f530eed487fc3ef2177d02f82f674f28ebf9aff8243"}, - {file = "opentelemetry_semantic_conventions-0.36b0.tar.gz", hash = "sha256:829dc221795467d98b773c04096e29be038d77526dc8d6ac76f546fb6279bf01"}, -] [[package]] name = "ordered-set" version = "4.1.0" description = "An OrderedSet is a custom MutableSet that remembers its order, so that every" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "ordered-set-4.1.0.tar.gz", hash = "sha256:694a8e44c87657c59292ede72891eb91d34131f6531463aab3009191c77364a8"}, - {file = "ordered_set-4.1.0-py3-none-any.whl", hash = "sha256:046e1132c71fcf3330438a539928932caf51ddbc582496833e23de611de14562"}, -] [package.extras] dev = ["black", "mypy", "pytest"] @@ -5169,154 +3436,66 @@ dev = ["black", "mypy", "pytest"] name = "orjson" version = "3.9.5" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "orjson-3.9.5-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:ad6845912a71adcc65df7c8a7f2155eba2096cf03ad2c061c93857de70d699ad"}, - {file = "orjson-3.9.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e298e0aacfcc14ef4476c3f409e85475031de24e5b23605a465e9bf4b2156273"}, - {file = "orjson-3.9.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:83c9939073281ef7dd7c5ca7f54cceccb840b440cec4b8a326bda507ff88a0a6"}, - {file = "orjson-3.9.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e174cc579904a48ee1ea3acb7045e8a6c5d52c17688dfcb00e0e842ec378cabf"}, - {file = "orjson-3.9.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f8d51702f42c785b115401e1d64a27a2ea767ae7cf1fb8edaa09c7cf1571c660"}, - {file = "orjson-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f13d61c0c7414ddee1ef4d0f303e2222f8cced5a2e26d9774751aecd72324c9e"}, - {file = "orjson-3.9.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d748cc48caf5a91c883d306ab648df1b29e16b488c9316852844dd0fd000d1c2"}, - {file = "orjson-3.9.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bd19bc08fa023e4c2cbf8294ad3f2b8922f4de9ba088dbc71e6b268fdf54591c"}, - {file = "orjson-3.9.5-cp310-none-win32.whl", hash = "sha256:5793a21a21bf34e1767e3d61a778a25feea8476dcc0bdf0ae1bc506dc34561ea"}, - {file = "orjson-3.9.5-cp310-none-win_amd64.whl", hash = "sha256:2bcec0b1024d0031ab3eab7a8cb260c8a4e4a5e35993878a2da639d69cdf6a65"}, - {file = "orjson-3.9.5-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:8547b95ca0e2abd17e1471973e6d676f1d8acedd5f8fb4f739e0612651602d66"}, - {file = "orjson-3.9.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash 
= "sha256:87ce174d6a38d12b3327f76145acbd26f7bc808b2b458f61e94d83cd0ebb4d76"}, - {file = "orjson-3.9.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a960bb1bc9a964d16fcc2d4af5a04ce5e4dfddca84e3060c35720d0a062064fe"}, - {file = "orjson-3.9.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a7aa5573a949760d6161d826d34dc36db6011926f836851fe9ccb55b5a7d8e8"}, - {file = "orjson-3.9.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8b2852afca17d7eea85f8e200d324e38c851c96598ac7b227e4f6c4e59fbd3df"}, - {file = "orjson-3.9.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa185959c082475288da90f996a82e05e0c437216b96f2a8111caeb1d54ef926"}, - {file = "orjson-3.9.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:89c9332695b838438ea4b9a482bce8ffbfddde4df92750522d928fb00b7b8dce"}, - {file = "orjson-3.9.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2493f1351a8f0611bc26e2d3d407efb873032b4f6b8926fed8cfed39210ca4ba"}, - {file = "orjson-3.9.5-cp311-none-win32.whl", hash = "sha256:ffc544e0e24e9ae69301b9a79df87a971fa5d1c20a6b18dca885699709d01be0"}, - {file = "orjson-3.9.5-cp311-none-win_amd64.whl", hash = "sha256:89670fe2732e3c0c54406f77cad1765c4c582f67b915c74fda742286809a0cdc"}, - {file = "orjson-3.9.5-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:15df211469625fa27eced4aa08dc03e35f99c57d45a33855cc35f218ea4071b8"}, - {file = "orjson-3.9.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9f17c59fe6c02bc5f89ad29edb0253d3059fe8ba64806d789af89a45c35269a"}, - {file = "orjson-3.9.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ca6b96659c7690773d8cebb6115c631f4a259a611788463e9c41e74fa53bf33f"}, - {file = "orjson-3.9.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a26fafe966e9195b149950334bdbe9026eca17fe8ffe2d8fa87fdc30ca925d30"}, - {file = "orjson-3.9.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9006b1eb645ecf460da067e2dd17768ccbb8f39b01815a571bfcfab7e8da5e52"}, - {file = "orjson-3.9.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebfdbf695734b1785e792a1315e41835ddf2a3e907ca0e1c87a53f23006ce01d"}, - {file = "orjson-3.9.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4a3943234342ab37d9ed78fb0a8f81cd4b9532f67bf2ac0d3aa45fa3f0a339f3"}, - {file = "orjson-3.9.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e6762755470b5c82f07b96b934af32e4d77395a11768b964aaa5eb092817bc31"}, - {file = "orjson-3.9.5-cp312-none-win_amd64.whl", hash = "sha256:c74df28749c076fd6e2157190df23d43d42b2c83e09d79b51694ee7315374ad5"}, - {file = "orjson-3.9.5-cp37-cp37m-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:88e18a74d916b74f00d0978d84e365c6bf0e7ab846792efa15756b5fb2f7d49d"}, - {file = "orjson-3.9.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d28514b5b6dfaf69097be70d0cf4f1407ec29d0f93e0b4131bf9cc8fd3f3e374"}, - {file = "orjson-3.9.5-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b81aca8c7be61e2566246b6a0ca49f8aece70dd3f38c7f5c837f398c4cb142"}, - {file = "orjson-3.9.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:385c1c713b1e47fd92e96cf55fd88650ac6dfa0b997e8aa7ecffd8b5865078b1"}, - {file = "orjson-3.9.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:f9850c03a8e42fba1a508466e6a0f99472fd2b4a5f30235ea49b2a1b32c04c11"}, - {file = "orjson-3.9.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4449f84bbb13bcef493d8aa669feadfced0f7c5eea2d0d88b5cc21f812183af8"}, - {file = "orjson-3.9.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:86127bf194f3b873135e44ce5dc9212cb152b7e06798d5667a898a00f0519be4"}, - {file = "orjson-3.9.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0abcd039f05ae9ab5b0ff11624d0b9e54376253b7d3217a358d09c3edf1d36f7"}, - {file = "orjson-3.9.5-cp37-none-win32.whl", hash = "sha256:10cc8ad5ff7188efcb4bec196009d61ce525a4e09488e6d5db41218c7fe4f001"}, - {file = "orjson-3.9.5-cp37-none-win_amd64.whl", hash = "sha256:ff27e98532cb87379d1a585837d59b187907228268e7b0a87abe122b2be6968e"}, - {file = "orjson-3.9.5-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:5bfa79916ef5fef75ad1f377e54a167f0de334c1fa4ebb8d0224075f3ec3d8c0"}, - {file = "orjson-3.9.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e87dfa6ac0dae764371ab19b35eaaa46dfcb6ef2545dfca03064f21f5d08239f"}, - {file = "orjson-3.9.5-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:50ced24a7b23058b469ecdb96e36607fc611cbaee38b58e62a55c80d1b3ad4e1"}, - {file = "orjson-3.9.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b1b74ea2a3064e1375da87788897935832e806cc784de3e789fd3c4ab8eb3fa5"}, - {file = "orjson-3.9.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7cb961efe013606913d05609f014ad43edfaced82a576e8b520a5574ce3b2b9"}, - {file = "orjson-3.9.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1225d2d5ee76a786bda02f8c5e15017462f8432bb960de13d7c2619dba6f0275"}, - {file = "orjson-3.9.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f39f4b99199df05c7ecdd006086259ed25886cdbd7b14c8cdb10c7675cfcca7d"}, - {file = "orjson-3.9.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a461dc9fb60cac44f2d3218c36a0c1c01132314839a0e229d7fb1bba69b810d8"}, - {file = "orjson-3.9.5-cp38-none-win32.whl", hash = "sha256:dedf1a6173748202df223aea29de814b5836732a176b33501375c66f6ab7d822"}, - {file = "orjson-3.9.5-cp38-none-win_amd64.whl", hash = "sha256:fa504082f53efcbacb9087cc8676c163237beb6e999d43e72acb4bb6f0db11e6"}, - {file = "orjson-3.9.5-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:6900f0248edc1bec2a2a3095a78a7e3ef4e63f60f8ddc583687eed162eedfd69"}, - {file = "orjson-3.9.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17404333c40047888ac40bd8c4d49752a787e0a946e728a4e5723f111b6e55a5"}, - {file = "orjson-3.9.5-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0eefb7cfdd9c2bc65f19f974a5d1dfecbac711dae91ed635820c6b12da7a3c11"}, - {file = "orjson-3.9.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:68c78b2a3718892dc018adbc62e8bab6ef3c0d811816d21e6973dee0ca30c152"}, - {file = "orjson-3.9.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:591ad7d9e4a9f9b104486ad5d88658c79ba29b66c5557ef9edf8ca877a3f8d11"}, - {file = "orjson-3.9.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6cc2cbf302fbb2d0b2c3c142a663d028873232a434d89ce1b2604ebe5cc93ce8"}, - {file = "orjson-3.9.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b26b5aa5e9ee1bad2795b925b3adb1b1b34122cb977f30d89e0a1b3f24d18450"}, - {file = 
"orjson-3.9.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ef84724f7d29dcfe3aafb1fc5fc7788dca63e8ae626bb9298022866146091a3e"}, - {file = "orjson-3.9.5-cp39-none-win32.whl", hash = "sha256:664cff27f85939059472afd39acff152fbac9a091b7137092cb651cf5f7747b5"}, - {file = "orjson-3.9.5-cp39-none-win_amd64.whl", hash = "sha256:91dda66755795ac6100e303e206b636568d42ac83c156547634256a2e68de694"}, - {file = "orjson-3.9.5.tar.gz", hash = "sha256:6daf5ee0b3cf530b9978cdbf71024f1c16ed4a67d05f6ec435c6e7fe7a52724c"}, -] - -[[package]] -name = "oscrypto" -version = "1.3.0" -description = "TLS (SSL) sockets, key generation, encryption, decryption, signing, verification and KDFs using the OS crypto libraries. Does not require a compiler, and relies on the OS for patching. Works on Windows, OS X and Linux/BSD." -optional = true -python-versions = "*" -files = [ - {file = "oscrypto-1.3.0-py2.py3-none-any.whl", hash = "sha256:2b2f1d2d42ec152ca90ccb5682f3e051fb55986e1b170ebde472b133713e7085"}, - {file = "oscrypto-1.3.0.tar.gz", hash = "sha256:6f5fef59cb5b3708321db7cca56aed8ad7e662853351e7991fcf60ec606d47a4"}, -] - -[package.dependencies] -asn1crypto = ">=1.5.1" [[package]] name = "packaging" version = "23.1" description = "Core utilities for Python packages" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, - {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, -] [[package]] name = "pandas" -version = "1.5.3" +version = "2.0.3" description = "Powerful data structures for data analysis, time series, and statistics" +category = "dev" optional = false python-versions = ">=3.8" -files = [ - {file = "pandas-1.5.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3749077d86e3a2f0ed51367f30bf5b82e131cc0f14260c4d3e499186fccc4406"}, - {file = "pandas-1.5.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:972d8a45395f2a2d26733eb8d0f629b2f90bebe8e8eddbb8829b180c09639572"}, - {file = "pandas-1.5.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:50869a35cbb0f2e0cd5ec04b191e7b12ed688874bd05dd777c19b28cbea90996"}, - {file = "pandas-1.5.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3ac844a0fe00bfaeb2c9b51ab1424e5c8744f89860b138434a363b1f620f354"}, - {file = "pandas-1.5.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a0a56cef15fd1586726dace5616db75ebcfec9179a3a55e78f72c5639fa2a23"}, - {file = "pandas-1.5.3-cp310-cp310-win_amd64.whl", hash = "sha256:478ff646ca42b20376e4ed3fa2e8d7341e8a63105586efe54fa2508ee087f328"}, - {file = "pandas-1.5.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6973549c01ca91ec96199e940495219c887ea815b2083722821f1d7abfa2b4dc"}, - {file = "pandas-1.5.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c39a8da13cede5adcd3be1182883aea1c925476f4e84b2807a46e2775306305d"}, - {file = "pandas-1.5.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f76d097d12c82a535fda9dfe5e8dd4127952b45fea9b0276cb30cca5ea313fbc"}, - {file = "pandas-1.5.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e474390e60ed609cec869b0da796ad94f420bb057d86784191eefc62b65819ae"}, - {file = "pandas-1.5.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f2b952406a1588ad4cad5b3f55f520e82e902388a6d5a4a91baa8d38d23c7f6"}, - {file = "pandas-1.5.3-cp311-cp311-win_amd64.whl", hash = 
"sha256:bc4c368f42b551bf72fac35c5128963a171b40dce866fb066540eeaf46faa003"}, - {file = "pandas-1.5.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:14e45300521902689a81f3f41386dc86f19b8ba8dd5ac5a3c7010ef8d2932813"}, - {file = "pandas-1.5.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9842b6f4b8479e41968eced654487258ed81df7d1c9b7b870ceea24ed9459b31"}, - {file = "pandas-1.5.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:26d9c71772c7afb9d5046e6e9cf42d83dd147b5cf5bcb9d97252077118543792"}, - {file = "pandas-1.5.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5fbcb19d6fceb9e946b3e23258757c7b225ba450990d9ed63ccceeb8cae609f7"}, - {file = "pandas-1.5.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:565fa34a5434d38e9d250af3c12ff931abaf88050551d9fbcdfafca50d62babf"}, - {file = "pandas-1.5.3-cp38-cp38-win32.whl", hash = "sha256:87bd9c03da1ac870a6d2c8902a0e1fd4267ca00f13bc494c9e5a9020920e1d51"}, - {file = "pandas-1.5.3-cp38-cp38-win_amd64.whl", hash = "sha256:41179ce559943d83a9b4bbacb736b04c928b095b5f25dd2b7389eda08f46f373"}, - {file = "pandas-1.5.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c74a62747864ed568f5a82a49a23a8d7fe171d0c69038b38cedf0976831296fa"}, - {file = "pandas-1.5.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c4c00e0b0597c8e4f59e8d461f797e5d70b4d025880516a8261b2817c47759ee"}, - {file = "pandas-1.5.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a50d9a4336a9621cab7b8eb3fb11adb82de58f9b91d84c2cd526576b881a0c5a"}, - {file = "pandas-1.5.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd05f7783b3274aa206a1af06f0ceed3f9b412cf665b7247eacd83be41cf7bf0"}, - {file = "pandas-1.5.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f69c4029613de47816b1bb30ff5ac778686688751a5e9c99ad8c7031f6508e5"}, - {file = "pandas-1.5.3-cp39-cp39-win32.whl", hash = "sha256:7cec0bee9f294e5de5bbfc14d0573f65526071029d036b753ee6507d2a21480a"}, - {file = "pandas-1.5.3-cp39-cp39-win_amd64.whl", hash = "sha256:dfd681c5dc216037e0b0a2c821f5ed99ba9f03ebcf119c7dac0e9a7b960b9ec9"}, - {file = "pandas-1.5.3.tar.gz", hash = "sha256:74a3fd7e5a7ec052f183273dc7b0acd3a863edf7520f5d3a1765c04ffdb3b0b1"}, -] [package.dependencies] numpy = [ {version = ">=1.20.3", markers = "python_version < \"3.10\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] -python-dateutil = ">=2.8.1" +python-dateutil = ">=2.8.2" pytz = ">=2020.1" - -[package.extras] -test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] +tzdata = ">=2022.1" + +[package.extras] +all = ["PyQt5 (>=5.15.1)", "SQLAlchemy (>=1.4.16)", "beautifulsoup4 (>=4.9.3)", "bottleneck (>=1.3.2)", "brotlipy (>=0.7.0)", "fastparquet (>=0.6.3)", "fsspec (>=2021.07.0)", "gcsfs (>=2021.07.0)", "html5lib (>=1.1)", "hypothesis (>=6.34.2)", "jinja2 (>=3.0.0)", "lxml (>=4.6.3)", "matplotlib (>=3.6.1)", "numba (>=0.53.1)", "numexpr (>=2.7.3)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pandas-gbq (>=0.15.0)", "psycopg2 (>=2.8.6)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "python-snappy (>=0.6.0)", "pyxlsb (>=1.0.8)", "qtpy (>=2.2.0)", "s3fs (>=2021.08.0)", "scipy (>=1.7.1)", "tables (>=3.6.1)", "tabulate (>=0.8.9)", "xarray (>=0.21.0)", "xlrd (>=2.0.1)", 
"xlsxwriter (>=1.4.3)", "zstandard (>=0.15.2)"] +aws = ["s3fs (>=2021.08.0)"] +clipboard = ["PyQt5 (>=5.15.1)", "qtpy (>=2.2.0)"] +compression = ["brotlipy (>=0.7.0)", "python-snappy (>=0.6.0)", "zstandard (>=0.15.2)"] +computation = ["scipy (>=1.7.1)", "xarray (>=0.21.0)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pyxlsb (>=1.0.8)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)"] +feather = ["pyarrow (>=7.0.0)"] +fss = ["fsspec (>=2021.07.0)"] +gcp = ["gcsfs (>=2021.07.0)", "pandas-gbq (>=0.15.0)"] +hdf5 = ["tables (>=3.6.1)"] +html = ["beautifulsoup4 (>=4.9.3)", "html5lib (>=1.1)", "lxml (>=4.6.3)"] +mysql = ["SQLAlchemy (>=1.4.16)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.0.0)", "tabulate (>=0.8.9)"] +parquet = ["pyarrow (>=7.0.0)"] +performance = ["bottleneck (>=1.3.2)", "numba (>=0.53.1)", "numexpr (>=2.7.1)"] +plot = ["matplotlib (>=3.6.1)"] +postgresql = ["SQLAlchemy (>=1.4.16)", "psycopg2 (>=2.8.6)"] +spss = ["pyreadstat (>=1.1.2)"] +sql-other = ["SQLAlchemy (>=1.4.16)"] +test = ["hypothesis (>=6.34.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.6.3)"] [[package]] name = "parsedatetime" version = "2.4" description = "Parse human-readable date/time text." +category = "main" optional = false python-versions = "*" -files = [ - {file = "parsedatetime-2.4-py2-none-any.whl", hash = "sha256:9ee3529454bf35c40a77115f5a596771e59e1aee8c53306f346c461b8e913094"}, - {file = "parsedatetime-2.4.tar.gz", hash = "sha256:3d817c58fb9570d1eec1dd46fa9448cd644eeed4fb612684b02dfda3a79cb84b"}, -] [package.dependencies] future = "*" @@ -5325,23 +3504,17 @@ future = "*" name = "pathspec" version = "0.11.2" description = "Utility library for gitignore style pattern matching of file paths." +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "pathspec-0.11.2-py3-none-any.whl", hash = "sha256:1d6ed233af05e679efb96b1851550ea95bbb64b7c490b0f5aa52996c11e92a20"}, - {file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"}, -] [[package]] name = "pathvalidate" version = "3.1.0" description = "pathvalidate is a Python library to sanitize/validate a string such as filenames/file-paths/etc." 
+category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "pathvalidate-3.1.0-py3-none-any.whl", hash = "sha256:912fd1d2e1a2a6a6f98da36a91f21ed86746473810ff625b9c34f3d06c0caa1d"}, - {file = "pathvalidate-3.1.0.tar.gz", hash = "sha256:426970226e24199fd90d93995d223c1e28bda967cdf4370755a14cdf72a2a8ee"}, -] [package.extras] docs = ["Sphinx (>=2.4)", "sphinx-rtd-theme (>=1.2.2)", "urllib3 (<2)"] @@ -5351,42 +3524,17 @@ test = ["Faker (>=1.0.8)", "allpairspy (>=2)", "click (>=6.2)", "pytest (>=6.0.1 name = "pbr" version = "5.11.1" description = "Python Build Reasonableness" +category = "dev" optional = false python-versions = ">=2.6" -files = [ - {file = "pbr-5.11.1-py2.py3-none-any.whl", hash = "sha256:567f09558bae2b3ab53cb3c1e2e33e726ff3338e7bae3db5dc954b3a44eef12b"}, - {file = "pbr-5.11.1.tar.gz", hash = "sha256:aefc51675b0b533d56bb5fd1c8c6c0522fe31896679882e1c4c63d5e4a0fccb3"}, -] [[package]] name = "pendulum" version = "2.1.2" description = "Python datetimes made easy" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -files = [ - {file = "pendulum-2.1.2-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:b6c352f4bd32dff1ea7066bd31ad0f71f8d8100b9ff709fb343f3b86cee43efe"}, - {file = "pendulum-2.1.2-cp27-cp27m-win_amd64.whl", hash = "sha256:318f72f62e8e23cd6660dbafe1e346950281a9aed144b5c596b2ddabc1d19739"}, - {file = "pendulum-2.1.2-cp35-cp35m-macosx_10_15_x86_64.whl", hash = "sha256:0731f0c661a3cb779d398803655494893c9f581f6488048b3fb629c2342b5394"}, - {file = "pendulum-2.1.2-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:3481fad1dc3f6f6738bd575a951d3c15d4b4ce7c82dce37cf8ac1483fde6e8b0"}, - {file = "pendulum-2.1.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:9702069c694306297ed362ce7e3c1ef8404ac8ede39f9b28b7c1a7ad8c3959e3"}, - {file = "pendulum-2.1.2-cp35-cp35m-win_amd64.whl", hash = "sha256:fb53ffa0085002ddd43b6ca61a7b34f2d4d7c3ed66f931fe599e1a531b42af9b"}, - {file = "pendulum-2.1.2-cp36-cp36m-macosx_10_15_x86_64.whl", hash = "sha256:c501749fdd3d6f9e726086bf0cd4437281ed47e7bca132ddb522f86a1645d360"}, - {file = "pendulum-2.1.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:c807a578a532eeb226150d5006f156632df2cc8c5693d778324b43ff8c515dd0"}, - {file = "pendulum-2.1.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:2d1619a721df661e506eff8db8614016f0720ac171fe80dda1333ee44e684087"}, - {file = "pendulum-2.1.2-cp36-cp36m-win_amd64.whl", hash = "sha256:f888f2d2909a414680a29ae74d0592758f2b9fcdee3549887779cd4055e975db"}, - {file = "pendulum-2.1.2-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:e95d329384717c7bf627bf27e204bc3b15c8238fa8d9d9781d93712776c14002"}, - {file = "pendulum-2.1.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:4c9c689747f39d0d02a9f94fcee737b34a5773803a64a5fdb046ee9cac7442c5"}, - {file = "pendulum-2.1.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:1245cd0075a3c6d889f581f6325dd8404aca5884dea7223a5566c38aab94642b"}, - {file = "pendulum-2.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:db0a40d8bcd27b4fb46676e8eb3c732c67a5a5e6bfab8927028224fbced0b40b"}, - {file = "pendulum-2.1.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:f5e236e7730cab1644e1b87aca3d2ff3e375a608542e90fe25685dae46310116"}, - {file = "pendulum-2.1.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:de42ea3e2943171a9e95141f2eecf972480636e8e484ccffaf1e833929e9e052"}, - {file = "pendulum-2.1.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7c5ec650cb4bec4c63a89a0242cc8c3cebcec92fcfe937c417ba18277d8560be"}, - {file = 
"pendulum-2.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:33fb61601083f3eb1d15edeb45274f73c63b3c44a8524703dc143f4212bf3269"}, - {file = "pendulum-2.1.2-cp39-cp39-manylinux1_i686.whl", hash = "sha256:29c40a6f2942376185728c9a0347d7c0f07905638c83007e1d262781f1e6953a"}, - {file = "pendulum-2.1.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:94b1fc947bfe38579b28e1cccb36f7e28a15e841f30384b5ad6c5e31055c85d7"}, - {file = "pendulum-2.1.2.tar.gz", hash = "sha256:b06a0ca1bfe41c990bbf0c029f0b6501a7f2ec4e38bfec730712015e8860f207"}, -] [package.dependencies] python-dateutil = ">=2.6,<3.0" @@ -5396,12 +3544,9 @@ pytzdata = ">=2020.1" name = "pipdeptree" version = "2.9.6" description = "Command line utility to show dependency tree of packages." +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "pipdeptree-2.9.6-py3-none-any.whl", hash = "sha256:de93f990d21224297c9f03e057da5a3dc65ff732a0147945dd9421671f13626b"}, - {file = "pipdeptree-2.9.6.tar.gz", hash = "sha256:f815caf165e89c576ce659b866c7a82ae4590420c2d020a92d32e45097f8bc73"}, -] [package.extras] graphviz = ["graphviz (>=0.20.1)"] @@ -5411,23 +3556,17 @@ test = ["covdefaults (>=2.3)", "diff-cover (>=7.6)", "pip (>=23.1.2)", "pytest ( name = "pkgutil-resolve-name" version = "1.3.10" description = "Resolve a name to an object." +category = "main" optional = false python-versions = ">=3.6" -files = [ - {file = "pkgutil_resolve_name-1.3.10-py3-none-any.whl", hash = "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e"}, - {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"}, -] [[package]] name = "platformdirs" version = "3.8.1" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
+category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "platformdirs-3.8.1-py3-none-any.whl", hash = "sha256:cec7b889196b9144d088e4c57d9ceef7374f6c39694ad1577a0aab50d27ea28c"}, - {file = "platformdirs-3.8.1.tar.gz", hash = "sha256:f87ca4fcff7d2b0f81c6a748a77973d7af0f4d526f98f308477c3c436c74d528"}, -] [package.extras] docs = ["furo (>=2023.5.20)", "proselint (>=0.13)", "sphinx (>=7.0.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] @@ -5437,12 +3576,9 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest- name = "pluggy" version = "1.3.0" description = "plugin and hook calling mechanisms for python" +category = "dev" optional = false python-versions = ">=3.8" -files = [ - {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, - {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, -] [package.extras] dev = ["pre-commit", "tox"] @@ -5452,23 +3588,17 @@ testing = ["pytest", "pytest-benchmark"] name = "ply" version = "3.11" description = "Python Lex & Yacc" +category = "main" optional = false python-versions = "*" -files = [ - {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, - {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, -] [[package]] name = "portalocker" version = "2.7.0" description = "Wraps the portalocker recipe for easy usage" +category = "main" optional = true python-versions = ">=3.5" -files = [ - {file = "portalocker-2.7.0-py2.py3-none-any.whl", hash = "sha256:a07c5b4f3985c3cf4798369631fb7011adb498e2a46d8440efc75a8f29a0f983"}, - {file = "portalocker-2.7.0.tar.gz", hash = "sha256:032e81d534a88ec1736d03f780ba073f047a06c478b06e2937486f334e955c51"}, -] [package.dependencies] pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""} @@ -5482,23 +3612,17 @@ tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "p name = "prefixed" version = "0.7.0" description = "Prefixed alternative numeric library" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "prefixed-0.7.0-py2.py3-none-any.whl", hash = "sha256:537b0e4ff4516c4578f277a41d7104f769d6935ae9cdb0f88fed82ec7b3c0ca5"}, - {file = "prefixed-0.7.0.tar.gz", hash = "sha256:0b54d15e602eb8af4ac31b1db21a37ea95ce5890e0741bb0dd9ded493cefbbe9"}, -] [[package]] name = "prison" version = "0.2.1" description = "Rison encoder/decoder" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "prison-0.2.1-py2.py3-none-any.whl", hash = "sha256:f90bab63fca497aa0819a852f64fb21a4e181ed9f6114deaa5dc04001a7555c5"}, - {file = "prison-0.2.1.tar.gz", hash = "sha256:e6cd724044afcb1a8a69340cad2f1e3151a5839fd3a8027fd1357571e797c599"}, -] [package.dependencies] six = "*" @@ -5510,12 +3634,9 @@ dev = ["nose", "pipreqs", "twine"] name = "proto-plus" version = "1.22.3" description = "Beautiful, Pythonic protocol buffers." 
+category = "main" optional = true python-versions = ">=3.6" -files = [ - {file = "proto-plus-1.22.3.tar.gz", hash = "sha256:fdcd09713cbd42480740d2fe29c990f7fbd885a67efc328aa8be6ee3e9f76a6b"}, - {file = "proto_plus-1.22.3-py3-none-any.whl", hash = "sha256:a49cd903bc0b6ab41f76bf65510439d56ca76f868adf0274e738bfdd096894df"}, -] [package.dependencies] protobuf = ">=3.19.0,<5.0.0dev" @@ -5527,46 +3648,17 @@ testing = ["google-api-core[grpc] (>=1.31.5)"] name = "protobuf" version = "4.24.2" description = "" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "protobuf-4.24.2-cp310-abi3-win32.whl", hash = "sha256:58e12d2c1aa428ece2281cef09bbaa6938b083bcda606db3da4e02e991a0d924"}, - {file = "protobuf-4.24.2-cp310-abi3-win_amd64.whl", hash = "sha256:77700b55ba41144fc64828e02afb41901b42497b8217b558e4a001f18a85f2e3"}, - {file = "protobuf-4.24.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:237b9a50bd3b7307d0d834c1b0eb1a6cd47d3f4c2da840802cd03ea288ae8880"}, - {file = "protobuf-4.24.2-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:25ae91d21e3ce8d874211110c2f7edd6384816fb44e06b2867afe35139e1fd1c"}, - {file = "protobuf-4.24.2-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:c00c3c7eb9ad3833806e21e86dca448f46035242a680f81c3fe068ff65e79c74"}, - {file = "protobuf-4.24.2-cp37-cp37m-win32.whl", hash = "sha256:4e69965e7e54de4db989289a9b971a099e626f6167a9351e9d112221fc691bc1"}, - {file = "protobuf-4.24.2-cp37-cp37m-win_amd64.whl", hash = "sha256:c5cdd486af081bf752225b26809d2d0a85e575b80a84cde5172a05bbb1990099"}, - {file = "protobuf-4.24.2-cp38-cp38-win32.whl", hash = "sha256:6bd26c1fa9038b26c5c044ee77e0ecb18463e957fefbaeb81a3feb419313a54e"}, - {file = "protobuf-4.24.2-cp38-cp38-win_amd64.whl", hash = "sha256:bb7aa97c252279da65584af0456f802bd4b2de429eb945bbc9b3d61a42a8cd16"}, - {file = "protobuf-4.24.2-cp39-cp39-win32.whl", hash = "sha256:2b23bd6e06445699b12f525f3e92a916f2dcf45ffba441026357dea7fa46f42b"}, - {file = "protobuf-4.24.2-cp39-cp39-win_amd64.whl", hash = "sha256:839952e759fc40b5d46be319a265cf94920174d88de31657d5622b5d8d6be5cd"}, - {file = "protobuf-4.24.2-py3-none-any.whl", hash = "sha256:3b7b170d3491ceed33f723bbf2d5a260f8a4e23843799a3906f16ef736ef251e"}, - {file = "protobuf-4.24.2.tar.gz", hash = "sha256:7fda70797ddec31ddfa3576cbdcc3ddbb6b3078b737a1a87ab9136af0570cd6e"}, -] [[package]] name = "psutil" version = "5.9.5" description = "Cross-platform lib for process and system monitoring in Python." 
+category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "psutil-5.9.5-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:be8929ce4313f9f8146caad4272f6abb8bf99fc6cf59344a3167ecd74f4f203f"}, - {file = "psutil-5.9.5-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:ab8ed1a1d77c95453db1ae00a3f9c50227ebd955437bcf2a574ba8adbf6a74d5"}, - {file = "psutil-5.9.5-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:4aef137f3345082a3d3232187aeb4ac4ef959ba3d7c10c33dd73763fbc063da4"}, - {file = "psutil-5.9.5-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:ea8518d152174e1249c4f2a1c89e3e6065941df2fa13a1ab45327716a23c2b48"}, - {file = "psutil-5.9.5-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:acf2aef9391710afded549ff602b5887d7a2349831ae4c26be7c807c0a39fac4"}, - {file = "psutil-5.9.5-cp27-none-win32.whl", hash = "sha256:5b9b8cb93f507e8dbaf22af6a2fd0ccbe8244bf30b1baad6b3954e935157ae3f"}, - {file = "psutil-5.9.5-cp27-none-win_amd64.whl", hash = "sha256:8c5f7c5a052d1d567db4ddd231a9d27a74e8e4a9c3f44b1032762bd7b9fdcd42"}, - {file = "psutil-5.9.5-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:3c6f686f4225553615612f6d9bc21f1c0e305f75d7d8454f9b46e901778e7217"}, - {file = "psutil-5.9.5-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a7dd9997128a0d928ed4fb2c2d57e5102bb6089027939f3b722f3a210f9a8da"}, - {file = "psutil-5.9.5-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89518112647f1276b03ca97b65cc7f64ca587b1eb0278383017c2a0dcc26cbe4"}, - {file = "psutil-5.9.5-cp36-abi3-win32.whl", hash = "sha256:104a5cc0e31baa2bcf67900be36acde157756b9c44017b86b2c049f11957887d"}, - {file = "psutil-5.9.5-cp36-abi3-win_amd64.whl", hash = "sha256:b258c0c1c9d145a1d5ceffab1134441c4c5113b2417fafff7315a917a026c3c9"}, - {file = "psutil-5.9.5-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:c607bb3b57dc779d55e1554846352b4e358c10fff3abf3514a7a6601beebdb30"}, - {file = "psutil-5.9.5.tar.gz", hash = "sha256:5410638e4df39c54d957fc51ce03048acd8e6d60abc0f5107af51e5fb566eb3c"}, -] [package.extras] test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] @@ -5575,80 +3667,17 @@ test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] name = "psycopg2-binary" version = "2.9.7" description = "psycopg2 - Python-PostgreSQL Database Adapter" +category = "main" optional = true python-versions = ">=3.6" -files = [ - {file = "psycopg2-binary-2.9.7.tar.gz", hash = "sha256:1b918f64a51ffe19cd2e230b3240ba481330ce1d4b7875ae67305bd1d37b041c"}, - {file = "psycopg2_binary-2.9.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ea5f8ee87f1eddc818fc04649d952c526db4426d26bab16efbe5a0c52b27d6ab"}, - {file = "psycopg2_binary-2.9.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2993ccb2b7e80844d534e55e0f12534c2871952f78e0da33c35e648bf002bbff"}, - {file = "psycopg2_binary-2.9.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbbc3c5d15ed76b0d9db7753c0db40899136ecfe97d50cbde918f630c5eb857a"}, - {file = "psycopg2_binary-2.9.7-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:692df8763b71d42eb8343f54091368f6f6c9cfc56dc391858cdb3c3ef1e3e584"}, - {file = "psycopg2_binary-2.9.7-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9dcfd5d37e027ec393a303cc0a216be564b96c80ba532f3d1e0d2b5e5e4b1e6e"}, - {file = "psycopg2_binary-2.9.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:17cc17a70dfb295a240db7f65b6d8153c3d81efb145d76da1e4a096e9c5c0e63"}, - {file = "psycopg2_binary-2.9.7-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e5666632ba2b0d9757b38fc17337d84bdf932d38563c5234f5f8c54fd01349c9"}, - {file = "psycopg2_binary-2.9.7-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7db7b9b701974c96a88997d458b38ccb110eba8f805d4b4f74944aac48639b42"}, - {file = "psycopg2_binary-2.9.7-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c82986635a16fb1fa15cd5436035c88bc65c3d5ced1cfaac7f357ee9e9deddd4"}, - {file = "psycopg2_binary-2.9.7-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4fe13712357d802080cfccbf8c6266a3121dc0e27e2144819029095ccf708372"}, - {file = "psycopg2_binary-2.9.7-cp310-cp310-win32.whl", hash = "sha256:122641b7fab18ef76b18860dd0c772290566b6fb30cc08e923ad73d17461dc63"}, - {file = "psycopg2_binary-2.9.7-cp310-cp310-win_amd64.whl", hash = "sha256:f8651cf1f144f9ee0fa7d1a1df61a9184ab72962531ca99f077bbdcba3947c58"}, - {file = "psycopg2_binary-2.9.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4ecc15666f16f97709106d87284c136cdc82647e1c3f8392a672616aed3c7151"}, - {file = "psycopg2_binary-2.9.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3fbb1184c7e9d28d67671992970718c05af5f77fc88e26fd7136613c4ece1f89"}, - {file = "psycopg2_binary-2.9.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a7968fd20bd550431837656872c19575b687f3f6f98120046228e451e4064df"}, - {file = "psycopg2_binary-2.9.7-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:094af2e77a1976efd4956a031028774b827029729725e136514aae3cdf49b87b"}, - {file = "psycopg2_binary-2.9.7-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:26484e913d472ecb6b45937ea55ce29c57c662066d222fb0fbdc1fab457f18c5"}, - {file = "psycopg2_binary-2.9.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f309b77a7c716e6ed9891b9b42953c3ff7d533dc548c1e33fddc73d2f5e21f9"}, - {file = "psycopg2_binary-2.9.7-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6d92e139ca388ccfe8c04aacc163756e55ba4c623c6ba13d5d1595ed97523e4b"}, - {file = "psycopg2_binary-2.9.7-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:2df562bb2e4e00ee064779902d721223cfa9f8f58e7e52318c97d139cf7f012d"}, - {file = "psycopg2_binary-2.9.7-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:4eec5d36dbcfc076caab61a2114c12094c0b7027d57e9e4387b634e8ab36fd44"}, - {file = "psycopg2_binary-2.9.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1011eeb0c51e5b9ea1016f0f45fa23aca63966a4c0afcf0340ccabe85a9f65bd"}, - {file = "psycopg2_binary-2.9.7-cp311-cp311-win32.whl", hash = "sha256:ded8e15f7550db9e75c60b3d9fcbc7737fea258a0f10032cdb7edc26c2a671fd"}, - {file = "psycopg2_binary-2.9.7-cp311-cp311-win_amd64.whl", hash = "sha256:8a136c8aaf6615653450817a7abe0fc01e4ea720ae41dfb2823eccae4b9062a3"}, - {file = "psycopg2_binary-2.9.7-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2dec5a75a3a5d42b120e88e6ed3e3b37b46459202bb8e36cd67591b6e5feebc1"}, - {file = "psycopg2_binary-2.9.7-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc10da7e7df3380426521e8c1ed975d22df678639da2ed0ec3244c3dc2ab54c8"}, - {file = "psycopg2_binary-2.9.7-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee919b676da28f78f91b464fb3e12238bd7474483352a59c8a16c39dfc59f0c5"}, - {file = "psycopg2_binary-2.9.7-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:eb1c0e682138f9067a58fc3c9a9bf1c83d8e08cfbee380d858e63196466d5c86"}, - {file = "psycopg2_binary-2.9.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00d8db270afb76f48a499f7bb8fa70297e66da67288471ca873db88382850bf4"}, - {file = "psycopg2_binary-2.9.7-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:9b0c2b466b2f4d89ccc33784c4ebb1627989bd84a39b79092e560e937a11d4ac"}, - {file = "psycopg2_binary-2.9.7-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:51d1b42d44f4ffb93188f9b39e6d1c82aa758fdb8d9de65e1ddfe7a7d250d7ad"}, - {file = "psycopg2_binary-2.9.7-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:11abdbfc6f7f7dea4a524b5f4117369b0d757725798f1593796be6ece20266cb"}, - {file = "psycopg2_binary-2.9.7-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:f02f4a72cc3ab2565c6d9720f0343cb840fb2dc01a2e9ecb8bc58ccf95dc5c06"}, - {file = "psycopg2_binary-2.9.7-cp37-cp37m-win32.whl", hash = "sha256:81d5dd2dd9ab78d31a451e357315f201d976c131ca7d43870a0e8063b6b7a1ec"}, - {file = "psycopg2_binary-2.9.7-cp37-cp37m-win_amd64.whl", hash = "sha256:62cb6de84d7767164a87ca97e22e5e0a134856ebcb08f21b621c6125baf61f16"}, - {file = "psycopg2_binary-2.9.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:59f7e9109a59dfa31efa022e94a244736ae401526682de504e87bd11ce870c22"}, - {file = "psycopg2_binary-2.9.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:95a7a747bdc3b010bb6a980f053233e7610276d55f3ca506afff4ad7749ab58a"}, - {file = "psycopg2_binary-2.9.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c721ee464e45ecf609ff8c0a555018764974114f671815a0a7152aedb9f3343"}, - {file = "psycopg2_binary-2.9.7-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f4f37bbc6588d402980ffbd1f3338c871368fb4b1cfa091debe13c68bb3852b3"}, - {file = "psycopg2_binary-2.9.7-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac83ab05e25354dad798401babaa6daa9577462136ba215694865394840e31f8"}, - {file = "psycopg2_binary-2.9.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:024eaeb2a08c9a65cd5f94b31ace1ee3bb3f978cd4d079406aef85169ba01f08"}, - {file = "psycopg2_binary-2.9.7-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1c31c2606ac500dbd26381145684d87730a2fac9a62ebcfbaa2b119f8d6c19f4"}, - {file = "psycopg2_binary-2.9.7-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:42a62ef0e5abb55bf6ffb050eb2b0fcd767261fa3faf943a4267539168807522"}, - {file = "psycopg2_binary-2.9.7-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:7952807f95c8eba6a8ccb14e00bf170bb700cafcec3924d565235dffc7dc4ae8"}, - {file = "psycopg2_binary-2.9.7-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e02bc4f2966475a7393bd0f098e1165d470d3fa816264054359ed4f10f6914ea"}, - {file = "psycopg2_binary-2.9.7-cp38-cp38-win32.whl", hash = "sha256:fdca0511458d26cf39b827a663d7d87db6f32b93efc22442a742035728603d5f"}, - {file = "psycopg2_binary-2.9.7-cp38-cp38-win_amd64.whl", hash = "sha256:d0b16e5bb0ab78583f0ed7ab16378a0f8a89a27256bb5560402749dbe8a164d7"}, - {file = "psycopg2_binary-2.9.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6822c9c63308d650db201ba22fe6648bd6786ca6d14fdaf273b17e15608d0852"}, - {file = "psycopg2_binary-2.9.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8f94cb12150d57ea433e3e02aabd072205648e86f1d5a0a692d60242f7809b15"}, - {file = "psycopg2_binary-2.9.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5ee89587696d808c9a00876065d725d4ae606f5f7853b961cdbc348b0f7c9a1"}, - {file = 
"psycopg2_binary-2.9.7-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad5ec10b53cbb57e9a2e77b67e4e4368df56b54d6b00cc86398578f1c635f329"}, - {file = "psycopg2_binary-2.9.7-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:642df77484b2dcaf87d4237792246d8068653f9e0f5c025e2c692fc56b0dda70"}, - {file = "psycopg2_binary-2.9.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6a8b575ac45af1eaccbbcdcf710ab984fd50af048fe130672377f78aaff6fc1"}, - {file = "psycopg2_binary-2.9.7-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f955aa50d7d5220fcb6e38f69ea126eafecd812d96aeed5d5f3597f33fad43bb"}, - {file = "psycopg2_binary-2.9.7-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ad26d4eeaa0d722b25814cce97335ecf1b707630258f14ac4d2ed3d1d8415265"}, - {file = "psycopg2_binary-2.9.7-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:ced63c054bdaf0298f62681d5dcae3afe60cbae332390bfb1acf0e23dcd25fc8"}, - {file = "psycopg2_binary-2.9.7-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2b04da24cbde33292ad34a40db9832a80ad12de26486ffeda883413c9e1b1d5e"}, - {file = "psycopg2_binary-2.9.7-cp39-cp39-win32.whl", hash = "sha256:18f12632ab516c47c1ac4841a78fddea6508a8284c7cf0f292cb1a523f2e2379"}, - {file = "psycopg2_binary-2.9.7-cp39-cp39-win_amd64.whl", hash = "sha256:eb3b8d55924a6058a26db69fb1d3e7e32695ff8b491835ba9f479537e14dcf9f"}, -] [[package]] name = "psycopg2cffi" version = "2.9.0" description = ".. image:: https://travis-ci.org/chtd/psycopg2cffi.svg?branch=master" +category = "main" optional = true python-versions = "*" -files = [ - {file = "psycopg2cffi-2.9.0.tar.gz", hash = "sha256:7e272edcd837de3a1d12b62185eb85c45a19feda9e62fa1b120c54f9e8d35c52"}, -] [package.dependencies] cffi = ">=1.0" @@ -5658,46 +3687,17 @@ six = "*" name = "py" version = "1.11.0" description = "library with cross-python path, ini-parsing, io, code, log facilities" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -files = [ - {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, - {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, -] [[package]] name = "pyarrow" -version = "10.0.1" +version = "14.0.1" description = "Python library for Apache Arrow" +category = "main" optional = true -python-versions = ">=3.7" -files = [ - {file = "pyarrow-10.0.1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:e00174764a8b4e9d8d5909b6d19ee0c217a6cf0232c5682e31fdfbd5a9f0ae52"}, - {file = "pyarrow-10.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f7a7dbe2f7f65ac1d0bd3163f756deb478a9e9afc2269557ed75b1b25ab3610"}, - {file = "pyarrow-10.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb627673cb98708ef00864e2e243f51ba7b4c1b9f07a1d821f98043eccd3f585"}, - {file = "pyarrow-10.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba71e6fc348c92477586424566110d332f60d9a35cb85278f42e3473bc1373da"}, - {file = "pyarrow-10.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:7b4ede715c004b6fc535de63ef79fa29740b4080639a5ff1ea9ca84e9282f349"}, - {file = "pyarrow-10.0.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:e3fe5049d2e9ca661d8e43fab6ad5a4c571af12d20a57dffc392a014caebef65"}, - {file = "pyarrow-10.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:254017ca43c45c5098b7f2a00e995e1f8346b0fb0be225f042838323bb55283c"}, - {file = 
"pyarrow-10.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70acca1ece4322705652f48db65145b5028f2c01c7e426c5d16a30ba5d739c24"}, - {file = "pyarrow-10.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abb57334f2c57979a49b7be2792c31c23430ca02d24becd0b511cbe7b6b08649"}, - {file = "pyarrow-10.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:1765a18205eb1e02ccdedb66049b0ec148c2a0cb52ed1fb3aac322dfc086a6ee"}, - {file = "pyarrow-10.0.1-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:61f4c37d82fe00d855d0ab522c685262bdeafd3fbcb5fe596fe15025fbc7341b"}, - {file = "pyarrow-10.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e141a65705ac98fa52a9113fe574fdaf87fe0316cde2dffe6b94841d3c61544c"}, - {file = "pyarrow-10.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf26f809926a9d74e02d76593026f0aaeac48a65b64f1bb17eed9964bfe7ae1a"}, - {file = "pyarrow-10.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:443eb9409b0cf78df10ced326490e1a300205a458fbeb0767b6b31ab3ebae6b2"}, - {file = "pyarrow-10.0.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:f2d00aa481becf57098e85d99e34a25dba5a9ade2f44eb0b7d80c80f2984fc03"}, - {file = "pyarrow-10.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b1fc226d28c7783b52a84d03a66573d5a22e63f8a24b841d5fc68caeed6784d4"}, - {file = "pyarrow-10.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efa59933b20183c1c13efc34bd91efc6b2997377c4c6ad9272da92d224e3beb1"}, - {file = "pyarrow-10.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:668e00e3b19f183394388a687d29c443eb000fb3fe25599c9b4762a0afd37775"}, - {file = "pyarrow-10.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:d1bc6e4d5d6f69e0861d5d7f6cf4d061cf1069cb9d490040129877acf16d4c2a"}, - {file = "pyarrow-10.0.1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:42ba7c5347ce665338f2bc64685d74855900200dac81a972d49fe127e8132f75"}, - {file = "pyarrow-10.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b069602eb1fc09f1adec0a7bdd7897f4d25575611dfa43543c8b8a75d99d6874"}, - {file = "pyarrow-10.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94fb4a0c12a2ac1ed8e7e2aa52aade833772cf2d3de9dde685401b22cec30002"}, - {file = "pyarrow-10.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db0c5986bf0808927f49640582d2032a07aa49828f14e51f362075f03747d198"}, - {file = "pyarrow-10.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:0ec7587d759153f452d5263dbc8b1af318c4609b607be2bd5127dcda6708cdb1"}, - {file = "pyarrow-10.0.1.tar.gz", hash = "sha256:1a14f57a5f472ce8234f2964cd5184cccaa8df7e04568c64edc33b23eb285dd5"}, -] +python-versions = ">=3.8" [package.dependencies] numpy = ">=1.16.6" @@ -5706,23 +3706,17 @@ numpy = ">=1.16.6" name = "pyasn1" version = "0.5.0" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" -files = [ - {file = "pyasn1-0.5.0-py2.py3-none-any.whl", hash = "sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57"}, - {file = "pyasn1-0.5.0.tar.gz", hash = "sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde"}, -] [[package]] name = "pyasn1-modules" version = "0.3.0" description = "A collection of ASN.1-based protocols modules" +category = "main" optional = false python-versions = 
"!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" -files = [ - {file = "pyasn1_modules-0.3.0-py2.py3-none-any.whl", hash = "sha256:d3ccd6ed470d9ffbc716be08bd90efbd44d0734bc9303818f7336070984a162d"}, - {file = "pyasn1_modules-0.3.0.tar.gz", hash = "sha256:5bd01446b736eb9d31512a30d46c1ac3395d676c6f3cafa4c03eb54b9925631c"}, -] [package.dependencies] pyasn1 = ">=0.4.6,<0.6.0" @@ -5731,12 +3725,9 @@ pyasn1 = ">=0.4.6,<0.6.0" name = "pyathena" version = "3.0.6" description = "Python DB API 2.0 (PEP 249) client for Amazon Athena" +category = "main" optional = true python-versions = ">=3.8.1" -files = [ - {file = "pyathena-3.0.6-py3-none-any.whl", hash = "sha256:27fb606a73644e62be8ef9b86cdf583ab3cb9f8cac9c2ad8f05b7ad6d4eaaa87"}, - {file = "pyathena-3.0.6.tar.gz", hash = "sha256:ee6ea175134894209af2c6be1859b7be4371f7741faa7a58f9f97905ff6a73a4"}, -] [package.dependencies] boto3 = ">=1.26.4" @@ -5754,127 +3745,52 @@ sqlalchemy = ["sqlalchemy (>=1.0.0)"] name = "pycodestyle" version = "2.9.1" description = "Python style guide checker" +category = "dev" optional = false python-versions = ">=3.6" -files = [ - {file = "pycodestyle-2.9.1-py2.py3-none-any.whl", hash = "sha256:d1735fc58b418fd7c5f658d28d943854f8a849b01a5d0a1e6f3f3fdd0166804b"}, - {file = "pycodestyle-2.9.1.tar.gz", hash = "sha256:2c9607871d58c76354b697b42f5d57e1ada7d261c261efac224b664affdc5785"}, -] [[package]] name = "pycparser" version = "2.21" description = "C parser in Python" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, - {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, -] [[package]] -name = "pycryptodomex" -version = "3.18.0" -description = "Cryptographic library for Python" -optional = true -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -files = [ - {file = "pycryptodomex-3.18.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:160a39a708c36fa0b168ab79386dede588e62aec06eb505add870739329aecc6"}, - {file = "pycryptodomex-3.18.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:c2953afebf282a444c51bf4effe751706b4d0d63d7ca2cc51db21f902aa5b84e"}, - {file = "pycryptodomex-3.18.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:ba95abd563b0d1b88401658665a260852a8e6c647026ee6a0a65589287681df8"}, - {file = "pycryptodomex-3.18.0-cp27-cp27m-manylinux2014_aarch64.whl", hash = "sha256:192306cf881fe3467dda0e174a4f47bb3a8bb24b90c9cdfbdc248eec5fc0578c"}, - {file = "pycryptodomex-3.18.0-cp27-cp27m-musllinux_1_1_aarch64.whl", hash = "sha256:f9ab5ef0718f6a8716695dea16d83b671b22c45e9c0c78fd807c32c0192e54b5"}, - {file = "pycryptodomex-3.18.0-cp27-cp27m-win32.whl", hash = "sha256:50308fcdbf8345e5ec224a5502b4215178bdb5e95456ead8ab1a69ffd94779cb"}, - {file = "pycryptodomex-3.18.0-cp27-cp27m-win_amd64.whl", hash = "sha256:4d9379c684efea80fdab02a3eb0169372bca7db13f9332cb67483b8dc8b67c37"}, - {file = "pycryptodomex-3.18.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:5594a125dae30d60e94f37797fc67ce3c744522de7992c7c360d02fdb34918f8"}, - {file = "pycryptodomex-3.18.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:8ff129a5a0eb5ff16e45ca4fa70a6051da7f3de303c33b259063c19be0c43d35"}, - {file = "pycryptodomex-3.18.0-cp27-cp27mu-manylinux2014_aarch64.whl", hash = "sha256:3d9314ac785a5b75d5aaf924c5f21d6ca7e8df442e5cf4f0fefad4f6e284d422"}, - {file = 
"pycryptodomex-3.18.0-cp27-cp27mu-musllinux_1_1_aarch64.whl", hash = "sha256:f237278836dda412a325e9340ba2e6a84cb0f56b9244781e5b61f10b3905de88"}, - {file = "pycryptodomex-3.18.0-cp35-abi3-macosx_10_9_universal2.whl", hash = "sha256:ac614363a86cc53d8ba44b6c469831d1555947e69ab3276ae8d6edc219f570f7"}, - {file = "pycryptodomex-3.18.0-cp35-abi3-macosx_10_9_x86_64.whl", hash = "sha256:302a8f37c224e7b5d72017d462a2be058e28f7be627bdd854066e16722d0fc0c"}, - {file = "pycryptodomex-3.18.0-cp35-abi3-manylinux2014_aarch64.whl", hash = "sha256:6421d23d6a648e83ba2670a352bcd978542dad86829209f59d17a3f087f4afef"}, - {file = "pycryptodomex-3.18.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d84e105787f5e5d36ec6a581ff37a1048d12e638688074b2a00bcf402f9aa1c2"}, - {file = "pycryptodomex-3.18.0-cp35-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6875eb8666f68ddbd39097867325bd22771f595b4e2b0149739b5623c8bf899b"}, - {file = "pycryptodomex-3.18.0-cp35-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:27072a494ce621cc7a9096bbf60ed66826bb94db24b49b7359509e7951033e74"}, - {file = "pycryptodomex-3.18.0-cp35-abi3-musllinux_1_1_i686.whl", hash = "sha256:1949e09ea49b09c36d11a951b16ff2a05a0ffe969dda1846e4686ee342fe8646"}, - {file = "pycryptodomex-3.18.0-cp35-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6ed3606832987018615f68e8ed716a7065c09a0fe94afd7c9ca1b6777f0ac6eb"}, - {file = "pycryptodomex-3.18.0-cp35-abi3-win32.whl", hash = "sha256:d56c9ec41258fd3734db9f5e4d2faeabe48644ba9ca23b18e1839b3bdf093222"}, - {file = "pycryptodomex-3.18.0-cp35-abi3-win_amd64.whl", hash = "sha256:e00a4bacb83a2627e8210cb353a2e31f04befc1155db2976e5e239dd66482278"}, - {file = "pycryptodomex-3.18.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:2dc4eab20f4f04a2d00220fdc9258717b82d31913552e766d5f00282c031b70a"}, - {file = "pycryptodomex-3.18.0-pp27-pypy_73-win32.whl", hash = "sha256:75672205148bdea34669173366df005dbd52be05115e919551ee97171083423d"}, - {file = "pycryptodomex-3.18.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:bec6c80994d4e7a38312072f89458903b65ec99bed2d65aa4de96d997a53ea7a"}, - {file = "pycryptodomex-3.18.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d35a8ffdc8b05e4b353ba281217c8437f02c57d7233363824e9d794cf753c419"}, - {file = "pycryptodomex-3.18.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76f0a46bee539dae4b3dfe37216f678769349576b0080fdbe431d19a02da42ff"}, - {file = "pycryptodomex-3.18.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:71687eed47df7e965f6e0bf3cadef98f368d5221f0fb89d2132effe1a3e6a194"}, - {file = "pycryptodomex-3.18.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:73d64b32d84cf48d9ec62106aa277dbe99ab5fbfd38c5100bc7bddd3beb569f7"}, - {file = "pycryptodomex-3.18.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbdcce0a226d9205560a5936b05208c709b01d493ed8307792075dedfaaffa5f"}, - {file = "pycryptodomex-3.18.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58fc0aceb9c961b9897facec9da24c6a94c5db04597ec832060f53d4d6a07196"}, - {file = "pycryptodomex-3.18.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:215be2980a6b70704c10796dd7003eb4390e7be138ac6fb8344bf47e71a8d470"}, - {file = "pycryptodomex-3.18.0.tar.gz", hash = "sha256:3e3ecb5fe979e7c1bb0027e518340acf7ee60415d79295e5251d13c68dde576e"}, -] +name = "pydantic" 
+version = "2.5.0" +description = "Data validation using Python type hints" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +annotated-types = ">=0.4.0" +pydantic-core = "2.14.1" +typing-extensions = ">=4.6.1" + +[package.extras] +email = ["email-validator (>=2.0.0)"] [[package]] -name = "pydantic" -version = "1.10.12" -description = "Data validation and settings management using python type hints" +name = "pydantic-core" +version = "2.14.1" +description = "" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "pydantic-1.10.12-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a1fcb59f2f355ec350073af41d927bf83a63b50e640f4dbaa01053a28b7a7718"}, - {file = "pydantic-1.10.12-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b7ccf02d7eb340b216ec33e53a3a629856afe1c6e0ef91d84a4e6f2fb2ca70fe"}, - {file = "pydantic-1.10.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8fb2aa3ab3728d950bcc885a2e9eff6c8fc40bc0b7bb434e555c215491bcf48b"}, - {file = "pydantic-1.10.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:771735dc43cf8383959dc9b90aa281f0b6092321ca98677c5fb6125a6f56d58d"}, - {file = "pydantic-1.10.12-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ca48477862372ac3770969b9d75f1bf66131d386dba79506c46d75e6b48c1e09"}, - {file = "pydantic-1.10.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a5e7add47a5b5a40c49b3036d464e3c7802f8ae0d1e66035ea16aa5b7a3923ed"}, - {file = "pydantic-1.10.12-cp310-cp310-win_amd64.whl", hash = "sha256:e4129b528c6baa99a429f97ce733fff478ec955513630e61b49804b6cf9b224a"}, - {file = "pydantic-1.10.12-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b0d191db0f92dfcb1dec210ca244fdae5cbe918c6050b342d619c09d31eea0cc"}, - {file = "pydantic-1.10.12-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:795e34e6cc065f8f498c89b894a3c6da294a936ee71e644e4bd44de048af1405"}, - {file = "pydantic-1.10.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69328e15cfda2c392da4e713443c7dbffa1505bc9d566e71e55abe14c97ddc62"}, - {file = "pydantic-1.10.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2031de0967c279df0d8a1c72b4ffc411ecd06bac607a212892757db7462fc494"}, - {file = "pydantic-1.10.12-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:ba5b2e6fe6ca2b7e013398bc7d7b170e21cce322d266ffcd57cca313e54fb246"}, - {file = "pydantic-1.10.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2a7bac939fa326db1ab741c9d7f44c565a1d1e80908b3797f7f81a4f86bc8d33"}, - {file = "pydantic-1.10.12-cp311-cp311-win_amd64.whl", hash = "sha256:87afda5539d5140cb8ba9e8b8c8865cb5b1463924d38490d73d3ccfd80896b3f"}, - {file = "pydantic-1.10.12-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:549a8e3d81df0a85226963611950b12d2d334f214436a19537b2efed61b7639a"}, - {file = "pydantic-1.10.12-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:598da88dfa127b666852bef6d0d796573a8cf5009ffd62104094a4fe39599565"}, - {file = "pydantic-1.10.12-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba5c4a8552bff16c61882db58544116d021d0b31ee7c66958d14cf386a5b5350"}, - {file = "pydantic-1.10.12-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c79e6a11a07da7374f46970410b41d5e266f7f38f6a17a9c4823db80dadf4303"}, - {file = "pydantic-1.10.12-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:ab26038b8375581dc832a63c948f261ae0aa21f1d34c1293469f135fa92972a5"}, - {file = "pydantic-1.10.12-cp37-cp37m-win_amd64.whl", hash = "sha256:e0a16d274b588767602b7646fa05af2782576a6cf1022f4ba74cbb4db66f6ca8"}, - {file = "pydantic-1.10.12-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6a9dfa722316f4acf4460afdf5d41d5246a80e249c7ff475c43a3a1e9d75cf62"}, - {file = "pydantic-1.10.12-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a73f489aebd0c2121ed974054cb2759af8a9f747de120acd2c3394cf84176ccb"}, - {file = "pydantic-1.10.12-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b30bcb8cbfccfcf02acb8f1a261143fab622831d9c0989707e0e659f77a18e0"}, - {file = "pydantic-1.10.12-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2fcfb5296d7877af406ba1547dfde9943b1256d8928732267e2653c26938cd9c"}, - {file = "pydantic-1.10.12-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:2f9a6fab5f82ada41d56b0602606a5506aab165ca54e52bc4545028382ef1c5d"}, - {file = "pydantic-1.10.12-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dea7adcc33d5d105896401a1f37d56b47d443a2b2605ff8a969a0ed5543f7e33"}, - {file = "pydantic-1.10.12-cp38-cp38-win_amd64.whl", hash = "sha256:1eb2085c13bce1612da8537b2d90f549c8cbb05c67e8f22854e201bde5d98a47"}, - {file = "pydantic-1.10.12-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ef6c96b2baa2100ec91a4b428f80d8f28a3c9e53568219b6c298c1125572ebc6"}, - {file = "pydantic-1.10.12-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6c076be61cd0177a8433c0adcb03475baf4ee91edf5a4e550161ad57fc90f523"}, - {file = "pydantic-1.10.12-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d5a58feb9a39f481eda4d5ca220aa8b9d4f21a41274760b9bc66bfd72595b86"}, - {file = "pydantic-1.10.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5f805d2d5d0a41633651a73fa4ecdd0b3d7a49de4ec3fadf062fe16501ddbf1"}, - {file = "pydantic-1.10.12-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1289c180abd4bd4555bb927c42ee42abc3aee02b0fb2d1223fb7c6e5bef87dbe"}, - {file = "pydantic-1.10.12-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5d1197e462e0364906cbc19681605cb7c036f2475c899b6f296104ad42b9f5fb"}, - {file = "pydantic-1.10.12-cp39-cp39-win_amd64.whl", hash = "sha256:fdbdd1d630195689f325c9ef1a12900524dceb503b00a987663ff4f58669b93d"}, - {file = "pydantic-1.10.12-py3-none-any.whl", hash = "sha256:b749a43aa51e32839c9d71dc67eb1e4221bb04af1033a32e3923d46f9effa942"}, - {file = "pydantic-1.10.12.tar.gz", hash = "sha256:0fe8a415cea8f340e7a9af9c54fc71a649b43e8ca3cc732986116b3cb135d303"}, -] - -[package.dependencies] -typing-extensions = ">=4.2.0" - -[package.extras] -dotenv = ["python-dotenv (>=0.10.4)"] -email = ["email-validator (>=1.0.3)"] + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" [[package]] name = "pydoc-markdown" version = "4.8.2" description = "Create Python API documentation in Markdown format." 
+category = "dev" optional = false python-versions = ">=3.7,<4.0" -files = [ - {file = "pydoc_markdown-4.8.2-py3-none-any.whl", hash = "sha256:203f74119e6bb2f9deba43d452422de7c8ec31955b61e0620fa4dd8c2611715f"}, - {file = "pydoc_markdown-4.8.2.tar.gz", hash = "sha256:fb6c927e31386de17472d42f9bd3d3be2905977d026f6216881c65145aa67f0b"}, -] [package.dependencies] click = ">=7.1,<9.0" @@ -5896,23 +3812,17 @@ yapf = ">=0.30.0" name = "pyflakes" version = "2.5.0" description = "passive checker of Python programs" +category = "dev" optional = false python-versions = ">=3.6" -files = [ - {file = "pyflakes-2.5.0-py2.py3-none-any.whl", hash = "sha256:4579f67d887f804e67edb544428f264b7b24f435b263c4614f384135cea553d2"}, - {file = "pyflakes-2.5.0.tar.gz", hash = "sha256:491feb020dca48ccc562a8c0cbe8df07ee13078df59813b83959cbdada312ea3"}, -] [[package]] name = "pygments" version = "2.16.1" description = "Pygments is a syntax highlighting package written in Python." +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"}, - {file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"}, -] [package.extras] plugins = ["importlib-metadata"] @@ -5921,12 +3831,9 @@ plugins = ["importlib-metadata"] name = "pyjwt" version = "2.8.0" description = "JSON Web Token implementation in Python" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, - {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"}, -] [package.dependencies] cryptography = {version = ">=3.4.0", optional = true, markers = "extra == \"crypto\""} @@ -5941,91 +3848,9 @@ tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] name = "pymongo" version = "4.6.0" description = "Python driver for MongoDB " +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "pymongo-4.6.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c011bd5ad03cc096f99ffcfdd18a1817354132c1331bed7a837a25226659845f"}, - {file = "pymongo-4.6.0-cp310-cp310-manylinux1_i686.whl", hash = "sha256:5e63146dbdb1eac207464f6e0cfcdb640c9c5ff0f57b754fa96fe252314a1dc6"}, - {file = "pymongo-4.6.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:2972dd1f1285866aba027eff2f4a2bbf8aa98563c2ced14cb34ee5602b36afdf"}, - {file = "pymongo-4.6.0-cp310-cp310-manylinux2014_i686.whl", hash = "sha256:a0be99b599da95b7a90a918dd927b20c434bea5e1c9b3efc6a3c6cd67c23f813"}, - {file = "pymongo-4.6.0-cp310-cp310-manylinux2014_ppc64le.whl", hash = "sha256:9b0f98481ad5dc4cb430a60bbb8869f05505283b9ae1c62bdb65eb5e020ee8e3"}, - {file = "pymongo-4.6.0-cp310-cp310-manylinux2014_s390x.whl", hash = "sha256:256c503a75bd71cf7fb9ebf889e7e222d49c6036a48aad5a619f98a0adf0e0d7"}, - {file = "pymongo-4.6.0-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:b4ad70d7cac4ca0c7b31444a0148bd3af01a2662fa12b1ad6f57cd4a04e21766"}, - {file = "pymongo-4.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5717a308a703dda2886a5796a07489c698b442f5e409cf7dc2ac93de8d61d764"}, - {file = "pymongo-4.6.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a8f7f9feecae53fa18d6a3ea7c75f9e9a1d4d20e5c3f9ce3fba83f07bcc4eee2"}, - {file = 
"pymongo-4.6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:128b1485753106c54af481789cdfea12b90a228afca0b11fb3828309a907e10e"}, - {file = "pymongo-4.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3077a31633beef77d057c6523f5de7271ddef7bde5e019285b00c0cc9cac1e3"}, - {file = "pymongo-4.6.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ebf02c32afa6b67e5861a27183dd98ed88419a94a2ab843cc145fb0bafcc5b28"}, - {file = "pymongo-4.6.0-cp310-cp310-win32.whl", hash = "sha256:b14dd73f595199f4275bed4fb509277470d9b9059310537e3b3daba12b30c157"}, - {file = "pymongo-4.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:8adf014f2779992eba3b513e060d06f075f0ab2fb3ad956f413a102312f65cdf"}, - {file = "pymongo-4.6.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ba51129fcc510824b6ca6e2ce1c27e3e4d048b6e35d3ae6f7e517bed1b8b25ce"}, - {file = "pymongo-4.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2973f113e079fb98515722cd728e1820282721ec9fd52830e4b73cabdbf1eb28"}, - {file = "pymongo-4.6.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:af425f323fce1b07755edd783581e7283557296946212f5b1a934441718e7528"}, - {file = "pymongo-4.6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1ec71ac633b126c0775ed4604ca8f56c3540f5c21a1220639f299e7a544b55f9"}, - {file = "pymongo-4.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ec6c20385c5a58e16b1ea60c5e4993ea060540671d7d12664f385f2fb32fe79"}, - {file = "pymongo-4.6.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:85f2cdc400ee87f5952ebf2a117488f2525a3fb2e23863a8efe3e4ee9e54e4d1"}, - {file = "pymongo-4.6.0-cp311-cp311-win32.whl", hash = "sha256:7fc2bb8a74dcfcdd32f89528e38dcbf70a3a6594963d60dc9595e3b35b66e414"}, - {file = "pymongo-4.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:6695d7136a435c1305b261a9ddb9b3ecec9863e05aab3935b96038145fd3a977"}, - {file = "pymongo-4.6.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:d603edea1ff7408638b2504905c032193b7dcee7af269802dbb35bc8c3310ed5"}, - {file = "pymongo-4.6.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79f41576b3022c2fe9780ae3e44202b2438128a25284a8ddfa038f0785d87019"}, - {file = "pymongo-4.6.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:49f2af6cf82509b15093ce3569229e0d53c90ad8ae2eef940652d4cf1f81e045"}, - {file = "pymongo-4.6.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ecd9e1fa97aa11bf67472220285775fa15e896da108f425e55d23d7540a712ce"}, - {file = "pymongo-4.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d2be5c9c3488fa8a70f83ed925940f488eac2837a996708d98a0e54a861f212"}, - {file = "pymongo-4.6.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ab6bcc8e424e07c1d4ba6df96f7fb963bcb48f590b9456de9ebd03b88084fe8"}, - {file = "pymongo-4.6.0-cp312-cp312-win32.whl", hash = "sha256:47aa128be2e66abd9d1a9b0437c62499d812d291f17b55185cb4aa33a5f710a4"}, - {file = "pymongo-4.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:014e7049dd019a6663747ca7dae328943e14f7261f7c1381045dfc26a04fa330"}, - {file = "pymongo-4.6.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:288c21ab9531b037f7efa4e467b33176bc73a0c27223c141b822ab4a0e66ff2a"}, - {file = 
"pymongo-4.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:747c84f4e690fbe6999c90ac97246c95d31460d890510e4a3fa61b7d2b87aa34"}, - {file = "pymongo-4.6.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:055f5c266e2767a88bb585d01137d9c7f778b0195d3dbf4a487ef0638be9b651"}, - {file = "pymongo-4.6.0-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:82e620842e12e8cb4050d2643a81c8149361cd82c0a920fa5a15dc4ca8a4000f"}, - {file = "pymongo-4.6.0-cp37-cp37m-manylinux2014_ppc64le.whl", hash = "sha256:6b18276f14b4b6d92e707ab6db19b938e112bd2f1dc3f9f1a628df58e4fd3f0d"}, - {file = "pymongo-4.6.0-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:680fa0fc719e1a3dcb81130858368f51d83667d431924d0bcf249644bce8f303"}, - {file = "pymongo-4.6.0-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:3919708594b86d0f5cdc713eb6fccd3f9b9532af09ea7a5d843c933825ef56c4"}, - {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db082f728160369d9a6ed2e722438291558fc15ce06d0a7d696a8dad735c236b"}, - {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e4ed21029d80c4f62605ab16398fe1ce093fff4b5f22d114055e7d9fbc4adb0"}, - {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bea9138b0fc6e2218147e9c6ce1ff76ff8e29dc00bb1b64842bd1ca107aee9f"}, - {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a0269811661ba93c472c8a60ea82640e838c2eb148d252720a09b5123f2c2fe"}, - {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6d6a1b1361f118e7fefa17ae3114e77f10ee1b228b20d50c47c9f351346180c8"}, - {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7e3b0127b260d4abae7b62203c4c7ef0874c901b55155692353db19de4b18bc4"}, - {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a49aca4d961823b2846b739380c847e8964ff7ae0f0a683992b9d926054f0d6d"}, - {file = "pymongo-4.6.0-cp37-cp37m-win32.whl", hash = "sha256:09c7de516b08c57647176b9fc21d929d628e35bcebc7422220c89ae40b62126a"}, - {file = "pymongo-4.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:81dd1308bd5630d2bb5980f00aa163b986b133f1e9ed66c66ce2a5bc3572e891"}, - {file = "pymongo-4.6.0-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:2f8c04277d879146eacda920476e93d520eff8bec6c022ac108cfa6280d84348"}, - {file = "pymongo-4.6.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:5802acc012bbb4bce4dff92973dff76482f30ef35dd4cb8ab5b0e06aa8f08c80"}, - {file = "pymongo-4.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:ccd785fafa1c931deff6a7116e9a0d402d59fabe51644b0d0c268295ff847b25"}, - {file = "pymongo-4.6.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:fe03bf25fae4b95d8afe40004a321df644400fdcba4c8e5e1a19c1085b740888"}, - {file = "pymongo-4.6.0-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:2ca0ba501898b2ec31e6c3acf90c31910944f01d454ad8e489213a156ccf1bda"}, - {file = "pymongo-4.6.0-cp38-cp38-manylinux2014_ppc64le.whl", hash = "sha256:10a379fb60f1b2406ae57b8899bacfe20567918c8e9d2d545e1b93628fcf2050"}, - {file = "pymongo-4.6.0-cp38-cp38-manylinux2014_s390x.whl", hash = "sha256:a4dc1319d0c162919ee7f4ee6face076becae2abbd351cc14f1fe70af5fb20d9"}, - {file = "pymongo-4.6.0-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:ddef295aaf80cefb0c1606f1995899efcb17edc6b327eb6589e234e614b87756"}, - {file = 
"pymongo-4.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:518c90bdd6e842c446d01a766b9136fec5ec6cc94f3b8c3f8b4a332786ee6b64"}, - {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b80a4ee19b3442c57c38afa978adca546521a8822d663310b63ae2a7d7b13f3a"}, - {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eb438a8bf6b695bf50d57e6a059ff09652a07968b2041178b3744ea785fcef9b"}, - {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3db7d833a7c38c317dc95b54e27f1d27012e031b45a7c24e360b53197d5f6e7"}, - {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3729b8db02063da50eeb3db88a27670d85953afb9a7f14c213ac9e3dca93034b"}, - {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:39a1cd5d383b37285641d5a7a86be85274466ae336a61b51117155936529f9b3"}, - {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7b0e6361754ac596cd16bfc6ed49f69ffcd9b60b7bc4bcd3ea65c6a83475e4ff"}, - {file = "pymongo-4.6.0-cp38-cp38-win32.whl", hash = "sha256:806e094e9e85d8badc978af8c95b69c556077f11844655cb8cd2d1758769e521"}, - {file = "pymongo-4.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1394c4737b325166a65ae7c145af1ebdb9fb153ebedd37cf91d676313e4a67b8"}, - {file = "pymongo-4.6.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a8273e1abbcff1d7d29cbbb1ea7e57d38be72f1af3c597c854168508b91516c2"}, - {file = "pymongo-4.6.0-cp39-cp39-manylinux1_i686.whl", hash = "sha256:e16ade71c93f6814d095d25cd6d28a90d63511ea396bd96e9ffcb886b278baaa"}, - {file = "pymongo-4.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:325701ae7b56daa5b0692305b7cb505ca50f80a1288abb32ff420a8a209b01ca"}, - {file = "pymongo-4.6.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:cc94f9fea17a5af8cf1a343597711a26b0117c0b812550d99934acb89d526ed2"}, - {file = "pymongo-4.6.0-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:21812453354b151200034750cd30b0140e82ec2a01fd4357390f67714a1bfbde"}, - {file = "pymongo-4.6.0-cp39-cp39-manylinux2014_ppc64le.whl", hash = "sha256:0634994b026336195778e5693583c060418d4ab453eff21530422690a97e1ee8"}, - {file = "pymongo-4.6.0-cp39-cp39-manylinux2014_s390x.whl", hash = "sha256:ad4f66fbb893b55f96f03020e67dcab49ffde0177c6565ccf9dec4fdf974eb61"}, - {file = "pymongo-4.6.0-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:2703a9f8f5767986b4f51c259ff452cc837c5a83c8ed5f5361f6e49933743b2f"}, - {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bafea6061d63059d8bc2ffc545e2f049221c8a4457d236c5cd6a66678673eab"}, - {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f28ae33dc5a0b9cee06e95fd420e42155d83271ab75964baf747ce959cac5f52"}, - {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d16a534da0e39785687b7295e2fcf9a339f4a20689024983d11afaa4657f8507"}, - {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef67fedd863ffffd4adfd46d9d992b0f929c7f61a8307366d664d93517f2c78e"}, - {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:05c30fd35cc97f14f354916b45feea535d59060ef867446b5c3c7f9b609dd5dc"}, - {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:1c63e3a2e8fb815c4b1f738c284a4579897e37c3cfd95fdb199229a1ccfb638a"}, - {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:e5e193f89f4f8c1fe273f9a6e6df915092c9f2af6db2d1afb8bd53855025c11f"}, - {file = "pymongo-4.6.0-cp39-cp39-win32.whl", hash = "sha256:a09bfb51953930e7e838972ddf646c5d5f984992a66d79da6ba7f6a8d8a890cd"}, - {file = "pymongo-4.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:107a234dc55affc5802acb3b6d83cbb8c87355b38a9457fcd8806bdeb8bce161"}, - {file = "pymongo-4.6.0.tar.gz", hash = "sha256:fb1c56d891f9e34303c451998ef62ba52659648bb0d75b03c5e4ac223a3342c2"}, -] [package.dependencies] dnspython = ">=1.16.0,<3.0.0" @@ -6043,12 +3868,9 @@ zstd = ["zstandard"] name = "pymysql" version = "1.1.0" description = "Pure Python MySQL Driver" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "PyMySQL-1.1.0-py3-none-any.whl", hash = "sha256:8969ec6d763c856f7073c4c64662882675702efcb114b4bcbb955aea3a069fa7"}, - {file = "PyMySQL-1.1.0.tar.gz", hash = "sha256:4f13a7df8bf36a51e81dd9f3605fede45a4878fe02f9236349fd82a3f0612f96"}, -] [package.extras] ed25519 = ["PyNaCl (>=1.4.0)"] @@ -6058,56 +3880,17 @@ rsa = ["cryptography"] name = "pyodbc" version = "4.0.39" description = "DB API Module for ODBC" +category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" -files = [ - {file = "pyodbc-4.0.39-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:74af348dbaee4885998858daf50c8964e767629ecf6c195868b016367b0bb861"}, - {file = "pyodbc-4.0.39-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0f5901b57eaef0761f4cf02bca8e7c63f589fd0fd723a79f6ccf1ea1275372e5"}, - {file = "pyodbc-4.0.39-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0db69478d00fcd8d0b9bdde8aca0b0eada341fd6ed8c2da84b594b928c84106"}, - {file = "pyodbc-4.0.39-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5faf2870e9d434c6a85c6adc1cdff55c0e376273baf480f06d9848025405688"}, - {file = "pyodbc-4.0.39-cp310-cp310-win32.whl", hash = "sha256:62bb6d7d0d25dc75d1445e539f946461c9c5a3643ae14676b240f71794ea004f"}, - {file = "pyodbc-4.0.39-cp310-cp310-win_amd64.whl", hash = "sha256:8eb5547282dc73a7784ce7b99584f68687dd85543538ca6f70cffaa6310676e7"}, - {file = "pyodbc-4.0.39-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:530c1ac37ead782803b44fb1934ba4c68ed4a6969f7475cb8bc04ae1da14486e"}, - {file = "pyodbc-4.0.39-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1f7fb65191926308f09ce75ae7ccecf89310232ee50cdea74edf17ee04a9b068"}, - {file = "pyodbc-4.0.39-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ec009180fcd7c8197f45d083e6670623d8dfe198a457ca2a50ebb1bafe4107f"}, - {file = "pyodbc-4.0.39-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:400e911d54980098c6badadecc82385fc0d6a9057db525d63d2652317df43efe"}, - {file = "pyodbc-4.0.39-cp311-cp311-win32.whl", hash = "sha256:f792677b88e1dde12dab46de8647620fc8171742c02780d51744f7b1b2135dbc"}, - {file = "pyodbc-4.0.39-cp311-cp311-win_amd64.whl", hash = "sha256:3d9d70e1635d35ba3aee3df216ec8e35f2824909f43331c0112b17f460a93923"}, - {file = "pyodbc-4.0.39-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:c1a59096f1784d0cda3d0b8f393849f05515c46a10016edb6da1b1960d039800"}, - {file = "pyodbc-4.0.39-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b3467157661615d5c30893efa1069b55c9ffa434097fc3ae3739e740d83d2ec"}, - {file = 
"pyodbc-4.0.39-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af027a60e84274ea08fad1c75991d37a5f1f6e8bcd30f6bda20db99f0cdfbc7d"}, - {file = "pyodbc-4.0.39-cp36-cp36m-win32.whl", hash = "sha256:64c1de1263281de7b5ce585b0352746ab1a483453017a8589f838a79cbe3d6d9"}, - {file = "pyodbc-4.0.39-cp36-cp36m-win_amd64.whl", hash = "sha256:27d1b3c3159673b44c97c878f9d8056901d45f747ce2e0b4d5d99f0fb6949dc7"}, - {file = "pyodbc-4.0.39-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:efccc11dff6fba684a74ae1030c92ff8b82429d7f00e0a50aa2ac6f56621cd9f"}, - {file = "pyodbc-4.0.39-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea08e9379c08663d7260e2b8a6c451f56d36c17291af735191089f8e29ad9578"}, - {file = "pyodbc-4.0.39-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b36fe804d367d01ad81077fa524a36e667aabc3945e32564c7ef9595b28edfa9"}, - {file = "pyodbc-4.0.39-cp37-cp37m-win32.whl", hash = "sha256:72d364e52f6ca2417881a23834b3a36733c09e0dcd4760f49a6b864218d98d92"}, - {file = "pyodbc-4.0.39-cp37-cp37m-win_amd64.whl", hash = "sha256:39f6c56022c764309aa7552c0eb2c58fbb5902ab5d2010d42b021c0b205aa609"}, - {file = "pyodbc-4.0.39-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ebcb900fcaf19ca2bc38632218c5d48c666fcc19fe38b08cde001917f4581456"}, - {file = "pyodbc-4.0.39-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a3e133621ac2dad22d0870a8521c7e82d4270e24ce02451d64e7eb6a40ad0941"}, - {file = "pyodbc-4.0.39-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05a0912e852ebddaffa8f235b0f3974475021dd8eb604eb46ea67af06efe1239"}, - {file = "pyodbc-4.0.39-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6353044b99c763aeec7ca1760b4340298504d8ee544fdcab3c380a2abec15b78"}, - {file = "pyodbc-4.0.39-cp38-cp38-win32.whl", hash = "sha256:a591a1cf3c251a9c7c1642cfb3774119bf3512f3be56151247238f8a7b22b336"}, - {file = "pyodbc-4.0.39-cp38-cp38-win_amd64.whl", hash = "sha256:8553eaef9f8ec333bbddff6eadf0d322dda34b37f4bab19f0658eb532037840c"}, - {file = "pyodbc-4.0.39-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9253e746c5c94bf61e3e9adb08fb7688d413cb68c06ebb287ec233387534760a"}, - {file = "pyodbc-4.0.39-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a6f4067f46aaa78e77e8a15ade81eb21fb344563d245fb2d9a0aaa553c367cbd"}, - {file = "pyodbc-4.0.39-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdf5a27e6587d1762f7f0e35d6f0309f09019bf3e19ca9177a4b765121f3f106"}, - {file = "pyodbc-4.0.39-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe4ee87b88867867f582dd0c1236cd982508db359a6cbb5e91623ceb6c83e60a"}, - {file = "pyodbc-4.0.39-cp39-cp39-win32.whl", hash = "sha256:42649ed57d09c04aa197bdd4fe0aa9ca319790b7aa86d0b0784cc70e78c426e5"}, - {file = "pyodbc-4.0.39-cp39-cp39-win_amd64.whl", hash = "sha256:305c7d6337e2d4c8350677cc641b343fc0197b7b9bc167815c66b64545c67a53"}, - {file = "pyodbc-4.0.39.tar.gz", hash = "sha256:e528bb70dd6d6299ee429868925df0866e3e919c772b9eff79c8e17920d8f116"}, -] [[package]] name = "pyopenssl" version = "23.2.0" description = "Python wrapper module around the OpenSSL library" +category = "main" optional = true python-versions = ">=3.6" -files = [ - {file = "pyOpenSSL-23.2.0-py3-none-any.whl", hash = "sha256:24f0dc5227396b3e831f4c7f602b950a5e9833d292c8e4a2e06b709292806ae2"}, - {file = "pyOpenSSL-23.2.0.tar.gz", hash = "sha256:276f931f55a452e7dea69c7173e984eb2a4407ce413c918aa34b55f82f9b8bac"}, -] [package.dependencies] cryptography = 
">=38.0.0,<40.0.0 || >40.0.0,<40.0.1 || >40.0.1,<42" @@ -6120,12 +3903,9 @@ test = ["flaky", "pretend", "pytest (>=3.0.1)"] name = "pyparsing" version = "3.1.1" description = "pyparsing module - Classes and methods to define and execute parsing grammars" +category = "dev" optional = false python-versions = ">=3.6.8" -files = [ - {file = "pyparsing-3.1.1-py3-none-any.whl", hash = "sha256:32c7c0b711493c72ff18a981d24f28aaf9c1fb7ed5e9667c9e84e3db623bdbfb"}, - {file = "pyparsing-3.1.1.tar.gz", hash = "sha256:ede28a1a32462f5a9705e07aea48001a08f7cf81a021585011deba701581a0db"}, -] [package.extras] diagrams = ["jinja2", "railroad-diagrams"] @@ -6134,12 +3914,9 @@ diagrams = ["jinja2", "railroad-diagrams"] name = "pypdf2" version = "3.0.1" description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" +category = "dev" optional = false python-versions = ">=3.6" -files = [ - {file = "PyPDF2-3.0.1.tar.gz", hash = "sha256:a74408f69ba6271f71b9352ef4ed03dc53a31aa404d29b5d31f53bfecfee1440"}, - {file = "pypdf2-3.0.1-py3-none-any.whl", hash = "sha256:d16e4205cfee272fbdc0568b68d82be796540b1537508cef59388f839c191928"}, -] [package.dependencies] typing_extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} @@ -6155,23 +3932,17 @@ image = ["Pillow"] name = "pyreadline3" version = "3.4.1" description = "A python implementation of GNU readline." +category = "main" optional = true python-versions = "*" -files = [ - {file = "pyreadline3-3.4.1-py3-none-any.whl", hash = "sha256:b0efb6516fd4fb07b45949053826a62fa4cb353db5be2bbb4a7aa1fdd1e345fb"}, - {file = "pyreadline3-3.4.1.tar.gz", hash = "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae"}, -] [[package]] name = "pytest" version = "6.2.5" description = "pytest: simple powerful testing with Python" +category = "dev" optional = false python-versions = ">=3.6" -files = [ - {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"}, - {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"}, -] [package.dependencies] atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} @@ -6190,12 +3961,9 @@ testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xm name = "pytest-cases" version = "3.6.14" description = "Separate test code from test cases in pytest." 
+category = "dev" optional = false python-versions = "*" -files = [ - {file = "pytest-cases-3.6.14.tar.gz", hash = "sha256:7455e6ca57a544c1bfdd8b56ace08c1c1ce4c6572a8aab8f1bd351dc25a10b6b"}, - {file = "pytest_cases-3.6.14-py2.py3-none-any.whl", hash = "sha256:a087f3d019efd8942d0f0dc3fb526bedf9f83d742c40289e9623f6788aff7257"}, -] [package.dependencies] decopatch = "*" @@ -6205,12 +3973,9 @@ makefun = ">=1.9.5" name = "pytest-console-scripts" version = "1.4.1" description = "Pytest plugin for testing console scripts" +category = "dev" optional = false python-versions = ">=3.8" -files = [ - {file = "pytest-console-scripts-1.4.1.tar.gz", hash = "sha256:5a826ed84cc0afa202eb9e44381d7d762f7bdda8e0c23f9f79a7f1f44cf4a895"}, - {file = "pytest_console_scripts-1.4.1-py3-none-any.whl", hash = "sha256:ad860a951a90eca4bd3bd1159b8f5428633ba4ea01abd5c9526b67a95f65437a"}, -] [package.dependencies] importlib-metadata = {version = ">=3.6", markers = "python_version < \"3.10\""} @@ -6220,12 +3985,9 @@ pytest = ">=4.0.0" name = "pytest-forked" version = "1.6.0" description = "run tests in isolated forked subprocesses" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "pytest-forked-1.6.0.tar.gz", hash = "sha256:4dafd46a9a600f65d822b8f605133ecf5b3e1941ebb3588e943b4e3eb71a5a3f"}, - {file = "pytest_forked-1.6.0-py3-none-any.whl", hash = "sha256:810958f66a91afb1a1e2ae83089d8dc1cd2437ac96b12963042fbb9fb4d16af0"}, -] [package.dependencies] py = "*" @@ -6235,12 +3997,9 @@ pytest = ">=3.10" name = "pytest-order" version = "1.1.0" description = "pytest plugin to run your tests in a specific order" +category = "dev" optional = false python-versions = ">=3.6" -files = [ - {file = "pytest-order-1.1.0.tar.gz", hash = "sha256:139d25b30826b78eebb42722f747eab14c44b88059d7a71d4f79d14a057269a5"}, - {file = "pytest_order-1.1.0-py3-none-any.whl", hash = "sha256:3b3730969c97900fa5cd31ecff80847680ed56b2490954565c14949ba60d9371"}, -] [package.dependencies] pytest = [ @@ -6252,12 +4011,9 @@ pytest = [ name = "pytest-pythonpath" version = "0.7.4" description = "pytest plugin for adding to the PYTHONPATH from command line or configs." +category = "dev" optional = false python-versions = ">=2.6, <4" -files = [ - {file = "pytest-pythonpath-0.7.4.tar.gz", hash = "sha256:64e195b23a8f8c0c631fb16882d9ad6fa4137ed1f2961ddd15d52065cd435db6"}, - {file = "pytest_pythonpath-0.7.4-py3-none-any.whl", hash = "sha256:e73e11dab2f0b83e73229e261242b251f0a369d7f527dbfec068822fd26a6ce5"}, -] [package.dependencies] pytest = ">=2.5.2,<7" @@ -6266,12 +4022,9 @@ pytest = ">=2.5.2,<7" name = "python-daemon" version = "3.0.1" description = "Library to implement a well-behaved Unix daemon process." 
+category = "dev" optional = false python-versions = ">=3" -files = [ - {file = "python-daemon-3.0.1.tar.gz", hash = "sha256:6c57452372f7eaff40934a1c03ad1826bf5e793558e87fef49131e6464b4dae5"}, - {file = "python_daemon-3.0.1-py3-none-any.whl", hash = "sha256:42bb848a3260a027fa71ad47ecd959e471327cb34da5965962edd5926229f341"}, -] [package.dependencies] docutils = "*" @@ -6286,12 +4039,9 @@ test = ["coverage", "docutils", "testscenarios (>=0.4)", "testtools"] name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -files = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, -] [package.dependencies] six = ">=1.5" @@ -6300,11 +4050,9 @@ six = ">=1.5" name = "python-nvd3" version = "0.15.0" description = "Python NVD3 - Chart Library for d3.js" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "python-nvd3-0.15.0.tar.gz", hash = "sha256:fbd75ff47e0ef255b4aa4f3a8b10dc8b4024aa5a9a7abed5b2406bd3cb817715"}, -] [package.dependencies] Jinja2 = ">=2.8" @@ -6314,12 +4062,9 @@ python-slugify = ">=1.2.5" name = "python-slugify" version = "8.0.1" description = "A Python slugify application that also handles Unicode" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "python-slugify-8.0.1.tar.gz", hash = "sha256:ce0d46ddb668b3be82f4ed5e503dbc33dd815d83e2eb6824211310d3fb172a27"}, - {file = "python_slugify-8.0.1-py2.py3-none-any.whl", hash = "sha256:70ca6ea68fe63ecc8fa4fcf00ae651fc8a5d02d93dcd12ae6d4fc7ca46c4d395"}, -] [package.dependencies] text-unidecode = ">=1.3" @@ -6331,128 +4076,57 @@ unidecode = ["Unidecode (>=1.1.1)"] name = "pytimeparse" version = "1.1.8" description = "Time expression parser" +category = "main" optional = false python-versions = "*" -files = [ - {file = "pytimeparse-1.1.8-py2.py3-none-any.whl", hash = "sha256:04b7be6cc8bd9f5647a6325444926c3ac34ee6bc7e69da4367ba282f076036bd"}, - {file = "pytimeparse-1.1.8.tar.gz", hash = "sha256:e86136477be924d7e670646a98561957e8ca7308d44841e21f5ddea757556a0a"}, -] [[package]] name = "pytz" version = "2023.3" description = "World timezone definitions, modern and historical" +category = "main" optional = false python-versions = "*" -files = [ - {file = "pytz-2023.3-py2.py3-none-any.whl", hash = "sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb"}, - {file = "pytz-2023.3.tar.gz", hash = "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"}, -] [[package]] name = "pytzdata" version = "2020.1" description = "The Olson timezone database for Python." 
+category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "pytzdata-2020.1-py2.py3-none-any.whl", hash = "sha256:e1e14750bcf95016381e4d472bad004eef710f2d6417240904070b3d6654485f"}, - {file = "pytzdata-2020.1.tar.gz", hash = "sha256:3efa13b335a00a8de1d345ae41ec78dd11c9f8807f522d39850f2dd828681540"}, -] [[package]] name = "pywin32" version = "306" description = "Python for Window Extensions" +category = "main" optional = true python-versions = "*" -files = [ - {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, - {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, - {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"}, - {file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"}, - {file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"}, - {file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"}, - {file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"}, - {file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"}, - {file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"}, - {file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"}, - {file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"}, - {file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"}, - {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"}, - {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, -] [[package]] name = "pywin32-ctypes" version = "0.2.2" description = "A (partial) reimplementation of pywin32 using ctypes/cffi" +category = "main" optional = true python-versions = ">=3.6" -files = [ - {file = "pywin32-ctypes-0.2.2.tar.gz", hash = "sha256:3426e063bdd5fd4df74a14fa3cf80a0b42845a87e1d1e81f6549f9daec593a60"}, - {file = "pywin32_ctypes-0.2.2-py3-none-any.whl", hash = "sha256:bf490a1a709baf35d688fe0ecf980ed4de11d2b3e37b51e5442587a75d9957e7"}, -] [[package]] name = "pyyaml" version = "6.0.1" description = "YAML parser and emitter for Python" +category = "main" optional = false python-versions = ">=3.6" -files = [ - {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, - {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, - {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, - {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, - {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, - {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, - {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, - {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, - {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, - {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, - {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, - {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, - {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, - {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, - {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, - {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, - {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, - {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, - {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, - {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, - {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, - {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, - {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, - {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, - {file = 
"PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, - {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, - {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, - {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, - {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, - {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, - {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, - {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, - {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, - {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, - {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, -] [[package]] name = "qdrant-client" version = "1.6.4" description = "Client library for the Qdrant vector search engine" +category = "main" optional = true python-versions = ">=3.8,<3.13" -files = [ - {file = "qdrant_client-1.6.4-py3-none-any.whl", hash = "sha256:db4696978d6a62d78ff60f70b912383f1e467bda3053f732b01ddb5f93281b10"}, - {file = "qdrant_client-1.6.4.tar.gz", hash = "sha256:bbd65f383b6a55a9ccf4e301250fa925179340dd90cfde9b93ce4230fd68867b"}, -] [package.dependencies] fastembed = {version = "0.1.1", optional = true, markers = "python_version < \"3.12\" and extra == \"fastembed\""} @@ -6474,11 +4148,9 @@ fastembed = ["fastembed (==0.1.1)"] name = "redshift-connector" version = "2.0.913" description = "Redshift interface library" +category = "main" optional = true python-versions = ">=3.6" -files = [ - {file = "redshift_connector-2.0.913-py3-none-any.whl", hash = "sha256:bd70395c5b7ec9fcae9565daff6bcb88c7d3ea6182dafba2bac6138f68d00582"}, -] [package.dependencies] beautifulsoup4 = ">=4.7.0,<5.0.0" @@ -6498,12 +4170,9 @@ full = ["numpy", "pandas"] name = "referencing" version = "0.30.2" description = "JSON Referencing + Python" +category = "main" optional = false python-versions = ">=3.8" -files = [ - {file = "referencing-0.30.2-py3-none-any.whl", hash = "sha256:449b6669b6121a9e96a7f9e410b245d471e8d48964c67113ce9afe50c8dd7bdf"}, - {file = "referencing-0.30.2.tar.gz", hash = "sha256:794ad8003c65938edcdbc027f1933215e0d0ccc0291e3ce20a4d87432b59efc0"}, -] [package.dependencies] attrs = ">=22.2.0" @@ -6513,109 +4182,17 @@ rpds-py = ">=0.7.0" name = "regex" version = "2023.8.8" description = "Alternative regular 
expression module, to replace re." +category = "dev" optional = false python-versions = ">=3.6" -files = [ - {file = "regex-2023.8.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:88900f521c645f784260a8d346e12a1590f79e96403971241e64c3a265c8ecdb"}, - {file = "regex-2023.8.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3611576aff55918af2697410ff0293d6071b7e00f4b09e005d614686ac4cd57c"}, - {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8a0ccc8f2698f120e9e5742f4b38dc944c38744d4bdfc427616f3a163dd9de5"}, - {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c662a4cbdd6280ee56f841f14620787215a171c4e2d1744c9528bed8f5816c96"}, - {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cf0633e4a1b667bfe0bb10b5e53fe0d5f34a6243ea2530eb342491f1adf4f739"}, - {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:551ad543fa19e94943c5b2cebc54c73353ffff08228ee5f3376bd27b3d5b9800"}, - {file = "regex-2023.8.8-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54de2619f5ea58474f2ac211ceea6b615af2d7e4306220d4f3fe690c91988a61"}, - {file = "regex-2023.8.8-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5ec4b3f0aebbbe2fc0134ee30a791af522a92ad9f164858805a77442d7d18570"}, - {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3ae646c35cb9f820491760ac62c25b6d6b496757fda2d51be429e0e7b67ae0ab"}, - {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ca339088839582d01654e6f83a637a4b8194d0960477b9769d2ff2cfa0fa36d2"}, - {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:d9b6627408021452dcd0d2cdf8da0534e19d93d070bfa8b6b4176f99711e7f90"}, - {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:bd3366aceedf274f765a3a4bc95d6cd97b130d1dda524d8f25225d14123c01db"}, - {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7aed90a72fc3654fba9bc4b7f851571dcc368120432ad68b226bd593f3f6c0b7"}, - {file = "regex-2023.8.8-cp310-cp310-win32.whl", hash = "sha256:80b80b889cb767cc47f31d2b2f3dec2db8126fbcd0cff31b3925b4dc6609dcdb"}, - {file = "regex-2023.8.8-cp310-cp310-win_amd64.whl", hash = "sha256:b82edc98d107cbc7357da7a5a695901b47d6eb0420e587256ba3ad24b80b7d0b"}, - {file = "regex-2023.8.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1e7d84d64c84ad97bf06f3c8cb5e48941f135ace28f450d86af6b6512f1c9a71"}, - {file = "regex-2023.8.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ce0f9fbe7d295f9922c0424a3637b88c6c472b75eafeaff6f910494a1fa719ef"}, - {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06c57e14ac723b04458df5956cfb7e2d9caa6e9d353c0b4c7d5d54fcb1325c46"}, - {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e7a9aaa5a1267125eef22cef3b63484c3241aaec6f48949b366d26c7250e0357"}, - {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b7408511fca48a82a119d78a77c2f5eb1b22fe88b0d2450ed0756d194fe7a9a"}, - {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14dc6f2d88192a67d708341f3085df6a4f5a0c7b03dec08d763ca2cd86e9f559"}, - {file = 
"regex-2023.8.8-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:48c640b99213643d141550326f34f0502fedb1798adb3c9eb79650b1ecb2f177"}, - {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0085da0f6c6393428bf0d9c08d8b1874d805bb55e17cb1dfa5ddb7cfb11140bf"}, - {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:964b16dcc10c79a4a2be9f1273fcc2684a9eedb3906439720598029a797b46e6"}, - {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7ce606c14bb195b0e5108544b540e2c5faed6843367e4ab3deb5c6aa5e681208"}, - {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:40f029d73b10fac448c73d6eb33d57b34607f40116e9f6e9f0d32e9229b147d7"}, - {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3b8e6ea6be6d64104d8e9afc34c151926f8182f84e7ac290a93925c0db004bfd"}, - {file = "regex-2023.8.8-cp311-cp311-win32.whl", hash = "sha256:942f8b1f3b223638b02df7df79140646c03938d488fbfb771824f3d05fc083a8"}, - {file = "regex-2023.8.8-cp311-cp311-win_amd64.whl", hash = "sha256:51d8ea2a3a1a8fe4f67de21b8b93757005213e8ac3917567872f2865185fa7fb"}, - {file = "regex-2023.8.8-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e951d1a8e9963ea51efd7f150450803e3b95db5939f994ad3d5edac2b6f6e2b4"}, - {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:704f63b774218207b8ccc6c47fcef5340741e5d839d11d606f70af93ee78e4d4"}, - {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:22283c769a7b01c8ac355d5be0715bf6929b6267619505e289f792b01304d898"}, - {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:91129ff1bb0619bc1f4ad19485718cc623a2dc433dff95baadbf89405c7f6b57"}, - {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de35342190deb7b866ad6ba5cbcccb2d22c0487ee0cbb251efef0843d705f0d4"}, - {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b993b6f524d1e274a5062488a43e3f9f8764ee9745ccd8e8193df743dbe5ee61"}, - {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3026cbcf11d79095a32d9a13bbc572a458727bd5b1ca332df4a79faecd45281c"}, - {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:293352710172239bf579c90a9864d0df57340b6fd21272345222fb6371bf82b3"}, - {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:d909b5a3fff619dc7e48b6b1bedc2f30ec43033ba7af32f936c10839e81b9217"}, - {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:3d370ff652323c5307d9c8e4c62efd1956fb08051b0e9210212bc51168b4ff56"}, - {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:b076da1ed19dc37788f6a934c60adf97bd02c7eea461b73730513921a85d4235"}, - {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:e9941a4ada58f6218694f382e43fdd256e97615db9da135e77359da257a7168b"}, - {file = "regex-2023.8.8-cp36-cp36m-win32.whl", hash = "sha256:a8c65c17aed7e15a0c824cdc63a6b104dfc530f6fa8cb6ac51c437af52b481c7"}, - {file = "regex-2023.8.8-cp36-cp36m-win_amd64.whl", hash = "sha256:aadf28046e77a72f30dcc1ab185639e8de7f4104b8cb5c6dfa5d8ed860e57236"}, - {file = "regex-2023.8.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = 
"sha256:423adfa872b4908843ac3e7a30f957f5d5282944b81ca0a3b8a7ccbbfaa06103"}, - {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ae594c66f4a7e1ea67232a0846649a7c94c188d6c071ac0210c3e86a5f92109"}, - {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e51c80c168074faa793685656c38eb7a06cbad7774c8cbc3ea05552d615393d8"}, - {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:09b7f4c66aa9d1522b06e31a54f15581c37286237208df1345108fcf4e050c18"}, - {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e73e5243af12d9cd6a9d6a45a43570dbe2e5b1cdfc862f5ae2b031e44dd95a8"}, - {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:941460db8fe3bd613db52f05259c9336f5a47ccae7d7def44cc277184030a116"}, - {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f0ccf3e01afeb412a1a9993049cb160d0352dba635bbca7762b2dc722aa5742a"}, - {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2e9216e0d2cdce7dbc9be48cb3eacb962740a09b011a116fd7af8c832ab116ca"}, - {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:5cd9cd7170459b9223c5e592ac036e0704bee765706445c353d96f2890e816c8"}, - {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:4873ef92e03a4309b3ccd8281454801b291b689f6ad45ef8c3658b6fa761d7ac"}, - {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:239c3c2a339d3b3ddd51c2daef10874410917cd2b998f043c13e2084cb191684"}, - {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1005c60ed7037be0d9dea1f9c53cc42f836188227366370867222bda4c3c6bd7"}, - {file = "regex-2023.8.8-cp37-cp37m-win32.whl", hash = "sha256:e6bd1e9b95bc5614a7a9c9c44fde9539cba1c823b43a9f7bc11266446dd568e3"}, - {file = "regex-2023.8.8-cp37-cp37m-win_amd64.whl", hash = "sha256:9a96edd79661e93327cfeac4edec72a4046e14550a1d22aa0dd2e3ca52aec921"}, - {file = "regex-2023.8.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f2181c20ef18747d5f4a7ea513e09ea03bdd50884a11ce46066bb90fe4213675"}, - {file = "regex-2023.8.8-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a2ad5add903eb7cdde2b7c64aaca405f3957ab34f16594d2b78d53b8b1a6a7d6"}, - {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9233ac249b354c54146e392e8a451e465dd2d967fc773690811d3a8c240ac601"}, - {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:920974009fb37b20d32afcdf0227a2e707eb83fe418713f7a8b7de038b870d0b"}, - {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd2b6c5dfe0929b6c23dde9624483380b170b6e34ed79054ad131b20203a1a63"}, - {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96979d753b1dc3b2169003e1854dc67bfc86edf93c01e84757927f810b8c3c93"}, - {file = "regex-2023.8.8-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ae54a338191e1356253e7883d9d19f8679b6143703086245fb14d1f20196be9"}, - {file = "regex-2023.8.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2162ae2eb8b079622176a81b65d486ba50b888271302190870b8cc488587d280"}, - {file = 
"regex-2023.8.8-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c884d1a59e69e03b93cf0dfee8794c63d7de0ee8f7ffb76e5f75be8131b6400a"}, - {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:cf9273e96f3ee2ac89ffcb17627a78f78e7516b08f94dc435844ae72576a276e"}, - {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:83215147121e15d5f3a45d99abeed9cf1fe16869d5c233b08c56cdf75f43a504"}, - {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:3f7454aa427b8ab9101f3787eb178057c5250478e39b99540cfc2b889c7d0586"}, - {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f0640913d2c1044d97e30d7c41728195fc37e54d190c5385eacb52115127b882"}, - {file = "regex-2023.8.8-cp38-cp38-win32.whl", hash = "sha256:0c59122ceccb905a941fb23b087b8eafc5290bf983ebcb14d2301febcbe199c7"}, - {file = "regex-2023.8.8-cp38-cp38-win_amd64.whl", hash = "sha256:c12f6f67495ea05c3d542d119d270007090bad5b843f642d418eb601ec0fa7be"}, - {file = "regex-2023.8.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:82cd0a69cd28f6cc3789cc6adeb1027f79526b1ab50b1f6062bbc3a0ccb2dbc3"}, - {file = "regex-2023.8.8-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bb34d1605f96a245fc39790a117ac1bac8de84ab7691637b26ab2c5efb8f228c"}, - {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:987b9ac04d0b38ef4f89fbc035e84a7efad9cdd5f1e29024f9289182c8d99e09"}, - {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9dd6082f4e2aec9b6a0927202c85bc1b09dcab113f97265127c1dc20e2e32495"}, - {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7eb95fe8222932c10d4436e7a6f7c99991e3fdd9f36c949eff16a69246dee2dc"}, - {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7098c524ba9f20717a56a8d551d2ed491ea89cbf37e540759ed3b776a4f8d6eb"}, - {file = "regex-2023.8.8-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b694430b3f00eb02c594ff5a16db30e054c1b9589a043fe9174584c6efa8033"}, - {file = "regex-2023.8.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b2aeab3895d778155054abea5238d0eb9a72e9242bd4b43f42fd911ef9a13470"}, - {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:988631b9d78b546e284478c2ec15c8a85960e262e247b35ca5eaf7ee22f6050a"}, - {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:67ecd894e56a0c6108ec5ab1d8fa8418ec0cff45844a855966b875d1039a2e34"}, - {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:14898830f0a0eb67cae2bbbc787c1a7d6e34ecc06fbd39d3af5fe29a4468e2c9"}, - {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:f2200e00b62568cfd920127782c61bc1c546062a879cdc741cfcc6976668dfcf"}, - {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9691a549c19c22d26a4f3b948071e93517bdf86e41b81d8c6ac8a964bb71e5a6"}, - {file = "regex-2023.8.8-cp39-cp39-win32.whl", hash = "sha256:6ab2ed84bf0137927846b37e882745a827458689eb969028af8032b1b3dac78e"}, - {file = "regex-2023.8.8-cp39-cp39-win_amd64.whl", hash = "sha256:5543c055d8ec7801901e1193a51570643d6a6ab8751b1f7dd9af71af467538bb"}, - {file = "regex-2023.8.8.tar.gz", hash = "sha256:fcbdc5f2b0f1cd0f6a56cdb46fe41d2cce1e644e3b68832f3eeebc5fb0f7712e"}, -] [[package]] name = "requests" version = "2.31.0" description = "Python HTTP for 
Humans." +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, - {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, -] [package.dependencies] certifi = ">=2017.4.17" @@ -6631,12 +4208,9 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "requests-mock" version = "1.11.0" description = "Mock out responses from the requests package" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "requests-mock-1.11.0.tar.gz", hash = "sha256:ef10b572b489a5f28e09b708697208c4a3b2b89ef80a9f01584340ea357ec3c4"}, - {file = "requests_mock-1.11.0-py2.py3-none-any.whl", hash = "sha256:f7fae383f228633f6bececebdab236c478ace2284d6292c6e7e2867b9ab74d15"}, -] [package.dependencies] requests = ">=2.3,<3" @@ -6650,12 +4224,9 @@ test = ["fixtures", "mock", "purl", "pytest", "requests-futures", "sphinx", "tes name = "requests-oauthlib" version = "1.3.1" description = "OAuthlib authentication support for Requests." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "requests-oauthlib-1.3.1.tar.gz", hash = "sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a"}, - {file = "requests_oauthlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5"}, -] [package.dependencies] oauthlib = ">=3.0.0" @@ -6668,12 +4239,9 @@ rsa = ["oauthlib[signedtoken] (>=3.0.0)"] name = "requests-toolbelt" version = "1.0.0" description = "A utility belt for advanced users of python-requests" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, - {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, -] [package.dependencies] requests = ">=2.0.1,<3.0.0" @@ -6682,12 +4250,9 @@ requests = ">=2.0.1,<3.0.0" name = "requirements-parser" version = "0.5.0" description = "This is a small Python module for parsing Pip requirement files." 
+category = "main" optional = false python-versions = ">=3.6,<4.0" -files = [ - {file = "requirements-parser-0.5.0.tar.gz", hash = "sha256:3336f3a3ae23e06d3f0f88595e4052396e3adf91688787f637e5d2ca1a904069"}, - {file = "requirements_parser-0.5.0-py3-none-any.whl", hash = "sha256:e7fcdcd04f2049e73a9fb150d8a0f9d51ce4108f5f7cbeac74c484e17b12bcd9"}, -] [package.dependencies] types-setuptools = ">=57.0.0" @@ -6696,12 +4261,9 @@ types-setuptools = ">=57.0.0" name = "rfc3339-validator" version = "0.1.4" description = "A pure python RFC3339 validator" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -files = [ - {file = "rfc3339_validator-0.1.4-py2.py3-none-any.whl", hash = "sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa"}, - {file = "rfc3339_validator-0.1.4.tar.gz", hash = "sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b"}, -] [package.dependencies] six = "*" @@ -6710,12 +4272,9 @@ six = "*" name = "rich" version = "13.5.2" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +category = "dev" optional = false python-versions = ">=3.7.0" -files = [ - {file = "rich-13.5.2-py3-none-any.whl", hash = "sha256:146a90b3b6b47cac4a73c12866a499e9817426423f57c5a66949c086191a8808"}, - {file = "rich-13.5.2.tar.gz", hash = "sha256:fb9d6c0a0f643c99eed3875b5377a184132ba9be4d61516a55273d3554d75a39"}, -] [package.dependencies] markdown-it-py = ">=2.2.0" @@ -6729,12 +4288,9 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"] name = "rich-argparse" version = "1.3.0" description = "Rich help formatters for argparse and optparse" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "rich_argparse-1.3.0-py3-none-any.whl", hash = "sha256:1a5eda1659c0a215862fe3630fcbe68d7792f18a8106baaf4e005b9896acc6f6"}, - {file = "rich_argparse-1.3.0.tar.gz", hash = "sha256:974cc1ba0aaa0d6aabc09ab1b78f9ba928670e08590f9551121bcbc60c75b74a"}, -] [package.dependencies] rich = ">=11.0.0" @@ -6743,118 +4299,17 @@ rich = ">=11.0.0" name = "rpds-py" version = "0.10.0" description = "Python bindings to Rust's persistent data structures (rpds)" +category = "main" optional = false python-versions = ">=3.8" -files = [ - {file = "rpds_py-0.10.0-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:c1e0e9916301e3b3d970814b1439ca59487f0616d30f36a44cead66ee1748c31"}, - {file = "rpds_py-0.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8ce8caa29ebbdcde67e5fd652c811d34bc01f249dbc0d61e5cc4db05ae79a83b"}, - {file = "rpds_py-0.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad277f74b1c164f7248afa968700e410651eb858d7c160d109fb451dc45a2f09"}, - {file = "rpds_py-0.10.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8e1c68303ccf7fceb50fbab79064a2636119fd9aca121f28453709283dbca727"}, - {file = "rpds_py-0.10.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:780fcb855be29153901c67fc9c5633d48aebef21b90aa72812fa181d731c6b00"}, - {file = "rpds_py-0.10.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bbd7b24d108509a1b9b6679fcc1166a7dd031dbef1f3c2c73788f42e3ebb3beb"}, - {file = "rpds_py-0.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0700c2133ba203c4068aaecd6a59bda22e06a5e46255c9da23cbf68c6942215d"}, - {file = "rpds_py-0.10.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:576da63eae7809f375932bfcbca2cf20620a1915bf2fedce4b9cc8491eceefe3"}, - {file = "rpds_py-0.10.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:23750a9b8a329844ba1fe267ca456bb3184984da2880ed17ae641c5af8de3fef"}, - {file = "rpds_py-0.10.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:d08395595c42bcd82c3608762ce734504c6d025eef1c06f42326a6023a584186"}, - {file = "rpds_py-0.10.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1d7b7b71bcb82d8713c7c2e9c5f061415598af5938666beded20d81fa23e7640"}, - {file = "rpds_py-0.10.0-cp310-none-win32.whl", hash = "sha256:97f5811df21703446b42303475b8b855ee07d6ab6cdf8565eff115540624f25d"}, - {file = "rpds_py-0.10.0-cp310-none-win_amd64.whl", hash = "sha256:cdbed8f21204398f47de39b0a9b180d7e571f02dfb18bf5f1b618e238454b685"}, - {file = "rpds_py-0.10.0-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:7a3a3d3e4f1e3cd2a67b93a0b6ed0f2499e33f47cc568e3a0023e405abdc0ff1"}, - {file = "rpds_py-0.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fc72ae476732cdb7b2c1acb5af23b478b8a0d4b6fcf19b90dd150291e0d5b26b"}, - {file = "rpds_py-0.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0583f69522732bdd79dca4cd3873e63a29acf4a299769c7541f2ca1e4dd4bc6"}, - {file = "rpds_py-0.10.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f8b9a7cd381970e64849070aca7c32d53ab7d96c66db6c2ef7aa23c6e803f514"}, - {file = "rpds_py-0.10.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0d292cabd7c8335bdd3237ded442480a249dbcdb4ddfac5218799364a01a0f5c"}, - {file = "rpds_py-0.10.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6903cdca64f1e301af9be424798328c1fe3b4b14aede35f04510989fc72f012"}, - {file = "rpds_py-0.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bed57543c99249ab3a4586ddc8786529fbc33309e5e8a1351802a06ca2baf4c2"}, - {file = "rpds_py-0.10.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:15932ec5f224b0e35764dc156514533a4fca52dcfda0dfbe462a1a22b37efd59"}, - {file = "rpds_py-0.10.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:eb2d59bc196e6d3b1827c7db06c1a898bfa0787c0574af398e65ccf2e97c0fbe"}, - {file = "rpds_py-0.10.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f99d74ddf9d3b6126b509e81865f89bd1283e3fc1b568b68cd7bd9dfa15583d7"}, - {file = "rpds_py-0.10.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f70bec8a14a692be6dbe7ce8aab303e88df891cbd4a39af091f90b6702e28055"}, - {file = "rpds_py-0.10.0-cp311-none-win32.whl", hash = "sha256:5f7487be65b9c2c510819e744e375bd41b929a97e5915c4852a82fbb085df62c"}, - {file = "rpds_py-0.10.0-cp311-none-win_amd64.whl", hash = "sha256:748e472345c3a82cfb462d0dff998a7bf43e621eed73374cb19f307e97e08a83"}, - {file = "rpds_py-0.10.0-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:d4639111e73997567343df6551da9dd90d66aece1b9fc26c786d328439488103"}, - {file = "rpds_py-0.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f4760e1b02173f4155203054f77a5dc0b4078de7645c922b208d28e7eb99f3e2"}, - {file = "rpds_py-0.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a6420a36975e0073acaeee44ead260c1f6ea56812cfc6c31ec00c1c48197173"}, - {file = "rpds_py-0.10.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:58fc4d66ee349a23dbf08c7e964120dc9027059566e29cf0ce6205d590ed7eca"}, - {file = "rpds_py-0.10.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:063411228b852fb2ed7485cf91f8e7d30893e69b0acb207ec349db04cccc8225"}, - {file = "rpds_py-0.10.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65af12f70355de29e1092f319f85a3467f4005e959ab65129cb697169ce94b86"}, - {file = "rpds_py-0.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:298e8b5d8087e0330aac211c85428c8761230ef46a1f2c516d6a2f67fb8803c5"}, - {file = "rpds_py-0.10.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5b9bf77008f2c55dabbd099fd3ac87009471d223a1c7ebea36873d39511b780a"}, - {file = "rpds_py-0.10.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c7853f27195598e550fe089f78f0732c66ee1d1f0eaae8ad081589a5a2f5d4af"}, - {file = "rpds_py-0.10.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:75dbfd41a61bc1fb0536bf7b1abf272dc115c53d4d77db770cd65d46d4520882"}, - {file = "rpds_py-0.10.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b25136212a3d064a8f0b9ebbb6c57094c5229e0de76d15c79b76feff26aeb7b8"}, - {file = "rpds_py-0.10.0-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:9affee8cb1ec453382c27eb9043378ab32f49cd4bc24a24275f5c39bf186c279"}, - {file = "rpds_py-0.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4d55528ef13af4b4e074d067977b1f61408602f53ae4537dccf42ba665c2c7bd"}, - {file = "rpds_py-0.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7865df1fb564092bcf46dac61b5def25342faf6352e4bc0e61a286e3fa26a3d"}, - {file = "rpds_py-0.10.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3f5cc8c7bc99d2bbcd704cef165ca7d155cd6464c86cbda8339026a42d219397"}, - {file = "rpds_py-0.10.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cbae50d352e4717ffc22c566afc2d0da744380e87ed44a144508e3fb9114a3f4"}, - {file = "rpds_py-0.10.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fccbf0cd3411719e4c9426755df90bf3449d9fc5a89f077f4a7f1abd4f70c910"}, - {file = "rpds_py-0.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78d10c431073dc6ebceed35ab22948a016cc2b5120963c13a41e38bdde4a7212"}, - {file = "rpds_py-0.10.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1b401e8b9aece651512e62c431181e6e83048a651698a727ea0eb0699e9f9b74"}, - {file = "rpds_py-0.10.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:7618a082c55cf038eede4a918c1001cc8a4411dfe508dc762659bcd48d8f4c6e"}, - {file = "rpds_py-0.10.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:b3226b246facae14909b465061ddcfa2dfeadb6a64f407f24300d42d69bcb1a1"}, - {file = "rpds_py-0.10.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:a8edd467551c1102dc0f5754ab55cd0703431cd3044edf8c8e7d9208d63fa453"}, - {file = "rpds_py-0.10.0-cp38-none-win32.whl", hash = "sha256:71333c22f7cf5f0480b59a0aef21f652cf9bbaa9679ad261b405b65a57511d1e"}, - {file = "rpds_py-0.10.0-cp38-none-win_amd64.whl", hash = "sha256:a8ab1adf04ae2d6d65835995218fd3f3eb644fe20655ca8ee233e2c7270ff53b"}, - {file = "rpds_py-0.10.0-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:87c93b25d538c433fb053da6228c6290117ba53ff6a537c133b0f2087948a582"}, - {file = "rpds_py-0.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e7996aed3f65667c6dcc8302a69368435a87c2364079a066750a2eac75ea01e"}, - {file = "rpds_py-0.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8856aa76839dc234d3469f1e270918ce6bec1d6a601eba928f45d68a15f04fc3"}, - {file = "rpds_py-0.10.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", 
hash = "sha256:00215f6a9058fbf84f9d47536902558eb61f180a6b2a0fa35338d06ceb9a2e5a"}, - {file = "rpds_py-0.10.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:23a059143c1393015c68936370cce11690f7294731904bdae47cc3e16d0b2474"}, - {file = "rpds_py-0.10.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3e5c26905aa651cc8c0ddc45e0e5dea2a1296f70bdc96af17aee9d0493280a17"}, - {file = "rpds_py-0.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c651847545422c8131660704c58606d841e228ed576c8f1666d98b3d318f89da"}, - {file = "rpds_py-0.10.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:80992eb20755701753e30a6952a96aa58f353d12a65ad3c9d48a8da5ec4690cf"}, - {file = "rpds_py-0.10.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ffcf18ad3edf1c170e27e88b10282a2c449aa0358659592462448d71b2000cfc"}, - {file = "rpds_py-0.10.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:08e08ccf5b10badb7d0a5c84829b914c6e1e1f3a716fdb2bf294e2bd01562775"}, - {file = "rpds_py-0.10.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7150b83b3e3ddaac81a8bb6a9b5f93117674a0e7a2b5a5b32ab31fdfea6df27f"}, - {file = "rpds_py-0.10.0-cp39-none-win32.whl", hash = "sha256:3455ecc46ea443b5f7d9c2f946ce4017745e017b0d0f8b99c92564eff97e97f5"}, - {file = "rpds_py-0.10.0-cp39-none-win_amd64.whl", hash = "sha256:afe6b5a04b2ab1aa89bad32ca47bf71358e7302a06fdfdad857389dca8fb5f04"}, - {file = "rpds_py-0.10.0-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:b1cb078f54af0abd835ca76f93a3152565b73be0f056264da45117d0adf5e99c"}, - {file = "rpds_py-0.10.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:8e7e2b3577e97fa43c2c2b12a16139b2cedbd0770235d5179c0412b4794efd9b"}, - {file = "rpds_py-0.10.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae46a50d235f1631d9ec4670503f7b30405103034830bc13df29fd947207f795"}, - {file = "rpds_py-0.10.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f869e34d2326e417baee430ae998e91412cc8e7fdd83d979277a90a0e79a5b47"}, - {file = "rpds_py-0.10.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3d544a614055b131111bed6edfa1cb0fb082a7265761bcb03321f2dd7b5c6c48"}, - {file = "rpds_py-0.10.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ee9c2f6ca9774c2c24bbf7b23086264e6b5fa178201450535ec0859739e6f78d"}, - {file = "rpds_py-0.10.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2da4a8c6d465fde36cea7d54bf47b5cf089073452f0e47c8632ecb9dec23c07"}, - {file = "rpds_py-0.10.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ac00c41dd315d147b129976204839ca9de699d83519ff1272afbe4fb9d362d12"}, - {file = "rpds_py-0.10.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:0155c33af0676fc38e1107679be882077680ad1abb6303956b97259c3177e85e"}, - {file = "rpds_py-0.10.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:db6585b600b2e76e98131e0ac0e5195759082b51687ad0c94505970c90718f4a"}, - {file = "rpds_py-0.10.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:7b6975d3763d0952c111700c0634968419268e6bbc0b55fe71138987fa66f309"}, - {file = "rpds_py-0.10.0-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:6388e4e95a26717b94a05ced084e19da4d92aca883f392dffcf8e48c8e221a24"}, - {file = "rpds_py-0.10.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:18f87baa20e02e9277ad8960cd89b63c79c05caf106f4c959a9595c43f2a34a5"}, - {file = "rpds_py-0.10.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92f05fc7d832e970047662b3440b190d24ea04f8d3c760e33e7163b67308c878"}, - {file = "rpds_py-0.10.0-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:291c9ce3929a75b45ce8ddde2aa7694fc8449f2bc8f5bd93adf021efaae2d10b"}, - {file = "rpds_py-0.10.0-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:861d25ae0985a1dd5297fee35f476b60c6029e2e6e19847d5b4d0a43a390b696"}, - {file = "rpds_py-0.10.0-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:668d2b45d62c68c7a370ac3dce108ffda482b0a0f50abd8b4c604a813a59e08f"}, - {file = "rpds_py-0.10.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:344b89384c250ba6a4ce1786e04d01500e4dac0f4137ceebcaad12973c0ac0b3"}, - {file = "rpds_py-0.10.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:885e023e73ce09b11b89ab91fc60f35d80878d2c19d6213a32b42ff36543c291"}, - {file = "rpds_py-0.10.0-pp38-pypy38_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:841128a22e6ac04070a0f84776d07e9c38c4dcce8e28792a95e45fc621605517"}, - {file = "rpds_py-0.10.0-pp38-pypy38_pp73-musllinux_1_2_i686.whl", hash = "sha256:899b5e7e2d5a8bc92aa533c2d4e55e5ebba095c485568a5e4bedbc163421259a"}, - {file = "rpds_py-0.10.0-pp38-pypy38_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:e7947d9a6264c727a556541b1630296bbd5d0a05068d21c38dde8e7a1c703ef0"}, - {file = "rpds_py-0.10.0-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:4992266817169997854f81df7f6db7bdcda1609972d8ffd6919252f09ec3c0f6"}, - {file = "rpds_py-0.10.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:26d9fd624649a10e4610fab2bc820e215a184d193e47d0be7fe53c1c8f67f370"}, - {file = "rpds_py-0.10.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0028eb0967942d0d2891eae700ae1a27b7fd18604cfcb16a1ef486a790fee99e"}, - {file = "rpds_py-0.10.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f9e7e493ded7042712a374471203dd43ae3fff5b81e3de1a0513fa241af9fd41"}, - {file = "rpds_py-0.10.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2d68a8e8a3a816629283faf82358d8c93fe5bd974dd2704152394a3de4cec22a"}, - {file = "rpds_py-0.10.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d6d5f061f6a2aa55790b9e64a23dfd87b6664ab56e24cd06c78eb43986cb260b"}, - {file = "rpds_py-0.10.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c7c4266c1b61eb429e8aeb7d8ed6a3bfe6c890a1788b18dbec090c35c6b93fa"}, - {file = "rpds_py-0.10.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:80772e3bda6787510d9620bc0c7572be404a922f8ccdfd436bf6c3778119464c"}, - {file = "rpds_py-0.10.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:b98e75b21fc2ba5285aef8efaf34131d16af1c38df36bdca2f50634bea2d3060"}, - {file = "rpds_py-0.10.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:d63787f289944cc4bde518ad2b5e70a4f0d6e2ce76324635359c74c113fd188f"}, - {file = "rpds_py-0.10.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:872f3dcaa8bf2245944861d7311179d2c0c9b2aaa7d3b464d99a7c2e401f01fa"}, - {file = "rpds_py-0.10.0.tar.gz", hash = "sha256:e36d7369363d2707d5f68950a64c4e025991eb0177db01ccb6aa6facae48b69f"}, -] [[package]] name = "rsa" version = "4.9" description 
= "Pure-Python RSA implementation" +category = "main" optional = false python-versions = ">=3.6,<4" -files = [ - {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, - {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, -] [package.dependencies] pyasn1 = ">=0.1.3" @@ -6863,12 +4318,9 @@ pyasn1 = ">=0.1.3" name = "s3fs" version = "2023.6.0" description = "Convenient Filesystem interface over S3" +category = "main" optional = true python-versions = ">= 3.8" -files = [ - {file = "s3fs-2023.6.0-py3-none-any.whl", hash = "sha256:d1a0a423d0d2e17fb2a193d9531935dc3f45ba742693448a461b6b34f6a92a24"}, - {file = "s3fs-2023.6.0.tar.gz", hash = "sha256:63fd8ddf05eb722de784b7b503196107f2a518061298cf005a8a4715b4d49117"}, -] [package.dependencies] aiobotocore = ">=2.5.0,<2.6.0" @@ -6883,12 +4335,9 @@ boto3 = ["aiobotocore[boto3] (>=2.5.0,<2.6.0)"] name = "s3transfer" version = "0.6.2" description = "An Amazon S3 Transfer Manager" +category = "main" optional = true python-versions = ">= 3.7" -files = [ - {file = "s3transfer-0.6.2-py3-none-any.whl", hash = "sha256:b014be3a8a2aab98cfe1abc7229cc5a9a0cf05eb9c1f2b86b230fd8df3f78084"}, - {file = "s3transfer-0.6.2.tar.gz", hash = "sha256:cab66d3380cca3e70939ef2255d01cd8aece6a4907a9528740f668c4b0611861"}, -] [package.dependencies] botocore = ">=1.12.36,<2.0a.0" @@ -6900,12 +4349,9 @@ crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] name = "scramp" version = "1.4.4" description = "An implementation of the SCRAM protocol." +category = "main" optional = true python-versions = ">=3.7" -files = [ - {file = "scramp-1.4.4-py3-none-any.whl", hash = "sha256:b142312df7c2977241d951318b7ee923d6b7a4f75ba0f05b621ece1ed616faa3"}, - {file = "scramp-1.4.4.tar.gz", hash = "sha256:b7022a140040f33cf863ab2657917ed05287a807b917950489b89b9f685d59bc"}, -] [package.dependencies] asn1crypto = ">=1.5.1" @@ -6914,12 +4360,9 @@ asn1crypto = ">=1.5.1" name = "secretstorage" version = "3.3.3" description = "Python bindings to FreeDesktop.org Secret Service API" +category = "main" optional = true python-versions = ">=3.6" -files = [ - {file = "SecretStorage-3.3.3-py3-none-any.whl", hash = "sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99"}, - {file = "SecretStorage-3.3.3.tar.gz", hash = "sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77"}, -] [package.dependencies] cryptography = ">=2.0" @@ -6929,23 +4372,17 @@ jeepney = ">=0.6" name = "semver" version = "3.0.1" description = "Python helper for Semantic Versioning (https://semver.org)" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "semver-3.0.1-py3-none-any.whl", hash = "sha256:2a23844ba1647362c7490fe3995a86e097bb590d16f0f32dfc383008f19e4cdf"}, - {file = "semver-3.0.1.tar.gz", hash = "sha256:9ec78c5447883c67b97f98c3b6212796708191d22e4ad30f4570f840171cbce1"}, -] [[package]] name = "sentry-sdk" version = "1.30.0" description = "Python client for Sentry (https://sentry.io)" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "sentry-sdk-1.30.0.tar.gz", hash = "sha256:7dc873b87e1faf4d00614afd1058bfa1522942f33daef8a59f90de8ed75cd10c"}, - {file = "sentry_sdk-1.30.0-py2.py3-none-any.whl", hash = "sha256:2e53ad63f96bb9da6570ba2e755c267e529edcf58580a2c0d2a11ef26e1e678b"}, -] [package.dependencies] certifi = "*" @@ -6983,82 +4420,9 @@ tornado = ["tornado (>=5)"] name = "setproctitle" version = "1.3.2" description = "A Python module to 
customize the process title" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "setproctitle-1.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:288943dec88e178bb2fd868adf491197cc0fc8b6810416b1c6775e686bab87fe"}, - {file = "setproctitle-1.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:630f6fe5e24a619ccf970c78e084319ee8be5be253ecc9b5b216b0f474f5ef18"}, - {file = "setproctitle-1.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c877691b90026670e5a70adfbcc735460a9f4c274d35ec5e8a43ce3f8443005"}, - {file = "setproctitle-1.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7a55fe05f15c10e8c705038777656fe45e3bd676d49ad9ac8370b75c66dd7cd7"}, - {file = "setproctitle-1.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ab45146c71ca6592c9cc8b354a2cc9cc4843c33efcbe1d245d7d37ce9696552d"}, - {file = "setproctitle-1.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e00c9d5c541a2713ba0e657e0303bf96ddddc412ef4761676adc35df35d7c246"}, - {file = "setproctitle-1.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:265ecbe2c6eafe82e104f994ddd7c811520acdd0647b73f65c24f51374cf9494"}, - {file = "setproctitle-1.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c2c46200656280a064073447ebd363937562debef329482fd7e570c8d498f806"}, - {file = "setproctitle-1.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:fa2f50678f04fda7a75d0fe5dd02bbdd3b13cbe6ed4cf626e4472a7ccf47ae94"}, - {file = "setproctitle-1.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7f2719a398e1a2c01c2a63bf30377a34d0b6ef61946ab9cf4d550733af8f1ef1"}, - {file = "setproctitle-1.3.2-cp310-cp310-win32.whl", hash = "sha256:e425be62524dc0c593985da794ee73eb8a17abb10fe692ee43bb39e201d7a099"}, - {file = "setproctitle-1.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:e85e50b9c67854f89635a86247412f3ad66b132a4d8534ac017547197c88f27d"}, - {file = "setproctitle-1.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2a97d51c17d438cf5be284775a322d57b7ca9505bb7e118c28b1824ecaf8aeaa"}, - {file = "setproctitle-1.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:587c7d6780109fbd8a627758063d08ab0421377c0853780e5c356873cdf0f077"}, - {file = "setproctitle-1.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d17c8bd073cbf8d141993db45145a70b307385b69171d6b54bcf23e5d644de"}, - {file = "setproctitle-1.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e932089c35a396dc31a5a1fc49889dd559548d14cb2237adae260382a090382e"}, - {file = "setproctitle-1.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e4f8f12258a8739c565292a551c3db62cca4ed4f6b6126664e2381acb4931bf"}, - {file = "setproctitle-1.3.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:570d255fd99c7f14d8f91363c3ea96bd54f8742275796bca67e1414aeca7d8c3"}, - {file = "setproctitle-1.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a8e0881568c5e6beff91ef73c0ec8ac2a9d3ecc9edd6bd83c31ca34f770910c4"}, - {file = "setproctitle-1.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4bba3be4c1fabf170595b71f3af46c6d482fbe7d9e0563999b49999a31876f77"}, - {file = "setproctitle-1.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = 
"sha256:37ece938110cab2bb3957e3910af8152ca15f2b6efdf4f2612e3f6b7e5459b80"}, - {file = "setproctitle-1.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:db684d6bbb735a80bcbc3737856385b55d53f8a44ce9b46e9a5682c5133a9bf7"}, - {file = "setproctitle-1.3.2-cp311-cp311-win32.whl", hash = "sha256:ca58cd260ea02759238d994cfae844fc8b1e206c684beb8f38877dcab8451dfc"}, - {file = "setproctitle-1.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:88486e6cce2a18a033013d17b30a594f1c5cb42520c49c19e6ade40b864bb7ff"}, - {file = "setproctitle-1.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:92c626edc66169a1b09e9541b9c0c9f10488447d8a2b1d87c8f0672e771bc927"}, - {file = "setproctitle-1.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:710e16fa3bade3b026907e4a5e841124983620046166f355bbb84be364bf2a02"}, - {file = "setproctitle-1.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f29b75e86260b0ab59adb12661ef9f113d2f93a59951373eb6d68a852b13e83"}, - {file = "setproctitle-1.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c8d9650154afaa86a44ff195b7b10d683c73509d085339d174e394a22cccbb9"}, - {file = "setproctitle-1.3.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0452282258dfcc01697026a8841258dd2057c4438b43914b611bccbcd048f10"}, - {file = "setproctitle-1.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:e49ae693306d7624015f31cb3e82708916759d592c2e5f72a35c8f4cc8aef258"}, - {file = "setproctitle-1.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1ff863a20d1ff6ba2c24e22436a3daa3cd80be1dfb26891aae73f61b54b04aca"}, - {file = "setproctitle-1.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:55ce1e9925ce1765865442ede9dca0ba9bde10593fcd570b1f0fa25d3ec6b31c"}, - {file = "setproctitle-1.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:7fe9df7aeb8c64db6c34fc3b13271a363475d77bc157d3f00275a53910cb1989"}, - {file = "setproctitle-1.3.2-cp37-cp37m-win32.whl", hash = "sha256:e5c50e164cd2459bc5137c15288a9ef57160fd5cbf293265ea3c45efe7870865"}, - {file = "setproctitle-1.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:a499fff50387c1520c085a07578a000123f519e5f3eee61dd68e1d301659651f"}, - {file = "setproctitle-1.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5b932c3041aa924163f4aab970c2f0e6b4d9d773f4d50326e0ea1cd69240e5c5"}, - {file = "setproctitle-1.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f4bfc89bd33ebb8e4c0e9846a09b1f5a4a86f5cb7a317e75cc42fee1131b4f4f"}, - {file = "setproctitle-1.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fcd3cf4286a60fdc95451d8d14e0389a6b4f5cebe02c7f2609325eb016535963"}, - {file = "setproctitle-1.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5fb4f769c02f63fac90989711a3fee83919f47ae9afd4758ced5d86596318c65"}, - {file = "setproctitle-1.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5194b4969f82ea842a4f6af2f82cd16ebdc3f1771fb2771796e6add9835c1973"}, - {file = "setproctitle-1.3.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f0cde41857a644b7353a0060b5f94f7ba7cf593ebde5a1094da1be581ac9a31"}, - {file = "setproctitle-1.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9124bedd8006b0e04d4e8a71a0945da9b67e7a4ab88fdad7b1440dc5b6122c42"}, - {file = 
"setproctitle-1.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:c8a09d570b39517de10ee5b718730e171251ce63bbb890c430c725c8c53d4484"}, - {file = "setproctitle-1.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:8ff3c8cb26afaed25e8bca7b9dd0c1e36de71f35a3a0706b5c0d5172587a3827"}, - {file = "setproctitle-1.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:589be87172b238f839e19f146b9ea47c71e413e951ef0dc6db4218ddacf3c202"}, - {file = "setproctitle-1.3.2-cp38-cp38-win32.whl", hash = "sha256:4749a2b0c9ac52f864d13cee94546606f92b981b50e46226f7f830a56a9dc8e1"}, - {file = "setproctitle-1.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:e43f315c68aa61cbdef522a2272c5a5b9b8fd03c301d3167b5e1343ef50c676c"}, - {file = "setproctitle-1.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:de3a540cd1817ede31f530d20e6a4935bbc1b145fd8f8cf393903b1e02f1ae76"}, - {file = "setproctitle-1.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4058564195b975ddc3f0462375c533cce310ccdd41b80ac9aed641c296c3eff4"}, - {file = "setproctitle-1.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c5d5dad7c28bdd1ec4187d818e43796f58a845aa892bb4481587010dc4d362b"}, - {file = "setproctitle-1.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ffc61a388a5834a97953d6444a2888c24a05f2e333f9ed49f977a87bb1ad4761"}, - {file = "setproctitle-1.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fa1a0fbee72b47dc339c87c890d3c03a72ea65c061ade3204f285582f2da30f"}, - {file = "setproctitle-1.3.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe8a988c7220c002c45347430993830666e55bc350179d91fcee0feafe64e1d4"}, - {file = "setproctitle-1.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bae283e85fc084b18ffeb92e061ff7ac5af9e183c9d1345c93e178c3e5069cbe"}, - {file = "setproctitle-1.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:fed18e44711c5af4b681c2b3b18f85e6f0f1b2370a28854c645d636d5305ccd8"}, - {file = "setproctitle-1.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:b34baef93bfb20a8ecb930e395ccd2ae3268050d8cf4fe187de5e2bd806fd796"}, - {file = "setproctitle-1.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7f0bed90a216ef28b9d227d8d73e28a8c9b88c0f48a082d13ab3fa83c581488f"}, - {file = "setproctitle-1.3.2-cp39-cp39-win32.whl", hash = "sha256:4d8938249a7cea45ab7e1e48b77685d0f2bab1ebfa9dde23e94ab97968996a7c"}, - {file = "setproctitle-1.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:a47d97a75fd2d10c37410b180f67a5835cb1d8fdea2648fd7f359d4277f180b9"}, - {file = "setproctitle-1.3.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:dad42e676c5261eb50fdb16bdf3e2771cf8f99a79ef69ba88729aeb3472d8575"}, - {file = "setproctitle-1.3.2-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c91b9bc8985d00239f7dc08a49927a7ca1ca8a6af2c3890feec3ed9665b6f91e"}, - {file = "setproctitle-1.3.2-pp37-pypy37_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8579a43eafd246e285eb3a5b939e7158073d5087aacdd2308f23200eac2458b"}, - {file = "setproctitle-1.3.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:2fbd8187948284293f43533c150cd69a0e4192c83c377da837dbcd29f6b83084"}, - {file = "setproctitle-1.3.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:faec934cfe5fd6ac1151c02e67156c3f526e82f96b24d550b5d51efa4a5527c6"}, - {file = 
"setproctitle-1.3.2-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1aafc91cbdacc9e5fe712c52077369168e6b6c346f3a9d51bf600b53eae56bb"}, - {file = "setproctitle-1.3.2-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b617f12c9be61e8f4b2857be4a4319754756845dbbbd9c3718f468bbb1e17bcb"}, - {file = "setproctitle-1.3.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b2c9cb2705fc84cb8798f1ba74194f4c080aaef19d9dae843591c09b97678e98"}, - {file = "setproctitle-1.3.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a149a5f7f2c5a065d4e63cb0d7a4b6d3b66e6e80f12e3f8827c4f63974cbf122"}, - {file = "setproctitle-1.3.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e3ac25bfc4a0f29d2409650c7532d5ddfdbf29f16f8a256fc31c47d0dc05172"}, - {file = "setproctitle-1.3.2-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65d884e22037b23fa25b2baf1a3316602ed5c5971eb3e9d771a38c3a69ce6e13"}, - {file = "setproctitle-1.3.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7aa0aac1711fadffc1d51e9d00a3bea61f68443d6ac0241a224e4d622489d665"}, - {file = "setproctitle-1.3.2.tar.gz", hash = "sha256:b9fb97907c830d260fa0658ed58afd48a86b2b88aac521135c352ff7fd3477fd"}, -] [package.extras] test = ["pytest"] @@ -7067,12 +4431,9 @@ test = ["pytest"] name = "setuptools" version = "68.1.2" description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "main" optional = false python-versions = ">=3.8" -files = [ - {file = "setuptools-68.1.2-py3-none-any.whl", hash = "sha256:3d8083eed2d13afc9426f227b24fd1659489ec107c0e86cec2ffdde5c92e790b"}, - {file = "setuptools-68.1.2.tar.gz", hash = "sha256:3d4dfa6d95f1b101d695a6160a7626e15583af71a5f52176efa5d39a054d475d"}, -] [package.extras] docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5,<=7.1.2)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] @@ -7083,158 +4444,41 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs ( name = "simplejson" version = "3.19.1" description = "Simple, fast, extensible JSON encoder/decoder for Python" +category = "main" optional = false python-versions = ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "simplejson-3.19.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:412e58997a30c5deb8cab5858b8e2e5b40ca007079f7010ee74565cc13d19665"}, - {file = "simplejson-3.19.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e765b1f47293dedf77946f0427e03ee45def2862edacd8868c6cf9ab97c8afbd"}, - {file = "simplejson-3.19.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:3231100edee292da78948fa0a77dee4e5a94a0a60bcba9ed7a9dc77f4d4bb11e"}, - {file = "simplejson-3.19.1-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:081ea6305b3b5e84ae7417e7f45956db5ea3872ec497a584ec86c3260cda049e"}, - {file = "simplejson-3.19.1-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:f253edf694ce836631b350d758d00a8c4011243d58318fbfbe0dd54a6a839ab4"}, - {file = "simplejson-3.19.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:5db86bb82034e055257c8e45228ca3dbce85e38d7bfa84fa7b2838e032a3219c"}, - {file = "simplejson-3.19.1-cp27-cp27mu-manylinux1_x86_64.whl", 
hash = "sha256:69a8b10a4f81548bc1e06ded0c4a6c9042c0be0d947c53c1ed89703f7e613950"}, - {file = "simplejson-3.19.1-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:58ee5e24d6863b22194020eb62673cf8cc69945fcad6b283919490f6e359f7c5"}, - {file = "simplejson-3.19.1-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:73d0904c2471f317386d4ae5c665b16b5c50ab4f3ee7fd3d3b7651e564ad74b1"}, - {file = "simplejson-3.19.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:66d780047c31ff316ee305c3f7550f352d87257c756413632303fc59fef19eac"}, - {file = "simplejson-3.19.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cd4d50a27b065447c9c399f0bf0a993bd0e6308db8bbbfbc3ea03b41c145775a"}, - {file = "simplejson-3.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0c16ec6a67a5f66ab004190829eeede01c633936375edcad7cbf06d3241e5865"}, - {file = "simplejson-3.19.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17a963e8dd4d81061cc05b627677c1f6a12e81345111fbdc5708c9f088d752c9"}, - {file = "simplejson-3.19.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7e78d79b10aa92f40f54178ada2b635c960d24fc6141856b926d82f67e56d169"}, - {file = "simplejson-3.19.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad071cd84a636195f35fa71de2186d717db775f94f985232775794d09f8d9061"}, - {file = "simplejson-3.19.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e7c70f19405e5f99168077b785fe15fcb5f9b3c0b70b0b5c2757ce294922c8c"}, - {file = "simplejson-3.19.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:54fca2b26bcd1c403146fd9461d1da76199442297160721b1d63def2a1b17799"}, - {file = "simplejson-3.19.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:48600a6e0032bed17c20319d91775f1797d39953ccfd68c27f83c8d7fc3b32cb"}, - {file = "simplejson-3.19.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:93f5ac30607157a0b2579af59a065bcfaa7fadeb4875bf927a8f8b6739c8d910"}, - {file = "simplejson-3.19.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b79642a599740603ca86cf9df54f57a2013c47e1dd4dd2ae4769af0a6816900"}, - {file = "simplejson-3.19.1-cp310-cp310-win32.whl", hash = "sha256:d9f2c27f18a0b94107d57294aab3d06d6046ea843ed4a45cae8bd45756749f3a"}, - {file = "simplejson-3.19.1-cp310-cp310-win_amd64.whl", hash = "sha256:5673d27806085d2a413b3be5f85fad6fca4b7ffd31cfe510bbe65eea52fff571"}, - {file = "simplejson-3.19.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:79c748aa61fd8098d0472e776743de20fae2686edb80a24f0f6593a77f74fe86"}, - {file = "simplejson-3.19.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:390f4a8ca61d90bcf806c3ad644e05fa5890f5b9a72abdd4ca8430cdc1e386fa"}, - {file = "simplejson-3.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d61482b5d18181e6bb4810b4a6a24c63a490c3a20e9fbd7876639653e2b30a1a"}, - {file = "simplejson-3.19.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2541fdb7467ef9bfad1f55b6c52e8ea52b3ce4a0027d37aff094190a955daa9d"}, - {file = "simplejson-3.19.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46133bc7dd45c9953e6ee4852e3de3d5a9a4a03b068bd238935a5c72f0a1ce34"}, - {file = "simplejson-3.19.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f96def94576f857abf58e031ce881b5a3fc25cbec64b2bc4824824a8a4367af9"}, - {file = 
"simplejson-3.19.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f14ecca970d825df0d29d5c6736ff27999ee7bdf5510e807f7ad8845f7760ce"}, - {file = "simplejson-3.19.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:66389b6b6ee46a94a493a933a26008a1bae0cfadeca176933e7ff6556c0ce998"}, - {file = "simplejson-3.19.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:22b867205cd258050c2625325fdd9a65f917a5aff22a23387e245ecae4098e78"}, - {file = "simplejson-3.19.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:c39fa911e4302eb79c804b221ddec775c3da08833c0a9120041dd322789824de"}, - {file = "simplejson-3.19.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:65dafe413b15e8895ad42e49210b74a955c9ae65564952b0243a18fb35b986cc"}, - {file = "simplejson-3.19.1-cp311-cp311-win32.whl", hash = "sha256:f05d05d99fce5537d8f7a0af6417a9afa9af3a6c4bb1ba7359c53b6257625fcb"}, - {file = "simplejson-3.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:b46aaf0332a8a9c965310058cf3487d705bf672641d2c43a835625b326689cf4"}, - {file = "simplejson-3.19.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b438e5eaa474365f4faaeeef1ec3e8d5b4e7030706e3e3d6b5bee6049732e0e6"}, - {file = "simplejson-3.19.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa9d614a612ad02492f704fbac636f666fa89295a5d22b4facf2d665fc3b5ea9"}, - {file = "simplejson-3.19.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46e89f58e4bed107626edce1cf098da3664a336d01fc78fddcfb1f397f553d44"}, - {file = "simplejson-3.19.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96ade243fb6f3b57e7bd3b71e90c190cd0f93ec5dce6bf38734a73a2e5fa274f"}, - {file = "simplejson-3.19.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed18728b90758d171f0c66c475c24a443ede815cf3f1a91e907b0db0ebc6e508"}, - {file = "simplejson-3.19.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:6a561320485017ddfc21bd2ed5de2d70184f754f1c9b1947c55f8e2b0163a268"}, - {file = "simplejson-3.19.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:2098811cd241429c08b7fc5c9e41fcc3f59f27c2e8d1da2ccdcf6c8e340ab507"}, - {file = "simplejson-3.19.1-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:8f8d179393e6f0cf6c7c950576892ea6acbcea0a320838c61968ac7046f59228"}, - {file = "simplejson-3.19.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:eff87c68058374e45225089e4538c26329a13499bc0104b52b77f8428eed36b2"}, - {file = "simplejson-3.19.1-cp36-cp36m-win32.whl", hash = "sha256:d300773b93eed82f6da138fd1d081dc96fbe53d96000a85e41460fe07c8d8b33"}, - {file = "simplejson-3.19.1-cp36-cp36m-win_amd64.whl", hash = "sha256:37724c634f93e5caaca04458f267836eb9505d897ab3947b52f33b191bf344f3"}, - {file = "simplejson-3.19.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:74bf802debe68627227ddb665c067eb8c73aa68b2476369237adf55c1161b728"}, - {file = "simplejson-3.19.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70128fb92932524c89f373e17221cf9535d7d0c63794955cc3cd5868e19f5d38"}, - {file = "simplejson-3.19.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8090e75653ea7db75bc21fa5f7bcf5f7bdf64ea258cbbac45c7065f6324f1b50"}, - {file = "simplejson-3.19.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:a755f7bfc8adcb94887710dc70cc12a69a454120c6adcc6f251c3f7b46ee6aac"}, - {file = "simplejson-3.19.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ccb2c1877bc9b25bc4f4687169caa925ffda605d7569c40e8e95186e9a5e58b"}, - {file = "simplejson-3.19.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:919bc5aa4d8094cf8f1371ea9119e5d952f741dc4162810ab714aec948a23fe5"}, - {file = "simplejson-3.19.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:e333c5b62e93949f5ac27e6758ba53ef6ee4f93e36cc977fe2e3df85c02f6dc4"}, - {file = "simplejson-3.19.1-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3a4480e348000d89cf501b5606415f4d328484bbb431146c2971123d49fd8430"}, - {file = "simplejson-3.19.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:cb502cde018e93e75dc8fc7bb2d93477ce4f3ac10369f48866c61b5e031db1fd"}, - {file = "simplejson-3.19.1-cp37-cp37m-win32.whl", hash = "sha256:f41915a4e1f059dfad614b187bc06021fefb5fc5255bfe63abf8247d2f7a646a"}, - {file = "simplejson-3.19.1-cp37-cp37m-win_amd64.whl", hash = "sha256:3844305bc33d52c4975da07f75b480e17af3558c0d13085eaa6cc2f32882ccf7"}, - {file = "simplejson-3.19.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:1cb19eacb77adc5a9720244d8d0b5507421d117c7ed4f2f9461424a1829e0ceb"}, - {file = "simplejson-3.19.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:926957b278de22797bfc2f004b15297013843b595b3cd7ecd9e37ccb5fad0b72"}, - {file = "simplejson-3.19.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b0e9a5e66969f7a47dc500e3dba8edc3b45d4eb31efb855c8647700a3493dd8a"}, - {file = "simplejson-3.19.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79d46e7e33c3a4ef853a1307b2032cfb7220e1a079d0c65488fbd7118f44935a"}, - {file = "simplejson-3.19.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:344a5093b71c1b370968d0fbd14d55c9413cb6f0355fdefeb4a322d602d21776"}, - {file = "simplejson-3.19.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:23fbb7b46d44ed7cbcda689295862851105c7594ae5875dce2a70eeaa498ff86"}, - {file = "simplejson-3.19.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d3025e7e9ddb48813aec2974e1a7e68e63eac911dd5e0a9568775de107ac79a"}, - {file = "simplejson-3.19.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:87b190e6ceec286219bd6b6f13547ca433f977d4600b4e81739e9ac23b5b9ba9"}, - {file = "simplejson-3.19.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:dc935d8322ba9bc7b84f99f40f111809b0473df167bf5b93b89fb719d2c4892b"}, - {file = "simplejson-3.19.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:3b652579c21af73879d99c8072c31476788c8c26b5565687fd9db154070d852a"}, - {file = "simplejson-3.19.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6aa7ca03f25b23b01629b1c7f78e1cd826a66bfb8809f8977a3635be2ec48f1a"}, - {file = "simplejson-3.19.1-cp38-cp38-win32.whl", hash = "sha256:08be5a241fdf67a8e05ac7edbd49b07b638ebe4846b560673e196b2a25c94b92"}, - {file = "simplejson-3.19.1-cp38-cp38-win_amd64.whl", hash = "sha256:ca56a6c8c8236d6fe19abb67ef08d76f3c3f46712c49a3b6a5352b6e43e8855f"}, - {file = "simplejson-3.19.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6424d8229ba62e5dbbc377908cfee9b2edf25abd63b855c21f12ac596cd18e41"}, - {file = "simplejson-3.19.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:547ea86ca408a6735335c881a2e6208851027f5bfd678d8f2c92a0f02c7e7330"}, - {file = 
"simplejson-3.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:889328873c35cb0b2b4c83cbb83ec52efee5a05e75002e2c0c46c4e42790e83c"}, - {file = "simplejson-3.19.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44cdb4e544134f305b033ad79ae5c6b9a32e7c58b46d9f55a64e2a883fbbba01"}, - {file = "simplejson-3.19.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc2b3f06430cbd4fac0dae5b2974d2bf14f71b415fb6de017f498950da8159b1"}, - {file = "simplejson-3.19.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d125e754d26c0298715bdc3f8a03a0658ecbe72330be247f4b328d229d8cf67f"}, - {file = "simplejson-3.19.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:476c8033abed7b1fd8db62a7600bf18501ce701c1a71179e4ce04ac92c1c5c3c"}, - {file = "simplejson-3.19.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:199a0bcd792811c252d71e3eabb3d4a132b3e85e43ebd93bfd053d5b59a7e78b"}, - {file = "simplejson-3.19.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a79b439a6a77649bb8e2f2644e6c9cc0adb720fc55bed63546edea86e1d5c6c8"}, - {file = "simplejson-3.19.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:203412745fed916fc04566ecef3f2b6c872b52f1e7fb3a6a84451b800fb508c1"}, - {file = "simplejson-3.19.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5ca922c61d87b4c38f37aa706520328ffe22d7ac1553ef1cadc73f053a673553"}, - {file = "simplejson-3.19.1-cp39-cp39-win32.whl", hash = "sha256:3e0902c278243d6f7223ba3e6c5738614c971fd9a887fff8feaa8dcf7249c8d4"}, - {file = "simplejson-3.19.1-cp39-cp39-win_amd64.whl", hash = "sha256:d396b610e77b0c438846607cd56418bfc194973b9886550a98fd6724e8c6cfec"}, - {file = "simplejson-3.19.1-py3-none-any.whl", hash = "sha256:4710806eb75e87919b858af0cba4ffedc01b463edc3982ded7b55143f39e41e1"}, - {file = "simplejson-3.19.1.tar.gz", hash = "sha256:6277f60848a7d8319d27d2be767a7546bc965535b28070e310b3a9af90604a4c"}, -] [[package]] name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] [[package]] name = "smmap" version = "5.0.0" description = "A pure Python implementation of a sliding window memory map manager" +category = "main" optional = false python-versions = ">=3.6" -files = [ - {file = "smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"}, - {file = "smmap-5.0.0.tar.gz", hash = "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936"}, -] [[package]] name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"}, - {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, -] [[package]] name = "snowflake-connector-python" -version = "3.1.1" +version = "3.5.0" description = "Snowflake Connector for Python" +category = "main" optional = true python-versions = ">=3.8" -files = [ - 
{file = "snowflake-connector-python-3.1.1.tar.gz", hash = "sha256:2700503a5f99d6e22e412d7cf4fd2211296cc0e50b2a38ad9c6f48ddb8beff67"}, - {file = "snowflake_connector_python-3.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3aec4ab6f6d66a0dc2b5bbd8fc2c11fd76090c63fdc65577af9d4e28055c51f2"}, - {file = "snowflake_connector_python-3.1.1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:5d2589f39b1c1c91eda6711181afb7f197f7dd43204f26db48df90849d9f528b"}, - {file = "snowflake_connector_python-3.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c540b4fe173cc9a24df285ce49c70fe0dadc6316b8a2160324c549086a71a118"}, - {file = "snowflake_connector_python-3.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25007ccf5d9c0b87e29af40470f6f1e76d03621642a7492d62282215b7e9d67d"}, - {file = "snowflake_connector_python-3.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:fff3caebd8b60cee09ad55674d12b8940b9d5f57a394c8467637167372710841"}, - {file = "snowflake_connector_python-3.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e7b7622be7bcad26786bf771341e3b4819df6e4d7858e5dd4c8700423ca7364e"}, - {file = "snowflake_connector_python-3.1.1-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:260d259a79e6120bf58fcec9a52705fd02a430f296a77a1531720906b7a02f5e"}, - {file = "snowflake_connector_python-3.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0163d5036f05a39977c6d7aba5e8bb1632be1117785a72e2602e3a34b89ded1c"}, - {file = "snowflake_connector_python-3.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d38546ebcba7bca37a16cfcbbc0f8e7c19946b4e45e0c5dc2a8963f3b739958"}, - {file = "snowflake_connector_python-3.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:484044c2d9aacd5c8a0a9d8d8b69b06352e3612f23c5e44d54771a96047d80b1"}, - {file = "snowflake_connector_python-3.1.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7e4a4aab55a4a3236625b738fad19524c9cef810fe041d567dc5dc1d9b1f9eb7"}, - {file = "snowflake_connector_python-3.1.1-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:5d95eeaff7b085b0c8facab40391bede699ffc0865f2cdaa37b19a8429d47943"}, - {file = "snowflake_connector_python-3.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a944a1862672552f8c00b98b576a8b16da46f9c5b918ba4b969bd7d1205c32a"}, - {file = "snowflake_connector_python-3.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7abb142ba3ee5db6c61be0dc578fa10e59b7c1f33716b0c93ae6706b2a8bbee3"}, - {file = "snowflake_connector_python-3.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:bf6ca8f8678dea6cf5275f69dbd9e4ebb18c2211be35379b65175e36e5953b92"}, - {file = "snowflake_connector_python-3.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ceb263b95720ab645c2e60e37d436db51321e0192d399631d052387728911689"}, - {file = "snowflake_connector_python-3.1.1-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:8b7fe82d8d1cdc90caadbcce419d3bcbf1bdeffb9bba974a81a46f389d8ee243"}, - {file = "snowflake_connector_python-3.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d46b798507f6c7447e21c76bd71969e22e55fa848196f20de73b3e2b65373b5"}, - {file = "snowflake_connector_python-3.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bdcce7069368b7b2ec8a855812c1b0e9e6bdf6b01660225ffff5ba163fa507d"}, - {file = "snowflake_connector_python-3.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:daedeff774cd68df05e68dbfa66e83a877e63a99461b8262eb5c8cd37e309aa7"}, 
-] [package.dependencies] asn1crypto = ">0.24.0,<2.0.0" @@ -7245,12 +4489,8 @@ cryptography = ">=3.1.0,<42.0.0" filelock = ">=3.5,<4" idna = ">=2.5,<4" keyring = {version = "<16.1.0 || >16.1.0,<25.0.0", optional = true, markers = "extra == \"secure-local-storage\""} -oscrypto = "<2.0.0" packaging = "*" -pandas = {version = ">=1.0.0,<2.1.0", optional = true, markers = "extra == \"pandas\""} -platformdirs = ">=2.6.0,<3.9.0" -pyarrow = {version = ">=10.0.1,<10.1.0", optional = true, markers = "extra == \"pandas\""} -pycryptodomex = ">=3.2,<3.5.0 || >3.5.0,<4.0.0" +platformdirs = ">=2.6.0,<4.0.0" pyjwt = "<3.0.0" pyOpenSSL = ">=16.2.0,<24.0.0" pytz = "*" @@ -7258,98 +4498,43 @@ requests = "<3.0.0" sortedcontainers = ">=2.4.0" tomlkit = "*" typing-extensions = ">=4.3,<5" -urllib3 = ">=1.21.1,<1.27" +urllib3 = ">=1.21.1,<2.0.0" [package.extras] -development = ["Cython", "coverage", "more-itertools", "numpy (<1.26.0)", "pendulum (!=2.1.1)", "pexpect", "pytest (<7.5.0)", "pytest-cov", "pytest-rerunfailures", "pytest-timeout", "pytest-xdist", "pytzdata"] -pandas = ["pandas (>=1.0.0,<2.1.0)", "pyarrow (>=10.0.1,<10.1.0)"] +development = ["Cython", "coverage", "more-itertools", "numpy (<1.27.0)", "pendulum (!=2.1.1)", "pexpect", "pytest (<7.5.0)", "pytest-cov", "pytest-rerunfailures", "pytest-timeout", "pytest-xdist", "pytzdata"] +pandas = ["pandas (>=1.0.0,<2.1.0)", "pyarrow"] secure-local-storage = ["keyring (!=16.1.0,<25.0.0)"] [[package]] name = "sortedcontainers" version = "2.4.0" description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +category = "main" optional = true python-versions = "*" -files = [ - {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"}, - {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, -] [[package]] name = "soupsieve" version = "2.5" description = "A modern CSS selector implementation for Beautiful Soup." 
+category = "main" optional = true python-versions = ">=3.8" -files = [ - {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, - {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, -] [[package]] name = "sqlalchemy" version = "1.4.49" description = "Database Abstraction Library" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" -files = [ - {file = "SQLAlchemy-1.4.49-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2e126cf98b7fd38f1e33c64484406b78e937b1a280e078ef558b95bf5b6895f6"}, - {file = "SQLAlchemy-1.4.49-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:03db81b89fe7ef3857b4a00b63dedd632d6183d4ea5a31c5d8a92e000a41fc71"}, - {file = "SQLAlchemy-1.4.49-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:95b9df9afd680b7a3b13b38adf6e3a38995da5e162cc7524ef08e3be4e5ed3e1"}, - {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a63e43bf3f668c11bb0444ce6e809c1227b8f067ca1068898f3008a273f52b09"}, - {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca46de16650d143a928d10842939dab208e8d8c3a9a8757600cae9b7c579c5cd"}, - {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f835c050ebaa4e48b18403bed2c0fda986525896efd76c245bdd4db995e51a4c"}, - {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c21b172dfb22e0db303ff6419451f0cac891d2e911bb9fbf8003d717f1bcf91"}, - {file = "SQLAlchemy-1.4.49-cp310-cp310-win32.whl", hash = "sha256:5fb1ebdfc8373b5a291485757bd6431de8d7ed42c27439f543c81f6c8febd729"}, - {file = "SQLAlchemy-1.4.49-cp310-cp310-win_amd64.whl", hash = "sha256:f8a65990c9c490f4651b5c02abccc9f113a7f56fa482031ac8cb88b70bc8ccaa"}, - {file = "SQLAlchemy-1.4.49-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8923dfdf24d5aa8a3adb59723f54118dd4fe62cf59ed0d0d65d940579c1170a4"}, - {file = "SQLAlchemy-1.4.49-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9ab2c507a7a439f13ca4499db6d3f50423d1d65dc9b5ed897e70941d9e135b0"}, - {file = "SQLAlchemy-1.4.49-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5debe7d49b8acf1f3035317e63d9ec8d5e4d904c6e75a2a9246a119f5f2fdf3d"}, - {file = "SQLAlchemy-1.4.49-cp311-cp311-win32.whl", hash = "sha256:82b08e82da3756765c2e75f327b9bf6b0f043c9c3925fb95fb51e1567fa4ee87"}, - {file = "SQLAlchemy-1.4.49-cp311-cp311-win_amd64.whl", hash = "sha256:171e04eeb5d1c0d96a544caf982621a1711d078dbc5c96f11d6469169bd003f1"}, - {file = "SQLAlchemy-1.4.49-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f23755c384c2969ca2f7667a83f7c5648fcf8b62a3f2bbd883d805454964a800"}, - {file = "SQLAlchemy-1.4.49-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8396e896e08e37032e87e7fbf4a15f431aa878c286dc7f79e616c2feacdb366c"}, - {file = "SQLAlchemy-1.4.49-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66da9627cfcc43bbdebd47bfe0145bb662041472393c03b7802253993b6b7c90"}, - {file = 
"SQLAlchemy-1.4.49-cp312-cp312-win32.whl", hash = "sha256:9a06e046ffeb8a484279e54bda0a5abfd9675f594a2e38ef3133d7e4d75b6214"}, - {file = "SQLAlchemy-1.4.49-cp312-cp312-win_amd64.whl", hash = "sha256:7cf8b90ad84ad3a45098b1c9f56f2b161601e4670827d6b892ea0e884569bd1d"}, - {file = "SQLAlchemy-1.4.49-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:36e58f8c4fe43984384e3fbe6341ac99b6b4e083de2fe838f0fdb91cebe9e9cb"}, - {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b31e67ff419013f99ad6f8fc73ee19ea31585e1e9fe773744c0f3ce58c039c30"}, - {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebc22807a7e161c0d8f3da34018ab7c97ef6223578fcdd99b1d3e7ed1100a5db"}, - {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c14b29d9e1529f99efd550cd04dbb6db6ba5d690abb96d52de2bff4ed518bc95"}, - {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c40f3470e084d31247aea228aa1c39bbc0904c2b9ccbf5d3cfa2ea2dac06f26d"}, - {file = "SQLAlchemy-1.4.49-cp36-cp36m-win32.whl", hash = "sha256:706bfa02157b97c136547c406f263e4c6274a7b061b3eb9742915dd774bbc264"}, - {file = "SQLAlchemy-1.4.49-cp36-cp36m-win_amd64.whl", hash = "sha256:a7f7b5c07ae5c0cfd24c2db86071fb2a3d947da7bd487e359cc91e67ac1c6d2e"}, - {file = "SQLAlchemy-1.4.49-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:4afbbf5ef41ac18e02c8dc1f86c04b22b7a2125f2a030e25bbb4aff31abb224b"}, - {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24e300c0c2147484a002b175f4e1361f102e82c345bf263242f0449672a4bccf"}, - {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:393cd06c3b00b57f5421e2133e088df9cabcececcea180327e43b937b5a7caa5"}, - {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:201de072b818f8ad55c80d18d1a788729cccf9be6d9dc3b9d8613b053cd4836d"}, - {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7653ed6817c710d0c95558232aba799307d14ae084cc9b1f4c389157ec50df5c"}, - {file = "SQLAlchemy-1.4.49-cp37-cp37m-win32.whl", hash = "sha256:647e0b309cb4512b1f1b78471fdaf72921b6fa6e750b9f891e09c6e2f0e5326f"}, - {file = "SQLAlchemy-1.4.49-cp37-cp37m-win_amd64.whl", hash = "sha256:ab73ed1a05ff539afc4a7f8cf371764cdf79768ecb7d2ec691e3ff89abbc541e"}, - {file = "SQLAlchemy-1.4.49-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:37ce517c011560d68f1ffb28af65d7e06f873f191eb3a73af5671e9c3fada08a"}, - {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1878ce508edea4a879015ab5215546c444233881301e97ca16fe251e89f1c55"}, - {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95ab792ca493891d7a45a077e35b418f68435efb3e1706cb8155e20e86a9013c"}, - {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = 
"sha256:0e8e608983e6f85d0852ca61f97e521b62e67969e6e640fe6c6b575d4db68557"}, - {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ccf956da45290df6e809ea12c54c02ace7f8ff4d765d6d3dfb3655ee876ce58d"}, - {file = "SQLAlchemy-1.4.49-cp38-cp38-win32.whl", hash = "sha256:f167c8175ab908ce48bd6550679cc6ea20ae169379e73c7720a28f89e53aa532"}, - {file = "SQLAlchemy-1.4.49-cp38-cp38-win_amd64.whl", hash = "sha256:45806315aae81a0c202752558f0df52b42d11dd7ba0097bf71e253b4215f34f4"}, - {file = "SQLAlchemy-1.4.49-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:b6d0c4b15d65087738a6e22e0ff461b407533ff65a73b818089efc8eb2b3e1de"}, - {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a843e34abfd4c797018fd8d00ffffa99fd5184c421f190b6ca99def4087689bd"}, - {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:738d7321212941ab19ba2acf02a68b8ee64987b248ffa2101630e8fccb549e0d"}, - {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:1c890421651b45a681181301b3497e4d57c0d01dc001e10438a40e9a9c25ee77"}, - {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d26f280b8f0a8f497bc10573849ad6dc62e671d2468826e5c748d04ed9e670d5"}, - {file = "SQLAlchemy-1.4.49-cp39-cp39-win32.whl", hash = "sha256:ec2268de67f73b43320383947e74700e95c6770d0c68c4e615e9897e46296294"}, - {file = "SQLAlchemy-1.4.49-cp39-cp39-win_amd64.whl", hash = "sha256:bbdf16372859b8ed3f4d05f925a984771cd2abd18bd187042f24be4886c2a15f"}, - {file = "SQLAlchemy-1.4.49.tar.gz", hash = "sha256:06ff25cbae30c396c4b7737464f2a7fc37a67b7da409993b182b024cec80aed9"}, -] [package.dependencies] greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} [package.extras] aiomysql = ["aiomysql", "greenlet (!=0.4.17)"] -aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing-extensions (!=3.10.0.1)"] +aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing_extensions (!=3.10.0.1)"] asyncio = ["greenlet (!=0.4.17)"] asyncmy = ["asyncmy (>=0.2.3,!=0.2.4)", "greenlet (!=0.4.17)"] mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2)"] @@ -7359,25 +4544,22 @@ mssql-pyodbc = ["pyodbc"] mypy = ["mypy (>=0.910)", "sqlalchemy2-stubs"] mysql = ["mysqlclient (>=1.4.0)", "mysqlclient (>=1.4.0,<2)"] mysql-connector = ["mysql-connector-python"] -oracle = ["cx-oracle (>=7)", "cx-oracle (>=7,<8)"] +oracle = ["cx_oracle (>=7)", "cx_oracle (>=7,<8)"] postgresql = ["psycopg2 (>=2.7)"] postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] postgresql-pg8000 = ["pg8000 (>=1.16.6,!=1.29.0)"] postgresql-psycopg2binary = ["psycopg2-binary"] postgresql-psycopg2cffi = ["psycopg2cffi"] pymysql = ["pymysql", "pymysql (<1)"] -sqlcipher = ["sqlcipher3-binary"] +sqlcipher = ["sqlcipher3_binary"] [[package]] name = "sqlalchemy-jsonfield" version = "1.0.1.post0" description = "SQLALchemy JSONField implementation for storing dicts at SQL" +category = "dev" optional = false python-versions = ">=3.7.0" -files = [ - {file = 
"SQLAlchemy-JSONField-1.0.1.post0.tar.gz", hash = "sha256:72a5e714fe0493d2660abd7484a9fd9f492f493a0856288dd22a5decb29f5dc4"}, - {file = "SQLAlchemy_JSONField-1.0.1.post0-py3-none-any.whl", hash = "sha256:d6f1e5ee329a3c0d9d164e40d81a2143ac8332e09988fbbaff84179dac5503d4"}, -] [package.dependencies] sqlalchemy = "*" @@ -7386,12 +4568,9 @@ sqlalchemy = "*" name = "sqlalchemy-utils" version = "0.41.1" description = "Various utility functions for SQLAlchemy." +category = "dev" optional = false python-versions = ">=3.6" -files = [ - {file = "SQLAlchemy-Utils-0.41.1.tar.gz", hash = "sha256:a2181bff01eeb84479e38571d2c0718eb52042f9afd8c194d0d02877e84b7d74"}, - {file = "SQLAlchemy_Utils-0.41.1-py3-none-any.whl", hash = "sha256:6c96b0768ea3f15c0dc56b363d386138c562752b84f647fb8d31a2223aaab801"}, -] [package.dependencies] SQLAlchemy = ">=1.3" @@ -7414,12 +4593,9 @@ url = ["furl (>=0.4.1)"] name = "sqlfluff" version = "2.3.2" description = "The SQL Linter for Humans" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "sqlfluff-2.3.2-py3-none-any.whl", hash = "sha256:85c8b683e283ff632fe28529ddb60585ea2d1d3c614fc7a1db171632b99dcce3"}, - {file = "sqlfluff-2.3.2.tar.gz", hash = "sha256:3403ce7e9133766d7336b7e26638657ec6cc9e5610e35186b7f02cc427dd49b7"}, -] [package.dependencies] appdirs = "*" @@ -7442,12 +4618,9 @@ typing-extensions = "*" name = "sqlparse" version = "0.4.4" description = "A non-validating SQL parser." +category = "main" optional = false python-versions = ">=3.5" -files = [ - {file = "sqlparse-0.4.4-py3-none-any.whl", hash = "sha256:5430a4fe2ac7d0f93e66f1efc6e1338a41884b7ddf2a350cedd20ccc4d9d28f3"}, - {file = "sqlparse-0.4.4.tar.gz", hash = "sha256:d446183e84b8349fa3061f0fe7f06ca94ba65b426946ffebe6e3e8295332420c"}, -] [package.extras] dev = ["build", "flake8"] @@ -7458,12 +4631,9 @@ test = ["pytest", "pytest-cov"] name = "stevedore" version = "5.1.0" description = "Manage dynamic plugins for Python applications" +category = "dev" optional = false python-versions = ">=3.8" -files = [ - {file = "stevedore-5.1.0-py3-none-any.whl", hash = "sha256:8cc040628f3cea5d7128f2e76cf486b2251a4e543c7b938f58d9a377f6694a2d"}, - {file = "stevedore-5.1.0.tar.gz", hash = "sha256:a54534acf9b89bc7ed264807013b505bf07f74dbe4bcfa37d32bd063870b087c"}, -] [package.dependencies] pbr = ">=2.0.0,<2.1.0 || >2.1.0" @@ -7472,12 +4642,9 @@ pbr = ">=2.0.0,<2.1.0 || >2.1.0" name = "sympy" version = "1.12" description = "Computer algebra system (CAS) in Python" +category = "main" optional = true python-versions = ">=3.8" -files = [ - {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"}, - {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"}, -] [package.dependencies] mpmath = ">=0.19" @@ -7486,12 +4653,9 @@ mpmath = ">=0.19" name = "tabulate" version = "0.9.0" description = "Pretty-print tabular data" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, - {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, -] [package.extras] widechars = ["wcwidth"] @@ -7500,23 +4664,17 @@ widechars = ["wcwidth"] name = "tblib" version = "2.0.0" description = "Traceback serialization library." 
+category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "tblib-2.0.0-py3-none-any.whl", hash = "sha256:9100bfa016b047d5b980d66e7efed952fbd20bd85b56110aaf473cb97d18709a"}, - {file = "tblib-2.0.0.tar.gz", hash = "sha256:a6df30f272c08bf8be66e0775fad862005d950a6b8449b94f7c788731d70ecd7"}, -] [[package]] name = "tenacity" version = "8.2.3" description = "Retry code until it succeeds" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, - {file = "tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a"}, -] [package.extras] doc = ["reno", "sphinx", "tornado (>=4.5)"] @@ -7525,12 +4683,9 @@ doc = ["reno", "sphinx", "tornado (>=4.5)"] name = "termcolor" version = "2.3.0" description = "ANSI color formatting for output in terminal" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "termcolor-2.3.0-py3-none-any.whl", hash = "sha256:3afb05607b89aed0ffe25202399ee0867ad4d3cb4180d98aaf8eefa6a5f7d475"}, - {file = "termcolor-2.3.0.tar.gz", hash = "sha256:b5b08f68937f138fe92f6c089b99f1e2da0ae56c52b78bf7075fd95420fd9a5a"}, -] [package.extras] tests = ["pytest", "pytest-cov"] @@ -7539,61 +4694,17 @@ tests = ["pytest", "pytest-cov"] name = "text-unidecode" version = "1.3" description = "The most basic Text::Unidecode port" +category = "main" optional = false python-versions = "*" -files = [ - {file = "text-unidecode-1.3.tar.gz", hash = "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93"}, - {file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"}, -] [[package]] name = "tokenizers" version = "0.13.3" description = "Fast and Customizable Tokenizers" +category = "main" optional = true python-versions = "*" -files = [ - {file = "tokenizers-0.13.3-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:f3835c5be51de8c0a092058a4d4380cb9244fb34681fd0a295fbf0a52a5fdf33"}, - {file = "tokenizers-0.13.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4ef4c3e821730f2692489e926b184321e887f34fb8a6b80b8096b966ba663d07"}, - {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5fd1a6a25353e9aa762e2aae5a1e63883cad9f4e997c447ec39d071020459bc"}, - {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ee0b1b311d65beab83d7a41c56a1e46ab732a9eed4460648e8eb0bd69fc2d059"}, - {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ef4215284df1277dadbcc5e17d4882bda19f770d02348e73523f7e7d8b8d396"}, - {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4d53976079cff8a033f778fb9adca2d9d69d009c02fa2d71a878b5f3963ed30"}, - {file = "tokenizers-0.13.3-cp310-cp310-win32.whl", hash = "sha256:1f0e3b4c2ea2cd13238ce43548959c118069db7579e5d40ec270ad77da5833ce"}, - {file = "tokenizers-0.13.3-cp310-cp310-win_amd64.whl", hash = "sha256:89649c00d0d7211e8186f7a75dfa1db6996f65edce4b84821817eadcc2d3c79e"}, - {file = "tokenizers-0.13.3-cp311-cp311-macosx_10_11_universal2.whl", hash = "sha256:56b726e0d2bbc9243872b0144515ba684af5b8d8cd112fb83ee1365e26ec74c8"}, - {file = "tokenizers-0.13.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:cc5c022ce692e1f499d745af293ab9ee6f5d92538ed2faf73f9708c89ee59ce6"}, - {file 
= "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f55c981ac44ba87c93e847c333e58c12abcbb377a0c2f2ef96e1a266e4184ff2"}, - {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f247eae99800ef821a91f47c5280e9e9afaeed9980fc444208d5aa6ba69ff148"}, - {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b3e3215d048e94f40f1c95802e45dcc37c5b05eb46280fc2ccc8cd351bff839"}, - {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ba2b0bf01777c9b9bc94b53764d6684554ce98551fec496f71bc5be3a03e98b"}, - {file = "tokenizers-0.13.3-cp311-cp311-win32.whl", hash = "sha256:cc78d77f597d1c458bf0ea7c2a64b6aa06941c7a99cb135b5969b0278824d808"}, - {file = "tokenizers-0.13.3-cp311-cp311-win_amd64.whl", hash = "sha256:ecf182bf59bd541a8876deccf0360f5ae60496fd50b58510048020751cf1724c"}, - {file = "tokenizers-0.13.3-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:0527dc5436a1f6bf2c0327da3145687d3bcfbeab91fed8458920093de3901b44"}, - {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07cbb2c307627dc99b44b22ef05ff4473aa7c7cc1fec8f0a8b37d8a64b1a16d2"}, - {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4560dbdeaae5b7ee0d4e493027e3de6d53c991b5002d7ff95083c99e11dd5ac0"}, - {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:64064bd0322405c9374305ab9b4c07152a1474370327499911937fd4a76d004b"}, - {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8c6e2ab0f2e3d939ca66aa1d596602105fe33b505cd2854a4c1717f704c51de"}, - {file = "tokenizers-0.13.3-cp37-cp37m-win32.whl", hash = "sha256:6cc29d410768f960db8677221e497226e545eaaea01aa3613fa0fdf2cc96cff4"}, - {file = "tokenizers-0.13.3-cp37-cp37m-win_amd64.whl", hash = "sha256:fc2a7fdf864554a0dacf09d32e17c0caa9afe72baf9dd7ddedc61973bae352d8"}, - {file = "tokenizers-0.13.3-cp38-cp38-macosx_10_11_x86_64.whl", hash = "sha256:8791dedba834c1fc55e5f1521be325ea3dafb381964be20684b92fdac95d79b7"}, - {file = "tokenizers-0.13.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:d607a6a13718aeb20507bdf2b96162ead5145bbbfa26788d6b833f98b31b26e1"}, - {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3791338f809cd1bf8e4fee6b540b36822434d0c6c6bc47162448deee3f77d425"}, - {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2f35f30e39e6aab8716f07790f646bdc6e4a853816cc49a95ef2a9016bf9ce6"}, - {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:310204dfed5aa797128b65d63538a9837cbdd15da2a29a77d67eefa489edda26"}, - {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0f9b92ea052305166559f38498b3b0cae159caea712646648aaa272f7160963"}, - {file = "tokenizers-0.13.3-cp38-cp38-win32.whl", hash = "sha256:9a3fa134896c3c1f0da6e762d15141fbff30d094067c8f1157b9fdca593b5806"}, - {file = "tokenizers-0.13.3-cp38-cp38-win_amd64.whl", hash = "sha256:8e7b0cdeace87fa9e760e6a605e0ae8fc14b7d72e9fc19c578116f7287bb873d"}, - {file = "tokenizers-0.13.3-cp39-cp39-macosx_10_11_x86_64.whl", hash = "sha256:00cee1e0859d55507e693a48fa4aef07060c4bb6bd93d80120e18fea9371c66d"}, - {file = "tokenizers-0.13.3-cp39-cp39-macosx_12_0_arm64.whl", hash = 
"sha256:a23ff602d0797cea1d0506ce69b27523b07e70f6dda982ab8cf82402de839088"}, - {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70ce07445050b537d2696022dafb115307abdffd2a5c106f029490f84501ef97"}, - {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:280ffe95f50eaaf655b3a1dc7ff1d9cf4777029dbbc3e63a74e65a056594abc3"}, - {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97acfcec592f7e9de8cadcdcda50a7134423ac8455c0166b28c9ff04d227b371"}, - {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd7730c98a3010cd4f523465867ff95cd9d6430db46676ce79358f65ae39797b"}, - {file = "tokenizers-0.13.3-cp39-cp39-win32.whl", hash = "sha256:48625a108029cb1ddf42e17a81b5a3230ba6888a70c9dc14e81bc319e812652d"}, - {file = "tokenizers-0.13.3-cp39-cp39-win_amd64.whl", hash = "sha256:bc0a6f1ba036e482db6453571c9e3e60ecd5489980ffd95d11dc9f960483d783"}, - {file = "tokenizers-0.13.3.tar.gz", hash = "sha256:2e546dbb68b623008a5442353137fbb0123d311a6d7ba52f2667c8862a75af2e"}, -] [package.extras] dev = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] @@ -7604,56 +4715,41 @@ testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] name = "toml" version = "0.10.2" description = "Python Library for Tom's Obvious, Minimal Language" +category = "dev" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, - {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, -] [[package]] name = "tomli" version = "2.0.1" description = "A lil' TOML parser" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, -] [[package]] name = "tomli-w" version = "1.0.0" description = "A lil' TOML writer" +category = "dev" optional = false python-versions = ">=3.7" -files = [ - {file = "tomli_w-1.0.0-py3-none-any.whl", hash = "sha256:9f2a07e8be30a0729e533ec968016807069991ae2fd921a78d42f429ae5f4463"}, - {file = "tomli_w-1.0.0.tar.gz", hash = "sha256:f463434305e0336248cac9c2dc8076b707d8a12d019dd349f5c1e382dd1ae1b9"}, -] [[package]] name = "tomlkit" version = "0.12.1" description = "Style preserving TOML library" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "tomlkit-0.12.1-py3-none-any.whl", hash = "sha256:712cbd236609acc6a3e2e97253dfc52d4c2082982a88f61b640ecf0817eab899"}, - {file = "tomlkit-0.12.1.tar.gz", hash = "sha256:38e1ff8edb991273ec9f6181244a6a391ac30e9f5098e7535640ea6be97a7c86"}, -] [[package]] name = "tqdm" version = "4.66.1" description = "Fast, Extensible Progress Meter" +category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "tqdm-4.66.1-py3-none-any.whl", hash = "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"}, - {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"}, -] [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} @@ -7668,12 +4764,9 @@ telegram = ["requests"] name = 
"typeapi" version = "2.1.1" description = "" +category = "dev" optional = false python-versions = ">=3.6.3,<4.0.0" -files = [ - {file = "typeapi-2.1.1-py3-none-any.whl", hash = "sha256:ef41577f316bfd362572e727ba349dab80a7362318a80fc72e6a807017d04c5c"}, - {file = "typeapi-2.1.1.tar.gz", hash = "sha256:49b3c1d3382e27dccbb59132a3a823c61954f679a0c61f119fd6d8470073a298"}, -] [package.dependencies] typing-extensions = ">=3.0.0" @@ -7682,272 +4775,3686 @@ typing-extensions = ">=3.0.0" name = "types-awscrt" version = "0.19.1" description = "Type annotations and code completion for awscrt" +category = "main" optional = false python-versions = ">=3.7,<4.0" -files = [ - {file = "types_awscrt-0.19.1-py3-none-any.whl", hash = "sha256:68fffeb75396e9e7614cd930b2d52295f680230774750907bcafb56f11514043"}, - {file = "types_awscrt-0.19.1.tar.gz", hash = "sha256:61833aa140e724a9098025610f4b8cde3dcf65b842631d7447378f9f5db4e1fd"}, -] [[package]] name = "types-cachetools" version = "5.3.0.6" description = "Typing stubs for cachetools" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "types-cachetools-5.3.0.6.tar.gz", hash = "sha256:595f0342d246c8ba534f5a762cf4c2f60ecb61e8002b8b2277fd5cf791d4e851"}, - {file = "types_cachetools-5.3.0.6-py3-none-any.whl", hash = "sha256:f7f8a25bfe306f2e6bc2ad0a2f949d9e72f2d91036d509c36d3810bf728bc6e1"}, -] [[package]] name = "types-click" version = "7.1.8" description = "Typing stubs for click" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "types-click-7.1.8.tar.gz", hash = "sha256:b6604968be6401dc516311ca50708a0a28baa7a0cb840efd7412f0dbbff4e092"}, - {file = "types_click-7.1.8-py3-none-any.whl", hash = "sha256:8cb030a669e2e927461be9827375f83c16b8178c365852c060a34e24871e7e81"}, -] [[package]] name = "types-deprecated" version = "1.2.9.3" description = "Typing stubs for Deprecated" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "types-Deprecated-1.2.9.3.tar.gz", hash = "sha256:ef87327adf3e3c4a4c7d8e06e58f6476710d3466ecfb53c49efb080804a70ef3"}, - {file = "types_Deprecated-1.2.9.3-py3-none-any.whl", hash = "sha256:24da9210763e5e1b3d0d4f6f8bba9ad3bb6af3fe7f6815fc37e3ede4681704f5"}, -] [[package]] name = "types-protobuf" version = "4.24.0.1" description = "Typing stubs for protobuf" +category = "dev" optional = false python-versions = "*" -files = [ - {file = "types-protobuf-4.24.0.1.tar.gz", hash = "sha256:90adea3b693d6a40d8ef075c58fe6b5cc6e01fe1496301a7e6fc70398dcff92e"}, - {file = "types_protobuf-4.24.0.1-py3-none-any.whl", hash = "sha256:df203a204e4ae97d4cca4c9cf725262579dd7857a19f9e7fc74871ccfa073c01"}, -] [[package]] name = "types-psutil" version = "5.9.5.16" description = "Typing stubs for psutil" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "types-psycopg2" +version = "2.9.21.14" +description = "Typing stubs for psycopg2" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "types-python-dateutil" +version = "2.8.19.14" +description = "Typing stubs for python-dateutil" +category = "dev" optional = false python-versions = "*" -files = [ + +[[package]] +name = "types-pyyaml" +version = "6.0.12.11" +description = "Typing stubs for PyYAML" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "types-requests" +version = "2.31.0.2" +description = "Typing stubs for requests" +category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +types-urllib3 = "*" + +[[package]] +name = 
"types-s3transfer" +version = "0.6.2" +description = "Type annotations and code completion for s3transfer" +category = "main" +optional = false +python-versions = ">=3.7,<4.0" + +[[package]] +name = "types-setuptools" +version = "68.1.0.1" +description = "Typing stubs for setuptools" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "types-simplejson" +version = "3.19.0.2" +description = "Typing stubs for simplejson" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "types-sqlalchemy" +version = "1.4.53.38" +description = "Typing stubs for SQLAlchemy" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "types-tqdm" +version = "4.66.0.2" +description = "Typing stubs for tqdm" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "types-urllib3" +version = "1.26.25.14" +description = "Typing stubs for urllib3" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "typing-extensions" +version = "4.7.1" +description = "Backported and Experimental Type Hints for Python 3.7+" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "tzdata" +version = "2023.3" +description = "Provider of IANA time zone data" +category = "main" +optional = false +python-versions = ">=2" + +[[package]] +name = "uc-micro-py" +version = "1.0.2" +description = "Micro subset of unicode data files for linkify-it-py projects." +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +test = ["coverage", "pytest", "pytest-cov"] + +[[package]] +name = "unicodecsv" +version = "0.14.1" +description = "Python2's stdlib csv module is nice, but it doesn't support unicode. This module is a drop-in replacement which *does*." +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "uritemplate" +version = "4.1.1" +description = "Implementation of RFC 6570 URI Templates" +category = "dev" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "urllib3" +version = "1.26.16" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[[package]] +name = "validators" +version = "0.21.0" +description = "Python Data Validation for Humans™" +category = "main" +optional = true +python-versions = ">=3.8,<4.0" + +[[package]] +name = "watchdog" +version = "3.0.0" +description = "Filesystem events monitoring" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +watchmedo = ["PyYAML (>=3.10)"] + +[[package]] +name = "wcwidth" +version = "0.2.6" +description = "Measures the displayed width of unicode strings in a terminal" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "weaviate-client" +version = "3.23.2" +description = "A python native Weaviate client" +category = "main" +optional = true +python-versions = ">=3.8" + +[package.dependencies] +authlib = ">=1.1.0" +requests = ">=2.28.0,<=2.31.0" +tqdm = ">=4.59.0,<5.0.0" +validators = ">=0.18.2,<=0.21.0" + +[package.extras] +grpc = ["grpcio", "grpcio-tools"] + +[[package]] +name = "werkzeug" +version = "2.3.7" +description = "The comprehensive WSGI web application library." +category = "main" +optional = false +python-versions = ">=3.8" + +[package.dependencies] +MarkupSafe = ">=2.1.1" + +[package.extras] +watchdog = ["watchdog (>=2.3)"] + +[[package]] +name = "wheel" +version = "0.41.2" +description = "A built-package format for Python" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.extras] +test = ["pytest (>=6.0.0)", "setuptools (>=65)"] + +[[package]] +name = "win-precise-time" +version = "1.4.2" +description = "" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "wrapt" +version = "1.15.0" +description = "Module for decorators, wrappers and monkey patching." +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" + +[[package]] +name = "wtforms" +version = "3.0.1" +description = "Form validation and rendering for Python web development." +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +MarkupSafe = "*" + +[package.extras] +email = ["email-validator"] + +[[package]] +name = "yapf" +version = "0.33.0" +description = "A formatter for Python code." 
+category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +tomli = ">=2.0.1" + +[[package]] +name = "yarl" +version = "1.9.2" +description = "Yet another URL library" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" + +[[package]] +name = "zipp" +version = "3.16.2" +description = "Backport of pathlib-compatible object wrapper for zip files" +category = "main" +optional = false +python-versions = ">=3.8" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] + +[extras] +athena = ["pyathena", "pyarrow", "s3fs", "botocore"] +az = ["adlfs"] +bigquery = ["grpcio", "google-cloud-bigquery", "pyarrow", "gcsfs"] +cli = ["pipdeptree", "cron-descriptor"] +dbt = ["dbt-core", "dbt-redshift", "dbt-bigquery", "dbt-duckdb", "dbt-snowflake", "dbt-athena-community"] +duckdb = ["duckdb"] +filesystem = ["s3fs", "botocore"] +gcp = ["grpcio", "google-cloud-bigquery", "gcsfs"] +gs = ["gcsfs"] +motherduck = ["duckdb", "pyarrow"] +mssql = ["pyodbc"] +parquet = ["pyarrow"] +postgres = ["psycopg2-binary", "psycopg2cffi"] +qdrant = ["qdrant-client"] +redshift = ["psycopg2-binary", "psycopg2cffi"] +s3 = ["s3fs", "botocore"] +snowflake = ["snowflake-connector-python"] +weaviate = ["weaviate-client"] + +[metadata] +lock-version = "1.1" +python-versions = ">=3.8.1,<3.13" +content-hash = "6d89195f314b522b9f244be68848a1d6f2fbcba9b16742c84e69da7bcd603913" + +[metadata.files] +about-time = [ + {file = "about-time-4.2.1.tar.gz", hash = "sha256:6a538862d33ce67d997429d14998310e1dbfda6cb7d9bbfbf799c4709847fece"}, + {file = "about_time-4.2.1-py3-none-any.whl", hash = "sha256:8bbf4c75fe13cbd3d72f49a03b02c5c7dca32169b6d49117c257e7eb3eaee341"}, +] +adlfs = [ + {file = "adlfs-2023.8.0-py3-none-any.whl", hash = "sha256:3eb248a3c2a30b419f1147bd7676d156b5219f96ef7f11d47166afd2a3bdb07e"}, + {file = "adlfs-2023.8.0.tar.gz", hash = "sha256:07e804f6df4593acfcaf01025b162e30ac13e523d3570279c98b2d91a18026d9"}, +] +agate = [ + {file = "agate-1.6.3-py2.py3-none-any.whl", hash = "sha256:2d568fd68a8eb8b56c805a1299ba4bc30ca0434563be1bea309c9d1c1c8401f4"}, + {file = "agate-1.6.3.tar.gz", hash = "sha256:e0f2f813f7e12311a4cdccc97d6ba0a6781e9c1aa8eca0ab00d5931c0113a308"}, +] +aiobotocore = [ + {file = "aiobotocore-2.5.2-py3-none-any.whl", hash = "sha256:337429ffd3cc367532572d40be809a84c7b5335f3f8eca2f23e09dfaa9a9ef90"}, + {file = "aiobotocore-2.5.2.tar.gz", hash = "sha256:e7399f21570db1c287f1c0c814dd3475dfe1c8166722e2c77ce67f172cbcfa89"}, +] +aiohttp = [ + {file = "aiohttp-3.8.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a94159871304770da4dd371f4291b20cac04e8c94f11bdea1c3478e557fbe0d8"}, + {file = "aiohttp-3.8.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:13bf85afc99ce6f9ee3567b04501f18f9f8dbbb2ea11ed1a2e079670403a7c84"}, + {file = "aiohttp-3.8.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2ce2ac5708501afc4847221a521f7e4b245abf5178cf5ddae9d5b3856ddb2f3a"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96943e5dcc37a6529d18766597c491798b7eb7a61d48878611298afc1fca946c"}, + {file = 
"aiohttp-3.8.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ad5c3c4590bb3cc28b4382f031f3783f25ec223557124c68754a2231d989e2b"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0c413c633d0512df4dc7fd2373ec06cc6a815b7b6d6c2f208ada7e9e93a5061d"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df72ac063b97837a80d80dec8d54c241af059cc9bb42c4de68bd5b61ceb37caa"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c48c5c0271149cfe467c0ff8eb941279fd6e3f65c9a388c984e0e6cf57538e14"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:368a42363c4d70ab52c2c6420a57f190ed3dfaca6a1b19afda8165ee16416a82"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7607ec3ce4993464368505888af5beb446845a014bc676d349efec0e05085905"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:0d21c684808288a98914e5aaf2a7c6a3179d4df11d249799c32d1808e79503b5"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:312fcfbacc7880a8da0ae8b6abc6cc7d752e9caa0051a53d217a650b25e9a691"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ad093e823df03bb3fd37e7dec9d4670c34f9e24aeace76808fc20a507cace825"}, + {file = "aiohttp-3.8.5-cp310-cp310-win32.whl", hash = "sha256:33279701c04351a2914e1100b62b2a7fdb9a25995c4a104259f9a5ead7ed4802"}, + {file = "aiohttp-3.8.5-cp310-cp310-win_amd64.whl", hash = "sha256:6e4a280e4b975a2e7745573e3fc9c9ba0d1194a3738ce1cbaa80626cc9b4f4df"}, + {file = "aiohttp-3.8.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ae871a964e1987a943d83d6709d20ec6103ca1eaf52f7e0d36ee1b5bebb8b9b9"}, + {file = "aiohttp-3.8.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:461908b2578955045efde733719d62f2b649c404189a09a632d245b445c9c975"}, + {file = "aiohttp-3.8.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:72a860c215e26192379f57cae5ab12b168b75db8271f111019509a1196dfc780"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc14be025665dba6202b6a71cfcdb53210cc498e50068bc088076624471f8bb9"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8af740fc2711ad85f1a5c034a435782fbd5b5f8314c9a3ef071424a8158d7f6b"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:841cd8233cbd2111a0ef0a522ce016357c5e3aff8a8ce92bcfa14cef890d698f"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ed1c46fb119f1b59304b5ec89f834f07124cd23ae5b74288e364477641060ff"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84f8ae3e09a34f35c18fa57f015cc394bd1389bce02503fb30c394d04ee6b938"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:62360cb771707cb70a6fd114b9871d20d7dd2163a0feafe43fd115cfe4fe845e"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:23fb25a9f0a1ca1f24c0a371523546366bb642397c94ab45ad3aedf2941cec6a"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b0ba0d15164eae3d878260d4c4df859bbdc6466e9e6689c344a13334f988bb53"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = 
"sha256:5d20003b635fc6ae3f96d7260281dfaf1894fc3aa24d1888a9b2628e97c241e5"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0175d745d9e85c40dcc51c8f88c74bfbaef9e7afeeeb9d03c37977270303064c"}, + {file = "aiohttp-3.8.5-cp311-cp311-win32.whl", hash = "sha256:2e1b1e51b0774408f091d268648e3d57f7260c1682e7d3a63cb00d22d71bb945"}, + {file = "aiohttp-3.8.5-cp311-cp311-win_amd64.whl", hash = "sha256:043d2299f6dfdc92f0ac5e995dfc56668e1587cea7f9aa9d8a78a1b6554e5755"}, + {file = "aiohttp-3.8.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cae533195e8122584ec87531d6df000ad07737eaa3c81209e85c928854d2195c"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f21e83f355643c345177a5d1d8079f9f28b5133bcd154193b799d380331d5d3"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a7a75ef35f2df54ad55dbf4b73fe1da96f370e51b10c91f08b19603c64004acc"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e2e9839e14dd5308ee773c97115f1e0a1cb1d75cbeeee9f33824fa5144c7634"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c44e65da1de4403d0576473e2344828ef9c4c6244d65cf4b75549bb46d40b8dd"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:78d847e4cde6ecc19125ccbc9bfac4a7ab37c234dd88fbb3c5c524e8e14da543"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:c7a815258e5895d8900aec4454f38dca9aed71085f227537208057853f9d13f2"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:8b929b9bd7cd7c3939f8bcfffa92fae7480bd1aa425279d51a89327d600c704d"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:5db3a5b833764280ed7618393832e0853e40f3d3e9aa128ac0ba0f8278d08649"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:a0215ce6041d501f3155dc219712bc41252d0ab76474615b9700d63d4d9292af"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:fd1ed388ea7fbed22c4968dd64bab0198de60750a25fe8c0c9d4bef5abe13824"}, + {file = "aiohttp-3.8.5-cp36-cp36m-win32.whl", hash = "sha256:6e6783bcc45f397fdebc118d772103d751b54cddf5b60fbcc958382d7dd64f3e"}, + {file = "aiohttp-3.8.5-cp36-cp36m-win_amd64.whl", hash = "sha256:b5411d82cddd212644cf9360879eb5080f0d5f7d809d03262c50dad02f01421a"}, + {file = "aiohttp-3.8.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:01d4c0c874aa4ddfb8098e85d10b5e875a70adc63db91f1ae65a4b04d3344cda"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5980a746d547a6ba173fd5ee85ce9077e72d118758db05d229044b469d9029a"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a482e6da906d5e6e653be079b29bc173a48e381600161c9932d89dfae5942ef"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80bd372b8d0715c66c974cf57fe363621a02f359f1ec81cba97366948c7fc873"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1161b345c0a444ebcf46bf0a740ba5dcf50612fd3d0528883fdc0eff578006a"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd56db019015b6acfaaf92e1ac40eb8434847d9bf88b4be4efe5bfd260aee692"}, + {file = 
"aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:153c2549f6c004d2754cc60603d4668899c9895b8a89397444a9c4efa282aaf4"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:4a01951fabc4ce26ab791da5f3f24dca6d9a6f24121746eb19756416ff2d881b"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bfb9162dcf01f615462b995a516ba03e769de0789de1cadc0f916265c257e5d8"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:7dde0009408969a43b04c16cbbe252c4f5ef4574ac226bc8815cd7342d2028b6"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4149d34c32f9638f38f544b3977a4c24052042affa895352d3636fa8bffd030a"}, + {file = "aiohttp-3.8.5-cp37-cp37m-win32.whl", hash = "sha256:68c5a82c8779bdfc6367c967a4a1b2aa52cd3595388bf5961a62158ee8a59e22"}, + {file = "aiohttp-3.8.5-cp37-cp37m-win_amd64.whl", hash = "sha256:2cf57fb50be5f52bda004b8893e63b48530ed9f0d6c96c84620dc92fe3cd9b9d"}, + {file = "aiohttp-3.8.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:eca4bf3734c541dc4f374ad6010a68ff6c6748f00451707f39857f429ca36ced"}, + {file = "aiohttp-3.8.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1274477e4c71ce8cfe6c1ec2f806d57c015ebf84d83373676036e256bc55d690"}, + {file = "aiohttp-3.8.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:28c543e54710d6158fc6f439296c7865b29e0b616629767e685a7185fab4a6b9"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:910bec0c49637d213f5d9877105d26e0c4a4de2f8b1b29405ff37e9fc0ad52b8"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5443910d662db951b2e58eb70b0fbe6b6e2ae613477129a5805d0b66c54b6cb7"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e460be6978fc24e3df83193dc0cc4de46c9909ed92dd47d349a452ef49325b7"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb1558def481d84f03b45888473fc5a1f35747b5f334ef4e7a571bc0dfcb11f8"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34dd0c107799dcbbf7d48b53be761a013c0adf5571bf50c4ecad5643fe9cfcd0"}, + {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:aa1990247f02a54185dc0dff92a6904521172a22664c863a03ff64c42f9b5410"}, + {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0e584a10f204a617d71d359fe383406305a4b595b333721fa50b867b4a0a1548"}, + {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:a3cf433f127efa43fee6b90ea4c6edf6c4a17109d1d037d1a52abec84d8f2e42"}, + {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:c11f5b099adafb18e65c2c997d57108b5bbeaa9eeee64a84302c0978b1ec948b"}, + {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:84de26ddf621d7ac4c975dbea4c945860e08cccde492269db4e1538a6a6f3c35"}, + {file = "aiohttp-3.8.5-cp38-cp38-win32.whl", hash = "sha256:ab88bafedc57dd0aab55fa728ea10c1911f7e4d8b43e1d838a1739f33712921c"}, + {file = "aiohttp-3.8.5-cp38-cp38-win_amd64.whl", hash = "sha256:5798a9aad1879f626589f3df0f8b79b3608a92e9beab10e5fda02c8a2c60db2e"}, + {file = "aiohttp-3.8.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a6ce61195c6a19c785df04e71a4537e29eaa2c50fe745b732aa937c0c77169f3"}, + {file = "aiohttp-3.8.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:773dd01706d4db536335fcfae6ea2440a70ceb03dd3e7378f3e815b03c97ab51"}, + {file = "aiohttp-3.8.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f83a552443a526ea38d064588613aca983d0ee0038801bc93c0c916428310c28"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f7372f7341fcc16f57b2caded43e81ddd18df53320b6f9f042acad41f8e049a"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea353162f249c8097ea63c2169dd1aa55de1e8fecbe63412a9bc50816e87b761"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d47ae48db0b2dcf70bc8a3bc72b3de86e2a590fc299fdbbb15af320d2659de"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d827176898a2b0b09694fbd1088c7a31836d1a505c243811c87ae53a3f6273c1"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3562b06567c06439d8b447037bb655ef69786c590b1de86c7ab81efe1c9c15d8"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4e874cbf8caf8959d2adf572a78bba17cb0e9d7e51bb83d86a3697b686a0ab4d"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6809a00deaf3810e38c628e9a33271892f815b853605a936e2e9e5129762356c"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:33776e945d89b29251b33a7e7d006ce86447b2cfd66db5e5ded4e5cd0340585c"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:eaeed7abfb5d64c539e2db173f63631455f1196c37d9d8d873fc316470dfbacd"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e91d635961bec2d8f19dfeb41a539eb94bd073f075ca6dae6c8dc0ee89ad6f91"}, + {file = "aiohttp-3.8.5-cp39-cp39-win32.whl", hash = "sha256:00ad4b6f185ec67f3e6562e8a1d2b69660be43070bd0ef6fcec5211154c7df67"}, + {file = "aiohttp-3.8.5-cp39-cp39-win_amd64.whl", hash = "sha256:c0a9034379a37ae42dea7ac1e048352d96286626251862e448933c0f59cbd79c"}, + {file = "aiohttp-3.8.5.tar.gz", hash = "sha256:b9552ec52cc147dbf1944ac7ac98af7602e51ea2dcd076ed194ca3c0d1c7d0bc"}, +] +aioitertools = [ + {file = "aioitertools-0.11.0-py3-none-any.whl", hash = "sha256:04b95e3dab25b449def24d7df809411c10e62aab0cbe31a50ca4e68748c43394"}, + {file = "aioitertools-0.11.0.tar.gz", hash = "sha256:42c68b8dd3a69c2bf7f2233bf7df4bb58b557bca5252ac02ed5187bbc67d6831"}, +] +aiosignal = [ + {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, + {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, +] +alembic = [ + {file = "alembic-1.12.0-py3-none-any.whl", hash = "sha256:03226222f1cf943deee6c85d9464261a6c710cd19b4fe867a3ad1f25afda610f"}, + {file = "alembic-1.12.0.tar.gz", hash = "sha256:8e7645c32e4f200675e69f0745415335eb59a3663f5feb487abfa0b30c45888b"}, +] +alive-progress = [ + {file = "alive-progress-3.1.4.tar.gz", hash = "sha256:74a95d8d0d42bc99d3a3725dbd06ebb852245f1b64e301a7c375b92b22663f7b"}, + {file = "alive_progress-3.1.4-py3-none-any.whl", hash = "sha256:c80ad87ce9c1054b01135a87fae69ecebbfc2107497ae87cbe6aec7e534903db"}, +] +annotated-types = [ + {file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"}, + {file = "annotated_types-0.6.0.tar.gz", hash = 
"sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"}, +] +ansicon = [ + {file = "ansicon-1.89.0-py2.py3-none-any.whl", hash = "sha256:f1def52d17f65c2c9682cf8370c03f541f410c1752d6a14029f97318e4b9dfec"}, + {file = "ansicon-1.89.0.tar.gz", hash = "sha256:e4d039def5768a47e4afec8e89e83ec3ae5a26bf00ad851f914d1240b444d2b1"}, +] +anyio = [ + {file = "anyio-4.0.0-py3-none-any.whl", hash = "sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f"}, + {file = "anyio-4.0.0.tar.gz", hash = "sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a"}, +] +apache-airflow = [ + {file = "apache-airflow-2.7.2.tar.gz", hash = "sha256:c6fab3449066867d9a7728f40b6b9e27f1ea68bca39b064a27f5c5ddc3262224"}, + {file = "apache_airflow-2.7.2-py3-none-any.whl", hash = "sha256:1bc2c022bcae24b911e49fafd5fb619b49efba87ed7bc8561a2065810d8fe899"}, +] +apache-airflow-providers-common-sql = [ + {file = "apache-airflow-providers-common-sql-1.7.1.tar.gz", hash = "sha256:ba37f795d9656a87cf4661edc381b8ecfe930272c59324b59f8a158fd0971aeb"}, + {file = "apache_airflow_providers_common_sql-1.7.1-py3-none-any.whl", hash = "sha256:36da2f51b51a64765b0ed5e6a5fece8eaa3ca173dfbff803e2fe2a0afbb90944"}, +] +apache-airflow-providers-ftp = [ + {file = "apache-airflow-providers-ftp-3.5.1.tar.gz", hash = "sha256:dc6dc524dc7454857a0812154d7540172e36db3a87e48a4a91918ebf80898bbf"}, + {file = "apache_airflow_providers_ftp-3.5.1-py3-none-any.whl", hash = "sha256:e4ea77d6276355acfe2392c12155db7b9d51be460b7673b616dc1d8bee03c1d7"}, +] +apache-airflow-providers-http = [ + {file = "apache-airflow-providers-http-4.5.1.tar.gz", hash = "sha256:ec90920ff980fc264af9811dc72c37ef272bcdb3d007c7114e12366559426460"}, + {file = "apache_airflow_providers_http-4.5.1-py3-none-any.whl", hash = "sha256:702f26938bc22684eefecd297c2b0809793f9e43b8d911d807a29f21e69da179"}, +] +apache-airflow-providers-imap = [ + {file = "apache-airflow-providers-imap-3.3.1.tar.gz", hash = "sha256:40bac2a75e4dfbcd7d397776d90d03938facaf2707acc6cc119a8db684e53f77"}, + {file = "apache_airflow_providers_imap-3.3.1-py3-none-any.whl", hash = "sha256:adb6ef7864a5a8e245fbbd555bb4ef1eecf5b094d6d23ca0edc5f0aded50490d"}, +] +apache-airflow-providers-sqlite = [ + {file = "apache-airflow-providers-sqlite-3.4.3.tar.gz", hash = "sha256:347d2db03eaa5ea9fef414666565ffa5e849935cbc30e37237edcaa822b5ced8"}, + {file = "apache_airflow_providers_sqlite-3.4.3-py3-none-any.whl", hash = "sha256:4ffa6a50f0ea1b4e51240b657dfec3fb026c87bdfa71af908a56461df6a6f2e0"}, +] +apispec = [ + {file = "apispec-6.3.0-py3-none-any.whl", hash = "sha256:95a0b9355785df998bb0e9b939237a30ee4c7428fd6ef97305eae3da06b9b339"}, + {file = "apispec-6.3.0.tar.gz", hash = "sha256:6cb08d92ce73ff0b3bf46cb2ea5c00d57289b0f279fb0256a3df468182ba5344"}, +] +appdirs = [ + {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"}, + {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"}, +] +argcomplete = [ + {file = "argcomplete-3.1.1-py3-none-any.whl", hash = "sha256:35fa893a88deea85ea7b20d241100e64516d6af6d7b0ae2bed1d263d26f70948"}, + {file = "argcomplete-3.1.1.tar.gz", hash = "sha256:6c4c563f14f01440aaffa3eae13441c5db2357b5eec639abe7c0b15334627dff"}, +] +asgiref = [ + {file = "asgiref-3.7.2-py3-none-any.whl", hash = "sha256:89b2ef2247e3b562a16eef663bc0e2e703ec6468e2fa8a5cd61cd449786d4f6e"}, + {file = "asgiref-3.7.2.tar.gz", hash = 
"sha256:9e0ce3aa93a819ba5b45120216b23878cf6e8525eb3848653452b4192b92afed"}, +] +asn1crypto = [ + {file = "asn1crypto-1.5.1-py2.py3-none-any.whl", hash = "sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67"}, + {file = "asn1crypto-1.5.1.tar.gz", hash = "sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c"}, +] +astatine = [ + {file = "astatine-0.3.3-py3-none-any.whl", hash = "sha256:6d8c914f01fbea252cb8f31563f2e766a9ab03c02b9bcc37d18f7d9138828401"}, + {file = "astatine-0.3.3.tar.gz", hash = "sha256:0c58a7844b5890ff16da07dbfeb187341d8324cb4378940f89d795cbebebce08"}, +] +asttokens = [ + {file = "asttokens-2.3.0-py2.py3-none-any.whl", hash = "sha256:bef1a51bc256d349e9f94e7e40e44b705ed1162f55294220dd561d24583d9877"}, + {file = "asttokens-2.3.0.tar.gz", hash = "sha256:2552a88626aaa7f0f299f871479fc755bd4e7c11e89078965e928fb7bb9a6afe"}, +] +astunparse = [ + {file = "astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8"}, + {file = "astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872"}, +] +async-timeout = [ + {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, + {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, +] +atomicwrites = [ + {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, +] +attrs = [ + {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, + {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, +] +authlib = [ + {file = "Authlib-1.2.1-py2.py3-none-any.whl", hash = "sha256:c88984ea00149a90e3537c964327da930779afa4564e354edfd98410bea01911"}, + {file = "Authlib-1.2.1.tar.gz", hash = "sha256:421f7c6b468d907ca2d9afede256f068f87e34d23dd221c07d13d4c234726afb"}, +] +azure-core = [ + {file = "azure-core-1.29.3.tar.gz", hash = "sha256:c92700af982e71c8c73de9f4c20da8b3f03ce2c22d13066e4d416b4629c87903"}, + {file = "azure_core-1.29.3-py3-none-any.whl", hash = "sha256:f8b2910f92b66293d93bd00564924ad20ad48f4a1e150577cf18d1e7d4f9263c"}, +] +azure-datalake-store = [ + {file = "azure-datalake-store-0.0.53.tar.gz", hash = "sha256:05b6de62ee3f2a0a6e6941e6933b792b800c3e7f6ffce2fc324bc19875757393"}, + {file = "azure_datalake_store-0.0.53-py2.py3-none-any.whl", hash = "sha256:a30c902a6e360aa47d7f69f086b426729784e71c536f330b691647a51dc42b2b"}, +] +azure-identity = [ + {file = "azure-identity-1.14.0.zip", hash = "sha256:72441799f8c5c89bfe21026965e266672a7c5d050c2c65119ef899dd5362e2b1"}, + {file = "azure_identity-1.14.0-py3-none-any.whl", hash = "sha256:edabf0e010eb85760e1dd19424d5e8f97ba2c9caff73a16e7b30ccbdbcce369b"}, +] +azure-storage-blob = [ + {file = "azure-storage-blob-12.17.0.zip", hash = "sha256:c14b785a17050b30fc326a315bdae6bc4a078855f4f94a4c303ad74a48dc8c63"}, + {file = "azure_storage_blob-12.17.0-py3-none-any.whl", hash = "sha256:0016e0c549a80282d7b4920c03f2f4ba35c53e6e3c7dbcd2a4a8c8eb3882c1e7"}, +] +babel = [ + {file = "Babel-2.12.1-py3-none-any.whl", hash = "sha256:b4246fb7677d3b98f501a39d43396d3cafdc8eadb045f4a31be01863f655c610"}, + {file = "Babel-2.12.1.tar.gz", hash = "sha256:cc2d99999cd01d44420ae725a21c9e3711b3aadc7976d6147f622d8581963455"}, +] +backoff = [ + {file = 
"backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, + {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, +] +bandit = [ + {file = "bandit-1.7.5-py3-none-any.whl", hash = "sha256:75665181dc1e0096369112541a056c59d1c5f66f9bb74a8d686c3c362b83f549"}, + {file = "bandit-1.7.5.tar.gz", hash = "sha256:bdfc739baa03b880c2d15d0431b31c658ffc348e907fe197e54e0389dd59e11e"}, +] +beautifulsoup4 = [ + {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, + {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, +] +black = [ + {file = "black-23.9.1-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:d6bc09188020c9ac2555a498949401ab35bb6bf76d4e0f8ee251694664df6301"}, + {file = "black-23.9.1-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:13ef033794029b85dfea8032c9d3b92b42b526f1ff4bf13b2182ce4e917f5100"}, + {file = "black-23.9.1-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:75a2dc41b183d4872d3a500d2b9c9016e67ed95738a3624f4751a0cb4818fe71"}, + {file = "black-23.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13a2e4a93bb8ca74a749b6974925c27219bb3df4d42fc45e948a5d9feb5122b7"}, + {file = "black-23.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:adc3e4442eef57f99b5590b245a328aad19c99552e0bdc7f0b04db6656debd80"}, + {file = "black-23.9.1-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:8431445bf62d2a914b541da7ab3e2b4f3bc052d2ccbf157ebad18ea126efb91f"}, + {file = "black-23.9.1-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:8fc1ddcf83f996247505db6b715294eba56ea9372e107fd54963c7553f2b6dfe"}, + {file = "black-23.9.1-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:7d30ec46de88091e4316b17ae58bbbfc12b2de05e069030f6b747dfc649ad186"}, + {file = "black-23.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:031e8c69f3d3b09e1aa471a926a1eeb0b9071f80b17689a655f7885ac9325a6f"}, + {file = "black-23.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:538efb451cd50f43aba394e9ec7ad55a37598faae3348d723b59ea8e91616300"}, + {file = "black-23.9.1-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:638619a559280de0c2aa4d76f504891c9860bb8fa214267358f0a20f27c12948"}, + {file = "black-23.9.1-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:a732b82747235e0542c03bf352c126052c0fbc458d8a239a94701175b17d4855"}, + {file = "black-23.9.1-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:cf3a4d00e4cdb6734b64bf23cd4341421e8953615cba6b3670453737a72ec204"}, + {file = "black-23.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf99f3de8b3273a8317681d8194ea222f10e0133a24a7548c73ce44ea1679377"}, + {file = "black-23.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:14f04c990259576acd093871e7e9b14918eb28f1866f91968ff5524293f9c573"}, + {file = "black-23.9.1-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:c619f063c2d68f19b2d7270f4cf3192cb81c9ec5bc5ba02df91471d0b88c4c5c"}, + {file = "black-23.9.1-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:6a3b50e4b93f43b34a9d3ef00d9b6728b4a722c997c99ab09102fd5efdb88325"}, + {file = "black-23.9.1-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:c46767e8df1b7beefb0899c4a95fb43058fa8500b6db144f4ff3ca38eb2f6393"}, + {file = "black-23.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:50254ebfa56aa46a9fdd5d651f9637485068a1adf42270148cd101cdf56e0ad9"}, + {file = "black-23.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:403397c033adbc45c2bd41747da1f7fc7eaa44efbee256b53842470d4ac5a70f"}, + {file = "black-23.9.1-py3-none-any.whl", hash = "sha256:6ccd59584cc834b6d127628713e4b6b968e5f79572da66284532525a042549f9"}, + {file = "black-23.9.1.tar.gz", hash = "sha256:24b6b3ff5c6d9ea08a8888f6977eae858e1f340d7260cf56d70a49823236b62d"}, +] +blessed = [ + {file = "blessed-1.20.0-py2.py3-none-any.whl", hash = "sha256:0c542922586a265e699188e52d5f5ac5ec0dd517e5a1041d90d2bbf23f906058"}, + {file = "blessed-1.20.0.tar.gz", hash = "sha256:2cdd67f8746e048f00df47a2880f4d6acbcdb399031b604e34ba8f71d5787680"}, +] +blinker = [ + {file = "blinker-1.6.2-py3-none-any.whl", hash = "sha256:c3d739772abb7bc2860abf5f2ec284223d9ad5c76da018234f6f50d6f31ab1f0"}, + {file = "blinker-1.6.2.tar.gz", hash = "sha256:4afd3de66ef3a9f8067559fb7a1cbe555c17dcbe15971b05d1b625c3e7abe213"}, +] +boto3 = [ + {file = "boto3-1.26.161-py3-none-any.whl", hash = "sha256:f66e5c9dbe7f34383bcf64fa6070771355c11a44dd75c7f1279f2f37e1c89183"}, + {file = "boto3-1.26.161.tar.gz", hash = "sha256:662731e464d14af1035f44fc6a46b0e3112ee011ac0a5ed416d205daa3e15f25"}, +] +boto3-stubs = [ + {file = "boto3-stubs-1.28.40.tar.gz", hash = "sha256:76079a82f199087319762c931f13506e02129132e80257dab0888d3da7dc11c7"}, + {file = "boto3_stubs-1.28.40-py3-none-any.whl", hash = "sha256:bd1d1cbdcbf18902a090d4a746cdecef2a7ebe31cf9a474bbe407d57eaa79a6a"}, +] +botocore = [ + {file = "botocore-1.29.161-py3-none-any.whl", hash = "sha256:b906999dd53dda2ef0ef6f7f55fcc81a4b06b9f1c8a9f65c546e0b981f959f5f"}, + {file = "botocore-1.29.161.tar.gz", hash = "sha256:a50edd715eb510343e27849f36483804aae4b871590db4d4996aa53368dcac40"}, +] +botocore-stubs = [ + {file = "botocore_stubs-1.31.40-py3-none-any.whl", hash = "sha256:aab534d7e7949cd543bc9b2fadc1a36712033cb00e6f31e2475eefe8486d19ae"}, + {file = "botocore_stubs-1.31.40.tar.gz", hash = "sha256:2001a253daf4ae2e171e6137b9982a00a7fbfc7a53449a16856dc049e7cd5214"}, +] +cachelib = [ + {file = "cachelib-0.9.0-py3-none-any.whl", hash = "sha256:811ceeb1209d2fe51cd2b62810bd1eccf70feba5c52641532498be5c675493b3"}, + {file = "cachelib-0.9.0.tar.gz", hash = "sha256:38222cc7c1b79a23606de5c2607f4925779e37cdcea1c2ad21b8bae94b5425a5"}, +] +cachetools = [ + {file = "cachetools-5.3.1-py3-none-any.whl", hash = "sha256:95ef631eeaea14ba2e36f06437f36463aac3a096799e876ee55e5cdccb102590"}, + {file = "cachetools-5.3.1.tar.gz", hash = "sha256:dce83f2d9b4e1f732a8cd44af8e8fab2dbe46201467fc98b3ef8f269092bf62b"}, +] +cattrs = [ + {file = "cattrs-23.1.2-py3-none-any.whl", hash = "sha256:b2bb14311ac17bed0d58785e5a60f022e5431aca3932e3fc5cc8ed8639de50a4"}, + {file = "cattrs-23.1.2.tar.gz", hash = "sha256:db1c821b8c537382b2c7c66678c3790091ca0275ac486c76f3c8f3920e83c657"}, +] +certifi = [ + {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"}, + {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, +] +cffi = [ + {file = "cffi-1.15.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2"}, + {file = "cffi-1.15.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2"}, + {file = "cffi-1.15.1-cp27-cp27m-manylinux1_x86_64.whl", hash = 
"sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914"}, + {file = "cffi-1.15.1-cp27-cp27m-win32.whl", hash = "sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3"}, + {file = "cffi-1.15.1-cp27-cp27m-win_amd64.whl", hash = "sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e"}, + {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162"}, + {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b"}, + {file = "cffi-1.15.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21"}, + {file = "cffi-1.15.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4"}, + {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01"}, + {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e"}, + {file = "cffi-1.15.1-cp310-cp310-win32.whl", hash = "sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2"}, + {file = "cffi-1.15.1-cp310-cp310-win_amd64.whl", hash = "sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d"}, + {file = "cffi-1.15.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac"}, + {file = "cffi-1.15.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c"}, + {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef"}, + {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8"}, + {file = "cffi-1.15.1-cp311-cp311-win32.whl", hash = "sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d"}, + {file = "cffi-1.15.1-cp311-cp311-win_amd64.whl", hash = "sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104"}, + {file = "cffi-1.15.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e"}, + {file = "cffi-1.15.1-cp36-cp36m-win32.whl", hash = "sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf"}, + {file = "cffi-1.15.1-cp36-cp36m-win_amd64.whl", hash = "sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497"}, + {file = "cffi-1.15.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426"}, + {file = "cffi-1.15.1-cp37-cp37m-win32.whl", hash = "sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9"}, + {file = "cffi-1.15.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045"}, + {file = "cffi-1.15.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca"}, + {file = 
"cffi-1.15.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192"}, + {file = "cffi-1.15.1-cp38-cp38-win32.whl", hash = "sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314"}, + {file = "cffi-1.15.1-cp38-cp38-win_amd64.whl", hash = "sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5"}, + {file = "cffi-1.15.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585"}, + {file = "cffi-1.15.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27"}, + {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76"}, + {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3"}, + {file = "cffi-1.15.1-cp39-cp39-win32.whl", hash = "sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee"}, + {file = "cffi-1.15.1-cp39-cp39-win_amd64.whl", hash = "sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c"}, + {file = "cffi-1.15.1.tar.gz", hash = "sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9"}, +] +chardet = [ + {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"}, + {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"}, +] +charset-normalizer = [ + {file = "charset-normalizer-3.2.0.tar.gz", hash = "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-win32.whl", hash = "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = 
"sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-win32.whl", hash = "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94"}, + {file = 
"charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c"}, + {file = 
"charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-win32.whl", hash = "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80"}, + {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"}, +] +click = [ + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, +] +clickclick = [ + {file = "clickclick-20.10.2-py2.py3-none-any.whl", hash = "sha256:c8f33e6d9ec83f68416dd2136a7950125bd256ec39ccc9a85c6e280a16be2bb5"}, + {file = "clickclick-20.10.2.tar.gz", hash = "sha256:4efb13e62353e34c5eef7ed6582c4920b418d7dedc86d819e22ee089ba01802c"}, +] +colorama = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] +coloredlogs = [ + {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"}, + {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, +] +colorlog = [ + {file = "colorlog-4.8.0-py2.py3-none-any.whl", hash = "sha256:3dd15cb27e8119a24c1a7b5c93f9f3b455855e0f73993b1c25921b2f646f1dcd"}, + {file = "colorlog-4.8.0.tar.gz", hash = "sha256:59b53160c60902c405cdec28d38356e09d40686659048893e026ecbd589516b1"}, +] +configupdater = [ + {file = "ConfigUpdater-3.1.1-py2.py3-none-any.whl", hash = "sha256:805986dbeba317886c7a8d348b2e34986dc9e3128cd3761ecc35decbd372b286"}, + {file = "ConfigUpdater-3.1.1.tar.gz", hash = "sha256:46f0c74d73efa723776764b43c9739f68052495dd3d734319c1d0eb58511f15b"}, +] +connectorx = [ + {file = "connectorx-0.3.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:719750045e7c3b94c199271fbfe6aef47944768e711f27bcc606b498707e0054"}, + {file = "connectorx-0.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aed31b08acebeb3ebbe53c0df846c686e7c27c4242bff3a75b72cf517d070257"}, + {file = "connectorx-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:71d2c2678339fb01f89469bbe22e66e75cabcf727a52ed72d576fef5744ebc58"}, + {file = "connectorx-0.3.1-cp310-none-win_amd64.whl", hash = "sha256:92e576ef9610b59f8e5456c12d22e5b0752d0207f586df82701987657909888b"}, + {file = "connectorx-0.3.1-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:36c28cc59220998928e7b283eecf404e17e077dc3e525570096d0968b192cc64"}, + {file = "connectorx-0.3.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:c5173e7252f593c46787627a46561b0d949eb80ab23321e045bbf6bd5131945c"}, + {file = "connectorx-0.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:3c8411631750d24c12e5e296720637909b8515d5faa3b5eaf7bb86c582d02667"}, + {file = "connectorx-0.3.1-cp37-none-win_amd64.whl", hash = "sha256:0674b6389f8f2ba62155ac2f718df18f76f9de5c50d9911a5fefe7485e1c598e"}, + {file = "connectorx-0.3.1-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:324c5075e8aa6698db8c877cb847f0d86172784db88ac0f3e6762aa9852330f3"}, + {file = "connectorx-0.3.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:027a3880629a7b33ae0c7a80ab4fa53286957a253af2dfe34f19adfea6b79b91"}, + {file = "connectorx-0.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a666b967958fcf9fc0444a7b3603483ee23a2fe39f0da3d545ff199f376f7e4b"}, + {file = "connectorx-0.3.1-cp38-none-win_amd64.whl", hash = "sha256:3c5dedfd75cf44898c17cc84a1dd0ab6ed0fa54de0461f2d6aa4bcb2c2b0dc1d"}, + {file = "connectorx-0.3.1-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:354c4126bcd7a9efbb8879feac92e1e7b0d0712f7e98665c392af663805491f8"}, + {file = "connectorx-0.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3011e1f9a27fd2a7b12c6a45bc29f6e7577a27418a3f607adaf54b301ff09068"}, + {file = "connectorx-0.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1efb6ed547acc5837c2211e3d65d22948019d1653e7b30e522a4a4bd6d25fa8"}, + {file = "connectorx-0.3.1-cp39-none-win_amd64.whl", hash = "sha256:001b473e600b6d25af83b32674f98dccf49705a59bd6df724b5ba9beb236a0e0"}, +] +connexion = [ + {file = "connexion-2.14.1-py2.py3-none-any.whl", hash = "sha256:f343717241b4c4802a694c38fee66fb1693c897fe4ea5a957fa9b3b07caf6394"}, + {file = "connexion-2.14.1.tar.gz", hash = "sha256:99aa5781e70a7b94f8ffae8cf89f309d49cdb811bbd65a8e2f2546f3b19a01e6"}, +] +cron-descriptor = [ + {file = "cron_descriptor-1.4.0.tar.gz", hash = "sha256:b6ff4e3a988d7ca04a4ab150248e9f166fb7a5c828a85090e75bcc25aa93b4dd"}, +] +croniter = [ + {file = "croniter-1.4.1-py2.py3-none-any.whl", hash = "sha256:9595da48af37ea06ec3a9f899738f1b2c1c13da3c38cea606ef7cd03ea421128"}, + {file = "croniter-1.4.1.tar.gz", hash = "sha256:1a6df60eacec3b7a0aa52a8f2ef251ae3dd2a7c7c8b9874e73e791636d55a361"}, +] +cryptography = [ + {file = "cryptography-41.0.3-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:652627a055cb52a84f8c448185922241dd5217443ca194d5739b44612c5e6507"}, + {file = "cryptography-41.0.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:8f09daa483aedea50d249ef98ed500569841d6498aa9c9f4b0531b9964658922"}, + {file = "cryptography-41.0.3-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fd871184321100fb400d759ad0cddddf284c4b696568204d281c902fc7b0d81"}, + {file = "cryptography-41.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84537453d57f55a50a5b6835622ee405816999a7113267739a1b4581f83535bd"}, + {file = "cryptography-41.0.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:3fb248989b6363906827284cd20cca63bb1a757e0a2864d4c1682a985e3dca47"}, + {file = "cryptography-41.0.3-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:42cb413e01a5d36da9929baa9d70ca90d90b969269e5a12d39c1e0d475010116"}, + {file = "cryptography-41.0.3-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:aeb57c421b34af8f9fe830e1955bf493a86a7996cc1338fe41b30047d16e962c"}, + {file = "cryptography-41.0.3-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6af1c6387c531cd364b72c28daa29232162010d952ceb7e5ca8e2827526aceae"}, + {file = "cryptography-41.0.3-cp37-abi3-win32.whl", hash = "sha256:0d09fb5356f975974dbcb595ad2d178305e5050656affb7890a1583f5e02a306"}, + {file = 
"cryptography-41.0.3-cp37-abi3-win_amd64.whl", hash = "sha256:a983e441a00a9d57a4d7c91b3116a37ae602907a7618b882c8013b5762e80574"}, + {file = "cryptography-41.0.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5259cb659aa43005eb55a0e4ff2c825ca111a0da1814202c64d28a985d33b087"}, + {file = "cryptography-41.0.3-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:67e120e9a577c64fe1f611e53b30b3e69744e5910ff3b6e97e935aeb96005858"}, + {file = "cryptography-41.0.3-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:7efe8041897fe7a50863e51b77789b657a133c75c3b094e51b5e4b5cec7bf906"}, + {file = "cryptography-41.0.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ce785cf81a7bdade534297ef9e490ddff800d956625020ab2ec2780a556c313e"}, + {file = "cryptography-41.0.3-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:57a51b89f954f216a81c9d057bf1a24e2f36e764a1ca9a501a6964eb4a6800dd"}, + {file = "cryptography-41.0.3-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:4c2f0d35703d61002a2bbdcf15548ebb701cfdd83cdc12471d2bae80878a4207"}, + {file = "cryptography-41.0.3-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:23c2d778cf829f7d0ae180600b17e9fceea3c2ef8b31a99e3c694cbbf3a24b84"}, + {file = "cryptography-41.0.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:95dd7f261bb76948b52a5330ba5202b91a26fbac13ad0e9fc8a3ac04752058c7"}, + {file = "cryptography-41.0.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:41d7aa7cdfded09b3d73a47f429c298e80796c8e825ddfadc84c8a7f12df212d"}, + {file = "cryptography-41.0.3-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d0d651aa754ef58d75cec6edfbd21259d93810b73f6ec246436a21b7841908de"}, + {file = "cryptography-41.0.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:ab8de0d091acbf778f74286f4989cf3d1528336af1b59f3e5d2ebca8b5fe49e1"}, + {file = "cryptography-41.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a74fbcdb2a0d46fe00504f571a2a540532f4c188e6ccf26f1f178480117b33c4"}, + {file = "cryptography-41.0.3.tar.gz", hash = "sha256:6d192741113ef5e30d89dcb5b956ef4e1578f304708701b8b73d38e3e1461f34"}, +] +databind-core = [ + {file = "databind.core-4.4.0-py3-none-any.whl", hash = "sha256:3c8a4d9abc93e158af9931d8cec389ddfc0514e02aec03b397948d243db11881"}, + {file = "databind.core-4.4.0.tar.gz", hash = "sha256:715d485e934c073f819f0250bbfcaf59c1319f83427365bc7cfd4c347f87576d"}, +] +databind-json = [ + {file = "databind.json-4.4.0-py3-none-any.whl", hash = "sha256:df8874118cfba6fd0e77ec3d41a87e04e26034bd545230cab0db1fe904bf1b09"}, + {file = "databind.json-4.4.0.tar.gz", hash = "sha256:4356afdf0aeefcc053eda0888650c59cc558be2686f08a58324d675ccd023586"}, +] +dbt-athena-community = [ + {file = "dbt-athena-community-1.5.2.tar.gz", hash = "sha256:9acd333ddf33514769189a7a0b6219e13966d370098211cb1d022fa32e64671a"}, + {file = "dbt_athena_community-1.5.2-py3-none-any.whl", hash = "sha256:c9f0f8425500211a1c1deddce5aff5ed24fe08530f0ffad38e63de9c9b9f3ee6"}, +] +dbt-bigquery = [ + {file = "dbt-bigquery-1.5.6.tar.gz", hash = "sha256:4655cf2ee0acda986b80e6c5d55cae57871bef22d868dfe29d8d4a5bca98a1ba"}, + {file = "dbt_bigquery-1.5.6-py3-none-any.whl", hash = "sha256:3f37544716880cbd17b32bc0c9728a0407b5615b2cd08e1bb904a7a83c46eb6c"}, +] +dbt-core = [ + {file = "dbt-core-1.5.6.tar.gz", hash = "sha256:af3c03cd4a1fc92481362888014ca1ffed2ffef0b0e0d98463ad0f26c49ef458"}, + {file = "dbt_core-1.5.6-py3-none-any.whl", hash = "sha256:030d2179f9efbf8ccea079296d0c79278d963bb2475c0bcce9ca4bbb0d8c393c"}, +] +dbt-duckdb = [ + {file = 
"dbt-duckdb-1.5.2.tar.gz", hash = "sha256:3407216c21bf78fd128dccfcff3ec4bf260fb145e633432015bc7d0f123e8e4b"}, + {file = "dbt_duckdb-1.5.2-py3-none-any.whl", hash = "sha256:5d18254807bbc3e61daf4f360208ad886adf44b8525e1998168290fbe73a5cbb"}, +] +dbt-extractor = [ + {file = "dbt_extractor-0.4.1-cp36-abi3-macosx_10_7_x86_64.whl", hash = "sha256:4dc715bd740e418d8dc1dd418fea508e79208a24cf5ab110b0092a3cbe96bf71"}, + {file = "dbt_extractor-0.4.1-cp36-abi3-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:bc9e0050e3a2f4ea9fe58e8794bc808e6709a0c688ed710fc7c5b6ef3e5623ec"}, + {file = "dbt_extractor-0.4.1-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76872cdee659075d6ce2df92dc62e59a74ba571be62acab2e297ca478b49d766"}, + {file = "dbt_extractor-0.4.1-cp36-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:81435841610be1b07806d72cd89b1956c6e2a84c360b9ceb3f949c62a546d569"}, + {file = "dbt_extractor-0.4.1-cp36-abi3-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:7c291f9f483eae4f60dd5859097d7ba51d5cb6c4725f08973ebd18cdea89d758"}, + {file = "dbt_extractor-0.4.1-cp36-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:822b1e911db230e1b9701c99896578e711232001027b518c44c32f79a46fa3f9"}, + {file = "dbt_extractor-0.4.1-cp36-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:554d27741a54599c39e5c0b7dbcab77400d83f908caba284a3e960db812e5814"}, + {file = "dbt_extractor-0.4.1-cp36-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a805d51a25317f53cbff951c79b9cf75421cf48e4b3e1dfb3e9e8de6d824b76c"}, + {file = "dbt_extractor-0.4.1-cp36-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cad90ddc708cb4182dc16fe2c87b1f088a1679877b93e641af068eb68a25d582"}, + {file = "dbt_extractor-0.4.1-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:34783d788b133f223844e280e37b3f5244f2fb60acc457aa75c2667e418d5442"}, + {file = "dbt_extractor-0.4.1-cp36-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:9da211869a1220ea55c5552c1567a3ea5233a6c52fa89ca87a22465481c37bc9"}, + {file = "dbt_extractor-0.4.1-cp36-abi3-musllinux_1_2_i686.whl", hash = "sha256:7d7c47774dc051b8c18690281a55e2e3d3320e823b17e04b06bc3ff81b1874ba"}, + {file = "dbt_extractor-0.4.1-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:037907a7c7ae0391045d81338ca77ddaef899a91d80f09958f09fe374594e19b"}, + {file = "dbt_extractor-0.4.1-cp36-abi3-win32.whl", hash = "sha256:3fe8d8e28a7bd3e0884896147269ca0202ca432d8733113386bdc84c824561bf"}, + {file = "dbt_extractor-0.4.1-cp36-abi3-win_amd64.whl", hash = "sha256:35265a0ae0a250623b0c2e3308b2738dc8212e40e0aa88407849e9ea090bb312"}, + {file = "dbt_extractor-0.4.1.tar.gz", hash = "sha256:75b1c665699ec0f1ffce1ba3d776f7dfce802156f22e70a7b9c8f0b4d7e80f42"}, +] +dbt-postgres = [ + {file = "dbt-postgres-1.5.6.tar.gz", hash = "sha256:b74e471dc661819a3d4bda2d11497935661ac2e25786c8a5b7314d8241b18582"}, + {file = "dbt_postgres-1.5.6-py3-none-any.whl", hash = "sha256:bc5711c9ab0ec4b57ab814b2c4e4c973554c8374b7da94b06814ac81c91f67ef"}, +] +dbt-redshift = [ + {file = "dbt-redshift-1.5.10.tar.gz", hash = "sha256:2b9ae1a7d05349e208b0937cd7cc920ea427341ef96096021b18e4070e927f5c"}, + {file = "dbt_redshift-1.5.10-py3-none-any.whl", hash = "sha256:b7689b043535b6b0d217c2abfe924db2336beaae71f3f36ab9aa1e920d2bb2e0"}, +] +dbt-snowflake = [ + {file = "dbt-snowflake-1.5.3.tar.gz", hash = "sha256:cf42772d2c2f1e29a2a64b039c66d80a8593f52a2dd711a144d43b4175802f9a"}, + {file = "dbt_snowflake-1.5.3-py3-none-any.whl", hash 
= "sha256:8aaa939d834798e5bb10a3ba4f52fc32a53e6e5568d6c0e8b3ac644f099972ff"}, +] +decopatch = [ + {file = "decopatch-1.4.10-py2.py3-none-any.whl", hash = "sha256:e151f7f93de2b1b3fd3f3272dcc7cefd1a69f68ec1c2d8e288ecd9deb36dc5f7"}, + {file = "decopatch-1.4.10.tar.gz", hash = "sha256:957f49c93f4150182c23f8fb51d13bb3213e0f17a79e09c8cca7057598b55720"}, +] +decorator = [ + {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, + {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, +] +deprecated = [ + {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, + {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"}, +] +diff-cover = [ + {file = "diff_cover-7.7.0-py3-none-any.whl", hash = "sha256:bf86f32ec999f9a9e79bf24969f7127ea7b4e55c3ef3cd9300feb13188c89736"}, + {file = "diff_cover-7.7.0.tar.gz", hash = "sha256:60614cf7e722cf7fb1bde497afac0b514294e1e26534449622dac4da296123fb"}, +] +dill = [ + {file = "dill-0.3.7-py3-none-any.whl", hash = "sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e"}, + {file = "dill-0.3.7.tar.gz", hash = "sha256:cc1c8b182eb3013e24bd475ff2e9295af86c1a38eb1aff128dac8962a9ce3c03"}, +] +dnspython = [ + {file = "dnspython-2.4.2-py3-none-any.whl", hash = "sha256:57c6fbaaeaaf39c891292012060beb141791735dbb4004798328fc2c467402d8"}, + {file = "dnspython-2.4.2.tar.gz", hash = "sha256:8dcfae8c7460a2f84b4072e26f1c9f4101ca20c071649cb7c34e8b6a93d58984"}, +] +docspec = [ + {file = "docspec-2.2.1-py3-none-any.whl", hash = "sha256:7538f750095a9688c6980ff9a4e029a823a500f64bd00b6b4bdb27951feb31cb"}, + {file = "docspec-2.2.1.tar.gz", hash = "sha256:4854e77edc0e2de40e785e57e95880f7095a05fe978f8b54cef7a269586e15ff"}, +] +docspec-python = [ + {file = "docspec_python-2.2.1-py3-none-any.whl", hash = "sha256:76ac41d35a8face35b2d766c2e8a416fb8832359785d396f0d53bcb00f178e54"}, + {file = "docspec_python-2.2.1.tar.gz", hash = "sha256:c41b850b4d6f4de30999ea6f82c9cdb9183d9bcba45559ee9173d3dab7281559"}, +] +docstring-parser = [ + {file = "docstring_parser-0.11.tar.gz", hash = "sha256:93b3f8f481c7d24e37c5d9f30293c89e2933fa209421c8abd731dd3ef0715ecb"}, +] +docutils = [ + {file = "docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6"}, + {file = "docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b"}, +] +domdf-python-tools = [ + {file = "domdf_python_tools-3.6.1-py3-none-any.whl", hash = "sha256:e18158460850957f18e740eb94ede56f580ddb0cb162ab9d9834ed8bbb1b6431"}, + {file = "domdf_python_tools-3.6.1.tar.gz", hash = "sha256:acc04563d23bce4d437dd08af6b9bea788328c412772a044d8ca428a7ad861be"}, +] +duckdb = [ + {file = "duckdb-0.9.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6c724e105ecd78c8d86b3c03639b24e1df982392fc836705eb007e4b1b488864"}, + {file = "duckdb-0.9.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:75f12c5a3086079fb6440122565f1762ef1a610a954f2d8081014c1dd0646e1a"}, + {file = "duckdb-0.9.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:151f5410c32f8f8fe03bf23462b9604349bc0b4bd3a51049bbf5e6a482a435e8"}, + {file = "duckdb-0.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c1d066fdae22b9b711b1603541651a378017645f9fbc4adc9764b2f3c9e9e4a"}, + {file 
= "duckdb-0.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1de56d8b7bd7a7653428c1bd4b8948316df488626d27e9c388194f2e0d1428d4"}, + {file = "duckdb-0.9.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1fb6cd590b1bb4e31fde8efd25fedfbfa19a86fa72789fa5b31a71da0d95bce4"}, + {file = "duckdb-0.9.1-cp310-cp310-win32.whl", hash = "sha256:1039e073714d668cef9069bb02c2a6756c7969cedda0bff1332520c4462951c8"}, + {file = "duckdb-0.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:7e6ac4c28918e1d278a89ff26fd528882aa823868ed530df69d6c8a193ae4e41"}, + {file = "duckdb-0.9.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5eb750f2ee44397a61343f32ee9d9e8c8b5d053fa27ba4185d0e31507157f130"}, + {file = "duckdb-0.9.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aea2a46881d75dc069a242cb164642d7a4f792889010fb98210953ab7ff48849"}, + {file = "duckdb-0.9.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed3dcedfc7a9449b6d73f9a2715c730180056e0ba837123e7967be1cd3935081"}, + {file = "duckdb-0.9.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c55397bed0087ec4445b96f8d55f924680f6d40fbaa7f2e35468c54367214a5"}, + {file = "duckdb-0.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3261696130f1cfb955735647c93297b4a6241753fb0de26c05d96d50986c6347"}, + {file = "duckdb-0.9.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:64c04b1728e3e37cf93748829b5d1e028227deea75115bb5ead01c608ece44b1"}, + {file = "duckdb-0.9.1-cp311-cp311-win32.whl", hash = "sha256:12cf9fb441a32702e31534330a7b4d569083d46a91bf185e0c9415000a978789"}, + {file = "duckdb-0.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:fdfd85575ce9540e593d5d25c9d32050bd636c27786afd7b776aae0f6432b55e"}, + {file = "duckdb-0.9.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:704700a4b469e3bb1a7e85ac12e58037daaf2b555ef64a3fe2913ffef7bd585b"}, + {file = "duckdb-0.9.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf55b303b7b1a8c2165a96e609eb30484bc47481d94a5fb1e23123e728df0a74"}, + {file = "duckdb-0.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b70e23c14746904ca5de316436e43a685eb769c67fe3dbfaacbd3cce996c5045"}, + {file = "duckdb-0.9.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:77379f7f1f8b4dc98e01f8f6f8f15a0858cf456e2385e22507f3cb93348a88f9"}, + {file = "duckdb-0.9.1-cp37-cp37m-win32.whl", hash = "sha256:92c8f738489838666cae9ef41703f8b16f660bb146970d1eba8b2c06cb3afa39"}, + {file = "duckdb-0.9.1-cp37-cp37m-win_amd64.whl", hash = "sha256:08c5484ac06ab714f745526d791141f547e2f5ac92f97a0a1b37dfbb3ea1bd13"}, + {file = "duckdb-0.9.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f66d3c07c7f6938d3277294677eb7dad75165e7c57c8dd505503fc5ef10f67ad"}, + {file = "duckdb-0.9.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c38044e5f78c0c7b58e9f937dcc6c34de17e9ca6be42f9f8f1a5a239f7a847a5"}, + {file = "duckdb-0.9.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73bc0d715b79566b3ede00c367235cfcce67be0eddda06e17665c7a233d6854a"}, + {file = "duckdb-0.9.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d26622c3b4ea6a8328d95882059e3cc646cdc62d267d48d09e55988a3bba0165"}, + {file = "duckdb-0.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3367d10096ff2b7919cedddcf60d308d22d6e53e72ee2702f6e6ca03d361004a"}, + {file = "duckdb-0.9.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d88a119f1cb41911a22f08a6f084d061a8c864e28b9433435beb50a56b0d06bb"}, + 
{file = "duckdb-0.9.1-cp38-cp38-win32.whl", hash = "sha256:99567496e45b55c67427133dc916013e8eb20a811fc7079213f5f03b2a4f5fc0"}, + {file = "duckdb-0.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:5b3da4da73422a3235c3500b3fb541ac546adb3e35642ef1119dbcd9cc7f68b8"}, + {file = "duckdb-0.9.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eca00c0c2062c0265c6c0e78ca2f6a30611b28f3afef062036610e9fc9d4a67d"}, + {file = "duckdb-0.9.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eb5af8e89d40fc4baab1515787ea1520a6c6cf6aa40ab9f107df6c3a75686ce1"}, + {file = "duckdb-0.9.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9fae3d4f83ebcb47995f6acad7c6d57d003a9b6f0e1b31f79a3edd6feb377443"}, + {file = "duckdb-0.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16b9a7efc745bc3c5d1018c3a2f58d9e6ce49c0446819a9600fdba5f78e54c47"}, + {file = "duckdb-0.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b0b60167f5537772e9f5af940e69dcf50e66f5247732b8bb84a493a9af6055"}, + {file = "duckdb-0.9.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4f27f5e94c47df6c4ccddf18e3277b7464eea3db07356d2c4bf033b5c88359b8"}, + {file = "duckdb-0.9.1-cp39-cp39-win32.whl", hash = "sha256:d43cd7e6f783006b59dcc5e40fcf157d21ee3d0c8dfced35278091209e9974d7"}, + {file = "duckdb-0.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:e666795887d9cf1d6b6f6cbb9d487270680e5ff6205ebc54b2308151f13b8cff"}, + {file = "duckdb-0.9.1.tar.gz", hash = "sha256:603a878746015a3f2363a65eb48bcbec816261b6ee8d71eee53061117f6eef9d"}, +] +email-validator = [ + {file = "email_validator-1.3.1-py2.py3-none-any.whl", hash = "sha256:49a72f5fa6ed26be1c964f0567d931d10bf3fdeeacdf97bc26ef1cd2a44e0bda"}, + {file = "email_validator-1.3.1.tar.gz", hash = "sha256:d178c5c6fa6c6824e9b04f199cf23e79ac15756786573c190d2ad13089411ad2"}, +] +enlighten = [ + {file = "enlighten-1.11.2-py2.py3-none-any.whl", hash = "sha256:98c9eb20e022b6a57f1c8d4f17e16760780b6881e6d658c40f52d21255ea45f3"}, + {file = "enlighten-1.11.2.tar.gz", hash = "sha256:9284861dee5a272e0e1a3758cd3f3b7180b1bd1754875da76876f2a7f46ccb61"}, +] +exceptiongroup = [ + {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"}, + {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"}, +] +fastembed = [ + {file = "fastembed-0.1.1-py3-none-any.whl", hash = "sha256:131413ae52cd72f4c8cced7a675f8269dbfd1a852abade3c815e265114bcc05a"}, + {file = "fastembed-0.1.1.tar.gz", hash = "sha256:f7e524ee4f74bb8aad16be5b687d1f77f608d40e96e292c87881dc36baf8f4c7"}, +] +filelock = [ + {file = "filelock-3.12.3-py3-none-any.whl", hash = "sha256:f067e40ccc40f2b48395a80fcbd4728262fab54e232e090a4063ab804179efeb"}, + {file = "filelock-3.12.3.tar.gz", hash = "sha256:0ecc1dd2ec4672a10c8550a8182f1bd0c0a5088470ecd5a125e45f49472fac3d"}, +] +flake8 = [ + {file = "flake8-5.0.4-py2.py3-none-any.whl", hash = "sha256:7a1cf6b73744f5806ab95e526f6f0d8c01c66d7bbe349562d22dfca20610b248"}, + {file = "flake8-5.0.4.tar.gz", hash = "sha256:6fbe320aad8d6b95cec8b8e47bc933004678dc63095be98528b7bdd2a9f510db"}, +] +flake8-bugbear = [ + {file = "flake8-bugbear-22.12.6.tar.gz", hash = "sha256:4cdb2c06e229971104443ae293e75e64c6107798229202fbe4f4091427a30ac0"}, + {file = "flake8_bugbear-22.12.6-py3-none-any.whl", hash = "sha256:b69a510634f8a9c298dfda2b18a8036455e6b19ecac4fe582e4d7a0abfa50a30"}, +] +flake8-builtins = [ + {file = "flake8-builtins-1.5.3.tar.gz", hash = 
"sha256:09998853b2405e98e61d2ff3027c47033adbdc17f9fe44ca58443d876eb00f3b"}, + {file = "flake8_builtins-1.5.3-py2.py3-none-any.whl", hash = "sha256:7706babee43879320376861897e5d1468e396a40b8918ed7bccf70e5f90b8687"}, +] +flake8-encodings = [ + {file = "flake8_encodings-0.5.0.post1-py3-none-any.whl", hash = "sha256:d2fecca0e89ba09c86e5d61cf6bdb1b337f0d74746aac67bbcf0c517b4cb6cba"}, + {file = "flake8_encodings-0.5.0.post1.tar.gz", hash = "sha256:082c0163325c85b438a8106e876283b5ed3cbfc53e68d89130d70be8be4c9977"}, +] +flake8-helper = [ + {file = "flake8_helper-0.2.1-py3-none-any.whl", hash = "sha256:9123cdf351ad32ee8a51b85036052302c478122d62fb512c0773e111b3d05241"}, + {file = "flake8_helper-0.2.1.tar.gz", hash = "sha256:479f86d1c52df8e49ff876ecd3873242699f93eeece7e6675cdca9c37c9b0a16"}, +] +flake8-tidy-imports = [ + {file = "flake8_tidy_imports-4.10.0-py3-none-any.whl", hash = "sha256:b0387fb2ea200441bd142309e716fb7b8f4b0937bdf5f8b7c0c118a5f5e2b8ed"}, + {file = "flake8_tidy_imports-4.10.0.tar.gz", hash = "sha256:bd6cf86465402d2b86903009b748d85a628e599e17b76e810c9857e3a2815173"}, +] +flask = [ + {file = "Flask-2.2.5-py3-none-any.whl", hash = "sha256:58107ed83443e86067e41eff4631b058178191a355886f8e479e347fa1285fdf"}, + {file = "Flask-2.2.5.tar.gz", hash = "sha256:edee9b0a7ff26621bd5a8c10ff484ae28737a2410d99b0bb9a6850c7fb977aa0"}, +] +flask-appbuilder = [ + {file = "Flask-AppBuilder-4.3.6.tar.gz", hash = "sha256:8ca9710fa7d2704747d195e11b487d45a571f40559d8399d9d5dfa42ea1f3c78"}, + {file = "Flask_AppBuilder-4.3.6-py3-none-any.whl", hash = "sha256:840480dfd43134bebf78f3c7dc909e324c2689d2d9f27aeb1880a8a25466bc8d"}, +] +flask-babel = [ + {file = "Flask-Babel-2.0.0.tar.gz", hash = "sha256:f9faf45cdb2e1a32ea2ec14403587d4295108f35017a7821a2b1acb8cfd9257d"}, + {file = "Flask_Babel-2.0.0-py3-none-any.whl", hash = "sha256:e6820a052a8d344e178cdd36dd4bb8aea09b4bda3d5f9fa9f008df2c7f2f5468"}, +] +flask-caching = [ + {file = "Flask-Caching-2.0.2.tar.gz", hash = "sha256:24b60c552d59a9605cc1b6a42c56cdb39a82a28dab4532bbedb9222ae54ecb4e"}, + {file = "Flask_Caching-2.0.2-py3-none-any.whl", hash = "sha256:19571f2570e9b8dd9dd9d2f49d7cbee69c14ebe8cc001100b1eb98c379dd80ad"}, +] +flask-jwt-extended = [ + {file = "Flask-JWT-Extended-4.5.2.tar.gz", hash = "sha256:ba56245ba43b71c8ae936784b867625dce8b9956faeedec2953222e57942fb0b"}, + {file = "Flask_JWT_Extended-4.5.2-py2.py3-none-any.whl", hash = "sha256:e0ef23d8c863746bd141046167073699e1a7b03c97169cbba70f05b8d9cd6b9e"}, +] +flask-limiter = [ + {file = "Flask-Limiter-3.5.0.tar.gz", hash = "sha256:13a3491b994c49f7cb4706587a38ca47e8162b576530472df38be68104f299c0"}, + {file = "Flask_Limiter-3.5.0-py3-none-any.whl", hash = "sha256:dbda4174f44e6cb858c6eb75e7488186f2977dd5d33d7028ba1aabf179de1bee"}, +] +flask-login = [ + {file = "Flask-Login-0.6.2.tar.gz", hash = "sha256:c0a7baa9fdc448cdd3dd6f0939df72eec5177b2f7abe6cb82fc934d29caac9c3"}, + {file = "Flask_Login-0.6.2-py3-none-any.whl", hash = "sha256:1ef79843f5eddd0f143c2cd994c1b05ac83c0401dc6234c143495af9a939613f"}, +] +flask-session = [ + {file = "Flask-Session-0.5.0.tar.gz", hash = "sha256:190875e6aebf2953c6803d42379ef3b934bc209ef8ef006f97aecb08f5aaeb86"}, + {file = "flask_session-0.5.0-py3-none-any.whl", hash = "sha256:1619bcbc16f04f64e90f8e0b17145ba5c9700090bb1294e889956c1282d58631"}, +] +flask-sqlalchemy = [ + {file = "Flask-SQLAlchemy-2.5.1.tar.gz", hash = "sha256:2bda44b43e7cacb15d4e05ff3cc1f8bc97936cc464623424102bfc2c35e95912"}, + {file = "Flask_SQLAlchemy-2.5.1-py2.py3-none-any.whl", hash = 
"sha256:f12c3d4cc5cc7fdcc148b9527ea05671718c3ea45d50c7e732cceb33f574b390"}, +] +flask-wtf = [ + {file = "Flask-WTF-1.1.1.tar.gz", hash = "sha256:41c4244e9ae626d63bed42ae4785b90667b885b1535d5a4095e1f63060d12aa9"}, + {file = "Flask_WTF-1.1.1-py3-none-any.whl", hash = "sha256:7887d6f1ebb3e17bf648647422f0944c9a469d0fcf63e3b66fb9a83037e38b2c"}, +] +flatbuffers = [ + {file = "flatbuffers-23.5.26-py2.py3-none-any.whl", hash = "sha256:c0ff356da363087b915fde4b8b45bdda73432fc17cddb3c8157472eab1422ad1"}, + {file = "flatbuffers-23.5.26.tar.gz", hash = "sha256:9ea1144cac05ce5d86e2859f431c6cd5e66cd9c78c558317c7955fb8d4c78d89"}, +] +frozenlist = [ + {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab"}, + {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559"}, + {file = "frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9ac08e601308e41eb533f232dbf6b7e4cea762f9f84f6357136eed926c15d12c"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d081f13b095d74b67d550de04df1c756831f3b83dc9881c38985834387487f1b"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71932b597f9895f011f47f17d6428252fc728ba2ae6024e13c3398a087c2cdea"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:981b9ab5a0a3178ff413bca62526bb784249421c24ad7381e39d67981be2c326"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e41f3de4df3e80de75845d3e743b3f1c4c8613c3997a912dbf0229fc61a8b963"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6918d49b1f90821e93069682c06ffde41829c346c66b721e65a5c62b4bab0300"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e5c8764c7829343d919cc2dfc587a8db01c4f70a4ebbc49abde5d4b158b007b"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8d0edd6b1c7fb94922bf569c9b092ee187a83f03fb1a63076e7774b60f9481a8"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e29cda763f752553fa14c68fb2195150bfab22b352572cb36c43c47bedba70eb"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:0c7c1b47859ee2cac3846fde1c1dc0f15da6cec5a0e5c72d101e0f83dcb67ff9"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:901289d524fdd571be1c7be054f48b1f88ce8dddcbdf1ec698b27d4b8b9e5d62"}, + {file = "frozenlist-1.4.0-cp310-cp310-win32.whl", hash = "sha256:1a0848b52815006ea6596c395f87449f693dc419061cc21e970f139d466dc0a0"}, + {file = "frozenlist-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:b206646d176a007466358aa21d85cd8600a415c67c9bd15403336c331a10d956"}, + {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:de343e75f40e972bae1ef6090267f8260c1446a1695e77096db6cfa25e759a95"}, + {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad2a9eb6d9839ae241701d0918f54c51365a51407fd80f6b8289e2dfca977cc3"}, + {file = "frozenlist-1.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7bd3b3830247580de99c99ea2a01416dfc3c34471ca1298bccabf86d0ff4dc"}, + {file = 
"frozenlist-1.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdf1847068c362f16b353163391210269e4f0569a3c166bc6a9f74ccbfc7e839"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:38461d02d66de17455072c9ba981d35f1d2a73024bee7790ac2f9e361ef1cd0c"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5a32087d720c608f42caed0ef36d2b3ea61a9d09ee59a5142d6070da9041b8f"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dd65632acaf0d47608190a71bfe46b209719bf2beb59507db08ccdbe712f969b"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261b9f5d17cac914531331ff1b1d452125bf5daa05faf73b71d935485b0c510b"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b89ac9768b82205936771f8d2eb3ce88503b1556324c9f903e7156669f521472"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:008eb8b31b3ea6896da16c38c1b136cb9fec9e249e77f6211d479db79a4eaf01"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e74b0506fa5aa5598ac6a975a12aa8928cbb58e1f5ac8360792ef15de1aa848f"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:490132667476f6781b4c9458298b0c1cddf237488abd228b0b3650e5ecba7467"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:76d4711f6f6d08551a7e9ef28c722f4a50dd0fc204c56b4bcd95c6cc05ce6fbb"}, + {file = "frozenlist-1.4.0-cp311-cp311-win32.whl", hash = "sha256:a02eb8ab2b8f200179b5f62b59757685ae9987996ae549ccf30f983f40602431"}, + {file = "frozenlist-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:515e1abc578dd3b275d6a5114030b1330ba044ffba03f94091842852f806f1c1"}, + {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f0ed05f5079c708fe74bf9027e95125334b6978bf07fd5ab923e9e55e5fbb9d3"}, + {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ca265542ca427bf97aed183c1676e2a9c66942e822b14dc6e5f42e038f92a503"}, + {file = "frozenlist-1.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:491e014f5c43656da08958808588cc6c016847b4360e327a62cb308c791bd2d9"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17ae5cd0f333f94f2e03aaf140bb762c64783935cc764ff9c82dff626089bebf"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e78fb68cf9c1a6aa4a9a12e960a5c9dfbdb89b3695197aa7064705662515de2"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5655a942f5f5d2c9ed93d72148226d75369b4f6952680211972a33e59b1dfdc"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c11b0746f5d946fecf750428a95f3e9ebe792c1ee3b1e96eeba145dc631a9672"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e66d2a64d44d50d2543405fb183a21f76b3b5fd16f130f5c99187c3fb4e64919"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:88f7bc0fcca81f985f78dd0fa68d2c75abf8272b1f5c323ea4a01a4d7a614efc"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = 
"sha256:5833593c25ac59ede40ed4de6d67eb42928cca97f26feea219f21d0ed0959b79"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:fec520865f42e5c7f050c2a79038897b1c7d1595e907a9e08e3353293ffc948e"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:b826d97e4276750beca7c8f0f1a4938892697a6bcd8ec8217b3312dad6982781"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ceb6ec0a10c65540421e20ebd29083c50e6d1143278746a4ef6bcf6153171eb8"}, + {file = "frozenlist-1.4.0-cp38-cp38-win32.whl", hash = "sha256:2b8bcf994563466db019fab287ff390fffbfdb4f905fc77bc1c1d604b1c689cc"}, + {file = "frozenlist-1.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:a6c8097e01886188e5be3e6b14e94ab365f384736aa1fca6a0b9e35bd4a30bc7"}, + {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6c38721585f285203e4b4132a352eb3daa19121a035f3182e08e437cface44bf"}, + {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a0c6da9aee33ff0b1a451e867da0c1f47408112b3391dd43133838339e410963"}, + {file = "frozenlist-1.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93ea75c050c5bb3d98016b4ba2497851eadf0ac154d88a67d7a6816206f6fa7f"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f61e2dc5ad442c52b4887f1fdc112f97caeff4d9e6ebe78879364ac59f1663e1"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa384489fefeb62321b238e64c07ef48398fe80f9e1e6afeff22e140e0850eef"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10ff5faaa22786315ef57097a279b833ecab1a0bfb07d604c9cbb1c4cdc2ed87"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:007df07a6e3eb3e33e9a1fe6a9db7af152bbd8a185f9aaa6ece10a3529e3e1c6"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f4f399d28478d1f604c2ff9119907af9726aed73680e5ed1ca634d377abb087"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c5374b80521d3d3f2ec5572e05adc94601985cc526fb276d0c8574a6d749f1b3"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ce31ae3e19f3c902de379cf1323d90c649425b86de7bbdf82871b8a2a0615f3d"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7211ef110a9194b6042449431e08c4d80c0481e5891e58d429df5899690511c2"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:556de4430ce324c836789fa4560ca62d1591d2538b8ceb0b4f68fb7b2384a27a"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7645a8e814a3ee34a89c4a372011dcd817964ce8cb273c8ed6119d706e9613e3"}, + {file = "frozenlist-1.4.0-cp39-cp39-win32.whl", hash = "sha256:19488c57c12d4e8095a922f328df3f179c820c212940a498623ed39160bc3c2f"}, + {file = "frozenlist-1.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:6221d84d463fb110bdd7619b69cb43878a11d51cbb9394ae3105d082d5199167"}, + {file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"}, +] +fsspec = [ + {file = "fsspec-2023.6.0-py3-none-any.whl", hash = "sha256:1cbad1faef3e391fba6dc005ae9b5bdcbf43005c9167ce78c915549c352c869a"}, + {file = "fsspec-2023.6.0.tar.gz", hash = "sha256:d0b2f935446169753e7a5c5c55681c54ea91996cc67be93c39a154fb3a2742af"}, +] 
+future = [ + {file = "future-0.18.3.tar.gz", hash = "sha256:34a17436ed1e96697a86f9de3d15a3b0be01d8bc8de9c1dffd59fb8234ed5307"}, +] +gcsfs = [ + {file = "gcsfs-2023.6.0-py2.py3-none-any.whl", hash = "sha256:3b3c7d8eddd4ec1380f3b49fbb861ee1e974adb223564401f10884b6260d406f"}, + {file = "gcsfs-2023.6.0.tar.gz", hash = "sha256:30b14fccadb3b7f0d99b2cd03bd8507c40f3a9a7d05847edca571f642bedbdff"}, +] +gitdb = [ + {file = "gitdb-4.0.10-py3-none-any.whl", hash = "sha256:c286cf298426064079ed96a9e4a9d39e7f3e9bf15ba60701e95f5492f28415c7"}, + {file = "gitdb-4.0.10.tar.gz", hash = "sha256:6eb990b69df4e15bad899ea868dc46572c3f75339735663b81de79b06f17eb9a"}, +] +gitpython = [ + {file = "GitPython-3.1.34-py3-none-any.whl", hash = "sha256:5d3802b98a3bae1c2b8ae0e1ff2e4aa16bcdf02c145da34d092324f599f01395"}, + {file = "GitPython-3.1.34.tar.gz", hash = "sha256:85f7d365d1f6bf677ae51039c1ef67ca59091c7ebd5a3509aa399d4eda02d6dd"}, +] +giturlparse = [ + {file = "giturlparse-0.11.1-py2.py3-none-any.whl", hash = "sha256:6422f25c8ca563e1a3cb6b85862e48614be804cd1334e6d84be5630eb26b343f"}, + {file = "giturlparse-0.11.1.tar.gz", hash = "sha256:cdbe0c062096c69e00f08397826dddebc1f73bc15b793994579c13aafc70c990"}, +] +google-api-core = [ + {file = "google-api-core-2.11.1.tar.gz", hash = "sha256:25d29e05a0058ed5f19c61c0a78b1b53adea4d9364b464d014fbda941f6d1c9a"}, + {file = "google_api_core-2.11.1-py3-none-any.whl", hash = "sha256:d92a5a92dc36dd4f4b9ee4e55528a90e432b059f93aee6ad857f9de8cc7ae94a"}, +] +google-api-python-client = [ + {file = "google-api-python-client-2.97.0.tar.gz", hash = "sha256:48277291894876a1ca7ed4127e055e81f81e6343ced1b544a7200ae2c119dcd7"}, + {file = "google_api_python_client-2.97.0-py2.py3-none-any.whl", hash = "sha256:5215f4cd577753fc4192ccfbe0bb8b55d4bb5fd68fa6268ac5cf271b6305de31"}, +] +google-auth = [ + {file = "google-auth-2.22.0.tar.gz", hash = "sha256:164cba9af4e6e4e40c3a4f90a1a6c12ee56f14c0b4868d1ca91b32826ab334ce"}, + {file = "google_auth-2.22.0-py2.py3-none-any.whl", hash = "sha256:d61d1b40897407b574da67da1a833bdc10d5a11642566e506565d1b1a46ba873"}, +] +google-auth-httplib2 = [ + {file = "google-auth-httplib2-0.1.0.tar.gz", hash = "sha256:a07c39fd632becacd3f07718dfd6021bf396978f03ad3ce4321d060015cc30ac"}, + {file = "google_auth_httplib2-0.1.0-py2.py3-none-any.whl", hash = "sha256:31e49c36c6b5643b57e82617cb3e021e3e1d2df9da63af67252c02fa9c1f4a10"}, +] +google-auth-oauthlib = [ + {file = "google-auth-oauthlib-1.0.0.tar.gz", hash = "sha256:e375064964820b47221a7e1b7ee1fd77051b6323c3f9e3e19785f78ab67ecfc5"}, + {file = "google_auth_oauthlib-1.0.0-py2.py3-none-any.whl", hash = "sha256:95880ca704928c300f48194d1770cf5b1462835b6e49db61445a520f793fd5fb"}, +] +google-cloud-bigquery = [ + {file = "google-cloud-bigquery-3.11.4.tar.gz", hash = "sha256:697df117241a2283bcbb93b21e10badc14e51c9a90800d2a7e1a3e1c7d842974"}, + {file = "google_cloud_bigquery-3.11.4-py2.py3-none-any.whl", hash = "sha256:5fa7897743a0ed949ade25a0942fc9e7557d8fce307c6f8a76d1b604cf27f1b1"}, +] +google-cloud-core = [ + {file = "google-cloud-core-2.3.3.tar.gz", hash = "sha256:37b80273c8d7eee1ae816b3a20ae43585ea50506cb0e60f3cf5be5f87f1373cb"}, + {file = "google_cloud_core-2.3.3-py2.py3-none-any.whl", hash = "sha256:fbd11cad3e98a7e5b0343dc07cb1039a5ffd7a5bb96e1f1e27cee4bda4a90863"}, +] +google-cloud-dataproc = [ + {file = "google-cloud-dataproc-5.4.3.tar.gz", hash = "sha256:d9c77c52aa5ddf52ae657736dbfb5312402933f72bab8480fc2d2afe98697402"}, + {file = "google_cloud_dataproc-5.4.3-py2.py3-none-any.whl", hash = 
"sha256:9cfff56cb53621cdffd0a3d6b10701e886e0a8ad54891e6c223eb67c0ff753ad"}, +] +google-cloud-storage = [ + {file = "google-cloud-storage-2.10.0.tar.gz", hash = "sha256:934b31ead5f3994e5360f9ff5750982c5b6b11604dc072bc452c25965e076dc7"}, + {file = "google_cloud_storage-2.10.0-py2.py3-none-any.whl", hash = "sha256:9433cf28801671de1c80434238fb1e7e4a1ba3087470e90f70c928ea77c2b9d7"}, +] +google-crc32c = [ + {file = "google-crc32c-1.5.0.tar.gz", hash = "sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7"}, + {file = "google_crc32c-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:596d1f98fc70232fcb6590c439f43b350cb762fb5d61ce7b0e9db4539654cc13"}, + {file = "google_crc32c-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:be82c3c8cfb15b30f36768797a640e800513793d6ae1724aaaafe5bf86f8f346"}, + {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:461665ff58895f508e2866824a47bdee72497b091c730071f2b7575d5762ab65"}, + {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2096eddb4e7c7bdae4bd69ad364e55e07b8316653234a56552d9c988bd2d61b"}, + {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:116a7c3c616dd14a3de8c64a965828b197e5f2d121fedd2f8c5585c547e87b02"}, + {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5829b792bf5822fd0a6f6eb34c5f81dd074f01d570ed7f36aa101d6fc7a0a6e4"}, + {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:64e52e2b3970bd891309c113b54cf0e4384762c934d5ae56e283f9a0afcd953e"}, + {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:02ebb8bf46c13e36998aeaad1de9b48f4caf545e91d14041270d9dca767b780c"}, + {file = "google_crc32c-1.5.0-cp310-cp310-win32.whl", hash = "sha256:2e920d506ec85eb4ba50cd4228c2bec05642894d4c73c59b3a2fe20346bd00ee"}, + {file = "google_crc32c-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:07eb3c611ce363c51a933bf6bd7f8e3878a51d124acfc89452a75120bc436289"}, + {file = "google_crc32c-1.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:cae0274952c079886567f3f4f685bcaf5708f0a23a5f5216fdab71f81a6c0273"}, + {file = "google_crc32c-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1034d91442ead5a95b5aaef90dbfaca8633b0247d1e41621d1e9f9db88c36298"}, + {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c42c70cd1d362284289c6273adda4c6af8039a8ae12dc451dcd61cdabb8ab57"}, + {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8485b340a6a9e76c62a7dce3c98e5f102c9219f4cfbf896a00cf48caf078d438"}, + {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77e2fd3057c9d78e225fa0a2160f96b64a824de17840351b26825b0848022906"}, + {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f583edb943cf2e09c60441b910d6a20b4d9d626c75a36c8fcac01a6c96c01183"}, + {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:a1fd716e7a01f8e717490fbe2e431d2905ab8aa598b9b12f8d10abebb36b04dd"}, + {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:72218785ce41b9cfd2fc1d6a017dc1ff7acfc4c17d01053265c41a2c0cc39b8c"}, + {file = "google_crc32c-1.5.0-cp311-cp311-win32.whl", hash = "sha256:66741ef4ee08ea0b2cc3c86916ab66b6aef03768525627fd6a1b34968b4e3709"}, + {file = 
"google_crc32c-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:ba1eb1843304b1e5537e1fca632fa894d6f6deca8d6389636ee5b4797affb968"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:98cb4d057f285bd80d8778ebc4fde6b4d509ac3f331758fb1528b733215443ae"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd8536e902db7e365f49e7d9029283403974ccf29b13fc7028b97e2295b33556"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:19e0a019d2c4dcc5e598cd4a4bc7b008546b0358bd322537c74ad47a5386884f"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02c65b9817512edc6a4ae7c7e987fea799d2e0ee40c53ec573a692bee24de876"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6ac08d24c1f16bd2bf5eca8eaf8304812f44af5cfe5062006ec676e7e1d50afc"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3359fc442a743e870f4588fcf5dcbc1bf929df1fad8fb9905cd94e5edb02e84c"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1e986b206dae4476f41bcec1faa057851f3889503a70e1bdb2378d406223994a"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:de06adc872bcd8c2a4e0dc51250e9e65ef2ca91be023b9d13ebd67c2ba552e1e"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-win32.whl", hash = "sha256:d3515f198eaa2f0ed49f8819d5732d70698c3fa37384146079b3799b97667a94"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-win_amd64.whl", hash = "sha256:67b741654b851abafb7bc625b6d1cdd520a379074e64b6a128e3b688c3c04740"}, + {file = "google_crc32c-1.5.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c02ec1c5856179f171e032a31d6f8bf84e5a75c45c33b2e20a3de353b266ebd8"}, + {file = "google_crc32c-1.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:edfedb64740750e1a3b16152620220f51d58ff1b4abceb339ca92e934775c27a"}, + {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84e6e8cd997930fc66d5bb4fde61e2b62ba19d62b7abd7a69920406f9ecca946"}, + {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a"}, + {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:998679bf62b7fb599d2878aa3ed06b9ce688b8974893e7223c60db155f26bd8d"}, + {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:83c681c526a3439b5cf94f7420471705bbf96262f49a6fe546a6db5f687a3d4a"}, + {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4c6fdd4fccbec90cc8a01fc00773fcd5fa28db683c116ee3cb35cd5da9ef6c37"}, + {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5ae44e10a8e3407dbe138984f21e536583f2bba1be9491239f942c2464ac0894"}, + {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:37933ec6e693e51a5b07505bd05de57eee12f3e8c32b07da7e73669398e6630a"}, + {file = "google_crc32c-1.5.0-cp38-cp38-win32.whl", hash = "sha256:fe70e325aa68fa4b5edf7d1a4b6f691eb04bbccac0ace68e34820d283b5f80d4"}, + {file = "google_crc32c-1.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:74dea7751d98034887dbd821b7aae3e1d36eda111d6ca36c206c44478035709c"}, + {file = "google_crc32c-1.5.0-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:c6c777a480337ac14f38564ac88ae82d4cd238bf293f0a22295b66eb89ffced7"}, + {file = "google_crc32c-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:759ce4851a4bb15ecabae28f4d2e18983c244eddd767f560165563bf9aefbc8d"}, + {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f13cae8cc389a440def0c8c52057f37359014ccbc9dc1f0827936bcd367c6100"}, + {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e560628513ed34759456a416bf86b54b2476c59144a9138165c9a1575801d0d9"}, + {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1674e4307fa3024fc897ca774e9c7562c957af85df55efe2988ed9056dc4e57"}, + {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:278d2ed7c16cfc075c91378c4f47924c0625f5fc84b2d50d921b18b7975bd210"}, + {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d5280312b9af0976231f9e317c20e4a61cd2f9629b7bfea6a693d1878a264ebd"}, + {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8b87e1a59c38f275c0e3676fc2ab6d59eccecfd460be267ac360cc31f7bcde96"}, + {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7c074fece789b5034b9b1404a1f8208fc2d4c6ce9decdd16e8220c5a793e6f61"}, + {file = "google_crc32c-1.5.0-cp39-cp39-win32.whl", hash = "sha256:7f57f14606cd1dd0f0de396e1e53824c371e9544a822648cd76c034d209b559c"}, + {file = "google_crc32c-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:a2355cba1f4ad8b6988a4ca3feed5bff33f6af2d7f134852cf279c2aebfde541"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f314013e7dcd5cf45ab1945d92e713eec788166262ae8deb2cfacd53def27325"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b747a674c20a67343cb61d43fdd9207ce5da6a99f629c6e2541aa0e89215bcd"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8f24ed114432de109aa9fd317278518a5af2d31ac2ea6b952b2f7782b43da091"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8667b48e7a7ef66afba2c81e1094ef526388d35b873966d8a9a447974ed9178"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:1c7abdac90433b09bad6c43a43af253e688c9cfc1c86d332aed13f9a7c7f65e2"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6f998db4e71b645350b9ac28a2167e6632c239963ca9da411523bb439c5c514d"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c99616c853bb585301df6de07ca2cadad344fd1ada6d62bb30aec05219c45d2"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ad40e31093a4af319dadf503b2467ccdc8f67c72e4bcba97f8c10cb078207b5"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd67cf24a553339d5062eff51013780a00d6f97a39ca062781d06b3a73b15462"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:398af5e3ba9cf768787eef45c803ff9614cc3e22a5b2f7d7ae116df8b11e3314"}, + {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b1f8133c9a275df5613a451e73f36c2aea4fe13c5c8997e22cf355ebd7bd0728"}, + {file = 
"google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ba053c5f50430a3fcfd36f75aff9caeba0440b2d076afdb79a318d6ca245f88"}, + {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:272d3892a1e1a2dbc39cc5cde96834c236d5327e2122d3aaa19f6614531bb6eb"}, + {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:635f5d4dd18758a1fbd1049a8e8d2fee4ffed124462d837d1a02a0e009c3ab31"}, + {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c672d99a345849301784604bfeaeba4db0c7aae50b95be04dd651fd2a7310b93"}, +] +google-re2 = [ + {file = "google-re2-1.1.tar.gz", hash = "sha256:d3a9467ee52b46ac77ca928f6d0cbeaccfd92f03ca0f0f65b9df6a95184f3a1c"}, + {file = "google_re2-1.1-1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:874d2e36dfa506b03d4f9c4aef1701a65304f4004c96c7edac7d8aea08fe193e"}, + {file = "google_re2-1.1-1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b66eb84850afdce09aabca40bcd6f2a0e96178a1b4990d555678edb1f59bf255"}, + {file = "google_re2-1.1-1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:c461640a07db26dc2b51f43de607b7520e7debaf4f6a000f796a3c0196ca52af"}, + {file = "google_re2-1.1-1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:7f9ba69eaee6e7a9f5ddfb919bf1a866af14a18b26a179e3fb1a6fe3d0cbf349"}, + {file = "google_re2-1.1-1-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:f95cf16739cc3ea63728366881221b119f2322b4b739b7da6522d45a68792cea"}, + {file = "google_re2-1.1-1-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:9fb56a41250191298e6a2859b0fdea1e83330c9870fe8d84e5836c506ae46e96"}, + {file = "google_re2-1.1-1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fb22ea995564d87baf4a4bfbb3ca024be913683a710f4f0dc9c94dc663afab20"}, + {file = "google_re2-1.1-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:19b3f0bfbb2a2ca58ed0aaa9356d07a5c0921383a6dbeca086b2b74472f5ee08"}, + {file = "google_re2-1.1-1-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:34fd7f97b84af7453cf05b25adfe2491ba3cef1ca548ac2907efa63d3510954d"}, + {file = "google_re2-1.1-1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e029664192d8d30f7c977706183ef483e82ca239302272df74e01d2e22897ca"}, + {file = "google_re2-1.1-1-cp310-cp310-win32.whl", hash = "sha256:41a8f222f9839d059d37efd28e4deec203502d7e39c3759d83d6a33deadf1d2e"}, + {file = "google_re2-1.1-1-cp310-cp310-win_amd64.whl", hash = "sha256:6141d569fdf72aa693f040ba05c469036587395af07ff419b9a3c009d6ffefd3"}, + {file = "google_re2-1.1-1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a2d03f6aaf22788ba13a770f0d183b8eebe55545bcbb6e4c41dcccac7ded014d"}, + {file = "google_re2-1.1-1-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:a98f15fd9c31bf80d368698447191a2e9703880b305dbf34d9a63ce634b8a557"}, + {file = "google_re2-1.1-1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:42128916cc2966623832aabbd224c88e862d1c531d6bc49ab141f565e6321a90"}, + {file = "google_re2-1.1-1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:6e27986a166903ad7000635f6faed8ab5072d687f822ac9f692c40b2470aebcf"}, + {file = "google_re2-1.1-1-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:5e9edcd743a830d0c0b2729201e42ab86fceef8f4086df65563f482e4544359e"}, + {file = "google_re2-1.1-1-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:d33145bbfd32e916f1c911cd9225be5364a36c3959742a0cc4dfc0692d6a2a5e"}, + 
{file = "google_re2-1.1-1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8b27cc2544b69a357ab2a749dc0c13a1b9055198c56f4c2c3b0f61d693f8e203"}, + {file = "google_re2-1.1-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3cdf8982b6def987e95b37984d0c1c878de32635dd78acde3273f730b69708c9"}, + {file = "google_re2-1.1-1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:71ac661a7365e134741fe5542f13d7ce1e6187446b96ddee4c8b7d153fc8f05a"}, + {file = "google_re2-1.1-1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:35a902ba31a71a3e9e114e44473624d9aa9f9b85ec981bfa91671aefe0ef1a6c"}, + {file = "google_re2-1.1-1-cp311-cp311-win32.whl", hash = "sha256:9469f26b485da2784c658e687a766c72e1a17b1e63b3ed24b5f64c3d19fbae3d"}, + {file = "google_re2-1.1-1-cp311-cp311-win_amd64.whl", hash = "sha256:07dd0780240ee431781119b46c3bbf76f5cef24a2cbb542f6a08c643e0a68d98"}, + {file = "google_re2-1.1-1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9857dc4d69b8025057c8129e98406a24d51bdaf1b96e481dbba7e69e0ec85104"}, + {file = "google_re2-1.1-1-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:a6eaaa5f200022eb0bdded5949c91454fc96e1edd6f9e9a96dd1dc32c821c00e"}, + {file = "google_re2-1.1-1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:a32bb2afe128d90b8edc20d4f7d297f7e2753206eba92937a57e5280736eac74"}, + {file = "google_re2-1.1-1-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:4f2754616c61b76ab4e5a4f39892a52a00897203b859c5abd7e3c630dd883cda"}, + {file = "google_re2-1.1-1-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:b110f3d657e8f67a43a699d327ce47095b80180ea1118e2de44cb5c7002503d9"}, + {file = "google_re2-1.1-1-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:fd62ba2853eef65e249a9c4437a9ecac568222062bc956f0c61a3d1151a6271b"}, + {file = "google_re2-1.1-1-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:23b50eb74dc3e1d480b04b987c61242df5dade50d08bc16e25eb3582b83fca80"}, + {file = "google_re2-1.1-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e1bde89855dd5ab0811187d21eec149975510c80e865c771c883524a452445e7"}, + {file = "google_re2-1.1-1-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10c6cddc720151a509beb98ab310fa0cc8bcb265f83518ebf831de2c9ff73af0"}, + {file = "google_re2-1.1-1-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9bea09c5e8401ec50b8f211bc820ec2f0ca5e744ac67431a1b39bdacbd266553"}, + {file = "google_re2-1.1-1-cp38-cp38-win32.whl", hash = "sha256:ffa51b118037518bcdf63c7649d0b4be7071982b83f48ee3bbabf24a9cb48f8a"}, + {file = "google_re2-1.1-1-cp38-cp38-win_amd64.whl", hash = "sha256:3b47715b6d43c9351957eb5092ad0fa625d04106d81f34cb8a726c53395ad474"}, + {file = "google_re2-1.1-1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:998f31bf7efbc9bb603d0c356c1c77e5331f689c71783df8e21e67bb025fc66a"}, + {file = "google_re2-1.1-1-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:0b5f0eaab859d3ba5f462c82bf37ab56e9d37e19b40b5898c731dbe4213a85f7"}, + {file = "google_re2-1.1-1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:f6d591d9c4cbc7142b729ddcc3f654d059d8ebc3bc95891198808a4785a6b4d8"}, + {file = "google_re2-1.1-1-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:3c325c2eae197b423330a04ab62e2e1cf942676cd5560907db4d63e23ce0648a"}, + {file = "google_re2-1.1-1-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:1e019e8f57955806ee843254ce454249b58800a6e872b2c8e9df2ef3459de0d5"}, + {file = 
"google_re2-1.1-1-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:58ebbcc7ad2b639768a6bca586357291660ea40dfac83039208e5055c357513b"}, + {file = "google_re2-1.1-1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:723f8553e7fc022294071f14fb7dfc7958c365dc7d4a71d4938ccd2df8c6eca4"}, + {file = "google_re2-1.1-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d81512b08e6787fc8ef29fea365d3fdbf957553a625550e1d96c36877ae30355"}, + {file = "google_re2-1.1-1-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c58601b155651cc572a23ee2860788c77581aad85d3567a55b89b0674702f34d"}, + {file = "google_re2-1.1-1-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c6c9f64b9724ec38da8e514f404ac64e9a6a5e8b1d7031c2dadd05c1f4c16fd"}, + {file = "google_re2-1.1-1-cp39-cp39-win32.whl", hash = "sha256:d1b751b9ab9f8e2ab2a36d72b909281ce65f328c9115a1685acae1a2d1afd7a4"}, + {file = "google_re2-1.1-1-cp39-cp39-win_amd64.whl", hash = "sha256:ac775c75cec7069351d201da4e0fb0cae4c1c5ebecd08fa34e1be89740c1d80b"}, +] +google-resumable-media = [ + {file = "google-resumable-media-2.5.0.tar.gz", hash = "sha256:218931e8e2b2a73a58eb354a288e03a0fd5fb1c4583261ac6e4c078666468c93"}, + {file = "google_resumable_media-2.5.0-py2.py3-none-any.whl", hash = "sha256:da1bd943e2e114a56d85d6848497ebf9be6a14d3db23e9fc57581e7c3e8170ec"}, +] +googleapis-common-protos = [ + {file = "googleapis-common-protos-1.60.0.tar.gz", hash = "sha256:e73ebb404098db405ba95d1e1ae0aa91c3e15a71da031a2eeb6b2e23e7bc3708"}, + {file = "googleapis_common_protos-1.60.0-py2.py3-none-any.whl", hash = "sha256:69f9bbcc6acde92cab2db95ce30a70bd2b81d20b12eff3f1aabaffcbe8a93918"}, +] +grapheme = [ + {file = "grapheme-0.6.0.tar.gz", hash = "sha256:44c2b9f21bbe77cfb05835fec230bd435954275267fea1858013b102f8603cca"}, +] +graphviz = [ + {file = "graphviz-0.20.1-py3-none-any.whl", hash = "sha256:587c58a223b51611c0cf461132da386edd896a029524ca61a1462b880bf97977"}, + {file = "graphviz-0.20.1.zip", hash = "sha256:8c58f14adaa3b947daf26c19bc1e98c4e0702cdc31cf99153e6f06904d492bf8"}, +] +greenlet = [ + {file = "greenlet-2.0.2-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:bdfea8c661e80d3c1c99ad7c3ff74e6e87184895bbaca6ee8cc61209f8b9b85d"}, + {file = "greenlet-2.0.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:9d14b83fab60d5e8abe587d51c75b252bcc21683f24699ada8fb275d7712f5a9"}, + {file = "greenlet-2.0.2-cp27-cp27m-win32.whl", hash = "sha256:6c3acb79b0bfd4fe733dff8bc62695283b57949ebcca05ae5c129eb606ff2d74"}, + {file = "greenlet-2.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:283737e0da3f08bd637b5ad058507e578dd462db259f7f6e4c5c365ba4ee9343"}, + {file = "greenlet-2.0.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d27ec7509b9c18b6d73f2f5ede2622441de812e7b1a80bbd446cb0633bd3d5ae"}, + {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:30bcf80dda7f15ac77ba5af2b961bdd9dbc77fd4ac6105cee85b0d0a5fcf74df"}, + {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26fbfce90728d82bc9e6c38ea4d038cba20b7faf8a0ca53a9c07b67318d46088"}, + {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9190f09060ea4debddd24665d6804b995a9c122ef5917ab26e1566dcc712ceeb"}, + {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d75209eed723105f9596807495d58d10b3470fa6732dd6756595e89925ce2470"}, + {file = 
"greenlet-2.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3a51c9751078733d88e013587b108f1b7a1fb106d402fb390740f002b6f6551a"}, + {file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:76ae285c8104046b3a7f06b42f29c7b73f77683df18c49ab5af7983994c2dd91"}, + {file = "greenlet-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2d4686f195e32d36b4d7cf2d166857dbd0ee9f3d20ae349b6bf8afc8485b3645"}, + {file = "greenlet-2.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4302695ad8027363e96311df24ee28978162cdcdd2006476c43970b384a244c"}, + {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c48f54ef8e05f04d6eff74b8233f6063cb1ed960243eacc474ee73a2ea8573ca"}, + {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1846f1b999e78e13837c93c778dcfc3365902cfb8d1bdb7dd73ead37059f0d0"}, + {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a06ad5312349fec0ab944664b01d26f8d1f05009566339ac6f63f56589bc1a2"}, + {file = "greenlet-2.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:eff4eb9b7eb3e4d0cae3d28c283dc16d9bed6b193c2e1ace3ed86ce48ea8df19"}, + {file = "greenlet-2.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5454276c07d27a740c5892f4907c86327b632127dd9abec42ee62e12427ff7e3"}, + {file = "greenlet-2.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:7cafd1208fdbe93b67c7086876f061f660cfddc44f404279c1585bbf3cdc64c5"}, + {file = "greenlet-2.0.2-cp35-cp35m-macosx_10_14_x86_64.whl", hash = "sha256:910841381caba4f744a44bf81bfd573c94e10b3045ee00de0cbf436fe50673a6"}, + {file = "greenlet-2.0.2-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:18a7f18b82b52ee85322d7a7874e676f34ab319b9f8cce5de06067384aa8ff43"}, + {file = "greenlet-2.0.2-cp35-cp35m-win32.whl", hash = "sha256:03a8f4f3430c3b3ff8d10a2a86028c660355ab637cee9333d63d66b56f09d52a"}, + {file = "greenlet-2.0.2-cp35-cp35m-win_amd64.whl", hash = "sha256:4b58adb399c4d61d912c4c331984d60eb66565175cdf4a34792cd9600f21b394"}, + {file = "greenlet-2.0.2-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:703f18f3fda276b9a916f0934d2fb6d989bf0b4fb5a64825260eb9bfd52d78f0"}, + {file = "greenlet-2.0.2-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:32e5b64b148966d9cccc2c8d35a671409e45f195864560829f395a54226408d3"}, + {file = "greenlet-2.0.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2dd11f291565a81d71dab10b7033395b7a3a5456e637cf997a6f33ebdf06f8db"}, + {file = "greenlet-2.0.2-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0f72c9ddb8cd28532185f54cc1453f2c16fb417a08b53a855c4e6a418edd099"}, + {file = "greenlet-2.0.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd021c754b162c0fb55ad5d6b9d960db667faad0fa2ff25bb6e1301b0b6e6a75"}, + {file = "greenlet-2.0.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:3c9b12575734155d0c09d6c3e10dbd81665d5c18e1a7c6597df72fd05990c8cf"}, + {file = "greenlet-2.0.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:b9ec052b06a0524f0e35bd8790686a1da006bd911dd1ef7d50b77bfbad74e292"}, + {file = "greenlet-2.0.2-cp36-cp36m-win32.whl", hash = "sha256:dbfcfc0218093a19c252ca8eb9aee3d29cfdcb586df21049b9d777fd32c14fd9"}, + {file = "greenlet-2.0.2-cp36-cp36m-win_amd64.whl", hash = "sha256:9f35ec95538f50292f6d8f2c9c9f8a3c6540bbfec21c9e5b4b751e0a7c20864f"}, + {file = "greenlet-2.0.2-cp37-cp37m-macosx_10_15_x86_64.whl", hash = 
"sha256:d5508f0b173e6aa47273bdc0a0b5ba055b59662ba7c7ee5119528f466585526b"}, + {file = "greenlet-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:f82d4d717d8ef19188687aa32b8363e96062911e63ba22a0cff7802a8e58e5f1"}, + {file = "greenlet-2.0.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9c59a2120b55788e800d82dfa99b9e156ff8f2227f07c5e3012a45a399620b7"}, + {file = "greenlet-2.0.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2780572ec463d44c1d3ae850239508dbeb9fed38e294c68d19a24d925d9223ca"}, + {file = "greenlet-2.0.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:937e9020b514ceedb9c830c55d5c9872abc90f4b5862f89c0887033ae33c6f73"}, + {file = "greenlet-2.0.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:36abbf031e1c0f79dd5d596bfaf8e921c41df2bdf54ee1eed921ce1f52999a86"}, + {file = "greenlet-2.0.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:18e98fb3de7dba1c0a852731c3070cf022d14f0d68b4c87a19cc1016f3bb8b33"}, + {file = "greenlet-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:3f6ea9bd35eb450837a3d80e77b517ea5bc56b4647f5502cd28de13675ee12f7"}, + {file = "greenlet-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:7492e2b7bd7c9b9916388d9df23fa49d9b88ac0640db0a5b4ecc2b653bf451e3"}, + {file = "greenlet-2.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b864ba53912b6c3ab6bcb2beb19f19edd01a6bfcbdfe1f37ddd1778abfe75a30"}, + {file = "greenlet-2.0.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ba2956617f1c42598a308a84c6cf021a90ff3862eddafd20c3333d50f0edb45b"}, + {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc3a569657468b6f3fb60587e48356fe512c1754ca05a564f11366ac9e306526"}, + {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8eab883b3b2a38cc1e050819ef06a7e6344d4a990d24d45bc6f2cf959045a45b"}, + {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:acd2162a36d3de67ee896c43effcd5ee3de247eb00354db411feb025aa319857"}, + {file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0bf60faf0bc2468089bdc5edd10555bab6e85152191df713e2ab1fcc86382b5a"}, + {file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0ef99cdbe2b682b9ccbb964743a6aca37905fda5e0452e5ee239b1654d37f2a"}, + {file = "greenlet-2.0.2-cp38-cp38-win32.whl", hash = "sha256:b80f600eddddce72320dbbc8e3784d16bd3fb7b517e82476d8da921f27d4b249"}, + {file = "greenlet-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:4d2e11331fc0c02b6e84b0d28ece3a36e0548ee1a1ce9ddde03752d9b79bba40"}, + {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:88d9ab96491d38a5ab7c56dd7a3cc37d83336ecc564e4e8816dbed12e5aaefc8"}, + {file = "greenlet-2.0.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:561091a7be172ab497a3527602d467e2b3fbe75f9e783d8b8ce403fa414f71a6"}, + {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:971ce5e14dc5e73715755d0ca2975ac88cfdaefcaab078a284fea6cfabf866df"}, + {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be4ed120b52ae4d974aa40215fcdfde9194d63541c7ded40ee12eb4dda57b76b"}, + {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94c817e84245513926588caf1152e3b559ff794d505555211ca041f032abbb6b"}, + {file = "greenlet-2.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:1a819eef4b0e0b96bb0d98d797bef17dc1b4a10e8d7446be32d1da33e095dbb8"}, + {file = "greenlet-2.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7efde645ca1cc441d6dc4b48c0f7101e8d86b54c8530141b09fd31cef5149ec9"}, + {file = "greenlet-2.0.2-cp39-cp39-win32.whl", hash = "sha256:ea9872c80c132f4663822dd2a08d404073a5a9b5ba6155bea72fb2a79d1093b5"}, + {file = "greenlet-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:db1a39669102a1d8d12b57de2bb7e2ec9066a6f2b3da35ae511ff93b01b5d564"}, + {file = "greenlet-2.0.2.tar.gz", hash = "sha256:e7c8dc13af7db097bed64a051d2dd49e9f0af495c26995c00a9ee842690d34c0"}, +] +grpc-google-iam-v1 = [ + {file = "grpc-google-iam-v1-0.12.6.tar.gz", hash = "sha256:2bc4b8fdf22115a65d751c9317329322602c39b7c86a289c9b72d228d960ef5f"}, + {file = "grpc_google_iam_v1-0.12.6-py2.py3-none-any.whl", hash = "sha256:5c10f3d8dc2d88678ab1a9b0cb5482735c5efee71e6c0cd59f872eef22913f5c"}, +] +grpcio = [ + {file = "grpcio-1.57.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:092fa155b945015754bdf988be47793c377b52b88d546e45c6a9f9579ac7f7b6"}, + {file = "grpcio-1.57.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:2f7349786da979a94690cc5c2b804cab4e8774a3cf59be40d037c4342c906649"}, + {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:82640e57fb86ea1d71ea9ab54f7e942502cf98a429a200b2e743d8672171734f"}, + {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40b72effd4c789de94ce1be2b5f88d7b9b5f7379fe9645f198854112a6567d9a"}, + {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f708a6a17868ad8bf586598bee69abded4996b18adf26fd2d91191383b79019"}, + {file = "grpcio-1.57.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:60fe15288a0a65d5c1cb5b4a62b1850d07336e3ba728257a810317be14f0c527"}, + {file = "grpcio-1.57.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6907b1cf8bb29b058081d2aad677b15757a44ef2d4d8d9130271d2ad5e33efca"}, + {file = "grpcio-1.57.0-cp310-cp310-win32.whl", hash = "sha256:57b183e8b252825c4dd29114d6c13559be95387aafc10a7be645462a0fc98bbb"}, + {file = "grpcio-1.57.0-cp310-cp310-win_amd64.whl", hash = "sha256:7b400807fa749a9eb286e2cd893e501b110b4d356a218426cb9c825a0474ca56"}, + {file = "grpcio-1.57.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:c6ebecfb7a31385393203eb04ed8b6a08f5002f53df3d59e5e795edb80999652"}, + {file = "grpcio-1.57.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:00258cbe3f5188629828363ae8ff78477ce976a6f63fb2bb5e90088396faa82e"}, + {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:23e7d8849a0e58b806253fd206ac105b328171e01b8f18c7d5922274958cc87e"}, + {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5371bcd861e679d63b8274f73ac281751d34bd54eccdbfcd6aa00e692a82cd7b"}, + {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aed90d93b731929e742967e236f842a4a2174dc5db077c8f9ad2c5996f89f63e"}, + {file = "grpcio-1.57.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:fe752639919aad9ffb0dee0d87f29a6467d1ef764f13c4644d212a9a853a078d"}, + {file = "grpcio-1.57.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fada6b07ec4f0befe05218181f4b85176f11d531911b64c715d1875c4736d73a"}, + {file = "grpcio-1.57.0-cp311-cp311-win32.whl", hash = "sha256:bb396952cfa7ad2f01061fbc7dc1ad91dd9d69243bcb8110cf4e36924785a0fe"}, + {file = "grpcio-1.57.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:e503cb45ed12b924b5b988ba9576dc9949b2f5283b8e33b21dcb6be74a7c58d0"}, + {file = "grpcio-1.57.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:fd173b4cf02b20f60860dc2ffe30115c18972d7d6d2d69df97ac38dee03be5bf"}, + {file = "grpcio-1.57.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:d7f8df114d6b4cf5a916b98389aeaf1e3132035420a88beea4e3d977e5f267a5"}, + {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:76c44efa4ede1f42a9d5b2fed1fe9377e73a109bef8675fb0728eb80b0b8e8f2"}, + {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4faea2cfdf762a664ab90589b66f416274887641ae17817de510b8178356bf73"}, + {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c60b83c43faeb6d0a9831f0351d7787a0753f5087cc6fa218d78fdf38e5acef0"}, + {file = "grpcio-1.57.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b363bbb5253e5f9c23d8a0a034dfdf1b7c9e7f12e602fc788c435171e96daccc"}, + {file = "grpcio-1.57.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:f1fb0fd4a1e9b11ac21c30c169d169ef434c6e9344ee0ab27cfa6f605f6387b2"}, + {file = "grpcio-1.57.0-cp37-cp37m-win_amd64.whl", hash = "sha256:34950353539e7d93f61c6796a007c705d663f3be41166358e3d88c45760c7d98"}, + {file = "grpcio-1.57.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:871f9999e0211f9551f368612460442a5436d9444606184652117d6a688c9f51"}, + {file = "grpcio-1.57.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:a8a8e560e8dbbdf29288872e91efd22af71e88b0e5736b0daf7773c1fecd99f0"}, + {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:2313b124e475aa9017a9844bdc5eafb2d5abdda9d456af16fc4535408c7d6da6"}, + {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b4098b6b638d9e0ca839a81656a2fd4bc26c9486ea707e8b1437d6f9d61c3941"}, + {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e5b58e32ae14658085c16986d11e99abd002ddbf51c8daae8a0671fffb3467f"}, + {file = "grpcio-1.57.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0f80bf37f09e1caba6a8063e56e2b87fa335add314cf2b78ebf7cb45aa7e3d06"}, + {file = "grpcio-1.57.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5b7a4ce8f862fe32b2a10b57752cf3169f5fe2915acfe7e6a1e155db3da99e79"}, + {file = "grpcio-1.57.0-cp38-cp38-win32.whl", hash = "sha256:9338bacf172e942e62e5889b6364e56657fbf8ac68062e8b25c48843e7b202bb"}, + {file = "grpcio-1.57.0-cp38-cp38-win_amd64.whl", hash = "sha256:e1cb52fa2d67d7f7fab310b600f22ce1ff04d562d46e9e0ac3e3403c2bb4cc16"}, + {file = "grpcio-1.57.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:fee387d2fab144e8a34e0e9c5ca0f45c9376b99de45628265cfa9886b1dbe62b"}, + {file = "grpcio-1.57.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:b53333627283e7241fcc217323f225c37783b5f0472316edcaa4479a213abfa6"}, + {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:f19ac6ac0a256cf77d3cc926ef0b4e64a9725cc612f97228cd5dc4bd9dbab03b"}, + {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3fdf04e402f12e1de8074458549337febb3b45f21076cc02ef4ff786aff687e"}, + {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5613a2fecc82f95d6c51d15b9a72705553aa0d7c932fad7aed7afb51dc982ee5"}, + {file = "grpcio-1.57.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:b670c2faa92124b7397b42303e4d8eb64a4cd0b7a77e35a9e865a55d61c57ef9"}, + {file = "grpcio-1.57.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash 
= "sha256:7a635589201b18510ff988161b7b573f50c6a48fae9cb567657920ca82022b37"}, + {file = "grpcio-1.57.0-cp39-cp39-win32.whl", hash = "sha256:d78d8b86fcdfa1e4c21f8896614b6cc7ee01a2a758ec0c4382d662f2a62cf766"}, + {file = "grpcio-1.57.0-cp39-cp39-win_amd64.whl", hash = "sha256:20ec6fc4ad47d1b6e12deec5045ec3cd5402d9a1597f738263e98f490fe07056"}, + {file = "grpcio-1.57.0.tar.gz", hash = "sha256:4b089f7ad1eb00a104078bab8015b0ed0ebcb3b589e527ab009c53893fd4e613"}, +] +grpcio-status = [ + {file = "grpcio-status-1.57.0.tar.gz", hash = "sha256:b098da99df1eebe58337f8f78e50df990273ccacc1226fddeb47c590e3df9e02"}, + {file = "grpcio_status-1.57.0-py3-none-any.whl", hash = "sha256:15d6af055914ebbc4ed17e55ebfb8e6bb17a45a57fea32e6af19978fb7844690"}, +] +grpcio-tools = [ + {file = "grpcio-tools-1.57.0.tar.gz", hash = "sha256:2f16130d869ce27ecd623194547b649dd657333ec7e8644cc571c645781a9b85"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:4fb8a8468031f858381a576078924af364a08833d8f8f3237018252c4573a802"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:35bf0dad8a3562043345236c26d0053a856fb06c04d7da652f2ded914e508ae7"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:ec9aab2fb6783c7fc54bc28f58eb75f1ca77594e6b0fd5e5e7a8114a95169fe0"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0cf5fc0a1c23f8ea34b408b72fb0e90eec0f404ad4dba98e8f6da3c9ce34e2ed"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26e69d08a515554e0cfe1ec4d31568836f4b17f0ff82294f957f629388629eb9"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c39a3656576b6fdaaf28abe0467f7a7231df4230c1bee132322dbc3209419e7f"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f64f8ab22d27d4a5693310748d35a696061c3b5c7b8c4fb4ab3b4bc1068b6b56"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-win32.whl", hash = "sha256:d2a134756f4db34759a5cc7f7e43f7eb87540b68d1cca62925593c6fb93924f7"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-win_amd64.whl", hash = "sha256:9a3d60fb8d46ede26c1907c146561b3a9caa20a7aff961bc661ef8226f85a2e9"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:aac98ecad8f7bd4301855669d42a5d97ef7bb34bec2b1e74c7a0641d47e313cf"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:cdd020cb68b51462983b7c2dfbc3eb6ede032b8bf438d4554df0c3f08ce35c76"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:f54081b08419a39221cd646363b5708857c696b3ad4784f1dcf310891e33a5f7"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed85a0291fff45b67f2557fe7f117d3bc7af8b54b8619d27bf374b5c8b7e3ca2"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e868cd6feb3ef07d4b35be104fe1fd0657db05259ff8f8ec5e08f4f89ca1191d"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:dfb6f6120587b8e228a3cae5ee4985b5bdc18501bad05c49df61965dfc9d70a9"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4a7ad7f328e28fc97c356d0f10fb10d8b5151bb65aa7cf14bf8084513f0b7306"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-win32.whl", hash = "sha256:9867f2817b1a0c93c523f89ac6c9d8625548af4620a7ce438bf5a76e23327284"}, + {file = 
"grpcio_tools-1.57.0-cp311-cp311-win_amd64.whl", hash = "sha256:1f9e917a9f18087f6c14b4d4508fb94fca5c2f96852363a89232fb9b2124ac1f"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:9f2aefa8a37bd2c4db1a3f1aca11377e2766214520fb70e67071f4ff8d8b0fa5"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:850cbda0ec5d24c39e7215ede410276040692ca45d105fbbeada407fa03f0ac0"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:6fa52972c9647876ea35f6dc2b51002a74ed900ec7894586cbb2fe76f64f99de"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76c0eea89d7542719594e50e2283f51a072978b953e8b3e9fd7c59a2c762d4c1"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3da5240211252fc70a6451fe00c143e2ab2f7bfc2445695ad2ed056b8e48d96"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a0256f8786ac9e4db618a1aa492bb3472569a0946fd3ee862ffe23196323da55"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c026bdf5c1366ce88b7bbe2d8207374d675afd3fd911f60752103de3da4a41d2"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-win_amd64.whl", hash = "sha256:9053c2f655589545be08b9d6a673e92970173a4bf11a4b9f18cd6e9af626b587"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:81ec4dbb696e095057b2528d11a8da04be6bbe2b967fa07d4ea9ba6354338cbf"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:495e2946406963e0b9f063f76d5af0f2a19517dac2b367b5b044432ac9194296"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:7b46fc6aa8eb7edd18cafcd21fd98703cb6c09e46b507de335fca7f0161dfccb"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb81ff861692111fa81bd85f64584e624cb4013bd66fbce8a209b8893f5ce398"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a42dc220eb5305f470855c9284f4c8e85ae59d6d742cd07946b0cbe5e9ca186"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:90d10d9038ba46a595a223a34f136c9230e3d6d7abc2433dbf0e1c31939d3a8b"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5bc3e6d338aefb052e19cedabe00452be46d0c10a4ed29ee77abb00402e438fe"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-win32.whl", hash = "sha256:34b36217b17b5bea674a414229913e1fd80ede328be51e1b531fcc62abd393b0"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-win_amd64.whl", hash = "sha256:dbde4004a0688400036342ff73e3706e8940483e2871547b1354d59e93a38277"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:784574709b9690dc28696617ea69352e2132352fdfc9bc89afa8e39f99ae538e"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:85ac4e62eb44428cde025fd9ab7554002315fc7880f791c553fc5a0015cc9931"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:dc771d4db5701f280957bbcee91745e0686d00ed1c6aa7e05ba30a58b02d70a1"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3ac06703c412f8167a9062eaf6099409967e33bf98fa5b02be4b4689b6bdf39"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02d78c034109f46032c7217260066d49d41e6bcaf588fa28fa40fe2f83445347"}, + {file = 
"grpcio_tools-1.57.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2db25f15ed44327f2e02d0c4fe741ac966f9500e407047d8a7c7fccf2df65616"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2b417c97936d94874a3ce7ed8deab910f2233e3612134507cfee4af8735c38a6"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-win32.whl", hash = "sha256:f717cce5093e6b6049d9ea6d12fdf3658efdb1a80772f7737db1f8510b876df6"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-win_amd64.whl", hash = "sha256:1c0e8a1a32973a5d59fbcc19232f925e5c48116e9411f788033a31c5ca5130b4"}, +] +gunicorn = [ + {file = "gunicorn-21.2.0-py3-none-any.whl", hash = "sha256:3213aa5e8c24949e792bcacfc176fef362e7aac80b76c56f6b5122bf350722f0"}, + {file = "gunicorn-21.2.0.tar.gz", hash = "sha256:88ec8bff1d634f98e61b9f65bc4bf3cd918a90806c6f5c48bc5603849ec81033"}, +] +h11 = [ + {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, + {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, +] +h2 = [ + {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, + {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, +] +hexbytes = [ + {file = "hexbytes-0.3.1-py3-none-any.whl", hash = "sha256:383595ad75026cf00abd570f44b368c6cdac0c6becfae5c39ff88829877f8a59"}, + {file = "hexbytes-0.3.1.tar.gz", hash = "sha256:a3fe35c6831ee8fafd048c4c086b986075fc14fd46258fa24ecb8d65745f9a9d"}, +] +hologram = [ + {file = "hologram-0.0.16-py3-none-any.whl", hash = "sha256:4e56bd525336bb64a18916f871977a4125b64be8aaa750233583003333cda361"}, + {file = "hologram-0.0.16.tar.gz", hash = "sha256:1c2c921b4e575361623ea0e0d0aa5aee377b1a333cc6c6a879e213ed34583e55"}, +] +hpack = [ + {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, + {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, +] +httpcore = [ + {file = "httpcore-0.17.3-py3-none-any.whl", hash = "sha256:c2789b767ddddfa2a5782e3199b2b7f6894540b17b16ec26b2c4d8e103510b87"}, + {file = "httpcore-0.17.3.tar.gz", hash = "sha256:a6f30213335e34c1ade7be6ec7c47f19f50c56db36abef1a9dfa3815b1cb3888"}, +] +httplib2 = [ + {file = "httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc"}, + {file = "httplib2-0.22.0.tar.gz", hash = "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81"}, +] +httpx = [ + {file = "httpx-0.24.1-py3-none-any.whl", hash = "sha256:06781eb9ac53cde990577af654bd990a4949de37a28bdb4a230d434f3a30b9bd"}, + {file = "httpx-0.24.1.tar.gz", hash = "sha256:5853a43053df830c20f8110c5e69fe44d035d850b2dfe795e196f00fdb774bdd"}, +] +humanfriendly = [ + {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, + {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, +] +humanize = [ + {file = "humanize-4.8.0-py3-none-any.whl", hash = "sha256:8bc9e2bb9315e61ec06bf690151ae35aeb65651ab091266941edf97c90836404"}, + {file = "humanize-4.8.0.tar.gz", hash = "sha256:9783373bf1eec713a770ecaa7c2d7a7902c98398009dfa3d8a2df91eec9311e8"}, +] +hyperframe = [ + {file = "hyperframe-6.0.1-py3-none-any.whl", hash = 
"sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, + {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, +] +idna = [ + {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, + {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, +] +importlib-metadata = [ + {file = "importlib_metadata-4.13.0-py3-none-any.whl", hash = "sha256:8a8a81bcf996e74fee46f0d16bd3eaa382a7eb20fd82445c3ad11f4090334116"}, + {file = "importlib_metadata-4.13.0.tar.gz", hash = "sha256:dd0173e8f150d6815e098fd354f6414b0f079af4644ddfe90c71e2fc6174346d"}, +] +importlib-resources = [ + {file = "importlib_resources-6.0.1-py3-none-any.whl", hash = "sha256:134832a506243891221b88b4ae1213327eea96ceb4e407a00d790bb0626f45cf"}, + {file = "importlib_resources-6.0.1.tar.gz", hash = "sha256:4359457e42708462b9626a04657c6208ad799ceb41e5c58c57ffa0e6a098a5d4"}, +] +inflection = [ + {file = "inflection-0.5.1-py2.py3-none-any.whl", hash = "sha256:f38b2b640938a4f35ade69ac3d053042959b62a0f1076a5bbaa1b9526605a8a2"}, + {file = "inflection-0.5.1.tar.gz", hash = "sha256:1a29730d366e996aaacffb2f1f1cb9593dc38e2ddd30c91250c6dde09ea9b417"}, +] +iniconfig = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] +isodate = [ + {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, + {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, +] +isort = [ + {file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"}, + {file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"}, +] +itsdangerous = [ + {file = "itsdangerous-2.1.2-py3-none-any.whl", hash = "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44"}, + {file = "itsdangerous-2.1.2.tar.gz", hash = "sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a"}, +] +jaraco-classes = [ + {file = "jaraco.classes-3.3.0-py3-none-any.whl", hash = "sha256:10afa92b6743f25c0cf5f37c6bb6e18e2c5bb84a16527ccfc0040ea377e7aaeb"}, + {file = "jaraco.classes-3.3.0.tar.gz", hash = "sha256:c063dd08e89217cee02c8d5e5ec560f2c8ce6cdc2fcdc2e68f7b2e5547ed3621"}, +] +jeepney = [ + {file = "jeepney-0.8.0-py3-none-any.whl", hash = "sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755"}, + {file = "jeepney-0.8.0.tar.gz", hash = "sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806"}, +] +jinja2 = [ + {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, + {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, +] +jinxed = [ + {file = "jinxed-1.2.0-py2.py3-none-any.whl", hash = "sha256:cfc2b2e4e3b4326954d546ba6d6b9a7a796ddcb0aef8d03161d005177eb0d48b"}, + {file = "jinxed-1.2.0.tar.gz", hash = "sha256:032acda92d5c57cd216033cbbd53de731e6ed50deb63eb4781336ca55f72cda5"}, +] +jmespath = [ + {file = "jmespath-1.0.1-py3-none-any.whl", hash = 
"sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] +jsonpath-ng = [ + {file = "jsonpath-ng-1.5.3.tar.gz", hash = "sha256:a273b182a82c1256daab86a313b937059261b5c5f8c4fa3fc38b882b344dd567"}, + {file = "jsonpath_ng-1.5.3-py2-none-any.whl", hash = "sha256:f75b95dbecb8a0f3b86fd2ead21c2b022c3f5770957492b9b6196ecccfeb10aa"}, + {file = "jsonpath_ng-1.5.3-py3-none-any.whl", hash = "sha256:292a93569d74029ba75ac2dc3d3630fc0e17b2df26119a165fa1d498ca47bf65"}, +] +jsonschema = [ + {file = "jsonschema-4.19.0-py3-none-any.whl", hash = "sha256:043dc26a3845ff09d20e4420d6012a9c91c9aa8999fa184e7efcfeccb41e32cb"}, + {file = "jsonschema-4.19.0.tar.gz", hash = "sha256:6e1e7569ac13be8139b2dd2c21a55d350066ee3f80df06c608b398cdc6f30e8f"}, +] +jsonschema-specifications = [ + {file = "jsonschema_specifications-2023.7.1-py3-none-any.whl", hash = "sha256:05adf340b659828a004220a9613be00fa3f223f2b82002e273dee62fd50524b1"}, + {file = "jsonschema_specifications-2023.7.1.tar.gz", hash = "sha256:c91a50404e88a1f6ba40636778e2ee08f6e24c5613fe4c53ac24578a5a7f72bb"}, +] +keyring = [ + {file = "keyring-24.2.0-py3-none-any.whl", hash = "sha256:4901caaf597bfd3bbd78c9a0c7c4c29fcd8310dab2cffefe749e916b6527acd6"}, + {file = "keyring-24.2.0.tar.gz", hash = "sha256:ca0746a19ec421219f4d713f848fa297a661a8a8c1504867e55bfb5e09091509"}, +] +lazy-object-proxy = [ + {file = "lazy-object-proxy-1.9.0.tar.gz", hash = "sha256:659fb5809fa4629b8a1ac5106f669cfc7bef26fbb389dda53b3e010d1ac4ebae"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b40387277b0ed2d0602b8293b94d7257e17d1479e257b4de114ea11a8cb7f2d7"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8c6cfb338b133fbdbc5cfaa10fe3c6aeea827db80c978dbd13bc9dd8526b7d4"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:721532711daa7db0d8b779b0bb0318fa87af1c10d7fe5e52ef30f8eff254d0cd"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:66a3de4a3ec06cd8af3f61b8e1ec67614fbb7c995d02fa224813cb7afefee701"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1aa3de4088c89a1b69f8ec0dcc169aa725b0ff017899ac568fe44ddc1396df46"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-win32.whl", hash = "sha256:f0705c376533ed2a9e5e97aacdbfe04cecd71e0aa84c7c0595d02ef93b6e4455"}, + {file = "lazy_object_proxy-1.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:ea806fd4c37bf7e7ad82537b0757999264d5f70c45468447bb2b91afdbe73a6e"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:946d27deaff6cf8452ed0dba83ba38839a87f4f7a9732e8f9fd4107b21e6ff07"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79a31b086e7e68b24b99b23d57723ef7e2c6d81ed21007b6281ebcd1688acb0a"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f699ac1c768270c9e384e4cbd268d6e67aebcfae6cd623b4d7c3bfde5a35db59"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bfb38f9ffb53b942f2b5954e0f610f1e721ccebe9cce9025a38c8ccf4a5183a4"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:189bbd5d41ae7a498397287c408617fe5c48633e7755287b21d741f7db2706a9"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-win32.whl", hash = "sha256:81fc4d08b062b535d95c9ea70dbe8a335c45c04029878e62d744bdced5141586"}, + {file = "lazy_object_proxy-1.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:f2457189d8257dd41ae9b434ba33298aec198e30adf2dcdaaa3a28b9994f6adb"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d9e25ef10a39e8afe59a5c348a4dbf29b4868ab76269f81ce1674494e2565a6e"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cbf9b082426036e19c6924a9ce90c740a9861e2bdc27a4834fd0a910742ac1e8"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f5fa4a61ce2438267163891961cfd5e32ec97a2c444e5b842d574251ade27d2"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:8fa02eaab317b1e9e03f69aab1f91e120e7899b392c4fc19807a8278a07a97e8"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e7c21c95cae3c05c14aafffe2865bbd5e377cfc1348c4f7751d9dc9a48ca4bda"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-win32.whl", hash = "sha256:f12ad7126ae0c98d601a7ee504c1122bcef553d1d5e0c3bfa77b16b3968d2734"}, + {file = "lazy_object_proxy-1.9.0-cp37-cp37m-win_amd64.whl", hash = "sha256:edd20c5a55acb67c7ed471fa2b5fb66cb17f61430b7a6b9c3b4a1e40293b1671"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2d0daa332786cf3bb49e10dc6a17a52f6a8f9601b4cf5c295a4f85854d61de63"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cd077f3d04a58e83d04b20e334f678c2b0ff9879b9375ed107d5d07ff160171"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:660c94ea760b3ce47d1855a30984c78327500493d396eac4dfd8bd82041b22be"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:212774e4dfa851e74d393a2370871e174d7ff0ebc980907723bb67d25c8a7c30"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f0117049dd1d5635bbff65444496c90e0baa48ea405125c088e93d9cf4525b11"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-win32.whl", hash = "sha256:0a891e4e41b54fd5b8313b96399f8b0e173bbbfc03c7631f01efbe29bb0bcf82"}, + {file = "lazy_object_proxy-1.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:9990d8e71b9f6488e91ad25f322898c136b008d87bf852ff65391b004da5e17b"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9e7551208b2aded9c1447453ee366f1c4070602b3d932ace044715d89666899b"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f83ac4d83ef0ab017683d715ed356e30dd48a93746309c8f3517e1287523ef4"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7322c3d6f1766d4ef1e51a465f47955f1e8123caee67dd641e67d539a534d006"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:18b78ec83edbbeb69efdc0e9c1cb41a3b1b1ed11ddd8ded602464c3fc6020494"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:09763491ce220c0299688940f8dc2c5d05fd1f45af1e42e636b2e8b2303e4382"}, + {file = 
"lazy_object_proxy-1.9.0-cp39-cp39-win32.whl", hash = "sha256:9090d8e53235aa280fc9239a86ae3ea8ac58eff66a705fa6aa2ec4968b95c821"}, + {file = "lazy_object_proxy-1.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:db1c1722726f47e10e0b5fdbf15ac3b8adb58c091d12b3ab713965795036985f"}, +] +leather = [ + {file = "leather-0.3.4-py2.py3-none-any.whl", hash = "sha256:5e741daee96e9f1e9e06081b8c8a10c4ac199301a0564cdd99b09df15b4603d2"}, + {file = "leather-0.3.4.tar.gz", hash = "sha256:b43e21c8fa46b2679de8449f4d953c06418666dc058ce41055ee8a8d3bb40918"}, +] +limits = [ + {file = "limits-3.6.0-py3-none-any.whl", hash = "sha256:32fe29a398352c71bc43d53773117d47e22c5ea4200aef28d3f5fdee10334cd7"}, + {file = "limits-3.6.0.tar.gz", hash = "sha256:57a9c69fd37ad1e4fa3886dff8d035227e1f6af87f47e9118627e72cf1ced3bf"}, +] +linkify-it-py = [ + {file = "linkify-it-py-2.0.2.tar.gz", hash = "sha256:19f3060727842c254c808e99d465c80c49d2c7306788140987a1a7a29b0d6ad2"}, + {file = "linkify_it_py-2.0.2-py3-none-any.whl", hash = "sha256:a3a24428f6c96f27370d7fe61d2ac0be09017be5190d68d8658233171f1b6541"}, +] +lockfile = [ + {file = "lockfile-0.12.2-py2.py3-none-any.whl", hash = "sha256:6c3cb24f344923d30b2785d5ad75182c8ea7ac1b6171b08657258ec7429d50fa"}, + {file = "lockfile-0.12.2.tar.gz", hash = "sha256:6aed02de03cba24efabcd600b30540140634fc06cfa603822d508d5361e9f799"}, +] +logbook = [ + {file = "Logbook-1.5.3-cp27-cp27m-win32.whl", hash = "sha256:56ee54c11df3377314cedcd6507638f015b4b88c0238c2e01b5eb44fd3a6ad1b"}, + {file = "Logbook-1.5.3-cp27-cp27m-win_amd64.whl", hash = "sha256:2dc85f1510533fddb481e97677bb7bca913560862734c0b3b289bfed04f78c92"}, + {file = "Logbook-1.5.3-cp35-cp35m-win32.whl", hash = "sha256:94e2e11ff3c2304b0d09a36c6208e5ae756eb948b210e5cbd63cd8d27f911542"}, + {file = "Logbook-1.5.3-cp35-cp35m-win_amd64.whl", hash = "sha256:97fee1bd9605f76335b169430ed65e15e457a844b2121bd1d90a08cf7e30aba0"}, + {file = "Logbook-1.5.3-cp36-cp36m-win32.whl", hash = "sha256:7c533eb728b3d220b1b5414ba4635292d149d79f74f6973b4aa744c850ca944a"}, + {file = "Logbook-1.5.3-cp36-cp36m-win_amd64.whl", hash = "sha256:e18f7422214b1cf0240c56f884fd9c9b4ff9d0da2eabca9abccba56df7222f66"}, + {file = "Logbook-1.5.3-cp37-cp37m-win32.whl", hash = "sha256:8f76a2e7b1f72595f753228732f81ce342caf03babc3fed6bbdcf366f2f20f18"}, + {file = "Logbook-1.5.3-cp37-cp37m-win_amd64.whl", hash = "sha256:0cf2cdbfb65a03b5987d19109dacad13417809dcf697f66e1a7084fb21744ea9"}, + {file = "Logbook-1.5.3.tar.gz", hash = "sha256:66f454ada0f56eae43066f604a222b09893f98c1adc18df169710761b8f32fe8"}, +] +lxml = [ + {file = "lxml-4.9.3-cp27-cp27m-macosx_11_0_x86_64.whl", hash = "sha256:b0a545b46b526d418eb91754565ba5b63b1c0b12f9bd2f808c852d9b4b2f9b5c"}, + {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:075b731ddd9e7f68ad24c635374211376aa05a281673ede86cbe1d1b3455279d"}, + {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1e224d5755dba2f4a9498e150c43792392ac9b5380aa1b845f98a1618c94eeef"}, + {file = "lxml-4.9.3-cp27-cp27m-win32.whl", hash = "sha256:2c74524e179f2ad6d2a4f7caf70e2d96639c0954c943ad601a9e146c76408ed7"}, + {file = "lxml-4.9.3-cp27-cp27m-win_amd64.whl", hash = "sha256:4f1026bc732b6a7f96369f7bfe1a4f2290fb34dce00d8644bc3036fb351a4ca1"}, + {file = "lxml-4.9.3-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0781a98ff5e6586926293e59480b64ddd46282953203c76ae15dbbbf302e8bb"}, + {file = "lxml-4.9.3-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = 
"sha256:cef2502e7e8a96fe5ad686d60b49e1ab03e438bd9123987994528febd569868e"}, + {file = "lxml-4.9.3-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b86164d2cff4d3aaa1f04a14685cbc072efd0b4f99ca5708b2ad1b9b5988a991"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:42871176e7896d5d45138f6d28751053c711ed4d48d8e30b498da155af39aebd"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ae8b9c6deb1e634ba4f1930eb67ef6e6bf6a44b6eb5ad605642b2d6d5ed9ce3c"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:411007c0d88188d9f621b11d252cce90c4a2d1a49db6c068e3c16422f306eab8"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:cd47b4a0d41d2afa3e58e5bf1f62069255aa2fd6ff5ee41604418ca925911d76"}, + {file = "lxml-4.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e2cb47860da1f7e9a5256254b74ae331687b9672dfa780eed355c4c9c3dbd23"}, + {file = "lxml-4.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1247694b26342a7bf47c02e513d32225ededd18045264d40758abeb3c838a51f"}, + {file = "lxml-4.9.3-cp310-cp310-win32.whl", hash = "sha256:cdb650fc86227eba20de1a29d4b2c1bfe139dc75a0669270033cb2ea3d391b85"}, + {file = "lxml-4.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:97047f0d25cd4bcae81f9ec9dc290ca3e15927c192df17331b53bebe0e3ff96d"}, + {file = "lxml-4.9.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:1f447ea5429b54f9582d4b955f5f1985f278ce5cf169f72eea8afd9502973dd5"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:57d6ba0ca2b0c462f339640d22882acc711de224d769edf29962b09f77129cbf"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:9767e79108424fb6c3edf8f81e6730666a50feb01a328f4a016464a5893f835a"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:71c52db65e4b56b8ddc5bb89fb2e66c558ed9d1a74a45ceb7dcb20c191c3df2f"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d73d8ecf8ecf10a3bd007f2192725a34bd62898e8da27eb9d32a58084f93962b"}, + {file = "lxml-4.9.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0a3d3487f07c1d7f150894c238299934a2a074ef590b583103a45002035be120"}, + {file = "lxml-4.9.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e28c51fa0ce5674be9f560c6761c1b441631901993f76700b1b30ca6c8378d6"}, + {file = "lxml-4.9.3-cp311-cp311-win32.whl", hash = "sha256:0bfd0767c5c1de2551a120673b72e5d4b628737cb05414f03c3277bf9bed3305"}, + {file = "lxml-4.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:25f32acefac14ef7bd53e4218fe93b804ef6f6b92ffdb4322bb6d49d94cad2bc"}, + {file = "lxml-4.9.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:d3ff32724f98fbbbfa9f49d82852b159e9784d6094983d9a8b7f2ddaebb063d4"}, + {file = "lxml-4.9.3-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:48d6ed886b343d11493129e019da91d4039826794a3e3027321c56d9e71505be"}, + {file = "lxml-4.9.3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9a92d3faef50658dd2c5470af249985782bf754c4e18e15afb67d3ab06233f13"}, + {file = "lxml-4.9.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b4e4bc18382088514ebde9328da057775055940a1f2e18f6ad2d78aa0f3ec5b9"}, + {file = "lxml-4.9.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fc9b106a1bf918db68619fdcd6d5ad4f972fdd19c01d19bdb6bf63f3589a9ec5"}, + {file = 
"lxml-4.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:d37017287a7adb6ab77e1c5bee9bcf9660f90ff445042b790402a654d2ad81d8"}, + {file = "lxml-4.9.3-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:56dc1f1ebccc656d1b3ed288f11e27172a01503fc016bcabdcbc0978b19352b7"}, + {file = "lxml-4.9.3-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:578695735c5a3f51569810dfebd05dd6f888147a34f0f98d4bb27e92b76e05c2"}, + {file = "lxml-4.9.3-cp35-cp35m-win32.whl", hash = "sha256:704f61ba8c1283c71b16135caf697557f5ecf3e74d9e453233e4771d68a1f42d"}, + {file = "lxml-4.9.3-cp35-cp35m-win_amd64.whl", hash = "sha256:c41bfca0bd3532d53d16fd34d20806d5c2b1ace22a2f2e4c0008570bf2c58833"}, + {file = "lxml-4.9.3-cp36-cp36m-macosx_11_0_x86_64.whl", hash = "sha256:64f479d719dc9f4c813ad9bb6b28f8390360660b73b2e4beb4cb0ae7104f1c12"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:dd708cf4ee4408cf46a48b108fb9427bfa00b9b85812a9262b5c668af2533ea5"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c31c7462abdf8f2ac0577d9f05279727e698f97ecbb02f17939ea99ae8daa98"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e3cd95e10c2610c360154afdc2f1480aea394f4a4f1ea0a5eacce49640c9b190"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:4930be26af26ac545c3dffb662521d4e6268352866956672231887d18f0eaab2"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4aec80cde9197340bc353d2768e2a75f5f60bacda2bab72ab1dc499589b3878c"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:14e019fd83b831b2e61baed40cab76222139926b1fb5ed0e79225bc0cae14584"}, + {file = "lxml-4.9.3-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0c0850c8b02c298d3c7006b23e98249515ac57430e16a166873fc47a5d549287"}, + {file = "lxml-4.9.3-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:aca086dc5f9ef98c512bac8efea4483eb84abbf926eaeedf7b91479feb092458"}, + {file = "lxml-4.9.3-cp36-cp36m-win32.whl", hash = "sha256:50baa9c1c47efcaef189f31e3d00d697c6d4afda5c3cde0302d063492ff9b477"}, + {file = "lxml-4.9.3-cp36-cp36m-win_amd64.whl", hash = "sha256:bef4e656f7d98aaa3486d2627e7d2df1157d7e88e7efd43a65aa5dd4714916cf"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:46f409a2d60f634fe550f7133ed30ad5321ae2e6630f13657fb9479506b00601"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:4c28a9144688aef80d6ea666c809b4b0e50010a2aca784c97f5e6bf143d9f129"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:141f1d1a9b663c679dc524af3ea1773e618907e96075262726c7612c02b149a4"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:53ace1c1fd5a74ef662f844a0413446c0629d151055340e9893da958a374f70d"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:17a753023436a18e27dd7769e798ce302963c236bc4114ceee5b25c18c52c693"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7d298a1bd60c067ea75d9f684f5f3992c9d6766fadbc0bcedd39750bf344c2f4"}, + {file = "lxml-4.9.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:081d32421db5df44c41b7f08a334a090a545c54ba977e47fd7cc2deece78809a"}, + 
{file = "lxml-4.9.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:23eed6d7b1a3336ad92d8e39d4bfe09073c31bfe502f20ca5116b2a334f8ec02"}, + {file = "lxml-4.9.3-cp37-cp37m-win32.whl", hash = "sha256:1509dd12b773c02acd154582088820893109f6ca27ef7291b003d0e81666109f"}, + {file = "lxml-4.9.3-cp37-cp37m-win_amd64.whl", hash = "sha256:120fa9349a24c7043854c53cae8cec227e1f79195a7493e09e0c12e29f918e52"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:4d2d1edbca80b510443f51afd8496be95529db04a509bc8faee49c7b0fb6d2cc"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8d7e43bd40f65f7d97ad8ef5c9b1778943d02f04febef12def25f7583d19baac"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:71d66ee82e7417828af6ecd7db817913cb0cf9d4e61aa0ac1fde0583d84358db"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:6fc3c450eaa0b56f815c7b62f2b7fba7266c4779adcf1cece9e6deb1de7305ce"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:65299ea57d82fb91c7f019300d24050c4ddeb7c5a190e076b5f48a2b43d19c42"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:eadfbbbfb41b44034a4c757fd5d70baccd43296fb894dba0295606a7cf3124aa"}, + {file = "lxml-4.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3e9bdd30efde2b9ccfa9cb5768ba04fe71b018a25ea093379c857c9dad262c40"}, + {file = "lxml-4.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fcdd00edfd0a3001e0181eab3e63bd5c74ad3e67152c84f93f13769a40e073a7"}, + {file = "lxml-4.9.3-cp38-cp38-win32.whl", hash = "sha256:57aba1bbdf450b726d58b2aea5fe47c7875f5afb2c4a23784ed78f19a0462574"}, + {file = "lxml-4.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:92af161ecbdb2883c4593d5ed4815ea71b31fafd7fd05789b23100d081ecac96"}, + {file = "lxml-4.9.3-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:9bb6ad405121241e99a86efff22d3ef469024ce22875a7ae045896ad23ba2340"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:8ed74706b26ad100433da4b9d807eae371efaa266ffc3e9191ea436087a9d6a7"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:fbf521479bcac1e25a663df882c46a641a9bff6b56dc8b0fafaebd2f66fb231b"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:303bf1edce6ced16bf67a18a1cf8339d0db79577eec5d9a6d4a80f0fb10aa2da"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:5515edd2a6d1a5a70bfcdee23b42ec33425e405c5b351478ab7dc9347228f96e"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:690dafd0b187ed38583a648076865d8c229661ed20e48f2335d68e2cf7dc829d"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b6420a005548ad52154c8ceab4a1290ff78d757f9e5cbc68f8c77089acd3c432"}, + {file = "lxml-4.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bb3bb49c7a6ad9d981d734ef7c7193bc349ac338776a0360cc671eaee89bcf69"}, + {file = "lxml-4.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d27be7405547d1f958b60837dc4c1007da90b8b23f54ba1f8b728c78fdb19d50"}, + {file = "lxml-4.9.3-cp39-cp39-win32.whl", hash = "sha256:8df133a2ea5e74eef5e8fc6f19b9e085f758768a16e9877a60aec455ed2609b2"}, + {file = "lxml-4.9.3-cp39-cp39-win_amd64.whl", hash = 
"sha256:4dd9a263e845a72eacb60d12401e37c616438ea2e5442885f65082c276dfb2b2"}, + {file = "lxml-4.9.3-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6689a3d7fd13dc687e9102a27e98ef33730ac4fe37795d5036d18b4d527abd35"}, + {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:f6bdac493b949141b733c5345b6ba8f87a226029cbabc7e9e121a413e49441e0"}, + {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:05186a0f1346ae12553d66df1cfce6f251589fea3ad3da4f3ef4e34b2d58c6a3"}, + {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c2006f5c8d28dee289f7020f721354362fa304acbaaf9745751ac4006650254b"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-macosx_11_0_x86_64.whl", hash = "sha256:5c245b783db29c4e4fbbbfc9c5a78be496c9fea25517f90606aa1f6b2b3d5f7b"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:4fb960a632a49f2f089d522f70496640fdf1218f1243889da3822e0a9f5f3ba7"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:50670615eaf97227d5dc60de2dc99fb134a7130d310d783314e7724bf163f75d"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9719fe17307a9e814580af1f5c6e05ca593b12fb7e44fe62450a5384dbf61b4b"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:3331bece23c9ee066e0fb3f96c61322b9e0f54d775fccefff4c38ca488de283a"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-macosx_11_0_x86_64.whl", hash = "sha256:ed667f49b11360951e201453fc3967344d0d0263aa415e1619e85ae7fd17b4e0"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:8b77946fd508cbf0fccd8e400a7f71d4ac0e1595812e66025bac475a8e811694"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e4da8ca0c0c0aea88fd46be8e44bd49716772358d648cce45fe387f7b92374a7"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:fe4bda6bd4340caa6e5cf95e73f8fea5c4bfc55763dd42f1b50a94c1b4a2fbd4"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:f3df3db1d336b9356dd3112eae5f5c2b8b377f3bc826848567f10bfddfee77e9"}, + {file = "lxml-4.9.3.tar.gz", hash = "sha256:48628bd53a426c9eb9bc066a923acaa0878d1e86129fd5359aee99285f4eed9c"}, +] +makefun = [ + {file = "makefun-1.15.1-py2.py3-none-any.whl", hash = "sha256:a63cfc7b47a539c76d97bd4fdb833c7d0461e759fd1225f580cb4be6200294d4"}, + {file = "makefun-1.15.1.tar.gz", hash = "sha256:40b0f118b6ded0d8d78c78f1eb679b8b6b2462e3c1b3e05fb1b2da8cd46b48a5"}, +] +mako = [ + {file = "Mako-1.2.4-py3-none-any.whl", hash = "sha256:c97c79c018b9165ac9922ae4f32da095ffd3c4e6872b45eded42926deea46818"}, + {file = "Mako-1.2.4.tar.gz", hash = "sha256:d60a3903dc3bb01a18ad6a89cdbe2e4eadc69c0bc8ef1e3773ba53d44c3f7a34"}, +] +markdown = [ + {file = "Markdown-3.4.4-py3-none-any.whl", hash = "sha256:a4c1b65c0957b4bd9e7d86ddc7b3c9868fb9670660f6f99f6d1bca8954d5a941"}, + {file = "Markdown-3.4.4.tar.gz", hash = "sha256:225c6123522495d4119a90b3a3ba31a1e87a70369e03f14799ea9c0d7183a3d6"}, +] +markdown-it-py = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = 
"sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] +markupsafe = [ + {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"}, + {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, +] +marshmallow = [ + {file = "marshmallow-3.20.1-py3-none-any.whl", hash = "sha256:684939db93e80ad3561392f47be0230743131560a41c5110684c16e21ade0a5c"}, + {file = "marshmallow-3.20.1.tar.gz", hash = "sha256:5d2371bbe42000f2b3fb5eaa065224df7d8f8597bc19a1bbfa5bfe7fba8da889"}, +] +marshmallow-oneofschema = [ + {file = "marshmallow-oneofschema-3.0.1.tar.gz", hash = "sha256:62cd2099b29188c92493c2940ee79d1bf2f2619a71721664e5a98ec2faa58237"}, + {file = "marshmallow_oneofschema-3.0.1-py2.py3-none-any.whl", hash = "sha256:bd29410a9f2f7457a2b428286e2a80ef76b8ddc3701527dc1f935a88914b02f2"}, +] +marshmallow-sqlalchemy = [ + {file = "marshmallow-sqlalchemy-0.26.1.tar.gz", hash = "sha256:d8525f74de51554b5c8491effe036f60629a426229befa33ff614c8569a16a73"}, + {file = "marshmallow_sqlalchemy-0.26.1-py2.py3-none-any.whl", hash = "sha256:ba7493eeb8669a3bf00d8f906b657feaa87a740ae9e4ecf829cfd6ddf763d276"}, +] +mashumaro = [ + {file = "mashumaro-3.6-py3-none-any.whl", hash = "sha256:77403e3e2ecd0a7d0e22d472c08e33282460e48726eabe356c5163efbdf9c7ee"}, + {file = "mashumaro-3.6.tar.gz", hash = "sha256:ceb3de53029219bbbb0385ca600b59348dcd14e0c68523986c6d51889ad338f5"}, +] +mccabe = [ + {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, + {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, +] +mdit-py-plugins = [ + {file = "mdit_py_plugins-0.4.0-py3-none-any.whl", hash = "sha256:b51b3bb70691f57f974e257e367107857a93b36f322a9e6d44ca5bf28ec2def9"}, + {file = "mdit_py_plugins-0.4.0.tar.gz", hash = "sha256:d8ab27e9aed6c38aa716819fedfde15ca275715955f8a185a8e1cf90fb1d2c1b"}, +] +mdurl = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] +minimal-snowplow-tracker = [ + {file = "minimal-snowplow-tracker-0.0.2.tar.gz", hash = "sha256:acabf7572db0e7f5cbf6983d495eef54081f71be392330eb3aadb9ccb39daaa4"}, +] +more-itertools = [ + {file = "more-itertools-10.1.0.tar.gz", hash = "sha256:626c369fa0eb37bac0291bce8259b332fd59ac792fa5497b59837309cd5b114a"}, + {file = "more_itertools-10.1.0-py3-none-any.whl", hash = "sha256:64e0735fcfdc6f3464ea133afe8ea4483b1c5fe3a3d69852e6503b43a0b222e6"}, +] +mpmath = [ + {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, + {file = "mpmath-1.3.0.tar.gz", hash = 
"sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, +] +msal = [ + {file = "msal-1.23.0-py2.py3-none-any.whl", hash = "sha256:3342e0837a047007f9d479e814b559c3219767453d57920dc40a31986862048b"}, + {file = "msal-1.23.0.tar.gz", hash = "sha256:25c9a33acf84301f93d1fdbe9f1a9c60cd38af0d5fffdbfa378138fc7bc1e86b"}, +] +msal-extensions = [ + {file = "msal-extensions-1.0.0.tar.gz", hash = "sha256:c676aba56b0cce3783de1b5c5ecfe828db998167875126ca4b47dc6436451354"}, + {file = "msal_extensions-1.0.0-py2.py3-none-any.whl", hash = "sha256:91e3db9620b822d0ed2b4d1850056a0f133cba04455e62f11612e40f5502f2ee"}, +] +msgpack = [ + {file = "msgpack-1.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:525228efd79bb831cf6830a732e2e80bc1b05436b086d4264814b4b2955b2fa9"}, + {file = "msgpack-1.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4f8d8b3bf1ff2672567d6b5c725a1b347fe838b912772aa8ae2bf70338d5a198"}, + {file = "msgpack-1.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cdc793c50be3f01106245a61b739328f7dccc2c648b501e237f0699fe1395b81"}, + {file = "msgpack-1.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cb47c21a8a65b165ce29f2bec852790cbc04936f502966768e4aae9fa763cb7"}, + {file = "msgpack-1.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e42b9594cc3bf4d838d67d6ed62b9e59e201862a25e9a157019e171fbe672dd3"}, + {file = "msgpack-1.0.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:55b56a24893105dc52c1253649b60f475f36b3aa0fc66115bffafb624d7cb30b"}, + {file = "msgpack-1.0.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:1967f6129fc50a43bfe0951c35acbb729be89a55d849fab7686004da85103f1c"}, + {file = "msgpack-1.0.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:20a97bf595a232c3ee6d57ddaadd5453d174a52594bf9c21d10407e2a2d9b3bd"}, + {file = "msgpack-1.0.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d25dd59bbbbb996eacf7be6b4ad082ed7eacc4e8f3d2df1ba43822da9bfa122a"}, + {file = "msgpack-1.0.5-cp310-cp310-win32.whl", hash = "sha256:382b2c77589331f2cb80b67cc058c00f225e19827dbc818d700f61513ab47bea"}, + {file = "msgpack-1.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:4867aa2df9e2a5fa5f76d7d5565d25ec76e84c106b55509e78c1ede0f152659a"}, + {file = "msgpack-1.0.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9f5ae84c5c8a857ec44dc180a8b0cc08238e021f57abdf51a8182e915e6299f0"}, + {file = "msgpack-1.0.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9e6ca5d5699bcd89ae605c150aee83b5321f2115695e741b99618f4856c50898"}, + {file = "msgpack-1.0.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5494ea30d517a3576749cad32fa27f7585c65f5f38309c88c6d137877fa28a5a"}, + {file = "msgpack-1.0.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ab2f3331cb1b54165976a9d976cb251a83183631c88076613c6c780f0d6e45a"}, + {file = "msgpack-1.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28592e20bbb1620848256ebc105fc420436af59515793ed27d5c77a217477705"}, + {file = "msgpack-1.0.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fe5c63197c55bce6385d9aee16c4d0641684628f63ace85f73571e65ad1c1e8d"}, + {file = "msgpack-1.0.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ed40e926fa2f297e8a653c954b732f125ef97bdd4c889f243182299de27e2aa9"}, + {file = "msgpack-1.0.5-cp311-cp311-musllinux_1_1_i686.whl", hash = 
"sha256:b2de4c1c0538dcb7010902a2b97f4e00fc4ddf2c8cda9749af0e594d3b7fa3d7"}, + {file = "msgpack-1.0.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bf22a83f973b50f9d38e55c6aade04c41ddda19b00c4ebc558930d78eecc64ed"}, + {file = "msgpack-1.0.5-cp311-cp311-win32.whl", hash = "sha256:c396e2cc213d12ce017b686e0f53497f94f8ba2b24799c25d913d46c08ec422c"}, + {file = "msgpack-1.0.5-cp311-cp311-win_amd64.whl", hash = "sha256:6c4c68d87497f66f96d50142a2b73b97972130d93677ce930718f68828b382e2"}, + {file = "msgpack-1.0.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a2b031c2e9b9af485d5e3c4520f4220d74f4d222a5b8dc8c1a3ab9448ca79c57"}, + {file = "msgpack-1.0.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f837b93669ce4336e24d08286c38761132bc7ab29782727f8557e1eb21b2080"}, + {file = "msgpack-1.0.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1d46dfe3832660f53b13b925d4e0fa1432b00f5f7210eb3ad3bb9a13c6204a6"}, + {file = "msgpack-1.0.5-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:366c9a7b9057e1547f4ad51d8facad8b406bab69c7d72c0eb6f529cf76d4b85f"}, + {file = "msgpack-1.0.5-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:4c075728a1095efd0634a7dccb06204919a2f67d1893b6aa8e00497258bf926c"}, + {file = "msgpack-1.0.5-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:f933bbda5a3ee63b8834179096923b094b76f0c7a73c1cfe8f07ad608c58844b"}, + {file = "msgpack-1.0.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:36961b0568c36027c76e2ae3ca1132e35123dcec0706c4b7992683cc26c1320c"}, + {file = "msgpack-1.0.5-cp36-cp36m-win32.whl", hash = "sha256:b5ef2f015b95f912c2fcab19c36814963b5463f1fb9049846994b007962743e9"}, + {file = "msgpack-1.0.5-cp36-cp36m-win_amd64.whl", hash = "sha256:288e32b47e67f7b171f86b030e527e302c91bd3f40fd9033483f2cacc37f327a"}, + {file = "msgpack-1.0.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:137850656634abddfb88236008339fdaba3178f4751b28f270d2ebe77a563b6c"}, + {file = "msgpack-1.0.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c05a4a96585525916b109bb85f8cb6511db1c6f5b9d9cbcbc940dc6b4be944b"}, + {file = "msgpack-1.0.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56a62ec00b636583e5cb6ad313bbed36bb7ead5fa3a3e38938503142c72cba4f"}, + {file = "msgpack-1.0.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef8108f8dedf204bb7b42994abf93882da1159728a2d4c5e82012edd92c9da9f"}, + {file = "msgpack-1.0.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1835c84d65f46900920b3708f5ba829fb19b1096c1800ad60bae8418652a951d"}, + {file = "msgpack-1.0.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:e57916ef1bd0fee4f21c4600e9d1da352d8816b52a599c46460e93a6e9f17086"}, + {file = "msgpack-1.0.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:17358523b85973e5f242ad74aa4712b7ee560715562554aa2134d96e7aa4cbbf"}, + {file = "msgpack-1.0.5-cp37-cp37m-win32.whl", hash = "sha256:cb5aaa8c17760909ec6cb15e744c3ebc2ca8918e727216e79607b7bbce9c8f77"}, + {file = "msgpack-1.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:ab31e908d8424d55601ad7075e471b7d0140d4d3dd3272daf39c5c19d936bd82"}, + {file = "msgpack-1.0.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b72d0698f86e8d9ddf9442bdedec15b71df3598199ba33322d9711a19f08145c"}, + {file = "msgpack-1.0.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:379026812e49258016dd84ad79ac8446922234d498058ae1d415f04b522d5b2d"}, + 
{file = "msgpack-1.0.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:332360ff25469c346a1c5e47cbe2a725517919892eda5cfaffe6046656f0b7bb"}, + {file = "msgpack-1.0.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:476a8fe8fae289fdf273d6d2a6cb6e35b5a58541693e8f9f019bfe990a51e4ba"}, + {file = "msgpack-1.0.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9985b214f33311df47e274eb788a5893a761d025e2b92c723ba4c63936b69b1"}, + {file = "msgpack-1.0.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:48296af57cdb1d885843afd73c4656be5c76c0c6328db3440c9601a98f303d87"}, + {file = "msgpack-1.0.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:addab7e2e1fcc04bd08e4eb631c2a90960c340e40dfc4a5e24d2ff0d5a3b3edb"}, + {file = "msgpack-1.0.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:916723458c25dfb77ff07f4c66aed34e47503b2eb3188b3adbec8d8aa6e00f48"}, + {file = "msgpack-1.0.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:821c7e677cc6acf0fd3f7ac664c98803827ae6de594a9f99563e48c5a2f27eb0"}, + {file = "msgpack-1.0.5-cp38-cp38-win32.whl", hash = "sha256:1c0f7c47f0087ffda62961d425e4407961a7ffd2aa004c81b9c07d9269512f6e"}, + {file = "msgpack-1.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:bae7de2026cbfe3782c8b78b0db9cbfc5455e079f1937cb0ab8d133496ac55e1"}, + {file = "msgpack-1.0.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:20c784e66b613c7f16f632e7b5e8a1651aa5702463d61394671ba07b2fc9e025"}, + {file = "msgpack-1.0.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:266fa4202c0eb94d26822d9bfd7af25d1e2c088927fe8de9033d929dd5ba24c5"}, + {file = "msgpack-1.0.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:18334484eafc2b1aa47a6d42427da7fa8f2ab3d60b674120bce7a895a0a85bdd"}, + {file = "msgpack-1.0.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57e1f3528bd95cc44684beda696f74d3aaa8a5e58c816214b9046512240ef437"}, + {file = "msgpack-1.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:586d0d636f9a628ddc6a17bfd45aa5b5efaf1606d2b60fa5d87b8986326e933f"}, + {file = "msgpack-1.0.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a740fa0e4087a734455f0fc3abf5e746004c9da72fbd541e9b113013c8dc3282"}, + {file = "msgpack-1.0.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3055b0455e45810820db1f29d900bf39466df96ddca11dfa6d074fa47054376d"}, + {file = "msgpack-1.0.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a61215eac016f391129a013c9e46f3ab308db5f5ec9f25811e811f96962599a8"}, + {file = "msgpack-1.0.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:362d9655cd369b08fda06b6657a303eb7172d5279997abe094512e919cf74b11"}, + {file = "msgpack-1.0.5-cp39-cp39-win32.whl", hash = "sha256:ac9dd47af78cae935901a9a500104e2dea2e253207c924cc95de149606dc43cc"}, + {file = "msgpack-1.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:06f5174b5f8ed0ed919da0e62cbd4ffde676a374aba4020034da05fab67b9164"}, + {file = "msgpack-1.0.5.tar.gz", hash = "sha256:c075544284eadc5cddc70f4757331d99dcbc16b2bbd4849d15f8aae4cf36d31c"}, +] +multidict = [ + {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"}, + {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"}, + {file = "multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea20853c6dbbb53ed34cb4d080382169b6f4554d394015f1bef35e881bf83547"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16d232d4e5396c2efbbf4f6d4df89bfa905eb0d4dc5b3549d872ab898451f569"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36c63aaa167f6c6b04ef2c85704e93af16c11d20de1d133e39de6a0e84582a93"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:64bdf1086b6043bf519869678f5f2757f473dee970d7abf6da91ec00acb9cb98"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:43644e38f42e3af682690876cff722d301ac585c5b9e1eacc013b7a3f7b696a0"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7582a1d1030e15422262de9f58711774e02fa80df0d1578995c76214f6954988"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ddff9c4e225a63a5afab9dd15590432c22e8057e1a9a13d28ed128ecf047bbdc"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ee2a1ece51b9b9e7752e742cfb661d2a29e7bcdba2d27e66e28a99f1890e4fa0"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a2e4369eb3d47d2034032a26c7a80fcb21a2cb22e1173d761a162f11e562caa5"}, + {file = "multidict-6.0.4-cp310-cp310-win32.whl", hash = "sha256:574b7eae1ab267e5f8285f0fe881f17efe4b98c39a40858247720935b893bba8"}, + {file = "multidict-6.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:4dcbb0906e38440fa3e325df2359ac6cb043df8e58c965bb45f4e406ecb162cc"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0dfad7a5a1e39c53ed00d2dd0c2e36aed4650936dc18fd9a1826a5ae1cad6f03"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:64da238a09d6039e3bd39bb3aee9c21a5e34f28bfa5aa22518581f910ff94af3"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff959bee35038c4624250473988b24f846cbeb2c6639de3602c073f10410ceba"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01a3a55bd90018c9c080fbb0b9f4891db37d148a0a18722b42f94694f8b6d4c9"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5cb09abb18c1ea940fb99360ea0396f34d46566f157122c92dfa069d3e0e982"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:666daae833559deb2d609afa4490b85830ab0dfca811a98b70a205621a6109fe"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11bdf3f5e1518b24530b8241529d2050014c884cf18b6fc69c0c2b30ca248710"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d18748f2d30f94f498e852c67d61261c643b349b9d2a581131725595c45ec6c"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:458f37be2d9e4c95e2d8866a851663cbc76e865b78395090786f6cd9b3bbf4f4"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_i686.whl", hash = 
"sha256:b1a2eeedcead3a41694130495593a559a668f382eee0727352b9a41e1c45759a"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7d6ae9d593ef8641544d6263c7fa6408cc90370c8cb2bbb65f8d43e5b0351d9c"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5979b5632c3e3534e42ca6ff856bb24b2e3071b37861c2c727ce220d80eee9ed"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dcfe792765fab89c365123c81046ad4103fcabbc4f56d1c1997e6715e8015461"}, + {file = "multidict-6.0.4-cp311-cp311-win32.whl", hash = "sha256:3601a3cece3819534b11d4efc1eb76047488fddd0c85a3948099d5da4d504636"}, + {file = "multidict-6.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:81a4f0b34bd92df3da93315c6a59034df95866014ac08535fc819f043bfd51f0"}, + {file = "multidict-6.0.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:67040058f37a2a51ed8ea8f6b0e6ee5bd78ca67f169ce6122f3e2ec80dfe9b78"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:853888594621e6604c978ce2a0444a1e6e70c8d253ab65ba11657659dcc9100f"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:39ff62e7d0f26c248b15e364517a72932a611a9b75f35b45be078d81bdb86603"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af048912e045a2dc732847d33821a9d84ba553f5c5f028adbd364dd4765092ac"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e8b901e607795ec06c9e42530788c45ac21ef3aaa11dbd0c69de543bfb79a9"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62501642008a8b9871ddfccbf83e4222cf8ac0d5aeedf73da36153ef2ec222d2"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:99b76c052e9f1bc0721f7541e5e8c05db3941eb9ebe7b8553c625ef88d6eefde"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:509eac6cf09c794aa27bcacfd4d62c885cce62bef7b2c3e8b2e49d365b5003fe"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:21a12c4eb6ddc9952c415f24eef97e3e55ba3af61f67c7bc388dcdec1404a067"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:5cad9430ab3e2e4fa4a2ef4450f548768400a2ac635841bc2a56a2052cdbeb87"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab55edc2e84460694295f401215f4a58597f8f7c9466faec545093045476327d"}, + {file = "multidict-6.0.4-cp37-cp37m-win32.whl", hash = "sha256:5a4dcf02b908c3b8b17a45fb0f15b695bf117a67b76b7ad18b73cf8e92608775"}, + {file = "multidict-6.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6ed5f161328b7df384d71b07317f4d8656434e34591f20552c7bcef27b0ab88e"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5fc1b16f586f049820c5c5b17bb4ee7583092fa0d1c4e28b5239181ff9532e0c"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1502e24330eb681bdaa3eb70d6358e818e8e8f908a22a1851dfd4e15bc2f8161"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b692f419760c0e65d060959df05f2a531945af31fda0c8a3b3195d4efd06de11"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45e1ecb0379bfaab5eef059f50115b54571acfbe422a14f668fc8c27ba410e7e"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ddd3915998d93fbcd2566ddf9cf62cdb35c9e093075f862935573d265cf8f65d"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59d43b61c59d82f2effb39a93c48b845efe23a3852d201ed2d24ba830d0b4cf2"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc8e1d0c705233c5dd0c5e6460fbad7827d5d36f310a0fadfd45cc3029762258"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6aa0418fcc838522256761b3415822626f866758ee0bc6632c9486b179d0b52"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6748717bb10339c4760c1e63da040f5f29f5ed6e59d76daee30305894069a660"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4d1a3d7ef5e96b1c9e92f973e43aa5e5b96c659c9bc3124acbbd81b0b9c8a951"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4372381634485bec7e46718edc71528024fcdc6f835baefe517b34a33c731d60"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:fc35cb4676846ef752816d5be2193a1e8367b4c1397b74a565a9d0389c433a1d"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b9d9e4e2b37daddb5c23ea33a3417901fa7c7b3dee2d855f63ee67a0b21e5b1"}, + {file = "multidict-6.0.4-cp38-cp38-win32.whl", hash = "sha256:e41b7e2b59679edfa309e8db64fdf22399eec4b0b24694e1b2104fb789207779"}, + {file = "multidict-6.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:d6c254ba6e45d8e72739281ebc46ea5eb5f101234f3ce171f0e9f5cc86991480"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:16ab77bbeb596e14212e7bab8429f24c1579234a3a462105cda4a66904998664"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc779e9e6f7fda81b3f9aa58e3a6091d49ad528b11ed19f6621408806204ad35"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ceef517eca3e03c1cceb22030a3e39cb399ac86bff4e426d4fc6ae49052cc60"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:281af09f488903fde97923c7744bb001a9b23b039a909460d0f14edc7bf59706"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52f2dffc8acaba9a2f27174c41c9e57f60b907bb9f096b36b1a1f3be71c6284d"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b41156839806aecb3641f3208c0dafd3ac7775b9c4c422d82ee2a45c34ba81ca"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3fc56f88cc98ef8139255cf8cd63eb2c586531e43310ff859d6bb3a6b51f1"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8316a77808c501004802f9beebde51c9f857054a0c871bd6da8280e718444449"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f70b98cd94886b49d91170ef23ec5c0e8ebb6f242d734ed7ed677b24d50c82cf"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bf6774e60d67a9efe02b3616fee22441d86fab4c6d335f9d2051d19d90a40063"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:e69924bfcdda39b722ef4d9aa762b2dd38e4632b3641b1d9a57ca9cd18f2f83a"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:6b181d8c23da913d4ff585afd1155a0e1194c0b50c54fcfe286f70cdaf2b7176"}, + {file = 
"multidict-6.0.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:52509b5be062d9eafc8170e53026fbc54cf3b32759a23d07fd935fb04fc22d95"}, + {file = "multidict-6.0.4-cp39-cp39-win32.whl", hash = "sha256:27c523fbfbdfd19c6867af7346332b62b586eed663887392cff78d614f9ec313"}, + {file = "multidict-6.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2"}, + {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, +] +mypy = [ + {file = "mypy-1.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e5012e5cc2ac628177eaac0e83d622b2dd499e28253d4107a08ecc59ede3fc2c"}, + {file = "mypy-1.6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d8fbb68711905f8912e5af474ca8b78d077447d8f3918997fecbf26943ff3cbb"}, + {file = "mypy-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21a1ad938fee7d2d96ca666c77b7c494c3c5bd88dff792220e1afbebb2925b5e"}, + {file = "mypy-1.6.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b96ae2c1279d1065413965c607712006205a9ac541895004a1e0d4f281f2ff9f"}, + {file = "mypy-1.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:40b1844d2e8b232ed92e50a4bd11c48d2daa351f9deee6c194b83bf03e418b0c"}, + {file = "mypy-1.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:81af8adaa5e3099469e7623436881eff6b3b06db5ef75e6f5b6d4871263547e5"}, + {file = "mypy-1.6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8c223fa57cb154c7eab5156856c231c3f5eace1e0bed9b32a24696b7ba3c3245"}, + {file = "mypy-1.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8032e00ce71c3ceb93eeba63963b864bf635a18f6c0c12da6c13c450eedb183"}, + {file = "mypy-1.6.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4c46b51de523817a0045b150ed11b56f9fff55f12b9edd0f3ed35b15a2809de0"}, + {file = "mypy-1.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:19f905bcfd9e167159b3d63ecd8cb5e696151c3e59a1742e79bc3bcb540c42c7"}, + {file = "mypy-1.6.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:82e469518d3e9a321912955cc702d418773a2fd1e91c651280a1bda10622f02f"}, + {file = "mypy-1.6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d4473c22cc296425bbbce7e9429588e76e05bc7342da359d6520b6427bf76660"}, + {file = "mypy-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59a0d7d24dfb26729e0a068639a6ce3500e31d6655df8557156c51c1cb874ce7"}, + {file = "mypy-1.6.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cfd13d47b29ed3bbaafaff7d8b21e90d827631afda134836962011acb5904b71"}, + {file = "mypy-1.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:eb4f18589d196a4cbe5290b435d135dee96567e07c2b2d43b5c4621b6501531a"}, + {file = "mypy-1.6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:41697773aa0bf53ff917aa077e2cde7aa50254f28750f9b88884acea38a16169"}, + {file = "mypy-1.6.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7274b0c57737bd3476d2229c6389b2ec9eefeb090bbaf77777e9d6b1b5a9d143"}, + {file = "mypy-1.6.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbaf4662e498c8c2e352da5f5bca5ab29d378895fa2d980630656178bd607c46"}, + {file = "mypy-1.6.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:bb8ccb4724f7d8601938571bf3f24da0da791fe2db7be3d9e79849cb64e0ae85"}, + {file = "mypy-1.6.1-cp38-cp38-win_amd64.whl", hash = "sha256:68351911e85145f582b5aa6cd9ad666c8958bcae897a1bfda8f4940472463c45"}, + {file = "mypy-1.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:49ae115da099dcc0922a7a895c1eec82c1518109ea5c162ed50e3b3594c71208"}, + {file = "mypy-1.6.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8b27958f8c76bed8edaa63da0739d76e4e9ad4ed325c814f9b3851425582a3cd"}, + {file = "mypy-1.6.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:925cd6a3b7b55dfba252b7c4561892311c5358c6b5a601847015a1ad4eb7d332"}, + {file = "mypy-1.6.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8f57e6b6927a49550da3d122f0cb983d400f843a8a82e65b3b380d3d7259468f"}, + {file = "mypy-1.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:a43ef1c8ddfdb9575691720b6352761f3f53d85f1b57d7745701041053deff30"}, + {file = "mypy-1.6.1-py3-none-any.whl", hash = "sha256:4cbe68ef919c28ea561165206a2dcb68591c50f3bcf777932323bc208d949cf1"}, + {file = "mypy-1.6.1.tar.gz", hash = "sha256:4d01c00d09a0be62a4ca3f933e315455bde83f37f892ba4b08ce92f3cf44bcc1"}, +] +mypy-boto3-athena = [ + {file = "mypy-boto3-athena-1.28.36.tar.gz", hash = "sha256:a76df6aace3dc1d91b3f74640d617cd1b4802e5f348a22db2f16dfce0b01ee26"}, + {file = "mypy_boto3_athena-1.28.36-py3-none-any.whl", hash = "sha256:b79b77df6ba30c55ff2f1f8b36de410f537c8c978d892e958b4c5e165797915a"}, +] +mypy-boto3-glue = [ + {file = "mypy-boto3-glue-1.28.36.tar.gz", hash = "sha256:161771252bb6a220a0bfd8e6ad71da8548599c611f95fe8a94846f4a3386d2ae"}, + {file = "mypy_boto3_glue-1.28.36-py3-none-any.whl", hash = "sha256:73bc14616ac65a5c02adea5efba7bbbcf8207cd0c0e3237c13d351ebc916338d"}, +] +mypy-boto3-lakeformation = [ + {file = "mypy-boto3-lakeformation-1.28.36.tar.gz", hash = "sha256:9327cf0d28a09abf5bd90ae946ce7420b32a3b979a1a3554ac93716c3dceacb0"}, + {file = "mypy_boto3_lakeformation-1.28.36-py3-none-any.whl", hash = "sha256:9525a8ab3d69632d4ec83eb565ff7fdfa1181fbdf032bcff4a20d4f8a0350688"}, +] +mypy-boto3-sts = [ + {file = "mypy-boto3-sts-1.28.37.tar.gz", hash = "sha256:54d64ca695ab90a51c68ac1e67ff9eae7ec69f926649e320a3b90ed1ec841a95"}, + {file = "mypy_boto3_sts-1.28.37-py3-none-any.whl", hash = "sha256:24106ff30ecfe7ad0538657bbd00b6009418a5382b323cac46e0e26c1f5d50fb"}, +] +mypy-extensions = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] +natsort = [ + {file = "natsort-8.4.0-py3-none-any.whl", hash = "sha256:4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c"}, + {file = "natsort-8.4.0.tar.gz", hash = "sha256:45312c4a0e5507593da193dedd04abb1469253b601ecaf63445ad80f0a1ea581"}, +] +networkx = [ + {file = "networkx-2.8.8-py3-none-any.whl", hash = "sha256:e435dfa75b1d7195c7b8378c3859f0445cd88c6b0375c181ed66823a9ceb7524"}, + {file = "networkx-2.8.8.tar.gz", hash = "sha256:230d388117af870fce5647a3c52401fcf753e94720e6ea6b4197a5355648885e"}, +] +nr-date = [ + {file = "nr_date-2.1.0-py3-none-any.whl", hash = "sha256:bd672a9dfbdcf7c4b9289fea6750c42490eaee08036a72059dcc78cb236ed568"}, + {file = "nr_date-2.1.0.tar.gz", hash = "sha256:0643aea13bcdc2a8bc56af9d5e6a89ef244c9744a1ef00cdc735902ba7f7d2e6"}, +] +nr-stream = [ + {file = "nr_stream-1.1.5-py3-none-any.whl", hash = "sha256:47e12150b331ad2cb729cfd9d2abd281c9949809729ba461c6aa87dd9927b2d4"}, + {file = "nr_stream-1.1.5.tar.gz", hash = "sha256:eb0216c6bfc61a46d4568dba3b588502c610ec8ddef4ac98f3932a2bd7264f65"}, +] +nr-util = [ + {file = "nr.util-0.8.12-py3-none-any.whl", hash = "sha256:91da02ac9795eb8e015372275c1efe54bac9051231ee9b0e7e6f96b0b4e7d2bb"}, + 
{file = "nr.util-0.8.12.tar.gz", hash = "sha256:a4549c2033d99d2f0379b3f3d233fd2a8ade286bbf0b3ad0cc7cea16022214f4"}, +] +numpy = [ + {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"}, + {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"}, + {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4"}, + {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6"}, + {file = "numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc"}, + {file = "numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e"}, + {file = "numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810"}, + {file = "numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254"}, + {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7"}, + {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5"}, + {file = "numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d"}, + {file = "numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694"}, + {file = "numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61"}, + {file = "numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f"}, + {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e"}, + {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc"}, + {file = "numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2"}, + {file = "numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706"}, + {file = "numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400"}, + {file = "numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f"}, + {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9"}, + {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d"}, + {file = "numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835"}, + {file = 
"numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2"}, + {file = "numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463"}, + {file = "numpy-1.26.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:82e871307a6331b5f09efda3c22e03c095d957f04bf6bc1804f30048d0e5e7af"}, + {file = "numpy-1.26.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cdd9ec98f0063d93baeb01aad472a1a0840dee302842a2746a7a8e92968f9575"}, + {file = "numpy-1.26.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d78f269e0c4fd365fc2992c00353e4530d274ba68f15e968d8bc3c69ce5f5244"}, + {file = "numpy-1.26.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ab9163ca8aeb7fd32fe93866490654d2f7dda4e61bc6297bf72ce07fdc02f67"}, + {file = "numpy-1.26.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:78ca54b2f9daffa5f323f34cdf21e1d9779a54073f0018a3094ab907938331a2"}, + {file = "numpy-1.26.1-cp310-cp310-win32.whl", hash = "sha256:d1cfc92db6af1fd37a7bb58e55c8383b4aa1ba23d012bdbba26b4bcca45ac297"}, + {file = "numpy-1.26.1-cp310-cp310-win_amd64.whl", hash = "sha256:d2984cb6caaf05294b8466966627e80bf6c7afd273279077679cb010acb0e5ab"}, + {file = "numpy-1.26.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cd7837b2b734ca72959a1caf3309457a318c934abef7a43a14bb984e574bbb9a"}, + {file = "numpy-1.26.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1c59c046c31a43310ad0199d6299e59f57a289e22f0f36951ced1c9eac3665b9"}, + {file = "numpy-1.26.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d58e8c51a7cf43090d124d5073bc29ab2755822181fcad978b12e144e5e5a4b3"}, + {file = "numpy-1.26.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6081aed64714a18c72b168a9276095ef9155dd7888b9e74b5987808f0dd0a974"}, + {file = "numpy-1.26.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:97e5d6a9f0702c2863aaabf19f0d1b6c2628fbe476438ce0b5ce06e83085064c"}, + {file = "numpy-1.26.1-cp311-cp311-win32.whl", hash = "sha256:b9d45d1dbb9de84894cc50efece5b09939752a2d75aab3a8b0cef6f3a35ecd6b"}, + {file = "numpy-1.26.1-cp311-cp311-win_amd64.whl", hash = "sha256:3649d566e2fc067597125428db15d60eb42a4e0897fc48d28cb75dc2e0454e53"}, + {file = "numpy-1.26.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:1d1bd82d539607951cac963388534da3b7ea0e18b149a53cf883d8f699178c0f"}, + {file = "numpy-1.26.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:afd5ced4e5a96dac6725daeb5242a35494243f2239244fad10a90ce58b071d24"}, + {file = "numpy-1.26.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a03fb25610ef560a6201ff06df4f8105292ba56e7cdd196ea350d123fc32e24e"}, + {file = "numpy-1.26.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dcfaf015b79d1f9f9c9fd0731a907407dc3e45769262d657d754c3a028586124"}, + {file = "numpy-1.26.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e509cbc488c735b43b5ffea175235cec24bbc57b227ef1acc691725beb230d1c"}, + {file 
= "numpy-1.26.1-cp312-cp312-win32.whl", hash = "sha256:af22f3d8e228d84d1c0c44c1fbdeb80f97a15a0abe4f080960393a00db733b66"}, + {file = "numpy-1.26.1-cp312-cp312-win_amd64.whl", hash = "sha256:9f42284ebf91bdf32fafac29d29d4c07e5e9d1af862ea73686581773ef9e73a7"}, + {file = "numpy-1.26.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bb894accfd16b867d8643fc2ba6c8617c78ba2828051e9a69511644ce86ce83e"}, + {file = "numpy-1.26.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e44ccb93f30c75dfc0c3aa3ce38f33486a75ec9abadabd4e59f114994a9c4617"}, + {file = "numpy-1.26.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9696aa2e35cc41e398a6d42d147cf326f8f9d81befcb399bc1ed7ffea339b64e"}, + {file = "numpy-1.26.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a5b411040beead47a228bde3b2241100454a6abde9df139ed087bd73fc0a4908"}, + {file = "numpy-1.26.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1e11668d6f756ca5ef534b5be8653d16c5352cbb210a5c2a79ff288e937010d5"}, + {file = "numpy-1.26.1-cp39-cp39-win32.whl", hash = "sha256:d1d2c6b7dd618c41e202c59c1413ef9b2c8e8a15f5039e344af64195459e3104"}, + {file = "numpy-1.26.1-cp39-cp39-win_amd64.whl", hash = "sha256:59227c981d43425ca5e5c01094d59eb14e8772ce6975d4b2fc1e106a833d5ae2"}, + {file = "numpy-1.26.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:06934e1a22c54636a059215d6da99e23286424f316fddd979f5071093b648668"}, + {file = "numpy-1.26.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76ff661a867d9272cd2a99eed002470f46dbe0943a5ffd140f49be84f68ffc42"}, + {file = "numpy-1.26.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:6965888d65d2848e8768824ca8288db0a81263c1efccec881cb35a0d805fcd2f"}, + {file = "numpy-1.26.1.tar.gz", hash = "sha256:c8c6c72d4a9f831f328efb1312642a1cafafaa88981d9ab76368d50d07d93cbe"}, +] +oauthlib = [ + {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, + {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, +] +onnx = [ + {file = "onnx-1.15.0-cp310-cp310-macosx_10_12_universal2.whl", hash = "sha256:51cacb6aafba308aaf462252ced562111f6991cdc7bc57a6c554c3519453a8ff"}, + {file = "onnx-1.15.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:0aee26b6f7f7da7e840de75ad9195a77a147d0662c94eaa6483be13ba468ffc1"}, + {file = "onnx-1.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:baf6ef6c93b3b843edb97a8d5b3d229a1301984f3f8dee859c29634d2083e6f9"}, + {file = "onnx-1.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96ed899fe6000edc05bb2828863d3841cfddd5a7cf04c1a771f112e94de75d9f"}, + {file = "onnx-1.15.0-cp310-cp310-win32.whl", hash = "sha256:f1ad3d77fc2f4b4296f0ac2c8cadd8c1dcf765fc586b737462d3a0fe8f7c696a"}, + {file = "onnx-1.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:ca4ebc4f47109bfb12c8c9e83dd99ec5c9f07d2e5f05976356c6ccdce3552010"}, + {file = "onnx-1.15.0-cp311-cp311-macosx_10_12_universal2.whl", hash = "sha256:233ffdb5ca8cc2d960b10965a763910c0830b64b450376da59207f454701f343"}, + {file = "onnx-1.15.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:51fa79c9ea9af033638ec51f9177b8e76c55fad65bb83ea96ee88fafade18ee7"}, + {file = "onnx-1.15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f277d4861729f5253a51fa41ce91bfec1c4574ee41b5637056b43500917295ce"}, + {file = 
"onnx-1.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8a7c94d2ebead8f739fdb70d1ce5a71726f4e17b3e5b8ad64455ea1b2801a85"}, + {file = "onnx-1.15.0-cp311-cp311-win32.whl", hash = "sha256:17dcfb86a8c6bdc3971443c29b023dd9c90ff1d15d8baecee0747a6b7f74e650"}, + {file = "onnx-1.15.0-cp311-cp311-win_amd64.whl", hash = "sha256:60a3e28747e305cd2e766e6a53a0a6d952cf9e72005ec6023ce5e07666676a4e"}, + {file = "onnx-1.15.0-cp38-cp38-macosx_10_12_universal2.whl", hash = "sha256:6b5c798d9e0907eaf319e3d3e7c89a2ed9a854bcb83da5fefb6d4c12d5e90721"}, + {file = "onnx-1.15.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:a4f774ff50092fe19bd8f46b2c9b27b1d30fbd700c22abde48a478142d464322"}, + {file = "onnx-1.15.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2b0e7f3938f2d994c34616bfb8b4b1cebbc4a0398483344fe5e9f2fe95175e6"}, + {file = "onnx-1.15.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49cebebd0020a4b12c1dd0909d426631212ef28606d7e4d49463d36abe7639ad"}, + {file = "onnx-1.15.0-cp38-cp38-win32.whl", hash = "sha256:1fdf8a3ff75abc2b32c83bf27fb7c18d6b976c9c537263fadd82b9560fe186fa"}, + {file = "onnx-1.15.0-cp38-cp38-win_amd64.whl", hash = "sha256:763e55c26e8de3a2dce008d55ae81b27fa8fb4acbb01a29b9f3c01f200c4d676"}, + {file = "onnx-1.15.0-cp39-cp39-macosx_10_12_universal2.whl", hash = "sha256:b2d5e802837629fc9c86f19448d19dd04d206578328bce202aeb3d4bedab43c4"}, + {file = "onnx-1.15.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:9a9cfbb5e5d5d88f89d0dfc9df5fb858899db874e1d5ed21e76c481f3cafc90d"}, + {file = "onnx-1.15.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f472bbe5cb670a0a4a4db08f41fde69b187a009d0cb628f964840d3f83524e9"}, + {file = "onnx-1.15.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bf2de9bef64792e5b8080c678023ac7d2b9e05d79a3e17e92cf6a4a624831d2"}, + {file = "onnx-1.15.0-cp39-cp39-win32.whl", hash = "sha256:ef4d9eb44b111e69e4534f3233fc2c13d1e26920d24ae4359d513bd54694bc6d"}, + {file = "onnx-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:95d7a3e2d79d371e272e39ae3f7547e0b116d0c7f774a4004e97febe6c93507f"}, + {file = "onnx-1.15.0.tar.gz", hash = "sha256:b18461a7d38f286618ca2a6e78062a2a9c634ce498e631e708a8041b00094825"}, +] +onnxruntime = [ + {file = "onnxruntime-1.16.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:28b2c7f444b4119950b69370801cd66067f403d19cbaf2a444735d7c269cce4a"}, + {file = "onnxruntime-1.16.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c24e04f33e7899f6aebb03ed51e51d346c1f906b05c5569d58ac9a12d38a2f58"}, + {file = "onnxruntime-1.16.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fa93b166f2d97063dc9f33c5118c5729a4a5dd5617296b6dbef42f9047b3e81"}, + {file = "onnxruntime-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:042dd9201b3016ee18f8f8bc4609baf11ff34ca1ff489c0a46bcd30919bf883d"}, + {file = "onnxruntime-1.16.1-cp310-cp310-win32.whl", hash = "sha256:c20aa0591f305012f1b21aad607ed96917c86ae7aede4a4dd95824b3d124ceb7"}, + {file = "onnxruntime-1.16.1-cp310-cp310-win_amd64.whl", hash = "sha256:5581873e578917bea76d6434ee7337e28195d03488dcf72d161d08e9398c6249"}, + {file = "onnxruntime-1.16.1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:ef8c0c8abf5f309aa1caf35941380839dc5f7a2fa53da533be4a3f254993f120"}, + {file = "onnxruntime-1.16.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e680380bea35a137cbc3efd67a17486e96972901192ad3026ee79c8d8fe264f7"}, + {file = 
"onnxruntime-1.16.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e62cc38ce1a669013d0a596d984762dc9c67c56f60ecfeee0d5ad36da5863f6"}, + {file = "onnxruntime-1.16.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:025c7a4d57bd2e63b8a0f84ad3df53e419e3df1cc72d63184f2aae807b17c13c"}, + {file = "onnxruntime-1.16.1-cp311-cp311-win32.whl", hash = "sha256:9ad074057fa8d028df248b5668514088cb0937b6ac5954073b7fb9b2891ffc8c"}, + {file = "onnxruntime-1.16.1-cp311-cp311-win_amd64.whl", hash = "sha256:d5e43a3478bffc01f817ecf826de7b25a2ca1bca8547d70888594ab80a77ad24"}, + {file = "onnxruntime-1.16.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:3aef4d70b0930e29a8943eab248cd1565664458d3a62b2276bd11181f28fd0a3"}, + {file = "onnxruntime-1.16.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:55a7b843a57c8ca0c8ff169428137958146081d5d76f1a6dd444c4ffcd37c3c2"}, + {file = "onnxruntime-1.16.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62c631af1941bf3b5f7d063d24c04aacce8cff0794e157c497e315e89ac5ad7b"}, + {file = "onnxruntime-1.16.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5671f296c3d5c233f601e97a10ab5a1dd8e65ba35c7b7b0c253332aba9dff330"}, + {file = "onnxruntime-1.16.1-cp38-cp38-win32.whl", hash = "sha256:eb3802305023dd05e16848d4e22b41f8147247894309c0c27122aaa08793b3d2"}, + {file = "onnxruntime-1.16.1-cp38-cp38-win_amd64.whl", hash = "sha256:fecfb07443d09d271b1487f401fbdf1ba0c829af6fd4fe8f6af25f71190e7eb9"}, + {file = "onnxruntime-1.16.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:de3e12094234db6545c67adbf801874b4eb91e9f299bda34c62967ef0050960f"}, + {file = "onnxruntime-1.16.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ff723c2a5621b5e7103f3be84d5aae1e03a20621e72219dddceae81f65f240af"}, + {file = "onnxruntime-1.16.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14a7fb3073aaf6b462e3d7fb433320f7700558a8892e5021780522dc4574292a"}, + {file = "onnxruntime-1.16.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:963159f1f699b0454cd72fcef3276c8a1aab9389a7b301bcd8e320fb9d9e8597"}, + {file = "onnxruntime-1.16.1-cp39-cp39-win32.whl", hash = "sha256:85771adb75190db9364b25ddec353ebf07635b83eb94b64ed014f1f6d57a3857"}, + {file = "onnxruntime-1.16.1-cp39-cp39-win_amd64.whl", hash = "sha256:d32d2b30799c1f950123c60ae8390818381fd5f88bdf3627eeca10071c155dc5"}, +] +opentelemetry-api = [ + {file = "opentelemetry_api-1.15.0-py3-none-any.whl", hash = "sha256:e6c2d2e42140fd396e96edf75a7ceb11073f4efb4db87565a431cc9d0f93f2e0"}, + {file = "opentelemetry_api-1.15.0.tar.gz", hash = "sha256:79ab791b4aaad27acc3dc3ba01596db5b5aac2ef75c70622c6038051d6c2cded"}, +] +opentelemetry-exporter-otlp = [ + {file = "opentelemetry_exporter_otlp-1.15.0-py3-none-any.whl", hash = "sha256:79f22748b6a54808a0448093dfa189c8490e729f67c134d4c992533d9393b33e"}, + {file = "opentelemetry_exporter_otlp-1.15.0.tar.gz", hash = "sha256:4f7c49751d9720e2e726e13b0bb958ccade4e29122c305d92c033da432c8d2c5"}, +] +opentelemetry-exporter-otlp-proto-grpc = [ + {file = "opentelemetry_exporter_otlp_proto_grpc-1.15.0-py3-none-any.whl", hash = "sha256:c2a5492ba7d140109968135d641d06ce3c5bd73c50665f787526065d57d7fd1d"}, + {file = "opentelemetry_exporter_otlp_proto_grpc-1.15.0.tar.gz", hash = "sha256:844f2a4bb9bcda34e4eb6fe36765e5031aacb36dc60ed88c90fc246942ea26e7"}, +] +opentelemetry-exporter-otlp-proto-http = [ + {file = "opentelemetry_exporter_otlp_proto_http-1.15.0-py3-none-any.whl", hash = 
"sha256:3ec2a02196c8a54bf5cbf7fe623a5238625638e83b6047a983bdf96e2bbb74c0"}, + {file = "opentelemetry_exporter_otlp_proto_http-1.15.0.tar.gz", hash = "sha256:11b2c814249a49b22f6cca7a06b05701f561d577b747f3660dfd67b6eb9daf9c"}, +] +opentelemetry-proto = [ + {file = "opentelemetry_proto-1.15.0-py3-none-any.whl", hash = "sha256:044b6d044b4d10530f250856f933442b8753a17f94ae37c207607f733fb9a844"}, + {file = "opentelemetry_proto-1.15.0.tar.gz", hash = "sha256:9c4008e40ac8cab359daac283fbe7002c5c29c77ea2674ad5626a249e64e0101"}, +] +opentelemetry-sdk = [ + {file = "opentelemetry_sdk-1.15.0-py3-none-any.whl", hash = "sha256:555c533e9837766119bbccc7a80458c9971d853a6f1da683a2246cd5e53b4645"}, + {file = "opentelemetry_sdk-1.15.0.tar.gz", hash = "sha256:98dbffcfeebcbff12c0c974292d6ea603180a145904cf838b1fe4d5c99078425"}, +] +opentelemetry-semantic-conventions = [ + {file = "opentelemetry_semantic_conventions-0.36b0-py3-none-any.whl", hash = "sha256:adc05635e87b9d3e007c9f530eed487fc3ef2177d02f82f674f28ebf9aff8243"}, + {file = "opentelemetry_semantic_conventions-0.36b0.tar.gz", hash = "sha256:829dc221795467d98b773c04096e29be038d77526dc8d6ac76f546fb6279bf01"}, +] +ordered-set = [ + {file = "ordered-set-4.1.0.tar.gz", hash = "sha256:694a8e44c87657c59292ede72891eb91d34131f6531463aab3009191c77364a8"}, + {file = "ordered_set-4.1.0-py3-none-any.whl", hash = "sha256:046e1132c71fcf3330438a539928932caf51ddbc582496833e23de611de14562"}, +] +orjson = [ + {file = "orjson-3.9.5-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:ad6845912a71adcc65df7c8a7f2155eba2096cf03ad2c061c93857de70d699ad"}, + {file = "orjson-3.9.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e298e0aacfcc14ef4476c3f409e85475031de24e5b23605a465e9bf4b2156273"}, + {file = "orjson-3.9.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:83c9939073281ef7dd7c5ca7f54cceccb840b440cec4b8a326bda507ff88a0a6"}, + {file = "orjson-3.9.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e174cc579904a48ee1ea3acb7045e8a6c5d52c17688dfcb00e0e842ec378cabf"}, + {file = "orjson-3.9.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f8d51702f42c785b115401e1d64a27a2ea767ae7cf1fb8edaa09c7cf1571c660"}, + {file = "orjson-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f13d61c0c7414ddee1ef4d0f303e2222f8cced5a2e26d9774751aecd72324c9e"}, + {file = "orjson-3.9.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d748cc48caf5a91c883d306ab648df1b29e16b488c9316852844dd0fd000d1c2"}, + {file = "orjson-3.9.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bd19bc08fa023e4c2cbf8294ad3f2b8922f4de9ba088dbc71e6b268fdf54591c"}, + {file = "orjson-3.9.5-cp310-none-win32.whl", hash = "sha256:5793a21a21bf34e1767e3d61a778a25feea8476dcc0bdf0ae1bc506dc34561ea"}, + {file = "orjson-3.9.5-cp310-none-win_amd64.whl", hash = "sha256:2bcec0b1024d0031ab3eab7a8cb260c8a4e4a5e35993878a2da639d69cdf6a65"}, + {file = "orjson-3.9.5-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:8547b95ca0e2abd17e1471973e6d676f1d8acedd5f8fb4f739e0612651602d66"}, + {file = "orjson-3.9.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87ce174d6a38d12b3327f76145acbd26f7bc808b2b458f61e94d83cd0ebb4d76"}, + {file = "orjson-3.9.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:a960bb1bc9a964d16fcc2d4af5a04ce5e4dfddca84e3060c35720d0a062064fe"}, + {file = "orjson-3.9.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a7aa5573a949760d6161d826d34dc36db6011926f836851fe9ccb55b5a7d8e8"}, + {file = "orjson-3.9.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8b2852afca17d7eea85f8e200d324e38c851c96598ac7b227e4f6c4e59fbd3df"}, + {file = "orjson-3.9.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa185959c082475288da90f996a82e05e0c437216b96f2a8111caeb1d54ef926"}, + {file = "orjson-3.9.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:89c9332695b838438ea4b9a482bce8ffbfddde4df92750522d928fb00b7b8dce"}, + {file = "orjson-3.9.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2493f1351a8f0611bc26e2d3d407efb873032b4f6b8926fed8cfed39210ca4ba"}, + {file = "orjson-3.9.5-cp311-none-win32.whl", hash = "sha256:ffc544e0e24e9ae69301b9a79df87a971fa5d1c20a6b18dca885699709d01be0"}, + {file = "orjson-3.9.5-cp311-none-win_amd64.whl", hash = "sha256:89670fe2732e3c0c54406f77cad1765c4c582f67b915c74fda742286809a0cdc"}, + {file = "orjson-3.9.5-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:15df211469625fa27eced4aa08dc03e35f99c57d45a33855cc35f218ea4071b8"}, + {file = "orjson-3.9.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9f17c59fe6c02bc5f89ad29edb0253d3059fe8ba64806d789af89a45c35269a"}, + {file = "orjson-3.9.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ca6b96659c7690773d8cebb6115c631f4a259a611788463e9c41e74fa53bf33f"}, + {file = "orjson-3.9.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a26fafe966e9195b149950334bdbe9026eca17fe8ffe2d8fa87fdc30ca925d30"}, + {file = "orjson-3.9.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9006b1eb645ecf460da067e2dd17768ccbb8f39b01815a571bfcfab7e8da5e52"}, + {file = "orjson-3.9.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebfdbf695734b1785e792a1315e41835ddf2a3e907ca0e1c87a53f23006ce01d"}, + {file = "orjson-3.9.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4a3943234342ab37d9ed78fb0a8f81cd4b9532f67bf2ac0d3aa45fa3f0a339f3"}, + {file = "orjson-3.9.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e6762755470b5c82f07b96b934af32e4d77395a11768b964aaa5eb092817bc31"}, + {file = "orjson-3.9.5-cp312-none-win_amd64.whl", hash = "sha256:c74df28749c076fd6e2157190df23d43d42b2c83e09d79b51694ee7315374ad5"}, + {file = "orjson-3.9.5-cp37-cp37m-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:88e18a74d916b74f00d0978d84e365c6bf0e7ab846792efa15756b5fb2f7d49d"}, + {file = "orjson-3.9.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d28514b5b6dfaf69097be70d0cf4f1407ec29d0f93e0b4131bf9cc8fd3f3e374"}, + {file = "orjson-3.9.5-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b81aca8c7be61e2566246b6a0ca49f8aece70dd3f38c7f5c837f398c4cb142"}, + {file = "orjson-3.9.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:385c1c713b1e47fd92e96cf55fd88650ac6dfa0b997e8aa7ecffd8b5865078b1"}, + {file = "orjson-3.9.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9850c03a8e42fba1a508466e6a0f99472fd2b4a5f30235ea49b2a1b32c04c11"}, + {file = "orjson-3.9.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:4449f84bbb13bcef493d8aa669feadfced0f7c5eea2d0d88b5cc21f812183af8"}, + {file = "orjson-3.9.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:86127bf194f3b873135e44ce5dc9212cb152b7e06798d5667a898a00f0519be4"}, + {file = "orjson-3.9.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0abcd039f05ae9ab5b0ff11624d0b9e54376253b7d3217a358d09c3edf1d36f7"}, + {file = "orjson-3.9.5-cp37-none-win32.whl", hash = "sha256:10cc8ad5ff7188efcb4bec196009d61ce525a4e09488e6d5db41218c7fe4f001"}, + {file = "orjson-3.9.5-cp37-none-win_amd64.whl", hash = "sha256:ff27e98532cb87379d1a585837d59b187907228268e7b0a87abe122b2be6968e"}, + {file = "orjson-3.9.5-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:5bfa79916ef5fef75ad1f377e54a167f0de334c1fa4ebb8d0224075f3ec3d8c0"}, + {file = "orjson-3.9.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e87dfa6ac0dae764371ab19b35eaaa46dfcb6ef2545dfca03064f21f5d08239f"}, + {file = "orjson-3.9.5-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:50ced24a7b23058b469ecdb96e36607fc611cbaee38b58e62a55c80d1b3ad4e1"}, + {file = "orjson-3.9.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b1b74ea2a3064e1375da87788897935832e806cc784de3e789fd3c4ab8eb3fa5"}, + {file = "orjson-3.9.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7cb961efe013606913d05609f014ad43edfaced82a576e8b520a5574ce3b2b9"}, + {file = "orjson-3.9.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1225d2d5ee76a786bda02f8c5e15017462f8432bb960de13d7c2619dba6f0275"}, + {file = "orjson-3.9.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f39f4b99199df05c7ecdd006086259ed25886cdbd7b14c8cdb10c7675cfcca7d"}, + {file = "orjson-3.9.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a461dc9fb60cac44f2d3218c36a0c1c01132314839a0e229d7fb1bba69b810d8"}, + {file = "orjson-3.9.5-cp38-none-win32.whl", hash = "sha256:dedf1a6173748202df223aea29de814b5836732a176b33501375c66f6ab7d822"}, + {file = "orjson-3.9.5-cp38-none-win_amd64.whl", hash = "sha256:fa504082f53efcbacb9087cc8676c163237beb6e999d43e72acb4bb6f0db11e6"}, + {file = "orjson-3.9.5-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:6900f0248edc1bec2a2a3095a78a7e3ef4e63f60f8ddc583687eed162eedfd69"}, + {file = "orjson-3.9.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17404333c40047888ac40bd8c4d49752a787e0a946e728a4e5723f111b6e55a5"}, + {file = "orjson-3.9.5-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0eefb7cfdd9c2bc65f19f974a5d1dfecbac711dae91ed635820c6b12da7a3c11"}, + {file = "orjson-3.9.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:68c78b2a3718892dc018adbc62e8bab6ef3c0d811816d21e6973dee0ca30c152"}, + {file = "orjson-3.9.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:591ad7d9e4a9f9b104486ad5d88658c79ba29b66c5557ef9edf8ca877a3f8d11"}, + {file = "orjson-3.9.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6cc2cbf302fbb2d0b2c3c142a663d028873232a434d89ce1b2604ebe5cc93ce8"}, + {file = "orjson-3.9.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b26b5aa5e9ee1bad2795b925b3adb1b1b34122cb977f30d89e0a1b3f24d18450"}, + {file = "orjson-3.9.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ef84724f7d29dcfe3aafb1fc5fc7788dca63e8ae626bb9298022866146091a3e"}, + {file = "orjson-3.9.5-cp39-none-win32.whl", hash 
= "sha256:664cff27f85939059472afd39acff152fbac9a091b7137092cb651cf5f7747b5"}, + {file = "orjson-3.9.5-cp39-none-win_amd64.whl", hash = "sha256:91dda66755795ac6100e303e206b636568d42ac83c156547634256a2e68de694"}, + {file = "orjson-3.9.5.tar.gz", hash = "sha256:6daf5ee0b3cf530b9978cdbf71024f1c16ed4a67d05f6ec435c6e7fe7a52724c"}, +] +packaging = [ + {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, + {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, +] +pandas = [ + {file = "pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8"}, + {file = "pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f"}, + {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183"}, + {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0"}, + {file = "pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210"}, + {file = "pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e"}, + {file = "pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8"}, + {file = "pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26"}, + {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d"}, + {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df"}, + {file = "pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd"}, + {file = "pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b"}, + {file = "pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061"}, + {file = "pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5"}, + {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089"}, + {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0"}, + {file = "pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02"}, + {file = "pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78"}, + {file = "pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b"}, + {file = "pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e"}, + 
{file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b"}, + {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641"}, + {file = "pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682"}, + {file = "pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc"}, + {file = "pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c"}, +] +parsedatetime = [ + {file = "parsedatetime-2.4-py2-none-any.whl", hash = "sha256:9ee3529454bf35c40a77115f5a596771e59e1aee8c53306f346c461b8e913094"}, + {file = "parsedatetime-2.4.tar.gz", hash = "sha256:3d817c58fb9570d1eec1dd46fa9448cd644eeed4fb612684b02dfda3a79cb84b"}, +] +pathspec = [ + {file = "pathspec-0.11.2-py3-none-any.whl", hash = "sha256:1d6ed233af05e679efb96b1851550ea95bbb64b7c490b0f5aa52996c11e92a20"}, + {file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"}, +] +pathvalidate = [ + {file = "pathvalidate-3.1.0-py3-none-any.whl", hash = "sha256:912fd1d2e1a2a6a6f98da36a91f21ed86746473810ff625b9c34f3d06c0caa1d"}, + {file = "pathvalidate-3.1.0.tar.gz", hash = "sha256:426970226e24199fd90d93995d223c1e28bda967cdf4370755a14cdf72a2a8ee"}, +] +pbr = [ + {file = "pbr-5.11.1-py2.py3-none-any.whl", hash = "sha256:567f09558bae2b3ab53cb3c1e2e33e726ff3338e7bae3db5dc954b3a44eef12b"}, + {file = "pbr-5.11.1.tar.gz", hash = "sha256:aefc51675b0b533d56bb5fd1c8c6c0522fe31896679882e1c4c63d5e4a0fccb3"}, +] +pendulum = [ + {file = "pendulum-2.1.2-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:b6c352f4bd32dff1ea7066bd31ad0f71f8d8100b9ff709fb343f3b86cee43efe"}, + {file = "pendulum-2.1.2-cp27-cp27m-win_amd64.whl", hash = "sha256:318f72f62e8e23cd6660dbafe1e346950281a9aed144b5c596b2ddabc1d19739"}, + {file = "pendulum-2.1.2-cp35-cp35m-macosx_10_15_x86_64.whl", hash = "sha256:0731f0c661a3cb779d398803655494893c9f581f6488048b3fb629c2342b5394"}, + {file = "pendulum-2.1.2-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:3481fad1dc3f6f6738bd575a951d3c15d4b4ce7c82dce37cf8ac1483fde6e8b0"}, + {file = "pendulum-2.1.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:9702069c694306297ed362ce7e3c1ef8404ac8ede39f9b28b7c1a7ad8c3959e3"}, + {file = "pendulum-2.1.2-cp35-cp35m-win_amd64.whl", hash = "sha256:fb53ffa0085002ddd43b6ca61a7b34f2d4d7c3ed66f931fe599e1a531b42af9b"}, + {file = "pendulum-2.1.2-cp36-cp36m-macosx_10_15_x86_64.whl", hash = "sha256:c501749fdd3d6f9e726086bf0cd4437281ed47e7bca132ddb522f86a1645d360"}, + {file = "pendulum-2.1.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:c807a578a532eeb226150d5006f156632df2cc8c5693d778324b43ff8c515dd0"}, + {file = "pendulum-2.1.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:2d1619a721df661e506eff8db8614016f0720ac171fe80dda1333ee44e684087"}, + {file = "pendulum-2.1.2-cp36-cp36m-win_amd64.whl", hash = "sha256:f888f2d2909a414680a29ae74d0592758f2b9fcdee3549887779cd4055e975db"}, + {file = "pendulum-2.1.2-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:e95d329384717c7bf627bf27e204bc3b15c8238fa8d9d9781d93712776c14002"}, + {file = "pendulum-2.1.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:4c9c689747f39d0d02a9f94fcee737b34a5773803a64a5fdb046ee9cac7442c5"}, + {file = 
"pendulum-2.1.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:1245cd0075a3c6d889f581f6325dd8404aca5884dea7223a5566c38aab94642b"}, + {file = "pendulum-2.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:db0a40d8bcd27b4fb46676e8eb3c732c67a5a5e6bfab8927028224fbced0b40b"}, + {file = "pendulum-2.1.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:f5e236e7730cab1644e1b87aca3d2ff3e375a608542e90fe25685dae46310116"}, + {file = "pendulum-2.1.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:de42ea3e2943171a9e95141f2eecf972480636e8e484ccffaf1e833929e9e052"}, + {file = "pendulum-2.1.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7c5ec650cb4bec4c63a89a0242cc8c3cebcec92fcfe937c417ba18277d8560be"}, + {file = "pendulum-2.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:33fb61601083f3eb1d15edeb45274f73c63b3c44a8524703dc143f4212bf3269"}, + {file = "pendulum-2.1.2-cp39-cp39-manylinux1_i686.whl", hash = "sha256:29c40a6f2942376185728c9a0347d7c0f07905638c83007e1d262781f1e6953a"}, + {file = "pendulum-2.1.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:94b1fc947bfe38579b28e1cccb36f7e28a15e841f30384b5ad6c5e31055c85d7"}, + {file = "pendulum-2.1.2.tar.gz", hash = "sha256:b06a0ca1bfe41c990bbf0c029f0b6501a7f2ec4e38bfec730712015e8860f207"}, +] +pipdeptree = [ + {file = "pipdeptree-2.9.6-py3-none-any.whl", hash = "sha256:de93f990d21224297c9f03e057da5a3dc65ff732a0147945dd9421671f13626b"}, + {file = "pipdeptree-2.9.6.tar.gz", hash = "sha256:f815caf165e89c576ce659b866c7a82ae4590420c2d020a92d32e45097f8bc73"}, +] +pkgutil-resolve-name = [ + {file = "pkgutil_resolve_name-1.3.10-py3-none-any.whl", hash = "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e"}, + {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"}, +] +platformdirs = [ + {file = "platformdirs-3.8.1-py3-none-any.whl", hash = "sha256:cec7b889196b9144d088e4c57d9ceef7374f6c39694ad1577a0aab50d27ea28c"}, + {file = "platformdirs-3.8.1.tar.gz", hash = "sha256:f87ca4fcff7d2b0f81c6a748a77973d7af0f4d526f98f308477c3c436c74d528"}, +] +pluggy = [ + {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, + {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, +] +ply = [ + {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, + {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, +] +portalocker = [ + {file = "portalocker-2.7.0-py2.py3-none-any.whl", hash = "sha256:a07c5b4f3985c3cf4798369631fb7011adb498e2a46d8440efc75a8f29a0f983"}, + {file = "portalocker-2.7.0.tar.gz", hash = "sha256:032e81d534a88ec1736d03f780ba073f047a06c478b06e2937486f334e955c51"}, +] +prefixed = [ + {file = "prefixed-0.7.0-py2.py3-none-any.whl", hash = "sha256:537b0e4ff4516c4578f277a41d7104f769d6935ae9cdb0f88fed82ec7b3c0ca5"}, + {file = "prefixed-0.7.0.tar.gz", hash = "sha256:0b54d15e602eb8af4ac31b1db21a37ea95ce5890e0741bb0dd9ded493cefbbe9"}, +] +prison = [ + {file = "prison-0.2.1-py2.py3-none-any.whl", hash = "sha256:f90bab63fca497aa0819a852f64fb21a4e181ed9f6114deaa5dc04001a7555c5"}, + {file = "prison-0.2.1.tar.gz", hash = "sha256:e6cd724044afcb1a8a69340cad2f1e3151a5839fd3a8027fd1357571e797c599"}, +] +proto-plus = [ + {file = "proto-plus-1.22.3.tar.gz", hash = "sha256:fdcd09713cbd42480740d2fe29c990f7fbd885a67efc328aa8be6ee3e9f76a6b"}, + {file = 
"proto_plus-1.22.3-py3-none-any.whl", hash = "sha256:a49cd903bc0b6ab41f76bf65510439d56ca76f868adf0274e738bfdd096894df"}, +] +protobuf = [ + {file = "protobuf-4.24.2-cp310-abi3-win32.whl", hash = "sha256:58e12d2c1aa428ece2281cef09bbaa6938b083bcda606db3da4e02e991a0d924"}, + {file = "protobuf-4.24.2-cp310-abi3-win_amd64.whl", hash = "sha256:77700b55ba41144fc64828e02afb41901b42497b8217b558e4a001f18a85f2e3"}, + {file = "protobuf-4.24.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:237b9a50bd3b7307d0d834c1b0eb1a6cd47d3f4c2da840802cd03ea288ae8880"}, + {file = "protobuf-4.24.2-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:25ae91d21e3ce8d874211110c2f7edd6384816fb44e06b2867afe35139e1fd1c"}, + {file = "protobuf-4.24.2-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:c00c3c7eb9ad3833806e21e86dca448f46035242a680f81c3fe068ff65e79c74"}, + {file = "protobuf-4.24.2-cp37-cp37m-win32.whl", hash = "sha256:4e69965e7e54de4db989289a9b971a099e626f6167a9351e9d112221fc691bc1"}, + {file = "protobuf-4.24.2-cp37-cp37m-win_amd64.whl", hash = "sha256:c5cdd486af081bf752225b26809d2d0a85e575b80a84cde5172a05bbb1990099"}, + {file = "protobuf-4.24.2-cp38-cp38-win32.whl", hash = "sha256:6bd26c1fa9038b26c5c044ee77e0ecb18463e957fefbaeb81a3feb419313a54e"}, + {file = "protobuf-4.24.2-cp38-cp38-win_amd64.whl", hash = "sha256:bb7aa97c252279da65584af0456f802bd4b2de429eb945bbc9b3d61a42a8cd16"}, + {file = "protobuf-4.24.2-cp39-cp39-win32.whl", hash = "sha256:2b23bd6e06445699b12f525f3e92a916f2dcf45ffba441026357dea7fa46f42b"}, + {file = "protobuf-4.24.2-cp39-cp39-win_amd64.whl", hash = "sha256:839952e759fc40b5d46be319a265cf94920174d88de31657d5622b5d8d6be5cd"}, + {file = "protobuf-4.24.2-py3-none-any.whl", hash = "sha256:3b7b170d3491ceed33f723bbf2d5a260f8a4e23843799a3906f16ef736ef251e"}, + {file = "protobuf-4.24.2.tar.gz", hash = "sha256:7fda70797ddec31ddfa3576cbdcc3ddbb6b3078b737a1a87ab9136af0570cd6e"}, +] +psutil = [ + {file = "psutil-5.9.5-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:be8929ce4313f9f8146caad4272f6abb8bf99fc6cf59344a3167ecd74f4f203f"}, + {file = "psutil-5.9.5-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:ab8ed1a1d77c95453db1ae00a3f9c50227ebd955437bcf2a574ba8adbf6a74d5"}, + {file = "psutil-5.9.5-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:4aef137f3345082a3d3232187aeb4ac4ef959ba3d7c10c33dd73763fbc063da4"}, + {file = "psutil-5.9.5-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:ea8518d152174e1249c4f2a1c89e3e6065941df2fa13a1ab45327716a23c2b48"}, + {file = "psutil-5.9.5-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:acf2aef9391710afded549ff602b5887d7a2349831ae4c26be7c807c0a39fac4"}, + {file = "psutil-5.9.5-cp27-none-win32.whl", hash = "sha256:5b9b8cb93f507e8dbaf22af6a2fd0ccbe8244bf30b1baad6b3954e935157ae3f"}, + {file = "psutil-5.9.5-cp27-none-win_amd64.whl", hash = "sha256:8c5f7c5a052d1d567db4ddd231a9d27a74e8e4a9c3f44b1032762bd7b9fdcd42"}, + {file = "psutil-5.9.5-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:3c6f686f4225553615612f6d9bc21f1c0e305f75d7d8454f9b46e901778e7217"}, + {file = "psutil-5.9.5-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a7dd9997128a0d928ed4fb2c2d57e5102bb6089027939f3b722f3a210f9a8da"}, + {file = "psutil-5.9.5-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89518112647f1276b03ca97b65cc7f64ca587b1eb0278383017c2a0dcc26cbe4"}, + {file = "psutil-5.9.5-cp36-abi3-win32.whl", hash = 
"sha256:104a5cc0e31baa2bcf67900be36acde157756b9c44017b86b2c049f11957887d"}, + {file = "psutil-5.9.5-cp36-abi3-win_amd64.whl", hash = "sha256:b258c0c1c9d145a1d5ceffab1134441c4c5113b2417fafff7315a917a026c3c9"}, + {file = "psutil-5.9.5-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:c607bb3b57dc779d55e1554846352b4e358c10fff3abf3514a7a6601beebdb30"}, + {file = "psutil-5.9.5.tar.gz", hash = "sha256:5410638e4df39c54d957fc51ce03048acd8e6d60abc0f5107af51e5fb566eb3c"}, +] +psycopg2-binary = [ + {file = "psycopg2-binary-2.9.7.tar.gz", hash = "sha256:1b918f64a51ffe19cd2e230b3240ba481330ce1d4b7875ae67305bd1d37b041c"}, + {file = "psycopg2_binary-2.9.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ea5f8ee87f1eddc818fc04649d952c526db4426d26bab16efbe5a0c52b27d6ab"}, + {file = "psycopg2_binary-2.9.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2993ccb2b7e80844d534e55e0f12534c2871952f78e0da33c35e648bf002bbff"}, + {file = "psycopg2_binary-2.9.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbbc3c5d15ed76b0d9db7753c0db40899136ecfe97d50cbde918f630c5eb857a"}, + {file = "psycopg2_binary-2.9.7-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:692df8763b71d42eb8343f54091368f6f6c9cfc56dc391858cdb3c3ef1e3e584"}, + {file = "psycopg2_binary-2.9.7-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9dcfd5d37e027ec393a303cc0a216be564b96c80ba532f3d1e0d2b5e5e4b1e6e"}, + {file = "psycopg2_binary-2.9.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17cc17a70dfb295a240db7f65b6d8153c3d81efb145d76da1e4a096e9c5c0e63"}, + {file = "psycopg2_binary-2.9.7-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e5666632ba2b0d9757b38fc17337d84bdf932d38563c5234f5f8c54fd01349c9"}, + {file = "psycopg2_binary-2.9.7-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7db7b9b701974c96a88997d458b38ccb110eba8f805d4b4f74944aac48639b42"}, + {file = "psycopg2_binary-2.9.7-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c82986635a16fb1fa15cd5436035c88bc65c3d5ced1cfaac7f357ee9e9deddd4"}, + {file = "psycopg2_binary-2.9.7-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4fe13712357d802080cfccbf8c6266a3121dc0e27e2144819029095ccf708372"}, + {file = "psycopg2_binary-2.9.7-cp310-cp310-win32.whl", hash = "sha256:122641b7fab18ef76b18860dd0c772290566b6fb30cc08e923ad73d17461dc63"}, + {file = "psycopg2_binary-2.9.7-cp310-cp310-win_amd64.whl", hash = "sha256:f8651cf1f144f9ee0fa7d1a1df61a9184ab72962531ca99f077bbdcba3947c58"}, + {file = "psycopg2_binary-2.9.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4ecc15666f16f97709106d87284c136cdc82647e1c3f8392a672616aed3c7151"}, + {file = "psycopg2_binary-2.9.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3fbb1184c7e9d28d67671992970718c05af5f77fc88e26fd7136613c4ece1f89"}, + {file = "psycopg2_binary-2.9.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a7968fd20bd550431837656872c19575b687f3f6f98120046228e451e4064df"}, + {file = "psycopg2_binary-2.9.7-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:094af2e77a1976efd4956a031028774b827029729725e136514aae3cdf49b87b"}, + {file = "psycopg2_binary-2.9.7-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:26484e913d472ecb6b45937ea55ce29c57c662066d222fb0fbdc1fab457f18c5"}, + {file = "psycopg2_binary-2.9.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f309b77a7c716e6ed9891b9b42953c3ff7d533dc548c1e33fddc73d2f5e21f9"}, + {file = 
"psycopg2_binary-2.9.7-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6d92e139ca388ccfe8c04aacc163756e55ba4c623c6ba13d5d1595ed97523e4b"}, + {file = "psycopg2_binary-2.9.7-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:2df562bb2e4e00ee064779902d721223cfa9f8f58e7e52318c97d139cf7f012d"}, + {file = "psycopg2_binary-2.9.7-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:4eec5d36dbcfc076caab61a2114c12094c0b7027d57e9e4387b634e8ab36fd44"}, + {file = "psycopg2_binary-2.9.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1011eeb0c51e5b9ea1016f0f45fa23aca63966a4c0afcf0340ccabe85a9f65bd"}, + {file = "psycopg2_binary-2.9.7-cp311-cp311-win32.whl", hash = "sha256:ded8e15f7550db9e75c60b3d9fcbc7737fea258a0f10032cdb7edc26c2a671fd"}, + {file = "psycopg2_binary-2.9.7-cp311-cp311-win_amd64.whl", hash = "sha256:8a136c8aaf6615653450817a7abe0fc01e4ea720ae41dfb2823eccae4b9062a3"}, + {file = "psycopg2_binary-2.9.7-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2dec5a75a3a5d42b120e88e6ed3e3b37b46459202bb8e36cd67591b6e5feebc1"}, + {file = "psycopg2_binary-2.9.7-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc10da7e7df3380426521e8c1ed975d22df678639da2ed0ec3244c3dc2ab54c8"}, + {file = "psycopg2_binary-2.9.7-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee919b676da28f78f91b464fb3e12238bd7474483352a59c8a16c39dfc59f0c5"}, + {file = "psycopg2_binary-2.9.7-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb1c0e682138f9067a58fc3c9a9bf1c83d8e08cfbee380d858e63196466d5c86"}, + {file = "psycopg2_binary-2.9.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00d8db270afb76f48a499f7bb8fa70297e66da67288471ca873db88382850bf4"}, + {file = "psycopg2_binary-2.9.7-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:9b0c2b466b2f4d89ccc33784c4ebb1627989bd84a39b79092e560e937a11d4ac"}, + {file = "psycopg2_binary-2.9.7-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:51d1b42d44f4ffb93188f9b39e6d1c82aa758fdb8d9de65e1ddfe7a7d250d7ad"}, + {file = "psycopg2_binary-2.9.7-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:11abdbfc6f7f7dea4a524b5f4117369b0d757725798f1593796be6ece20266cb"}, + {file = "psycopg2_binary-2.9.7-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:f02f4a72cc3ab2565c6d9720f0343cb840fb2dc01a2e9ecb8bc58ccf95dc5c06"}, + {file = "psycopg2_binary-2.9.7-cp37-cp37m-win32.whl", hash = "sha256:81d5dd2dd9ab78d31a451e357315f201d976c131ca7d43870a0e8063b6b7a1ec"}, + {file = "psycopg2_binary-2.9.7-cp37-cp37m-win_amd64.whl", hash = "sha256:62cb6de84d7767164a87ca97e22e5e0a134856ebcb08f21b621c6125baf61f16"}, + {file = "psycopg2_binary-2.9.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:59f7e9109a59dfa31efa022e94a244736ae401526682de504e87bd11ce870c22"}, + {file = "psycopg2_binary-2.9.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:95a7a747bdc3b010bb6a980f053233e7610276d55f3ca506afff4ad7749ab58a"}, + {file = "psycopg2_binary-2.9.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c721ee464e45ecf609ff8c0a555018764974114f671815a0a7152aedb9f3343"}, + {file = "psycopg2_binary-2.9.7-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f4f37bbc6588d402980ffbd1f3338c871368fb4b1cfa091debe13c68bb3852b3"}, + {file = "psycopg2_binary-2.9.7-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac83ab05e25354dad798401babaa6daa9577462136ba215694865394840e31f8"}, + {file = 
"psycopg2_binary-2.9.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:024eaeb2a08c9a65cd5f94b31ace1ee3bb3f978cd4d079406aef85169ba01f08"}, + {file = "psycopg2_binary-2.9.7-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1c31c2606ac500dbd26381145684d87730a2fac9a62ebcfbaa2b119f8d6c19f4"}, + {file = "psycopg2_binary-2.9.7-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:42a62ef0e5abb55bf6ffb050eb2b0fcd767261fa3faf943a4267539168807522"}, + {file = "psycopg2_binary-2.9.7-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:7952807f95c8eba6a8ccb14e00bf170bb700cafcec3924d565235dffc7dc4ae8"}, + {file = "psycopg2_binary-2.9.7-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e02bc4f2966475a7393bd0f098e1165d470d3fa816264054359ed4f10f6914ea"}, + {file = "psycopg2_binary-2.9.7-cp38-cp38-win32.whl", hash = "sha256:fdca0511458d26cf39b827a663d7d87db6f32b93efc22442a742035728603d5f"}, + {file = "psycopg2_binary-2.9.7-cp38-cp38-win_amd64.whl", hash = "sha256:d0b16e5bb0ab78583f0ed7ab16378a0f8a89a27256bb5560402749dbe8a164d7"}, + {file = "psycopg2_binary-2.9.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6822c9c63308d650db201ba22fe6648bd6786ca6d14fdaf273b17e15608d0852"}, + {file = "psycopg2_binary-2.9.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8f94cb12150d57ea433e3e02aabd072205648e86f1d5a0a692d60242f7809b15"}, + {file = "psycopg2_binary-2.9.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5ee89587696d808c9a00876065d725d4ae606f5f7853b961cdbc348b0f7c9a1"}, + {file = "psycopg2_binary-2.9.7-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad5ec10b53cbb57e9a2e77b67e4e4368df56b54d6b00cc86398578f1c635f329"}, + {file = "psycopg2_binary-2.9.7-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:642df77484b2dcaf87d4237792246d8068653f9e0f5c025e2c692fc56b0dda70"}, + {file = "psycopg2_binary-2.9.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6a8b575ac45af1eaccbbcdcf710ab984fd50af048fe130672377f78aaff6fc1"}, + {file = "psycopg2_binary-2.9.7-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f955aa50d7d5220fcb6e38f69ea126eafecd812d96aeed5d5f3597f33fad43bb"}, + {file = "psycopg2_binary-2.9.7-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ad26d4eeaa0d722b25814cce97335ecf1b707630258f14ac4d2ed3d1d8415265"}, + {file = "psycopg2_binary-2.9.7-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:ced63c054bdaf0298f62681d5dcae3afe60cbae332390bfb1acf0e23dcd25fc8"}, + {file = "psycopg2_binary-2.9.7-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2b04da24cbde33292ad34a40db9832a80ad12de26486ffeda883413c9e1b1d5e"}, + {file = "psycopg2_binary-2.9.7-cp39-cp39-win32.whl", hash = "sha256:18f12632ab516c47c1ac4841a78fddea6508a8284c7cf0f292cb1a523f2e2379"}, + {file = "psycopg2_binary-2.9.7-cp39-cp39-win_amd64.whl", hash = "sha256:eb3b8d55924a6058a26db69fb1d3e7e32695ff8b491835ba9f479537e14dcf9f"}, +] +psycopg2cffi = [ + {file = "psycopg2cffi-2.9.0.tar.gz", hash = "sha256:7e272edcd837de3a1d12b62185eb85c45a19feda9e62fa1b120c54f9e8d35c52"}, +] +py = [ + {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, + {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, +] +pyarrow = [ + {file = "pyarrow-14.0.1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:96d64e5ba7dceb519a955e5eeb5c9adcfd63f73a56aea4722e2cc81364fc567a"}, + {file = 
"pyarrow-14.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a8ae88c0038d1bc362a682320112ee6774f006134cd5afc291591ee4bc06505"}, + {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f6f053cb66dc24091f5511e5920e45c83107f954a21032feadc7b9e3a8e7851"}, + {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:906b0dc25f2be12e95975722f1e60e162437023f490dbd80d0deb7375baf3171"}, + {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:78d4a77a46a7de9388b653af1c4ce539350726cd9af62e0831e4f2bd0c95a2f4"}, + {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:06ca79080ef89d6529bb8e5074d4b4f6086143b2520494fcb7cf8a99079cde93"}, + {file = "pyarrow-14.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:32542164d905002c42dff896efdac79b3bdd7291b1b74aa292fac8450d0e4dcd"}, + {file = "pyarrow-14.0.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:c7331b4ed3401b7ee56f22c980608cf273f0380f77d0f73dd3c185f78f5a6220"}, + {file = "pyarrow-14.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:922e8b49b88da8633d6cac0e1b5a690311b6758d6f5d7c2be71acb0f1e14cd61"}, + {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58c889851ca33f992ea916b48b8540735055201b177cb0dcf0596a495a667b00"}, + {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30d8494870d9916bb53b2a4384948491444741cb9a38253c590e21f836b01222"}, + {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:be28e1a07f20391bb0b15ea03dcac3aade29fc773c5eb4bee2838e9b2cdde0cb"}, + {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:981670b4ce0110d8dcb3246410a4aabf5714db5d8ea63b15686bce1c914b1f83"}, + {file = "pyarrow-14.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:4756a2b373a28f6166c42711240643fb8bd6322467e9aacabd26b488fa41ec23"}, + {file = "pyarrow-14.0.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:cf87e2cec65dd5cf1aa4aba918d523ef56ef95597b545bbaad01e6433851aa10"}, + {file = "pyarrow-14.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:470ae0194fbfdfbf4a6b65b4f9e0f6e1fa0ea5b90c1ee6b65b38aecee53508c8"}, + {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6263cffd0c3721c1e348062997babdf0151301f7353010c9c9a8ed47448f82ab"}, + {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a8089d7e77d1455d529dbd7cff08898bbb2666ee48bc4085203af1d826a33cc"}, + {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:fada8396bc739d958d0b81d291cfd201126ed5e7913cb73de6bc606befc30226"}, + {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:2a145dab9ed7849fc1101bf03bcdc69913547f10513fdf70fc3ab6c0a50c7eee"}, + {file = "pyarrow-14.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:05fe7994745b634c5fb16ce5717e39a1ac1fac3e2b0795232841660aa76647cd"}, + {file = "pyarrow-14.0.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:a8eeef015ae69d104c4c3117a6011e7e3ecd1abec79dc87fd2fac6e442f666ee"}, + {file = "pyarrow-14.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3c76807540989fe8fcd02285dd15e4f2a3da0b09d27781abec3adc265ddbeba1"}, + {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:450e4605e3c20e558485f9161a79280a61c55efe585d51513c014de9ae8d393f"}, + {file = 
"pyarrow-14.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:323cbe60210173ffd7db78bfd50b80bdd792c4c9daca8843ef3cd70b186649db"}, + {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0140c7e2b740e08c5a459439d87acd26b747fc408bde0a8806096ee0baaa0c15"}, + {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:e592e482edd9f1ab32f18cd6a716c45b2c0f2403dc2af782f4e9674952e6dd27"}, + {file = "pyarrow-14.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:d264ad13605b61959f2ae7c1d25b1a5b8505b112715c961418c8396433f213ad"}, + {file = "pyarrow-14.0.1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:01e44de9749cddc486169cb632f3c99962318e9dacac7778315a110f4bf8a450"}, + {file = "pyarrow-14.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d0351fecf0e26e152542bc164c22ea2a8e8c682726fce160ce4d459ea802d69c"}, + {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33c1f6110c386464fd2e5e4ea3624466055bbe681ff185fd6c9daa98f30a3f9a"}, + {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11e045dfa09855b6d3e7705a37c42e2dc2c71d608fab34d3c23df2e02df9aec3"}, + {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:097828b55321897db0e1dbfc606e3ff8101ae5725673498cbfa7754ee0da80e4"}, + {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:1daab52050a1c48506c029e6fa0944a7b2436334d7e44221c16f6f1b2cc9c510"}, + {file = "pyarrow-14.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:3f6d5faf4f1b0d5a7f97be987cf9e9f8cd39902611e818fe134588ee99bf0283"}, + {file = "pyarrow-14.0.1.tar.gz", hash = "sha256:b8b3f4fe8d4ec15e1ef9b599b94683c5216adaed78d5cb4c606180546d1e2ee1"}, +] +pyasn1 = [ + {file = "pyasn1-0.5.0-py2.py3-none-any.whl", hash = "sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57"}, + {file = "pyasn1-0.5.0.tar.gz", hash = "sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde"}, +] +pyasn1-modules = [ + {file = "pyasn1_modules-0.3.0-py2.py3-none-any.whl", hash = "sha256:d3ccd6ed470d9ffbc716be08bd90efbd44d0734bc9303818f7336070984a162d"}, + {file = "pyasn1_modules-0.3.0.tar.gz", hash = "sha256:5bd01446b736eb9d31512a30d46c1ac3395d676c6f3cafa4c03eb54b9925631c"}, +] +pyathena = [ + {file = "pyathena-3.0.6-py3-none-any.whl", hash = "sha256:27fb606a73644e62be8ef9b86cdf583ab3cb9f8cac9c2ad8f05b7ad6d4eaaa87"}, + {file = "pyathena-3.0.6.tar.gz", hash = "sha256:ee6ea175134894209af2c6be1859b7be4371f7741faa7a58f9f97905ff6a73a4"}, +] +pycodestyle = [ + {file = "pycodestyle-2.9.1-py2.py3-none-any.whl", hash = "sha256:d1735fc58b418fd7c5f658d28d943854f8a849b01a5d0a1e6f3f3fdd0166804b"}, + {file = "pycodestyle-2.9.1.tar.gz", hash = "sha256:2c9607871d58c76354b697b42f5d57e1ada7d261c261efac224b664affdc5785"}, +] +pycparser = [ + {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, + {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, +] +pydantic = [ + {file = "pydantic-2.5.0-py3-none-any.whl", hash = "sha256:7ce6e766c456ad026fe5712f7bcf036efc34bd5d107b3e669ef7ea01b3a9050c"}, + {file = "pydantic-2.5.0.tar.gz", hash = "sha256:69bd6fb62d2d04b7055f59a396993486a2ee586c43a0b89231ce0000de07627c"}, +] +pydantic-core = [ + {file = "pydantic_core-2.14.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:812beca1dcb2b722cccc7e9c620bd972cbc323321194ec2725eab3222e6ac573"}, + 
{file = "pydantic_core-2.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a2ccdc53cb88e51c7d47d74c59630d7be844428f6b8d463055ffad6f0392d8da"}, + {file = "pydantic_core-2.14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd937733bf2fe7d6a8bf208c12741f1f730b7bf5636033877767a75093c29b8a"}, + {file = "pydantic_core-2.14.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:581bb606a31749a00796f5257947a0968182d7fe91e1dada41f06aeb6bfbc91a"}, + {file = "pydantic_core-2.14.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aadf74a40a7ae49c3c1aa7d32334fe94f4f968e21dd948e301bb4ed431fb2412"}, + {file = "pydantic_core-2.14.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b89821a2c77cc1b8f2c1fc3aacd6a3ecc5df8f7e518dc3f18aef8c4dcf66003d"}, + {file = "pydantic_core-2.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49ee28d65f506b2858a60745cc974ed005298ebab12693646b97641dd7c99c35"}, + {file = "pydantic_core-2.14.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:97246f896b4df7fd84caa8a75a67abb95f94bc0b547665bf0889e3262b060399"}, + {file = "pydantic_core-2.14.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:1185548665bc61bbab0dc78f10c8eafa0db0aa1e920fe9a451b77782b10a65cc"}, + {file = "pydantic_core-2.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2a7d08b39fac97540fba785fce3b21ee01a81f081a07a4d031efd791da6666f9"}, + {file = "pydantic_core-2.14.1-cp310-none-win32.whl", hash = "sha256:0a8c8daf4e3aa3aeb98e3638fc3d58a359738f3d12590b2474c6bb64031a0764"}, + {file = "pydantic_core-2.14.1-cp310-none-win_amd64.whl", hash = "sha256:4f0788699a92d604f348e9c1ac5e97e304e97127ba8325c7d0af88dcc7d35bd3"}, + {file = "pydantic_core-2.14.1-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:2be018a84995b6be1bbd40d6064395dbf71592a981169cf154c0885637f5f54a"}, + {file = "pydantic_core-2.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fc3227408808ba7df8e95eb1d8389f4ba2203bed8240b308de1d7ae66d828f24"}, + {file = "pydantic_core-2.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42d5d0e9bbb50481a049bd0203224b339d4db04006b78564df2b782e2fd16ebc"}, + {file = "pydantic_core-2.14.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bc6a4ea9f88a810cb65ccae14404da846e2a02dd5c0ad21dee712ff69d142638"}, + {file = "pydantic_core-2.14.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d312ad20e3c6d179cb97c42232b53111bcd8dcdd5c1136083db9d6bdd489bc73"}, + {file = "pydantic_core-2.14.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:679cc4e184f213c8227862e57340d12fd4d4d19dc0e3ddb0f653f86f01e90f94"}, + {file = "pydantic_core-2.14.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:101df420e954966868b8bc992aefed5fa71dd1f2755104da62ee247abab28e2f"}, + {file = "pydantic_core-2.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c964c0cc443d6c08a2347c0e5c1fc2d85a272dc66c1a6f3cde4fc4843882ada4"}, + {file = "pydantic_core-2.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8276bbab68a9dbe721da92d19cbc061f76655248fe24fb63969d0c3e0e5755e7"}, + {file = "pydantic_core-2.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:12163197fec7c95751a3c71b36dcc1909eed9959f011ffc79cc8170a6a74c826"}, + {file = "pydantic_core-2.14.1-cp311-none-win32.whl", hash = 
"sha256:b8ff0302518dcd001bd722bbe342919c29e5066c7eda86828fe08cdc112668b8"}, + {file = "pydantic_core-2.14.1-cp311-none-win_amd64.whl", hash = "sha256:59fa83873223f856d898452c6162a390af4297756f6ba38493a67533387d85d9"}, + {file = "pydantic_core-2.14.1-cp311-none-win_arm64.whl", hash = "sha256:798590d38c9381f07c48d13af1f1ef337cebf76ee452fcec5deb04aceced51c7"}, + {file = "pydantic_core-2.14.1-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:587d75aec9ae50d0d63788cec38bf13c5128b3fc1411aa4b9398ebac884ab179"}, + {file = "pydantic_core-2.14.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:26242e3593d4929123615bd9365dd86ef79b7b0592d64a96cd11fd83c69c9f34"}, + {file = "pydantic_core-2.14.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5879ac4791508d8f0eb7dec71ff8521855180688dac0c55f8c99fc4d1a939845"}, + {file = "pydantic_core-2.14.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ad9ea86f5fc50f1b62c31184767fe0cacaa13b54fe57d38898c3776d30602411"}, + {file = "pydantic_core-2.14.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:102ac85a775e77821943ae38da9634ddd774b37a8d407181b4f7b05cdfb36b55"}, + {file = "pydantic_core-2.14.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2459cc06572730e079ec1e694e8f68c99d977b40d98748ae72ff11ef21a56b0b"}, + {file = "pydantic_core-2.14.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:217dcbfaf429a9b8f1d54eb380908b9c778e78f31378283b30ba463c21e89d5d"}, + {file = "pydantic_core-2.14.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9d59e0d7cdfe8ed1d4fcd28aad09625c715dc18976c7067e37d8a11b06f4be3e"}, + {file = "pydantic_core-2.14.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e2be646a5155d408e68b560c0553e8a83dc7b9f90ec6e5a2fc3ff216719385db"}, + {file = "pydantic_core-2.14.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ffba979801e3931a19cd30ed2049450820effe8f152aaa317e2fd93795d318d7"}, + {file = "pydantic_core-2.14.1-cp312-none-win32.whl", hash = "sha256:132b40e479cb5cebbbb681f77aaceabbc8355df16c9124cff1d4060ada83cde2"}, + {file = "pydantic_core-2.14.1-cp312-none-win_amd64.whl", hash = "sha256:744b807fe2733b6da3b53e8ad93e8b3ea3ee3dfc3abece4dd2824cc1f39aa343"}, + {file = "pydantic_core-2.14.1-cp312-none-win_arm64.whl", hash = "sha256:24ba48f9d0b8d64fc5e42e1600366c3d7db701201294989aebdaca23110c02ab"}, + {file = "pydantic_core-2.14.1-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:ba55d73a2df4771b211d0bcdea8b79454980a81ed34a1d77a19ddcc81f98c895"}, + {file = "pydantic_core-2.14.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:e905014815687d88cbb14bbc0496420526cf20d49f20606537d87646b70f1046"}, + {file = "pydantic_core-2.14.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:443dc5eede7fa76b2370213e0abe881eb17c96f7d694501853c11d5d56916602"}, + {file = "pydantic_core-2.14.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:abae6fd5504e5e438e4f6f739f8364fd9ff5a5cdca897e68363e2318af90bc28"}, + {file = "pydantic_core-2.14.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9486e27bb3f137f33e2315be2baa0b0b983dae9e2f5f5395240178ad8e644728"}, + {file = "pydantic_core-2.14.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:69df82892ff00491d673b1929538efb8c8d68f534fdc6cb7fd3ac8a5852b9034"}, + {file = "pydantic_core-2.14.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:184ff7b30c3f60e1b775378c060099285fd4b5249271046c9005f8b247b39377"}, + {file = "pydantic_core-2.14.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3d5b2a4b3c10cad0615670cab99059441ff42e92cf793a0336f4bc611e895204"}, + {file = "pydantic_core-2.14.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:871c641a83719caaa856a11dcc61c5e5b35b0db888e1a0d338fe67ce744575e2"}, + {file = "pydantic_core-2.14.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1e7208946ea9b27a8cef13822c339d4ae96e45952cc01fc4a91c7f1cb0ae2861"}, + {file = "pydantic_core-2.14.1-cp37-none-win32.whl", hash = "sha256:b4ff385a525017f5adf6066d7f9fb309f99ade725dcf17ed623dc7dce1f85d9f"}, + {file = "pydantic_core-2.14.1-cp37-none-win_amd64.whl", hash = "sha256:c7411cd06afeb263182e38c6ca5b4f5fe4f20d91466ad7db0cd6af453a02edec"}, + {file = "pydantic_core-2.14.1-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:2871daf5b2823bf77bf7d3d43825e5d904030c155affdf84b21a00a2e00821d2"}, + {file = "pydantic_core-2.14.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7977e261cac5f99873dc2c6f044315d09b19a71c4246560e1e67593889a90978"}, + {file = "pydantic_core-2.14.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5a111f9158555582deadd202a60bd7803b6c68f406391b7cf6905adf0af6811"}, + {file = "pydantic_core-2.14.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac417312bf6b7a0223ba73fb12e26b2854c93bf5b1911f7afef6d24c379b22aa"}, + {file = "pydantic_core-2.14.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c36987f5eb2a7856b5f5feacc3be206b4d1852a6ce799f6799dd9ffb0cba56ae"}, + {file = "pydantic_core-2.14.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c6e98227eb02623d57e1fd061788837834b68bb995a869565211b9abf3de4bf4"}, + {file = "pydantic_core-2.14.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:023b6d7ec4e97890b28eb2ee24413e69a6d48de4e8b75123957edd5432f4eeb3"}, + {file = "pydantic_core-2.14.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6015beb28deb5306049ecf2519a59627e9e050892927850a884df6d5672f8c7d"}, + {file = "pydantic_core-2.14.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3f48d4afd973abbd65266ac24b24de1591116880efc7729caf6b6b94a9654c9e"}, + {file = "pydantic_core-2.14.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:28734bcfb8fc5b03293dec5eb5ea73b32ff767f6ef79a31f6e41dad2f5470270"}, + {file = "pydantic_core-2.14.1-cp38-none-win32.whl", hash = "sha256:3303113fdfaca927ef11e0c5f109e2ec196c404f9d7ba5f8ddb63cdf287ea159"}, + {file = "pydantic_core-2.14.1-cp38-none-win_amd64.whl", hash = "sha256:144f2c1d5579108b6ed1193fcc9926124bd4142b0f7020a7744980d1235c8a40"}, + {file = "pydantic_core-2.14.1-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:893bf4fb9bfb9c4639bc12f3de323325ada4c6d60e478d5cded65453e9364890"}, + {file = "pydantic_core-2.14.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:052d8731aaf844f91fe4cd3faf28983b109a5865b3a256ec550b80a5689ead87"}, + {file = "pydantic_core-2.14.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb1c6ecb53e4b907ee8486f453dd940b8cbb509946e2b671e3bf807d310a96fc"}, + {file = "pydantic_core-2.14.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:94cf6d0274eb899d39189144dcf52814c67f9b0fd196f211420d9aac793df2da"}, + {file = "pydantic_core-2.14.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:36c3bf96f803e207a80dbcb633d82b98ff02a9faa76dd446e969424dec8e2b9f"}, + {file = "pydantic_core-2.14.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fb290491f1f0786a7da4585250f1feee200fc17ff64855bdd7c42fb54526fa29"}, + {file = "pydantic_core-2.14.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6590ed9d13eb51b28ea17ddcc6c8dbd6050b4eb589d497105f0e13339f223b72"}, + {file = "pydantic_core-2.14.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:69cd74e55a5326d920e7b46daa2d81c2bdb8bcf588eafb2330d981297b742ddc"}, + {file = "pydantic_core-2.14.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d965bdb50725a805b083f5f58d05669a85705f50a6a864e31b545c589290ee31"}, + {file = "pydantic_core-2.14.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ca942a2dc066ca5e04c27feaa8dfb9d353ddad14c6641660c565149186095343"}, + {file = "pydantic_core-2.14.1-cp39-none-win32.whl", hash = "sha256:72c2ef3787c3b577e5d6225d73a77167b942d12cef3c1fbd5e74e55b7f881c36"}, + {file = "pydantic_core-2.14.1-cp39-none-win_amd64.whl", hash = "sha256:55713d155da1e508083c4b08d0b1ad2c3054f68b8ef7eb3d3864822e456f0bb5"}, + {file = "pydantic_core-2.14.1-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:53efe03cc383a83660cfdda6a3cb40ee31372cedea0fde0b2a2e55e838873ab6"}, + {file = "pydantic_core-2.14.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:f523e116879bc6714e61d447ce934676473b068069dce6563ea040381dc7a257"}, + {file = "pydantic_core-2.14.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85bb66d661be51b2cba9ca06759264b3469d2dbb53c3e6effb3f05fec6322be6"}, + {file = "pydantic_core-2.14.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f53a3ccdc30234cb4342cec541e3e6ed87799c7ca552f0b5f44e3967a5fed526"}, + {file = "pydantic_core-2.14.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:1bfb63821ada76719ffcd703fc40dd57962e0d8c253e3c565252e6de6d3e0bc6"}, + {file = "pydantic_core-2.14.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e2c689439f262c29cf3fcd5364da1e64d8600facecf9eabea8643b8755d2f0de"}, + {file = "pydantic_core-2.14.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a15f6e5588f7afb7f6fc4b0f4ff064749e515d34f34c666ed6e37933873d8ad8"}, + {file = "pydantic_core-2.14.1-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:f1a30eef060e21af22c7d23349f1028de0611f522941c80efa51c05a63142c62"}, + {file = "pydantic_core-2.14.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16f4a7e1ec6b3ea98a1e108a2739710cd659d68b33fbbeaba066202cab69c7b6"}, + {file = "pydantic_core-2.14.1-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fd80a2d383940eec3db6a5b59d1820f947317acc5c75482ff8d79bf700f8ad6a"}, + {file = "pydantic_core-2.14.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:a68a36d71c7f638dda6c9e6b67f6aabf3fa1471b198d246457bfdc7c777cdeb7"}, + {file = "pydantic_core-2.14.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ebc79120e105e4bcd7865f369e3b9dbabb0d492d221e1a7f62a3e8e292550278"}, + {file = "pydantic_core-2.14.1-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:c8c466facec2ccdf025b0b1455b18f2c3d574d5f64d24df905d3d7b8f05d5f4e"}, + {file = "pydantic_core-2.14.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:b91b5ec423e88caa16777094c4b2b97f11453283e7a837e5e5e1b886abba1251"}, + {file = 
"pydantic_core-2.14.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:130e49aa0cb316f743bc7792c36aefa39fc2221312f1d4b333b19edbdd71f2b1"}, + {file = "pydantic_core-2.14.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f483467c046f549572f8aca3b7128829e09ae3a9fe933ea421f7cb7c58120edb"}, + {file = "pydantic_core-2.14.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:dee4682bd7947afc682d342a8d65ad1834583132383f8e801601a8698cb8d17a"}, + {file = "pydantic_core-2.14.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:8d927d042c0ef04607ee7822828b208ab045867d20477ec6593d612156798547"}, + {file = "pydantic_core-2.14.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:5a1570875eb0d1479fb2270ed80c88c231aaaf68b0c3f114f35e7fb610435e4f"}, + {file = "pydantic_core-2.14.1-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:cb2fd3ab67558eb16aecfb4f2db4febb4d37dc74e6b8613dc2e7160fb58158a9"}, + {file = "pydantic_core-2.14.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a7991f25b98038252363a03e6a9fe92e60fe390fda2631d238dc3b0e396632f8"}, + {file = "pydantic_core-2.14.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b45b7be9f99991405ecd6f6172fb6798908a8097106ae78d5cc5cc15121bad9"}, + {file = "pydantic_core-2.14.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:51506e7652a2ef1d1cf763c4b51b972ff4568d1dddc96ca83931a6941f5e6389"}, + {file = "pydantic_core-2.14.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:66dc0e63349ec39c1ea66622aa5c2c1f84382112afd3ab2fa0cca4fb01f7db39"}, + {file = "pydantic_core-2.14.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:8e17f0c3ba4cb07faa0038a59ce162de584ed48ba645c8d05a5de1e40d4c21e7"}, + {file = "pydantic_core-2.14.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:d983222223f63e323a5f497f5b85e211557a5d8fb670dc88f343784502b466ba"}, + {file = "pydantic_core-2.14.1.tar.gz", hash = "sha256:0d82a6ee815388a362885186e431fac84c7a06623bc136f508e9f88261d8cadb"}, +] +pydoc-markdown = [ + {file = "pydoc_markdown-4.8.2-py3-none-any.whl", hash = "sha256:203f74119e6bb2f9deba43d452422de7c8ec31955b61e0620fa4dd8c2611715f"}, + {file = "pydoc_markdown-4.8.2.tar.gz", hash = "sha256:fb6c927e31386de17472d42f9bd3d3be2905977d026f6216881c65145aa67f0b"}, +] +pyflakes = [ + {file = "pyflakes-2.5.0-py2.py3-none-any.whl", hash = "sha256:4579f67d887f804e67edb544428f264b7b24f435b263c4614f384135cea553d2"}, + {file = "pyflakes-2.5.0.tar.gz", hash = "sha256:491feb020dca48ccc562a8c0cbe8df07ee13078df59813b83959cbdada312ea3"}, +] +pygments = [ + {file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"}, + {file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"}, +] +pyjwt = [ + {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, + {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"}, +] +pymongo = [ + {file = "pymongo-4.6.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c011bd5ad03cc096f99ffcfdd18a1817354132c1331bed7a837a25226659845f"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux1_i686.whl", hash = "sha256:5e63146dbdb1eac207464f6e0cfcdb640c9c5ff0f57b754fa96fe252314a1dc6"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux2014_aarch64.whl", hash = 
"sha256:2972dd1f1285866aba027eff2f4a2bbf8aa98563c2ced14cb34ee5602b36afdf"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux2014_i686.whl", hash = "sha256:a0be99b599da95b7a90a918dd927b20c434bea5e1c9b3efc6a3c6cd67c23f813"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux2014_ppc64le.whl", hash = "sha256:9b0f98481ad5dc4cb430a60bbb8869f05505283b9ae1c62bdb65eb5e020ee8e3"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux2014_s390x.whl", hash = "sha256:256c503a75bd71cf7fb9ebf889e7e222d49c6036a48aad5a619f98a0adf0e0d7"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:b4ad70d7cac4ca0c7b31444a0148bd3af01a2662fa12b1ad6f57cd4a04e21766"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5717a308a703dda2886a5796a07489c698b442f5e409cf7dc2ac93de8d61d764"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a8f7f9feecae53fa18d6a3ea7c75f9e9a1d4d20e5c3f9ce3fba83f07bcc4eee2"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:128b1485753106c54af481789cdfea12b90a228afca0b11fb3828309a907e10e"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3077a31633beef77d057c6523f5de7271ddef7bde5e019285b00c0cc9cac1e3"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ebf02c32afa6b67e5861a27183dd98ed88419a94a2ab843cc145fb0bafcc5b28"}, + {file = "pymongo-4.6.0-cp310-cp310-win32.whl", hash = "sha256:b14dd73f595199f4275bed4fb509277470d9b9059310537e3b3daba12b30c157"}, + {file = "pymongo-4.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:8adf014f2779992eba3b513e060d06f075f0ab2fb3ad956f413a102312f65cdf"}, + {file = "pymongo-4.6.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ba51129fcc510824b6ca6e2ce1c27e3e4d048b6e35d3ae6f7e517bed1b8b25ce"}, + {file = "pymongo-4.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2973f113e079fb98515722cd728e1820282721ec9fd52830e4b73cabdbf1eb28"}, + {file = "pymongo-4.6.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:af425f323fce1b07755edd783581e7283557296946212f5b1a934441718e7528"}, + {file = "pymongo-4.6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1ec71ac633b126c0775ed4604ca8f56c3540f5c21a1220639f299e7a544b55f9"}, + {file = "pymongo-4.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ec6c20385c5a58e16b1ea60c5e4993ea060540671d7d12664f385f2fb32fe79"}, + {file = "pymongo-4.6.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:85f2cdc400ee87f5952ebf2a117488f2525a3fb2e23863a8efe3e4ee9e54e4d1"}, + {file = "pymongo-4.6.0-cp311-cp311-win32.whl", hash = "sha256:7fc2bb8a74dcfcdd32f89528e38dcbf70a3a6594963d60dc9595e3b35b66e414"}, + {file = "pymongo-4.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:6695d7136a435c1305b261a9ddb9b3ecec9863e05aab3935b96038145fd3a977"}, + {file = "pymongo-4.6.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:d603edea1ff7408638b2504905c032193b7dcee7af269802dbb35bc8c3310ed5"}, + {file = "pymongo-4.6.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79f41576b3022c2fe9780ae3e44202b2438128a25284a8ddfa038f0785d87019"}, + {file = "pymongo-4.6.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:49f2af6cf82509b15093ce3569229e0d53c90ad8ae2eef940652d4cf1f81e045"}, + {file = "pymongo-4.6.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ecd9e1fa97aa11bf67472220285775fa15e896da108f425e55d23d7540a712ce"}, + {file = "pymongo-4.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d2be5c9c3488fa8a70f83ed925940f488eac2837a996708d98a0e54a861f212"}, + {file = "pymongo-4.6.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ab6bcc8e424e07c1d4ba6df96f7fb963bcb48f590b9456de9ebd03b88084fe8"}, + {file = "pymongo-4.6.0-cp312-cp312-win32.whl", hash = "sha256:47aa128be2e66abd9d1a9b0437c62499d812d291f17b55185cb4aa33a5f710a4"}, + {file = "pymongo-4.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:014e7049dd019a6663747ca7dae328943e14f7261f7c1381045dfc26a04fa330"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:288c21ab9531b037f7efa4e467b33176bc73a0c27223c141b822ab4a0e66ff2a"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:747c84f4e690fbe6999c90ac97246c95d31460d890510e4a3fa61b7d2b87aa34"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:055f5c266e2767a88bb585d01137d9c7f778b0195d3dbf4a487ef0638be9b651"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:82e620842e12e8cb4050d2643a81c8149361cd82c0a920fa5a15dc4ca8a4000f"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux2014_ppc64le.whl", hash = "sha256:6b18276f14b4b6d92e707ab6db19b938e112bd2f1dc3f9f1a628df58e4fd3f0d"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:680fa0fc719e1a3dcb81130858368f51d83667d431924d0bcf249644bce8f303"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:3919708594b86d0f5cdc713eb6fccd3f9b9532af09ea7a5d843c933825ef56c4"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db082f728160369d9a6ed2e722438291558fc15ce06d0a7d696a8dad735c236b"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e4ed21029d80c4f62605ab16398fe1ce093fff4b5f22d114055e7d9fbc4adb0"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bea9138b0fc6e2218147e9c6ce1ff76ff8e29dc00bb1b64842bd1ca107aee9f"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a0269811661ba93c472c8a60ea82640e838c2eb148d252720a09b5123f2c2fe"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6d6a1b1361f118e7fefa17ae3114e77f10ee1b228b20d50c47c9f351346180c8"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7e3b0127b260d4abae7b62203c4c7ef0874c901b55155692353db19de4b18bc4"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a49aca4d961823b2846b739380c847e8964ff7ae0f0a683992b9d926054f0d6d"}, + {file = "pymongo-4.6.0-cp37-cp37m-win32.whl", hash = "sha256:09c7de516b08c57647176b9fc21d929d628e35bcebc7422220c89ae40b62126a"}, + {file = "pymongo-4.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:81dd1308bd5630d2bb5980f00aa163b986b133f1e9ed66c66ce2a5bc3572e891"}, + {file = "pymongo-4.6.0-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:2f8c04277d879146eacda920476e93d520eff8bec6c022ac108cfa6280d84348"}, + {file = 
"pymongo-4.6.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:5802acc012bbb4bce4dff92973dff76482f30ef35dd4cb8ab5b0e06aa8f08c80"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:ccd785fafa1c931deff6a7116e9a0d402d59fabe51644b0d0c268295ff847b25"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:fe03bf25fae4b95d8afe40004a321df644400fdcba4c8e5e1a19c1085b740888"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:2ca0ba501898b2ec31e6c3acf90c31910944f01d454ad8e489213a156ccf1bda"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux2014_ppc64le.whl", hash = "sha256:10a379fb60f1b2406ae57b8899bacfe20567918c8e9d2d545e1b93628fcf2050"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux2014_s390x.whl", hash = "sha256:a4dc1319d0c162919ee7f4ee6face076becae2abbd351cc14f1fe70af5fb20d9"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:ddef295aaf80cefb0c1606f1995899efcb17edc6b327eb6589e234e614b87756"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:518c90bdd6e842c446d01a766b9136fec5ec6cc94f3b8c3f8b4a332786ee6b64"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b80a4ee19b3442c57c38afa978adca546521a8822d663310b63ae2a7d7b13f3a"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eb438a8bf6b695bf50d57e6a059ff09652a07968b2041178b3744ea785fcef9b"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3db7d833a7c38c317dc95b54e27f1d27012e031b45a7c24e360b53197d5f6e7"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3729b8db02063da50eeb3db88a27670d85953afb9a7f14c213ac9e3dca93034b"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:39a1cd5d383b37285641d5a7a86be85274466ae336a61b51117155936529f9b3"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7b0e6361754ac596cd16bfc6ed49f69ffcd9b60b7bc4bcd3ea65c6a83475e4ff"}, + {file = "pymongo-4.6.0-cp38-cp38-win32.whl", hash = "sha256:806e094e9e85d8badc978af8c95b69c556077f11844655cb8cd2d1758769e521"}, + {file = "pymongo-4.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1394c4737b325166a65ae7c145af1ebdb9fb153ebedd37cf91d676313e4a67b8"}, + {file = "pymongo-4.6.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a8273e1abbcff1d7d29cbbb1ea7e57d38be72f1af3c597c854168508b91516c2"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux1_i686.whl", hash = "sha256:e16ade71c93f6814d095d25cd6d28a90d63511ea396bd96e9ffcb886b278baaa"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:325701ae7b56daa5b0692305b7cb505ca50f80a1288abb32ff420a8a209b01ca"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:cc94f9fea17a5af8cf1a343597711a26b0117c0b812550d99934acb89d526ed2"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:21812453354b151200034750cd30b0140e82ec2a01fd4357390f67714a1bfbde"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux2014_ppc64le.whl", hash = "sha256:0634994b026336195778e5693583c060418d4ab453eff21530422690a97e1ee8"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux2014_s390x.whl", hash = "sha256:ad4f66fbb893b55f96f03020e67dcab49ffde0177c6565ccf9dec4fdf974eb61"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux2014_x86_64.whl", hash = 
"sha256:2703a9f8f5767986b4f51c259ff452cc837c5a83c8ed5f5361f6e49933743b2f"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bafea6061d63059d8bc2ffc545e2f049221c8a4457d236c5cd6a66678673eab"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f28ae33dc5a0b9cee06e95fd420e42155d83271ab75964baf747ce959cac5f52"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d16a534da0e39785687b7295e2fcf9a339f4a20689024983d11afaa4657f8507"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef67fedd863ffffd4adfd46d9d992b0f929c7f61a8307366d664d93517f2c78e"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:05c30fd35cc97f14f354916b45feea535d59060ef867446b5c3c7f9b609dd5dc"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1c63e3a2e8fb815c4b1f738c284a4579897e37c3cfd95fdb199229a1ccfb638a"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:e5e193f89f4f8c1fe273f9a6e6df915092c9f2af6db2d1afb8bd53855025c11f"}, + {file = "pymongo-4.6.0-cp39-cp39-win32.whl", hash = "sha256:a09bfb51953930e7e838972ddf646c5d5f984992a66d79da6ba7f6a8d8a890cd"}, + {file = "pymongo-4.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:107a234dc55affc5802acb3b6d83cbb8c87355b38a9457fcd8806bdeb8bce161"}, + {file = "pymongo-4.6.0.tar.gz", hash = "sha256:fb1c56d891f9e34303c451998ef62ba52659648bb0d75b03c5e4ac223a3342c2"}, +] +pymysql = [ + {file = "PyMySQL-1.1.0-py3-none-any.whl", hash = "sha256:8969ec6d763c856f7073c4c64662882675702efcb114b4bcbb955aea3a069fa7"}, + {file = "PyMySQL-1.1.0.tar.gz", hash = "sha256:4f13a7df8bf36a51e81dd9f3605fede45a4878fe02f9236349fd82a3f0612f96"}, +] +pyodbc = [ + {file = "pyodbc-4.0.39-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:74af348dbaee4885998858daf50c8964e767629ecf6c195868b016367b0bb861"}, + {file = "pyodbc-4.0.39-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0f5901b57eaef0761f4cf02bca8e7c63f589fd0fd723a79f6ccf1ea1275372e5"}, + {file = "pyodbc-4.0.39-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0db69478d00fcd8d0b9bdde8aca0b0eada341fd6ed8c2da84b594b928c84106"}, + {file = "pyodbc-4.0.39-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5faf2870e9d434c6a85c6adc1cdff55c0e376273baf480f06d9848025405688"}, + {file = "pyodbc-4.0.39-cp310-cp310-win32.whl", hash = "sha256:62bb6d7d0d25dc75d1445e539f946461c9c5a3643ae14676b240f71794ea004f"}, + {file = "pyodbc-4.0.39-cp310-cp310-win_amd64.whl", hash = "sha256:8eb5547282dc73a7784ce7b99584f68687dd85543538ca6f70cffaa6310676e7"}, + {file = "pyodbc-4.0.39-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:530c1ac37ead782803b44fb1934ba4c68ed4a6969f7475cb8bc04ae1da14486e"}, + {file = "pyodbc-4.0.39-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1f7fb65191926308f09ce75ae7ccecf89310232ee50cdea74edf17ee04a9b068"}, + {file = "pyodbc-4.0.39-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ec009180fcd7c8197f45d083e6670623d8dfe198a457ca2a50ebb1bafe4107f"}, + {file = "pyodbc-4.0.39-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:400e911d54980098c6badadecc82385fc0d6a9057db525d63d2652317df43efe"}, + {file = "pyodbc-4.0.39-cp311-cp311-win32.whl", hash = 
"sha256:f792677b88e1dde12dab46de8647620fc8171742c02780d51744f7b1b2135dbc"}, + {file = "pyodbc-4.0.39-cp311-cp311-win_amd64.whl", hash = "sha256:3d9d70e1635d35ba3aee3df216ec8e35f2824909f43331c0112b17f460a93923"}, + {file = "pyodbc-4.0.39-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:c1a59096f1784d0cda3d0b8f393849f05515c46a10016edb6da1b1960d039800"}, + {file = "pyodbc-4.0.39-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b3467157661615d5c30893efa1069b55c9ffa434097fc3ae3739e740d83d2ec"}, + {file = "pyodbc-4.0.39-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af027a60e84274ea08fad1c75991d37a5f1f6e8bcd30f6bda20db99f0cdfbc7d"}, + {file = "pyodbc-4.0.39-cp36-cp36m-win32.whl", hash = "sha256:64c1de1263281de7b5ce585b0352746ab1a483453017a8589f838a79cbe3d6d9"}, + {file = "pyodbc-4.0.39-cp36-cp36m-win_amd64.whl", hash = "sha256:27d1b3c3159673b44c97c878f9d8056901d45f747ce2e0b4d5d99f0fb6949dc7"}, + {file = "pyodbc-4.0.39-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:efccc11dff6fba684a74ae1030c92ff8b82429d7f00e0a50aa2ac6f56621cd9f"}, + {file = "pyodbc-4.0.39-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea08e9379c08663d7260e2b8a6c451f56d36c17291af735191089f8e29ad9578"}, + {file = "pyodbc-4.0.39-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b36fe804d367d01ad81077fa524a36e667aabc3945e32564c7ef9595b28edfa9"}, + {file = "pyodbc-4.0.39-cp37-cp37m-win32.whl", hash = "sha256:72d364e52f6ca2417881a23834b3a36733c09e0dcd4760f49a6b864218d98d92"}, + {file = "pyodbc-4.0.39-cp37-cp37m-win_amd64.whl", hash = "sha256:39f6c56022c764309aa7552c0eb2c58fbb5902ab5d2010d42b021c0b205aa609"}, + {file = "pyodbc-4.0.39-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ebcb900fcaf19ca2bc38632218c5d48c666fcc19fe38b08cde001917f4581456"}, + {file = "pyodbc-4.0.39-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a3e133621ac2dad22d0870a8521c7e82d4270e24ce02451d64e7eb6a40ad0941"}, + {file = "pyodbc-4.0.39-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05a0912e852ebddaffa8f235b0f3974475021dd8eb604eb46ea67af06efe1239"}, + {file = "pyodbc-4.0.39-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6353044b99c763aeec7ca1760b4340298504d8ee544fdcab3c380a2abec15b78"}, + {file = "pyodbc-4.0.39-cp38-cp38-win32.whl", hash = "sha256:a591a1cf3c251a9c7c1642cfb3774119bf3512f3be56151247238f8a7b22b336"}, + {file = "pyodbc-4.0.39-cp38-cp38-win_amd64.whl", hash = "sha256:8553eaef9f8ec333bbddff6eadf0d322dda34b37f4bab19f0658eb532037840c"}, + {file = "pyodbc-4.0.39-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9253e746c5c94bf61e3e9adb08fb7688d413cb68c06ebb287ec233387534760a"}, + {file = "pyodbc-4.0.39-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a6f4067f46aaa78e77e8a15ade81eb21fb344563d245fb2d9a0aaa553c367cbd"}, + {file = "pyodbc-4.0.39-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdf5a27e6587d1762f7f0e35d6f0309f09019bf3e19ca9177a4b765121f3f106"}, + {file = "pyodbc-4.0.39-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe4ee87b88867867f582dd0c1236cd982508db359a6cbb5e91623ceb6c83e60a"}, + {file = "pyodbc-4.0.39-cp39-cp39-win32.whl", hash = "sha256:42649ed57d09c04aa197bdd4fe0aa9ca319790b7aa86d0b0784cc70e78c426e5"}, + {file = "pyodbc-4.0.39-cp39-cp39-win_amd64.whl", hash = "sha256:305c7d6337e2d4c8350677cc641b343fc0197b7b9bc167815c66b64545c67a53"}, + {file = "pyodbc-4.0.39.tar.gz", hash = 
"sha256:e528bb70dd6d6299ee429868925df0866e3e919c772b9eff79c8e17920d8f116"}, +] +pyopenssl = [ + {file = "pyOpenSSL-23.2.0-py3-none-any.whl", hash = "sha256:24f0dc5227396b3e831f4c7f602b950a5e9833d292c8e4a2e06b709292806ae2"}, + {file = "pyOpenSSL-23.2.0.tar.gz", hash = "sha256:276f931f55a452e7dea69c7173e984eb2a4407ce413c918aa34b55f82f9b8bac"}, +] +pyparsing = [ + {file = "pyparsing-3.1.1-py3-none-any.whl", hash = "sha256:32c7c0b711493c72ff18a981d24f28aaf9c1fb7ed5e9667c9e84e3db623bdbfb"}, + {file = "pyparsing-3.1.1.tar.gz", hash = "sha256:ede28a1a32462f5a9705e07aea48001a08f7cf81a021585011deba701581a0db"}, +] +pypdf2 = [ + {file = "PyPDF2-3.0.1.tar.gz", hash = "sha256:a74408f69ba6271f71b9352ef4ed03dc53a31aa404d29b5d31f53bfecfee1440"}, + {file = "pypdf2-3.0.1-py3-none-any.whl", hash = "sha256:d16e4205cfee272fbdc0568b68d82be796540b1537508cef59388f839c191928"}, +] +pyreadline3 = [ + {file = "pyreadline3-3.4.1-py3-none-any.whl", hash = "sha256:b0efb6516fd4fb07b45949053826a62fa4cb353db5be2bbb4a7aa1fdd1e345fb"}, + {file = "pyreadline3-3.4.1.tar.gz", hash = "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae"}, +] +pytest = [ + {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"}, + {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"}, +] +pytest-cases = [ + {file = "pytest-cases-3.6.14.tar.gz", hash = "sha256:7455e6ca57a544c1bfdd8b56ace08c1c1ce4c6572a8aab8f1bd351dc25a10b6b"}, + {file = "pytest_cases-3.6.14-py2.py3-none-any.whl", hash = "sha256:a087f3d019efd8942d0f0dc3fb526bedf9f83d742c40289e9623f6788aff7257"}, +] +pytest-console-scripts = [ + {file = "pytest-console-scripts-1.4.1.tar.gz", hash = "sha256:5a826ed84cc0afa202eb9e44381d7d762f7bdda8e0c23f9f79a7f1f44cf4a895"}, + {file = "pytest_console_scripts-1.4.1-py3-none-any.whl", hash = "sha256:ad860a951a90eca4bd3bd1159b8f5428633ba4ea01abd5c9526b67a95f65437a"}, +] +pytest-forked = [ + {file = "pytest-forked-1.6.0.tar.gz", hash = "sha256:4dafd46a9a600f65d822b8f605133ecf5b3e1941ebb3588e943b4e3eb71a5a3f"}, + {file = "pytest_forked-1.6.0-py3-none-any.whl", hash = "sha256:810958f66a91afb1a1e2ae83089d8dc1cd2437ac96b12963042fbb9fb4d16af0"}, +] +pytest-order = [ + {file = "pytest-order-1.1.0.tar.gz", hash = "sha256:139d25b30826b78eebb42722f747eab14c44b88059d7a71d4f79d14a057269a5"}, + {file = "pytest_order-1.1.0-py3-none-any.whl", hash = "sha256:3b3730969c97900fa5cd31ecff80847680ed56b2490954565c14949ba60d9371"}, +] +pytest-pythonpath = [ + {file = "pytest-pythonpath-0.7.4.tar.gz", hash = "sha256:64e195b23a8f8c0c631fb16882d9ad6fa4137ed1f2961ddd15d52065cd435db6"}, + {file = "pytest_pythonpath-0.7.4-py3-none-any.whl", hash = "sha256:e73e11dab2f0b83e73229e261242b251f0a369d7f527dbfec068822fd26a6ce5"}, +] +python-daemon = [ + {file = "python-daemon-3.0.1.tar.gz", hash = "sha256:6c57452372f7eaff40934a1c03ad1826bf5e793558e87fef49131e6464b4dae5"}, + {file = "python_daemon-3.0.1-py3-none-any.whl", hash = "sha256:42bb848a3260a027fa71ad47ecd959e471327cb34da5965962edd5926229f341"}, +] +python-dateutil = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] +python-nvd3 = [ + {file = "python-nvd3-0.15.0.tar.gz", hash = "sha256:fbd75ff47e0ef255b4aa4f3a8b10dc8b4024aa5a9a7abed5b2406bd3cb817715"}, +] +python-slugify = 
[ + {file = "python-slugify-8.0.1.tar.gz", hash = "sha256:ce0d46ddb668b3be82f4ed5e503dbc33dd815d83e2eb6824211310d3fb172a27"}, + {file = "python_slugify-8.0.1-py2.py3-none-any.whl", hash = "sha256:70ca6ea68fe63ecc8fa4fcf00ae651fc8a5d02d93dcd12ae6d4fc7ca46c4d395"}, +] +pytimeparse = [ + {file = "pytimeparse-1.1.8-py2.py3-none-any.whl", hash = "sha256:04b7be6cc8bd9f5647a6325444926c3ac34ee6bc7e69da4367ba282f076036bd"}, + {file = "pytimeparse-1.1.8.tar.gz", hash = "sha256:e86136477be924d7e670646a98561957e8ca7308d44841e21f5ddea757556a0a"}, +] +pytz = [ + {file = "pytz-2023.3-py2.py3-none-any.whl", hash = "sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb"}, + {file = "pytz-2023.3.tar.gz", hash = "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"}, +] +pytzdata = [ + {file = "pytzdata-2020.1-py2.py3-none-any.whl", hash = "sha256:e1e14750bcf95016381e4d472bad004eef710f2d6417240904070b3d6654485f"}, + {file = "pytzdata-2020.1.tar.gz", hash = "sha256:3efa13b335a00a8de1d345ae41ec78dd11c9f8807f522d39850f2dd828681540"}, +] +pywin32 = [ + {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, + {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, + {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"}, + {file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"}, + {file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"}, + {file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"}, + {file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"}, + {file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"}, + {file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"}, + {file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"}, + {file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"}, + {file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"}, + {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"}, + {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, +] +pywin32-ctypes = [ + {file = "pywin32-ctypes-0.2.2.tar.gz", hash = "sha256:3426e063bdd5fd4df74a14fa3cf80a0b42845a87e1d1e81f6549f9daec593a60"}, + {file = "pywin32_ctypes-0.2.2-py3-none-any.whl", hash = "sha256:bf490a1a709baf35d688fe0ecf980ed4de11d2b3e37b51e5442587a75d9957e7"}, +] +pyyaml = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = 
"PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash 
= "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] +qdrant-client = [ + {file = "qdrant_client-1.6.4-py3-none-any.whl", hash = "sha256:db4696978d6a62d78ff60f70b912383f1e467bda3053f732b01ddb5f93281b10"}, + {file = "qdrant_client-1.6.4.tar.gz", hash = "sha256:bbd65f383b6a55a9ccf4e301250fa925179340dd90cfde9b93ce4230fd68867b"}, +] +redshift-connector = [ + {file = "redshift_connector-2.0.913-py3-none-any.whl", hash = "sha256:bd70395c5b7ec9fcae9565daff6bcb88c7d3ea6182dafba2bac6138f68d00582"}, +] +referencing = [ + {file = "referencing-0.30.2-py3-none-any.whl", hash = "sha256:449b6669b6121a9e96a7f9e410b245d471e8d48964c67113ce9afe50c8dd7bdf"}, + {file = "referencing-0.30.2.tar.gz", hash = "sha256:794ad8003c65938edcdbc027f1933215e0d0ccc0291e3ce20a4d87432b59efc0"}, +] +regex = [ + {file = "regex-2023.8.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:88900f521c645f784260a8d346e12a1590f79e96403971241e64c3a265c8ecdb"}, + {file = "regex-2023.8.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3611576aff55918af2697410ff0293d6071b7e00f4b09e005d614686ac4cd57c"}, + {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8a0ccc8f2698f120e9e5742f4b38dc944c38744d4bdfc427616f3a163dd9de5"}, + {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c662a4cbdd6280ee56f841f14620787215a171c4e2d1744c9528bed8f5816c96"}, + {file = 
"regex-2023.8.8-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cf0633e4a1b667bfe0bb10b5e53fe0d5f34a6243ea2530eb342491f1adf4f739"}, + {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:551ad543fa19e94943c5b2cebc54c73353ffff08228ee5f3376bd27b3d5b9800"}, + {file = "regex-2023.8.8-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54de2619f5ea58474f2ac211ceea6b615af2d7e4306220d4f3fe690c91988a61"}, + {file = "regex-2023.8.8-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5ec4b3f0aebbbe2fc0134ee30a791af522a92ad9f164858805a77442d7d18570"}, + {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3ae646c35cb9f820491760ac62c25b6d6b496757fda2d51be429e0e7b67ae0ab"}, + {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ca339088839582d01654e6f83a637a4b8194d0960477b9769d2ff2cfa0fa36d2"}, + {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:d9b6627408021452dcd0d2cdf8da0534e19d93d070bfa8b6b4176f99711e7f90"}, + {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:bd3366aceedf274f765a3a4bc95d6cd97b130d1dda524d8f25225d14123c01db"}, + {file = "regex-2023.8.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7aed90a72fc3654fba9bc4b7f851571dcc368120432ad68b226bd593f3f6c0b7"}, + {file = "regex-2023.8.8-cp310-cp310-win32.whl", hash = "sha256:80b80b889cb767cc47f31d2b2f3dec2db8126fbcd0cff31b3925b4dc6609dcdb"}, + {file = "regex-2023.8.8-cp310-cp310-win_amd64.whl", hash = "sha256:b82edc98d107cbc7357da7a5a695901b47d6eb0420e587256ba3ad24b80b7d0b"}, + {file = "regex-2023.8.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1e7d84d64c84ad97bf06f3c8cb5e48941f135ace28f450d86af6b6512f1c9a71"}, + {file = "regex-2023.8.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ce0f9fbe7d295f9922c0424a3637b88c6c472b75eafeaff6f910494a1fa719ef"}, + {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06c57e14ac723b04458df5956cfb7e2d9caa6e9d353c0b4c7d5d54fcb1325c46"}, + {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e7a9aaa5a1267125eef22cef3b63484c3241aaec6f48949b366d26c7250e0357"}, + {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b7408511fca48a82a119d78a77c2f5eb1b22fe88b0d2450ed0756d194fe7a9a"}, + {file = "regex-2023.8.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14dc6f2d88192a67d708341f3085df6a4f5a0c7b03dec08d763ca2cd86e9f559"}, + {file = "regex-2023.8.8-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:48c640b99213643d141550326f34f0502fedb1798adb3c9eb79650b1ecb2f177"}, + {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0085da0f6c6393428bf0d9c08d8b1874d805bb55e17cb1dfa5ddb7cfb11140bf"}, + {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:964b16dcc10c79a4a2be9f1273fcc2684a9eedb3906439720598029a797b46e6"}, + {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7ce606c14bb195b0e5108544b540e2c5faed6843367e4ab3deb5c6aa5e681208"}, + {file = "regex-2023.8.8-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:40f029d73b10fac448c73d6eb33d57b34607f40116e9f6e9f0d32e9229b147d7"}, + {file = 
"regex-2023.8.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3b8e6ea6be6d64104d8e9afc34c151926f8182f84e7ac290a93925c0db004bfd"}, + {file = "regex-2023.8.8-cp311-cp311-win32.whl", hash = "sha256:942f8b1f3b223638b02df7df79140646c03938d488fbfb771824f3d05fc083a8"}, + {file = "regex-2023.8.8-cp311-cp311-win_amd64.whl", hash = "sha256:51d8ea2a3a1a8fe4f67de21b8b93757005213e8ac3917567872f2865185fa7fb"}, + {file = "regex-2023.8.8-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e951d1a8e9963ea51efd7f150450803e3b95db5939f994ad3d5edac2b6f6e2b4"}, + {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:704f63b774218207b8ccc6c47fcef5340741e5d839d11d606f70af93ee78e4d4"}, + {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:22283c769a7b01c8ac355d5be0715bf6929b6267619505e289f792b01304d898"}, + {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:91129ff1bb0619bc1f4ad19485718cc623a2dc433dff95baadbf89405c7f6b57"}, + {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de35342190deb7b866ad6ba5cbcccb2d22c0487ee0cbb251efef0843d705f0d4"}, + {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b993b6f524d1e274a5062488a43e3f9f8764ee9745ccd8e8193df743dbe5ee61"}, + {file = "regex-2023.8.8-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3026cbcf11d79095a32d9a13bbc572a458727bd5b1ca332df4a79faecd45281c"}, + {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:293352710172239bf579c90a9864d0df57340b6fd21272345222fb6371bf82b3"}, + {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:d909b5a3fff619dc7e48b6b1bedc2f30ec43033ba7af32f936c10839e81b9217"}, + {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:3d370ff652323c5307d9c8e4c62efd1956fb08051b0e9210212bc51168b4ff56"}, + {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:b076da1ed19dc37788f6a934c60adf97bd02c7eea461b73730513921a85d4235"}, + {file = "regex-2023.8.8-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:e9941a4ada58f6218694f382e43fdd256e97615db9da135e77359da257a7168b"}, + {file = "regex-2023.8.8-cp36-cp36m-win32.whl", hash = "sha256:a8c65c17aed7e15a0c824cdc63a6b104dfc530f6fa8cb6ac51c437af52b481c7"}, + {file = "regex-2023.8.8-cp36-cp36m-win_amd64.whl", hash = "sha256:aadf28046e77a72f30dcc1ab185639e8de7f4104b8cb5c6dfa5d8ed860e57236"}, + {file = "regex-2023.8.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:423adfa872b4908843ac3e7a30f957f5d5282944b81ca0a3b8a7ccbbfaa06103"}, + {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ae594c66f4a7e1ea67232a0846649a7c94c188d6c071ac0210c3e86a5f92109"}, + {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e51c80c168074faa793685656c38eb7a06cbad7774c8cbc3ea05552d615393d8"}, + {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:09b7f4c66aa9d1522b06e31a54f15581c37286237208df1345108fcf4e050c18"}, + {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e73e5243af12d9cd6a9d6a45a43570dbe2e5b1cdfc862f5ae2b031e44dd95a8"}, + {file = 
"regex-2023.8.8-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:941460db8fe3bd613db52f05259c9336f5a47ccae7d7def44cc277184030a116"}, + {file = "regex-2023.8.8-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f0ccf3e01afeb412a1a9993049cb160d0352dba635bbca7762b2dc722aa5742a"}, + {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2e9216e0d2cdce7dbc9be48cb3eacb962740a09b011a116fd7af8c832ab116ca"}, + {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:5cd9cd7170459b9223c5e592ac036e0704bee765706445c353d96f2890e816c8"}, + {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:4873ef92e03a4309b3ccd8281454801b291b689f6ad45ef8c3658b6fa761d7ac"}, + {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:239c3c2a339d3b3ddd51c2daef10874410917cd2b998f043c13e2084cb191684"}, + {file = "regex-2023.8.8-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1005c60ed7037be0d9dea1f9c53cc42f836188227366370867222bda4c3c6bd7"}, + {file = "regex-2023.8.8-cp37-cp37m-win32.whl", hash = "sha256:e6bd1e9b95bc5614a7a9c9c44fde9539cba1c823b43a9f7bc11266446dd568e3"}, + {file = "regex-2023.8.8-cp37-cp37m-win_amd64.whl", hash = "sha256:9a96edd79661e93327cfeac4edec72a4046e14550a1d22aa0dd2e3ca52aec921"}, + {file = "regex-2023.8.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f2181c20ef18747d5f4a7ea513e09ea03bdd50884a11ce46066bb90fe4213675"}, + {file = "regex-2023.8.8-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a2ad5add903eb7cdde2b7c64aaca405f3957ab34f16594d2b78d53b8b1a6a7d6"}, + {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9233ac249b354c54146e392e8a451e465dd2d967fc773690811d3a8c240ac601"}, + {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:920974009fb37b20d32afcdf0227a2e707eb83fe418713f7a8b7de038b870d0b"}, + {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd2b6c5dfe0929b6c23dde9624483380b170b6e34ed79054ad131b20203a1a63"}, + {file = "regex-2023.8.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96979d753b1dc3b2169003e1854dc67bfc86edf93c01e84757927f810b8c3c93"}, + {file = "regex-2023.8.8-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ae54a338191e1356253e7883d9d19f8679b6143703086245fb14d1f20196be9"}, + {file = "regex-2023.8.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2162ae2eb8b079622176a81b65d486ba50b888271302190870b8cc488587d280"}, + {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c884d1a59e69e03b93cf0dfee8794c63d7de0ee8f7ffb76e5f75be8131b6400a"}, + {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:cf9273e96f3ee2ac89ffcb17627a78f78e7516b08f94dc435844ae72576a276e"}, + {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:83215147121e15d5f3a45d99abeed9cf1fe16869d5c233b08c56cdf75f43a504"}, + {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:3f7454aa427b8ab9101f3787eb178057c5250478e39b99540cfc2b889c7d0586"}, + {file = "regex-2023.8.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f0640913d2c1044d97e30d7c41728195fc37e54d190c5385eacb52115127b882"}, + {file = "regex-2023.8.8-cp38-cp38-win32.whl", hash = 
"sha256:0c59122ceccb905a941fb23b087b8eafc5290bf983ebcb14d2301febcbe199c7"}, + {file = "regex-2023.8.8-cp38-cp38-win_amd64.whl", hash = "sha256:c12f6f67495ea05c3d542d119d270007090bad5b843f642d418eb601ec0fa7be"}, + {file = "regex-2023.8.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:82cd0a69cd28f6cc3789cc6adeb1027f79526b1ab50b1f6062bbc3a0ccb2dbc3"}, + {file = "regex-2023.8.8-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bb34d1605f96a245fc39790a117ac1bac8de84ab7691637b26ab2c5efb8f228c"}, + {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:987b9ac04d0b38ef4f89fbc035e84a7efad9cdd5f1e29024f9289182c8d99e09"}, + {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9dd6082f4e2aec9b6a0927202c85bc1b09dcab113f97265127c1dc20e2e32495"}, + {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7eb95fe8222932c10d4436e7a6f7c99991e3fdd9f36c949eff16a69246dee2dc"}, + {file = "regex-2023.8.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7098c524ba9f20717a56a8d551d2ed491ea89cbf37e540759ed3b776a4f8d6eb"}, + {file = "regex-2023.8.8-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b694430b3f00eb02c594ff5a16db30e054c1b9589a043fe9174584c6efa8033"}, + {file = "regex-2023.8.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b2aeab3895d778155054abea5238d0eb9a72e9242bd4b43f42fd911ef9a13470"}, + {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:988631b9d78b546e284478c2ec15c8a85960e262e247b35ca5eaf7ee22f6050a"}, + {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:67ecd894e56a0c6108ec5ab1d8fa8418ec0cff45844a855966b875d1039a2e34"}, + {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:14898830f0a0eb67cae2bbbc787c1a7d6e34ecc06fbd39d3af5fe29a4468e2c9"}, + {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:f2200e00b62568cfd920127782c61bc1c546062a879cdc741cfcc6976668dfcf"}, + {file = "regex-2023.8.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9691a549c19c22d26a4f3b948071e93517bdf86e41b81d8c6ac8a964bb71e5a6"}, + {file = "regex-2023.8.8-cp39-cp39-win32.whl", hash = "sha256:6ab2ed84bf0137927846b37e882745a827458689eb969028af8032b1b3dac78e"}, + {file = "regex-2023.8.8-cp39-cp39-win_amd64.whl", hash = "sha256:5543c055d8ec7801901e1193a51570643d6a6ab8751b1f7dd9af71af467538bb"}, + {file = "regex-2023.8.8.tar.gz", hash = "sha256:fcbdc5f2b0f1cd0f6a56cdb46fe41d2cce1e644e3b68832f3eeebc5fb0f7712e"}, +] +requests = [ + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, +] +requests-mock = [ + {file = "requests-mock-1.11.0.tar.gz", hash = "sha256:ef10b572b489a5f28e09b708697208c4a3b2b89ef80a9f01584340ea357ec3c4"}, + {file = "requests_mock-1.11.0-py2.py3-none-any.whl", hash = "sha256:f7fae383f228633f6bececebdab236c478ace2284d6292c6e7e2867b9ab74d15"}, +] +requests-oauthlib = [ + {file = "requests-oauthlib-1.3.1.tar.gz", hash = "sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a"}, + {file = "requests_oauthlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5"}, +] 
+requests-toolbelt = [ + {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, + {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, +] +requirements-parser = [ + {file = "requirements-parser-0.5.0.tar.gz", hash = "sha256:3336f3a3ae23e06d3f0f88595e4052396e3adf91688787f637e5d2ca1a904069"}, + {file = "requirements_parser-0.5.0-py3-none-any.whl", hash = "sha256:e7fcdcd04f2049e73a9fb150d8a0f9d51ce4108f5f7cbeac74c484e17b12bcd9"}, +] +rfc3339-validator = [ + {file = "rfc3339_validator-0.1.4-py2.py3-none-any.whl", hash = "sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa"}, + {file = "rfc3339_validator-0.1.4.tar.gz", hash = "sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b"}, +] +rich = [ + {file = "rich-13.5.2-py3-none-any.whl", hash = "sha256:146a90b3b6b47cac4a73c12866a499e9817426423f57c5a66949c086191a8808"}, + {file = "rich-13.5.2.tar.gz", hash = "sha256:fb9d6c0a0f643c99eed3875b5377a184132ba9be4d61516a55273d3554d75a39"}, +] +rich-argparse = [ + {file = "rich_argparse-1.3.0-py3-none-any.whl", hash = "sha256:1a5eda1659c0a215862fe3630fcbe68d7792f18a8106baaf4e005b9896acc6f6"}, + {file = "rich_argparse-1.3.0.tar.gz", hash = "sha256:974cc1ba0aaa0d6aabc09ab1b78f9ba928670e08590f9551121bcbc60c75b74a"}, +] +rpds-py = [ + {file = "rpds_py-0.10.0-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:c1e0e9916301e3b3d970814b1439ca59487f0616d30f36a44cead66ee1748c31"}, + {file = "rpds_py-0.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8ce8caa29ebbdcde67e5fd652c811d34bc01f249dbc0d61e5cc4db05ae79a83b"}, + {file = "rpds_py-0.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad277f74b1c164f7248afa968700e410651eb858d7c160d109fb451dc45a2f09"}, + {file = "rpds_py-0.10.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8e1c68303ccf7fceb50fbab79064a2636119fd9aca121f28453709283dbca727"}, + {file = "rpds_py-0.10.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:780fcb855be29153901c67fc9c5633d48aebef21b90aa72812fa181d731c6b00"}, + {file = "rpds_py-0.10.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bbd7b24d108509a1b9b6679fcc1166a7dd031dbef1f3c2c73788f42e3ebb3beb"}, + {file = "rpds_py-0.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0700c2133ba203c4068aaecd6a59bda22e06a5e46255c9da23cbf68c6942215d"}, + {file = "rpds_py-0.10.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:576da63eae7809f375932bfcbca2cf20620a1915bf2fedce4b9cc8491eceefe3"}, + {file = "rpds_py-0.10.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:23750a9b8a329844ba1fe267ca456bb3184984da2880ed17ae641c5af8de3fef"}, + {file = "rpds_py-0.10.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:d08395595c42bcd82c3608762ce734504c6d025eef1c06f42326a6023a584186"}, + {file = "rpds_py-0.10.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1d7b7b71bcb82d8713c7c2e9c5f061415598af5938666beded20d81fa23e7640"}, + {file = "rpds_py-0.10.0-cp310-none-win32.whl", hash = "sha256:97f5811df21703446b42303475b8b855ee07d6ab6cdf8565eff115540624f25d"}, + {file = "rpds_py-0.10.0-cp310-none-win_amd64.whl", hash = "sha256:cdbed8f21204398f47de39b0a9b180d7e571f02dfb18bf5f1b618e238454b685"}, + {file = "rpds_py-0.10.0-cp311-cp311-macosx_10_7_x86_64.whl", hash = 
"sha256:7a3a3d3e4f1e3cd2a67b93a0b6ed0f2499e33f47cc568e3a0023e405abdc0ff1"}, + {file = "rpds_py-0.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fc72ae476732cdb7b2c1acb5af23b478b8a0d4b6fcf19b90dd150291e0d5b26b"}, + {file = "rpds_py-0.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0583f69522732bdd79dca4cd3873e63a29acf4a299769c7541f2ca1e4dd4bc6"}, + {file = "rpds_py-0.10.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f8b9a7cd381970e64849070aca7c32d53ab7d96c66db6c2ef7aa23c6e803f514"}, + {file = "rpds_py-0.10.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0d292cabd7c8335bdd3237ded442480a249dbcdb4ddfac5218799364a01a0f5c"}, + {file = "rpds_py-0.10.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6903cdca64f1e301af9be424798328c1fe3b4b14aede35f04510989fc72f012"}, + {file = "rpds_py-0.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bed57543c99249ab3a4586ddc8786529fbc33309e5e8a1351802a06ca2baf4c2"}, + {file = "rpds_py-0.10.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:15932ec5f224b0e35764dc156514533a4fca52dcfda0dfbe462a1a22b37efd59"}, + {file = "rpds_py-0.10.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:eb2d59bc196e6d3b1827c7db06c1a898bfa0787c0574af398e65ccf2e97c0fbe"}, + {file = "rpds_py-0.10.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f99d74ddf9d3b6126b509e81865f89bd1283e3fc1b568b68cd7bd9dfa15583d7"}, + {file = "rpds_py-0.10.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f70bec8a14a692be6dbe7ce8aab303e88df891cbd4a39af091f90b6702e28055"}, + {file = "rpds_py-0.10.0-cp311-none-win32.whl", hash = "sha256:5f7487be65b9c2c510819e744e375bd41b929a97e5915c4852a82fbb085df62c"}, + {file = "rpds_py-0.10.0-cp311-none-win_amd64.whl", hash = "sha256:748e472345c3a82cfb462d0dff998a7bf43e621eed73374cb19f307e97e08a83"}, + {file = "rpds_py-0.10.0-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:d4639111e73997567343df6551da9dd90d66aece1b9fc26c786d328439488103"}, + {file = "rpds_py-0.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f4760e1b02173f4155203054f77a5dc0b4078de7645c922b208d28e7eb99f3e2"}, + {file = "rpds_py-0.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a6420a36975e0073acaeee44ead260c1f6ea56812cfc6c31ec00c1c48197173"}, + {file = "rpds_py-0.10.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:58fc4d66ee349a23dbf08c7e964120dc9027059566e29cf0ce6205d590ed7eca"}, + {file = "rpds_py-0.10.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:063411228b852fb2ed7485cf91f8e7d30893e69b0acb207ec349db04cccc8225"}, + {file = "rpds_py-0.10.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65af12f70355de29e1092f319f85a3467f4005e959ab65129cb697169ce94b86"}, + {file = "rpds_py-0.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:298e8b5d8087e0330aac211c85428c8761230ef46a1f2c516d6a2f67fb8803c5"}, + {file = "rpds_py-0.10.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5b9bf77008f2c55dabbd099fd3ac87009471d223a1c7ebea36873d39511b780a"}, + {file = "rpds_py-0.10.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c7853f27195598e550fe089f78f0732c66ee1d1f0eaae8ad081589a5a2f5d4af"}, + {file = "rpds_py-0.10.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:75dbfd41a61bc1fb0536bf7b1abf272dc115c53d4d77db770cd65d46d4520882"}, + {file 
= "rpds_py-0.10.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b25136212a3d064a8f0b9ebbb6c57094c5229e0de76d15c79b76feff26aeb7b8"}, + {file = "rpds_py-0.10.0-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:9affee8cb1ec453382c27eb9043378ab32f49cd4bc24a24275f5c39bf186c279"}, + {file = "rpds_py-0.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4d55528ef13af4b4e074d067977b1f61408602f53ae4537dccf42ba665c2c7bd"}, + {file = "rpds_py-0.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7865df1fb564092bcf46dac61b5def25342faf6352e4bc0e61a286e3fa26a3d"}, + {file = "rpds_py-0.10.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3f5cc8c7bc99d2bbcd704cef165ca7d155cd6464c86cbda8339026a42d219397"}, + {file = "rpds_py-0.10.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cbae50d352e4717ffc22c566afc2d0da744380e87ed44a144508e3fb9114a3f4"}, + {file = "rpds_py-0.10.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fccbf0cd3411719e4c9426755df90bf3449d9fc5a89f077f4a7f1abd4f70c910"}, + {file = "rpds_py-0.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78d10c431073dc6ebceed35ab22948a016cc2b5120963c13a41e38bdde4a7212"}, + {file = "rpds_py-0.10.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1b401e8b9aece651512e62c431181e6e83048a651698a727ea0eb0699e9f9b74"}, + {file = "rpds_py-0.10.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:7618a082c55cf038eede4a918c1001cc8a4411dfe508dc762659bcd48d8f4c6e"}, + {file = "rpds_py-0.10.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:b3226b246facae14909b465061ddcfa2dfeadb6a64f407f24300d42d69bcb1a1"}, + {file = "rpds_py-0.10.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:a8edd467551c1102dc0f5754ab55cd0703431cd3044edf8c8e7d9208d63fa453"}, + {file = "rpds_py-0.10.0-cp38-none-win32.whl", hash = "sha256:71333c22f7cf5f0480b59a0aef21f652cf9bbaa9679ad261b405b65a57511d1e"}, + {file = "rpds_py-0.10.0-cp38-none-win_amd64.whl", hash = "sha256:a8ab1adf04ae2d6d65835995218fd3f3eb644fe20655ca8ee233e2c7270ff53b"}, + {file = "rpds_py-0.10.0-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:87c93b25d538c433fb053da6228c6290117ba53ff6a537c133b0f2087948a582"}, + {file = "rpds_py-0.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e7996aed3f65667c6dcc8302a69368435a87c2364079a066750a2eac75ea01e"}, + {file = "rpds_py-0.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8856aa76839dc234d3469f1e270918ce6bec1d6a601eba928f45d68a15f04fc3"}, + {file = "rpds_py-0.10.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:00215f6a9058fbf84f9d47536902558eb61f180a6b2a0fa35338d06ceb9a2e5a"}, + {file = "rpds_py-0.10.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:23a059143c1393015c68936370cce11690f7294731904bdae47cc3e16d0b2474"}, + {file = "rpds_py-0.10.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3e5c26905aa651cc8c0ddc45e0e5dea2a1296f70bdc96af17aee9d0493280a17"}, + {file = "rpds_py-0.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c651847545422c8131660704c58606d841e228ed576c8f1666d98b3d318f89da"}, + {file = "rpds_py-0.10.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:80992eb20755701753e30a6952a96aa58f353d12a65ad3c9d48a8da5ec4690cf"}, + {file = "rpds_py-0.10.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = 
"sha256:ffcf18ad3edf1c170e27e88b10282a2c449aa0358659592462448d71b2000cfc"}, + {file = "rpds_py-0.10.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:08e08ccf5b10badb7d0a5c84829b914c6e1e1f3a716fdb2bf294e2bd01562775"}, + {file = "rpds_py-0.10.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7150b83b3e3ddaac81a8bb6a9b5f93117674a0e7a2b5a5b32ab31fdfea6df27f"}, + {file = "rpds_py-0.10.0-cp39-none-win32.whl", hash = "sha256:3455ecc46ea443b5f7d9c2f946ce4017745e017b0d0f8b99c92564eff97e97f5"}, + {file = "rpds_py-0.10.0-cp39-none-win_amd64.whl", hash = "sha256:afe6b5a04b2ab1aa89bad32ca47bf71358e7302a06fdfdad857389dca8fb5f04"}, + {file = "rpds_py-0.10.0-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:b1cb078f54af0abd835ca76f93a3152565b73be0f056264da45117d0adf5e99c"}, + {file = "rpds_py-0.10.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:8e7e2b3577e97fa43c2c2b12a16139b2cedbd0770235d5179c0412b4794efd9b"}, + {file = "rpds_py-0.10.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae46a50d235f1631d9ec4670503f7b30405103034830bc13df29fd947207f795"}, + {file = "rpds_py-0.10.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f869e34d2326e417baee430ae998e91412cc8e7fdd83d979277a90a0e79a5b47"}, + {file = "rpds_py-0.10.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3d544a614055b131111bed6edfa1cb0fb082a7265761bcb03321f2dd7b5c6c48"}, + {file = "rpds_py-0.10.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ee9c2f6ca9774c2c24bbf7b23086264e6b5fa178201450535ec0859739e6f78d"}, + {file = "rpds_py-0.10.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2da4a8c6d465fde36cea7d54bf47b5cf089073452f0e47c8632ecb9dec23c07"}, + {file = "rpds_py-0.10.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ac00c41dd315d147b129976204839ca9de699d83519ff1272afbe4fb9d362d12"}, + {file = "rpds_py-0.10.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:0155c33af0676fc38e1107679be882077680ad1abb6303956b97259c3177e85e"}, + {file = "rpds_py-0.10.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:db6585b600b2e76e98131e0ac0e5195759082b51687ad0c94505970c90718f4a"}, + {file = "rpds_py-0.10.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:7b6975d3763d0952c111700c0634968419268e6bbc0b55fe71138987fa66f309"}, + {file = "rpds_py-0.10.0-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:6388e4e95a26717b94a05ced084e19da4d92aca883f392dffcf8e48c8e221a24"}, + {file = "rpds_py-0.10.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:18f87baa20e02e9277ad8960cd89b63c79c05caf106f4c959a9595c43f2a34a5"}, + {file = "rpds_py-0.10.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92f05fc7d832e970047662b3440b190d24ea04f8d3c760e33e7163b67308c878"}, + {file = "rpds_py-0.10.0-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:291c9ce3929a75b45ce8ddde2aa7694fc8449f2bc8f5bd93adf021efaae2d10b"}, + {file = "rpds_py-0.10.0-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:861d25ae0985a1dd5297fee35f476b60c6029e2e6e19847d5b4d0a43a390b696"}, + {file = "rpds_py-0.10.0-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:668d2b45d62c68c7a370ac3dce108ffda482b0a0f50abd8b4c604a813a59e08f"}, + {file = "rpds_py-0.10.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:344b89384c250ba6a4ce1786e04d01500e4dac0f4137ceebcaad12973c0ac0b3"}, + {file = "rpds_py-0.10.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:885e023e73ce09b11b89ab91fc60f35d80878d2c19d6213a32b42ff36543c291"}, + {file = "rpds_py-0.10.0-pp38-pypy38_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:841128a22e6ac04070a0f84776d07e9c38c4dcce8e28792a95e45fc621605517"}, + {file = "rpds_py-0.10.0-pp38-pypy38_pp73-musllinux_1_2_i686.whl", hash = "sha256:899b5e7e2d5a8bc92aa533c2d4e55e5ebba095c485568a5e4bedbc163421259a"}, + {file = "rpds_py-0.10.0-pp38-pypy38_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:e7947d9a6264c727a556541b1630296bbd5d0a05068d21c38dde8e7a1c703ef0"}, + {file = "rpds_py-0.10.0-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:4992266817169997854f81df7f6db7bdcda1609972d8ffd6919252f09ec3c0f6"}, + {file = "rpds_py-0.10.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:26d9fd624649a10e4610fab2bc820e215a184d193e47d0be7fe53c1c8f67f370"}, + {file = "rpds_py-0.10.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0028eb0967942d0d2891eae700ae1a27b7fd18604cfcb16a1ef486a790fee99e"}, + {file = "rpds_py-0.10.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f9e7e493ded7042712a374471203dd43ae3fff5b81e3de1a0513fa241af9fd41"}, + {file = "rpds_py-0.10.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2d68a8e8a3a816629283faf82358d8c93fe5bd974dd2704152394a3de4cec22a"}, + {file = "rpds_py-0.10.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d6d5f061f6a2aa55790b9e64a23dfd87b6664ab56e24cd06c78eb43986cb260b"}, + {file = "rpds_py-0.10.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c7c4266c1b61eb429e8aeb7d8ed6a3bfe6c890a1788b18dbec090c35c6b93fa"}, + {file = "rpds_py-0.10.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:80772e3bda6787510d9620bc0c7572be404a922f8ccdfd436bf6c3778119464c"}, + {file = "rpds_py-0.10.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:b98e75b21fc2ba5285aef8efaf34131d16af1c38df36bdca2f50634bea2d3060"}, + {file = "rpds_py-0.10.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:d63787f289944cc4bde518ad2b5e70a4f0d6e2ce76324635359c74c113fd188f"}, + {file = "rpds_py-0.10.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:872f3dcaa8bf2245944861d7311179d2c0c9b2aaa7d3b464d99a7c2e401f01fa"}, + {file = "rpds_py-0.10.0.tar.gz", hash = "sha256:e36d7369363d2707d5f68950a64c4e025991eb0177db01ccb6aa6facae48b69f"}, +] +rsa = [ + {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, + {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, +] +s3fs = [ + {file = "s3fs-2023.6.0-py3-none-any.whl", hash = "sha256:d1a0a423d0d2e17fb2a193d9531935dc3f45ba742693448a461b6b34f6a92a24"}, + {file = "s3fs-2023.6.0.tar.gz", hash = "sha256:63fd8ddf05eb722de784b7b503196107f2a518061298cf005a8a4715b4d49117"}, +] +s3transfer = [ + {file = "s3transfer-0.6.2-py3-none-any.whl", hash = "sha256:b014be3a8a2aab98cfe1abc7229cc5a9a0cf05eb9c1f2b86b230fd8df3f78084"}, + {file = "s3transfer-0.6.2.tar.gz", hash = "sha256:cab66d3380cca3e70939ef2255d01cd8aece6a4907a9528740f668c4b0611861"}, +] +scramp = [ + {file = "scramp-1.4.4-py3-none-any.whl", hash = "sha256:b142312df7c2977241d951318b7ee923d6b7a4f75ba0f05b621ece1ed616faa3"}, + {file = 
"scramp-1.4.4.tar.gz", hash = "sha256:b7022a140040f33cf863ab2657917ed05287a807b917950489b89b9f685d59bc"}, +] +secretstorage = [ + {file = "SecretStorage-3.3.3-py3-none-any.whl", hash = "sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99"}, + {file = "SecretStorage-3.3.3.tar.gz", hash = "sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77"}, +] +semver = [ + {file = "semver-3.0.1-py3-none-any.whl", hash = "sha256:2a23844ba1647362c7490fe3995a86e097bb590d16f0f32dfc383008f19e4cdf"}, + {file = "semver-3.0.1.tar.gz", hash = "sha256:9ec78c5447883c67b97f98c3b6212796708191d22e4ad30f4570f840171cbce1"}, +] +sentry-sdk = [ + {file = "sentry-sdk-1.30.0.tar.gz", hash = "sha256:7dc873b87e1faf4d00614afd1058bfa1522942f33daef8a59f90de8ed75cd10c"}, + {file = "sentry_sdk-1.30.0-py2.py3-none-any.whl", hash = "sha256:2e53ad63f96bb9da6570ba2e755c267e529edcf58580a2c0d2a11ef26e1e678b"}, +] +setproctitle = [ + {file = "setproctitle-1.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:288943dec88e178bb2fd868adf491197cc0fc8b6810416b1c6775e686bab87fe"}, + {file = "setproctitle-1.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:630f6fe5e24a619ccf970c78e084319ee8be5be253ecc9b5b216b0f474f5ef18"}, + {file = "setproctitle-1.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c877691b90026670e5a70adfbcc735460a9f4c274d35ec5e8a43ce3f8443005"}, + {file = "setproctitle-1.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7a55fe05f15c10e8c705038777656fe45e3bd676d49ad9ac8370b75c66dd7cd7"}, + {file = "setproctitle-1.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ab45146c71ca6592c9cc8b354a2cc9cc4843c33efcbe1d245d7d37ce9696552d"}, + {file = "setproctitle-1.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e00c9d5c541a2713ba0e657e0303bf96ddddc412ef4761676adc35df35d7c246"}, + {file = "setproctitle-1.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:265ecbe2c6eafe82e104f994ddd7c811520acdd0647b73f65c24f51374cf9494"}, + {file = "setproctitle-1.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c2c46200656280a064073447ebd363937562debef329482fd7e570c8d498f806"}, + {file = "setproctitle-1.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:fa2f50678f04fda7a75d0fe5dd02bbdd3b13cbe6ed4cf626e4472a7ccf47ae94"}, + {file = "setproctitle-1.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7f2719a398e1a2c01c2a63bf30377a34d0b6ef61946ab9cf4d550733af8f1ef1"}, + {file = "setproctitle-1.3.2-cp310-cp310-win32.whl", hash = "sha256:e425be62524dc0c593985da794ee73eb8a17abb10fe692ee43bb39e201d7a099"}, + {file = "setproctitle-1.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:e85e50b9c67854f89635a86247412f3ad66b132a4d8534ac017547197c88f27d"}, + {file = "setproctitle-1.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2a97d51c17d438cf5be284775a322d57b7ca9505bb7e118c28b1824ecaf8aeaa"}, + {file = "setproctitle-1.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:587c7d6780109fbd8a627758063d08ab0421377c0853780e5c356873cdf0f077"}, + {file = "setproctitle-1.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d17c8bd073cbf8d141993db45145a70b307385b69171d6b54bcf23e5d644de"}, + {file = "setproctitle-1.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e932089c35a396dc31a5a1fc49889dd559548d14cb2237adae260382a090382e"}, + 
{file = "setproctitle-1.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e4f8f12258a8739c565292a551c3db62cca4ed4f6b6126664e2381acb4931bf"}, + {file = "setproctitle-1.3.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:570d255fd99c7f14d8f91363c3ea96bd54f8742275796bca67e1414aeca7d8c3"}, + {file = "setproctitle-1.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a8e0881568c5e6beff91ef73c0ec8ac2a9d3ecc9edd6bd83c31ca34f770910c4"}, + {file = "setproctitle-1.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4bba3be4c1fabf170595b71f3af46c6d482fbe7d9e0563999b49999a31876f77"}, + {file = "setproctitle-1.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:37ece938110cab2bb3957e3910af8152ca15f2b6efdf4f2612e3f6b7e5459b80"}, + {file = "setproctitle-1.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:db684d6bbb735a80bcbc3737856385b55d53f8a44ce9b46e9a5682c5133a9bf7"}, + {file = "setproctitle-1.3.2-cp311-cp311-win32.whl", hash = "sha256:ca58cd260ea02759238d994cfae844fc8b1e206c684beb8f38877dcab8451dfc"}, + {file = "setproctitle-1.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:88486e6cce2a18a033013d17b30a594f1c5cb42520c49c19e6ade40b864bb7ff"}, + {file = "setproctitle-1.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:92c626edc66169a1b09e9541b9c0c9f10488447d8a2b1d87c8f0672e771bc927"}, + {file = "setproctitle-1.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:710e16fa3bade3b026907e4a5e841124983620046166f355bbb84be364bf2a02"}, + {file = "setproctitle-1.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f29b75e86260b0ab59adb12661ef9f113d2f93a59951373eb6d68a852b13e83"}, + {file = "setproctitle-1.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c8d9650154afaa86a44ff195b7b10d683c73509d085339d174e394a22cccbb9"}, + {file = "setproctitle-1.3.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0452282258dfcc01697026a8841258dd2057c4438b43914b611bccbcd048f10"}, + {file = "setproctitle-1.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:e49ae693306d7624015f31cb3e82708916759d592c2e5f72a35c8f4cc8aef258"}, + {file = "setproctitle-1.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1ff863a20d1ff6ba2c24e22436a3daa3cd80be1dfb26891aae73f61b54b04aca"}, + {file = "setproctitle-1.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:55ce1e9925ce1765865442ede9dca0ba9bde10593fcd570b1f0fa25d3ec6b31c"}, + {file = "setproctitle-1.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:7fe9df7aeb8c64db6c34fc3b13271a363475d77bc157d3f00275a53910cb1989"}, + {file = "setproctitle-1.3.2-cp37-cp37m-win32.whl", hash = "sha256:e5c50e164cd2459bc5137c15288a9ef57160fd5cbf293265ea3c45efe7870865"}, + {file = "setproctitle-1.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:a499fff50387c1520c085a07578a000123f519e5f3eee61dd68e1d301659651f"}, + {file = "setproctitle-1.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5b932c3041aa924163f4aab970c2f0e6b4d9d773f4d50326e0ea1cd69240e5c5"}, + {file = "setproctitle-1.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f4bfc89bd33ebb8e4c0e9846a09b1f5a4a86f5cb7a317e75cc42fee1131b4f4f"}, + {file = "setproctitle-1.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:fcd3cf4286a60fdc95451d8d14e0389a6b4f5cebe02c7f2609325eb016535963"}, + {file = "setproctitle-1.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5fb4f769c02f63fac90989711a3fee83919f47ae9afd4758ced5d86596318c65"}, + {file = "setproctitle-1.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5194b4969f82ea842a4f6af2f82cd16ebdc3f1771fb2771796e6add9835c1973"}, + {file = "setproctitle-1.3.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f0cde41857a644b7353a0060b5f94f7ba7cf593ebde5a1094da1be581ac9a31"}, + {file = "setproctitle-1.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9124bedd8006b0e04d4e8a71a0945da9b67e7a4ab88fdad7b1440dc5b6122c42"}, + {file = "setproctitle-1.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:c8a09d570b39517de10ee5b718730e171251ce63bbb890c430c725c8c53d4484"}, + {file = "setproctitle-1.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:8ff3c8cb26afaed25e8bca7b9dd0c1e36de71f35a3a0706b5c0d5172587a3827"}, + {file = "setproctitle-1.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:589be87172b238f839e19f146b9ea47c71e413e951ef0dc6db4218ddacf3c202"}, + {file = "setproctitle-1.3.2-cp38-cp38-win32.whl", hash = "sha256:4749a2b0c9ac52f864d13cee94546606f92b981b50e46226f7f830a56a9dc8e1"}, + {file = "setproctitle-1.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:e43f315c68aa61cbdef522a2272c5a5b9b8fd03c301d3167b5e1343ef50c676c"}, + {file = "setproctitle-1.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:de3a540cd1817ede31f530d20e6a4935bbc1b145fd8f8cf393903b1e02f1ae76"}, + {file = "setproctitle-1.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4058564195b975ddc3f0462375c533cce310ccdd41b80ac9aed641c296c3eff4"}, + {file = "setproctitle-1.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c5d5dad7c28bdd1ec4187d818e43796f58a845aa892bb4481587010dc4d362b"}, + {file = "setproctitle-1.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ffc61a388a5834a97953d6444a2888c24a05f2e333f9ed49f977a87bb1ad4761"}, + {file = "setproctitle-1.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fa1a0fbee72b47dc339c87c890d3c03a72ea65c061ade3204f285582f2da30f"}, + {file = "setproctitle-1.3.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe8a988c7220c002c45347430993830666e55bc350179d91fcee0feafe64e1d4"}, + {file = "setproctitle-1.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bae283e85fc084b18ffeb92e061ff7ac5af9e183c9d1345c93e178c3e5069cbe"}, + {file = "setproctitle-1.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:fed18e44711c5af4b681c2b3b18f85e6f0f1b2370a28854c645d636d5305ccd8"}, + {file = "setproctitle-1.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:b34baef93bfb20a8ecb930e395ccd2ae3268050d8cf4fe187de5e2bd806fd796"}, + {file = "setproctitle-1.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7f0bed90a216ef28b9d227d8d73e28a8c9b88c0f48a082d13ab3fa83c581488f"}, + {file = "setproctitle-1.3.2-cp39-cp39-win32.whl", hash = "sha256:4d8938249a7cea45ab7e1e48b77685d0f2bab1ebfa9dde23e94ab97968996a7c"}, + {file = "setproctitle-1.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:a47d97a75fd2d10c37410b180f67a5835cb1d8fdea2648fd7f359d4277f180b9"}, + {file = "setproctitle-1.3.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = 
"sha256:dad42e676c5261eb50fdb16bdf3e2771cf8f99a79ef69ba88729aeb3472d8575"}, + {file = "setproctitle-1.3.2-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c91b9bc8985d00239f7dc08a49927a7ca1ca8a6af2c3890feec3ed9665b6f91e"}, + {file = "setproctitle-1.3.2-pp37-pypy37_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8579a43eafd246e285eb3a5b939e7158073d5087aacdd2308f23200eac2458b"}, + {file = "setproctitle-1.3.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:2fbd8187948284293f43533c150cd69a0e4192c83c377da837dbcd29f6b83084"}, + {file = "setproctitle-1.3.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:faec934cfe5fd6ac1151c02e67156c3f526e82f96b24d550b5d51efa4a5527c6"}, + {file = "setproctitle-1.3.2-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1aafc91cbdacc9e5fe712c52077369168e6b6c346f3a9d51bf600b53eae56bb"}, + {file = "setproctitle-1.3.2-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b617f12c9be61e8f4b2857be4a4319754756845dbbbd9c3718f468bbb1e17bcb"}, + {file = "setproctitle-1.3.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b2c9cb2705fc84cb8798f1ba74194f4c080aaef19d9dae843591c09b97678e98"}, + {file = "setproctitle-1.3.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a149a5f7f2c5a065d4e63cb0d7a4b6d3b66e6e80f12e3f8827c4f63974cbf122"}, + {file = "setproctitle-1.3.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e3ac25bfc4a0f29d2409650c7532d5ddfdbf29f16f8a256fc31c47d0dc05172"}, + {file = "setproctitle-1.3.2-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65d884e22037b23fa25b2baf1a3316602ed5c5971eb3e9d771a38c3a69ce6e13"}, + {file = "setproctitle-1.3.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7aa0aac1711fadffc1d51e9d00a3bea61f68443d6ac0241a224e4d622489d665"}, + {file = "setproctitle-1.3.2.tar.gz", hash = "sha256:b9fb97907c830d260fa0658ed58afd48a86b2b88aac521135c352ff7fd3477fd"}, +] +setuptools = [ + {file = "setuptools-68.1.2-py3-none-any.whl", hash = "sha256:3d8083eed2d13afc9426f227b24fd1659489ec107c0e86cec2ffdde5c92e790b"}, + {file = "setuptools-68.1.2.tar.gz", hash = "sha256:3d4dfa6d95f1b101d695a6160a7626e15583af71a5f52176efa5d39a054d475d"}, +] +simplejson = [ + {file = "simplejson-3.19.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:412e58997a30c5deb8cab5858b8e2e5b40ca007079f7010ee74565cc13d19665"}, + {file = "simplejson-3.19.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e765b1f47293dedf77946f0427e03ee45def2862edacd8868c6cf9ab97c8afbd"}, + {file = "simplejson-3.19.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:3231100edee292da78948fa0a77dee4e5a94a0a60bcba9ed7a9dc77f4d4bb11e"}, + {file = "simplejson-3.19.1-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:081ea6305b3b5e84ae7417e7f45956db5ea3872ec497a584ec86c3260cda049e"}, + {file = "simplejson-3.19.1-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:f253edf694ce836631b350d758d00a8c4011243d58318fbfbe0dd54a6a839ab4"}, + {file = "simplejson-3.19.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:5db86bb82034e055257c8e45228ca3dbce85e38d7bfa84fa7b2838e032a3219c"}, + {file = "simplejson-3.19.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:69a8b10a4f81548bc1e06ded0c4a6c9042c0be0d947c53c1ed89703f7e613950"}, + 
{file = "simplejson-3.19.1-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:58ee5e24d6863b22194020eb62673cf8cc69945fcad6b283919490f6e359f7c5"}, + {file = "simplejson-3.19.1-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:73d0904c2471f317386d4ae5c665b16b5c50ab4f3ee7fd3d3b7651e564ad74b1"}, + {file = "simplejson-3.19.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:66d780047c31ff316ee305c3f7550f352d87257c756413632303fc59fef19eac"}, + {file = "simplejson-3.19.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cd4d50a27b065447c9c399f0bf0a993bd0e6308db8bbbfbc3ea03b41c145775a"}, + {file = "simplejson-3.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0c16ec6a67a5f66ab004190829eeede01c633936375edcad7cbf06d3241e5865"}, + {file = "simplejson-3.19.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17a963e8dd4d81061cc05b627677c1f6a12e81345111fbdc5708c9f088d752c9"}, + {file = "simplejson-3.19.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7e78d79b10aa92f40f54178ada2b635c960d24fc6141856b926d82f67e56d169"}, + {file = "simplejson-3.19.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad071cd84a636195f35fa71de2186d717db775f94f985232775794d09f8d9061"}, + {file = "simplejson-3.19.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e7c70f19405e5f99168077b785fe15fcb5f9b3c0b70b0b5c2757ce294922c8c"}, + {file = "simplejson-3.19.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:54fca2b26bcd1c403146fd9461d1da76199442297160721b1d63def2a1b17799"}, + {file = "simplejson-3.19.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:48600a6e0032bed17c20319d91775f1797d39953ccfd68c27f83c8d7fc3b32cb"}, + {file = "simplejson-3.19.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:93f5ac30607157a0b2579af59a065bcfaa7fadeb4875bf927a8f8b6739c8d910"}, + {file = "simplejson-3.19.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b79642a599740603ca86cf9df54f57a2013c47e1dd4dd2ae4769af0a6816900"}, + {file = "simplejson-3.19.1-cp310-cp310-win32.whl", hash = "sha256:d9f2c27f18a0b94107d57294aab3d06d6046ea843ed4a45cae8bd45756749f3a"}, + {file = "simplejson-3.19.1-cp310-cp310-win_amd64.whl", hash = "sha256:5673d27806085d2a413b3be5f85fad6fca4b7ffd31cfe510bbe65eea52fff571"}, + {file = "simplejson-3.19.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:79c748aa61fd8098d0472e776743de20fae2686edb80a24f0f6593a77f74fe86"}, + {file = "simplejson-3.19.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:390f4a8ca61d90bcf806c3ad644e05fa5890f5b9a72abdd4ca8430cdc1e386fa"}, + {file = "simplejson-3.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d61482b5d18181e6bb4810b4a6a24c63a490c3a20e9fbd7876639653e2b30a1a"}, + {file = "simplejson-3.19.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2541fdb7467ef9bfad1f55b6c52e8ea52b3ce4a0027d37aff094190a955daa9d"}, + {file = "simplejson-3.19.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46133bc7dd45c9953e6ee4852e3de3d5a9a4a03b068bd238935a5c72f0a1ce34"}, + {file = "simplejson-3.19.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f96def94576f857abf58e031ce881b5a3fc25cbec64b2bc4824824a8a4367af9"}, + {file = "simplejson-3.19.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:9f14ecca970d825df0d29d5c6736ff27999ee7bdf5510e807f7ad8845f7760ce"}, + {file = "simplejson-3.19.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:66389b6b6ee46a94a493a933a26008a1bae0cfadeca176933e7ff6556c0ce998"}, + {file = "simplejson-3.19.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:22b867205cd258050c2625325fdd9a65f917a5aff22a23387e245ecae4098e78"}, + {file = "simplejson-3.19.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:c39fa911e4302eb79c804b221ddec775c3da08833c0a9120041dd322789824de"}, + {file = "simplejson-3.19.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:65dafe413b15e8895ad42e49210b74a955c9ae65564952b0243a18fb35b986cc"}, + {file = "simplejson-3.19.1-cp311-cp311-win32.whl", hash = "sha256:f05d05d99fce5537d8f7a0af6417a9afa9af3a6c4bb1ba7359c53b6257625fcb"}, + {file = "simplejson-3.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:b46aaf0332a8a9c965310058cf3487d705bf672641d2c43a835625b326689cf4"}, + {file = "simplejson-3.19.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b438e5eaa474365f4faaeeef1ec3e8d5b4e7030706e3e3d6b5bee6049732e0e6"}, + {file = "simplejson-3.19.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa9d614a612ad02492f704fbac636f666fa89295a5d22b4facf2d665fc3b5ea9"}, + {file = "simplejson-3.19.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46e89f58e4bed107626edce1cf098da3664a336d01fc78fddcfb1f397f553d44"}, + {file = "simplejson-3.19.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96ade243fb6f3b57e7bd3b71e90c190cd0f93ec5dce6bf38734a73a2e5fa274f"}, + {file = "simplejson-3.19.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed18728b90758d171f0c66c475c24a443ede815cf3f1a91e907b0db0ebc6e508"}, + {file = "simplejson-3.19.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:6a561320485017ddfc21bd2ed5de2d70184f754f1c9b1947c55f8e2b0163a268"}, + {file = "simplejson-3.19.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:2098811cd241429c08b7fc5c9e41fcc3f59f27c2e8d1da2ccdcf6c8e340ab507"}, + {file = "simplejson-3.19.1-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:8f8d179393e6f0cf6c7c950576892ea6acbcea0a320838c61968ac7046f59228"}, + {file = "simplejson-3.19.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:eff87c68058374e45225089e4538c26329a13499bc0104b52b77f8428eed36b2"}, + {file = "simplejson-3.19.1-cp36-cp36m-win32.whl", hash = "sha256:d300773b93eed82f6da138fd1d081dc96fbe53d96000a85e41460fe07c8d8b33"}, + {file = "simplejson-3.19.1-cp36-cp36m-win_amd64.whl", hash = "sha256:37724c634f93e5caaca04458f267836eb9505d897ab3947b52f33b191bf344f3"}, + {file = "simplejson-3.19.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:74bf802debe68627227ddb665c067eb8c73aa68b2476369237adf55c1161b728"}, + {file = "simplejson-3.19.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70128fb92932524c89f373e17221cf9535d7d0c63794955cc3cd5868e19f5d38"}, + {file = "simplejson-3.19.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8090e75653ea7db75bc21fa5f7bcf5f7bdf64ea258cbbac45c7065f6324f1b50"}, + {file = "simplejson-3.19.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a755f7bfc8adcb94887710dc70cc12a69a454120c6adcc6f251c3f7b46ee6aac"}, + {file = 
"simplejson-3.19.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ccb2c1877bc9b25bc4f4687169caa925ffda605d7569c40e8e95186e9a5e58b"}, + {file = "simplejson-3.19.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:919bc5aa4d8094cf8f1371ea9119e5d952f741dc4162810ab714aec948a23fe5"}, + {file = "simplejson-3.19.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:e333c5b62e93949f5ac27e6758ba53ef6ee4f93e36cc977fe2e3df85c02f6dc4"}, + {file = "simplejson-3.19.1-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3a4480e348000d89cf501b5606415f4d328484bbb431146c2971123d49fd8430"}, + {file = "simplejson-3.19.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:cb502cde018e93e75dc8fc7bb2d93477ce4f3ac10369f48866c61b5e031db1fd"}, + {file = "simplejson-3.19.1-cp37-cp37m-win32.whl", hash = "sha256:f41915a4e1f059dfad614b187bc06021fefb5fc5255bfe63abf8247d2f7a646a"}, + {file = "simplejson-3.19.1-cp37-cp37m-win_amd64.whl", hash = "sha256:3844305bc33d52c4975da07f75b480e17af3558c0d13085eaa6cc2f32882ccf7"}, + {file = "simplejson-3.19.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:1cb19eacb77adc5a9720244d8d0b5507421d117c7ed4f2f9461424a1829e0ceb"}, + {file = "simplejson-3.19.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:926957b278de22797bfc2f004b15297013843b595b3cd7ecd9e37ccb5fad0b72"}, + {file = "simplejson-3.19.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b0e9a5e66969f7a47dc500e3dba8edc3b45d4eb31efb855c8647700a3493dd8a"}, + {file = "simplejson-3.19.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79d46e7e33c3a4ef853a1307b2032cfb7220e1a079d0c65488fbd7118f44935a"}, + {file = "simplejson-3.19.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:344a5093b71c1b370968d0fbd14d55c9413cb6f0355fdefeb4a322d602d21776"}, + {file = "simplejson-3.19.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:23fbb7b46d44ed7cbcda689295862851105c7594ae5875dce2a70eeaa498ff86"}, + {file = "simplejson-3.19.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d3025e7e9ddb48813aec2974e1a7e68e63eac911dd5e0a9568775de107ac79a"}, + {file = "simplejson-3.19.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:87b190e6ceec286219bd6b6f13547ca433f977d4600b4e81739e9ac23b5b9ba9"}, + {file = "simplejson-3.19.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:dc935d8322ba9bc7b84f99f40f111809b0473df167bf5b93b89fb719d2c4892b"}, + {file = "simplejson-3.19.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:3b652579c21af73879d99c8072c31476788c8c26b5565687fd9db154070d852a"}, + {file = "simplejson-3.19.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6aa7ca03f25b23b01629b1c7f78e1cd826a66bfb8809f8977a3635be2ec48f1a"}, + {file = "simplejson-3.19.1-cp38-cp38-win32.whl", hash = "sha256:08be5a241fdf67a8e05ac7edbd49b07b638ebe4846b560673e196b2a25c94b92"}, + {file = "simplejson-3.19.1-cp38-cp38-win_amd64.whl", hash = "sha256:ca56a6c8c8236d6fe19abb67ef08d76f3c3f46712c49a3b6a5352b6e43e8855f"}, + {file = "simplejson-3.19.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6424d8229ba62e5dbbc377908cfee9b2edf25abd63b855c21f12ac596cd18e41"}, + {file = "simplejson-3.19.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:547ea86ca408a6735335c881a2e6208851027f5bfd678d8f2c92a0f02c7e7330"}, + {file = "simplejson-3.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:889328873c35cb0b2b4c83cbb83ec52efee5a05e75002e2c0c46c4e42790e83c"}, + {file = "simplejson-3.19.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44cdb4e544134f305b033ad79ae5c6b9a32e7c58b46d9f55a64e2a883fbbba01"}, + {file = "simplejson-3.19.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc2b3f06430cbd4fac0dae5b2974d2bf14f71b415fb6de017f498950da8159b1"}, + {file = "simplejson-3.19.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d125e754d26c0298715bdc3f8a03a0658ecbe72330be247f4b328d229d8cf67f"}, + {file = "simplejson-3.19.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:476c8033abed7b1fd8db62a7600bf18501ce701c1a71179e4ce04ac92c1c5c3c"}, + {file = "simplejson-3.19.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:199a0bcd792811c252d71e3eabb3d4a132b3e85e43ebd93bfd053d5b59a7e78b"}, + {file = "simplejson-3.19.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a79b439a6a77649bb8e2f2644e6c9cc0adb720fc55bed63546edea86e1d5c6c8"}, + {file = "simplejson-3.19.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:203412745fed916fc04566ecef3f2b6c872b52f1e7fb3a6a84451b800fb508c1"}, + {file = "simplejson-3.19.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5ca922c61d87b4c38f37aa706520328ffe22d7ac1553ef1cadc73f053a673553"}, + {file = "simplejson-3.19.1-cp39-cp39-win32.whl", hash = "sha256:3e0902c278243d6f7223ba3e6c5738614c971fd9a887fff8feaa8dcf7249c8d4"}, + {file = "simplejson-3.19.1-cp39-cp39-win_amd64.whl", hash = "sha256:d396b610e77b0c438846607cd56418bfc194973b9886550a98fd6724e8c6cfec"}, + {file = "simplejson-3.19.1-py3-none-any.whl", hash = "sha256:4710806eb75e87919b858af0cba4ffedc01b463edc3982ded7b55143f39e41e1"}, + {file = "simplejson-3.19.1.tar.gz", hash = "sha256:6277f60848a7d8319d27d2be767a7546bc965535b28070e310b3a9af90604a4c"}, +] +six = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] +smmap = [ + {file = "smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"}, + {file = "smmap-5.0.0.tar.gz", hash = "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936"}, +] +sniffio = [ + {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"}, + {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, +] +snowflake-connector-python = [ + {file = "snowflake-connector-python-3.5.0.tar.gz", hash = "sha256:654e4a1f68a491544bd8f7c5ab02eb8531df67c5f4309d5253bd204044f8a1b3"}, + {file = "snowflake_connector_python-3.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a365fa4f23be27a4a46d04f73a48ccb1ddad5b9558f100ba592a49571c90a33c"}, + {file = "snowflake_connector_python-3.5.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:5b648b8f32aa540e9adf14e84ea5d77a6c3c6cbc3cbcf172622a0b8db0e99384"}, + {file = "snowflake_connector_python-3.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:722dc0100c3247788aeb975a8a5941f2f757e8524d2626cf6fe78df02b6384fb"}, + {file = "snowflake_connector_python-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:7029b8776c5d2153ed2b0254dc23ae1e3bde141b6634fc6c77b919ed29d5bb42"}, + {file = "snowflake_connector_python-3.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:3472703fc4f308343d925c41dab976a42e10192fa0b8b9025e80b083ad7dcf1b"}, + {file = "snowflake_connector_python-3.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:40f4a376b6da875d70383b60c66ad3723f0bed21d8bdbf7afb39525cb70c70ef"}, + {file = "snowflake_connector_python-3.5.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:8a08d8df6f1b5b5d0bf9145e6339dbeaf294392529629d0bd7e4dd3e49d7892c"}, + {file = "snowflake_connector_python-3.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac16a00bb3824069303e119cd049858c2caf92d174f9486ba273d19abf06a18d"}, + {file = "snowflake_connector_python-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a820148b64436621b5db79c2e7848d5d12ece13b0948281c19dd2f8a50e4dbe"}, + {file = "snowflake_connector_python-3.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:ffa8f95a767e5077e82cf290a43950f37cfc25e34935f038abc96494a1595a03"}, + {file = "snowflake_connector_python-3.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ef70cd89aee56fbbaeb68dc1f7612598b0c8a470d16ddb68ca7657bd70cbf8d7"}, + {file = "snowflake_connector_python-3.5.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:09ff23c1aa4bf9e148e491512a81b097ce0b1c2a870f3d0bb0dc5febf764c45c"}, + {file = "snowflake_connector_python-3.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e696f133c57494dce57a68a92d1e2cf20334361400fe3c4c73637627f7d9c0ec"}, + {file = "snowflake_connector_python-3.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0136a9fb45013ea3d50045acb3cedb50b2d5d6ac1d0f9adc538e28cf86a1386"}, + {file = "snowflake_connector_python-3.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:133e2a8a5e7b59d84e83886bb516d290edbd0b92dd69304f8f7ac613faca2aeb"}, + {file = "snowflake_connector_python-3.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c463d11b05b57c40eb83d84044d761535a855e498ffd52456e92eed333e43b17"}, + {file = "snowflake_connector_python-3.5.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:cdd198dbc0aff373bb9e95f315cdc0b922ae61186ba9bd7da4950835827cd7f9"}, + {file = "snowflake_connector_python-3.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d8769b95a46040261a46dc58757c59b26e6122466222d8b8e518ea6aa62e83d"}, + {file = "snowflake_connector_python-3.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee97a8ac0aaf40a7b7420c8936a66d8d33376cd40498ac3d38efa7bb5712d14a"}, + {file = "snowflake_connector_python-3.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:e8cd747e2719ba44dd2ce0e9b1e6f8b03485b2b335a352f3b45138b56fad5888"}, +] +sortedcontainers = [ + {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"}, + {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, +] +soupsieve = [ + {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, + {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, +] +sqlalchemy = [ + {file = "SQLAlchemy-1.4.49-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2e126cf98b7fd38f1e33c64484406b78e937b1a280e078ef558b95bf5b6895f6"}, + {file = 
"SQLAlchemy-1.4.49-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:03db81b89fe7ef3857b4a00b63dedd632d6183d4ea5a31c5d8a92e000a41fc71"}, + {file = "SQLAlchemy-1.4.49-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:95b9df9afd680b7a3b13b38adf6e3a38995da5e162cc7524ef08e3be4e5ed3e1"}, + {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a63e43bf3f668c11bb0444ce6e809c1227b8f067ca1068898f3008a273f52b09"}, + {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f835c050ebaa4e48b18403bed2c0fda986525896efd76c245bdd4db995e51a4c"}, + {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c21b172dfb22e0db303ff6419451f0cac891d2e911bb9fbf8003d717f1bcf91"}, + {file = "SQLAlchemy-1.4.49-cp310-cp310-win32.whl", hash = "sha256:5fb1ebdfc8373b5a291485757bd6431de8d7ed42c27439f543c81f6c8febd729"}, + {file = "SQLAlchemy-1.4.49-cp310-cp310-win_amd64.whl", hash = "sha256:f8a65990c9c490f4651b5c02abccc9f113a7f56fa482031ac8cb88b70bc8ccaa"}, + {file = "SQLAlchemy-1.4.49-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8923dfdf24d5aa8a3adb59723f54118dd4fe62cf59ed0d0d65d940579c1170a4"}, + {file = "SQLAlchemy-1.4.49-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9ab2c507a7a439f13ca4499db6d3f50423d1d65dc9b5ed897e70941d9e135b0"}, + {file = "SQLAlchemy-1.4.49-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5debe7d49b8acf1f3035317e63d9ec8d5e4d904c6e75a2a9246a119f5f2fdf3d"}, + {file = "SQLAlchemy-1.4.49-cp311-cp311-win32.whl", hash = "sha256:82b08e82da3756765c2e75f327b9bf6b0f043c9c3925fb95fb51e1567fa4ee87"}, + {file = "SQLAlchemy-1.4.49-cp311-cp311-win_amd64.whl", hash = "sha256:171e04eeb5d1c0d96a544caf982621a1711d078dbc5c96f11d6469169bd003f1"}, + {file = "SQLAlchemy-1.4.49-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:36e58f8c4fe43984384e3fbe6341ac99b6b4e083de2fe838f0fdb91cebe9e9cb"}, + {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b31e67ff419013f99ad6f8fc73ee19ea31585e1e9fe773744c0f3ce58c039c30"}, + {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c14b29d9e1529f99efd550cd04dbb6db6ba5d690abb96d52de2bff4ed518bc95"}, + {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c40f3470e084d31247aea228aa1c39bbc0904c2b9ccbf5d3cfa2ea2dac06f26d"}, + {file = "SQLAlchemy-1.4.49-cp36-cp36m-win32.whl", hash = "sha256:706bfa02157b97c136547c406f263e4c6274a7b061b3eb9742915dd774bbc264"}, + {file = "SQLAlchemy-1.4.49-cp36-cp36m-win_amd64.whl", hash = "sha256:a7f7b5c07ae5c0cfd24c2db86071fb2a3d947da7bd487e359cc91e67ac1c6d2e"}, + {file = "SQLAlchemy-1.4.49-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:4afbbf5ef41ac18e02c8dc1f86c04b22b7a2125f2a030e25bbb4aff31abb224b"}, + {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24e300c0c2147484a002b175f4e1361f102e82c345bf263242f0449672a4bccf"}, + {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:201de072b818f8ad55c80d18d1a788729cccf9be6d9dc3b9d8613b053cd4836d"}, + 
{file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7653ed6817c710d0c95558232aba799307d14ae084cc9b1f4c389157ec50df5c"}, + {file = "SQLAlchemy-1.4.49-cp37-cp37m-win32.whl", hash = "sha256:647e0b309cb4512b1f1b78471fdaf72921b6fa6e750b9f891e09c6e2f0e5326f"}, + {file = "SQLAlchemy-1.4.49-cp37-cp37m-win_amd64.whl", hash = "sha256:ab73ed1a05ff539afc4a7f8cf371764cdf79768ecb7d2ec691e3ff89abbc541e"}, + {file = "SQLAlchemy-1.4.49-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:37ce517c011560d68f1ffb28af65d7e06f873f191eb3a73af5671e9c3fada08a"}, + {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1878ce508edea4a879015ab5215546c444233881301e97ca16fe251e89f1c55"}, + {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0e8e608983e6f85d0852ca61f97e521b62e67969e6e640fe6c6b575d4db68557"}, + {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ccf956da45290df6e809ea12c54c02ace7f8ff4d765d6d3dfb3655ee876ce58d"}, + {file = "SQLAlchemy-1.4.49-cp38-cp38-win32.whl", hash = "sha256:f167c8175ab908ce48bd6550679cc6ea20ae169379e73c7720a28f89e53aa532"}, + {file = "SQLAlchemy-1.4.49-cp38-cp38-win_amd64.whl", hash = "sha256:45806315aae81a0c202752558f0df52b42d11dd7ba0097bf71e253b4215f34f4"}, + {file = "SQLAlchemy-1.4.49-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:b6d0c4b15d65087738a6e22e0ff461b407533ff65a73b818089efc8eb2b3e1de"}, + {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a843e34abfd4c797018fd8d00ffffa99fd5184c421f190b6ca99def4087689bd"}, + {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:1c890421651b45a681181301b3497e4d57c0d01dc001e10438a40e9a9c25ee77"}, + {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d26f280b8f0a8f497bc10573849ad6dc62e671d2468826e5c748d04ed9e670d5"}, + {file = "SQLAlchemy-1.4.49-cp39-cp39-win32.whl", hash = "sha256:ec2268de67f73b43320383947e74700e95c6770d0c68c4e615e9897e46296294"}, + {file = "SQLAlchemy-1.4.49-cp39-cp39-win_amd64.whl", hash = "sha256:bbdf16372859b8ed3f4d05f925a984771cd2abd18bd187042f24be4886c2a15f"}, + {file = "SQLAlchemy-1.4.49.tar.gz", hash = "sha256:06ff25cbae30c396c4b7737464f2a7fc37a67b7da409993b182b024cec80aed9"}, +] +sqlalchemy-jsonfield = [ + {file = "SQLAlchemy-JSONField-1.0.1.post0.tar.gz", hash = "sha256:72a5e714fe0493d2660abd7484a9fd9f492f493a0856288dd22a5decb29f5dc4"}, + {file = "SQLAlchemy_JSONField-1.0.1.post0-py3-none-any.whl", hash = "sha256:d6f1e5ee329a3c0d9d164e40d81a2143ac8332e09988fbbaff84179dac5503d4"}, +] +sqlalchemy-utils = [ + {file = "SQLAlchemy-Utils-0.41.1.tar.gz", hash = "sha256:a2181bff01eeb84479e38571d2c0718eb52042f9afd8c194d0d02877e84b7d74"}, + {file = "SQLAlchemy_Utils-0.41.1-py3-none-any.whl", hash = "sha256:6c96b0768ea3f15c0dc56b363d386138c562752b84f647fb8d31a2223aaab801"}, +] +sqlfluff = [ + {file = "sqlfluff-2.3.2-py3-none-any.whl", hash = "sha256:85c8b683e283ff632fe28529ddb60585ea2d1d3c614fc7a1db171632b99dcce3"}, + {file = "sqlfluff-2.3.2.tar.gz", hash = "sha256:3403ce7e9133766d7336b7e26638657ec6cc9e5610e35186b7f02cc427dd49b7"}, +] +sqlparse = [ + {file = "sqlparse-0.4.4-py3-none-any.whl", 
hash = "sha256:5430a4fe2ac7d0f93e66f1efc6e1338a41884b7ddf2a350cedd20ccc4d9d28f3"}, + {file = "sqlparse-0.4.4.tar.gz", hash = "sha256:d446183e84b8349fa3061f0fe7f06ca94ba65b426946ffebe6e3e8295332420c"}, +] +stevedore = [ + {file = "stevedore-5.1.0-py3-none-any.whl", hash = "sha256:8cc040628f3cea5d7128f2e76cf486b2251a4e543c7b938f58d9a377f6694a2d"}, + {file = "stevedore-5.1.0.tar.gz", hash = "sha256:a54534acf9b89bc7ed264807013b505bf07f74dbe4bcfa37d32bd063870b087c"}, +] +sympy = [ + {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"}, + {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"}, +] +tabulate = [ + {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, + {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, +] +tblib = [ + {file = "tblib-2.0.0-py3-none-any.whl", hash = "sha256:9100bfa016b047d5b980d66e7efed952fbd20bd85b56110aaf473cb97d18709a"}, + {file = "tblib-2.0.0.tar.gz", hash = "sha256:a6df30f272c08bf8be66e0775fad862005d950a6b8449b94f7c788731d70ecd7"}, +] +tenacity = [ + {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, + {file = "tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a"}, +] +termcolor = [ + {file = "termcolor-2.3.0-py3-none-any.whl", hash = "sha256:3afb05607b89aed0ffe25202399ee0867ad4d3cb4180d98aaf8eefa6a5f7d475"}, + {file = "termcolor-2.3.0.tar.gz", hash = "sha256:b5b08f68937f138fe92f6c089b99f1e2da0ae56c52b78bf7075fd95420fd9a5a"}, +] +text-unidecode = [ + {file = "text-unidecode-1.3.tar.gz", hash = "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93"}, + {file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"}, +] +tokenizers = [ + {file = "tokenizers-0.13.3-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:f3835c5be51de8c0a092058a4d4380cb9244fb34681fd0a295fbf0a52a5fdf33"}, + {file = "tokenizers-0.13.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4ef4c3e821730f2692489e926b184321e887f34fb8a6b80b8096b966ba663d07"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5fd1a6a25353e9aa762e2aae5a1e63883cad9f4e997c447ec39d071020459bc"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ee0b1b311d65beab83d7a41c56a1e46ab732a9eed4460648e8eb0bd69fc2d059"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ef4215284df1277dadbcc5e17d4882bda19f770d02348e73523f7e7d8b8d396"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4d53976079cff8a033f778fb9adca2d9d69d009c02fa2d71a878b5f3963ed30"}, + {file = "tokenizers-0.13.3-cp310-cp310-win32.whl", hash = "sha256:1f0e3b4c2ea2cd13238ce43548959c118069db7579e5d40ec270ad77da5833ce"}, + {file = "tokenizers-0.13.3-cp310-cp310-win_amd64.whl", hash = "sha256:89649c00d0d7211e8186f7a75dfa1db6996f65edce4b84821817eadcc2d3c79e"}, + {file = "tokenizers-0.13.3-cp311-cp311-macosx_10_11_universal2.whl", hash = "sha256:56b726e0d2bbc9243872b0144515ba684af5b8d8cd112fb83ee1365e26ec74c8"}, + {file = "tokenizers-0.13.3-cp311-cp311-macosx_12_0_arm64.whl", 
hash = "sha256:cc5c022ce692e1f499d745af293ab9ee6f5d92538ed2faf73f9708c89ee59ce6"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f55c981ac44ba87c93e847c333e58c12abcbb377a0c2f2ef96e1a266e4184ff2"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f247eae99800ef821a91f47c5280e9e9afaeed9980fc444208d5aa6ba69ff148"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b3e3215d048e94f40f1c95802e45dcc37c5b05eb46280fc2ccc8cd351bff839"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ba2b0bf01777c9b9bc94b53764d6684554ce98551fec496f71bc5be3a03e98b"}, + {file = "tokenizers-0.13.3-cp311-cp311-win32.whl", hash = "sha256:cc78d77f597d1c458bf0ea7c2a64b6aa06941c7a99cb135b5969b0278824d808"}, + {file = "tokenizers-0.13.3-cp311-cp311-win_amd64.whl", hash = "sha256:ecf182bf59bd541a8876deccf0360f5ae60496fd50b58510048020751cf1724c"}, + {file = "tokenizers-0.13.3-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:0527dc5436a1f6bf2c0327da3145687d3bcfbeab91fed8458920093de3901b44"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07cbb2c307627dc99b44b22ef05ff4473aa7c7cc1fec8f0a8b37d8a64b1a16d2"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4560dbdeaae5b7ee0d4e493027e3de6d53c991b5002d7ff95083c99e11dd5ac0"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:64064bd0322405c9374305ab9b4c07152a1474370327499911937fd4a76d004b"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8c6e2ab0f2e3d939ca66aa1d596602105fe33b505cd2854a4c1717f704c51de"}, + {file = "tokenizers-0.13.3-cp37-cp37m-win32.whl", hash = "sha256:6cc29d410768f960db8677221e497226e545eaaea01aa3613fa0fdf2cc96cff4"}, + {file = "tokenizers-0.13.3-cp37-cp37m-win_amd64.whl", hash = "sha256:fc2a7fdf864554a0dacf09d32e17c0caa9afe72baf9dd7ddedc61973bae352d8"}, + {file = "tokenizers-0.13.3-cp38-cp38-macosx_10_11_x86_64.whl", hash = "sha256:8791dedba834c1fc55e5f1521be325ea3dafb381964be20684b92fdac95d79b7"}, + {file = "tokenizers-0.13.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:d607a6a13718aeb20507bdf2b96162ead5145bbbfa26788d6b833f98b31b26e1"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3791338f809cd1bf8e4fee6b540b36822434d0c6c6bc47162448deee3f77d425"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2f35f30e39e6aab8716f07790f646bdc6e4a853816cc49a95ef2a9016bf9ce6"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:310204dfed5aa797128b65d63538a9837cbdd15da2a29a77d67eefa489edda26"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0f9b92ea052305166559f38498b3b0cae159caea712646648aaa272f7160963"}, + {file = "tokenizers-0.13.3-cp38-cp38-win32.whl", hash = "sha256:9a3fa134896c3c1f0da6e762d15141fbff30d094067c8f1157b9fdca593b5806"}, + {file = "tokenizers-0.13.3-cp38-cp38-win_amd64.whl", hash = "sha256:8e7b0cdeace87fa9e760e6a605e0ae8fc14b7d72e9fc19c578116f7287bb873d"}, + {file = "tokenizers-0.13.3-cp39-cp39-macosx_10_11_x86_64.whl", hash = 
"sha256:00cee1e0859d55507e693a48fa4aef07060c4bb6bd93d80120e18fea9371c66d"}, + {file = "tokenizers-0.13.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:a23ff602d0797cea1d0506ce69b27523b07e70f6dda982ab8cf82402de839088"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70ce07445050b537d2696022dafb115307abdffd2a5c106f029490f84501ef97"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:280ffe95f50eaaf655b3a1dc7ff1d9cf4777029dbbc3e63a74e65a056594abc3"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97acfcec592f7e9de8cadcdcda50a7134423ac8455c0166b28c9ff04d227b371"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd7730c98a3010cd4f523465867ff95cd9d6430db46676ce79358f65ae39797b"}, + {file = "tokenizers-0.13.3-cp39-cp39-win32.whl", hash = "sha256:48625a108029cb1ddf42e17a81b5a3230ba6888a70c9dc14e81bc319e812652d"}, + {file = "tokenizers-0.13.3-cp39-cp39-win_amd64.whl", hash = "sha256:bc0a6f1ba036e482db6453571c9e3e60ecd5489980ffd95d11dc9f960483d783"}, + {file = "tokenizers-0.13.3.tar.gz", hash = "sha256:2e546dbb68b623008a5442353137fbb0123d311a6d7ba52f2667c8862a75af2e"}, +] +toml = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] +tomli = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] +tomli-w = [ + {file = "tomli_w-1.0.0-py3-none-any.whl", hash = "sha256:9f2a07e8be30a0729e533ec968016807069991ae2fd921a78d42f429ae5f4463"}, + {file = "tomli_w-1.0.0.tar.gz", hash = "sha256:f463434305e0336248cac9c2dc8076b707d8a12d019dd349f5c1e382dd1ae1b9"}, +] +tomlkit = [ + {file = "tomlkit-0.12.1-py3-none-any.whl", hash = "sha256:712cbd236609acc6a3e2e97253dfc52d4c2082982a88f61b640ecf0817eab899"}, + {file = "tomlkit-0.12.1.tar.gz", hash = "sha256:38e1ff8edb991273ec9f6181244a6a391ac30e9f5098e7535640ea6be97a7c86"}, +] +tqdm = [ + {file = "tqdm-4.66.1-py3-none-any.whl", hash = "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"}, + {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"}, +] +typeapi = [ + {file = "typeapi-2.1.1-py3-none-any.whl", hash = "sha256:ef41577f316bfd362572e727ba349dab80a7362318a80fc72e6a807017d04c5c"}, + {file = "typeapi-2.1.1.tar.gz", hash = "sha256:49b3c1d3382e27dccbb59132a3a823c61954f679a0c61f119fd6d8470073a298"}, +] +types-awscrt = [ + {file = "types_awscrt-0.19.1-py3-none-any.whl", hash = "sha256:68fffeb75396e9e7614cd930b2d52295f680230774750907bcafb56f11514043"}, + {file = "types_awscrt-0.19.1.tar.gz", hash = "sha256:61833aa140e724a9098025610f4b8cde3dcf65b842631d7447378f9f5db4e1fd"}, +] +types-cachetools = [ + {file = "types-cachetools-5.3.0.6.tar.gz", hash = "sha256:595f0342d246c8ba534f5a762cf4c2f60ecb61e8002b8b2277fd5cf791d4e851"}, + {file = "types_cachetools-5.3.0.6-py3-none-any.whl", hash = "sha256:f7f8a25bfe306f2e6bc2ad0a2f949d9e72f2d91036d509c36d3810bf728bc6e1"}, +] +types-click = [ + {file = "types-click-7.1.8.tar.gz", hash = 
"sha256:b6604968be6401dc516311ca50708a0a28baa7a0cb840efd7412f0dbbff4e092"}, + {file = "types_click-7.1.8-py3-none-any.whl", hash = "sha256:8cb030a669e2e927461be9827375f83c16b8178c365852c060a34e24871e7e81"}, +] +types-deprecated = [ + {file = "types-Deprecated-1.2.9.3.tar.gz", hash = "sha256:ef87327adf3e3c4a4c7d8e06e58f6476710d3466ecfb53c49efb080804a70ef3"}, + {file = "types_Deprecated-1.2.9.3-py3-none-any.whl", hash = "sha256:24da9210763e5e1b3d0d4f6f8bba9ad3bb6af3fe7f6815fc37e3ede4681704f5"}, +] +types-protobuf = [ + {file = "types-protobuf-4.24.0.1.tar.gz", hash = "sha256:90adea3b693d6a40d8ef075c58fe6b5cc6e01fe1496301a7e6fc70398dcff92e"}, + {file = "types_protobuf-4.24.0.1-py3-none-any.whl", hash = "sha256:df203a204e4ae97d4cca4c9cf725262579dd7857a19f9e7fc74871ccfa073c01"}, +] +types-psutil = [ {file = "types-psutil-5.9.5.16.tar.gz", hash = "sha256:4e9b219efb625d3d04f6bf106934f87cab49aa41a94b0a3b3089403f47a79228"}, {file = "types_psutil-5.9.5.16-py3-none-any.whl", hash = "sha256:fec713104d5d143afea7b976cfa691ca1840f5d19e8714a5d02a96ebd061363e"}, ] - -[[package]] -name = "types-psycopg2" -version = "2.9.21.14" -description = "Typing stubs for psycopg2" -optional = false -python-versions = "*" -files = [ +types-psycopg2 = [ {file = "types-psycopg2-2.9.21.14.tar.gz", hash = "sha256:bf73a0ac4da4e278c89bf1b01fc596d5a5ac7a356cfe6ac0249f47b9e259f868"}, {file = "types_psycopg2-2.9.21.14-py3-none-any.whl", hash = "sha256:cd9c5350631f3bc6184ec8d48f2ed31d4ea660f89d0fffe78239450782f383c5"}, ] - -[[package]] -name = "types-python-dateutil" -version = "2.8.19.14" -description = "Typing stubs for python-dateutil" -optional = false -python-versions = "*" -files = [ +types-python-dateutil = [ {file = "types-python-dateutil-2.8.19.14.tar.gz", hash = "sha256:1f4f10ac98bb8b16ade9dbee3518d9ace017821d94b057a425b069f834737f4b"}, {file = "types_python_dateutil-2.8.19.14-py3-none-any.whl", hash = "sha256:f977b8de27787639986b4e28963263fd0e5158942b3ecef91b9335c130cb1ce9"}, ] - -[[package]] -name = "types-pyyaml" -version = "6.0.12.11" -description = "Typing stubs for PyYAML" -optional = false -python-versions = "*" -files = [ +types-pyyaml = [ {file = "types-PyYAML-6.0.12.11.tar.gz", hash = "sha256:7d340b19ca28cddfdba438ee638cd4084bde213e501a3978738543e27094775b"}, {file = "types_PyYAML-6.0.12.11-py3-none-any.whl", hash = "sha256:a461508f3096d1d5810ec5ab95d7eeecb651f3a15b71959999988942063bf01d"}, ] - -[[package]] -name = "types-requests" -version = "2.31.0.2" -description = "Typing stubs for requests" -optional = false -python-versions = "*" -files = [ +types-requests = [ {file = "types-requests-2.31.0.2.tar.gz", hash = "sha256:6aa3f7faf0ea52d728bb18c0a0d1522d9bfd8c72d26ff6f61bfc3d06a411cf40"}, {file = "types_requests-2.31.0.2-py3-none-any.whl", hash = "sha256:56d181c85b5925cbc59f4489a57e72a8b2166f18273fd8ba7b6fe0c0b986f12a"}, ] - -[package.dependencies] -types-urllib3 = "*" - -[[package]] -name = "types-s3transfer" -version = "0.6.2" -description = "Type annotations and code completion for s3transfer" -optional = false -python-versions = ">=3.7,<4.0" -files = [ +types-s3transfer = [ {file = "types_s3transfer-0.6.2-py3-none-any.whl", hash = "sha256:1068877b6e59be5226fa3006ae64371ac9d5bc590dfdbd9c66fd0a075d3254ac"}, {file = "types_s3transfer-0.6.2.tar.gz", hash = "sha256:4ba9b483796fdcd026aa162ee03bdcedd2bf7d08e9387c820dcdd158b0102057"}, ] - -[[package]] -name = "types-setuptools" -version = "68.1.0.1" -description = "Typing stubs for setuptools" -optional = false -python-versions = "*" -files = [ +types-setuptools = 
[ {file = "types-setuptools-68.1.0.1.tar.gz", hash = "sha256:271ed8da44885cd9a701c86e48cc6d3cc988052260e72b3ce26c26b3028f86ed"}, {file = "types_setuptools-68.1.0.1-py3-none-any.whl", hash = "sha256:a9a0d2ca1da8a15924890d464adcee4004deb07b6a99bd0b1881eac5c73cb3a7"}, ] - -[[package]] -name = "types-simplejson" -version = "3.19.0.2" -description = "Typing stubs for simplejson" -optional = false -python-versions = "*" -files = [ +types-simplejson = [ {file = "types-simplejson-3.19.0.2.tar.gz", hash = "sha256:ebc81f886f89d99d6b80c726518aa2228bc77c26438f18fd81455e4f79f8ee1b"}, {file = "types_simplejson-3.19.0.2-py3-none-any.whl", hash = "sha256:8ba093dc7884f59b3e62aed217144085e675a269debc32678fd80e0b43b2b86f"}, ] - -[[package]] -name = "types-sqlalchemy" -version = "1.4.53.38" -description = "Typing stubs for SQLAlchemy" -optional = false -python-versions = "*" -files = [ +types-sqlalchemy = [ {file = "types-SQLAlchemy-1.4.53.38.tar.gz", hash = "sha256:5bb7463537e04e1aa5a3557eb725930df99226dcfd3c9bf93008025bfe5c169e"}, {file = "types_SQLAlchemy-1.4.53.38-py3-none-any.whl", hash = "sha256:7e60e74f823931cc9a9e8adb0a4c05e5533e6708b8a266807893a739faf4eaaa"}, ] - -[[package]] -name = "types-tqdm" -version = "4.66.0.2" -description = "Typing stubs for tqdm" -optional = false -python-versions = "*" -files = [ +types-tqdm = [ {file = "types-tqdm-4.66.0.2.tar.gz", hash = "sha256:9553a5e44c1d485fce19f505b8bd65c0c3e87e870678d1f2ed764ae59a55d45f"}, {file = "types_tqdm-4.66.0.2-py3-none-any.whl", hash = "sha256:13dddd38908834abdf0acdc2b70cab7ac4bcc5ad7356ced450471662e58a0ffc"}, ] - -[[package]] -name = "types-urllib3" -version = "1.26.25.14" -description = "Typing stubs for urllib3" -optional = false -python-versions = "*" -files = [ +types-urllib3 = [ {file = "types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f"}, {file = "types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e"}, ] - -[[package]] -name = "typing-extensions" -version = "4.7.1" -description = "Backported and Experimental Type Hints for Python 3.7+" -optional = false -python-versions = ">=3.7" -files = [ +typing-extensions = [ {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"}, {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"}, ] - -[[package]] -name = "tzdata" -version = "2023.3" -description = "Provider of IANA time zone data" -optional = false -python-versions = ">=2" -files = [ +tzdata = [ {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, ] - -[[package]] -name = "uc-micro-py" -version = "1.0.2" -description = "Micro subset of unicode data files for linkify-it-py projects." 
-optional = false -python-versions = ">=3.7" -files = [ +uc-micro-py = [ {file = "uc-micro-py-1.0.2.tar.gz", hash = "sha256:30ae2ac9c49f39ac6dce743bd187fcd2b574b16ca095fa74cd9396795c954c54"}, {file = "uc_micro_py-1.0.2-py3-none-any.whl", hash = "sha256:8c9110c309db9d9e87302e2f4ad2c3152770930d88ab385cd544e7a7e75f3de0"}, ] - -[package.extras] -test = ["coverage", "pytest", "pytest-cov"] - -[[package]] -name = "unicodecsv" -version = "0.14.1" -description = "Python2's stdlib csv module is nice, but it doesn't support unicode. This module is a drop-in replacement which *does*." -optional = false -python-versions = "*" -files = [ +unicodecsv = [ {file = "unicodecsv-0.14.1.tar.gz", hash = "sha256:018c08037d48649a0412063ff4eda26eaa81eff1546dbffa51fa5293276ff7fc"}, ] - -[[package]] -name = "uritemplate" -version = "4.1.1" -description = "Implementation of RFC 6570 URI Templates" -optional = false -python-versions = ">=3.6" -files = [ +uritemplate = [ {file = "uritemplate-4.1.1-py2.py3-none-any.whl", hash = "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"}, {file = "uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"}, ] - -[[package]] -name = "urllib3" -version = "1.26.16" -description = "HTTP library with thread-safe connection pooling, file post, and more." -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" -files = [ +urllib3 = [ {file = "urllib3-1.26.16-py2.py3-none-any.whl", hash = "sha256:8d36afa7616d8ab714608411b4a3b13e58f463aee519024578e062e141dce20f"}, {file = "urllib3-1.26.16.tar.gz", hash = "sha256:8f135f6502756bde6b2a9b28989df5fbe87c9970cecaa69041edcce7f0589b14"}, ] - -[package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] -socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] - -[[package]] -name = "validators" -version = "0.21.0" -description = "Python Data Validation for Humans™" -optional = true -python-versions = ">=3.8,<4.0" -files = [ +validators = [ {file = "validators-0.21.0-py3-none-any.whl", hash = "sha256:3470db6f2384c49727ee319afa2e97aec3f8fad736faa6067e0fd7f9eaf2c551"}, {file = "validators-0.21.0.tar.gz", hash = "sha256:245b98ab778ed9352a7269c6a8f6c2a839bed5b2a7e3e60273ce399d247dd4b3"}, ] - -[[package]] -name = "watchdog" -version = "3.0.0" -description = "Filesystem events monitoring" -optional = false -python-versions = ">=3.7" -files = [ +watchdog = [ {file = "watchdog-3.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:336adfc6f5cc4e037d52db31194f7581ff744b67382eb6021c868322e32eef41"}, {file = "watchdog-3.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a70a8dcde91be523c35b2bf96196edc5730edb347e374c7de7cd20c43ed95397"}, {file = "watchdog-3.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:adfdeab2da79ea2f76f87eb42a3ab1966a5313e5a69a0213a3cc06ef692b0e96"}, @@ -7976,79 +8483,38 @@ files = [ {file = "watchdog-3.0.0-py3-none-win_ia64.whl", hash = "sha256:5d9f3a10e02d7371cd929b5d8f11e87d4bad890212ed3901f9b4d68767bee759"}, {file = "watchdog-3.0.0.tar.gz", hash = "sha256:4d98a320595da7a7c5a18fc48cb633c2e73cda78f93cac2ef42d42bf609a33f9"}, ] - -[package.extras] -watchmedo = ["PyYAML (>=3.10)"] - -[[package]] -name = "wcwidth" -version = "0.2.6" -description = "Measures the displayed width of unicode strings in a terminal" -optional = false -python-versions = "*" -files = [ 
+wcwidth = [ {file = "wcwidth-0.2.6-py2.py3-none-any.whl", hash = "sha256:795b138f6875577cd91bba52baf9e445cd5118fd32723b460e30a0af30ea230e"}, {file = "wcwidth-0.2.6.tar.gz", hash = "sha256:a5220780a404dbe3353789870978e472cfe477761f06ee55077256e509b156d0"}, ] - -[[package]] -name = "weaviate-client" -version = "3.23.2" -description = "A python native Weaviate client" -optional = true -python-versions = ">=3.8" -files = [ +weaviate-client = [ {file = "weaviate-client-3.23.2.tar.gz", hash = "sha256:1c8c94df032dd2fa5a4ea615fc69ccb983ffad5cc02974f78c793839e61ac150"}, {file = "weaviate_client-3.23.2-py3-none-any.whl", hash = "sha256:88ffc38cca07806d64726cc74bc194c7da50b222aa4e2cd129f4c1f5e53e9b61"}, ] - -[package.dependencies] -authlib = ">=1.1.0" -requests = ">=2.28.0,<=2.31.0" -tqdm = ">=4.59.0,<5.0.0" -validators = ">=0.18.2,<=0.21.0" - -[package.extras] -grpc = ["grpcio", "grpcio-tools"] - -[[package]] -name = "werkzeug" -version = "2.3.7" -description = "The comprehensive WSGI web application library." -optional = false -python-versions = ">=3.8" -files = [ +werkzeug = [ {file = "werkzeug-2.3.7-py3-none-any.whl", hash = "sha256:effc12dba7f3bd72e605ce49807bbe692bd729c3bb122a3b91747a6ae77df528"}, {file = "werkzeug-2.3.7.tar.gz", hash = "sha256:2b8c0e447b4b9dbcc85dd97b6eeb4dcbaf6c8b6c3be0bd654e25553e0a2157d8"}, ] - -[package.dependencies] -MarkupSafe = ">=2.1.1" - -[package.extras] -watchdog = ["watchdog (>=2.3)"] - -[[package]] -name = "wheel" -version = "0.41.2" -description = "A built-package format for Python" -optional = false -python-versions = ">=3.7" -files = [ +wheel = [ {file = "wheel-0.41.2-py3-none-any.whl", hash = "sha256:75909db2664838d015e3d9139004ee16711748a52c8f336b52882266540215d8"}, {file = "wheel-0.41.2.tar.gz", hash = "sha256:0c5ac5ff2afb79ac23ab82bab027a0be7b5dbcf2e54dc50efe4bf507de1f7985"}, ] - -[package.extras] -test = ["pytest (>=6.0.0)", "setuptools (>=65)"] - -[[package]] -name = "wrapt" -version = "1.15.0" -description = "Module for decorators, wrappers and monkey patching." 
-optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" -files = [ +win-precise-time = [ + {file = "win-precise-time-1.4.2.tar.gz", hash = "sha256:89274785cbc5f2997e01675206da3203835a442c60fd97798415c6b3c179c0b9"}, + {file = "win_precise_time-1.4.2-cp310-cp310-win32.whl", hash = "sha256:7fa13a2247c2ef41cd5e9b930f40716eacc7fc1f079ea72853bd5613fe087a1a"}, + {file = "win_precise_time-1.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:bb8e44b0fc35fde268e8a781cdcd9f47d47abcd8089465d2d1d1063976411c8e"}, + {file = "win_precise_time-1.4.2-cp311-cp311-win32.whl", hash = "sha256:59272655ad6f36910d0b585969402386fa627fca3be24acc9a21be1d550e5db8"}, + {file = "win_precise_time-1.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:0897bb055f19f3b4336e2ba6bee0115ac20fd7ec615a6d736632e2df77f8851a"}, + {file = "win_precise_time-1.4.2-cp312-cp312-win32.whl", hash = "sha256:0210dcea88a520c91de1708ae4c881e3c0ddc956daa08b9eabf2b7c35f3109f5"}, + {file = "win_precise_time-1.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:85670f77cc8accd8f1e6d05073999f77561c23012a9ee988cbd44bb7ce655062"}, + {file = "win_precise_time-1.4.2-cp37-cp37m-win32.whl", hash = "sha256:3e23693201a0fc6ca39f016871e2581e20c91123734bd48a69259f8c8724eedb"}, + {file = "win_precise_time-1.4.2-cp37-cp37m-win_amd64.whl", hash = "sha256:07ef644d1bb7705039bc54abfe4b45e99e8dc326dfd1dad5831dab19670508cb"}, + {file = "win_precise_time-1.4.2-cp38-cp38-win32.whl", hash = "sha256:0a953b00772f205602fa712ef68387b8fb213a30b267ae310aa56bf17605e11b"}, + {file = "win_precise_time-1.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:b5d83420925beca302b386b19c3e7414ada84b47b42f0680207f1508917a1731"}, + {file = "win_precise_time-1.4.2-cp39-cp39-win32.whl", hash = "sha256:50d11a6ff92e1be96a8d4bee99ff6dc07a0ea0e2a392b0956bb2192e334f41ba"}, + {file = "win_precise_time-1.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:3f510fa92d9c39ea533c983e1d62c7bc66fdf0a3e3c3bdda48d4ebb634ff7034"}, +] +wrapt = [ {file = "wrapt-1.15.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ca1cccf838cd28d5a0883b342474c630ac48cac5df0ee6eacc9c7290f76b11c1"}, {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e826aadda3cae59295b95343db8f3d965fb31059da7de01ee8d1c40a60398b29"}, {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5fc8e02f5984a55d2c653f5fea93531e9836abbd84342c1d1e17abc4a15084c2"}, @@ -8125,45 +8591,15 @@ files = [ {file = "wrapt-1.15.0-py3-none-any.whl", hash = "sha256:64b1df0f83706b4ef4cfb4fb0e4c2669100fd7ecacfb59e091fad300d4e04640"}, {file = "wrapt-1.15.0.tar.gz", hash = "sha256:d06730c6aed78cee4126234cf2d071e01b44b915e725a6cb439a879ec9754a3a"}, ] - -[[package]] -name = "wtforms" -version = "3.0.1" -description = "Form validation and rendering for Python web development." -optional = false -python-versions = ">=3.7" -files = [ +wtforms = [ {file = "WTForms-3.0.1-py3-none-any.whl", hash = "sha256:837f2f0e0ca79481b92884962b914eba4e72b7a2daaf1f939c890ed0124b834b"}, {file = "WTForms-3.0.1.tar.gz", hash = "sha256:6b351bbb12dd58af57ffef05bc78425d08d1914e0fd68ee14143b7ade023c5bc"}, ] - -[package.dependencies] -MarkupSafe = "*" - -[package.extras] -email = ["email-validator"] - -[[package]] -name = "yapf" -version = "0.33.0" -description = "A formatter for Python code." 
-optional = false -python-versions = "*" -files = [ +yapf = [ {file = "yapf-0.33.0-py2.py3-none-any.whl", hash = "sha256:4c2b59bd5ffe46f3a7da48df87596877189148226ce267c16e8b44240e51578d"}, {file = "yapf-0.33.0.tar.gz", hash = "sha256:da62bdfea3df3673553351e6246abed26d9fe6780e548a5af9e70f6d2b4f5b9a"}, ] - -[package.dependencies] -tomli = ">=2.0.1" - -[[package]] -name = "yarl" -version = "1.9.2" -description = "Yet another URL library" -optional = false -python-versions = ">=3.7" -files = [ +yarl = [ {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8c2ad583743d16ddbdf6bb14b5cd76bf43b0d0006e918809d5d4ddf7bde8dd82"}, {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:82aa6264b36c50acfb2424ad5ca537a2060ab6de158a5bd2a72a032cc75b9eb8"}, {file = "yarl-1.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c0c77533b5ed4bcc38e943178ccae29b9bcf48ffd1063f5821192f23a1bd27b9"}, @@ -8239,48 +8675,7 @@ files = [ {file = "yarl-1.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:61016e7d582bc46a5378ffdd02cd0314fb8ba52f40f9cf4d9a5e7dbef88dee18"}, {file = "yarl-1.9.2.tar.gz", hash = "sha256:04ab9d4b9f587c06d801c2abfe9317b77cdf996c65a90d5e84ecc45010823571"}, ] - -[package.dependencies] -idna = ">=2.0" -multidict = ">=4.0" - -[[package]] -name = "zipp" -version = "3.16.2" -description = "Backport of pathlib-compatible object wrapper for zip files" -optional = false -python-versions = ">=3.8" -files = [ +zipp = [ {file = "zipp-3.16.2-py3-none-any.whl", hash = "sha256:679e51dd4403591b2d6838a48de3d283f3d188412a9782faadf845f298736ba0"}, {file = "zipp-3.16.2.tar.gz", hash = "sha256:ebc15946aa78bd63458992fc81ec3b6f7b1e92d51c35e6de1c3804e73b799147"}, ] - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] - -[extras] -athena = ["botocore", "pyarrow", "pyathena", "s3fs"] -az = ["adlfs"] -bigquery = ["gcsfs", "google-cloud-bigquery", "grpcio", "pyarrow"] -cli = ["cron-descriptor", "pipdeptree"] -dbt = ["dbt-athena-community", "dbt-bigquery", "dbt-core", "dbt-duckdb", "dbt-redshift", "dbt-snowflake"] -duckdb = ["duckdb"] -filesystem = ["botocore", "s3fs"] -gcp = ["gcsfs", "google-cloud-bigquery", "grpcio"] -gs = ["gcsfs"] -motherduck = ["duckdb", "pyarrow"] -mssql = ["pyodbc"] -parquet = ["pyarrow"] -postgres = ["psycopg2-binary", "psycopg2cffi"] -pydantic = ["pydantic"] -qdrant = ["qdrant-client"] -redshift = ["psycopg2-binary", "psycopg2cffi"] -s3 = ["botocore", "s3fs"] -snowflake = ["snowflake-connector-python"] -weaviate = ["weaviate-client"] - -[metadata] -lock-version = "2.0" -python-versions = ">=3.8.1,<3.13" -content-hash = "f857c300f44dadb0cf25af0016c5baf5318097a2d6b0d7035f6aaa2e7fb592b2" diff --git a/pyproject.toml b/pyproject.toml index ba39912867..de2c779989 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dlt" -version = "0.3.25" +version = "0.4.1" description = "dlt is an open-source python-first scalable data loading library that does not require any backend to run." authors = ["dltHub Inc. 
"] maintainers = [ "Marcin Rudolf ", "Adrian Brudaru ", "Ty Dunn "] @@ -47,9 +47,9 @@ giturlparse = ">=0.10.0" orjson = {version = ">=3.6.7", markers="platform_python_implementation != 'PyPy'"} tenacity = ">=8.0.2" jsonpath-ng = ">=1.5.3" -deprecated = ">=1.2.9" fsspec = ">=2022.4.0" packaging = ">=21.1" +win-precise-time = {version = ">=1.4.2", markers="os_name == 'nt'"} psycopg2-binary = {version = ">=2.9.1", optional = true} # use this dependency as the current version of psycopg2cffi does not have sql module @@ -57,7 +57,7 @@ psycopg2-binary = {version = ">=2.9.1", optional = true} psycopg2cffi = {version = ">=2.9.0", optional = true, markers="platform_python_implementation == 'PyPy'"} grpcio = {version = ">=1.50.0", optional = true} google-cloud-bigquery = {version = ">=2.26.0", optional = true} -pyarrow = {version = ">=8.0.0", optional = true} +pyarrow = {version = ">=12.0.0", optional = true} duckdb = {version = ">=0.6.1,<0.10.0", optional = true} dbt-core = {version = ">=1.2.0", optional = true} dbt-redshift = {version = ">=1.2.0", optional = true} @@ -68,17 +68,15 @@ dbt-athena-community = {version = ">=1.2.0", optional = true} s3fs = {version = ">=2022.4.0", optional = true} gcsfs = {version = ">=2022.4.0", optional = true} botocore = {version = ">=1.28", optional = true} -snowflake-connector-python = {version = ">=3.1.1", optional = true, extras = ["pandas"]} +snowflake-connector-python = {version = ">=3.5.0", optional = true} cron-descriptor = {version = ">=1.2.32", optional = true} pipdeptree = {version = ">=2.9.0,<2.10", optional = true} pyathena = {version = ">=2.9.6", optional = true} weaviate-client = {version = ">=3.22", optional = true} -pydantic = {version = ">=1.10,<2.0", optional = true} adlfs = {version = ">=2022.4.0", optional = true} pyodbc = {version = "^4.0.39", optional = true} qdrant-client = {version = "^1.6.4", optional = true, extras = ["fastembed"]} - [tool.poetry.extras] dbt = ["dbt-core", "dbt-redshift", "dbt-bigquery", "dbt-duckdb", "dbt-snowflake", "dbt-athena-community"] gcp = ["grpcio", "google-cloud-bigquery", "db-dtypes", "gcsfs"] @@ -97,7 +95,6 @@ motherduck = ["duckdb", "pyarrow"] cli = ["pipdeptree", "cron-descriptor"] athena = ["pyathena", "pyarrow", "s3fs", "botocore"] weaviate = ["weaviate-client"] -pydantic = ["pydantic"] mssql = ["pyodbc"] qdrant = ["qdrant-client"] @@ -107,18 +104,15 @@ dlt = "dlt.cli._dlt:_main" [tool.poetry.group.dev.dependencies] requests-mock = "^1.10.0" types-click = "^7.1.8" -pandas = "^1.5.3" sqlfluff = "^2.3.2" -google-auth-oauthlib = "^1.0.0" types-deprecated = "^1.2.9.2" -tqdm = "^4.65.0" -enlighten = "^1.11.2" -alive-progress = "^3.1.1" pytest-console-scripts = "^1.4.1" pytest = "^6.2.4" mypy = "^1.6.1" flake8 = "^5.0.0" bandit = "^1.7.0" +black = "^23.7.0" +isort = "^5.12.0" flake8-bugbear = "^22.0.0" pytest-pythonpath = "^0.7.3" pytest-order = "^1.0.0" @@ -139,6 +133,17 @@ types-tqdm = "^4.66.0.2" types-psutil = "^5.9.5.16" types-psycopg2 = "^2.9.21.14" +[tool.poetry.group.pipeline] +optional=true + +[tool.poetry.group.pipeline.dependencies] +google-auth-oauthlib = "^1.0.0" +tqdm = "^4.65.0" +enlighten = "^1.11.2" +alive-progress = "^3.1.1" +pydantic = ">2" +pandas = ">2" + [tool.poetry.group.airflow] optional = true @@ -151,6 +156,9 @@ optional = true [tool.poetry.group.providers.dependencies] google-api-python-client = "^2.86.0" +[tool.poetry.group.sentry-sdk] +optional = true + [tool.poetry.group.sentry-sdk.dependencies] sentry-sdk = "^1.5.6" @@ -165,6 +173,23 @@ connectorx="0.3.1" dbt-core=">=1.2.0" 
dbt-duckdb=">=1.2.0" pymongo = ">=4.3.3" +pandas = ">2" + +[tool.black] # https://black.readthedocs.io/en/stable/usage_and_configuration/the_basics.html#configuration-via-a-file +line-length = 100 +preview = true + +[tool.isort] # https://pycqa.github.io/isort/docs/configuration/options.html +color_output = true +line_length = 100 +profile = "black" +src_paths = ["dlt"] +multi_line_output = 3 + +[tool.ruff] # https://beta.ruff.rs/docs/ +line-length = 100 +ignore = ["F401"] +ignore-init-module-imports = true [build-system] requires = ["poetry-core>=1.0.8"] diff --git a/pytest.ini b/pytest.ini index fc7ce9119b..88c8353a69 100644 --- a/pytest.ini +++ b/pytest.ini @@ -6,4 +6,5 @@ xfail_strict= true log_cli= 1 log_cli_level= INFO python_files = test_*.py *_test.py *snippets.py *snippet.pytest -python_functions = *_test test_* *_snippet \ No newline at end of file +python_functions = *_test test_* *_snippet +filterwarnings= ignore::DeprecationWarning \ No newline at end of file diff --git a/tests/cases.py b/tests/cases.py index ca8a97082e..8653f999c6 100644 --- a/tests/cases.py +++ b/tests/cases.py @@ -9,7 +9,12 @@ from dlt.common.data_types import TDataType from dlt.common.typing import StrAny from dlt.common.wei import Wei -from dlt.common.time import ensure_pendulum_datetime, reduce_pendulum_datetime_precision, ensure_pendulum_time, ensure_pendulum_date +from dlt.common.time import ( + ensure_pendulum_datetime, + reduce_pendulum_datetime_precision, + ensure_pendulum_time, + ensure_pendulum_date, +) from dlt.common.schema import TColumnSchema, TTableSchemaColumns @@ -20,14 +25,16 @@ JSON_TYPED_DICT: StrAny = { "str": "string", "decimal": Decimal("21.37"), - "big_decimal": Decimal("115792089237316195423570985008687907853269984665640564039457584007913129639935.1"), + "big_decimal": Decimal( + "115792089237316195423570985008687907853269984665640564039457584007913129639935.1" + ), "datetime": pendulum.parse("2005-04-02T20:37:37.358236Z"), "date": ensure_pendulum_date("2022-02-02"), # "uuid": UUID(_UUID), "hexbytes": HexBytes("0x2137"), - "bytes": b'2137', + "bytes": b"2137", "wei": Wei.from_int256(2137, decimals=2), - "time": ensure_pendulum_time("20:37:37.358236") + "time": ensure_pendulum_time("20:37:37.358236"), } # TODO: a version after PUA decoder (time is not yet implemented end to end) JSON_TYPED_DICT_DECODED = dict(JSON_TYPED_DICT) @@ -42,185 +49,76 @@ "hexbytes": "binary", "bytes": "binary", "wei": "wei", - "time": "time" + "time": "time", } JSON_TYPED_DICT_NESTED = { "dict": dict(JSON_TYPED_DICT), "list_dicts": [dict(JSON_TYPED_DICT), dict(JSON_TYPED_DICT)], "list": list(JSON_TYPED_DICT.values()), - **JSON_TYPED_DICT + **JSON_TYPED_DICT, } JSON_TYPED_DICT_NESTED_DECODED = { "dict": dict(JSON_TYPED_DICT_DECODED), "list_dicts": [dict(JSON_TYPED_DICT_DECODED), dict(JSON_TYPED_DICT_DECODED)], "list": list(JSON_TYPED_DICT_DECODED.values()), - **JSON_TYPED_DICT_DECODED + **JSON_TYPED_DICT_DECODED, } TABLE_UPDATE: List[TColumnSchema] = [ - { - "name": "col1", - "data_type": "bigint", - "nullable": False - }, - { - "name": "col2", - "data_type": "double", - "nullable": False - }, - { - "name": "col3", - "data_type": "bool", - "nullable": False - }, - { - "name": "col4", - "data_type": "timestamp", - "nullable": False - }, - { - "name": "col5", - "data_type": "text", - "nullable": False - }, - { - "name": "col6", - "data_type": "decimal", - "nullable": False - }, - { - "name": "col7", - "data_type": "binary", - "nullable": False - }, - { - "name": "col8", - "data_type": "wei", - "nullable": False 
- }, - { - "name": "col9", - "data_type": "complex", - "nullable": False, - "variant": True - }, - { - "name": "col10", - "data_type": "date", - "nullable": False - }, - { - "name": "col11", - "data_type": "time", - "nullable": False - }, - { - "name": "col1_null", - "data_type": "bigint", - "nullable": True - }, - { - "name": "col2_null", - "data_type": "double", - "nullable": True - }, - { - "name": "col3_null", - "data_type": "bool", - "nullable": True - }, - { - "name": "col4_null", - "data_type": "timestamp", - "nullable": True - }, - { - "name": "col5_null", - "data_type": "text", - "nullable": True - }, - { - "name": "col6_null", - "data_type": "decimal", - "nullable": True - }, - { - "name": "col7_null", - "data_type": "binary", - "nullable": True - }, - { - "name": "col8_null", - "data_type": "wei", - "nullable": True - }, - { - "name": "col9_null", - "data_type": "complex", - "nullable": True, - "variant": True - }, - { - "name": "col10_null", - "data_type": "date", - "nullable": True - }, - { - "name": "col11_null", - "data_type": "time", - "nullable": True - }, - { - "name": "col1_precision", - "data_type": "bigint", - "precision": 16, - "nullable": False - }, - { - "name": "col4_precision", - "data_type": "timestamp", - "precision": 3, - "nullable": False - }, - { - "name": "col5_precision", - "data_type": "text", - "precision": 25, - "nullable": False - }, + {"name": "col1", "data_type": "bigint", "nullable": False}, + {"name": "col2", "data_type": "double", "nullable": False}, + {"name": "col3", "data_type": "bool", "nullable": False}, + {"name": "col4", "data_type": "timestamp", "nullable": False}, + {"name": "col5", "data_type": "text", "nullable": False}, + {"name": "col6", "data_type": "decimal", "nullable": False}, + {"name": "col7", "data_type": "binary", "nullable": False}, + {"name": "col8", "data_type": "wei", "nullable": False}, + {"name": "col9", "data_type": "complex", "nullable": False, "variant": True}, + {"name": "col10", "data_type": "date", "nullable": False}, + {"name": "col11", "data_type": "time", "nullable": False}, + {"name": "col1_null", "data_type": "bigint", "nullable": True}, + {"name": "col2_null", "data_type": "double", "nullable": True}, + {"name": "col3_null", "data_type": "bool", "nullable": True}, + {"name": "col4_null", "data_type": "timestamp", "nullable": True}, + {"name": "col5_null", "data_type": "text", "nullable": True}, + {"name": "col6_null", "data_type": "decimal", "nullable": True}, + {"name": "col7_null", "data_type": "binary", "nullable": True}, + {"name": "col8_null", "data_type": "wei", "nullable": True}, + {"name": "col9_null", "data_type": "complex", "nullable": True, "variant": True}, + {"name": "col10_null", "data_type": "date", "nullable": True}, + {"name": "col11_null", "data_type": "time", "nullable": True}, + {"name": "col1_precision", "data_type": "bigint", "precision": 16, "nullable": False}, + {"name": "col4_precision", "data_type": "timestamp", "precision": 3, "nullable": False}, + {"name": "col5_precision", "data_type": "text", "precision": 25, "nullable": False}, { "name": "col6_precision", "data_type": "decimal", "precision": 6, "scale": 2, - "nullable": False - }, - { - "name": "col7_precision", - "data_type": "binary", - "precision": 19, - "nullable": False - }, - { - "name": "col11_precision", - "data_type": "time", - "precision": 3, - "nullable": False + "nullable": False, }, + {"name": "col7_precision", "data_type": "binary", "precision": 19, "nullable": False}, + {"name": "col11_precision", "data_type": 
"time", "precision": 3, "nullable": False}, ] -TABLE_UPDATE_COLUMNS_SCHEMA: TTableSchemaColumns = {t["name"]:t for t in TABLE_UPDATE} +TABLE_UPDATE_COLUMNS_SCHEMA: TTableSchemaColumns = {t["name"]: t for t in TABLE_UPDATE} -TABLE_ROW_ALL_DATA_TYPES = { +TABLE_ROW_ALL_DATA_TYPES = { "col1": 989127831, "col2": 898912.821982, "col3": True, "col4": "2022-05-23T13:26:45.176451+00:00", "col5": "string data \n \r \x8e 🦆", "col6": Decimal("2323.34"), - "col7": b'binary data \n \r \x8e', + "col7": b"binary data \n \r \x8e", "col8": 2**56 + 92093890840, - "col9": {"complex":[1,2,3,"a"], "link": "?commen\ntU\nrn=urn%3Ali%3Acomment%3A%28acti\012 \6 \\vity%3A69'08444473\n\n551163392%2C6n \r \x8e9085"}, + "col9": { + "complex": [1, 2, 3, "a"], + "link": ( + "?commen\ntU\nrn=urn%3Ali%3Acomment%3A%28acti\012 \6" + " \\vity%3A69'08444473\n\n551163392%2C6n \r \x8e9085" + ), + }, "col10": "2023-02-27", "col11": "13:26:45.176451", "col1_null": None, @@ -238,12 +136,14 @@ "col4_precision": "2022-05-23T13:26:46.167231+00:00", "col5_precision": "string data 2 \n \r \x8e 🦆", "col6_precision": Decimal("2323.34"), - "col7_precision": b'binary data 2 \n \r \x8e', + "col7_precision": b"binary data 2 \n \r \x8e", "col11_precision": "13:26:45.176451", } -def table_update_and_row(exclude_types: Sequence[TDataType] = None, exclude_columns: Sequence[str] = None) -> Tuple[TTableSchemaColumns, StrAny]: +def table_update_and_row( + exclude_types: Sequence[TDataType] = None, exclude_columns: Sequence[str] = None +) -> Tuple[TTableSchemaColumns, StrAny]: """Get a table schema and a row with all possible data types. Optionally exclude some data types from the schema and row. """ @@ -251,7 +151,9 @@ def table_update_and_row(exclude_types: Sequence[TDataType] = None, exclude_colu data_row = deepcopy(TABLE_ROW_ALL_DATA_TYPES) exclude_col_names = list(exclude_columns or []) if exclude_types: - exclude_col_names.extend([key for key, value in column_schemas.items() if value["data_type"] in exclude_types]) + exclude_col_names.extend( + [key for key, value in column_schemas.items() if value["data_type"] in exclude_types] + ) for col_name in set(exclude_col_names): del column_schemas[col_name] del data_row[col_name] @@ -262,7 +164,7 @@ def assert_all_data_types_row( db_row: List[Any], parse_complex_strings: bool = False, allow_base64_binary: bool = False, - timestamp_precision:int = 6, + timestamp_precision: int = 6, schema: TTableSchemaColumns = None, ) -> None: # content must equal @@ -276,24 +178,22 @@ def assert_all_data_types_row( if "col4" in expected_rows: parsed_date = pendulum.instance(db_mapping["col4"]) db_mapping["col4"] = reduce_pendulum_datetime_precision(parsed_date, timestamp_precision) - expected_rows['col4'] = reduce_pendulum_datetime_precision( + expected_rows["col4"] = reduce_pendulum_datetime_precision( ensure_pendulum_datetime(expected_rows["col4"]), # type: ignore[arg-type] - timestamp_precision + timestamp_precision, ) if "col4_precision" in expected_rows: parsed_date = pendulum.instance(db_mapping["col4_precision"]) db_mapping["col4_precision"] = reduce_pendulum_datetime_precision(parsed_date, 3) - expected_rows['col4_precision'] = reduce_pendulum_datetime_precision( - ensure_pendulum_datetime(expected_rows["col4_precision"]), # type: ignore[arg-type] - 3 + expected_rows["col4_precision"] = reduce_pendulum_datetime_precision( + ensure_pendulum_datetime(expected_rows["col4_precision"]), 3 # type: ignore[arg-type] ) if "col11_precision" in expected_rows: parsed_time = 
ensure_pendulum_time(db_mapping["col11_precision"]) db_mapping["col11_precision"] = reduce_pendulum_datetime_precision(parsed_time, 3) - expected_rows['col11_precision'] = reduce_pendulum_datetime_precision( - ensure_pendulum_time(expected_rows["col11_precision"]), # type: ignore[arg-type] - 3 + expected_rows["col11_precision"] = reduce_pendulum_datetime_precision( + ensure_pendulum_time(expected_rows["col11_precision"]), 3 # type: ignore[arg-type] ) # redshift and bigquery return strings from structured fields @@ -307,9 +207,7 @@ def assert_all_data_types_row( except ValueError: if not allow_base64_binary: raise - db_mapping[binary_col] = base64.b64decode( - db_mapping[binary_col], validate=True - ) + db_mapping[binary_col] = base64.b64decode(db_mapping[binary_col], validate=True) else: db_mapping[binary_col] = bytes(db_mapping[binary_col]) @@ -333,7 +231,29 @@ def assert_all_data_types_row( assert db_mapping == expected_rows -def arrow_table_all_data_types(object_format: TArrowFormat, include_json: bool = True, include_time: bool = True, num_rows: int = 3) -> Tuple[Any, List[Dict[str, Any]]]: +def arrow_format_from_pandas( + df: Any, + object_format: TArrowFormat, +) -> Any: + from dlt.common.libs.pyarrow import pyarrow as pa + + if object_format == "pandas": + return df + elif object_format == "table": + return pa.Table.from_pandas(df) + elif object_format == "record_batch": + return pa.RecordBatch.from_pandas(df) + raise ValueError("Unknown item type: " + object_format) + + +def arrow_table_all_data_types( + object_format: TArrowFormat, + include_json: bool = True, + include_time: bool = True, + include_not_normalized_name: bool = True, + include_name_clash: bool = False, + num_rows: int = 3, +) -> Tuple[Any, List[Dict[str, Any]]]: """Create an arrow object or pandas dataframe with all supported data types. 
Returns the table and its records in python format @@ -342,7 +262,6 @@ def arrow_table_all_data_types(object_format: TArrowFormat, include_json: bool = from dlt.common.libs.pyarrow import pyarrow as pa data = { - "Pre Normalized Column": [random.choice(ascii_lowercase) for _ in range(num_rows)], "string": [random.choice(ascii_lowercase) for _ in range(num_rows)], "float": [round(random.uniform(0, 100), 4) for _ in range(num_rows)], "int": [random.randrange(0, 100) for _ in range(num_rows)], @@ -352,9 +271,15 @@ def arrow_table_all_data_types(object_format: TArrowFormat, include_json: bool = "decimal": [Decimal(str(round(random.uniform(0, 100), 4))) for _ in range(num_rows)], "bool": [random.choice([True, False]) for _ in range(num_rows)], "string_null": [random.choice(ascii_lowercase) for _ in range(num_rows - 1)] + [None], - "null": pd.Series( [None for _ in range(num_rows)]) + "null": pd.Series([None for _ in range(num_rows)]), } + if include_name_clash: + data["pre Normalized Column"] = [random.choice(ascii_lowercase) for _ in range(num_rows)] + include_not_normalized_name = True + if include_not_normalized_name: + data["Pre Normalized Column"] = [random.choice(ascii_lowercase) for _ in range(num_rows)] + if include_json: data["json"] = [{"a": random.randrange(0, 100)} for _ in range(num_rows)] @@ -363,13 +288,13 @@ def arrow_table_all_data_types(object_format: TArrowFormat, include_json: bool = df = pd.DataFrame(data) # records have normalized identifiers for comparing - rows = df.rename(columns={ - "Pre Normalized Column": "pre_normalized_column", - }).drop(columns=['null']).to_dict("records") - if object_format == "pandas": - return df, rows - elif object_format == "table": - return pa.Table.from_pandas(df), rows - elif object_format == "record_batch": - return pa.RecordBatch.from_pandas(df), rows - raise ValueError("Unknown item type: " + object_format) + rows = ( + df.rename( + columns={ + "Pre Normalized Column": "pre_normalized_column", + } + ) + .drop(columns=["null"]) + .to_dict("records") + ) + return arrow_format_from_pandas(df, object_format), rows diff --git a/tests/cli/cases/deploy_pipeline/debug_pipeline.py b/tests/cli/cases/deploy_pipeline/debug_pipeline.py index 8d87c8ac3d..c49e8b524d 100644 --- a/tests/cli/cases/deploy_pipeline/debug_pipeline.py +++ b/tests/cli/cases/deploy_pipeline/debug_pipeline.py @@ -7,14 +7,17 @@ def example_resource(api_url=dlt.config.value, api_key=dlt.secrets.value, last_i @dlt.source -def example_source(api_url=dlt.config.value, api_key=dlt.secrets.value, last_id = 0): +def example_source(api_url=dlt.config.value, api_key=dlt.secrets.value, last_id=0): # return all the resources to be loaded return example_resource(api_url, api_key, last_id) -if __name__ == '__main__': - p = dlt.pipeline(pipeline_name="debug_pipeline", destination="postgres", dataset_name="debug_pipeline_data", full_refresh=False) - load_info = p.run( - example_source(last_id=819273998) +if __name__ == "__main__": + p = dlt.pipeline( + pipeline_name="debug_pipeline", + destination="postgres", + dataset_name="debug_pipeline_data", + full_refresh=False, ) + load_info = p.run(example_source(last_id=819273998)) print(load_info) diff --git a/tests/cli/cases/deploy_pipeline/dummy_pipeline.py b/tests/cli/cases/deploy_pipeline/dummy_pipeline.py index 48e13c35cd..f78e1b2b81 100644 --- a/tests/cli/cases/deploy_pipeline/dummy_pipeline.py +++ b/tests/cli/cases/deploy_pipeline/dummy_pipeline.py @@ -7,14 +7,12 @@ def example_resource(api_url=dlt.config.value, api_key=dlt.secrets.value, 
last_i @dlt.source -def example_source(api_url=dlt.config.value, api_key=dlt.secrets.value, last_id = 0): +def example_source(api_url=dlt.config.value, api_key=dlt.secrets.value, last_id=0): # return all the resources to be loaded return example_resource(api_url, api_key, last_id) -if __name__ == '__main__': +if __name__ == "__main__": p = dlt.pipeline(pipeline_name="dummy_pipeline", destination="dummy") - load_info = p.run( - example_source(last_id=819273998) - ) + load_info = p.run(example_source(last_id=819273998)) print(load_info) diff --git a/tests/cli/common/test_cli_invoke.py b/tests/cli/common/test_cli_invoke.py index e3a7676ad1..d367a97261 100644 --- a/tests/cli/common/test_cli_invoke.py +++ b/tests/cli/common/test_cli_invoke.py @@ -18,40 +18,40 @@ def test_invoke_basic(script_runner: ScriptRunner) -> None: - result = script_runner.run(['dlt', '--version']) + result = script_runner.run(["dlt", "--version"]) assert result.returncode == 0 assert result.stdout.startswith("dlt ") - assert result.stderr == '' + assert result.stderr == "" - result = script_runner.run(['dlt', '--version'], shell=True) + result = script_runner.run(["dlt", "--version"], shell=True) assert result.returncode == 0 assert result.stdout.startswith("dlt ") - assert result.stderr == '' + assert result.stderr == "" for command in BASE_COMMANDS: - result = script_runner.run(['dlt', command, '--help']) + result = script_runner.run(["dlt", command, "--help"]) assert result.returncode == 0 assert result.stdout.startswith(f"usage: dlt {command}") - result = script_runner.run(['dlt', "N/A", '--help']) + result = script_runner.run(["dlt", "N/A", "--help"]) assert result.returncode != 0 def test_invoke_list_pipelines(script_runner: ScriptRunner) -> None: - result = script_runner.run(['dlt', 'pipeline', '--list-pipelines']) + result = script_runner.run(["dlt", "pipeline", "--list-pipelines"]) # directory does not exist (we point to TEST_STORAGE) assert result.returncode == -2 # create empty os.makedirs(get_dlt_pipelines_dir()) - result = script_runner.run(['dlt', 'pipeline', '--list-pipelines']) + result = script_runner.run(["dlt", "pipeline", "--list-pipelines"]) assert result.returncode == 0 assert "No pipelines found in" in result.stdout def test_invoke_pipeline(script_runner: ScriptRunner) -> None: # info on non existing pipeline - result = script_runner.run(['dlt', 'pipeline', 'debug_pipeline', 'info']) + result = script_runner.run(["dlt", "pipeline", "debug_pipeline", "info"]) assert result.returncode == -1 assert "the pipeline was not found in" in result.stderr @@ -66,25 +66,30 @@ def test_invoke_pipeline(script_runner: ScriptRunner) -> None: venv = Venv.restore_current() venv.run_script("dummy_pipeline.py") # we check output test_pipeline_command else - result = script_runner.run(['dlt', 'pipeline', 'dummy_pipeline', 'info']) + result = script_runner.run(["dlt", "pipeline", "dummy_pipeline", "info"]) assert result.returncode == 0 - result = script_runner.run(['dlt', 'pipeline', 'dummy_pipeline', 'trace']) + result = script_runner.run(["dlt", "pipeline", "dummy_pipeline", "trace"]) assert result.returncode == 0 - result = script_runner.run(['dlt', 'pipeline', 'dummy_pipeline', 'failed-jobs']) + result = script_runner.run(["dlt", "pipeline", "dummy_pipeline", "failed-jobs"]) assert result.returncode == 0 - result = script_runner.run(['dlt', 'pipeline', 'dummy_pipeline', 'load-package']) + result = script_runner.run(["dlt", "pipeline", "dummy_pipeline", "load-package"]) assert result.returncode == 0 - result = 
script_runner.run(['dlt', 'pipeline', 'dummy_pipeline', 'load-package', "NON EXISTENT"]) + result = script_runner.run( + ["dlt", "pipeline", "dummy_pipeline", "load-package", "NON EXISTENT"] + ) assert result.returncode == -2 try: # use debug flag to raise an exception - result = script_runner.run(['dlt', '--debug', 'pipeline', 'dummy_pipeline', 'load-package', "NON EXISTENT"]) + result = script_runner.run( + ["dlt", "--debug", "pipeline", "dummy_pipeline", "load-package", "NON EXISTENT"] + ) # exception terminates command assert result.returncode == 1 assert "LoadPackageNotFound" in result.stderr finally: # reset debug flag so other tests may pass from dlt.cli import _dlt + _dlt.DEBUG_FLAG = False @@ -92,17 +97,17 @@ def test_invoke_init_chess_and_template(script_runner: ScriptRunner) -> None: with set_working_dir(TEST_STORAGE_ROOT): # store dlt data in test storage (like patch_home_dir) with custom_environ({"DLT_DATA_DIR": get_dlt_data_dir()}): - result = script_runner.run(['dlt', 'init', 'chess', 'dummy']) + result = script_runner.run(["dlt", "init", "chess", "dummy"]) assert "Verified source chess was added to your project!" in result.stdout assert result.returncode == 0 - result = script_runner.run(['dlt', 'init', 'debug_pipeline', 'dummy']) + result = script_runner.run(["dlt", "init", "debug_pipeline", "dummy"]) assert "Your new pipeline debug_pipeline is ready to be customized!" in result.stdout assert result.returncode == 0 def test_invoke_list_verified_sources(script_runner: ScriptRunner) -> None: known_sources = ["chess", "sql_database", "google_sheets", "pipedrive"] - result = script_runner.run(['dlt', 'init', '--list-verified-sources']) + result = script_runner.run(["dlt", "init", "--list-verified-sources"]) assert result.returncode == 0 for known_source in known_sources: assert known_source in result.stdout @@ -112,25 +117,31 @@ def test_invoke_deploy_project(script_runner: ScriptRunner) -> None: with set_working_dir(TEST_STORAGE_ROOT): # store dlt data in test storage (like patch_home_dir) with custom_environ({"DLT_DATA_DIR": get_dlt_data_dir()}): - result = script_runner.run(['dlt', 'deploy', 'debug_pipeline.py', 'github-action', '--schedule', '@daily']) + result = script_runner.run( + ["dlt", "deploy", "debug_pipeline.py", "github-action", "--schedule", "@daily"] + ) assert result.returncode == -4 assert "The pipeline script does not exist" in result.stderr - result = script_runner.run(['dlt', 'deploy', 'debug_pipeline.py', 'airflow-composer']) + result = script_runner.run(["dlt", "deploy", "debug_pipeline.py", "airflow-composer"]) assert result.returncode == -4 assert "The pipeline script does not exist" in result.stderr # now init - result = script_runner.run(['dlt', 'init', 'chess', 'dummy']) + result = script_runner.run(["dlt", "init", "chess", "dummy"]) assert result.returncode == 0 - result = script_runner.run(['dlt', 'deploy', 'chess_pipeline.py', 'github-action', '--schedule', '@daily']) + result = script_runner.run( + ["dlt", "deploy", "chess_pipeline.py", "github-action", "--schedule", "@daily"] + ) assert "NOTE: You must run the pipeline locally" in result.stdout - result = script_runner.run(['dlt', 'deploy', 'chess_pipeline.py', 'airflow-composer']) + result = script_runner.run(["dlt", "deploy", "chess_pipeline.py", "airflow-composer"]) assert "NOTE: You must run the pipeline locally" in result.stdout def test_invoke_deploy_mock(script_runner: ScriptRunner) -> None: # NOTE: you can mock only once per test with ScriptRunner !! 
with patch("dlt.cli.deploy_command.deploy_command") as _deploy_command: - script_runner.run(['dlt', 'deploy', 'debug_pipeline.py', 'github-action', '--schedule', '@daily']) + script_runner.run( + ["dlt", "deploy", "debug_pipeline.py", "github-action", "--schedule", "@daily"] + ) assert _deploy_command.called assert _deploy_command.call_args[1] == { "pipeline_script_path": "debug_pipeline.py", @@ -140,11 +151,25 @@ def test_invoke_deploy_mock(script_runner: ScriptRunner) -> None: "command": "deploy", "schedule": "@daily", "run_manually": True, - "run_on_push": False + "run_on_push": False, } _deploy_command.reset_mock() - script_runner.run(['dlt', 'deploy', 'debug_pipeline.py', 'github-action', '--schedule', '@daily', '--location', 'folder', '--branch', 'branch', '--run-on-push']) + script_runner.run( + [ + "dlt", + "deploy", + "debug_pipeline.py", + "github-action", + "--schedule", + "@daily", + "--location", + "folder", + "--branch", + "branch", + "--run-on-push", + ] + ) assert _deploy_command.called assert _deploy_command.call_args[1] == { "pipeline_script_path": "debug_pipeline.py", @@ -154,17 +179,17 @@ def test_invoke_deploy_mock(script_runner: ScriptRunner) -> None: "command": "deploy", "schedule": "@daily", "run_manually": True, - "run_on_push": True + "run_on_push": True, } # no schedule fails _deploy_command.reset_mock() - result = script_runner.run(['dlt', 'deploy', 'debug_pipeline.py', 'github-action']) + result = script_runner.run(["dlt", "deploy", "debug_pipeline.py", "github-action"]) assert not _deploy_command.called assert result.returncode != 0 assert "the following arguments are required: --schedule" in result.stderr # airflow without schedule works _deploy_command.reset_mock() - result = script_runner.run(['dlt', 'deploy', 'debug_pipeline.py', 'airflow-composer']) + result = script_runner.run(["dlt", "deploy", "debug_pipeline.py", "airflow-composer"]) assert _deploy_command.called assert result.returncode == 0 assert _deploy_command.call_args[1] == { @@ -173,11 +198,13 @@ def test_invoke_deploy_mock(script_runner: ScriptRunner) -> None: "repo_location": "https://github.com/dlt-hub/dlt-deploy-template.git", "branch": None, "command": "deploy", - 'secrets_format': 'toml' + "secrets_format": "toml", } # env secrets format _deploy_command.reset_mock() - result = script_runner.run(['dlt', 'deploy', 'debug_pipeline.py', 'airflow-composer', "--secrets-format", "env"]) + result = script_runner.run( + ["dlt", "deploy", "debug_pipeline.py", "airflow-composer", "--secrets-format", "env"] + ) assert _deploy_command.called assert result.returncode == 0 assert _deploy_command.call_args[1] == { @@ -186,5 +213,5 @@ def test_invoke_deploy_mock(script_runner: ScriptRunner) -> None: "repo_location": "https://github.com/dlt-hub/dlt-deploy-template.git", "branch": None, "command": "deploy", - 'secrets_format': 'env' + "secrets_format": "env", } diff --git a/tests/cli/common/test_telemetry_command.py b/tests/cli/common/test_telemetry_command.py index 4a3a0f4be1..18bd67a5e0 100644 --- a/tests/cli/common/test_telemetry_command.py +++ b/tests/cli/common/test_telemetry_command.py @@ -30,7 +30,12 @@ def _initial_providers(): glob_ctx = ConfigProvidersContext() glob_ctx.providers = _initial_providers() - with set_working_dir(test_storage.make_full_path("project")), Container().injectable_context(glob_ctx), patch("dlt.common.configuration.specs.config_providers_context.ConfigProvidersContext.initial_providers", _initial_providers): + with set_working_dir(test_storage.make_full_path("project")), 
Container().injectable_context( + glob_ctx + ), patch( + "dlt.common.configuration.specs.config_providers_context.ConfigProvidersContext.initial_providers", + _initial_providers, + ): # no config files: status is ON with io.StringIO() as buf, contextlib.redirect_stdout(buf): telemetry_status_command() @@ -75,7 +80,6 @@ def _initial_providers(): def test_command_instrumentation() -> None: - @track_command("instrument_ok", False, "in_ok_param", "in_ok_param_2") def instrument_ok(in_ok_param: str, in_ok_param_2: int) -> int: return 0 @@ -126,7 +130,15 @@ def instrument_raises_2(in_raises_2: bool) -> int: def test_instrumentation_wrappers() -> None: - from dlt.cli._dlt import init_command_wrapper, list_verified_sources_command_wrapper, DEFAULT_VERIFIED_SOURCES_REPO, pipeline_command_wrapper, deploy_command_wrapper, COMMAND_DEPLOY_REPO_LOCATION, DeploymentMethods + from dlt.cli._dlt import ( + init_command_wrapper, + list_verified_sources_command_wrapper, + DEFAULT_VERIFIED_SOURCES_REPO, + pipeline_command_wrapper, + deploy_command_wrapper, + COMMAND_DEPLOY_REPO_LOCATION, + DeploymentMethods, + ) from dlt.common.exceptions import UnknownDestinationModule with patch("dlt.common.runtime.segment.before_send", _mock_before_send): @@ -140,7 +152,7 @@ def test_instrumentation_wrappers() -> None: msg = SENT_ITEMS[0] assert msg["event"] == "command_init" assert msg["properties"]["source_name"] == "instrumented_source" - assert msg["properties"]["destination_name"] == "" + assert msg["properties"]["destination_type"] == "" assert msg["properties"]["success"] is False SENT_ITEMS.clear() @@ -155,16 +167,22 @@ def test_instrumentation_wrappers() -> None: # assert msg["properties"]["operation"] == "list" SENT_ITEMS.clear() - deploy_command_wrapper("list.py", DeploymentMethods.github_actions.value, COMMAND_DEPLOY_REPO_LOCATION, schedule="* * * * *") + deploy_command_wrapper( + "list.py", + DeploymentMethods.github_actions.value, + COMMAND_DEPLOY_REPO_LOCATION, + schedule="* * * * *", + ) msg = SENT_ITEMS[0] assert msg["event"] == "command_deploy" assert msg["properties"]["deployment_method"] == DeploymentMethods.github_actions.value assert msg["properties"]["success"] is False - SENT_ITEMS = [] + + def _mock_before_send(event: DictStrAny, _unused_hint: Any = None) -> DictStrAny: SENT_ITEMS.append(event) # do not send this - return None \ No newline at end of file + return None diff --git a/tests/cli/conftest.py b/tests/cli/conftest.py index e3a47f6202..78efcd03c4 100644 --- a/tests/cli/conftest.py +++ b/tests/cli/conftest.py @@ -1 +1 @@ -from tests.utils import preserve_environ, autouse_test_storage, unload_modules, wipe_pipeline \ No newline at end of file +from tests.utils import preserve_environ, autouse_test_storage, unload_modules, wipe_pipeline diff --git a/tests/cli/test_config_toml_writer.py b/tests/cli/test_config_toml_writer.py index 5d08b23c05..8ccac21f99 100644 --- a/tests/cli/test_config_toml_writer.py +++ b/tests/cli/test_config_toml_writer.py @@ -15,10 +15,24 @@ def example_toml(): def test_write_value(example_toml): toml_table = example_toml - write_value(toml_table, "species", str, overwrite_existing=True, default_value="Homo sapiens", is_default_of_interest=True) + write_value( + toml_table, + "species", + str, + overwrite_existing=True, + default_value="Homo sapiens", + is_default_of_interest=True, + ) assert toml_table["species"] == "Homo sapiens" - write_value(toml_table, "species", str, overwrite_existing=False, default_value="Mus musculus", is_default_of_interest=True) + write_value( + 
toml_table, + "species", + str, + overwrite_existing=False, + default_value="Mus musculus", + is_default_of_interest=True, + ) assert toml_table["species"] == "Homo sapiens" # Test with is_default_of_interest=True and non-optional, non-final hint @@ -26,24 +40,42 @@ def test_write_value(example_toml): assert toml_table["species"] == "species" # Test with is_default_of_interest=False and non-optional, non-final hint, and no default - write_value(toml_table, "population", int, overwrite_existing=True, is_default_of_interest=False) + write_value( + toml_table, "population", int, overwrite_existing=True, is_default_of_interest=False + ) # non default get typed example value assert "population" in toml_table # Test with optional hint - write_value(toml_table, "habitat", Optional[str], overwrite_existing=True, is_default_of_interest=False) + write_value( + toml_table, "habitat", Optional[str], overwrite_existing=True, is_default_of_interest=False + ) assert "habitat" not in toml_table # test with optional hint of interest - write_value(toml_table, "habitat", Optional[str], overwrite_existing=True, is_default_of_interest=True) + write_value( + toml_table, "habitat", Optional[str], overwrite_existing=True, is_default_of_interest=True + ) assert "habitat" in toml_table # Test with final hint - write_value(toml_table, "immutable_trait", Final[str], overwrite_existing=True, is_default_of_interest=False) + write_value( + toml_table, + "immutable_trait", + Final[str], + overwrite_existing=True, + is_default_of_interest=False, + ) assert "immutable_trait" not in toml_table # Test with final hint of interest - write_value(toml_table, "immutable_trait", Final[str], overwrite_existing=True, is_default_of_interest=True) + write_value( + toml_table, + "immutable_trait", + Final[str], + overwrite_existing=True, + is_default_of_interest=True, + ) assert "immutable_trait" in toml_table @@ -61,7 +93,9 @@ def test_write_values(example_toml): new_values = [ WritableConfigValue("species", str, "Canis lupus", ("taxonomy", "genus")), - WritableConfigValue("species", str, "Canis lupus familiaris", ("taxonomy", "genus", "subgenus")), + WritableConfigValue( + "species", str, "Canis lupus familiaris", ("taxonomy", "genus", "subgenus") + ), WritableConfigValue("genome_size", float, 2.8, ("genomic_info",)), ] write_values(example_toml, new_values, overwrite_existing=False) @@ -118,7 +152,10 @@ def test_write_values_without_defaults(example_toml): assert example_toml["animal_info"]["is_animal"] is True assert example_toml["genomic_info"]["chromosome_data"]["chromosomes"] == ["a", "b", "c"] - assert example_toml["genomic_info"]["chromosome_data"]["chromosomes"].trivia.comment == EXAMPLE_COMMENT + assert ( + example_toml["genomic_info"]["chromosome_data"]["chromosomes"].trivia.comment + == EXAMPLE_COMMENT + ) assert example_toml["genomic_info"]["gene_data"]["genes"] == {"key": "value"} - assert example_toml["genomic_info"]["gene_data"]["genes"].trivia.comment == EXAMPLE_COMMENT \ No newline at end of file + assert example_toml["genomic_info"]["gene_data"]["genes"].trivia.comment == EXAMPLE_COMMENT diff --git a/tests/cli/test_deploy_command.py b/tests/cli/test_deploy_command.py index de84c5c307..685921ca6e 100644 --- a/tests/cli/test_deploy_command.py +++ b/tests/cli/test_deploy_command.py @@ -26,26 +26,40 @@ ("github-action", {"schedule": "*/30 * * * *", "run_on_push": True, "run_manually": True}), ("airflow-composer", {"secrets_format": "toml"}), ("airflow-composer", {"secrets_format": "env"}), - ] +] 
@pytest.mark.parametrize("deployment_method,deployment_args", DEPLOY_PARAMS) -def test_deploy_command_no_repo(test_storage: FileStorage, deployment_method: str, deployment_args: StrAny) -> None: +def test_deploy_command_no_repo( + test_storage: FileStorage, deployment_method: str, deployment_args: StrAny +) -> None: pipeline_wf = tempfile.mkdtemp() shutil.copytree("tests/cli/cases/deploy_pipeline", pipeline_wf, dirs_exist_ok=True) with set_working_dir(pipeline_wf): # we do not have repo with pytest.raises(InvalidGitRepositoryError): - deploy_command.deploy_command("debug_pipeline.py", deployment_method, deploy_command.COMMAND_DEPLOY_REPO_LOCATION, **deployment_args) + deploy_command.deploy_command( + "debug_pipeline.py", + deployment_method, + deploy_command.COMMAND_DEPLOY_REPO_LOCATION, + **deployment_args + ) # test wrapper - rc = _dlt.deploy_command_wrapper("debug_pipeline.py", deployment_method, deploy_command.COMMAND_DEPLOY_REPO_LOCATION, **deployment_args) + rc = _dlt.deploy_command_wrapper( + "debug_pipeline.py", + deployment_method, + deploy_command.COMMAND_DEPLOY_REPO_LOCATION, + **deployment_args + ) assert rc == -3 @pytest.mark.parametrize("deployment_method,deployment_args", DEPLOY_PARAMS) -def test_deploy_command(test_storage: FileStorage, deployment_method: str, deployment_args: StrAny) -> None: +def test_deploy_command( + test_storage: FileStorage, deployment_method: str, deployment_args: StrAny +) -> None: # drop pipeline p = dlt.pipeline(pipeline_name="debug_pipeline") p._wipe_working_folder() @@ -59,16 +73,36 @@ def test_deploy_command(test_storage: FileStorage, deployment_method: str, deplo with Repo.init(".") as repo: # test no origin with pytest.raises(CliCommandException) as py_ex: - deploy_command.deploy_command("debug_pipeline.py", deployment_method, deploy_command.COMMAND_DEPLOY_REPO_LOCATION, **deployment_args) + deploy_command.deploy_command( + "debug_pipeline.py", + deployment_method, + deploy_command.COMMAND_DEPLOY_REPO_LOCATION, + **deployment_args + ) assert "Your current repository has no origin set" in py_ex.value.args[0] - rc = _dlt.deploy_command_wrapper("debug_pipeline.py", deployment_method, deploy_command.COMMAND_DEPLOY_REPO_LOCATION, **deployment_args) + rc = _dlt.deploy_command_wrapper( + "debug_pipeline.py", + deployment_method, + deploy_command.COMMAND_DEPLOY_REPO_LOCATION, + **deployment_args + ) assert rc == -5 # we have a repo that was never run Remote.create(repo, "origin", "git@github.com:rudolfix/dlt-cmd-test-2.git") with pytest.raises(CannotRestorePipelineException): - deploy_command.deploy_command("debug_pipeline.py", deployment_method, deploy_command.COMMAND_DEPLOY_REPO_LOCATION, **deployment_args) - rc = _dlt.deploy_command_wrapper("debug_pipeline.py", deployment_method, deploy_command.COMMAND_DEPLOY_REPO_LOCATION, **deployment_args) + deploy_command.deploy_command( + "debug_pipeline.py", + deployment_method, + deploy_command.COMMAND_DEPLOY_REPO_LOCATION, + **deployment_args + ) + rc = _dlt.deploy_command_wrapper( + "debug_pipeline.py", + deployment_method, + deploy_command.COMMAND_DEPLOY_REPO_LOCATION, + **deployment_args + ) assert rc == -2 # run the script with wrong credentials (it is postgres there) @@ -80,9 +114,19 @@ def test_deploy_command(test_storage: FileStorage, deployment_method: str, deplo venv.run_script("debug_pipeline.py") # print(py_ex.value.output) with pytest.raises(deploy_command.PipelineWasNotRun) as py_ex2: - deploy_command.deploy_command("debug_pipeline.py", deployment_method, 
deploy_command.COMMAND_DEPLOY_REPO_LOCATION, **deployment_args) + deploy_command.deploy_command( + "debug_pipeline.py", + deployment_method, + deploy_command.COMMAND_DEPLOY_REPO_LOCATION, + **deployment_args + ) assert "The last pipeline run ended with error" in py_ex2.value.args[0] - rc = _dlt.deploy_command_wrapper("debug_pipeline.py", deployment_method, deploy_command.COMMAND_DEPLOY_REPO_LOCATION, **deployment_args) + rc = _dlt.deploy_command_wrapper( + "debug_pipeline.py", + deployment_method, + deploy_command.COMMAND_DEPLOY_REPO_LOCATION, + **deployment_args + ) assert rc == -2 os.environ["DESTINATION__POSTGRES__CREDENTIALS"] = pg_credentials @@ -103,8 +147,8 @@ def test_deploy_command(test_storage: FileStorage, deployment_method: str, deplo _out = buf.getvalue() print(_out) # make sure our secret and config values are all present - assert 'api_key_9x3ehash' in _out - assert 'dlt_data' in _out + assert "api_key_9x3ehash" in _out + assert "dlt_data" in _out if "schedule" in deployment_args: assert get_schedule_description(deployment_args["schedule"]) secrets_format = deployment_args.get("secrets_format", "env") @@ -115,8 +159,17 @@ def test_deploy_command(test_storage: FileStorage, deployment_method: str, deplo # non existing script name with pytest.raises(NoSuchPathError): - deploy_command.deploy_command("no_pipeline.py", deployment_method, deploy_command.COMMAND_DEPLOY_REPO_LOCATION, **deployment_args) + deploy_command.deploy_command( + "no_pipeline.py", + deployment_method, + deploy_command.COMMAND_DEPLOY_REPO_LOCATION, + **deployment_args + ) with echo.always_choose(False, always_choose_value=True): - rc = _dlt.deploy_command_wrapper("no_pipeline.py", deployment_method, deploy_command.COMMAND_DEPLOY_REPO_LOCATION, **deployment_args) + rc = _dlt.deploy_command_wrapper( + "no_pipeline.py", + deployment_method, + deploy_command.COMMAND_DEPLOY_REPO_LOCATION, + **deployment_args + ) assert rc == -4 - diff --git a/tests/cli/test_init_command.py b/tests/cli/test_init_command.py index 5dd24f4aaf..479bedb6fb 100644 --- a/tests/cli/test_init_command.py +++ b/tests/cli/test_init_command.py @@ -24,13 +24,25 @@ from dlt.cli import init_command, echo -from dlt.cli.init_command import SOURCES_MODULE_NAME, utils as cli_utils, files_ops, _select_source_files +from dlt.cli.init_command import ( + SOURCES_MODULE_NAME, + utils as cli_utils, + files_ops, + _select_source_files, +) from dlt.cli.exceptions import CliCommandException from dlt.cli.requirements import SourceRequirements from dlt.reflection.script_visitor import PipelineScriptVisitor from dlt.reflection import names as n -from tests.cli.utils import echo_default_choice, repo_dir, project_files, cloned_init_repo, get_repo_dir, get_project_files +from tests.cli.utils import ( + echo_default_choice, + repo_dir, + project_files, + cloned_init_repo, + get_repo_dir, + get_project_files, +) from tests.common.utils import modify_and_commit_file from tests.utils import IMPLEMENTED_DESTINATIONS, clean_test_storage @@ -83,7 +95,9 @@ def test_init_command_chess_verified_source(repo_dir: str, project_files: FileSt print(e) # now run the pipeline - os.environ.pop("DESTINATION__DUCKDB__CREDENTIALS", None) # settings from local project (secrets.toml etc.) + os.environ.pop( + "DESTINATION__DUCKDB__CREDENTIALS", None + ) # settings from local project (secrets.toml etc.) 
venv = Venv.restore_current() try: print(venv.run_script("chess_pipeline.py")) @@ -105,7 +119,9 @@ def test_init_list_verified_pipelines(repo_dir: str, project_files: FileStorage) init_command.list_verified_sources_command(repo_dir) -def test_init_list_verified_pipelines_update_warning(repo_dir: str, project_files: FileStorage) -> None: +def test_init_list_verified_pipelines_update_warning( + repo_dir: str, project_files: FileStorage +) -> None: """Sources listed include a warning if a different dlt version is required""" with mock.patch.object(SourceRequirements, "current_dlt_version", return_value="0.0.1"): with io.StringIO() as buf, contextlib.redirect_stdout(buf): @@ -121,7 +137,7 @@ def test_init_list_verified_pipelines_update_warning(repo_dir: str, project_file assert match # Try parsing the printed requiremnt string to verify it's valid parsed_requirement = Requirement(match.group(1)) - assert '0.0.1' not in parsed_requirement.specifier + assert "0.0.1" not in parsed_requirement.specifier def test_init_all_verified_sources_together(repo_dir: str, project_files: FileStorage) -> None: @@ -166,8 +182,10 @@ def test_init_all_verified_sources_isolated(cloned_init_repo: FileStorage) -> No assert_index_version_constraint(files, candidate) -@pytest.mark.parametrize('destination_name', IMPLEMENTED_DESTINATIONS) -def test_init_all_destinations(destination_name: str, project_files: FileStorage, repo_dir: str) -> None: +@pytest.mark.parametrize("destination_name", IMPLEMENTED_DESTINATIONS) +def test_init_all_destinations( + destination_name: str, project_files: FileStorage, repo_dir: str +) -> None: pipeline_name = f"generic_{destination_name}_pipeline" init_command.init_command(pipeline_name, destination_name, True, repo_dir) assert_init_files(project_files, pipeline_name, destination_name) @@ -189,7 +207,9 @@ def test_init_code_update_index_diff(repo_dir: str, project_files: FileStorage) sources_storage.delete(del_file_path) source_files = files_ops.get_verified_source_files(sources_storage, "pipedrive") - remote_index = files_ops.get_remote_source_index(sources_storage.storage_path, source_files.files, ">=0.3.5") + remote_index = files_ops.get_remote_source_index( + sources_storage.storage_path, source_files.files, ">=0.3.5" + ) assert mod_file_path in remote_index["files"] assert remote_index["is_dirty"] is True assert remote_index["files"][mod_file_path]["sha3_256"] == new_content_hash @@ -200,7 +220,7 @@ def test_init_code_update_index_diff(repo_dir: str, project_files: FileStorage) new, modified, deleted = files_ops.gen_index_diff(local_index, remote_index) # remote file entry in new assert new[new_file_path] == remote_index["files"][new_file_path] - #no git sha yet + # no git sha yet assert new[new_file_path]["git_sha"] is None # remote file entry in modified assert modified[mod_file_path] == remote_index["files"][mod_file_path] @@ -210,7 +230,9 @@ def test_init_code_update_index_diff(repo_dir: str, project_files: FileStorage) assert deleted[del_file_path] == local_index["files"][del_file_path] # get conflicts - conflict_modified, conflict_deleted = files_ops.find_conflict_files(local_index, new, modified, deleted, project_files) + conflict_modified, conflict_deleted = files_ops.find_conflict_files( + local_index, new, modified, deleted, project_files + ) assert conflict_modified == [] assert conflict_deleted == [] @@ -231,30 +253,40 @@ def test_init_code_update_index_diff(repo_dir: str, project_files: FileStorage) sources_storage.save(mod_file_path_2, local_content) local_index = 
files_ops.load_verified_sources_local_index("pipedrive") source_files = files_ops.get_verified_source_files(sources_storage, "pipedrive") - remote_index = files_ops.get_remote_source_index(sources_storage.storage_path, source_files.files, ">=0.3.5") + remote_index = files_ops.get_remote_source_index( + sources_storage.storage_path, source_files.files, ">=0.3.5" + ) new, modified, deleted = files_ops.gen_index_diff(local_index, remote_index) assert mod_file_path_2 in new - conflict_modified, conflict_deleted = files_ops.find_conflict_files(local_index, new, modified, deleted, project_files) + conflict_modified, conflict_deleted = files_ops.find_conflict_files( + local_index, new, modified, deleted, project_files + ) assert set(conflict_modified) == set([mod_file_path, new_file_path]) assert set(conflict_deleted) == set([del_file_path]) modified.update(new) # resolve conflicts in three different ways # skip option (the default) - res, sel_modified, sel_deleted = _select_source_files("pipedrive", deepcopy(modified), deepcopy(deleted), conflict_modified, conflict_deleted) + res, sel_modified, sel_deleted = _select_source_files( + "pipedrive", deepcopy(modified), deepcopy(deleted), conflict_modified, conflict_deleted + ) # noting is written, including non-conflicting file assert res == "s" assert sel_modified == {} assert sel_deleted == {} # Apply option - local changes will be lost with echo.always_choose(False, "a"): - res, sel_modified, sel_deleted = _select_source_files("pipedrive", deepcopy(modified), deepcopy(deleted), conflict_modified, conflict_deleted) + res, sel_modified, sel_deleted = _select_source_files( + "pipedrive", deepcopy(modified), deepcopy(deleted), conflict_modified, conflict_deleted + ) assert res == "a" assert sel_modified == modified assert sel_deleted == deleted # merge only non conflicting changes are applied with echo.always_choose(False, "m"): - res, sel_modified, sel_deleted = _select_source_files("pipedrive", deepcopy(modified), deepcopy(deleted), conflict_modified, conflict_deleted) + res, sel_modified, sel_deleted = _select_source_files( + "pipedrive", deepcopy(modified), deepcopy(deleted), conflict_modified, conflict_deleted + ) assert res == "m" assert len(sel_modified) == 1 and mod_file_path_2 in sel_modified assert sel_deleted == {} @@ -264,18 +296,26 @@ def test_init_code_update_index_diff(repo_dir: str, project_files: FileStorage) sources_storage.save(mod_file_path, local_content) project_files.delete(del_file_path) source_files = files_ops.get_verified_source_files(sources_storage, "pipedrive") - remote_index = files_ops.get_remote_source_index(sources_storage.storage_path, source_files.files, ">=0.3.5") + remote_index = files_ops.get_remote_source_index( + sources_storage.storage_path, source_files.files, ">=0.3.5" + ) new, modified, deleted = files_ops.gen_index_diff(local_index, remote_index) - conflict_modified, conflict_deleted = files_ops.find_conflict_files(local_index, new, modified, deleted, project_files) + conflict_modified, conflict_deleted = files_ops.find_conflict_files( + local_index, new, modified, deleted, project_files + ) assert conflict_modified == [] assert conflict_deleted == [] # generate a conflict by deleting file locally that is modified on remote project_files.delete(mod_file_path) source_files = files_ops.get_verified_source_files(sources_storage, "pipedrive") - remote_index = files_ops.get_remote_source_index(sources_storage.storage_path, source_files.files, ">=0.3.5") + remote_index = files_ops.get_remote_source_index( + 
sources_storage.storage_path, source_files.files, ">=0.3.5" + ) new, modified, deleted = files_ops.gen_index_diff(local_index, remote_index) - conflict_modified, conflict_deleted = files_ops.find_conflict_files(local_index, new, modified, deleted, project_files) + conflict_modified, conflict_deleted = files_ops.find_conflict_files( + local_index, new, modified, deleted, project_files + ) assert conflict_modified == [mod_file_path] @@ -306,8 +346,14 @@ def test_init_code_update_no_conflict(repo_dir: str, project_files: FileStorage) assert new_local_index["is_dirty"] is False assert new_local_index["last_commit_sha"] == commit.hexsha assert new_local_index["files"][mod_local_path]["commit_sha"] == commit.hexsha - assert new_local_index["files"][mod_local_path]["sha3_256"] == hashlib.sha3_256(bytes(new_content, encoding="ascii")).hexdigest() - assert new_local_index["files"][mod_local_path]["git_sha"] != local_index["files"][mod_local_path]["git_sha"] + assert ( + new_local_index["files"][mod_local_path]["sha3_256"] + == hashlib.sha3_256(bytes(new_content, encoding="ascii")).hexdigest() + ) + assert ( + new_local_index["files"][mod_local_path]["git_sha"] + != local_index["files"][mod_local_path]["git_sha"] + ) # all the other files must keep the old hashes for old_f, new_f in zip(local_index["files"].items(), new_local_index["files"].items()): # assert new_f[1]["commit_sha"] == commit.hexsha @@ -349,7 +395,9 @@ def test_init_code_update_no_conflict(repo_dir: str, project_files: FileStorage) @pytest.mark.parametrize("resolution", ["s", "a", "m"]) -def test_init_code_update_conflict(repo_dir: str, project_files: FileStorage, resolution: str) -> None: +def test_init_code_update_conflict( + repo_dir: str, project_files: FileStorage, resolution: str +) -> None: init_command.init_command("pipedrive", "duckdb", False, repo_dir) repo_storage = FileStorage(repo_dir) mod_local_path = os.path.join("pipedrive", "__init__.py") @@ -406,12 +454,16 @@ def test_init_requirements_text(repo_dir: str, project_files: FileStorage) -> No assert "pip3 install" in _out -def test_pipeline_template_sources_in_single_file(repo_dir: str, project_files: FileStorage) -> None: +def test_pipeline_template_sources_in_single_file( + repo_dir: str, project_files: FileStorage +) -> None: init_command.init_command("debug_pipeline", "bigquery", False, repo_dir) # _SOURCES now contains the sources from pipeline.py which simulates loading from two places with pytest.raises(CliCommandException) as cli_ex: init_command.init_command("generic_pipeline", "redshift", True, repo_dir) - assert "In init scripts you must declare all sources and resources in single file." in str(cli_ex.value) + assert "In init scripts you must declare all sources and resources in single file." in str( + cli_ex.value + ) def test_incompatible_dlt_version_warning(repo_dir: str, project_files: FileStorage) -> None: @@ -420,11 +472,18 @@ def test_incompatible_dlt_version_warning(repo_dir: str, project_files: FileStor init_command.init_command("facebook_ads", "bigquery", False, repo_dir) _out = buf.getvalue() - assert "WARNING: This pipeline requires a newer version of dlt than your installed version (0.1.1)." in _out + assert ( + "WARNING: This pipeline requires a newer version of dlt than your installed version" + " (0.1.1)." 
+ in _out + ) def assert_init_files( - project_files: FileStorage, pipeline_name: str, destination_name: str, dependency_destination: Optional[str] = None + project_files: FileStorage, + pipeline_name: str, + destination_name: str, + dependency_destination: Optional[str] = None, ) -> PipelineScriptVisitor: visitor, _ = assert_common_files(project_files, pipeline_name + ".py", destination_name) assert not project_files.has_folder(pipeline_name) @@ -437,7 +496,9 @@ def assert_requirements_txt(project_files: FileStorage, destination_name: str) - assert project_files.has_file(cli_utils.REQUIREMENTS_TXT) assert "dlt" in project_files.load(cli_utils.REQUIREMENTS_TXT) # dlt dependency specifies destination_name as extra - source_requirements = SourceRequirements.from_string(project_files.load(cli_utils.REQUIREMENTS_TXT)) + source_requirements = SourceRequirements.from_string( + project_files.load(cli_utils.REQUIREMENTS_TXT) + ) assert destination_name in source_requirements.dlt_requirement.extras # Check that atleast some version range is specified assert len(source_requirements.dlt_requirement.specifier) >= 1 @@ -447,11 +508,23 @@ def assert_index_version_constraint(project_files: FileStorage, source_name: str # check dlt version constraint in .sources index for given source matches the one in requirements.txt local_index = files_ops.load_verified_sources_local_index(source_name) index_constraint = local_index["dlt_version_constraint"] - assert index_constraint == SourceRequirements.from_string(project_files.load(cli_utils.REQUIREMENTS_TXT)).dlt_version_constraint() - - -def assert_source_files(project_files: FileStorage, source_name: str, destination_name: str, has_source_section: bool = True) -> Tuple[PipelineScriptVisitor, SecretsTomlProvider]: - visitor, secrets = assert_common_files(project_files, source_name + "_pipeline.py", destination_name) + assert ( + index_constraint + == SourceRequirements.from_string( + project_files.load(cli_utils.REQUIREMENTS_TXT) + ).dlt_version_constraint() + ) + + +def assert_source_files( + project_files: FileStorage, + source_name: str, + destination_name: str, + has_source_section: bool = True, +) -> Tuple[PipelineScriptVisitor, SecretsTomlProvider]: + visitor, secrets = assert_common_files( + project_files, source_name + "_pipeline.py", destination_name + ) assert project_files.has_folder(source_name) source_secrets = secrets.get_value(source_name, type, None, source_name) if has_source_section: @@ -472,7 +545,9 @@ def assert_source_files(project_files: FileStorage, source_name: str, destinatio return visitor, secrets -def assert_common_files(project_files: FileStorage, pipeline_script: str, destination_name: str) -> Tuple[PipelineScriptVisitor, SecretsTomlProvider]: +def assert_common_files( + project_files: FileStorage, pipeline_script: str, destination_name: str +) -> Tuple[PipelineScriptVisitor, SecretsTomlProvider]: # cwd must be project files - otherwise assert won't work assert os.getcwd() == project_files.storage_path assert project_files.has_file(make_dlt_settings_path(SECRETS_TOML)) @@ -480,7 +555,9 @@ def assert_common_files(project_files: FileStorage, pipeline_script: str, destin assert project_files.has_file(".gitignore") assert project_files.has_file(pipeline_script) # inspect script - visitor = cli_utils.parse_init_script("test", project_files.load(pipeline_script), pipeline_script) + visitor = cli_utils.parse_init_script( + "test", project_files.load(pipeline_script), pipeline_script + ) # check destinations for args in 
visitor.known_calls[n.PIPELINE]: assert args.arguments["destination"].value == destination_name @@ -490,7 +567,7 @@ def assert_common_files(project_files: FileStorage, pipeline_script: str, destin # destination is there assert secrets.get_value(destination_name, type, None, "destination") is not None # certain values are never there - for not_there in ["dataset_name", "destination_name", "default_schema_name", "as_staging", "staging_config"]: + for not_there in ["destination_name", "default_schema_name", "as_staging", "staging_config"]: assert secrets.get_value(not_there, type, None, "destination", destination_name)[0] is None return visitor, secrets diff --git a/tests/cli/test_pipeline_command.py b/tests/cli/test_pipeline_command.py index 401517f3c5..168b172d07 100644 --- a/tests/cli/test_pipeline_command.py +++ b/tests/cli/test_pipeline_command.py @@ -10,7 +10,14 @@ from dlt.cli import echo, init_command, pipeline_command -from tests.cli.utils import echo_default_choice, repo_dir, project_files, cloned_init_repo, get_repo_dir, get_project_files +from tests.cli.utils import ( + echo_default_choice, + repo_dir, + project_files, + cloned_init_repo, + get_repo_dir, + get_project_files, +) def test_pipeline_command_operations(repo_dir: str, project_files: FileStorage) -> None: @@ -24,7 +31,9 @@ def test_pipeline_command_operations(repo_dir: str, project_files: FileStorage) print(e) # now run the pipeline - os.environ.pop("DESTINATION__DUCKDB__CREDENTIALS", None) # settings from local project (secrets.toml etc.) + os.environ.pop( + "DESTINATION__DUCKDB__CREDENTIALS", None + ) # settings from local project (secrets.toml etc.) venv = Venv.restore_current() try: print(venv.run_script("chess_pipeline.py")) @@ -45,7 +54,8 @@ def test_pipeline_command_operations(repo_dir: str, project_files: FileStorage) pipeline_command.pipeline_command("info", "chess_pipeline", None, 0) _out = buf.getvalue() # do we have duckdb destination - assert "dlt.destinations.duckdb" in _out + assert "destination_name: duckdb" in _out + assert "destination_type: dlt.destinations.duckdb" in _out print(_out) with io.StringIO() as buf, contextlib.redirect_stdout(buf): @@ -59,7 +69,7 @@ def test_pipeline_command_operations(repo_dir: str, project_files: FileStorage) pipeline_command.pipeline_command("trace", "chess_pipeline", None, 0) _out = buf.getvalue() # basic trace - assert "Pipeline chess_pipeline completed in" in _out + assert "Pipeline chess_pipeline load step completed in" in _out print(_out) with io.StringIO() as buf, contextlib.redirect_stdout(buf): @@ -114,7 +124,9 @@ def test_pipeline_command_operations(repo_dir: str, project_files: FileStorage) with io.StringIO() as buf, contextlib.redirect_stdout(buf): with echo.always_choose(False, True): - pipeline_command.pipeline_command("drop", "chess_pipeline", None, 0, resources=["players_games"]) + pipeline_command.pipeline_command( + "drop", "chess_pipeline", None, 0, resources=["players_games"] + ) _out = buf.getvalue() assert "Selected resource(s): ['players_games']" in _out @@ -125,9 +137,17 @@ def test_pipeline_command_operations(repo_dir: str, project_files: FileStorage) with io.StringIO() as buf, contextlib.redirect_stdout(buf): # Test sync destination and drop when local state is missing - pipeline._pipeline_storage.delete_folder('', recursively=True) + pipeline._pipeline_storage.delete_folder("", recursively=True) with echo.always_choose(False, True): - pipeline_command.pipeline_command("drop", "chess_pipeline", None, 0, destination=pipeline.destination, 
dataset_name=pipeline.dataset_name, resources=["players_profiles"]) + pipeline_command.pipeline_command( + "drop", + "chess_pipeline", + None, + 0, + destination=pipeline.destination, + dataset_name=pipeline.dataset_name, + resources=["players_profiles"], + ) _out = buf.getvalue() assert "could not be restored: the pipeline was not found in " in _out @@ -192,18 +212,18 @@ def test_pipeline_command_drop_partial_loads(repo_dir: str, project_files: FileS pipeline_command.pipeline_command("info", "chess_pipeline", None, 1) _out = buf.getvalue() # one package is partially loaded - assert 'This package is partially loaded' in _out + assert "This package is partially loaded" in _out print(_out) with io.StringIO() as buf, contextlib.redirect_stdout(buf): with echo.always_choose(False, True): pipeline_command.pipeline_command("drop-pending-packages", "chess_pipeline", None, 1) _out = buf.getvalue() - assert 'Pending packages deleted' in _out + assert "Pending packages deleted" in _out print(_out) with io.StringIO() as buf, contextlib.redirect_stdout(buf): pipeline_command.pipeline_command("drop-pending-packages", "chess_pipeline", None, 1) _out = buf.getvalue() - assert 'No pending packages found' in _out - print(_out) \ No newline at end of file + assert "No pending packages found" in _out + print(_out) diff --git a/tests/cli/utils.py b/tests/cli/utils.py index eb3b4e3b84..56c614e3ae 100644 --- a/tests/cli/utils.py +++ b/tests/cli/utils.py @@ -30,7 +30,9 @@ def echo_default_choice() -> Iterator[None]: @pytest.fixture(scope="module") def cloned_init_repo() -> FileStorage: - return git.get_fresh_repo_files(INIT_REPO_LOCATION, get_dlt_repos_dir(), branch=INIT_REPO_BRANCH) + return git.get_fresh_repo_files( + INIT_REPO_LOCATION, get_dlt_repos_dir(), branch=INIT_REPO_BRANCH + ) @pytest.fixture @@ -46,7 +48,9 @@ def project_files() -> Iterator[FileStorage]: def get_repo_dir(cloned_init_repo: FileStorage) -> str: - repo_dir = os.path.abspath(os.path.join(TEST_STORAGE_ROOT, f"verified_sources_repo_{uniq_id()}")) + repo_dir = os.path.abspath( + os.path.join(TEST_STORAGE_ROOT, f"verified_sources_repo_{uniq_id()}") + ) # copy the whole repo into TEST_STORAGE_ROOT shutil.copytree(cloned_init_repo.storage_path, repo_dir) return repo_dir diff --git a/tests/common/cases/destinations/__init__.py b/tests/common/cases/destinations/__init__.py new file mode 100644 index 0000000000..08a9c6b9dd --- /dev/null +++ b/tests/common/cases/destinations/__init__.py @@ -0,0 +1,3 @@ +from .null import null + +__all__ = ["null"] diff --git a/tests/common/cases/destinations/null.py b/tests/common/cases/destinations/null.py new file mode 100644 index 0000000000..b2054cd7e8 --- /dev/null +++ b/tests/common/cases/destinations/null.py @@ -0,0 +1,22 @@ +from typing import Any, Type + +from dlt.common.destination.capabilities import DestinationCapabilitiesContext +from dlt.common.destination.reference import ( + Destination, + DestinationClientConfiguration, + JobClientBase, +) + + +class null(Destination[DestinationClientConfiguration, "JobClientBase"]): + def __init__(self, **kwargs: Any) -> None: + super().__init__(**kwargs) + + spec = DestinationClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return DestinationCapabilitiesContext.generic_capabilities() + + @property + def client_class(self) -> Type["JobClientBase"]: + return JobClientBase diff --git a/tests/common/cases/modules/uniq_mod_121.py b/tests/common/cases/modules/uniq_mod_121.py index 893d08d178..810eb35840 100644 --- 
a/tests/common/cases/modules/uniq_mod_121.py +++ b/tests/common/cases/modules/uniq_mod_121.py @@ -1,8 +1,10 @@ import inspect from dlt.common.utils import get_module_name + def find_my_module(): pass + if __name__ == "__main__": print(get_module_name(inspect.getmodule(find_my_module))) diff --git a/tests/common/cases/schemas/eth/ethereum_schema_v7.yml b/tests/common/cases/schemas/eth/ethereum_schema_v7.yml new file mode 100644 index 0000000000..f8645d78ae --- /dev/null +++ b/tests/common/cases/schemas/eth/ethereum_schema_v7.yml @@ -0,0 +1,459 @@ +version: 15 +version_hash: yjMtV4Zv0IJlfR5DPMwuXxGg8BRhy7E79L26XAHWEGE= +engine_version: 7 +name: ethereum +tables: + _dlt_loads: + columns: + load_id: + nullable: false + data_type: text + name: load_id + schema_name: + nullable: true + data_type: text + name: schema_name + status: + nullable: false + data_type: bigint + name: status + inserted_at: + nullable: false + data_type: timestamp + name: inserted_at + schema_version_hash: + nullable: true + data_type: text + name: schema_version_hash + write_disposition: skip + description: Created by DLT. Tracks completed loads + schema_contract: {} + name: _dlt_loads + resource: _dlt_loads + _dlt_version: + columns: + version: + nullable: false + data_type: bigint + name: version + engine_version: + nullable: false + data_type: bigint + name: engine_version + inserted_at: + nullable: false + data_type: timestamp + name: inserted_at + schema_name: + nullable: false + data_type: text + name: schema_name + version_hash: + nullable: false + data_type: text + name: version_hash + schema: + nullable: false + data_type: text + name: schema + write_disposition: skip + description: Created by DLT. Tracks schema updates + schema_contract: {} + name: _dlt_version + resource: _dlt_version + blocks: + description: Ethereum blocks + x-annotation: this will be preserved on save + write_disposition: append + filters: + includes: [] + excludes: [] + columns: + _dlt_load_id: + nullable: false + description: load id coming from the extractor + data_type: text + name: _dlt_load_id + _dlt_id: + nullable: false + unique: true + data_type: text + name: _dlt_id + number: + nullable: false + primary_key: true + data_type: bigint + name: number + parent_hash: + nullable: true + data_type: text + name: parent_hash + hash: + nullable: false + cluster: true + unique: true + data_type: text + name: hash + base_fee_per_gas: + nullable: false + data_type: wei + name: base_fee_per_gas + difficulty: + nullable: false + data_type: wei + name: difficulty + extra_data: + nullable: true + data_type: text + name: extra_data + gas_limit: + nullable: false + data_type: bigint + name: gas_limit + gas_used: + nullable: false + data_type: bigint + name: gas_used + logs_bloom: + nullable: true + data_type: binary + name: logs_bloom + miner: + nullable: true + data_type: text + name: miner + mix_hash: + nullable: true + data_type: text + name: mix_hash + nonce: + nullable: true + data_type: text + name: nonce + receipts_root: + nullable: true + data_type: text + name: receipts_root + sha3_uncles: + nullable: true + data_type: text + name: sha3_uncles + size: + nullable: true + data_type: bigint + name: size + state_root: + nullable: false + data_type: text + name: state_root + timestamp: + nullable: false + unique: true + sort: true + data_type: timestamp + name: timestamp + total_difficulty: + nullable: true + data_type: wei + name: total_difficulty + transactions_root: + nullable: false + data_type: text + name: transactions_root + 
schema_contract: {} + name: blocks + resource: blocks + blocks__transactions: + parent: blocks + columns: + _dlt_id: + nullable: false + unique: true + data_type: text + name: _dlt_id + block_number: + nullable: false + primary_key: true + foreign_key: true + data_type: bigint + name: block_number + transaction_index: + nullable: false + primary_key: true + data_type: bigint + name: transaction_index + hash: + nullable: false + unique: true + data_type: text + name: hash + block_hash: + nullable: false + cluster: true + data_type: text + name: block_hash + block_timestamp: + nullable: false + sort: true + data_type: timestamp + name: block_timestamp + chain_id: + nullable: true + data_type: text + name: chain_id + from: + nullable: true + data_type: text + name: from + gas: + nullable: true + data_type: bigint + name: gas + gas_price: + nullable: true + data_type: bigint + name: gas_price + input: + nullable: true + data_type: text + name: input + max_fee_per_gas: + nullable: true + data_type: wei + name: max_fee_per_gas + max_priority_fee_per_gas: + nullable: true + data_type: wei + name: max_priority_fee_per_gas + nonce: + nullable: true + data_type: bigint + name: nonce + r: + nullable: true + data_type: text + name: r + s: + nullable: true + data_type: text + name: s + status: + nullable: true + data_type: bigint + name: status + to: + nullable: true + data_type: text + name: to + type: + nullable: true + data_type: text + name: type + v: + nullable: true + data_type: bigint + name: v + value: + nullable: false + data_type: wei + name: value + eth_value: + nullable: true + data_type: decimal + name: eth_value + name: blocks__transactions + blocks__transactions__logs: + parent: blocks__transactions + columns: + _dlt_id: + nullable: false + unique: true + data_type: text + name: _dlt_id + address: + nullable: false + data_type: text + name: address + block_timestamp: + nullable: false + sort: true + data_type: timestamp + name: block_timestamp + block_hash: + nullable: false + cluster: true + data_type: text + name: block_hash + block_number: + nullable: false + primary_key: true + foreign_key: true + data_type: bigint + name: block_number + transaction_index: + nullable: false + primary_key: true + foreign_key: true + data_type: bigint + name: transaction_index + log_index: + nullable: false + primary_key: true + data_type: bigint + name: log_index + data: + nullable: true + data_type: text + name: data + removed: + nullable: true + data_type: bool + name: removed + transaction_hash: + nullable: false + data_type: text + name: transaction_hash + name: blocks__transactions__logs + blocks__transactions__logs__topics: + parent: blocks__transactions__logs + columns: + _dlt_parent_id: + nullable: false + foreign_key: true + data_type: text + name: _dlt_parent_id + _dlt_list_idx: + nullable: false + data_type: bigint + name: _dlt_list_idx + _dlt_id: + nullable: false + unique: true + data_type: text + name: _dlt_id + _dlt_root_id: + nullable: false + root_key: true + data_type: text + name: _dlt_root_id + value: + nullable: true + data_type: text + name: value + name: blocks__transactions__logs__topics + blocks__transactions__access_list: + parent: blocks__transactions + columns: + _dlt_parent_id: + nullable: false + foreign_key: true + data_type: text + name: _dlt_parent_id + _dlt_list_idx: + nullable: false + data_type: bigint + name: _dlt_list_idx + _dlt_id: + nullable: false + unique: true + data_type: text + name: _dlt_id + _dlt_root_id: + nullable: false + root_key: true + data_type: 
text + name: _dlt_root_id + address: + nullable: true + data_type: text + name: address + name: blocks__transactions__access_list + blocks__transactions__access_list__storage_keys: + parent: blocks__transactions__access_list + columns: + _dlt_parent_id: + nullable: false + foreign_key: true + data_type: text + name: _dlt_parent_id + _dlt_list_idx: + nullable: false + data_type: bigint + name: _dlt_list_idx + _dlt_id: + nullable: false + unique: true + data_type: text + name: _dlt_id + _dlt_root_id: + nullable: false + root_key: true + data_type: text + name: _dlt_root_id + value: + nullable: true + data_type: text + name: value + name: blocks__transactions__access_list__storage_keys + blocks__uncles: + parent: blocks + columns: + _dlt_parent_id: + nullable: false + foreign_key: true + data_type: text + name: _dlt_parent_id + _dlt_list_idx: + nullable: false + data_type: bigint + name: _dlt_list_idx + _dlt_id: + nullable: false + unique: true + data_type: text + name: _dlt_id + _dlt_root_id: + nullable: false + root_key: true + data_type: text + name: _dlt_root_id + value: + nullable: true + data_type: text + name: value + name: blocks__uncles +settings: + default_hints: + foreign_key: + - _dlt_parent_id + not_null: + - re:^_dlt_id$ + - _dlt_root_id + - _dlt_parent_id + - _dlt_list_idx + unique: + - _dlt_id + cluster: + - block_hash + partition: + - block_timestamp + root_key: + - _dlt_root_id + preferred_types: + timestamp: timestamp + block_timestamp: timestamp + schema_contract: {} +normalizers: + names: dlt.common.normalizers.names.snake_case + json: + module: dlt.common.normalizers.json.relational + config: + generate_dlt_id: true + propagation: + root: + _dlt_id: _dlt_root_id + tables: + blocks: + timestamp: block_timestamp + hash: block_hash + diff --git a/tests/common/cases/schemas/eth/ethereum_schema_v8.yml b/tests/common/cases/schemas/eth/ethereum_schema_v8.yml new file mode 100644 index 0000000000..928c9a3e54 --- /dev/null +++ b/tests/common/cases/schemas/eth/ethereum_schema_v8.yml @@ -0,0 +1,461 @@ +version: 16 +version_hash: C5An8WClbavalXDdNSqXbdI7Swqh/mTWMcwWKCF//EE= +engine_version: 8 +name: ethereum +tables: + _dlt_loads: + columns: + load_id: + nullable: false + data_type: text + name: load_id + schema_name: + nullable: true + data_type: text + name: schema_name + status: + nullable: false + data_type: bigint + name: status + inserted_at: + nullable: false + data_type: timestamp + name: inserted_at + schema_version_hash: + nullable: true + data_type: text + name: schema_version_hash + write_disposition: skip + description: Created by DLT. Tracks completed loads + schema_contract: {} + name: _dlt_loads + resource: _dlt_loads + _dlt_version: + columns: + version: + nullable: false + data_type: bigint + name: version + engine_version: + nullable: false + data_type: bigint + name: engine_version + inserted_at: + nullable: false + data_type: timestamp + name: inserted_at + schema_name: + nullable: false + data_type: text + name: schema_name + version_hash: + nullable: false + data_type: text + name: version_hash + schema: + nullable: false + data_type: text + name: schema + write_disposition: skip + description: Created by DLT. 
Tracks schema updates + schema_contract: {} + name: _dlt_version + resource: _dlt_version + blocks: + description: Ethereum blocks + x-annotation: this will be preserved on save + write_disposition: append + filters: + includes: [] + excludes: [] + columns: + _dlt_load_id: + nullable: false + description: load id coming from the extractor + data_type: text + name: _dlt_load_id + _dlt_id: + nullable: false + unique: true + data_type: text + name: _dlt_id + number: + nullable: false + primary_key: true + data_type: bigint + name: number + parent_hash: + nullable: true + data_type: text + name: parent_hash + hash: + nullable: false + cluster: true + unique: true + data_type: text + name: hash + base_fee_per_gas: + nullable: false + data_type: wei + name: base_fee_per_gas + difficulty: + nullable: false + data_type: wei + name: difficulty + extra_data: + nullable: true + data_type: text + name: extra_data + gas_limit: + nullable: false + data_type: bigint + name: gas_limit + gas_used: + nullable: false + data_type: bigint + name: gas_used + logs_bloom: + nullable: true + data_type: binary + name: logs_bloom + miner: + nullable: true + data_type: text + name: miner + mix_hash: + nullable: true + data_type: text + name: mix_hash + nonce: + nullable: true + data_type: text + name: nonce + receipts_root: + nullable: true + data_type: text + name: receipts_root + sha3_uncles: + nullable: true + data_type: text + name: sha3_uncles + size: + nullable: true + data_type: bigint + name: size + state_root: + nullable: false + data_type: text + name: state_root + timestamp: + nullable: false + unique: true + sort: true + data_type: timestamp + name: timestamp + total_difficulty: + nullable: true + data_type: wei + name: total_difficulty + transactions_root: + nullable: false + data_type: text + name: transactions_root + schema_contract: {} + name: blocks + resource: blocks + blocks__transactions: + parent: blocks + columns: + _dlt_id: + nullable: false + unique: true + data_type: text + name: _dlt_id + block_number: + nullable: false + primary_key: true + foreign_key: true + data_type: bigint + name: block_number + transaction_index: + nullable: false + primary_key: true + data_type: bigint + name: transaction_index + hash: + nullable: false + unique: true + data_type: text + name: hash + block_hash: + nullable: false + cluster: true + data_type: text + name: block_hash + block_timestamp: + nullable: false + sort: true + data_type: timestamp + name: block_timestamp + chain_id: + nullable: true + data_type: text + name: chain_id + from: + nullable: true + data_type: text + name: from + gas: + nullable: true + data_type: bigint + name: gas + gas_price: + nullable: true + data_type: bigint + name: gas_price + input: + nullable: true + data_type: text + name: input + max_fee_per_gas: + nullable: true + data_type: wei + name: max_fee_per_gas + max_priority_fee_per_gas: + nullable: true + data_type: wei + name: max_priority_fee_per_gas + nonce: + nullable: true + data_type: bigint + name: nonce + r: + nullable: true + data_type: text + name: r + s: + nullable: true + data_type: text + name: s + status: + nullable: true + data_type: bigint + name: status + to: + nullable: true + data_type: text + name: to + type: + nullable: true + data_type: text + name: type + v: + nullable: true + data_type: bigint + name: v + value: + nullable: false + data_type: wei + name: value + eth_value: + nullable: true + data_type: decimal + name: eth_value + name: blocks__transactions + blocks__transactions__logs: + parent: 
blocks__transactions + columns: + _dlt_id: + nullable: false + unique: true + data_type: text + name: _dlt_id + address: + nullable: false + data_type: text + name: address + block_timestamp: + nullable: false + sort: true + data_type: timestamp + name: block_timestamp + block_hash: + nullable: false + cluster: true + data_type: text + name: block_hash + block_number: + nullable: false + primary_key: true + foreign_key: true + data_type: bigint + name: block_number + transaction_index: + nullable: false + primary_key: true + foreign_key: true + data_type: bigint + name: transaction_index + log_index: + nullable: false + primary_key: true + data_type: bigint + name: log_index + data: + nullable: true + data_type: text + name: data + removed: + nullable: true + data_type: bool + name: removed + transaction_hash: + nullable: false + data_type: text + name: transaction_hash + name: blocks__transactions__logs + blocks__transactions__logs__topics: + parent: blocks__transactions__logs + columns: + _dlt_parent_id: + nullable: false + foreign_key: true + data_type: text + name: _dlt_parent_id + _dlt_list_idx: + nullable: false + data_type: bigint + name: _dlt_list_idx + _dlt_id: + nullable: false + unique: true + data_type: text + name: _dlt_id + _dlt_root_id: + nullable: false + root_key: true + data_type: text + name: _dlt_root_id + value: + nullable: true + data_type: text + name: value + name: blocks__transactions__logs__topics + blocks__transactions__access_list: + parent: blocks__transactions + columns: + _dlt_parent_id: + nullable: false + foreign_key: true + data_type: text + name: _dlt_parent_id + _dlt_list_idx: + nullable: false + data_type: bigint + name: _dlt_list_idx + _dlt_id: + nullable: false + unique: true + data_type: text + name: _dlt_id + _dlt_root_id: + nullable: false + root_key: true + data_type: text + name: _dlt_root_id + address: + nullable: true + data_type: text + name: address + name: blocks__transactions__access_list + blocks__transactions__access_list__storage_keys: + parent: blocks__transactions__access_list + columns: + _dlt_parent_id: + nullable: false + foreign_key: true + data_type: text + name: _dlt_parent_id + _dlt_list_idx: + nullable: false + data_type: bigint + name: _dlt_list_idx + _dlt_id: + nullable: false + unique: true + data_type: text + name: _dlt_id + _dlt_root_id: + nullable: false + root_key: true + data_type: text + name: _dlt_root_id + value: + nullable: true + data_type: text + name: value + name: blocks__transactions__access_list__storage_keys + blocks__uncles: + parent: blocks + columns: + _dlt_parent_id: + nullable: false + foreign_key: true + data_type: text + name: _dlt_parent_id + _dlt_list_idx: + nullable: false + data_type: bigint + name: _dlt_list_idx + _dlt_id: + nullable: false + unique: true + data_type: text + name: _dlt_id + _dlt_root_id: + nullable: false + root_key: true + data_type: text + name: _dlt_root_id + value: + nullable: true + data_type: text + name: value + name: blocks__uncles +settings: + default_hints: + foreign_key: + - _dlt_parent_id + not_null: + - re:^_dlt_id$ + - _dlt_root_id + - _dlt_parent_id + - _dlt_list_idx + unique: + - _dlt_id + cluster: + - block_hash + partition: + - block_timestamp + root_key: + - _dlt_root_id + preferred_types: + timestamp: timestamp + block_timestamp: timestamp + schema_contract: {} +normalizers: + names: dlt.common.normalizers.names.snake_case + json: + module: dlt.common.normalizers.json.relational + config: + generate_dlt_id: true + propagation: + root: + _dlt_id: _dlt_root_id + 
tables: + blocks: + timestamp: block_timestamp + hash: block_hash +previous_hashes: +- yjMtV4Zv0IJlfR5DPMwuXxGg8BRhy7E79L26XAHWEGE= + diff --git a/tests/common/configuration/test_accessors.py b/tests/common/configuration/test_accessors.py index e641afd22a..147d56abec 100644 --- a/tests/common/configuration/test_accessors.py +++ b/tests/common/configuration/test_accessors.py @@ -6,9 +6,16 @@ from dlt.common import json from dlt.common.configuration.exceptions import ConfigFieldMissingException -from dlt.common.configuration.providers import EnvironProvider, ConfigTomlProvider, SecretsTomlProvider +from dlt.common.configuration.providers import ( + EnvironProvider, + ConfigTomlProvider, + SecretsTomlProvider, +) from dlt.common.configuration.resolve import resolve_configuration -from dlt.common.configuration.specs import GcpServiceAccountCredentialsWithoutDefaults, ConnectionStringCredentials +from dlt.common.configuration.specs import ( + GcpServiceAccountCredentialsWithoutDefaults, + ConnectionStringCredentials, +) from dlt.common.configuration.specs.config_providers_context import ConfigProvidersContext from dlt.common.configuration.utils import get_resolved_traces, ResolvedValueTrace from dlt.common.runners.configuration import PoolRunnerConfiguration @@ -39,19 +46,29 @@ def test_getter_accessor(toml_providers: ConfigProvidersContext, environment: An environment["VALUE"] = "{SET" assert dlt.config["value"] == "{SET" - assert RESOLVED_TRACES[".value"] == ResolvedValueTrace("value", "{SET", None, AnyType, [], EnvironProvider().name, None) + assert RESOLVED_TRACES[".value"] == ResolvedValueTrace( + "value", "{SET", None, AnyType, [], EnvironProvider().name, None + ) assert dlt.secrets["value"] == "{SET" - assert RESOLVED_TRACES[".value"] == ResolvedValueTrace("value", "{SET", None, TSecretValue, [], EnvironProvider().name, None) + assert RESOLVED_TRACES[".value"] == ResolvedValueTrace( + "value", "{SET", None, TSecretValue, [], EnvironProvider().name, None + ) # get sectioned values assert dlt.config["typecheck.str_val"] == "test string" - assert RESOLVED_TRACES["typecheck.str_val"] == ResolvedValueTrace("str_val", "test string", None, AnyType, ["typecheck"], ConfigTomlProvider().name, None) + assert RESOLVED_TRACES["typecheck.str_val"] == ResolvedValueTrace( + "str_val", "test string", None, AnyType, ["typecheck"], ConfigTomlProvider().name, None + ) environment["DLT__THIS__VALUE"] = "embedded" assert dlt.config["dlt.this.value"] == "embedded" - assert RESOLVED_TRACES["dlt.this.value"] == ResolvedValueTrace("value", "embedded", None, AnyType, ["dlt", "this"], EnvironProvider().name, None) + assert RESOLVED_TRACES["dlt.this.value"] == ResolvedValueTrace( + "value", "embedded", None, AnyType, ["dlt", "this"], EnvironProvider().name, None + ) assert dlt.secrets["dlt.this.value"] == "embedded" - assert RESOLVED_TRACES["dlt.this.value"] == ResolvedValueTrace("value", "embedded", None, TSecretValue, ["dlt", "this"], EnvironProvider().name, None) + assert RESOLVED_TRACES["dlt.this.value"] == ResolvedValueTrace( + "value", "embedded", None, TSecretValue, ["dlt", "this"], EnvironProvider().name, None + ) def test_getter_auto_cast(toml_providers: ConfigProvidersContext, environment: Any) -> None: @@ -83,7 +100,7 @@ def test_getter_auto_cast(toml_providers: ConfigProvidersContext, environment: A assert dlt.config["value"] == {"a": 1} assert dlt.config["value"]["a"] == 1 # if not dict or list then original string must be returned, null is a JSON -> None - environment["VALUE"] = 'null' + 
environment["VALUE"] = "null" assert dlt.config["value"] == "null" # typed values are returned as they are @@ -91,11 +108,32 @@ def test_getter_auto_cast(toml_providers: ConfigProvidersContext, environment: A # access dict from toml services_json_dict = dlt.secrets["destination.bigquery"] - assert dlt.secrets["destination.bigquery"]["client_email"] == "loader@a7513.iam.gserviceaccount.com" - assert RESOLVED_TRACES["destination.bigquery"] == ResolvedValueTrace("bigquery", services_json_dict, None, TSecretValue, ["destination"], SecretsTomlProvider().name, None) + assert ( + dlt.secrets["destination.bigquery"]["client_email"] + == "loader@a7513.iam.gserviceaccount.com" + ) + assert RESOLVED_TRACES["destination.bigquery"] == ResolvedValueTrace( + "bigquery", + services_json_dict, + None, + TSecretValue, + ["destination"], + SecretsTomlProvider().name, + None, + ) # equivalent - assert dlt.secrets["destination.bigquery.client_email"] == "loader@a7513.iam.gserviceaccount.com" - assert RESOLVED_TRACES["destination.bigquery.client_email"] == ResolvedValueTrace("client_email", "loader@a7513.iam.gserviceaccount.com", None, TSecretValue, ["destination", "bigquery"], SecretsTomlProvider().name, None) + assert ( + dlt.secrets["destination.bigquery.client_email"] == "loader@a7513.iam.gserviceaccount.com" + ) + assert RESOLVED_TRACES["destination.bigquery.client_email"] == ResolvedValueTrace( + "client_email", + "loader@a7513.iam.gserviceaccount.com", + None, + TSecretValue, + ["destination", "bigquery"], + SecretsTomlProvider().name, + None, + ) def test_getter_accessor_typed(toml_providers: ConfigProvidersContext, environment: Any) -> None: @@ -104,7 +142,9 @@ def test_getter_accessor_typed(toml_providers: ConfigProvidersContext, environme # the typed version coerces the value into desired type, in this case "dict" -> "str" assert dlt.secrets.get("credentials", str) == credentials_str # note that trace keeps original value of "credentials" which was of dictionary type - assert RESOLVED_TRACES[".credentials"] == ResolvedValueTrace("credentials", json.loads(credentials_str), None, str, [], SecretsTomlProvider().name, None) + assert RESOLVED_TRACES[".credentials"] == ResolvedValueTrace( + "credentials", json.loads(credentials_str), None, str, [], SecretsTomlProvider().name, None + ) # unchanged type assert isinstance(dlt.secrets.get("credentials"), dict) # fail on type coercion @@ -148,8 +188,13 @@ def test_setter(toml_providers: ConfigProvidersContext, environment: Any) -> Non # mod the config and use it to resolve the configuration dlt.config["pool"] = {"pool_type": "process", "workers": 21} - c = resolve_configuration(PoolRunnerConfiguration(), sections=("pool", )) - assert dict(c) == {"pool_type": "process", "workers": 21, 'run_sleep': 0.1} + c = resolve_configuration(PoolRunnerConfiguration(), sections=("pool",)) + assert dict(c) == { + "pool_type": "process", + "start_method": None, + "workers": 21, + "run_sleep": 0.1, + } def test_secrets_separation(toml_providers: ConfigProvidersContext) -> None: @@ -163,13 +208,19 @@ def test_secrets_separation(toml_providers: ConfigProvidersContext) -> None: def test_access_injection(toml_providers: ConfigProvidersContext) -> None: - @dlt.source - def the_source(api_type=dlt.config.value, credentials: GcpServiceAccountCredentialsWithoutDefaults=dlt.secrets.value, databricks_creds: ConnectionStringCredentials=dlt.secrets.value): + def the_source( + api_type=dlt.config.value, + credentials: GcpServiceAccountCredentialsWithoutDefaults = dlt.secrets.value, + 
databricks_creds: ConnectionStringCredentials = dlt.secrets.value, + ): assert api_type == "REST" assert credentials.client_email == "loader@a7513.iam.gserviceaccount.com" assert databricks_creds.drivername == "databricks+connector" - return dlt.resource([1,2,3], name="data") + return dlt.resource([1, 2, 3], name="data") # inject first argument, the rest pass explicitly - the_source(credentials=dlt.secrets["destination.credentials"], databricks_creds=dlt.secrets["databricks.credentials"]) + the_source( + credentials=dlt.secrets["destination.credentials"], + databricks_creds=dlt.secrets["databricks.credentials"], + ) diff --git a/tests/common/configuration/test_configuration.py b/tests/common/configuration/test_configuration.py index fc009d8444..81d49432d7 100644 --- a/tests/common/configuration/test_configuration.py +++ b/tests/common/configuration/test_configuration.py @@ -1,52 +1,99 @@ import pytest import datetime # noqa: I251 from unittest.mock import patch -from typing import Any, Dict, Final, List, Mapping, MutableMapping, NewType, Optional, Type, Union, TYPE_CHECKING +from typing import ( + Any, + Dict, + Final, + List, + Mapping, + MutableMapping, + NewType, + Optional, + Type, + Union, + TYPE_CHECKING, +) from dlt.common import json, pendulum, Decimal, Wei from dlt.common.configuration.providers.provider import ConfigProvider -from dlt.common.configuration.specs.gcp_credentials import GcpServiceAccountCredentialsWithoutDefaults -from dlt.common.utils import custom_environ +from dlt.common.configuration.specs.gcp_credentials import ( + GcpServiceAccountCredentialsWithoutDefaults, +) +from dlt.common.utils import custom_environ, get_exception_trace, get_exception_trace_chain from dlt.common.typing import AnyType, DictStrAny, StrAny, TSecretValue, extract_inner_type from dlt.common.configuration.exceptions import ( - ConfigFieldMissingTypeHintException, ConfigFieldTypeHintNotSupported, - InvalidNativeValue, LookupTrace, ValueNotSecretException, UnmatchedConfigHintResolversException + ConfigFieldMissingTypeHintException, + ConfigFieldTypeHintNotSupported, + InvalidNativeValue, + LookupTrace, + ValueNotSecretException, + UnmatchedConfigHintResolversException, +) +from dlt.common.configuration import ( + configspec, + ConfigFieldMissingException, + ConfigValueCannotBeCoercedException, + resolve, + is_valid_hint, + resolve_type, +) +from dlt.common.configuration.specs import ( + BaseConfiguration, + RunConfiguration, + ConnectionStringCredentials, ) -from dlt.common.configuration import configspec, ConfigFieldMissingException, ConfigValueCannotBeCoercedException, resolve, is_valid_hint, resolve_type -from dlt.common.configuration.specs import BaseConfiguration, RunConfiguration, ConnectionStringCredentials from dlt.common.configuration.providers import environ as environ_provider, toml -from dlt.common.configuration.utils import get_resolved_traces, ResolvedValueTrace, serialize_value, deserialize_value, add_config_dict_to_env, add_config_to_env +from dlt.common.configuration.utils import ( + get_resolved_traces, + ResolvedValueTrace, + serialize_value, + deserialize_value, + add_config_dict_to_env, + add_config_to_env, +) from tests.utils import preserve_environ from tests.common.configuration.utils import ( - MockProvider, CoercionTestConfiguration, COERCIONS, SecretCredentials, WithCredentialsConfiguration, WrongConfiguration, SecretConfiguration, - SectionedConfiguration, environment, mock_provider, env_provider, reset_resolved_traces) + MockProvider, + CoercionTestConfiguration, + 
COERCIONS, + SecretCredentials, + WithCredentialsConfiguration, + WrongConfiguration, + SecretConfiguration, + SectionedConfiguration, + environment, + mock_provider, + env_provider, + reset_resolved_traces, +) INVALID_COERCIONS = { # 'STR_VAL': 'test string', # string always OK - 'int_val': "a12345", - 'bool_val': "not_bool", # bool overridden by string - that is the most common problem - 'list_val': {"2": 1, "3": 3.0}, - 'dict_val': "{'a': 1, 'b', '2'}", - 'bytes_val': 'Hello World!', - 'float_val': "invalid", + "int_val": "a12345", + "bool_val": "not_bool", # bool overridden by string - that is the most common problem + "list_val": {"2": 1, "3": 3.0}, + "dict_val": "{'a': 1, 'b', '2'}", + "bytes_val": "Hello World!", + "float_val": "invalid", "tuple_val": "{1:2}", "date_val": "01 May 2022", - "dec_val": True + "dec_val": True, } EXCEPTED_COERCIONS = { # allows to use int for float - 'float_val': 10, + "float_val": 10, # allows to use float for str - 'str_val': 10.0 + "str_val": 10.0, } COERCED_EXCEPTIONS = { # allows to use int for float - 'float_val': 10.0, + "float_val": 10.0, # allows to use float for str - 'str_val': "10.0" + "str_val": "10.0", } @@ -82,8 +129,8 @@ class FieldWithNoDefaultConfiguration(RunConfiguration): no_default: str if TYPE_CHECKING: - def __init__(self, no_default: str = None, sentry_dsn: str = None) -> None: - ... + + def __init__(self, no_default: str = None, sentry_dsn: str = None) -> None: ... @configspec @@ -110,8 +157,8 @@ def on_resolved(self) -> None: raise RuntimeError("Head over heels") if TYPE_CHECKING: - def __init__(self, head: str = None, tube: List[str] = None, heels: str = None) -> None: - ... + + def __init__(self, head: str = None, tube: List[str] = None, heels: str = None) -> None: ... @configspec @@ -121,8 +168,13 @@ class EmbeddedConfiguration(BaseConfiguration): sectioned: SectionedConfiguration if TYPE_CHECKING: - def __init__(self, default: str = None, instrumented: InstrumentedConfiguration = None, sectioned: SectionedConfiguration = None) -> None: - ... + + def __init__( + self, + default: str = None, + instrumented: InstrumentedConfiguration = None, + sectioned: SectionedConfiguration = None, + ) -> None: ... 
@configspec @@ -162,26 +214,26 @@ class ConfigWithDynamicType(BaseConfiguration): discriminator: str embedded_config: BaseConfiguration - @resolve_type('embedded_config') + @resolve_type("embedded_config") def resolve_embedded_type(self) -> Type[BaseConfiguration]: - if self.discriminator == 'a': + if self.discriminator == "a": return DynamicConfigA - elif self.discriminator == 'b': + elif self.discriminator == "b": return DynamicConfigB return BaseConfiguration @configspec class ConfigWithInvalidDynamicType(BaseConfiguration): - @resolve_type('a') + @resolve_type("a") def resolve_a_type(self) -> Type[BaseConfiguration]: return DynamicConfigA - @resolve_type('b') + @resolve_type("b") def resolve_b_type(self) -> Type[BaseConfiguration]: return DynamicConfigB - @resolve_type('c') + @resolve_type("c") def resolve_c_type(self) -> Type[BaseConfiguration]: return DynamicConfigC @@ -191,13 +243,13 @@ class SubclassConfigWithDynamicType(ConfigWithDynamicType): is_number: bool dynamic_type_field: Any - @resolve_type('embedded_config') + @resolve_type("embedded_config") def resolve_embedded_type(self) -> Type[BaseConfiguration]: - if self.discriminator == 'c': + if self.discriminator == "c": return DynamicConfigC return super().resolve_embedded_type() - @resolve_type('dynamic_type_field') + @resolve_type("dynamic_type_field") def resolve_dynamic_type_field(self) -> Type[Union[int, str]]: if self.is_number: return int @@ -221,7 +273,9 @@ def test_initial_config_state() -> None: def test_set_default_config_value(environment: Any) -> None: # set from init method - c = resolve.resolve_configuration(InstrumentedConfiguration(head="h", tube=["a", "b"], heels="he")) + c = resolve.resolve_configuration( + InstrumentedConfiguration(head="h", tube=["a", "b"], heels="he") + ) assert c.to_native_representation() == "h>a>b>he" # set from native form c = resolve.resolve_configuration(InstrumentedConfiguration(), explicit_value="h>a>b>he") @@ -229,7 +283,10 @@ def test_set_default_config_value(environment: Any) -> None: assert c.tube == ["a", "b"] assert c.heels == "he" # set from dictionary - c = resolve.resolve_configuration(InstrumentedConfiguration(), explicit_value={"head": "h", "tube": ["tu", "be"], "heels": "xhe"}) + c = resolve.resolve_configuration( + InstrumentedConfiguration(), + explicit_value={"head": "h", "tube": ["tu", "be"], "heels": "xhe"}, + ) assert c.to_native_representation() == "h>tu>be>xhe" @@ -238,9 +295,14 @@ def test_explicit_values(environment: Any) -> None: environment["PIPELINE_NAME"] = "env name" environment["CREATED_VAL"] = "12837" # set explicit values and allow partial config - c = resolve.resolve_configuration(CoercionTestConfiguration(), - explicit_value={"pipeline_name": "initial name", "none_val": type(environment), "bytes_val": b"str"}, - accept_partial=True + c = resolve.resolve_configuration( + CoercionTestConfiguration(), + explicit_value={ + "pipeline_name": "initial name", + "none_val": type(environment), + "bytes_val": b"str", + }, + accept_partial=True, ) # explicit assert c.pipeline_name == "initial name" @@ -249,13 +311,17 @@ def test_explicit_values(environment: Any) -> None: assert c.none_val == type(environment) # unknown field in explicit value dict is ignored - c = resolve.resolve_configuration(CoercionTestConfiguration(), explicit_value={"created_val": "3343"}, accept_partial=True) + c = resolve.resolve_configuration( + CoercionTestConfiguration(), explicit_value={"created_val": "3343"}, accept_partial=True + ) assert "created_val" not in c def 
test_explicit_values_false_when_bool() -> None: # values like 0, [], "" all coerce to bool False - c = resolve.resolve_configuration(InstrumentedConfiguration(), explicit_value={"head": "", "tube": [], "heels": ""}) + c = resolve.resolve_configuration( + InstrumentedConfiguration(), explicit_value={"head": "", "tube": [], "heels": ""} + ) assert c.head == "" assert c.tube == [] assert c.heels == "" @@ -280,7 +346,6 @@ def test_default_values(environment: Any) -> None: def test_raises_on_final_value_change(environment: Any) -> None: - @configspec class FinalConfiguration(BaseConfiguration): pipeline_name: Final[str] = "comp" @@ -293,9 +358,12 @@ class FinalConfiguration(BaseConfiguration): # config providers are ignored for final fields assert c.pipeline_name == "comp" - environment["PIPELINE_NAME"] = "comp" - assert dict(c) == {"pipeline_name": "comp"} - resolve.resolve_configuration(FinalConfiguration()) + @configspec + class FinalConfiguration2(BaseConfiguration): + pipeline_name: Final[str] = None + + c2 = resolve.resolve_configuration(FinalConfiguration2()) + assert dict(c2) == {"pipeline_name": None} def test_explicit_native_always_skips_resolve(environment: Any) -> None: @@ -313,7 +381,10 @@ def test_explicit_native_always_skips_resolve(environment: Any) -> None: # explicit representation environment["INS"] = "h>a>b>he" - c = resolve.resolve_configuration(InstrumentedConfiguration(), explicit_value={"head": "h", "tube": ["tu", "be"], "heels": "uhe"}) + c = resolve.resolve_configuration( + InstrumentedConfiguration(), + explicit_value={"head": "h", "tube": ["tu", "be"], "heels": "uhe"}, + ) assert c.heels == "uhe" # also the native explicit value @@ -336,7 +407,10 @@ def test_skip_lookup_native_config_value_if_no_config_section(environment: Any) # the INSTRUMENTED is not looked up because InstrumentedConfiguration has no section with custom_environ({"INSTRUMENTED": "he>tu>u>be>h"}): with pytest.raises(ConfigFieldMissingException) as py_ex: - resolve.resolve_configuration(EmbeddedConfiguration(), explicit_value={"default": "set", "sectioned": {"password": "pwd"}}) + resolve.resolve_configuration( + EmbeddedConfiguration(), + explicit_value={"default": "set", "sectioned": {"password": "pwd"}}, + ) assert py_ex.value.spec_name == "InstrumentedConfiguration" assert py_ex.value.fields == ["head", "tube", "heels"] @@ -360,14 +434,28 @@ def test_on_resolved(environment: Any) -> None: def test_embedded_config(environment: Any) -> None: # resolve all embedded config, using explicit value for instrumented config and explicit dict for sectioned config - C = resolve.resolve_configuration(EmbeddedConfiguration(), explicit_value={"default": "set", "instrumented": "h>tu>be>xhe", "sectioned": {"password": "pwd"}}) + C = resolve.resolve_configuration( + EmbeddedConfiguration(), + explicit_value={ + "default": "set", + "instrumented": "h>tu>be>xhe", + "sectioned": {"password": "pwd"}, + }, + ) assert C.default == "set" assert C.instrumented.to_native_representation() == "h>tu>be>xhe" assert C.sectioned.password == "pwd" # resolve but providing values via env with custom_environ( - {"INSTRUMENTED__HEAD": "h", "INSTRUMENTED__TUBE": '["tu", "u", "be"]', "INSTRUMENTED__HEELS": "xhe", "SECTIONED__PASSWORD": "passwd", "DEFAULT": "DEF"}): + { + "INSTRUMENTED__HEAD": "h", + "INSTRUMENTED__TUBE": '["tu", "u", "be"]', + "INSTRUMENTED__HEELS": "xhe", + "SECTIONED__PASSWORD": "passwd", + "DEFAULT": "DEF", + } + ): C = resolve.resolve_configuration(EmbeddedConfiguration()) assert C.default == "DEF" assert 
C.instrumented.to_native_representation() == "h>tu>u>be>xhe" @@ -391,11 +479,23 @@ def test_embedded_config(environment: Any) -> None: with patch.object(InstrumentedConfiguration, "__section__", "instrumented"): with custom_environ({"INSTRUMENTED": "he>tu>u>be>h"}): with pytest.raises(RuntimeError): - resolve.resolve_configuration(EmbeddedConfiguration(), explicit_value={"default": "set", "sectioned": {"password": "pwd"}}) + resolve.resolve_configuration( + EmbeddedConfiguration(), + explicit_value={"default": "set", "sectioned": {"password": "pwd"}}, + ) # part via env part via explicit values - with custom_environ({"INSTRUMENTED__HEAD": "h", "INSTRUMENTED__TUBE": '["tu", "u", "be"]', "INSTRUMENTED__HEELS": "xhe"}): - C = resolve.resolve_configuration(EmbeddedConfiguration(), explicit_value={"default": "set", "sectioned": {"password": "pwd"}}) + with custom_environ( + { + "INSTRUMENTED__HEAD": "h", + "INSTRUMENTED__TUBE": '["tu", "u", "be"]', + "INSTRUMENTED__HEELS": "xhe", + } + ): + C = resolve.resolve_configuration( + EmbeddedConfiguration(), + explicit_value={"default": "set", "sectioned": {"password": "pwd"}}, + ) assert C.instrumented.to_native_representation() == "h>tu>u>be>xhe" @@ -404,7 +504,11 @@ def test_embedded_explicit_value_over_provider(environment: Any) -> None: with patch.object(InstrumentedConfiguration, "__section__", "instrumented"): with custom_environ({"INSTRUMENTED": "h>tu>u>be>he"}): # explicit value over the env - c = resolve.resolve_configuration(EmbeddedConfiguration(), explicit_value={"instrumented": "h>tu>be>xhe"}, accept_partial=True) + c = resolve.resolve_configuration( + EmbeddedConfiguration(), + explicit_value={"instrumented": "h>tu>be>xhe"}, + accept_partial=True, + ) assert c.instrumented.to_native_representation() == "h>tu>be>xhe" # parent configuration is not resolved assert not c.is_resolved() @@ -421,7 +525,9 @@ def test_provider_values_over_embedded_default(environment: Any) -> None: with custom_environ({"INSTRUMENTED": "h>tu>u>be>he"}): # read from env - over the default values InstrumentedConfiguration().parse_native_representation("h>tu>be>xhe") - c = resolve.resolve_configuration(EmbeddedConfiguration(instrumented=None), accept_partial=True) + c = resolve.resolve_configuration( + EmbeddedConfiguration(instrumented=None), accept_partial=True + ) assert c.instrumented.to_native_representation() == "h>tu>u>be>he" # parent configuration is not resolved assert not c.is_resolved() @@ -438,30 +544,29 @@ def test_run_configuration_gen_name(environment: Any) -> None: def test_configuration_is_mutable_mapping(environment: Any, env_provider: ConfigProvider) -> None: - @configspec class _SecretCredentials(RunConfiguration): pipeline_name: Optional[str] = "secret" secret_value: TSecretValue = None config_files_storage_path: str = "storage" - # configurations provide full MutableMapping support # here order of items in dict matters expected_dict = { - 'pipeline_name': 'secret', - 'sentry_dsn': None, - 'slack_incoming_hook': None, - 'dlthub_telemetry': True, - 'dlthub_telemetry_segment_write_key': 'TLJiyRkGVZGCi2TtjClamXpFcxAA1rSB', - 'log_format': '{asctime}|[{levelname:<21}]|{process}|{name}|{filename}|{funcName}:{lineno}|{message}', - 'log_level': 'WARNING', - 'request_timeout': 60, - 'request_max_attempts': 5, - 'request_backoff_factor': 1, - 'request_max_retry_delay': 300, - 'config_files_storage_path': 'storage', - "secret_value": None + "pipeline_name": "secret", + "sentry_dsn": None, + "slack_incoming_hook": None, + "dlthub_telemetry": True, + 
"dlthub_telemetry_segment_write_key": "TLJiyRkGVZGCi2TtjClamXpFcxAA1rSB", + "log_format": "{asctime}|[{levelname:<21}]|{process}|{thread}|{name}|{filename}|{funcName}:{lineno}|{message}", + "log_level": "WARNING", + "request_timeout": 60, + "request_max_attempts": 5, + "request_backoff_factor": 1, + "request_max_retry_delay": 300, + "config_files_storage_path": "storage", + "dlthub_dsn": None, + "secret_value": None, } assert dict(_SecretCredentials()) == expected_dict @@ -525,9 +630,10 @@ def test_init_method_gen(environment: Any) -> None: def test_multi_derivation_defaults(environment: Any) -> None: - @configspec - class MultiConfiguration(SectionedConfiguration, MockProdConfiguration, ConfigurationWithOptionalTypes): + class MultiConfiguration( + SectionedConfiguration, MockProdConfiguration, ConfigurationWithOptionalTypes + ): pass # apparently dataclasses set default in reverse mro so MockProdConfiguration overwrites @@ -556,24 +662,58 @@ def test_raises_on_unresolved_field(environment: Any, env_provider: ConfigProvid # toml providers were empty and are not returned in trace # assert trace[1] == LookupTrace("secrets.toml", [], "NoneConfigVar", None) # assert trace[2] == LookupTrace("config.toml", [], "NoneConfigVar", None) + # check the exception trace + exception_traces = get_exception_trace_chain(cf_missing_exc.value) + assert len(exception_traces) == 1 + exception_trace = exception_traces[0] + assert exception_trace["docstring"] == ConfigFieldMissingException.__doc__ + # serialized traces + assert "NoneConfigVar" in exception_trace["exception_attrs"]["traces"] + assert exception_trace["exception_attrs"]["spec_name"] == "WrongConfiguration" + assert exception_trace["exception_attrs"]["fields"] == ["NoneConfigVar"] def test_raises_on_many_unresolved_fields(environment: Any, env_provider: ConfigProvider) -> None: # via make configuration with pytest.raises(ConfigFieldMissingException) as cf_missing_exc: resolve.resolve_configuration(CoercionTestConfiguration()) + # check the exception trace + exception_trace = get_exception_trace(cf_missing_exc.value) + assert cf_missing_exc.value.spec_name == "CoercionTestConfiguration" # get all fields that must be set - val_fields = [f for f in CoercionTestConfiguration().get_resolvable_fields() if f.lower().endswith("_val")] + val_fields = [ + f for f in CoercionTestConfiguration().get_resolvable_fields() if f.lower().endswith("_val") + ] traces = cf_missing_exc.value.traces assert len(traces) == len(val_fields) for tr_field, exp_field in zip(traces, val_fields): assert len(traces[tr_field]) == 1 - assert traces[tr_field][0] == LookupTrace("Environment Variables", [], environ_provider.EnvironProvider.get_key_name(exp_field), None) + assert traces[tr_field][0] == LookupTrace( + "Environment Variables", + [], + environ_provider.EnvironProvider.get_key_name(exp_field), + None, + ) + # field must be in exception trace + assert tr_field in exception_trace["exception_attrs"]["fields"] + assert tr_field in exception_trace["exception_attrs"]["traces"] # assert traces[tr_field][1] == LookupTrace("secrets.toml", [], toml.TomlFileProvider.get_key_name(exp_field), None) # assert traces[tr_field][2] == LookupTrace("config.toml", [], toml.TomlFileProvider.get_key_name(exp_field), None) +def test_removes_trace_value_from_exception_trace_attrs( + environment: Any, env_provider: ConfigProvider +) -> None: + with pytest.raises(ConfigFieldMissingException) as cf_missing_exc: + resolve.resolve_configuration(CoercionTestConfiguration()) + 
cf_missing_exc.value.traces["str_val"][0] = cf_missing_exc.value.traces["str_val"][0]._replace(value="SECRET") # type: ignore[index] + assert cf_missing_exc.value.traces["str_val"][0].value == "SECRET" + attrs_ = cf_missing_exc.value.attrs() + # values got cleared up + assert attrs_["traces"]["str_val"][0].value is None + + def test_accepts_optional_missing_fields(environment: Any) -> None: # ConfigurationWithOptionalTypes has values for all non optional fields present C = ConfigurationWithOptionalTypes() @@ -581,7 +721,9 @@ def test_accepts_optional_missing_fields(environment: Any) -> None: # make optional config resolve.resolve_configuration(ConfigurationWithOptionalTypes()) # make config with optional values - resolve.resolve_configuration(ProdConfigurationWithOptionalTypes(), explicit_value={"int_val": None}) + resolve.resolve_configuration( + ProdConfigurationWithOptionalTypes(), explicit_value={"int_val": None} + ) # make config with optional embedded config C2 = resolve.resolve_configuration(EmbeddedOptionalConfiguration()) # embedded config was not fully resolved @@ -591,14 +733,18 @@ def test_accepts_optional_missing_fields(environment: Any) -> None: def test_find_all_keys() -> None: keys = VeryWrongConfiguration().get_resolvable_fields() # assert hints and types: LOG_COLOR had it hint overwritten in derived class - assert set({'str_val': str, 'int_val': int, 'NoneConfigVar': str, 'log_color': str}.items()).issubset(keys.items()) + assert set( + {"str_val": str, "int_val": int, "NoneConfigVar": str, "log_color": str}.items() + ).issubset(keys.items()) def test_coercion_to_hint_types(environment: Any) -> None: add_config_dict_to_env(COERCIONS) C = CoercionTestConfiguration() - resolve._resolve_config_fields(C, explicit_values=None, explicit_sections=(), embedded_sections=(), accept_partial=False) + resolve._resolve_config_fields( + C, explicit_values=None, explicit_sections=(), embedded_sections=(), accept_partial=False + ) for key in COERCIONS: assert getattr(C, key) == COERCIONS[key] @@ -659,7 +805,13 @@ def test_invalid_coercions(environment: Any) -> None: add_config_dict_to_env(INVALID_COERCIONS) for key, value in INVALID_COERCIONS.items(): try: - resolve._resolve_config_fields(C, explicit_values=None, explicit_sections=(), embedded_sections=(), accept_partial=False) + resolve._resolve_config_fields( + C, + explicit_values=None, + explicit_sections=(), + embedded_sections=(), + accept_partial=False, + ) except ConfigValueCannotBeCoercedException as coerc_exc: # must fail exactly on expected value if coerc_exc.field_name != key: @@ -674,7 +826,9 @@ def test_excepted_coercions(environment: Any) -> None: C = CoercionTestConfiguration() add_config_dict_to_env(COERCIONS) add_config_dict_to_env(EXCEPTED_COERCIONS, overwrite_keys=True) - resolve._resolve_config_fields(C, explicit_values=None, explicit_sections=(), embedded_sections=(), accept_partial=False) + resolve._resolve_config_fields( + C, explicit_values=None, explicit_sections=(), embedded_sections=(), accept_partial=False + ) for key in EXCEPTED_COERCIONS: assert getattr(C, key) == COERCED_EXCEPTIONS[key] @@ -686,6 +840,7 @@ def test_config_with_unsupported_types_in_hints(environment: Any) -> None: class InvalidHintConfiguration(BaseConfiguration): tuple_val: tuple = None # type: ignore set_val: set = None # type: ignore + InvalidHintConfiguration() @@ -695,6 +850,7 @@ def test_config_with_no_hints(environment: Any) -> None: @configspec class NoHintConfiguration(BaseConfiguration): tuple_val = None + NoHintConfiguration() @@ 
-703,8 +859,8 @@ def test_config_with_non_templated_complex_hints(environment: Any) -> None: environment["TUPLE_VAL"] = "(1,2,3)" environment["DICT_VAL"] = '{"a": 1}' c = resolve.resolve_configuration(NonTemplatedComplexTypesConfiguration()) - assert c.list_val == [1,2,3] - assert c.tuple_val == (1,2,3) + assert c.list_val == [1, 2, 3] + assert c.tuple_val == (1, 2, 3) assert c.dict_val == {"a": 1} @@ -718,7 +874,7 @@ def test_resolve_configuration(environment: Any) -> None: def test_dataclass_instantiation(environment: Any) -> None: # resolve_configuration works on instances of dataclasses and types are not modified - environment['SECRET_VALUE'] = "1" + environment["SECRET_VALUE"] = "1" C = resolve.resolve_configuration(SecretConfiguration()) # auto derived type holds the value assert C.secret_value == "1" @@ -778,14 +934,13 @@ def test_is_valid_hint() -> None: def test_configspec_auto_base_config_derivation() -> None: - @configspec class AutoBaseDerivationConfiguration: auto: str if TYPE_CHECKING: - def __init__(self, auto: str=None) -> None: - ... + + def __init__(self, auto: str = None) -> None: ... assert issubclass(AutoBaseDerivationConfiguration, BaseConfiguration) assert hasattr(AutoBaseDerivationConfiguration, "auto") @@ -873,30 +1028,59 @@ def test_last_resolve_exception(environment: Any) -> None: def test_resolved_trace(environment: Any) -> None: with custom_environ( - {"INSTRUMENTED__HEAD": "h", "INSTRUMENTED__TUBE": '["tu", "u", "be"]', "INSTRUMENTED__HEELS": "xhe", "SECTIONED__PASSWORD": "passwd", "DEFAULT": "DEF"}): + { + "INSTRUMENTED__HEAD": "h", + "INSTRUMENTED__TUBE": '["tu", "u", "be"]', + "INSTRUMENTED__HEELS": "xhe", + "SECTIONED__PASSWORD": "passwd", + "DEFAULT": "DEF", + } + ): c = resolve.resolve_configuration(EmbeddedConfiguration(default="_DEFF")) traces = get_resolved_traces() prov_name = environ_provider.EnvironProvider().name - assert traces[".default"] == ResolvedValueTrace("default", "DEF", "_DEFF", str, [], prov_name, c) - assert traces["instrumented.head"] == ResolvedValueTrace("head", "h", None, str, ["instrumented"], prov_name, c.instrumented) + assert traces[".default"] == ResolvedValueTrace( + "default", "DEF", "_DEFF", str, [], prov_name, c + ) + assert traces["instrumented.head"] == ResolvedValueTrace( + "head", "h", None, str, ["instrumented"], prov_name, c.instrumented + ) # value is before casting - assert traces["instrumented.tube"] == ResolvedValueTrace("tube", '["tu", "u", "be"]', None, List[str], ["instrumented"], prov_name, c.instrumented) - assert deserialize_value("tube", traces["instrumented.tube"].value, resolve.extract_inner_hint(List[str])) == ["tu", "u", "be"] - assert traces["instrumented.heels"] == ResolvedValueTrace("heels", "xhe", None, str, ["instrumented"], prov_name, c.instrumented) - assert traces["sectioned.password"] == ResolvedValueTrace("password", "passwd", None, str, ["sectioned"], prov_name, c.sectioned) + assert traces["instrumented.tube"] == ResolvedValueTrace( + "tube", '["tu", "u", "be"]', None, List[str], ["instrumented"], prov_name, c.instrumented + ) + assert deserialize_value( + "tube", traces["instrumented.tube"].value, resolve.extract_inner_hint(List[str]) + ) == ["tu", "u", "be"] + assert traces["instrumented.heels"] == ResolvedValueTrace( + "heels", "xhe", None, str, ["instrumented"], prov_name, c.instrumented + ) + assert traces["sectioned.password"] == ResolvedValueTrace( + "password", "passwd", None, str, ["sectioned"], prov_name, c.sectioned + ) assert len(traces) == 5 # try to get native representation 
with patch.object(InstrumentedConfiguration, "__section__", "snake"): with custom_environ( - {"INSTRUMENTED": "h>t>t>t>he", "SECTIONED__PASSWORD": "pass", "DEFAULT": "UNDEF", "SNAKE": "h>t>t>t>he"}): + { + "INSTRUMENTED": "h>t>t>t>he", + "SECTIONED__PASSWORD": "pass", + "DEFAULT": "UNDEF", + "SNAKE": "h>t>t>t>he", + } + ): c = resolve.resolve_configuration(EmbeddedConfiguration()) resolve.resolve_configuration(InstrumentedConfiguration()) assert traces[".default"] == ResolvedValueTrace("default", "UNDEF", None, str, [], prov_name, c) - assert traces[".instrumented"] == ResolvedValueTrace("instrumented", "h>t>t>t>he", None, InstrumentedConfiguration, [], prov_name, c) + assert traces[".instrumented"] == ResolvedValueTrace( + "instrumented", "h>t>t>t>he", None, InstrumentedConfiguration, [], prov_name, c + ) - assert traces[".snake"] == ResolvedValueTrace("snake", "h>t>t>t>he", None, InstrumentedConfiguration, [], prov_name, None) + assert traces[".snake"] == ResolvedValueTrace( + "snake", "h>t>t>t>he", None, InstrumentedConfiguration, [], prov_name, None + ) def test_extract_inner_hint() -> None: @@ -944,49 +1128,47 @@ def coerce_single_value(key: str, value: str, hint: Type[Any]) -> Any: def test_dynamic_type_hint(environment: Dict[str, str]) -> None: - """Test dynamic type hint using @resolve_type decorator - """ - environment['DUMMY__DISCRIMINATOR'] = 'b' - environment['DUMMY__EMBEDDED_CONFIG__FIELD_FOR_B'] = 'some_value' + """Test dynamic type hint using @resolve_type decorator""" + environment["DUMMY__DISCRIMINATOR"] = "b" + environment["DUMMY__EMBEDDED_CONFIG__FIELD_FOR_B"] = "some_value" - config = resolve.resolve_configuration(ConfigWithDynamicType(), sections=('dummy', )) + config = resolve.resolve_configuration(ConfigWithDynamicType(), sections=("dummy",)) assert isinstance(config.embedded_config, DynamicConfigB) - assert config.embedded_config.field_for_b == 'some_value' + assert config.embedded_config.field_for_b == "some_value" def test_dynamic_type_hint_subclass(environment: Dict[str, str]) -> None: - """Test overriding @resolve_type method in subclass - """ - environment['DUMMY__IS_NUMBER'] = 'true' - environment['DUMMY__DYNAMIC_TYPE_FIELD'] = '22' + """Test overriding @resolve_type method in subclass""" + environment["DUMMY__IS_NUMBER"] = "true" + environment["DUMMY__DYNAMIC_TYPE_FIELD"] = "22" # Test extended resolver method is applied - environment['DUMMY__DISCRIMINATOR'] = 'c' - environment['DUMMY__EMBEDDED_CONFIG__FIELD_FOR_C'] = 'some_value' + environment["DUMMY__DISCRIMINATOR"] = "c" + environment["DUMMY__EMBEDDED_CONFIG__FIELD_FOR_C"] = "some_value" - config = resolve.resolve_configuration(SubclassConfigWithDynamicType(), sections=('dummy', )) + config = resolve.resolve_configuration(SubclassConfigWithDynamicType(), sections=("dummy",)) assert isinstance(config.embedded_config, DynamicConfigC) - assert config.embedded_config.field_for_c == 'some_value' + assert config.embedded_config.field_for_c == "some_value" # Test super() call is applied correctly - environment['DUMMY__DISCRIMINATOR'] = 'b' - environment['DUMMY__EMBEDDED_CONFIG__FIELD_FOR_B'] = 'some_value' + environment["DUMMY__DISCRIMINATOR"] = "b" + environment["DUMMY__EMBEDDED_CONFIG__FIELD_FOR_B"] = "some_value" - config = resolve.resolve_configuration(SubclassConfigWithDynamicType(), sections=('dummy', )) + config = resolve.resolve_configuration(SubclassConfigWithDynamicType(), sections=("dummy",)) assert isinstance(config.embedded_config, DynamicConfigB) - assert config.embedded_config.field_for_b == 
'some_value' + assert config.embedded_config.field_for_b == "some_value" # Test second dynamic field added in subclass - environment['DUMMY__IS_NUMBER'] = 'true' - environment['DUMMY__DYNAMIC_TYPE_FIELD'] = 'some' + environment["DUMMY__IS_NUMBER"] = "true" + environment["DUMMY__DYNAMIC_TYPE_FIELD"] = "some" with pytest.raises(ConfigValueCannotBeCoercedException) as e: - config = resolve.resolve_configuration(SubclassConfigWithDynamicType(), sections=('dummy', )) + config = resolve.resolve_configuration(SubclassConfigWithDynamicType(), sections=("dummy",)) - assert e.value.field_name == 'dynamic_type_field' + assert e.value.field_name == "dynamic_type_field" assert e.value.hint == int @@ -1005,31 +1187,49 @@ def test_add_config_to_env(environment: Dict[str, str]) -> None: EmbeddedConfiguration( instrumented="h>tu>u>be>he", # type: ignore[arg-type] sectioned=SectionedConfiguration(password="PASS"), - default="BUBA") + default="BUBA", + ) ) - add_config_to_env(c, ("dlt", )) + add_config_to_env(c, ("dlt",)) # must contain dlt prefix everywhere, INSTRUMENTED section taken from key and DLT_TEST taken from password - assert environment.items() >= { - 'DLT__DEFAULT': 'BUBA', - 'DLT__INSTRUMENTED__HEAD': 'h', 'DLT__INSTRUMENTED__TUBE': '["tu","u","be"]', 'DLT__INSTRUMENTED__HEELS': 'he', - 'DLT__DLT_TEST__PASSWORD': 'PASS' - }.items() + assert ( + environment.items() + >= { + "DLT__DEFAULT": "BUBA", + "DLT__INSTRUMENTED__HEAD": "h", + "DLT__INSTRUMENTED__TUBE": '["tu","u","be"]', + "DLT__INSTRUMENTED__HEELS": "he", + "DLT__DLT_TEST__PASSWORD": "PASS", + }.items() + ) # no dlt environment.clear() add_config_to_env(c) - assert environment.items() == { - 'DEFAULT': 'BUBA', - 'INSTRUMENTED__HEAD': 'h', 'INSTRUMENTED__TUBE': '["tu","u","be"]', 'INSTRUMENTED__HEELS': 'he', - 'DLT_TEST__PASSWORD': 'PASS' - }.items() + assert ( + environment.items() + == { + "DEFAULT": "BUBA", + "INSTRUMENTED__HEAD": "h", + "INSTRUMENTED__TUBE": '["tu","u","be"]', + "INSTRUMENTED__HEELS": "he", + "DLT_TEST__PASSWORD": "PASS", + }.items() + ) # starts with sectioned environment.clear() add_config_to_env(c.sectioned) - assert environment == {'DLT_TEST__PASSWORD': 'PASS'} + assert environment == {"DLT_TEST__PASSWORD": "PASS"} def test_configuration_copy() -> None: - c = resolve.resolve_configuration(EmbeddedConfiguration(), explicit_value={"default": "set", "instrumented": "h>tu>be>xhe", "sectioned": {"password": "pwd"}}) + c = resolve.resolve_configuration( + EmbeddedConfiguration(), + explicit_value={ + "default": "set", + "instrumented": "h>tu>be>xhe", + "sectioned": {"password": "pwd"}, + }, + ) assert c.is_resolved() copy_c = c.copy() assert copy_c.is_resolved() @@ -1042,7 +1242,9 @@ def test_configuration_copy() -> None: cred.parse_native_representation("postgresql://loader:loader@localhost:5432/dlt_data") copy_cred = cred.copy() assert dict(copy_cred) == dict(cred) - assert copy_cred.to_native_representation() == "postgresql://loader:loader@localhost:5432/dlt_data" + assert ( + copy_cred.to_native_representation() == "postgresql://loader:loader@localhost:5432/dlt_data" + ) # resolve the copy assert not copy_cred.is_resolved() resolved_cred_copy = c = resolve.resolve_configuration(copy_cred) # type: ignore[assignment] @@ -1050,7 +1252,6 @@ def test_configuration_copy() -> None: def test_configuration_with_configuration_as_default() -> None: - instrumented_default = InstrumentedConfiguration() instrumented_default.parse_native_representation("h>a>b>he") cred = ConnectionStringCredentials() diff --git 
a/tests/common/configuration/test_container.py b/tests/common/configuration/test_container.py index 928af63195..9521f5960d 100644 --- a/tests/common/configuration/test_container.py +++ b/tests/common/configuration/test_container.py @@ -1,12 +1,18 @@ +from concurrent.futures import ThreadPoolExecutor import pytest -from typing import Any, ClassVar, Literal, Optional, Iterator, TYPE_CHECKING +import threading +from typing import Any, ClassVar, Literal, Optional, Iterator, Type, TYPE_CHECKING from dlt.common.configuration import configspec from dlt.common.configuration.providers.context import ContextProvider from dlt.common.configuration.resolve import resolve_configuration from dlt.common.configuration.specs import BaseConfiguration, ContainerInjectableContext from dlt.common.configuration.container import Container -from dlt.common.configuration.exceptions import ConfigFieldMissingException, ContainerInjectableContextMangled, ContextDefaultCannotBeCreated +from dlt.common.configuration.exceptions import ( + ConfigFieldMissingException, + ContainerInjectableContextMangled, + ContextDefaultCannotBeCreated, +) from tests.utils import preserve_environ from tests.common.configuration.utils import environment @@ -20,8 +26,8 @@ def parse_native_representation(self, native_value: Any) -> None: raise ValueError(native_value) if TYPE_CHECKING: - def __init__(self, current_value: str = None) -> None: - ... + + def __init__(self, current_value: str = None) -> None: ... @configspec @@ -31,10 +37,14 @@ class EmbeddedWithInjectableContext(BaseConfiguration): @configspec class NoDefaultInjectableContext(ContainerInjectableContext): - can_create_default: ClassVar[bool] = False +@configspec +class GlobalTestContext(InjectableTestContext): + global_affinity: ClassVar[bool] = True + + @configspec class EmbeddedWithNoDefaultInjectableContext(BaseConfiguration): injected: NoDefaultInjectableContext @@ -57,25 +67,26 @@ def container() -> Iterator[Container]: def test_singleton(container: Container) -> None: # keep the old configurations list - container_configurations = container.contexts + container_configurations = container.thread_contexts singleton = Container() # make sure it is the same object assert container is singleton # that holds the same configurations dictionary - assert container_configurations is singleton.contexts + assert container_configurations is singleton.thread_contexts -def test_container_items(container: Container) -> None: +@pytest.mark.parametrize("spec", (InjectableTestContext, GlobalTestContext)) +def test_container_items(container: Container, spec: Type[InjectableTestContext]) -> None: # will add InjectableTestContext instance to container - container[InjectableTestContext] - assert InjectableTestContext in container - del container[InjectableTestContext] - assert InjectableTestContext not in container - container[InjectableTestContext] = InjectableTestContext(current_value="S") - assert container[InjectableTestContext].current_value == "S" - container[InjectableTestContext] = InjectableTestContext(current_value="SS") - assert container[InjectableTestContext].current_value == "SS" + container[spec] + assert spec in container + del container[spec] + assert spec not in container + container[spec] = spec(current_value="S") + assert container[spec].current_value == "S" + container[spec] = spec(current_value="SS") + assert container[spec].current_value == "SS" def test_get_default_injectable_config(container: Container) -> None: @@ -93,7 +104,10 @@ def 
test_raise_on_no_default_value(container: Container) -> None: assert container[NoDefaultInjectableContext] is injected -def test_container_injectable_context(container: Container) -> None: +@pytest.mark.parametrize("spec", (InjectableTestContext, GlobalTestContext)) +def test_container_injectable_context( + container: Container, spec: Type[InjectableTestContext] +) -> None: with container.injectable_context(InjectableTestContext()) as current_config: assert current_config.current_value is None current_config.current_value = "TEST" @@ -103,43 +117,131 @@ def test_container_injectable_context(container: Container) -> None: assert InjectableTestContext not in container -def test_container_injectable_context_restore(container: Container) -> None: +@pytest.mark.parametrize("spec", (InjectableTestContext, GlobalTestContext)) +def test_container_injectable_context_restore( + container: Container, spec: Type[InjectableTestContext] +) -> None: # this will create InjectableTestConfiguration - original = container[InjectableTestContext] + original = container[spec] original.current_value = "ORIGINAL" - with container.injectable_context(InjectableTestContext()) as current_config: + with container.injectable_context(spec()) as current_config: current_config.current_value = "TEST" # nested context is supported - with container.injectable_context(InjectableTestContext()) as inner_config: + with container.injectable_context(spec()) as inner_config: assert inner_config.current_value is None - assert container[InjectableTestContext] is inner_config - assert container[InjectableTestContext] is current_config + assert container[spec] is inner_config + assert container[spec] is current_config - assert container[InjectableTestContext] is original - assert container[InjectableTestContext].current_value == "ORIGINAL" + assert container[spec] is original + assert container[spec].current_value == "ORIGINAL" -def test_container_injectable_context_mangled(container: Container) -> None: - original = container[InjectableTestContext] +@pytest.mark.parametrize("spec", (InjectableTestContext, GlobalTestContext)) +def test_container_injectable_context_mangled( + container: Container, spec: Type[InjectableTestContext] +) -> None: + original = container[spec] original.current_value = "ORIGINAL" - context = InjectableTestContext() + context = spec() with pytest.raises(ContainerInjectableContextMangled) as py_ex: with container.injectable_context(context) as current_config: current_config.current_value = "TEST" # overwrite the config in container - container[InjectableTestContext] = InjectableTestContext() - assert py_ex.value.spec == InjectableTestContext + container[spec] = spec() + assert py_ex.value.spec == spec assert py_ex.value.expected_config == context -def test_container_provider(container: Container) -> None: +@pytest.mark.parametrize("spec", (InjectableTestContext, GlobalTestContext)) +def test_container_thread_affinity(container: Container, spec: Type[InjectableTestContext]) -> None: + event = threading.Semaphore(0) + thread_item: InjectableTestContext = None + + def _thread() -> None: + container[spec] = spec(current_value="THREAD") + event.release() + event.acquire() + nonlocal thread_item + thread_item = container[spec] + event.release() + + threading.Thread(target=_thread, daemon=True).start() + event.acquire() + # it may be or separate copy (InjectableTestContext) or single copy (GlobalTestContext) + main_item = container[spec] + main_item.current_value = "MAIN" + event.release() + main_item = container[spec] 
+ event.release() + if spec is GlobalTestContext: + # just one context is kept globally + assert main_item is thread_item + # MAIN was set after thread + assert thread_item.current_value == "MAIN" + else: + assert main_item is not thread_item + assert main_item.current_value == "MAIN" + assert thread_item.current_value == "THREAD" + + +@pytest.mark.parametrize("spec", (InjectableTestContext, GlobalTestContext)) +def test_container_pool_affinity(container: Container, spec: Type[InjectableTestContext]) -> None: + event = threading.Semaphore(0) + thread_item: InjectableTestContext = None + + def _thread() -> None: + container[spec] = spec(current_value="THREAD") + event.release() + event.acquire() + nonlocal thread_item + thread_item = container[spec] + event.release() + + threading.Thread(target=_thread, daemon=True, name=Container.thread_pool_prefix()).start() + event.acquire() + # it may be or separate copy (InjectableTestContext) or single copy (GlobalTestContext) + main_item = container[spec] + main_item.current_value = "MAIN" + event.release() + main_item = container[spec] + event.release() + + # just one context is kept globally - Container user pool thread name to get the starting thread id + # and uses it to retrieve context + assert main_item is thread_item + # MAIN was set after thread + assert thread_item.current_value == "MAIN" + + +def test_thread_pool_affinity(container: Container) -> None: + def _context() -> InjectableTestContext: + return container[InjectableTestContext] + + main_item = container[InjectableTestContext] = InjectableTestContext(current_value="MAIN") + + with ThreadPoolExecutor(thread_name_prefix=container.thread_pool_prefix()) as p: + future = p.submit(_context) + item = future.result() + + assert item is main_item + + # create non affine pool + with ThreadPoolExecutor() as p: + future = p.submit(_context) + item = future.result() + + assert item is not main_item + + +@pytest.mark.parametrize("spec", (InjectableTestContext, GlobalTestContext)) +def test_container_provider(container: Container, spec: Type[InjectableTestContext]) -> None: provider = ContextProvider() # default value will be created - v, k = provider.get_value("n/a", InjectableTestContext, None) - assert isinstance(v, InjectableTestContext) - assert k == "InjectableTestContext" - assert InjectableTestContext in container + v, k = provider.get_value("n/a", spec, None) + assert isinstance(v, spec) + assert k == spec.__name__ + assert spec in container # provider does not create default value in Container v, k = provider.get_value("n/a", NoDefaultInjectableContext, None) @@ -154,7 +256,7 @@ def test_container_provider(container: Container) -> None: # must assert if sections are provided with pytest.raises(AssertionError): - provider.get_value("n/a", InjectableTestContext, None, "ns1") + provider.get_value("n/a", spec, None, "ns1") # type hints that are not classes literal = Literal["a"] @@ -173,7 +275,10 @@ def test_container_provider_embedded_inject(container: Container, environment: A assert C.injected is injected -def test_container_provider_embedded_no_default(container: Container) -> None: +@pytest.mark.parametrize("spec", (InjectableTestContext, GlobalTestContext)) +def test_container_provider_embedded_no_default( + container: Container, spec: Type[InjectableTestContext] +) -> None: with container.injectable_context(NoDefaultInjectableContext()): resolve_configuration(EmbeddedWithNoDefaultInjectableContext()) # default cannot be created so fails diff --git 
a/tests/common/configuration/test_credentials.py b/tests/common/configuration/test_credentials.py index adf5ac829d..ae9b96e903 100644 --- a/tests/common/configuration/test_credentials.py +++ b/tests/common/configuration/test_credentials.py @@ -4,8 +4,21 @@ import pytest from dlt.common.configuration import resolve_configuration from dlt.common.configuration.exceptions import ConfigFieldMissingException -from dlt.common.configuration.specs import ConnectionStringCredentials, GcpServiceAccountCredentialsWithoutDefaults, GcpServiceAccountCredentials, GcpOAuthCredentialsWithoutDefaults, GcpOAuthCredentials, AwsCredentials -from dlt.common.configuration.specs.exceptions import InvalidConnectionString, InvalidGoogleNativeCredentialsType, InvalidGoogleOauth2Json, InvalidGoogleServicesJson, OAuth2ScopesRequired +from dlt.common.configuration.specs import ( + ConnectionStringCredentials, + GcpServiceAccountCredentialsWithoutDefaults, + GcpServiceAccountCredentials, + GcpOAuthCredentialsWithoutDefaults, + GcpOAuthCredentials, + AwsCredentials, +) +from dlt.common.configuration.specs.exceptions import ( + InvalidConnectionString, + InvalidGoogleNativeCredentialsType, + InvalidGoogleOauth2Json, + InvalidGoogleServicesJson, + OAuth2ScopesRequired, +) from dlt.common.configuration.specs.run_configuration import RunConfiguration from tests.utils import preserve_environ @@ -155,7 +168,10 @@ def test_gcp_service_credentials_native_representation(environment) -> None: assert GcpServiceAccountCredentials.__config_gen_annotations__ == [] gcpc = GcpServiceAccountCredentials() - gcpc.parse_native_representation(SERVICE_JSON % '"private_key": "-----BEGIN PRIVATE KEY-----\\n\\n-----END PRIVATE KEY-----\\n",') + gcpc.parse_native_representation( + SERVICE_JSON + % '"private_key": "-----BEGIN PRIVATE KEY-----\\n\\n-----END PRIVATE KEY-----\\n",' + ) assert gcpc.private_key == "-----BEGIN PRIVATE KEY-----\n\n-----END PRIVATE KEY-----\n" assert gcpc.project_id == "chat-analytics" assert gcpc.client_email == "loader@iam.gserviceaccount.com" @@ -191,7 +207,6 @@ def test_gcp_service_credentials_resolved_from_native_representation(environment def test_gcp_oauth_credentials_native_representation(environment) -> None: - with pytest.raises(InvalidGoogleNativeCredentialsType): GcpOAuthCredentials().parse_native_representation(1) @@ -205,13 +220,15 @@ def test_gcp_oauth_credentials_native_representation(environment) -> None: # but is not partial - all required fields are present assert gcoauth.is_partial() is False assert gcoauth.project_id == "level-dragon-333983" - assert gcoauth.client_id == "921382012504-3mtjaj1s7vuvf53j88mgdq4te7akkjm3.apps.googleusercontent.com" + assert ( + gcoauth.client_id + == "921382012504-3mtjaj1s7vuvf53j88mgdq4te7akkjm3.apps.googleusercontent.com" + ) assert gcoauth.client_secret == "gOCSPX-XdY5znbrvjSMEG3pkpA_GHuLPPth" assert gcoauth.refresh_token == "refresh_token" assert gcoauth.token is None assert gcoauth.scopes == ["email", "service"] - # get native representation, it will also location _repr = gcoauth.to_native_representation() assert "localhost" in _repr @@ -289,16 +306,16 @@ def test_run_configuration_slack_credentials(environment: Any) -> None: def test_aws_credentials_resolved(environment: Dict[str, str]) -> None: - environment['CREDENTIALS__AWS_ACCESS_KEY_ID'] = 'fake_access_key' - environment['CREDENTIALS__AWS_SECRET_ACCESS_KEY'] = 'fake_secret_key' - environment['CREDENTIALS__AWS_SESSION_TOKEN'] = 'fake_session_token' - environment['CREDENTIALS__PROFILE_NAME'] = 'fake_profile' - 
environment['CREDENTIALS__REGION_NAME'] = 'eu-central' + environment["CREDENTIALS__AWS_ACCESS_KEY_ID"] = "fake_access_key" + environment["CREDENTIALS__AWS_SECRET_ACCESS_KEY"] = "fake_secret_key" + environment["CREDENTIALS__AWS_SESSION_TOKEN"] = "fake_session_token" + environment["CREDENTIALS__PROFILE_NAME"] = "fake_profile" + environment["CREDENTIALS__REGION_NAME"] = "eu-central" config = resolve_configuration(AwsCredentials()) - assert config.aws_access_key_id == 'fake_access_key' - assert config.aws_secret_access_key == 'fake_secret_key' - assert config.aws_session_token == 'fake_session_token' - assert config.profile_name == 'fake_profile' + assert config.aws_access_key_id == "fake_access_key" + assert config.aws_secret_access_key == "fake_secret_key" + assert config.aws_session_token == "fake_session_token" + assert config.profile_name == "fake_profile" assert config.region_name == "eu-central" diff --git a/tests/common/configuration/test_environ_provider.py b/tests/common/configuration/test_environ_provider.py index ccac6c54eb..0608ea1d7a 100644 --- a/tests/common/configuration/test_environ_provider.py +++ b/tests/common/configuration/test_environ_provider.py @@ -2,7 +2,12 @@ from typing import Any from dlt.common.typing import TSecretValue -from dlt.common.configuration import configspec, ConfigFieldMissingException, ConfigFileNotFoundException, resolve +from dlt.common.configuration import ( + configspec, + ConfigFieldMissingException, + ConfigFileNotFoundException, + resolve, +) from dlt.common.configuration.specs import RunConfiguration, BaseConfiguration from dlt.common.configuration.providers import environ as environ_provider @@ -27,22 +32,25 @@ class MockProdRunConfigurationVar(RunConfiguration): pipeline_name: str = "comp" - def test_resolves_from_environ(environment: Any) -> None: environment["NONECONFIGVAR"] = "Some" C = WrongConfiguration() - resolve._resolve_config_fields(C, explicit_values=None, explicit_sections=(), embedded_sections=(), accept_partial=False) + resolve._resolve_config_fields( + C, explicit_values=None, explicit_sections=(), embedded_sections=(), accept_partial=False + ) assert not C.is_partial() assert C.NoneConfigVar == environment["NONECONFIGVAR"] def test_resolves_from_environ_with_coercion(environment: Any) -> None: - environment["RUNTIME__TEST_BOOL"] = 'yes' + environment["RUNTIME__TEST_BOOL"] = "yes" C = SimpleRunConfiguration() - resolve._resolve_config_fields(C, explicit_values=None, explicit_sections=(), embedded_sections=(), accept_partial=False) + resolve._resolve_config_fields( + C, explicit_values=None, explicit_sections=(), embedded_sections=(), accept_partial=False + ) assert not C.is_partial() # value will be coerced to bool @@ -52,13 +60,13 @@ def test_resolves_from_environ_with_coercion(environment: Any) -> None: def test_secret(environment: Any) -> None: with pytest.raises(ConfigFieldMissingException): resolve.resolve_configuration(SecretConfiguration()) - environment['SECRET_VALUE'] = "1" + environment["SECRET_VALUE"] = "1" C = resolve.resolve_configuration(SecretConfiguration()) assert C.secret_value == "1" # mock the path to point to secret storage # from dlt.common.configuration import config_utils path = environ_provider.SECRET_STORAGE_PATH - del environment['SECRET_VALUE'] + del environment["SECRET_VALUE"] try: # must read a secret file environ_provider.SECRET_STORAGE_PATH = "./tests/common/cases/%s" @@ -66,13 +74,13 @@ def test_secret(environment: Any) -> None: assert C.secret_value == "BANANA" # set some weird path, no secret 
file at all - del environment['SECRET_VALUE'] + del environment["SECRET_VALUE"] environ_provider.SECRET_STORAGE_PATH = "!C:\\PATH%s" with pytest.raises(ConfigFieldMissingException): resolve.resolve_configuration(SecretConfiguration()) # set env which is a fallback for secret not as file - environment['SECRET_VALUE'] = "1" + environment["SECRET_VALUE"] = "1" C = resolve.resolve_configuration(SecretConfiguration()) assert C.secret_value == "1" finally: @@ -87,7 +95,7 @@ def test_secret_kube_fallback(environment: Any) -> None: # all unix editors will add x10 at the end of file, it will be preserved assert C.secret_kube == "kube\n" # we propagate secrets back to environ and strip the whitespace - assert environment['SECRET_KUBE'] == "kube" + assert environment["SECRET_KUBE"] == "kube" finally: environ_provider.SECRET_STORAGE_PATH = path @@ -99,7 +107,10 @@ def test_configuration_files(environment: Any) -> None: assert C.config_files_storage_path == environment["RUNTIME__CONFIG_FILES_STORAGE_PATH"] assert C.has_configuration_file("hasn't") is False assert C.has_configuration_file("event.schema.json") is True - assert C.get_configuration_file_path("event.schema.json") == "./tests/common/cases/schemas/ev1/event.schema.json" + assert ( + C.get_configuration_file_path("event.schema.json") + == "./tests/common/cases/schemas/ev1/event.schema.json" + ) with C.open_configuration_file("event.schema.json", "r") as f: f.read() with pytest.raises(ConfigFileNotFoundException): diff --git a/tests/common/configuration/test_inject.py b/tests/common/configuration/test_inject.py index b52d6f64b9..8b9616ccd7 100644 --- a/tests/common/configuration/test_inject.py +++ b/tests/common/configuration/test_inject.py @@ -9,7 +9,11 @@ from dlt.common.configuration.providers import EnvironProvider from dlt.common.configuration.providers.toml import SECRETS_TOML from dlt.common.configuration.resolve import inject_section -from dlt.common.configuration.specs import BaseConfiguration, GcpServiceAccountCredentialsWithoutDefaults, ConnectionStringCredentials +from dlt.common.configuration.specs import ( + BaseConfiguration, + GcpServiceAccountCredentialsWithoutDefaults, + ConnectionStringCredentials, +) from dlt.common.configuration.specs.base_configuration import is_secret_hint from dlt.common.configuration.specs.config_providers_context import ConfigProvidersContext from dlt.common.configuration.specs.config_section_context import ConfigSectionContext @@ -21,7 +25,6 @@ def test_arguments_are_explicit(environment: Any) -> None: - @with_config def f_var(user=dlt.config.value, path=dlt.config.value): # explicit args "survive" the injection: they have precedence over env @@ -43,7 +46,6 @@ def f_var_env(user=dlt.config.value, path=dlt.config.value): def test_default_values_are_resolved(environment: Any) -> None: - @with_config def f_var(user=dlt.config.value, path="a/b/c"): assert user == "env user" @@ -54,7 +56,6 @@ def f_var(user=dlt.config.value, path="a/b/c"): def test_arguments_dlt_literal_defaults_are_required(environment: Any) -> None: - @with_config def f_config(user=dlt.config.value): assert user is not None @@ -84,7 +85,6 @@ def f_secret(password=dlt.secrets.value): def test_inject_from_argument_section(toml_providers: ConfigProvidersContext) -> None: - # `gcp_storage` is a key in `secrets.toml` and the default `credentials` section of GcpServiceAccountCredentialsWithoutDefaults must be replaced with it @with_config @@ -96,11 +96,12 @@ def f_credentials(gcp_storage: GcpServiceAccountCredentialsWithoutDefaults = dlt def 
test_inject_secret_value_secret_type(environment: Any) -> None: - @with_config - def f_custom_secret_type(_dict: Dict[str, Any] = dlt.secrets.value, _int: int = dlt.secrets.value, **kwargs: Any): + def f_custom_secret_type( + _dict: Dict[str, Any] = dlt.secrets.value, _int: int = dlt.secrets.value, **kwargs: Any + ): # secret values were coerced into types - assert _dict == {"a":1} + assert _dict == {"a": 1} assert _int == 1234 cfg = last_config(**kwargs) spec: Type[BaseConfiguration] = cfg.__class__ @@ -158,23 +159,24 @@ def test_inject_with_sections() -> None: def test_inject_with_sections_and_sections_context() -> None: - @with_config def no_sections(value=dlt.config.value): return value - @with_config(sections=("test", )) + @with_config(sections=("test",)) def test_sections(value=dlt.config.value): return value # a section context that prefers existing context - @with_config(sections=("test", ), sections_merge_style=ConfigSectionContext.prefer_existing) + @with_config(sections=("test",), sections_merge_style=ConfigSectionContext.prefer_existing) def test_sections_pref_existing(value=dlt.config.value): return value - # a section that wants context like dlt resource - @with_config(sections=("test", "module", "name"), sections_merge_style=ConfigSectionContext.resource_merge_style) + @with_config( + sections=("test", "module", "name"), + sections_merge_style=ConfigSectionContext.resource_merge_style, + ) def test_sections_like_resource(value=dlt.config.value): return value @@ -189,7 +191,7 @@ def test_sections_like_resource(value=dlt.config.value): assert test_sections_pref_existing() == "test_section" assert test_sections_like_resource() == "test_section" - with inject_section(ConfigSectionContext(sections=("injected", ))): + with inject_section(ConfigSectionContext(sections=("injected",))): # the "injected" section is applied to "no_section" func that has no sections assert no_sections() == "injected_section" # but not to "test" - it won't be overridden by section context @@ -198,7 +200,9 @@ def test_sections_like_resource(value=dlt.config.value): # this one explicitly prefers existing context assert test_sections_pref_existing() == "injected_section" - with inject_section(ConfigSectionContext(sections=("test", "existing_module", "existing_name"))): + with inject_section( + ConfigSectionContext(sections=("test", "existing_module", "existing_name")) + ): assert test_sections_like_resource() == "resource_style_injected" @@ -256,10 +260,13 @@ def test_initial_spec_from_arg_with_spec_type() -> None: pass -def test_use_most_specific_union_type(environment: Any, toml_providers: ConfigProvidersContext) -> None: - +def test_use_most_specific_union_type( + environment: Any, toml_providers: ConfigProvidersContext +) -> None: @with_config - def postgres_union(local_credentials: Union[ConnectionStringCredentials, str, StrAny] = dlt.secrets.value): + def postgres_union( + local_credentials: Union[ConnectionStringCredentials, str, StrAny] = dlt.secrets.value + ): return local_credentials @with_config @@ -267,7 +274,13 @@ def postgres_direct(local_credentials: ConnectionStringCredentials = dlt.secrets return local_credentials conn_str = "postgres://loader:loader@localhost:5432/dlt_data" - conn_dict = {"host": "localhost", "database": "dlt_test", "username": "loader", "password": "loader", "drivername": "postgresql"} + conn_dict = { + "host": "localhost", + "database": "dlt_test", + "username": "loader", + "password": "loader", + "drivername": "postgresql", + } conn_cred = ConnectionStringCredentials() 
conn_cred.parse_native_representation(conn_str) @@ -313,7 +326,6 @@ def postgres_direct(local_credentials: ConnectionStringCredentials = dlt.secrets def test_auto_derived_spec_type_name() -> None: - class AutoNameTest: @with_config def __init__(self, pos_par=dlt.secrets.value, /, kw_par=None) -> None: @@ -334,7 +346,10 @@ def stuff_test(pos_par, /, kw_par) -> None: pass # name is composed via __qualname__ of func - assert _get_spec_name_from_f(AutoNameTest.__init__) == "TestAutoDerivedSpecTypeNameAutoNameTestInitConfiguration" + assert ( + _get_spec_name_from_f(AutoNameTest.__init__) + == "TestAutoDerivedSpecTypeNameAutoNameTestInitConfiguration" + ) # synthesized spec present in current module assert "TestAutoDerivedSpecTypeNameAutoNameTestInitConfiguration" in globals() # instantiate diff --git a/tests/common/configuration/test_providers.py b/tests/common/configuration/test_providers.py index 2408aae583..f8c7900c24 100644 --- a/tests/common/configuration/test_providers.py +++ b/tests/common/configuration/test_providers.py @@ -1,5 +1,6 @@ import pytest + @pytest.mark.skip("Not implemented") def test_providers_order() -> None: pass diff --git a/tests/common/configuration/test_sections.py b/tests/common/configuration/test_sections.py index 1298dd11f2..9e0bc7e26d 100644 --- a/tests/common/configuration/test_sections.py +++ b/tests/common/configuration/test_sections.py @@ -2,14 +2,25 @@ from typing import Any, Optional from dlt.common.configuration.container import Container -from dlt.common.configuration import configspec, ConfigFieldMissingException, resolve, inject_section +from dlt.common.configuration import ( + configspec, + ConfigFieldMissingException, + resolve, + inject_section, +) from dlt.common.configuration.providers.provider import ConfigProvider from dlt.common.configuration.specs import BaseConfiguration, ConfigSectionContext from dlt.common.configuration.exceptions import LookupTrace from dlt.common.typing import AnyType from tests.utils import preserve_environ -from tests.common.configuration.utils import MockProvider, SectionedConfiguration, environment, mock_provider, env_provider +from tests.common.configuration.utils import ( + MockProvider, + SectionedConfiguration, + environment, + mock_provider, + env_provider, +) @configspec @@ -53,7 +64,9 @@ def test_sectioned_configuration(environment: Any, env_provider: ConfigProvider) traces = exc_val.value.traces["password"] # only one provider and section was tried assert len(traces) == 1 - assert traces[0] == LookupTrace("Environment Variables", ["DLT_TEST"], "DLT_TEST__PASSWORD", None) + assert traces[0] == LookupTrace( + "Environment Variables", ["DLT_TEST"], "DLT_TEST__PASSWORD", None + ) # assert traces[1] == LookupTrace("secrets.toml", ["DLT_TEST"], "DLT_TEST.password", None) # assert traces[2] == LookupTrace("config.toml", ["DLT_TEST"], "DLT_TEST.password", None) @@ -109,7 +122,14 @@ def test_explicit_sections_with_sectioned_config(mock_provider: MockProvider) -> assert mock_provider.last_sections == [("ns1",), (), ("ns1", "DLT_TEST"), ("DLT_TEST",)] mock_provider.reset_stats() resolve.resolve_configuration(SectionedConfiguration(), sections=("ns1", "ns2")) - assert mock_provider.last_sections == [("ns1", "ns2"), ("ns1",), (), ("ns1", "ns2", "DLT_TEST"), ("ns1", "DLT_TEST"), ("DLT_TEST",)] + assert mock_provider.last_sections == [ + ("ns1", "ns2"), + ("ns1",), + (), + ("ns1", "ns2", "DLT_TEST"), + ("ns1", "DLT_TEST"), + ("DLT_TEST",), + ] def test_overwrite_config_section_from_embedded(mock_provider: MockProvider) -> 
None: @@ -135,7 +155,13 @@ def test_explicit_sections_from_embedded_config(mock_provider: MockProvider) -> # embedded section inner of explicit mock_provider.reset_stats() resolve.resolve_configuration(EmbeddedConfiguration(), sections=("ns1",)) - assert mock_provider.last_sections == [("ns1", "sv_config",), ("sv_config",)] + assert mock_provider.last_sections == [ + ( + "ns1", + "sv_config", + ), + ("sv_config",), + ] def test_ignore_embedded_section_by_field_name(mock_provider: MockProvider) -> None: @@ -156,7 +182,11 @@ def test_ignore_embedded_section_by_field_name(mock_provider: MockProvider) -> N mock_provider.reset_stats() mock_provider.return_value_on = ("DLT_TEST",) resolve.resolve_configuration(EmbeddedWithIgnoredEmbeddedConfiguration()) - assert mock_provider.last_sections == [('ignored_embedded',), ('ignored_embedded', 'DLT_TEST'), ('DLT_TEST',)] + assert mock_provider.last_sections == [ + ("ignored_embedded",), + ("ignored_embedded", "DLT_TEST"), + ("DLT_TEST",), + ] def test_injected_sections(mock_provider: MockProvider) -> None: @@ -174,7 +204,12 @@ def test_injected_sections(mock_provider: MockProvider) -> None: mock_provider.reset_stats() mock_provider.return_value_on = ("DLT_TEST",) resolve.resolve_configuration(SectionedConfiguration()) - assert mock_provider.last_sections == [("inj-ns1",), (), ("inj-ns1", "DLT_TEST"), ("DLT_TEST",)] + assert mock_provider.last_sections == [ + ("inj-ns1",), + (), + ("inj-ns1", "DLT_TEST"), + ("DLT_TEST",), + ] # injected section inner of ns coming from embedded config mock_provider.reset_stats() mock_provider.return_value_on = () @@ -196,7 +231,7 @@ def test_section_context() -> None: with pytest.raises(ValueError): ConfigSectionContext(sections=()).source_name() with pytest.raises(ValueError): - ConfigSectionContext(sections=("sources", )).source_name() + ConfigSectionContext(sections=("sources",)).source_name() with pytest.raises(ValueError): ConfigSectionContext(sections=("sources", "modules")).source_name() @@ -221,7 +256,7 @@ def test_section_with_pipeline_name(mock_provider: MockProvider) -> None: # PIPE section is exhausted then another lookup without PIPE assert mock_provider.last_sections == [("PIPE", "ns1"), ("PIPE",), ("ns1",), ()] - mock_provider.return_value_on = ("PIPE", ) + mock_provider.return_value_on = ("PIPE",) mock_provider.reset_stats() resolve.resolve_configuration(SingleValConfiguration(), sections=("ns1",)) assert mock_provider.last_sections == [("PIPE", "ns1"), ("PIPE",)] @@ -237,10 +272,12 @@ def test_section_with_pipeline_name(mock_provider: MockProvider) -> None: mock_provider.reset_stats() resolve.resolve_configuration(SectionedConfiguration()) # first the whole SectionedConfiguration is looked under key DLT_TEST (sections: ('PIPE',), ()), then fields of SectionedConfiguration - assert mock_provider.last_sections == [('PIPE',), (), ("PIPE", "DLT_TEST"), ("DLT_TEST",)] + assert mock_provider.last_sections == [("PIPE",), (), ("PIPE", "DLT_TEST"), ("DLT_TEST",)] # with pipeline and injected sections - with container.injectable_context(ConfigSectionContext(pipeline_name="PIPE", sections=("inj-ns1",))): + with container.injectable_context( + ConfigSectionContext(pipeline_name="PIPE", sections=("inj-ns1",)) + ): mock_provider.return_value_on = () mock_provider.reset_stats() resolve.resolve_configuration(SingleValConfiguration()) diff --git a/tests/common/configuration/test_spec_union.py b/tests/common/configuration/test_spec_union.py index f013c9c568..25c32920bc 100644 --- 
a/tests/common/configuration/test_spec_union.py +++ b/tests/common/configuration/test_spec_union.py @@ -145,8 +145,17 @@ def test_unresolved_union() -> None: resolve_configuration(ZenConfig()) assert cfm_ex.value.fields == ["credentials"] # all the missing fields from all the union elements are present - checked_keys = set(t.key for t in itertools.chain(*cfm_ex.value.traces.values()) if t.provider == EnvironProvider().name) - assert checked_keys == {"CREDENTIALS__EMAIL", "CREDENTIALS__PASSWORD", "CREDENTIALS__API_KEY", "CREDENTIALS__API_SECRET"} + checked_keys = set( + t.key + for t in itertools.chain(*cfm_ex.value.traces.values()) + if t.provider == EnvironProvider().name + ) + assert checked_keys == { + "CREDENTIALS__EMAIL", + "CREDENTIALS__PASSWORD", + "CREDENTIALS__API_KEY", + "CREDENTIALS__API_SECRET", + } def test_union_decorator() -> None: @@ -154,7 +163,10 @@ def test_union_decorator() -> None: # this will generate equivalent of ZenConfig @dlt.source - def zen_source(credentials: Union[ZenApiKeyCredentials, ZenEmailCredentials, str] = dlt.secrets.value, some_option: bool = False): + def zen_source( + credentials: Union[ZenApiKeyCredentials, ZenEmailCredentials, str] = dlt.secrets.value, + some_option: bool = False, + ): # depending on what the user provides in config, ZenApiKeyCredentials or ZenEmailCredentials will be injected in credentials # both classes implement `auth` so you can always call it credentials.auth() # type: ignore[union-attr] @@ -179,6 +191,7 @@ class GoogleAnalyticsCredentialsBase(CredentialsConfiguration): """ The Base version of all the GoogleAnalyticsCredentials classes. """ + pass @@ -187,6 +200,7 @@ class GoogleAnalyticsCredentialsOAuth(GoogleAnalyticsCredentialsBase): """ This class is used to store credentials Google Analytics """ + client_id: str client_secret: TSecretValue project_id: TSecretValue @@ -195,23 +209,27 @@ class GoogleAnalyticsCredentialsOAuth(GoogleAnalyticsCredentialsBase): @dlt.source(max_table_nesting=2) -def google_analytics(credentials: Union[GoogleAnalyticsCredentialsOAuth, GcpServiceAccountCredentials] = dlt.secrets.value): +def google_analytics( + credentials: Union[ + GoogleAnalyticsCredentialsOAuth, GcpServiceAccountCredentials + ] = dlt.secrets.value +): yield dlt.resource([credentials], name="creds") def test_google_auth_union(environment: Any) -> None: info = { - "type" : "service_account", - "project_id" : "dlthub-analytics", - "private_key_id" : "45cbe97fbd3d756d55d4633a5a72d8530a05b993", - "private_key" : "-----BEGIN PRIVATE KEY-----\n\n-----END PRIVATE KEY-----\n", - "client_email" : "105150287833-compute@developer.gserviceaccount.com", - "client_id" : "106404499083406128146", - "auth_uri" : "https://accounts.google.com/o/oauth2/auth", - "token_uri" : "https://oauth2.googleapis.com/token", - "auth_provider_x509_cert_url" : "https://www.googleapis.com/oauth2/v1/certs", - "client_x509_cert_url" : "https://www.googleapis.com/robot/v1/metadata/x509/105150287833-compute%40developer.gserviceaccount.com" - } + "type": "service_account", + "project_id": "dlthub-analytics", + "private_key_id": "45cbe97fbd3d756d55d4633a5a72d8530a05b993", + "private_key": "-----BEGIN PRIVATE KEY-----\n\n-----END PRIVATE KEY-----\n", + "client_email": "105150287833-compute@developer.gserviceaccount.com", + "client_id": "106404499083406128146", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + 
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/105150287833-compute%40developer.gserviceaccount.com", + } credentials = list(google_analytics(credentials=info))[0] # type: ignore[arg-type] print(dict(credentials)) @@ -225,23 +243,23 @@ def sql_database(credentials: Union[ConnectionStringCredentials, Engine, str] = def test_union_concrete_type(environment: Any) -> None: # we can pass engine explicitly - engine = create_engine('sqlite:///:memory:', echo=True) + engine = create_engine("sqlite:///:memory:", echo=True) db = sql_database(credentials=engine) creds = list(db)[0] assert isinstance(creds, Engine) # we can pass valid connection string explicitly - db = sql_database(credentials='sqlite://user@/:memory:') + db = sql_database(credentials="sqlite://user@/:memory:") creds = list(db)[0] # but it is used as native value assert isinstance(creds, ConnectionStringCredentials) # pass instance of credentials - cn = ConnectionStringCredentials('sqlite://user@/:memory:') + cn = ConnectionStringCredentials("sqlite://user@/:memory:") db = sql_database(credentials=cn) # exactly that instance is returned assert list(db)[0] is cn # invalid cn with pytest.raises(InvalidNativeValue): - db = sql_database(credentials='?') + db = sql_database(credentials="?") with pytest.raises(InvalidNativeValue): db = sql_database(credentials=123) # type: ignore[arg-type] diff --git a/tests/common/configuration/test_toml_provider.py b/tests/common/configuration/test_toml_provider.py index 71ceb790e2..fcec881521 100644 --- a/tests/common/configuration/test_toml_provider.py +++ b/tests/common/configuration/test_toml_provider.py @@ -10,14 +10,34 @@ from dlt.common.configuration.container import Container from dlt.common.configuration.inject import with_config from dlt.common.configuration.exceptions import LookupTrace -from dlt.common.configuration.providers.toml import SECRETS_TOML, CONFIG_TOML, BaseTomlProvider, SecretsTomlProvider, ConfigTomlProvider, StringTomlProvider, TomlProviderReadException +from dlt.common.configuration.providers.toml import ( + SECRETS_TOML, + CONFIG_TOML, + BaseTomlProvider, + SecretsTomlProvider, + ConfigTomlProvider, + StringTomlProvider, + TomlProviderReadException, +) from dlt.common.configuration.specs.config_providers_context import ConfigProvidersContext -from dlt.common.configuration.specs import BaseConfiguration, GcpServiceAccountCredentialsWithoutDefaults, ConnectionStringCredentials +from dlt.common.configuration.specs import ( + BaseConfiguration, + GcpServiceAccountCredentialsWithoutDefaults, + ConnectionStringCredentials, +) from dlt.common.runners.configuration import PoolRunnerConfiguration from dlt.common.typing import TSecretValue from tests.utils import preserve_environ -from tests.common.configuration.utils import SecretCredentials, WithCredentialsConfiguration, CoercionTestConfiguration, COERCIONS, SecretConfiguration, environment, toml_providers +from tests.common.configuration.utils import ( + SecretCredentials, + WithCredentialsConfiguration, + CoercionTestConfiguration, + COERCIONS, + SecretConfiguration, + environment, + toml_providers, +) @configspec @@ -31,7 +51,6 @@ class EmbeddedWithGcpCredentials(BaseConfiguration): def test_secrets_from_toml_secrets(toml_providers: ConfigProvidersContext) -> None: - # remove secret_value to trigger exception del toml_providers["secrets.toml"]._toml["secret_value"] # type: ignore[attr-defined] @@ -63,10 +82,8 @@ def test_toml_types(toml_providers: ConfigProvidersContext) -> None: def 
test_config_provider_order(toml_providers: ConfigProvidersContext, environment: Any) -> None: - # add env provider - @with_config(sections=("api",)) def single_val(port=None): return port @@ -86,7 +103,11 @@ def test_toml_mixed_config_inject(toml_providers: ConfigProvidersContext) -> Non # get data from both providers @with_config - def mixed_val(api_type=dlt.config.value, secret_value: TSecretValue = dlt.secrets.value, typecheck: Any = dlt.config.value): + def mixed_val( + api_type=dlt.config.value, + secret_value: TSecretValue = dlt.secrets.value, + typecheck: Any = dlt.config.value, + ): return api_type, secret_value, typecheck _tup = mixed_val(None, None, None) @@ -109,13 +130,19 @@ def test_toml_sections(toml_providers: ConfigProvidersContext) -> None: def test_secrets_toml_credentials(environment: Any, toml_providers: ConfigProvidersContext) -> None: # there are credentials exactly under destination.bigquery.credentials - c = resolve.resolve_configuration(GcpServiceAccountCredentialsWithoutDefaults(), sections=("destination", "bigquery")) + c = resolve.resolve_configuration( + GcpServiceAccountCredentialsWithoutDefaults(), sections=("destination", "bigquery") + ) assert c.project_id.endswith("destination.bigquery.credentials") # there are no destination.gcp_storage.credentials so it will fallback to "destination"."credentials" - c = resolve.resolve_configuration(GcpServiceAccountCredentialsWithoutDefaults(), sections=("destination", "gcp_storage")) + c = resolve.resolve_configuration( + GcpServiceAccountCredentialsWithoutDefaults(), sections=("destination", "gcp_storage") + ) assert c.project_id.endswith("destination.credentials") # also explicit - c = resolve.resolve_configuration(GcpServiceAccountCredentialsWithoutDefaults(), sections=("destination",)) + c = resolve.resolve_configuration( + GcpServiceAccountCredentialsWithoutDefaults(), sections=("destination",) + ) assert c.project_id.endswith("destination.credentials") # there's "credentials" key but does not contain valid gcp credentials with pytest.raises(ConfigFieldMissingException): @@ -132,12 +159,18 @@ def test_secrets_toml_credentials(environment: Any, toml_providers: ConfigProvid resolve.resolve_configuration(c3, sections=("destination", "bigquery")) -def test_secrets_toml_embedded_credentials(environment: Any, toml_providers: ConfigProvidersContext) -> None: +def test_secrets_toml_embedded_credentials( + environment: Any, toml_providers: ConfigProvidersContext +) -> None: # will try destination.bigquery.credentials - c = resolve.resolve_configuration(EmbeddedWithGcpCredentials(), sections=("destination", "bigquery")) + c = resolve.resolve_configuration( + EmbeddedWithGcpCredentials(), sections=("destination", "bigquery") + ) assert c.credentials.project_id.endswith("destination.bigquery.credentials") # will try destination.gcp_storage.credentials and fallback to destination.credentials - c = resolve.resolve_configuration(EmbeddedWithGcpCredentials(), sections=("destination", "gcp_storage")) + c = resolve.resolve_configuration( + EmbeddedWithGcpCredentials(), sections=("destination", "gcp_storage") + ) assert c.credentials.project_id.endswith("destination.credentials") # will try everything until credentials in the root where incomplete credentials are present c = EmbeddedWithGcpCredentials() @@ -150,11 +183,15 @@ def test_secrets_toml_embedded_credentials(environment: Any, toml_providers: Con assert set(py_ex.value.traces.keys()) == {"client_email", "private_key"} # embed "gcp_storage" will bubble up to the very top, 
never reverts to "credentials" - c2 = resolve.resolve_configuration(EmbeddedWithGcpStorage(), sections=("destination", "bigquery")) + c2 = resolve.resolve_configuration( + EmbeddedWithGcpStorage(), sections=("destination", "bigquery") + ) assert c2.gcp_storage.project_id.endswith("-gcp-storage") # also explicit - c3 = resolve.resolve_configuration(GcpServiceAccountCredentialsWithoutDefaults(), sections=("destination",)) + c3 = resolve.resolve_configuration( + GcpServiceAccountCredentialsWithoutDefaults(), sections=("destination",) + ) assert c3.project_id.endswith("destination.credentials") # there's "credentials" key but does not contain valid gcp credentials with pytest.raises(ConfigFieldMissingException): @@ -166,13 +203,22 @@ def test_dicts_are_not_enumerated() -> None: pass -def test_secrets_toml_credentials_from_native_repr(environment: Any, toml_providers: ConfigProvidersContext) -> None: +def test_secrets_toml_credentials_from_native_repr( + environment: Any, toml_providers: ConfigProvidersContext +) -> None: # cfg = toml_providers["secrets.toml"] # print(cfg._toml) # print(cfg._toml["source"]["credentials"]) # resolve gcp_credentials by parsing initial value which is str holding json doc - c = resolve.resolve_configuration(GcpServiceAccountCredentialsWithoutDefaults(), sections=("source",)) - assert c.private_key == "-----BEGIN PRIVATE KEY-----\nMIIEuwIBADANBgkqhkiG9w0BAQEFAASCBKUwggShAgEAAoIBAQCNEN0bL39HmD+S\n...\n-----END PRIVATE KEY-----\n" + c = resolve.resolve_configuration( + GcpServiceAccountCredentialsWithoutDefaults(), sections=("source",) + ) + assert ( + c.private_key + == "-----BEGIN PRIVATE" + " KEY-----\nMIIEuwIBADANBgkqhkiG9w0BAQEFAASCBKUwggShAgEAAoIBAQCNEN0bL39HmD+S\n...\n-----END" + " PRIVATE KEY-----\n" + ) # but project id got overridden from credentials.project_id assert c.project_id.endswith("-credentials") # also try sql alchemy url (native repr) @@ -252,19 +298,33 @@ def test_write_value(toml_providers: ConfigProvidersContext) -> None: # this will create path of tables provider.set_value("deep_int", 2137, "deep_pipeline", "deep", "deep", "deep", "deep") assert provider._toml["deep_pipeline"]["deep"]["deep"]["deep"]["deep"]["deep_int"] == 2137 # type: ignore[index] - assert provider.get_value("deep_int", TAny, "deep_pipeline", "deep", "deep", "deep", "deep") == (2137, "deep_pipeline.deep.deep.deep.deep.deep_int") + assert provider.get_value( + "deep_int", TAny, "deep_pipeline", "deep", "deep", "deep", "deep" + ) == (2137, "deep_pipeline.deep.deep.deep.deep.deep_int") # same without the pipeline now = pendulum.now() provider.set_value("deep_date", now, None, "deep", "deep", "deep", "deep") - assert provider.get_value("deep_date", TAny, None, "deep", "deep", "deep", "deep") == (now, "deep.deep.deep.deep.deep_date") + assert provider.get_value("deep_date", TAny, None, "deep", "deep", "deep", "deep") == ( + now, + "deep.deep.deep.deep.deep_date", + ) # in existing path provider.set_value("deep_list", [1, 2, 3], None, "deep", "deep", "deep") - assert provider.get_value("deep_list", TAny, None, "deep", "deep", "deep") == ([1, 2, 3], "deep.deep.deep.deep_list") + assert provider.get_value("deep_list", TAny, None, "deep", "deep", "deep") == ( + [1, 2, 3], + "deep.deep.deep.deep_list", + ) # still there - assert provider.get_value("deep_date", TAny, None, "deep", "deep", "deep", "deep") == (now, "deep.deep.deep.deep.deep_date") + assert provider.get_value("deep_date", TAny, None, "deep", "deep", "deep", "deep") == ( + now, + "deep.deep.deep.deep.deep_date", + ) # 
overwrite value provider.set_value("deep_list", [1, 2, 3, 4], None, "deep", "deep", "deep") - assert provider.get_value("deep_list", TAny, None, "deep", "deep", "deep") == ([1, 2, 3, 4], "deep.deep.deep.deep_list") + assert provider.get_value("deep_list", TAny, None, "deep", "deep", "deep") == ( + [1, 2, 3, 4], + "deep.deep.deep.deep_list", + ) # invalid type with pytest.raises(ValueError): provider.set_value("deep_decimal", Decimal("1.2"), None, "deep", "deep", "deep", "deep") @@ -272,31 +332,49 @@ def test_write_value(toml_providers: ConfigProvidersContext) -> None: # write new dict to a new key test_d1 = {"key": "top", "embed": {"inner": "bottom", "inner_2": True}} provider.set_value("deep_dict", test_d1, None, "dict_test") - assert provider.get_value("deep_dict", TAny, None, "dict_test") == (test_d1, "dict_test.deep_dict") + assert provider.get_value("deep_dict", TAny, None, "dict_test") == ( + test_d1, + "dict_test.deep_dict", + ) # write same dict over dict provider.set_value("deep_dict", test_d1, None, "dict_test") - assert provider.get_value("deep_dict", TAny, None, "dict_test") == (test_d1, "dict_test.deep_dict") + assert provider.get_value("deep_dict", TAny, None, "dict_test") == ( + test_d1, + "dict_test.deep_dict", + ) # get a fragment - assert provider.get_value("inner_2", TAny, None, "dict_test", "deep_dict", "embed") == (True, "dict_test.deep_dict.embed.inner_2") + assert provider.get_value("inner_2", TAny, None, "dict_test", "deep_dict", "embed") == ( + True, + "dict_test.deep_dict.embed.inner_2", + ) # write a dict over non dict provider.set_value("deep_list", test_d1, None, "deep", "deep", "deep") - assert provider.get_value("deep_list", TAny, None, "deep", "deep", "deep") == (test_d1, "deep.deep.deep.deep_list") + assert provider.get_value("deep_list", TAny, None, "deep", "deep", "deep") == ( + test_d1, + "deep.deep.deep.deep_list", + ) # merge dicts test_d2 = {"key": "_top", "key2": "new2", "embed": {"inner": "_bottom", "inner_3": 2121}} provider.set_value("deep_dict", test_d2, None, "dict_test") test_m_d1_d2 = { "key": "_top", "embed": {"inner": "_bottom", "inner_2": True, "inner_3": 2121}, - "key2": "new2" + "key2": "new2", } - assert provider.get_value("deep_dict", TAny, None, "dict_test") == (test_m_d1_d2, "dict_test.deep_dict") + assert provider.get_value("deep_dict", TAny, None, "dict_test") == ( + test_m_d1_d2, + "dict_test.deep_dict", + ) # print(provider.get_value("deep_dict", Any, None, "dict_test")) # write configuration pool = PoolRunnerConfiguration(pool_type="none", workers=10) provider.set_value("runner_config", dict(pool), "new_pipeline") # print(provider._toml["new_pipeline"]["runner_config"].as_string()) - assert provider._toml["new_pipeline"]["runner_config"] == dict(pool) # type: ignore[index] + expected_pool = dict(pool) + # None is removed + expected_pool.pop("start_method") + assert provider._toml["new_pipeline"]["runner_config"] == expected_pool # type: ignore[index] # dict creates only shallow dict so embedded credentials will fail creds = WithCredentialsConfiguration() @@ -355,7 +433,6 @@ def test_write_toml_value(toml_providers: ConfigProvidersContext) -> None: def test_toml_string_provider() -> None: - # test basic reading provider = StringTomlProvider(""" [section1.subsection] @@ -365,8 +442,8 @@ def test_toml_string_provider() -> None: key2 = "value2" """) - assert provider.get_value("key1", "", "section1", "subsection") == ("value1", "section1.subsection.key1") # type: ignore[arg-type] - assert provider.get_value("key2", "", "section2", 
"subsection") == ("value2", "section2.subsection.key2") # type: ignore[arg-type] + assert provider.get_value("key1", "", "section1", "subsection") == ("value1", "section1.subsection.key1") # type: ignore[arg-type] + assert provider.get_value("key2", "", "section2", "subsection") == ("value2", "section2.subsection.key2") # type: ignore[arg-type] # test basic writing provider = StringTomlProvider("") diff --git a/tests/common/configuration/utils.py b/tests/common/configuration/utils.py index f0df420c45..73643561dc 100644 --- a/tests/common/configuration/utils.py +++ b/tests/common/configuration/utils.py @@ -1,13 +1,30 @@ import pytest from os import environ import datetime # noqa: I251 -from typing import Any, Iterator, List, Optional, Tuple, Type, Dict, MutableMapping, Optional, Sequence, TYPE_CHECKING +from typing import ( + Any, + Iterator, + List, + Optional, + Tuple, + Type, + Dict, + MutableMapping, + Optional, + Sequence, + TYPE_CHECKING, +) from dlt.common import Decimal, pendulum from dlt.common.configuration import configspec from dlt.common.configuration.specs import BaseConfiguration, CredentialsConfiguration from dlt.common.configuration.container import Container -from dlt.common.configuration.providers import ConfigProvider, EnvironProvider, ConfigTomlProvider, SecretsTomlProvider +from dlt.common.configuration.providers import ( + ConfigProvider, + EnvironProvider, + ConfigTomlProvider, + SecretsTomlProvider, +) from dlt.common.configuration.utils import get_resolved_traces from dlt.common.configuration.specs.config_providers_context import ConfigProvidersContext from dlt.common.typing import TSecretValue, StrAny @@ -64,8 +81,8 @@ class SectionedConfiguration(BaseConfiguration): password: str = None if TYPE_CHECKING: - def __init__(self, password: str = None) -> None: - ... + + def __init__(self, password: str = None) -> None: ... @pytest.fixture(scope="function") @@ -115,7 +132,6 @@ def toml_providers() -> Iterator[ConfigProvidersContext]: class MockProvider(ConfigProvider): - def __init__(self) -> None: self.value: Any = None self.return_value_on: Tuple[str, ...] = () @@ -125,9 +141,11 @@ def reset_stats(self) -> None: self.last_section: Tuple[str, ...] 
= None self.last_sections: List[Tuple[str, ...]] = [] - def get_value(self, key: str, hint: Type[Any], pipeline_name: str, *sections: str) -> Tuple[Optional[Any], str]: + def get_value( + self, key: str, hint: Type[Any], pipeline_name: str, *sections: str + ) -> Tuple[Optional[Any], str]: if pipeline_name: - sections = (pipeline_name, ) + sections + sections = (pipeline_name,) + sections self.last_section = sections self.last_sections.append(sections) if sections == self.return_value_on: @@ -156,27 +174,21 @@ def supports_secrets(self) -> bool: COERCIONS = { - 'str_val': 'test string', - 'int_val': 12345, - 'bool_val': True, - 'list_val': [1, "2", [3]], - 'dict_val': { - 'a': 1, - "b": "2" - }, - 'bytes_val': b'Hello World!', - 'float_val': 1.18927, + "str_val": "test string", + "int_val": 12345, + "bool_val": True, + "list_val": [1, "2", [3]], + "dict_val": {"a": 1, "b": "2"}, + "bytes_val": b"Hello World!", + "float_val": 1.18927, "tuple_val": (1, 2, {"1": "complicated dicts allowed in literal eval"}), - 'any_val': "function() {}", - 'none_val': "none", - 'COMPLEX_VAL': { - "_": [1440, ["*"], []], - "change-email": [560, ["*"], []] - }, + "any_val": "function() {}", + "none_val": "none", + "COMPLEX_VAL": {"_": [1440, ["*"], []], "change-email": [560, ["*"], []]}, "date_val": pendulum.now(), "dec_val": Decimal("22.38"), "sequence_val": ["A", "B", "KAPPA"], "gen_list_val": ["C", "Z", "N"], "mapping_val": {"FL": 1, "FR": {"1": 2}}, - "mutable_mapping_val": {"str": "str"} + "mutable_mapping_val": {"str": "str"}, } diff --git a/tests/common/data_writers/test_buffered_writer.py b/tests/common/data_writers/test_buffered_writer.py deleted file mode 100644 index 85cfcb2d0c..0000000000 --- a/tests/common/data_writers/test_buffered_writer.py +++ /dev/null @@ -1,221 +0,0 @@ -import os -from typing import Iterator, Set, Literal - -import pytest - -from dlt.common.data_writers.buffered import BufferedDataWriter, DataWriter -from dlt.common.data_writers.exceptions import BufferedDataWriterClosed -from dlt.common.destination import TLoaderFileFormat, DestinationCapabilitiesContext -from dlt.common.schema.utils import new_column -from dlt.common.storages.file_storage import FileStorage - -from dlt.common.typing import DictStrAny - -from tests.utils import TEST_STORAGE_ROOT, write_version, autouse_test_storage -import datetime # noqa: 251 - - -ALL_WRITERS: Set[Literal[TLoaderFileFormat]] = {"insert_values", "jsonl", "parquet", "arrow", "puae-jsonl"} - - -def get_writer(_format: TLoaderFileFormat = "insert_values", buffer_max_items: int = 10, disable_compression: bool = False) -> BufferedDataWriter[DataWriter]: - caps = DestinationCapabilitiesContext.generic_capabilities() - caps.preferred_loader_file_format = _format - file_template = os.path.join(TEST_STORAGE_ROOT, f"{_format}.%s") - return BufferedDataWriter(_format, file_template, buffer_max_items=buffer_max_items, disable_compression=disable_compression, _caps=caps) - - -def test_write_no_item() -> None: - with get_writer() as writer: - pass - assert writer.closed - with pytest.raises(BufferedDataWriterClosed): - writer._ensure_open() - # no files rotated - assert writer.closed_files == [] - - -@pytest.mark.parametrize("disable_compression", [True, False], ids=["no_compression", "compression"]) -def test_rotation_on_schema_change(disable_compression: bool) -> None: - - c1 = new_column("col1", "bigint") - c2 = new_column("col2", "bigint") - c3 = new_column("col3", "text") - - t1 = {"col1": c1} - t2 = {"col2": c2, "col1": c1} - t3 = {"col3": c3, "col2": 
c2, "col1": c1} - - def c1_doc(count: int) -> Iterator[DictStrAny]: - return map(lambda x: {"col1": x}, range(0, count)) - - def c2_doc(count: int) -> Iterator[DictStrAny]: - return map(lambda x: {"col1": x, "col2": x*2+1}, range(0, count)) - - def c3_doc(count: int) -> Iterator[DictStrAny]: - return map(lambda x: {"col3": "col3_value"}, range(0, count)) - - # change schema before file first flush - with get_writer(disable_compression=disable_compression) as writer: - writer.write_data_item(list(c1_doc(8)), t1) - assert writer._current_columns == t1 - # but different instance - assert writer._current_columns is not t1 - writer.write_data_item(list(c2_doc(1)), t2) - # file name is there - assert writer._file_name is not None - # no file is open - assert writer._file is None - # writer is closed and data was written - assert len(writer.closed_files) == 1 - # check the content, mind that we swapped the columns - with FileStorage.open_zipsafe_ro(writer.closed_files[0], "r", encoding="utf-8") as f: - content = f.readlines() - assert "col2,col1" in content[0] - assert "NULL,0" in content[2] - # col2 first - assert "1,0" in content[-1] - - # data would flush and schema change - with get_writer() as writer: - writer.write_data_item(list(c1_doc(9)), t1) - old_file = writer._file_name - writer.write_data_item(list(c2_doc(1)), t2) # rotates here - # file is open - assert writer._file is not None - # no files were closed - assert len(writer.closed_files) == 0 - assert writer._file_name == old_file - # buffer is empty - assert writer._buffered_items == [] - - # file would rotate and schema change - with get_writer() as writer: - writer.file_max_items = 10 - writer.write_data_item(list(c1_doc(9)), t1) - old_file = writer._file_name - writer.write_data_item(list(c2_doc(1)), t2) # rotates here - # file is not open after rotation - assert writer._file is None - # file was rotated - assert len(writer.closed_files) == 1 - assert writer._file_name != old_file - # buffer is empty - assert writer._buffered_items == [] - - # schema change after flush rotates file - with get_writer() as writer: - writer.write_data_item(list(c1_doc(11)), t1) - writer.write_data_item(list(c2_doc(1)), t2) - assert len(writer.closed_files) == 1 - # now the file is closed - assert writer._file is None - old_file = writer._file_name - # so we can write schema change without rotation and flushing - writer.write_data_item(list(c2_doc(1)), t3) - assert writer._file is None - assert writer._file_name == old_file - # make it flush - writer.file_max_items = 10 - writer.write_data_item(list(c3_doc(20)), t3) - assert len(writer.closed_files) == 2 - assert writer._buffered_items == [] - # the last file must contain text value of the column3 - with FileStorage.open_zipsafe_ro(writer.closed_files[-1], "r", encoding="utf-8") as f: - content = f.readlines() - assert "(col3_value" in content[-1] - - -@pytest.mark.parametrize("disable_compression", [True, False], ids=["no_compression", "compression"]) -def test_NO_rotation_on_schema_change(disable_compression: bool) -> None: - c1 = new_column("col1", "bigint") - c2 = new_column("col2", "bigint") - - t1 = {"col1": c1} - t2 = {"col2": c2, "col1": c1} - - def c1_doc(count: int) -> Iterator[DictStrAny]: - return map(lambda x: {"col1": x}, range(0, count)) - - def c2_doc(count: int) -> Iterator[DictStrAny]: - return map(lambda x: {"col1": x, "col2": x*2+1}, range(0, count)) - - # change schema before file first flush - with get_writer(_format="jsonl", disable_compression=disable_compression) as writer: - 
writer.write_data_item(list(c1_doc(15)), t1) - # flushed - assert writer._file is not None - writer.write_data_item(list(c2_doc(2)), t2) - # no rotation - assert len(writer._buffered_items) == 2 - # only the initial 15 items written - assert writer._writer.items_count == 15 - # all written - with FileStorage.open_zipsafe_ro(writer.closed_files[-1], "r", encoding="utf-8") as f: - content = f.readlines() - assert content[-1] == '{"col1":1,"col2":3}\n' - - -@pytest.mark.parametrize("disable_compression", [True, False], ids=["no_compression", "compression"]) -def test_writer_requiring_schema(disable_compression: bool) -> None: - # assertion on flushing - with pytest.raises(AssertionError): - with get_writer(disable_compression=disable_compression) as writer: - writer.write_data_item([{"col1": 1}], None) - # just single schema is enough - c1 = new_column("col1", "bigint") - t1 = {"col1": c1} - with get_writer(disable_compression=disable_compression) as writer: - writer.write_data_item([{"col1": 1}], None) - writer.write_data_item([{"col1": 1}], t1) - - -@pytest.mark.parametrize("disable_compression", [True, False], ids=["no_compression", "compression"]) -def test_writer_optional_schema(disable_compression: bool) -> None: - with get_writer(_format="jsonl", disable_compression=disable_compression) as writer: - writer.write_data_item([{"col1": 1}], None) - writer.write_data_item([{"col1": 1}], None) - - -@pytest.mark.parametrize("writer_format", ALL_WRITERS - {"arrow"}) -def test_writer_items_count(writer_format: TLoaderFileFormat) -> None: - c1 = {"col1": new_column("col1", "bigint")} - with get_writer(_format=writer_format) as writer: - assert writer._buffered_items_count == 0 - # single item - writer.write_data_item({"col1": 1}, columns=c1) - assert writer._buffered_items_count == 1 - # list - writer.write_data_item([{"col1": 1}, {"col1": 2}], columns=c1) - assert writer._buffered_items_count == 3 - writer._flush_items() - assert writer._buffered_items_count == 0 - assert writer._writer.items_count == 3 - - -def test_writer_items_count_arrow() -> None: - import pyarrow as pa - c1 = {"col1": new_column("col1", "bigint")} - with get_writer(_format="arrow") as writer: - assert writer._buffered_items_count == 0 - # single item - writer.write_data_item(pa.Table.from_pylist([{"col1": 1}]), columns=c1) - assert writer._buffered_items_count == 1 - # single item with many rows - writer.write_data_item(pa.Table.from_pylist([{"col1": 1}, {"col1": 2}]), columns=c1) - assert writer._buffered_items_count == 3 - # empty list - writer.write_data_item([], columns=c1) - assert writer._buffered_items_count == 3 - # list with one item - writer.write_data_item([pa.Table.from_pylist([{"col1": 1}])], columns=c1) - assert writer._buffered_items_count == 4 - # list with many items - writer.write_data_item( - [pa.Table.from_pylist([{"col1": 1}]), pa.Table.from_pylist([{"col1": 1}, {"col1": 2}])], - columns=c1 - ) - assert writer._buffered_items_count == 7 - writer._flush_items() - assert writer._buffered_items_count == 0 - assert writer._writer.items_count == 7 diff --git a/tests/common/data_writers/test_data_writers.py b/tests/common/data_writers/test_data_writers.py index 66b8f765c7..ac4f118229 100644 --- a/tests/common/data_writers/test_data_writers.py +++ b/tests/common/data_writers/test_data_writers.py @@ -1,13 +1,32 @@ import io import pytest +import time from typing import Iterator from dlt.common import pendulum, json from dlt.common.typing import AnyFun + # from dlt.destinations.postgres import capabilities 
-from dlt.destinations.redshift import capabilities as redshift_caps -from dlt.common.data_writers.escape import escape_redshift_identifier, escape_bigquery_identifier, escape_redshift_literal, escape_postgres_literal, escape_duckdb_literal -from dlt.common.data_writers.writers import DataWriter, InsertValuesWriter, JsonlWriter, ParquetDataWriter +from dlt.destinations.impl.redshift import capabilities as redshift_caps +from dlt.common.data_writers.escape import ( + escape_redshift_identifier, + escape_bigquery_identifier, + escape_redshift_literal, + escape_postgres_literal, + escape_duckdb_literal, +) + +# import all writers here to check if it can be done without all the dependencies +from dlt.common.data_writers.writers import ( + DataWriter, + DataWriterMetrics, + EMPTY_DATA_WRITER_METRICS, + InsertValuesWriter, + JsonlWriter, + JsonlListPUAEncodeWriter, + ParquetDataWriter, + ArrowWriter, +) from tests.common.utils import load_json_case, row_to_column_schemas @@ -21,6 +40,7 @@ class _StringIOWriter(DataWriter): class _BytesIOWriter(DataWriter): _f: io.BytesIO + @pytest.fixture def insert_writer() -> Iterator[DataWriter]: with io.StringIO() as f: @@ -48,7 +68,7 @@ def test_simple_jsonl_writer(jsonl_writer: _BytesIOWriter) -> None: jsonl_writer.write_all(None, rows) # remove b'' at the end lines = jsonl_writer._f.getvalue().split(b"\n") - assert lines[-1] == b'' + assert lines[-1] == b"" assert len(lines) == 3 @@ -93,13 +113,22 @@ def test_string_literal_escape() -> None: assert escape_redshift_literal(", NULL'); DROP TABLE --") == "', NULL''); DROP TABLE --'" assert escape_redshift_literal(", NULL');\n DROP TABLE --") == "', NULL'');\\n DROP TABLE --'" assert escape_redshift_literal(", NULL);\n DROP TABLE --") == "', NULL);\\n DROP TABLE --'" - assert escape_redshift_literal(", NULL);\\n DROP TABLE --\\") == "', NULL);\\\\n DROP TABLE --\\\\'" + assert ( + escape_redshift_literal(", NULL);\\n DROP TABLE --\\") + == "', NULL);\\\\n DROP TABLE --\\\\'" + ) # assert escape_redshift_literal(b'hello_word') == "\\x68656c6c6f5f776f7264" @pytest.mark.parametrize("escaper", ALL_LITERAL_ESCAPE) def test_string_complex_escape(escaper: AnyFun) -> None: - doc = {"complex":[1,2,3,"a"], "link": "?commen\ntU\nrn=urn%3Ali%3Acomment%3A%28acti\0xA \0x0 \\vity%3A69'08444473\n\n551163392%2C6n \r \x8e9085"} + doc = { + "complex": [1, 2, 3, "a"], + "link": ( + "?commen\ntU\nrn=urn%3Ali%3Acomment%3A%28acti\0xA \0x0" + " \\vity%3A69'08444473\n\n551163392%2C6n \r \x8e9085" + ), + } escaped = escaper(doc) # should be same as string escape if escaper == escape_redshift_literal: @@ -109,16 +138,42 @@ def test_string_complex_escape(escaper: AnyFun) -> None: def test_identifier_escape() -> None: - assert escape_redshift_identifier(", NULL'); DROP TABLE\" -\\-") == '", NULL\'); DROP TABLE"" -\\\\-"' + assert ( + escape_redshift_identifier(", NULL'); DROP TABLE\" -\\-") + == '", NULL\'); DROP TABLE"" -\\\\-"' + ) def test_identifier_escape_bigquery() -> None: - assert escape_bigquery_identifier(", NULL'); DROP TABLE\"` -\\-") == '`, NULL\'); DROP TABLE"\\` -\\\\-`' + assert ( + escape_bigquery_identifier(", NULL'); DROP TABLE\"` -\\-") + == "`, NULL'); DROP TABLE\"\\` -\\\\-`" + ) def test_string_literal_escape_unicode() -> None: # test on some unicode characters assert escape_redshift_literal(", NULL);\n DROP TABLE --") == "', NULL);\\n DROP TABLE --'" - assert escape_redshift_literal("イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム") == "'イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム'" - assert escape_redshift_identifier("ąćł\"") == '"ąćł"""' - assert 
escape_redshift_identifier("イロハニホヘト チリヌルヲ \"ワカヨタレソ ツネナラム") == '"イロハニホヘト チリヌルヲ ""ワカヨタレソ ツネナラム"' + assert ( + escape_redshift_literal("イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム") + == "'イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム'" + ) + assert escape_redshift_identifier('ąćł"') == '"ąćł"""' + assert ( + escape_redshift_identifier('イロハニホヘト チリヌルヲ "ワカヨタレソ ツネナラム') + == '"イロハニホヘト チリヌルヲ ""ワカヨタレソ ツネナラム"' + ) + + +def test_data_writer_metrics_add() -> None: + now = time.time() + metrics = DataWriterMetrics("file", 10, 100, now, now + 10) + add_m: DataWriterMetrics = metrics + EMPTY_DATA_WRITER_METRICS # type: ignore[assignment] + assert add_m == DataWriterMetrics("", 10, 100, now, now + 10) + assert metrics + metrics == DataWriterMetrics("", 20, 200, now, now + 10) + assert sum((metrics, metrics, metrics), EMPTY_DATA_WRITER_METRICS) == DataWriterMetrics( + "", 30, 300, now, now + 10 + ) + # time range extends when added + add_m = metrics + DataWriterMetrics("file", 99, 120, now - 10, now + 20) # type: ignore[assignment] + assert add_m == DataWriterMetrics("", 109, 220, now - 10, now + 20) diff --git a/tests/common/data_writers/utils.py b/tests/common/data_writers/utils.py new file mode 100644 index 0000000000..a02d654728 --- /dev/null +++ b/tests/common/data_writers/utils.py @@ -0,0 +1,35 @@ +import os +from typing import Set, Literal + + +from dlt.common.data_writers.buffered import BufferedDataWriter, DataWriter +from dlt.common.destination import TLoaderFileFormat, DestinationCapabilitiesContext + +from tests.utils import TEST_STORAGE_ROOT + +ALL_WRITERS: Set[Literal[TLoaderFileFormat]] = { + "insert_values", + "jsonl", + "parquet", + "arrow", + "puae-jsonl", +} + + +def get_writer( + _format: TLoaderFileFormat = "insert_values", + buffer_max_items: int = 10, + file_max_items: int = 5000, + disable_compression: bool = False, +) -> BufferedDataWriter[DataWriter]: + caps = DestinationCapabilitiesContext.generic_capabilities() + caps.preferred_loader_file_format = _format + file_template = os.path.join(TEST_STORAGE_ROOT, f"{_format}.%s") + return BufferedDataWriter( + _format, + file_template, + buffer_max_items=buffer_max_items, + file_max_items=file_max_items, + disable_compression=disable_compression, + _caps=caps, + ) diff --git a/tests/common/normalizers/custom_normalizers.py b/tests/common/normalizers/custom_normalizers.py index 8e24ffab5a..3ae65c8b53 100644 --- a/tests/common/normalizers/custom_normalizers.py +++ b/tests/common/normalizers/custom_normalizers.py @@ -5,7 +5,6 @@ class NamingConvention(SnakeCaseNamingConvention): - def normalize_identifier(self, identifier: str) -> str: if identifier.startswith("column_"): return identifier @@ -13,12 +12,12 @@ def normalize_identifier(self, identifier: str) -> str: class DataItemNormalizer(RelationalNormalizer): - def extend_schema(self) -> None: json_config = self.schema._normalizers_config["json"]["config"] d_h = self.schema._settings.setdefault("default_hints", {}) d_h["not_null"] = json_config["not_null"] - - def normalize_data_item(self, source_event: TDataItem, load_id: str, table_name) -> TNormalizedRowIterator: + def normalize_data_item( + self, source_event: TDataItem, load_id: str, table_name + ) -> TNormalizedRowIterator: yield (table_name, None), source_event diff --git a/tests/common/normalizers/test_import_normalizers.py b/tests/common/normalizers/test_import_normalizers.py index ea5842f206..df6b973943 100644 --- a/tests/common/normalizers/test_import_normalizers.py +++ b/tests/common/normalizers/test_import_normalizers.py @@ -10,26 +10,28 @@ from 
dlt.common.normalizers.naming import direct from dlt.common.normalizers.naming.exceptions import InvalidNamingModule, UnknownNamingModule -from tests.common.normalizers.custom_normalizers import DataItemNormalizer as CustomRelationalNormalizer +from tests.common.normalizers.custom_normalizers import ( + DataItemNormalizer as CustomRelationalNormalizer, +) from tests.utils import preserve_environ def test_default_normalizers() -> None: config = explicit_normalizers() - assert config['names'] is None - assert config['json'] is None + assert config["names"] is None + assert config["json"] is None # pass explicit config = explicit_normalizers("direct", {"module": "custom"}) - assert config['names'] == "direct" - assert config['json'] == {"module": "custom"} + assert config["names"] == "direct" + assert config["json"] == {"module": "custom"} # use environ os.environ["SCHEMA__NAMING"] = "direct" os.environ["SCHEMA__JSON_NORMALIZER"] = '{"module": "custom"}' config = explicit_normalizers() - assert config['names'] == "direct" - assert config['json'] == {"module": "custom"} + assert config["names"] == "direct" + assert config["json"] == {"module": "custom"} def test_default_normalizers_with_caps() -> None: @@ -38,8 +40,7 @@ def test_default_normalizers_with_caps() -> None: destination_caps.naming_convention = "direct" with Container().injectable_context(destination_caps): config = explicit_normalizers() - assert config['names'] == "direct" - + assert config["names"] == "direct" def test_import_normalizers() -> None: @@ -52,7 +53,9 @@ def test_import_normalizers() -> None: assert config["json"] == {"module": "dlt.common.normalizers.json.relational"} os.environ["SCHEMA__NAMING"] = "direct" - os.environ["SCHEMA__JSON_NORMALIZER"] = '{"module": "tests.common.normalizers.custom_normalizers"}' + os.environ["SCHEMA__JSON_NORMALIZER"] = ( + '{"module": "tests.common.normalizers.custom_normalizers"}' + ) config, naming, json_normalizer = import_normalizers(explicit_normalizers()) assert config["names"] == "direct" assert config["json"] == {"module": "tests.common.normalizers.custom_normalizers"} diff --git a/tests/common/normalizers/test_json_relational.py b/tests/common/normalizers/test_json_relational.py index 7169044117..502ce619dd 100644 --- a/tests/common/normalizers/test_json_relational.py +++ b/tests/common/normalizers/test_json_relational.py @@ -7,11 +7,18 @@ from dlt.common.schema import Schema, TTableSchema from dlt.common.schema.utils import new_table -from dlt.common.normalizers.json.relational import RelationalNormalizerConfigPropagation, DataItemNormalizer as RelationalNormalizer, DLT_ID_LENGTH_BYTES, TDataItemRow +from dlt.common.normalizers.json.relational import ( + RelationalNormalizerConfigPropagation, + DataItemNormalizer as RelationalNormalizer, + DLT_ID_LENGTH_BYTES, + TDataItemRow, +) + # _flatten, _get_child_row_hash, _normalize_row, normalize_data_item, from tests.utils import create_schema_with_name + @pytest.fixture def norm() -> RelationalNormalizer: return Schema("default").data_item_normalizer # type: ignore[return-value] @@ -21,15 +28,7 @@ def test_flatten_fix_field_name(norm: RelationalNormalizer) -> None: row = { "f-1": "! 
30", "f 2": [], - "f!3": { - "f4": "a", - "f-5": "b", - "f*6": { - "c": 7, - "c v": 8, - "c x": [] - } - } + "f!3": {"f4": "a", "f-5": "b", "f*6": {"c": 7, "c v": 8, "c x": []}}, } flattened_row, lists = norm._flatten("mock_table", row, 0) # type: ignore[arg-type] assert "f_1" in flattened_row @@ -41,29 +40,33 @@ def test_flatten_fix_field_name(norm: RelationalNormalizer) -> None: # assert "f_3__f_6__c_x" in flattened_row assert "f_3" not in flattened_row - assert ("f_2", ) in lists - assert ("f_3", "fx6", "c_x", ) in lists + assert ("f_2",) in lists + assert ( + "f_3", + "fx6", + "c_x", + ) in lists def test_preserve_complex_value(norm: RelationalNormalizer) -> None: # add table with complex column norm.schema.update_table( - new_table("with_complex", - columns = [{ - "name": "value", - "data_type": "complex", - "nullable": "true" # type: ignore[typeddict-item] - }]) + new_table( + "with_complex", + columns=[ + { + "name": "value", + "data_type": "complex", + "nullable": "true", # type: ignore[typeddict-item] + } + ], + ) ) - row_1 = { - "value": 1 - } + row_1 = {"value": 1} flattened_row, _ = norm._flatten("with_complex", row_1, 0) # type: ignore[arg-type] assert flattened_row["value"] == 1 # type: ignore[typeddict-item] - row_2 = { - "value": {"complex": True} - } + row_2 = {"value": {"complex": True}} flattened_row, _ = norm._flatten("with_complex", row_2, 0) # type: ignore[arg-type] assert flattened_row["value"] == row_2["value"] # type: ignore[typeddict-item] # complex value is not flattened @@ -75,15 +78,11 @@ def test_preserve_complex_value_with_hint(norm: RelationalNormalizer) -> None: norm.schema._settings.setdefault("preferred_types", {})[TSimpleRegex("re:^value$")] = "complex" norm.schema._compile_settings() - row_1 = { - "value": 1 - } + row_1 = {"value": 1} flattened_row, _ = norm._flatten("any_table", row_1, 0) # type: ignore[arg-type] assert flattened_row["value"] == 1 # type: ignore[typeddict-item] - row_2 = { - "value": {"complex": True} - } + row_2 = {"value": {"complex": True}} flattened_row, _ = norm._flatten("any_table", row_2, 0) # type: ignore[arg-type] assert flattened_row["value"] == row_2["value"] # type: ignore[typeddict-item] # complex value is not flattened @@ -91,17 +90,11 @@ def test_preserve_complex_value_with_hint(norm: RelationalNormalizer) -> None: def test_child_table_linking(norm: RelationalNormalizer) -> None: - row = { - "f": [{ - "l": ["a", "b", "c"], - "v": 120, - "o": [{"a": 1}, {"a": 2}] - }] - } + row = {"f": [{"l": ["a", "b", "c"], "v": 120, "o": [{"a": 1}, {"a": 2}]}]} # request _dlt_root_id propagation add_dlt_root_id_propagation(norm) - rows = list(norm._normalize_row(row, {}, ("table", ))) # type: ignore[arg-type] + rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] # should have 7 entries (root + level 1 + 3 * list + 2 * object) assert len(rows) == 7 # root elem will not have a root hash if not explicitly added, "extend" is added only to child @@ -144,17 +137,12 @@ def test_child_table_linking(norm: RelationalNormalizer) -> None: def test_child_table_linking_primary_key(norm: RelationalNormalizer) -> None: row = { "id": "level0", - "f": [{ - "id": "level1", - "l": ["a", "b", "c"], - "v": 120, - "o": [{"a": 1}, {"a": 2}] - }] + "f": [{"id": "level1", "l": ["a", "b", "c"], "v": 120, "o": [{"a": 1}, {"a": 2}]}], } norm.schema.merge_hints({"primary_key": [TSimpleRegex("id")]}) norm.schema._compile_settings() - rows = list(norm._normalize_row(row, {}, ("table", ))) # type: ignore[arg-type] + rows = 
list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] root = next(t for t in rows if t[0][0] == "table")[1] # record hash is random for primary keys, not based on their content # this is a change introduced in dlt 0.2.0a30 @@ -169,7 +157,9 @@ def test_child_table_linking_primary_key(norm: RelationalNormalizer) -> None: assert "_dlt_root_id" not in t_f list_rows = [t for t in rows if t[0][0] == "table__f__l"] - assert all(e[1]["_dlt_parent_id"] != digest128("level1", DLT_ID_LENGTH_BYTES) for e in list_rows) + assert all( + e[1]["_dlt_parent_id"] != digest128("level1", DLT_ID_LENGTH_BYTES) for e in list_rows + ) assert all(r[0][1] == "table__f" for r in list_rows) obj_rows = [t for t in rows if t[0][0] == "table__f__o"] assert all(e[1]["_dlt_parent_id"] != digest128("level1", DLT_ID_LENGTH_BYTES) for e in obj_rows) @@ -179,50 +169,56 @@ def test_child_table_linking_primary_key(norm: RelationalNormalizer) -> None: def test_yields_parents_first(norm: RelationalNormalizer) -> None: row = { "id": "level0", - "f": [{ - "id": "level1", - "l": ["a", "b", "c"], - "v": 120, - "o": [{"a": 1}, {"a": 2}] - }], - "g": [{ - "id": "level2_g", - "l": ["a"] - }] + "f": [{"id": "level1", "l": ["a", "b", "c"], "v": 120, "o": [{"a": 1}, {"a": 2}]}], + "g": [{"id": "level2_g", "l": ["a"]}], } - rows = list(norm._normalize_row(row, {}, ("table", ))) # type: ignore[arg-type] + rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] tables = list(r[0][0] for r in rows) # child tables are always yielded before parent tables - expected_tables = ['table', 'table__f', 'table__f__l', 'table__f__l', 'table__f__l', 'table__f__o', 'table__f__o', 'table__g', 'table__g__l'] + expected_tables = [ + "table", + "table__f", + "table__f__l", + "table__f__l", + "table__f__l", + "table__f__o", + "table__f__o", + "table__g", + "table__g__l", + ] assert expected_tables == tables def test_yields_parent_relation(norm: RelationalNormalizer) -> None: row = { "id": "level0", - "f": [{ - "id": "level1", - "l": ["a"], - "o": [{"a": 1}], - "b": { - "a": [ {"id": "level5"}], + "f": [ + { + "id": "level1", + "l": ["a"], + "o": [{"a": 1}], + "b": { + "a": [{"id": "level5"}], + }, } - }], + ], "d": { - "a": [ {"id": "level4"}], + "a": [{"id": "level4"}], "b": { - "a": [ {"id": "level5"}], + "a": [{"id": "level5"}], }, - "c": "x" + "c": "x", }, - "e": [{ - "o": [{"a": 1}], - "b": { - "a": [ {"id": "level5"}], + "e": [ + { + "o": [{"a": 1}], + "b": { + "a": [{"id": "level5"}], + }, } - }] + ], } - rows = list(norm._normalize_row(row, {}, ("table", ))) # type: ignore[arg-type] + rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] # normalizer must return parent table first and move in order of the list elements when yielding child tables # the yielding order if fully defined expected_parents = [ @@ -238,7 +234,7 @@ def test_yields_parent_relation(norm: RelationalNormalizer) -> None: # table__e is yielded it however only contains linking information ("table__e", "table"), ("table__e__o", "table__e"), - ("table__e__b__a", "table__e") + ("table__e__b__a", "table__e"), ] parents = list(r[0] for r in rows) assert parents == expected_parents @@ -281,13 +277,9 @@ def test_yields_parent_relation(norm: RelationalNormalizer) -> None: def test_list_position(norm: RelationalNormalizer) -> None: row: StrAny = { - "f": [{ - "l": ["a", "b", "c"], - "v": 120, - "lo": [{"e": "a"}, {"e": "b"}, {"e":"c"}] - }] + "f": [{"l": ["a", "b", "c"], "v": 120, "lo": [{"e": "a"}, {"e": "b"}, {"e": "c"}]}] } - 
rows = list(norm._normalize_row(row, {}, ("table", ))) # type: ignore[arg-type] + rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] # root has no pos root = [t for t in rows if t[0][0] == "table"][0][1] assert "_dlt_list_idx" not in root @@ -321,31 +313,77 @@ def test_list_position(norm: RelationalNormalizer) -> None: # print(rows) +def test_control_descending(norm: RelationalNormalizer) -> None: + row: StrAny = { + "f": [{"l": ["a", "b", "c"], "v": 120, "lo": [[{"e": "a"}, {"e": "b"}, {"e": "c"}]]}], + "g": "val", + } + + # break at first row + rows_gen = norm.normalize_data_item(row, "load_id", "table") + rows_gen.send(None) + # won't yield anything else + with pytest.raises(StopIteration): + rows_gen.send(False) + + # prevent yielding descendants of "f" but yield all else + rows_gen = norm.normalize_data_item(row, "load_id", "table") + rows_gen.send(None) + (table, _), _ = rows_gen.send(True) + assert table == "table__f" + # won't yield anything else + with pytest.raises(StopIteration): + rows_gen.send(False) + + # descend into "l" + rows_gen = norm.normalize_data_item(row, "load_id", "table") + rows_gen.send(None) + rows_gen.send(True) + (table, _), one_row = rows_gen.send(True) + assert table == "table__f__l" + assert one_row["value"] == "a" + # get next element in the list - even with sending False - we do not descend + (table, _), one_row = rows_gen.send(False) + assert table == "table__f__l" + assert one_row["value"] == "b" + + # prevent descending into list of lists + rows_gen = norm.normalize_data_item(row, "load_id", "table") + rows_gen.send(None) + rows_gen.send(True) + # yield "l" + next(rows_gen) + next(rows_gen) + next(rows_gen) + (table, _), one_row = rows_gen.send(True) + assert table == "table__f__lo" + # do not descend into lists + with pytest.raises(StopIteration): + rows_gen.send(False) + + def test_list_in_list() -> None: chats = { "_dlt_id": "123456", "created_at": "2023-05-12T12:34:56Z", "ended_at": "2023-05-12T13:14:32Z", - "webpath": [[ - { - "url": "https://www.website.com/", - "timestamp": "2023-05-12T12:35:01Z" - }, - { - "url": "https://www.website.com/products", - "timestamp": "2023-05-12T12:38:45Z" - }, + "webpath": [ + [ + {"url": "https://www.website.com/", "timestamp": "2023-05-12T12:35:01Z"}, + {"url": "https://www.website.com/products", "timestamp": "2023-05-12T12:38:45Z"}, { "url": "https://www.website.com/products/item123", - "timestamp": "2023-05-12T12:42:22Z" + "timestamp": "2023-05-12T12:42:22Z", }, - [{ - "url": "https://www.website.com/products/item1234", - "timestamp": "2023-05-12T12:42:22Z" - }] + [ + { + "url": "https://www.website.com/products/item1234", + "timestamp": "2023-05-12T12:42:22Z", + } + ], ], - [1, 2, 3] - ] + [1, 2, 3], + ], } schema = create_schema_with_name("other") # root @@ -355,12 +393,12 @@ def test_list_in_list() -> None: zen__webpath = [row for row in rows if row[0][0] == "zen__webpath"] # two rows in web__zenpath for two lists assert len(zen__webpath) == 2 - assert zen__webpath[0][0] == ('zen__webpath', 'zen') + assert zen__webpath[0][0] == ("zen__webpath", "zen") # _dlt_id was hardcoded in the original row assert zen__webpath[0][1]["_dlt_parent_id"] == "123456" - assert zen__webpath[0][1]['_dlt_list_idx'] == 0 - assert zen__webpath[1][1]['_dlt_list_idx'] == 1 - assert zen__webpath[1][0] == ('zen__webpath', 'zen') + assert zen__webpath[0][1]["_dlt_list_idx"] == 0 + assert zen__webpath[1][1]["_dlt_list_idx"] == 1 + assert zen__webpath[1][0] == ("zen__webpath", "zen") # inner lists 
zen__webpath__list = [row for row in rows if row[0][0] == "zen__webpath__list"] # actually both list of objects and list of number will be in the same table @@ -374,7 +412,9 @@ def test_list_in_list() -> None: zen_table = new_table("zen") schema.update_table(zen_table) - path_table = new_table("zen__webpath", parent_table_name="zen", columns=[{"name": "list", "data_type": "complex"}]) + path_table = new_table( + "zen__webpath", parent_table_name="zen", columns=[{"name": "list", "data_type": "complex"}] + ) schema.update_table(path_table) rows = list(schema.normalize_data_item(chats, "1762162.1212", "zen")) # both lists are complex types now @@ -388,13 +428,9 @@ def test_child_row_deterministic_hash(norm: RelationalNormalizer) -> None: # directly set record hash so it will be adopted in normalizer as top level hash row = { "_dlt_id": row_id, - "f": [{ - "l": ["a", "b", "c"], - "v": 120, - "lo": [{"e": "a"}, {"e": "b"}, {"e":"c"}] - }] + "f": [{"l": ["a", "b", "c"], "v": 120, "lo": [{"e": "a"}, {"e": "b"}, {"e": "c"}]}], } - rows = list(norm._normalize_row(row, {}, ("table", ))) # type: ignore[arg-type] + rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] children = [t for t in rows if t[0][0] != "table"] # all hashes must be different distinct_hashes = set([ch[1]["_dlt_id"] for ch in children]) @@ -402,7 +438,9 @@ def test_child_row_deterministic_hash(norm: RelationalNormalizer) -> None: # compute hashes for all children for (table, _), ch in children: - expected_hash = digest128(f"{ch['_dlt_parent_id']}_{table}_{ch['_dlt_list_idx']}", DLT_ID_LENGTH_BYTES) + expected_hash = digest128( + f"{ch['_dlt_parent_id']}_{table}_{ch['_dlt_list_idx']}", DLT_ID_LENGTH_BYTES + ) assert ch["_dlt_id"] == expected_hash # direct compute one of the @@ -411,54 +449,64 @@ def test_child_row_deterministic_hash(norm: RelationalNormalizer) -> None: assert f_lo_p2["_dlt_id"] == digest128(f"{el_f['_dlt_id']}_table__f__lo_2", DLT_ID_LENGTH_BYTES) # same data with same table and row_id - rows_2 = list(norm._normalize_row(row, {}, ("table", ))) # type: ignore[arg-type] + rows_2 = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] children_2 = [t for t in rows_2 if t[0][0] != "table"] # corresponding hashes must be identical assert all(ch[0][1]["_dlt_id"] == ch[1][1]["_dlt_id"] for ch in zip(children, children_2)) # change parent table and all child hashes must be different - rows_4 = list(norm._normalize_row(row, {}, ("other_table", ))) # type: ignore[arg-type] + rows_4 = list(norm._normalize_row(row, {}, ("other_table",))) # type: ignore[arg-type] children_4 = [t for t in rows_4 if t[0][0] != "other_table"] assert all(ch[0][1]["_dlt_id"] != ch[1][1]["_dlt_id"] for ch in zip(children, children_4)) # change parent hash and all child hashes must be different row["_dlt_id"] = uniq_id() - rows_3 = list(norm._normalize_row(row, {}, ("table", ))) # type: ignore[arg-type] + rows_3 = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] children_3 = [t for t in rows_3 if t[0][0] != "table"] assert all(ch[0][1]["_dlt_id"] != ch[1][1]["_dlt_id"] for ch in zip(children, children_3)) def test_keeps_dlt_id(norm: RelationalNormalizer) -> None: h = uniq_id() - row = { - "a": "b", - "_dlt_id": h - } - rows = list(norm._normalize_row(row, {}, ("table", ))) # type: ignore[arg-type] + row = {"a": "b", "_dlt_id": h} + rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] root = [t for t in rows if t[0][0] == "table"][0][1] assert root["_dlt_id"] 
== h def test_propagate_hardcoded_context(norm: RelationalNormalizer) -> None: row = {"level": 1, "list": ["a", "b", "c"], "comp": [{"_timestamp": "a"}]} - rows = list(norm._normalize_row(row, {"_timestamp": 1238.9, "_dist_key": "SENDER_3000"}, ("table", ))) # type: ignore[arg-type] + rows = list(norm._normalize_row(row, {"_timestamp": 1238.9, "_dist_key": "SENDER_3000"}, ("table",))) # type: ignore[arg-type] # context is not added to root element root = next(t for t in rows if t[0][0] == "table")[1] assert "_timestamp" in root assert "_dist_key" in root # the original _timestamp field will be overwritten in children and added to lists - assert all(e[1]["_timestamp"] == 1238.9 and e[1]["_dist_key"] == "SENDER_3000" for e in rows if e[0][0] != "table") + assert all( + e[1]["_timestamp"] == 1238.9 and e[1]["_dist_key"] == "SENDER_3000" + for e in rows + if e[0][0] != "table" + ) def test_propagates_root_context(norm: RelationalNormalizer) -> None: add_dlt_root_id_propagation(norm) # add timestamp propagation - norm.schema._normalizers_config["json"]["config"]["propagation"]["root"]["timestamp"] = "_partition_ts" + norm.schema._normalizers_config["json"]["config"]["propagation"]["root"][ + "timestamp" + ] = "_partition_ts" # add propagation for non existing element - norm.schema._normalizers_config["json"]["config"]["propagation"]["root"]["__not_found"] = "__not_found" + norm.schema._normalizers_config["json"]["config"]["propagation"]["root"][ + "__not_found" + ] = "__not_found" - row = {"_dlt_id": "###", "timestamp": 12918291.1212, "dependent_list":[1, 2,3], "dependent_objects": [{"vx": "ax"}]} - normalized_rows = list(norm._normalize_row(row, {}, ("table", ))) # type: ignore[arg-type] + row = { + "_dlt_id": "###", + "timestamp": 12918291.1212, + "dependent_list": [1, 2, 3], + "dependent_objects": [{"vx": "ax"}], + } + normalized_rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] # all non-root rows must have: non_root = [r for r in normalized_rows if r[0][1] is not None] assert all(r[1]["_dlt_root_id"] == "###" for r in non_root) @@ -467,15 +515,19 @@ def test_propagates_root_context(norm: RelationalNormalizer) -> None: @pytest.mark.parametrize("add_pk,add_dlt_id", [(False, False), (True, False), (True, True)]) -def test_propagates_table_context(norm: RelationalNormalizer, add_pk: bool, add_dlt_id: bool) -> None: +def test_propagates_table_context( + norm: RelationalNormalizer, add_pk: bool, add_dlt_id: bool +) -> None: add_dlt_root_id_propagation(norm) - prop_config: RelationalNormalizerConfigPropagation = norm.schema._normalizers_config["json"]["config"]["propagation"] + prop_config: RelationalNormalizerConfigPropagation = norm.schema._normalizers_config["json"][ + "config" + ]["propagation"] prop_config["root"]["timestamp"] = "_partition_ts" # type: ignore[index] # for table "table__lvl1" request to propagate "vx" and "partition_ovr" as "_partition_ts" (should overwrite root) prop_config["tables"]["table__lvl1"] = { # type: ignore[index] "vx": "__vx", "partition_ovr": "_partition_ts", - "__not_found": "__not_found" + "__not_found": "__not_found", } if add_pk: @@ -483,21 +535,17 @@ def test_propagates_table_context(norm: RelationalNormalizer, add_pk: bool, add_ norm.schema.merge_hints({"primary_key": [TSimpleRegex("vx")]}) row = { - "_dlt_id": "###", - "timestamp": 12918291.1212, - "lvl1": [{ - "vx": "ax", - "partition_ovr": 1283.12, - "lvl2": [{ - "_partition_ts": "overwritten" - }] - }] - } + "_dlt_id": "###", + "timestamp": 12918291.1212, + "lvl1": [ + 
{"vx": "ax", "partition_ovr": 1283.12, "lvl2": [{"_partition_ts": "overwritten"}]} + ], + } if add_dlt_id: # to reproduce a bug where rows with _dlt_id set were not extended row["lvl1"][0]["_dlt_id"] = "row_id_lvl1" # type: ignore[index] - normalized_rows = list(norm._normalize_row(row, {}, ("table", ))) # type: ignore[arg-type] + normalized_rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] non_root = [r for r in normalized_rows if r[0][1] is not None] # _dlt_root_id in all non root assert all(r[1]["_dlt_root_id"] == "###" for r in non_root) @@ -506,21 +554,30 @@ def test_propagates_table_context(norm: RelationalNormalizer, add_pk: bool, add_ # _partition_ts == timestamp only at lvl1 assert all(r[1]["_partition_ts"] == 12918291.1212 for r in non_root if r[0][0] == "table__lvl1") # _partition_ts == partition_ovr and __vx only at lvl2 - assert all(r[1]["_partition_ts"] == 1283.12 and r[1]["__vx"] == "ax" for r in non_root if r[0][0] == "table__lvl1__lvl2") - assert any(r[1]["_partition_ts"] == 1283.12 and r[1]["__vx"] == "ax" for r in non_root if r[0][0] != "table__lvl1__lvl2") is False + assert all( + r[1]["_partition_ts"] == 1283.12 and r[1]["__vx"] == "ax" + for r in non_root + if r[0][0] == "table__lvl1__lvl2" + ) + assert ( + any( + r[1]["_partition_ts"] == 1283.12 and r[1]["__vx"] == "ax" + for r in non_root + if r[0][0] != "table__lvl1__lvl2" + ) + is False + ) def test_propagates_table_context_to_lists(norm: RelationalNormalizer) -> None: add_dlt_root_id_propagation(norm) - prop_config: RelationalNormalizerConfigPropagation = norm.schema._normalizers_config["json"]["config"]["propagation"] + prop_config: RelationalNormalizerConfigPropagation = norm.schema._normalizers_config["json"][ + "config" + ]["propagation"] prop_config["root"]["timestamp"] = "_partition_ts" # type: ignore[index] - row = { - "_dlt_id": "###", - "timestamp": 12918291.1212, - "lvl1": [1, 2, 3, [4, 5, 6]] - } - normalized_rows = list(norm._normalize_row(row, {}, ("table", ))) # type: ignore[arg-type] + row = {"_dlt_id": "###", "timestamp": 12918291.1212, "lvl1": [1, 2, 3, [4, 5, 6]]} + normalized_rows = list(norm._normalize_row(row, {}, ("table",))) # type: ignore[arg-type] # _partition_ts == timestamp on all child tables non_root = [r for r in normalized_rows if r[0][1] is not None] assert all(r[1]["_partition_ts"] == 12918291.1212 for r in non_root) @@ -533,7 +590,7 @@ def test_removes_normalized_list(norm: RelationalNormalizer) -> None: # after normalizing the list that got normalized into child table must be deleted row = {"comp": [{"_timestamp": "a"}]} # get iterator - normalized_rows_i = norm._normalize_row(row, {}, ("table", )) # type: ignore[arg-type] + normalized_rows_i = norm._normalize_row(row, {}, ("table",)) # type: ignore[arg-type] # yield just one item root_row = next(normalized_rows_i) # root_row = next(r for r in normalized_rows if r[0][1] is None) @@ -544,17 +601,20 @@ def test_preserves_complex_types_list(norm: RelationalNormalizer) -> None: # the exception to test_removes_normalized_list # complex types should be left as they are # add table with complex column - norm.schema.update_table(new_table("event_slot", - columns = [{ - "name": "value", - "data_type": "complex", - "nullable": "true" # type: ignore[typeddict-item] - }]) + norm.schema.update_table( + new_table( + "event_slot", + columns=[ + { + "name": "value", + "data_type": "complex", + "nullable": "true", # type: ignore[typeddict-item] + } + ], + ) ) - row = { - "value": ["from", {"complex": True}] - } - 
normalized_rows = list(norm._normalize_row(row, {}, ("event_slot", ))) # type: ignore[arg-type] + row = {"value": ["from", {"complex": True}]} + normalized_rows = list(norm._normalize_row(row, {}, ("event_slot",))) # type: ignore[arg-type] # make sure only 1 row is emitted, the list is not normalized assert len(normalized_rows) == 1 # value is kept in root row -> market as complex @@ -562,10 +622,8 @@ def test_preserves_complex_types_list(norm: RelationalNormalizer) -> None: assert root_row[1]["value"] == row["value"] # same should work for a list - row = { - "value": ["from", ["complex", True]] # type: ignore[list-item] - } - normalized_rows = list(norm._normalize_row(row, {}, ("event_slot", ))) # type: ignore[arg-type] + row = {"value": ["from", ["complex", True]]} # type: ignore[list-item] + normalized_rows = list(norm._normalize_row(row, {}, ("event_slot",))) # type: ignore[arg-type] # make sure only 1 row is emitted, the list is not normalized assert len(normalized_rows) == 1 # value is kept in root row -> market as complex @@ -581,7 +639,10 @@ def test_wrap_in_dict(norm: RelationalNormalizer) -> None: # wrap a list rows = list(norm.schema.normalize_data_item([1, 2, 3, 4, "A"], "load_id", "listex")) assert len(rows) == 6 - assert rows[0][0] == ("listex", None,) + assert rows[0][0] == ( + "listex", + None, + ) assert rows[1][0] == ("listex__value", "listex") assert rows[-1][1]["value"] == "A" @@ -591,15 +652,19 @@ def test_complex_types_for_recursion_level(norm: RelationalNormalizer) -> None: # if max recursion depth is set, nested elements will be kept as complex row = { "_dlt_id": "row_id", - "f": [{ - "l": ["a"], # , "b", "c" - "v": 120, - "lo": [{"e": {"v": 1}}] # , {"e": {"v": 2}}, {"e":{"v":3 }} - }] + "f": [ + { + "l": ["a"], # , "b", "c" + "v": 120, + "lo": [{"e": {"v": 1}}], # , {"e": {"v": 2}}, {"e":{"v":3 }} + } + ], } n_rows_nl = list(norm.schema.normalize_data_item(row, "load_id", "default")) # all nested elements were yielded - assert ["default", "default__f", "default__f__l", "default__f__lo"] == [r[0][0] for r in n_rows_nl] + assert ["default", "default__f", "default__f__l", "default__f__lo"] == [ + r[0][0] for r in n_rows_nl + ] # set max nesting to 0 set_max_nesting(norm, 0) @@ -644,12 +709,10 @@ def test_extract_with_table_name_meta() -> None: "flags": 0, "parent_id": None, "guild_id": "815421435900198962", - "permission_overwrites": [] + "permission_overwrites": [], } # force table name - rows = list( - create_schema_with_name("discord").normalize_data_item(row, "load_id", "channel") - ) + rows = list(create_schema_with_name("discord").normalize_data_item(row, "load_id", "channel")) # table is channel assert rows[0][0][0] == "channel" normalized_row = rows[0][1] @@ -676,13 +739,7 @@ def test_parse_with_primary_key() -> None: schema._compile_settings() add_dlt_root_id_propagation(schema.data_item_normalizer) # type: ignore[arg-type] - row = { - "id": "817949077341208606", - "w_id":[{ - "id": 9128918293891111, - "wo_id": [1, 2, 3] - }] - } + row = {"id": "817949077341208606", "w_id": [{"id": 9128918293891111, "wo_id": [1, 2, 3]}]} rows = list(schema.normalize_data_item(row, "load_id", "discord")) # get root root = next(t[1] for t in rows if t[0][0] == "discord") @@ -700,11 +757,15 @@ def test_parse_with_primary_key() -> None: assert "_dlt_root_id" in el_w_id # this must have deterministic child key - f_wo_id = next(t[1] for t in rows if t[0][0] == "discord__w_id__wo_id" and t[1]["_dlt_list_idx"] == 2) + f_wo_id = next( + t[1] for t in rows if t[0][0] == 
"discord__w_id__wo_id" and t[1]["_dlt_list_idx"] == 2 + ) assert f_wo_id["value"] == 3 assert f_wo_id["_dlt_root_id"] != digest128("817949077341208606", DLT_ID_LENGTH_BYTES) assert f_wo_id["_dlt_parent_id"] != digest128("9128918293891111", DLT_ID_LENGTH_BYTES) - assert f_wo_id["_dlt_id"] == RelationalNormalizer._get_child_row_hash(f_wo_id["_dlt_parent_id"], "discord__w_id__wo_id", 2) + assert f_wo_id["_dlt_id"] == RelationalNormalizer._get_child_row_hash( + f_wo_id["_dlt_parent_id"], "discord__w_id__wo_id", 2 + ) def test_keeps_none_values() -> None: @@ -724,16 +785,10 @@ def test_normalize_and_shorten_deterministically() -> None: data = { "short>ident:1": { - "short>ident:2": { - "short>ident:3": "a" - }, + "short>ident:2": {"short>ident:3": "a"}, }, - "LIST+ident:1": { - "LIST+ident:2": { - "LIST+ident:3": [1] - } - }, - "long+long:SO+LONG:_>16": True + "LIST+ident:1": {"LIST+ident:2": {"LIST+ident:3": [1]}}, + "long+long:SO+LONG:_>16": True, } rows = list(schema.normalize_data_item(data, "1762162.1212", "s")) # all identifiers are 16 chars or shorter @@ -747,14 +802,20 @@ def test_normalize_and_shorten_deterministically() -> None: root_data = rows[0][1] root_data_keys = list(root_data.keys()) # "short:ident:2": "a" will be flattened into root - tag = NamingConvention._compute_tag("short_ident_1__short_ident_2__short_ident_3", NamingConvention._DEFAULT_COLLISION_PROB) + tag = NamingConvention._compute_tag( + "short_ident_1__short_ident_2__short_ident_3", NamingConvention._DEFAULT_COLLISION_PROB + ) assert tag in root_data_keys[0] # long:SO+LONG:_>16 shortened on normalized name - tag = NamingConvention._compute_tag("long+long:SO+LONG:_>16", NamingConvention._DEFAULT_COLLISION_PROB) + tag = NamingConvention._compute_tag( + "long+long:SO+LONG:_>16", NamingConvention._DEFAULT_COLLISION_PROB + ) assert tag in root_data_keys[1] # table name in second row table_name = rows[1][0][0] - tag = NamingConvention._compute_tag("s__lis_txident_1__lis_txident_2__lis_txident_3", NamingConvention._DEFAULT_COLLISION_PROB) + tag = NamingConvention._compute_tag( + "s__lis_txident_1__lis_txident_2__lis_txident_3", NamingConvention._DEFAULT_COLLISION_PROB + ) assert tag in table_name @@ -776,7 +837,6 @@ def test_normalize_empty_keys() -> None: # could also be in schema tests def test_propagation_update_on_table_change(norm: RelationalNormalizer): - # append does not have propagated columns table_1 = new_table("table_1", write_disposition="append") norm.schema.update_table(table_1) @@ -785,40 +845,41 @@ def test_propagation_update_on_table_change(norm: RelationalNormalizer): # change table to merge table_1["write_disposition"] = "merge" norm.schema.update_table(table_1) - assert norm.schema._normalizers_config["json"]["config"]["propagation"]["tables"][table_1["name"]] == {'_dlt_id': '_dlt_root_id'} + assert norm.schema._normalizers_config["json"]["config"]["propagation"]["tables"][ + table_1["name"] + ] == {"_dlt_id": "_dlt_root_id"} # add subtable table_2 = new_table("table_2", parent_table_name="table_1") norm.schema.update_table(table_2) - assert "table_2" not in norm.schema._normalizers_config["json"]["config"]["propagation"]["tables"] + assert ( + "table_2" not in norm.schema._normalizers_config["json"]["config"]["propagation"]["tables"] + ) # test merging into existing propagation - norm.schema._normalizers_config["json"]["config"]["propagation"]["tables"]["table_3"] = {'prop1': 'prop2'} + norm.schema._normalizers_config["json"]["config"]["propagation"]["tables"]["table_3"] = { + "prop1": "prop2" + } 
table_3 = new_table("table_3", write_disposition="merge") norm.schema.update_table(table_3) - assert norm.schema._normalizers_config["json"]["config"]["propagation"]["tables"]["table_3"] == { - '_dlt_id': '_dlt_root_id', - 'prop1': 'prop2' - } - + assert norm.schema._normalizers_config["json"]["config"]["propagation"]["tables"][ + "table_3" + ] == {"_dlt_id": "_dlt_root_id", "prop1": "prop2"} def set_max_nesting(norm: RelationalNormalizer, max_nesting: int) -> None: - RelationalNormalizer.update_normalizer_config(norm.schema, - { - "max_nesting": max_nesting - } - ) + RelationalNormalizer.update_normalizer_config(norm.schema, {"max_nesting": max_nesting}) norm._reset() def add_dlt_root_id_propagation(norm: RelationalNormalizer) -> None: - RelationalNormalizer.update_normalizer_config(norm.schema, { - "propagation": { - "root": { - "_dlt_id": "_dlt_root_id" # type: ignore[dict-item] - }, - "tables": {} + RelationalNormalizer.update_normalizer_config( + norm.schema, + { + "propagation": { + "root": {"_dlt_id": "_dlt_root_id"}, # type: ignore[dict-item] + "tables": {}, } - }) + }, + ) norm._reset() diff --git a/tests/common/normalizers/test_naming.py b/tests/common/normalizers/test_naming.py index 02ff6e3c38..3bf4762c35 100644 --- a/tests/common/normalizers/test_naming.py +++ b/tests/common/normalizers/test_naming.py @@ -15,11 +15,17 @@ IDENT_20_CHARS = "she played cello well" RAW_IDENT = ".\n'played CELLO🚧_" RAW_IDENT_W_SPACES = f" {RAW_IDENT} \t\n" -RAW_IDENT_2 = "123.\"\rhello😄!" +RAW_IDENT_2 = '123."\rhello😄!' RAW_IDENT_2_W_SPACES = f"\n {RAW_IDENT_2} \t " RAW_PATH = [RAW_IDENT, RAW_IDENT_2_W_SPACES, RAW_IDENT_2, RAW_IDENT_2_W_SPACES] EMPTY_IDENT = " \t\n " -RAW_PATH_WITH_EMPTY_IDENT = [RAW_IDENT, RAW_IDENT_2_W_SPACES, EMPTY_IDENT, RAW_IDENT_2, RAW_IDENT_2_W_SPACES] +RAW_PATH_WITH_EMPTY_IDENT = [ + RAW_IDENT, + RAW_IDENT_2_W_SPACES, + EMPTY_IDENT, + RAW_IDENT_2, + RAW_IDENT_2_W_SPACES, +] def test_tag_collisions() -> None: @@ -29,52 +35,61 @@ def test_tag_collisions() -> None: generations = 100000 collisions = 0 for _ in range(0, generations): - tag = NamingConvention._compute_tag(uniq_id(32), collision_prob=NamingConvention._DEFAULT_COLLISION_PROB) + tag = NamingConvention._compute_tag( + uniq_id(32), collision_prob=NamingConvention._DEFAULT_COLLISION_PROB + ) if tag in tags: collisions += 1 else: tags[tag] = tag - assert collisions/generations < 0.001 + assert collisions / generations < 0.001 def test_tag_generation() -> None: # is content hash content = 20 * LONG_PATH - content_tag = NamingConvention._compute_tag(content, collision_prob=NamingConvention._DEFAULT_COLLISION_PROB) + content_tag = NamingConvention._compute_tag( + content, collision_prob=NamingConvention._DEFAULT_COLLISION_PROB + ) # no randomness for _ in range(0, 20): - tag = NamingConvention._compute_tag(content, collision_prob=NamingConvention._DEFAULT_COLLISION_PROB) + tag = NamingConvention._compute_tag( + content, collision_prob=NamingConvention._DEFAULT_COLLISION_PROB + ) assert content_tag == tag fixture = [ - ('3f17271231504b8cf65690bcdc379df8a3b8aabe12efe1ea82848ec5f497cb69', 'gds0iw'), - ('58e5c351b53ffe1233e0656a532a721ae1d2ac7af71b6cfec8ceb64c63b10721', 'uyboiq'), - ('e3f34629839cedcabba95354e48a78dc80b0cd35c02ddfbbf20196ba7f968866', '51wdcg'), - ('f0f22b8e8c58389a6c21dbcc1e261ee0354704e24996a0ec541276f58d1f2f52', 'bpm7ca'), - ('0d0de95c7c12ceee919d28d22c970285d80a36dea4fe32dbdd667a888ae6d47f', 'doqcuq'), - ('4973509ea648ddfbaf6c50e1fef33c3b0a3d1c1a82dff543a8255e60b6572567', 'cl7rpq'), - 
('877c89f0dcbd24b8c3f787624ddca09deb6a44e4a72f12527209d78e4d9ed247', 'xrnycg'), - ('064df58cd3a51c50dbf30e975e63961a501212ff8e8ca544ab396727f4b8a367', 'kgiizq'), - ('c8f7da1b5c44c1ca10da67c1514c4cf365e4d5912685b25a39206d5c8c1966a1', 'dj9zqq'), - ('222d42333592ea87823fd2e7868d59fb0aded20603f433319691918299513cb6', 'futp4w'), - ('757d64eb242a91b494ec9e2661a7946410d68144d33860d6f4154092d65d5009', 'wetlpg'), - ('3c7348d43478292b4c4e0689d41a536fc8ccabdbd9fb9d0dfbe757a83d34cebe', 'avxagg'), - ('6896fac1546c201d4dc91d2c51bdcd9c820fe92fd0555947e59fdc89ca6f045d', 'wbaj3w'), - ('b4def322a4487dd90fcc4abd2f1efde0cdce81d8e0a580fd1897203ab4ebcebe', 'whojmw'), - ('07d974124b92adafc90473a3968ceb5e8329d815e0e48260473d70a781adb8ae', 'aiqcea'), - ('c67183a762e379290652cc26a786b21eff347643b1cc9012138f460901ce5d53', 'zfztpg'), - ('430976db5adef67d0009aa3cd9a2daca106829b36a7232732c5d694e7197c6d1', 'evr7rq'), - ('c1c8c0ff6933fa4e23fab5605139124b2c6cda0150a412daaea274818ee46e35', 'er0nxq'), - ('0060c538b6ce02b8d8e2c85b4e2810c58b846f4096ed7ab871fc092c45ac09d9', 'zh9xgg'), - ('4d4b99ff5d2a3d5cd076782c9cd088cd85d5c789d7de6bdc19c1d206b687d485', '2vvr5a') + ("3f17271231504b8cf65690bcdc379df8a3b8aabe12efe1ea82848ec5f497cb69", "gds0iw"), + ("58e5c351b53ffe1233e0656a532a721ae1d2ac7af71b6cfec8ceb64c63b10721", "uyboiq"), + ("e3f34629839cedcabba95354e48a78dc80b0cd35c02ddfbbf20196ba7f968866", "51wdcg"), + ("f0f22b8e8c58389a6c21dbcc1e261ee0354704e24996a0ec541276f58d1f2f52", "bpm7ca"), + ("0d0de95c7c12ceee919d28d22c970285d80a36dea4fe32dbdd667a888ae6d47f", "doqcuq"), + ("4973509ea648ddfbaf6c50e1fef33c3b0a3d1c1a82dff543a8255e60b6572567", "cl7rpq"), + ("877c89f0dcbd24b8c3f787624ddca09deb6a44e4a72f12527209d78e4d9ed247", "xrnycg"), + ("064df58cd3a51c50dbf30e975e63961a501212ff8e8ca544ab396727f4b8a367", "kgiizq"), + ("c8f7da1b5c44c1ca10da67c1514c4cf365e4d5912685b25a39206d5c8c1966a1", "dj9zqq"), + ("222d42333592ea87823fd2e7868d59fb0aded20603f433319691918299513cb6", "futp4w"), + ("757d64eb242a91b494ec9e2661a7946410d68144d33860d6f4154092d65d5009", "wetlpg"), + ("3c7348d43478292b4c4e0689d41a536fc8ccabdbd9fb9d0dfbe757a83d34cebe", "avxagg"), + ("6896fac1546c201d4dc91d2c51bdcd9c820fe92fd0555947e59fdc89ca6f045d", "wbaj3w"), + ("b4def322a4487dd90fcc4abd2f1efde0cdce81d8e0a580fd1897203ab4ebcebe", "whojmw"), + ("07d974124b92adafc90473a3968ceb5e8329d815e0e48260473d70a781adb8ae", "aiqcea"), + ("c67183a762e379290652cc26a786b21eff347643b1cc9012138f460901ce5d53", "zfztpg"), + ("430976db5adef67d0009aa3cd9a2daca106829b36a7232732c5d694e7197c6d1", "evr7rq"), + ("c1c8c0ff6933fa4e23fab5605139124b2c6cda0150a412daaea274818ee46e35", "er0nxq"), + ("0060c538b6ce02b8d8e2c85b4e2810c58b846f4096ed7ab871fc092c45ac09d9", "zh9xgg"), + ("4d4b99ff5d2a3d5cd076782c9cd088cd85d5c789d7de6bdc19c1d206b687d485", "2vvr5a"), ] for content, expected_tag in fixture: - tag = NamingConvention._compute_tag(content, collision_prob=NamingConvention._DEFAULT_COLLISION_PROB) + tag = NamingConvention._compute_tag( + content, collision_prob=NamingConvention._DEFAULT_COLLISION_PROB + ) assert len(tag) == 6 assert tag == expected_tag # print(f"('{content}', '{tag}'),") + def test_tag_placement() -> None: # tags are placed in the middle of string and that must happen deterministically tag = "123456" @@ -99,20 +114,26 @@ def test_tag_placement() -> None: def test_shorten_identifier() -> None: # no limit - long_ident = 8*LONG_PATH + long_ident = 8 * LONG_PATH assert NamingConvention.shorten_identifier(long_ident, long_ident, None) == long_ident # within limit assert 
NamingConvention.shorten_identifier("012345678", "xxx012345678xxx", 10) == "012345678" - assert NamingConvention.shorten_identifier("0123456789", "xxx012345678xx?", 10) == "0123456789" # max_length + assert ( + NamingConvention.shorten_identifier("0123456789", "xxx012345678xx?", 10) == "0123456789" + ) # max_length # tag based on original string placed in the middle - tag = NamingConvention._compute_tag(IDENT_20_CHARS, collision_prob=NamingConvention._DEFAULT_COLLISION_PROB) + tag = NamingConvention._compute_tag( + IDENT_20_CHARS, collision_prob=NamingConvention._DEFAULT_COLLISION_PROB + ) norm_ident = NamingConvention.shorten_identifier(IDENT_20_CHARS, IDENT_20_CHARS, 20) assert tag in norm_ident assert len(norm_ident) == 20 assert norm_ident == "she plauanpualo well" # the tag must be based on raw string, not normalized string, one test case with spaces for raw_content in [uniq_id(), f" {uniq_id()} "]: - tag = NamingConvention._compute_tag(raw_content, collision_prob=NamingConvention._DEFAULT_COLLISION_PROB) + tag = NamingConvention._compute_tag( + raw_content, collision_prob=NamingConvention._DEFAULT_COLLISION_PROB + ) norm_ident = NamingConvention.shorten_identifier(IDENT_20_CHARS, raw_content, 20) assert tag in norm_ident assert len(norm_ident) == 20 @@ -135,7 +156,9 @@ def test_normalize_with_shorten_identifier(convention: Type[NamingConvention]) - # force to shorten naming = convention(len(RAW_IDENT) // 2) # tag expected on stripped RAW_IDENT - tag = NamingConvention._compute_tag(RAW_IDENT, collision_prob=NamingConvention._DEFAULT_COLLISION_PROB) + tag = NamingConvention._compute_tag( + RAW_IDENT, collision_prob=NamingConvention._DEFAULT_COLLISION_PROB + ) # spaces are stripped assert naming.normalize_identifier(RAW_IDENT) == naming.normalize_identifier(RAW_IDENT_W_SPACES) assert tag in naming.normalize_identifier(RAW_IDENT) @@ -192,7 +215,11 @@ def test_normalize_path(convention: Type[NamingConvention]) -> None: norm_path_str = naming.normalize_path(raw_path_str) assert len(naming.break_path(norm_path_str)) == len(RAW_PATH) # double norm path does not change anything - assert naming.normalize_path(raw_path_str) == naming.normalize_path(norm_path_str) == naming.normalize_path(naming.normalize_path(norm_path_str)) + assert ( + naming.normalize_path(raw_path_str) + == naming.normalize_path(norm_path_str) + == naming.normalize_path(naming.normalize_path(norm_path_str)) + ) # empty element in path is ignored assert naming.make_path(*RAW_PATH_WITH_EMPTY_IDENT) == raw_path_str assert naming.normalize_path(raw_path_str) == norm_path_str @@ -200,12 +227,18 @@ def test_normalize_path(convention: Type[NamingConvention]) -> None: # preserve idents but shorten path naming = convention(len(RAW_IDENT) * 2) # give enough max length # tag computed from raw path - tag = NamingConvention._compute_tag(raw_path_str, collision_prob=NamingConvention._DEFAULT_COLLISION_PROB) + tag = NamingConvention._compute_tag( + raw_path_str, collision_prob=NamingConvention._DEFAULT_COLLISION_PROB + ) tagged_raw_path_str = naming.normalize_path(raw_path_str) # contains tag assert tag in tagged_raw_path_str # idempotent - assert tagged_raw_path_str == naming.normalize_path(tagged_raw_path_str) == naming.normalize_path(naming.normalize_path(tagged_raw_path_str)) + assert ( + tagged_raw_path_str + == naming.normalize_path(tagged_raw_path_str) + == naming.normalize_path(naming.normalize_path(tagged_raw_path_str)) + ) assert tagged_raw_path_str == naming.make_path(*naming.break_path(tagged_raw_path_str)) # also cut idents 
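The naming tests above all exercise the same mechanism: when a normalized identifier or path exceeds the convention's maximum length, a short hash tag computed from the raw input is spliced into the middle of the shortened name, so the result is deterministic across runs while collisions stay rare. A minimal sketch of that behaviour, reusing only the class-level helpers the tests call; the import path below is an assumption, adjust it to wherever NamingConvention is defined in your tree:

    # sketch only: mirrors the calls made in test_shorten_identifier above
    from dlt.common.normalizers.naming import NamingConvention  # assumed import path

    raw = "she played cello well"  # 21 chars, longer than the 20-char budget below
    # the tag is a pure function of the raw identifier, hence deterministic
    tag = NamingConvention._compute_tag(
        raw, collision_prob=NamingConvention._DEFAULT_COLLISION_PROB
    )
    short = NamingConvention.shorten_identifier(raw, raw, 20)
    assert len(short) == 20 and tag in short  # tag embedded inside the shortened name

Because the tag depends only on the raw string, normalizing an already-shortened path again yields the same result, which is exactly the idempotency asserted in test_normalize_path.
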
diff --git a/tests/common/normalizers/test_naming_duck_case.py b/tests/common/normalizers/test_naming_duck_case.py index ed63800ca9..099134ca2f 100644 --- a/tests/common/normalizers/test_naming_duck_case.py +++ b/tests/common/normalizers/test_naming_duck_case.py @@ -17,8 +17,11 @@ def test_normalize_identifier(naming_unlimited: NamingConvention) -> None: assert naming_unlimited.normalize_identifier("🦚🦚Peacocks") == "🦚🦚Peacocks" assert naming_unlimited.normalize_identifier("🦚🦚peacocks") == "🦚🦚peacocks" # non latin alphabets - assert naming_unlimited.normalize_identifier("Ölübeµrsईउऊऋऌऍऎएc⇨usǁs⛔lÄnder") == "Ölübeµrsईउऊऋऌऍऎएc⇨usǁs⛔lÄnder" + assert ( + naming_unlimited.normalize_identifier("Ölübeµrsईउऊऋऌऍऎएc⇨usǁs⛔lÄnder") + == "Ölübeµrsईउऊऋऌऍऎएc⇨usǁs⛔lÄnder" + ) def test_alphabet_reduction(naming_unlimited: NamingConvention) -> None: - assert naming_unlimited.normalize_identifier("A\nB\"C\rD") == "A_B_C_D" + assert naming_unlimited.normalize_identifier('A\nB"C\rD') == "A_B_C_D" diff --git a/tests/common/normalizers/test_naming_snake_case.py b/tests/common/normalizers/test_naming_snake_case.py index b51801b6c4..6d619b5257 100644 --- a/tests/common/normalizers/test_naming_snake_case.py +++ b/tests/common/normalizers/test_naming_snake_case.py @@ -38,7 +38,10 @@ def test_normalize_identifier(naming_unlimited: NamingConvention) -> None: def test_alphabet_reduction(naming_unlimited: NamingConvention) -> None: - assert naming_unlimited.normalize_identifier(SnakeCaseNamingConvention._REDUCE_ALPHABET[0]) == SnakeCaseNamingConvention._REDUCE_ALPHABET[1] + assert ( + naming_unlimited.normalize_identifier(SnakeCaseNamingConvention._REDUCE_ALPHABET[0]) + == SnakeCaseNamingConvention._REDUCE_ALPHABET[1] + ) def test_normalize_path(naming_unlimited: NamingConvention) -> None: @@ -78,6 +81,7 @@ def test_normalize_make_path(convention: Type[NamingConvention]) -> None: def test_normalizes_underscores(naming_unlimited: NamingConvention) -> None: - assert naming_unlimited.normalize_identifier("event__value_value2____") == "event_value_value2xxxx" + assert ( + naming_unlimited.normalize_identifier("event__value_value2____") == "event_value_value2xxxx" + ) assert naming_unlimited.normalize_path("e_vent__value_value2___") == "e_vent__value_value2__x" - diff --git a/tests/common/reflection/test_reflect_spec.py b/tests/common/reflection/test_reflect_spec.py index 17ec9ade47..11c66a2763 100644 --- a/tests/common/reflection/test_reflect_spec.py +++ b/tests/common/reflection/test_reflect_spec.py @@ -5,7 +5,11 @@ from dlt.common import Decimal from dlt.common.typing import TSecretValue, is_optional_type from dlt.common.configuration.inject import get_fun_spec, with_config -from dlt.common.configuration.specs import BaseConfiguration, RunConfiguration, ConnectionStringCredentials +from dlt.common.configuration.specs import ( + BaseConfiguration, + RunConfiguration, + ConnectionStringCredentials, +) from dlt.common.reflection.spec import spec_from_signature, _get_spec_name_from_f from dlt.common.reflection.utils import get_func_def_node, get_literal_defaults @@ -13,14 +17,21 @@ _DECIMAL_DEFAULT = Decimal("0.01") _SECRET_DEFAULT = TSecretValue("PASS") _CONFIG_DEFAULT = RunConfiguration() -_CREDENTIALS_DEFAULT = ConnectionStringCredentials("postgresql://loader:loader@localhost:5432/dlt_data") +_CREDENTIALS_DEFAULT = ConnectionStringCredentials( + "postgresql://loader:loader@localhost:5432/dlt_data" +) def test_synthesize_spec_from_sig() -> None: - # spec from typed signature without defaults - def f_typed(p1: str = None, p2: 
Decimal = None, p3: Any = None, p4: Optional[RunConfiguration] = None, p5: TSecretValue = dlt.secrets.value) -> None: + def f_typed( + p1: str = None, + p2: Decimal = None, + p3: Any = None, + p4: Optional[RunConfiguration] = None, + p5: TSecretValue = dlt.secrets.value, + ) -> None: pass SPEC: Any = spec_from_signature(f_typed, inspect.signature(f_typed)) @@ -30,11 +41,23 @@ def f_typed(p1: str = None, p2: Decimal = None, p3: Any = None, p4: Optional[Run assert SPEC.p4 is None assert SPEC.p5 is None fields = SPEC.get_resolvable_fields() - assert fields == {"p1": Optional[str], "p2": Optional[Decimal], "p3": Optional[Any], "p4": Optional[RunConfiguration], "p5": TSecretValue} + assert fields == { + "p1": Optional[str], + "p2": Optional[Decimal], + "p3": Optional[Any], + "p4": Optional[RunConfiguration], + "p5": TSecretValue, + } # spec from typed signatures with defaults - def f_typed_default(t_p1: str = "str", t_p2: Decimal = _DECIMAL_DEFAULT, t_p3: Any = _SECRET_DEFAULT, t_p4: RunConfiguration = _CONFIG_DEFAULT, t_p5: str = None) -> None: + def f_typed_default( + t_p1: str = "str", + t_p2: Decimal = _DECIMAL_DEFAULT, + t_p3: Any = _SECRET_DEFAULT, + t_p4: RunConfiguration = _CONFIG_DEFAULT, + t_p5: str = None, + ) -> None: pass SPEC = spec_from_signature(f_typed_default, inspect.signature(f_typed_default)) @@ -46,11 +69,17 @@ def f_typed_default(t_p1: str = "str", t_p2: Decimal = _DECIMAL_DEFAULT, t_p3: A fields = SPEC().get_resolvable_fields() # Any will not assume TSecretValue type because at runtime it's a str # setting default as None will convert type into optional (t_p5) - assert fields == {"t_p1": str, "t_p2": Decimal, "t_p3": str, "t_p4": RunConfiguration, "t_p5": Optional[str]} + assert fields == { + "t_p1": str, + "t_p2": Decimal, + "t_p3": str, + "t_p4": RunConfiguration, + "t_p5": Optional[str], + } # spec from untyped signature - def f_untyped(untyped_p1 = None, untyped_p2 = dlt.config.value) -> None: + def f_untyped(untyped_p1=None, untyped_p2=dlt.config.value) -> None: pass SPEC = spec_from_signature(f_untyped, inspect.signature(f_untyped)) @@ -61,11 +90,14 @@ def f_untyped(untyped_p1 = None, untyped_p2 = dlt.config.value) -> None: # spec types derived from defaults - - def f_untyped_default(untyped_p1 = "str", untyped_p2 = _DECIMAL_DEFAULT, untyped_p3 = _CREDENTIALS_DEFAULT, untyped_p4 = None) -> None: + def f_untyped_default( + untyped_p1="str", + untyped_p2=_DECIMAL_DEFAULT, + untyped_p3=_CREDENTIALS_DEFAULT, + untyped_p4=None, + ) -> None: pass - SPEC = spec_from_signature(f_untyped_default, inspect.signature(f_untyped_default)) assert SPEC.untyped_p1 == "str" assert SPEC.untyped_p2 == _DECIMAL_DEFAULT @@ -73,11 +105,23 @@ def f_untyped_default(untyped_p1 = "str", untyped_p2 = _DECIMAL_DEFAULT, untyped assert SPEC.untyped_p4 is None fields = SPEC.get_resolvable_fields() # untyped_p4 converted to Optional[Any] - assert fields == {"untyped_p1": str, "untyped_p2": Decimal, "untyped_p3": ConnectionStringCredentials, "untyped_p4": Optional[Any]} + assert fields == { + "untyped_p1": str, + "untyped_p2": Decimal, + "untyped_p3": ConnectionStringCredentials, + "untyped_p4": Optional[Any], + } # spec from signatures containing positional only and keywords only args - def f_pos_kw_only(pos_only_1=dlt.config.value, pos_only_2: str = "default", /, *, kw_only_1=None, kw_only_2: int = 2) -> None: + def f_pos_kw_only( + pos_only_1=dlt.config.value, + pos_only_2: str = "default", + /, + *, + kw_only_1=None, + kw_only_2: int = 2, + ) -> None: pass SPEC = 
spec_from_signature(f_pos_kw_only, inspect.signature(f_pos_kw_only)) @@ -86,12 +130,19 @@ def f_pos_kw_only(pos_only_1=dlt.config.value, pos_only_2: str = "default", /, * assert SPEC.kw_only_1 is None assert SPEC.kw_only_2 == 2 fields = SPEC.get_resolvable_fields() - assert fields == {"pos_only_1": Any, "pos_only_2": str, "kw_only_1": Optional[Any], "kw_only_2": int} + assert fields == { + "pos_only_1": Any, + "pos_only_2": str, + "kw_only_1": Optional[Any], + "kw_only_2": int, + } # skip arguments with defaults # deregister spec to disable cache del globals()[SPEC.__name__] - SPEC = spec_from_signature(f_pos_kw_only, inspect.signature(f_pos_kw_only), include_defaults=False) + SPEC = spec_from_signature( + f_pos_kw_only, inspect.signature(f_pos_kw_only), include_defaults=False + ) assert not hasattr(SPEC, "kw_only_1") assert not hasattr(SPEC, "kw_only_2") assert not hasattr(SPEC, "pos_only_2") @@ -111,7 +162,6 @@ def f_variadic(var_1: str = "A", *args, kw_var_1: str, **kwargs) -> None: def test_spec_none_when_no_fields() -> None: - def f_default_only(arg1, arg2=None): pass @@ -119,7 +169,9 @@ def f_default_only(arg1, arg2=None): assert SPEC is not None del globals()[SPEC.__name__] - SPEC = spec_from_signature(f_default_only, inspect.signature(f_default_only), include_defaults=False) + SPEC = spec_from_signature( + f_default_only, inspect.signature(f_default_only), include_defaults=False + ) assert SPEC is None def f_no_spec(arg1): @@ -129,20 +181,39 @@ def f_no_spec(arg1): assert SPEC is None -def f_top_kw_defaults_args(arg1, arg2 = "top", arg3 = dlt.config.value, *args, kw1, kw_lit = "12131", kw_secret_val = dlt.secrets.value, **kwargs): +def f_top_kw_defaults_args( + arg1, + arg2="top", + arg3=dlt.config.value, + *args, + kw1, + kw_lit="12131", + kw_secret_val=dlt.secrets.value, + **kwargs, +): pass def test_argument_have_dlt_config_defaults() -> None: - def f_defaults( - req_val, config_val = dlt.config.value, secret_val = dlt.secrets.value, /, - pos_cf = None, pos_cf_val = dlt.config.value, pos_secret_val = dlt.secrets.value, *, - kw_val = None, kw_cf_val = dlt.config.value, kw_secret_val = dlt.secrets.value): + req_val, + config_val=dlt.config.value, + secret_val=dlt.secrets.value, + /, + pos_cf=None, + pos_cf_val=dlt.config.value, + pos_secret_val=dlt.secrets.value, + *, + kw_val=None, + kw_cf_val=dlt.config.value, + kw_secret_val=dlt.secrets.value, + ): pass @with_config - def f_kw_defaults(*, kw1 = dlt.config.value, kw_lit = "12131", kw_secret_val = dlt.secrets.value, **kwargs): + def f_kw_defaults( + *, kw1=dlt.config.value, kw_lit="12131", kw_secret_val=dlt.secrets.value, **kwargs + ): pass # do not delete those spaces @@ -151,18 +222,42 @@ def f_kw_defaults(*, kw1 = dlt.config.value, kw_lit = "12131", kw_secret_val = d @with_config # they are part of the test - def f_kw_defaults_args(arg1, arg2 = 2, arg3 = dlt.config.value, *args, kw1, kw_lit = "12131", kw_secret_val = dlt.secrets.value, **kwargs): + def f_kw_defaults_args( + arg1, + arg2=2, + arg3=dlt.config.value, + *args, + kw1, + kw_lit="12131", + kw_secret_val=dlt.secrets.value, + **kwargs, + ): pass - node = get_func_def_node(f_defaults) assert node.name == "f_defaults" literal_defaults = get_literal_defaults(node) - assert literal_defaults == {'kw_secret_val': 'dlt.secrets.value', 'kw_cf_val': 'dlt.config.value', 'kw_val': 'None', 'pos_secret_val': 'dlt.secrets.value', 'pos_cf_val': 'dlt.config.value', 'pos_cf': 'None', 'secret_val': 'dlt.secrets.value', 'config_val': 'dlt.config.value'} + assert literal_defaults == { + 
"kw_secret_val": "dlt.secrets.value", + "kw_cf_val": "dlt.config.value", + "kw_val": "None", + "pos_secret_val": "dlt.secrets.value", + "pos_cf_val": "dlt.config.value", + "pos_cf": "None", + "secret_val": "dlt.secrets.value", + "config_val": "dlt.config.value", + } SPEC = spec_from_signature(f_defaults, inspect.signature(f_defaults)) fields = SPEC.get_resolvable_fields() # fields market with dlt config are not optional, same for required fields - for arg in ["config_val", "secret_val", "pos_cf_val", "pos_secret_val", "kw_cf_val", "kw_secret_val"]: + for arg in [ + "config_val", + "secret_val", + "pos_cf_val", + "pos_secret_val", + "kw_cf_val", + "kw_secret_val", + ]: assert not is_optional_type(fields[arg]) for arg in ["pos_cf", "kw_val"]: assert is_optional_type(fields[arg]) @@ -172,7 +267,11 @@ def f_kw_defaults_args(arg1, arg2 = 2, arg3 = dlt.config.value, *args, kw1, kw_l node = get_func_def_node(f_kw_defaults) assert node.name == "f_kw_defaults" literal_defaults = get_literal_defaults(node) - assert literal_defaults == {'kw_secret_val': 'dlt.secrets.value', 'kw_lit': "'12131'", "kw1": "dlt.config.value"} + assert literal_defaults == { + "kw_secret_val": "dlt.secrets.value", + "kw_lit": "'12131'", + "kw1": "dlt.config.value", + } SPEC = spec_from_signature(f_kw_defaults, inspect.signature(f_kw_defaults)) fields = SPEC.get_resolvable_fields() assert not is_optional_type(fields["kw_lit"]) @@ -183,9 +282,19 @@ def f_kw_defaults_args(arg1, arg2 = 2, arg3 = dlt.config.value, *args, kw1, kw_l assert node.name == "f_kw_defaults_args" literal_defaults = get_literal_defaults(node) # print(literal_defaults) - assert literal_defaults == {'kw_secret_val': 'dlt.secrets.value', 'kw_lit': "'12131'", 'arg3': 'dlt.config.value', 'arg2': '2'} + assert literal_defaults == { + "kw_secret_val": "dlt.secrets.value", + "kw_lit": "'12131'", + "arg3": "dlt.config.value", + "arg2": "2", + } node = get_func_def_node(f_top_kw_defaults_args) assert node.name == "f_top_kw_defaults_args" literal_defaults = get_literal_defaults(node) - assert literal_defaults == {'kw_secret_val': 'dlt.secrets.value', 'kw_lit': "'12131'", 'arg3': 'dlt.config.value', 'arg2': "'top'"} + assert literal_defaults == { + "kw_secret_val": "dlt.secrets.value", + "kw_lit": "'12131'", + "arg3": "dlt.config.value", + "arg2": "'top'", + } diff --git a/tests/common/runners/test_pipes.py b/tests/common/runners/test_pipes.py index 706bef3860..6db7c2d0e2 100644 --- a/tests/common/runners/test_pipes.py +++ b/tests/common/runners/test_pipes.py @@ -27,6 +27,7 @@ class _TestPickler(NamedTuple): # self.s1 = s1 # self.s2 = s2 + class _TestClassUnkField: pass # def __init__(self, s1: _TestPicklex, s2: str) -> None: @@ -55,19 +56,25 @@ def test_pickle_encoder_none() -> None: def test_synth_pickler_unknown_types() -> None: # synth unknown tuple - obj = decode_obj("LfDoYo19lgUOtTn0Ib6JgASVQAAAAAAAAACMH3Rlc3RzLmNvbW1vbi5ydW5uZXJzLnRlc3RfcGlwZXOUjAxfVGVzdFBpY2tsZXiUk5SMA1hZWpRLe4aUgZQu") + obj = decode_obj( + "LfDoYo19lgUOtTn0Ib6JgASVQAAAAAAAAACMH3Rlc3RzLmNvbW1vbi5ydW5uZXJzLnRlc3RfcGlwZXOUjAxfVGVzdFBpY2tsZXiUk5SMA1hZWpRLe4aUgZQu" + ) assert type(obj).__name__.endswith("_TestPicklex") # this is completely different type assert not isinstance(obj, tuple) # synth unknown class containing other unknown types - obj = decode_obj("Koyo502yl4IKMqIxUTJFgASVbQAAAAAAAACMH3Rlc3RzLmNvbW1vbi5ydW5uZXJzLnRlc3RfcGlwZXOUjApfVGVzdENsYXNzlJOUKYGUfZQojAJzMZRoAIwMX1Rlc3RQaWNrbGV4lJOUjAFZlEsXhpSBlIwCczKUjAFVlIwDX3MzlEsDdWIu") + obj = decode_obj( + 
"Koyo502yl4IKMqIxUTJFgASVbQAAAAAAAACMH3Rlc3RzLmNvbW1vbi5ydW5uZXJzLnRlc3RfcGlwZXOUjApfVGVzdENsYXNzlJOUKYGUfZQojAJzMZRoAIwMX1Rlc3RQaWNrbGV4lJOUjAFZlEsXhpSBlIwCczKUjAFVlIwDX3MzlEsDdWIu" + ) assert type(obj).__name__.endswith("_TestClass") # tuple inside will be synthesized as well assert type(obj.s1).__name__.endswith("_TestPicklex") # known class containing unknown types - obj = decode_obj("PozhjHuf2oS7jPcRxKoagASVbQAAAAAAAACMH3Rlc3RzLmNvbW1vbi5ydW5uZXJzLnRlc3RfcGlwZXOUjBJfVGVzdENsYXNzVW5rRmllbGSUk5QpgZR9lCiMAnMxlGgAjAxfVGVzdFBpY2tsZXiUk5SMAVmUSxeGlIGUjAJzMpSMAVWUdWIu") + obj = decode_obj( + "PozhjHuf2oS7jPcRxKoagASVbQAAAAAAAACMH3Rlc3RzLmNvbW1vbi5ydW5uZXJzLnRlc3RfcGlwZXOUjBJfVGVzdENsYXNzVW5rRmllbGSUk5QpgZR9lCiMAnMxlGgAjAxfVGVzdFBpY2tsZXiUk5SMAVmUSxeGlIGUjAJzMpSMAVWUdWIu" + ) assert isinstance(obj, _TestClassUnkField) assert type(obj.s1).__name__.endswith("_TestPicklex") # type: ignore[attr-defined] @@ -88,7 +95,9 @@ def test_iter_stdout() -> None: lines = list(iter_stdout(venv, "python", "tests/common/scripts/empty.py")) assert lines == [] with pytest.raises(CalledProcessError) as cpe: - list(iter_stdout(venv, "python", "tests/common/scripts/no_stdout_no_stderr_with_fail.py")) + list( + iter_stdout(venv, "python", "tests/common/scripts/no_stdout_no_stderr_with_fail.py") + ) # empty stdout assert cpe.value.output == "" assert cpe.value.stderr == "" @@ -102,7 +111,9 @@ def test_iter_stdout_raises() -> None: with Venv.create(tempfile.mkdtemp()) as venv: expected = ["0", "1", "2"] with pytest.raises(CalledProcessError) as cpe: - for i, line in enumerate(iter_stdout(venv, "python", "tests/common/scripts/raising_counter.py")): + for i, line in enumerate( + iter_stdout(venv, "python", "tests/common/scripts/raising_counter.py") + ): assert expected[i] == line assert cpe.value.returncode == 1 # the last output line is available @@ -120,7 +131,9 @@ def test_iter_stdout_raises() -> None: # three lines with 1 MB size + newline _i = -1 with pytest.raises(CalledProcessError) as cpe: - for _i, line in enumerate(iter_stdout(venv, "python", "tests/common/scripts/long_lines_fails.py")): + for _i, line in enumerate( + iter_stdout(venv, "python", "tests/common/scripts/long_lines_fails.py") + ): assert len(line) == 1024 * 1024 assert line == "a" * 1024 * 1024 # there were 3 lines @@ -158,11 +171,15 @@ def test_iter_stdout_with_result() -> None: assert iter_until_returns(i) is None # it just excepts without encoding exception with pytest.raises(CalledProcessError): - i = iter_stdout_with_result(venv, "python", "tests/common/scripts/no_stdout_no_stderr_with_fail.py") + i = iter_stdout_with_result( + venv, "python", "tests/common/scripts/no_stdout_no_stderr_with_fail.py" + ) iter_until_returns(i) # this raises a decoded exception: UnsupportedProcessStartMethodException with pytest.raises(UnsupportedProcessStartMethodException): - i = iter_stdout_with_result(venv, "python", "tests/common/scripts/stdout_encode_exception.py") + i = iter_stdout_with_result( + venv, "python", "tests/common/scripts/stdout_encode_exception.py" + ) iter_until_returns(i) diff --git a/tests/common/runners/test_runnable.py b/tests/common/runners/test_runnable.py index eae4a46a70..e25f28e521 100644 --- a/tests/common/runners/test_runnable.py +++ b/tests/common/runners/test_runnable.py @@ -1,6 +1,7 @@ import gc import pytest import multiprocessing + # from multiprocessing.pool import Pool # from multiprocessing.dummy import Pool as ThreadPool from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor @@ -9,10 +10,15 @@ 
from dlt.normalize.configuration import SchemaStorageConfiguration from dlt.common.runners import Runnable -from tests.common.runners.utils import _TestRunnableWorkerMethod, _TestRunnableWorker, ALL_METHODS, mp_method_auto +from tests.common.runners.utils import ( + _TestRunnableWorkerMethod, + _TestRunnableWorker, + ALL_METHODS, + mp_method_auto, +) -@pytest.mark.parametrize('method', ALL_METHODS) +@pytest.mark.parametrize("method", ALL_METHODS) def test_runnable_process_pool(method: str) -> None: # 4 tasks r = _TestRunnableWorker(4) @@ -45,7 +51,7 @@ def test_runnable_direct_worker_call() -> None: assert rv[0] == 199 -@pytest.mark.parametrize('method', ALL_METHODS) +@pytest.mark.parametrize("method", ALL_METHODS) def test_process_worker_started_early(method: str) -> None: with ProcessPoolExecutor(4, mp_context=multiprocessing.get_context(method)) as p: r = _TestRunnableWorkerMethod(4) @@ -71,7 +77,7 @@ def test_weak_pool_ref() -> None: r = wref[rid] -@pytest.mark.parametrize('method', ALL_METHODS) +@pytest.mark.parametrize("method", ALL_METHODS) def test_configuredworker(method: str) -> None: # call worker method with CONFIG values that should be restored into CONFIG type config = SchemaStorageConfiguration() diff --git a/tests/common/runners/test_runners.py b/tests/common/runners/test_runners.py index 9045d40ad9..3b56b64156 100644 --- a/tests/common/runners/test_runners.py +++ b/tests/common/runners/test_runners.py @@ -10,7 +10,12 @@ from dlt.common.runtime import initialize_runtime from dlt.common.runners.configuration import PoolRunnerConfiguration, TPoolType -from tests.common.runners.utils import _TestRunnableWorkerMethod, _TestRunnableWorker, ALL_METHODS, mp_method_auto +from tests.common.runners.utils import ( + _TestRunnableWorkerMethod, + _TestRunnableWorker, + ALL_METHODS, + mp_method_auto, +) from tests.utils import init_test_logging @@ -43,6 +48,7 @@ def logger_autouse() -> None: _counter = 0 + @pytest.fixture(autouse=True) def default_args() -> None: signals._received_signal = 0 @@ -117,25 +123,31 @@ def test_single_non_idle_run() -> None: def test_runnable_with_runner() -> None: r = _TestRunnableWorkerMethod(4) - runs_count = runner.run_pool( - configure(ThreadPoolConfiguration), - r - ) + runs_count = runner.run_pool(configure(ThreadPoolConfiguration), r) assert runs_count == 1 assert [v[0] for v in r.rv] == list(range(4)) -@pytest.mark.parametrize('method', ALL_METHODS) -def test_pool_runner_process_methods(method) -> None: +@pytest.mark.parametrize("method", ALL_METHODS) +def test_pool_runner_process_methods_forced(method) -> None: multiprocessing.set_start_method(method, force=True) r = _TestRunnableWorker(4) # make sure signals and logging is initialized C = resolve_configuration(RunConfiguration()) initialize_runtime(C) - runs_count = runner.run_pool( - configure(ProcessPoolConfiguration), - r - ) + runs_count = runner.run_pool(configure(ProcessPoolConfiguration), r) + assert runs_count == 1 + assert [v[0] for v in r.rv] == list(range(4)) + + +@pytest.mark.parametrize("method", ALL_METHODS) +def test_pool_runner_process_methods_configured(method) -> None: + r = _TestRunnableWorker(4) + # make sure signals and logging is initialized + C = resolve_configuration(RunConfiguration()) + initialize_runtime(C) + + runs_count = runner.run_pool(ProcessPoolConfiguration(start_method=method), r) assert runs_count == 1 assert [v[0] for v in r.rv] == list(range(4)) diff --git a/tests/common/runners/test_venv.py b/tests/common/runners/test_venv.py index 79e485862e..ee62df3c83 100644 
--- a/tests/common/runners/test_venv.py +++ b/tests/common/runners/test_venv.py @@ -235,7 +235,9 @@ def test_start_command() -> None: # custom environ with custom_environ({"_CUSTOM_ENV_VALUE": "uniq"}): - with venv.start_command("python", "tests/common/scripts/environ.py", stdout=PIPE, text=True) as process: + with venv.start_command( + "python", "tests/common/scripts/environ.py", stdout=PIPE, text=True + ) as process: output, _ = process.communicate() assert process.poll() == 0 assert "_CUSTOM_ENV_VALUE" in output diff --git a/tests/common/runners/utils.py b/tests/common/runners/utils.py index 1791a0ed28..3d6adbf70c 100644 --- a/tests/common/runners/utils.py +++ b/tests/common/runners/utils.py @@ -10,7 +10,7 @@ from dlt.common.utils import uniq_id # remove fork-server because it hangs the tests no CI -ALL_METHODS = set(multiprocessing.get_all_start_methods()).intersection(['fork', 'spawn']) +ALL_METHODS = set(multiprocessing.get_all_start_methods()).intersection(["fork", "spawn"]) @pytest.fixture(autouse=True) @@ -38,7 +38,9 @@ def worker(self: "_TestRunnableWorkerMethod", v: int) -> Tuple[int, str, int]: def _run(self, pool: Executor) -> List[Tuple[int, str, int]]: rid = id(self) assert rid in _TestRunnableWorkerMethod.RUNNING - self.rv = rv = list(pool.map(_TestRunnableWorkerMethod.worker, *zip(*[(rid, i) for i in range(self.tasks)]))) + self.rv = rv = list( + pool.map(_TestRunnableWorkerMethod.worker, *zip(*[(rid, i) for i in range(self.tasks)])) + ) assert rid in _TestRunnableWorkerMethod.RUNNING return rv @@ -62,7 +64,9 @@ def worker(v: int) -> Tuple[int, int]: return (v, os.getpid()) def _run(self, pool: Executor) -> List[Tuple[int, int]]: - self.rv = rv = list(pool.map(_TestRunnableWorker.worker, *zip(*[(i, ) for i in range(self.tasks)]))) + self.rv = rv = list( + pool.map(_TestRunnableWorker.worker, *zip(*[(i,) for i in range(self.tasks)])) + ) return rv def run(self, pool: Executor) -> TRunMetrics: diff --git a/tests/common/runtime/test_collector.py b/tests/common/runtime/test_collector.py index 600c3b3d4b..dbe4b8c94d 100644 --- a/tests/common/runtime/test_collector.py +++ b/tests/common/runtime/test_collector.py @@ -45,4 +45,4 @@ def test_dict_collector_reset_counters(): assert collector.counters["counter1"] == 5 with DictCollector()("test2") as collector: - assert collector.counters == defaultdict(int) \ No newline at end of file + assert collector.counters == defaultdict(int) diff --git a/tests/common/runtime/test_logging.py b/tests/common/runtime/test_logging.py index 357cd180fb..19f67fe899 100644 --- a/tests/common/runtime/test_logging.py +++ b/tests/common/runtime/test_logging.py @@ -28,11 +28,16 @@ def test_version_extract(environment: DictStrStr) -> None: version = exec_info.dlt_version_info("logger") # assert version["dlt_version"].startswith(code_version) lib_version = pkg_version("dlt") - assert version == {'dlt_version': lib_version, 'pipeline_name': 'logger'} + assert version == {"dlt_version": lib_version, "pipeline_name": "logger"} # mock image info available in container mock_image_env(environment) version = exec_info.dlt_version_info("logger") - assert version == {'dlt_version': lib_version, 'commit_sha': '192891', 'pipeline_name': 'logger', 'image_version': 'scale/v:112'} + assert version == { + "dlt_version": lib_version, + "commit_sha": "192891", + "pipeline_name": "logger", + "image_version": "scale/v:112", + } def test_pod_info_extract(environment: DictStrStr) -> None: @@ -40,17 +45,29 @@ def test_pod_info_extract(environment: DictStrStr) -> None: assert 
pod_info == {} mock_pod_env(environment) pod_info = exec_info.kube_pod_info() - assert pod_info == {'kube_node_name': 'node_name', 'kube_pod_name': 'pod_name', 'kube_pod_namespace': 'namespace'} + assert pod_info == { + "kube_node_name": "node_name", + "kube_pod_name": "pod_name", + "kube_pod_namespace": "namespace", + } def test_github_info_extract(environment: DictStrStr) -> None: mock_github_env(environment) github_info = exec_info.github_info() - assert github_info == {"github_user": "rudolfix", "github_repository": "dlt-hub/beginners-workshop-2022", "github_repository_owner": "dlt-hub"} + assert github_info == { + "github_user": "rudolfix", + "github_repository": "dlt-hub/beginners-workshop-2022", + "github_repository_owner": "dlt-hub", + } mock_github_env(environment) del environment["GITHUB_USER"] github_info = exec_info.github_info() - assert github_info == {"github_user": "dlt-hub", "github_repository": "dlt-hub/beginners-workshop-2022", "github_repository_owner": "dlt-hub"} + assert github_info == { + "github_user": "dlt-hub", + "github_repository": "dlt-hub/beginners-workshop-2022", + "github_repository_owner": "dlt-hub", + } @pytest.mark.forked @@ -68,9 +85,9 @@ def test_text_logger_init(environment: DictStrStr) -> None: @pytest.mark.forked - def test_json_logger_init(environment: DictStrStr) -> None: from dlt.common.runtime import json_logging + mock_image_env(environment) mock_pod_env(environment) init_test_logging(JsonLoggerConfiguration()) @@ -87,7 +104,6 @@ def test_json_logger_init(environment: DictStrStr) -> None: @pytest.mark.forked def test_double_log_init(environment: DictStrStr) -> None: - mock_image_env(environment) mock_pod_env(environment) diff --git a/tests/common/runtime/test_signals.py b/tests/common/runtime/test_signals.py index 839738f904..179491de16 100644 --- a/tests/common/runtime/test_signals.py +++ b/tests/common/runtime/test_signals.py @@ -65,7 +65,6 @@ def test_delayed_signals_context_manager() -> None: def test_sleep_signal() -> None: - thread_signal = 0 def _thread() -> None: diff --git a/tests/common/runtime/test_telemetry.py b/tests/common/runtime/test_telemetry.py index 9ffc5dc628..eece36aae7 100644 --- a/tests/common/runtime/test_telemetry.py +++ b/tests/common/runtime/test_telemetry.py @@ -13,13 +13,21 @@ from tests.common.runtime.utils import mock_image_env, mock_github_env, mock_pod_env from tests.common.configuration.utils import environment -from tests.utils import preserve_environ, skipifspawn, skipifwindows, init_test_logging, start_test_telemetry +from tests.utils import ( + preserve_environ, + skipifspawn, + skipifwindows, + init_test_logging, + start_test_telemetry, +) @configspec class SentryLoggerConfiguration(RunConfiguration): pipeline_name: str = "logger" - sentry_dsn: str = "https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752" + sentry_dsn: str = ( + "https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752" + ) dlthub_telemetry_segment_write_key: str = "TLJiyRkGVZGCi2TtjClamXpFcxAA1rSB" @@ -28,17 +36,19 @@ class SentryLoggerCriticalConfiguration(SentryLoggerConfiguration): log_level: str = "CRITICAL" if TYPE_CHECKING: + def __init__( self, pipeline_name: str = "logger", sentry_dsn: str = "https://sentry.io", dlthub_telemetry_segment_write_key: str = "TLJiyRkGVZGCi2TtjClamXpFcxAA1rSB", log_level: str = "CRITICAL", - ) -> None: - ... + ) -> None: ... 
+ def test_sentry_log_level() -> None: from dlt.common.runtime.sentry import _get_sentry_log_level + sll = _get_sentry_log_level(SentryLoggerCriticalConfiguration(log_level="CRITICAL")) assert sll._handler.level == logging._nameToLevel["CRITICAL"] sll = _get_sentry_log_level(SentryLoggerCriticalConfiguration(log_level="ERROR")) @@ -97,6 +107,8 @@ def test_cleanup(environment: DictStrStr) -> None: SENT_ITEMS = [] + + def _mock_before_send(event: DictStrAny, _unused_hint: Any = None) -> DictStrAny: # print(event) SENT_ITEMS.append(event) diff --git a/tests/common/schema/test_coercion.py b/tests/common/schema/test_coercion.py index 2ac11e71d8..922024a89b 100644 --- a/tests/common/schema/test_coercion.py +++ b/tests/common/schema/test_coercion.py @@ -30,9 +30,12 @@ def test_coerce_type_to_text() -> None: # double into text assert coerce_value("text", "double", -1726.1288) == "-1726.1288" # bytes to text (base64) - assert coerce_value("text", "binary", b'binary string') == "YmluYXJ5IHN0cmluZw==" + assert coerce_value("text", "binary", b"binary string") == "YmluYXJ5IHN0cmluZw==" # HexBytes to text (hex with prefix) - assert coerce_value("text", "binary", HexBytes(b'binary string')) == "0x62696e61727920737472696e67" + assert ( + coerce_value("text", "binary", HexBytes(b"binary string")) == "0x62696e61727920737472696e67" + ) + # Str enum value class StrEnum(Enum): a = "a_value" @@ -42,6 +45,7 @@ class StrEnum(Enum): # Make sure we get the bare str value, not the enum instance assert not isinstance(str_enum_result, Enum) assert str_enum_result == "b_value" + # Mixed enum value class MixedEnum(Enum): a = "a_value" @@ -68,7 +72,7 @@ def test_coerce_type_to_bool() -> None: with pytest.raises(ValueError): coerce_value("bool", "complex", {"a": True}) with pytest.raises(ValueError): - coerce_value("bool", "binary", b'True') + coerce_value("bool", "binary", b"True") with pytest.raises(ValueError): coerce_value("bool", "timestamp", pendulum.now()) @@ -79,7 +83,7 @@ def test_coerce_type_to_double() -> None: # text into double if parsable assert coerce_value("double", "text", " -1726.1288 ") == -1726.1288 # hex text into double - assert coerce_value("double", "text", "0xff") == 255.0 + assert coerce_value("double", "text", "0xff") == 255.0 # wei, decimal to double assert coerce_value("double", "wei", Wei.from_int256(2137, decimals=2)) == 21.37 assert coerce_value("double", "decimal", Decimal("-1121.11")) == -1121.11 @@ -123,10 +127,7 @@ class IntEnum(int, Enum): assert int_enum_result == 2 -@pytest.mark.parametrize("dec_cls,data_type", [ - (Decimal, "decimal"), - (Wei, "wei") -]) +@pytest.mark.parametrize("dec_cls,data_type", [(Decimal, "decimal"), (Wei, "wei")]) def test_coerce_to_numeric(dec_cls: Type[Any], data_type: TDataType) -> None: v = coerce_value(data_type, "text", " -1726.839283 ") assert type(v) is dec_cls @@ -162,20 +163,36 @@ def test_coerce_type_from_hex_text() -> None: def test_coerce_type_to_timestamp() -> None: # timestamp cases - assert coerce_value("timestamp", "text", " 1580405246 ") == pendulum.parse("2020-01-30T17:27:26+00:00") + assert coerce_value("timestamp", "text", " 1580405246 ") == pendulum.parse( + "2020-01-30T17:27:26+00:00" + ) # the tenths of microseconds will be ignored - assert coerce_value("timestamp", "double", 1633344898.7415245) == pendulum.parse("2021-10-04T10:54:58.741524+00:00") + assert coerce_value("timestamp", "double", 1633344898.7415245) == pendulum.parse( + "2021-10-04T10:54:58.741524+00:00" + ) # if text is ISO string it will be coerced - assert 
coerce_value("timestamp", "text", "2022-05-10T03:41:31.466000+00:00") == pendulum.parse("2022-05-10T03:41:31.466000+00:00") - assert coerce_value("timestamp", "text", "2022-05-10T03:41:31.466+02:00") == pendulum.parse("2022-05-10T01:41:31.466Z") - assert coerce_value("timestamp", "text", "2022-05-10T03:41:31.466+0200") == pendulum.parse("2022-05-10T01:41:31.466Z") + assert coerce_value("timestamp", "text", "2022-05-10T03:41:31.466000+00:00") == pendulum.parse( + "2022-05-10T03:41:31.466000+00:00" + ) + assert coerce_value("timestamp", "text", "2022-05-10T03:41:31.466+02:00") == pendulum.parse( + "2022-05-10T01:41:31.466Z" + ) + assert coerce_value("timestamp", "text", "2022-05-10T03:41:31.466+0200") == pendulum.parse( + "2022-05-10T01:41:31.466Z" + ) # parse almost ISO compliant string - assert coerce_value("timestamp", "text", "2022-04-26 10:36+02") == pendulum.parse("2022-04-26T10:36:00+02:00") - assert coerce_value("timestamp", "text", "2022-04-26 10:36") == pendulum.parse("2022-04-26T10:36:00+00:00") + assert coerce_value("timestamp", "text", "2022-04-26 10:36+02") == pendulum.parse( + "2022-04-26T10:36:00+02:00" + ) + assert coerce_value("timestamp", "text", "2022-04-26 10:36") == pendulum.parse( + "2022-04-26T10:36:00+00:00" + ) # parse date string assert coerce_value("timestamp", "text", "2021-04-25") == pendulum.parse("2021-04-25") # from date type - assert coerce_value("timestamp", "date", datetime.date(2023, 2, 27)) == pendulum.parse("2023-02-27") + assert coerce_value("timestamp", "date", datetime.date(2023, 2, 27)) == pendulum.parse( + "2023-02-27" + ) # fails on "now" - yes pendulum by default parses "now" as .now() with pytest.raises(ValueError): @@ -222,21 +239,35 @@ def test_coerce_type_to_timestamp() -> None: def test_coerce_type_to_date() -> None: # from datetime object - assert coerce_value("date", "timestamp", pendulum.datetime(1995, 5, 6, 00, 1, 1, tz=UTC)) == pendulum.parse("1995-05-06", exact=True) + assert coerce_value( + "date", "timestamp", pendulum.datetime(1995, 5, 6, 00, 1, 1, tz=UTC) + ) == pendulum.parse("1995-05-06", exact=True) # from unix timestamp - assert coerce_value("date", "double", 1677546399.494264) == pendulum.parse("2023-02-28", exact=True) + assert coerce_value("date", "double", 1677546399.494264) == pendulum.parse( + "2023-02-28", exact=True + ) assert coerce_value("date", "text", " 1677546399 ") == pendulum.parse("2023-02-28", exact=True) # ISO date string assert coerce_value("date", "text", "2023-02-27") == pendulum.parse("2023-02-27", exact=True) # ISO datetime string - assert coerce_value("date", "text", "2022-05-10T03:41:31.466000+00:00") == pendulum.parse("2022-05-10", exact=True) - assert coerce_value("date", "text", "2022-05-10T03:41:31.466+02:00") == pendulum.parse("2022-05-10", exact=True) - assert coerce_value("date", "text", "2022-05-10T03:41:31.466+0200") == pendulum.parse("2022-05-10", exact=True) + assert coerce_value("date", "text", "2022-05-10T03:41:31.466000+00:00") == pendulum.parse( + "2022-05-10", exact=True + ) + assert coerce_value("date", "text", "2022-05-10T03:41:31.466+02:00") == pendulum.parse( + "2022-05-10", exact=True + ) + assert coerce_value("date", "text", "2022-05-10T03:41:31.466+0200") == pendulum.parse( + "2022-05-10", exact=True + ) # almost ISO compliant string - assert coerce_value("date", "text", "2022-04-26 10:36+02") == pendulum.parse("2022-04-26", exact=True) - assert coerce_value("date", "text", "2022-04-26 10:36") == pendulum.parse("2022-04-26", exact=True) + assert coerce_value("date", "text", 
"2022-04-26 10:36+02") == pendulum.parse( + "2022-04-26", exact=True + ) + assert coerce_value("date", "text", "2022-04-26 10:36") == pendulum.parse( + "2022-04-26", exact=True + ) - # iso time string fails + # iso time string fails with pytest.raises(ValueError): coerce_value("timestamp", "text", "03:41:31.466") @@ -247,18 +278,26 @@ def test_coerce_type_to_date() -> None: def test_coerce_type_to_time() -> None: # from ISO time string - assert coerce_value("time", "text", "03:41:31.466000") == pendulum.parse("03:41:31.466000", exact=True) + assert coerce_value("time", "text", "03:41:31.466000") == pendulum.parse( + "03:41:31.466000", exact=True + ) # time object returns same value - assert coerce_value("time", "time", pendulum.time(3, 41, 31, 466000)) == pendulum.time(3, 41, 31, 466000) + assert coerce_value("time", "time", pendulum.time(3, 41, 31, 466000)) == pendulum.time( + 3, 41, 31, 466000 + ) # from datetime object fails with pytest.raises(TypeError): coerce_value("time", "timestamp", pendulum.datetime(1995, 5, 6, 00, 1, 1, tz=UTC)) # from unix timestamp fails with pytest.raises(TypeError): - assert coerce_value("time", "double", 1677546399.494264) == pendulum.parse("01:06:39.494264", exact=True) + assert coerce_value("time", "double", 1677546399.494264) == pendulum.parse( + "01:06:39.494264", exact=True + ) with pytest.raises(ValueError): - assert coerce_value("time", "text", " 1677546399 ") == pendulum.parse("01:06:39", exact=True) + assert coerce_value("time", "text", " 1677546399 ") == pendulum.parse( + "01:06:39", exact=True + ) # ISO date string fails with pytest.raises(ValueError): assert coerce_value("time", "text", "2023-02-27") == pendulum.parse("00:00:00", exact=True) @@ -269,9 +308,9 @@ def test_coerce_type_to_time() -> None: def test_coerce_type_to_binary() -> None: # from hex string - assert coerce_value("binary", "text", "0x30") == b'0' + assert coerce_value("binary", "text", "0x30") == b"0" # from base64 - assert coerce_value("binary", "text", "YmluYXJ5IHN0cmluZw==") == b'binary string' + assert coerce_value("binary", "text", "YmluYXJ5IHN0cmluZw==") == b"binary string" # int into bytes assert coerce_value("binary", "bigint", 15) == b"\x0f" # can't into double @@ -344,8 +383,16 @@ def test_coerce_type_complex() -> None: def test_coerce_type_complex_with_pua() -> None: - v_dict = {"list": [1, Wei.from_int256(10**18), f"{_DATETIME}2022-05-10T01:41:31.466Z"], "str": "complex", "pua_date": f"{_DATETIME}2022-05-10T01:41:31.466Z"} - exp_v = {"list":[1, Wei.from_int256(10**18), "2022-05-10T01:41:31.466Z"],"str":"complex","pua_date":"2022-05-10T01:41:31.466Z"} + v_dict = { + "list": [1, Wei.from_int256(10**18), f"{_DATETIME}2022-05-10T01:41:31.466Z"], + "str": "complex", + "pua_date": f"{_DATETIME}2022-05-10T01:41:31.466Z", + } + exp_v = { + "list": [1, Wei.from_int256(10**18), "2022-05-10T01:41:31.466Z"], + "str": "complex", + "pua_date": "2022-05-10T01:41:31.466Z", + } assert coerce_value("complex", "complex", copy(v_dict)) == exp_v assert coerce_value("text", "complex", copy(v_dict)) == json.dumps(exp_v) # also decode recursively diff --git a/tests/common/schema/test_detections.py b/tests/common/schema/test_detections.py index 13cb09faec..cba2767c94 100644 --- a/tests/common/schema/test_detections.py +++ b/tests/common/schema/test_detections.py @@ -2,7 +2,16 @@ from dlt.common import pendulum, Decimal, Wei from dlt.common.schema.utils import autodetect_sc_type -from dlt.common.schema.detections import is_hexbytes_to_text, is_timestamp, is_iso_timestamp, is_iso_date, 
is_large_integer, is_wei_to_double, _FLOAT_TS_RANGE, _NOW_TS +from dlt.common.schema.detections import ( + is_hexbytes_to_text, + is_timestamp, + is_iso_timestamp, + is_iso_date, + is_large_integer, + is_wei_to_double, + _FLOAT_TS_RANGE, + _NOW_TS, +) def test_timestamp_detection() -> None: @@ -69,12 +78,12 @@ def test_detection_large_integer() -> None: assert is_large_integer(int, 2**64 // 2) == "wei" assert is_large_integer(int, 578960446186580977117854925043439539267) == "text" assert is_large_integer(int, 2**64 // 2 - 1) is None - assert is_large_integer(int, -2**64 // 2 - 1) is None + assert is_large_integer(int, -(2**64) // 2 - 1) is None def test_detection_hexbytes_to_text() -> None: - assert is_hexbytes_to_text(bytes, b'hey') is None - assert is_hexbytes_to_text(HexBytes, b'hey') == "text" + assert is_hexbytes_to_text(bytes, b"hey") is None + assert is_hexbytes_to_text(HexBytes, b"hey") == "text" def test_wei_to_double() -> None: @@ -89,7 +98,10 @@ def test_detection_function() -> None: assert autodetect_sc_type(["iso_date"], str, str(pendulum.now().date())) == "date" assert autodetect_sc_type(["iso_date"], float, str(pendulum.now().date())) is None assert autodetect_sc_type(["timestamp"], str, str(pendulum.now())) is None - assert autodetect_sc_type(["timestamp", "iso_timestamp"], float, pendulum.now().timestamp()) == "timestamp" + assert ( + autodetect_sc_type(["timestamp", "iso_timestamp"], float, pendulum.now().timestamp()) + == "timestamp" + ) assert autodetect_sc_type(["timestamp", "large_integer"], int, 2**64) == "wei" - assert autodetect_sc_type(["large_integer", "hexbytes_to_text"], HexBytes, b'hey') == "text" + assert autodetect_sc_type(["large_integer", "hexbytes_to_text"], HexBytes, b"hey") == "text" assert autodetect_sc_type(["large_integer", "wei_to_double"], Wei, Wei(10**18)) == "double" diff --git a/tests/common/schema/test_filtering.py b/tests/common/schema/test_filtering.py index 9a7fe01f54..8cfac9309f 100644 --- a/tests/common/schema/test_filtering.py +++ b/tests/common/schema/test_filtering.py @@ -50,9 +50,14 @@ def test_whole_row_filter_with_exception(schema: Schema) -> None: # mind that path event_bot__custom_data__included_object was also eliminated assert filtered_case == {} # this child of the row has exception (^event_bot__custom_data__included_object__ - the __ at the end select all childern but not the parent) - filtered_case = schema.filter_row("event_bot__custom_data__included_object", deepcopy(bot_case)["custom_data"]["included_object"]) + filtered_case = schema.filter_row( + "event_bot__custom_data__included_object", + deepcopy(bot_case)["custom_data"]["included_object"], + ) assert filtered_case == bot_case["custom_data"]["included_object"] - filtered_case = schema.filter_row("event_bot__custom_data__excluded_path", deepcopy(bot_case)["custom_data"]["excluded_path"]) + filtered_case = schema.filter_row( + "event_bot__custom_data__excluded_path", deepcopy(bot_case)["custom_data"]["excluded_path"] + ) assert filtered_case == {} @@ -60,16 +65,13 @@ def test_filter_parent_table_schema_update(schema: Schema) -> None: # filter out parent table and leave just child one. 
that should break the child-parent relationship and reject schema update _add_excludes(schema) source_row = { - "metadata": [{ - "elvl1": [{ - "elvl2": [{ - "id": "level3_kept" - }], - "f": "elvl1_removed" - }], - "f": "metadata_removed" - }] - } + "metadata": [ + { + "elvl1": [{"elvl2": [{"id": "level3_kept"}], "f": "elvl1_removed"}], + "f": "metadata_removed", + } + ] + } updates = [] @@ -96,7 +98,9 @@ def test_filter_parent_table_schema_update(schema: Schema) -> None: updates.clear() schema = Schema("event") _add_excludes(schema) - schema.get_table("event_bot")["filters"]["includes"].extend([TSimpleRegex("re:^metadata___dlt_"), TSimpleRegex("re:^metadata__elvl1___dlt_")]) + schema.get_table("event_bot")["filters"]["includes"].extend( + [TSimpleRegex("re:^metadata___dlt_"), TSimpleRegex("re:^metadata__elvl1___dlt_")] + ) schema._compile_settings() for (t, p), row in schema.normalize_data_item(source_row, "load_id", "event_bot"): row = schema.filter_row(t, row) @@ -118,7 +122,9 @@ def _add_excludes(schema: Schema) -> None: bot_table = new_table("event_bot") bot_table.setdefault("filters", {})["excludes"] = ["re:^metadata", "re:^is_flagged$", "re:^data", "re:^custom_data"] # type: ignore[typeddict-item] bot_table["filters"]["includes"] = [ - TSimpleRegex("re:^data__custom$"), TSimpleRegex("re:^custom_data__included_object__"), TSimpleRegex("re:^metadata__elvl1__elvl2__") + TSimpleRegex("re:^data__custom$"), + TSimpleRegex("re:^custom_data__included_object__"), + TSimpleRegex("re:^metadata__elvl1__elvl2__"), ] schema.update_table(bot_table) schema._compile_settings() diff --git a/tests/common/schema/test_inference.py b/tests/common/schema/test_inference.py index 24c97219fc..8d567f6993 100644 --- a/tests/common/schema/test_inference.py +++ b/tests/common/schema/test_inference.py @@ -7,7 +7,12 @@ from dlt.common.json import custom_pua_decode from dlt.common.schema import Schema, utils from dlt.common.schema.typing import TSimpleRegex -from dlt.common.schema.exceptions import CannotCoerceColumnException, CannotCoerceNullException, ParentTableNotFoundException, TablePropertiesConflictException +from dlt.common.schema.exceptions import ( + CannotCoerceColumnException, + CannotCoerceNullException, + ParentTableNotFoundException, + TablePropertiesConflictException, +) from tests.common.utils import load_json_case @@ -80,7 +85,12 @@ def test_coerce_row(schema: Schema) -> None: timestamp_float = 78172.128 timestamp_str = "1970-01-01T21:42:52.128000+00:00" # add new column with preferred - row_1 = {"timestamp": timestamp_float, "confidence": "0.1", "value": "0xFF", "number": Decimal("128.67")} + row_1 = { + "timestamp": timestamp_float, + "confidence": "0.1", + "value": "0xFF", + "number": Decimal("128.67"), + } new_row_1, new_table = schema.coerce_row("event_user", None, row_1) # convert columns to list, they must correspond to the order of fields in row_1 new_columns = list(new_table["columns"].values()) @@ -94,7 +104,12 @@ def test_coerce_row(schema: Schema) -> None: assert new_columns[3]["data_type"] == "decimal" assert "variant" not in new_columns[3] # also rows values should be coerced (confidence) - assert new_row_1 == {"timestamp": pendulum.parse(timestamp_str), "confidence": 0.1, "value": 255, "number": Decimal("128.67")} + assert new_row_1 == { + "timestamp": pendulum.parse(timestamp_str), + "confidence": 0.1, + "value": 255, + "number": Decimal("128.67"), + } # update schema schema.update_table(new_table) @@ -137,7 +152,9 @@ def test_coerce_row(schema: Schema) -> None: 
schema.update_table(new_table) # variant column clashes with existing column - create new_colbool_v_binary column that would be created for binary variant, but give it a type datetime - _, new_table = schema.coerce_row("event_user", None, {"new_colbool": False, "new_colbool__v_timestamp": b"not fit"}) + _, new_table = schema.coerce_row( + "event_user", None, {"new_colbool": False, "new_colbool__v_timestamp": b"not fit"} + ) schema.update_table(new_table) with pytest.raises(CannotCoerceColumnException) as exc_val: # now pass the binary that would create binary variant - but the column is occupied by text type @@ -179,7 +196,12 @@ def test_shorten_variant_column(schema: Schema) -> None: _add_preferred_types(schema) timestamp_float = 78172.128 # add new column with preferred - row_1 = {"timestamp": timestamp_float, "confidence": "0.1", "value": "0xFF", "number": Decimal("128.67")} + row_1 = { + "timestamp": timestamp_float, + "confidence": "0.1", + "value": "0xFF", + "number": Decimal("128.67"), + } _, new_table = schema.coerce_row("event_user", None, row_1) # schema assumes that identifiers are already normalized so confidence even if it is longer than 9 chars schema.update_table(new_table) @@ -188,7 +210,9 @@ def test_shorten_variant_column(schema: Schema) -> None: # now variant is created and this will be normalized # TODO: we should move the handling of variants to normalizer new_row_2, new_table = schema.coerce_row("event_user", None, {"confidence": False}) - tag = schema.naming._compute_tag("confidence__v_bool", collision_prob=schema.naming._DEFAULT_COLLISION_PROB) + tag = schema.naming._compute_tag( + "confidence__v_bool", collision_prob=schema.naming._DEFAULT_COLLISION_PROB + ) new_row_2_keys = list(new_row_2.keys()) assert tag in new_row_2_keys[0] assert len(new_row_2_keys[0]) == 9 @@ -252,15 +276,18 @@ def test_supports_variant_pua_decode(schema: Schema) -> None: # pua encoding still present assert normalized_row[0][1]["wad"].startswith("") # decode pua - decoded_row = {k: custom_pua_decode(v) for k,v in normalized_row[0][1].items()} + decoded_row = {k: custom_pua_decode(v) for k, v in normalized_row[0][1].items()} assert isinstance(decoded_row["wad"], Wei) c_row, new_table = schema.coerce_row("eth", None, decoded_row) - assert c_row["wad__v_str"] == str(2**256-1) + assert c_row["wad__v_str"] == str(2**256 - 1) assert new_table["columns"]["wad__v_str"]["data_type"] == "text" def test_supports_variant(schema: Schema) -> None: - rows = [{"evm": Wei.from_int256(2137*10**16, decimals=18)}, {"evm": Wei.from_int256(2**256-1)}] + rows = [ + {"evm": Wei.from_int256(2137 * 10**16, decimals=18)}, + {"evm": Wei.from_int256(2**256 - 1)}, + ] normalized_rows: List[Any] = [] for row in rows: normalized_rows.extend(schema.normalize_data_item(row, "128812.2131", "event")) @@ -270,7 +297,7 @@ def test_supports_variant(schema: Schema) -> None: # row 2 contains Wei assert "evm" in normalized_rows[1][1] assert isinstance(normalized_rows[1][1]["evm"], Wei) - assert normalized_rows[1][1]["evm"] == 2**256-1 + assert normalized_rows[1][1]["evm"] == 2**256 - 1 # coerce row c_row, new_table = schema.coerce_row("eth", None, normalized_rows[0][1]) assert isinstance(c_row["evm"], Wei) @@ -281,13 +308,12 @@ def test_supports_variant(schema: Schema) -> None: # coerce row that should expand to variant c_row, new_table = schema.coerce_row("eth", None, normalized_rows[1][1]) assert isinstance(c_row["evm__v_str"], str) - assert c_row["evm__v_str"] == str(2**256-1) + assert c_row["evm__v_str"] == str(2**256 - 1) 
assert new_table["columns"]["evm__v_str"]["data_type"] == "text" assert new_table["columns"]["evm__v_str"]["variant"] is True def test_supports_recursive_variant(schema: Schema) -> None: - class RecursiveVariant(int): # provide __call__ for SupportVariant def __call__(self) -> Any: @@ -296,18 +322,16 @@ def __call__(self) -> Any: else: return ("div2", RecursiveVariant(self // 2)) - row = {"rv": RecursiveVariant(8)} c_row, new_table = schema.coerce_row("rec_variant", None, row) # this variant keeps expanding until the value is 1, we start from 8 so there are log2(8) == 3 divisions - col_name = "rv" + "__v_div2"*3 + col_name = "rv" + "__v_div2" * 3 assert c_row[col_name] == 1 assert new_table["columns"][col_name]["data_type"] == "bigint" assert new_table["columns"][col_name]["variant"] is True def test_supports_variant_autovariant_conflict(schema: Schema) -> None: - class PureVariant(int): def __init__(self, v: Any) -> None: self.v = v @@ -319,7 +343,7 @@ def __call__(self) -> Any: if isinstance(self.v, float): return ("text", self.v) - assert issubclass(PureVariant,int) + assert issubclass(PureVariant, int) rows = [{"pv": PureVariant(3377)}, {"pv": PureVariant(21.37)}] normalized_rows: List[Any] = [] for row in rows: @@ -413,9 +437,13 @@ def test_update_schema_table_prop_conflict(schema: Schema) -> None: def test_update_schema_column_conflict(schema: Schema) -> None: - tab1 = utils.new_table("tab1", write_disposition="append", columns=[ - {"name": "col1", "data_type": "text", "nullable": False}, - ]) + tab1 = utils.new_table( + "tab1", + write_disposition="append", + columns=[ + {"name": "col1", "data_type": "text", "nullable": False}, + ], + ) schema.update_table(tab1) tab1_u1 = deepcopy(tab1) # simulate column that had other datatype inferred @@ -508,15 +536,20 @@ def test_infer_on_incomplete_column(schema: Schema) -> None: schema.update_table(table) # make sure that column is still incomplete and has no default hints assert schema.get_table("table")["columns"]["I"] == { - 'name': 'I', - 'nullable': False, - 'primary_key': True, - 'x-special': 'spec' + "name": "I", + "nullable": False, + "primary_key": True, + "x-special": "spec", } timestamp_float = 78172.128 # add new column with preferred - row_1 = {"timestamp": timestamp_float, "confidence": "0.1", "I": "0xFF", "number": Decimal("128.67")} + row_1 = { + "timestamp": timestamp_float, + "confidence": "0.1", + "I": "0xFF", + "number": Decimal("128.67"), + } _, new_table = schema.coerce_row("table", None, row_1) assert "I" in new_table["columns"] i_column = new_table["columns"]["I"] diff --git a/tests/common/schema/test_merges.py b/tests/common/schema/test_merges.py index 64e90c7c21..0bb7818b31 100644 --- a/tests/common/schema/test_merges.py +++ b/tests/common/schema/test_merges.py @@ -2,38 +2,38 @@ from copy import copy, deepcopy from dlt.common.schema import Schema, utils -from dlt.common.schema.exceptions import CannotCoerceColumnException, CannotCoerceNullException, TablePropertiesConflictException +from dlt.common.schema.exceptions import ( + CannotCoerceColumnException, + CannotCoerceNullException, + TablePropertiesConflictException, +) from dlt.common.schema.typing import TStoredSchema, TTableSchema, TColumnSchema COL_1_HINTS: TColumnSchema = { # type: ignore[typeddict-unknown-key] - "cluster": False, - "foreign_key": True, - "data_type": "text", - "name": "test", - "x-special": True, - "x-special-int": 100, - "nullable": False, - "x-special-bool": False, - "prop": None - } + "cluster": False, + "foreign_key": True, + "data_type": 
"text", + "name": "test", + "x-special": True, + "x-special-int": 100, + "nullable": False, + "x-special-bool": False, + "prop": None, +} COL_1_HINTS_DEFAULTS: TColumnSchema = { # type: ignore[typeddict-unknown-key] - 'foreign_key': True, - 'data_type': 'text', - 'name': 'test', - 'x-special': True, - 'x-special-int': 100, - 'nullable': False, - "x-special-bool": False, - } - -COL_2_HINTS: TColumnSchema = { - "nullable": True, - "name": "test_2", - "primary_key": False + "foreign_key": True, + "data_type": "text", + "name": "test", + "x-special": True, + "x-special-int": 100, + "nullable": False, + "x-special-bool": False, } +COL_2_HINTS: TColumnSchema = {"nullable": True, "name": "test_2", "primary_key": False} + def test_check_column_defaults() -> None: assert utils.has_default_column_hint_value("data_type", "text") is False @@ -77,21 +77,17 @@ def test_remove_defaults_stored_schema() -> None: "description": "description", "resource": "🦚Table", "x-special": 128, - "columns": { - "test": COL_1_HINTS, - "test_2": COL_2_HINTS - } + "columns": {"test": COL_1_HINTS, "test_2": COL_2_HINTS}, } stored_schema: TStoredSchema = { # type: ignore[typeddict-unknown-key] "name": "schema", - "tables": { - "table": deepcopy(table), - "table_copy": deepcopy(table) - }, - "x-top-level": True + "tables": {"table": deepcopy(table), "table_copy": deepcopy(table)}, + "x-top-level": True, } # mock the case in table_copy where resource == table_name - stored_schema["tables"]["table_copy"]["resource"] = stored_schema["tables"]["table_copy"]["name"] = "table_copy" + stored_schema["tables"]["table_copy"]["resource"] = stored_schema["tables"]["table_copy"][ + "name" + ] = "table_copy" default_stored = utils.remove_defaults(stored_schema) # nullability always present @@ -141,13 +137,13 @@ def test_merge_columns() -> None: assert col_a == { "name": "test_2", "nullable": False, - 'cluster': False, - 'foreign_key': True, - 'data_type': 'text', - 'x-special': True, - 'x-special-int': 100, - 'x-special-bool': False, - 'prop': None + "cluster": False, + "foreign_key": True, + "data_type": "text", + "x-special": True, + "x-special-int": 100, + "x-special-bool": False, + "prop": None, } col_a = utils.merge_columns(copy(COL_1_HINTS), copy(COL_2_HINTS), merge_defaults=True) @@ -155,14 +151,14 @@ def test_merge_columns() -> None: assert col_a == { "name": "test_2", "nullable": True, - 'cluster': False, - 'foreign_key': True, - 'data_type': 'text', - 'x-special': True, - 'x-special-int': 100, - 'x-special-bool': False, - 'prop': None, - 'primary_key': False + "cluster": False, + "foreign_key": True, + "data_type": "text", + "x-special": True, + "x-special-int": 100, + "x-special-bool": False, + "prop": None, + "primary_key": False, } @@ -172,10 +168,7 @@ def test_diff_tables() -> None: "description": "description", "resource": "🦚Table", "x-special": 128, - "columns": { - "test": COL_1_HINTS, - "test_2": COL_2_HINTS - } + "columns": {"test": COL_1_HINTS, "test_2": COL_2_HINTS}, } empty = utils.new_table("table") del empty["resource"] @@ -193,29 +186,24 @@ def test_diff_tables() -> None: changed["name"] = "new name" partial = utils.diff_tables(deepcopy(table), changed) print(partial) - assert partial == { - "name": "new name", - "description": "new description", - "columns": {} - } + assert partial == {"name": "new name", "description": "new description", "columns": {}} # ignore identical table props existing = deepcopy(table) changed["write_disposition"] = "append" + changed["schema_contract"] = "freeze" partial = 
utils.diff_tables(deepcopy(existing), changed) assert partial == { "name": "new name", "description": "new description", "write_disposition": "append", - "columns": {} + "schema_contract": "freeze", + "columns": {}, } existing["write_disposition"] = "append" + existing["schema_contract"] = "freeze" partial = utils.diff_tables(deepcopy(existing), changed) - assert partial == { - "name": "new name", - "description": "new description", - "columns": {} - } + assert partial == {"name": "new name", "description": "new description", "columns": {}} # detect changed column existing = deepcopy(table) @@ -249,10 +237,7 @@ def test_diff_tables_conflicts() -> None: "parent": "parent", "description": "description", "x-special": 128, - "columns": { - "test": COL_1_HINTS, - "test_2": COL_2_HINTS - } + "columns": {"test": COL_1_HINTS, "test_2": COL_2_HINTS}, } other = utils.new_table("table_2") @@ -274,10 +259,7 @@ def test_merge_tables() -> None: "description": "description", "resource": "🦚Table", "x-special": 128, - "columns": { - "test": COL_1_HINTS, - "test_2": COL_2_HINTS - } + "columns": {"test": COL_1_HINTS, "test_2": COL_2_HINTS}, } changed = deepcopy(table) changed["x-special"] = 129 # type: ignore[typeddict-unknown-key] diff --git a/tests/common/schema/test_schema.py b/tests/common/schema/test_schema.py index 8b465d796e..54892eeae5 100644 --- a/tests/common/schema/test_schema.py +++ b/tests/common/schema/test_schema.py @@ -13,8 +13,18 @@ from dlt.common.typing import DictStrAny, StrAny from dlt.common.utils import uniq_id from dlt.common.schema import TColumnSchema, Schema, TStoredSchema, utils, TColumnHint -from dlt.common.schema.exceptions import InvalidSchemaName, ParentTableNotFoundException, SchemaEngineNoUpgradePathException -from dlt.common.schema.typing import LOADS_TABLE_NAME, VERSION_TABLE_NAME, TColumnName, TSimpleRegex, COLUMN_HINTS +from dlt.common.schema.exceptions import ( + InvalidSchemaName, + ParentTableNotFoundException, + SchemaEngineNoUpgradePathException, +) +from dlt.common.schema.typing import ( + LOADS_TABLE_NAME, + VERSION_TABLE_NAME, + TColumnName, + TSimpleRegex, + COLUMN_HINTS, +) from dlt.common.storages import SchemaStorage from tests.utils import autouse_test_storage, preserve_environ @@ -30,17 +40,15 @@ def schema_storage() -> SchemaStorage: SchemaStorageConfiguration(), explicit_value={ "import_schema_path": "tests/common/cases/schemas/rasa", - "external_schema_format": "json" - } + "external_schema_format": "json", + }, ) return SchemaStorage(C, makedirs=True) @pytest.fixture def schema_storage_no_import() -> SchemaStorage: - C = resolve_configuration( - SchemaStorageConfiguration() - ) + C = resolve_configuration(SchemaStorageConfiguration()) return SchemaStorage(C, makedirs=True) @@ -51,15 +59,16 @@ def schema() -> Schema: @pytest.fixture def cn_schema() -> Schema: - return Schema("column_default", { - "names": "tests.common.normalizers.custom_normalizers", - "json": { - "module": "tests.common.normalizers.custom_normalizers", - "config": { - "not_null": ["fake_id"] - } - } - }) + return Schema( + "column_default", + { + "names": "tests.common.normalizers.custom_normalizers", + "json": { + "module": "tests.common.normalizers.custom_normalizers", + "config": {"not_null": ["fake_id"]}, + }, + }, + ) def test_normalize_schema_name(schema: Schema) -> None: @@ -118,7 +127,9 @@ def test_simple_regex_validator() -> None: assert utils.simple_regex_validator(".", "k", "v", TSimpleRegex) is True # validate regex - assert utils.simple_regex_validator(".", "k", 
TSimpleRegex("re:^_record$"), TSimpleRegex) is True + assert ( + utils.simple_regex_validator(".", "k", TSimpleRegex("re:^_record$"), TSimpleRegex) is True + ) # invalid regex with pytest.raises(DictValidationException) as e: utils.simple_regex_validator(".", "k", "re:[[^_record$", TSimpleRegex) @@ -132,10 +143,10 @@ def test_simple_regex_validator() -> None: def test_load_corrupted_schema() -> None: - eth_v4: TStoredSchema = load_yml_case("schemas/eth/ethereum_schema_v4") - del eth_v4["tables"]["blocks"] + eth_v8: TStoredSchema = load_yml_case("schemas/eth/ethereum_schema_v8") + del eth_v8["tables"]["blocks"] with pytest.raises(ParentTableNotFoundException): - utils.validate_stored_schema(eth_v4) + utils.validate_stored_schema(eth_v8) def test_column_name_validator(schema: Schema) -> None: @@ -169,7 +180,7 @@ def test_schema_name() -> None: Schema("1_a") # too long with pytest.raises(InvalidSchemaName) as exc: - Schema("a"*65) + Schema("a" * 65) def test_create_schema_with_normalize_name() -> None: @@ -178,10 +189,15 @@ def test_create_schema_with_normalize_name() -> None: def test_schema_descriptions_and_annotations(schema_storage: SchemaStorage): - schema = SchemaStorage.load_schema_file(os.path.join(COMMON_TEST_CASES_PATH, "schemas/local"), "event", extensions=("yaml", )) + schema = SchemaStorage.load_schema_file( + os.path.join(COMMON_TEST_CASES_PATH, "schemas/local"), "event", extensions=("yaml",) + ) assert schema.tables["blocks"]["description"] == "Ethereum blocks" assert schema.tables["blocks"]["x-annotation"] == "this will be preserved on save" # type: ignore[typeddict-item] - assert schema.tables["blocks"]["columns"]["_dlt_load_id"]["description"] == "load id coming from the extractor" + assert ( + schema.tables["blocks"]["columns"]["_dlt_load_id"]["description"] + == "load id coming from the extractor" + ) assert schema.tables["blocks"]["columns"]["_dlt_load_id"]["x-column-annotation"] == "column annotation preserved on save" # type: ignore[typeddict-item] # mod and save @@ -194,7 +210,9 @@ def test_schema_descriptions_and_annotations(schema_storage: SchemaStorage): loaded_schema = schema_storage.load_schema("event") assert loaded_schema.tables["blocks"]["description"].endswith("Saved") assert loaded_schema.tables["blocks"]["x-annotation"].endswith("Saved") # type: ignore[typeddict-item] - assert loaded_schema.tables["blocks"]["columns"]["_dlt_load_id"]["description"].endswith("Saved") + assert loaded_schema.tables["blocks"]["columns"]["_dlt_load_id"]["description"].endswith( + "Saved" + ) assert loaded_schema.tables["blocks"]["columns"]["_dlt_load_id"]["x-column-annotation"].endswith("Saved") # type: ignore[typeddict-item] @@ -203,20 +221,37 @@ def test_replace_schema_content() -> None: eth_v5: TStoredSchema = load_yml_case("schemas/eth/ethereum_schema_v5") eth_v5["imported_version_hash"] = "IMP_HASH" schema_eth = Schema.from_dict(eth_v5) # type: ignore[arg-type] - schema_eth.bump_version() schema.replace_schema_content(schema_eth) assert schema_eth.stored_version_hash == schema.stored_version_hash assert schema_eth.version == schema.version assert schema_eth.version_hash == schema.version_hash assert schema_eth._imported_version_hash == schema._imported_version_hash - -@pytest.mark.parametrize("columns,hint,value", [ - (["_dlt_id", "_dlt_root_id", "_dlt_load_id", "_dlt_parent_id", "_dlt_list_idx"], "nullable", False), - (["_dlt_id"], "unique", True), - (["_dlt_parent_id"], "foreign_key", True), -]) -def test_relational_normalizer_schema_hints(columns: Sequence[str], hint: 
str, value: bool, schema_storage: SchemaStorage) -> None: + # replace content of modified schema + eth_v5 = load_yml_case("schemas/eth/ethereum_schema_v5") + schema_eth = Schema.from_dict(eth_v5, bump_version=False) # type: ignore[arg-type] + assert schema_eth.version_hash != schema_eth.stored_version_hash + # replace content does not bump version + schema = Schema("simple") + schema.replace_schema_content(schema_eth) + assert schema.version_hash != schema.stored_version_hash + + +@pytest.mark.parametrize( + "columns,hint,value", + [ + ( + ["_dlt_id", "_dlt_root_id", "_dlt_load_id", "_dlt_parent_id", "_dlt_list_idx"], + "nullable", + False, + ), + (["_dlt_id"], "unique", True), + (["_dlt_parent_id"], "foreign_key", True), + ], +) +def test_relational_normalizer_schema_hints( + columns: Sequence[str], hint: str, value: bool, schema_storage: SchemaStorage +) -> None: schema = schema_storage.load_schema("event") for name in columns: # infer column hints @@ -241,13 +276,17 @@ def test_save_store_schema(schema: Schema, schema_storage: SchemaStorage) -> Non assert_new_schema_values(schema_copy) -def test_save_store_schema_custom_normalizers(cn_schema: Schema, schema_storage: SchemaStorage) -> None: +def test_save_store_schema_custom_normalizers( + cn_schema: Schema, schema_storage: SchemaStorage +) -> None: schema_storage.save_schema(cn_schema) schema_copy = schema_storage.load_schema(cn_schema.name) assert_new_schema_values_custom_normalizers(schema_copy) -def test_save_load_incomplete_column(schema: Schema, schema_storage_no_import: SchemaStorage) -> None: +def test_save_load_incomplete_column( + schema: Schema, schema_storage_no_import: SchemaStorage +) -> None: # make sure that incomplete column is saved and restored without default hints incomplete_col = utils.new_column("I", nullable=False) incomplete_col["primary_key"] = True @@ -257,10 +296,10 @@ def test_save_load_incomplete_column(schema: Schema, schema_storage_no_import: S schema_storage_no_import.save_schema(schema) schema_copy = schema_storage_no_import.load_schema("event") assert schema_copy.get_table("table")["columns"]["I"] == { - 'name': 'I', - 'nullable': False, - 'primary_key': True, - 'x-special': 'spec' + "name": "I", + "nullable": False, + "primary_key": True, + "x-special": "spec", } @@ -279,21 +318,30 @@ def test_upgrade_engine_v1_schema() -> None: assert schema_dict["engine_version"] == 2 upgraded = utils.migrate_schema(schema_dict, from_engine=2, to_engine=4) assert upgraded["engine_version"] == 4 - utils.validate_stored_schema(upgraded) # upgrade 1 -> 4 schema_dict = load_json_case("schemas/ev1/event.schema") assert schema_dict["engine_version"] == 1 upgraded = utils.migrate_schema(schema_dict, from_engine=1, to_engine=4) assert upgraded["engine_version"] == 4 - utils.validate_stored_schema(upgraded) # upgrade 1 -> 6 schema_dict = load_json_case("schemas/ev1/event.schema") assert schema_dict["engine_version"] == 1 upgraded = utils.migrate_schema(schema_dict, from_engine=1, to_engine=6) assert upgraded["engine_version"] == 6 - utils.validate_stored_schema(upgraded) + + # upgrade 1 -> 7 + schema_dict = load_json_case("schemas/ev1/event.schema") + assert schema_dict["engine_version"] == 1 + upgraded = utils.migrate_schema(schema_dict, from_engine=1, to_engine=7) + assert upgraded["engine_version"] == 7 + + # upgrade 1 -> 8 + schema_dict = load_json_case("schemas/ev1/event.schema") + assert schema_dict["engine_version"] == 1 + upgraded = utils.migrate_schema(schema_dict, from_engine=1, to_engine=8) + assert 
upgraded["engine_version"] == 8 def test_unknown_engine_upgrade() -> None: @@ -306,7 +354,9 @@ def test_unknown_engine_upgrade() -> None: def test_preserve_column_order(schema: Schema, schema_storage: SchemaStorage) -> None: # python dicts are ordered from v3.6, add 50 column with random names - update: List[TColumnSchema] = [schema._infer_column(uniq_id(), pendulum.now().timestamp()) for _ in range(50)] + update: List[TColumnSchema] = [ + schema._infer_column(uniq_id(), pendulum.now().timestamp()) for _ in range(50) + ] schema.update_table(utils.new_table("event_test_order", columns=update)) def verify_items(table, update) -> None: @@ -321,7 +371,9 @@ def verify_items(table, update) -> None: table = loaded_schema.get_table_columns("event_test_order") verify_items(table, update) # add more columns - update2: List[TColumnSchema] = [schema._infer_column(uniq_id(), pendulum.now().timestamp()) for _ in range(50)] + update2: List[TColumnSchema] = [ + schema._infer_column(uniq_id(), pendulum.now().timestamp()) for _ in range(50) + ] loaded_schema.update_table(utils.new_table("event_test_order", columns=update2)) table = loaded_schema.get_table_columns("event_test_order") verify_items(table, update + update2) @@ -329,7 +381,7 @@ def verify_items(table, update) -> None: schema_storage.save_schema(loaded_schema) loaded_schema = schema_storage.load_schema("event") table = loaded_schema.get_table_columns("event_test_order") - verify_items(table, update + update2) + verify_items(table, update + update2) def test_get_schema_new_exist(schema_storage: SchemaStorage) -> None: @@ -337,16 +389,35 @@ def test_get_schema_new_exist(schema_storage: SchemaStorage) -> None: schema_storage.load_schema("wrongschema") -@pytest.mark.parametrize("columns,hint,value", [ - (["timestamp", "_timestamp", "_dist_key", "_dlt_id", "_dlt_root_id", "_dlt_load_id", "_dlt_parent_id", "_dlt_list_idx", "sender_id"], "nullable", False), - (["confidence", "_sender_id"], "nullable", True), - (["timestamp", "_timestamp"], "partition", True), - (["_dist_key", "sender_id"], "cluster", True), - (["_dlt_id"], "unique", True), - (["_dlt_parent_id"], "foreign_key", True), - (["timestamp", "_timestamp"], "sort", True), -]) -def test_rasa_event_hints(columns: Sequence[str], hint: str, value: bool, schema_storage: SchemaStorage) -> None: +@pytest.mark.parametrize( + "columns,hint,value", + [ + ( + [ + "timestamp", + "_timestamp", + "_dist_key", + "_dlt_id", + "_dlt_root_id", + "_dlt_load_id", + "_dlt_parent_id", + "_dlt_list_idx", + "sender_id", + ], + "nullable", + False, + ), + (["confidence", "_sender_id"], "nullable", True), + (["timestamp", "_timestamp"], "partition", True), + (["_dist_key", "sender_id"], "cluster", True), + (["_dlt_id"], "unique", True), + (["_dlt_parent_id"], "foreign_key", True), + (["timestamp", "_timestamp"], "sort", True), + ], +) +def test_rasa_event_hints( + columns: Sequence[str], hint: str, value: bool, schema_storage: SchemaStorage +) -> None: schema = schema_storage.load_schema("event") for name in columns: # infer column hints @@ -414,10 +485,16 @@ def test_merge_hints(schema: Schema) -> None: schema._settings["default_hints"] = {} schema._compiled_hints = {} new_hints = { - "not_null": ["_dlt_id", "_dlt_root_id", "_dlt_parent_id", "_dlt_list_idx", "re:^_dlt_load_id$"], - "foreign_key": ["re:^_dlt_parent_id$"], - "unique": ["re:^_dlt_id$"] - } + "not_null": [ + "_dlt_id", + "_dlt_root_id", + "_dlt_parent_id", + "_dlt_list_idx", + "re:^_dlt_load_id$", + ], + "foreign_key": ["re:^_dlt_parent_id$"], + "unique": 
["re:^_dlt_id$"], + } schema.merge_hints(new_hints) # type: ignore[arg-type] assert schema._settings["default_hints"] == new_hints @@ -428,17 +505,21 @@ def test_merge_hints(schema: Schema) -> None: assert set(new_hints[k]) == set(schema._settings["default_hints"][k]) # type: ignore[index] # add new stuff - new_new_hints = { - "not_null": ["timestamp"], - "primary_key": ["id"] - } + new_new_hints = {"not_null": ["timestamp"], "primary_key": ["id"]} schema.merge_hints(new_new_hints) # type: ignore[arg-type] expected_hints = { - "not_null": ["_dlt_id", "_dlt_root_id", "_dlt_parent_id", "_dlt_list_idx", "re:^_dlt_load_id$", "timestamp"], - "foreign_key": ["re:^_dlt_parent_id$"], - "unique": ["re:^_dlt_id$"], - "primary_key": ["id"] - } + "not_null": [ + "_dlt_id", + "_dlt_root_id", + "_dlt_parent_id", + "_dlt_list_idx", + "re:^_dlt_load_id$", + "timestamp", + ], + "foreign_key": ["re:^_dlt_parent_id$"], + "unique": ["re:^_dlt_id$"], + "primary_key": ["id"], + } assert len(expected_hints) == len(schema._settings["default_hints"]) for k in expected_hints: assert set(expected_hints[k]) == set(schema._settings["default_hints"][k]) # type: ignore[index] @@ -449,8 +530,8 @@ def test_default_table_resource() -> None: eth_v5 = load_yml_case("schemas/eth/ethereum_schema_v5") tables = Schema.from_dict(eth_v5).tables - assert tables['blocks']['resource'] == 'blocks' - assert all([t.get('resource') is None for t in tables.values() if t.get('parent')]) + assert tables["blocks"]["resource"] == "blocks" + assert all([t.get("resource") is None for t in tables.values() if t.get("parent")]) def test_data_tables(schema: Schema, schema_storage: SchemaStorage) -> None: @@ -460,8 +541,41 @@ def test_data_tables(schema: Schema, schema_storage: SchemaStorage) -> None: # with tables schema = schema_storage.load_schema("event") # some of them are incomplete - assert set(schema.tables.keys()) == set([LOADS_TABLE_NAME, VERSION_TABLE_NAME, 'event_slot', 'event_user', 'event_bot']) - assert [t["name"] for t in schema.data_tables()] == ['event_slot'] + assert set(schema.tables.keys()) == set( + [LOADS_TABLE_NAME, VERSION_TABLE_NAME, "event_slot", "event_user", "event_bot"] + ) + assert [t["name"] for t in schema.data_tables()] == ["event_slot"] + assert schema.is_new_table("event_slot") is False + assert schema.is_new_table("new_table") is True + assert schema.is_new_table("event_user") is True + assert len(schema.get_table_columns("event_user")) == 0 + assert len(schema.get_table_columns("event_user", include_incomplete=True)) == 0 + + # add incomplete column + schema.update_table( + { + "name": "event_user", + "columns": {"name": {"name": "name", "primary_key": True, "nullable": False}}, + } + ) + assert [t["name"] for t in schema.data_tables()] == ["event_slot"] + assert schema.is_new_table("event_user") is True + assert len(schema.get_table_columns("event_user")) == 0 + assert len(schema.get_table_columns("event_user", include_incomplete=True)) == 1 + + # make it complete + schema.update_table( + {"name": "event_user", "columns": {"name": {"name": "name", "data_type": "text"}}} + ) + assert [t["name"] for t in schema.data_tables()] == ["event_slot", "event_user"] + assert [t["name"] for t in schema.data_tables(include_incomplete=True)] == [ + "event_slot", + "event_user", + "event_bot", + ] + assert schema.is_new_table("event_user") is False + assert len(schema.get_table_columns("event_user")) == 1 + assert len(schema.get_table_columns("event_user", include_incomplete=True)) == 1 def 
test_write_disposition(schema_storage: SchemaStorage) -> None: @@ -486,28 +600,39 @@ def test_write_disposition(schema_storage: SchemaStorage) -> None: def test_compare_columns() -> None: - table = utils.new_table("test_table", columns=[ - {"name": "col1", "data_type": "text", "nullable": True}, - {"name": "col2", "data_type": "text", "nullable": False}, - {"name": "col3", "data_type": "timestamp", "nullable": True}, - {"name": "col4", "data_type": "timestamp", "nullable": True} - ]) - table2 = utils.new_table("test_table", columns=[ - {"name": "col1", "data_type": "text", "nullable": False} - ]) + table = utils.new_table( + "test_table", + columns=[ + {"name": "col1", "data_type": "text", "nullable": True}, + {"name": "col2", "data_type": "text", "nullable": False}, + {"name": "col3", "data_type": "timestamp", "nullable": True}, + {"name": "col4", "data_type": "timestamp", "nullable": True}, + ], + ) + table2 = utils.new_table( + "test_table", columns=[{"name": "col1", "data_type": "text", "nullable": False}] + ) # columns identical with self for c in table["columns"].values(): assert utils.compare_complete_columns(c, c) is True - assert utils.compare_complete_columns(table["columns"]["col3"], table["columns"]["col4"]) is False + assert ( + utils.compare_complete_columns(table["columns"]["col3"], table["columns"]["col4"]) is False + ) # data type may not differ - assert utils.compare_complete_columns(table["columns"]["col1"], table["columns"]["col3"]) is False + assert ( + utils.compare_complete_columns(table["columns"]["col1"], table["columns"]["col3"]) is False + ) # nullability may differ - assert utils.compare_complete_columns(table["columns"]["col1"], table2["columns"]["col1"]) is True + assert ( + utils.compare_complete_columns(table["columns"]["col1"], table2["columns"]["col1"]) is True + ) # any of the hints may differ for hint in COLUMN_HINTS: table["columns"]["col3"][hint] = True # type: ignore[typeddict-unknown-key] # name may not differ - assert utils.compare_complete_columns(table["columns"]["col3"], table["columns"]["col4"]) is False + assert ( + utils.compare_complete_columns(table["columns"]["col3"], table["columns"]["col4"]) is False + ) def test_normalize_table_identifiers() -> None: @@ -518,24 +643,16 @@ def test_normalize_table_identifiers() -> None: issues_table = deepcopy(schema.tables["issues"]) # this schema is already normalized so normalization is idempotent assert schema.tables["issues"] == schema.normalize_table_identifiers(issues_table) - assert schema.tables["issues"] == schema.normalize_table_identifiers(schema.normalize_table_identifiers(issues_table)) + assert schema.tables["issues"] == schema.normalize_table_identifiers( + schema.normalize_table_identifiers(issues_table) + ) def test_normalize_table_identifiers_merge_columns() -> None: # create conflicting columns table_create = [ - { - "name": "case", - "data_type": "bigint", - "nullable": False, - "x-description": "desc" - }, - { - "name": "Case", - "data_type": "double", - "nullable": True, - "primary_key": True - }, + {"name": "case", "data_type": "bigint", "nullable": False, "x-description": "desc"}, + {"name": "Case", "data_type": "double", "nullable": True, "primary_key": True}, ] # schema normalizing to snake case will conflict on case and Case table = utils.new_table("blend", columns=table_create) # type: ignore[arg-type] @@ -543,18 +660,21 @@ def test_normalize_table_identifiers_merge_columns() -> None: # only one column assert len(norm_table["columns"]) == 1 assert norm_table["columns"]["case"] 
== { - 'nullable': False, # remove default, preserve non default - 'primary_key': True, - 'name': 'case', - 'data_type': 'double', - 'x-description': 'desc' + "nullable": False, # remove default, preserve non default + "primary_key": True, + "name": "case", + "data_type": "double", + "x-description": "desc", } def assert_new_schema_values_custom_normalizers(schema: Schema) -> None: # check normalizers config assert schema._normalizers_config["names"] == "tests.common.normalizers.custom_normalizers" - assert schema._normalizers_config["json"]["module"] == "tests.common.normalizers.custom_normalizers" + assert ( + schema._normalizers_config["json"]["module"] + == "tests.common.normalizers.custom_normalizers" + ) # check if schema was extended by json normalizer assert ["fake_id"] == schema.settings["default_hints"]["not_null"] # call normalizers @@ -573,16 +693,21 @@ def assert_new_schema_values(schema: Schema) -> None: assert schema.stored_version == 1 assert schema.stored_version_hash is not None assert schema.version_hash is not None - assert schema.ENGINE_VERSION == 6 + assert schema.ENGINE_VERSION == 8 + assert schema._stored_previous_hashes == [] assert len(schema.settings["default_hints"]) > 0 # check settings - assert utils.standard_type_detections() == schema.settings["detections"] == schema._type_detections + assert ( + utils.standard_type_detections() == schema.settings["detections"] == schema._type_detections + ) # check normalizers config assert schema._normalizers_config["names"] == "snake_case" assert schema._normalizers_config["json"]["module"] == "dlt.common.normalizers.json.relational" assert isinstance(schema.naming, snake_case.NamingConvention) # check if schema was extended by json normalizer - assert set(["_dlt_id", "_dlt_root_id", "_dlt_parent_id", "_dlt_list_idx", "_dlt_load_id"]).issubset(schema.settings["default_hints"]["not_null"]) + assert set( + ["_dlt_id", "_dlt_root_id", "_dlt_parent_id", "_dlt_list_idx", "_dlt_load_id"] + ).issubset(schema.settings["default_hints"]["not_null"]) # call normalizers assert schema.naming.normalize_identifier("A") == "a" assert schema.naming.normalize_path("A__B") == "a__b" @@ -605,35 +730,62 @@ def test_group_tables_by_resource(schema: Schema) -> None: schema.update_table(utils.new_table("b_events", columns=[])) schema.update_table(utils.new_table("c_products", columns=[], resource="products")) schema.update_table(utils.new_table("a_events__1", columns=[], parent_table_name="a_events")) - schema.update_table(utils.new_table("a_events__1__2", columns=[], parent_table_name="a_events__1")) + schema.update_table( + utils.new_table("a_events__1__2", columns=[], parent_table_name="a_events__1") + ) schema.update_table(utils.new_table("b_events__1", columns=[], parent_table_name="b_events")) # All resources without filter expected_tables = { - "a_events": [schema.tables["a_events"], schema.tables["a_events__1"], schema.tables["a_events__1__2"]], + "a_events": [ + schema.tables["a_events"], + schema.tables["a_events__1"], + schema.tables["a_events__1__2"], + ], "b_events": [schema.tables["b_events"], schema.tables["b_events__1"]], "products": [schema.tables["c_products"]], "_dlt_version": [schema.tables["_dlt_version"]], - "_dlt_loads": [schema.tables["_dlt_loads"]] + "_dlt_loads": [schema.tables["_dlt_loads"]], } result = utils.group_tables_by_resource(schema.tables) assert result == expected_tables # With resource filter - result = utils.group_tables_by_resource(schema.tables, 
pattern=utils.compile_simple_regex(TSimpleRegex("re:[a-z]_events"))) + result = utils.group_tables_by_resource( + schema.tables, pattern=utils.compile_simple_regex(TSimpleRegex("re:[a-z]_events")) + ) assert result == { - "a_events": [schema.tables["a_events"], schema.tables["a_events__1"], schema.tables["a_events__1__2"]], + "a_events": [ + schema.tables["a_events"], + schema.tables["a_events__1"], + schema.tables["a_events__1__2"], + ], "b_events": [schema.tables["b_events"], schema.tables["b_events__1"]], } # With resources that has many top level tables schema.update_table(utils.new_table("mc_products", columns=[], resource="products")) - schema.update_table(utils.new_table("mc_products__sub", columns=[], parent_table_name="mc_products")) - result = utils.group_tables_by_resource(schema.tables, pattern=utils.compile_simple_regex(TSimpleRegex("products"))) + schema.update_table( + utils.new_table("mc_products__sub", columns=[], parent_table_name="mc_products") + ) + result = utils.group_tables_by_resource( + schema.tables, pattern=utils.compile_simple_regex(TSimpleRegex("products")) + ) # both tables with resource "products" must be here - assert result == {'products': [ - {'columns': {}, 'name': 'c_products', 'resource': 'products', 'write_disposition': 'append'}, - {'columns': {}, 'name': 'mc_products', 'resource': 'products', 'write_disposition': 'append'}, - {'columns': {}, 'name': 'mc_products__sub', 'parent': 'mc_products'} + assert result == { + "products": [ + { + "columns": {}, + "name": "c_products", + "resource": "products", + "write_disposition": "append", + }, + { + "columns": {}, + "name": "mc_products", + "resource": "products", + "write_disposition": "append", + }, + {"columns": {}, "name": "mc_products__sub", "parent": "mc_products"}, ] } diff --git a/tests/common/schema/test_schema_contract.py b/tests/common/schema/test_schema_contract.py new file mode 100644 index 0000000000..32f9583b26 --- /dev/null +++ b/tests/common/schema/test_schema_contract.py @@ -0,0 +1,356 @@ +from typing import cast + +import pytest +import copy + +from dlt.common.schema import Schema, DEFAULT_SCHEMA_CONTRACT_MODE, TSchemaContractDict +from dlt.common.schema.exceptions import DataValidationError +from dlt.common.schema.typing import TTableSchema + + +def get_schema() -> Schema: + s = Schema("event") + + columns = { + "column_1": {"name": "column_1", "data_type": "text"}, + "column_2": {"name": "column_2", "data_type": "bigint", "is_variant": True}, + } + + incomplete_columns = { + "incomplete_column_1": { + "name": "incomplete_column_1", + }, + "incomplete_column_2": { + "name": "incomplete_column_2", + }, + } + + # add some tables + s.update_table(cast(TTableSchema, {"name": "tables", "columns": columns})) + + s.update_table( + cast(TTableSchema, {"name": "child_table", "parent": "tables", "columns": columns}) + ) + + s.update_table(cast(TTableSchema, {"name": "incomplete_table", "columns": incomplete_columns})) + + s.update_table( + cast(TTableSchema, {"name": "mixed_table", "columns": {**incomplete_columns, **columns}}) + ) + + s.update_table( + cast( + TTableSchema, + { + "name": "evolve_once_table", + "x-normalizer": {"evolve-columns-once": True}, + "columns": {**incomplete_columns, **columns}, + }, + ) + ) + + return s + + +def test_resolve_contract_settings() -> None: + # defaults + schema = get_schema() + assert schema.resolve_contract_settings_for_table("tables") == DEFAULT_SCHEMA_CONTRACT_MODE + assert schema.resolve_contract_settings_for_table("child_table") == 
DEFAULT_SCHEMA_CONTRACT_MODE + + # table specific full setting + schema = get_schema() + schema.tables["tables"]["schema_contract"] = "freeze" + assert schema.resolve_contract_settings_for_table("tables") == { + "tables": "freeze", + "columns": "freeze", + "data_type": "freeze", + } + assert schema.resolve_contract_settings_for_table("child_table") == { + "tables": "freeze", + "columns": "freeze", + "data_type": "freeze", + } + + # table specific single setting + schema = get_schema() + schema.tables["tables"]["schema_contract"] = { + "tables": "freeze", + "columns": "discard_value", + } + assert schema.resolve_contract_settings_for_table("tables") == { + "tables": "freeze", + "columns": "discard_value", + "data_type": "evolve", + } + assert schema.resolve_contract_settings_for_table("child_table") == { + "tables": "freeze", + "columns": "discard_value", + "data_type": "evolve", + } + + # schema specific full setting + schema = get_schema() + schema._settings["schema_contract"] = "freeze" + assert schema.resolve_contract_settings_for_table("tables") == { + "tables": "freeze", + "columns": "freeze", + "data_type": "freeze", + } + assert schema.resolve_contract_settings_for_table("child_table") == { + "tables": "freeze", + "columns": "freeze", + "data_type": "freeze", + } + + # schema specific single setting + schema = get_schema() + schema._settings["schema_contract"] = { + "tables": "freeze", + "columns": "discard_value", + } + assert schema.resolve_contract_settings_for_table("tables") == { + "tables": "freeze", + "columns": "discard_value", + "data_type": "evolve", + } + assert schema.resolve_contract_settings_for_table("child_table") == { + "tables": "freeze", + "columns": "discard_value", + "data_type": "evolve", + } + + # mixed settings: table setting always prevails + schema = get_schema() + schema._settings["schema_contract"] = "freeze" + schema.tables["tables"]["schema_contract"] = { + "tables": "evolve", + "columns": "discard_value", + } + assert schema.resolve_contract_settings_for_table("tables") == { + "tables": "evolve", + "columns": "discard_value", + "data_type": "evolve", + } + assert schema.resolve_contract_settings_for_table("child_table") == { + "tables": "evolve", + "columns": "discard_value", + "data_type": "evolve", + } + + +# ensure other settings do not interfere with the main setting we are testing +base_settings = [ + {"tables": "evolve", "columns": "evolve", "data_type": "evolve"}, + {"tables": "discard_row", "columns": "discard_row", "data_type": "discard_row"}, + {"tables": "discard_value", "columns": "discard_value", "data_type": "discard_value"}, + {"tables": "freeze", "columns": "freeze", "data_type": "freeze"}, +] + + +@pytest.mark.parametrize("base_settings", base_settings) +def test_check_adding_table(base_settings) -> None: + schema = get_schema() + new_table = copy.deepcopy(schema.tables["tables"]) + new_table["name"] = "new_table" + + # + # check adding new table + # + partial, filters = schema.apply_schema_contract( + cast(TSchemaContractDict, {**base_settings, **{"tables": "evolve"}}), new_table + ) + assert (partial, filters) == (new_table, []) + partial, filters = schema.apply_schema_contract( + cast(TSchemaContractDict, {**base_settings, **{"tables": "discard_row"}}), new_table + ) + assert (partial, filters) == (None, [("tables", "new_table", "discard_row")]) + partial, filters = schema.apply_schema_contract( + cast(TSchemaContractDict, {**base_settings, **{"tables": "discard_value"}}), new_table + ) + assert (partial, filters) == (None, 
[("tables", "new_table", "discard_value")]) + partial, filters = schema.apply_schema_contract( + cast(TSchemaContractDict, {**base_settings, **{"tables": "freeze"}}), + new_table, + raise_on_freeze=False, + ) + assert (partial, filters) == (None, [("tables", "new_table", "freeze")]) + + with pytest.raises(DataValidationError) as val_ex: + schema.apply_schema_contract( + cast(TSchemaContractDict, {**base_settings, **{"tables": "freeze"}}), + new_table, + data_item={"item": 1}, + ) + assert val_ex.value.schema_name == schema.name + assert val_ex.value.table_name == "new_table" + assert val_ex.value.column_name is None + assert val_ex.value.schema_entity == "tables" + assert val_ex.value.contract_mode == "freeze" + assert val_ex.value.table_schema is None # there's no validating schema on new table + assert val_ex.value.data_item == {"item": 1} + + +@pytest.mark.parametrize("base_settings", base_settings) +def test_check_adding_new_columns(base_settings) -> None: + schema = get_schema() + + def assert_new_column(table_update: TTableSchema, column_name: str) -> None: + popped_table_update = copy.deepcopy(table_update) + popped_table_update["columns"].pop(column_name) + + partial, filters = schema.apply_schema_contract( + cast(TSchemaContractDict, {**base_settings, **{"columns": "evolve"}}), + copy.deepcopy(table_update), + ) + assert (partial, filters) == (table_update, []) + partial, filters = schema.apply_schema_contract( + cast(TSchemaContractDict, {**base_settings, **{"columns": "discard_row"}}), + copy.deepcopy(table_update), + ) + assert (partial, filters) == ( + popped_table_update, + [("columns", column_name, "discard_row")], + ) + partial, filters = schema.apply_schema_contract( + cast(TSchemaContractDict, {**base_settings, **{"columns": "discard_value"}}), + copy.deepcopy(table_update), + ) + assert (partial, filters) == ( + popped_table_update, + [("columns", column_name, "discard_value")], + ) + partial, filters = schema.apply_schema_contract( + cast(TSchemaContractDict, {**base_settings, **{"columns": "freeze"}}), + copy.deepcopy(table_update), + raise_on_freeze=False, + ) + assert (partial, filters) == (popped_table_update, [("columns", column_name, "freeze")]) + + with pytest.raises(DataValidationError) as val_ex: + schema.apply_schema_contract( + cast(TSchemaContractDict, {**base_settings, **{"columns": "freeze"}}), + copy.deepcopy(table_update), + {column_name: 1}, + ) + assert val_ex.value.schema_name == schema.name + assert val_ex.value.table_name == table_update["name"] + assert val_ex.value.column_name == column_name + assert val_ex.value.schema_entity == "columns" + assert val_ex.value.contract_mode == "freeze" + assert val_ex.value.table_schema == schema.get_table(table_update["name"]) + assert val_ex.value.data_item == {column_name: 1} + + # + # check adding new column + # + table_update: TTableSchema = { + "name": "tables", + "columns": {"new_column": {"name": "new_column", "data_type": "text"}}, + } + assert_new_column(table_update, "new_column") + + # + # check adding new column if target column is not complete + # + table_update = { + "name": "mixed_table", + "columns": { + "incomplete_column_1": { + "name": "incomplete_column_1", + } + }, + } + assert_new_column(table_update, "incomplete_column_1") + + # + # check x-normalize evolve_once behaving as evolve override + # + table_update = { + "name": "evolve_once_table", + "columns": { + "new_column": {"name": "new_column", "data_type": "text"}, + "incomplete_column_1": { + "name": "incomplete_column_1", + }, + }, + } 
+ partial, filters = schema.apply_schema_contract(base_settings, copy.deepcopy(table_update)) + assert (partial, filters) == (table_update, []) + + +def test_check_adding_new_variant() -> None: + schema = get_schema() + + # + # check adding new variant column + # + table_update: TTableSchema = { + "name": "tables", + "columns": { + "column_2_variant": {"name": "column_2_variant", "data_type": "bigint", "variant": True} + }, + } + popped_table_update = copy.deepcopy(table_update) + popped_table_update["columns"].pop("column_2_variant") + + partial, filters = schema.apply_schema_contract( + cast(TSchemaContractDict, {**DEFAULT_SCHEMA_CONTRACT_MODE, **{"data_type": "evolve"}}), + copy.deepcopy(table_update), + ) + assert (partial, filters) == (table_update, []) + partial, filters = schema.apply_schema_contract( + cast(TSchemaContractDict, {**DEFAULT_SCHEMA_CONTRACT_MODE, **{"data_type": "discard_row"}}), + copy.deepcopy(table_update), + ) + assert (partial, filters) == ( + popped_table_update, + [("columns", "column_2_variant", "discard_row")], + ) + partial, filters = schema.apply_schema_contract( + cast( + TSchemaContractDict, {**DEFAULT_SCHEMA_CONTRACT_MODE, **{"data_type": "discard_value"}} + ), + copy.deepcopy(table_update), + ) + assert (partial, filters) == ( + popped_table_update, + [("columns", "column_2_variant", "discard_value")], + ) + partial, filters = schema.apply_schema_contract( + cast(TSchemaContractDict, {**DEFAULT_SCHEMA_CONTRACT_MODE, **{"data_type": "freeze"}}), + copy.deepcopy(table_update), + raise_on_freeze=False, + ) + assert (partial, filters) == (popped_table_update, [("columns", "column_2_variant", "freeze")]) + + with pytest.raises(DataValidationError) as val_ex: + schema.apply_schema_contract( + cast(TSchemaContractDict, {**DEFAULT_SCHEMA_CONTRACT_MODE, **{"data_type": "freeze"}}), + copy.deepcopy(table_update), + ) + assert val_ex.value.schema_name == schema.name + assert val_ex.value.table_name == table_update["name"] + assert val_ex.value.column_name == "column_2_variant" + assert val_ex.value.schema_entity == "data_type" + assert val_ex.value.contract_mode == "freeze" + assert val_ex.value.table_schema == schema.get_table(table_update["name"]) + assert val_ex.value.data_item is None # we do not pass it to apply_schema_contract + + # variants are not new columns - new data types + partial, filters = schema.apply_schema_contract( + cast( + TSchemaContractDict, + {**DEFAULT_SCHEMA_CONTRACT_MODE, **{"data_type": "evolve", "columns": "freeze"}}, + ), + copy.deepcopy(table_update), + ) + assert (partial, filters) == (table_update, []) + + # evolve once does not apply to variant evolution + table_update["name"] = "evolve_once_table" + with pytest.raises(DataValidationError): + schema.apply_schema_contract( + cast(TSchemaContractDict, {**DEFAULT_SCHEMA_CONTRACT_MODE, **{"data_type": "freeze"}}), + copy.deepcopy(table_update), + ) diff --git a/tests/common/schema/test_versioning.py b/tests/common/schema/test_versioning.py index 1bfaaa5da2..5b794f51ee 100644 --- a/tests/common/schema/test_versioning.py +++ b/tests/common/schema/test_versioning.py @@ -1,5 +1,6 @@ import pytest import yaml +from copy import deepcopy from dlt.common import json from dlt.common.schema import utils @@ -83,10 +84,10 @@ def test_infer_column_bumps_version() -> None: def test_preserve_version_on_load() -> None: - eth_v6: TStoredSchema = load_yml_case("schemas/eth/ethereum_schema_v6") - version = eth_v6["version"] - version_hash = eth_v6["version_hash"] - schema = Schema.from_dict(eth_v6) # 
type: ignore[arg-type] + eth_v8: TStoredSchema = load_yml_case("schemas/eth/ethereum_schema_v8") + version = eth_v8["version"] + version_hash = eth_v8["version_hash"] + schema = Schema.from_dict(eth_v8) # type: ignore[arg-type] # version should not be bumped assert version_hash == schema._stored_version_hash assert version_hash == schema.version_hash @@ -95,8 +96,8 @@ def test_preserve_version_on_load() -> None: @pytest.mark.parametrize("remove_defaults", [True, False]) def test_version_preserve_on_reload(remove_defaults: bool) -> None: - eth_v6: TStoredSchema = load_yml_case("schemas/eth/ethereum_schema_v6") - schema = Schema.from_dict(eth_v6) # type: ignore[arg-type] + eth_v8: TStoredSchema = load_yml_case("schemas/eth/ethereum_schema_v8") + schema = Schema.from_dict(eth_v8) # type: ignore[arg-type] to_save_dict = schema.to_dict(remove_defaults=remove_defaults) assert schema.stored_version == to_save_dict["version"] @@ -122,3 +123,31 @@ def test_version_preserve_on_reload(remove_defaults: bool) -> None: saved_rasa_schema = Schema.from_dict(yaml.safe_load(rasa_yml)) assert saved_rasa_schema.stored_version == rasa_schema.stored_version assert saved_rasa_schema.stored_version_hash == rasa_schema.stored_version_hash + + +def test_create_ancestry() -> None: + eth_v8: TStoredSchema = load_yml_case("schemas/eth/ethereum_schema_v8") + schema = Schema.from_dict(eth_v8) # type: ignore[arg-type] + assert schema._stored_previous_hashes == ["yjMtV4Zv0IJlfR5DPMwuXxGg8BRhy7E79L26XAHWEGE="] + version = schema._stored_version + + # modify save and load schema 15 times and check ancestry + expected_previous_hashes = ["yjMtV4Zv0IJlfR5DPMwuXxGg8BRhy7E79L26XAHWEGE="] + for i in range(1, 15): + # keep expected previous_hashes + expected_previous_hashes.insert(0, schema._stored_version_hash) + + # update schema + row = {f"float{i}": 78172.128} + _, new_table = schema.coerce_row("event_user", None, row) + schema.update_table(new_table) + schema_dict = schema.to_dict() + schema = Schema.from_stored_schema(schema_dict) + + assert schema._stored_previous_hashes == expected_previous_hashes[:10] + assert schema._stored_version == version + i + + # we never have more than 10 previous_hashes + assert len(schema._stored_previous_hashes) == i + 1 if i + 1 <= 10 else 10 + + assert len(schema._stored_previous_hashes) == 10 diff --git a/tests/common/scripts/args.py b/tests/common/scripts/args.py index 627daeb76b..67c6cc651a 100644 --- a/tests/common/scripts/args.py +++ b/tests/common/scripts/args.py @@ -1,4 +1,4 @@ import sys print(len(sys.argv)) -print(sys.argv) \ No newline at end of file +print(sys.argv) diff --git a/tests/common/scripts/counter.py b/tests/common/scripts/counter.py index 99352cd1f3..a7fa34dfec 100644 --- a/tests/common/scripts/counter.py +++ b/tests/common/scripts/counter.py @@ -6,4 +6,4 @@ print(i) sys.stdout.flush() sleep(0.3) -print("exit") \ No newline at end of file +print("exit") diff --git a/tests/common/scripts/cwd.py b/tests/common/scripts/cwd.py index 404cf43ada..ea065561f3 100644 --- a/tests/common/scripts/cwd.py +++ b/tests/common/scripts/cwd.py @@ -1,3 +1,3 @@ import os -print(os.getcwd()) \ No newline at end of file +print(os.getcwd()) diff --git a/tests/common/scripts/long_lines.py b/tests/common/scripts/long_lines.py index ca5469cd4c..0d22c692ba 100644 --- a/tests/common/scripts/long_lines.py +++ b/tests/common/scripts/long_lines.py @@ -10,4 +10,4 @@ # without new lines print(line_b, file=sys.stderr, end="") -print(line_a, end="") \ No newline at end of file +print(line_a, end="") diff 
--git a/tests/common/scripts/long_lines_fails.py b/tests/common/scripts/long_lines_fails.py index 0633f078e0..37e2f13e31 100644 --- a/tests/common/scripts/long_lines_fails.py +++ b/tests/common/scripts/long_lines_fails.py @@ -11,4 +11,4 @@ # without new lines print(line_b, file=sys.stderr, end="") print(line_a, end="") -exit(-1) \ No newline at end of file +exit(-1) diff --git a/tests/common/scripts/no_stdout_exception.py b/tests/common/scripts/no_stdout_exception.py index 90c71a4551..75bebd8cc7 100644 --- a/tests/common/scripts/no_stdout_exception.py +++ b/tests/common/scripts/no_stdout_exception.py @@ -1 +1 @@ -raise Exception("no stdout") \ No newline at end of file +raise Exception("no stdout") diff --git a/tests/common/scripts/no_stdout_no_stderr_with_fail.py b/tests/common/scripts/no_stdout_no_stderr_with_fail.py index 8e7ef7e83f..d0d1c88de8 100644 --- a/tests/common/scripts/no_stdout_no_stderr_with_fail.py +++ b/tests/common/scripts/no_stdout_no_stderr_with_fail.py @@ -1 +1 @@ -exit(-1) \ No newline at end of file +exit(-1) diff --git a/tests/common/scripts/raising_counter.py b/tests/common/scripts/raising_counter.py index 74c9a53b20..fcc7cbc7d8 100644 --- a/tests/common/scripts/raising_counter.py +++ b/tests/common/scripts/raising_counter.py @@ -8,4 +8,4 @@ if i == 2: raise Exception("end") sleep(0.3) -print("exit") \ No newline at end of file +print("exit") diff --git a/tests/common/scripts/stdout_encode_exception.py b/tests/common/scripts/stdout_encode_exception.py index 57658d431b..c08f812b04 100644 --- a/tests/common/scripts/stdout_encode_exception.py +++ b/tests/common/scripts/stdout_encode_exception.py @@ -5,11 +5,11 @@ from dlt.common.runners.stdout import exec_to_stdout - def worker(data1, data2): print("in func") raise UnsupportedProcessStartMethodException("this") + f = partial(worker, "this is string", TRunMetrics(True, 300)) with exec_to_stdout(f) as rv: print(rv) diff --git a/tests/common/scripts/stdout_encode_result.py b/tests/common/scripts/stdout_encode_result.py index b399734a4d..51c9b553db 100644 --- a/tests/common/scripts/stdout_encode_result.py +++ b/tests/common/scripts/stdout_encode_result.py @@ -8,6 +8,7 @@ def worker(data1, data2): print("in func") return data1, data2 + f = partial(worker, "this is string", TRunMetrics(True, 300)) with exec_to_stdout(f) as rv: print(rv) diff --git a/tests/common/storages/test_file_storage.py b/tests/common/storages/test_file_storage.py index 194fcb9afb..9f212070e8 100644 --- a/tests/common/storages/test_file_storage.py +++ b/tests/common/storages/test_file_storage.py @@ -69,7 +69,10 @@ def test_in_storage(test_storage: FileStorage) -> None: assert test_storage.in_storage(".") is True assert test_storage.in_storage(os.curdir) is True assert test_storage.in_storage(os.path.realpath(os.curdir)) is False - assert test_storage.in_storage(os.path.join(os.path.realpath(os.curdir), TEST_STORAGE_ROOT)) is True + assert ( + test_storage.in_storage(os.path.join(os.path.realpath(os.curdir), TEST_STORAGE_ROOT)) + is True + ) def test_from_wd_to_relative_path(test_storage: FileStorage) -> None: @@ -129,31 +132,31 @@ def test_validate_file_name_component() -> None: @pytest.mark.parametrize("action", ("rename_tree_files", "rename_tree", "atomic_rename")) def test_rename_nested_tree(test_storage: FileStorage, action: str) -> None: - source_dir = os.path.join(test_storage.storage_path, 'source') - nested_dir_1 = os.path.join(source_dir, 'nested1') - nested_dir_2 = os.path.join(nested_dir_1, 'nested2') - empty_dir = os.path.join(source_dir, 
'empty') + source_dir = os.path.join(test_storage.storage_path, "source") + nested_dir_1 = os.path.join(source_dir, "nested1") + nested_dir_2 = os.path.join(nested_dir_1, "nested2") + empty_dir = os.path.join(source_dir, "empty") os.makedirs(nested_dir_2) os.makedirs(empty_dir) - with open(os.path.join(source_dir, 'test1.txt'), 'w', encoding="utf-8") as f: - f.write('test') - with open(os.path.join(nested_dir_1, 'test2.txt'), 'w', encoding="utf-8") as f: - f.write('test') - with open(os.path.join(nested_dir_2, 'test3.txt'), 'w', encoding="utf-8") as f: - f.write('test') + with open(os.path.join(source_dir, "test1.txt"), "w", encoding="utf-8") as f: + f.write("test") + with open(os.path.join(nested_dir_1, "test2.txt"), "w", encoding="utf-8") as f: + f.write("test") + with open(os.path.join(nested_dir_2, "test3.txt"), "w", encoding="utf-8") as f: + f.write("test") - dest_dir = os.path.join(test_storage.storage_path, 'dest') + dest_dir = os.path.join(test_storage.storage_path, "dest") getattr(test_storage, action)(source_dir, dest_dir) assert not os.path.exists(source_dir) assert os.path.exists(dest_dir) - assert os.path.exists(os.path.join(dest_dir, 'nested1')) - assert os.path.exists(os.path.join(dest_dir, 'nested1', 'nested2')) - assert os.path.exists(os.path.join(dest_dir, 'empty')) - assert os.path.exists(os.path.join(dest_dir, 'test1.txt')) - assert os.path.exists(os.path.join(dest_dir, 'nested1', 'test2.txt')) - assert os.path.exists(os.path.join(dest_dir, 'nested1', 'nested2', 'test3.txt')) + assert os.path.exists(os.path.join(dest_dir, "nested1")) + assert os.path.exists(os.path.join(dest_dir, "nested1", "nested2")) + assert os.path.exists(os.path.join(dest_dir, "empty")) + assert os.path.exists(os.path.join(dest_dir, "test1.txt")) + assert os.path.exists(os.path.join(dest_dir, "nested1", "test2.txt")) + assert os.path.exists(os.path.join(dest_dir, "nested1", "nested2", "test3.txt")) @skipifnotwindows diff --git a/tests/common/storages/test_load_package.py b/tests/common/storages/test_load_package.py new file mode 100644 index 0000000000..f671ddcf32 --- /dev/null +++ b/tests/common/storages/test_load_package.py @@ -0,0 +1,121 @@ +import os +import pytest +from pathlib import Path + +from dlt.common import sleep +from dlt.common.schema import Schema +from dlt.common.storages import PackageStorage, LoadStorage, ParsedLoadJobFileName +from dlt.common.utils import uniq_id + +from tests.common.storages.utils import start_loading_file, assert_package_info, load_storage +from tests.utils import autouse_test_storage + + +def test_is_partially_loaded(load_storage: LoadStorage) -> None: + load_id, file_name = start_loading_file( + load_storage, [{"content": "a"}, {"content": "b"}], start_job=False + ) + info = load_storage.get_load_package_info(load_id) + # all jobs are new + assert PackageStorage.is_package_partially_loaded(info) is False + # start job + load_storage.normalized_packages.start_job(load_id, file_name) + info = load_storage.get_load_package_info(load_id) + assert PackageStorage.is_package_partially_loaded(info) is True + # complete job + load_storage.normalized_packages.complete_job(load_id, file_name) + info = load_storage.get_load_package_info(load_id) + assert PackageStorage.is_package_partially_loaded(info) is True + # must complete package + load_storage.complete_load_package(load_id, False) + info = load_storage.get_load_package_info(load_id) + assert PackageStorage.is_package_partially_loaded(info) is False + + # abort package + load_id, file_name = 
start_loading_file(load_storage, [{"content": "a"}, {"content": "b"}]) + load_storage.complete_load_package(load_id, True) + info = load_storage.get_load_package_info(load_id) + assert PackageStorage.is_package_partially_loaded(info) is True + + +def test_save_load_schema(load_storage: LoadStorage) -> None: + # mock schema version to some random number so we know we load what we save + schema = Schema("event") + schema._stored_version = 762171 + + load_storage.new_packages.create_package("copy") + saved_file_name = load_storage.new_packages.save_schema("copy", schema) + assert saved_file_name.endswith( + os.path.join( + load_storage.new_packages.storage.storage_path, "copy", PackageStorage.SCHEMA_FILE_NAME + ) + ) + assert load_storage.new_packages.storage.has_file( + os.path.join("copy", PackageStorage.SCHEMA_FILE_NAME) + ) + schema_copy = load_storage.new_packages.load_schema("copy") + assert schema.stored_version == schema_copy.stored_version + + +def test_job_elapsed_time_seconds(load_storage: LoadStorage) -> None: + load_id, fn = start_loading_file(load_storage, "test file") # type: ignore[arg-type] + fp = load_storage.normalized_packages.storage.make_full_path( + load_storage.normalized_packages.get_job_file_path(load_id, "started_jobs", fn) + ) + elapsed = PackageStorage._job_elapsed_time_seconds(fp) + sleep(0.3) + # do not touch file + elapsed_2 = PackageStorage._job_elapsed_time_seconds(fp) + assert elapsed_2 - elapsed >= 0.3 + # rename the file + fp = load_storage.normalized_packages.retry_job(load_id, fn) + # retry_job increases retry number in file name so the line below does not work + # fp = storage.storage._make_path(storage._get_job_file_path(load_id, "new_jobs", fn)) + elapsed_2 = PackageStorage._job_elapsed_time_seconds(fp) + # it should keep its mod original date after rename + assert elapsed_2 - elapsed >= 0.3 + + +def test_retry_job(load_storage: LoadStorage) -> None: + load_id, fn = start_loading_file(load_storage, "test file") # type: ignore[arg-type] + job_fn_t = ParsedLoadJobFileName.parse(fn) + assert job_fn_t.table_name == "mock_table" + assert job_fn_t.retry_count == 0 + # now retry + new_fp = load_storage.normalized_packages.retry_job(load_id, fn) + assert_package_info(load_storage, load_id, "normalized", "new_jobs") + assert ParsedLoadJobFileName.parse(new_fp).retry_count == 1 + # try again + fn = Path(new_fp).name + load_storage.normalized_packages.start_job(load_id, fn) + new_fp = load_storage.normalized_packages.retry_job(load_id, fn) + assert ParsedLoadJobFileName.parse(new_fp).retry_count == 2 + + +def test_build_parse_job_path(load_storage: LoadStorage) -> None: + file_id = ParsedLoadJobFileName.new_file_id() + f_n_t = ParsedLoadJobFileName("test_table", file_id, 0, "jsonl") + job_f_n = PackageStorage.build_job_file_name( + f_n_t.table_name, file_id, 0, loader_file_format=load_storage.loader_file_format + ) + # test the exact representation but we should probably not test for that + assert job_f_n == f"test_table.{file_id}.0.jsonl" + assert ParsedLoadJobFileName.parse(job_f_n) == f_n_t + # also parses full paths correctly + assert ParsedLoadJobFileName.parse("load_id/" + job_f_n) == f_n_t + + # parts cannot contain dots + with pytest.raises(ValueError): + PackageStorage.build_job_file_name( + "test.table", file_id, 0, loader_file_format=load_storage.loader_file_format + ) + PackageStorage.build_job_file_name( + "test_table", "f.id", 0, loader_file_format=load_storage.loader_file_format + ) + + # parsing requires 4 parts and retry count + with 
pytest.raises(ValueError): + ParsedLoadJobFileName.parse(job_f_n + ".more") + + with pytest.raises(ValueError): + ParsedLoadJobFileName.parse("tab.id.wrong_retry.jsonl") diff --git a/tests/common/storages/test_load_storage.py b/tests/common/storages/test_load_storage.py new file mode 100644 index 0000000000..e239ec30a2 --- /dev/null +++ b/tests/common/storages/test_load_storage.py @@ -0,0 +1,178 @@ +import os +import pytest + +from dlt.common import json, pendulum +from dlt.common.schema import TSchemaTables +from dlt.common.storages import PackageStorage, LoadStorage +from dlt.common.storages.exceptions import LoadPackageNotFound, NoMigrationPathException + +from tests.common.storages.utils import start_loading_file, assert_package_info, load_storage +from tests.utils import write_version, autouse_test_storage + + +def test_complete_successful_package(load_storage: LoadStorage) -> None: + # should delete package in full + load_storage.config.delete_completed_jobs = True + load_id, file_name = start_loading_file(load_storage, [{"content": "a"}, {"content": "b"}]) + assert load_storage.storage.has_folder(load_storage.get_normalized_package_path(load_id)) + load_storage.normalized_packages.complete_job(load_id, file_name) + assert_package_info(load_storage, load_id, "normalized", "completed_jobs") + load_storage.complete_load_package(load_id, False) + # deleted from loading + assert not load_storage.storage.has_folder(load_storage.get_normalized_package_path(load_id)) + # has package + assert load_storage.storage.has_folder(load_storage.get_loaded_package_path(load_id)) + assert load_storage.storage.has_file( + os.path.join( + load_storage.get_loaded_package_path(load_id), + PackageStorage.PACKAGE_COMPLETED_FILE_NAME, + ) + ) + # but completed packages are deleted + load_storage.maybe_remove_completed_jobs(load_id) + assert not load_storage.loaded_packages.storage.has_folder( + load_storage.loaded_packages.get_job_folder_path(load_id, "completed_jobs") + ) + assert_package_info(load_storage, load_id, "loaded", "completed_jobs", jobs_count=0) + # delete completed package + load_storage.delete_loaded_package(load_id) + assert not load_storage.storage.has_folder(load_storage.get_loaded_package_path(load_id)) + # do not delete completed jobs + load_storage.config.delete_completed_jobs = False + load_id, file_name = start_loading_file(load_storage, [{"content": "a"}, {"content": "b"}]) + load_storage.normalized_packages.complete_job(load_id, file_name) + load_storage.complete_load_package(load_id, False) + # deleted from loading + assert not load_storage.storage.has_folder(load_storage.get_normalized_package_path(load_id)) + # has load preserved + assert load_storage.storage.has_folder(load_storage.get_loaded_package_path(load_id)) + assert load_storage.storage.has_file( + os.path.join( + load_storage.get_loaded_package_path(load_id), + PackageStorage.PACKAGE_COMPLETED_FILE_NAME, + ) + ) + # has completed loads + assert load_storage.loaded_packages.storage.has_folder( + load_storage.loaded_packages.get_job_folder_path(load_id, "completed_jobs") + ) + load_storage.delete_loaded_package(load_id) + assert not load_storage.storage.has_folder(load_storage.get_loaded_package_path(load_id)) + + +def test_wipe_normalized_packages(load_storage: LoadStorage) -> None: + load_id, file_name = start_loading_file(load_storage, [{"content": "a"}, {"content": "b"}]) + load_storage.wipe_normalized_packages() + assert not load_storage.storage.has_folder(load_storage.NORMALIZED_FOLDER) + + +def 
test_complete_package_failed_jobs(load_storage: LoadStorage) -> None: + # loads with failed jobs are always persisted + load_storage.config.delete_completed_jobs = True + load_id, file_name = start_loading_file(load_storage, [{"content": "a"}, {"content": "b"}]) + assert load_storage.storage.has_folder(load_storage.get_normalized_package_path(load_id)) + load_storage.normalized_packages.fail_job(load_id, file_name, "EXCEPTION") + assert_package_info(load_storage, load_id, "normalized", "failed_jobs") + load_storage.complete_load_package(load_id, False) + # deleted from loading + assert not load_storage.storage.has_folder(load_storage.get_normalized_package_path(load_id)) + # present in completed loads folder + assert load_storage.storage.has_folder(load_storage.get_loaded_package_path(load_id)) + # has completed loads + assert load_storage.loaded_packages.storage.has_folder( + load_storage.loaded_packages.get_job_folder_path(load_id, "completed_jobs") + ) + assert_package_info(load_storage, load_id, "loaded", "failed_jobs") + + # get failed jobs info + failed_files = sorted(load_storage.loaded_packages.list_failed_jobs(load_id)) + # job + message + assert len(failed_files) == 2 + assert load_storage.loaded_packages.storage.has_file(failed_files[0]) + failed_info = load_storage.list_failed_jobs_in_loaded_package(load_id) + assert failed_info[0].file_path == load_storage.loaded_packages.storage.make_full_path( + failed_files[0] + ) + assert failed_info[0].failed_message == "EXCEPTION" + assert failed_info[0].job_file_info.table_name == "mock_table" + # a few stats + assert failed_info[0].file_size == 32 + assert (pendulum.now() - failed_info[0].created_at).seconds < 2 + assert failed_info[0].elapsed < 2 + + package_info = load_storage.get_load_package_info(load_id) + assert package_info.state == "loaded" + assert package_info.schema_update == {} + assert package_info.jobs["failed_jobs"] == failed_info + + +def test_abort_package(load_storage: LoadStorage) -> None: + # loads with failed jobs are always persisted + load_storage.config.delete_completed_jobs = True + load_id, file_name = start_loading_file(load_storage, [{"content": "a"}, {"content": "b"}]) + assert load_storage.storage.has_folder(load_storage.get_normalized_package_path(load_id)) + load_storage.normalized_packages.fail_job(load_id, file_name, "EXCEPTION") + assert_package_info(load_storage, load_id, "normalized", "failed_jobs") + load_storage.complete_load_package(load_id, True) + assert load_storage.loaded_packages.storage.has_folder( + load_storage.loaded_packages.get_job_folder_path(load_id, "completed_jobs") + ) + assert_package_info(load_storage, load_id, "aborted", "failed_jobs") + + +def test_process_schema_update(load_storage: LoadStorage) -> None: + with pytest.raises(FileNotFoundError): + load_storage.begin_schema_update("load_id") + load_id, fn = start_loading_file(load_storage, "test file") # type: ignore[arg-type] + assert load_storage.begin_schema_update(load_id) == {} + assert load_storage.begin_schema_update(load_id) == {} + # store the applied schema update + applied_update: TSchemaTables = {"table": {"name": "table", "columns": {}}} + load_storage.commit_schema_update(load_id, applied_update) + with pytest.raises(FileNotFoundError): + load_storage.commit_schema_update(load_id, applied_update) + assert load_storage.begin_schema_update(load_id) is None + # processed file exists + applied_update_path = os.path.join( + load_storage.get_normalized_package_path(load_id), + 
PackageStorage.APPLIED_SCHEMA_UPDATES_FILE_NAME, + ) + assert load_storage.storage.has_file(applied_update_path) is True + assert json.loads(load_storage.storage.load(applied_update_path)) == applied_update + # verify info package + package_info = assert_package_info(load_storage, load_id, "normalized", "started_jobs") + # applied update is present + assert package_info.schema_update == applied_update + # should be in dict + package_dict = package_info.asdict() + assert len(package_dict["tables"]) == 1 + # commit package + load_storage.complete_load_package(load_id, False) + package_info = assert_package_info(load_storage, load_id, "loaded", "started_jobs") + # applied update is present + assert package_info.schema_update == applied_update + + +def test_get_unknown_package_info(load_storage: LoadStorage) -> None: + with pytest.raises(LoadPackageNotFound): + load_storage.get_load_package_info("UNKNOWN LOAD ID") + + +def test_full_migration_path() -> None: + # create directory structure + s = LoadStorage(True, "jsonl", LoadStorage.ALL_SUPPORTED_FILE_FORMATS) + # overwrite known initial version + write_version(s.storage, "1.0.0") + # must be able to migrate to current version + s = LoadStorage(False, "jsonl", LoadStorage.ALL_SUPPORTED_FILE_FORMATS) + assert s.version == LoadStorage.STORAGE_VERSION + + +def test_unknown_migration_path() -> None: + # create directory structure + s = LoadStorage(True, "jsonl", LoadStorage.ALL_SUPPORTED_FILE_FORMATS) + # overwrite known initial version + write_version(s.storage, "10.0.0") + # must be able to migrate to current version + with pytest.raises(NoMigrationPathException): + LoadStorage(False, "jsonl", LoadStorage.ALL_SUPPORTED_FILE_FORMATS) diff --git a/tests/common/storages/test_loader_storage.py b/tests/common/storages/test_loader_storage.py deleted file mode 100644 index 1acfeb873b..0000000000 --- a/tests/common/storages/test_loader_storage.py +++ /dev/null @@ -1,296 +0,0 @@ -import os -import pytest -from pathlib import Path -from typing import Sequence, Tuple - -from dlt.common import sleep, json, pendulum -from dlt.common.schema import Schema, TSchemaTables -from dlt.common.storages.load_storage import LoadPackageInfo, LoadStorage, ParsedLoadJobFileName, TJobState -from dlt.common.configuration import resolve_configuration -from dlt.common.storages import LoadStorageConfiguration -from dlt.common.storages.exceptions import LoadPackageNotFound, NoMigrationPathException -from dlt.common.typing import StrAny -from dlt.common.utils import uniq_id - -from tests.utils import TEST_STORAGE_ROOT, write_version, autouse_test_storage - - -@pytest.fixture -def storage() -> LoadStorage: - C = resolve_configuration(LoadStorageConfiguration()) - s = LoadStorage(True, "jsonl", LoadStorage.ALL_SUPPORTED_FILE_FORMATS, C) - return s - - -def test_complete_successful_package(storage: LoadStorage) -> None: - # should delete package in full - storage.config.delete_completed_jobs = True - load_id, file_name = start_loading_file(storage, [{"content": "a"}, {"content": "b"}]) - assert storage.storage.has_folder(storage.get_normalized_package_path(load_id)) - storage.complete_job(load_id, file_name) - assert_package_info(storage, load_id, "normalized", "completed_jobs") - storage.complete_load_package(load_id, False) - # deleted from loading - assert not storage.storage.has_folder(storage.get_normalized_package_path(load_id)) - # has package - assert storage.storage.has_folder(storage.get_completed_package_path(load_id)) - assert 
storage.storage.has_file(os.path.join(storage.get_completed_package_path(load_id), LoadStorage.PACKAGE_COMPLETED_FILE_NAME)) - # but completed packages are deleted - assert not storage.storage.has_folder(storage._get_job_folder_completed_path(load_id, "completed_jobs")) - assert_package_info(storage, load_id, "loaded", "completed_jobs", jobs_count=0) - # delete completed package - storage.delete_completed_package(load_id) - assert not storage.storage.has_folder(storage.get_completed_package_path(load_id)) - # do not delete completed jobs - storage.config.delete_completed_jobs = False - load_id, file_name = start_loading_file(storage, [{"content": "a"}, {"content": "b"}]) - storage.complete_job(load_id, file_name) - storage.complete_load_package(load_id, False) - # deleted from loading - assert not storage.storage.has_folder(storage.get_normalized_package_path(load_id)) - # has load preserved - assert storage.storage.has_folder(storage.get_completed_package_path(load_id)) - assert storage.storage.has_file(os.path.join(storage.get_completed_package_path(load_id), LoadStorage.PACKAGE_COMPLETED_FILE_NAME)) - # has completed loads - assert storage.storage.has_folder(storage._get_job_folder_completed_path(load_id, "completed_jobs")) - storage.delete_completed_package(load_id) - assert not storage.storage.has_folder(storage.get_completed_package_path(load_id)) - - -def test_wipe_normalized_packages(storage: LoadStorage) -> None: - load_id, file_name = start_loading_file(storage, [{"content": "a"}, {"content": "b"}]) - storage.wipe_normalized_packages() - assert not storage.storage.has_folder(storage.NORMALIZED_FOLDER) - - -def test_is_partially_loaded(storage: LoadStorage) -> None: - load_id, file_name = start_loading_file(storage, [{"content": "a"}, {"content": "b"}], start_job=False) - info = storage.get_load_package_info(load_id) - # all jobs are new - assert LoadStorage.is_package_partially_loaded(info) is False - # start job - storage.start_job(load_id, file_name) - info = storage.get_load_package_info(load_id) - assert LoadStorage.is_package_partially_loaded(info) is True - # complete job - storage.complete_job(load_id, file_name) - info = storage.get_load_package_info(load_id) - assert LoadStorage.is_package_partially_loaded(info) is True - # must complete package - storage.complete_load_package(load_id, False) - info = storage.get_load_package_info(load_id) - assert LoadStorage.is_package_partially_loaded(info) is False - - # abort package - load_id, file_name = start_loading_file(storage, [{"content": "a"}, {"content": "b"}]) - storage.complete_load_package(load_id, True) - info = storage.get_load_package_info(load_id) - assert LoadStorage.is_package_partially_loaded(info) is True - - -def test_complete_package_failed_jobs(storage: LoadStorage) -> None: - # loads with failed jobs are always persisted - storage.config.delete_completed_jobs = True - load_id, file_name = start_loading_file(storage, [{"content": "a"}, {"content": "b"}]) - assert storage.storage.has_folder(storage.get_normalized_package_path(load_id)) - storage.fail_job(load_id, file_name, "EXCEPTION") - assert_package_info(storage, load_id, "normalized", "failed_jobs") - storage.complete_load_package(load_id, False) - # deleted from loading - assert not storage.storage.has_folder(storage.get_normalized_package_path(load_id)) - # present in completed loads folder - assert storage.storage.has_folder(storage.get_completed_package_path(load_id)) - # has completed loads - assert 
storage.storage.has_folder(storage._get_job_folder_completed_path(load_id, "completed_jobs")) - assert_package_info(storage, load_id, "loaded", "failed_jobs") - - # get failed jobs info - failed_files = sorted(storage.list_completed_failed_jobs(load_id)) - # job + message - assert len(failed_files) == 2 - assert storage.storage.has_file(failed_files[0]) - failed_info = storage.list_failed_jobs_in_completed_package(load_id) - assert failed_info[0].file_path == storage.storage.make_full_path(failed_files[0]) - assert failed_info[0].failed_message == "EXCEPTION" - assert failed_info[0].job_file_info.table_name == "mock_table" - # a few stats - assert failed_info[0].file_size == 32 - assert (pendulum.now() - failed_info[0].created_at).seconds < 2 - assert failed_info[0].elapsed < 2 - - package_info = storage.get_load_package_info(load_id) - assert package_info.state == "loaded" - assert package_info.schema_update == {} - assert package_info.jobs["failed_jobs"] == failed_info - - -def test_abort_package(storage: LoadStorage) -> None: - # loads with failed jobs are always persisted - storage.config.delete_completed_jobs = True - load_id, file_name = start_loading_file(storage, [{"content": "a"}, {"content": "b"}]) - assert storage.storage.has_folder(storage.get_normalized_package_path(load_id)) - storage.fail_job(load_id, file_name, "EXCEPTION") - assert_package_info(storage, load_id, "normalized", "failed_jobs") - storage.complete_load_package(load_id, True) - assert storage.storage.has_folder(storage._get_job_folder_completed_path(load_id, "completed_jobs")) - assert_package_info(storage, load_id, "aborted", "failed_jobs") - - -def test_save_load_schema(storage: LoadStorage) -> None: - # mock schema version to some random number so we know we load what we save - schema = Schema("event") - schema._stored_version = 762171 - - storage.create_temp_load_package("copy") - saved_file_name = storage.save_temp_schema(schema, "copy") - assert saved_file_name.endswith(os.path.join(storage.storage.storage_path, "copy", LoadStorage.SCHEMA_FILE_NAME)) - assert storage.storage.has_file(os.path.join("copy",LoadStorage.SCHEMA_FILE_NAME)) - schema_copy = storage.load_temp_schema("copy") - assert schema.stored_version == schema_copy.stored_version - - -def test_job_elapsed_time_seconds(storage: LoadStorage) -> None: - load_id, fn = start_loading_file(storage, "test file") # type: ignore[arg-type] - fp = storage.storage.make_full_path(storage._get_job_file_path(load_id, "started_jobs", fn)) - elapsed = storage.job_elapsed_time_seconds(fp) - sleep(0.3) - # do not touch file - elapsed_2 = storage.job_elapsed_time_seconds(fp) - assert elapsed_2 - elapsed >= 0.3 - # rename the file - fp = storage.retry_job(load_id, fn) - # retry_job increases retry number in file name so the line below does not work - # fp = storage.storage._make_path(storage._get_job_file_path(load_id, "new_jobs", fn)) - elapsed_2 = storage.job_elapsed_time_seconds(fp) - # it should keep its mod original date after rename - assert elapsed_2 - elapsed >= 0.3 - - -def test_retry_job(storage: LoadStorage) -> None: - load_id, fn = start_loading_file(storage, "test file") # type: ignore[arg-type] - job_fn_t = LoadStorage.parse_job_file_name(fn) - assert job_fn_t.table_name == "mock_table" - assert job_fn_t.retry_count == 0 - # now retry - new_fp = storage.retry_job(load_id, fn) - assert_package_info(storage, load_id, "normalized", "new_jobs") - assert LoadStorage.parse_job_file_name(new_fp).retry_count == 1 - # try again - fn = Path(new_fp).name - 
storage.start_job(load_id, fn) - new_fp = storage.retry_job(load_id, fn) - assert LoadStorage.parse_job_file_name(new_fp).retry_count == 2 - - -def test_build_parse_job_path(storage: LoadStorage) -> None: - file_id = uniq_id(5) - f_n_t = ParsedLoadJobFileName("test_table", file_id, 0, "jsonl") - job_f_n = storage.build_job_file_name(f_n_t.table_name, file_id, 0) - # test the exact representation but we should probably not test for that - assert job_f_n == f"test_table.{file_id}.0.jsonl" - assert LoadStorage.parse_job_file_name(job_f_n) == f_n_t - # also parses full paths correctly - assert LoadStorage.parse_job_file_name("load_id/" + job_f_n) == f_n_t - - # parts cannot contain dots - with pytest.raises(ValueError): - storage.build_job_file_name("test.table", file_id, 0) - storage.build_job_file_name("test_table", "f.id", 0) - - # parsing requires 4 parts and retry count - with pytest.raises(ValueError): - LoadStorage.parse_job_file_name(job_f_n + ".more") - LoadStorage.parse_job_file_name("tab.id.wrong_retry.jsonl") - # must know the file format - LoadStorage.parse_job_file_name("tab.id.300.avr") - - -def test_process_schema_update(storage: LoadStorage) -> None: - with pytest.raises(FileNotFoundError): - storage.begin_schema_update("load_id") - load_id, fn = start_loading_file(storage, "test file") # type: ignore[arg-type] - assert storage.begin_schema_update(load_id) == {} - assert storage.begin_schema_update(load_id) == {} - # store the applied schema update - applied_update: TSchemaTables = {"table": {"name": "table", "columns": {}}} - storage.commit_schema_update(load_id, applied_update) - with pytest.raises(FileNotFoundError): - storage.commit_schema_update(load_id, applied_update) - assert storage.begin_schema_update(load_id) is None - # processed file exists - applied_update_path = os.path.join(storage.get_normalized_package_path(load_id), LoadStorage.APPLIED_SCHEMA_UPDATES_FILE_NAME) - assert storage.storage.has_file(applied_update_path) is True - assert json.loads(storage.storage.load(applied_update_path)) == applied_update - # verify info package - package_info = assert_package_info(storage, load_id, "normalized", "started_jobs") - # applied update is present - assert package_info.schema_update == applied_update - # should be in dict - package_dict = package_info.asdict() - assert len(package_dict["tables"]) == 1 - # commit package - storage.complete_load_package(load_id, False) - package_info = assert_package_info(storage, load_id, "loaded", "started_jobs") - # applied update is present - assert package_info.schema_update == applied_update - - -def test_get_unknown_package_info(storage: LoadStorage) -> None: - with pytest.raises(LoadPackageNotFound): - storage.get_load_package_info("UNKNOWN LOAD ID") - - -def test_full_migration_path() -> None: - # create directory structure - s = LoadStorage(True, "jsonl", LoadStorage.ALL_SUPPORTED_FILE_FORMATS) - # overwrite known initial version - write_version(s.storage, "1.0.0") - # must be able to migrate to current version - s = LoadStorage(False, "jsonl", LoadStorage.ALL_SUPPORTED_FILE_FORMATS) - assert s.version == LoadStorage.STORAGE_VERSION - - -def test_unknown_migration_path() -> None: - # create directory structure - s = LoadStorage(True, "jsonl", LoadStorage.ALL_SUPPORTED_FILE_FORMATS) - # overwrite known initial version - write_version(s.storage, "10.0.0") - # must be able to migrate to current version - with pytest.raises(NoMigrationPathException): - LoadStorage(False, "jsonl", LoadStorage.ALL_SUPPORTED_FILE_FORMATS) - - -def 
start_loading_file(s: LoadStorage, content: Sequence[StrAny], start_job: bool = True) -> Tuple[str, str]: - load_id = uniq_id() - s.create_temp_load_package(load_id) - # write test file - file_name = s.write_temp_job_file(load_id, "mock_table", None, uniq_id(), content) - # write schema and schema update - s.save_temp_schema(Schema("mock"), load_id) - s.save_temp_schema_updates(load_id, {}) - s.commit_temp_load_package(load_id) - assert_package_info(s, load_id, "normalized", "new_jobs") - if start_job: - s.start_job(load_id, file_name) - assert_package_info(s, load_id, "normalized", "started_jobs") - return load_id, file_name - - -def assert_package_info(storage: LoadStorage, load_id: str, package_state: str, job_state: TJobState, jobs_count: int = 1) -> LoadPackageInfo: - package_info = storage.get_load_package_info(load_id) - # make sure it is serializable - json.dumps(package_info) - # generate str - str(package_info) - package_info.asstr() - package_info.asstr(verbosity=1) - assert package_info.state == package_state - assert package_info.schema_name == "mock" - assert len(package_info.jobs[job_state]) == jobs_count - if package_state == "normalized": - assert package_info.completed_at is None - else: - assert (pendulum.now() - package_info.completed_at).seconds < 2 - # get dict - package_info.asdict() - return package_info diff --git a/tests/common/storages/test_local_filesystem.py b/tests/common/storages/test_local_filesystem.py index e9550a3173..3827535fbd 100644 --- a/tests/common/storages/test_local_filesystem.py +++ b/tests/common/storages/test_local_filesystem.py @@ -11,7 +11,9 @@ TEST_SAMPLE_FILES = "tests/common/storages/samples" -@pytest.mark.parametrize("bucket_url,load_content", itertools.product(["file:///", "/", ""], [True, False])) +@pytest.mark.parametrize( + "bucket_url,load_content", itertools.product(["file:///", "/", ""], [True, False]) +) def test_filesystem_dict_local(bucket_url: str, load_content: bool) -> None: if bucket_url in [""]: # relative paths @@ -20,7 +22,7 @@ def test_filesystem_dict_local(bucket_url: str, load_content: bool) -> None: if bucket_url == "/": bucket_url = os.path.abspath(TEST_SAMPLE_FILES) else: - bucket_url = pathlib.Path(TEST_SAMPLE_FILES).absolute().as_uri() + bucket_url = pathlib.Path(TEST_SAMPLE_FILES).absolute().as_uri() config = FilesystemConfiguration(bucket_url=bucket_url) filesystem, _ = fsspec_from_config(config) diff --git a/tests/common/storages/test_normalize_storage.py b/tests/common/storages/test_normalize_storage.py index 7199405c12..5758429146 100644 --- a/tests/common/storages/test_normalize_storage.py +++ b/tests/common/storages/test_normalize_storage.py @@ -3,28 +3,10 @@ from dlt.common.utils import uniq_id from dlt.common.storages import NormalizeStorage, NormalizeStorageConfiguration from dlt.common.storages.exceptions import NoMigrationPathException -from dlt.common.storages.normalize_storage import TParsedNormalizeFileName from tests.utils import write_version, autouse_test_storage -@pytest.mark.skip() -def test_load_events_and_group_by_sender() -> None: - # TODO: create fixture with two sender ids and 3 files and check the result - pass - - -def test_build_extracted_file_name() -> None: - load_id = uniq_id() - name = NormalizeStorage.build_extracted_file_stem("event", "table_with_parts__many", load_id) + ".jsonl" - assert NormalizeStorage.get_schema_name(name) == "event" - assert NormalizeStorage.parse_normalize_file_name(name) == TParsedNormalizeFileName("event", "table_with_parts__many", load_id, "jsonl") - - # 
empty schema should be supported - name = NormalizeStorage.build_extracted_file_stem("", "table", load_id) + ".jsonl" - assert NormalizeStorage.parse_normalize_file_name(name) == TParsedNormalizeFileName("", "table", load_id, "jsonl") - - def test_full_migration_path() -> None: # create directory structure s = NormalizeStorage(True) diff --git a/tests/common/storages/test_schema_storage.py b/tests/common/storages/test_schema_storage.py index f45773e4f5..c72fa75927 100644 --- a/tests/common/storages/test_schema_storage.py +++ b/tests/common/storages/test_schema_storage.py @@ -7,11 +7,25 @@ from dlt.common.schema.schema import Schema from dlt.common.schema.typing import TStoredSchema from dlt.common.schema.utils import explicit_normalizers -from dlt.common.storages.exceptions import InStorageSchemaModified, SchemaNotFoundError, UnexpectedSchemaName -from dlt.common.storages import SchemaStorageConfiguration, SchemaStorage, LiveSchemaStorage, FileStorage +from dlt.common.storages.exceptions import ( + InStorageSchemaModified, + SchemaNotFoundError, + UnexpectedSchemaName, +) +from dlt.common.storages import ( + SchemaStorageConfiguration, + SchemaStorage, + LiveSchemaStorage, + FileStorage, +) from tests.utils import autouse_test_storage, TEST_STORAGE_ROOT -from tests.common.utils import load_yml_case, yml_case_path, COMMON_TEST_CASES_PATH, IMPORTED_VERSION_HASH_ETH_V6 +from tests.common.utils import ( + load_yml_case, + yml_case_path, + COMMON_TEST_CASES_PATH, + IMPORTED_VERSION_HASH_ETH_V8, +) @pytest.fixture @@ -22,13 +36,23 @@ def storage() -> SchemaStorage: @pytest.fixture def synced_storage() -> SchemaStorage: # will be created in /schemas - return init_storage(SchemaStorageConfiguration(import_schema_path=TEST_STORAGE_ROOT + "/import", export_schema_path=TEST_STORAGE_ROOT + "/import")) + return init_storage( + SchemaStorageConfiguration( + import_schema_path=TEST_STORAGE_ROOT + "/import", + export_schema_path=TEST_STORAGE_ROOT + "/import", + ) + ) @pytest.fixture def ie_storage() -> SchemaStorage: # will be created in /schemas - return init_storage(SchemaStorageConfiguration(import_schema_path=TEST_STORAGE_ROOT + "/import", export_schema_path=TEST_STORAGE_ROOT + "/export")) + return init_storage( + SchemaStorageConfiguration( + import_schema_path=TEST_STORAGE_ROOT + "/import", + export_schema_path=TEST_STORAGE_ROOT + "/export", + ) + ) def init_storage(C: SchemaStorageConfiguration) -> SchemaStorage: @@ -49,7 +73,9 @@ def test_load_non_existing(storage: SchemaStorage) -> None: def test_load_schema_with_upgrade() -> None: # point the storage root to v4 schema google_spreadsheet_v3.schema - storage = LiveSchemaStorage(SchemaStorageConfiguration(COMMON_TEST_CASES_PATH + "schemas/sheets")) + storage = LiveSchemaStorage( + SchemaStorageConfiguration(COMMON_TEST_CASES_PATH + "schemas/sheets") + ) # the hash when computed on the schema does not match the version_hash in the file so it should raise InStorageSchemaModified # but because the version upgrade is required, the check is skipped and the load succeeds storage.load_schema("google_spreadsheet_v4") @@ -64,7 +90,9 @@ def test_import_initial(synced_storage: SchemaStorage, storage: SchemaStorage) - assert_schema_imported(synced_storage, storage) -def test_import_overwrites_existing_if_modified(synced_storage: SchemaStorage, storage: SchemaStorage) -> None: +def test_import_overwrites_existing_if_modified( + synced_storage: SchemaStorage, storage: SchemaStorage +) -> None: schema = Schema("ethereum") storage.save_schema(schema) # now import 
schema that wil overwrite schema in storage as it is not linked to external schema @@ -87,6 +115,7 @@ def test_skip_import_if_not_modified(synced_storage: SchemaStorage, storage: Sch assert storage_schema.version == reloaded_schema.stored_version assert storage_schema.version_hash == reloaded_schema.stored_version_hash assert storage_schema._imported_version_hash == reloaded_schema._imported_version_hash + assert storage_schema.previous_hashes == reloaded_schema.previous_hashes # the import schema gets modified storage_schema.tables["_dlt_loads"]["write_disposition"] = "append" storage_schema.tables.pop("event_user") @@ -96,7 +125,11 @@ def test_skip_import_if_not_modified(synced_storage: SchemaStorage, storage: Sch # we have overwritten storage schema assert reloaded_schema.tables["_dlt_loads"]["write_disposition"] == "append" assert "event_user" not in reloaded_schema.tables + + # hash and ancestry stay the same assert reloaded_schema._imported_version_hash == storage_schema.version_hash + assert storage_schema.previous_hashes == reloaded_schema.previous_hashes + # but original version has increased assert reloaded_schema.stored_version == storage_schema.version + 1 @@ -194,12 +227,13 @@ def test_save_store_schema_over_import(ie_storage: SchemaStorage) -> None: ie_storage.save_schema(schema) assert schema.version_hash == schema_hash # we linked schema to import schema - assert schema._imported_version_hash == IMPORTED_VERSION_HASH_ETH_V6 + assert schema._imported_version_hash == IMPORTED_VERSION_HASH_ETH_V8 # load schema and make sure our new schema is here schema = ie_storage.load_schema("ethereum") - assert schema._imported_version_hash == IMPORTED_VERSION_HASH_ETH_V6 + assert schema._imported_version_hash == IMPORTED_VERSION_HASH_ETH_V8 assert schema._stored_version_hash == schema_hash assert schema.version_hash == schema_hash + assert schema.previous_hashes == [] # we have simple schema in export folder fs = FileStorage(ie_storage.config.export_schema_path) exported_name = ie_storage._file_name_in_store("ethereum", "yaml") @@ -213,12 +247,13 @@ def test_save_store_schema_over_import_sync(synced_storage: SchemaStorage) -> No schema = Schema("ethereum") schema_hash = schema.version_hash synced_storage.save_schema(schema) - assert schema._imported_version_hash == IMPORTED_VERSION_HASH_ETH_V6 + assert schema._imported_version_hash == IMPORTED_VERSION_HASH_ETH_V8 # import schema is overwritten fs = FileStorage(synced_storage.config.import_schema_path) exported_name = synced_storage._file_name_in_store("ethereum", "yaml") exported_schema = yaml.safe_load(fs.load(exported_name)) assert schema.version_hash == exported_schema["version_hash"] == schema_hash + assert schema.previous_hashes == [] # when it is loaded we will import schema again which is identical to the current one but the import link # will be set to itself schema = synced_storage.load_schema("ethereum") @@ -235,28 +270,43 @@ def test_save_store_schema(storage: SchemaStorage) -> None: d_n["names"] = "tests.common.normalizers.custom_normalizers" schema = Schema("column_event", normalizers=d_n) storage.save_schema(schema) - assert storage.storage.has_file(SchemaStorage.NAMED_SCHEMA_FILE_PATTERN % ("column_event", "json")) + assert storage.storage.has_file( + SchemaStorage.NAMED_SCHEMA_FILE_PATTERN % ("column_event", "json") + ) loaded_schema = storage.load_schema("column_event") # also tables gets normalized inside so custom_ is added - assert loaded_schema.to_dict()["tables"]["column__dlt_loads"] == 
schema.to_dict()["tables"]["column__dlt_loads"] + assert ( + loaded_schema.to_dict()["tables"]["column__dlt_loads"] + == schema.to_dict()["tables"]["column__dlt_loads"] + ) assert loaded_schema.to_dict() == schema.to_dict() def test_schema_from_file() -> None: # json has precedence - schema = SchemaStorage.load_schema_file(os.path.join(COMMON_TEST_CASES_PATH, "schemas/local"), "event") + schema = SchemaStorage.load_schema_file( + os.path.join(COMMON_TEST_CASES_PATH, "schemas/local"), "event" + ) assert schema.name == "event" - schema = SchemaStorage.load_schema_file(os.path.join(COMMON_TEST_CASES_PATH, "schemas/local"), "event", extensions=("yaml",)) + schema = SchemaStorage.load_schema_file( + os.path.join(COMMON_TEST_CASES_PATH, "schemas/local"), "event", extensions=("yaml",) + ) assert schema.name == "event" assert "blocks" in schema.tables with pytest.raises(SchemaNotFoundError): - SchemaStorage.load_schema_file(os.path.join(COMMON_TEST_CASES_PATH, "schemas/local"), "eth", extensions=("yaml",)) + SchemaStorage.load_schema_file( + os.path.join(COMMON_TEST_CASES_PATH, "schemas/local"), "eth", extensions=("yaml",) + ) # file name and schema content mismatch with pytest.raises(UnexpectedSchemaName): - SchemaStorage.load_schema_file(os.path.join(COMMON_TEST_CASES_PATH, "schemas/local"), "name_mismatch", extensions=("yaml",)) + SchemaStorage.load_schema_file( + os.path.join(COMMON_TEST_CASES_PATH, "schemas/local"), + "name_mismatch", + extensions=("yaml",), + ) # def test_save_empty_schema_name(storage: SchemaStorage) -> None: @@ -269,18 +319,21 @@ def test_schema_from_file() -> None: def prepare_import_folder(storage: SchemaStorage) -> None: - shutil.copy(yml_case_path("schemas/eth/ethereum_schema_v6"), os.path.join(storage.storage.storage_path, "../import/ethereum.schema.yaml")) + shutil.copy( + yml_case_path("schemas/eth/ethereum_schema_v8"), + os.path.join(storage.storage.storage_path, "../import/ethereum.schema.yaml"), + ) def assert_schema_imported(synced_storage: SchemaStorage, storage: SchemaStorage) -> Schema: prepare_import_folder(synced_storage) - eth_v6: TStoredSchema = load_yml_case("schemas/eth/ethereum_schema_v6") + eth_V8: TStoredSchema = load_yml_case("schemas/eth/ethereum_schema_v8") schema = synced_storage.load_schema("ethereum") # is linked to imported schema - schema._imported_version_hash = eth_v6["version_hash"] + schema._imported_version_hash = eth_V8["version_hash"] # also was saved in storage assert synced_storage.has_schema("ethereum") # and has link to imported schema s well (load without import) schema = storage.load_schema("ethereum") - assert schema._imported_version_hash == eth_v6["version_hash"] + assert schema._imported_version_hash == eth_V8["version_hash"] return schema diff --git a/tests/common/storages/test_transactional_file.py b/tests/common/storages/test_transactional_file.py index 119b5ee3dd..7afdf10c38 100644 --- a/tests/common/storages/test_transactional_file.py +++ b/tests/common/storages/test_transactional_file.py @@ -109,7 +109,9 @@ def test_file_transaction_multiple_writers(fs: fsspec.AbstractFileSystem, file_n assert writer_2.read() == b"test 4" -def test_file_transaction_multiple_writers_with_races(fs: fsspec.AbstractFileSystem, file_name: str): +def test_file_transaction_multiple_writers_with_races( + fs: fsspec.AbstractFileSystem, file_name: str +): writer_1 = TransactionalFile(file_name, fs) time.sleep(0.5) writer_2 = TransactionalFile(file_name, fs) @@ -129,8 +131,10 @@ def test_file_transaction_simultaneous(fs: 
fsspec.AbstractFileSystem): pool = ThreadPoolExecutor(max_workers=40) results = pool.map( - lambda _: TransactionalFile( - "/bucket/test_123", fs).acquire_lock(blocking=False, jitter_mean=0.3), range(200) + lambda _: TransactionalFile("/bucket/test_123", fs).acquire_lock( + blocking=False, jitter_mean=0.3 + ), + range(200), ) assert sum(results) == 1 diff --git a/tests/common/storages/test_versioned_storage.py b/tests/common/storages/test_versioned_storage.py index ff23480a48..2859c7662c 100644 --- a/tests/common/storages/test_versioned_storage.py +++ b/tests/common/storages/test_versioned_storage.py @@ -9,7 +9,9 @@ class MigratedStorage(VersionedStorage): - def migrate_storage(self, from_version: semver.VersionInfo, to_version: semver.VersionInfo) -> None: + def migrate_storage( + self, from_version: semver.VersionInfo, to_version: semver.VersionInfo + ) -> None: # migration example: if from_version == "1.0.0" and from_version < to_version: from_version = semver.VersionInfo.parse("1.1.0") @@ -56,4 +58,4 @@ def test_downgrade_not_possible(test_storage: FileStorage) -> None: write_version(test_storage, "1.2.0") with pytest.raises(NoMigrationPathException) as wmpe: MigratedStorage("1.1.0", True, test_storage) - assert wmpe.value.migrated_version == "1.2.0" \ No newline at end of file + assert wmpe.value.migrated_version == "1.2.0" diff --git a/tests/common/storages/utils.py b/tests/common/storages/utils.py index 6900c6fdcf..91d8c3c77f 100644 --- a/tests/common/storages/utils.py +++ b/tests/common/storages/utils.py @@ -1,14 +1,35 @@ -from typing import List +import pytest +from typing import List, Sequence, Tuple from fsspec import AbstractFileSystem -import pandas -from pyarrow import parquet -from dlt.common import pendulum -from dlt.common.storages import FilesystemConfiguration +from dlt.common import pendulum, json +from dlt.common.configuration.resolve import resolve_configuration +from dlt.common.schema import Schema +from dlt.common.storages import ( + LoadStorageConfiguration, + FilesystemConfiguration, + LoadPackageInfo, + TJobState, + LoadStorage, +) from dlt.common.storages.fsspec_filesystem import FileItem, FileItemDict +from dlt.common.typing import StrAny +from dlt.common.utils import uniq_id -def assert_sample_files(all_file_items: List[FileItem], filesystem: AbstractFileSystem, config: FilesystemConfiguration, load_content: bool) -> None: +@pytest.fixture +def load_storage() -> LoadStorage: + C = resolve_configuration(LoadStorageConfiguration()) + s = LoadStorage(True, "jsonl", LoadStorage.ALL_SUPPORTED_FILE_FORMATS, C) + return s + + +def assert_sample_files( + all_file_items: List[FileItem], + filesystem: AbstractFileSystem, + config: FilesystemConfiguration, + load_content: bool, +) -> None: for item in all_file_items: assert isinstance(item["file_name"], str) assert item["file_url"].endswith(item["file_name"]) @@ -29,13 +50,17 @@ def assert_sample_files(all_file_items: List[FileItem], filesystem: AbstractFile assert content == f.read() # read via various readers if item["mime_type"] == "text/csv": - with file_dict.open() as f: - df = pandas.read_csv(f, header="infer") - assert len(df.to_dict(orient="records")) > 0 + # parse csv + with file_dict.open(mode="rt") as f: + from csv import DictReader + + elements = list(DictReader(f)) + assert len(elements) > 0 if item["mime_type"] == "application/parquet": + # verify it is a real parquet with file_dict.open() as f: - table = parquet.ParquetFile(f).read() - assert len(table.to_pylist()) + parquet: bytes = f.read() + assert 
parquet.startswith(b"PAR1") if item["mime_type"].startswith("text"): with file_dict.open(mode="rt") as f_txt: lines = f_txt.readlines() @@ -44,14 +69,58 @@ def assert_sample_files(all_file_items: List[FileItem], filesystem: AbstractFile assert len(all_file_items) == 10 assert set([item["file_name"] for item in all_file_items]) == { - 'csv/freshman_kgs.csv', - 'csv/freshman_lbs.csv', - 'csv/mlb_players.csv', - 'csv/mlb_teams_2012.csv', - 'jsonl/mlb_players.jsonl', - 'met_csv/A801/A881_20230920.csv', - 'met_csv/A803/A803_20230919.csv', - 'met_csv/A803/A803_20230920.csv', - 'parquet/mlb_players.parquet', - 'sample.txt' - } \ No newline at end of file + "csv/freshman_kgs.csv", + "csv/freshman_lbs.csv", + "csv/mlb_players.csv", + "csv/mlb_teams_2012.csv", + "jsonl/mlb_players.jsonl", + "met_csv/A801/A881_20230920.csv", + "met_csv/A803/A803_20230919.csv", + "met_csv/A803/A803_20230920.csv", + "parquet/mlb_players.parquet", + "sample.txt", + } + + +def start_loading_file( + s: LoadStorage, content: Sequence[StrAny], start_job: bool = True +) -> Tuple[str, str]: + load_id = uniq_id() + s.new_packages.create_package(load_id) + # write test file + file_name = s._write_temp_job_file(load_id, "mock_table", None, uniq_id(), content) + # write schema and schema update + s.new_packages.save_schema(load_id, Schema("mock")) + s.new_packages.save_schema_updates(load_id, {}) + s.commit_new_load_package(load_id) + assert_package_info(s, load_id, "normalized", "new_jobs") + if start_job: + s.normalized_packages.start_job(load_id, file_name) + assert_package_info(s, load_id, "normalized", "started_jobs") + return load_id, file_name + + +def assert_package_info( + storage: LoadStorage, + load_id: str, + package_state: str, + job_state: TJobState, + jobs_count: int = 1, +) -> LoadPackageInfo: + package_info = storage.get_load_package_info(load_id) + # make sure it is serializable + json.dumps(package_info) + # generate str + str(package_info) + package_info.asstr() + package_info.asstr(verbosity=1) + assert package_info.state == package_state + assert package_info.schema_name == "mock" + assert len(package_info.jobs[job_state]) == jobs_count + if package_state == "normalized": + assert package_info.completed_at is None + else: + assert (pendulum.now() - package_info.completed_at).seconds < 2 + # get dict + package_info.asdict() + return package_info diff --git a/tests/common/test_arithmetics.py b/tests/common/test_arithmetics.py index 4912d976eb..87c0a94751 100644 --- a/tests/common/test_arithmetics.py +++ b/tests/common/test_arithmetics.py @@ -18,7 +18,6 @@ def test_default_numeric_quantize() -> None: scale_18 = Decimal("0.5327010784") assert str(numeric_default_quantize(scale_18)) == "0.532701078" - # less than 0 digits scale_5 = Decimal("0.4") assert str(numeric_default_quantize(scale_5)) == "0.400000000" @@ -27,7 +26,7 @@ def test_default_numeric_quantize() -> None: def test_numeric_context() -> None: # we reach (38,9) numeric with numeric_default_context(): - v = Decimal(10**29-1) + Decimal("0.532701079") + v = Decimal(10**29 - 1) + Decimal("0.532701079") assert str(v) == "99999999999999999999999999999.532701079" assert numeric_default_quantize(v) == v diff --git a/tests/common/test_destination.py b/tests/common/test_destination.py index 7afa10ed68..a7547d27e0 100644 --- a/tests/common/test_destination.py +++ b/tests/common/test_destination.py @@ -1,9 +1,9 @@ import pytest -from dlt.common.destination.reference import DestinationClientDwhConfiguration, DestinationReference +from 
dlt.common.destination.reference import DestinationClientDwhConfiguration, Destination +from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.exceptions import InvalidDestinationReference, UnknownDestinationModule from dlt.common.schema import Schema -from dlt.common.schema.exceptions import InvalidDatasetName from tests.utils import ACTIVE_DESTINATIONS @@ -11,57 +11,173 @@ def test_import_unknown_destination() -> None: # standard destination with pytest.raises(UnknownDestinationModule): - DestinationReference.from_name("meltdb") + Destination.from_reference("meltdb") # custom module with pytest.raises(UnknownDestinationModule): - DestinationReference.from_name("melt.db") + Destination.from_reference("melt.db") def test_invalid_destination_reference() -> None: with pytest.raises(InvalidDestinationReference): - DestinationReference.from_name("tests.load.cases.fake_destination") + Destination.from_reference("tests.load.cases.fake_destination.not_a_destination") + + +def test_custom_destination_module() -> None: + destination = Destination.from_reference( + "tests.common.cases.destinations.null", destination_name="null-test" + ) + assert destination.destination_name == "null-test" + assert ( + destination.destination_type == "tests.common.cases.destinations.null.null" + ) # a full type name + + +def test_import_module_by_path() -> None: + # importing works directly from dlt destinations + dest = Destination.from_reference("dlt.destinations.postgres") + assert dest.destination_name == "postgres" + assert dest.destination_type == "dlt.destinations.postgres" + + # try again directly with the output from the first dest + dest2 = Destination.from_reference(dest.destination_type, destination_name="my_pg") + assert dest2.destination_name == "my_pg" + assert dest2.destination_type == "dlt.destinations.postgres" + + # try again with the path into the impl folder + dest3 = Destination.from_reference( + "dlt.destinations.impl.postgres.factory.postgres", destination_name="my_pg_2" + ) + assert dest3.destination_name == "my_pg_2" + assert dest3.destination_type == "dlt.destinations.postgres" def test_import_all_destinations() -> None: # this must pass without the client dependencies being imported - for module in ACTIVE_DESTINATIONS: - dest = DestinationReference.from_name(module) - assert dest.__name__ == "dlt.destinations." + module + for dest_type in ACTIVE_DESTINATIONS: + dest = Destination.from_reference(dest_type, None, dest_type + "_name", "production") + assert dest.destination_type == "dlt.destinations." 
+ dest_type + assert dest.destination_name == dest_type + "_name" + assert dest.config_params["environment"] == "production" + assert dest.config_params["destination_name"] == dest_type + "_name" dest.spec() - dest.capabilities() + assert isinstance(dest.capabilities(), DestinationCapabilitiesContext) + + +def test_import_destination_config() -> None: + # importing destination by type will work + dest = Destination.from_reference(ref="dlt.destinations.duckdb", environment="stage") + assert dest.destination_type == "dlt.destinations.duckdb" + assert dest.config_params["environment"] == "stage" + config = dest.configuration(dest.spec(dataset_name="dataset")) # type: ignore + assert config.destination_type == "duckdb" + assert config.destination_name == "duckdb" + assert config.environment == "stage" + + # importing destination by will work + dest = Destination.from_reference(ref=None, destination_name="duckdb", environment="production") + assert dest.destination_type == "dlt.destinations.duckdb" + assert dest.config_params["environment"] == "production" + config = dest.configuration(dest.spec(dataset_name="dataset")) # type: ignore + assert config.destination_type == "duckdb" + assert config.destination_name == "duckdb" + assert config.environment == "production" + + # importing with different name will propagate name + dest = Destination.from_reference( + ref="duckdb", destination_name="my_destination", environment="devel" + ) + assert dest.destination_type == "dlt.destinations.duckdb" + assert dest.config_params["environment"] == "devel" + config = dest.configuration(dest.spec(dataset_name="dataset")) # type: ignore + assert config.destination_type == "duckdb" + assert config.destination_name == "my_destination" + assert config.environment == "devel" + + # incorrect name will fail with correct error + with pytest.raises(UnknownDestinationModule): + Destination.from_reference(ref=None, destination_name="balh") def test_normalize_dataset_name() -> None: # with schema name appended - assert DestinationClientDwhConfiguration(dataset_name="ban_ana_dataset", default_schema_name="default").normalize_dataset_name(Schema("banana")) == "ban_ana_dataset_banana" + assert ( + DestinationClientDwhConfiguration( + dataset_name="ban_ana_dataset", default_schema_name="default" + ).normalize_dataset_name(Schema("banana")) + == "ban_ana_dataset_banana" + ) # without schema name appended - assert DestinationClientDwhConfiguration(dataset_name="ban_ana_dataset", default_schema_name="default").normalize_dataset_name(Schema("default")) == "ban_ana_dataset" + assert ( + DestinationClientDwhConfiguration( + dataset_name="ban_ana_dataset", default_schema_name="default" + ).normalize_dataset_name(Schema("default")) + == "ban_ana_dataset" + ) # dataset name will be normalized (now it is up to destination to normalize this) - assert DestinationClientDwhConfiguration(dataset_name="BaNaNa", default_schema_name="default").normalize_dataset_name(Schema("banana")) == "ba_na_na_banana" + assert ( + DestinationClientDwhConfiguration( + dataset_name="BaNaNa", default_schema_name="default" + ).normalize_dataset_name(Schema("banana")) + == "ba_na_na_banana" + ) # empty schemas are invalid with pytest.raises(ValueError): - DestinationClientDwhConfiguration(dataset_name="banana_dataset", default_schema_name=None).normalize_dataset_name(Schema(None)) + DestinationClientDwhConfiguration( + dataset_name="banana_dataset", default_schema_name=None + ).normalize_dataset_name(Schema(None)) with pytest.raises(ValueError): - 
DestinationClientDwhConfiguration(dataset_name="banana_dataset", default_schema_name="").normalize_dataset_name(Schema("")) + DestinationClientDwhConfiguration( + dataset_name="banana_dataset", default_schema_name="" + ).normalize_dataset_name(Schema("")) # empty dataset name is valid! - assert DestinationClientDwhConfiguration(dataset_name="", default_schema_name="ban_schema").normalize_dataset_name(Schema("schema_ana")) == "_schema_ana" + assert ( + DestinationClientDwhConfiguration( + dataset_name="", default_schema_name="ban_schema" + ).normalize_dataset_name(Schema("schema_ana")) + == "_schema_ana" + ) # empty dataset name is valid! - assert DestinationClientDwhConfiguration(dataset_name="", default_schema_name="schema_ana").normalize_dataset_name(Schema("schema_ana")) == "" + assert ( + DestinationClientDwhConfiguration( + dataset_name="", default_schema_name="schema_ana" + ).normalize_dataset_name(Schema("schema_ana")) + == "" + ) # None dataset name is valid! - assert DestinationClientDwhConfiguration(dataset_name=None, default_schema_name="ban_schema").normalize_dataset_name(Schema("schema_ana")) == "_schema_ana" + assert ( + DestinationClientDwhConfiguration( + dataset_name=None, default_schema_name="ban_schema" + ).normalize_dataset_name(Schema("schema_ana")) + == "_schema_ana" + ) # None dataset name is valid! - assert DestinationClientDwhConfiguration(dataset_name=None, default_schema_name="schema_ana").normalize_dataset_name(Schema("schema_ana")) is None + assert ( + DestinationClientDwhConfiguration( + dataset_name=None, default_schema_name="schema_ana" + ).normalize_dataset_name(Schema("schema_ana")) + is None + ) # now mock the schema name to make sure that it is normalized schema = Schema("barbapapa") schema._schema_name = "BarbaPapa" - assert DestinationClientDwhConfiguration(dataset_name="set", default_schema_name="default").normalize_dataset_name(schema) == "set_barba_papa" + assert ( + DestinationClientDwhConfiguration( + dataset_name="set", default_schema_name="default" + ).normalize_dataset_name(schema) + == "set_barba_papa" + ) def test_normalize_dataset_name_none_default_schema() -> None: # if default schema is None, suffix is not added - assert DestinationClientDwhConfiguration(dataset_name="ban_ana_dataset", default_schema_name=None).normalize_dataset_name(Schema("default")) == "ban_ana_dataset" + assert ( + DestinationClientDwhConfiguration( + dataset_name="ban_ana_dataset", default_schema_name=None + ).normalize_dataset_name(Schema("default")) + == "ban_ana_dataset" + ) diff --git a/tests/common/test_git.py b/tests/common/test_git.py index 96a5f33d94..10bc05970e 100644 --- a/tests/common/test_git.py +++ b/tests/common/test_git.py @@ -3,7 +3,15 @@ import pytest from dlt.common.storages import FileStorage -from dlt.common.git import clone_repo, ensure_remote_head, git_custom_key_command, get_fresh_repo_files, get_repo, is_dirty, is_clean_and_synced +from dlt.common.git import ( + clone_repo, + ensure_remote_head, + git_custom_key_command, + get_fresh_repo_files, + get_repo, + is_dirty, + is_clean_and_synced, +) from tests.utils import test_storage, skipifwindows from tests.common.utils import load_secret, modify_and_commit_file, restore_secret_storage_path @@ -42,7 +50,12 @@ def test_clone(test_storage: FileStorage) -> None: def test_clone_with_commit_id(test_storage: FileStorage) -> None: repo_path = test_storage.make_full_path("awesome_repo") # clone a small public repo - clone_repo(AWESOME_REPO, repo_path, with_git_command=None, 
branch="7f88000be2d4f265c83465fec4b0b3613af347dd").close() + clone_repo( + AWESOME_REPO, + repo_path, + with_git_command=None, + branch="7f88000be2d4f265c83465fec4b0b3613af347dd", + ).close() assert test_storage.has_folder("awesome_repo") # cannot pull detached head with pytest.raises(GitError): diff --git a/tests/common/test_json.py b/tests/common/test_json.py index 983484d326..8136ed3ad2 100644 --- a/tests/common/test_json.py +++ b/tests/common/test_json.py @@ -6,10 +6,24 @@ from dlt.common import json, Decimal, pendulum from dlt.common.arithmetics import numeric_default_context -from dlt.common.json import _DECIMAL, _WEI, custom_pua_decode, _orjson, _simplejson, SupportsJson, _DATETIME +from dlt.common.json import ( + _DECIMAL, + _WEI, + custom_pua_decode, + may_have_pua, + _orjson, + _simplejson, + SupportsJson, + _DATETIME, +) from tests.utils import autouse_test_storage, TEST_STORAGE_ROOT -from tests.cases import JSON_TYPED_DICT, JSON_TYPED_DICT_DECODED, JSON_TYPED_DICT_NESTED, JSON_TYPED_DICT_NESTED_DECODED +from tests.cases import ( + JSON_TYPED_DICT, + JSON_TYPED_DICT_DECODED, + JSON_TYPED_DICT_NESTED, + JSON_TYPED_DICT_NESTED_DECODED, +) from tests.common.utils import json_case_path, load_json_case @@ -158,7 +172,10 @@ def test_json_decimals(json_impl: SupportsJson) -> None: # serialize out of local context s = json_impl.dumps(doc) # full precision. you need to quantize yourself if you need it - assert s == '{"decimal":"99999999999999999999999999999999999999999999999999999999999999999999999999.999"}' + assert ( + s + == '{"decimal":"99999999999999999999999999999999999999999999999999999999999999999999999999.999"}' + ) @pytest.mark.parametrize("json_impl", _JSON_IMPL) @@ -199,18 +216,27 @@ def test_json_pendulum(json_impl: SupportsJson) -> None: @pytest.mark.parametrize("json_impl", _JSON_IMPL) def test_json_named_tuple(json_impl: SupportsJson) -> None: - assert json_impl.dumps(NamedTupleTest("STR", Decimal("1.3333"))) == '{"str_field":"STR","dec_field":"1.3333"}' + assert ( + json_impl.dumps(NamedTupleTest("STR", Decimal("1.3333"))) + == '{"str_field":"STR","dec_field":"1.3333"}' + ) with io.BytesIO() as b: json_impl.typed_dump(NamedTupleTest("STR", Decimal("1.3333")), b) - assert b.getvalue().decode("utf-8") == '{"str_field":"STR","dec_field":"\uF0261.3333"}' + assert b.getvalue().decode("utf-8") == '{"str_field":"STR","dec_field":"\uf0261.3333"}' @pytest.mark.parametrize("json_impl", _JSON_IMPL) def test_data_class(json_impl: SupportsJson) -> None: - assert json_impl.dumps(DataClassTest(str_field="AAA")) == '{"str_field":"AAA","int_field":5,"dec_field":"0.5"}' + assert ( + json_impl.dumps(DataClassTest(str_field="AAA")) + == '{"str_field":"AAA","int_field":5,"dec_field":"0.5"}' + ) with io.BytesIO() as b: json_impl.typed_dump(DataClassTest(str_field="AAA"), b) - assert b.getvalue().decode("utf-8") == '{"str_field":"AAA","int_field":5,"dec_field":"\uF0260.5"}' + assert ( + b.getvalue().decode("utf-8") + == '{"str_field":"AAA","int_field":5,"dec_field":"\uf0260.5"}' + ) @pytest.mark.parametrize("json_impl", _JSON_IMPL) @@ -246,10 +272,21 @@ def test_json_typed_encode(json_impl: SupportsJson) -> None: assert d["decimal"][0] == _DECIMAL assert d["wei"][0] == _WEI # decode all - d_d = {k: custom_pua_decode(v) for k,v in d.items()} + d_d = {k: custom_pua_decode(v) for k, v in d.items()} assert d_d == JSON_TYPED_DICT_DECODED +@pytest.mark.parametrize("json_impl", _JSON_IMPL) +def test_pua_detection(json_impl: SupportsJson) -> None: + with io.BytesIO() as b: + 
json_impl.typed_dump(JSON_TYPED_DICT, b) + content_b = b.getvalue() + assert may_have_pua(content_b) + with open(json_case_path("rasa_event_bot_metadata"), "rb") as f: + content_b = f.read() + assert not may_have_pua(content_b) + + def test_load_and_compare_all_impls() -> None: with open(json_case_path("rasa_event_bot_metadata"), "rb") as f: content_b = f.read() @@ -260,6 +297,6 @@ def test_load_and_compare_all_impls() -> None: # same docs, same output for idx in range(0, len(docs) - 1): - assert docs[idx] == docs[idx+1] - assert dump_s[idx] == dump_s[idx+1] - assert dump_b[idx] == dump_b[idx+1] + assert docs[idx] == docs[idx + 1] + assert dump_s[idx] == dump_s[idx + 1] + assert dump_b[idx] == dump_b[idx + 1] diff --git a/tests/common/test_pipeline_state.py b/tests/common/test_pipeline_state.py index cce610839f..2c6a89b978 100644 --- a/tests/common/test_pipeline_state.py +++ b/tests/common/test_pipeline_state.py @@ -11,7 +11,7 @@ def test_delete_source_state_keys() -> None: "a": {"b": {"c": 1}}, "x": {"y": {"c": 2}}, "y": {"x": {"a": 3}}, - "resources": {"some_data": {"incremental": {"last_value": 123}}} + "resources": {"some_data": {"incremental": {"last_value": 123}}}, } state = deepcopy(_fake_source_state) @@ -54,12 +54,12 @@ def test_get_matching_resources() -> None: # with state argument results = ps._get_matching_resources(pattern, _fake_source_state) - assert sorted(results) == ['events_a', 'events_b'] + assert sorted(results) == ["events_a", "events_b"] # with state context with mock.patch.object(ps, "source_state", autospec=True, return_value=_fake_source_state): results = ps._get_matching_resources(pattern, _fake_source_state) - assert sorted(results) == ['events_a', 'events_b'] + assert sorted(results) == ["events_a", "events_b"] # no resources key results = ps._get_matching_resources(pattern, {}) diff --git a/tests/common/test_pyarrow.py b/tests/common/test_pyarrow.py deleted file mode 100644 index 6dbdae00cb..0000000000 --- a/tests/common/test_pyarrow.py +++ /dev/null @@ -1,51 +0,0 @@ -from copy import deepcopy - -import pyarrow as pa - -from dlt.common.libs.pyarrow import py_arrow_to_table_schema_columns, get_py_arrow_datatype -from dlt.common.destination import DestinationCapabilitiesContext -from tests.cases import TABLE_UPDATE_COLUMNS_SCHEMA - - - -def test_py_arrow_to_table_schema_columns(): - dlt_schema = deepcopy(TABLE_UPDATE_COLUMNS_SCHEMA) - - caps = DestinationCapabilitiesContext.generic_capabilities() - # The arrow schema will add precision - dlt_schema['col4']['precision'] = caps.timestamp_precision - dlt_schema['col6']['precision'], dlt_schema['col6']['scale'] = caps.decimal_precision - dlt_schema['col11']['precision'] = caps.timestamp_precision - dlt_schema['col4_null']['precision'] = caps.timestamp_precision - dlt_schema['col6_null']['precision'], dlt_schema['col6_null']['scale'] = caps.decimal_precision - dlt_schema['col11_null']['precision'] = caps.timestamp_precision - - # Ignoring wei as we can't distinguish from decimal - dlt_schema['col8']['precision'], dlt_schema['col8']['scale'] = (76, 0) - dlt_schema['col8']['data_type'] = 'decimal' - dlt_schema['col8_null']['precision'], dlt_schema['col8_null']['scale'] = (76, 0) - dlt_schema['col8_null']['data_type'] = 'decimal' - # No json type - dlt_schema['col9']['data_type'] = 'text' - del dlt_schema['col9']['variant'] - dlt_schema['col9_null']['data_type'] = 'text' - del dlt_schema['col9_null']['variant'] - - # arrow string fields don't have precision - del dlt_schema['col5_precision']['precision'] - - - # 
Convert to arrow schema - arrow_schema = pa.schema( - [ - pa.field( - column["name"], get_py_arrow_datatype(column, caps, "UTC"), nullable=column["nullable"] - ) - for column in dlt_schema.values() - ] - ) - - result = py_arrow_to_table_schema_columns(arrow_schema) - - # Resulting schema should match the original - assert result == dlt_schema diff --git a/tests/common/test_pydantic.py b/tests/common/test_pydantic.py deleted file mode 100644 index 770fcce6e5..0000000000 --- a/tests/common/test_pydantic.py +++ /dev/null @@ -1,134 +0,0 @@ -import pytest -from typing import Union, Optional, List, Dict, Any -from enum import Enum - -from datetime import datetime, date, time # noqa: I251 -from dlt.common import Decimal -from dlt.common import json - -from pydantic import BaseModel, Json, AnyHttpUrl -from dlt.common.libs.pydantic import pydantic_to_table_schema_columns - - -class StrEnum(str, Enum): - a = "a_value" - b = "b_value" - c = "c_value" - - -class IntEnum(int, Enum): - a = 0 - b = 1 - c = 2 - - -class MixedEnum(Enum): - a_int = 0 - b_str = "b_value" - c_int = 2 - - -class NestedModel(BaseModel): - nested_field: str - - -class Model(BaseModel): - bigint_field: int - text_field: str - timestamp_field: datetime - date_field: date - decimal_field: Decimal - double_field: float - time_field: time - - nested_field: NestedModel - list_field: List[str] - - union_field: Union[int, str] - - optional_field: Optional[float] - - blank_dict_field: dict # type: ignore[type-arg] - parametrized_dict_field: Dict[str, int] - - str_enum_field: StrEnum - int_enum_field: IntEnum - # Both of these shouold coerce to str - mixed_enum_int_field: MixedEnum - mixed_enum_str_field: MixedEnum - - json_field: Json[List[str]] - - url_field: AnyHttpUrl - - any_field: Any - json_any_field: Json[Any] - - - -@pytest.mark.parametrize('instance', [True, False]) -def test_pydantic_model_to_columns(instance: bool) -> None: - if instance: - model = Model( - bigint_field=1, text_field="text", timestamp_field=datetime.now(), - date_field=date.today(), decimal_field=Decimal(1.1), double_field=1.1, - time_field=time(1, 2, 3, 12345), - nested_field=NestedModel(nested_field="nested"), - list_field=["a", "b", "c"], - union_field=1, - optional_field=None, - blank_dict_field={}, - parametrized_dict_field={"a": 1, "b": 2, "c": 3}, - str_enum_field=StrEnum.a, - int_enum_field=IntEnum.a, - mixed_enum_int_field=MixedEnum.a_int, - mixed_enum_str_field=MixedEnum.b_str, - json_field=json.dumps(["a", "b", "c"]), # type: ignore[arg-type] - url_field="https://example.com", # type: ignore[arg-type] - any_field="any_string", - json_any_field=json.dumps("any_string"), - ) - else: - model = Model # type: ignore[assignment] - - result = pydantic_to_table_schema_columns(model) - - assert result["bigint_field"]["data_type"] == "bigint" - assert result["text_field"]["data_type"] == "text" - assert result["timestamp_field"]["data_type"] == "timestamp" - assert result["date_field"]["data_type"] == "date" - assert result["decimal_field"]["data_type"] == "decimal" - assert result["double_field"]["data_type"] == "double" - assert result["time_field"]["data_type"] == "time" - assert result["nested_field"]["data_type"] == "complex" - assert result['list_field']['data_type'] == 'complex' - assert result['union_field']['data_type'] == 'bigint' - assert result['optional_field']['data_type'] == 'double' - assert result['optional_field']['nullable'] is True - assert result['blank_dict_field']['data_type'] == 'complex' - assert 
result['parametrized_dict_field']['data_type'] == 'complex' - assert result['str_enum_field']['data_type'] == 'text' - assert result['int_enum_field']['data_type'] == 'bigint' - assert result['mixed_enum_int_field']['data_type'] == 'text' - assert result['mixed_enum_str_field']['data_type'] == 'text' - assert result['json_field']['data_type'] == 'complex' - assert result['url_field']['data_type'] == 'text' - - # Any type fields are excluded from schema - assert 'any_field' not in result - assert 'json_any_field' not in result - - -def test_pydantic_model_skip_complex_types() -> None: - result = pydantic_to_table_schema_columns(Model, skip_complex_types=True) - - assert result["bigint_field"]["data_type"] == "bigint" - - assert "nested_field" not in result - assert "list_field" not in result - assert "blank_dict_field" not in result - assert "parametrized_dict_field" not in result - assert "json_field" not in result - assert result["bigint_field"]["data_type"] == "bigint" - assert result["text_field"]["data_type"] == "text" - assert result["timestamp_field"]["data_type"] == "timestamp" diff --git a/tests/common/test_time.py b/tests/common/test_time.py index 56c6849ab8..72a9098e4d 100644 --- a/tests/common/test_time.py +++ b/tests/common/test_time.py @@ -3,7 +3,12 @@ from pendulum.tz import UTC from dlt.common import pendulum -from dlt.common.time import timestamp_before, timestamp_within, ensure_pendulum_datetime, ensure_pendulum_date +from dlt.common.time import ( + timestamp_before, + timestamp_within, + ensure_pendulum_datetime, + ensure_pendulum_date, +) from dlt.common.typing import TAnyDateTime @@ -72,9 +77,7 @@ def test_before() -> None: @pytest.mark.parametrize("date_value, expected", test_params) -def test_ensure_pendulum_datetime( - date_value: TAnyDateTime, expected: pendulum.DateTime -) -> None: +def test_ensure_pendulum_datetime(date_value: TAnyDateTime, expected: pendulum.DateTime) -> None: dt = ensure_pendulum_datetime(date_value) assert dt == expected # always UTC @@ -86,4 +89,6 @@ def test_ensure_pendulum_datetime( def test_ensure_pendulum_date_utc() -> None: # when converting from datetimes make sure to shift to UTC before doing date assert ensure_pendulum_date("2021-01-01T00:00:00+05:00") == pendulum.date(2020, 12, 31) - assert ensure_pendulum_date(datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone(timedelta(hours=8)))) == pendulum.date(2020, 12, 31) \ No newline at end of file + assert ensure_pendulum_date( + datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone(timedelta(hours=8))) + ) == pendulum.date(2020, 12, 31) diff --git a/tests/common/test_typing.py b/tests/common/test_typing.py index 399ab284ea..8ea48d7e14 100644 --- a/tests/common/test_typing.py +++ b/tests/common/test_typing.py @@ -1,10 +1,38 @@ - -from typing import List, Literal, Mapping, MutableMapping, MutableSequence, NewType, Sequence, TypeVar, TypedDict, Optional, Union -from dlt.common.configuration.specs.base_configuration import BaseConfiguration, get_config_if_union_hint +from typing import ( + ClassVar, + Final, + List, + Literal, + Mapping, + MutableMapping, + MutableSequence, + NewType, + Sequence, + TypeVar, + TypedDict, + Optional, + Union, +) +from typing_extensions import Annotated, get_args + +from dlt.common.configuration.specs.base_configuration import ( + BaseConfiguration, + get_config_if_union_hint, +) from dlt.common.configuration.specs import GcpServiceAccountCredentialsWithoutDefaults - -from dlt.common.typing import StrAny, extract_inner_type, extract_optional_type, is_dict_generic_type, 
is_list_generic_type, is_literal_type, is_newtype_type, is_optional_type, is_typeddict - +from dlt.common.typing import ( + StrAny, + extract_inner_type, + extract_union_types, + is_dict_generic_type, + is_list_generic_type, + is_literal_type, + is_newtype_type, + is_optional_type, + is_typeddict, + is_union_type, + is_annotated, +) class TTestTyDi(TypedDict): @@ -15,6 +43,8 @@ class TTestTyDi(TypedDict): TOptionalLi = Optional[TTestLi] TOptionalTyDi = Optional[TTestTyDi] +TOptionalUnionLiTyDi = Optional[Union[TTestTyDi, TTestLi]] + def test_is_typeddict() -> None: assert is_typeddict(TTestTyDi) is True @@ -28,6 +58,7 @@ def test_is_list_generic_type() -> None: assert is_list_generic_type(List[str]) is True assert is_list_generic_type(Sequence[str]) is True assert is_list_generic_type(MutableSequence[str]) is True + assert is_list_generic_type(TOptionalUnionLiTyDi) is False # type: ignore[arg-type] def test_is_dict_generic_type() -> None: @@ -38,25 +69,47 @@ def test_is_dict_generic_type() -> None: def test_is_literal() -> None: assert is_literal_type(TTestLi) is True # type: ignore[arg-type] + assert is_literal_type(Final[TTestLi]) is True # type: ignore[arg-type] assert is_literal_type("a") is False # type: ignore[arg-type] assert is_literal_type(List[str]) is False def test_optional() -> None: assert is_optional_type(TOptionalLi) is True # type: ignore[arg-type] + assert is_optional_type(ClassVar[TOptionalLi]) is True # type: ignore[arg-type] assert is_optional_type(TOptionalTyDi) is True # type: ignore[arg-type] assert is_optional_type(TTestTyDi) is False - assert extract_optional_type(TOptionalLi) is TTestLi # type: ignore[arg-type] - assert extract_optional_type(TOptionalTyDi) is TTestTyDi # type: ignore[arg-type] + assert extract_union_types(TOptionalLi) == [TTestLi, type(None)] # type: ignore[arg-type] + assert extract_union_types(TOptionalTyDi) == [TTestTyDi, type(None)] # type: ignore[arg-type] + + +def test_union_types() -> None: + assert is_optional_type(TOptionalLi) is True # type: ignore[arg-type] + assert is_optional_type(TOptionalTyDi) is True # type: ignore[arg-type] + assert is_optional_type(TTestTyDi) is False + assert extract_union_types(TOptionalLi) == [TTestLi, type(None)] # type: ignore[arg-type] + assert extract_union_types(TOptionalTyDi) == [TTestTyDi, type(None)] # type: ignore[arg-type] + assert is_optional_type(TOptionalUnionLiTyDi) is True # type: ignore[arg-type] + assert extract_union_types(TOptionalUnionLiTyDi) == [TTestTyDi, TTestLi, type(None)] # type: ignore[arg-type] + assert is_union_type(MutableSequence[str]) is False def test_is_newtype() -> None: NT1 = NewType("NT1", str) assert is_newtype_type(NT1) is True + assert is_newtype_type(ClassVar[NT1]) is True # type: ignore[arg-type] assert is_newtype_type(TypeVar("TV1", bound=str)) is False # type: ignore[arg-type] assert is_newtype_type(1) is False # type: ignore[arg-type] +def test_is_annotated() -> None: + TA = Annotated[str, "PII", "name"] + assert is_annotated(TA) is True + a_t, *a_m = get_args(TA) + assert a_t is str + assert a_m == ["PII", "name"] + + def test_extract_inner_type() -> None: assert extract_inner_type(1) == 1 # type: ignore[arg-type] assert extract_inner_type(str) is str @@ -77,6 +130,9 @@ def test_get_config_if_union() -> None: assert get_config_if_union_hint(Union[BaseException, str, StrAny]) is None # type: ignore[arg-type] assert get_config_if_union_hint(Union[BaseConfiguration, str, StrAny]) is BaseConfiguration # type: ignore[arg-type] assert get_config_if_union_hint(Union[str, 
BaseConfiguration, StrAny]) is BaseConfiguration # type: ignore[arg-type] - assert get_config_if_union_hint( - Union[GcpServiceAccountCredentialsWithoutDefaults, StrAny, str] # type: ignore[arg-type] - ) is GcpServiceAccountCredentialsWithoutDefaults + assert ( + get_config_if_union_hint( + Union[GcpServiceAccountCredentialsWithoutDefaults, StrAny, str] # type: ignore[arg-type] + ) + is GcpServiceAccountCredentialsWithoutDefaults + ) diff --git a/tests/common/test_utils.py b/tests/common/test_utils.py index d51f54d6d1..7cd8e9f1a2 100644 --- a/tests/common/test_utils.py +++ b/tests/common/test_utils.py @@ -3,11 +3,25 @@ import binascii import pytest from typing import Dict +from dlt.common.exceptions import IdentifierTooLongException, PipelineException, TerminalValueError from dlt.common.runners import Venv -from dlt.common.utils import (graph_find_scc_nodes, flatten_list_of_str_or_dicts, digest128, graph_edges_to_nodes, map_nested_in_place, - reveal_pseudo_secret, obfuscate_pseudo_secret, get_module_name, concat_strings_with_limit, increase_row_count, - merge_row_count, extend_list_deduplicated) +from dlt.common.utils import ( + graph_find_scc_nodes, + flatten_list_of_str_or_dicts, + digest128, + graph_edges_to_nodes, + map_nested_in_place, + reveal_pseudo_secret, + obfuscate_pseudo_secret, + get_module_name, + concat_strings_with_limit, + increase_row_count, + merge_row_counts, + extend_list_deduplicated, + get_exception_trace, + get_exception_trace_chain, +) def test_flatten_list_of_str_or_dicts() -> None: @@ -21,30 +35,27 @@ def test_flatten_list_of_str_or_dicts() -> None: def test_digest128_length() -> None: - assert len(digest128("hash it")) == 120/6 + assert len(digest128("hash it")) == 120 / 6 def test_map_dicts_in_place() -> None: - _d = { - "a": "1", - "b": ["a", "b", ["a", "b"], {"a": "c"}], - "c": { - "d": "e", - "e": ["a", 2] - } + _d = {"a": "1", "b": ["a", "b", ["a", "b"], {"a": "c"}], "c": {"d": "e", "e": ["a", 2]}} + exp_d = { + "a": "11", + "b": ["aa", "bb", ["aa", "bb"], {"a": "cc"}], + "c": {"d": "ee", "e": ["aa", 4]}, } - exp_d = {'a': '11', 'b': ['aa', 'bb', ['aa', 'bb'], {'a': 'cc'}], 'c': {'d': 'ee', 'e': ['aa', 4]}} - assert map_nested_in_place(lambda v: v*2, _d) == exp_d + assert map_nested_in_place(lambda v: v * 2, _d) == exp_d # in place assert _d == exp_d _l = ["a", "b", ["a", "b"], {"a": "c"}] exp_l = ["aa", "bb", ["aa", "bb"], {"a": "cc"}] - assert map_nested_in_place(lambda v: v*2, _l) == exp_l + assert map_nested_in_place(lambda v: v * 2, _l) == exp_l assert _l == exp_l with pytest.raises(ValueError): - map_nested_in_place(lambda v: v*2, "a") + map_nested_in_place(lambda v: v * 2, "a") def test_pseudo_obfuscation() -> None: @@ -79,9 +90,25 @@ def test_concat_strings_with_limit() -> None: assert list(concat_strings_with_limit(philosopher, ";\n", 15)) == ["Bertrand Russell"] # only two strings will be merged (22 chars total) - philosophers = ["Bertrand Russell", "Ludwig Wittgenstein", "G.E. Moore", "J.L. Mackie", "Alfred Tarski"] - moore_merged = ['Bertrand Russell', 'Ludwig Wittgenstein', 'G.E. Moore J.L. Mackie', 'Alfred Tarski'] - moore_merged_2 = ['Bertrand Russell', 'Ludwig Wittgenstein', 'G.E. Moore;\nJ.L. Mackie', 'Alfred Tarski'] + philosophers = [ + "Bertrand Russell", + "Ludwig Wittgenstein", + "G.E. Moore", + "J.L. Mackie", + "Alfred Tarski", + ] + moore_merged = [ + "Bertrand Russell", + "Ludwig Wittgenstein", + "G.E. Moore J.L. Mackie", + "Alfred Tarski", + ] + moore_merged_2 = [ + "Bertrand Russell", + "Ludwig Wittgenstein", + "G.E. Moore;\nJ.L. 
Mackie", + "Alfred Tarski", + ] assert list(concat_strings_with_limit(philosophers, " ", 22)) == moore_merged # none will be merged assert list(concat_strings_with_limit(philosophers, ";\n", 22)) == philosophers @@ -94,7 +121,7 @@ def test_concat_strings_with_limit() -> None: def test_find_scc_nodes() -> None: - edges = [('A', 'B'), ('B', 'C'), ('D', 'E'), ('F', 'G'), ('G', 'H'), ('I', 'I'), ('J', 'J')] + edges = [("A", "B"), ("B", "C"), ("D", "E"), ("F", "G"), ("G", "H"), ("I", "I"), ("J", "J")] def _comp(s): return sorted([tuple(sorted(c)) for c in s]) @@ -113,8 +140,28 @@ def _comp(s): def test_graph_edges_to_nodes() -> None: - edges = [('A', 'B'), ('A', 'C'), ('B', 'C'), ('D', 'E'), ('F', 'G'), ('G', 'H'), ('I', 'I'), ('J', 'J')] - graph = {"A": {"B", "C"}, "B": {"C"}, "C": set(), "D": {"E"}, "E": set(), "F": {"G"}, "G": {"H"}, "H": set(), "I": set(), "J": set()} + edges = [ + ("A", "B"), + ("A", "C"), + ("B", "C"), + ("D", "E"), + ("F", "G"), + ("G", "H"), + ("I", "I"), + ("J", "J"), + ] + graph = { + "A": {"B", "C"}, + "B": {"C"}, + "C": set(), + "D": {"E"}, + "E": set(), + "F": {"G"}, + "G": {"H"}, + "H": set(), + "I": set(), + "J": set(), + } g1 = graph_edges_to_nodes(edges) for perm_edges in itertools.permutations(edges): @@ -126,7 +173,7 @@ def test_graph_edges_to_nodes() -> None: # test a few edge cases assert graph_edges_to_nodes([]) == {} # ignores double edge - assert graph_edges_to_nodes([('A', 'B'), ('A', 'B')]) == {'A': {'B'}, 'B': set()} + assert graph_edges_to_nodes([("A", "B"), ("A", "B")]) == {"A": {"B"}, "B": set()} def test_increase_row_counts() -> None: @@ -135,21 +182,13 @@ def test_increase_row_counts() -> None: increase_row_count(counts, "table2", 0) increase_row_count(counts, "table3", 10) - assert counts == { - "table1": 1, - "table2": 0, - "table3": 10 - } + assert counts == {"table1": 1, "table2": 0, "table3": 10} increase_row_count(counts, "table1", 2) increase_row_count(counts, "table2", 3) increase_row_count(counts, "table3", 4) - assert counts == { - "table1": 3, - "table2": 3, - "table3": 14 - } + assert counts == {"table1": 3, "table2": 3, "table3": 14} def test_merge_row_counts() -> None: @@ -158,30 +197,83 @@ def test_merge_row_counts() -> None: "table2": 3, } - merge_row_count(rc1, { - "table2": 5, - "table3": 20, - }) - assert rc1 == { - "table1": 3, - "table2": 8, - "table3": 20 - } - merge_row_count(rc1, { - "table2": 5, - "table3": 20, - "table4": 2 - }) - assert rc1 == { - "table1": 3, - "table2": 13, - "table3": 40, - "table4": 2 - } + merge_row_counts( + rc1, + { + "table2": 5, + "table3": 20, + }, + ) + assert rc1 == {"table1": 3, "table2": 8, "table3": 20} + merge_row_counts(rc1, {"table2": 5, "table3": 20, "table4": 2}) + assert rc1 == {"table1": 3, "table2": 13, "table3": 40, "table4": 2} def test_extend_list_deduplicated() -> None: - assert extend_list_deduplicated(["one", "two", "three"], ["four", "five", "six"]) == ["one", "two", "three", "four", "five", "six"] - assert extend_list_deduplicated(["one", "two", "three", "six"], ["two", "four", "five", "six"]) == ["one", "two", "three", "six", "four", "five"] - assert extend_list_deduplicated(["one", "two", "three"], ["one", "two", "three"]) == ["one", "two", "three"] + assert extend_list_deduplicated(["one", "two", "three"], ["four", "five", "six"]) == [ + "one", + "two", + "three", + "four", + "five", + "six", + ] + assert extend_list_deduplicated( + ["one", "two", "three", "six"], ["two", "four", "five", "six"] + ) == ["one", "two", "three", "six", "four", "five"] + assert 
extend_list_deduplicated(["one", "two", "three"], ["one", "two", "three"]) == [ + "one", + "two", + "three", + ] assert extend_list_deduplicated([], ["one", "two", "three"]) == ["one", "two", "three"] + + +def test_exception_traces() -> None: + # bare exception without stack trace + trace = get_exception_trace(Exception("Message")) + assert trace["message"] == "Message" + assert trace["exception_type"] == "Exception" + assert "stack_trace" not in trace + assert trace["is_terminal"] is False + + # dlt exception with traceback + try: + raise IdentifierTooLongException("postgres", "table", "too_long_table", 8) + except Exception as exc: + trace = get_exception_trace(exc) + assert trace["exception_type"] == "dlt.common.exceptions.IdentifierTooLongException" + assert isinstance(trace["stack_trace"], list) + assert trace["exception_attrs"] == { + "destination_name": "postgres", + "identifier_type": "table", + "identifier_name": "too_long_table", + "max_identifier_length": 8, + } + assert trace["is_terminal"] is True + + # dlt exception with additional props + try: + raise PipelineException("test_pipeline", "Message") + except Exception as exc: + trace = get_exception_trace(exc) + assert trace["pipeline_name"] == "test_pipeline" + + +def test_exception_trace_chain() -> None: + try: + raise TerminalValueError("Val") + except Exception: + try: + raise IdentifierTooLongException("postgres", "table", "too_long_table", 8) + except Exception as exc: + try: + # explicit cause + raise PipelineException("test_pipeline", "Message") from exc + except Exception as exc: + traces = get_exception_trace_chain(exc) + # outer exception first + assert len(traces) == 3 + assert traces[0]["exception_type"] == "dlt.common.exceptions.PipelineException" + assert traces[1]["exception_type"] == "dlt.common.exceptions.IdentifierTooLongException" + assert traces[2]["exception_type"] == "dlt.common.exceptions.TerminalValueError" diff --git a/tests/common/test_validation.py b/tests/common/test_validation.py index 4583da3a1e..533b91808c 100644 --- a/tests/common/test_validation.py +++ b/tests/common/test_validation.py @@ -1,7 +1,7 @@ from copy import deepcopy import pytest import yaml -from typing import Dict, List, Literal, Mapping, Sequence, TypedDict, Optional +from typing import Dict, List, Literal, Mapping, Sequence, TypedDict, Optional, Union from dlt.common import json from dlt.common.exceptions import DictValidationException @@ -10,9 +10,14 @@ from dlt.common.typing import DictStrStr, StrStr from dlt.common.validation import validate_dict, validate_dict_ignoring_xkeys + TLiteral = Literal["uno", "dos", "tres"] +class TDict(TypedDict): + field: TLiteral + + class TTestRecord(TypedDict): f_bool: bool f_str: str @@ -31,30 +36,15 @@ class TTestRecord(TypedDict): f_literal: TLiteral f_literal_optional: Optional[TLiteral] f_seq_literal: Sequence[Optional[TLiteral]] + f_optional_union: Optional[Union[TLiteral, TDict]] -TEST_COL: TColumnSchema = { - "name": "col1", - "data_type": "bigint", - "nullable": False - } +TEST_COL: TColumnSchema = {"name": "col1", "data_type": "bigint", "nullable": False} TEST_COL_LIST: List[TColumnSchema] = [ - { - "name": "col1", - "data_type": "bigint", - "nullable": False - }, - { - "name": "col2", - "data_type": "double", - "nullable": False - }, - { - "name": "col3", - "data_type": "bool", - "nullable": False - } + {"name": "col1", "data_type": "bigint", "nullable": False}, + {"name": "col2", "data_type": "double", "nullable": False}, + {"name": "col3", "data_type": "bool", "nullable": False}, ] 
TEST_DOC: TTestRecord = { @@ -67,30 +57,34 @@ class TTestRecord(TypedDict): "f_seq_simple": ["x", "y"], "f_seq_optional_str": ["opt1", "opt2"], "f_seq_of_optional_int": [1, 2, 3], - "f_list_of_dict": TEST_COL_LIST, + "f_list_of_dict": TEST_COL_LIST, "f_dict_simple": {"col1": "map_me"}, "f_map_simple": {"col1": "map_me"}, "f_map_of_dict": {"col1": deepcopy(TEST_COL)}, "f_column": deepcopy(TEST_COL), "f_literal": "uno", "f_literal_optional": "dos", - "f_seq_literal": ["uno", "dos", "tres"] + "f_seq_literal": ["uno", "dos", "tres"], + "f_optional_union": {"field": "uno"}, } + @pytest.fixture def test_doc() -> TTestRecord: return deepcopy(TEST_DOC) def test_validate_schema_cases() -> None: - with open("tests/common/cases/schemas/eth/ethereum_schema_v4.yml", mode="r", encoding="utf-8") as f: + with open( + "tests/common/cases/schemas/eth/ethereum_schema_v8.yml", mode="r", encoding="utf-8" + ) as f: schema_dict: TStoredSchema = yaml.safe_load(f) validate_dict_ignoring_xkeys( spec=TStoredSchema, doc=schema_dict, path=".", - validator_f=simple_regex_validator + validator_f=simple_regex_validator, ) # with open("tests/common/cases/schemas/rasa/event.schema.json") as f: @@ -227,3 +221,23 @@ def test_filter(test_doc: TTestRecord) -> None: test_doc["x-extra"] = "x-annotation" # type: ignore[typeddict-unknown-key] # remove x-extra with a filter validate_dict(TTestRecord, test_doc, ".", filter_f=lambda k: k != "x-extra") + + +def test_nested_union(test_doc: TTestRecord) -> None: + test_doc["f_optional_union"] = {"field": "uno"} + validate_dict(TTestRecord, TEST_DOC, ".") + + test_doc["f_optional_union"] = {"field": "not valid"} # type: ignore[typeddict-item] + with pytest.raises(DictValidationException) as e: + validate_dict(TTestRecord, test_doc, ".") + assert e.value.field == "f_optional_union" + assert e.value.value == {"field": "not valid"} + + test_doc["f_optional_union"] = "dos" + validate_dict(TTestRecord, test_doc, ".") + + test_doc["f_optional_union"] = "blah" # type: ignore[typeddict-item] + with pytest.raises(DictValidationException) as e: + validate_dict(TTestRecord, test_doc, ".") + assert e.value.field == "f_optional_union" + assert e.value.value == "blah" diff --git a/tests/common/test_wei.py b/tests/common/test_wei.py index 8ee47d11c0..1f15978ddc 100644 --- a/tests/common/test_wei.py +++ b/tests/common/test_wei.py @@ -7,9 +7,12 @@ def test_init() -> None: assert Wei.from_int256(10**18, decimals=18) == 1 # make sure the wei scale is supported assert Wei.from_int256(1, decimals=18) == Decimal("0.000000000000000001") - assert Wei.from_int256(2**256-1) == 2**256-1 - assert str(Wei.from_int256(2**256-1, decimals=18)) == "115792089237316195423570985008687907853269984665640564039457.584007913129639935" - assert str(Wei.from_int256(2**256-1)) == str(2**256-1) + assert Wei.from_int256(2**256 - 1) == 2**256 - 1 + assert ( + str(Wei.from_int256(2**256 - 1, decimals=18)) + == "115792089237316195423570985008687907853269984665640564039457.584007913129639935" + ) + assert str(Wei.from_int256(2**256 - 1)) == str(2**256 - 1) assert type(Wei.from_int256(1)) is Wei @@ -30,6 +33,14 @@ def test_wei_variant() -> None: # we get variant value when we call Wei assert Wei(578960446186580977117854925043439539266)() == 578960446186580977117854925043439539266 - assert Wei(578960446186580977117854925043439539267)() == ("str", "578960446186580977117854925043439539267") - assert Wei(-578960446186580977117854925043439539267)() == -578960446186580977117854925043439539267 - assert 
Wei(-578960446186580977117854925043439539268)() == ("str", "-578960446186580977117854925043439539268") + assert Wei(578960446186580977117854925043439539267)() == ( + "str", + "578960446186580977117854925043439539267", + ) + assert ( + Wei(-578960446186580977117854925043439539267)() == -578960446186580977117854925043439539267 + ) + assert Wei(-578960446186580977117854925043439539268)() == ( + "str", + "-578960446186580977117854925043439539268", + ) diff --git a/tests/common/utils.py b/tests/common/utils.py index 54a48825af..0235d18bbe 100644 --- a/tests/common/utils.py +++ b/tests/common/utils.py @@ -16,9 +16,11 @@ COMMON_TEST_CASES_PATH = "./tests/common/cases/" # for import schema tests, change when upgrading the schema version -IMPORTED_VERSION_HASH_ETH_V6 = "Q/LxiP7taycE+u9PQNb2wiit+G5GntiifOUK2CFM3sQ=" +IMPORTED_VERSION_HASH_ETH_V8 = "C5An8WClbavalXDdNSqXbdI7Swqh/mTWMcwWKCF//EE=" # test sentry DSN -TEST_SENTRY_DSN = "https://797678dd0af64b96937435326c7d30c1@o1061158.ingest.sentry.io/4504306172821504" +TEST_SENTRY_DSN = ( + "https://797678dd0af64b96937435326c7d30c1@o1061158.ingest.sentry.io/4504306172821504" +) # preserve secrets path to be able to restore it SECRET_STORAGE_PATH = environ_provider.SECRET_STORAGE_PATH @@ -42,11 +44,7 @@ def yml_case_path(name: str) -> str: def row_to_column_schemas(row: StrAny) -> TTableSchemaColumns: - return {k: { - "name": k, - "data_type": "text", - "nullable": False - } for k in row.keys()} + return {k: {"name": k, "data_type": "text", "nullable": False} for k in row.keys()} @pytest.fixture(autouse=True) @@ -56,13 +54,17 @@ def restore_secret_storage_path() -> None: def load_secret(name: str) -> str: environ_provider.SECRET_STORAGE_PATH = "./tests/common/cases/secrets/%s" - secret, _ = environ_provider.EnvironProvider().get_value(name, environ_provider.TSecretValue, None) + secret, _ = environ_provider.EnvironProvider().get_value( + name, environ_provider.TSecretValue, None + ) if not secret: raise FileNotFoundError(environ_provider.SECRET_STORAGE_PATH % name) return secret -def modify_and_commit_file(repo_path: str, file_name: str, content: str = "NEW README CONTENT") -> Tuple[str, Commit]: +def modify_and_commit_file( + repo_path: str, file_name: str, content: str = "NEW README CONTENT" +) -> Tuple[str, Commit]: file_path = os.path.join(repo_path, file_name) with open(file_path, "w", encoding="utf-8") as f: diff --git a/tests/conftest.py b/tests/conftest.py index 56760508da..7e12990fd0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,12 +4,26 @@ from typing import List # patch which providers to enable -from dlt.common.configuration.providers import ConfigProvider, EnvironProvider, SecretsTomlProvider, ConfigTomlProvider -from dlt.common.configuration.specs.config_providers_context import ConfigProvidersContext, ConfigProvidersConfiguration +from dlt.common.configuration.providers import ( + ConfigProvider, + EnvironProvider, + SecretsTomlProvider, + ConfigTomlProvider, +) +from dlt.common.configuration.specs.config_providers_context import ( + ConfigProvidersContext, + ConfigProvidersConfiguration, +) + def initial_providers() -> List[ConfigProvider]: # do not read the global config - return [EnvironProvider(), SecretsTomlProvider(project_dir="tests/.dlt", add_global_config=False), ConfigTomlProvider(project_dir="tests/.dlt", add_global_config=False)] + return [ + EnvironProvider(), + SecretsTomlProvider(project_dir="tests/.dlt", add_global_config=False), + ConfigTomlProvider(project_dir="tests/.dlt", add_global_config=False), + ] + 
ConfigProvidersContext.initial_providers = initial_providers # type: ignore[method-assign] # also disable extras @@ -26,29 +40,41 @@ def pytest_configure(config): from dlt.common.storages import configuration as storage_configuration test_storage_root = "_storage" - run_configuration.RunConfiguration.config_files_storage_path = os.path.join(test_storage_root, "config/") - run_configuration.RunConfiguration.dlthub_telemetry_segment_write_key = "TLJiyRkGVZGCi2TtjClamXpFcxAA1rSB" + run_configuration.RunConfiguration.config_files_storage_path = os.path.join( + test_storage_root, "config/" + ) + run_configuration.RunConfiguration.dlthub_telemetry_segment_write_key = ( + "TLJiyRkGVZGCi2TtjClamXpFcxAA1rSB" + ) delattr(run_configuration.RunConfiguration, "__init__") run_configuration.RunConfiguration = dataclasses.dataclass(run_configuration.RunConfiguration, init=True, repr=False) # type: ignore # push telemetry to CI - storage_configuration.LoadStorageConfiguration.load_volume_path = os.path.join(test_storage_root, "load") + storage_configuration.LoadStorageConfiguration.load_volume_path = os.path.join( + test_storage_root, "load" + ) delattr(storage_configuration.LoadStorageConfiguration, "__init__") storage_configuration.LoadStorageConfiguration = dataclasses.dataclass(storage_configuration.LoadStorageConfiguration, init=True, repr=False) # type: ignore[misc, call-overload] - storage_configuration.NormalizeStorageConfiguration.normalize_volume_path = os.path.join(test_storage_root, "normalize") + storage_configuration.NormalizeStorageConfiguration.normalize_volume_path = os.path.join( + test_storage_root, "normalize" + ) # delete __init__, otherwise it will not be recreated by dataclass delattr(storage_configuration.NormalizeStorageConfiguration, "__init__") storage_configuration.NormalizeStorageConfiguration = dataclasses.dataclass(storage_configuration.NormalizeStorageConfiguration, init=True, repr=False) # type: ignore[misc, call-overload] - storage_configuration.SchemaStorageConfiguration.schema_volume_path = os.path.join(test_storage_root, "schemas") + storage_configuration.SchemaStorageConfiguration.schema_volume_path = os.path.join( + test_storage_root, "schemas" + ) delattr(storage_configuration.SchemaStorageConfiguration, "__init__") storage_configuration.SchemaStorageConfiguration = dataclasses.dataclass(storage_configuration.SchemaStorageConfiguration, init=True, repr=False) # type: ignore[misc, call-overload] - - assert run_configuration.RunConfiguration.config_files_storage_path == os.path.join(test_storage_root, "config/") - assert run_configuration.RunConfiguration().config_files_storage_path == os.path.join(test_storage_root, "config/") - + assert run_configuration.RunConfiguration.config_files_storage_path == os.path.join( + test_storage_root, "config/" + ) + assert run_configuration.RunConfiguration().config_files_storage_path == os.path.join( + test_storage_root, "config/" + ) # path pipeline instance id up to millisecond from dlt.common import pendulum @@ -59,7 +85,7 @@ def _create_pipeline_instance_id(self) -> str: Pipeline._create_pipeline_instance_id = _create_pipeline_instance_id # type: ignore[method-assign] # push sentry to ci - os.environ["RUNTIME__SENTRY_DSN"] = "https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752" + # os.environ["RUNTIME__SENTRY_DSN"] = "https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752" # disable sqlfluff logging for log in ["sqlfluff.parser", "sqlfluff.linter", 
"sqlfluff.templater", "sqlfluff.lexer"]: diff --git a/tests/destinations/conftest.py b/tests/destinations/conftest.py new file mode 100644 index 0000000000..89f7cdffed --- /dev/null +++ b/tests/destinations/conftest.py @@ -0,0 +1,7 @@ +from tests.utils import ( + preserve_environ, + autouse_test_storage, + patch_home_dir, + wipe_pipeline, + duckdb_pipeline_location, +) diff --git a/tests/destinations/test_destination_name_and_config.py b/tests/destinations/test_destination_name_and_config.py new file mode 100644 index 0000000000..930a72d95d --- /dev/null +++ b/tests/destinations/test_destination_name_and_config.py @@ -0,0 +1,215 @@ +import os +import pytest +import posixpath + +import dlt +from dlt.common.configuration.exceptions import ConfigFieldMissingException +from dlt.common.typing import DictStrStr +from dlt.destinations import duckdb, dummy, filesystem +from dlt.common.utils import uniq_id + +from tests.common.configuration.utils import environment +from tests.utils import TEST_STORAGE_ROOT + + +def test_default_name_to_type() -> None: + duck = duckdb(credentials=os.path.join(TEST_STORAGE_ROOT, "quack.duckdb")) + p = dlt.pipeline(pipeline_name="quack_pipeline", destination=duck) + load_info = p.run([1, 2, 3], table_name="table", dataset_name="dataset") + + assert p.destination.destination_name == "duckdb" + assert p.destination.destination_type == "dlt.destinations.duckdb" + assert load_info.destination_name == "duckdb" + assert load_info.destination_type == "dlt.destinations.duckdb" + assert load_info.environment is None + + +def test_set_name_and_environment() -> None: + duck = duckdb( + credentials=os.path.join(TEST_STORAGE_ROOT, "quack.duckdb"), + destination_name="duck1", + environment="production", + ) + p = dlt.pipeline(pipeline_name="quack_pipeline", destination=duck) + assert ( + p.destination.destination_type == "dlt.destinations.duckdb" == p.state["destination_type"] + ) + assert p.destination.destination_name == "duck1" == p.state["destination_name"] + + load_info = p.run([1, 2, 3], table_name="table", dataset_name="dataset") + assert ( + p.destination.destination_type == "dlt.destinations.duckdb" == p.state["destination_type"] + ) + assert p.destination.destination_name == "duck1" == p.state["destination_name"] + + assert load_info.destination_name == "duck1" + assert load_info.destination_type == "dlt.destinations.duckdb" + # TODO: create destination_info and have same information for staging + assert load_info.environment == "production" + p.drop() + + rp = dlt.pipeline(pipeline_name="quack_pipeline", destination=duck) + assert rp.default_schema_name is None + assert rp.schema_names == [] + rp.sync_destination() + assert rp.default_schema_name == "quack" + assert rp.schema_names == ["quack"] + + +def test_preserve_destination_instance() -> None: + dummy1 = dummy(destination_name="dummy1", environment="dev/null/1") + filesystem1 = filesystem( + posixpath.join("file://", posixpath.abspath(TEST_STORAGE_ROOT)), + destination_name="local_fs", + environment="devel", + ) + p = dlt.pipeline(pipeline_name="dummy_pipeline", destination=dummy1, staging=filesystem1) + destination_id = id(p.destination) + staging_id = id(p.staging) + import os + + os.environ["COMPLETED_PROB"] = "1.0" + load_info = p.run([1, 2, 3], table_name="table", dataset_name="dataset") + load_info.raise_on_failed_jobs() + # destination and staging stay the same + assert destination_id == id(p.destination) + assert staging_id == id(p.staging) + # all names and types correctly set + assert ( + 
p.destination.destination_name + == "dummy1" + == p.state["destination_name"] + == load_info.destination_name + ) + assert ( + p.destination.destination_type + == "dlt.destinations.dummy" + == p.state["destination_type"] + == load_info.destination_type + ) + assert p.destination.config_params["environment"] == "dev/null/1" == load_info.environment + assert ( + p.staging.destination_name + == "local_fs" + == p.state["staging_name"] + == load_info.staging_name + ) + assert ( + p.staging.destination_type + == "dlt.destinations.filesystem" + == p.state["staging_type"] + == load_info.staging_type + ) + assert p.staging.config_params["environment"] == "devel" + + # attach pipeline + p = dlt.attach(pipeline_name="dummy_pipeline") + assert p.destination.destination_name == "dummy1" == p.state["destination_name"] + assert p.destination.destination_type == "dlt.destinations.dummy" == p.state["destination_type"] + assert p.staging.destination_name == "local_fs" == p.state["staging_name"] + assert p.staging.destination_type == "dlt.destinations.filesystem" == p.state["staging_type"] + + # config args should not contain self + assert "self" not in p.destination.config_params + + # this information was lost and is not present in the config/secrets when pipeline is restored + assert "environment" not in p.destination.config_params + assert "environment" not in p.staging.config_params + # for that reason dest client cannot be instantiated + with pytest.raises(ConfigFieldMissingException): + p.destination_client() + assert p.default_schema_name == "dummy" + assert p.schema_names == ["dummy"] + + # create new pipeline with the same name but different destination + p = dlt.pipeline(pipeline_name="dummy_pipeline", destination="duckdb") + assert p.destination.destination_name == "duckdb" == p.state["destination_name"] + + +def test_config_respects_dataset_name(environment: DictStrStr) -> None: + environment["DESTINATION__ENVIRONMENT"] = "devel" + environment["QUACK_PIPELINE_DEVEL__DATASET_NAME"] = "devel_dataset" + + environment["DESTINATION__DUCK1__ENVIRONMENT"] = "staging" + environment["QUACK_PIPELINE_STAGING__DATASET_NAME"] = "staging_dataset" + + environment["DESTINATION__DUCK2__ENVIRONMENT"] = "production" + environment["QUACK_PIPELINE_PRODUCTION__DATASET_NAME"] = "production_dataset" + + # default will pick from global destination settings + duck = duckdb(credentials=os.path.join(TEST_STORAGE_ROOT, "quack.duckdb")) + p = dlt.pipeline(pipeline_name="quack_pipeline_devel", destination=duck) + load_info = p.run([1, 2, 3], table_name="table") + with p.destination_client() as client: + assert client.config.environment == "devel" + assert client.config.dataset_name == "devel_dataset" # type: ignore + assert load_info.environment == "devel" + + # duck1 will be staging + duck = duckdb( + credentials=os.path.join(TEST_STORAGE_ROOT, "quack.duckdb"), destination_name="duck1" + ) + p = dlt.pipeline(pipeline_name="quack_pipeline_staging", destination=duck) + load_info = p.run([1, 2, 3], table_name="table") + with p.destination_client() as client: + assert client.config.environment == "staging" + assert client.config.dataset_name == "staging_dataset" # type: ignore + assert load_info.environment == "staging" + + # duck2 will be production + duck = duckdb( + credentials=os.path.join(TEST_STORAGE_ROOT, "quack.duckdb"), destination_name="duck2" + ) + p = dlt.pipeline(pipeline_name="quack_pipeline_production", destination=duck) + load_info = p.run([1, 2, 3], table_name="table") + with p.destination_client() as client: + 
assert client.config.environment == "production" + assert client.config.dataset_name == "production_dataset" # type: ignore + assert load_info.environment == "production" + + +def test_pipeline_config(environment: DictStrStr) -> None: + environment["DESTINATION_TYPE"] = "redshift" + p = dlt.pipeline(pipeline_name="p_" + uniq_id()) + assert p.config.destination_type == "redshift" + assert p.destination.destination_name == "redshift" + assert p.destination.destination_type == "dlt.destinations.redshift" + assert p.staging is None + + del environment["DESTINATION_TYPE"] + environment["DESTINATION_NAME"] = "duckdb" + p = dlt.pipeline(pipeline_name="p_" + uniq_id()) + assert p.destination.destination_name == "duckdb" + assert p.destination.destination_type == "dlt.destinations.duckdb" + assert p.staging is None + + environment["DESTINATION_TYPE"] = "bigquery" + environment["DESTINATION_NAME"] = "my_dest" + p = dlt.pipeline(pipeline_name="p_" + uniq_id()) + assert p.destination.destination_name == "my_dest" + assert p.destination.destination_type == "dlt.destinations.bigquery" + assert p.staging is None + + environment["STAGING_TYPE"] = "filesystem" + environment["STAGING_NAME"] = "my_staging" + p = dlt.pipeline(pipeline_name="p_" + uniq_id()) + assert p.destination.destination_name == "my_dest" + assert p.destination.destination_type == "dlt.destinations.bigquery" + assert p.staging.destination_type == "dlt.destinations.filesystem" + assert p.staging.destination_name == "my_staging" + + +def test_destination_config_in_name(environment: DictStrStr) -> None: + environment["DESTINATION_TYPE"] = "filesystem" + environment["DESTINATION_NAME"] = "filesystem-prod" + + p = dlt.pipeline(pipeline_name="p_" + uniq_id()) + + # we do not have config for filesystem-prod so getting destination client must fail + with pytest.raises(ConfigFieldMissingException): + p.destination_client() + + environment["DESTINATION__FILESYSTEM-PROD__BUCKET_URL"] = "file://" + posixpath.abspath( + TEST_STORAGE_ROOT + ) + assert p.destination_client().fs_path.endswith(TEST_STORAGE_ROOT) # type: ignore[attr-defined] diff --git a/tests/destinations/test_path_utils.py b/tests/destinations/test_path_utils.py index 4317da59b6..1cf2b17d76 100644 --- a/tests/destinations/test_path_utils.py +++ b/tests/destinations/test_path_utils.py @@ -18,9 +18,11 @@ def test_create_path() -> None: "table_name": "table_name", "load_id": "load_id", "file_id": "file_id", - "ext": "ext" + "ext": "ext", } - path = path_utils.create_path("{schema_name}/{table_name}/{load_id}.{file_id}.{ext}", **path_vars) + path = path_utils.create_path( + "{schema_name}/{table_name}/{load_id}.{file_id}.{ext}", **path_vars + ) assert path == "schema_name/table_name/load_id.file_id.ext" # extension gets added automatically @@ -29,14 +31,23 @@ def test_create_path() -> None: def test_get_table_prefix_layout() -> None: - - prefix_layout = path_utils.get_table_prefix_layout("{schema_name}/{table_name}/{load_id}.{file_id}.{ext}") + prefix_layout = path_utils.get_table_prefix_layout( + "{schema_name}/{table_name}/{load_id}.{file_id}.{ext}" + ) assert prefix_layout == "{schema_name}/{table_name}/" - assert prefix_layout.format(schema_name="my_schema", table_name="my_table") == "my_schema/my_table/" + assert ( + prefix_layout.format(schema_name="my_schema", table_name="my_table") + == "my_schema/my_table/" + ) - prefix_layout = path_utils.get_table_prefix_layout("some_random{schema_name}/stuff_in_between/{table_name}/{load_id}") + prefix_layout = path_utils.get_table_prefix_layout( + 
"some_random{schema_name}/stuff_in_between/{table_name}/{load_id}" + ) assert prefix_layout == "some_random{schema_name}/stuff_in_between/{table_name}/" - assert prefix_layout.format(schema_name="my_schema", table_name="my_table") == "some_randommy_schema/stuff_in_between/my_table/" + assert ( + prefix_layout.format(schema_name="my_schema", table_name="my_table") + == "some_randommy_schema/stuff_in_between/my_table/" + ) # disallow missing table_name with pytest.raises(CantExtractTablePrefix): @@ -48,7 +59,10 @@ def test_get_table_prefix_layout() -> None: # disallow any placeholders before table name (ie. Athena) with pytest.raises(CantExtractTablePrefix): - path_utils.get_table_prefix_layout("{schema_name}some_random{table_name}/stuff_in_between/", supported_prefix_placeholders=[]) + path_utils.get_table_prefix_layout( + "{schema_name}some_random{table_name}/stuff_in_between/", + supported_prefix_placeholders=[], + ) # disallow table_name without following separator with pytest.raises(CantExtractTablePrefix): diff --git a/tests/extract/cases/eth_source/ethereum.schema.yaml b/tests/extract/cases/eth_source/ethereum.schema.yaml index b5d54f9c49..5a8db47163 100644 --- a/tests/extract/cases/eth_source/ethereum.schema.yaml +++ b/tests/extract/cases/eth_source/ethereum.schema.yaml @@ -1,328 +1,428 @@ -version: 11 -version_hash: GPHX4B+0xnRuGZM/w3UYVbldRyg8jSJp1G60K4RDcZg= -engine_version: 5 +version: 14 +version_hash: VuzNqiLOk7XuPxYLndMFMPHTDVItKU5ayiy70nQLdus= +engine_version: 7 name: ethereum tables: _dlt_loads: columns: load_id: - data_type: text nullable: false - schema_name: data_type: text + name: load_id + schema_name: nullable: true + data_type: text + name: schema_name status: - data_type: bigint nullable: false + data_type: bigint + name: status inserted_at: - data_type: timestamp nullable: false + data_type: timestamp + name: inserted_at + schema_version_hash: + nullable: true + data_type: text + name: schema_version_hash write_disposition: skip description: Created by DLT. Tracks completed loads + schema_contract: {} + name: _dlt_loads + resource: _dlt_loads _dlt_version: columns: version: - data_type: bigint nullable: false - engine_version: data_type: bigint + name: version + engine_version: nullable: false + data_type: bigint + name: engine_version inserted_at: - data_type: timestamp nullable: false + data_type: timestamp + name: inserted_at schema_name: - data_type: text nullable: false - version_hash: data_type: text + name: schema_name + version_hash: nullable: false - schema: data_type: text + name: version_hash + schema: nullable: false + data_type: text + name: schema write_disposition: skip description: Created by DLT. 
Tracks schema updates + schema_contract: {} + name: _dlt_version + resource: _dlt_version blocks: description: Ethereum blocks x-annotation: this will be preserved on save write_disposition: append - table_sealed: true filters: includes: [] excludes: [] columns: _dlt_load_id: + nullable: false description: load id coming from the extractor data_type: text - nullable: false + name: _dlt_load_id _dlt_id: + nullable: false unique: true data_type: text - nullable: false + name: _dlt_id number: + nullable: false primary_key: true data_type: bigint - nullable: false + name: number parent_hash: - data_type: text nullable: true + data_type: text + name: parent_hash hash: + nullable: false cluster: true unique: true data_type: text - nullable: false + name: hash base_fee_per_gas: - data_type: wei nullable: false - difficulty: data_type: wei + name: base_fee_per_gas + difficulty: nullable: false + data_type: wei + name: difficulty extra_data: - data_type: text nullable: true + data_type: text + name: extra_data gas_limit: - data_type: bigint nullable: false - gas_used: data_type: bigint + name: gas_limit + gas_used: nullable: false + data_type: bigint + name: gas_used logs_bloom: - data_type: binary nullable: true + data_type: binary + name: logs_bloom miner: - data_type: text nullable: true - mix_hash: data_type: text + name: miner + mix_hash: nullable: true - nonce: data_type: text + name: mix_hash + nonce: nullable: true - receipts_root: data_type: text + name: nonce + receipts_root: nullable: true - sha3_uncles: data_type: text + name: receipts_root + sha3_uncles: nullable: true + data_type: text + name: sha3_uncles size: - data_type: bigint nullable: true + data_type: bigint + name: size state_root: - data_type: text nullable: false + data_type: text + name: state_root timestamp: + nullable: false unique: true sort: true data_type: timestamp - nullable: false + name: timestamp total_difficulty: - data_type: wei nullable: true + data_type: wei + name: total_difficulty transactions_root: - data_type: text nullable: false + data_type: text + name: transactions_root + schema_contract: {} + name: blocks + resource: blocks blocks__transactions: parent: blocks columns: _dlt_id: + nullable: false unique: true data_type: text - nullable: false + name: _dlt_id block_number: + nullable: false primary_key: true foreign_key: true data_type: bigint - nullable: false + name: block_number transaction_index: + nullable: false primary_key: true data_type: bigint - nullable: false + name: transaction_index hash: + nullable: false unique: true data_type: text - nullable: false + name: hash block_hash: + nullable: false cluster: true data_type: text - nullable: false + name: block_hash block_timestamp: + nullable: false sort: true data_type: timestamp - nullable: false + name: block_timestamp chain_id: - data_type: text nullable: true - from: data_type: text + name: chain_id + from: nullable: true + data_type: text + name: from gas: - data_type: bigint nullable: true - gas_price: data_type: bigint + name: gas + gas_price: nullable: true + data_type: bigint + name: gas_price input: - data_type: text nullable: true + data_type: text + name: input max_fee_per_gas: - data_type: wei nullable: true - max_priority_fee_per_gas: data_type: wei + name: max_fee_per_gas + max_priority_fee_per_gas: nullable: true + data_type: wei + name: max_priority_fee_per_gas nonce: - data_type: bigint nullable: true + data_type: bigint + name: nonce r: - data_type: text nullable: true - s: data_type: text + name: r + s: nullable: true + 
data_type: text + name: s status: - data_type: bigint nullable: true + data_type: bigint + name: status to: - data_type: text nullable: true - type: data_type: text + name: to + type: nullable: true + data_type: text + name: type v: - data_type: bigint nullable: true + data_type: bigint + name: v value: - data_type: wei nullable: false + data_type: wei + name: value eth_value: - data_type: decimal nullable: true + data_type: decimal + name: eth_value + name: blocks__transactions blocks__transactions__logs: parent: blocks__transactions columns: _dlt_id: + nullable: false unique: true data_type: text - nullable: false + name: _dlt_id address: - data_type: text nullable: false + data_type: text + name: address block_timestamp: + nullable: false sort: true data_type: timestamp - nullable: false + name: block_timestamp block_hash: + nullable: false cluster: true data_type: text - nullable: false + name: block_hash block_number: + nullable: false primary_key: true foreign_key: true data_type: bigint - nullable: false + name: block_number transaction_index: + nullable: false primary_key: true foreign_key: true data_type: bigint - nullable: false + name: transaction_index log_index: + nullable: false primary_key: true data_type: bigint - nullable: false + name: log_index data: - data_type: text nullable: true + data_type: text + name: data removed: - data_type: bool nullable: true + data_type: bool + name: removed transaction_hash: - data_type: text nullable: false + data_type: text + name: transaction_hash + name: blocks__transactions__logs blocks__transactions__logs__topics: parent: blocks__transactions__logs columns: _dlt_parent_id: + nullable: false foreign_key: true data_type: text - nullable: false + name: _dlt_parent_id _dlt_list_idx: - data_type: bigint nullable: false + data_type: bigint + name: _dlt_list_idx _dlt_id: + nullable: false unique: true data_type: text - nullable: false + name: _dlt_id _dlt_root_id: + nullable: false root_key: true data_type: text - nullable: false + name: _dlt_root_id value: - data_type: text nullable: true + data_type: text + name: value + name: blocks__transactions__logs__topics blocks__transactions__access_list: parent: blocks__transactions columns: _dlt_parent_id: + nullable: false foreign_key: true data_type: text - nullable: false + name: _dlt_parent_id _dlt_list_idx: - data_type: bigint nullable: false + data_type: bigint + name: _dlt_list_idx _dlt_id: + nullable: false unique: true data_type: text - nullable: false + name: _dlt_id _dlt_root_id: + nullable: false root_key: true data_type: text - nullable: false + name: _dlt_root_id address: - data_type: text nullable: true + data_type: text + name: address + name: blocks__transactions__access_list blocks__transactions__access_list__storage_keys: parent: blocks__transactions__access_list columns: _dlt_parent_id: + nullable: false foreign_key: true data_type: text - nullable: false + name: _dlt_parent_id _dlt_list_idx: - data_type: bigint nullable: false + data_type: bigint + name: _dlt_list_idx _dlt_id: + nullable: false unique: true data_type: text - nullable: false + name: _dlt_id _dlt_root_id: + nullable: false root_key: true data_type: text - nullable: false + name: _dlt_root_id value: - data_type: text nullable: true + data_type: text + name: value + name: blocks__transactions__access_list__storage_keys blocks__uncles: parent: blocks columns: _dlt_parent_id: + nullable: false foreign_key: true data_type: text - nullable: false + name: _dlt_parent_id _dlt_list_idx: - data_type: bigint nullable: 
false + data_type: bigint + name: _dlt_list_idx _dlt_id: + nullable: false unique: true data_type: text - nullable: false + name: _dlt_id _dlt_root_id: + nullable: false root_key: true data_type: text - nullable: false + name: _dlt_root_id value: - data_type: text nullable: true + data_type: text + name: value + name: blocks__uncles settings: - schema_sealed: true default_hints: foreign_key: - _dlt_parent_id @@ -342,6 +442,7 @@ settings: preferred_types: timestamp: timestamp block_timestamp: timestamp + schema_contract: {} normalizers: names: dlt.common.normalizers.names.snake_case json: diff --git a/tests/extract/cases/eth_source/source.py b/tests/extract/cases/eth_source/source.py index 08adb79a22..4410954f0b 100644 --- a/tests/extract/cases/eth_source/source.py +++ b/tests/extract/cases/eth_source/source.py @@ -1,6 +1,7 @@ from typing import Any import dlt + @dlt.source def ethereum() -> Any: # this just tests if the schema "ethereum" was loaded diff --git a/tests/extract/cases/imported.any b/tests/extract/cases/imported.any new file mode 100644 index 0000000000..02c1138b21 --- /dev/null +++ b/tests/extract/cases/imported.any @@ -0,0 +1 @@ +Any files may be imported into buffered writer \ No newline at end of file diff --git a/tests/extract/cases/section_source/external_resources.py b/tests/extract/cases/section_source/external_resources.py index 0a991d7438..07d3767e0a 100644 --- a/tests/extract/cases/section_source/external_resources.py +++ b/tests/extract/cases/section_source/external_resources.py @@ -6,19 +6,27 @@ @dlt.source def with_external(source_val: str = dlt.config.value): - @dlt.resource def inner_resource(val): yield val - return dlt.resource([source_val], name="source_val"), inner_resource(source_val), init_resource_f_2, resource_f_2 + return ( + dlt.resource([source_val], name="source_val"), + inner_resource(source_val), + init_resource_f_2, + resource_f_2, + ) @dlt.source def with_bound_external(source_val: str = dlt.config.value): - @dlt.resource def inner_resource(val): yield val - return dlt.resource([source_val], name="source_val"), inner_resource(source_val), init_resource_f_2(), resource_f_2() \ No newline at end of file + return ( + dlt.resource([source_val], name="source_val"), + inner_resource(source_val), + init_resource_f_2(), + resource_f_2(), + ) diff --git a/tests/extract/cases/section_source/named_module.py b/tests/extract/cases/section_source/named_module.py index 4a46ad0e19..c7580982b6 100644 --- a/tests/extract/cases/section_source/named_module.py +++ b/tests/extract/cases/section_source/named_module.py @@ -7,6 +7,7 @@ def source_f_1(val: str = dlt.config.value): return dlt.resource([val], name="f_1") + @dlt.resource def resource_f_2(val: str = dlt.config.value): yield [val] diff --git a/tests/extract/conftest.py b/tests/extract/conftest.py index f5dc47f54b..17f93b0ba5 100644 --- a/tests/extract/conftest.py +++ b/tests/extract/conftest.py @@ -1 +1,7 @@ -from tests.utils import duckdb_pipeline_location, autouse_test_storage, preserve_environ, patch_home_dir, wipe_pipeline \ No newline at end of file +from tests.utils import ( + duckdb_pipeline_location, + autouse_test_storage, + preserve_environ, + patch_home_dir, + wipe_pipeline, +) diff --git a/tests/extract/data_writers/__init__.py b/tests/extract/data_writers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/extract/data_writers/test_buffered_writer.py b/tests/extract/data_writers/test_buffered_writer.py new file mode 100644 index 0000000000..aff49e06ac --- /dev/null +++ 
b/tests/extract/data_writers/test_buffered_writer.py @@ -0,0 +1,283 @@ +import os +import pytest +import time +from typing import Iterator + +from dlt.common.data_writers.exceptions import BufferedDataWriterClosed +from dlt.common.data_writers.writers import DataWriterMetrics +from dlt.common.destination.capabilities import TLoaderFileFormat +from dlt.common.schema.utils import new_column +from dlt.common.storages.file_storage import FileStorage + +from dlt.common.typing import DictStrAny + +from tests.common.data_writers.utils import ALL_WRITERS, get_writer + + +@pytest.mark.parametrize("format_", ALL_WRITERS) +def test_write_no_item(format_: TLoaderFileFormat) -> None: + with get_writer(_format=format_) as writer: + pass + assert writer.closed + with pytest.raises(BufferedDataWriterClosed): + writer._ensure_open() + # no files rotated + assert writer.closed_files == [] + + +@pytest.mark.parametrize( + "disable_compression", [True, False], ids=["no_compression", "compression"] +) +def test_rotation_on_schema_change(disable_compression: bool) -> None: + c1 = new_column("col1", "bigint") + c2 = new_column("col2", "bigint") + c3 = new_column("col3", "text") + + t1 = {"col1": c1} + t2 = {"col2": c2, "col1": c1} + t3 = {"col3": c3, "col2": c2, "col1": c1} + + def c1_doc(count: int) -> Iterator[DictStrAny]: + return map(lambda x: {"col1": x}, range(0, count)) + + def c2_doc(count: int) -> Iterator[DictStrAny]: + return map(lambda x: {"col1": x, "col2": x * 2 + 1}, range(0, count)) + + def c3_doc(count: int) -> Iterator[DictStrAny]: + return map(lambda x: {"col3": "col3_value"}, range(0, count)) + + # change schema before file first flush + with get_writer(disable_compression=disable_compression) as writer: + writer.write_data_item(list(c1_doc(8)), t1) + assert writer._current_columns == t1 + # but different instance + assert writer._current_columns is not t1 + writer.write_data_item(list(c2_doc(1)), t2) + # file name is there + assert writer._file_name is not None + # no file is open + assert writer._file is None + # writer is closed and data was written + assert len(writer.closed_files) == 1 + assert writer.closed_files[0].items_count == 9 + assert writer.closed_files[0].file_size > 0 + # check the content, mind that we swapped the columns + with FileStorage.open_zipsafe_ro(writer.closed_files[0].file_path, "r", encoding="utf-8") as f: + content = f.readlines() + assert "col2,col1" in content[0] + assert "NULL,0" in content[2] + # col2 first + assert "1,0" in content[-1] + + # data would flush and schema change + with get_writer() as writer: + writer.write_data_item(list(c1_doc(9)), t1) + old_file = writer._file_name + writer.write_data_item(list(c2_doc(1)), t2) # rotates here + # file is open + assert writer._file is not None + # no files were closed + assert len(writer.closed_files) == 0 + assert writer._file_name == old_file + # buffer is empty + assert writer._buffered_items == [] + + # file would rotate and schema change + with get_writer() as writer: + writer.file_max_items = 10 + writer.write_data_item(list(c1_doc(9)), t1) + old_file = writer._file_name + writer.write_data_item(list(c2_doc(1)), t2) # rotates here + # file is not open after rotation + assert writer._file is None + # file was rotated + assert len(writer.closed_files) == 1 + assert writer._file_name != old_file + # buffer is empty + assert writer._buffered_items == [] + + # schema change after flush rotates file + with get_writer() as writer: + writer.write_data_item(list(c1_doc(11)), t1) + 
writer.write_data_item(list(c2_doc(1)), t2) + assert len(writer.closed_files) == 1 + # now the file is closed + assert writer._file is None + old_file = writer._file_name + # so we can write schema change without rotation and flushing + writer.write_data_item(list(c2_doc(1)), t3) + assert writer._file is None + assert writer._file_name == old_file + # make it flush + writer.file_max_items = 10 + writer.write_data_item(list(c3_doc(20)), t3) + assert len(writer.closed_files) == 2 + assert writer._buffered_items == [] + # the last file must contain text value of the column3 + with FileStorage.open_zipsafe_ro(writer.closed_files[-1].file_path, "r", encoding="utf-8") as f: + content = f.readlines() + assert "(col3_value" in content[-1] + # check metrics + assert writer.closed_files[0].items_count == 11 + assert writer.closed_files[1].items_count == 22 + + +@pytest.mark.parametrize( + "disable_compression", [True, False], ids=["no_compression", "compression"] +) +def test_NO_rotation_on_schema_change(disable_compression: bool) -> None: + c1 = new_column("col1", "bigint") + c2 = new_column("col2", "bigint") + + t1 = {"col1": c1} + t2 = {"col2": c2, "col1": c1} + + def c1_doc(count: int) -> Iterator[DictStrAny]: + return map(lambda x: {"col1": x}, range(0, count)) + + def c2_doc(count: int) -> Iterator[DictStrAny]: + return map(lambda x: {"col1": x, "col2": x * 2 + 1}, range(0, count)) + + # change schema before file first flush + with get_writer(_format="jsonl", disable_compression=disable_compression) as writer: + writer.write_data_item(list(c1_doc(15)), t1) + # flushed + assert writer._file is not None + writer.write_data_item(list(c2_doc(2)), t2) + # no rotation + assert len(writer._buffered_items) == 2 + # only the initial 15 items written + assert writer._writer.items_count == 15 + # all written + with FileStorage.open_zipsafe_ro(writer.closed_files[-1].file_path, "r", encoding="utf-8") as f: + content = f.readlines() + assert content[-1] == '{"col1":1,"col2":3}\n' + + +@pytest.mark.parametrize( + "disable_compression", [True, False], ids=["no_compression", "compression"] +) +def test_writer_requiring_schema(disable_compression: bool) -> None: + # assertion on flushing + with pytest.raises(AssertionError): + with get_writer(disable_compression=disable_compression) as writer: + writer.write_data_item([{"col1": 1}], None) + # just single schema is enough + c1 = new_column("col1", "bigint") + t1 = {"col1": c1} + with get_writer(disable_compression=disable_compression) as writer: + writer.write_data_item([{"col1": 1}], None) + writer.write_data_item([{"col1": 1}], t1) + + +@pytest.mark.parametrize( + "disable_compression", [True, False], ids=["no_compression", "compression"] +) +def test_writer_optional_schema(disable_compression: bool) -> None: + with get_writer(_format="jsonl", disable_compression=disable_compression) as writer: + writer.write_data_item([{"col1": 1}], None) + writer.write_data_item([{"col1": 1}], None) + + +@pytest.mark.parametrize( + "disable_compression", [True, False], ids=["no_compression", "compression"] +) +@pytest.mark.parametrize("format_", ALL_WRITERS - {"arrow"}) +def test_write_empty_file(disable_compression: bool, format_: TLoaderFileFormat) -> None: + # just single schema is enough + c1 = new_column("col1", "bigint") + t1 = {"col1": c1} + now = time.time() + with get_writer(format_, disable_compression=disable_compression) as writer: + metrics = writer.write_empty_file(t1) + assert len(writer.closed_files) == 1 + assert os.path.abspath(metrics.file_path) + assert 
os.path.isfile(metrics.file_path) + assert metrics.created <= metrics.last_modified + assert metrics.created >= now + assert metrics.items_count == 0 + assert metrics.file_size >= 0 + assert writer.closed_files[0] == metrics + + +@pytest.mark.parametrize("format_", ALL_WRITERS) +def test_import_file(format_: TLoaderFileFormat) -> None: + now = time.time() + with get_writer(format_) as writer: + # won't destroy the original + metrics = writer.import_file( + "tests/extract/cases/imported.any", DataWriterMetrics("", 1, 231, 0, 0) + ) + assert len(writer.closed_files) == 1 + assert os.path.isfile(metrics.file_path) + assert writer.closed_files[0] == metrics + assert metrics.created <= metrics.last_modified + assert metrics.created >= now + assert metrics.items_count == 1 + assert metrics.file_size == 231 + + +@pytest.mark.parametrize( + "disable_compression", [True, False], ids=["no_compression", "compression"] +) +@pytest.mark.parametrize("format_", ALL_WRITERS - {"arrow"}) +def test_gather_metrics(disable_compression: bool, format_: TLoaderFileFormat) -> None: + now = time.time() + c1 = new_column("col1", "bigint") + t1 = {"col1": c1} + with get_writer( + format_, disable_compression=disable_compression, buffer_max_items=2, file_max_items=2 + ) as writer: + time.sleep(0.55) + count = writer.write_data_item([{"col1": 182812}, {"col1": -1}], t1) + assert count == 2 + # file rotated + assert len(writer.closed_files) == 1 + metrics = writer.closed_files[0] + assert metrics.items_count == 2 + assert metrics.last_modified - metrics.created >= 0.55 + assert metrics.created >= now + time.sleep(0.35) + count = writer.write_data_item([{"col1": 182812}, {"col1": -1}, {"col1": 182811}], t1) + assert count == 3 + # file rotated + assert len(writer.closed_files) == 2 + metrics_2 = writer.closed_files[1] + assert metrics_2.items_count == 3 + assert metrics_2.created >= metrics.last_modified + assert metrics_2.last_modified - metrics_2.created >= 0.35 + + assert len(writer.closed_files) == 2 + + +@pytest.mark.parametrize( + "disable_compression", [True, False], ids=["no_compression", "compression"] +) +@pytest.mark.parametrize("format_", ALL_WRITERS - {"arrow"}) +def test_special_write_rotates(disable_compression: bool, format_: TLoaderFileFormat) -> None: + c1 = new_column("col1", "bigint") + t1 = {"col1": c1} + with get_writer( + format_, disable_compression=disable_compression, buffer_max_items=100, file_max_items=100 + ) as writer: + writer.write_data_item([{"col1": 182812}, {"col1": -1}], t1) + assert len(writer.closed_files) == 0 + # writing empty rotates existing file + metrics = writer.write_empty_file(t1) + assert len(writer.closed_files) == 2 + assert writer.closed_files[1] == metrics + assert writer.closed_files[0].items_count == 2 + + # also import rotates + assert writer.write_data_item({"col1": 182812}, t1) == 1 + metrics = writer.import_file( + "tests/extract/cases/imported.any", DataWriterMetrics("", 1, 231, 0, 0) + ) + assert len(writer.closed_files) == 4 + assert writer.closed_files[3] == metrics + assert writer.closed_files[2].items_count == 1 + + assert writer.write_data_item({"col1": 182812}, t1) == 1 + metrics = writer.import_file( + "tests/extract/cases/imported.any", DataWriterMetrics("", 1, 231, 0, 0) + ) diff --git a/tests/extract/data_writers/test_data_item_storage.py b/tests/extract/data_writers/test_data_item_storage.py new file mode 100644 index 0000000000..2db08e572c --- /dev/null +++ b/tests/extract/data_writers/test_data_item_storage.py @@ -0,0 +1,52 @@ +import os +import 
pytest + +from dlt.common.configuration.container import Container +from dlt.common.data_writers.writers import DataWriterMetrics +from dlt.common.destination.capabilities import TLoaderFileFormat, DestinationCapabilitiesContext +from dlt.common.schema.utils import new_column +from tests.common.data_writers.utils import ALL_WRITERS +from dlt.common.storages.data_item_storage import DataItemStorage + +from tests.utils import TEST_STORAGE_ROOT + + +class TestItemStorage(DataItemStorage): + def _get_data_item_path_template(self, load_id: str, schema_name: str, table_name: str) -> str: + return os.path.join(TEST_STORAGE_ROOT, f"{load_id}.{schema_name}.{table_name}.%s") + + +@pytest.mark.parametrize("format_", ALL_WRITERS - {"arrow"}) +def test_write_items(format_: TLoaderFileFormat) -> None: + with Container().injectable_context( + DestinationCapabilitiesContext.generic_capabilities(format_) + ): + item_storage = TestItemStorage(format_) + c1 = new_column("col1", "bigint") + t1 = {"col1": c1} + count = item_storage.write_data_item( + "load_1", "schema", "t1", [{"col1": 182812}, {"col1": -1}], t1 + ) + assert count == 2 + assert item_storage.closed_files("load_1") == [] + # write empty file + metrics = item_storage.write_empty_items_file("load_2", "schema", "t1", t1) + assert item_storage.closed_files("load_2")[0] == metrics + # force closing file by writing empty file + metrics = item_storage.import_items_file( + "load_1", + "schema", + "t1", + "tests/extract/cases/imported.any", + DataWriterMetrics("", 1, 231, 0, 0), + ) + assert item_storage.closed_files("load_1")[1] == metrics + + # closed files are separate + item_storage.write_data_item("load_1", "schema", "t1", [{"col1": 182812}, {"col1": -1}], t1) + item_storage.write_data_item("load_2", "schema", "t1", [{"col1": 182812}, {"col1": -1}], t1) + item_storage.close_writers("load_1") + assert len(item_storage.closed_files("load_1")) == 3 + assert len(item_storage.closed_files("load_2")) == 1 + item_storage.close_writers("load_2") + assert len(item_storage.closed_files("load_2")) == 2 diff --git a/tests/extract/test_decorators.py b/tests/extract/test_decorators.py index 05e3a2fbf3..d1ff98fc26 100644 --- a/tests/extract/test_decorators.py +++ b/tests/extract/test_decorators.py @@ -17,15 +17,29 @@ from dlt.common.schema import Schema from dlt.common.schema.utils import new_table, new_column from dlt.common.schema.typing import TTableSchemaColumns +from dlt.common.schema.exceptions import InvalidSchemaName +from dlt.common.typing import TDataItem from dlt.cli.source_detection import detect_source_configs -from dlt.common.typing import TDataItem -from dlt.extract.exceptions import DataItemRequiredForDynamicTableHints, DynamicNameNotStandaloneResource, ExplicitSourceNameInvalid, InconsistentTableTemplate, InvalidResourceDataTypeFunctionNotAGenerator, InvalidResourceDataTypeIsNone, InvalidResourceDataTypeMultiplePipes, ParametrizedResourceUnbound, PipeGenInvalid, PipeNotBoundToData, ResourceFunctionExpected, ResourceInnerCallableConfigWrapDisallowed, SourceDataIsNone, SourceIsAClassTypeError, SourceNotAFunction, SourceSchemaNotAvailable -from dlt.extract.source import DltResource, DltSource -from dlt.common.schema.exceptions import InvalidSchemaName +from dlt.extract import DltResource, DltSource +from dlt.extract.exceptions import ( + DynamicNameNotStandaloneResource, + InvalidResourceDataTypeFunctionNotAGenerator, + InvalidResourceDataTypeIsNone, + InvalidResourceDataTypeMultiplePipes, + ParametrizedResourceUnbound, + PipeGenInvalid, + 
PipeNotBoundToData, + ResourceFunctionExpected, + ResourceInnerCallableConfigWrapDisallowed, + SourceDataIsNone, + SourceIsAClassTypeError, + SourceNotAFunction, + SourceSchemaNotAvailable, +) from dlt.extract.typing import TableNameMeta -from tests.common.utils import IMPORTED_VERSION_HASH_ETH_V6 +from tests.common.utils import IMPORTED_VERSION_HASH_ETH_V8 def test_none_returning_source() -> None: @@ -38,12 +52,10 @@ def empty() -> None: with pytest.raises(SourceDataIsNone): dlt.source(empty)() - @dlt.source def deco_empty() -> None: pass - with pytest.raises(SourceDataIsNone): deco_empty() @@ -72,11 +84,10 @@ def test_load_schema_for_callable() -> None: schema = s.schema assert schema.name == "ethereum" == s.name # the schema in the associated file has this hash - assert schema.stored_version_hash == IMPORTED_VERSION_HASH_ETH_V6 + assert schema.stored_version_hash == IMPORTED_VERSION_HASH_ETH_V8 def test_unbound_parametrized_transformer() -> None: - empty_pipe = DltResource.Empty._pipe assert empty_pipe.is_empty assert not empty_pipe.is_data_bound @@ -120,7 +131,7 @@ def test_transformer_no_parens() -> None: bound_r = dlt.resource([1, 2, 3], name="data") @dlt.transformer - def empty_t_1(item, meta = None): + def empty_t_1(item, meta=None): yield "a" * item assert list(bound_r | empty_t_1) == ["a", "aa", "aaa"] @@ -155,7 +166,6 @@ def accept_meta(item, meta=None, **kwargs): def test_source_name_is_invalid_schema_name() -> None: - def camelCase(): return dlt.resource([1, 2, 3], name="resource") @@ -180,10 +190,13 @@ def camelCase(): def test_resource_name_is_invalid_table_name_and_columns() -> None: - @dlt.source def camelCase(): - return dlt.resource([1, 2, 3], name="Resource !", columns={"KA!AX": {"name": "DIF!", "nullable": False, "data_type": "text"}}) + return dlt.resource( + [1, 2, 3], + name="Resource !", + columns={"KA!AX": {"name": "DIF!", "nullable": False, "data_type": "text"}}, + ) s = camelCase() assert s.resources["Resource !"].selected @@ -198,10 +211,9 @@ def camelCase(): def test_columns_argument() -> None: - @dlt.resource(name="user", columns={"tags": {"data_type": "complex", "x-extra": "x-annotation"}}) # type: ignore[typeddict-unknown-key] def get_users(): - yield {"u": "u", "tags": [1, 2 ,3]} + yield {"u": "u", "tags": [1, 2, 3]} t = get_users().compute_table_schema() @@ -226,20 +238,35 @@ def get_users(): def test_apply_hints_columns() -> None: @dlt.resource(name="user", columns={"tags": {"data_type": "complex", "primary_key": True}}) def get_users(): - yield {"u": "u", "tags": [1, 2 ,3]} + yield {"u": "u", "tags": [1, 2, 3]} users = get_users() assert users.columns == {"tags": {"data_type": "complex", "name": "tags", "primary_key": True}} - assert cast(TTableSchemaColumns, users.columns)["tags"] == users.compute_table_schema()["columns"]["tags"] + assert ( + cast(TTableSchemaColumns, users.columns)["tags"] + == users.compute_table_schema()["columns"]["tags"] + ) # columns property can be changed in place cast(TTableSchemaColumns, users.columns)["tags"]["data_type"] = "text" assert users.compute_table_schema()["columns"]["tags"]["data_type"] == "text" # apply column definition - it should be merged with defaults - users.apply_hints(columns={"tags": {"primary_key": False, "data_type": "text"}, "things": new_column("things", nullable=False)}) - assert cast(TTableSchemaColumns, users.columns)["tags"] == {"data_type": "text", "name": "tags", "primary_key": False} - assert cast(TTableSchemaColumns, users.columns)["things"] == {"name": "things", "nullable": False} + 
users.apply_hints( + columns={ + "tags": {"primary_key": False, "data_type": "text"}, + "things": new_column("things", nullable=False), + } + ) + assert cast(TTableSchemaColumns, users.columns)["tags"] == { + "data_type": "text", + "name": "tags", + "primary_key": False, + } + assert cast(TTableSchemaColumns, users.columns)["things"] == { + "name": "things", + "nullable": False, + } # delete columns by passing empty users.apply_hints(columns={}) @@ -303,6 +330,7 @@ def some_data(): def test_source_sections() -> None: # source in __init__.py of module from tests.extract.cases.section_source import init_source_f_1, init_resource_f_2 + # source in file module with name override from tests.extract.cases.section_source.named_module import source_f_1, resource_f_2 @@ -332,21 +360,27 @@ def test_source_sections() -> None: assert list(resource_f_2()) == ["NAME OVERRIDDEN LEVEL"] # values in function name section - os.environ[f"{known_sections.SOURCES.upper()}__SECTION_SOURCE__INIT_SOURCE_F_1__VAL"] = "SECTION INIT_SOURCE_F_1 LEVEL" + os.environ[f"{known_sections.SOURCES.upper()}__SECTION_SOURCE__INIT_SOURCE_F_1__VAL"] = ( + "SECTION INIT_SOURCE_F_1 LEVEL" + ) assert list(init_source_f_1()) == ["SECTION INIT_SOURCE_F_1 LEVEL"] - os.environ[f"{known_sections.SOURCES.upper()}__SECTION_SOURCE__INIT_RESOURCE_F_2__VAL"] = "SECTION INIT_RESOURCE_F_2 LEVEL" + os.environ[f"{known_sections.SOURCES.upper()}__SECTION_SOURCE__INIT_RESOURCE_F_2__VAL"] = ( + "SECTION INIT_RESOURCE_F_2 LEVEL" + ) assert list(init_resource_f_2()) == ["SECTION INIT_RESOURCE_F_2 LEVEL"] - os.environ[f"{known_sections.SOURCES.upper()}__NAME_OVERRIDDEN__SOURCE_F_1__VAL"] = "NAME SOURCE_F_1 LEVEL" + os.environ[f"{known_sections.SOURCES.upper()}__NAME_OVERRIDDEN__SOURCE_F_1__VAL"] = ( + "NAME SOURCE_F_1 LEVEL" + ) assert list(source_f_1()) == ["NAME SOURCE_F_1 LEVEL"] - os.environ[f"{known_sections.SOURCES.upper()}__NAME_OVERRIDDEN__RESOURCE_F_2__VAL"] = "NAME RESOURCE_F_2 LEVEL" + os.environ[f"{known_sections.SOURCES.upper()}__NAME_OVERRIDDEN__RESOURCE_F_2__VAL"] = ( + "NAME RESOURCE_F_2 LEVEL" + ) assert list(resource_f_2()) == ["NAME RESOURCE_F_2 LEVEL"] def test_source_explicit_section() -> None: - @dlt.source(section="custom_section", schema=Schema("custom_section")) def with_section(secret=dlt.secrets.value): - @dlt.resource def mod_state(): dlt.current.source_state()["val"] = secret @@ -363,7 +397,6 @@ def mod_state(): def test_resource_section() -> None: - r = dlt.resource([1, 2, 3], name="T") assert r.name == "T" assert r.section is None @@ -376,14 +409,23 @@ def _inner_gen(): assert r.section == "test_decorators" from tests.extract.cases.section_source.external_resources import init_resource_f_2 + assert init_resource_f_2.name == "init_resource_f_2" assert init_resource_f_2.section == "section_source" def test_resources_injected_sections() -> None: - from tests.extract.cases.section_source.external_resources import with_external, with_bound_external, init_resource_f_2, resource_f_2 + from tests.extract.cases.section_source.external_resources import ( + with_external, + with_bound_external, + init_resource_f_2, + resource_f_2, + ) + # standalone resources must accept the injected sections for lookups - os.environ["SOURCES__EXTERNAL_RESOURCES__SOURCE_VAL"] = "SOURCES__EXTERNAL_RESOURCES__SOURCE_VAL" + os.environ["SOURCES__EXTERNAL_RESOURCES__SOURCE_VAL"] = ( + "SOURCES__EXTERNAL_RESOURCES__SOURCE_VAL" + ) os.environ["SOURCES__EXTERNAL_RESOURCES__VAL"] = "SOURCES__EXTERNAL_RESOURCES__VAL" 
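
A minimal sketch (editorial, outside this diff) of the lookup convention the environment keys around here follow: arguments declared as dlt.config.value resolve from SOURCES__<section>__<source or resource name>__<argument>, with broader keys as fallbacks. The module and resource names below are illustrative, not taken from the diff.

    import os
    import dlt

    @dlt.resource
    def demo_resource(val: str = dlt.config.value):
        # resolved from config providers when the resource is called; with env vars the
        # most specific key is SOURCES__<SECTION>__DEMO_RESOURCE__VAL, falling back to broader keys
        yield val

    # assuming the resource lives in a module whose config section resolves to "demo_module"
    os.environ["SOURCES__DEMO_MODULE__DEMO_RESOURCE__VAL"] = "injected value"
    assert list(demo_resource()) == ["injected value"]
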
os.environ["SOURCES__SECTION_SOURCE__VAL"] = "SOURCES__SECTION_SOURCE__VAL" os.environ["SOURCES__NAME_OVERRIDDEN__VAL"] = "SOURCES__NAME_OVERRIDDEN__VAL" @@ -398,44 +440,59 @@ def test_resources_injected_sections() -> None: "SOURCES__EXTERNAL_RESOURCES__SOURCE_VAL", "SOURCES__EXTERNAL_RESOURCES__SOURCE_VAL", "SOURCES__EXTERNAL_RESOURCES__VAL", - "SOURCES__EXTERNAL_RESOURCES__VAL" + "SOURCES__EXTERNAL_RESOURCES__VAL", ] # this source will bind external resources before returning them (that is: calling them and obtaining generators) # the iterator in the source will force its sections so external resource sections are not used s = with_bound_external() - assert list(s) == list([ - "SOURCES__EXTERNAL_RESOURCES__SOURCE_VAL", - "SOURCES__EXTERNAL_RESOURCES__SOURCE_VAL", - "SOURCES__EXTERNAL_RESOURCES__VAL", - "SOURCES__EXTERNAL_RESOURCES__VAL" - ]) + assert list(s) == list( + [ + "SOURCES__EXTERNAL_RESOURCES__SOURCE_VAL", + "SOURCES__EXTERNAL_RESOURCES__SOURCE_VAL", + "SOURCES__EXTERNAL_RESOURCES__VAL", + "SOURCES__EXTERNAL_RESOURCES__VAL", + ] + ) # inject the source sections like the Pipeline object would s = with_external() assert s.name == "with_external" assert s.section == "external_resources" # from module name hosting the function - with inject_section(ConfigSectionContext(pipeline_name="injected_external", sections=("sources", s.section, s.name))): + with inject_section( + ConfigSectionContext( + pipeline_name="injected_external", sections=("sources", s.section, s.name) + ) + ): # now the external sources must adopt the injected namespace - assert(list(s)) == [ + assert (list(s)) == [ "SOURCES__EXTERNAL_RESOURCES__SOURCE_VAL", "SOURCES__EXTERNAL_RESOURCES__SOURCE_VAL", "SOURCES__EXTERNAL_RESOURCES__VAL", - "SOURCES__EXTERNAL_RESOURCES__VAL" + "SOURCES__EXTERNAL_RESOURCES__VAL", ] # now with environ values that specify source/resource name: the module of the source, the name of the resource - os.environ["SOURCES__EXTERNAL_RESOURCES__INIT_RESOURCE_F_2__VAL"] = "SOURCES__EXTERNAL_RESOURCES__INIT_RESOURCE_F_2__VAL" - os.environ["SOURCES__EXTERNAL_RESOURCES__RESOURCE_F_2__VAL"] = "SOURCES__EXTERNAL_RESOURCES__RESOURCE_F_2__VAL" + os.environ["SOURCES__EXTERNAL_RESOURCES__INIT_RESOURCE_F_2__VAL"] = ( + "SOURCES__EXTERNAL_RESOURCES__INIT_RESOURCE_F_2__VAL" + ) + os.environ["SOURCES__EXTERNAL_RESOURCES__RESOURCE_F_2__VAL"] = ( + "SOURCES__EXTERNAL_RESOURCES__RESOURCE_F_2__VAL" + ) s = with_external() - with inject_section(ConfigSectionContext(pipeline_name="injected_external", sections=("sources", s.section, s.name))): + with inject_section( + ConfigSectionContext( + pipeline_name="injected_external", sections=("sources", s.section, s.name) + ) + ): # now the external sources must adopt the injected namespace - assert(list(s)) == [ + assert (list(s)) == [ "SOURCES__EXTERNAL_RESOURCES__SOURCE_VAL", "SOURCES__EXTERNAL_RESOURCES__SOURCE_VAL", "SOURCES__EXTERNAL_RESOURCES__INIT_RESOURCE_F_2__VAL", - "SOURCES__EXTERNAL_RESOURCES__RESOURCE_F_2__VAL" + "SOURCES__EXTERNAL_RESOURCES__RESOURCE_F_2__VAL", ] + def test_source_schema_context() -> None: import dlt @@ -486,7 +543,6 @@ def created_global(): def test_source_state_context() -> None: - @dlt.resource(selected=False) def main(): state = dlt.current.state() @@ -494,14 +550,14 @@ def main(): # increase the multiplier each time state is obtained state["mark"] *= 2 yield [1, 2, 3] - assert dlt.state()["mark"] == mark*2 + assert dlt.state()["mark"] == mark * 2 @dlt.transformer(data_from=main) def feeding(item): # we must have state assert 
dlt.current.source_state()["mark"] > 1 mark = dlt.current.source_state()["mark"] - yield from map(lambda i: i*mark, item) + yield from map(lambda i: i * mark, item) @dlt.source def pass_the_state(): @@ -517,7 +573,6 @@ def pass_the_state(): def test_source_schema_modified() -> None: - @dlt.source def schema_test(): return dlt.resource(["A", "B"], name="alpha") @@ -535,13 +590,12 @@ def standalone_resource(secret=dlt.secrets.value, config=dlt.config.value, opt: def test_spec_generation() -> None: - # inner resource cannot take configuration with pytest.raises(ResourceInnerCallableConfigWrapDisallowed) as py_ex: @dlt.resource(write_disposition="merge", primary_key="id") - def inner_resource(initial_id = dlt.config.value): + def inner_resource(initial_id=dlt.config.value): yield [{"id": 1, "name": "row1"}, {"id": 1, "name": "row2"}] assert py_ex.value.resource_name == "inner_resource" @@ -570,7 +624,6 @@ def not_args_r(): def test_sources_no_arguments() -> None: - @dlt.source def no_args(): return dlt.resource([1, 2], name="data") @@ -599,7 +652,6 @@ def not_args_r_i(): def test_resource_sets_invalid_write_disposition() -> None: - @dlt.resource(write_disposition="xxxx") # type: ignore[call-overload] def invalid_disposition(): yield from [1, 2, 3] @@ -611,14 +663,12 @@ def invalid_disposition(): def test_custom_source_impl() -> None: - class TypedSource(DltSource): def users(self, mode: str) -> DltResource: return self.resources["users"](mode) @dlt.source(_impl_cls=TypedSource) def all_users(): - @dlt.resource def users(mode: str): yield mode @@ -637,7 +687,6 @@ def standalone_signature(init: int, secret_end: int = dlt.secrets.value): def test_standalone_resource() -> None: - # wrapped flag will not create the resource but just simple function wrapper that must be called before use @dlt.resource(standalone=True) def nice_signature(init: int): @@ -721,7 +770,12 @@ def test_standalone_transformer() -> None: # test configuration os.environ["SOURCES__TEST_DECORATORS__STANDALONE_SIGNATURE__SECRET_END"] = "5" os.environ["SOURCES__TEST_DECORATORS__STANDALONE_TRANSFORMER_RETURNS__INIT"] = "2" - assert list(standalone_signature(1) | standalone_transformer_returns()) == ["AA", "AAAA", "AAAAAA", "AAAAAAAA"] + assert list(standalone_signature(1) | standalone_transformer_returns()) == [ + "AA", + "AAAA", + "AAAAAA", + "AAAAAAAA", + ] @dlt.transformer(standalone=True, name=lambda args: args["res_name"]) @@ -736,9 +790,14 @@ def test_standalone_resource_with_name() -> None: # still the config comes via the function name os.environ["SOURCES__TEST_DECORATORS__STANDALONE_TX_WITH_NAME__INIT"] = "2" - assert list(dlt.resource([1, 2, 3], name="x") | my_tx) == ['my_txmy_tx', 'my_txmy_txmy_txmy_tx', 'my_txmy_txmy_txmy_txmy_txmy_tx'] + assert list(dlt.resource([1, 2, 3], name="x") | my_tx) == [ + "my_txmy_tx", + "my_txmy_txmy_txmy_tx", + "my_txmy_txmy_txmy_txmy_txmy_tx", + ] with pytest.raises(DynamicNameNotStandaloneResource): + @dlt.resource(standalone=False, name=lambda args: args["res_name"]) # type: ignore[call-overload] def standalone_name(): yield "A" @@ -764,7 +823,6 @@ def test_resource_rename_credentials_separation(): def test_class_source() -> None: - class _Source: def __init__(self, elems: int) -> None: self.elems = elems @@ -778,10 +836,11 @@ def __call__(self, more: int = 1): schema = s.discover_schema() assert schema.name == "_Source" assert "_list" in schema.tables - assert list(s) == ['A', 'V', 'A', 'V', 'A', 'V', 'A', 'V'] + assert list(s) == ["A", "V", "A", "V", "A", "V", "A", "V"] # CAN'T 
decorate classes themselves with pytest.raises(SourceIsAClassTypeError): + @dlt.source(name="planB") class _SourceB: def __init__(self, elems: int) -> None: diff --git a/tests/extract/test_extract.py b/tests/extract/test_extract.py index c487d19aa1..ccf47c24bd 100644 --- a/tests/extract/test_extract.py +++ b/tests/extract/test_extract.py @@ -1,58 +1,68 @@ +import pytest +import os + import dlt from dlt.common import json -from dlt.common.storages import NormalizeStorageConfiguration -from dlt.extract.extract import ExtractorStorage, extract -from dlt.extract.source import DltResource, DltSource +from dlt.common.storages import ( + SchemaStorage, + SchemaStorageConfiguration, + NormalizeStorageConfiguration, +) +from dlt.common.storages.schema_storage import SchemaStorage -from tests.utils import clean_test_storage -from tests.extract.utils import expect_extracted_file +from dlt.extract import DltResource, DltSource +from dlt.extract.extract import ExtractStorage, Extract +from tests.utils import clean_test_storage, TEST_STORAGE_ROOT +from tests.extract.utils import expect_extracted_file -def test_extract_select_tables() -> None: - - def expect_tables(resource: DltResource) -> dlt.Schema: - # delete files - clean_test_storage() - source = DltSource("selectables", "module", dlt.Schema("selectables"), [resource(10)]) - schema = source.discover_schema() - - storage = ExtractorStorage(NormalizeStorageConfiguration()) - extract_id = storage.create_extract_id() - schema_update = extract(extract_id, source, storage) - # odd and even tables - assert len(schema_update) == 2 - assert "odd_table" in schema_update - assert "even_table" in schema_update - for partials in schema_update.values(): - assert len(partials) == 1 - # you must commit the files - assert len(storage.list_files_to_normalize_sorted()) == 0 - storage.commit_extract_files(extract_id) - # check resulting files - assert len(storage.list_files_to_normalize_sorted()) == 2 - expect_extracted_file(storage, "selectables", "odd_table", json.dumps([1,3,5,7,9])) - expect_extracted_file(storage, "selectables", "even_table", json.dumps([0,2,4,6,8])) - - - # delete files - clean_test_storage() - storage = ExtractorStorage(NormalizeStorageConfiguration()) - # same thing but select only odd - source = DltSource("selectables", "module", dlt.Schema("selectables"), [resource]) - source = source.with_resources(resource.name) - source.selected_resources[resource.name].bind(10).select_tables("odd_table") - extract_id = storage.create_extract_id() - schema_update = extract(extract_id, source, storage) - assert len(schema_update) == 1 - assert "odd_table" in schema_update - for partials in schema_update.values(): - assert len(partials) == 1 - storage.commit_extract_files(extract_id) - assert len(storage.list_files_to_normalize_sorted()) == 1 - expect_extracted_file(storage, "selectables", "odd_table", json.dumps([1,3,5,7,9])) - - return schema +@pytest.fixture +def extract_step() -> Extract: + clean_test_storage(init_normalize=True) + schema_storage = SchemaStorage( + SchemaStorageConfiguration(schema_volume_path=os.path.join(TEST_STORAGE_ROOT, "schemas")), + makedirs=True, + ) + return Extract(schema_storage, NormalizeStorageConfiguration()) + + +def test_storage_reuse_package() -> None: + storage = ExtractStorage(NormalizeStorageConfiguration()) + load_id = storage.create_load_package(dlt.Schema("first")) + # assign the same load id if schema "fists" is being extracted + assert storage.create_load_package(dlt.Schema("first")) == load_id + load_id_2 = 
storage.create_load_package(dlt.Schema("second")) + assert load_id_2 != load_id + # make sure we have only two packages + assert set(storage.new_packages.list_packages()) == {load_id, load_id_2} + # commit + storage.commit_new_load_package(load_id, dlt.Schema("first")) + # we have a new load id (the package with schema moved to extracted) + load_id_3 = storage.create_load_package(dlt.Schema("first")) + assert load_id != load_id_3 + load_id_4 = storage.create_load_package(dlt.Schema("first"), reuse_exiting_package=False) + assert load_id_4 != load_id_3 + + # this will fail - not all extracts committed + with pytest.raises(OSError): + storage.delete_empty_extract_folder() + # commit the rest + storage.commit_new_load_package(load_id_2, dlt.Schema("second")) + storage.commit_new_load_package(load_id_3, dlt.Schema("first")) + storage.commit_new_load_package(load_id_4, dlt.Schema("first")) + storage.delete_empty_extract_folder() + + # list extracted packages + assert set(storage.extracted_packages.list_packages()) == { + load_id, + load_id_2, + load_id_3, + load_id_4, + } + + +def test_extract_select_tables_mark(extract_step: Extract) -> None: n_f = lambda i: ("odd" if i % 2 == 1 else "even") + "_table" @dlt.resource @@ -60,17 +70,21 @@ def table_with_name_selectable(_range): for i in range(_range): yield dlt.mark.with_table_name(i, n_f(i)) - schema = expect_tables(table_with_name_selectable) + schema = expect_tables(extract_step, table_with_name_selectable) # TODO: this one should not be there but we cannot remove it really, except explicit flag assert "table_with_name_selectable" in schema.tables + +def test_extract_select_tables_lambda(extract_step: Extract) -> None: + n_f = lambda i: ("odd" if i % 2 == 1 else "even") + "_table" + # try the same with lambda function, this is actually advised: should be faster and resource gets removed from schema @dlt.resource(table_name=n_f) def table_name_with_lambda(_range): - yield list(range(_range)) + yield list(range(_range)) - schema = expect_tables(table_name_with_lambda) + schema = expect_tables(extract_step, table_name_with_lambda) assert "table_name_with_lambda" not in schema.tables @@ -78,35 +92,81 @@ def table_name_with_lambda(_range): # pass -def test_extract_shared_pipe(): +def test_extract_shared_pipe(extract_step: Extract): def input_gen(): yield from [1, 2, 3] input_r = DltResource.from_data(input_gen) - source = DltSource("selectables", "module", dlt.Schema("selectables"), [input_r, input_r.with_name("gen_clone")]) - storage = ExtractorStorage(NormalizeStorageConfiguration()) - extract_id = storage.create_extract_id() - schema_update = extract(extract_id, source, storage) + source = DltSource( + dlt.Schema("selectables"), "module", [input_r, input_r.with_name("gen_clone")] + ) + load_id = extract_step.extract_storage.create_load_package(source.discover_schema()) + extract_step._extract_single_source(load_id, source) # both tables got generated - assert "input_gen" in schema_update - assert "gen_clone" in schema_update + assert "input_gen" in source.schema._schema_tables + assert "gen_clone" in source.schema._schema_tables -def test_extract_renamed_clone_and_parent(): +def test_extract_renamed_clone_and_parent(extract_step: Extract): def input_gen(): yield from [1, 2, 3] def tx_step(item): - return item*2 + return item * 2 input_r = DltResource.from_data(input_gen) input_tx = DltResource.from_data(tx_step, data_from=DltResource.Empty) - source = DltSource("selectables", "module", dlt.Schema("selectables"), [input_r, (input_r | 
input_tx).with_name("tx_clone")]) - storage = ExtractorStorage(NormalizeStorageConfiguration()) - extract_id = storage.create_extract_id() - schema_update = extract(extract_id, source, storage) - assert "input_gen" in schema_update - assert "tx_clone" in schema_update + source = DltSource( + dlt.Schema("selectables"), "module", [input_r, (input_r | input_tx).with_name("tx_clone")] + ) + load_id = extract_step.extract_storage.create_load_package(source.discover_schema()) + extract_step._extract_single_source(load_id, source) + assert "input_gen" in source.schema._schema_tables + assert "tx_clone" in source.schema._schema_tables # mind that pipe name of the evaluated parent will have different name than the resource assert source.tx_clone._pipe.parent.name == "input_gen_tx_clone" + + +def expect_tables(extract_step: Extract, resource: DltResource) -> dlt.Schema: + source = DltSource(dlt.Schema("selectables"), "module", [resource(10)]) + schema = source.discover_schema() + + load_id = extract_step.extract_storage.create_load_package(source.discover_schema()) + extract_step._extract_single_source(load_id, source) + # odd and even tables must be in the source schema + assert len(source.schema.data_tables(include_incomplete=True)) == 2 + assert "odd_table" in source.schema._schema_tables + assert "even_table" in source.schema._schema_tables + # you must commit the files + assert len(extract_step.extract_storage.list_files_to_normalize_sorted()) == 0 + extract_step.extract_storage.commit_new_load_package(load_id, source.schema) + # check resulting files + assert len(extract_step.extract_storage.list_files_to_normalize_sorted()) == 2 + expect_extracted_file( + extract_step.extract_storage, "selectables", "odd_table", json.dumps([1, 3, 5, 7, 9]) + ) + expect_extracted_file( + extract_step.extract_storage, "selectables", "even_table", json.dumps([0, 2, 4, 6, 8]) + ) + + # same thing but select only odd + source = DltSource(dlt.Schema("selectables"), "module", [resource]) + source = source.with_resources(resource.name) + source.selected_resources[resource.name].bind(10).select_tables("odd_table") + load_id = extract_step.extract_storage.create_load_package(source.discover_schema()) + extract_step._extract_single_source(load_id, source) + assert len(source.schema.data_tables(include_incomplete=True)) == 1 + assert "odd_table" in source.schema._schema_tables + extract_step.extract_storage.commit_new_load_package(load_id, source.schema) + assert len(extract_step.extract_storage.list_files_to_normalize_sorted()) == 3 + expect_extracted_file( + extract_step.extract_storage, + "selectables", + "odd_table", + json.dumps([1, 3, 5, 7, 9]), + expected_files=2, + ) + extract_step.extract_storage.delete_empty_extract_folder() + + return schema diff --git a/tests/extract/test_extract_pipe.py b/tests/extract/test_extract_pipe.py index a4e894bf94..4ad6cb6f72 100644 --- a/tests/extract/test_extract_pipe.py +++ b/tests/extract/test_extract_pipe.py @@ -15,7 +15,6 @@ def test_next_item_mode() -> None: - def nested_gen_level_2(): yield from [88, None, 89] @@ -23,25 +22,25 @@ def nested_gen(): yield from [55, 56, None, 77, nested_gen_level_2()] def source_gen1(): - yield from [1, 2, nested_gen(), 3,4] + yield from [1, 2, nested_gen(), 3, 4] def source_gen2(): yield from range(11, 16) def source_gen3(): - yield from range(20,22) + yield from range(20, 22) def get_pipes(): return [ Pipe.from_data("data1", source_gen1()), Pipe.from_data("data2", source_gen2()), Pipe.from_data("data3", source_gen3()), - ] + ] # default mode 
is "fifo" _l = list(PipeIterator.from_pipes(get_pipes(), next_item_mode="fifo")) # items will be in order of the pipes, nested iterator items appear inline - assert [pi.item for pi in _l] == [1, 2, 55, 56, 77, 88, 89, 3, 4, 11, 12, 13, 14, 15, 20, 21] + assert [pi.item for pi in _l] == [1, 2, 55, 56, 77, 88, 89, 3, 4, 11, 12, 13, 14, 15, 20, 21] # round robin mode _l = list(PipeIterator.from_pipes(get_pipes(), next_item_mode="round_robin")) @@ -50,7 +49,6 @@ def get_pipes(): def test_rotation_on_none() -> None: - global gen_1_started global gen_2_started global gen_3_started @@ -85,7 +83,7 @@ def get_pipes(): Pipe.from_data("data1", source_gen1()), Pipe.from_data("data2", source_gen2()), Pipe.from_data("data3", source_gen3()), - ] + ] # round robin mode _l = list(PipeIterator.from_pipes(get_pipes(), next_item_mode="round_robin")) @@ -136,7 +134,7 @@ def test_insert_remove_step() -> None: pp = Pipe.from_data("data", data) def tx(item): - yield item*2 + yield item * 2 # create pipe with transformer p = Pipe.from_data("tx", tx, parent=pp) @@ -188,7 +186,7 @@ def item_meta_step(item, meta): p.remove_step(0) assert p._gen_idx == 0 _l = list(PipeIterator.from_pipe(p)) - assert [pi.item for pi in _l] == [0.5, 1, 3/2] + assert [pi.item for pi in _l] == [0.5, 1, 3 / 2] # remove all remaining txs p.remove_step(1) pp.remove_step(1) @@ -210,7 +208,7 @@ def item_meta_step(item, meta): def tx_minus(item, meta): assert meta is None - yield item*-4 + yield item * -4 p.replace_gen(tx_minus) _l = list(PipeIterator.from_pipe(p)) @@ -233,8 +231,8 @@ def test_pipe_propagate_meta() -> None: p = Pipe.from_data("data", iter(meta_data)) def item_meta_step(item: int, meta): - assert _meta[item-1] == meta - return item*2 + assert _meta[item - 1] == meta + return item * 2 p.append_step(item_meta_step) # type: ignore[arg-type] _l = list(PipeIterator.from_pipe(p)) @@ -247,19 +245,19 @@ def item_meta_step(item: int, meta): # does not take meta def transformer(item): - yield item*item + yield item * item def item_meta_step_trans(item: int, meta): # reverse all transformations on item - meta_idx = int(item**0.5//2) - assert _meta[meta_idx-1] == meta - return item*2 + meta_idx = int(item**0.5 // 2) + assert _meta[meta_idx - 1] == meta + return item * 2 t = Pipe("tran", [transformer], parent=p) t.append_step(item_meta_step_trans) # type: ignore[arg-type] _l = list(PipeIterator.from_pipe(t)) # item got propagated through transformation -> transformer -> transformation - assert [int((pi.item//2)**0.5//2) for pi in _l] == data # type: ignore[operator] + assert [int((pi.item // 2) ** 0.5 // 2) for pi in _l] == data # type: ignore[operator] assert [pi.meta for pi in _l] == _meta # same but with the fork step @@ -270,7 +268,7 @@ def item_meta_step_trans(item: int, meta): # do not yield parents _l = list(PipeIterator.from_pipes([p, t], yield_parents=False)) # same result - assert [int((pi.item//2)**0.5//2) for pi in _l] == data # type: ignore[operator] + assert [int((pi.item // 2) ** 0.5 // 2) for pi in _l] == data # type: ignore[operator] assert [pi.meta for pi in _l] == _meta # same but yield parents @@ -281,11 +279,11 @@ def item_meta_step_trans(item: int, meta): _l = list(PipeIterator.from_pipes([p, t], yield_parents=True)) # same result for transformer tran_l = [pi for pi in _l if pi.pipe.name == t.name] - assert [int((pi.item//2)**0.5//2) for pi in tran_l] == data # type: ignore[operator] + assert [int((pi.item // 2) ** 0.5 // 2) for pi in tran_l] == data # type: ignore[operator] assert [pi.meta for pi in tran_l] == _meta 
data_l = [pi for pi in _l if pi.pipe.name is p.name] # data pipe went only through one transformation - assert [int(pi.item//2) for pi in data_l] == data # type: ignore[operator] + assert [int(pi.item // 2) for pi in data_l] == data # type: ignore[operator] assert [pi.meta for pi in data_l] == _meta @@ -297,9 +295,9 @@ def test_pipe_transformation_changes_meta() -> None: p = Pipe.from_data("data", iter(meta_data)) def item_meta_step(item: int, meta): - assert _meta[item-1] == meta + assert _meta[item - 1] == meta # return meta, it should overwrite existing one - return DataItemWithMeta("X" + str(item), item*2) + return DataItemWithMeta("X" + str(item), item * 2) p.append_step(item_meta_step) # type: ignore[arg-type] _l = list(PipeIterator.from_pipe(p)) @@ -309,10 +307,10 @@ def item_meta_step(item: int, meta): # also works for deferred transformations @dlt.defer def item_meta_step_defer(item: int, meta): - assert _meta[item-1] == meta + assert _meta[item - 1] == meta sleep(item * 0.2) # return meta, it should overwrite existing one - return DataItemWithMeta("X" + str(item), item*2) + return DataItemWithMeta("X" + str(item), item * 2) p = Pipe.from_data("data", iter(meta_data)) p.append_step(item_meta_step_defer) # type: ignore[arg-type] @@ -322,9 +320,9 @@ def item_meta_step_defer(item: int, meta): # also works for yielding transformations def item_meta_step_flat(item: int, meta): - assert _meta[item-1] == meta + assert _meta[item - 1] == meta # return meta, it should overwrite existing one - yield DataItemWithMeta("X" + str(item), item*2) + yield DataItemWithMeta("X" + str(item), item * 2) p = Pipe.from_data("data", iter(meta_data)) p.append_step(item_meta_step_flat) # type: ignore[arg-type] @@ -334,10 +332,10 @@ def item_meta_step_flat(item: int, meta): # also works for async async def item_meta_step_async(item: int, meta): - assert _meta[item-1] == meta + assert _meta[item - 1] == meta await asyncio.sleep(item * 0.2) # this returns awaitable - return DataItemWithMeta("X" + str(item), item*2) + return DataItemWithMeta("X" + str(item), item * 2) p = Pipe.from_data("data", iter(meta_data)) p.append_step(item_meta_step_async) # type: ignore[arg-type] @@ -348,7 +346,7 @@ async def item_meta_step_async(item: int, meta): # also lets the transformer return meta def transformer(item: int): - yield DataItemWithMeta("X" + str(item), item*2) + yield DataItemWithMeta("X" + str(item), item * 2) p = Pipe.from_data("data", iter(meta_data)) t = Pipe("tran", [transformer], parent=p) # type: ignore[list-item] # TODO: typealias not working? 
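
For orientation, a minimal sketch (editorial, not part of the diff) of the pipe machinery these tests drive: a data pipe, one appended transformation step, and evaluation via PipeIterator. The import path is assumed from the extract module layout these tests use.

    from dlt.extract.pipe import Pipe, PipeIterator  # assumed import path

    def double(item, meta):
        # a step may return or yield items; yielded items are flattened into the stream
        yield item * 2

    p = Pipe.from_data("data", [1, 2, 3])
    p.append_step(double)
    # PipeIterator evaluates the pipe; each result exposes .item, .meta and .pipe
    items = [pi.item for pi in PipeIterator.from_pipe(p)]
    assert items == [2, 4, 6]
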
@@ -446,14 +444,30 @@ def test_yield_map_step() -> None: p = Pipe.from_data("data", [1, 2, 3]) # this creates number of rows as passed by the data p.append_step(YieldMapItem(lambda item: (yield from [f"item_{x}" for x in range(item)]))) - assert _f_items(list(PipeIterator.from_pipe(p))) == ["item_0", "item_0", "item_1", "item_0", "item_1", "item_2"] + assert _f_items(list(PipeIterator.from_pipe(p))) == [ + "item_0", + "item_0", + "item_1", + "item_0", + "item_1", + "item_2", + ] data = [1, 2, 3] meta = ["A", "B", "C"] # package items into meta wrapper meta_data = [DataItemWithMeta(m, d) for m, d in zip(meta, data)] p = Pipe.from_data("data", meta_data) - p.append_step(YieldMapItem(lambda item, meta: (yield from [f"item_{meta}_{x}" for x in range(item)]))) - assert _f_items(list(PipeIterator.from_pipe(p))) == ["item_A_0", "item_B_0", "item_B_1", "item_C_0", "item_C_1", "item_C_2"] + p.append_step( + YieldMapItem(lambda item, meta: (yield from [f"item_{meta}_{x}" for x in range(item)])) + ) + assert _f_items(list(PipeIterator.from_pipe(p))) == [ + "item_A_0", + "item_B_0", + "item_B_1", + "item_C_0", + "item_C_1", + "item_C_2", + ] def test_pipe_copy_on_fork() -> None: @@ -517,9 +531,8 @@ def test_clone_single_pipe() -> None: def test_clone_pipes() -> None: - def pass_gen(item, meta): - yield item*2 + yield item * 2 data = [1, 2, 3] p1 = Pipe("p1", [data]) @@ -559,13 +572,14 @@ def assert_cloned_pipes(pipes: List[Pipe], cloned_pipes: List[Pipe]) -> None: # must yield same data for pipe, cloned_pipe in zip(pipes, cloned_pipes): - assert _f_items(list(PipeIterator.from_pipe(pipe))) == _f_items(list(PipeIterator.from_pipe(cloned_pipe))) + assert _f_items(list(PipeIterator.from_pipe(pipe))) == _f_items( + list(PipeIterator.from_pipe(cloned_pipe)) + ) def test_circular_deps() -> None: - def pass_gen(item, meta): - yield item*2 + yield item * 2 c_p1_p3 = Pipe("c_p1_p3", [pass_gen]) c_p1_p4 = Pipe("c_p1_p4", [pass_gen], parent=c_p1_p3) @@ -641,7 +655,6 @@ def raise_gen(item: int): def test_close_on_sync_exception() -> None: - def long_gen(): global close_pipe_got_exit, close_pipe_yielding @@ -668,7 +681,9 @@ def assert_pipes_closed(raise_gen, long_gen) -> None: close_pipe_yielding = False pit: PipeIterator = None - with PipeIterator.from_pipe(Pipe.from_data("failing", raise_gen, parent=Pipe.from_data("endless", long_gen()))) as pit: + with PipeIterator.from_pipe( + Pipe.from_data("failing", raise_gen, parent=Pipe.from_data("endless", long_gen())) + ) as pit: with pytest.raises(ResourceExtractionError) as py_ex: list(pit) assert isinstance(py_ex.value.__cause__, RuntimeError) @@ -680,7 +695,9 @@ def assert_pipes_closed(raise_gen, long_gen) -> None: close_pipe_got_exit = False close_pipe_yielding = False - pit = ManagedPipeIterator.from_pipe(Pipe.from_data("failing", raise_gen, parent=Pipe.from_data("endless", long_gen()))) + pit = ManagedPipeIterator.from_pipe( + Pipe.from_data("failing", raise_gen, parent=Pipe.from_data("endless", long_gen())) + ) with pytest.raises(ResourceExtractionError) as py_ex: list(pit) assert isinstance(py_ex.value.__cause__, RuntimeError) diff --git a/tests/extract/test_incremental.py b/tests/extract/test_incremental.py index 9d5b37f472..c8c5c6d137 100644 --- a/tests/extract/test_incremental.py +++ b/tests/extract/test_incremental.py @@ -17,79 +17,86 @@ from dlt.common.utils import uniq_id, digest128, chunks from dlt.common.json import json -from dlt.extract.source import DltSource +from dlt.extract import DltSource from dlt.sources.helpers.transform import take_first 
-from dlt.extract.incremental import IncrementalCursorPathMissing, IncrementalPrimaryKeyMissing +from dlt.extract.incremental.exceptions import ( + IncrementalCursorPathMissing, + IncrementalPrimaryKeyMissing, +) from dlt.pipeline.exceptions import PipelineStepFailed -from tests.extract.utils import AssertItems, data_to_item_format, TItemFormat, ALL_ITEM_FORMATS, data_item_to_list +from tests.extract.utils import AssertItems, data_item_to_list +from tests.utils import data_to_item_format, TDataItemFormat, ALL_DATA_ITEM_FORMATS -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_single_items_last_value_state_is_updated(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_single_items_last_value_state_is_updated(item_type: TDataItemFormat) -> None: data = [ - {'created_at': 425}, - {'created_at': 426}, + {"created_at": 425}, + {"created_at": 426}, ] source_items = data_to_item_format(item_type, data) + @dlt.resource - def some_data(created_at=dlt.sources.incremental('created_at')): + def some_data(created_at=dlt.sources.incremental("created_at")): yield from source_items p = dlt.pipeline(pipeline_name=uniq_id()) p.extract(some_data()) - s = some_data.state['incremental']['created_at'] - assert s['last_value'] == 426 + s = some_data.state["incremental"]["created_at"] + assert s["last_value"] == 426 -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_single_items_last_value_state_is_updated_transformer(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_single_items_last_value_state_is_updated_transformer(item_type: TDataItemFormat) -> None: data = [ - {'created_at': 425}, - {'created_at': 426}, + {"created_at": 425}, + {"created_at": 426}, ] source_items = data_to_item_format(item_type, data) @dlt.transformer - def some_data(item, created_at=dlt.sources.incremental('created_at')): + def some_data(item, created_at=dlt.sources.incremental("created_at")): yield from source_items p = dlt.pipeline(pipeline_name=uniq_id()) - p.extract(dlt.resource([1,2,3], name="table") | some_data()) + p.extract(dlt.resource([1, 2, 3], name="table") | some_data()) - s = some_data().state['incremental']['created_at'] - assert s['last_value'] == 426 + s = some_data().state["incremental"]["created_at"] + assert s["last_value"] == 426 -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_batch_items_last_value_state_is_updated(item_type: TItemFormat) -> None: - data1 = [{'created_at': i} for i in range(5)] - data2 = [{'created_at': i} for i in range(5, 10)] +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_batch_items_last_value_state_is_updated(item_type: TDataItemFormat) -> None: + data1 = [{"created_at": i} for i in range(5)] + data2 = [{"created_at": i} for i in range(5, 10)] source_items1 = data_to_item_format(item_type, data1) source_items2 = data_to_item_format(item_type, data2) @dlt.resource - def some_data(created_at=dlt.sources.incremental('created_at')): + def some_data(created_at=dlt.sources.incremental("created_at")): yield source_items1 yield source_items2 p = dlt.pipeline(pipeline_name=uniq_id()) p.extract(some_data()) - s = p.state["sources"][p.default_schema_name]['resources']['some_data']['incremental']['created_at'] - assert s['last_value'] == 9 + s = p.state["sources"][p.default_schema_name]["resources"]["some_data"]["incremental"][ + "created_at" + ] + assert s["last_value"] == 9 -@pytest.mark.parametrize("item_type", 
ALL_ITEM_FORMATS) -def test_last_value_access_in_resource(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_last_value_access_in_resource(item_type: TDataItemFormat) -> None: values = [] - data = [{'created_at': i} for i in range(6)] + data = [{"created_at": i} for i in range(6)] source_items = data_to_item_format(item_type, data) @dlt.resource - def some_data(created_at=dlt.sources.incremental('created_at')): + def some_data(created_at=dlt.sources.incremental("created_at")): values.append(created_at.last_value) yield source_items @@ -100,103 +107,107 @@ def some_data(created_at=dlt.sources.incremental('created_at')): assert values == [None, 5] -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_unique_keys_are_deduplicated(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_unique_keys_are_deduplicated(item_type: TDataItemFormat) -> None: data1 = [ - {'created_at': 1, 'id': 'a'}, - {'created_at': 2, 'id': 'b'}, - {'created_at': 3, 'id': 'c'}, - {'created_at': 3, 'id': 'd'}, - {'created_at': 3, 'id': 'e'}, + {"created_at": 1, "id": "a"}, + {"created_at": 2, "id": "b"}, + {"created_at": 3, "id": "c"}, + {"created_at": 3, "id": "d"}, + {"created_at": 3, "id": "e"}, ] data2 = [ - {'created_at': 3, 'id': 'c'}, - {'created_at': 3, 'id': 'd'}, - {'created_at': 3, 'id': 'e'}, - {'created_at': 3, 'id': 'f'}, - {'created_at': 4, 'id': 'g'}, + {"created_at": 3, "id": "c"}, + {"created_at": 3, "id": "d"}, + {"created_at": 3, "id": "e"}, + {"created_at": 3, "id": "f"}, + {"created_at": 4, "id": "g"}, ] source_items1 = data_to_item_format(item_type, data1) source_items2 = data_to_item_format(item_type, data2) - @dlt.resource(primary_key='id') - def some_data(created_at=dlt.sources.incremental('created_at')): + + @dlt.resource(primary_key="id") + def some_data(created_at=dlt.sources.incremental("created_at")): if created_at.last_value is None: yield from source_items1 else: yield from source_items2 - p = dlt.pipeline(pipeline_name=uniq_id(), destination='duckdb', credentials=duckdb.connect(':memory:')) - - p.run(some_data()) - p.run(some_data()) + p = dlt.pipeline( + pipeline_name=uniq_id(), destination="duckdb", credentials=duckdb.connect(":memory:") + ) + p.run(some_data()).raise_on_failed_jobs() + p.run(some_data()).raise_on_failed_jobs() with p.sql_client() as c: with c.execute_query("SELECT created_at, id FROM some_data order by created_at, id") as cur: rows = cur.fetchall() - assert rows == [(1, 'a'), (2, 'b'), (3, 'c'), (3, 'd'), (3, 'e'), (3, 'f'), (4, 'g')] + assert rows == [(1, "a"), (2, "b"), (3, "c"), (3, "d"), (3, "e"), (3, "f"), (4, "g")] -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_unique_rows_by_hash_are_deduplicated(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_unique_rows_by_hash_are_deduplicated(item_type: TDataItemFormat) -> None: data1 = [ - {'created_at': 1, 'id': 'a'}, - {'created_at': 2, 'id': 'b'}, - {'created_at': 3, 'id': 'c'}, - {'created_at': 3, 'id': 'd'}, - {'created_at': 3, 'id': 'e'}, + {"created_at": 1, "id": "a"}, + {"created_at": 2, "id": "b"}, + {"created_at": 3, "id": "c"}, + {"created_at": 3, "id": "d"}, + {"created_at": 3, "id": "e"}, ] data2 = [ - {'created_at': 3, 'id': 'c'}, - {'created_at': 3, 'id': 'd'}, - {'created_at': 3, 'id': 'e'}, - {'created_at': 3, 'id': 'f'}, - {'created_at': 4, 'id': 'g'}, + {"created_at": 3, "id": "c"}, + {"created_at": 3, "id": 
"d"}, + {"created_at": 3, "id": "e"}, + {"created_at": 3, "id": "f"}, + {"created_at": 4, "id": "g"}, ] source_items1 = data_to_item_format(item_type, data1) source_items2 = data_to_item_format(item_type, data2) @dlt.resource - def some_data(created_at=dlt.sources.incremental('created_at')): + def some_data(created_at=dlt.sources.incremental("created_at")): if created_at.last_value is None: yield from source_items1 else: yield from source_items2 - p = dlt.pipeline(pipeline_name=uniq_id(), destination='duckdb', credentials=duckdb.connect(':memory:')) - p.run(some_data()) - p.run(some_data()) + p = dlt.pipeline( + pipeline_name=uniq_id(), destination="duckdb", credentials=duckdb.connect(":memory:") + ) + p.run(some_data()).raise_on_failed_jobs() + p.run(some_data()).raise_on_failed_jobs() with p.sql_client() as c: with c.execute_query("SELECT created_at, id FROM some_data order by created_at, id") as cur: rows = cur.fetchall() - assert rows == [(1, 'a'), (2, 'b'), (3, 'c'), (3, 'd'), (3, 'e'), (3, 'f'), (4, 'g')] + assert rows == [(1, "a"), (2, "b"), (3, "c"), (3, "d"), (3, "e"), (3, "f"), (4, "g")] def test_nested_cursor_path() -> None: @dlt.resource - def some_data(created_at=dlt.sources.incremental('data.items[0].created_at')): - yield {'data': {'items': [{'created_at': 2}]}} + def some_data(created_at=dlt.sources.incremental("data.items[0].created_at")): + yield {"data": {"items": [{"created_at": 2}]}} p = dlt.pipeline(pipeline_name=uniq_id()) p.extract(some_data()) - s = p.state["sources"][p.default_schema_name]['resources']['some_data']['incremental']['data.items[0].created_at'] - assert s['last_value'] == 2 + s = p.state["sources"][p.default_schema_name]["resources"]["some_data"]["incremental"][ + "data.items[0].created_at" + ] + assert s["last_value"] == 2 @pytest.mark.parametrize("item_type", ["arrow", "pandas"]) -def test_nested_cursor_path_arrow_fails(item_type: TItemFormat) -> None: - data = [ - {'data': {'items': [{'created_at': 2}]}} - ] +def test_nested_cursor_path_arrow_fails(item_type: TDataItemFormat) -> None: + data = [{"data": {"items": [{"created_at": 2}]}}] source_items = data_to_item_format(item_type, data) @dlt.resource - def some_data(created_at=dlt.sources.incremental('data.items[0].created_at')): + def some_data(created_at=dlt.sources.incremental("data.items[0].created_at")): yield from source_items p = dlt.pipeline(pipeline_name=uniq_id()) @@ -208,65 +219,74 @@ def some_data(created_at=dlt.sources.incremental('data.items[0].created_at')): assert ex.exception.json_path == "data.items[0].created_at" -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_explicit_initial_value(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_explicit_initial_value(item_type: TDataItemFormat) -> None: @dlt.resource - def some_data(created_at=dlt.sources.incremental('created_at')): + def some_data(created_at=dlt.sources.incremental("created_at")): data = [{"created_at": created_at.last_value}] yield from data_to_item_format(item_type, data) p = dlt.pipeline(pipeline_name=uniq_id()) p.extract(some_data(created_at=4242)) - s = p.state["sources"][p.default_schema_name]['resources']['some_data']['incremental']['created_at'] - assert s['last_value'] == 4242 + s = p.state["sources"][p.default_schema_name]["resources"]["some_data"]["incremental"][ + "created_at" + ] + assert s["last_value"] == 4242 -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_explicit_incremental_instance(item_type: TItemFormat) -> None: 
- data = [{'inserted_at': 242, 'some_uq': 444}] +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_explicit_incremental_instance(item_type: TDataItemFormat) -> None: + data = [{"inserted_at": 242, "some_uq": 444}] source_items = data_to_item_format(item_type, data) - @dlt.resource(primary_key='some_uq') - def some_data(incremental=dlt.sources.incremental('created_at', initial_value=0)): - assert incremental.cursor_path == 'inserted_at' + @dlt.resource(primary_key="some_uq") + def some_data(incremental=dlt.sources.incremental("created_at", initial_value=0)): + assert incremental.cursor_path == "inserted_at" assert incremental.initial_value == 241 yield from source_items p = dlt.pipeline(pipeline_name=uniq_id()) - p.extract(some_data(incremental=dlt.sources.incremental('inserted_at', initial_value=241))) + p.extract(some_data(incremental=dlt.sources.incremental("inserted_at", initial_value=241))) @dlt.resource -def some_data_from_config(call_no: int, item_type: TItemFormat, created_at: Optional[dlt.sources.incremental[str]] = dlt.secrets.value): - assert created_at.cursor_path == 'created_at' +def some_data_from_config( + call_no: int, + item_type: TDataItemFormat, + created_at: Optional[dlt.sources.incremental[str]] = dlt.secrets.value, +): + assert created_at.cursor_path == "created_at" # start value will update to the last_value on next call if call_no == 1: - assert created_at.initial_value == '2022-02-03T00:00:00Z' - assert created_at.start_value == '2022-02-03T00:00:00Z' + assert created_at.initial_value == "2022-02-03T00:00:00Z" + assert created_at.start_value == "2022-02-03T00:00:00Z" if call_no == 2: - assert created_at.initial_value == '2022-02-03T00:00:00Z' - assert created_at.start_value == '2022-02-03T00:00:01Z' - data = [{'created_at': '2022-02-03T00:00:01Z'}] + assert created_at.initial_value == "2022-02-03T00:00:00Z" + assert created_at.start_value == "2022-02-03T00:00:01Z" + data = [{"created_at": "2022-02-03T00:00:01Z"}] source_items = data_to_item_format(item_type, data) yield from source_items -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_optional_incremental_from_config(item_type: TItemFormat) -> None: - - os.environ['SOURCES__TEST_INCREMENTAL__SOME_DATA_FROM_CONFIG__CREATED_AT__CURSOR_PATH'] = 'created_at' - os.environ['SOURCES__TEST_INCREMENTAL__SOME_DATA_FROM_CONFIG__CREATED_AT__INITIAL_VALUE'] = '2022-02-03T00:00:00Z' +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_optional_incremental_from_config(item_type: TDataItemFormat) -> None: + os.environ["SOURCES__TEST_INCREMENTAL__SOME_DATA_FROM_CONFIG__CREATED_AT__CURSOR_PATH"] = ( + "created_at" + ) + os.environ["SOURCES__TEST_INCREMENTAL__SOME_DATA_FROM_CONFIG__CREATED_AT__INITIAL_VALUE"] = ( + "2022-02-03T00:00:00Z" + ) p = dlt.pipeline(pipeline_name=uniq_id()) p.extract(some_data_from_config(1, item_type)) p.extract(some_data_from_config(2, item_type)) -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_optional_incremental_not_passed(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_optional_incremental_not_passed(item_type: TDataItemFormat) -> None: """Resource still runs when no incremental is passed""" - data = [1,2,3] + data = [1, 2, 3] source_items = data_to_item_format(item_type, data) @dlt.resource @@ -283,115 +303,129 @@ class OptionalIncrementalConfig(BaseConfiguration): @dlt.resource(spec=OptionalIncrementalConfig) -def optional_incremental_arg_resource(item_type: 
TItemFormat, incremental: Optional[dlt.sources.incremental[Any]] = None) -> Any: - data = [1,2,3] +def optional_incremental_arg_resource( + item_type: TDataItemFormat, incremental: Optional[dlt.sources.incremental[Any]] = None +) -> Any: + data = [1, 2, 3] source_items = data_to_item_format(item_type, data) assert incremental is None yield source_items -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_optional_arg_from_spec_not_passed(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_optional_arg_from_spec_not_passed(item_type: TDataItemFormat) -> None: p = dlt.pipeline(pipeline_name=uniq_id()) p.extract(optional_incremental_arg_resource(item_type)) @configspec class SomeDataOverrideConfiguration(BaseConfiguration): - created_at: dlt.sources.incremental = dlt.sources.incremental('created_at', initial_value='2022-02-03T00:00:00Z') # type: ignore[type-arg] + created_at: dlt.sources.incremental = dlt.sources.incremental("created_at", initial_value="2022-02-03T00:00:00Z") # type: ignore[type-arg] # provide what to inject via spec. the spec contain the default @dlt.resource(spec=SomeDataOverrideConfiguration) -def some_data_override_config(item_type: TItemFormat, created_at: dlt.sources.incremental[str] = dlt.config.value): - assert created_at.cursor_path == 'created_at' - assert created_at.initial_value == '2000-02-03T00:00:00Z' - data = [{'created_at': '2023-03-03T00:00:00Z'}] +def some_data_override_config( + item_type: TDataItemFormat, created_at: dlt.sources.incremental[str] = dlt.config.value +): + assert created_at.cursor_path == "created_at" + assert created_at.initial_value == "2000-02-03T00:00:00Z" + data = [{"created_at": "2023-03-03T00:00:00Z"}] source_items = data_to_item_format(item_type, data) yield from source_items -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_override_initial_value_from_config(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_override_initial_value_from_config(item_type: TDataItemFormat) -> None: # use the shortest possible config version # os.environ['SOURCES__TEST_INCREMENTAL__SOME_DATA_OVERRIDE_CONFIG__CREATED_AT__INITIAL_VALUE'] = '2000-02-03T00:00:00Z' - os.environ['CREATED_AT__INITIAL_VALUE'] = '2000-02-03T00:00:00Z' + os.environ["CREATED_AT__INITIAL_VALUE"] = "2000-02-03T00:00:00Z" p = dlt.pipeline(pipeline_name=uniq_id()) p.extract(some_data_override_config(item_type)) -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_override_primary_key_in_pipeline(item_type: TItemFormat) -> None: - """Primary key hint passed to pipeline is propagated through apply_hints - """ - data = [ - {'created_at': 22, 'id': 2, 'other_id': 5}, - {'created_at': 22, 'id': 2, 'other_id': 6} - ] +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_override_primary_key_in_pipeline(item_type: TDataItemFormat) -> None: + """Primary key hint passed to pipeline is propagated through apply_hints""" + data = [{"created_at": 22, "id": 2, "other_id": 5}, {"created_at": 22, "id": 2, "other_id": 6}] source_items = data_to_item_format(item_type, data) - @dlt.resource(primary_key='id') - def some_data(created_at=dlt.sources.incremental('created_at')): + @dlt.resource(primary_key="id") + def some_data(created_at=dlt.sources.incremental("created_at")): # TODO: this only works because incremental instance is shared across many copies of the resource - assert some_data.incremental.primary_key == ['id', 'other_id'] + 
assert some_data.incremental.primary_key == ["id", "other_id"] yield from source_items p = dlt.pipeline(pipeline_name=uniq_id()) - p.extract(some_data, primary_key=['id', 'other_id']) + p.extract(some_data, primary_key=["id", "other_id"]) -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_composite_primary_key(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_composite_primary_key(item_type: TDataItemFormat) -> None: data = [ - {'created_at': 1, 'isrc': 'AAA', 'market': 'DE'}, - {'created_at': 2, 'isrc': 'BBB', 'market': 'DE'}, - {'created_at': 2, 'isrc': 'CCC', 'market': 'US'}, - {'created_at': 2, 'isrc': 'AAA', 'market': 'DE'}, - {'created_at': 2, 'isrc': 'CCC', 'market': 'DE'}, - {'created_at': 2, 'isrc': 'DDD', 'market': 'DE'}, - {'created_at': 1, 'isrc': 'CCC', 'market': 'DE'}, + {"created_at": 1, "isrc": "AAA", "market": "DE"}, + {"created_at": 2, "isrc": "BBB", "market": "DE"}, + {"created_at": 2, "isrc": "CCC", "market": "US"}, + {"created_at": 2, "isrc": "AAA", "market": "DE"}, + {"created_at": 2, "isrc": "CCC", "market": "DE"}, + {"created_at": 2, "isrc": "DDD", "market": "DE"}, + {"created_at": 1, "isrc": "CCC", "market": "DE"}, ] source_items = data_to_item_format(item_type, data) - @dlt.resource(primary_key=['isrc', 'market']) - def some_data(created_at=dlt.sources.incremental('created_at')): + @dlt.resource(primary_key=["isrc", "market"]) + def some_data(created_at=dlt.sources.incremental("created_at")): yield from source_items - p = dlt.pipeline(pipeline_name=uniq_id(), destination='duckdb', credentials=duckdb.connect(':memory:')) - p.run(some_data()) + p = dlt.pipeline( + pipeline_name=uniq_id(), destination="duckdb", credentials=duckdb.connect(":memory:") + ) + p.run(some_data()).raise_on_failed_jobs() with p.sql_client() as c: - with c.execute_query("SELECT created_at, isrc, market FROM some_data order by created_at, isrc, market") as cur: + with c.execute_query( + "SELECT created_at, isrc, market FROM some_data order by created_at, isrc, market" + ) as cur: rows = cur.fetchall() - expected = {(1, 'AAA', 'DE'), (2, 'AAA', 'DE'), (2, 'BBB', 'DE'), (2, 'CCC', 'DE'), (2, 'CCC', 'US'), (2, 'DDD', 'DE'), (1, 'CCC', 'DE')} + expected = { + (1, "AAA", "DE"), + (2, "AAA", "DE"), + (2, "BBB", "DE"), + (2, "CCC", "DE"), + (2, "CCC", "US"), + (2, "DDD", "DE"), + (1, "CCC", "DE"), + } assert set(rows) == expected -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_last_value_func_min(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_last_value_func_min(item_type: TDataItemFormat) -> None: data = [ - {'created_at': 10}, - {'created_at': 11}, - {'created_at': 9}, - {'created_at': 10}, - {'created_at': 8}, - {'created_at': 22}, + {"created_at": 10}, + {"created_at": 11}, + {"created_at": 9}, + {"created_at": 10}, + {"created_at": 8}, + {"created_at": 22}, ] source_items = data_to_item_format(item_type, data) @dlt.resource - def some_data(created_at=dlt.sources.incremental('created_at', last_value_func=min)): + def some_data(created_at=dlt.sources.incremental("created_at", last_value_func=min)): yield from source_items p = dlt.pipeline(pipeline_name=uniq_id()) p.extract(some_data()) - s = p.state["sources"][p.default_schema_name]['resources']['some_data']['incremental']['created_at'] + s = p.state["sources"][p.default_schema_name]["resources"]["some_data"]["incremental"][ + "created_at" + ] - assert s['last_value'] == 8 + assert s["last_value"] == 8 
def test_last_value_func_custom() -> None: @@ -399,68 +433,74 @@ def last_value(values): return max(values) + 1 @dlt.resource - def some_data(created_at=dlt.sources.incremental('created_at', last_value_func=last_value)): - yield {'created_at': 9} - yield {'created_at': 10} + def some_data(created_at=dlt.sources.incremental("created_at", last_value_func=last_value)): + yield {"created_at": 9} + yield {"created_at": 10} p = dlt.pipeline(pipeline_name=uniq_id()) p.extract(some_data()) - s = p.state["sources"][p.default_schema_name]['resources']['some_data']['incremental']['created_at'] - assert s['last_value'] == 11 + s = p.state["sources"][p.default_schema_name]["resources"]["some_data"]["incremental"][ + "created_at" + ] + assert s["last_value"] == 11 -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_cursor_datetime_type(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_cursor_datetime_type(item_type: TDataItemFormat) -> None: initial_value = pendulum.now() data = [ - {'created_at': initial_value + timedelta(minutes=1)}, - {'created_at': initial_value + timedelta(minutes=3)}, - {'created_at': initial_value + timedelta(minutes=2)}, - {'created_at': initial_value + timedelta(minutes=4)}, - {'created_at': initial_value + timedelta(minutes=2)}, + {"created_at": initial_value + timedelta(minutes=1)}, + {"created_at": initial_value + timedelta(minutes=3)}, + {"created_at": initial_value + timedelta(minutes=2)}, + {"created_at": initial_value + timedelta(minutes=4)}, + {"created_at": initial_value + timedelta(minutes=2)}, ] source_items = data_to_item_format(item_type, data) @dlt.resource - def some_data(created_at=dlt.sources.incremental('created_at', initial_value)): + def some_data(created_at=dlt.sources.incremental("created_at", initial_value)): yield from source_items p = dlt.pipeline(pipeline_name=uniq_id()) p.extract(some_data()) - s = p.state["sources"][p.default_schema_name]['resources']['some_data']['incremental']['created_at'] - assert s['last_value'] == initial_value + timedelta(minutes=4) + s = p.state["sources"][p.default_schema_name]["resources"]["some_data"]["incremental"][ + "created_at" + ] + assert s["last_value"] == initial_value + timedelta(minutes=4) -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_descending_order_unique_hashes(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_descending_order_unique_hashes(item_type: TDataItemFormat) -> None: """Resource returns items in descending order but using `max` last value function. Only hash matching last_value are stored. 
""" - data = [{'created_at': i} for i in reversed(range(15, 25))] + data = [{"created_at": i} for i in reversed(range(15, 25))] source_items = data_to_item_format(item_type, data) @dlt.resource - def some_data(created_at=dlt.sources.incremental('created_at', 20)): + def some_data(created_at=dlt.sources.incremental("created_at", 20)): yield from source_items p = dlt.pipeline(pipeline_name=uniq_id()) p.extract(some_data()) - s = p.state["sources"][p.default_schema_name]['resources']['some_data']['incremental']['created_at'] + s = p.state["sources"][p.default_schema_name]["resources"]["some_data"]["incremental"][ + "created_at" + ] - last_hash = digest128(json.dumps({'created_at': 24})) + last_hash = digest128(json.dumps({"created_at": 24})) - assert s['unique_hashes'] == [last_hash] + assert s["unique_hashes"] == [last_hash] # make sure nothing is returned on a next run, source will use state from the active pipeline assert list(some_data()) == [] -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_unique_keys_json_identifiers(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_unique_keys_json_identifiers(item_type: TDataItemFormat) -> None: """Uses primary key name that is matching the name of the JSON element in the original namespace but gets converted into destination namespace""" @dlt.resource(primary_key="DelTa") @@ -472,7 +512,7 @@ def some_data(last_timestamp=dlt.sources.incremental("ts")): p = dlt.pipeline(pipeline_name=uniq_id()) p.run(some_data, destination="duckdb") # check if default schema contains normalized PK - assert p.default_schema.tables["some_data"]['columns']["del_ta"]['primary_key'] is True + assert p.default_schema.tables["some_data"]["columns"]["del_ta"]["primary_key"] is True with p.sql_client() as c: with c.execute_query("SELECT del_ta FROM some_data") as cur: rows = cur.fetchall() @@ -492,9 +532,8 @@ def some_data(last_timestamp=dlt.sources.incremental("ts")): assert rows2[-1][0] == 9 -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_missing_primary_key(item_type: TItemFormat) -> None: - +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_missing_primary_key(item_type: TDataItemFormat) -> None: @dlt.resource(primary_key="DELTA") def some_data(last_timestamp=dlt.sources.incremental("ts")): data = [{"delta": i, "ts": pendulum.now().add(days=i).timestamp()} for i in range(-10, 10)] @@ -506,9 +545,10 @@ def some_data(last_timestamp=dlt.sources.incremental("ts")): assert py_ex.value.primary_key_column == "DELTA" -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_missing_cursor_field(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_missing_cursor_field(item_type: TDataItemFormat) -> None: os.environ["COMPLETED_PROB"] = "1.0" # make it complete immediately + @dlt.resource def some_data(last_timestamp=dlt.sources.incremental("item.timestamp")): data = [{"delta": i, "ts": pendulum.now().add(days=i).timestamp()} for i in range(-10, 10)] @@ -527,52 +567,78 @@ def some_data(last_timestamp=dlt.sources.incremental("item.timestamp")): def test_json_path_cursor() -> None: - @dlt.resource def some_data(last_timestamp=dlt.sources.incremental("item.timestamp|modifiedAt")): - yield [{ - "delta": i, - "item": { - "timestamp": pendulum.now().add(days=i).timestamp() - } - } for i in range(-10, 10)] - - yield [{ - "delta": i, - "item": { - "modifiedAt": pendulum.now().add(days=i).timestamp() - } - } for i in 
range(-10, 10)] + yield [ + {"delta": i, "item": {"timestamp": pendulum.now().add(days=i).timestamp()}} + for i in range(-10, 10) + ] + + yield [ + {"delta": i, "item": {"modifiedAt": pendulum.now().add(days=i).timestamp()}} + for i in range(-10, 10) + ] # path should match both timestamp and modifiedAt in item list(some_data) def test_remove_incremental_with_explicit_none() -> None: + @dlt.resource(standalone=True) + def some_data( + last_timestamp: dlt.sources.incremental[float] = dlt.sources.incremental( + "id", initial_value=9 + ), + ): + first_idx = last_timestamp.start_value or 0 + for idx in range(first_idx, 10): + yield {"id": idx} + + # keeps initial value + assert list(some_data()) == [{"id": 9}] + + # removes any initial value + assert len(list(some_data(last_timestamp=None))) == 10 + +def test_remove_incremental_with_incremental_empty() -> None: @dlt.resource - def some_data_optional(last_timestamp: Optional[dlt.sources.incremental[float]] = dlt.sources.incremental("item.timestamp")): + def some_data_optional( + last_timestamp: Optional[dlt.sources.incremental[float]] = dlt.sources.incremental( + "item.timestamp" + ), + ): assert last_timestamp is None yield 1 + # we disable incremental by typing the argument as optional - assert list(some_data_optional(last_timestamp=None)) == [1] + # if not disabled it would fail on "item.timestamp" not found + assert list(some_data_optional(last_timestamp=dlt.sources.incremental.EMPTY)) == [1] @dlt.resource(standalone=True) - def some_data(last_timestamp: dlt.sources.incremental[float] = dlt.sources.incremental("item.timestamp")): + def some_data( + last_timestamp: dlt.sources.incremental[float] = dlt.sources.incremental("item.timestamp"), + ): assert last_timestamp is None yield 1 + # we'll get the value error with pytest.raises(ValueError): - assert list(some_data(last_timestamp=None)) == [1] + assert list(some_data(last_timestamp=dlt.sources.incremental.EMPTY)) == [1] -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_filter_processed_items(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_filter_processed_items(item_type: TDataItemFormat) -> None: """Checks if already processed items are filtered out""" @dlt.resource - def standalone_some_data(item_type: TItemFormat, now=None, last_timestamp=dlt.sources.incremental("timestamp")): - data = [{"delta": i, "timestamp": (now or pendulum.now()).add(days=i).timestamp()} for i in range(-10, 10)] + def standalone_some_data( + item_type: TDataItemFormat, now=None, last_timestamp=dlt.sources.incremental("timestamp") + ): + data = [ + {"delta": i, "timestamp": (now or pendulum.now()).add(days=i).timestamp()} + for i in range(-10, 10) + ] source_items = data_to_item_format(item_type, data) yield from source_items @@ -589,9 +655,12 @@ def standalone_some_data(item_type: TItemFormat, now=None, last_timestamp=dlt.so assert all(v["delta"] >= 0 for v in values) # provide the initial value, use min function - values = list(standalone_some_data( - item_type, last_timestamp=dlt.sources.incremental("timestamp", pendulum.now().timestamp(), min) - )) + values = list( + standalone_some_data( + item_type, + last_timestamp=dlt.sources.incremental("timestamp", pendulum.now().timestamp(), min), + ) + ) values = data_item_to_list(item_type, values) assert len(values) == 10 # the minimum element @@ -602,10 +671,9 @@ def test_start_value_set_to_last_value() -> None: p = dlt.pipeline(pipeline_name=uniq_id()) now = pendulum.now() - @dlt.resource def 
some_data(step, last_timestamp=dlt.sources.incremental("ts")): - expected_last = now.add(days=step-1) + expected_last = now.add(days=step - 1) if step == -10: assert last_timestamp.start_value is None @@ -628,9 +696,9 @@ def some_data(step, last_timestamp=dlt.sources.incremental("ts")): p.run(r, destination="duckdb") -@pytest.mark.parametrize("item_type", set(ALL_ITEM_FORMATS) - {'json'}) -def test_start_value_set_to_last_value_arrow(item_type: TItemFormat) -> None: - p = dlt.pipeline(pipeline_name=uniq_id(), destination='duckdb') +@pytest.mark.parametrize("item_type", set(ALL_DATA_ITEM_FORMATS) - {"json"}) +def test_start_value_set_to_last_value_arrow(item_type: TDataItemFormat) -> None: + p = dlt.pipeline(pipeline_name=uniq_id(), destination="duckdb") now = pendulum.now() data = [{"delta": i, "ts": now.add(days=i)} for i in range(-10, 10)] @@ -643,37 +711,46 @@ def some_data(first: bool, last_timestamp=dlt.sources.incremental("ts")): else: # print(last_timestamp.initial_value) # print(now.add(days=step-1).timestamp()) - assert last_timestamp.start_value == last_timestamp.last_value == data[-1]['ts'] + assert last_timestamp.start_value == last_timestamp.last_value == data[-1]["ts"] yield from source_items # after all yielded if first: assert last_timestamp.start_value is None else: - assert last_timestamp.start_value == data[-1]['ts'] == last_timestamp.last_value + assert last_timestamp.start_value == data[-1]["ts"] == last_timestamp.last_value p.run(some_data(True)) p.run(some_data(False)) -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_replace_resets_state(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_replace_resets_state(item_type: TDataItemFormat) -> None: p = dlt.pipeline(pipeline_name=uniq_id(), destination="duckdb") now = pendulum.now() @dlt.resource - def standalone_some_data(item_type: TItemFormat, now=None, last_timestamp=dlt.sources.incremental("timestamp")): - data = [{"delta": i, "timestamp": (now or pendulum.now()).add(days=i).timestamp()} for i in range(-10, 10)] + def standalone_some_data( + item_type: TDataItemFormat, now=None, last_timestamp=dlt.sources.incremental("timestamp") + ): + data = [ + {"delta": i, "timestamp": (now or pendulum.now()).add(days=i).timestamp()} + for i in range(-10, 10) + ] source_items = data_to_item_format(item_type, data) yield from source_items info = p.run(standalone_some_data(item_type, now)) + print(p.last_trace.last_normalize_info) assert len(info.loads_ids) == 1 info = p.run(standalone_some_data(item_type, now)) + print(p.last_trace.last_normalize_info) + print(info) assert len(info.loads_ids) == 0 info = p.run(standalone_some_data(item_type, now), write_disposition="replace") assert len(info.loads_ids) == 1 parent_r = standalone_some_data(item_type, now) + @dlt.transformer(data_from=parent_r, write_disposition="append") def child(item): state = resource_state("child") @@ -688,18 +765,20 @@ def child(item): info = p.run(child, write_disposition="replace") # print(info.load_packages[0]) assert len(info.loads_ids) == 1 - # pipeline applied hints to the child resource - assert child.write_disposition == "replace" + # pipeline applied hints to the child resource but it was placed into source first + # so the original is still "append" + assert child.write_disposition == "append" # create a source where we place only child - s = DltSource("comp", "section", Schema("comp"), [child]) + child.write_disposition = "replace" + s = DltSource(Schema("comp"), "section", 
[child]) # but extracted resources will include its parent where it derives write disposition from child extracted = s.resources.extracted assert extracted[child.name].write_disposition == "replace" assert extracted[child._pipe.parent.name].write_disposition == "replace" # create a source where we place parent explicitly - s = DltSource("comp", "section", Schema("comp"), [parent_r, child]) + s = DltSource(Schema("comp"), "section", [parent_r, child]) extracted = s.resources.extracted assert extracted[child.name].write_disposition == "replace" # now parent exists separately and has its own write disposition @@ -713,27 +792,30 @@ def child(item): # print(s.state) # state was reset (child is replace but parent is append! so it will not generate any more items due to incremental # so child will reset itself on replace and never set the state...) - assert 'child' not in s.state['resources'] + assert "child" not in s.state["resources"] # there will be a load package to reset the state but also a load package to update the child table - assert len(info.load_packages[0].jobs['completed_jobs']) == 2 - assert {job.job_file_info.table_name for job in info.load_packages[0].jobs['completed_jobs'] } == {"_dlt_pipeline_state", "child"} + assert len(info.load_packages[0].jobs["completed_jobs"]) == 2 + assert { + job.job_file_info.table_name for job in info.load_packages[0].jobs["completed_jobs"] + } == {"_dlt_pipeline_state", "child"} # now we add child that has parent_r as parent but we add another instance of standalone_some_data explicitly # so we have a resource with the same name as child parent but the pipe instance is different - s = DltSource("comp", "section", Schema("comp"), [standalone_some_data(now), child]) + s = DltSource(Schema("comp"), "section", [standalone_some_data(now), child]) assert extracted[child.name].write_disposition == "replace" # now parent exists separately and has its own write disposition - because we search by name to identify matching resource assert extracted[child._pipe.parent.name].write_disposition == "append" -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_incremental_as_transform(item_type: TItemFormat) -> None: - +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_incremental_as_transform(item_type: TDataItemFormat) -> None: now = pendulum.now().timestamp() @dlt.resource def some_data(): - last_value: dlt.sources.incremental[float] = dlt.sources.incremental.from_existing_state("some_data", "ts") + last_value: dlt.sources.incremental[float] = dlt.sources.incremental.from_existing_state( + "some_data", "ts" + ) assert last_value.initial_value == now assert last_value.start_value == now assert last_value.cursor_path == "ts" @@ -749,8 +831,8 @@ def some_data(): assert len(info.loads_ids) == 1 -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_incremental_explicit_disable_unique_check(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_incremental_explicit_disable_unique_check(item_type: TDataItemFormat) -> None: @dlt.resource(primary_key="delta") def some_data(last_timestamp=dlt.sources.incremental("ts", primary_key=())): data = [{"delta": i, "ts": pendulum.now().timestamp()} for i in range(-10, 10)] @@ -764,9 +846,8 @@ def some_data(last_timestamp=dlt.sources.incremental("ts", primary_key=())): assert s.state["incremental"]["ts"]["unique_hashes"] == [] -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_apply_hints_incremental(item_type: 
TItemFormat) -> None: - +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_apply_hints_incremental(item_type: TDataItemFormat) -> None: p = dlt.pipeline(pipeline_name=uniq_id()) data = [{"created_at": 1}, {"created_at": 2}, {"created_at": 3}] source_items = data_to_item_format(item_type, data) @@ -794,7 +875,7 @@ def some_data(created_at: Optional[dlt.sources.incremental[int]] = None): assert r.state["incremental"]["created_at"]["last_value"] == 1 @dlt.resource - def some_data_w_default(created_at = dlt.sources.incremental("created_at", last_value_func=min)): + def some_data_w_default(created_at=dlt.sources.incremental("created_at", last_value_func=min)): yield source_items # default is overridden by apply hints @@ -820,12 +901,12 @@ def some_data_no_incremental(): def test_last_value_func_on_dict() -> None: - """Test last value which is a dictionary""" + def by_event_type(event): last_value = None if len(event) == 1: - item, = event + (item,) = event else: item, last_value = event @@ -834,12 +915,18 @@ def by_event_type(event): else: last_value = dict(last_value) item_type = item["type"] - last_value[item_type] = max(item["created_at"], last_value.get(item_type, "1970-01-01T00:00:00Z")) + last_value[item_type] = max( + item["created_at"], last_value.get(item_type, "1970-01-01T00:00:00Z") + ) return last_value - @dlt.resource(primary_key="id", table_name=lambda i: i['type']) - def _get_shuffled_events(last_created_at = dlt.sources.incremental("$", last_value_func=by_event_type)): - with open("tests/normalize/cases/github.events.load_page_1_duck.json", "r", encoding="utf-8") as f: + @dlt.resource(primary_key="id", table_name=lambda i: i["type"]) + def _get_shuffled_events( + last_created_at=dlt.sources.incremental("$", last_value_func=by_event_type) + ): + with open( + "tests/normalize/cases/github.events.load_page_1_duck.json", "r", encoding="utf-8" + ) as f: yield json.load(f) with Container().injectable_context(StateInjectableContext(state={})): @@ -864,36 +951,45 @@ def test_timezone_naive_datetime() -> None: pendulum_start_dt = pendulum.instance(start_dt) # With timezone @dlt.resource - def some_data(updated_at: dlt.sources.incremental[pendulum.DateTime] = dlt.sources.incremental('updated_at', pendulum_start_dt)): - data = [{'updated_at': start_dt + timedelta(hours=1)}, {'updated_at': start_dt + timedelta(hours=2)}] + def some_data( + updated_at: dlt.sources.incremental[pendulum.DateTime] = dlt.sources.incremental( + "updated_at", pendulum_start_dt + ) + ): + data = [ + {"updated_at": start_dt + timedelta(hours=1)}, + {"updated_at": start_dt + timedelta(hours=2)}, + ] yield data pipeline = dlt.pipeline(pipeline_name=uniq_id()) resource = some_data() pipeline.extract(resource) # last value has timezone added - last_value = resource.state['incremental']['updated_at']['last_value'] + last_value = resource.state["incremental"]["updated_at"]["last_value"] assert isinstance(last_value, pendulum.DateTime) assert last_value.tzname() == "UTC" @dlt.resource def endless_sequence( - item_type: TItemFormat, - updated_at: dlt.sources.incremental[int] = dlt.sources.incremental('updated_at', initial_value=1) + item_type: TDataItemFormat, + updated_at: dlt.sources.incremental[int] = dlt.sources.incremental( + "updated_at", initial_value=1 + ), ) -> Any: max_values = 20 start = updated_at.last_value - data = [{'updated_at': i} for i in range(start, start + max_values)] + data = [{"updated_at": i} for i in range(start, start + max_values)] source_items = 
data_to_item_format(item_type, data) yield from source_items -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_chunked_ranges(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_chunked_ranges(item_type: TDataItemFormat) -> None: """Load chunked ranges with end value along with incremental""" - pipeline = dlt.pipeline(pipeline_name='incremental_' + uniq_id(), destination='duckdb') + pipeline = dlt.pipeline(pipeline_name="incremental_" + uniq_id(), destination="duckdb") chunks = [ # Load some start/end ranges in and out of order @@ -912,81 +1008,106 @@ def test_chunked_ranges(item_type: TItemFormat) -> None: for start, end in chunks: pipeline.run( - endless_sequence(item_type, updated_at=dlt.sources.incremental(initial_value=start, end_value=end)), - write_disposition='append' + endless_sequence( + item_type, updated_at=dlt.sources.incremental(initial_value=start, end_value=end) + ), + write_disposition="append", ) - expected_range = list(chain( - range(10, 20), - range(20, 30), - range(40, 50), - range(50, 60), - range(60, 61), - range(62, 70), - range(70, 89), - range(89, 109), - )) + expected_range = list( + chain( + range(10, 20), + range(20, 30), + range(40, 50), + range(50, 60), + range(60, 61), + range(62, 70), + range(70, 89), + range(89, 109), + ) + ) with pipeline.sql_client() as client: - items = [row[0] for row in client.execute_sql("SELECT updated_at FROM endless_sequence ORDER BY updated_at")] + items = [ + row[0] + for row in client.execute_sql( + "SELECT updated_at FROM endless_sequence ORDER BY updated_at" + ) + ] assert items == expected_range -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_end_value_with_batches(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_end_value_with_batches(item_type: TDataItemFormat) -> None: """Ensure incremental with end_value works correctly when resource yields lists instead of single items""" + @dlt.resource def batched_sequence( - updated_at: dlt.sources.incremental[int] = dlt.sources.incremental('updated_at', initial_value=1) + updated_at: dlt.sources.incremental[int] = dlt.sources.incremental( + "updated_at", initial_value=1 + ) ) -> Any: start = updated_at.last_value - data = [{'updated_at': i} for i in range(start, start + 12)] + data = [{"updated_at": i} for i in range(start, start + 12)] yield data_to_item_format(item_type, data) - data = [{'updated_at': i} for i in range(start+12, start + 20)] + data = [{"updated_at": i} for i in range(start + 12, start + 20)] yield data_to_item_format(item_type, data) - pipeline = dlt.pipeline(pipeline_name='incremental_' + uniq_id(), destination='duckdb') + pipeline = dlt.pipeline(pipeline_name="incremental_" + uniq_id(), destination="duckdb") pipeline.run( batched_sequence(updated_at=dlt.sources.incremental(initial_value=1, end_value=10)), - write_disposition='append' + write_disposition="append", ) with pipeline.sql_client() as client: - items = [row[0] for row in client.execute_sql("SELECT updated_at FROM batched_sequence ORDER BY updated_at")] + items = [ + row[0] + for row in client.execute_sql( + "SELECT updated_at FROM batched_sequence ORDER BY updated_at" + ) + ] assert items == list(range(1, 10)) pipeline.run( batched_sequence(updated_at=dlt.sources.incremental(initial_value=10, end_value=14)), - write_disposition='append' + write_disposition="append", ) with pipeline.sql_client() as client: - items = [row[0] for row in client.execute_sql("SELECT 
updated_at FROM batched_sequence ORDER BY updated_at")] + items = [ + row[0] + for row in client.execute_sql( + "SELECT updated_at FROM batched_sequence ORDER BY updated_at" + ) + ] assert items == list(range(1, 14)) -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_load_with_end_value_does_not_write_state(item_type: TItemFormat) -> None: - """When loading chunk with initial/end value range. The resource state is untouched. - """ - pipeline = dlt.pipeline(pipeline_name='incremental_' + uniq_id(), destination='duckdb') +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_load_with_end_value_does_not_write_state(item_type: TDataItemFormat) -> None: + """When loading chunk with initial/end value range. The resource state is untouched.""" + pipeline = dlt.pipeline(pipeline_name="incremental_" + uniq_id(), destination="duckdb") - pipeline.extract(endless_sequence(item_type, updated_at=dlt.sources.incremental(initial_value=20, end_value=30))) + pipeline.extract( + endless_sequence( + item_type, updated_at=dlt.sources.incremental(initial_value=20, end_value=30) + ) + ) - assert pipeline.state.get('sources') is None + assert pipeline.state.get("sources") is None -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_end_value_initial_value_errors(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_end_value_initial_value_errors(item_type: TDataItemFormat) -> None: @dlt.resource def some_data( - updated_at: dlt.sources.incremental[int] = dlt.sources.incremental('updated_at') + updated_at: dlt.sources.incremental[int] = dlt.sources.incremental("updated_at"), ) -> Any: - yield {'updated_at': 1} + yield {"updated_at": 1} # end_value without initial_value with pytest.raises(ConfigurationValueError) as ex: @@ -998,33 +1119,55 @@ def some_data( with pytest.raises(ConfigurationValueError) as ex: list(some_data(updated_at=dlt.sources.incremental(initial_value=42, end_value=22))) - assert str(ex.value).startswith("Incremental 'initial_value' (42) is higher than 'end_value` (22)") + assert str(ex.value).startswith( + "Incremental 'initial_value' (42) is higher than 'end_value` (22)" + ) # max function and end_value higher than initial_value with pytest.raises(ConfigurationValueError) as ex: - list(some_data(updated_at=dlt.sources.incremental(initial_value=22, end_value=42, last_value_func=min))) + list( + some_data( + updated_at=dlt.sources.incremental( + initial_value=22, end_value=42, last_value_func=min + ) + ) + ) - assert str(ex.value).startswith("Incremental 'initial_value' (22) is lower than 'end_value` (42).") + assert str(ex.value).startswith( + "Incremental 'initial_value' (22) is lower than 'end_value` (42)." 
+ ) def custom_last_value(items): return max(items) # custom function which evaluates end_value lower than initial with pytest.raises(ConfigurationValueError) as ex: - list(some_data(updated_at=dlt.sources.incremental(initial_value=42, end_value=22, last_value_func=custom_last_value))) + list( + some_data( + updated_at=dlt.sources.incremental( + initial_value=42, end_value=22, last_value_func=custom_last_value + ) + ) + ) - assert "The result of 'custom_last_value([end_value, initial_value])' must equal 'end_value'" in str(ex.value) + assert ( + "The result of 'custom_last_value([end_value, initial_value])' must equal 'end_value'" + in str(ex.value) + ) -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_out_of_range_flags(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_out_of_range_flags(item_type: TDataItemFormat) -> None: """Test incremental.start_out_of_range / end_out_of_range flags are set when items are filtered out""" + @dlt.resource def descending( - updated_at: dlt.sources.incremental[int] = dlt.sources.incremental('updated_at', initial_value=10) + updated_at: dlt.sources.incremental[int] = dlt.sources.incremental( + "updated_at", initial_value=10 + ) ) -> Any: for chunk in chunks(list(reversed(range(48))), 10): - data = [{'updated_at': i} for i in chunk] + data = [{"updated_at": i} for i in chunk] yield data_to_item_format(item_type, data) # Assert flag is set only on the first item < initial_value if all(item > 9 for item in chunk): @@ -1035,10 +1178,12 @@ def descending( @dlt.resource def ascending( - updated_at: dlt.sources.incremental[int] = dlt.sources.incremental('updated_at', initial_value=22, end_value=45) + updated_at: dlt.sources.incremental[int] = dlt.sources.incremental( + "updated_at", initial_value=22, end_value=45 + ) ) -> Any: for chunk in chunks(list(range(22, 500)), 10): - data = [{'updated_at': i} for i in chunk] + data = [{"updated_at": i} for i in chunk] yield data_to_item_format(item_type, data) # Flag is set only when end_value is reached if all(item < 45 for item in chunk): @@ -1047,15 +1192,16 @@ def ascending( assert updated_at.end_out_of_range is True return - @dlt.resource def descending_single_item( - updated_at: dlt.sources.incremental[int] = dlt.sources.incremental('updated_at', initial_value=10) + updated_at: dlt.sources.incremental[int] = dlt.sources.incremental( + "updated_at", initial_value=10 + ) ) -> Any: for i in reversed(range(14)): - data = [{'updated_at': i}] + data = [{"updated_at": i}] yield from data_to_item_format(item_type, data) - yield {'updated_at': i} + yield {"updated_at": i} if i >= 10: assert updated_at.start_out_of_range is False else: @@ -1064,10 +1210,12 @@ def descending_single_item( @dlt.resource def ascending_single_item( - updated_at: dlt.sources.incremental[int] = dlt.sources.incremental('updated_at', initial_value=10, end_value=22) + updated_at: dlt.sources.incremental[int] = dlt.sources.incremental( + "updated_at", initial_value=10, end_value=22 + ) ) -> Any: for i in range(10, 500): - data = [{'updated_at': i}] + data = [{"updated_at": i}] yield from data_to_item_format(item_type, data) if i < 22: assert updated_at.end_out_of_range is False @@ -1075,7 +1223,7 @@ def ascending_single_item( assert updated_at.end_out_of_range is True return - pipeline = dlt.pipeline(pipeline_name='incremental_' + uniq_id(), destination='duckdb') + pipeline = dlt.pipeline(pipeline_name="incremental_" + uniq_id(), destination="duckdb") pipeline.extract(descending()) 
@@ -1085,19 +1233,27 @@ def ascending_single_item( pipeline.extract(ascending_single_item()) -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_get_incremental_value_type(item_type: TItemFormat) -> None: + +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_get_incremental_value_type(item_type: TDataItemFormat) -> None: assert dlt.sources.incremental("id").get_incremental_value_type() is Any assert dlt.sources.incremental("id", initial_value=0).get_incremental_value_type() is int assert dlt.sources.incremental("id", initial_value=None).get_incremental_value_type() is Any assert dlt.sources.incremental[int]("id").get_incremental_value_type() is int - assert dlt.sources.incremental[pendulum.DateTime]("id").get_incremental_value_type() is pendulum.DateTime + assert ( + dlt.sources.incremental[pendulum.DateTime]("id").get_incremental_value_type() + is pendulum.DateTime + ) # typing has precedence assert dlt.sources.incremental[pendulum.DateTime]("id", initial_value=1).get_incremental_value_type() is pendulum.DateTime # type: ignore[arg-type] # pass default value @dlt.resource - def test_type(updated_at = dlt.sources.incremental[str]("updated_at", allow_external_schedulers=True)): # noqa: B008 + def test_type( + updated_at=dlt.sources.incremental[str]( # noqa: B008 + "updated_at", allow_external_schedulers=True + ) + ): data = [{"updated_at": d} for d in [1, 2, 3]] yield data_to_item_format(item_type, data) @@ -1107,7 +1263,11 @@ def test_type(updated_at = dlt.sources.incremental[str]("updated_at", allow_exte # use annotation @dlt.resource - def test_type_2(updated_at: dlt.sources.incremental[int] = dlt.sources.incremental("updated_at", allow_external_schedulers=True)): + def test_type_2( + updated_at: dlt.sources.incremental[int] = dlt.sources.incremental( + "updated_at", allow_external_schedulers=True + ) + ): data = [{"updated_at": d} for d in [1, 2, 3]] yield data_to_item_format(item_type, data) @@ -1127,7 +1287,9 @@ def test_type_3(updated_at: dlt.sources.incremental[int]): # pass explicit value overriding default that is typed @dlt.resource - def test_type_4(updated_at = dlt.sources.incremental("updated_at", allow_external_schedulers=True)): + def test_type_4( + updated_at=dlt.sources.incremental("updated_at", allow_external_schedulers=True) + ): data = [{"updated_at": d} for d in [1, 2, 3]] yield data_to_item_format(item_type, data) @@ -1137,7 +1299,9 @@ def test_type_4(updated_at = dlt.sources.incremental("updated_at", allow_externa # no generic type information @dlt.resource - def test_type_5(updated_at = dlt.sources.incremental("updated_at", allow_external_schedulers=True)): + def test_type_5( + updated_at=dlt.sources.incremental("updated_at", allow_external_schedulers=True) + ): data = [{"updated_at": d} for d in [1, 2, 3]] yield data_to_item_format(item_type, data) @@ -1146,36 +1310,48 @@ def test_type_5(updated_at = dlt.sources.incremental("updated_at", allow_externa assert r.incremental._incremental.get_incremental_value_type() is Any -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_join_env_scheduler(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_join_env_scheduler(item_type: TDataItemFormat) -> None: @dlt.resource - def test_type_2(updated_at: dlt.sources.incremental[int] = dlt.sources.incremental("updated_at", allow_external_schedulers=True)): + def test_type_2( + updated_at: dlt.sources.incremental[int] = dlt.sources.incremental( + "updated_at", 
allow_external_schedulers=True + ) + ): data = [{"updated_at": d} for d in [1, 2, 3]] yield data_to_item_format(item_type, data) result = list(test_type_2()) - assert data_item_to_list(item_type, result) == [{'updated_at': 1}, {'updated_at': 2}, {'updated_at': 3}] + assert data_item_to_list(item_type, result) == [ + {"updated_at": 1}, + {"updated_at": 2}, + {"updated_at": 3}, + ] # set start and end values os.environ["DLT_START_VALUE"] = "2" result = list(test_type_2()) - assert data_item_to_list(item_type, result) == [{'updated_at': 2}, {'updated_at': 3}] + assert data_item_to_list(item_type, result) == [{"updated_at": 2}, {"updated_at": 3}] os.environ["DLT_END_VALUE"] = "3" result = list(test_type_2()) - assert data_item_to_list(item_type, result) == [{'updated_at': 2}] + assert data_item_to_list(item_type, result) == [{"updated_at": 2}] -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_join_env_scheduler_pipeline(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_join_env_scheduler_pipeline(item_type: TDataItemFormat) -> None: @dlt.resource - def test_type_2(updated_at: dlt.sources.incremental[int] = dlt.sources.incremental("updated_at", allow_external_schedulers=True)): + def test_type_2( + updated_at: dlt.sources.incremental[int] = dlt.sources.incremental( + "updated_at", allow_external_schedulers=True + ) + ): data = [{"updated_at": d} for d in [1, 2, 3]] yield data_to_item_format(item_type, data) - pip_1_name = 'incremental_' + uniq_id() - pipeline = dlt.pipeline(pipeline_name=pip_1_name, destination='duckdb') + pip_1_name = "incremental_" + uniq_id() + pipeline = dlt.pipeline(pipeline_name=pip_1_name, destination="duckdb") r = test_type_2() - r.add_step(AssertItems([{'updated_at': 2}, {'updated_at': 3}], item_type)) + r.add_step(AssertItems([{"updated_at": 2}, {"updated_at": 3}], item_type)) os.environ["DLT_START_VALUE"] = "2" pipeline.extract(r) # state is saved next extract has no items @@ -1186,18 +1362,20 @@ def test_type_2(updated_at: dlt.sources.incremental[int] = dlt.sources.increment # setting end value will stop using state os.environ["DLT_END_VALUE"] = "3" r = test_type_2() - r.add_step(AssertItems([{'updated_at': 2}], item_type)) + r.add_step(AssertItems([{"updated_at": 2}], item_type)) pipeline.extract(r) r = test_type_2() os.environ["DLT_START_VALUE"] = "1" - r.add_step(AssertItems([{'updated_at': 1}, {'updated_at': 2}], item_type)) + r.add_step(AssertItems([{"updated_at": 1}, {"updated_at": 2}], item_type)) pipeline.extract(r) -@pytest.mark.parametrize("item_type", ALL_ITEM_FORMATS) -def test_allow_external_schedulers(item_type: TItemFormat) -> None: +@pytest.mark.parametrize("item_type", ALL_DATA_ITEM_FORMATS) +def test_allow_external_schedulers(item_type: TDataItemFormat) -> None: @dlt.resource() - def test_type_2(updated_at: dlt.sources.incremental[int] = dlt.sources.incremental("updated_at")): + def test_type_2( + updated_at: dlt.sources.incremental[int] = dlt.sources.incremental("updated_at"), + ): data = [{"updated_at": d} for d in [1, 2, 3]] yield data_to_item_format(item_type, data) diff --git a/tests/extract/test_sources.py b/tests/extract/test_sources.py index d8223f2ee8..97b3a3c558 100644 --- a/tests/extract/test_sources.py +++ b/tests/extract/test_sources.py @@ -9,10 +9,21 @@ from dlt.common.pipeline import StateInjectableContext, source_state from dlt.common.schema import Schema from dlt.common.typing import TDataItems -from dlt.extract.exceptions import 
DataItemRequiredForDynamicTableHints, InconsistentTableTemplate, InvalidParentResourceDataType, InvalidParentResourceIsAFunction, InvalidResourceDataTypeMultiplePipes, InvalidTransformerDataTypeGeneratorFunctionRequired, InvalidTransformerGeneratorFunction, ParametrizedResourceUnbound, ResourcesNotFoundError + +from dlt.extract import DltResource, DltSource, Incremental +from dlt.extract.source import DltResourceDict +from dlt.extract.exceptions import ( + DataItemRequiredForDynamicTableHints, + InconsistentTableTemplate, + InvalidParentResourceDataType, + InvalidParentResourceIsAFunction, + InvalidResourceDataTypeMultiplePipes, + InvalidTransformerDataTypeGeneratorFunctionRequired, + InvalidTransformerGeneratorFunction, + ParametrizedResourceUnbound, + ResourcesNotFoundError, +) from dlt.extract.pipe import Pipe -from dlt.extract.typing import FilterItem, MapItem -from dlt.extract.source import DltResource, DltResourceDict, DltSource def test_call_data_resource() -> None: @@ -21,8 +32,7 @@ def test_call_data_resource() -> None: def test_parametrized_resource() -> None: - - def parametrized(p1, /, p2, *, p3 = None): + def parametrized(p1, /, p2, *, p3=None): assert p1 == "p1" assert p2 == 1 assert p3 is None @@ -51,7 +61,7 @@ def parametrized(p1, /, p2, *, p3 = None): # as part of the source r = DltResource.from_data(parametrized) - s = DltSource("source", "module", Schema("source"), [r]) + s = DltSource(Schema("source"), "module", [r]) with pytest.raises(ParametrizedResourceUnbound) as py_ex: list(s) @@ -67,8 +77,7 @@ def parametrized(p1, /, p2, *, p3 = None): def test_parametrized_transformer() -> None: - - def good_transformer(item, /, p1, p2, *, p3 = None): + def good_transformer(item, /, p1, p2, *, p3=None): assert p1 == "p1" assert p2 == 2 assert p3 is None @@ -136,9 +145,9 @@ def bad_transformer_3(*, item): def assert_items(_items: TDataItems) -> None: # 2 items yielded * p2=2 - assert len(_items) == 2*2 - assert _items[0] == {'wrap': 'itemX', 'mark': 'p1', 'iter': 0} - assert _items[3] == {'wrap': 'itemY', 'mark': 'p1', 'iter': 1} + assert len(_items) == 2 * 2 + assert _items[0] == {"wrap": "itemX", "mark": "p1", "iter": 0} + assert _items[3] == {"wrap": "itemY", "mark": "p1", "iter": 1} assert_items(items) @@ -150,7 +159,6 @@ def assert_items(_items: TDataItems) -> None: def test_resource_bind_when_in_source() -> None: - @dlt.resource def parametrized(_range: int): yield list(range(_range)) @@ -187,7 +195,6 @@ def test_source(): def test_resource_bind_call_forms() -> None: - @dlt.resource def returns_res(_input): # resource returning resource @@ -228,7 +235,6 @@ def regular(_input): b_returns_pipe = returns_pipe("ABCA") assert len(b_returns_pipe._pipe) == 1 - @dlt.source def test_source(): return returns_res, returns_pipe, regular @@ -241,7 +247,7 @@ def test_source(): assert s.regular._pipe is not regular._pipe # will repeat each string 3 times - s.regular.add_map(lambda i: i*3) + s.regular.add_map(lambda i: i * 3) assert len(regular._pipe) == 2 assert len(s.regular._pipe) == 3 @@ -252,14 +258,14 @@ def test_source(): assert list(s.regular) == ["AAA", "AAA", "AAA"] # binding resource that returns resource will replace the object content, keeping the object id - s.returns_res.add_map(lambda i: i*3) + s.returns_res.add_map(lambda i: i * 3) s.returns_res.bind(["X", "Y", "Z"]) # got rid of all mapping and filter functions assert len(s.returns_res._pipe) == 1 assert list(s.returns_res) == ["X", "Y", "Z"] # same for resource returning pipe - s.returns_pipe.add_map(lambda i: i*3) + 
s.returns_pipe.add_map(lambda i: i * 3) s.returns_pipe.bind(["X", "Y", "M"]) # got rid of all mapping and filter functions assert len(s.returns_pipe._pipe) == 1 @@ -267,12 +273,11 @@ def test_source(): # s.regular is exhausted so set it again # add lambda that after filtering for A, will multiply it by 4 - s.resources["regular"] = regular.add_map(lambda i: i*4)(["A", "Y"]) - assert list(s) == ['X', 'Y', 'Z', 'X', 'Y', 'M', 'AAAA'] + s.resources["regular"] = regular.add_map(lambda i: i * 4)(["A", "Y"]) + assert list(s) == ["X", "Y", "Z", "X", "Y", "M", "AAAA"] def test_call_clone_separate_pipe() -> None: - all_yields = [] def some_data_gen(param: str): @@ -293,14 +298,13 @@ def some_data(param: str): def test_resource_bind_lazy_eval() -> None: - @dlt.resource def needs_param(param): yield from range(param) @dlt.transformer(data_from=needs_param(3)) def tx_form(item, multi): - yield item*multi + yield item * multi @dlt.transformer(data_from=tx_form(2)) def tx_form_fin(item, div): @@ -308,7 +312,7 @@ def tx_form_fin(item, div): @dlt.transformer(data_from=needs_param) def tx_form_dir(item, multi): - yield item*multi + yield item * multi # tx_form takes data from needs_param(3) which is lazily evaluated assert list(tx_form(2)) == [0, 2, 4] @@ -316,8 +320,8 @@ def tx_form_dir(item, multi): assert list(tx_form(2)) == [0, 2, 4] # same for tx_form_fin - assert list(tx_form_fin(3)) == [0, 2/3, 4/3] - assert list(tx_form_fin(3)) == [0, 2/3, 4/3] + assert list(tx_form_fin(3)) == [0, 2 / 3, 4 / 3] + assert list(tx_form_fin(3)) == [0, 2 / 3, 4 / 3] # binding `needs_param`` in place will not affect the tx_form and tx_form_fin (they operate on copies) needs_param.bind(4) @@ -331,7 +335,6 @@ def tx_form_dir(item, multi): def test_transformer_preliminary_step() -> None: - def yield_twice(item): yield item.upper() yield item.upper() @@ -340,13 +343,20 @@ def yield_twice(item): # filter out small caps and insert this before the head tx_stage.add_filter(lambda letter: letter.isupper(), 0) # be got filtered out before duplication - assert list(dlt.resource(["A", "b", "C"], name="data") | tx_stage) == ['A', 'A', 'C', 'C'] + assert list(dlt.resource(["A", "b", "C"], name="data") | tx_stage) == ["A", "A", "C", "C"] # filter after duplication tx_stage = dlt.transformer()(yield_twice)() tx_stage.add_filter(lambda letter: letter.isupper()) # nothing is filtered out: on duplicate we also capitalize so filter does not trigger - assert list(dlt.resource(["A", "b", "C"], name="data") | tx_stage) == ['A', 'A', 'B', 'B', 'C', 'C'] + assert list(dlt.resource(["A", "b", "C"], name="data") | tx_stage) == [ + "A", + "A", + "B", + "B", + "C", + "C", + ] def test_set_table_name() -> None: @@ -359,7 +369,6 @@ def test_set_table_name() -> None: def test_select_resources() -> None: - @dlt.source def test_source(no_resources): for i in range(no_resources): @@ -385,7 +394,11 @@ def test_source(no_resources): s_sel = s.with_resources("resource_1", "resource_7") # returns a clone assert s is not s_sel - assert list(s_sel.selected_resources) == ["resource_1", "resource_7"] == list(s_sel.resources.selected) + assert ( + list(s_sel.selected_resources) + == ["resource_1", "resource_7"] + == list(s_sel.resources.selected) + ) assert list(s_sel.resources) == all_resource_names info = str(s_sel) assert "resource resource_0 is not selected" in info @@ -403,7 +416,6 @@ def test_source(no_resources): def test_clone_source() -> None: @dlt.source def test_source(no_resources): - def _gen(i): yield "A" * i @@ -422,7 +434,7 @@ def _gen(i): # but we 
keep pipe names assert s.resources[name].name == clone_s.resources[name].name - assert list(s) == ['', 'A', 'AA', 'AAA'] + assert list(s) == ["", "A", "AA", "AAA"] # we expired generators assert list(clone_s) == [] @@ -430,7 +442,6 @@ def _gen(i): @dlt.source # type: ignore[no-redef] def test_source(no_resources): - def _gen(i): yield "A" * i @@ -445,15 +456,13 @@ def _gen(i): clone_s.resources[name].bind(idx) # now thanks to late eval both sources evaluate separately - assert list(s) == ['', 'A', 'AA', 'AAA'] - assert list(clone_s) == ['', 'A', 'AA', 'AAA'] + assert list(s) == ["", "A", "AA", "AAA"] + assert list(clone_s) == ["", "A", "AA", "AAA"] def test_multiple_parametrized_transformers() -> None: - @dlt.source def _source(test_set: int = 1): - @dlt.resource(selected=False) def _r1(): yield ["a", "b", "c"] @@ -464,7 +473,7 @@ def _t1(items, suffix): @dlt.transformer(data_from=_t1) def _t2(items, mul): - yield items*mul + yield items * mul if test_set == 1: return _r1, _t1, _t2 @@ -477,8 +486,7 @@ def _t2(items, mul): # true pipelining fun return _r1() | _t1("2") | _t2(2) - - expected_data = ['a_2', 'b_2', 'c_2', 'a_2', 'b_2', 'c_2'] + expected_data = ["a_2", "b_2", "c_2", "a_2", "b_2", "c_2"] # this s contains all resources s = _source(1) @@ -539,7 +547,6 @@ def _t2(items, mul): def test_extracted_resources_selector() -> None: @dlt.source def _source(test_set: int = 1): - @dlt.resource(selected=False, write_disposition="append") def _r1(): yield ["a", "b", "c"] @@ -550,7 +557,7 @@ def _t1(items, suffix): @dlt.transformer(data_from=_r1, write_disposition="merge") def _t2(items, mul): - yield items*mul + yield items * mul if test_set == 1: return _r1, _t1, _t2 @@ -588,10 +595,8 @@ def _t2(items, mul): def test_source_decompose() -> None: - @dlt.source def _source(): - @dlt.resource(selected=True) def _r_init(): yield ["-", "x", "!"] @@ -606,18 +611,18 @@ def _t1(items, suffix): @dlt.transformer(data_from=_r1) def _t2(items, mul): - yield items*mul + yield items * mul @dlt.transformer(data_from=_r1) def _t3(items, mul): for item in items: - yield item.upper()*mul + yield item.upper() * mul # add something to init @dlt.transformer(data_from=_r_init) def _t_init_post(items): for item in items: - yield item*2 + yield item * 2 @dlt.resource def _r_isolee(): @@ -640,7 +645,14 @@ def _r_isolee(): # keeps order of resources inside # here we didn't eliminate (_r_init, _r_init) as this does not impact decomposition, however this edge is not necessary - assert _source().resources.selected_dag == [("_r_init", "_r_init"), ("_r_init", "_t_init_post"), ('_r1', '_t1'), ('_r1', '_t2'), ('_r1', '_t3'), ('_r_isolee', '_r_isolee')] + assert _source().resources.selected_dag == [ + ("_r_init", "_r_init"), + ("_r_init", "_t_init_post"), + ("_r1", "_t1"), + ("_r1", "_t2"), + ("_r1", "_t3"), + ("_r_isolee", "_r_isolee"), + ] components = _source().decompose("scc") # first element contains _r_init assert "_r_init" in components[0].resources.selected.keys() @@ -684,7 +696,6 @@ def _gen(): @dlt.resource def res_in_res(table_name, w_d): - def _gen(s): yield from s @@ -692,7 +703,6 @@ def _gen(s): def test_resource_returning_resource() -> None: - @dlt.source def source_r_in_r(): yield res_in_res @@ -725,6 +735,7 @@ def test_source_resource_attrs_with_conflicting_attrs() -> None: """Resource names that conflict with DltSource attributes do not work with attribute access""" dlt.pipeline(full_refresh=True) # Create pipeline so state property can be accessed names = ["state", "resources", "schema", "name", "clone"] +
@dlt.source def test_source() -> Iterator[DltResource]: for name in names: @@ -741,13 +752,19 @@ def test_source() -> Iterator[DltResource]: def test_add_transform_steps() -> None: # add all step types, using indexes. final steps # gen -> map that converts to str and multiplies character -> filter str of len 2 -> yield all characters in str separately - r = dlt.resource([1, 2, 3, 4], name="all").add_limit(3).add_yield_map(lambda i: (yield from i)).add_map(lambda i: str(i) * i, 1).add_filter(lambda i: len(i) == 2, 2) + r = ( + dlt.resource([1, 2, 3, 4], name="all") + .add_limit(3) + .add_yield_map(lambda i: (yield from i)) + .add_map(lambda i: str(i) * i, 1) + .add_filter(lambda i: len(i) == 2, 2) + ) assert list(r) == ["2", "2"] def test_add_transform_steps_pipe() -> None: r = dlt.resource([1, 2, 3], name="all") | (lambda i: str(i) * i) | (lambda i: (yield from i)) - assert list(r) == ['1', '2', '2', '3', '3', '3'] + assert list(r) == ["1", "2", "2", "3", "3", "3"] def test_limit_infinite_counter() -> None: @@ -756,7 +773,6 @@ def test_limit_infinite_counter() -> None: def test_limit_source() -> None: - def mul_c(item): yield from "A" * (item + 2) @@ -768,11 +784,10 @@ def infinite_source(): yield r | dlt.transformer(name=f"mul_c_{idx}")(mul_c) # transformer is not limited to 2 elements, infinite resource is, we have 3 resources - assert list(infinite_source().add_limit(2)) == ['A', 'A', 0, 'A', 'A', 'A', 1] * 3 + assert list(infinite_source().add_limit(2)) == ["A", "A", 0, "A", "A", "A", 1] * 3 def test_source_state() -> None: - @dlt.source def test_source(expected_state): assert source_state() == expected_state @@ -782,17 +797,16 @@ def test_source(expected_state): test_source({}).state dlt.pipeline(full_refresh=True) - assert test_source({}).state == {} + assert test_source({}).state == {} # inject state to see if what we write in state is there with Container().injectable_context(StateInjectableContext(state={})) as state: test_source({}).state["value"] = 1 # type: ignore[index] test_source({"value": 1}) - assert state.state == {'sources': {'test_source': {'value': 1}}} + assert state.state == {"sources": {"test_source": {"value": 1}}} def test_resource_state() -> None: - @dlt.resource def test_resource(): yield [1, 2, 3] @@ -823,10 +837,14 @@ def test_source(): # resource section is current module print(state.state) # the resource that is a part of the source will create a resource state key in the source state key - assert state.state["sources"]["schema_section"] == {'resources': {'test_resource': {'in-source': True}}} - assert s.state == {'resources': {'test_resource': {'in-source': True}}} + assert state.state["sources"]["schema_section"] == { + "resources": {"test_resource": {"in-source": True}} + } + assert s.state == {"resources": {"test_resource": {"in-source": True}}} # the standalone resource will create key which is default schema name - assert state.state["sources"][p._make_schema_with_default_name().name] == {'resources': {'test_resource': {'direct': True}}} + assert state.state["sources"][p._make_schema_with_default_name().name] == { + "resources": {"test_resource": {"direct": True}} + } # def test_add_resources_to_source_simple() -> None: @@ -838,7 +856,7 @@ def input_gen(): yield from [1, 2, 3] def tx_step(item): - return item*2 + return item * 2 res_dict = DltResourceDict("source", "section") input_r = DltResource.from_data(input_gen) @@ -868,10 +886,9 @@ def tx_step(item): assert input_r_orig_pipe == input_r._pipe assert input_tx_orig_pipe == input_tx._pipe - # add 
all together res_dict = DltResourceDict("source", "section") - res_dict.add(input_r , input_r | input_tx) + res_dict.add(input_r, input_r | input_tx) assert res_dict._new_pipes == [] assert res_dict._suppress_clone_on_setitem is False assert res_dict["input_gen"]._pipe is res_dict["tx_step"]._pipe.parent @@ -879,7 +896,6 @@ def tx_step(item): assert input_r_orig_pipe == input_r._pipe assert input_tx_orig_pipe == input_tx._pipe - # replace existing resource which has the old pipe res_dict["input_gen"] = input_r # an existing clone got assigned @@ -896,8 +912,6 @@ def tx_step(item): assert input_r_orig_pipe == input_r._pipe assert input_tx_orig_pipe == input_tx._pipe - - # can't set with different name than resource really has with pytest.raises(ValueError): res_dict["input_gen_x"] = input_r.with_name("uniq") @@ -913,7 +927,6 @@ def test_add_transformer_to_source(add_mode: str) -> None: def number_gen(init): yield from range(init, init + 5) - @dlt.source def number_source(): return number_gen @@ -922,7 +935,7 @@ def number_source(): @dlt.transformer def multiplier(item): - return item*2 + return item * 2 mul_pipe = source.numbers | multiplier() @@ -947,7 +960,6 @@ def test_unknown_resource_access() -> None: def number_gen(init): yield from range(init, init + 5) - @dlt.source def number_source(): return number_gen @@ -1005,12 +1017,11 @@ def multiplier(number, mul): def test_source_multiple_iterations() -> None: - def some_data(): yield [1, 2, 3] yield [1, 2, 3] - s = DltSource("source", "module", Schema("source"), [dlt.resource(some_data())]) + s = DltSource(Schema("source"), "module", [dlt.resource(some_data())]) assert s.exhausted is False assert list(s) == [1, 2, 3, 1, 2, 3] assert s.exhausted is True @@ -1020,23 +1031,31 @@ def some_data(): def test_exhausted_property() -> None: - # this example will be exhausted after iteration def open_generator_data(): yield from [1, 2, 3, 4] - s = DltSource("source", "module", Schema("source"), [dlt.resource(open_generator_data())]) + + s = DltSource(Schema("source"), "module", [dlt.resource(open_generator_data())]) assert s.exhausted is False assert next(iter(s)) == 1 assert s.exhausted is True # lists will not exhaust - s = DltSource("source", "module", Schema("source"), [dlt.resource([1, 2, 3, 4], table_name="table", name="resource")]) + s = DltSource( + Schema("source"), + "module", + [dlt.resource([1, 2, 3, 4], table_name="table", name="resource")], + ) assert s.exhausted is False assert next(iter(s)) == 1 assert s.exhausted is False # iterators will not exhaust - s = DltSource("source", "module", Schema("source"), [dlt.resource(iter([1, 2, 3, 4]), table_name="table", name="resource")]) + s = DltSource( + Schema("source"), + "module", + [dlt.resource(iter([1, 2, 3, 4]), table_name="table", name="resource")], + ) assert s.exhausted is False assert next(iter(s)) == 1 assert s.exhausted is False @@ -1044,23 +1063,31 @@ def open_generator_data(): # having one exhausted generator resource will make the whole source exhausted def open_generator_data(): # type: ignore[no-redef] yield from [1, 2, 3, 4] - s = DltSource("source", "module", Schema("source"), [ dlt.resource([1, 2, 3, 4], table_name="table", name="resource"), dlt.resource(open_generator_data())]) + + s = DltSource( + Schema("source"), + "module", + [ + dlt.resource([1, 2, 3, 4], table_name="table", name="resource"), + dlt.resource(open_generator_data()), + ], + ) assert s.exhausted is False # execute the whole source list(s) assert s.exhausted is True - # source with transformers also
exhausts @dlt.source def mysource(): r = dlt.resource(itertools.count(start=1), name="infinity").add_limit(5) yield r yield r | dlt.transformer(name="double")(lambda x: x * 2) + s = mysource() assert s.exhausted is False - assert next(iter(s)) == 2 # transformer is returned before resource + assert next(iter(s)) == 2 # transformer is returned before resource assert s.exhausted is True @@ -1073,7 +1100,6 @@ def _r1(): def _t1(items, suffix): yield list(map(lambda i: i + "_" + suffix, items)) - r1 = _r1() r1_clone = r1.with_name("r1_clone") # new name of resource and pipe @@ -1096,8 +1122,8 @@ def _t1(items, suffix): assert bound_t1_clone_2._pipe.parent is bound_t1_clone._pipe.parent # evaluate transformers - assert list(bound_t1_clone) == ['a_ax', 'b_ax', 'c_ax'] - assert list(bound_t1_clone_2) == ['a_ax_2', 'b_ax_2', 'c_ax_2'] + assert list(bound_t1_clone) == ["a_ax", "b_ax", "c_ax"] + assert list(bound_t1_clone_2) == ["a_ax_2", "b_ax_2", "c_ax_2"] # clone pipes (bound transformer) pipe_r1 = _r1() @@ -1140,14 +1166,20 @@ def _t1(items, suffix): def test_apply_hints() -> None: def empty_gen(): yield [1, 2, 3] - empty_table_schema = {"name": "empty_gen", 'columns': {}, 'resource': 'empty_gen', 'write_disposition': 'append'} + + empty_table_schema = { + "name": "empty_gen", + "columns": {}, + "resource": "empty_gen", + "write_disposition": "append", + } empty = DltResource.from_data(empty_gen) empty_r = empty() # check defaults assert empty_r.name == empty.name == empty_r.table_name == empty.table_name == "empty_gen" - assert empty_r._table_schema_template is None + # assert empty_r._table_schema_template is None assert empty_r.compute_table_schema() == empty_table_schema assert empty_r.write_disposition == "append" @@ -1160,18 +1192,44 @@ def empty_gen(): empty_r.write_disposition = "append" assert empty_r.compute_table_schema()["write_disposition"] == "append" - empty_r.apply_hints(table_name="table", parent_table_name="parent", primary_key=["a", "b"], merge_key=["c", "a"]) + empty_r.apply_hints( + table_name="table", + parent_table_name="parent", + primary_key=["a", "b"], + merge_key=["c", "a"], + schema_contract="freeze", + ) table = empty_r.compute_table_schema() - assert table["columns"]["a"] == {'merge_key': True, 'name': 'a', 'nullable': False, 'primary_key': True} - assert table["columns"]["b"] == {'name': 'b', 'nullable': False, 'primary_key': True} - assert table["columns"]["c"] == {'merge_key': True, 'name': 'c', 'nullable': False} + assert table["columns"]["a"] == { + "merge_key": True, + "name": "a", + "nullable": False, + "primary_key": True, + } + assert table["columns"]["b"] == {"name": "b", "nullable": False, "primary_key": True} + assert table["columns"]["c"] == {"merge_key": True, "name": "c", "nullable": False} assert table["name"] == "table" assert table["parent"] == "parent" assert empty_r.table_name == "table" + assert table["schema_contract"] == "freeze" # reset - empty_r.apply_hints(table_name="", parent_table_name="", primary_key=[], merge_key="", columns={}) - assert empty_r._table_schema_template == {'columns': {}, 'incremental': None, 'validator': None, 'write_disposition': 'append'} + empty_r.apply_hints( + table_name="", + parent_table_name="", + primary_key=[], + merge_key="", + columns={}, + incremental=Incremental.EMPTY, + schema_contract={}, + ) + assert empty_r._hints == { + "columns": {}, + "incremental": None, + "validator": None, + "write_disposition": "append", + "original_columns": {}, + } table = empty_r.compute_table_schema() assert table["name"] ==
"empty_gen" assert "parent" not in table @@ -1180,11 +1238,20 @@ def empty_gen(): # combine columns with primary key empty_r = empty() - empty_r.apply_hints(columns={"tags": {"data_type": "complex", "primary_key": False}}, primary_key="tags", merge_key="tags") + empty_r.apply_hints( + columns={"tags": {"data_type": "complex", "primary_key": False}}, + primary_key="tags", + merge_key="tags", + ) # primary key not set here assert empty_r.columns["tags"] == {"data_type": "complex", "name": "tags", "primary_key": False} # only in the computed table - assert empty_r.compute_table_schema()["columns"]["tags"] == {"data_type": "complex", "name": "tags", "primary_key": True, "merge_key": True} + assert empty_r.compute_table_schema()["columns"]["tags"] == { + "data_type": "complex", + "name": "tags", + "primary_key": True, + "merge_key": True, + } def test_apply_dynamic_hints() -> None: @@ -1209,17 +1276,23 @@ def empty_gen(): # try write disposition and primary key empty_r.apply_hints(primary_key=lambda ev: ev["pk"], write_disposition=lambda ev: ev["wd"]) - table = empty_r.compute_table_schema({"t": "table", "p": "parent", "pk": ["a", "b"], "wd": "skip"}) + table = empty_r.compute_table_schema( + {"t": "table", "p": "parent", "pk": ["a", "b"], "wd": "skip"} + ) assert table["write_disposition"] == "skip" assert "a" in table["columns"] # validate fails with pytest.raises(DictValidationException): - empty_r.compute_table_schema({"t": "table", "p": "parent", "pk": ["a", "b"], "wd": "x-skip"}) + empty_r.compute_table_schema( + {"t": "table", "p": "parent", "pk": ["a", "b"], "wd": "x-skip"} + ) # dynamic columns empty_r.apply_hints(columns=lambda ev: ev["c"]) - table = empty_r.compute_table_schema({"t": "table", "p": "parent", "pk": ["a", "b"], "wd": "skip", "c": [{"name": "tags"}]}) + table = empty_r.compute_table_schema( + {"t": "table", "p": "parent", "pk": ["a", "b"], "wd": "skip", "c": [{"name": "tags"}]} + ) assert table["columns"]["tags"] == {"name": "tags"} @@ -1228,13 +1301,13 @@ def input_gen(): yield from [1, 2, 3] def tx_step(item): - return item*2 + return item * 2 input_r = DltResource.from_data(input_gen) input_r_clone = input_r.with_name("input_gen_2") # separate resources have separate pipe instances - source = DltSource("dupes", "module", Schema("dupes"), [input_r, input_r_clone]) + source = DltSource(Schema("dupes"), "module", [input_r, input_r_clone]) pipes = source.resources.pipes assert len(pipes) == 2 assert pipes[0].name == "input_gen" @@ -1245,17 +1318,23 @@ def tx_step(item): assert list(source) == [1, 2, 3, 1, 2, 3] # cloned from fresh resource - source = DltSource("dupes", "module", Schema("dupes"), [DltResource.from_data(input_gen), DltResource.from_data(input_gen).with_name("gen_2")]) + source = DltSource( + Schema("dupes"), + "module", + [DltResource.from_data(input_gen), DltResource.from_data(input_gen).with_name("gen_2")], + ) assert list(source) == [1, 2, 3, 1, 2, 3] # clone transformer input_r = DltResource.from_data(input_gen) input_tx = DltResource.from_data(tx_step, data_from=DltResource.Empty) - source = DltSource("dupes", "module", Schema("dupes"), [input_r, (input_r | input_tx).with_name("tx_clone")]) + source = DltSource( + Schema("dupes"), "module", [input_r, (input_r | input_tx).with_name("tx_clone")] + ) pipes = source.resources.pipes assert len(pipes) == 2 assert source.resources[pipes[0].name] == source.input_gen assert source.resources[pipes[1].name] == source.tx_clone selected_pipes = source.resources.selected_pipes assert len(selected_pipes) == 2 - 
assert list(source) == [1, 2, 3, 2, 4, 6] \ No newline at end of file + assert list(source) == [1, 2, 3, 2, 4, 6] diff --git a/tests/extract/test_utils.py b/tests/extract/test_utils.py index ad5584bab6..0ed352b5fc 100644 --- a/tests/extract/test_utils.py +++ b/tests/extract/test_utils.py @@ -11,7 +11,7 @@ def test_column_schema_from_list() -> None: result = ensure_table_schema_columns_hint(TABLE_UPDATE) for col in TABLE_UPDATE: - assert result[col['name']] == col # type: ignore[index] + assert result[col["name"]] == col # type: ignore[index] def test_dynamic_columns_schema_from_list() -> None: @@ -23,7 +23,7 @@ def dynamic_columns(item: Dict[str, Any]) -> List[TColumnSchema]: result = result_func({}) # type: ignore[operator] for col in TABLE_UPDATE: - assert result[col['name']] == col + assert result[col["name"]] == col def test_dynamic_columns_schema_from_pydantic() -> None: @@ -38,5 +38,5 @@ def dynamic_columns(item: Dict[str, Any]) -> Type[BaseModel]: result = result_func({}) # type: ignore[operator] - assert result['a']['data_type'] == 'bigint' - assert result['b']['data_type'] == 'text' + assert result["a"]["data_type"] == "bigint" + assert result["b"]["data_type"] == "text" diff --git a/tests/extract/test_validation.py b/tests/extract/test_validation.py index 64e06bcecc..045f75ab73 100644 --- a/tests/extract/test_validation.py +++ b/tests/extract/test_validation.py @@ -1,15 +1,19 @@ """Tests for resource validation with pydantic schema """ import typing as t - import pytest + import dlt -from dlt.extract.typing import ValidateItem +from dlt.common import json +from dlt.common.schema.exceptions import DataValidationError from dlt.common.typing import TDataItems -from dlt.extract.validation import PydanticValidator -from dlt.extract.exceptions import ValidationError, ResourceExtractionError +from dlt.common.libs.pydantic import BaseModel -from pydantic import BaseModel +from dlt.extract import DltResource +from dlt.extract.typing import ValidateItem +from dlt.extract.validation import PydanticValidator +from dlt.extract.exceptions import ResourceExtractionError +from dlt.pipeline.exceptions import PipelineStepFailed class SimpleModel(BaseModel): @@ -30,7 +34,8 @@ def some_data() -> t.Iterator[TDataItems]: # Items are passed through model data = list(some_data()) - assert data == [SimpleModel(a=1, b="2"), SimpleModel(a=2, b="3")] + # compare content-wise. 
model names change due to extra settings on columns + assert json.dumpb(data) == json.dumpb([SimpleModel(a=1, b="2"), SimpleModel(a=2, b="3")]) @pytest.mark.parametrize("yield_list", [True, False]) @@ -50,12 +55,11 @@ def some_data() -> t.Iterator[TDataItems]: # Items are passed through model data = list(resource) - assert data == [SimpleModel(a=1, b="2"), SimpleModel(a=2, b="3")] + assert json.dumpb(data) == json.dumpb([SimpleModel(a=1, b="2"), SimpleModel(a=2, b="3")]) @pytest.mark.parametrize("yield_list", [True, False]) def test_remove_validator(yield_list: bool) -> None: - @dlt.resource(columns=SimpleModel) def some_data() -> t.Iterator[TDataItems]: items = [{"a": 1, "b": "2"}, {"a": 2, "b": "3"}] @@ -68,12 +72,11 @@ def some_data() -> t.Iterator[TDataItems]: resource.validator = None data = list(resource) - assert data == [{"a": 1, "b": "2"}, {"a": 2, "b": "3"}] + assert json.dumpb(data) == json.dumpb([{"a": 1, "b": "2"}, {"a": 2, "b": "3"}]) @pytest.mark.parametrize("yield_list", [True, False]) def test_replace_validator_model(yield_list: bool) -> None: - @dlt.resource(columns=SimpleModel) def some_data() -> t.Iterator[TDataItems]: items = [{"a": 1, "b": "2"}, {"a": 2, "b": "3"}] @@ -94,19 +97,21 @@ class AnotherModel(BaseModel): data = list(resource) # Items are validated with the new model - assert data == [AnotherModel(a=1, b="2", c=0.5), AnotherModel(a=2, b="3", c=0.5)] + assert json.dumpb(data) == json.dumpb( + [AnotherModel(a=1, b="2", c=0.5), AnotherModel(a=2, b="3", c=0.5)] + ) # Ensure only one validator is applied in steps steps = resource._pipe.steps assert len(steps) == 2 assert isinstance(steps[-1], ValidateItem) - assert steps[-1].model is AnotherModel # type: ignore[attr-defined] + # model name will change according to extra items handling + assert steps[-1].model.__name__.startswith(AnotherModel.__name__) # type: ignore[attr-defined] @pytest.mark.parametrize("yield_list", [True, False]) def test_validator_property_setter(yield_list: bool) -> None: - @dlt.resource(columns=SimpleModel) def some_data() -> t.Iterator[TDataItems]: items = [{"a": 1, "b": "2"}, {"a": 2, "b": "3"}] @@ -117,24 +122,30 @@ def some_data() -> t.Iterator[TDataItems]: resource = some_data() - assert isinstance(resource.validator, PydanticValidator) and resource.validator.model is SimpleModel + assert isinstance( + resource.validator, PydanticValidator + ) and resource.validator.model.__name__.startswith(SimpleModel.__name__) class AnotherModel(BaseModel): a: int b: str c: float = 0.5 - resource.validator = PydanticValidator(AnotherModel) + resource.validator = PydanticValidator(AnotherModel, column_mode="freeze", data_mode="freeze") - assert resource.validator and resource.validator.model is AnotherModel + assert resource.validator and resource.validator.model.__name__.startswith( + AnotherModel.__name__ + ) data = list(resource) # Items are validated with the new model - assert data == [AnotherModel(a=1, b="2", c=0.5), AnotherModel(a=2, b="3", c=0.5)] + assert json.dumpb(data) == json.dumpb( + [AnotherModel(a=1, b="2", c=0.5), AnotherModel(a=2, b="3", c=0.5)] + ) @pytest.mark.parametrize("yield_list", [True, False]) -def test_failed_validation(yield_list: bool) -> None: +def test_default_validation(yield_list: bool) -> None: @dlt.resource(columns=SimpleModel) def some_data() -> t.Iterator[TDataItems]: # yield item that fails schema validation @@ -144,9 +155,101 @@ def some_data() -> t.Iterator[TDataItems]: else: yield from items + # some_data must have default Pydantic schema contract + assert 
some_data().schema_contract == { + "tables": "evolve", + "columns": "discard_value", + "data_type": "freeze", + } + # extraction fails with ValidationError with pytest.raises(ResourceExtractionError) as exinfo: list(some_data()) - assert isinstance(exinfo.value.__cause__, ValidationError) - assert str(PydanticValidator(SimpleModel)) in str(exinfo.value) + val_ex = exinfo.value.__cause__ + assert isinstance(val_ex, DataValidationError) + assert val_ex.schema_name is None + assert val_ex.table_name == "some_data" + assert val_ex.column_name == "('items', 1, 'a')" if yield_list else "('a',)" + assert val_ex.data_item == {"a": "not_int", "b": "x"} + assert val_ex.schema_entity == "data_type" + + # fail in pipeline + @dlt.resource(columns=SimpleModel) + def some_data_extra() -> t.Iterator[TDataItems]: + # yield item that fails schema validation + items = [{"a": 1, "b": "z", "c": 1.3}, {"a": "not_int", "b": "x"}] + if yield_list: + yield items + else: + yield from items + + pipeline = dlt.pipeline() + with pytest.raises(PipelineStepFailed) as py_ex: + pipeline.extract(some_data_extra()) + assert isinstance(py_ex.value.__cause__, ResourceExtractionError) + assert isinstance(py_ex.value.__cause__.__cause__, DataValidationError) + val_ex = py_ex.value.__cause__.__cause__ + assert val_ex.table_name == "some_data_extra" + assert val_ex.schema_entity == "data_type" # extra field is the cause + assert val_ex.data_item == {"a": "not_int", "b": "x"} + + +@pytest.mark.parametrize("yield_list", [True, False]) +def test_validation_with_contracts(yield_list: bool) -> None: + def some_data() -> t.Iterator[TDataItems]: + # yield item that fails schema validation + items = [{"a": 1, "b": "z"}, {"a": "not_int", "b": "x"}, {"c": "not_int"}] + if yield_list: + yield items + else: + yield from items + + # let it evolve + r: DltResource = dlt.resource(some_data(), schema_contract="evolve", columns=SimpleModel) + validator: PydanticValidator[SimpleModel] = r.validator # type: ignore[assignment] + assert validator.column_mode == "evolve" + assert validator.data_mode == "evolve" + assert validator.model.__name__.endswith("AnyExtraAllow") + items = list(r) + assert len(items) == 3 + # fully valid + assert items[0].a == 1 + assert items[0].b == "z" + # data type not valid + assert items[1].a == "not_int" + assert items[1].b == "x" + # extra attr and data invalid + assert items[2].a is None + assert items[2].b is None + assert items[2].c == "not_int" + + # let it drop + r = dlt.resource(some_data(), schema_contract="discard_row", columns=SimpleModel) + validator = r.validator # type: ignore[assignment] + assert validator.column_mode == "discard_row" + assert validator.data_mode == "discard_row" + assert validator.model.__name__.endswith("ExtraForbid") + items = list(r) + assert len(items) == 1 + assert items[0].a == 1 + assert items[0].b == "z" + + # filter just offending values + with pytest.raises(NotImplementedError): + # pydantic data_type cannot be discard_value + dlt.resource(some_data(), schema_contract="discard_value", columns=SimpleModel) + r = dlt.resource( + some_data(), + schema_contract={"columns": "discard_value", "data_type": "evolve"}, + columns=SimpleModel, + ) + validator = r.validator # type: ignore[assignment] + assert validator.column_mode == "discard_value" + assert validator.data_mode == "evolve" + # ignore is the default so no Extra in name + assert validator.model.__name__.endswith("Any") + items = list(r) + assert len(items) == 3 + # c is gone from the last model + assert not hasattr(items[2], 
"c") diff --git a/tests/extract/utils.py b/tests/extract/utils.py index b109cdbdd9..98e798d0f0 100644 --- a/tests/extract/utils.py +++ b/tests/extract/utils.py @@ -1,33 +1,44 @@ -from typing import Any, Optional, List, Literal, get_args +from typing import Any, Optional, List import pytest from itertools import zip_longest +from dlt.common.storages import PackageStorage, ParsedLoadJobFileName from dlt.common.typing import TDataItem, TDataItems -from dlt.extract.extract import ExtractorStorage +from dlt.extract.extract import ExtractStorage from dlt.extract.typing import ItemTransform -import pandas as pd -from dlt.common.libs.pyarrow import pyarrow as pa - - -TItemFormat = Literal["json", "pandas", "arrow"] - -ALL_ITEM_FORMATS = get_args(TItemFormat) - - -def expect_extracted_file(storage: ExtractorStorage, schema_name: str, table_name: str, content: str) -> None: - files = storage.list_files_to_normalize_sorted() - gen = (file for file in files if storage.get_schema_name(file) == schema_name and storage.parse_normalize_file_name(file).table_name == table_name) +from tests.utils import TDataItemFormat + + +def expect_extracted_file( + storage: ExtractStorage, + schema_name: str, + table_name: str, + content: str, + expected_files: int = 1, +) -> None: + load_ids = storage.extracted_packages.list_packages() + gen = ( + file + for load_id in load_ids + for file in storage.extracted_packages.list_new_jobs(load_id) + if storage.extracted_packages.schema_name(load_id) == schema_name + and ParsedLoadJobFileName.parse(file).table_name == table_name + ) file = next(gen, None) if file is None: - raise FileNotFoundError(storage.build_extracted_file_stem(schema_name, table_name, "***")) + raise FileNotFoundError( + PackageStorage.build_job_file_name(table_name, schema_name, validate_components=False) + ) assert file is not None - # only one file expected - with pytest.raises(StopIteration): - next(gen) - # load file and parse line by line - file_content: str = storage.storage.load(file) + # get remaining file names + remaining_files = list(gen) + assert ( + len(remaining_files) + 1 == expected_files + ), f"Expected {expected_files} files for table {schema_name}:{table_name}" + # load first file and parse line by line + file_content: str = storage.extracted_packages.storage.load(file) if content == "***": return for line, file_line in zip_longest(content.splitlines(), file_content.splitlines()): @@ -35,31 +46,17 @@ def expect_extracted_file(storage: ExtractorStorage, schema_name: str, table_nam class AssertItems(ItemTransform[TDataItem]): - def __init__(self, expected_items: Any, item_type: TItemFormat = "json") -> None: - self.expected_items = expected_items - self.item_type = item_type + def __init__(self, expected_items: Any, item_type: TDataItemFormat = "json") -> None: + self.expected_items = expected_items + self.item_type = item_type - def __call__(self, item: TDataItems, meta: Any = None) -> Optional[TDataItems]: + def __call__(self, item: TDataItems, meta: Any = None) -> Optional[TDataItems]: assert data_item_to_list(self.item_type, item) == self.expected_items return item -def data_to_item_format(item_format: TItemFormat, data: List[TDataItem]): - """Return the given data in the form of pandas, arrow table or json items""" - if item_format == "json": - return data - # Make dataframe from the data - df = pd.DataFrame(data) - if item_format == "pandas": - return [df] - elif item_format == "arrow": - return [pa.Table.from_pandas(df)] - else: - raise ValueError(f"Unknown item format: 
{item_format}") - - -def data_item_to_list(from_type: TItemFormat, values: List[TDataItem]): - if from_type == "arrow": +def data_item_to_list(from_type: TDataItemFormat, values: List[TDataItem]): + if from_type in ["arrow", "arrow-batch"]: return values[0].to_pylist() elif from_type == "pandas": return values[0].to_dict("records") diff --git a/tests/helpers/airflow_tests/conftest.py b/tests/helpers/airflow_tests/conftest.py index 023aab88c2..3d040b4a11 100644 --- a/tests/helpers/airflow_tests/conftest.py +++ b/tests/helpers/airflow_tests/conftest.py @@ -1,2 +1,2 @@ from tests.helpers.airflow_tests.utils import initialize_airflow_db -from tests.utils import preserve_environ, autouse_test_storage, TEST_STORAGE_ROOT, patch_home_dir \ No newline at end of file +from tests.utils import preserve_environ, autouse_test_storage, TEST_STORAGE_ROOT, patch_home_dir diff --git a/tests/helpers/airflow_tests/test_airflow_provider.py b/tests/helpers/airflow_tests/test_airflow_provider.py index 447006932b..68e426deb9 100644 --- a/tests/helpers/airflow_tests/test_airflow_provider.py +++ b/tests/helpers/airflow_tests/test_airflow_provider.py @@ -12,7 +12,7 @@ from dlt.common.configuration.specs.config_providers_context import ConfigProvidersContext from dlt.common.configuration.providers.toml import SECRETS_TOML_KEY -DEFAULT_DATE = pendulum.datetime(2023, 4, 18, tz='Europe/Berlin') +DEFAULT_DATE = pendulum.datetime(2023, 4, 18, tz="Europe/Berlin") # Test data SECRETS_TOML_CONTENT = """ [sources] @@ -21,7 +21,6 @@ def test_airflow_secrets_toml_provider() -> None: - @dag(start_date=DEFAULT_DATE) def test_dag(): from dlt.common.configuration.providers.airflow import AirflowSecretsTomlProvider @@ -33,18 +32,17 @@ def test_dag(): @task() def test_task(): - provider = AirflowSecretsTomlProvider() - api_key, _ = provider.get_value('api_key', str, None, 'sources') + api_key, _ = provider.get_value("api_key", str, None, "sources") # There's no pytest context here in the task, so we need to return # the results as a dict and assert them in the test function. # See ti.xcom_pull() below. 
return { - 'name': provider.name, - 'supports_secrets': provider.supports_secrets, - 'api_key_from_provider': api_key, + "name": provider.name, + "supports_secrets": provider.supports_secrets, + "api_key_from_provider": api_key, } test_task() @@ -61,12 +59,12 @@ def test_task(): ti.run() # print(task_def.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)) - result = ti.xcom_pull(task_ids='test_task') + result = ti.xcom_pull(task_ids="test_task") assert ti.state == State.SUCCESS - assert result['name'] == 'Airflow Secrets TOML Provider' - assert result['supports_secrets'] - assert result['api_key_from_provider'] == 'test_value' + assert result["name"] == "Airflow Secrets TOML Provider" + assert result["supports_secrets"] + assert result["api_key_from_provider"] == "test_value" def test_airflow_secrets_toml_provider_import_dlt_dag() -> None: @@ -86,7 +84,7 @@ def test_dag(): @task() def test_task(): return { - 'api_key_from_provider': api_key, + "api_key_from_provider": api_key, } test_task() @@ -103,10 +101,10 @@ def test_task(): ti.run() # print(task_def.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)) - result = ti.xcom_pull(task_ids='test_task') + result = ti.xcom_pull(task_ids="test_task") assert ti.state == State.SUCCESS - assert result['api_key_from_provider'] == 'test_value' + assert result["api_key_from_provider"] == "test_value" def test_airflow_secrets_toml_provider_import_dlt_task() -> None: @@ -114,7 +112,6 @@ def test_airflow_secrets_toml_provider_import_dlt_task() -> None: @dag(start_date=DEFAULT_DATE) def test_dag(): - @task() def test_task(): Variable.set(SECRETS_TOML_KEY, SECRETS_TOML_CONTENT) @@ -125,7 +122,7 @@ def test_task(): api_key = secrets["sources.api_key"] return { - 'api_key_from_provider': api_key, + "api_key_from_provider": api_key, } test_task() @@ -142,14 +139,14 @@ def test_task(): ti.run() # print(task_def.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)) - result = ti.xcom_pull(task_ids='test_task') + result = ti.xcom_pull(task_ids="test_task") assert ti.state == State.SUCCESS - assert result['api_key_from_provider'] == 'test_value' + assert result["api_key_from_provider"] == "test_value" def test_airflow_secrets_toml_provider_is_loaded(): - dag = DAG(dag_id='test_dag', start_date=DEFAULT_DATE) + dag = DAG(dag_id="test_dag", start_date=DEFAULT_DATE) def test_task(): from dlt.common.configuration.providers.airflow import AirflowSecretsTomlProvider @@ -177,13 +174,11 @@ def test_task(): # the results as a dict and assert them in the test function. # See ti.xcom_pull() below. 
return { - 'airflow_secrets_toml_provider_is_loaded': astp_is_loaded, - 'api_key_from_provider': api_key, + "airflow_secrets_toml_provider_is_loaded": astp_is_loaded, + "api_key_from_provider": api_key, } - task = PythonOperator( - task_id='test_task', python_callable=test_task, dag=dag - ) + task = PythonOperator(task_id="test_task", python_callable=test_task, dag=dag) dag.create_dagrun( state=DagRunState.RUNNING, @@ -196,15 +191,15 @@ def test_task(): ti.run() - result = ti.xcom_pull(task_ids='test_task') + result = ti.xcom_pull(task_ids="test_task") assert ti.state == State.SUCCESS - assert result['airflow_secrets_toml_provider_is_loaded'] - assert result['api_key_from_provider'] == 'test_value' + assert result["airflow_secrets_toml_provider_is_loaded"] + assert result["api_key_from_provider"] == "test_value" def test_airflow_secrets_toml_provider_missing_variable(): - dag = DAG(dag_id='test_dag', start_date=DEFAULT_DATE) + dag = DAG(dag_id="test_dag", start_date=DEFAULT_DATE) def test_task(): from dlt.common.configuration.specs import config_providers_context @@ -213,14 +208,14 @@ def test_task(): # Make sure the variable is not set Variable.delete(SECRETS_TOML_KEY) providers = config_providers_context._extra_providers() - provider = next(provider for provider in providers if isinstance(provider, AirflowSecretsTomlProvider)) + provider = next( + provider for provider in providers if isinstance(provider, AirflowSecretsTomlProvider) + ) return { - 'airflow_secrets_toml': provider._toml.as_string(), + "airflow_secrets_toml": provider._toml.as_string(), } - task = PythonOperator( - task_id='test_task', python_callable=test_task, dag=dag - ) + task = PythonOperator(task_id="test_task", python_callable=test_task, dag=dag) dag.create_dagrun( state=DagRunState.RUNNING, @@ -233,20 +228,20 @@ def test_task(): ti.run() - result = ti.xcom_pull(task_ids='test_task') + result = ti.xcom_pull(task_ids="test_task") assert ti.state == State.SUCCESS - assert result['airflow_secrets_toml'] == "" + assert result["airflow_secrets_toml"] == "" def test_airflow_secrets_toml_provider_invalid_content(): - dag = DAG(dag_id='test_dag', start_date=DEFAULT_DATE) + dag = DAG(dag_id="test_dag", start_date=DEFAULT_DATE) def test_task(): import tomlkit from dlt.common.configuration.providers.airflow import AirflowSecretsTomlProvider - Variable.set(SECRETS_TOML_KEY, 'invalid_content') + Variable.set(SECRETS_TOML_KEY, "invalid_content") # There's no pytest context here in the task, so we need # to catch the exception manually and return the result @@ -258,12 +253,10 @@ def test_task(): exception_raised = True return { - 'exception_raised': exception_raised, + "exception_raised": exception_raised, } - task = PythonOperator( - task_id='test_task', python_callable=test_task, dag=dag - ) + task = PythonOperator(task_id="test_task", python_callable=test_task, dag=dag) dag.create_dagrun( state=DagRunState.RUNNING, @@ -276,7 +269,7 @@ def test_task(): ti.run() - result = ti.xcom_pull(task_ids='test_task') + result = ti.xcom_pull(task_ids="test_task") assert ti.state == State.SUCCESS - assert result['exception_raised'] + assert result["exception_raised"] diff --git a/tests/helpers/airflow_tests/test_airflow_wrapper.py b/tests/helpers/airflow_tests/test_airflow_wrapper.py index e6b622c1c4..ad6631d1fc 100644 --- a/tests/helpers/airflow_tests/test_airflow_wrapper.py +++ b/tests/helpers/airflow_tests/test_airflow_wrapper.py @@ -18,21 +18,20 @@ from tests.utils import TEST_STORAGE_ROOT -DEFAULT_DATE = pendulum.datetime(2023, 4, 18, 
tz='Europe/Berlin') +DEFAULT_DATE = pendulum.datetime(2023, 4, 18, tz="Europe/Berlin") default_args = { - 'owner': 'airflow', - 'depends_on_past': False, - 'email_on_failure': False, - 'email_on_retry': False, - 'retries': 0, - 'max_active_runs': 1 + "owner": "airflow", + "depends_on_past": False, + "email_on_failure": False, + "email_on_retry": False, + "retries": 0, + "max_active_runs": 1, } @dlt.source def mock_data_source(): - @dlt.resource(selected=True) def _r_init(): yield ["-", "x", "!"] @@ -47,18 +46,18 @@ def _t1(items, suffix): @dlt.transformer(data_from=_r1) def _t2(items, mul): - yield items*mul + yield items * mul @dlt.transformer(data_from=_r1) def _t3(items, mul): for item in items: - yield item.upper()*mul + yield item.upper() * mul # add something to init @dlt.transformer(data_from=_r_init) def _t_init_post(items): for item in items: - yield item*2 + yield item * 2 @dlt.resource def _r_isolee(): @@ -69,7 +68,6 @@ def _r_isolee(): @dlt.source(section="mock_data_source_state") def mock_data_source_state(): - @dlt.resource(selected=True) def _r_init(): dlt.current.source_state()["counter"] = 1 @@ -94,7 +92,7 @@ def _t2(items, mul): dlt.current.source_state()["counter"] += 1 dlt.current.resource_state("_r1")["counter"] += 1 dlt.current.resource_state()["counter"] = 1 - yield items*mul + yield items * mul @dlt.transformer(data_from=_r1) def _t3(items, mul): @@ -102,13 +100,13 @@ def _t3(items, mul): dlt.current.resource_state("_r1")["counter"] += 1 dlt.current.resource_state()["counter"] = 1 for item in items: - yield item.upper()*mul + yield item.upper() * mul # add something to init @dlt.transformer(data_from=_r_init) def _t_init_post(items): for item in items: - yield item*2 + yield item * 2 @dlt.resource def _r_isolee(): @@ -121,53 +119,83 @@ def _r_isolee(): def test_regular_run() -> None: # run the pipeline normally pipeline_standalone = dlt.pipeline( - pipeline_name="pipeline_standalone", dataset_name="mock_data_" + uniq_id(), destination="duckdb", credentials=":pipeline:") + pipeline_name="pipeline_standalone", + dataset_name="mock_data_" + uniq_id(), + destination="duckdb", + credentials=":pipeline:", + ) pipeline_standalone.run(mock_data_source()) - pipeline_standalone_counts = load_table_counts(pipeline_standalone, *[t["name"] for t in pipeline_standalone.default_schema.data_tables()]) + pipeline_standalone_counts = load_table_counts( + pipeline_standalone, *[t["name"] for t in pipeline_standalone.default_schema.data_tables()] + ) tasks_list: List[PythonOperator] = None - @dag( - schedule=None, - start_date=DEFAULT_DATE, - catchup=False, - default_args=default_args - ) + + @dag(schedule=None, start_date=DEFAULT_DATE, catchup=False, default_args=default_args) def dag_regular(): nonlocal tasks_list - tasks = PipelineTasksGroup("pipeline_dag_regular", local_data_folder=TEST_STORAGE_ROOT, wipe_local_data=False) + tasks = PipelineTasksGroup( + "pipeline_dag_regular", local_data_folder=TEST_STORAGE_ROOT, wipe_local_data=False + ) pipeline_dag_regular = dlt.pipeline( - pipeline_name="pipeline_dag_regular", dataset_name="mock_data_" + uniq_id(), destination="duckdb", credentials=":pipeline:") - tasks_list = tasks.add_run(pipeline_dag_regular, mock_data_source(), decompose="none", trigger_rule="all_done", retries=0, provide_context=True) + pipeline_name="pipeline_dag_regular", + dataset_name="mock_data_" + uniq_id(), + destination="duckdb", + credentials=":pipeline:", + ) + tasks_list = tasks.add_run( + pipeline_dag_regular, + mock_data_source(), + decompose="none", + 
trigger_rule="all_done", + retries=0, + provide_context=True, + ) dag_def: DAG = dag_regular() assert len(tasks_list) == 1 # composite task name - assert tasks_list[0].task_id == "pipeline_dag_regular.mock_data_source__r_init-_t_init_post-_t1-_t2-2-more" + assert ( + tasks_list[0].task_id + == "pipeline_dag_regular.mock_data_source__r_init-_t_init_post-_t1-_t2-2-more" + ) dag_def.test() # we should be able to attach to pipeline state created within Airflow pipeline_dag_regular = dlt.attach(pipeline_name="pipeline_dag_regular") - pipeline_dag_regular_counts = load_table_counts(pipeline_dag_regular, *[t["name"] for t in pipeline_dag_regular.default_schema.data_tables()]) + pipeline_dag_regular_counts = load_table_counts( + pipeline_dag_regular, + *[t["name"] for t in pipeline_dag_regular.default_schema.data_tables()], + ) # same data should be loaded assert pipeline_dag_regular_counts == pipeline_standalone_counts quackdb_path = os.path.join(TEST_STORAGE_ROOT, "pipeline_dag_decomposed.duckdb") - @dag( - schedule=None, - start_date=DEFAULT_DATE, - catchup=False, - default_args=default_args - ) + + @dag(schedule=None, start_date=DEFAULT_DATE, catchup=False, default_args=default_args) def dag_decomposed(): nonlocal tasks_list - tasks = PipelineTasksGroup("pipeline_dag_decomposed", local_data_folder=TEST_STORAGE_ROOT, wipe_local_data=False) + tasks = PipelineTasksGroup( + "pipeline_dag_decomposed", local_data_folder=TEST_STORAGE_ROOT, wipe_local_data=False + ) # set duckdb to be outside of pipeline folder which is dropped on each task pipeline_dag_decomposed = dlt.pipeline( - pipeline_name="pipeline_dag_decomposed", dataset_name="mock_data_" + uniq_id(), destination="duckdb", credentials=quackdb_path) - tasks_list = tasks.add_run(pipeline_dag_decomposed, mock_data_source(), decompose="serialize", trigger_rule="all_done", retries=0, provide_context=True) + pipeline_name="pipeline_dag_decomposed", + dataset_name="mock_data_" + uniq_id(), + destination="duckdb", + credentials=quackdb_path, + ) + tasks_list = tasks.add_run( + pipeline_dag_decomposed, + mock_data_source(), + decompose="serialize", + trigger_rule="all_done", + retries=0, + provide_context=True, + ) dag_def = dag_decomposed() assert len(tasks_list) == 3 @@ -177,7 +205,10 @@ def dag_decomposed(): assert tasks_list[2].task_id == "pipeline_dag_decomposed.mock_data_source__r_isolee" dag_def.test() pipeline_dag_decomposed = dlt.attach(pipeline_name="pipeline_dag_decomposed") - pipeline_dag_decomposed_counts = load_table_counts(pipeline_dag_decomposed, *[t["name"] for t in pipeline_dag_decomposed.default_schema.data_tables()]) + pipeline_dag_decomposed_counts = load_table_counts( + pipeline_dag_decomposed, + *[t["name"] for t in pipeline_dag_decomposed.default_schema.data_tables()], + ) assert pipeline_dag_decomposed_counts == pipeline_standalone_counts @@ -200,7 +231,6 @@ def dag_decomposed(): def test_run_with_retry() -> None: - retries = 2 now = pendulum.now() @@ -212,19 +242,22 @@ def _fail_3(): raise Exception(f"Failed on retry #{retries}") yield from "ABC" - @dag( - schedule=None, - start_date=DEFAULT_DATE, - catchup=False, - default_args=default_args - ) + @dag(schedule=None, start_date=DEFAULT_DATE, catchup=False, default_args=default_args) def dag_fail_3(): # by default we do not retry so this will fail - tasks = PipelineTasksGroup("pipeline_fail_3", local_data_folder=TEST_STORAGE_ROOT, wipe_local_data=False) + tasks = PipelineTasksGroup( + "pipeline_fail_3", local_data_folder=TEST_STORAGE_ROOT, wipe_local_data=False + ) 
pipeline_fail_3 = dlt.pipeline( - pipeline_name="pipeline_fail_3", dataset_name="mock_data_" + uniq_id(), destination="duckdb", credentials=":pipeline:") - tasks.add_run(pipeline_fail_3, _fail_3, trigger_rule="all_done", retries=0, provide_context=True) + pipeline_name="pipeline_fail_3", + dataset_name="mock_data_" + uniq_id(), + destination="duckdb", + credentials=":pipeline:", + ) + tasks.add_run( + pipeline_fail_3, _fail_3, trigger_rule="all_done", retries=0, provide_context=True + ) dag_def: DAG = dag_fail_3() ti = get_task_run(dag_def, "pipeline_fail_3.pipeline_fail_3", now) @@ -233,19 +266,25 @@ def dag_fail_3(): ti._run_raw_task() assert pip_ex.value.step == "extract" - @dag( - schedule=None, - start_date=DEFAULT_DATE, - catchup=False, - default_args=default_args - ) + @dag(schedule=None, start_date=DEFAULT_DATE, catchup=False, default_args=default_args) def dag_fail_4(): # by default we do not retry extract so we fail - tasks = PipelineTasksGroup("pipeline_fail_3", retry_policy=DEFAULT_RETRY_BACKOFF, local_data_folder=TEST_STORAGE_ROOT, wipe_local_data=False) + tasks = PipelineTasksGroup( + "pipeline_fail_3", + retry_policy=DEFAULT_RETRY_BACKOFF, + local_data_folder=TEST_STORAGE_ROOT, + wipe_local_data=False, + ) pipeline_fail_3 = dlt.pipeline( - pipeline_name="pipeline_fail_3", dataset_name="mock_data_" + uniq_id(), destination="duckdb", credentials=":pipeline:") - tasks.add_run(pipeline_fail_3, _fail_3, trigger_rule="all_done", retries=0, provide_context=True) + pipeline_name="pipeline_fail_3", + dataset_name="mock_data_" + uniq_id(), + destination="duckdb", + credentials=":pipeline:", + ) + tasks.add_run( + pipeline_fail_3, _fail_3, trigger_rule="all_done", retries=0, provide_context=True + ) dag_def = dag_fail_4() ti = get_task_run(dag_def, "pipeline_fail_3.pipeline_fail_3", now) @@ -255,19 +294,26 @@ def dag_fail_4(): ti._run_raw_task() assert pip_ex.value.step == "extract" - @dag( - schedule=None, - start_date=DEFAULT_DATE, - catchup=False, - default_args=default_args - ) + @dag(schedule=None, start_date=DEFAULT_DATE, catchup=False, default_args=default_args) def dag_fail_5(): # this will retry - tasks = PipelineTasksGroup("pipeline_fail_3", retry_policy=DEFAULT_RETRY_BACKOFF, retry_pipeline_steps=("load", "extract"), local_data_folder=TEST_STORAGE_ROOT, wipe_local_data=False) + tasks = PipelineTasksGroup( + "pipeline_fail_3", + retry_policy=DEFAULT_RETRY_BACKOFF, + retry_pipeline_steps=("load", "extract"), + local_data_folder=TEST_STORAGE_ROOT, + wipe_local_data=False, + ) pipeline_fail_3 = dlt.pipeline( - pipeline_name="pipeline_fail_3", dataset_name="mock_data_" + uniq_id(), destination="duckdb", credentials=":pipeline:") - tasks.add_run(pipeline_fail_3, _fail_3, trigger_rule="all_done", retries=0, provide_context=True) + pipeline_name="pipeline_fail_3", + dataset_name="mock_data_" + uniq_id(), + destination="duckdb", + credentials=":pipeline:", + ) + tasks.add_run( + pipeline_fail_3, _fail_3, trigger_rule="all_done", retries=0, provide_context=True + ) dag_def = dag_fail_5() ti = get_task_run(dag_def, "pipeline_fail_3.pipeline_fail_3", now) @@ -277,22 +323,30 @@ def dag_fail_5(): def test_run_decomposed_with_state_wipe() -> None: - dataset_name = "mock_data_" + uniq_id() pipeline_name = "pipeline_dag_regular_" + uniq_id() - @dag( - schedule=None, - start_date=DEFAULT_DATE, - catchup=False, - default_args=default_args - ) + @dag(schedule=None, start_date=DEFAULT_DATE, catchup=False, default_args=default_args) def dag_regular(): - tasks = PipelineTasksGroup(pipeline_name, 
local_data_folder=TEST_STORAGE_ROOT, wipe_local_data=True, save_load_info=True, save_trace_info=True) + tasks = PipelineTasksGroup( + pipeline_name, + local_data_folder=TEST_STORAGE_ROOT, + wipe_local_data=True, + save_load_info=True, + save_trace_info=True, + ) pipeline_dag_regular = dlt.pipeline( - pipeline_name=pipeline_name, dataset_name=dataset_name, destination="duckdb") - tasks.add_run(pipeline_dag_regular, mock_data_source_state(), decompose="serialize", trigger_rule="all_done", retries=0, provide_context=True) + pipeline_name=pipeline_name, dataset_name=dataset_name, destination="duckdb" + ) + tasks.add_run( + pipeline_dag_regular, + mock_data_source_state(), + decompose="serialize", + trigger_rule="all_done", + retries=0, + provide_context=True, + ) dag_def: DAG = dag_regular() dag_def.test() @@ -302,7 +356,8 @@ def dag_regular(): dlt.attach(pipeline_name=pipeline_name) pipeline_dag_regular = dlt.pipeline( - pipeline_name=pipeline_name, dataset_name=dataset_name, destination="duckdb") + pipeline_name=pipeline_name, dataset_name=dataset_name, destination="duckdb" + ) pipeline_dag_regular.sync_destination() # print(pipeline_dag_regular.state) # now source can attach to state in the pipeline @@ -311,9 +366,9 @@ def dag_regular(): # end state was increased twice (in init and in isolee at the end) assert post_source.state["end_counter"] == 2 # the source counter was increased in init, _r1 and in 3 transformers * 3 items - assert post_source.state["counter"] == 1 + 1 + 3*3 + assert post_source.state["counter"] == 1 + 1 + 3 * 3 # resource counter _r1 - assert post_source._r1.state["counter"] == 1 + 3*3 + assert post_source._r1.state["counter"] == 1 + 3 * 3 # each transformer has a counter assert post_source._t1.state["counter"] == 1 assert post_source._t2.state["counter"] == 1 @@ -324,68 +379,114 @@ def test_run_multiple_sources() -> None: dataset_name = "mock_data_" + uniq_id() pipeline_name = "pipeline_dag_regular_" + uniq_id() - @dag( - schedule=None, - start_date=DEFAULT_DATE, - catchup=False, - default_args=default_args - ) + @dag(schedule=None, start_date=DEFAULT_DATE, catchup=False, default_args=default_args) def dag_serialize(): - tasks = PipelineTasksGroup(pipeline_name, local_data_folder=TEST_STORAGE_ROOT, wipe_local_data=True) + tasks = PipelineTasksGroup( + pipeline_name, local_data_folder=TEST_STORAGE_ROOT, wipe_local_data=True + ) pipeline_dag_regular = dlt.pipeline( - pipeline_name=pipeline_name, dataset_name=dataset_name, destination="duckdb") - st_tasks = tasks.add_run(pipeline_dag_regular, mock_data_source_state(), decompose="serialize", trigger_rule="all_done", retries=0, provide_context=True) - nst_tasks = tasks.add_run(pipeline_dag_regular, mock_data_source(), decompose="serialize", trigger_rule="all_done", retries=0, provide_context=True) + pipeline_name=pipeline_name, dataset_name=dataset_name, destination="duckdb" + ) + st_tasks = tasks.add_run( + pipeline_dag_regular, + mock_data_source_state(), + decompose="serialize", + trigger_rule="all_done", + retries=0, + provide_context=True, + ) + nst_tasks = tasks.add_run( + pipeline_dag_regular, + mock_data_source(), + decompose="serialize", + trigger_rule="all_done", + retries=0, + provide_context=True, + ) # connect end of first run to a head of a second st_tasks[-1] >> nst_tasks[0] - dag_def: DAG = dag_serialize() dag_def.test() pipeline_dag_serial = dlt.pipeline( - pipeline_name=pipeline_name, dataset_name=dataset_name, destination="duckdb") + pipeline_name=pipeline_name, dataset_name=dataset_name, 
destination="duckdb" + ) pipeline_dag_serial.sync_destination() # we should have two schemas - assert set(pipeline_dag_serial.schema_names) == {'mock_data_source_state', 'mock_data_source'} - counters_st_tasks = load_table_counts(pipeline_dag_serial, *[t["name"] for t in pipeline_dag_serial.schemas['mock_data_source_state'].data_tables()]) - counters_nst_tasks = load_table_counts(pipeline_dag_serial, *[t["name"] for t in pipeline_dag_serial.schemas['mock_data_source'].data_tables()]) + assert set(pipeline_dag_serial.schema_names) == {"mock_data_source_state", "mock_data_source"} + counters_st_tasks = load_table_counts( + pipeline_dag_serial, + *[t["name"] for t in pipeline_dag_serial.schemas["mock_data_source_state"].data_tables()], + ) + counters_nst_tasks = load_table_counts( + pipeline_dag_serial, + *[t["name"] for t in pipeline_dag_serial.schemas["mock_data_source"].data_tables()], + ) # print(counters_st_tasks) # print(counters_nst_tasks) # this state is confirmed in other test - assert pipeline_dag_serial.state["sources"]["mock_data_source_state"] == {'counter': 11, 'end_counter': 2, 'resources': {'_r1': {'counter': 10}, '_t3': {'counter': 1}, '_t2': {'counter': 1}, '_t1': {'counter': 1}}} + assert pipeline_dag_serial.state["sources"]["mock_data_source_state"] == { + "counter": 11, + "end_counter": 2, + "resources": { + "_r1": {"counter": 10}, + "_t3": {"counter": 1}, + "_t2": {"counter": 1}, + "_t1": {"counter": 1}, + }, + } # next DAG does not connect subgraphs dataset_name = "mock_data_" + uniq_id() pipeline_name = "pipeline_dag_regular_" + uniq_id() - @dag( - schedule=None, - start_date=DEFAULT_DATE, - catchup=False, - default_args=default_args - ) + @dag(schedule=None, start_date=DEFAULT_DATE, catchup=False, default_args=default_args) def dag_parallel(): - tasks = PipelineTasksGroup(pipeline_name, local_data_folder=TEST_STORAGE_ROOT, wipe_local_data=True) + tasks = PipelineTasksGroup( + pipeline_name, local_data_folder=TEST_STORAGE_ROOT, wipe_local_data=True + ) pipeline_dag_regular = dlt.pipeline( - pipeline_name=pipeline_name, dataset_name=dataset_name, destination="duckdb") - tasks.add_run(pipeline_dag_regular, mock_data_source_state(), decompose="serialize", trigger_rule="all_done", retries=0, provide_context=True) - tasks.add_run(pipeline_dag_regular, mock_data_source(), decompose="serialize", trigger_rule="all_done", retries=0, provide_context=True) + pipeline_name=pipeline_name, dataset_name=dataset_name, destination="duckdb" + ) + tasks.add_run( + pipeline_dag_regular, + mock_data_source_state(), + decompose="serialize", + trigger_rule="all_done", + retries=0, + provide_context=True, + ) + tasks.add_run( + pipeline_dag_regular, + mock_data_source(), + decompose="serialize", + trigger_rule="all_done", + retries=0, + provide_context=True, + ) # do not connect graph dag_def = dag_parallel() dag_def.test() pipeline_dag_parallel = dlt.pipeline( - pipeline_name=pipeline_name, dataset_name=dataset_name, destination="duckdb") + pipeline_name=pipeline_name, dataset_name=dataset_name, destination="duckdb" + ) pipeline_dag_parallel.sync_destination() # we should have two schemas - assert set(pipeline_dag_parallel.schema_names) == {'mock_data_source_state', 'mock_data_source'} - counters_st_tasks_par = load_table_counts(pipeline_dag_parallel, *[t["name"] for t in pipeline_dag_parallel.schemas['mock_data_source_state'].data_tables()]) - counters_nst_tasks_par = load_table_counts(pipeline_dag_parallel, *[t["name"] for t in 
pipeline_dag_parallel.schemas['mock_data_source'].data_tables()]) + assert set(pipeline_dag_parallel.schema_names) == {"mock_data_source_state", "mock_data_source"} + counters_st_tasks_par = load_table_counts( + pipeline_dag_parallel, + *[t["name"] for t in pipeline_dag_parallel.schemas["mock_data_source_state"].data_tables()], + ) + counters_nst_tasks_par = load_table_counts( + pipeline_dag_parallel, + *[t["name"] for t in pipeline_dag_parallel.schemas["mock_data_source"].data_tables()], + ) assert counters_st_tasks == counters_st_tasks_par assert counters_nst_tasks == counters_nst_tasks_par assert pipeline_dag_serial.state["sources"] == pipeline_dag_parallel.state["sources"] @@ -395,19 +496,31 @@ def dag_parallel(): dataset_name = "mock_data_" + uniq_id() pipeline_name = "pipeline_dag_regular_" + uniq_id() - @dag( - schedule=None, - start_date=DEFAULT_DATE, - catchup=False, - default_args=default_args - ) + @dag(schedule=None, start_date=DEFAULT_DATE, catchup=False, default_args=default_args) def dag_mixed(): - tasks = PipelineTasksGroup(pipeline_name, local_data_folder=TEST_STORAGE_ROOT, wipe_local_data=True) + tasks = PipelineTasksGroup( + pipeline_name, local_data_folder=TEST_STORAGE_ROOT, wipe_local_data=True + ) pipeline_dag_regular = dlt.pipeline( - pipeline_name=pipeline_name, dataset_name=dataset_name, destination="duckdb") - pd_tasks = tasks.add_run(pipeline_dag_regular, mock_data_source_state(), decompose="serialize", trigger_rule="all_done", retries=0, provide_context=True) - hb_tasks = tasks.add_run(pipeline_dag_regular, mock_data_source(), decompose="serialize", trigger_rule="all_done", retries=0, provide_context=True) + pipeline_name=pipeline_name, dataset_name=dataset_name, destination="duckdb" + ) + pd_tasks = tasks.add_run( + pipeline_dag_regular, + mock_data_source_state(), + decompose="serialize", + trigger_rule="all_done", + retries=0, + provide_context=True, + ) + hb_tasks = tasks.add_run( + pipeline_dag_regular, + mock_data_source(), + decompose="serialize", + trigger_rule="all_done", + retries=0, + provide_context=True, + ) # create almost randomly connected tasks across two runs for pd_t, hb_t in zip(pd_tasks, hb_tasks): pd_t >> hb_t @@ -416,12 +529,19 @@ def dag_mixed(): dag_def.test() pipeline_dag_mixed = dlt.pipeline( - pipeline_name=pipeline_name, dataset_name=dataset_name, destination="duckdb") + pipeline_name=pipeline_name, dataset_name=dataset_name, destination="duckdb" + ) pipeline_dag_mixed.sync_destination() # we should have two schemas - assert set(pipeline_dag_mixed.schema_names) == {'mock_data_source_state', 'mock_data_source'} - counters_st_tasks_par = load_table_counts(pipeline_dag_mixed, *[t["name"] for t in pipeline_dag_mixed.schemas['mock_data_source_state'].data_tables()]) - counters_nst_tasks_par = load_table_counts(pipeline_dag_mixed, *[t["name"] for t in pipeline_dag_mixed.schemas['mock_data_source'].data_tables()]) + assert set(pipeline_dag_mixed.schema_names) == {"mock_data_source_state", "mock_data_source"} + counters_st_tasks_par = load_table_counts( + pipeline_dag_mixed, + *[t["name"] for t in pipeline_dag_mixed.schemas["mock_data_source_state"].data_tables()], + ) + counters_nst_tasks_par = load_table_counts( + pipeline_dag_mixed, + *[t["name"] for t in pipeline_dag_mixed.schemas["mock_data_source"].data_tables()], + ) assert counters_st_tasks == counters_st_tasks_par assert counters_nst_tasks == counters_nst_tasks_par assert pipeline_dag_serial.state["sources"] == pipeline_dag_mixed.state["sources"] @@ -434,7 +554,7 @@ def 
get_task_run(dag_def: DAG, task_name: str, now: pendulum.DateTime) -> TaskIn state=DagRunState.RUNNING, execution_date=now, run_type=DagRunType.MANUAL, - data_interval=(now, now) + data_interval=(now, now), ) dag_def.run(start_date=now, run_at_least_once=True) task_def = dag_def.task_dict[task_name] diff --git a/tests/helpers/airflow_tests/test_join_airflow_scheduler.py b/tests/helpers/airflow_tests/test_join_airflow_scheduler.py index e65c11967e..8c1992c506 100644 --- a/tests/helpers/airflow_tests/test_join_airflow_scheduler.py +++ b/tests/helpers/airflow_tests/test_join_airflow_scheduler.py @@ -18,27 +18,32 @@ CATCHUP_BEGIN = pendulum.datetime(2023, 1, 1, tz="Europe/Berlin") default_args = { - 'owner': 'airflow', - 'depends_on_past': False, - 'email_on_failure': False, - 'email_on_retry': False, - 'retries': 0, + "owner": "airflow", + "depends_on_past": False, + "email_on_failure": False, + "email_on_retry": False, + "retries": 0, } + @dlt.resource() -def existing_incremental(updated_at: dlt.sources.incremental[pendulum.DateTime] = dlt.sources.incremental("updated_at", allow_external_schedulers=True)): +def existing_incremental( + updated_at: dlt.sources.incremental[pendulum.DateTime] = dlt.sources.incremental( + "updated_at", allow_external_schedulers=True + ) +): yield {"updated_at": CATCHUP_BEGIN, "state": updated_at.get_state()} def test_date_coercion() -> None: - @dag(schedule_interval='@daily', + @dag( + schedule_interval="@daily", start_date=CATCHUP_BEGIN, catchup=False, max_active_runs=1, - default_args=default_args + default_args=default_args, ) def dag_regular(): - @task def scheduled() -> None: context = get_current_context() @@ -50,49 +55,78 @@ def scheduled() -> None: assert state["updated_at"] == CATCHUP_BEGIN assert "Europe/Berlin" in str(state["updated_at"].tz) # must have UTC timezone - assert state["state"]["initial_value"] == CATCHUP_BEGIN == context["data_interval_start"] + assert ( + state["state"]["initial_value"] == CATCHUP_BEGIN == context["data_interval_start"] + ) assert state["state"]["initial_value"].tz == UTC assert state["state"]["last_value"] == CATCHUP_BEGIN == context["data_interval_start"] assert state["state"]["last_value"].tz == UTC # end date assert r.incremental._incremental.end_value == context["data_interval_end"] assert r.incremental._incremental.end_value.tz == UTC - assert (r.incremental._incremental.end_value - state["state"]["initial_value"]) == datetime.timedelta(hours=24) + assert ( + r.incremental._incremental.end_value - state["state"]["initial_value"] + ) == datetime.timedelta(hours=24) # datetime.datetime coercion must be pendulum anyway @dlt.resource() - def incremental_datetime(updated_at = dlt.sources.incremental[datetime.datetime]("updated_at", allow_external_schedulers=True)): + def incremental_datetime( + updated_at=dlt.sources.incremental[datetime.datetime]( + "updated_at", allow_external_schedulers=True + ) + ): yield {"updated_at": CATCHUP_BEGIN, "state": updated_at.get_state()} r = incremental_datetime() state = list(r)[0] # must have UTC timezone - assert state["state"]["initial_value"] == CATCHUP_BEGIN == context["data_interval_start"] + assert ( + state["state"]["initial_value"] == CATCHUP_BEGIN == context["data_interval_start"] + ) assert state["state"]["initial_value"].tz == UTC # datetime.date coercion also works @dlt.resource() # type: ignore[no-redef] - def incremental_datetime(updated_at = dlt.sources.incremental[datetime.date]("updated_at", allow_external_schedulers=True)): - yield {"updated_at": 
ensure_pendulum_date(CATCHUP_BEGIN), "state": updated_at.get_state()} + def incremental_datetime( + updated_at=dlt.sources.incremental[datetime.date]( + "updated_at", allow_external_schedulers=True + ) + ): + yield { + "updated_at": ensure_pendulum_date(CATCHUP_BEGIN), + "state": updated_at.get_state(), + } r = incremental_datetime() state = list(r)[0] - assert state["state"]["initial_value"] == ensure_pendulum_date(context["data_interval_start"]) + assert state["state"]["initial_value"] == ensure_pendulum_date( + context["data_interval_start"] + ) assert isinstance(state["state"]["initial_value"], datetime.date) # coerce to int @dlt.resource() # type: ignore[no-redef] - def incremental_datetime(updated_at = dlt.sources.incremental[int]("updated_at", allow_external_schedulers=True)): + def incremental_datetime( + updated_at=dlt.sources.incremental[int]( + "updated_at", allow_external_schedulers=True + ) + ): yield {"updated_at": CATCHUP_BEGIN.int_timestamp, "state": updated_at.get_state()} r = incremental_datetime() state = list(r)[0] assert state["state"]["initial_value"] == context["data_interval_start"].int_timestamp - assert r.incremental._incremental.end_value == context["data_interval_end"].int_timestamp + assert ( + r.incremental._incremental.end_value == context["data_interval_end"].int_timestamp + ) # coerce to float @dlt.resource() # type: ignore[no-redef] - def incremental_datetime(updated_at = dlt.sources.incremental[float]("updated_at", allow_external_schedulers=True)): + def incremental_datetime( + updated_at=dlt.sources.incremental[float]( + "updated_at", allow_external_schedulers=True + ) + ): yield {"updated_at": CATCHUP_BEGIN.timestamp(), "state": updated_at.get_state()} r = incremental_datetime() @@ -102,14 +136,27 @@ def incremental_datetime(updated_at = dlt.sources.incremental[float]("updated_at # coerce to str @dlt.resource() # type: ignore[no-redef] - def incremental_datetime(updated_at = dlt.sources.incremental[str]("updated_at", allow_external_schedulers=True)): - yield {"updated_at": CATCHUP_BEGIN.in_tz("UTC").isoformat(), "state": updated_at.get_state()} + def incremental_datetime( + updated_at=dlt.sources.incremental[str]( + "updated_at", allow_external_schedulers=True + ) + ): + yield { + "updated_at": CATCHUP_BEGIN.in_tz("UTC").isoformat(), + "state": updated_at.get_state(), + } r = incremental_datetime() state = list(r)[0] # must have UTC timezone - assert state["state"]["initial_value"] == context["data_interval_start"].in_tz("UTC").isoformat() - assert r.incremental._incremental.end_value == context["data_interval_end"].in_tz("UTC").isoformat() + assert ( + state["state"]["initial_value"] + == context["data_interval_start"].in_tz("UTC").isoformat() + ) + assert ( + r.incremental._incremental.end_value + == context["data_interval_end"].in_tz("UTC").isoformat() + ) scheduled() @@ -122,11 +169,12 @@ def incremental_datetime(updated_at = dlt.sources.incremental[str]("updated_at", def test_no_next_execution_date() -> None: now = pendulum.now() - @dag(schedule=None, + @dag( + schedule=None, catchup=False, start_date=CATCHUP_BEGIN, default_args=default_args, - max_active_runs=1 + max_active_runs=1, ) def dag_no_schedule(): @task @@ -134,8 +182,15 @@ def unscheduled(): context = get_current_context() @dlt.resource() - def incremental_datetime(updated_at = dlt.sources.incremental[datetime.datetime]("updated_at", allow_external_schedulers=True)): - yield {"updated_at": context["data_interval_start"], "state": updated_at.get_state()} + def incremental_datetime( + 
updated_at=dlt.sources.incremental[datetime.datetime]( + "updated_at", allow_external_schedulers=True + ) + ): + yield { + "updated_at": context["data_interval_start"], + "state": updated_at.get_state(), + } r = incremental_datetime() state = list(r)[0] @@ -151,8 +206,15 @@ def incremental_datetime(updated_at = dlt.sources.incremental[datetime.datetime] # will be filtered out (now earlier than data_interval_start) @dlt.resource() # type: ignore[no-redef] - def incremental_datetime(updated_at = dlt.sources.incremental[datetime.datetime]("updated_at", allow_external_schedulers=True)): - yield {"updated_at": now.subtract(hours=1, seconds=1), "state": updated_at.get_state()} + def incremental_datetime( + updated_at=dlt.sources.incremental[datetime.datetime]( + "updated_at", allow_external_schedulers=True + ) + ): + yield { + "updated_at": now.subtract(hours=1, seconds=1), + "state": updated_at.get_state(), + } r = incremental_datetime() assert len(list(r)) == 0 @@ -172,18 +234,27 @@ def incremental_datetime(updated_at = dlt.sources.incremental[datetime.datetime] ti.run() assert ti.state == State.SUCCESS - @dag(schedule_interval='@daily', + @dag( + schedule_interval="@daily", start_date=CATCHUP_BEGIN, catchup=True, - default_args=default_args + default_args=default_args, ) def dag_daily_schedule(): @task def scheduled(): context = get_current_context() + @dlt.resource() - def incremental_datetime(updated_at = dlt.sources.incremental[datetime.datetime]("updated_at", allow_external_schedulers=True)): - yield {"updated_at": context["data_interval_start"], "state": updated_at.get_state()} + def incremental_datetime( + updated_at=dlt.sources.incremental[datetime.datetime]( + "updated_at", allow_external_schedulers=True + ) + ): + yield { + "updated_at": context["data_interval_start"], + "state": updated_at.get_state(), + } r = incremental_datetime() state = list(r)[0] @@ -208,7 +279,7 @@ def incremental_datetime(updated_at = dlt.sources.incremental[datetime.datetime] state=DagRunState.RUNNING, execution_date=now, run_type=DagRunType.MANUAL, - data_interval=(now, now) + data_interval=(now, now), ) dag_def.run(start_date=now, run_at_least_once=True) task_def = dag_def.task_dict["scheduled"] @@ -219,16 +290,20 @@ def incremental_datetime(updated_at = dlt.sources.incremental[datetime.datetime] def test_scheduler_pipeline_state() -> None: pipeline = dlt.pipeline( - pipeline_name="pipeline_dag_regular", dataset_name="mock_data_" + uniq_id(), destination="duckdb", credentials=":pipeline:") + pipeline_name="pipeline_dag_regular", + dataset_name="mock_data_" + uniq_id(), + destination="duckdb", + credentials=":pipeline:", + ) now = pendulum.now() - @dag(schedule_interval='@daily', + @dag( + schedule_interval="@daily", start_date=CATCHUP_BEGIN, catchup=False, - default_args=default_args + default_args=default_args, ) def dag_regular(): - @task def scheduled() -> None: r = existing_incremental() @@ -252,7 +327,7 @@ def scheduled() -> None: state=DagRunState.RUNNING, execution_date=now, run_type=DagRunType.MANUAL, - data_interval=(now, now) + data_interval=(now, now), ) dag_def.run(start_date=now, run_at_least_once=True) task_def = dag_def.task_dict["scheduled"] @@ -261,20 +336,13 @@ def scheduled() -> None: assert ti.state == State.SUCCESS assert "sources" not in pipeline.state - pipeline = pipeline.drop() dag_def.test(execution_date=CATCHUP_BEGIN) assert "sources" not in pipeline.state - @dag( - schedule=None, - start_date=CATCHUP_BEGIN, - catchup=False, - default_args=default_args - ) + @dag(schedule=None, 
start_date=CATCHUP_BEGIN, catchup=False, default_args=default_args) def dag_no_schedule(): - @task def unscheduled() -> None: r = existing_incremental() diff --git a/tests/helpers/airflow_tests/utils.py b/tests/helpers/airflow_tests/utils.py index 4fe7472eb0..50aab77505 100644 --- a/tests/helpers/airflow_tests/utils.py +++ b/tests/helpers/airflow_tests/utils.py @@ -11,7 +11,7 @@ from dlt.common.configuration.providers.toml import SECRETS_TOML_KEY -@pytest.fixture(scope='function', autouse=True) +@pytest.fixture(scope="function", autouse=True) def initialize_airflow_db(): setup_airflow() # backup context providers @@ -30,14 +30,14 @@ def initialize_airflow_db(): def setup_airflow() -> None: # Disable loading examples try: - conf.add_section('core') + conf.add_section("core") except DuplicateSectionError: pass - conf.set('core', 'load_examples', 'False') + conf.set("core", "load_examples", "False") # Prepare the arguments for the initdb function args = argparse.Namespace() # becomes database/sql_alchemy_conn in apache 2.7.0 - args.backend = conf.get(section='core', key='sql_alchemy_conn') + args.backend = conf.get(section="core", key="sql_alchemy_conn") # Run Airflow resetdb before running any tests args.yes = True diff --git a/tests/helpers/dbt_cloud_tests/test_dbt_cloud.py b/tests/helpers/dbt_cloud_tests/test_dbt_cloud.py index 92d0f722b9..600a11558b 100644 --- a/tests/helpers/dbt_cloud_tests/test_dbt_cloud.py +++ b/tests/helpers/dbt_cloud_tests/test_dbt_cloud.py @@ -2,7 +2,7 @@ from dlt.helpers.dbt_cloud import run_dbt_cloud_job, get_dbt_cloud_run_status -@pytest.mark.parametrize('wait_outcome', [False, True]) +@pytest.mark.parametrize("wait_outcome", [False, True]) def test_trigger_run(wait_outcome): # Trigger job run and wait for an outcome run_status = run_dbt_cloud_job(wait_for_outcome=wait_outcome) @@ -12,7 +12,7 @@ def test_trigger_run(wait_outcome): assert not run_status.get("is_error") -@pytest.mark.parametrize('wait_outcome', [False, True]) +@pytest.mark.parametrize("wait_outcome", [False, True]) def test_run_status(wait_outcome): # Trigger job run and wait for an outcome run_status = run_dbt_cloud_job(wait_for_outcome=False) diff --git a/tests/helpers/dbt_tests/local/test_dbt_utils.py b/tests/helpers/dbt_tests/local/test_dbt_utils.py index 71e570bd69..6c2d28ed23 100644 --- a/tests/helpers/dbt_tests/local/test_dbt_utils.py +++ b/tests/helpers/dbt_tests/local/test_dbt_utils.py @@ -7,8 +7,13 @@ from dlt.common.storages import FileStorage from dlt.common.utils import uniq_id -from dlt.destinations.postgres.configuration import PostgresCredentials -from dlt.helpers.dbt.dbt_utils import DBTProcessingError, initialize_dbt_logging, run_dbt_command, is_incremental_schema_out_of_sync_error +from dlt.destinations.impl.postgres.configuration import PostgresCredentials +from dlt.helpers.dbt.dbt_utils import ( + DBTProcessingError, + initialize_dbt_logging, + run_dbt_command, + is_incremental_schema_out_of_sync_error, +) from tests.utils import test_storage, preserve_environ from tests.helpers.dbt_tests.utils import clone_jaffle_repo, load_test_case @@ -16,8 +21,18 @@ def test_is_incremental_schema_out_of_sync_error() -> None: # in case of --fail-fast detect on a single run result - assert is_incremental_schema_out_of_sync_error(decode_obj(load_test_case("run_result_incremental_fail.pickle.hex"))) is True - assert is_incremental_schema_out_of_sync_error(decode_obj(load_test_case("run_execution_incremental_fail.pickle.hex"))) is True + assert ( + is_incremental_schema_out_of_sync_error( + 
decode_obj(load_test_case("run_result_incremental_fail.pickle.hex")) + ) + is True + ) + assert ( + is_incremental_schema_out_of_sync_error( + decode_obj(load_test_case("run_execution_incremental_fail.pickle.hex")) + ) + is True + ) assert is_incremental_schema_out_of_sync_error("AAA") is False @@ -27,24 +42,36 @@ def test_dbt_commands(test_storage: FileStorage) -> None: dbt_vars = {"dbt_schema": schema_name} # extract postgres creds from env, parse and emit - credentials = resolve_configuration(PostgresCredentials(), sections=("destination", "postgres")) + credentials = resolve_configuration(PostgresCredentials(), sections=("destination", "postgres")) add_config_to_env(credentials, ("dlt",)) repo_path = clone_jaffle_repo(test_storage) # copy profile - shutil.copy("./tests/helpers/dbt_tests/cases/profiles_invalid_credentials.yml", os.path.join(repo_path, "profiles.yml")) + shutil.copy( + "./tests/helpers/dbt_tests/cases/profiles_invalid_credentials.yml", + os.path.join(repo_path, "profiles.yml"), + ) # initialize logging global_args = initialize_dbt_logging("ERROR", False) # run deps, results are None assert run_dbt_command(repo_path, "deps", ".", global_args=global_args) is None # run list, results are list of strings - results = run_dbt_command(repo_path, "list", ".", global_args=global_args, package_vars=dbt_vars) + results = run_dbt_command( + repo_path, "list", ".", global_args=global_args, package_vars=dbt_vars + ) assert isinstance(results, list) assert len(results) == 28 assert "jaffle_shop.not_null_orders_amount" in results # run list for specific selector - results = run_dbt_command(repo_path, "list", ".", global_args=global_args, command_args=["-s", "jaffle_shop.not_null_orders_amount"], package_vars=dbt_vars) + results = run_dbt_command( + repo_path, + "list", + ".", + global_args=global_args, + command_args=["-s", "jaffle_shop.not_null_orders_amount"], + package_vars=dbt_vars, + ) assert len(results) == 1 assert results[0] == "jaffle_shop.not_null_orders_amount" # run debug, that will fail @@ -61,26 +88,46 @@ def test_dbt_commands(test_storage: FileStorage) -> None: # same for run with pytest.raises(DBTProcessingError) as dbt_err: - run_dbt_command(repo_path, "run", ".", global_args=global_args, package_vars=dbt_vars, command_args=["--fail-fast", "--full-refresh"]) + run_dbt_command( + repo_path, + "run", + ".", + global_args=global_args, + package_vars=dbt_vars, + command_args=["--fail-fast", "--full-refresh"], + ) # in that case test results are bool, not list of tests runs assert dbt_err.value.command == "run" # copy a correct profile - shutil.copy("./tests/helpers/dbt_tests/cases/profiles.yml", os.path.join(repo_path, "profiles.yml")) + shutil.copy( + "./tests/helpers/dbt_tests/cases/profiles.yml", os.path.join(repo_path, "profiles.yml") + ) - results = run_dbt_command(repo_path, "seed", ".", global_args=global_args, package_vars=dbt_vars) + results = run_dbt_command( + repo_path, "seed", ".", global_args=global_args, package_vars=dbt_vars + ) assert isinstance(results, list) assert len(results) == 3 assert results[0].model_name == "raw_customers" assert results[0].status == "success" - results = run_dbt_command(repo_path, "run", ".", global_args=global_args, package_vars=dbt_vars, command_args=["--fail-fast", "--full-refresh"]) + results = run_dbt_command( + repo_path, + "run", + ".", + global_args=global_args, + package_vars=dbt_vars, + command_args=["--fail-fast", "--full-refresh"], + ) assert isinstance(results, list) assert len(results) == 5 assert 
results[-1].model_name == "orders" assert results[-1].status == "success" - results = run_dbt_command(repo_path, "test", ".", global_args=global_args, package_vars=dbt_vars) + results = run_dbt_command( + repo_path, "test", ".", global_args=global_args, package_vars=dbt_vars + ) assert isinstance(results, list) assert len(results) == 20 assert results[-1].status == "pass" diff --git a/tests/helpers/dbt_tests/local/test_runner_destinations.py b/tests/helpers/dbt_tests/local/test_runner_destinations.py index 547fdb991c..c9e4b7c83b 100644 --- a/tests/helpers/dbt_tests/local/test_runner_destinations.py +++ b/tests/helpers/dbt_tests/local/test_runner_destinations.py @@ -11,10 +11,16 @@ from tests.utils import TEST_STORAGE_ROOT, clean_test_storage, preserve_environ from tests.common.utils import modify_and_commit_file, load_secret -from tests.helpers.dbt_tests.local.utils import setup_rasa_runner_client, setup_rasa_runner, DBTDestinationInfo +from tests.helpers.dbt_tests.local.utils import ( + setup_rasa_runner_client, + setup_rasa_runner, + DBTDestinationInfo, +) DESTINATION_DATASET_NAME = "test_" + uniq_id() -ALL_DBT_DESTINATIONS = [DBTDestinationInfo("bigquery", "CREATE TABLE", "MERGE")] # DBTDestinationInfo("redshift", "SELECT", "INSERT") +ALL_DBT_DESTINATIONS = [ + DBTDestinationInfo("bigquery", "CREATE TABLE", "MERGE") +] # DBTDestinationInfo("redshift", "SELECT", "INSERT") ALL_DBT_DESTINATIONS_NAMES = ["bigquery"] # "redshift", @@ -27,29 +33,36 @@ def destination_info(request: Any) -> Iterator[DBTDestinationInfo]: def test_setup_dbt_runner() -> None: - runner = setup_rasa_runner("redshift", "carbon_bot_3", override_values={ - "package_additional_vars": {"add_var_name": "add_var_value"}, - "runtime": { - "log_format": "JSON", - "log_level": "INFO" - } - }) + runner = setup_rasa_runner( + "redshift", + "carbon_bot_3", + override_values={ + "package_additional_vars": {"add_var_name": "add_var_value"}, + "runtime": {"log_format": "JSON", "log_level": "INFO"}, + }, + ) assert runner.package_path.endswith("rasa_semantic_schema") assert runner.config.package_profile_name == "redshift" assert runner.config.package_additional_vars == {"add_var_name": "add_var_value"} - assert runner._get_package_vars() == {"source_dataset_name": "carbon_bot_3", "add_var_name": "add_var_value"} + assert runner._get_package_vars() == { + "source_dataset_name": "carbon_bot_3", + "add_var_name": "add_var_value", + } assert runner.source_dataset_name == "carbon_bot_3" assert runner.cloned_package_name == "rasa_semantic_schema" assert runner.working_dir == TEST_STORAGE_ROOT def test_initialize_package_wrong_key() -> None: - runner = setup_rasa_runner("redshift", override_values={ - # private repo - "package_location": "git@github.com:dlt-hub/rasa_bot_experiments.git", - "package_repository_branch": None, - "package_repository_ssh_key": load_secret("DEPLOY_KEY") - }) + runner = setup_rasa_runner( + "redshift", + override_values={ + # private repo + "package_location": "git@github.com:dlt-hub/rasa_bot_experiments.git", + "package_repository_branch": None, + "package_repository_ssh_key": load_secret("DEPLOY_KEY"), + }, + ) with pytest.raises(GitCommandError) as gce: runner.run_all() @@ -60,12 +73,17 @@ def test_reinitialize_package() -> None: runner = setup_rasa_runner("redshift") runner.ensure_newest_package() # mod the package - readme_path, _ = modify_and_commit_file(runner.package_path, "README.md", content=runner.config.package_profiles_dir) + readme_path, _ = modify_and_commit_file( + runner.package_path, 
"README.md", content=runner.config.package_profiles_dir + ) assert os.path.isfile(readme_path) # this will wipe out old package and clone again runner.ensure_newest_package() # we have old file back - assert runner.repo_storage.load(f"{runner.cloned_package_name}/README.md") != runner.config.package_profiles_dir + assert ( + runner.repo_storage.load(f"{runner.cloned_package_name}/README.md") + != runner.config.package_profiles_dir + ) def test_dbt_test_no_raw_schema(destination_info: DBTDestinationInfo) -> None: @@ -76,7 +94,7 @@ def test_dbt_test_no_raw_schema(destination_info: DBTDestinationInfo) -> None: runner.run_all( destination_dataset_name=DESTINATION_DATASET_NAME, run_params=["--fail-fast", "--full-refresh"], - source_tests_selector="tag:prerequisites" + source_tests_selector="tag:prerequisites", ) assert isinstance(prq_ex.value.args[0], DBTProcessingError) @@ -89,16 +107,21 @@ def test_dbt_run_full_refresh(destination_info: DBTDestinationInfo) -> None: destination_dataset_name=DESTINATION_DATASET_NAME, run_params=["--fail-fast", "--full-refresh"], additional_vars={"user_id": "metadata__user_id"}, - source_tests_selector="tag:prerequisites" + source_tests_selector="tag:prerequisites", ) assert all(r.message.startswith(destination_info.replace_strategy) for r in run_results) is True assert find_run_result(run_results, "_loads") is not None # all models must be SELECT as we do full refresh - assert find_run_result(run_results, "_loads").message.startswith(destination_info.replace_strategy) + assert find_run_result(run_results, "_loads").message.startswith( + destination_info.replace_strategy + ) assert all(m.message.startswith(destination_info.replace_strategy) for m in run_results) is True # all tests should pass - runner.test(destination_dataset_name=DESTINATION_DATASET_NAME, additional_vars={"user_id": "metadata__user_id"}) + runner.test( + destination_dataset_name=DESTINATION_DATASET_NAME, + additional_vars={"user_id": "metadata__user_id"}, + ) def test_dbt_run_error_via_additional_vars(destination_info: DBTDestinationInfo) -> None: @@ -110,8 +133,11 @@ def test_dbt_run_error_via_additional_vars(destination_info: DBTDestinationInfo) runner.run_all( destination_dataset_name=DESTINATION_DATASET_NAME, run_params=["--fail-fast", "--full-refresh"], - additional_vars={"user_id": "metadata__user_id", "external_session_id": "metadata__sess_id"}, - source_tests_selector="tag:prerequisites" + additional_vars={ + "user_id": "metadata__user_id", + "external_session_id": "metadata__sess_id", + }, + source_tests_selector="tag:prerequisites", ) stg_interactions = find_run_result(dbt_err.value.run_results, "stg_interactions") assert "metadata__sess_id" in stg_interactions.message @@ -127,7 +153,7 @@ def test_dbt_incremental_schema_out_of_sync_error(destination_info: DBTDestinati run_params=["--fail-fast", "--model", "+interactions"], # remove all counter metrics additional_vars={"count_metrics": []}, - source_tests_selector="tag:prerequisites" + source_tests_selector="tag:prerequisites", ) # generate schema error on incremental load @@ -140,7 +166,9 @@ def test_dbt_incremental_schema_out_of_sync_error(destination_info: DBTDestinati ) # metrics: StrStr = get_metrics_from_prometheus([runner.model_exec_info])["dbtrunner_model_status_info"] # full refresh on interactions - assert find_run_result(results, "interactions").message.startswith(destination_info.replace_strategy) + assert find_run_result(results, "interactions").message.startswith( + destination_info.replace_strategy + ) # now incremental 
load should happen results = runner.run( diff --git a/tests/helpers/dbt_tests/local/utils.py b/tests/helpers/dbt_tests/local/utils.py index 2993753a0c..7097140a83 100644 --- a/tests/helpers/dbt_tests/local/utils.py +++ b/tests/helpers/dbt_tests/local/utils.py @@ -1,4 +1,3 @@ - import contextlib from typing import Iterator, NamedTuple @@ -23,10 +22,13 @@ class DBTDestinationInfo(NamedTuple): incremental_strategy: str -def setup_rasa_runner(profile_name: str, dataset_name: str = None, override_values: StrAny = None) -> DBTPackageRunner: - +def setup_rasa_runner( + profile_name: str, dataset_name: str = None, override_values: StrAny = None +) -> DBTPackageRunner: C = DBTRunnerConfiguration() - C.package_location = "https://github.com/scale-vector/rasa_semantic_schema.git" # "/home/rudolfix/src/dbt/rasa_semantic_schema" + C.package_location = ( # "/home/rudolfix/src/dbt/rasa_semantic_schema" + "https://github.com/scale-vector/rasa_semantic_schema.git" + ) C.package_repository_branch = "dlt-dbt-runner-ci-do-not-delete" # override values including the defaults above @@ -41,7 +43,7 @@ def setup_rasa_runner(profile_name: str, dataset_name: str = None, override_valu DestinationClientDwhConfiguration(dataset_name=dataset_name or FIXTURES_DATASET_NAME), TEST_STORAGE_ROOT, package_profile_name=profile_name, - config=C + config=C, ) # now C is resolved init_test_logging(C.runtime) @@ -49,7 +51,9 @@ def setup_rasa_runner(profile_name: str, dataset_name: str = None, override_valu @contextlib.contextmanager -def setup_rasa_runner_client(destination_name: str, destination_dataset_name: str) -> Iterator[None]: +def setup_rasa_runner_client( + destination_name: str, destination_dataset_name: str +) -> Iterator[None]: with cm_yield_client(destination_name, FIXTURES_DATASET_NAME) as client: # emit environ so credentials are passed to dbt profile add_config_to_env(client.config, ("DLT",)) diff --git a/tests/helpers/dbt_tests/test_runner_dbt_versions.py b/tests/helpers/dbt_tests/test_runner_dbt_versions.py index b418bf15b6..a47828a9ea 100644 --- a/tests/helpers/dbt_tests/test_runner_dbt_versions.py +++ b/tests/helpers/dbt_tests/test_runner_dbt_versions.py @@ -14,13 +14,24 @@ from dlt.common.runners.synth_pickle import decode_obj, encode_obj from dlt.common.typing import AnyFun -from dlt.destinations.postgres.postgres import PostgresClient -from dlt.destinations.bigquery import BigQueryClientConfiguration +from dlt.destinations.impl.postgres.postgres import PostgresClient +from dlt.destinations.impl.bigquery.configuration import BigQueryClientConfiguration from dlt.helpers.dbt.configuration import DBTRunnerConfiguration from dlt.helpers.dbt.exceptions import PrerequisitesException, DBTProcessingError -from dlt.helpers.dbt import package_runner, create_venv, _create_dbt_deps, _default_profile_name, DEFAULT_DBT_VERSION - -from tests.helpers.dbt_tests.utils import JAFFLE_SHOP_REPO, assert_jaffle_completed, clone_jaffle_repo, find_run_result +from dlt.helpers.dbt import ( + package_runner, + create_venv, + _create_dbt_deps, + _default_profile_name, + DEFAULT_DBT_VERSION, +) + +from tests.helpers.dbt_tests.utils import ( + JAFFLE_SHOP_REPO, + assert_jaffle_completed, + clone_jaffle_repo, + find_run_result, +) from tests.utils import test_storage, preserve_environ from tests.load.utils import yield_client_with_storage, cm_yield_client_with_storage @@ -40,14 +51,14 @@ def client() -> Iterator[PostgresClient]: ("postgres", None), ("snowflake", "1.4.0"), ("snowflake", "1.5.2"), - ("snowflake", None) + ("snowflake", None), 
] PACKAGE_IDS = [ - f"{destination}-venv-{version}" - if version else f"{destination}-local" + f"{destination}-venv-{version}" if version else f"{destination}-local" for destination, version in PACKAGE_PARAMS ] + @pytest.fixture(scope="module", params=PACKAGE_PARAMS, ids=PACKAGE_IDS) def dbt_package_f(request: Any) -> Iterator[Tuple[str, AnyFun]]: destination_name, version = request.param @@ -89,7 +100,10 @@ def test_dbt_configuration() -> None: # check names normalized C: DBTRunnerConfiguration = resolve_configuration( DBTRunnerConfiguration(), - explicit_value={"package_repository_ssh_key": "---NO NEWLINE---", "package_location": "/var/local"} + explicit_value={ + "package_repository_ssh_key": "---NO NEWLINE---", + "package_location": "/var/local", + }, ) assert C.package_repository_ssh_key == "---NO NEWLINE---\n" assert C.package_additional_vars is None @@ -98,7 +112,11 @@ def test_dbt_configuration() -> None: C = resolve_configuration( DBTRunnerConfiguration(), - explicit_value={"package_repository_ssh_key": "---WITH NEWLINE---\n", "package_location": "/var/local", "package_additional_vars": {"a": 1}} + explicit_value={ + "package_repository_ssh_key": "---WITH NEWLINE---\n", + "package_location": "/var/local", + "package_additional_vars": {"a": 1}, + }, ) assert C.package_repository_ssh_key == "---WITH NEWLINE---\n" assert C.package_additional_vars == {"a": 1} @@ -108,9 +126,9 @@ def test_dbt_run_exception_pickle() -> None: obj = decode_obj( encode_obj( DBTProcessingError("test", "A", "B"), # type: ignore[arg-type] - ignore_pickle_errors=False + ignore_pickle_errors=False, ), - ignore_pickle_errors=False + ignore_pickle_errors=False, ) assert obj.command == "test" assert obj.run_results == "A" @@ -119,12 +137,21 @@ def test_dbt_run_exception_pickle() -> None: def test_runner_setup(client: PostgresClient, test_storage: FileStorage) -> None: - add_vars = {"source_dataset_name": "overwritten", "destination_dataset_name": "destination", "schema_name": "this_Schema"} + add_vars = { + "source_dataset_name": "overwritten", + "destination_dataset_name": "destination", + "schema_name": "this_Schema", + } os.environ["DBT_PACKAGE_RUNNER__PACKAGE_ADDITIONAL_VARS"] = json.dumps(add_vars) os.environ["AUTO_FULL_REFRESH_WHEN_OUT_OF_SYNC"] = "False" os.environ["DBT_PACKAGE_RUNNER__RUNTIME__LOG_LEVEL"] = "CRITICAL" test_storage.create_folder("jaffle") - r = package_runner(Venv.restore_current(), client.config, test_storage.make_full_path("jaffle"), JAFFLE_SHOP_REPO) + r = package_runner( + Venv.restore_current(), + client.config, + test_storage.make_full_path("jaffle"), + JAFFLE_SHOP_REPO, + ) # runner settings assert r.credentials is client.config assert r.working_dir == test_storage.make_full_path("jaffle") @@ -140,55 +167,76 @@ def test_runner_setup(client: PostgresClient, test_storage: FileStorage) -> None assert r.config.runtime.log_level == "CRITICAL" assert r.config.auto_full_refresh_when_out_of_sync is False - assert r._get_package_vars() == {"source_dataset_name": client.config.dataset_name, "destination_dataset_name": "destination", "schema_name": "this_Schema"} - assert r._get_package_vars(destination_dataset_name="dest_test_123") == {"source_dataset_name": client.config.dataset_name, "destination_dataset_name": "dest_test_123", "schema_name": "this_Schema"} + assert r._get_package_vars() == { + "source_dataset_name": client.config.dataset_name, + "destination_dataset_name": "destination", + "schema_name": "this_Schema", + } + assert 
r._get_package_vars(destination_dataset_name="dest_test_123") == { + "source_dataset_name": client.config.dataset_name, + "destination_dataset_name": "dest_test_123", + "schema_name": "this_Schema", + } assert r._get_package_vars(additional_vars={"add": 1, "schema_name": "ovr"}) == { - "source_dataset_name": client.config.dataset_name, - "destination_dataset_name": "destination", "schema_name": "ovr", - "add": 1 - } + "source_dataset_name": client.config.dataset_name, + "destination_dataset_name": "destination", + "schema_name": "ovr", + "add": 1, + } -def test_runner_dbt_destinations(test_storage: FileStorage, dbt_package_f: Tuple[str, AnyFun]) -> None: +def test_runner_dbt_destinations( + test_storage: FileStorage, dbt_package_f: Tuple[str, AnyFun] +) -> None: destination_name, dbt_func = dbt_package_f with cm_yield_client_with_storage(destination_name) as client: - jaffle_base_dir = 'jaffle_' + destination_name + jaffle_base_dir = "jaffle_" + destination_name test_storage.create_folder(jaffle_base_dir) results = dbt_func( client.config, test_storage.make_full_path(jaffle_base_dir), JAFFLE_SHOP_REPO ).run_all(["--fail-fast", "--full-refresh"]) - assert_jaffle_completed(test_storage, results, destination_name, jaffle_dir=jaffle_base_dir + '/jaffle_shop') + assert_jaffle_completed( + test_storage, results, destination_name, jaffle_dir=jaffle_base_dir + "/jaffle_shop" + ) -def test_run_jaffle_from_folder_incremental(test_storage: FileStorage, dbt_package_f: Tuple[str, AnyFun]) -> None: +def test_run_jaffle_from_folder_incremental( + test_storage: FileStorage, dbt_package_f: Tuple[str, AnyFun] +) -> None: destination_name, dbt_func = dbt_package_f with cm_yield_client_with_storage(destination_name) as client: repo_path = clone_jaffle_repo(test_storage) # copy model with error into package to force run error in model - shutil.copy("./tests/helpers/dbt_tests/cases/jaffle_customers_incremental.sql", os.path.join(repo_path, "models", "customers.sql")) + shutil.copy( + "./tests/helpers/dbt_tests/cases/jaffle_customers_incremental.sql", + os.path.join(repo_path, "models", "customers.sql"), + ) results = dbt_func(client.config, None, repo_path).run_all(run_params=None) assert_jaffle_completed(test_storage, results, destination_name, jaffle_dir="jaffle_shop") results = dbt_func(client.config, None, repo_path).run_all() # out of 100 records 0 was inserted customers = find_run_result(results, "customers") - assert customers.message in JAFFLE_MESSAGES_INCREMENTAL[destination_name]['customers'] + assert customers.message in JAFFLE_MESSAGES_INCREMENTAL[destination_name]["customers"] # change the column name. that will force dbt to fail (on_schema_change='fail'). 
the runner should do a full refresh - shutil.copy("./tests/helpers/dbt_tests/cases/jaffle_customers_incremental_new_column.sql", os.path.join(repo_path, "models", "customers.sql")) + shutil.copy( + "./tests/helpers/dbt_tests/cases/jaffle_customers_incremental_new_column.sql", + os.path.join(repo_path, "models", "customers.sql"), + ) results = dbt_func(client.config, None, repo_path).run_all(run_params=None) assert_jaffle_completed(test_storage, results, destination_name, jaffle_dir="jaffle_shop") -def test_run_jaffle_fail_prerequisites(test_storage: FileStorage, dbt_package_f: Tuple[str, AnyFun]) -> None: +def test_run_jaffle_fail_prerequisites( + test_storage: FileStorage, dbt_package_f: Tuple[str, AnyFun] +) -> None: destination_name, dbt_func = dbt_package_f with cm_yield_client_with_storage(destination_name) as client: test_storage.create_folder("jaffle") # we run all the tests before tables are materialized with pytest.raises(PrerequisitesException) as pr_exc: dbt_func( - client.config, - test_storage.make_full_path("jaffle"), - JAFFLE_SHOP_REPO - ).run_all(["--fail-fast", "--full-refresh"], source_tests_selector="*") + client.config, test_storage.make_full_path("jaffle"), JAFFLE_SHOP_REPO + ).run_all(["--fail-fast", "--full-refresh"], source_tests_selector="*") proc_err = pr_exc.value.args[0] assert isinstance(proc_err, DBTProcessingError) customers = find_run_result(proc_err.run_results, "unique_customers_customer_id") @@ -197,23 +245,32 @@ def test_run_jaffle_fail_prerequisites(test_storage: FileStorage, dbt_package_f: assert all(r.status == "error" for r in proc_err.run_results) -def test_run_jaffle_invalid_run_args(test_storage: FileStorage, dbt_package_f: Tuple[str, AnyFun]) -> None: +def test_run_jaffle_invalid_run_args( + test_storage: FileStorage, dbt_package_f: Tuple[str, AnyFun] +) -> None: destination_name, dbt_func = dbt_package_f with cm_yield_client_with_storage(destination_name) as client: test_storage.create_folder("jaffle") # we run all the tests before tables are materialized with pytest.raises(DBTProcessingError) as pr_exc: - dbt_func(client.config, test_storage.make_full_path("jaffle"), JAFFLE_SHOP_REPO).run_all(["--wrong_flag"]) + dbt_func( + client.config, test_storage.make_full_path("jaffle"), JAFFLE_SHOP_REPO + ).run_all(["--wrong_flag"]) # dbt < 1.5 raises systemexit, dbt >= 1.5 just returns success False assert isinstance(pr_exc.value.dbt_results, SystemExit) or pr_exc.value.dbt_results is None -def test_run_jaffle_failed_run(test_storage: FileStorage, dbt_package_f: Tuple[str, AnyFun]) -> None: +def test_run_jaffle_failed_run( + test_storage: FileStorage, dbt_package_f: Tuple[str, AnyFun] +) -> None: destination_name, dbt_func = dbt_package_f with cm_yield_client_with_storage(destination_name) as client: repo_path = clone_jaffle_repo(test_storage) # copy model with error into package to force run error in model - shutil.copy("./tests/helpers/dbt_tests/cases/jaffle_customers_with_error.sql", os.path.join(repo_path, "models", "customers.sql")) + shutil.copy( + "./tests/helpers/dbt_tests/cases/jaffle_customers_with_error.sql", + os.path.join(repo_path, "models", "customers.sql"), + ) with pytest.raises(DBTProcessingError) as pr_exc: dbt_func(client.config, None, repo_path).run_all(run_params=None) assert len(pr_exc.value.run_results) == 5 @@ -222,11 +279,9 @@ def test_run_jaffle_failed_run(test_storage: FileStorage, dbt_package_f: Tuple[s JAFFLE_MESSAGES_INCREMENTAL: Dict[str, Any] = { - 'snowflake': { + "snowflake": { # Different message per version - 
'customers': ('SUCCESS 1', 'SUCCESS 100'), + "customers": ("SUCCESS 1", "SUCCESS 100"), }, - 'postgres': { - 'customers': ("INSERT 0 100", ) - } + "postgres": {"customers": ("INSERT 0 100",)}, } diff --git a/tests/helpers/dbt_tests/utils.py b/tests/helpers/dbt_tests/utils.py index 65e0eae2cb..59fa67476b 100644 --- a/tests/helpers/dbt_tests/utils.py +++ b/tests/helpers/dbt_tests/utils.py @@ -9,15 +9,15 @@ TEST_CASES_PATH = "./tests/helpers/dbt_tests/cases/" JAFFLE_RESULT_MESSAGES = { - 'postgres': { - 'stg_orders': 'CREATE VIEW', - 'customers': 'SELECT 100', + "postgres": { + "stg_orders": "CREATE VIEW", + "customers": "SELECT 100", }, # Snowflake only returns generic success messages - 'snowflake': { - 'stg_orders': 'SUCCESS 1', - 'customers': 'SUCCESS 1', - } + "snowflake": { + "stg_orders": "SUCCESS 1", + "customers": "SUCCESS 1", + }, } @@ -33,17 +33,24 @@ def find_run_result(results: Sequence[DBTNodeResult], model_name: str) -> DBTNod def clone_jaffle_repo(test_storage: FileStorage) -> str: repo_path = test_storage.make_full_path("jaffle_shop") # clone jaffle shop for dbt 1.0.0 - clone_repo(JAFFLE_SHOP_REPO, repo_path, with_git_command=None, branch="main").close() # core-v1.0.0 + clone_repo( + JAFFLE_SHOP_REPO, repo_path, with_git_command=None, branch="main" + ).close() # core-v1.0.0 return repo_path -def assert_jaffle_completed(test_storage: FileStorage, results: List[DBTNodeResult], destination_name: str, jaffle_dir: str = "jaffle/jaffle_shop") -> None: +def assert_jaffle_completed( + test_storage: FileStorage, + results: List[DBTNodeResult], + destination_name: str, + jaffle_dir: str = "jaffle/jaffle_shop", +) -> None: assert len(results) == 5 assert all(r.status == "success" for r in results) - stg_orders = find_run_result(results, 'stg_orders') - assert stg_orders.message == JAFFLE_RESULT_MESSAGES[destination_name]['stg_orders'] + stg_orders = find_run_result(results, "stg_orders") + assert stg_orders.message == JAFFLE_RESULT_MESSAGES[destination_name]["stg_orders"] customers = find_run_result(results, "customers") - assert customers.message == JAFFLE_RESULT_MESSAGES[destination_name]['customers'] + assert customers.message == JAFFLE_RESULT_MESSAGES[destination_name]["customers"] # `run_dbt` has injected credentials into environ. 
make sure that credentials were removed assert "CREDENTIALS__PASSWORD" not in os.environ # make sure jaffle_shop was cloned into right dir diff --git a/tests/helpers/providers/test_google_secrets_provider.py b/tests/helpers/providers/test_google_secrets_provider.py index 8e1f14f655..9d32648862 100644 --- a/tests/helpers/providers/test_google_secrets_provider.py +++ b/tests/helpers/providers/test_google_secrets_provider.py @@ -12,7 +12,7 @@ from dlt.common.configuration.resolve import resolve_configuration -DLT_SECRETS_TOML_CONTENT=""" +DLT_SECRETS_TOML_CONTENT = """ secret_value=2137 api.secret_key="ABCD" @@ -26,7 +26,9 @@ def test_regular_keys() -> None: logger.init_logging(RunConfiguration()) # copy bigquery credentials into providers credentials - c = resolve_configuration(GcpServiceAccountCredentials(), sections=(known_sections.DESTINATION, "bigquery")) + c = resolve_configuration( + GcpServiceAccountCredentials(), sections=(known_sections.DESTINATION, "bigquery") + ) secrets[f"{known_sections.PROVIDERS}.google_secrets.credentials"] = dict(c) # c = secrets.get("destination.credentials", GcpServiceAccountCredentials) # print(c) @@ -37,22 +39,46 @@ def test_regular_keys() -> None: # load secrets toml per pipeline provider.get_value("secret_key", AnyType, "pipeline", "api") - assert provider.get_value("secret_key", AnyType, "pipeline", "api") == ("ABCDE", "pipeline-api-secret_key") - assert provider.get_value("credentials", AnyType, "pipeline") == ({"project_id": "mock-credentials-pipeline"}, "pipeline-credentials") + assert provider.get_value("secret_key", AnyType, "pipeline", "api") == ( + "ABCDE", + "pipeline-api-secret_key", + ) + assert provider.get_value("credentials", AnyType, "pipeline") == ( + {"project_id": "mock-credentials-pipeline"}, + "pipeline-credentials", + ) # load source test_source which should also load "sources", "pipeline-sources", "sources-test_source" and "pipeline-sources-test_source" - assert provider.get_value("only_pipeline", AnyType, "pipeline", "sources", "test_source") == ("ONLY", "pipeline-sources-test_source-only_pipeline") + assert provider.get_value("only_pipeline", AnyType, "pipeline", "sources", "test_source") == ( + "ONLY", + "pipeline-sources-test_source-only_pipeline", + ) # we set sources.test_source.secret_prop_1="OVR_A" in pipeline-sources to override value in sources - assert provider.get_value("secret_prop_1", AnyType, None, "sources", "test_source") == ("OVR_A", "sources-test_source-secret_prop_1") + assert provider.get_value("secret_prop_1", AnyType, None, "sources", "test_source") == ( + "OVR_A", + "sources-test_source-secret_prop_1", + ) # get element unique to pipeline-sources - assert provider.get_value("only_pipeline_top", AnyType, "pipeline", "sources") == ("TOP", "pipeline-sources-only_pipeline_top") + assert provider.get_value("only_pipeline_top", AnyType, "pipeline", "sources") == ( + "TOP", + "pipeline-sources-only_pipeline_top", + ) # get element unique to sources - assert provider.get_value("all_sources_present", AnyType, None, "sources") == (True, "sources-all_sources_present") + assert provider.get_value("all_sources_present", AnyType, None, "sources") == ( + True, + "sources-all_sources_present", + ) # get element unique to sources-test_source - assert provider.get_value("secret_prop_2", AnyType, None, "sources", "test_source") == ("B", "sources-test_source-secret_prop_2") + assert provider.get_value("secret_prop_2", AnyType, None, "sources", "test_source") == ( + "B", + "sources-test_source-secret_prop_2", + ) # this 
destination will not be found - assert provider.get_value("url", AnyType, "pipeline", "destination", "filesystem") == (None, "pipeline-destination-filesystem-url") + assert provider.get_value("url", AnyType, "pipeline", "destination", "filesystem") == ( + None, + "pipeline-destination-filesystem-url", + ) # try a single secret value assert provider.get_value("secret", TSecretValue, "pipeline") == (None, "pipeline-secret") @@ -63,7 +89,10 @@ def test_regular_keys() -> None: assert provider.get_value("secret", str, "pipeline") == (None, "pipeline-secret") provider.only_secrets = False # non secrets allowed - assert provider.get_value("secret", str, "pipeline") == ("THIS IS SECRET VALUE", "pipeline-secret") + assert provider.get_value("secret", str, "pipeline") == ( + "THIS IS SECRET VALUE", + "pipeline-secret", + ) # request json # print(provider._toml.as_string()) @@ -73,12 +102,12 @@ def test_regular_keys() -> None: # def test_special_sections() -> None: # pass - # with custom_environ({"GOOGLE_APPLICATION_CREDENTIALS": "_secrets/pipelines-ci-secrets-65c0517a9b30.json"}): - # provider = _google_secrets_provider() - # print(provider.get_value("credentials", GcpServiceAccountCredentials, None, "destination", "bigquery")) - # print(provider._toml.as_string()) - # print(provider.get_value("subdomain", AnyType, None, "sources", "zendesk", "credentials")) - # print(provider._toml.as_string()) +# with custom_environ({"GOOGLE_APPLICATION_CREDENTIALS": "_secrets/pipelines-ci-secrets-65c0517a9b30.json"}): +# provider = _google_secrets_provider() +# print(provider.get_value("credentials", GcpServiceAccountCredentials, None, "destination", "bigquery")) +# print(provider._toml.as_string()) +# print(provider.get_value("subdomain", AnyType, None, "sources", "zendesk", "credentials")) +# print(provider._toml.as_string()) # def test_provider_insertion() -> None: @@ -88,4 +117,3 @@ def test_regular_keys() -> None: # }): # # - diff --git a/tests/helpers/streamlit_tests/test_streamlit_show_resources.py b/tests/helpers/streamlit_tests/test_streamlit_show_resources.py index fcf232ea76..a26e9b774d 100644 --- a/tests/helpers/streamlit_tests/test_streamlit_show_resources.py +++ b/tests/helpers/streamlit_tests/test_streamlit_show_resources.py @@ -57,9 +57,9 @@ def test_multiple_resources_pipeline(): ) load_info = pipeline.run([source1(10), source2(20)]) - source1_schema = load_info.pipeline.schemas.get("source1") # type: ignore[attr-defined] + source1_schema = load_info.pipeline.schemas.get("source1") - assert load_info.pipeline.schema_names == ["source2", "source1"] # type: ignore[attr-defined] + assert load_info.pipeline.schema_names == ["source2", "source1"] # type: ignore[attr-defined] assert source1_schema.data_tables()[0]["name"] == "one" assert source1_schema.data_tables()[0]["columns"]["column_1"].get("primary_key") is True diff --git a/tests/libs/__init__.py b/tests/libs/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/libs/test_buffered_writer_arrow,py b/tests/libs/test_buffered_writer_arrow,py new file mode 100644 index 0000000000..f0f0968942 --- /dev/null +++ b/tests/libs/test_buffered_writer_arrow,py @@ -0,0 +1,50 @@ +import pytest + +from dlt.common.destination import TLoaderFileFormat +from dlt.common.schema.utils import new_column + +from tests.common.data_writers.utils import get_writer, ALL_WRITERS + + +@pytest.mark.parametrize("writer_format", ALL_WRITERS - {"arrow"}) +def test_writer_items_count(writer_format: TLoaderFileFormat) -> None: + c1 = {"col1": 
new_column("col1", "bigint")} + with get_writer(_format=writer_format) as writer: + assert writer._buffered_items_count == 0 + # single item + writer.write_data_item({"col1": 1}, columns=c1) + assert writer._buffered_items_count == 1 + # list + writer.write_data_item([{"col1": 1}, {"col1": 2}], columns=c1) + assert writer._buffered_items_count == 3 + writer._flush_items() + assert writer._buffered_items_count == 0 + assert writer._writer.items_count == 3 + + +def test_writer_items_count_arrow() -> None: + import pyarrow as pa + c1 = {"col1": new_column("col1", "bigint")} + with get_writer(_format="arrow") as writer: + assert writer._buffered_items_count == 0 + # single item + writer.write_data_item(pa.Table.from_pylist([{"col1": 1}]), columns=c1) + assert writer._buffered_items_count == 1 + # single item with many rows + writer.write_data_item(pa.Table.from_pylist([{"col1": 1}, {"col1": 2}]), columns=c1) + assert writer._buffered_items_count == 3 + # empty list + writer.write_data_item([], columns=c1) + assert writer._buffered_items_count == 3 + # list with one item + writer.write_data_item([pa.Table.from_pylist([{"col1": 1}])], columns=c1) + assert writer._buffered_items_count == 4 + # list with many items + writer.write_data_item( + [pa.Table.from_pylist([{"col1": 1}]), pa.Table.from_pylist([{"col1": 1}, {"col1": 2}])], + columns=c1 + ) + assert writer._buffered_items_count == 7 + writer._flush_items() + assert writer._buffered_items_count == 0 + assert writer._writer.items_count == 7 diff --git a/tests/common/data_writers/test_parquet_writer.py b/tests/libs/test_parquet_writer.py similarity index 74% rename from tests/common/data_writers/test_parquet_writer.py rename to tests/libs/test_parquet_writer.py index 2f4bafa719..b1c19114fe 100644 --- a/tests/common/data_writers/test_parquet_writer.py +++ b/tests/libs/test_parquet_writer.py @@ -21,12 +21,19 @@ def get_writer( buffer_max_items: int = 10, file_max_items: int = 10, file_max_bytes: int = None, - _caps: DestinationCapabilitiesContext = None + _caps: DestinationCapabilitiesContext = None, ) -> BufferedDataWriter[ParquetDataWriter]: caps = _caps or DestinationCapabilitiesContext.generic_capabilities() caps.preferred_loader_file_format = _format file_template = os.path.join(TEST_STORAGE_ROOT, f"{_format}.%s") - return BufferedDataWriter(_format, file_template, buffer_max_items=buffer_max_items, _caps=caps, file_max_items=file_max_items, file_max_bytes=file_max_bytes) + return BufferedDataWriter( + _format, + file_template, + buffer_max_items=buffer_max_items, + _caps=caps, + file_max_items=file_max_items, + file_max_bytes=file_max_bytes, + ) def test_parquet_writer_schema_evolution_with_big_buffer() -> None: @@ -36,10 +43,15 @@ def test_parquet_writer_schema_evolution_with_big_buffer() -> None: c4 = new_column("col4", "text") with get_writer("parquet") as writer: - writer.write_data_item([{"col1": 1, "col2": 2, "col3": "3"}], {"col1": c1, "col2": c2, "col3": c3}) - writer.write_data_item([{"col1": 1, "col2": 2, "col3": "3", "col4": "4", "col5": {"hello": "marcin"}}], {"col1": c1, "col2": c2, "col3": c3, "col4": c4}) - - with open(writer.closed_files[0], "rb") as f: + writer.write_data_item( + [{"col1": 1, "col2": 2, "col3": "3"}], {"col1": c1, "col2": c2, "col3": c3} + ) + writer.write_data_item( + [{"col1": 1, "col2": 2, "col3": "3", "col4": "4", "col5": {"hello": "marcin"}}], + {"col1": c1, "col2": c2, "col3": c3, "col4": c4}, + ) + + with open(writer.closed_files[0].file_path, "rb") as f: table = pq.read_table(f) assert 
table.column("col1").to_pylist() == [1, 1] assert table.column("col2").to_pylist() == [2, 2] @@ -55,17 +67,22 @@ def test_parquet_writer_schema_evolution_with_small_buffer() -> None: with get_writer("parquet", buffer_max_items=4, file_max_items=50) as writer: for _ in range(0, 20): - writer.write_data_item([{"col1": 1, "col2": 2, "col3": "3"}], {"col1": c1, "col2": c2, "col3": c3}) + writer.write_data_item( + [{"col1": 1, "col2": 2, "col3": "3"}], {"col1": c1, "col2": c2, "col3": c3} + ) for _ in range(0, 20): - writer.write_data_item([{"col1": 1, "col2": 2, "col3": "3", "col4": "4", "col5": {"hello": "marcin"}}], {"col1": c1, "col2": c2, "col3": c3, "col4": c4}) + writer.write_data_item( + [{"col1": 1, "col2": 2, "col3": "3", "col4": "4", "col5": {"hello": "marcin"}}], + {"col1": c1, "col2": c2, "col3": c3, "col4": c4}, + ) assert len(writer.closed_files) == 2 - with open(writer.closed_files[0], "rb") as f: + with open(writer.closed_files[0].file_path, "rb") as f: table = pq.read_table(f) assert len(table.schema) == 3 - with open(writer.closed_files[1], "rb") as f: + with open(writer.closed_files[1].file_path, "rb") as f: table = pq.read_table(f) assert len(table.schema) == 4 @@ -76,20 +93,34 @@ def test_parquet_writer_json_serialization() -> None: c3 = new_column("col3", "complex") with get_writer("parquet") as writer: - writer.write_data_item([{"col1": 1, "col2": 2, "col3": {"hello":"dave"}}], {"col1": c1, "col2": c2, "col3": c3}) - writer.write_data_item([{"col1": 1, "col2": 2, "col3": {"hello":"marcin"}}], {"col1": c1, "col2": c2, "col3": c3}) - writer.write_data_item([{"col1": 1, "col2": 2, "col3": {}}], {"col1": c1, "col2": c2, "col3": c3}) - writer.write_data_item([{"col1": 1, "col2": 2, "col3": []}], {"col1": c1, "col2": c2, "col3": c3}) - - with open(writer.closed_files[0], "rb") as f: + writer.write_data_item( + [{"col1": 1, "col2": 2, "col3": {"hello": "dave"}}], + {"col1": c1, "col2": c2, "col3": c3}, + ) + writer.write_data_item( + [{"col1": 1, "col2": 2, "col3": {"hello": "marcin"}}], + {"col1": c1, "col2": c2, "col3": c3}, + ) + writer.write_data_item( + [{"col1": 1, "col2": 2, "col3": {}}], {"col1": c1, "col2": c2, "col3": c3} + ) + writer.write_data_item( + [{"col1": 1, "col2": 2, "col3": []}], {"col1": c1, "col2": c2, "col3": c3} + ) + + with open(writer.closed_files[0].file_path, "rb") as f: table = pq.read_table(f) assert table.column("col1").to_pylist() == [1, 1, 1, 1] assert table.column("col2").to_pylist() == [2, 2, 2, 2] - assert table.column("col3").to_pylist() == ["""{"hello":"dave"}""","""{"hello":"marcin"}""","""{}""","""[]"""] + assert table.column("col3").to_pylist() == [ + """{"hello":"dave"}""", + """{"hello":"marcin"}""", + """{}""", + """[]""", + ] def test_parquet_writer_all_data_fields() -> None: - data = dict(TABLE_ROW_ALL_DATA_TYPES) # fix dates to use pendulum data["col4"] = ensure_pendulum_datetime(data["col4"]) # type: ignore[arg-type] @@ -109,7 +140,7 @@ def test_parquet_writer_all_data_fields() -> None: microsecond=int(str(data["col11_precision"].microsecond)[:3] + "000") # type: ignore[attr-defined] ) - with open(writer.closed_files[0], "rb") as f: + with open(writer.closed_files[0].file_path, "rb") as f: table = pq.read_table(f) for key, value in data.items(): # what we have is pandas Timezone which is naive @@ -137,7 +168,7 @@ def test_parquet_writer_items_file_rotation() -> None: writer.write_data_item([{"col1": i}], columns) assert len(writer.closed_files) == 10 - with open(writer.closed_files[4], "rb") as f: + with 
open(writer.closed_files[4].file_path, "rb") as f: table = pq.read_table(f) assert table.column("col1").to_pylist() == list(range(40, 50)) @@ -152,21 +183,23 @@ def test_parquet_writer_size_file_rotation() -> None: writer.write_data_item([{"col1": i}], columns) assert len(writer.closed_files) == 25 - with open(writer.closed_files[4], "rb") as f: + with open(writer.closed_files[4].file_path, "rb") as f: table = pq.read_table(f) assert table.column("col1").to_pylist() == list(range(16, 20)) def test_parquet_writer_config() -> None: - os.environ["NORMALIZE__DATA_WRITER__VERSION"] = "2.0" os.environ["NORMALIZE__DATA_WRITER__DATA_PAGE_SIZE"] = str(1024 * 512) os.environ["NORMALIZE__DATA_WRITER__TIMESTAMP_TIMEZONE"] = "America/New York" - with inject_section(ConfigSectionContext(pipeline_name=None, sections=("normalize", ))): + with inject_section(ConfigSectionContext(pipeline_name=None, sections=("normalize",))): with get_writer("parquet", file_max_bytes=2**8, buffer_max_items=2) as writer: for i in range(0, 5): - writer.write_data_item([{"col1": i, "col2": pendulum.now()}], {"col1": new_column("col1", "bigint"), "col2": new_column("col2", "timestamp")}) + writer.write_data_item( + [{"col1": i, "col2": pendulum.now()}], + {"col1": new_column("col1", "bigint"), "col2": new_column("col2", "timestamp")}, + ) # force the parquet writer to be created writer._flush_items() @@ -190,7 +223,11 @@ def test_parquet_writer_schema_from_caps() -> None: for _ in range(0, 5): writer.write_data_item( [{"col1": Decimal("2617.27"), "col2": pendulum.now(), "col3": Decimal(2**250)}], - {"col1": new_column("col1", "decimal"), "col2": new_column("col2", "timestamp"), "col3": new_column("col3", "wei")} + { + "col1": new_column("col1", "decimal"), + "col2": new_column("col2", "timestamp"), + "col3": new_column("col3", "wei"), + }, ) # force the parquet writer to be created writer._flush_items() diff --git a/tests/libs/test_pyarrow.py b/tests/libs/test_pyarrow.py new file mode 100644 index 0000000000..dffda35005 --- /dev/null +++ b/tests/libs/test_pyarrow.py @@ -0,0 +1,51 @@ +from copy import deepcopy + +import pyarrow as pa + +from dlt.common.libs.pyarrow import py_arrow_to_table_schema_columns, get_py_arrow_datatype +from dlt.common.destination import DestinationCapabilitiesContext +from tests.cases import TABLE_UPDATE_COLUMNS_SCHEMA + + +def test_py_arrow_to_table_schema_columns(): + dlt_schema = deepcopy(TABLE_UPDATE_COLUMNS_SCHEMA) + + caps = DestinationCapabilitiesContext.generic_capabilities() + # The arrow schema will add precision + dlt_schema["col4"]["precision"] = caps.timestamp_precision + dlt_schema["col6"]["precision"], dlt_schema["col6"]["scale"] = caps.decimal_precision + dlt_schema["col11"]["precision"] = caps.timestamp_precision + dlt_schema["col4_null"]["precision"] = caps.timestamp_precision + dlt_schema["col6_null"]["precision"], dlt_schema["col6_null"]["scale"] = caps.decimal_precision + dlt_schema["col11_null"]["precision"] = caps.timestamp_precision + + # Ignoring wei as we can't distinguish from decimal + dlt_schema["col8"]["precision"], dlt_schema["col8"]["scale"] = (76, 0) + dlt_schema["col8"]["data_type"] = "decimal" + dlt_schema["col8_null"]["precision"], dlt_schema["col8_null"]["scale"] = (76, 0) + dlt_schema["col8_null"]["data_type"] = "decimal" + # No json type + dlt_schema["col9"]["data_type"] = "text" + del dlt_schema["col9"]["variant"] + dlt_schema["col9_null"]["data_type"] = "text" + del dlt_schema["col9_null"]["variant"] + + # arrow string fields don't have precision + del 
dlt_schema["col5_precision"]["precision"] + + # Convert to arrow schema + arrow_schema = pa.schema( + [ + pa.field( + column["name"], + get_py_arrow_datatype(column, caps, "UTC"), + nullable=column["nullable"], + ) + for column in dlt_schema.values() + ] + ) + + result = py_arrow_to_table_schema_columns(arrow_schema) + + # Resulting schema should match the original + assert result == dlt_schema diff --git a/tests/libs/test_pydantic.py b/tests/libs/test_pydantic.py new file mode 100644 index 0000000000..b7ca44c595 --- /dev/null +++ b/tests/libs/test_pydantic.py @@ -0,0 +1,482 @@ +from copy import copy +import pytest +from typing import ( + ClassVar, + Sequence, + Mapping, + Dict, + MutableMapping, + MutableSequence, + Union, + Optional, + List, + Dict, + Any, +) +from typing_extensions import Annotated, get_args, get_origin +from enum import Enum + +from datetime import datetime, date, time # noqa: I251 +from dlt.common import Decimal +from dlt.common import json + +from dlt.common.libs.pydantic import ( + DltConfig, + pydantic_to_table_schema_columns, + apply_schema_contract_to_model, + validate_item, + validate_items, + create_list_model, +) +from pydantic import BaseModel, Json, AnyHttpUrl, ConfigDict, ValidationError + +from dlt.common.schema.exceptions import DataValidationError + + +class StrEnum(str, Enum): + a = "a_value" + b = "b_value" + c = "c_value" + + +class IntEnum(int, Enum): + a = 0 + b = 1 + c = 2 + + +class MixedEnum(Enum): + a_int = 0 + b_str = "b_value" + c_int = 2 + + +class NestedModel(BaseModel): + nested_field: str + + +class Model(BaseModel): + bigint_field: int + text_field: str + timestamp_field: datetime + date_field: date + decimal_field: Decimal + double_field: float + time_field: time + + nested_field: NestedModel + list_field: List[str] + + union_field: Union[int, str] + + optional_field: Optional[float] + + blank_dict_field: dict # type: ignore[type-arg] + parametrized_dict_field: Dict[str, int] + + str_enum_field: StrEnum + int_enum_field: IntEnum + # Both of these shouold coerce to str + mixed_enum_int_field: MixedEnum + mixed_enum_str_field: MixedEnum + + json_field: Json[List[str]] + + url_field: AnyHttpUrl + + any_field: Any + json_any_field: Json[Any] + + +class ModelWithConfig(Model): + model_config = ConfigDict(frozen=True, extra="allow") + + +TEST_MODEL_INSTANCE = Model( + bigint_field=1, + text_field="text", + timestamp_field=datetime.now(), + date_field=date.today(), + decimal_field=Decimal(1.1), + double_field=1.1, + time_field=time(1, 2, 3, 12345), + nested_field=NestedModel(nested_field="nested"), + list_field=["a", "b", "c"], + union_field=1, + optional_field=None, + blank_dict_field={}, + parametrized_dict_field={"a": 1, "b": 2, "c": 3}, + str_enum_field=StrEnum.a, + int_enum_field=IntEnum.a, + mixed_enum_int_field=MixedEnum.a_int, + mixed_enum_str_field=MixedEnum.b_str, + json_field=json.dumps(["a", "b", "c"]), # type: ignore[arg-type] + url_field="https://example.com", # type: ignore[arg-type] + any_field="any_string", + json_any_field=json.dumps("any_string"), +) + + +@pytest.mark.parametrize("instance", [True, False]) +def test_pydantic_model_to_columns(instance: bool) -> None: + if instance: + model = TEST_MODEL_INSTANCE + else: + model = Model # type: ignore[assignment] + + result = pydantic_to_table_schema_columns(model) + + assert result["bigint_field"]["data_type"] == "bigint" + assert result["text_field"]["data_type"] == "text" + assert result["timestamp_field"]["data_type"] == "timestamp" + assert result["date_field"]["data_type"] 
== "date" + assert result["decimal_field"]["data_type"] == "decimal" + assert result["double_field"]["data_type"] == "double" + assert result["time_field"]["data_type"] == "time" + assert result["nested_field"]["data_type"] == "complex" + assert result["list_field"]["data_type"] == "complex" + assert result["union_field"]["data_type"] == "bigint" + assert result["optional_field"]["data_type"] == "double" + assert result["optional_field"]["nullable"] is True + assert result["blank_dict_field"]["data_type"] == "complex" + assert result["parametrized_dict_field"]["data_type"] == "complex" + assert result["str_enum_field"]["data_type"] == "text" + assert result["int_enum_field"]["data_type"] == "bigint" + assert result["mixed_enum_int_field"]["data_type"] == "text" + assert result["mixed_enum_str_field"]["data_type"] == "text" + assert result["json_field"]["data_type"] == "complex" + assert result["url_field"]["data_type"] == "text" + + # Any type fields are excluded from schema + assert "any_field" not in result + assert "json_any_field" not in result + + +def test_pydantic_model_skip_complex_types() -> None: + class SkipNestedModel(Model): + dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True} + + result = pydantic_to_table_schema_columns(SkipNestedModel) + + assert result["bigint_field"]["data_type"] == "bigint" + assert "nested_field" not in result + assert "list_field" not in result + assert "blank_dict_field" not in result + assert "parametrized_dict_field" not in result + assert "json_field" not in result + assert result["bigint_field"]["data_type"] == "bigint" + assert result["text_field"]["data_type"] == "text" + assert result["timestamp_field"]["data_type"] == "timestamp" + + +def test_model_for_column_mode() -> None: + # extra prop + instance_extra = TEST_MODEL_INSTANCE.dict() + instance_extra["extra_prop"] = "EXTRA" + # back to string + instance_extra["json_field"] = json.dumps(["a", "b", "c"]) + instance_extra["json_any_field"] = json.dumps("any_string") + + # evolve - allow extra fields + model_evolve = apply_schema_contract_to_model(ModelWithConfig, "evolve") + # assert "frozen" in model_evolve.model_config + extra_instance = model_evolve.parse_obj(instance_extra) + assert hasattr(extra_instance, "extra_prop") + assert extra_instance.extra_prop == "EXTRA" + model_evolve = apply_schema_contract_to_model(Model, "evolve") # type: ignore[arg-type] + extra_instance = model_evolve.parse_obj(instance_extra) + assert extra_instance.extra_prop == "EXTRA" # type: ignore[attr-defined] + + # freeze - validation error on extra fields + model_freeze = apply_schema_contract_to_model(ModelWithConfig, "freeze") + # assert "frozen" in model_freeze.model_config + with pytest.raises(ValidationError) as py_ex: + model_freeze.parse_obj(instance_extra) + assert py_ex.value.errors()[0]["loc"] == ("extra_prop",) + model_freeze = apply_schema_contract_to_model(Model, "freeze") # type: ignore[arg-type] + with pytest.raises(ValidationError) as py_ex: + model_freeze.parse_obj(instance_extra) + assert py_ex.value.errors()[0]["loc"] == ("extra_prop",) + + # discard row - same as freeze + model_freeze = apply_schema_contract_to_model(ModelWithConfig, "discard_row") + with pytest.raises(ValidationError) as py_ex: + model_freeze.parse_obj(instance_extra) + assert py_ex.value.errors()[0]["loc"] == ("extra_prop",) + + # discard value - ignore extra fields + model_discard = apply_schema_contract_to_model(ModelWithConfig, "discard_value") + extra_instance = model_discard.parse_obj(instance_extra) + assert 
not hasattr(extra_instance, "extra_prop") + model_evolve = apply_schema_contract_to_model(Model, "evolve") # type: ignore[arg-type] + extra_instance = model_discard.parse_obj(instance_extra) + assert not hasattr(extra_instance, "extra_prop") + + # evolve data but freeze new columns + model_freeze = apply_schema_contract_to_model(ModelWithConfig, "evolve", "freeze") + instance_extra_2 = copy(instance_extra) + # should parse ok + model_discard.parse_obj(instance_extra_2) + # this must fail validation + instance_extra_2["bigint_field"] = "NOT INT" + with pytest.raises(ValidationError): + model_discard.parse_obj(instance_extra_2) + # let the datatypes evolve + model_freeze = apply_schema_contract_to_model(ModelWithConfig, "evolve", "evolve") + print(model_freeze.parse_obj(instance_extra_2).dict()) + + with pytest.raises(NotImplementedError): + apply_schema_contract_to_model(ModelWithConfig, "evolve", "discard_value") + + +def test_nested_model_config_propagation() -> None: + class UserLabel(BaseModel): + label: str + + class UserAddress(BaseModel): + street: str + zip_code: Sequence[int] + label: Optional[UserLabel] + ro_labels: Mapping[str, UserLabel] + wr_labels: MutableMapping[str, List[UserLabel]] + ro_list: Sequence[UserLabel] + wr_list: MutableSequence[Dict[str, UserLabel]] + + class User(BaseModel): + user_id: int + name: Annotated[str, "PII", "name"] + created_at: Optional[datetime] + labels: List[str] + user_label: UserLabel + user_labels: List[UserLabel] + address: Annotated[UserAddress, "PII", "address"] + unity: Union[UserAddress, UserLabel, Dict[str, UserAddress]] + + dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True} + + model_freeze = apply_schema_contract_to_model(User, "evolve", "freeze") + from typing import get_type_hints + + # print(model_freeze.__fields__) + # extra is modified + assert model_freeze.__fields__["address"].annotation.__name__ == "UserAddressExtraAllow" # type: ignore[index] + # annotated is preserved + assert issubclass(get_origin(model_freeze.__fields__["address"].rebuild_annotation()), Annotated) # type: ignore[arg-type, index] + # UserAddress is converted to UserAddressAllow only once + assert model_freeze.__fields__["address"].annotation is get_args(model_freeze.__fields__["unity"].annotation)[0] # type: ignore[index] + + # print(User.__fields__) + # print(User.__fields__["name"].annotation) + # print(model_freeze.model_config) + # print(model_freeze.__fields__) + # print(model_freeze.__fields__["name"].annotation) + # print(model_freeze.__fields__["address"].annotation) + + +def test_item_list_validation() -> None: + class ItemModel(BaseModel): + b: bool + opt: Optional[int] = None + dlt_config: ClassVar[DltConfig] = {"skip_complex_types": False} + + # non validating items removed from the list (both extra and declared) + discard_model = apply_schema_contract_to_model(ItemModel, "discard_row", "discard_row") + discard_list_model = create_list_model(discard_model) + # violate data type + items = validate_items( + "items", + discard_list_model, + [{"b": True}, {"b": 2, "opt": "not int", "extra": 1.2}, {"b": 3}, {"b": False}], + "discard_row", + "discard_row", + ) + # {"b": 2, "opt": "not int", "extra": 1.2} - note that this will generate 3 errors for the same item + # and is crucial in our tests when discarding rows + assert len(items) == 2 + assert items[0].b is True + assert items[1].b is False + # violate extra field + items = validate_items( + "items", + discard_list_model, + [{"b": True}, {"b": 2}, {"b": 3}, {"b": False, "a": False}], + 
"discard_row", + "discard_row", + ) + assert len(items) == 1 + assert items[0].b is True + + # freeze on non validating items (both extra and declared) + freeze_model = apply_schema_contract_to_model(ItemModel, "freeze", "freeze") + freeze_list_model = create_list_model(freeze_model) + # violate data type + with pytest.raises(DataValidationError) as val_ex: + validate_items( + "items", + freeze_list_model, + [{"b": True}, {"b": 2}, {"b": 3}, {"b": False}], + "freeze", + "freeze", + ) + assert val_ex.value.schema_name is None + assert val_ex.value.table_name == "items" + assert val_ex.value.column_name == str(("items", 1, "b")) # pydantic location + assert val_ex.value.schema_entity == "data_type" + assert val_ex.value.contract_mode == "freeze" + assert val_ex.value.table_schema is freeze_list_model + assert val_ex.value.data_item == {"b": 2} + # extra type + with pytest.raises(DataValidationError) as val_ex: + validate_items( + "items", + freeze_list_model, + [{"b": True}, {"a": 2, "b": False}, {"b": 3}, {"b": False}], + "freeze", + "freeze", + ) + assert val_ex.value.schema_name is None + assert val_ex.value.table_name == "items" + assert val_ex.value.column_name == str(("items", 1, "a")) # pydantic location + assert val_ex.value.schema_entity == "columns" + assert val_ex.value.contract_mode == "freeze" + assert val_ex.value.table_schema is freeze_list_model + assert val_ex.value.data_item == {"a": 2, "b": False} + + # discard values + discard_value_model = apply_schema_contract_to_model(ItemModel, "discard_value", "freeze") + discard_list_model = create_list_model(discard_value_model) + # violate extra field + items = validate_items( + "items", + discard_list_model, + [{"b": True}, {"b": False, "a": False}], + "discard_value", + "freeze", + ) + assert len(items) == 2 + # "a" extra got remove + assert items[1].dict() == {"b": False, "opt": None} + # violate data type + with pytest.raises(NotImplementedError): + apply_schema_contract_to_model(ItemModel, "discard_value", "discard_value") + + # evolve data types and extras + evolve_model = apply_schema_contract_to_model(ItemModel, "evolve", "evolve") + evolve_list_model = create_list_model(evolve_model) + # for data types a lenient model will be created that accepts any type + items = validate_items( + "items", + evolve_list_model, + [{"b": True}, {"b": 2}, {"b": 3}, {"b": False}], + "evolve", + "evolve", + ) + assert len(items) == 4 + assert items[0].b is True + assert items[1].b == 2 + # extra fields allowed + items = validate_items( + "items", + evolve_list_model, + [{"b": True}, {"b": 2}, {"b": 3}, {"b": False, "a": False}], + "evolve", + "evolve", + ) + assert len(items) == 4 + assert items[3].b is False + assert items[3].a is False # type: ignore[attr-defined] + + # accept new types but discard new columns + mixed_model = apply_schema_contract_to_model(ItemModel, "discard_row", "evolve") + mixed_list_model = create_list_model(mixed_model) + # for data types a lenient model will be created that accepts any type + items = validate_items( + "items", + mixed_list_model, + [{"b": True}, {"b": 2}, {"b": 3}, {"b": False}], + "discard_row", + "evolve", + ) + assert len(items) == 4 + assert items[0].b is True + assert items[1].b == 2 + # extra fields forbidden - full rows discarded + items = validate_items( + "items", + mixed_list_model, + [{"b": True}, {"b": 2}, {"b": 3}, {"b": False, "a": False}], + "discard_row", + "evolve", + ) + assert len(items) == 3 + + +def test_item_validation() -> None: + class ItemModel(BaseModel): + b: bool + 
dlt_config: ClassVar[DltConfig] = {"skip_complex_types": False} + + # non validating items removed from the list (both extra and declared) + discard_model = apply_schema_contract_to_model(ItemModel, "discard_row", "discard_row") + # violate data type + assert validate_item("items", discard_model, {"b": 2}, "discard_row", "discard_row") is None + # violate extra field + assert ( + validate_item( + "items", discard_model, {"b": False, "a": False}, "discard_row", "discard_row" + ) + is None + ) + + # freeze on non validating items (both extra and declared) + freeze_model = apply_schema_contract_to_model(ItemModel, "freeze", "freeze") + # violate data type + with pytest.raises(DataValidationError) as val_ex: + validate_item("items", freeze_model, {"b": 2}, "freeze", "freeze") + assert val_ex.value.schema_name is None + assert val_ex.value.table_name == "items" + assert val_ex.value.column_name == str(("b",)) # pydantic location + assert val_ex.value.schema_entity == "data_type" + assert val_ex.value.contract_mode == "freeze" + assert val_ex.value.table_schema is freeze_model + assert val_ex.value.data_item == {"b": 2} + # extra type + with pytest.raises(DataValidationError) as val_ex: + validate_item("items", freeze_model, {"a": 2, "b": False}, "freeze", "freeze") + assert val_ex.value.schema_name is None + assert val_ex.value.table_name == "items" + assert val_ex.value.column_name == str(("a",)) # pydantic location + assert val_ex.value.schema_entity == "columns" + assert val_ex.value.contract_mode == "freeze" + assert val_ex.value.table_schema is freeze_model + assert val_ex.value.data_item == {"a": 2, "b": False} + + # discard values + discard_value_model = apply_schema_contract_to_model(ItemModel, "discard_value", "freeze") + # violate extra field + item = validate_item( + "items", discard_value_model, {"b": False, "a": False}, "discard_value", "freeze" + ) + # "a" extra got removed + assert item.dict() == {"b": False} + + # evolve data types and extras + evolve_model = apply_schema_contract_to_model(ItemModel, "evolve", "evolve") + # for data types a lenient model will be created that accepts any type + item = validate_item("items", evolve_model, {"b": 2}, "evolve", "evolve") + assert item.b == 2 + # extra fields allowed + item = validate_item("items", evolve_model, {"b": False, "a": False}, "evolve", "evolve") + assert item.b is False + assert item.a is False # type: ignore[attr-defined] + + # accept new types but discard new columns + mixed_model = apply_schema_contract_to_model(ItemModel, "discard_row", "evolve") + # for data types a lenient model will be created that accepts any type + item = validate_item("items", mixed_model, {"b": 3}, "discard_row", "evolve") + assert item.b == 3 + # extra fields forbidden - full rows discarded + assert ( + validate_item("items", mixed_model, {"b": False, "a": False}, "discard_row", "evolve") + is None + ) diff --git a/tests/load/athena_iceberg/test_athena_iceberg.py b/tests/load/athena_iceberg/test_athena_iceberg.py index 72772b0e2d..0b18f22639 100644 --- a/tests/load/athena_iceberg/test_athena_iceberg.py +++ b/tests/load/athena_iceberg/test_athena_iceberg.py @@ -1,4 +1,3 @@ - import pytest import os import datetime # noqa: I251 @@ -7,7 +6,7 @@ import dlt from dlt.common import pendulum from dlt.common.utils import uniq_id -from tests.load.pipeline.utils import load_table_counts +from tests.load.pipeline.utils import load_table_counts from tests.cases import table_update_and_row, assert_all_data_types_row from tests.pipeline.utils import 
assert_load_info @@ -25,21 +24,20 @@ def test_iceberg() -> None: We write two tables, one with the iceberg flag, one without. We expect the iceberg table and its subtables to accept update commands and the other table to reject them. """ - os.environ['DESTINATION__FILESYSTEM__BUCKET_URL'] = "s3://dlt-ci-test-bucket" + os.environ["DESTINATION__FILESYSTEM__BUCKET_URL"] = "s3://dlt-ci-test-bucket" - pipeline = dlt.pipeline(pipeline_name="aaaaathena-iceberg", destination="athena", staging="filesystem", full_refresh=True) + pipeline = dlt.pipeline( + pipeline_name="aaaaathena-iceberg", + destination="athena", + staging="filesystem", + full_refresh=True, + ) def items() -> Iterator[Any]: yield { "id": 1, "name": "item", - "sub_items": [{ - "id": 101, - "name": "sub item 101" - },{ - "id": 101, - "name": "sub item 102" - }] + "sub_items": [{"id": 101, "name": "sub item 101"}, {"id": 101, "name": "sub item 102"}], } @dlt.resource(name="items_normal", write_disposition="append") @@ -53,7 +51,9 @@ def items_iceberg(): print(pipeline.run([items_normal, items_iceberg])) # see if we have athena tables with items - table_counts = load_table_counts(pipeline, *[t["name"] for t in pipeline.default_schema._schema_tables.values() ]) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema._schema_tables.values()] + ) assert table_counts["items_normal"] == 1 assert table_counts["items_normal__sub_items"] == 2 assert table_counts["_dlt_loads"] == 1 @@ -75,4 +75,3 @@ def items_iceberg(): # modifying iceberg table will succeed client.execute_sql("UPDATE items_iceberg SET name='new name'") client.execute_sql("UPDATE items_iceberg__sub_items SET name='super new name'") - diff --git a/tests/load/bigquery/test_bigquery_client.py b/tests/load/bigquery/test_bigquery_client.py index 145898cde3..a2ca9c53b1 100644 --- a/tests/load/bigquery/test_bigquery_client.py +++ b/tests/load/bigquery/test_bigquery_client.py @@ -8,19 +8,30 @@ from dlt.common.arithmetics import numeric_default_context from dlt.common.configuration.exceptions import ConfigFieldMissingException from dlt.common.configuration.resolve import resolve_configuration -from dlt.common.configuration.specs import GcpServiceAccountCredentials, GcpServiceAccountCredentialsWithoutDefaults, GcpOAuthCredentials, GcpOAuthCredentialsWithoutDefaults +from dlt.common.configuration.specs import ( + GcpServiceAccountCredentials, + GcpServiceAccountCredentialsWithoutDefaults, + GcpOAuthCredentials, + GcpOAuthCredentialsWithoutDefaults, +) from dlt.common.configuration.specs import gcp_credentials from dlt.common.configuration.specs.exceptions import InvalidGoogleNativeCredentialsType from dlt.common.storages import FileStorage from dlt.common.utils import digest128, uniq_id, custom_environ -from dlt.destinations.bigquery.bigquery import BigQueryClient, BigQueryClientConfiguration +from dlt.destinations.impl.bigquery.bigquery import BigQueryClient, BigQueryClientConfiguration from dlt.destinations.exceptions import LoadJobNotExistsException, LoadJobTerminalException from tests.utils import TEST_STORAGE_ROOT, delete_test_storage, preserve_environ from tests.common.utils import json_case_path as common_json_case_path from tests.common.configuration.utils import environment -from tests.load.utils import expect_load_file, prepare_table, yield_client_with_storage, cm_yield_client_with_storage +from tests.load.utils import ( + expect_load_file, + prepare_table, + yield_client_with_storage, + cm_yield_client_with_storage, +) + 
@pytest.fixture(scope="module") def client() -> Iterator[BigQueryClient]: @@ -42,7 +53,7 @@ def test_service_credentials_with_default(environment: Any) -> None: # resolve will miss values and try to find default credentials on the machine with pytest.raises(ConfigFieldMissingException) as py_ex: resolve_configuration(gcpc) - assert py_ex.value.fields == ['project_id', 'private_key', 'client_email'] + assert py_ex.value.fields == ["project_id", "private_key", "client_email"] # prepare real service.json services_str, dest_path = prepare_service_json() @@ -51,7 +62,6 @@ def test_service_credentials_with_default(environment: Any) -> None: gcpc = GcpServiceAccountCredentials() gcpc.parse_native_representation(services_str) # check if credentials can be created - assert gcpc.to_service_account_credentials() is not None assert gcpc.to_native_credentials() is not None # reset failed default credentials timeout so we resolve below @@ -64,7 +74,7 @@ def test_service_credentials_with_default(environment: Any) -> None: # project id recovered from credentials assert gcpc.project_id == "level-dragon-333019" # check if credentials can be created - assert gcpc.to_service_account_credentials() is not None + assert gcpc.to_native_credentials() is not None # the default credentials are available assert gcpc.has_default_credentials() is True assert gcpc.default_credentials() is not None @@ -106,7 +116,7 @@ def test_oauth_credentials_with_default(environment: Any) -> None: # resolve will miss values and try to find default credentials on the machine with pytest.raises(ConfigFieldMissingException) as py_ex: resolve_configuration(gcoauth) - assert py_ex.value.fields == ['client_id', 'client_secret', 'refresh_token', 'project_id'] + assert py_ex.value.fields == ["client_id", "client_secret", "refresh_token", "project_id"] # prepare real service.json oauth_str, _ = prepare_oauth_json() @@ -180,7 +190,9 @@ def test_get_oauth_access_token() -> None: def test_bigquery_configuration() -> None: - config = resolve_configuration(BigQueryClientConfiguration(dataset_name="dataset"), sections=("destination", "bigquery")) + config = resolve_configuration( + BigQueryClientConfiguration(dataset_name="dataset"), sections=("destination", "bigquery") + ) assert config.location == "US" assert config.get_location() == "US" assert config.http_timeout == 15.0 @@ -190,16 +202,22 @@ def test_bigquery_configuration() -> None: # credentials location is deprecated os.environ["CREDENTIALS__LOCATION"] = "EU" - config = resolve_configuration(BigQueryClientConfiguration(dataset_name="dataset"), sections=("destination", "bigquery")) + config = resolve_configuration( + BigQueryClientConfiguration(dataset_name="dataset"), sections=("destination", "bigquery") + ) assert config.location == "US" assert config.credentials.location == "EU" # but if it is set, we propagate it to the config assert config.get_location() == "EU" os.environ["LOCATION"] = "ATLANTIS" - config = resolve_configuration(BigQueryClientConfiguration(dataset_name="dataset"), sections=("destination", "bigquery")) + config = resolve_configuration( + BigQueryClientConfiguration(dataset_name="dataset"), sections=("destination", "bigquery") + ) assert config.get_location() == "ATLANTIS" os.environ["DESTINATION__FILE_UPLOAD_TIMEOUT"] = "20000" - config = resolve_configuration(BigQueryClientConfiguration(dataset_name="dataset"), sections=("destination", "bigquery")) + config = resolve_configuration( + BigQueryClientConfiguration(dataset_name="dataset"), sections=("destination", 
"bigquery") + ) assert config.file_upload_timeout == 20000.0 # default fingerprint is empty @@ -230,30 +248,40 @@ def test_bigquery_job_errors(client: BigQueryClient, file_storage: FileStorage) load_json = { "_dlt_id": uniq_id(), "_dlt_root_id": uniq_id(), - "sender_id":'90238094809sajlkjxoiewjhduuiuehd', - "timestamp": str(pendulum.now()) + "sender_id": "90238094809sajlkjxoiewjhduuiuehd", + "timestamp": str(pendulum.now()), } job = expect_load_file(client, file_storage, json.dumps(load_json), user_table_name) # start a job from the same file. it should fallback to retrieve job silently - r_job = client.start_file_load(client.schema.get_table(user_table_name), file_storage.make_full_path(job.file_name()), uniq_id()) + r_job = client.start_file_load( + client.schema.get_table(user_table_name), + file_storage.make_full_path(job.file_name()), + uniq_id(), + ) assert r_job.state() == "completed" -@pytest.mark.parametrize('location', ["US", "EU"]) +@pytest.mark.parametrize("location", ["US", "EU"]) def test_bigquery_location(location: str, file_storage: FileStorage) -> None: - with cm_yield_client_with_storage("bigquery", default_config_values={"location": location}) as client: + with cm_yield_client_with_storage( + "bigquery", default_config_values={"credentials": {"location": location}} + ) as client: user_table_name = prepare_table(client) load_json = { "_dlt_id": uniq_id(), "_dlt_root_id": uniq_id(), - "sender_id": '90238094809sajlkjxoiewjhduuiuehd', - "timestamp": str(pendulum.now()) + "sender_id": "90238094809sajlkjxoiewjhduuiuehd", + "timestamp": str(pendulum.now()), } job = expect_load_file(client, file_storage, json.dumps(load_json), user_table_name) # start a job from the same file. it should fallback to retrieve job silently - client.start_file_load(client.schema.get_table(user_table_name), file_storage.make_full_path(job.file_name()), uniq_id()) + client.start_file_load( + client.schema.get_table(user_table_name), + file_storage.make_full_path(job.file_name()), + uniq_id(), + ) canonical_name = client.sql_client.make_qualified_table_name(user_table_name, escape=False) t = client.sql_client.native_connection.get_table(canonical_name) assert t.location == location @@ -265,58 +293,84 @@ def test_loading_errors(client: BigQueryClient, file_storage: FileStorage) -> No load_json: Dict[str, Any] = { "_dlt_id": uniq_id(), "_dlt_root_id": uniq_id(), - "sender_id":'90238094809sajlkjxoiewjhduuiuehd', - "timestamp": str(pendulum.now()) + "sender_id": "90238094809sajlkjxoiewjhduuiuehd", + "timestamp": str(pendulum.now()), } insert_json = copy(load_json) insert_json["_unk_"] = None - job = expect_load_file(client, file_storage, json.dumps(insert_json), user_table_name, status="failed") + job = expect_load_file( + client, file_storage, json.dumps(insert_json), user_table_name, status="failed" + ) assert "No such field: _unk_" in job.exception() # insert null value insert_json = copy(load_json) insert_json["timestamp"] = None - job = expect_load_file(client, file_storage, json.dumps(insert_json), user_table_name, status="failed") + job = expect_load_file( + client, file_storage, json.dumps(insert_json), user_table_name, status="failed" + ) assert "Only optional fields can be set to NULL. 
Field: timestamp;" in job.exception() # insert wrong type insert_json = copy(load_json) insert_json["timestamp"] = "AA" - job = expect_load_file(client, file_storage, json.dumps(insert_json), user_table_name, status="failed") + job = expect_load_file( + client, file_storage, json.dumps(insert_json), user_table_name, status="failed" + ) assert "Couldn't convert value to timestamp:" in job.exception() # numeric overflow on bigint insert_json = copy(load_json) # 2**64//2 - 1 is a maximum bigint value - insert_json["metadata__rasa_x_id"] = 2**64//2 - job = expect_load_file(client, file_storage, json.dumps(insert_json), user_table_name, status="failed") + insert_json["metadata__rasa_x_id"] = 2**64 // 2 + job = expect_load_file( + client, file_storage, json.dumps(insert_json), user_table_name, status="failed" + ) assert "Could not convert value" in job.exception() # numeric overflow on NUMERIC insert_json = copy(load_json) # default decimal is (38, 9) (128 bit), use local context to generate decimals with 38 precision with numeric_default_context(): - below_limit = Decimal(10**29) - Decimal('0.001') + below_limit = Decimal(10**29) - Decimal("0.001") above_limit = Decimal(10**29) # this will pass insert_json["parse_data__intent__id"] = below_limit - job = expect_load_file(client, file_storage, json.dumps(insert_json), user_table_name, status="completed") + job = expect_load_file( + client, file_storage, json.dumps(insert_json), user_table_name, status="completed" + ) # this will fail insert_json["parse_data__intent__id"] = above_limit - job = expect_load_file(client, file_storage, json.dumps(insert_json), user_table_name, status="failed") - assert "Invalid NUMERIC value: 100000000000000000000000000000 Field: parse_data__intent__id;" in job.exception() + job = expect_load_file( + client, file_storage, json.dumps(insert_json), user_table_name, status="failed" + ) + assert ( + "Invalid NUMERIC value: 100000000000000000000000000000 Field: parse_data__intent__id;" + in job.exception() + ) # max bigquery decimal is (76, 76) (256 bit) = 5.7896044618658097711785492504343953926634992332820282019728792003956564819967E+38 insert_json = copy(load_json) - insert_json["parse_data__metadata__rasa_x_id"] = Decimal("5.7896044618658097711785492504343953926634992332820282019728792003956564819968E+38") - job = expect_load_file(client, file_storage, json.dumps(insert_json), user_table_name, status="failed") - assert "Invalid BIGNUMERIC value: 578960446186580977117854925043439539266.34992332820282019728792003956564819968 Field: parse_data__metadata__rasa_x_id;" in job.exception() + insert_json["parse_data__metadata__rasa_x_id"] = Decimal( + "5.7896044618658097711785492504343953926634992332820282019728792003956564819968E+38" + ) + job = expect_load_file( + client, file_storage, json.dumps(insert_json), user_table_name, status="failed" + ) + assert ( + "Invalid BIGNUMERIC value:" + " 578960446186580977117854925043439539266.34992332820282019728792003956564819968 Field:" + " parse_data__metadata__rasa_x_id;" + in job.exception() + ) def prepare_oauth_json() -> Tuple[str, str]: # prepare real service.json storage = FileStorage("_secrets", makedirs=True) - with open(common_json_case_path("oauth_client_secret_929384042504"), mode="r", encoding="utf-8") as f: + with open( + common_json_case_path("oauth_client_secret_929384042504"), mode="r", encoding="utf-8" + ) as f: oauth_str = f.read() dest_path = storage.save("oauth_client_secret_929384042504.json", oauth_str) return oauth_str, dest_path diff --git 
a/tests/load/bigquery/test_bigquery_table_builder.py b/tests/load/bigquery/test_bigquery_table_builder.py index a3222ba020..d622f9205c 100644 --- a/tests/load/bigquery/test_bigquery_table_builder.py +++ b/tests/load/bigquery/test_bigquery_table_builder.py @@ -8,12 +8,13 @@ from dlt.common.configuration import resolve_configuration from dlt.common.configuration.specs import GcpServiceAccountCredentialsWithoutDefaults -from dlt.destinations.bigquery.bigquery import BigQueryClient -from dlt.destinations.bigquery.configuration import BigQueryClientConfiguration +from dlt.destinations.impl.bigquery.bigquery import BigQueryClient +from dlt.destinations.impl.bigquery.configuration import BigQueryClientConfiguration from dlt.destinations.exceptions import DestinationSchemaWillNotUpdate from tests.load.utils import TABLE_UPDATE + @pytest.fixture def schema() -> Schema: return Schema("event") @@ -37,7 +38,7 @@ def gcp_client(schema: Schema) -> BigQueryClient: creds.project_id = "test_project_id" return BigQueryClient( schema, - BigQueryClientConfiguration(dataset_name="test_" + uniq_id(), credentials=creds) # type: ignore[arg-type] + BigQueryClientConfiguration(dataset_name="test_" + uniq_id(), credentials=creds), # type: ignore[arg-type] ) diff --git a/tests/load/cases/fake_destination.py b/tests/load/cases/fake_destination.py index 152b2db918..016cc19020 100644 --- a/tests/load/cases/fake_destination.py +++ b/tests/load/cases/fake_destination.py @@ -1 +1,6 @@ -# module that is used to test wrong destination references \ No newline at end of file +# module that is used to test wrong destination references + + +class not_a_destination: + def __init__(self, **kwargs) -> None: + pass diff --git a/tests/load/conftest.py b/tests/load/conftest.py index 23c7a2b8c4..1d40e912e6 100644 --- a/tests/load/conftest.py +++ b/tests/load/conftest.py @@ -6,23 +6,21 @@ from tests.utils import preserve_environ -@pytest.fixture(scope='function', params=DEFAULT_BUCKETS) +@pytest.fixture(scope="function", params=DEFAULT_BUCKETS) def default_buckets_env(request) -> Iterator[str]: - """Parametrized fixture to configure filesystem destination bucket in env for each test bucket - """ - os.environ['DESTINATION__FILESYSTEM__BUCKET_URL'] = request.param + """Parametrized fixture to configure filesystem destination bucket in env for each test bucket""" + os.environ["DESTINATION__FILESYSTEM__BUCKET_URL"] = request.param yield request.param - -@pytest.fixture(scope='function', params=ALL_BUCKETS) +@pytest.fixture(scope="function", params=ALL_BUCKETS) def all_buckets_env(request) -> Iterator[str]: if isinstance(request.param, dict): - bucket_url = request.param['bucket_url'] + bucket_url = request.param["bucket_url"] # R2 bucket needs to override all credentials - for key, value in request.param['credentials'].items(): - os.environ[f'DESTINATION__FILESYSTEM__CREDENTIALS__{key.upper()}'] = value + for key, value in request.param["credentials"].items(): + os.environ[f"DESTINATION__FILESYSTEM__CREDENTIALS__{key.upper()}"] = value else: bucket_url = request.param - os.environ['DESTINATION__FILESYSTEM__BUCKET_URL'] = bucket_url + os.environ["DESTINATION__FILESYSTEM__BUCKET_URL"] = bucket_url yield bucket_url diff --git a/tests/load/duckdb/test_duckdb_client.py b/tests/load/duckdb/test_duckdb_client.py index 6c362a6b76..ef151833e4 100644 --- a/tests/load/duckdb/test_duckdb_client.py +++ b/tests/load/duckdb/test_duckdb_client.py @@ -6,11 +6,18 @@ from dlt.common.configuration.resolve import resolve_configuration from 
dlt.common.configuration.utils import get_resolved_traces -from dlt.destinations.duckdb.configuration import DUCK_DB_NAME, DuckDbClientConfiguration, DuckDbCredentials, DEFAULT_DUCK_DB_NAME +from dlt.destinations.impl.duckdb.configuration import ( + DUCK_DB_NAME, + DuckDbClientConfiguration, + DuckDbCredentials, + DEFAULT_DUCK_DB_NAME, +) +from dlt.destinations import duckdb from tests.load.pipeline.utils import drop_pipeline, assert_table from tests.utils import patch_home_dir, autouse_test_storage, preserve_environ, TEST_STORAGE_ROOT + @pytest.fixture(autouse=True) def delete_default_duckdb_credentials() -> Iterator[None]: # remove the default duckdb config @@ -46,13 +53,13 @@ def test_duckdb_open_conn_default() -> None: def test_duckdb_database_path() -> None: # resolve without any path provided c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset")) - assert c.credentials.database.lower() == os.path.abspath("quack.duckdb").lower() + assert c.credentials._conn_str().lower() == os.path.abspath("quack.duckdb").lower() # resolve without any path but with pipeline context p = dlt.pipeline(pipeline_name="quack_pipeline") c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset")) # still cwd db_path = os.path.abspath(os.path.join(".", "quack_pipeline.duckdb")) - assert c.credentials.database.lower() == db_path.lower() + assert c.credentials._conn_str().lower() == db_path.lower() # we do not keep default duckdb path in the local state with pytest.raises(KeyError): p.get_local_state_val("duckdb_database") @@ -67,9 +74,11 @@ def test_duckdb_database_path() -> None: os.unlink(db_path) # test special :pipeline: path to create in pipeline folder - c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset", credentials=":pipeline:")) + c = resolve_configuration( + DuckDbClientConfiguration(dataset_name="test_dataset", credentials=":pipeline:") + ) db_path = os.path.abspath(os.path.join(p.working_dir, DEFAULT_DUCK_DB_NAME)) - assert c.credentials.database.lower() == db_path.lower() + assert c.credentials._conn_str().lower() == db_path.lower() # connect conn = c.credentials.borrow_conn(read_only=False) c.credentials.return_conn(conn) @@ -79,8 +88,12 @@ def test_duckdb_database_path() -> None: # provide relative path db_path = "_storage/test_quack.duckdb" - c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset", credentials="duckdb:///_storage/test_quack.duckdb")) - assert c.credentials.database.lower() == os.path.abspath(db_path).lower() + c = resolve_configuration( + DuckDbClientConfiguration( + dataset_name="test_dataset", credentials="duckdb:///_storage/test_quack.duckdb" + ) + ) + assert c.credentials._conn_str().lower() == os.path.abspath(db_path).lower() conn = c.credentials.borrow_conn(read_only=False) c.credentials.return_conn(conn) assert os.path.isfile(db_path) @@ -88,9 +101,11 @@ def test_duckdb_database_path() -> None: # provide absolute path db_path = os.path.abspath("_storage/abs_test_quack.duckdb") - c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset", credentials=f"duckdb:///{db_path}")) + c = resolve_configuration( + DuckDbClientConfiguration(dataset_name="test_dataset", credentials=f"duckdb:///{db_path}") + ) assert os.path.isabs(c.credentials.database) - assert c.credentials.database.lower() == db_path.lower() + assert c.credentials._conn_str().lower() == db_path.lower() conn = c.credentials.borrow_conn(read_only=False) c.credentials.return_conn(conn) assert 
os.path.isfile(db_path) @@ -98,17 +113,21 @@ def test_duckdb_database_path() -> None: # set just path as credentials db_path = "_storage/path_test_quack.duckdb" - c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset", credentials=db_path)) - assert c.credentials.database.lower() == os.path.abspath(db_path).lower() + c = resolve_configuration( + DuckDbClientConfiguration(dataset_name="test_dataset", credentials=db_path) + ) + assert c.credentials._conn_str().lower() == os.path.abspath(db_path).lower() conn = c.credentials.borrow_conn(read_only=False) c.credentials.return_conn(conn) assert os.path.isfile(db_path) p = p.drop() db_path = os.path.abspath("_storage/abs_path_test_quack.duckdb") - c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset", credentials=db_path)) + c = resolve_configuration( + DuckDbClientConfiguration(dataset_name="test_dataset", credentials=db_path) + ) assert os.path.isabs(c.credentials.database) - assert c.credentials.database.lower() == db_path.lower() + assert c.credentials._conn_str().lower() == db_path.lower() conn = c.credentials.borrow_conn(read_only=False) c.credentials.return_conn(conn) assert os.path.isfile(db_path) @@ -118,7 +137,9 @@ def test_duckdb_database_path() -> None: import duckdb with pytest.raises(duckdb.IOException): - c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset", credentials=TEST_STORAGE_ROOT)) + c = resolve_configuration( + DuckDbClientConfiguration(dataset_name="test_dataset", credentials=TEST_STORAGE_ROOT) + ) conn = c.credentials.borrow_conn(read_only=False) @@ -128,7 +149,7 @@ def test_keeps_initial_db_path() -> None: print(p.pipelines_dir) with p.sql_client() as conn: # still cwd - assert conn.credentials.database.lower() == os.path.abspath(db_path).lower() + assert conn.credentials._conn_str().lower() == os.path.abspath(db_path).lower() # but it is kept in the local state assert p.get_local_state_val("duckdb_database").lower() == os.path.abspath(db_path).lower() @@ -138,7 +159,7 @@ def test_keeps_initial_db_path() -> None: with p.sql_client() as conn: # still cwd assert p.get_local_state_val("duckdb_database").lower() == os.path.abspath(db_path).lower() - assert conn.credentials.database.lower() == os.path.abspath(db_path).lower() + assert conn.credentials._conn_str().lower() == os.path.abspath(db_path).lower() # now create a new pipeline dlt.pipeline(pipeline_name="not_quack", destination="dummy") @@ -147,12 +168,12 @@ def test_keeps_initial_db_path() -> None: assert p.get_local_state_val("duckdb_database").lower() == os.path.abspath(db_path).lower() # new pipeline context took over # TODO: restore pipeline context on each call - assert conn.credentials.database.lower() != os.path.abspath(db_path).lower() + assert conn.credentials._conn_str().lower() != os.path.abspath(db_path).lower() def test_duckdb_database_delete() -> None: db_path = "_storage/path_test_quack.duckdb" - p = dlt.pipeline(pipeline_name="quack_pipeline", credentials=db_path, destination="duckdb") + p = dlt.pipeline(pipeline_name="quack_pipeline", destination=duckdb(credentials=db_path)) p.run([1, 2, 3], table_name="table", dataset_name="dataset") # attach the pipeline p = dlt.attach(pipeline_name="quack_pipeline") @@ -203,7 +224,9 @@ def test_external_duckdb_database() -> None: # pass explicit in memory database conn = duckdb.connect(":memory:") - c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset", credentials=conn)) + c = resolve_configuration( + 
DuckDbClientConfiguration(dataset_name="test_dataset", credentials=conn) + ) assert c.credentials._conn_borrows == 0 assert c.credentials._conn is conn int_conn = c.credentials.borrow_conn(read_only=False) @@ -215,6 +238,7 @@ def test_external_duckdb_database() -> None: assert hasattr(c.credentials, "_conn") conn.close() + def test_default_duckdb_dataset_name() -> None: # Check if dataset_name does not collide with pipeline_name data = ["a", "b", "c"] diff --git a/tests/load/duckdb/test_duckdb_table_builder.py b/tests/load/duckdb/test_duckdb_table_builder.py index 247d134b06..0e6f799047 100644 --- a/tests/load/duckdb/test_duckdb_table_builder.py +++ b/tests/load/duckdb/test_duckdb_table_builder.py @@ -5,8 +5,8 @@ from dlt.common.utils import uniq_id from dlt.common.schema import Schema -from dlt.destinations.duckdb.duck import DuckDbClient -from dlt.destinations.duckdb.configuration import DuckDbClientConfiguration +from dlt.destinations.impl.duckdb.duck import DuckDbClient +from dlt.destinations.impl.duckdb.configuration import DuckDbClientConfiguration from tests.load.utils import TABLE_UPDATE @@ -82,7 +82,7 @@ def test_create_table_with_hints(client: DuckDbClient) -> None: mod_update[0]["sort"] = True mod_update[1]["unique"] = True mod_update[4]["foreign_key"] = True - sql = ';'.join(client._get_table_update_sql("event_test_table", mod_update, False)) + sql = ";".join(client._get_table_update_sql("event_test_table", mod_update, False)) assert '"col1" BIGINT NOT NULL' in sql assert '"col2" DOUBLE NOT NULL' in sql assert '"col5" VARCHAR ' in sql @@ -92,7 +92,10 @@ def test_create_table_with_hints(client: DuckDbClient) -> None: assert '"col4" TIMESTAMP WITH TIME ZONE NOT NULL' in sql # same thing with indexes - client = DuckDbClient(client.schema, DuckDbClientConfiguration(dataset_name="test_" + uniq_id(), create_indexes=True)) + client = DuckDbClient( + client.schema, + DuckDbClientConfiguration(dataset_name="test_" + uniq_id(), create_indexes=True), + ) sql = client._get_table_update_sql("event_test_table", mod_update, False)[0] sqlfluff.parse(sql) assert '"col2" DOUBLE UNIQUE NOT NULL' in sql diff --git a/tests/load/duckdb/test_motherduck_client.py b/tests/load/duckdb/test_motherduck_client.py index 4a167fa016..d57cf58f53 100644 --- a/tests/load/duckdb/test_motherduck_client.py +++ b/tests/load/duckdb/test_motherduck_client.py @@ -3,12 +3,16 @@ from dlt.common.configuration.resolve import resolve_configuration -from dlt.destinations.motherduck.configuration import MotherDuckCredentials, MotherDuckClientConfiguration +from dlt.destinations.impl.motherduck.configuration import ( + MotherDuckCredentials, + MotherDuckClientConfiguration, +) from tests.utils import patch_home_dir, preserve_environ, skip_if_not_active skip_if_not_active("motherduck") + def test_motherduck_database() -> None: # set HOME env otherwise some internal components in duckdb (HTTPS) do not initialize os.environ["HOME"] = "/tmp" @@ -20,7 +24,9 @@ def test_motherduck_database() -> None: cred.parse_native_representation("md:///?token=TOKEN") assert cred.password == "TOKEN" - config = resolve_configuration(MotherDuckClientConfiguration(dataset_name="test"), sections=("destination", "motherduck")) + config = resolve_configuration( + MotherDuckClientConfiguration(dataset_name="test"), sections=("destination", "motherduck") + ) # connect con = config.credentials.borrow_conn(read_only=False) con.sql("SHOW DATABASES") diff --git a/tests/load/filesystem/test_aws_credentials.py b/tests/load/filesystem/test_aws_credentials.py
index d34bc7ed24..7a0d42eb6d 100644 --- a/tests/load/filesystem/test_aws_credentials.py +++ b/tests/load/filesystem/test_aws_credentials.py @@ -10,8 +10,8 @@ from tests.load.utils import ALL_FILESYSTEM_DRIVERS from tests.utils import preserve_environ, autouse_test_storage -if 's3' not in ALL_FILESYSTEM_DRIVERS: - pytest.skip('s3 filesystem driver not configured', allow_module_level=True) +if "s3" not in ALL_FILESYSTEM_DRIVERS: + pytest.skip("s3 filesystem driver not configured", allow_module_level=True) def test_aws_credentials_resolved_from_default(environment: Dict[str, str]) -> None: @@ -19,9 +19,9 @@ def test_aws_credentials_resolved_from_default(environment: Dict[str, str]) -> N config = resolve_configuration(AwsCredentials()) - assert config.aws_access_key_id == 'fake_access_key' - assert config.aws_secret_access_key == 'fake_secret_key' - assert config.aws_session_token == 'fake_session_token' + assert config.aws_access_key_id == "fake_access_key" + assert config.aws_secret_access_key == "fake_secret_key" + assert config.aws_session_token == "fake_session_token" # we do not set the profile assert config.profile_name is None @@ -43,7 +43,7 @@ def test_aws_credentials_from_botocore(environment: Dict[str, str]) -> None: import botocore.session session = botocore.session.get_session() - region_name = 'eu-central-1' # session.get_config_variable('region') + region_name = "eu-central-1" # session.get_config_variable('region') c = AwsCredentials(session) assert c.profile_name is None @@ -60,9 +60,7 @@ def test_aws_credentials_from_botocore(environment: Dict[str, str]) -> None: "token": "fake_session_token", "profile": None, "endpoint_url": None, - "client_kwargs": { - "region_name": region_name - } + "client_kwargs": {"region_name": region_name}, } c = AwsCredentials() @@ -112,18 +110,18 @@ def test_aws_credentials_for_profile(environment: Dict[str, str]) -> None: c.profile_name = "dlt-ci-user" try: c = resolve_configuration(c) - assert digest128(c.aws_access_key_id) == 'S3r3CtEf074HjqVeHKj/' + assert digest128(c.aws_access_key_id) == "S3r3CtEf074HjqVeHKj/" except botocore.exceptions.ProfileNotFound: pytest.skip("This test requires dlt-ci-user aws profile to be present") def test_aws_credentials_with_endpoint_url(environment: Dict[str, str]) -> None: set_aws_credentials_env(environment) - environment['CREDENTIALS__ENDPOINT_URL'] = 'https://123.r2.cloudflarestorage.com' + environment["CREDENTIALS__ENDPOINT_URL"] = "https://123.r2.cloudflarestorage.com" config = resolve_configuration(AwsCredentials()) - assert config.endpoint_url == 'https://123.r2.cloudflarestorage.com' + assert config.endpoint_url == "https://123.r2.cloudflarestorage.com" assert config.to_s3fs_credentials() == { "key": "fake_access_key", @@ -131,14 +129,12 @@ def test_aws_credentials_with_endpoint_url(environment: Dict[str, str]) -> None: "token": "fake_session_token", "profile": None, "endpoint_url": "https://123.r2.cloudflarestorage.com", - "client_kwargs": { - "region_name": 'eu-central-1' - } + "client_kwargs": {"region_name": "eu-central-1"}, } def set_aws_credentials_env(environment: Dict[str, str]) -> None: - environment['AWS_ACCESS_KEY_ID'] = 'fake_access_key' - environment['AWS_SECRET_ACCESS_KEY'] = 'fake_secret_key' - environment['AWS_SESSION_TOKEN'] = 'fake_session_token' - environment["AWS_DEFAULT_REGION"] = environment['REGION_NAME'] = 'eu-central-1' + environment["AWS_ACCESS_KEY_ID"] = "fake_access_key" + environment["AWS_SECRET_ACCESS_KEY"] = "fake_secret_key" + environment["AWS_SESSION_TOKEN"] = 
"fake_session_token" + environment["AWS_DEFAULT_REGION"] = environment["REGION_NAME"] = "eu-central-1" diff --git a/tests/load/filesystem/test_azure_credentials.py b/tests/load/filesystem/test_azure_credentials.py index b9cf10a05a..093cd6dd19 100644 --- a/tests/load/filesystem/test_azure_credentials.py +++ b/tests/load/filesystem/test_azure_credentials.py @@ -11,54 +11,57 @@ from tests.common.configuration.utils import environment from tests.utils import preserve_environ, autouse_test_storage -if 'az' not in ALL_FILESYSTEM_DRIVERS: - pytest.skip('az filesystem driver not configured', allow_module_level=True) +if "az" not in ALL_FILESYSTEM_DRIVERS: + pytest.skip("az filesystem driver not configured", allow_module_level=True) def test_azure_credentials_from_account_key(environment: Dict[str, str]) -> None: - environment['CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME'] = 'fake_account_name' - environment['CREDENTIALS__AZURE_STORAGE_ACCOUNT_KEY'] = "QWERTYUIOPASDFGHJKLZXCVBNM1234567890" + environment["CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME"] = "fake_account_name" + environment["CREDENTIALS__AZURE_STORAGE_ACCOUNT_KEY"] = "QWERTYUIOPASDFGHJKLZXCVBNM1234567890" config = resolve_configuration(AzureCredentials()) # Verify sas token is generated with correct permissions and expiry time sas_params = parse_qs(config.azure_storage_sas_token) - permissions = set(sas_params['sp'][0]) - assert permissions == {'r', 'w', 'd', 'l', 'a', 'c'} + permissions = set(sas_params["sp"][0]) + assert permissions == {"r", "w", "d", "l", "a", "c"} - exp = ensure_pendulum_datetime(sas_params['se'][0]) + exp = ensure_pendulum_datetime(sas_params["se"][0]) assert exp > pendulum.now().add(hours=23) def test_create_azure_sas_token_with_permissions(environment: Dict[str, str]) -> None: - environment['CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME'] = 'fake_account_name' - environment['CREDENTIALS__AZURE_STORAGE_ACCOUNT_KEY'] = "QWERTYUIOPASDFGHJKLZXCVBNM1234567890" - environment['CREDENTIALS__AZURE_SAS_TOKEN_PERMISSIONS'] = "rl" + environment["CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME"] = "fake_account_name" + environment["CREDENTIALS__AZURE_STORAGE_ACCOUNT_KEY"] = "QWERTYUIOPASDFGHJKLZXCVBNM1234567890" + environment["CREDENTIALS__AZURE_SAS_TOKEN_PERMISSIONS"] = "rl" config = resolve_configuration(AzureCredentials()) sas_params = parse_qs(config.azure_storage_sas_token) - permissions = set(sas_params['sp'][0]) - assert permissions == {'r', 'l'} - + permissions = set(sas_params["sp"][0]) + assert permissions == {"r", "l"} def test_azure_credentials_from_sas_token(environment: Dict[str, str]) -> None: - environment['CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME'] = 'fake_account_name' - environment['CREDENTIALS__AZURE_STORAGE_SAS_TOKEN'] = "sp=rwdlacx&se=2021-01-01T00:00:00Z&sv=2019-12-12&sr=c&sig=1234567890" + environment["CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME"] = "fake_account_name" + environment["CREDENTIALS__AZURE_STORAGE_SAS_TOKEN"] = ( + "sp=rwdlacx&se=2021-01-01T00:00:00Z&sv=2019-12-12&sr=c&sig=1234567890" + ) config = resolve_configuration(AzureCredentials()) - assert config.azure_storage_sas_token == environment['CREDENTIALS__AZURE_STORAGE_SAS_TOKEN'] - assert config.azure_storage_account_name == environment['CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME'] + assert config.azure_storage_sas_token == environment["CREDENTIALS__AZURE_STORAGE_SAS_TOKEN"] + assert ( + config.azure_storage_account_name == environment["CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME"] + ) assert config.azure_storage_account_key is None assert config.to_adlfs_credentials() == 
{ - 'account_name': environment['CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME'], - 'account_key': None, - 'sas_token': environment['CREDENTIALS__AZURE_STORAGE_SAS_TOKEN'], + "account_name": environment["CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME"], + "account_key": None, + "sas_token": environment["CREDENTIALS__AZURE_STORAGE_SAS_TOKEN"], } @@ -68,22 +71,24 @@ def test_azure_credentials_missing_account_name(environment: Dict[str, str]) -> ex = excinfo.value - assert 'azure_storage_account_name' in ex.fields + assert "azure_storage_account_name" in ex.fields def test_azure_credentials_from_default(environment: Dict[str, str]) -> None: - environment['CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME'] = 'fake_account_name' + environment["CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME"] = "fake_account_name" config = resolve_configuration(AzureCredentials()) - assert config.azure_storage_account_name == environment['CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME'] + assert ( + config.azure_storage_account_name == environment["CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME"] + ) assert config.azure_storage_account_key is None assert config.azure_storage_sas_token is None # fsspec args should have anon=True when using system credentials assert config.to_adlfs_credentials() == { - 'account_name': environment['CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME'], - 'account_key': None, - 'sas_token': None, - 'anon': False + "account_name": environment["CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME"], + "account_key": None, + "sas_token": None, + "anon": False, } diff --git a/tests/load/filesystem/test_filesystem_client.py b/tests/load/filesystem/test_filesystem_client.py index f290892e18..56da484abc 100644 --- a/tests/load/filesystem/test_filesystem_client.py +++ b/tests/load/filesystem/test_filesystem_client.py @@ -4,14 +4,18 @@ import pytest from dlt.common.utils import digest128, uniq_id -from dlt.common.storages import LoadStorage, FileStorage +from dlt.common.storages import FileStorage, ParsedLoadJobFileName -from dlt.destinations.filesystem.filesystem import LoadFilesystemJob, FilesystemDestinationClientConfiguration +from dlt.destinations.impl.filesystem.filesystem import ( + LoadFilesystemJob, + FilesystemDestinationClientConfiguration, +) from tests.load.filesystem.utils import perform_load from tests.utils import clean_test_storage, init_test_logging from tests.utils import preserve_environ, autouse_test_storage + @pytest.fixture(autouse=True) def storage() -> FileStorage: return clean_test_storage(init_normalize=True, init_loader=True) @@ -24,34 +28,38 @@ def logger_autouse() -> None: NORMALIZED_FILES = [ "event_user.839c6e6b514e427687586ccc65bf133f.0.jsonl", - "event_loop_interrupted.839c6e6b514e427687586ccc65bf133f.0.jsonl" + "event_loop_interrupted.839c6e6b514e427687586ccc65bf133f.0.jsonl", ] ALL_LAYOUTS = ( None, - "{schema_name}/{table_name}/{load_id}.{file_id}.{ext}", # new default layout with schema - "{schema_name}.{table_name}.{load_id}.{file_id}.{ext}", # classic layout - "{table_name}88{load_id}-u-{file_id}.{ext}" # default layout with strange separators + "{schema_name}/{table_name}/{load_id}.{file_id}.{ext}", # new default layout with schema + "{schema_name}.{table_name}.{load_id}.{file_id}.{ext}", # classic layout + "{table_name}88{load_id}-u-{file_id}.{ext}", # default layout with strange separators ) def test_filesystem_destination_configuration() -> None: assert FilesystemDestinationClientConfiguration().fingerprint() == "" - assert FilesystemDestinationClientConfiguration(bucket_url="s3://cool").fingerprint() == 
digest128("s3://cool") + assert FilesystemDestinationClientConfiguration( + bucket_url="s3://cool" + ).fingerprint() == digest128("s3://cool") -@pytest.mark.parametrize('write_disposition', ('replace', 'append', 'merge')) -@pytest.mark.parametrize('layout', ALL_LAYOUTS) +@pytest.mark.parametrize("write_disposition", ("replace", "append", "merge")) +@pytest.mark.parametrize("layout", ALL_LAYOUTS) def test_successful_load(write_disposition: str, layout: str, default_buckets_env: str) -> None: """Test load is successful with an empty destination dataset""" if layout: - os.environ['DESTINATION__FILESYSTEM__LAYOUT'] = layout + os.environ["DESTINATION__FILESYSTEM__LAYOUT"] = layout else: os.environ.pop("DESTINATION__FILESYSTEM__LAYOUT", None) - dataset_name = 'test_' + uniq_id() + dataset_name = "test_" + uniq_id() - with perform_load(dataset_name, NORMALIZED_FILES, write_disposition=write_disposition) as load_info: + with perform_load( + dataset_name, NORMALIZED_FILES, write_disposition=write_disposition + ) as load_info: client, jobs, _, load_id = load_info layout = client.config.layout dataset_path = posixpath.join(client.fs_path, client.config.dataset_name) @@ -62,77 +70,99 @@ def test_successful_load(write_disposition: str, layout: str, default_buckets_en # Sanity check, there are jobs assert jobs for job in jobs: - assert job.state() == 'completed' - job_info = LoadStorage.parse_job_file_name(job.file_name()) + assert job.state() == "completed" + job_info = ParsedLoadJobFileName.parse(job.file_name()) destination_path = posixpath.join( dataset_path, - layout.format(schema_name=client.schema.name, table_name=job_info.table_name, load_id=load_id, file_id=job_info.file_id, ext=job_info.file_format) + layout.format( + schema_name=client.schema.name, + table_name=job_info.table_name, + load_id=load_id, + file_id=job_info.file_id, + ext=job_info.file_format, + ), ) # File is created with correct filename and path assert client.fs_client.isfile(destination_path) -@pytest.mark.parametrize('layout', ALL_LAYOUTS) +@pytest.mark.parametrize("layout", ALL_LAYOUTS) def test_replace_write_disposition(layout: str, default_buckets_env: str) -> None: if layout: - os.environ['DESTINATION__FILESYSTEM__LAYOUT'] = layout + os.environ["DESTINATION__FILESYSTEM__LAYOUT"] = layout else: os.environ.pop("DESTINATION__FILESYSTEM__LAYOUT", None) - dataset_name = 'test_' + uniq_id() + dataset_name = "test_" + uniq_id() # NOTE: context manager will delete the dataset at the end so keep it open until the end - with perform_load(dataset_name, NORMALIZED_FILES, write_disposition='replace') as load_info: + with perform_load(dataset_name, NORMALIZED_FILES, write_disposition="replace") as load_info: client, _, root_path, load_id1 = load_info layout = client.config.layout # this path will be kept after replace job_2_load_1_path = posixpath.join( root_path, - LoadFilesystemJob.make_destination_filename(layout, NORMALIZED_FILES[1], client.schema.name, load_id1) + LoadFilesystemJob.make_destination_filename( + layout, NORMALIZED_FILES[1], client.schema.name, load_id1 + ), ) - with perform_load(dataset_name, [NORMALIZED_FILES[0]], write_disposition='replace') as load_info: + with perform_load( + dataset_name, [NORMALIZED_FILES[0]], write_disposition="replace" + ) as load_info: client, _, root_path, load_id2 = load_info # this one we expect to be replaced with job_1_load_2_path = posixpath.join( root_path, - LoadFilesystemJob.make_destination_filename(layout, NORMALIZED_FILES[0], client.schema.name, load_id2) + 
LoadFilesystemJob.make_destination_filename( + layout, NORMALIZED_FILES[0], client.schema.name, load_id2 + ), ) # First file from load1 remains, second file is replaced by load2 # assert that only these two files are in the destination folder paths = [] - for basedir, _dirs, files in client.fs_client.walk(client.dataset_path, detail=False, refresh=True): + for basedir, _dirs, files in client.fs_client.walk( + client.dataset_path, detail=False, refresh=True + ): for f in files: paths.append(posixpath.join(basedir, f)) ls = set(paths) assert ls == {job_2_load_1_path, job_1_load_2_path} -@pytest.mark.parametrize('layout', ALL_LAYOUTS) +@pytest.mark.parametrize("layout", ALL_LAYOUTS) def test_append_write_disposition(layout: str, default_buckets_env: str) -> None: """Run load twice with append write_disposition and assert that there are two copies of each file in destination""" if layout: - os.environ['DESTINATION__FILESYSTEM__LAYOUT'] = layout + os.environ["DESTINATION__FILESYSTEM__LAYOUT"] = layout else: os.environ.pop("DESTINATION__FILESYSTEM__LAYOUT", None) - dataset_name = 'test_' + uniq_id() + dataset_name = "test_" + uniq_id() # NOTE: context manager will delete the dataset at the end so keep it open until the end - with perform_load(dataset_name, NORMALIZED_FILES, write_disposition='append') as load_info: + with perform_load(dataset_name, NORMALIZED_FILES, write_disposition="append") as load_info: client, jobs1, root_path, load_id1 = load_info - with perform_load(dataset_name, NORMALIZED_FILES, write_disposition='append') as load_info: + with perform_load(dataset_name, NORMALIZED_FILES, write_disposition="append") as load_info: client, jobs2, root_path, load_id2 = load_info layout = client.config.layout expected_files = [ - LoadFilesystemJob.make_destination_filename(layout, job.file_name(), client.schema.name, load_id1) for job in jobs1 + LoadFilesystemJob.make_destination_filename( + layout, job.file_name(), client.schema.name, load_id1 + ) + for job in jobs1 ] + [ - LoadFilesystemJob.make_destination_filename(layout, job.file_name(), client.schema.name, load_id2) for job in jobs2 + LoadFilesystemJob.make_destination_filename( + layout, job.file_name(), client.schema.name, load_id2 + ) + for job in jobs2 ] expected_files = sorted([posixpath.join(root_path, fn) for fn in expected_files]) paths = [] - for basedir, _dirs, files in client.fs_client.walk(client.dataset_path, detail=False, refresh=True): + for basedir, _dirs, files in client.fs_client.walk( + client.dataset_path, detail=False, refresh=True + ): for f in files: paths.append(posixpath.join(basedir, f)) assert list(sorted(paths)) == expected_files diff --git a/tests/load/filesystem/test_filesystem_common.py b/tests/load/filesystem/test_filesystem_common.py index caf43ca47c..92cce62160 100644 --- a/tests/load/filesystem/test_filesystem_common.py +++ b/tests/load/filesystem/test_filesystem_common.py @@ -25,13 +25,16 @@ def test_filesystem_configuration() -> None: config = FilesystemConfiguration(bucket_url="az://root") assert config.protocol == "az" # print(config.resolve_credentials_type()) - assert config.resolve_credentials_type() == Union[AzureCredentialsWithoutDefaults, AzureCredentials] + assert ( + config.resolve_credentials_type() + == Union[AzureCredentialsWithoutDefaults, AzureCredentials] + ) # make sure that only bucket_url and credentials are there - assert dict(config) == {'bucket_url': 'az://root', 'credentials': None} + assert dict(config) == {"bucket_url": "az://root", "credentials": None} def 
test_filesystem_instance(all_buckets_env: str) -> None: - bucket_url = os.environ['DESTINATION__FILESYSTEM__BUCKET_URL'] + bucket_url = os.environ["DESTINATION__FILESYSTEM__BUCKET_URL"] config = get_config() assert bucket_url.startswith(config.protocol) filesystem, url = fsspec_from_config(config) @@ -54,7 +57,7 @@ def test_filesystem_instance(all_buckets_env: str) -> None: @pytest.mark.parametrize("load_content", (True, False)) def test_filesystem_dict(default_buckets_env: str, load_content: bool) -> None: - bucket_url = os.environ['DESTINATION__FILESYSTEM__BUCKET_URL'] + bucket_url = os.environ["DESTINATION__FILESYSTEM__BUCKET_URL"] config = get_config() if config.protocol in ["memory", "file"]: pytest.skip(f"{config.protocol} not supported in this test") @@ -62,7 +65,9 @@ def test_filesystem_dict(default_buckets_env: str, load_content: bool) -> None: filesystem, _ = fsspec_from_config(config) # use glob to get data try: - all_file_items = list(glob_files(filesystem, posixpath.join(bucket_url, glob_folder, "samples"))) + all_file_items = list( + glob_files(filesystem, posixpath.join(bucket_url, glob_folder, "samples")) + ) assert_sample_files(all_file_items, filesystem, config, load_content) except NotImplementedError as ex: pytest.skip("Skipping due to " + str(ex)) @@ -74,17 +79,18 @@ def test_filesystem_instance_from_s3_endpoint(environment: Dict[str, str]) -> No E.g. when using an S3 compatible service such as Cloudflare R2 """ from s3fs import S3FileSystem - environment['DESTINATION__FILESYSTEM__BUCKET_URL'] = 's3://dummy-bucket' - environment['CREDENTIALS__ENDPOINT_URL'] = 'https://fake-s3-endpoint.example.com' - environment['CREDENTIALS__AWS_ACCESS_KEY_ID'] = 'fake-access-key' - environment['CREDENTIALS__AWS_SECRET_ACCESS_KEY'] = 'fake-secret-key' + + environment["DESTINATION__FILESYSTEM__BUCKET_URL"] = "s3://dummy-bucket" + environment["CREDENTIALS__ENDPOINT_URL"] = "https://fake-s3-endpoint.example.com" + environment["CREDENTIALS__AWS_ACCESS_KEY_ID"] = "fake-access-key" + environment["CREDENTIALS__AWS_SECRET_ACCESS_KEY"] = "fake-secret-key" config = get_config() filesystem, bucket_name = fsspec_from_config(config) assert isinstance(filesystem, S3FileSystem) - assert filesystem.endpoint_url == 'https://fake-s3-endpoint.example.com' - assert bucket_name == 'dummy-bucket' - assert filesystem.key == 'fake-access-key' - assert filesystem.secret == 'fake-secret-key' + assert filesystem.endpoint_url == "https://fake-s3-endpoint.example.com" + assert bucket_name == "dummy-bucket" + assert filesystem.key == "fake-access-key" + assert filesystem.secret == "fake-secret-key" diff --git a/tests/load/filesystem/utils.py b/tests/load/filesystem/utils.py index eebfa6e87c..d03e43bed5 100644 --- a/tests/load/filesystem/utils.py +++ b/tests/load/filesystem/utils.py @@ -5,27 +5,24 @@ from dlt.load import Load from dlt.common.configuration.container import Container from dlt.common.configuration.specs.config_section_context import ConfigSectionContext -from dlt.common.destination.reference import DestinationReference, LoadJob +from dlt.common.destination.reference import Destination, LoadJob, TDestination from dlt.destinations import filesystem -from dlt.destinations.filesystem.filesystem import FilesystemClient +from dlt.destinations.impl.filesystem.filesystem import FilesystemClient from dlt.destinations.job_impl import EmptyLoadJob from tests.load.utils import prepare_load_package def setup_loader(dataset_name: str) -> Load: - destination: DestinationReference = filesystem # type: 
ignore[assignment] - config = filesystem.spec()(dataset_name=dataset_name) + destination: TDestination = filesystem() # type: ignore[assignment] + config = filesystem.spec(dataset_name=dataset_name) # setup loader - with Container().injectable_context(ConfigSectionContext(sections=('filesystem',))): - return Load( - destination, - initial_client_config=config - ) + with Container().injectable_context(ConfigSectionContext(sections=("filesystem",))): + return Load(destination, initial_client_config=config) @contextmanager def perform_load( - dataset_name: str, cases: Sequence[str], write_disposition: str='append' + dataset_name: str, cases: Sequence[str], write_disposition: str = "append" ) -> Iterator[Tuple[FilesystemClient, List[LoadJob], str, str]]: load = setup_loader(dataset_name) load_id, schema = prepare_load_package(load.load_storage, cases, write_disposition) @@ -33,9 +30,9 @@ def perform_load( # for the replace disposition in the loader we truncate the tables, so do this here truncate_tables = [] - if write_disposition == 'replace': + if write_disposition == "replace": for item in cases: - parts = item.split('.') + parts = item.split(".") truncate_tables.append(parts[0]) client.initialize_storage(truncate_tables=truncate_tables) diff --git a/tests/load/mssql/test_mssql_credentials.py b/tests/load/mssql/test_mssql_credentials.py index 9b57692bb2..0098d228f1 100644 --- a/tests/load/mssql/test_mssql_credentials.py +++ b/tests/load/mssql/test_mssql_credentials.py @@ -1,24 +1,112 @@ +import pyodbc +import pytest + from dlt.common.configuration import resolve_configuration +from dlt.common.exceptions import SystemConfigurationException + +from dlt.destinations.impl.mssql.configuration import MsSqlCredentials, SUPPORTED_DRIVERS -from dlt.destinations.mssql.configuration import MsSqlCredentials +def test_parse_native_representation_unsupported_driver_specified() -> None: + # Case: unsupported driver specified. + with pytest.raises(SystemConfigurationException): + resolve_configuration( + MsSqlCredentials( + "mssql://test_user:test_password@sql.example.com:12345/test_db?DRIVER=foo" + ) + ) -def test_to_odbc_dsn() -> None: +def test_to_odbc_dsn_supported_driver_specified() -> None: + # Case: supported driver specified — ODBC Driver 18 for SQL Server. creds = resolve_configuration( - MsSqlCredentials("mssql://test_user:test_password@sql.example.com:12345/test_db?FOO=a&BAR=b") + MsSqlCredentials( + "mssql://test_user:test_password@sql.example.com:12345/test_db?DRIVER=ODBC+Driver+18+for+SQL+Server" + ) ) + dsn = creds.to_odbc_dsn() + result = {k: v for k, v in (param.split("=") for param in dsn.split(";"))} + assert result == { + "DRIVER": "ODBC Driver 18 for SQL Server", + "SERVER": "sql.example.com,12345", + "DATABASE": "test_db", + "UID": "test_user", + "PWD": "test_password", + } + # Case: supported driver specified — ODBC Driver 17 for SQL Server. + creds = resolve_configuration( + MsSqlCredentials( + "mssql://test_user:test_password@sql.example.com:12345/test_db?DRIVER=ODBC+Driver+17+for+SQL+Server" + ) + ) dsn = creds.to_odbc_dsn() + result = {k: v for k, v in (param.split("=") for param in dsn.split(";"))} + assert result == { + "DRIVER": "ODBC Driver 17 for SQL Server", + "SERVER": "sql.example.com,12345", + "DATABASE": "test_db", + "UID": "test_user", + "PWD": "test_password", + } - result = {k: v for k, v in (param.split('=') for param in dsn.split(";"))} +def test_to_odbc_dsn_arbitrary_keys_specified() -> None: + # Case: arbitrary query keys (and supported driver) specified. 
+ creds = resolve_configuration( + MsSqlCredentials( + "mssql://test_user:test_password@sql.example.com:12345/test_db?FOO=a&BAR=b&DRIVER=ODBC+Driver+18+for+SQL+Server" + ) + ) + dsn = creds.to_odbc_dsn() + result = {k: v for k, v in (param.split("=") for param in dsn.split(";"))} assert result == { - 'DRIVER': 'ODBC Driver 18 for SQL Server', - 'SERVER': 'sql.example.com,12345', - 'DATABASE': 'test_db', - 'UID': 'test_user', - 'PWD': 'test_password', - 'FOO': 'a', - 'BAR': 'b' + "DRIVER": "ODBC Driver 18 for SQL Server", + "SERVER": "sql.example.com,12345", + "DATABASE": "test_db", + "UID": "test_user", + "PWD": "test_password", + "FOO": "a", + "BAR": "b", } + + # Case: arbitrary capitalization. + creds = resolve_configuration( + MsSqlCredentials( + "mssql://test_user:test_password@sql.example.com:12345/test_db?FOO=a&bar=b&Driver=ODBC+Driver+18+for+SQL+Server" + ) + ) + dsn = creds.to_odbc_dsn() + result = {k: v for k, v in (param.split("=") for param in dsn.split(";"))} + assert result == { + "DRIVER": "ODBC Driver 18 for SQL Server", + "SERVER": "sql.example.com,12345", + "DATABASE": "test_db", + "UID": "test_user", + "PWD": "test_password", + "FOO": "a", + "BAR": "b", + } + + +available_drivers = [d for d in pyodbc.drivers() if d in SUPPORTED_DRIVERS] + + +@pytest.mark.skipif(not available_drivers, reason="no supported driver available") +def test_to_odbc_dsn_driver_not_specified() -> None: + # Case: driver not specified, but supported driver is available. + creds = resolve_configuration( + MsSqlCredentials("mssql://test_user:test_password@sql.example.com:12345/test_db") + ) + dsn = creds.to_odbc_dsn() + result = {k: v for k, v in (param.split("=") for param in dsn.split(";"))} + assert result in [ + { + "DRIVER": d, + "SERVER": "sql.example.com,12345", + "DATABASE": "test_db", + "UID": "test_user", + "PWD": "test_password", + } + for d in SUPPORTED_DRIVERS + ] diff --git a/tests/load/mssql/test_mssql_table_builder.py b/tests/load/mssql/test_mssql_table_builder.py index 4f5a6637d6..f7e0ce53ff 100644 --- a/tests/load/mssql/test_mssql_table_builder.py +++ b/tests/load/mssql/test_mssql_table_builder.py @@ -7,11 +7,12 @@ pytest.importorskip("dlt.destinations.mssql.mssql", reason="MSSQL ODBC driver not installed") -from dlt.destinations.mssql.mssql import MsSqlClient -from dlt.destinations.mssql.configuration import MsSqlClientConfiguration, MsSqlCredentials +from dlt.destinations.impl.mssql.mssql import MsSqlClient +from dlt.destinations.impl.mssql.configuration import MsSqlClientConfiguration, MsSqlCredentials from tests.load.utils import TABLE_UPDATE + @pytest.fixture def schema() -> Schema: return Schema("event") @@ -20,7 +21,10 @@ def schema() -> Schema: @pytest.fixture def client(schema: Schema) -> MsSqlClient: # return client without opening connection - return MsSqlClient(schema, MsSqlClientConfiguration(dataset_name="test_" + uniq_id(), credentials=MsSqlCredentials())) + return MsSqlClient( + schema, + MsSqlClientConfiguration(dataset_name="test_" + uniq_id(), credentials=MsSqlCredentials()), + ) def test_create_table(client: MsSqlClient) -> None: diff --git a/tests/load/pipeline/conftest.py b/tests/load/pipeline/conftest.py index 76dc74a555..34227a8041 100644 --- a/tests/load/pipeline/conftest.py +++ b/tests/load/pipeline/conftest.py @@ -1,3 +1,8 @@ -from tests.utils import patch_home_dir, preserve_environ, autouse_test_storage, duckdb_pipeline_location +from tests.utils import ( + patch_home_dir, + preserve_environ, + autouse_test_storage, + duckdb_pipeline_location, +) from 
tests.pipeline.utils import drop_dataset_from_env from tests.load.pipeline.utils import drop_pipeline diff --git a/tests/load/pipeline/test_arrow_loading.py b/tests/load/pipeline/test_arrow_loading.py index bd709e764d..4a3c209c32 100644 --- a/tests/load/pipeline/test_arrow_loading.py +++ b/tests/load/pipeline/test_arrow_loading.py @@ -18,13 +18,27 @@ from tests.cases import arrow_table_all_data_types, TArrowFormat -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, default_staging_configs=True, all_staging_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", + destinations_configs( + default_sql_configs=True, default_staging_configs=True, all_staging_configs=True + ), + ids=lambda x: x.name, +) @pytest.mark.parametrize("item_type", ["pandas", "table", "record_batch"]) -def test_load_item(item_type: Literal["pandas", "table", "record_batch"], destination_config: DestinationTestConfiguration) -> None: - os.environ['NORMALIZE__PARQUET_NORMALIZER__ADD_DLT_LOAD_ID'] = "True" - os.environ['NORMALIZE__PARQUET_NORMALIZER__ADD_DLT_ID'] = "True" - include_time = destination_config.destination not in ("athena", "redshift") # athena/redshift can't load TIME columns from parquet - item, records = arrow_table_all_data_types(item_type, include_json=False, include_time=include_time) +def test_load_item( + item_type: Literal["pandas", "table", "record_batch"], + destination_config: DestinationTestConfiguration, +) -> None: + os.environ["NORMALIZE__PARQUET_NORMALIZER__ADD_DLT_LOAD_ID"] = "True" + os.environ["NORMALIZE__PARQUET_NORMALIZER__ADD_DLT_ID"] = "True" + include_time = destination_config.destination not in ( + "athena", + "redshift", + ) # athena/redshift can't load TIME columns from parquet + item, records = arrow_table_all_data_types( + item_type, include_json=False, include_time=include_time + ) pipeline = destination_config.setup_pipeline("arrow_" + uniq_id()) @@ -54,7 +68,6 @@ def some_data(): if isinstance(row[i], memoryview): row[i] = row[i].tobytes() - if destination_config.destination == "redshift": # Binary columns are hex formatted in results for record in records: @@ -71,7 +84,9 @@ def some_data(): for row in expected: for i in range(len(row)): if isinstance(row[i], datetime): - row[i] = reduce_pendulum_datetime_precision(row[i], pipeline.destination.capabilities().timestamp_precision) + row[i] = reduce_pendulum_datetime_precision( + row[i], pipeline.destination.capabilities().timestamp_precision + ) load_id = load_info.loads_ids[0] @@ -88,9 +103,20 @@ def some_data(): @pytest.mark.no_load # Skips drop_pipeline fixture since we don't do any loading -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, default_staging_configs=True, all_staging_configs=True, default_vector_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", + destinations_configs( + default_sql_configs=True, + default_staging_configs=True, + all_staging_configs=True, + default_vector_configs=True, + ), + ids=lambda x: x.name, +) @pytest.mark.parametrize("item_type", ["table", "pandas", "record_batch"]) -def test_parquet_column_names_are_normalized(item_type: TArrowFormat, destination_config: DestinationTestConfiguration) -> None: +def test_parquet_column_names_are_normalized( + item_type: TArrowFormat, destination_config: DestinationTestConfiguration +) -> None: """Test normalizing of parquet columns in all destinations""" # Create df with column names with inconsistent 
naming conventions df = pd.DataFrame( @@ -102,7 +128,7 @@ def test_parquet_column_names_are_normalized(item_type: TArrowFormat, destinatio "e-MAIL", " pHone Number", "ADDRESS", - "CreatedAt" + "CreatedAt", ], ) @@ -122,20 +148,23 @@ def some_data(): # Find the extracted file norm_storage = pipeline._get_normalize_storage() - extract_files = [fn for fn in norm_storage.list_files_to_normalize_sorted() if fn.endswith(".parquet")] + extract_files = [ + fn for fn in norm_storage.list_files_to_normalize_sorted() if fn.endswith(".parquet") + ] assert len(extract_files) == 1 # Normalized column names according to schema naming convention - expected_column_names = [pipeline.default_schema.naming.normalize_path(col) for col in df.columns] + expected_column_names = [ + pipeline.default_schema.naming.normalize_path(col) for col in df.columns + ] new_table_name = pipeline.default_schema.naming.normalize_table_identifier("some_data") schema_columns = pipeline.default_schema.get_table_columns(new_table_name) # Schema columns are normalized - assert [c['name'] for c in schema_columns.values()] == expected_column_names - + assert [c["name"] for c in schema_columns.values()] == expected_column_names - with norm_storage.storage.open_file(extract_files[0], 'rb') as f: + with norm_storage.extracted_packages.storage.open_file(extract_files[0], "rb") as f: result_tbl = pa.parquet.read_table(f) # Parquet schema is written with normalized column names - assert result_tbl.column_names == expected_column_names + assert result_tbl.schema.names == expected_column_names diff --git a/tests/load/pipeline/test_athena.py b/tests/load/pipeline/test_athena.py index dd5baae73b..3da081d881 100644 --- a/tests/load/pipeline/test_athena.py +++ b/tests/load/pipeline/test_athena.py @@ -5,16 +5,19 @@ import dlt from dlt.common import pendulum from dlt.common.utils import uniq_id -from tests.load.pipeline.utils import load_table_counts +from tests.load.pipeline.utils import load_table_counts from tests.cases import table_update_and_row, assert_all_data_types_row from tests.pipeline.utils import assert_load_info from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, subset=["athena"]), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["athena"]), + ids=lambda x: x.name, +) def test_athena_destinations(destination_config: DestinationTestConfiguration) -> None: - pipeline = destination_config.setup_pipeline("athena_" + uniq_id(), full_refresh=True) @dlt.resource(name="items", write_disposition="append") @@ -22,19 +25,15 @@ def items(): yield { "id": 1, "name": "item", - "sub_items": [{ - "id": 101, - "name": "sub item 101" - },{ - "id": 101, - "name": "sub item 102" - }] + "sub_items": [{"id": 101, "name": "sub item 101"}, {"id": 101, "name": "sub item 102"}], } pipeline.run(items) # see if we have athena tables with items - table_counts = load_table_counts(pipeline, *[t["name"] for t in pipeline.default_schema._schema_tables.values() ]) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema._schema_tables.values()] + ) assert table_counts["items"] == 1 assert table_counts["items__sub_items"] == 2 assert table_counts["_dlt_loads"] == 1 @@ -46,25 +45,37 @@ def items2(): "id": 1, "name": "item", "new_field": "hello", - "sub_items": [{ - "id": 101, - "name": "sub item 101", - "other_new_field": 
"hello 101", - },{ - "id": 101, - "name": "sub item 102", - "other_new_field": "hello 102", - }] + "sub_items": [ + { + "id": 101, + "name": "sub item 101", + "other_new_field": "hello 101", + }, + { + "id": 101, + "name": "sub item 102", + "other_new_field": "hello 102", + }, + ], } + pipeline.run(items2) - table_counts = load_table_counts(pipeline, *[t["name"] for t in pipeline.default_schema._schema_tables.values()]) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema._schema_tables.values()] + ) assert table_counts["items"] == 2 assert table_counts["items__sub_items"] == 4 assert table_counts["_dlt_loads"] == 2 -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, subset=["athena"]), ids=lambda x: x.name) -def test_athena_all_datatypes_and_timestamps(destination_config: DestinationTestConfiguration) -> None: +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["athena"]), + ids=lambda x: x.name, +) +def test_athena_all_datatypes_and_timestamps( + destination_config: DestinationTestConfiguration, +) -> None: pipeline = destination_config.setup_pipeline("athena_" + uniq_id(), full_refresh=True) # TIME is not supported @@ -74,7 +85,7 @@ def test_athena_all_datatypes_and_timestamps(destination_config: DestinationTest @dlt.resource(table_name="data_types", write_disposition="append", columns=column_schemas) def my_resource() -> Iterator[Any]: nonlocal data_types - yield [data_types]*10 + yield [data_types] * 10 @dlt.source(max_table_nesting=0) def my_source() -> Any: @@ -89,42 +100,69 @@ def my_source() -> Any: db_row = list(db_rows[0]) # content must equal assert_all_data_types_row( - db_row[:-2], parse_complex_strings=True, timestamp_precision=sql_client.capabilities.timestamp_precision, schema=column_schemas + db_row[:-2], + parse_complex_strings=True, + timestamp_precision=sql_client.capabilities.timestamp_precision, + schema=column_schemas, ) # now let's query the data with timestamps and dates. 
# https://docs.aws.amazon.com/athena/latest/ug/engine-versions-reference-0003.html#engine-versions-reference-0003-timestamp-changes # use string representation TIMESTAMP(2) - db_rows = sql_client.execute_sql("SELECT * FROM data_types WHERE col4 = TIMESTAMP '2022-05-23 13:26:45.176'") + db_rows = sql_client.execute_sql( + "SELECT * FROM data_types WHERE col4 = TIMESTAMP '2022-05-23 13:26:45.176'" + ) assert len(db_rows) == 10 # no rows - TIMESTAMP(6) not supported - db_rows = sql_client.execute_sql("SELECT * FROM data_types WHERE col4 = TIMESTAMP '2022-05-23 13:26:45.176145'") + db_rows = sql_client.execute_sql( + "SELECT * FROM data_types WHERE col4 = TIMESTAMP '2022-05-23 13:26:45.176145'" + ) assert len(db_rows) == 0 # use pendulum # that will pass - db_rows = sql_client.execute_sql("SELECT * FROM data_types WHERE col4 = %s", pendulum.datetime(2022, 5, 23, 13, 26, 45, 176000)) + db_rows = sql_client.execute_sql( + "SELECT * FROM data_types WHERE col4 = %s", + pendulum.datetime(2022, 5, 23, 13, 26, 45, 176000), + ) assert len(db_rows) == 10 # that will return empty list - db_rows = sql_client.execute_sql("SELECT * FROM data_types WHERE col4 = %s", pendulum.datetime(2022, 5, 23, 13, 26, 45, 176145)) + db_rows = sql_client.execute_sql( + "SELECT * FROM data_types WHERE col4 = %s", + pendulum.datetime(2022, 5, 23, 13, 26, 45, 176145), + ) assert len(db_rows) == 0 # use datetime - db_rows = sql_client.execute_sql("SELECT * FROM data_types WHERE col4 = %s", datetime.datetime(2022, 5, 23, 13, 26, 45, 176000)) + db_rows = sql_client.execute_sql( + "SELECT * FROM data_types WHERE col4 = %s", + datetime.datetime(2022, 5, 23, 13, 26, 45, 176000), + ) assert len(db_rows) == 10 - db_rows = sql_client.execute_sql("SELECT * FROM data_types WHERE col4 = %s", datetime.datetime(2022, 5, 23, 13, 26, 45, 176145)) + db_rows = sql_client.execute_sql( + "SELECT * FROM data_types WHERE col4 = %s", + datetime.datetime(2022, 5, 23, 13, 26, 45, 176145), + ) assert len(db_rows) == 0 # check date db_rows = sql_client.execute_sql("SELECT * FROM data_types WHERE col10 = DATE '2023-02-27'") assert len(db_rows) == 10 - db_rows = sql_client.execute_sql("SELECT * FROM data_types WHERE col10 = %s", pendulum.date(2023, 2, 27)) + db_rows = sql_client.execute_sql( + "SELECT * FROM data_types WHERE col10 = %s", pendulum.date(2023, 2, 27) + ) assert len(db_rows) == 10 - db_rows = sql_client.execute_sql("SELECT * FROM data_types WHERE col10 = %s", datetime.date(2023, 2, 27)) + db_rows = sql_client.execute_sql( + "SELECT * FROM data_types WHERE col10 = %s", datetime.date(2023, 2, 27) + ) assert len(db_rows) == 10 -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, subset=["athena"]), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["athena"]), + ids=lambda x: x.name, +) def test_athena_blocks_time_column(destination_config: DestinationTestConfiguration) -> None: pipeline = destination_config.setup_pipeline("athena_" + uniq_id(), full_refresh=True) @@ -134,7 +172,7 @@ def test_athena_blocks_time_column(destination_config: DestinationTestConfigurat @dlt.resource(table_name="data_types", write_disposition="append", columns=column_schemas) def my_resource() -> Iterator[Any]: nonlocal data_types - yield [data_types]*10 + yield [data_types] * 10 @dlt.source(max_table_nesting=0) def my_source() -> Any: @@ -144,4 +182,7 @@ def my_source() -> Any: assert info.has_failed_jobs - assert "Athena cannot load TIME columns 
from parquet tables" in info.load_packages[0].jobs['failed_jobs'][0].failed_message + assert ( + "Athena cannot load TIME columns from parquet tables" + in info.load_packages[0].jobs["failed_jobs"][0].failed_message + ) diff --git a/tests/load/pipeline/test_dbt_helper.py b/tests/load/pipeline/test_dbt_helper.py index 37c1f0c607..11f59d5276 100644 --- a/tests/load/pipeline/test_dbt_helper.py +++ b/tests/load/pipeline/test_dbt_helper.py @@ -27,10 +27,16 @@ def dbt_venv() -> Iterator[Venv]: yield venv -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) -def test_run_jaffle_package(destination_config: DestinationTestConfiguration, dbt_venv: Venv) -> None: +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) +def test_run_jaffle_package( + destination_config: DestinationTestConfiguration, dbt_venv: Venv +) -> None: if destination_config.destination == "athena": - pytest.skip("dbt-athena requires database to be created and we don't do it in case of Jaffle") + pytest.skip( + "dbt-athena requires database to be created and we don't do it in case of Jaffle" + ) pipeline = destination_config.setup_pipeline("jaffle_jaffle", full_refresh=True) # get runner, pass the env from fixture dbt = dlt.dbt.package(pipeline, "https://github.com/dbt-labs/jaffle_shop.git", venv=dbt_venv) @@ -55,16 +61,21 @@ def test_run_jaffle_package(destination_config: DestinationTestConfiguration, db assert len(orders) == 99 -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_run_chess_dbt(destination_config: DestinationTestConfiguration, dbt_venv: Venv) -> None: from docs.examples.chess.chess import chess + if not destination_config.supports_dbt: pytest.skip("dbt is not supported for this destination configuration") # provide chess url via environ os.environ["CHESS_URL"] = "https://api.chess.com/pub/" - pipeline = destination_config.setup_pipeline("chess_games", dataset_name="chess_dbt_test", full_refresh=True) + pipeline = destination_config.setup_pipeline( + "chess_games", dataset_name="chess_dbt_test", full_refresh=True + ) assert pipeline.default_schema_name is None # get the runner for the "dbt_transform" package transforms = dlt.dbt.package(pipeline, "docs/examples/chess/dbt_transform", venv=dbt_venv) @@ -81,29 +92,44 @@ def test_run_chess_dbt(destination_config: DestinationTestConfiguration, dbt_ven transforms.run_all(source_tests_selector="source:*") # run all the tests transforms.test() - load_ids = select_data(pipeline, "SELECT load_id, schema_name, status FROM _dlt_loads ORDER BY status") + load_ids = select_data( + pipeline, "SELECT load_id, schema_name, status FROM _dlt_loads ORDER BY status" + ) assert len(load_ids) == 2 - view_player_games = select_data(pipeline, "SELECT * FROM view_player_games ORDER BY username, uuid") + view_player_games = select_data( + pipeline, "SELECT * FROM view_player_games ORDER BY username, uuid" + ) assert len(view_player_games) > 0 # run again transforms.run() # no new load ids - no new data in view table - new_load_ids = select_data(pipeline, "SELECT load_id, schema_name, status FROM _dlt_loads ORDER BY status") - new_view_player_games = select_data(pipeline, "SELECT * FROM view_player_games ORDER BY username, uuid") + new_load_ids = select_data( + 
pipeline, "SELECT load_id, schema_name, status FROM _dlt_loads ORDER BY status" + ) + new_view_player_games = select_data( + pipeline, "SELECT * FROM view_player_games ORDER BY username, uuid" + ) assert load_ids == new_load_ids assert view_player_games == new_view_player_games -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) -def test_run_chess_dbt_to_other_dataset(destination_config: DestinationTestConfiguration, dbt_venv: Venv) -> None: +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) +def test_run_chess_dbt_to_other_dataset( + destination_config: DestinationTestConfiguration, dbt_venv: Venv +) -> None: from docs.examples.chess.chess import chess + if not destination_config.supports_dbt: pytest.skip("dbt is not supported for this destination configuration") # provide chess url via environ os.environ["CHESS_URL"] = "https://api.chess.com/pub/" - pipeline = destination_config.setup_pipeline("chess_games", dataset_name="chess_dbt_test", full_refresh=True) + pipeline = destination_config.setup_pipeline( + "chess_games", dataset_name="chess_dbt_test", full_refresh=True + ) # load each schema in separate dataset pipeline.config.use_single_dataset = False # assert pipeline.default_schema_name is None @@ -126,12 +152,18 @@ def test_run_chess_dbt_to_other_dataset(destination_config: DestinationTestConfi # run tests on destination dataset where transformations actually are transforms.test(destination_dataset_name=info.dataset_name + "_" + test_suffix) # get load ids from the source dataset - load_ids = select_data(pipeline, "SELECT load_id, schema_name, status FROM _dlt_loads ORDER BY status") + load_ids = select_data( + pipeline, "SELECT load_id, schema_name, status FROM _dlt_loads ORDER BY status" + ) assert len(load_ids) == 1 # status is 0, no more entries assert load_ids[0][2] == 0 # get from destination dataset - load_ids = select_data(pipeline, "SELECT load_id, schema_name, status FROM _dlt_loads ORDER BY status", schema_name=test_suffix) + load_ids = select_data( + pipeline, + "SELECT load_id, schema_name, status FROM _dlt_loads ORDER BY status", + schema_name=test_suffix, + ) # TODO: the package is not finished, both results should be here assert len(load_ids) == 1 # status is 1, no more entries diff --git a/tests/load/pipeline/test_drop.py b/tests/load/pipeline/test_drop.py index 2a20db62b4..cd18454d7c 100644 --- a/tests/load/pipeline/test_drop.py +++ b/tests/load/pipeline/test_drop.py @@ -6,11 +6,15 @@ import pytest import dlt -from dlt.extract.source import DltResource +from dlt.extract import DltResource from dlt.common.utils import uniq_id from dlt.pipeline import helpers, state_sync, Pipeline from dlt.load import Load -from dlt.pipeline.exceptions import PipelineHasPendingDataException, PipelineNeverRan, PipelineStepFailed +from dlt.pipeline.exceptions import ( + PipelineHasPendingDataException, + PipelineNeverRan, + PipelineStepFailed, +) from dlt.destinations.job_client_impl import SqlJobClientBase from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration @@ -20,40 +24,46 @@ def _attach(pipeline: Pipeline) -> Pipeline: return dlt.attach(pipeline.pipeline_name, pipeline.pipelines_dir) -@dlt.source(section='droppable', name='droppable') +@dlt.source(section="droppable", name="droppable") def droppable_source() -> List[DltResource]: @dlt.resource - def droppable_a(a: 
dlt.sources.incremental[int]=dlt.sources.incremental('a', 0)) -> Iterator[Dict[str, Any]]: + def droppable_a( + a: dlt.sources.incremental[int] = dlt.sources.incremental("a", 0) + ) -> Iterator[Dict[str, Any]]: yield dict(a=1, b=2, c=3) yield dict(a=4, b=23, c=24) - @dlt.resource - def droppable_b(asd: dlt.sources.incremental[int]=dlt.sources.incremental('asd', 0)) -> Iterator[Dict[str, Any]]: + def droppable_b( + asd: dlt.sources.incremental[int] = dlt.sources.incremental("asd", 0) + ) -> Iterator[Dict[str, Any]]: # Child table yield dict(asd=2323, qe=555, items=[dict(m=1, n=2), dict(m=3, n=4)]) - @dlt.resource - def droppable_c(qe: dlt.sources.incremental[int] = dlt.sources.incremental('qe')) -> Iterator[Dict[str, Any]]: + def droppable_c( + qe: dlt.sources.incremental[int] = dlt.sources.incremental("qe"), + ) -> Iterator[Dict[str, Any]]: # Grandchild table - yield dict(asdasd=2424, qe=111, items=[ - dict(k=2, r=2, labels=[dict(name='abc'), dict(name='www')]) - ]) + yield dict( + asdasd=2424, qe=111, items=[dict(k=2, r=2, labels=[dict(name="abc"), dict(name="www")])] + ) @dlt.resource - def droppable_d(o: dlt.sources.incremental[int] = dlt.sources.incremental('o')) -> Iterator[List[Dict[str, Any]]]: - dlt.state()['data_from_d'] = {'foo1': {'bar': 1}, 'foo2': {'bar': 2}} + def droppable_d( + o: dlt.sources.incremental[int] = dlt.sources.incremental("o"), + ) -> Iterator[List[Dict[str, Any]]]: + dlt.state()["data_from_d"] = {"foo1": {"bar": 1}, "foo2": {"bar": 2}} yield [dict(o=55), dict(o=22)] return [droppable_a(), droppable_b(), droppable_c(), droppable_d()] RESOURCE_TABLES = dict( - droppable_a=['droppable_a'], - droppable_b=['droppable_b', 'droppable_b__items'], - droppable_c=['droppable_c', 'droppable_c__items', 'droppable_c__items__labels'], - droppable_d=['droppable_d'] + droppable_a=["droppable_a"], + droppable_b=["droppable_b", "droppable_b__items"], + droppable_c=["droppable_c", "droppable_c__items", "droppable_c__items__labels"], + droppable_d=["droppable_d"], ) @@ -61,12 +71,13 @@ def assert_dropped_resources(pipeline: Pipeline, resources: List[str]) -> None: assert_dropped_resource_tables(pipeline, resources) assert_dropped_resource_states(pipeline, resources) + def assert_dropped_resource_tables(pipeline: Pipeline, resources: List[str]) -> None: # Verify only requested resource tables are removed from pipeline schema all_tables = set(chain.from_iterable(RESOURCE_TABLES.values())) dropped_tables = set(chain.from_iterable(RESOURCE_TABLES[r] for r in resources)) expected_tables = all_tables - dropped_tables - result_tables = set(t['name'] for t in pipeline.default_schema.data_tables()) + result_tables = set(t["name"] for t in pipeline.default_schema.data_tables()) assert result_tables == expected_tables # Verify requested tables are dropped from destination @@ -86,8 +97,8 @@ def assert_dropped_resource_states(pipeline: Pipeline, resources: List[str]) -> # Verify only requested resource keys are removed from state all_resources = set(RESOURCE_TABLES.keys()) expected_keys = all_resources - set(resources) - sources_state = pipeline.state['sources'] - result_keys = set(sources_state['droppable']['resources'].keys()) + sources_state = pipeline.state["sources"] + result_keys = set(sources_state["droppable"]["resources"].keys()) assert result_keys == expected_keys @@ -97,136 +108,158 @@ def assert_destination_state_loaded(pipeline: Pipeline) -> None: with pipeline.destination_client() as client: # type: ignore[assignment] destination_state = 
state_sync.load_state_from_destination(pipeline.pipeline_name, client) pipeline_state = dict(pipeline.state) - del pipeline_state['_local'] + del pipeline_state["_local"] assert pipeline_state == destination_state -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_drop_command_resources_and_state(destination_config: DestinationTestConfiguration) -> None: """Test the drop command with resource and state path options and verify correct data is deleted from destination and locally""" source = droppable_source() - pipeline = destination_config.setup_pipeline('drop_test_' + uniq_id(), full_refresh=True) + pipeline = destination_config.setup_pipeline("drop_test_" + uniq_id(), full_refresh=True) pipeline.run(source) attached = _attach(pipeline) - helpers.drop(attached, resources=['droppable_c', 'droppable_d'], state_paths='data_from_d.*.bar') + helpers.drop( + attached, resources=["droppable_c", "droppable_d"], state_paths="data_from_d.*.bar" + ) attached = _attach(pipeline) - assert_dropped_resources(attached, ['droppable_c', 'droppable_d']) + assert_dropped_resources(attached, ["droppable_c", "droppable_d"]) # Verify extra json paths are removed from state - sources_state = pipeline.state['sources'] - assert sources_state['droppable']['data_from_d'] == {'foo1': {}, 'foo2': {}} + sources_state = pipeline.state["sources"] + assert sources_state["droppable"]["data_from_d"] == {"foo1": {}, "foo2": {}} assert_destination_state_loaded(pipeline) -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_drop_command_only_state(destination_config: DestinationTestConfiguration) -> None: """Test the drop command with resource and state path options and verify correct data is deleted from destination and locally""" source = droppable_source() - pipeline = destination_config.setup_pipeline('drop_test_' + uniq_id(), full_refresh=True) + pipeline = destination_config.setup_pipeline("drop_test_" + uniq_id(), full_refresh=True) pipeline.run(source) attached = _attach(pipeline) - helpers.drop(attached, state_paths='data_from_d.*.bar') + helpers.drop(attached, state_paths="data_from_d.*.bar") attached = _attach(pipeline) assert_dropped_resources(attached, []) # Verify extra json paths are removed from state - sources_state = pipeline.state['sources'] - assert sources_state['droppable']['data_from_d'] == {'foo1': {}, 'foo2': {}} + sources_state = pipeline.state["sources"] + assert sources_state["droppable"]["data_from_d"] == {"foo1": {}, "foo2": {}} assert_destination_state_loaded(pipeline) -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_drop_destination_tables_fails(destination_config: DestinationTestConfiguration) -> None: """Fail on drop tables. 
Command runs again.""" source = droppable_source() - pipeline = destination_config.setup_pipeline('drop_test_' + uniq_id(), full_refresh=True) + pipeline = destination_config.setup_pipeline("drop_test_" + uniq_id(), full_refresh=True) pipeline.run(source) attached = _attach(pipeline) - with mock.patch.object(helpers.DropCommand, '_drop_destination_tables', side_effect=RuntimeError("Something went wrong")): + with mock.patch.object( + helpers.DropCommand, + "_drop_destination_tables", + side_effect=RuntimeError("Something went wrong"), + ): with pytest.raises(RuntimeError): - helpers.drop(attached, resources=('droppable_a', 'droppable_b')) + helpers.drop(attached, resources=("droppable_a", "droppable_b")) attached = _attach(pipeline) - helpers.drop(attached, resources=('droppable_a', 'droppable_b')) + helpers.drop(attached, resources=("droppable_a", "droppable_b")) - assert_dropped_resources(attached, ['droppable_a', 'droppable_b']) + assert_dropped_resources(attached, ["droppable_a", "droppable_b"]) assert_destination_state_loaded(attached) -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_fail_after_drop_tables(destination_config: DestinationTestConfiguration) -> None: """Fail directly after drop tables. Command runs again ignoring destination tables missing.""" source = droppable_source() - pipeline = destination_config.setup_pipeline('drop_test_' + uniq_id(), full_refresh=True) + pipeline = destination_config.setup_pipeline("drop_test_" + uniq_id(), full_refresh=True) pipeline.run(source) attached = _attach(pipeline) - with mock.patch.object(helpers.DropCommand, '_drop_state_keys', side_effect=RuntimeError("Something went wrong")): + with mock.patch.object( + helpers.DropCommand, "_drop_state_keys", side_effect=RuntimeError("Something went wrong") + ): with pytest.raises(RuntimeError): - helpers.drop(attached, resources=('droppable_a', 'droppable_b')) + helpers.drop(attached, resources=("droppable_a", "droppable_b")) attached = _attach(pipeline) - helpers.drop(attached, resources=('droppable_a', 'droppable_b')) + helpers.drop(attached, resources=("droppable_a", "droppable_b")) - assert_dropped_resources(attached, ['droppable_a', 'droppable_b']) + assert_dropped_resources(attached, ["droppable_a", "droppable_b"]) assert_destination_state_loaded(attached) -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_load_step_fails(destination_config: DestinationTestConfiguration) -> None: """Test idempotence. pipeline.load() fails. 
Command can be run again successfully""" source = droppable_source() - pipeline = destination_config.setup_pipeline('drop_test_' + uniq_id(), full_refresh=True) + pipeline = destination_config.setup_pipeline("drop_test_" + uniq_id(), full_refresh=True) pipeline.run(source) attached = _attach(pipeline) - with mock.patch.object(Load, 'run', side_effect=RuntimeError("Something went wrong")): + with mock.patch.object(Load, "run", side_effect=RuntimeError("Something went wrong")): with pytest.raises(PipelineStepFailed) as e: - helpers.drop(attached, resources=('droppable_a', 'droppable_b')) + helpers.drop(attached, resources=("droppable_a", "droppable_b")) assert isinstance(e.value.exception, RuntimeError) attached = _attach(pipeline) - helpers.drop(attached, resources=('droppable_a', 'droppable_b')) + helpers.drop(attached, resources=("droppable_a", "droppable_b")) - assert_dropped_resources(attached, ['droppable_a', 'droppable_b']) + assert_dropped_resources(attached, ["droppable_a", "droppable_b"]) assert_destination_state_loaded(attached) -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_resource_regex(destination_config: DestinationTestConfiguration) -> None: source = droppable_source() - pipeline = destination_config.setup_pipeline('drop_test_' + uniq_id(), full_refresh=True) + pipeline = destination_config.setup_pipeline("drop_test_" + uniq_id(), full_refresh=True) pipeline.run(source) attached = _attach(pipeline) - helpers.drop(attached, resources=['re:.+_b', 're:.+_a']) + helpers.drop(attached, resources=["re:.+_b", "re:.+_a"]) attached = _attach(pipeline) - assert_dropped_resources(attached, ['droppable_a', 'droppable_b']) + assert_dropped_resources(attached, ["droppable_a", "droppable_b"]) assert_destination_state_loaded(attached) -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_drop_nothing(destination_config: DestinationTestConfiguration) -> None: """No resources, no state keys. Nothing is changed.""" source = droppable_source() - pipeline = destination_config.setup_pipeline('drop_test_' + uniq_id(), full_refresh=True) + pipeline = destination_config.setup_pipeline("drop_test_" + uniq_id(), full_refresh=True) pipeline.run(source) attached = _attach(pipeline) @@ -238,13 +271,17 @@ def test_drop_nothing(destination_config: DestinationTestConfiguration) -> None: assert previous_state == attached.state -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_drop_all_flag(destination_config: DestinationTestConfiguration) -> None: """Using drop_all flag. 
Destination dataset and all local state is deleted""" source = droppable_source() - pipeline = destination_config.setup_pipeline('drop_test_' + uniq_id(), full_refresh=True) + pipeline = destination_config.setup_pipeline("drop_test_" + uniq_id(), full_refresh=True) pipeline.run(source) - dlt_tables = [t['name'] for t in pipeline.default_schema.dlt_tables()] # Original _dlt tables to check for + dlt_tables = [ + t["name"] for t in pipeline.default_schema.dlt_tables() + ] # Original _dlt tables to check for attached = _attach(pipeline) @@ -261,15 +298,17 @@ def test_drop_all_flag(destination_config: DestinationTestConfiguration) -> None assert exists -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_run_pipeline_after_partial_drop(destination_config: DestinationTestConfiguration) -> None: """Pipeline can be run again after dropping some resources""" - pipeline = destination_config.setup_pipeline('drop_test_' + uniq_id(), full_refresh=True) + pipeline = destination_config.setup_pipeline("drop_test_" + uniq_id(), full_refresh=True) pipeline.run(droppable_source()) attached = _attach(pipeline) - helpers.drop(attached, resources='droppable_a') + helpers.drop(attached, resources="droppable_a") attached = _attach(pipeline) @@ -278,30 +317,32 @@ def test_run_pipeline_after_partial_drop(destination_config: DestinationTestConf attached.load(raise_on_failed_jobs=True) -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_drop_state_only(destination_config: DestinationTestConfiguration) -> None: """Pipeline can be run again after dropping some resources""" - pipeline = destination_config.setup_pipeline('drop_test_' + uniq_id(), full_refresh=True) + pipeline = destination_config.setup_pipeline("drop_test_" + uniq_id(), full_refresh=True) pipeline.run(droppable_source()) attached = _attach(pipeline) - helpers.drop(attached, resources=('droppable_a', 'droppable_b'), state_only=True) + helpers.drop(attached, resources=("droppable_a", "droppable_b"), state_only=True) attached = _attach(pipeline) assert_dropped_resource_tables(attached, []) # No tables dropped - assert_dropped_resource_states(attached, ['droppable_a', 'droppable_b']) + assert_dropped_resource_states(attached, ["droppable_a", "droppable_b"]) assert_destination_state_loaded(attached) def test_drop_first_run_and_pending_packages() -> None: """Attempts to drop before pipeline runs and when partial loads happen""" - pipeline = dlt.pipeline('drop_test_' + uniq_id(), destination="dummy") + pipeline = dlt.pipeline("drop_test_" + uniq_id(), destination="dummy") with pytest.raises(PipelineNeverRan): helpers.drop(pipeline, "droppable_a") os.environ["COMPLETED_PROB"] = "1.0" pipeline.run(droppable_source().with_resources("droppable_a")) pipeline.extract(droppable_source().with_resources("droppable_b")) with pytest.raises(PipelineHasPendingDataException): - helpers.drop(pipeline, "droppable_a") \ No newline at end of file + helpers.drop(pipeline, "droppable_a") diff --git a/tests/load/pipeline/test_duckdb.py b/tests/load/pipeline/test_duckdb.py index c71ac37a81..6064392976 100644 --- a/tests/load/pipeline/test_duckdb.py +++ b/tests/load/pipeline/test_duckdb.py @@ 
-6,10 +6,18 @@ from dlt.pipeline.exceptions import PipelineStepFailed from tests.pipeline.utils import airtable_emojis -from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration, load_table_counts - - -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, subset=["duckdb"]), ids=lambda x: x.name) +from tests.load.pipeline.utils import ( + destinations_configs, + DestinationTestConfiguration, + load_table_counts, +) + + +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["duckdb"]), + ids=lambda x: x.name, +) def test_duck_case_names(destination_config: DestinationTestConfiguration) -> None: # we want to have nice tables # dlt.config["schema.naming"] = "duck_case" @@ -18,14 +26,16 @@ def test_duck_case_names(destination_config: DestinationTestConfiguration) -> No # create tables and columns with emojis and other special characters pipeline.run(airtable_emojis().with_resources("📆 Schedule", "🦚Peacock", "🦚WidePeacock")) pipeline.run([{"🐾Feet": 2, "1+1": "two", "\nhey": "value"}], table_name="🦚Peacocks🦚") - table_counts = load_table_counts(pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()]) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) assert table_counts == { "📆 Schedule": 3, "🦚Peacock": 1, - '🦚Peacock__peacock': 3, - '🦚Peacocks🦚': 1, - '🦚WidePeacock': 1, - '🦚WidePeacock__peacock': 3 + "🦚Peacock__peacock": 3, + "🦚Peacocks🦚": 1, + "🦚WidePeacock": 1, + "🦚WidePeacock__peacock": 3, } # this will fail - duckdb preserves case but is case insensitive when comparing identifiers @@ -38,5 +48,3 @@ def test_duck_case_names(destination_config: DestinationTestConfiguration) -> No with client.execute_query("DESCRIBE 🦚peacocks🦚;") as q: tables = q.df() assert tables["column_name"].tolist() == ["🐾Feet", "1+1", "hey", "_dlt_load_id", "_dlt_id"] - - diff --git a/tests/load/pipeline/test_filesystem_pipeline.py b/tests/load/pipeline/test_filesystem_pipeline.py index 8e810015f2..8fc4adc0c3 100644 --- a/tests/load/pipeline/test_filesystem_pipeline.py +++ b/tests/load/pipeline/test_filesystem_pipeline.py @@ -4,7 +4,7 @@ import dlt, os from dlt.common.utils import uniq_id from dlt.common.storages.load_storage import LoadJobInfo -from dlt.destinations.filesystem.filesystem import FilesystemClient, LoadFilesystemJob +from dlt.destinations.impl.filesystem.filesystem import FilesystemClient, LoadFilesystemJob from dlt.common.schema.typing import LOADS_TABLE_NAME from tests.utils import skip_if_not_active @@ -12,12 +12,16 @@ skip_if_not_active("filesystem") -def assert_file_matches(layout: str, job: LoadJobInfo, load_id: str, client: FilesystemClient) -> None: +def assert_file_matches( + layout: str, job: LoadJobInfo, load_id: str, client: FilesystemClient +) -> None: """Verify file contents of load job are identical to the corresponding file in destination""" local_path = Path(job.file_path) filename = local_path.name - destination_fn = LoadFilesystemJob.make_destination_filename(layout, filename, client.schema.name, load_id) + destination_fn = LoadFilesystemJob.make_destination_filename( + layout, filename, client.schema.name, load_id + ) destination_path = posixpath.join(client.dataset_path, destination_fn) assert local_path.read_bytes() == client.fs_client.read_bytes(destination_path) @@ -29,11 +33,15 @@ def test_pipeline_merge_write_disposition(default_buckets_env: str) -> None: """ import pyarrow.parquet as 
pq # Module is evaluated by other tests - pipeline = dlt.pipeline(pipeline_name='test_' + uniq_id(), destination="filesystem", dataset_name='test_' + uniq_id()) + pipeline = dlt.pipeline( + pipeline_name="test_" + uniq_id(), + destination="filesystem", + dataset_name="test_" + uniq_id(), + ) - @dlt.resource(primary_key='id') + @dlt.resource(primary_key="id") def some_data(): - yield [{'id': 1}, {'id': 2}, {'id': 3}] + yield [{"id": 1}, {"id": 2}, {"id": 3}] @dlt.resource def other_data(): @@ -43,8 +51,8 @@ def other_data(): def some_source(): return [some_data(), other_data()] - info1 = pipeline.run(some_source(), write_disposition='merge') - info2 = pipeline.run(some_source(), write_disposition='merge') + info1 = pipeline.run(some_source(), write_disposition="merge") + info2 = pipeline.run(some_source(), write_disposition="merge") client: FilesystemClient = pipeline.destination_client() # type: ignore[assignment] layout = client.config.layout @@ -71,10 +79,9 @@ def some_source(): # Verify file contents assert info2.load_packages for pkg in info2.load_packages: - assert pkg.jobs['completed_jobs'] - for job in pkg.jobs['completed_jobs']: - assert_file_matches(layout, job, pkg.load_id, client) - + assert pkg.jobs["completed_jobs"] + for job in pkg.jobs["completed_jobs"]: + assert_file_matches(layout, job, pkg.load_id, client) complete_fn = f"{client.schema.name}.{LOADS_TABLE_NAME}.%s" @@ -83,7 +90,7 @@ def some_source(): assert client.fs_client.isfile(posixpath.join(client.dataset_path, complete_fn % load_id2)) # Force replace - pipeline.run(some_source(), write_disposition='replace') + pipeline.run(some_source(), write_disposition="replace") append_files = client.fs_client.ls(append_glob, detail=False, refresh=True) replace_files = client.fs_client.ls(replace_glob, detail=False, refresh=True) assert len(append_files) == 1 @@ -91,16 +98,19 @@ def some_source(): def test_pipeline_parquet_filesystem_destination() -> None: - import pyarrow.parquet as pq # Module is evaluated by other tests # store locally - os.environ['DESTINATION__FILESYSTEM__BUCKET_URL'] = "file://_storage" - pipeline = dlt.pipeline(pipeline_name='parquet_test_' + uniq_id(), destination="filesystem", dataset_name='parquet_test_' + uniq_id()) - - @dlt.resource(primary_key='id') + os.environ["DESTINATION__FILESYSTEM__BUCKET_URL"] = "file://_storage" + pipeline = dlt.pipeline( + pipeline_name="parquet_test_" + uniq_id(), + destination="filesystem", + dataset_name="parquet_test_" + uniq_id(), + ) + + @dlt.resource(primary_key="id") def some_data(): - yield [{'id': 1}, {'id': 2}, {'id': 3}] + yield [{"id": 1}, {"id": 2}, {"id": 3}] @dlt.resource def other_data(): @@ -119,8 +129,8 @@ def some_source(): assert len(package_info.jobs["completed_jobs"]) == 3 client: FilesystemClient = pipeline.destination_client() # type: ignore[assignment] - some_data_glob = posixpath.join(client.dataset_path, 'some_data/*') - other_data_glob = posixpath.join(client.dataset_path, 'other_data/*') + some_data_glob = posixpath.join(client.dataset_path, "some_data/*") + other_data_glob = posixpath.join(client.dataset_path, "other_data/*") some_data_files = client.fs_client.glob(some_data_glob) other_data_files = client.fs_client.glob(other_data_glob) diff --git a/tests/load/pipeline/test_merge_disposition.py b/tests/load/pipeline/test_merge_disposition.py index fbc5088ab2..0714ac333d 100644 --- a/tests/load/pipeline/test_merge_disposition.py +++ b/tests/load/pipeline/test_merge_disposition.py @@ -13,7 +13,7 @@ from dlt.common.pipeline import 
StateInjectableContext from dlt.common.typing import AnyFun, StrAny from dlt.common.utils import digest128 -from dlt.extract.source import DltResource +from dlt.extract import DltResource from dlt.sources.helpers.transform import skip_first, take_first from tests.pipeline.utils import assert_load_info @@ -25,14 +25,20 @@ # ACTIVE_DESTINATIONS += ["motherduck"] -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_merge_on_keys_in_schema(destination_config: DestinationTestConfiguration) -> None: p = destination_config.setup_pipeline("eth_2", full_refresh=True) with open("tests/common/cases/schemas/eth/ethereum_schema_v5.yml", "r", encoding="utf-8") as f: schema = dlt.Schema.from_dict(yaml.safe_load(f)) - with open("tests/normalize/cases/ethereum.blocks.9c1d9b504ea240a482b007788d5cd61c_2.json", "r", encoding="utf-8") as f: + with open( + "tests/normalize/cases/ethereum.blocks.9c1d9b504ea240a482b007788d5cd61c_2.json", + "r", + encoding="utf-8", + ) as f: data = json.load(f) # take only the first block. the first block does not have uncles so this table should not be created and merged @@ -42,7 +48,10 @@ def test_merge_on_keys_in_schema(destination_config: DestinationTestConfiguratio # we load a single block assert eth_1_counts["blocks"] == 1 # check root key propagation - assert p.default_schema.tables["blocks__transactions"]["columns"]["_dlt_root_id"]["root_key"] is True + assert ( + p.default_schema.tables["blocks__transactions"]["columns"]["_dlt_root_id"]["root_key"] + is True + ) # now we load the whole dataset. blocks should be created which adds columns to blocks # if the table would be created before the whole load would fail because new columns have hints info = p.run(data, table_name="blocks", write_disposition="merge", schema=schema) @@ -59,11 +68,15 @@ def test_merge_on_keys_in_schema(destination_config: DestinationTestConfiguratio assert eth_2_counts == eth_3_counts -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_merge_on_ad_hoc_primary_key(destination_config: DestinationTestConfiguration) -> None: p = destination_config.setup_pipeline("github_1", full_refresh=True) - with open("tests/normalize/cases/github.issues.load_page_5_duck.json", "r", encoding="utf-8") as f: + with open( + "tests/normalize/cases/github.issues.load_page_5_duck.json", "r", encoding="utf-8" + ) as f: data = json.load(f) # note: NodeId will be normalized to "node_id" which exists in the schema info = p.run(data[:17], table_name="issues", write_disposition="merge", primary_key="NodeId") @@ -89,17 +102,27 @@ def test_merge_on_ad_hoc_primary_key(destination_config: DestinationTestConfigur @dlt.source(root_key=True) def github(): - - @dlt.resource(table_name="issues", write_disposition="merge", primary_key="id", merge_key=("node_id", "url")) + @dlt.resource( + table_name="issues", + write_disposition="merge", + primary_key="id", + merge_key=("node_id", "url"), + ) def load_issues(): - with open("tests/normalize/cases/github.issues.load_page_5_duck.json", "r", encoding="utf-8") as f: + with open( + "tests/normalize/cases/github.issues.load_page_5_duck.json", "r", encoding="utf-8" + ) as f: yield from json.load(f) return 
load_issues -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) -def test_merge_source_compound_keys_and_changes(destination_config: DestinationTestConfiguration) -> None: +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) +def test_merge_source_compound_keys_and_changes( + destination_config: DestinationTestConfiguration, +) -> None: p = destination_config.setup_pipeline("github_3", full_refresh=True) info = p.run(github()) @@ -108,9 +131,18 @@ def test_merge_source_compound_keys_and_changes(destination_config: DestinationT # 100 issues total assert github_1_counts["issues"] == 100 # check keys created - assert p.default_schema.tables["issues"]["columns"]["node_id"].items() > {"merge_key": True, "data_type": "text", "nullable": False}.items() - assert p.default_schema.tables["issues"]["columns"]["url"].items() > {"merge_key": True, "data_type": "text", "nullable": False}.items() - assert p.default_schema.tables["issues"]["columns"]["id"].items() > {"primary_key": True, "data_type": "bigint", "nullable": False}.items() + assert ( + p.default_schema.tables["issues"]["columns"]["node_id"].items() + > {"merge_key": True, "data_type": "text", "nullable": False}.items() + ) + assert ( + p.default_schema.tables["issues"]["columns"]["url"].items() + > {"merge_key": True, "data_type": "text", "nullable": False}.items() + ) + assert ( + p.default_schema.tables["issues"]["columns"]["id"].items() + > {"primary_key": True, "data_type": "bigint", "nullable": False}.items() + ) # append load_issues resource info = p.run(github().load_issues, write_disposition="append") @@ -118,10 +150,10 @@ def test_merge_source_compound_keys_and_changes(destination_config: DestinationT assert p.default_schema.tables["issues"]["write_disposition"] == "append" # the counts of all tables must be double github_2_counts = load_table_counts(p, *[t["name"] for t in p.default_schema.data_tables()]) - assert {k:v*2 for k, v in github_1_counts.items()} == github_2_counts + assert {k: v * 2 for k, v in github_1_counts.items()} == github_2_counts # now replace all resources - info = p.run(github(), write_disposition="replace" ) + info = p.run(github(), write_disposition="replace") assert_load_info(info) assert p.default_schema.tables["issues"]["write_disposition"] == "replace" # assert p.default_schema.tables["issues__labels"]["write_disposition"] == "replace" @@ -130,7 +162,9 @@ def test_merge_source_compound_keys_and_changes(destination_config: DestinationT assert github_1_counts == github_3_counts -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_merge_no_child_tables(destination_config: DestinationTestConfiguration) -> None: p = destination_config.setup_pipeline("github_3", full_refresh=True) github_data = github() @@ -161,7 +195,9 @@ def test_merge_no_child_tables(destination_config: DestinationTestConfiguration) assert github_2_counts["issues"] == 100 if destination_config.supports_merge else 115 -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_merge_no_merge_keys(destination_config: 
DestinationTestConfiguration) -> None: p = destination_config.setup_pipeline("github_3", full_refresh=True) github_data = github() @@ -187,19 +223,24 @@ def test_merge_no_merge_keys(destination_config: DestinationTestConfiguration) - assert github_1_counts["issues"] == 10 if destination_config.supports_merge else 100 - 45 -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_merge_keys_non_existing_columns(destination_config: DestinationTestConfiguration) -> None: p = destination_config.setup_pipeline("github_3", full_refresh=True) github_data = github() # set keys names that do not exist in the data - github_data.load_issues.apply_hints(merge_key=("mA1", "Ma2"), primary_key=("123-x", )) + github_data.load_issues.apply_hints(merge_key=("mA1", "Ma2"), primary_key=("123-x",)) # skip first 45 rows github_data.load_issues.add_filter(skip_first(45)) info = p.run(github_data) assert_load_info(info) github_1_counts = load_table_counts(p, *[t["name"] for t in p.default_schema.data_tables()]) assert github_1_counts["issues"] == 100 - 45 - assert p.default_schema.tables["issues"]["columns"]["m_a1"].items() > {"merge_key": True, "nullable": False}.items() + assert ( + p.default_schema.tables["issues"]["columns"]["m_a1"].items() + > {"merge_key": True, "nullable": False}.items() + ) # for non merge destinations we just check that the run passes if not destination_config.supports_merge: @@ -207,7 +248,7 @@ def test_merge_keys_non_existing_columns(destination_config: DestinationTestConf # all the keys are invalid so the merge falls back to replace github_data = github() - github_data.load_issues.apply_hints(merge_key=("mA1", "Ma2"), primary_key=("123-x", )) + github_data.load_issues.apply_hints(merge_key=("mA1", "Ma2"), primary_key=("123-x",)) github_data.load_issues.add_filter(take_first(1)) info = p.run(github_data) assert_load_info(info) @@ -219,7 +260,11 @@ def test_merge_keys_non_existing_columns(destination_config: DestinationTestConf assert "m_a1" not in table_schema # unbound columns were not created -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, subset=["duckdb", "snowflake", "bigquery"]), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["duckdb", "snowflake", "bigquery"]), + ids=lambda x: x.name, +) def test_pipeline_load_parquet(destination_config: DestinationTestConfiguration) -> None: p = destination_config.setup_pipeline("github_3", full_refresh=True) github_data = github() @@ -227,7 +272,9 @@ def test_pipeline_load_parquet(destination_config: DestinationTestConfiguration) github_data.max_table_nesting = 2 github_data_copy = github() github_data_copy.max_table_nesting = 2 - info = p.run([github_data, github_data_copy], loader_file_format="parquet", write_disposition="merge") + info = p.run( + [github_data, github_data_copy], loader_file_format="parquet", write_disposition="merge" + ) assert_load_info(info) # make sure it was parquet or sql transforms files = p.get_load_package_info(p.list_completed_load_packages()[0]).jobs["completed_jobs"] @@ -250,22 +297,34 @@ def test_pipeline_load_parquet(destination_config: DestinationTestConfiguration) assert github_1_counts["issues"] == 100 - -@dlt.transformer(name="github_repo_events", primary_key="id", write_disposition="merge", 
table_name=lambda i: i['type']) -def github_repo_events(page: List[StrAny], last_created_at = dlt.sources.incremental("created_at", "1970-01-01T00:00:00Z")): -    """A transformer taking a stream of github events and dispatching them to tables named by event type. Deduplicates be 'id'. Loads incrementally by 'created_at' """ +@dlt.transformer( +    name="github_repo_events", +    primary_key="id", +    write_disposition="merge", +    table_name=lambda i: i["type"], +) +def github_repo_events( +    page: List[StrAny], +    last_created_at=dlt.sources.incremental("created_at", "1970-01-01T00:00:00Z"), +): +    """A transformer taking a stream of github events and dispatching them to tables named by event type. Deduplicates by 'id'. Loads incrementally by 'created_at'""" yield page @dlt.transformer(name="github_repo_events", primary_key="id", write_disposition="merge") -def github_repo_events_table_meta(page: List[StrAny], last_created_at = dlt.sources.incremental("created_at", "1970-01-01T00:00:00Z")): -    """A transformer taking a stream of github events and dispatching them to tables using table meta. Deduplicates be 'id'. Loads incrementally by 'created_at' """ -    yield from [dlt.mark.with_table_name(p, p['type']) for p in page] +def github_repo_events_table_meta( +    page: List[StrAny], +    last_created_at=dlt.sources.incremental("created_at", "1970-01-01T00:00:00Z"), +): +    """A transformer taking a stream of github events and dispatching them to tables using table meta. Deduplicates by 'id'. Loads incrementally by 'created_at'""" +    yield from [dlt.mark.with_table_name(p, p["type"]) for p in page] @dlt.resource def _get_shuffled_events(shuffle: bool = dlt.secrets.value): -    with open("tests/normalize/cases/github.events.load_page_1_duck.json", "r", encoding="utf-8") as f: +    with open( +        "tests/normalize/cases/github.events.load_page_1_duck.json", "r", encoding="utf-8" +    ) as f: issues = json.load(f) # random order if shuffle: @@ -273,17 +332,22 @@ def _get_shuffled_events(shuffle: bool = dlt.secrets.value): yield issues - -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) -@pytest.mark.parametrize("github_resource",[github_repo_events, github_repo_events_table_meta]) -def test_merge_with_dispatch_and_incremental(destination_config: DestinationTestConfiguration, github_resource: DltResource) -> None: -    newest_issues = list(sorted(_get_shuffled_events(True), key = lambda x: x["created_at"], reverse=True)) +@pytest.mark.parametrize( +    "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) +@pytest.mark.parametrize("github_resource", [github_repo_events, github_repo_events_table_meta]) +def test_merge_with_dispatch_and_incremental( +    destination_config: DestinationTestConfiguration, github_resource: DltResource +) -> None: +    newest_issues = list( +        sorted(_get_shuffled_events(True), key=lambda x: x["created_at"], reverse=True) +    ) newest_issue = newest_issues[0] @dlt.resource def _new_event(node_id): new_i = copy(newest_issue) -        new_i["id"] = str(random.randint(0, 2^32)) +        new_i["id"] = str(random.randint(0, 2**32)) new_i["created_at"] = pendulum.now().isoformat() new_i["node_id"] = node_id # yield pages @@ -301,21 +365,33 @@ def _updated_event(node_id): with Container().injectable_context(StateInjectableContext(state={})): assert len(list(_get_shuffled_events(True) | github_resource)) == 100 incremental_state = github_resource.state -        assert incremental_state["incremental"]["created_at"]["last_value"] == 
newest_issue["created_at"] - assert incremental_state["incremental"]["created_at"]["unique_hashes"] == [digest128(f'"{newest_issue["id"]}"')] + assert ( + incremental_state["incremental"]["created_at"]["last_value"] + == newest_issue["created_at"] + ) + assert incremental_state["incremental"]["created_at"]["unique_hashes"] == [ + digest128(f'"{newest_issue["id"]}"') + ] # subsequent load will skip all elements assert len(list(_get_shuffled_events(True) | github_resource)) == 0 # add one more issue assert len(list(_new_event("new_node") | github_resource)) == 1 - assert incremental_state["incremental"]["created_at"]["last_value"] > newest_issue["created_at"] - assert incremental_state["incremental"]["created_at"]["unique_hashes"] != [digest128(str(newest_issue["id"]))] + assert ( + incremental_state["incremental"]["created_at"]["last_value"] + > newest_issue["created_at"] + ) + assert incremental_state["incremental"]["created_at"]["unique_hashes"] != [ + digest128(str(newest_issue["id"])) + ] # load to destination p = destination_config.setup_pipeline("github_3", full_refresh=True) info = p.run(_get_shuffled_events(True) | github_resource) assert_load_info(info) # get top tables - counts = load_table_counts(p, *[t["name"] for t in p.default_schema.data_tables() if t.get("parent") is None]) + counts = load_table_counts( + p, *[t["name"] for t in p.default_schema.data_tables() if t.get("parent") is None] + ) # total number of events in all top tables == 100 assert sum(counts.values()) == 100 # this should skip all events due to incremental load @@ -326,10 +402,12 @@ def _updated_event(node_id): # load one more event with a new id info = p.run(_new_event("new_node") | github_resource) assert_load_info(info) - counts = load_table_counts(p, *[t["name"] for t in p.default_schema.data_tables() if t.get("parent") is None]) + counts = load_table_counts( + p, *[t["name"] for t in p.default_schema.data_tables() if t.get("parent") is None] + ) assert sum(counts.values()) == 101 # all the columns have primary keys and merge disposition derived from resource - for table in p.default_schema.data_tables(): + for table in p.default_schema.data_tables(): if table.get("parent") is None: assert table["write_disposition"] == "merge" assert table["columns"]["id"]["primary_key"] is True @@ -338,7 +416,9 @@ def _updated_event(node_id): info = p.run(_updated_event("new_node_X") | github_resource) assert_load_info(info) # still 101 - counts = load_table_counts(p, *[t["name"] for t in p.default_schema.data_tables() if t.get("parent") is None]) + counts = load_table_counts( + p, *[t["name"] for t in p.default_schema.data_tables() if t.get("parent") is None] + ) assert sum(counts.values()) == 101 if destination_config.supports_merge else 102 # for non merge destinations we just check that the run passes if not destination_config.supports_merge: @@ -350,13 +430,18 @@ def _updated_event(node_id): assert len(list(q.fetchall())) == 1 -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_deduplicate_single_load(destination_config: DestinationTestConfiguration) -> None: p = destination_config.setup_pipeline("abstract", full_refresh=True) @dlt.resource(write_disposition="merge", primary_key="id") def duplicates(): - yield [{"id": 1, "name": "row1", "child": [1, 2, 3]}, {"id": 1, "name": "row2", "child": [4, 5, 6]}] + yield [ + {"id": 
1, "name": "row1", "child": [1, 2, 3]}, + {"id": 1, "name": "row2", "child": [4, 5, 6]}, + ] info = p.run(duplicates()) assert_load_info(info) @@ -366,7 +451,6 @@ def duplicates(): qual_name = p.sql_client().make_qualified_table_name("duplicates") select_data(p, f"SELECT * FROM {qual_name}")[0] - @dlt.resource(write_disposition="merge", primary_key=("id", "subkey")) def duplicates_no_child(): yield [{"id": 1, "subkey": "AX", "name": "row1"}, {"id": 1, "subkey": "AX", "name": "row2"}] @@ -377,13 +461,18 @@ def duplicates_no_child(): assert counts["duplicates_no_child"] == 1 if destination_config.supports_merge else 2 -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_no_deduplicate_only_merge_key(destination_config: DestinationTestConfiguration) -> None: p = destination_config.setup_pipeline("abstract", full_refresh=True) @dlt.resource(write_disposition="merge", merge_key="id") def duplicates(): - yield [{"id": 1, "name": "row1", "child": [1, 2, 3]}, {"id": 1, "name": "row2", "child": [4, 5, 6]}] + yield [ + {"id": 1, "name": "row1", "child": [1, 2, 3]}, + {"id": 1, "name": "row2", "child": [4, 5, 6]}, + ] info = p.run(duplicates()) assert_load_info(info) @@ -391,7 +480,6 @@ def duplicates(): assert counts["duplicates"] == 2 assert counts["duplicates__child"] == 6 - @dlt.resource(write_disposition="merge", merge_key=("id", "subkey")) def duplicates_no_child(): yield [{"id": 1, "subkey": "AX", "name": "row1"}, {"id": 1, "subkey": "AX", "name": "row2"}] diff --git a/tests/load/pipeline/test_pipelines.py b/tests/load/pipeline/test_pipelines.py index 99071a7ac6..abbb2b022f 100644 --- a/tests/load/pipeline/test_pipelines.py +++ b/tests/load/pipeline/test_pipelines.py @@ -8,27 +8,48 @@ from dlt.common.pipeline import SupportsPipeline from dlt.common import json, sleep -from dlt.common.destination.reference import DestinationReference +from dlt.common.destination import Destination from dlt.common.schema.schema import Schema from dlt.common.schema.typing import VERSION_TABLE_NAME from dlt.common.typing import TDataItem from dlt.common.utils import uniq_id from dlt.extract.exceptions import ResourceNameMissing -from dlt.extract.source import DltSource -from dlt.pipeline.exceptions import CannotRestorePipelineException, PipelineConfigMissing, PipelineStepFailed +from dlt.extract import DltSource +from dlt.pipeline.exceptions import ( + CannotRestorePipelineException, + PipelineConfigMissing, + PipelineStepFailed, +) from dlt.common.schema.exceptions import CannotCoerceColumnException from dlt.common.exceptions import DestinationHasFailedJobs from tests.utils import TEST_STORAGE_ROOT, preserve_environ from tests.pipeline.utils import assert_load_info -from tests.load.utils import TABLE_ROW_ALL_DATA_TYPES, TABLE_UPDATE_COLUMNS_SCHEMA, assert_all_data_types_row, delete_dataset -from tests.load.pipeline.utils import drop_active_pipeline_data, assert_query_data, assert_table, load_table_counts, select_data +from tests.load.utils import ( + TABLE_ROW_ALL_DATA_TYPES, + TABLE_UPDATE_COLUMNS_SCHEMA, + assert_all_data_types_row, + delete_dataset, +) +from tests.load.pipeline.utils import ( + drop_active_pipeline_data, + assert_query_data, + assert_table, + load_table_counts, + select_data, +) from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration 
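A note on the API migration that shows up in the import hunk above and throughout the pipeline tests below: DestinationReference.from_name is replaced by the Destination.from_reference factory exported from dlt.common.destination, and DltSource is now imported from dlt.extract instead of dlt.extract.source. A minimal sketch of the new-style usage, assuming dlt with the duckdb extra is installed; the pipeline, dataset and source names are invented for illustration and do not appear in the tests:

import dlt
from dlt.common.destination import Destination  # replaces DestinationReference
from dlt.extract import DltSource  # new import path, was dlt.extract.source

# build a destination factory from its shorthand name instead of DestinationReference.from_name
duckdb_destination = Destination.from_reference("duckdb")

pipeline = dlt.pipeline(
    pipeline_name="example_pipeline",
    destination=duckdb_destination,
    dataset_name="example_dataset",
)


@dlt.source
def example_source():
    # a single trivial resource is enough for the sketch
    return dlt.resource([1, 2, 3], name="numbers")


source: DltSource = example_source()  # the decorator makes the call return a DltSource
load_info = pipeline.run(source)
print(load_info)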
-@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, all_buckets_filesystem_configs=True), ids=lambda x: x.name) -@pytest.mark.parametrize('use_single_dataset', [True, False]) -def test_default_pipeline_names(use_single_dataset: bool, destination_config: DestinationTestConfiguration) -> None: +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, all_buckets_filesystem_configs=True), + ids=lambda x: x.name, +) +@pytest.mark.parametrize("use_single_dataset", [True, False]) +def test_default_pipeline_names( + use_single_dataset: bool, destination_config: DestinationTestConfiguration +) -> None: destination_config.setup() p = dlt.pipeline() p.config.use_single_dataset = use_single_dataset @@ -66,9 +87,13 @@ def data_fun() -> Iterator[Any]: # mock the correct destinations (never do that in normal code) with p.managed_state(): p._set_destinations( - DestinationReference.from_name(destination_config.destination), - DestinationReference.from_name(destination_config.staging) if destination_config.staging else None - ) + destination=Destination.from_reference(destination_config.destination), + staging=( + Destination.from_reference(destination_config.staging) + if destination_config.staging + else None + ), + ) # does not reset the dataset name assert p.dataset_name in possible_dataset_names # never do that in production code @@ -76,12 +101,17 @@ def data_fun() -> Iterator[Any]: # set no dataset name -> if destination does not support it we revert to default p._set_dataset_name(None) assert p.dataset_name in possible_dataset_names + # the last package contains just the state (we added a new schema) + last_load_id = p.list_extracted_load_packages()[-1] + state_package = p.get_load_package_info(last_load_id) + assert len(state_package.jobs["new_jobs"]) == 1 + assert state_package.schema_name == p.default_schema_name p.normalize() info = p.load(dataset_name="d" + uniq_id()) print(p.dataset_name) assert info.pipeline is p # two packages in two different schemas were loaded - assert len(info.loads_ids) == 2 + assert len(info.loads_ids) == 3 # if loaded to single data, double the data was loaded to a single table because the schemas overlapped if use_single_dataset: @@ -92,13 +122,23 @@ def data_fun() -> Iterator[Any]: assert_table(p, "data_fun", data, schema_name="names", info=info) -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, all_buckets_filesystem_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, all_buckets_filesystem_configs=True), + ids=lambda x: x.name, +) def test_default_schema_name(destination_config: DestinationTestConfiguration) -> None: destination_config.setup() dataset_name = "dataset_" + uniq_id() data = ["a", "b", "c"] - p = dlt.pipeline("test_default_schema_name", TEST_STORAGE_ROOT, destination=destination_config.destination, staging=destination_config.staging, dataset_name=dataset_name) + p = dlt.pipeline( + "test_default_schema_name", + TEST_STORAGE_ROOT, + destination=destination_config.destination, + staging=destination_config.staging, + dataset_name=dataset_name, + ) p.extract(data, table_name="test", schema=Schema("default")) p.normalize() info = p.load() @@ -111,9 +151,12 @@ def test_default_schema_name(destination_config: DestinationTestConfiguration) - assert_table(p, "test", data, info=info) -@pytest.mark.parametrize("destination_config", 
destinations_configs(default_sql_configs=True, all_buckets_filesystem_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, all_buckets_filesystem_configs=True), + ids=lambda x: x.name, +) def test_attach_pipeline(destination_config: DestinationTestConfiguration) -> None: - # load data and then restore the pipeline and see if data is still there data = ["a", "b", "c"] @@ -123,7 +166,12 @@ def _data(): yield d destination_config.setup() - info = dlt.run(_data(), destination=destination_config.destination, staging=destination_config.staging, dataset_name="specific" + uniq_id()) + info = dlt.run( + _data(), + destination=destination_config.destination, + staging=destination_config.staging, + dataset_name="specific" + uniq_id(), + ) with pytest.raises(CannotRestorePipelineException): dlt.attach("unknown") @@ -144,9 +192,12 @@ def _data(): assert_table(p, "data_table", data, info=info) -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) -def test_skip_sync_schema_for_tables_without_columns(destination_config: DestinationTestConfiguration) -> None: - +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) +def test_skip_sync_schema_for_tables_without_columns( + destination_config: DestinationTestConfiguration, +) -> None: # load data and then restore the pipeline and see if data is still there data = ["a", "b", "c"] @@ -173,7 +224,11 @@ def _data(): assert not exists -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, all_buckets_filesystem_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, all_buckets_filesystem_configs=True), + ids=lambda x: x.name, +) def test_run_full_refresh(destination_config: DestinationTestConfiguration) -> None: data = ["a", ["a", "b", "c"], ["a", "b", "c"]] destination_config.setup() @@ -186,7 +241,12 @@ def _data(): return dlt.resource(d(), name="lists", write_disposition="replace") p = dlt.pipeline(full_refresh=True) - info = p.run(_data(), destination=destination_config.destination, staging=destination_config.staging, dataset_name="iteration" + uniq_id()) + info = p.run( + _data(), + destination=destination_config.destination, + staging=destination_config.staging, + dataset_name="iteration" + uniq_id(), + ) assert info.dataset_name == p.dataset_name assert info.dataset_name.endswith(p._pipeline_instance_id) # print(p.default_schema.to_pretty_yaml()) @@ -203,23 +263,18 @@ def _data(): assert_table(p, "lists__value", sorted(data_list)) - -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_evolve_schema(destination_config: DestinationTestConfiguration) -> None: dataset_name = "d" + uniq_id() row = { "id": "level0", - "f": [{ - "id": "level1", - "l": ["a", "b", "c"], - "v": 120, - "o": [{"a": 1}, {"a": 2}] - }] + "f": [{"id": "level1", "l": ["a", "b", "c"], "v": 120, "o": [{"a": 1}, {"a": 2}]}], } @dlt.source(name="parallel") def source(top_elements: int): - @dlt.defer def get_item(no: int) -> TDataItem: # the test will not last 10 seconds but 2 (there are 5 working threads by default) @@ -228,23 +283,38 @@ def get_item(no: int) -> 
TDataItem: data["id"] = "level" + str(no) return data - @dlt.resource(columns={"id": {"name": "id", "nullable": False, "data_type": "text", "unique": True, "sort": True}}) + @dlt.resource( + columns={ + "id": { + "name": "id", + "nullable": False, + "data_type": "text", + "unique": True, + "sort": True, + } + } + ) def simple_rows(): for no in range(top_elements): # yield deferred items resolved in threads yield get_item(no) - @dlt.resource(table_name="simple_rows", columns={"new_column": {"nullable": True, "data_type": "decimal"}}) + @dlt.resource( + table_name="simple_rows", + columns={"new_column": {"nullable": True, "data_type": "decimal"}}, + ) def extended_rows(): for no in range(top_elements): # yield deferred items resolved in threads - yield get_item(no+100) + yield get_item(no + 100) return simple_rows(), extended_rows(), dlt.resource(["a", "b", "c"], name="simple") import_schema_path = os.path.join(TEST_STORAGE_ROOT, "schemas", "import") export_schema_path = os.path.join(TEST_STORAGE_ROOT, "schemas", "export") - p = destination_config.setup_pipeline("my_pipeline", import_schema_path=import_schema_path, export_schema_path=export_schema_path) + p = destination_config.setup_pipeline( + "my_pipeline", import_schema_path=import_schema_path, export_schema_path=export_schema_path + ) p.extract(source(10).with_resources("simple_rows")) # print(p.default_schema.to_pretty_yaml()) @@ -285,21 +355,35 @@ def extended_rows(): # TODO: test export and import schema # test data - id_data = sorted(["level" + str(n) for n in range(10)] + ["level" + str(n) for n in range(100, 110)]) + id_data = sorted( + ["level" + str(n) for n in range(10)] + ["level" + str(n) for n in range(100, 110)] + ) with p.sql_client() as client: simple_rows_table = client.make_qualified_table_name("simple_rows") dlt_loads_table = client.make_qualified_table_name("_dlt_loads") assert_query_data(p, f"SELECT * FROM {simple_rows_table} ORDER BY id", id_data) - assert_query_data(p, f"SELECT schema_version_hash FROM {dlt_loads_table} ORDER BY inserted_at", version_history) - - -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, all_buckets_filesystem_configs=True), ids=lambda x: x.name) -@pytest.mark.parametrize('disable_compression', [True, False]) -def test_pipeline_data_writer_compression(disable_compression: bool, destination_config: DestinationTestConfiguration) -> None: + assert_query_data( + p, + f"SELECT schema_version_hash FROM {dlt_loads_table} ORDER BY inserted_at", + version_history, + ) + + +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, all_buckets_filesystem_configs=True), + ids=lambda x: x.name, +) +@pytest.mark.parametrize("disable_compression", [True, False]) +def test_pipeline_data_writer_compression( + disable_compression: bool, destination_config: DestinationTestConfiguration +) -> None: # Ensure pipeline works without compression data = ["a", "b", "c"] - dataset_name = "compression_data_"+ uniq_id() - dlt.config["data_writer"] = {"disable_compression": disable_compression} # not sure how else to set this + dataset_name = "compression_data_" + uniq_id() + dlt.config["data_writer"] = { + "disable_compression": disable_compression + } # not sure how else to set this p = destination_config.setup_pipeline("compression_test", dataset_name=dataset_name) p.extract(dlt.resource(data, name="data")) s = p._get_normalize_storage() @@ -307,33 +391,30 @@ def test_pipeline_data_writer_compression(disable_compression: bool, 
destination if disable_compression: for f in s.list_files_to_normalize_sorted(): with pytest.raises(gzip.BadGzipFile): - gzip.open(s.storage.make_full_path(f), "rb").read() + gzip.open(s.extracted_packages.storage.make_full_path(f), "rb").read() p.normalize() info = p.load() assert_table(p, "data", data, info=info) -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_source_max_nesting(destination_config: DestinationTestConfiguration) -> None: destination_config.setup() - complex_part = { - "l": [1, 2, 3], - "c": { - "a": 1, - "b": 12.3 - } - } + complex_part = {"l": [1, 2, 3], "c": {"a": 1, "b": 12.3}} @dlt.source(name="complex", max_table_nesting=0) def complex_data(): - return dlt.resource([ - { - "idx": 1, - "cn": complex_part - } - ], name="complex_cn") - info = dlt.run(complex_data(), destination=destination_config.destination, staging=destination_config.staging, dataset_name="ds_" + uniq_id()) + return dlt.resource([{"idx": 1, "cn": complex_part}], name="complex_cn") + + info = dlt.run( + complex_data(), + destination=destination_config.destination, + staging=destination_config.staging, + dataset_name="ds_" + uniq_id(), + ) print(info) with dlt.pipeline().sql_client() as client: complex_cn_table = client.make_qualified_table_name("complex_cn") @@ -345,7 +426,9 @@ def complex_data(): assert cn_val == complex_part -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_dataset_name_change(destination_config: DestinationTestConfiguration) -> None: destination_config.setup() # standard name @@ -385,11 +468,18 @@ def test_dataset_name_change(destination_config: DestinationTestConfiguration) - # do not remove - it allows us to filter tests by destination -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, subset=["postgres"]), ids=lambda x: x.name) -def test_pipeline_explicit_destination_credentials(destination_config: DestinationTestConfiguration) -> None: - +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["postgres"]), + ids=lambda x: x.name, +) +def test_pipeline_explicit_destination_credentials( + destination_config: DestinationTestConfiguration, +) -> None: # explicit credentials resolved - p = dlt.pipeline(destination="postgres", credentials="postgresql://loader:loader@localhost:5432/dlt_data") + p = dlt.pipeline( + destination="postgres", credentials="postgresql://loader:loader@localhost:5432/dlt_data" + ) c = p._get_destination_clients(Schema("s"), p._get_destination_client_initial_config())[0] assert c.config.credentials.host == "localhost" # type: ignore[attr-defined] @@ -398,7 +488,9 @@ def test_pipeline_explicit_destination_credentials(destination_config: Destinati os.environ.pop("DESTINATION__POSTGRES__CREDENTIALS", None) # explicit credentials resolved ignoring the config providers os.environ["DESTINATION__POSTGRES__CREDENTIALS__HOST"] = "HOST" - p = dlt.pipeline(destination="postgres", credentials="postgresql://loader:loader@localhost:5432/dlt_data") + p = dlt.pipeline( + destination="postgres", credentials="postgresql://loader:loader@localhost:5432/dlt_data" + ) c = 
p._get_destination_clients(Schema("s"), p._get_destination_client_initial_config())[0] assert c.config.credentials.host == "localhost" # type: ignore[attr-defined] @@ -420,14 +512,18 @@ def test_pipeline_explicit_destination_credentials(destination_config: Destinati # do not remove - it allows us to filter tests by destination -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, subset=["postgres"]), ids=lambda x: x.name) -def test_pipeline_with_sources_sharing_schema(destination_config: DestinationTestConfiguration) -> None: - +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["postgres"]), + ids=lambda x: x.name, +) +def test_pipeline_with_sources_sharing_schema( + destination_config: DestinationTestConfiguration, +) -> None: schema = Schema("shared") @dlt.source(schema=schema, max_table_nesting=1) def source_1(): - @dlt.resource(primary_key="user_id") def gen1(): dlt.current.source_state()["source_1"] = True @@ -442,7 +538,6 @@ def conflict(): @dlt.source(schema=schema, max_table_nesting=2) def source_2(): - @dlt.resource(primary_key="id") def gen1(): dlt.current.source_state()["source_2"] = True @@ -485,9 +580,15 @@ def conflict(): p.load() table_names = [t["name"] for t in default_schema.data_tables()] counts = load_table_counts(p, *table_names) - assert counts == {'gen1': 2, 'gen2': 3, 'conflict': 1} + assert counts == {"gen1": 2, "gen2": 3, "conflict": 1} # both sources share the same state - assert p.state["sources"] == {'shared': {'source_1': True, 'resources': {'gen1': {'source_1': True, 'source_2': True}}, 'source_2': True}} + assert p.state["sources"] == { + "shared": { + "source_1": True, + "resources": {"gen1": {"source_1": True, "source_2": True}}, + "source_2": True, + } + } drop_active_pipeline_data() # same pipeline but enable conflict @@ -498,13 +599,16 @@ def conflict(): # do not remove - it allows us to filter tests by destination -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, subset=["postgres"]), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["postgres"]), + ids=lambda x: x.name, +) def test_many_pipelines_single_dataset(destination_config: DestinationTestConfiguration) -> None: schema = Schema("shared") @dlt.source(schema=schema, max_table_nesting=1) def source_1(): - @dlt.resource(primary_key="user_id") def gen1(): dlt.current.source_state()["source_1"] = True @@ -515,7 +619,6 @@ def gen1(): @dlt.source(schema=schema, max_table_nesting=2) def source_2(): - @dlt.resource(primary_key="id") def gen1(): dlt.current.source_state()["source_2"] = True @@ -528,44 +631,68 @@ def gen2(): return gen2, gen1 # load source_1 to common dataset - p = dlt.pipeline(pipeline_name="source_1_pipeline", destination="duckdb", dataset_name="shared_dataset") + p = dlt.pipeline( + pipeline_name="source_1_pipeline", destination="duckdb", dataset_name="shared_dataset" + ) p.run(source_1(), credentials="duckdb:///_storage/test_quack.duckdb") counts = load_table_counts(p, *p.default_schema.tables.keys()) - assert counts.items() >= {'gen1': 1, '_dlt_pipeline_state': 1, "_dlt_loads": 1}.items() + assert counts.items() >= {"gen1": 1, "_dlt_pipeline_state": 1, "_dlt_loads": 1}.items() p._wipe_working_folder() p.deactivate() - p = dlt.pipeline(pipeline_name="source_2_pipeline", destination="duckdb", dataset_name="shared_dataset") + p = dlt.pipeline( + 
pipeline_name="source_2_pipeline", destination="duckdb", dataset_name="shared_dataset" + ) p.run(source_2(), credentials="duckdb:///_storage/test_quack.duckdb") # table_names = [t["name"] for t in p.default_schema.data_tables()] counts = load_table_counts(p, *p.default_schema.tables.keys()) # gen1: one record comes from source_1, 1 record from source_2 - assert counts.items() >= {'gen1': 2, '_dlt_pipeline_state': 2, "_dlt_loads": 2}.items() + assert counts.items() >= {"gen1": 2, "_dlt_pipeline_state": 2, "_dlt_loads": 2}.items() # assert counts == {'gen1': 2, 'gen2': 3} p._wipe_working_folder() p.deactivate() # restore from destination, check state - p = dlt.pipeline(pipeline_name="source_1_pipeline", destination="duckdb", dataset_name="shared_dataset", credentials="duckdb:///_storage/test_quack.duckdb") + p = dlt.pipeline( + pipeline_name="source_1_pipeline", + destination="duckdb", + dataset_name="shared_dataset", + credentials="duckdb:///_storage/test_quack.duckdb", + ) p.sync_destination() # we have our separate state - assert p.state["sources"]["shared"] == {'source_1': True, 'resources': {'gen1': {'source_1': True}}} + assert p.state["sources"]["shared"] == { + "source_1": True, + "resources": {"gen1": {"source_1": True}}, + } # but the schema was common so we have the earliest one assert "gen2" in p.default_schema.tables p._wipe_working_folder() p.deactivate() - p = dlt.pipeline(pipeline_name="source_2_pipeline", destination="duckdb", dataset_name="shared_dataset", credentials="duckdb:///_storage/test_quack.duckdb") + p = dlt.pipeline( + pipeline_name="source_2_pipeline", + destination="duckdb", + dataset_name="shared_dataset", + credentials="duckdb:///_storage/test_quack.duckdb", + ) p.sync_destination() # we have our separate state - assert p.state["sources"]["shared"] == {'source_2': True, 'resources': {'gen1': {'source_2': True}}} + assert p.state["sources"]["shared"] == { + "source_2": True, + "resources": {"gen1": {"source_2": True}}, + } # do not remove - it allows us to filter tests by destination -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, subset=["snowflake"]), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["snowflake"]), + ids=lambda x: x.name, +) def test_snowflake_custom_stage(destination_config: DestinationTestConfiguration) -> None: """Using custom stage name instead of the table stage""" - os.environ['DESTINATION__SNOWFLAKE__STAGE_NAME'] = 'my_non_existing_stage' + os.environ["DESTINATION__SNOWFLAKE__STAGE_NAME"] = "my_non_existing_stage" pipeline, data = simple_nested_pipeline(destination_config, f"custom_stage_{uniq_id()}", False) info = pipeline.run(data()) with pytest.raises(DestinationHasFailedJobs) as f_jobs: @@ -577,8 +704,8 @@ def test_snowflake_custom_stage(destination_config: DestinationTestConfiguration # NOTE: this stage must be created in DLT_DATA database for this test to pass! 
# CREATE STAGE MY_CUSTOM_LOCAL_STAGE; # GRANT READ, WRITE ON STAGE DLT_DATA.PUBLIC.MY_CUSTOM_LOCAL_STAGE TO ROLE DLT_LOADER_ROLE; - stage_name = 'PUBLIC.MY_CUSTOM_LOCAL_STAGE' - os.environ['DESTINATION__SNOWFLAKE__STAGE_NAME'] = stage_name + stage_name = "PUBLIC.MY_CUSTOM_LOCAL_STAGE" + os.environ["DESTINATION__SNOWFLAKE__STAGE_NAME"] = stage_name pipeline, data = simple_nested_pipeline(destination_config, f"custom_stage_{uniq_id()}", False) info = pipeline.run(data()) assert_load_info(info) @@ -591,16 +718,22 @@ def test_snowflake_custom_stage(destination_config: DestinationTestConfiguration assert len(staged_files) == 3 # check data of one table to ensure copy was done successfully tbl_name = client.make_qualified_table_name("lists") - assert_query_data(pipeline, f"SELECT value FROM {tbl_name}", ['a', None, None]) + assert_query_data(pipeline, f"SELECT value FROM {tbl_name}", ["a", None, None]) # do not remove - it allows us to filter tests by destination -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, subset=["snowflake"]), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["snowflake"]), + ids=lambda x: x.name, +) def test_snowflake_delete_file_after_copy(destination_config: DestinationTestConfiguration) -> None: """Using keep_staged_files = false option to remove staged files after copy""" - os.environ['DESTINATION__SNOWFLAKE__KEEP_STAGED_FILES'] = 'FALSE' + os.environ["DESTINATION__SNOWFLAKE__KEEP_STAGED_FILES"] = "FALSE" - pipeline, data = simple_nested_pipeline(destination_config, f"delete_staged_files_{uniq_id()}", False) + pipeline, data = simple_nested_pipeline( + destination_config, f"delete_staged_files_{uniq_id()}", False + ) info = pipeline.run(data()) assert_load_info(info) @@ -609,26 +742,32 @@ def test_snowflake_delete_file_after_copy(destination_config: DestinationTestCon with pipeline.sql_client() as client: # no files are left in table stage - stage_name = client.make_qualified_table_name('%lists') + stage_name = client.make_qualified_table_name("%lists") staged_files = client.execute_sql(f'LIST @{stage_name}/"{load_id}"') assert len(staged_files) == 0 # ensure copy was done tbl_name = client.make_qualified_table_name("lists") - assert_query_data(pipeline, f"SELECT value FROM {tbl_name}", ['a', None, None]) + assert_query_data(pipeline, f"SELECT value FROM {tbl_name}", ["a", None, None]) # do not remove - it allows us to filter tests by destination -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, all_staging_configs=True, file_format="parquet"), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, all_staging_configs=True, file_format="parquet"), + ids=lambda x: x.name, +) def test_parquet_loading(destination_config: DestinationTestConfiguration) -> None: """Run pipeline twice with merge write disposition Resource with primary key falls back to append. Resource without keys falls back to replace. 
""" - pipeline = destination_config.setup_pipeline('parquet_test_' + uniq_id(), dataset_name='parquet_test_' + uniq_id()) + pipeline = destination_config.setup_pipeline( + "parquet_test_" + uniq_id(), dataset_name="parquet_test_" + uniq_id() + ) - @dlt.resource(primary_key='id') + @dlt.resource(primary_key="id") def some_data(): - yield [{'id': 1}, {'id': 2}, {'id': 3}] + yield [{"id": 1}, {"id": 2}, {"id": 3}] @dlt.resource(write_disposition="replace") def other_data(): @@ -662,7 +801,7 @@ def other_data(): @dlt.resource(table_name="data_types", write_disposition="merge", columns=column_schemas) def my_resource(): nonlocal data_types - yield [data_types]*10 + yield [data_types] * 10 @dlt.source(max_table_nesting=0) def some_source(): @@ -685,8 +824,14 @@ def some_source(): assert len(package_info.jobs["completed_jobs"]) == expected_completed_jobs with pipeline.sql_client() as sql_client: - assert [row[0] for row in sql_client.execute_sql("SELECT * FROM other_data ORDER BY 1")] == [1, 2, 3, 4, 5] - assert [row[0] for row in sql_client.execute_sql("SELECT * FROM some_data ORDER BY 1")] == [1, 2, 3] + assert [ + row[0] for row in sql_client.execute_sql("SELECT * FROM other_data ORDER BY 1") + ] == [1, 2, 3, 4, 5] + assert [row[0] for row in sql_client.execute_sql("SELECT * FROM some_data ORDER BY 1")] == [ + 1, + 2, + 3, + ] db_rows = sql_client.execute_sql("SELECT * FROM data_types") assert len(db_rows) == 10 db_row = list(db_rows[0]) @@ -694,12 +839,15 @@ def some_source(): assert_all_data_types_row( db_row, schema=column_schemas, - parse_complex_strings=destination_config.destination in ["snowflake", "bigquery", "redshift"], - timestamp_precision= 3 if destination_config.destination == "athena" else 6 + parse_complex_strings=destination_config.destination + in ["snowflake", "bigquery", "redshift"], + timestamp_precision=3 if destination_config.destination == "athena" else 6, ) -def simple_nested_pipeline(destination_config: DestinationTestConfiguration, dataset_name: str, full_refresh: bool) -> Tuple[dlt.Pipeline, Callable[[], DltSource]]: +def simple_nested_pipeline( + destination_config: DestinationTestConfiguration, dataset_name: str, full_refresh: bool +) -> Tuple[dlt.Pipeline, Callable[[], DltSource]]: data = ["a", ["a", "b", "c"], ["a", "b", "c"]] def d(): @@ -709,6 +857,11 @@ def d(): def _data(): return dlt.resource(d(), name="lists", write_disposition="append") - p = dlt.pipeline(pipeline_name=f"pipeline_{dataset_name}", full_refresh=full_refresh, destination=destination_config.destination, staging=destination_config.staging, dataset_name=dataset_name) + p = dlt.pipeline( + pipeline_name=f"pipeline_{dataset_name}", + full_refresh=full_refresh, + destination=destination_config.destination, + staging=destination_config.staging, + dataset_name=dataset_name, + ) return p, _data - diff --git a/tests/load/pipeline/test_redshift.py b/tests/load/pipeline/test_redshift.py index 709e924bc9..a5d0cd178f 100644 --- a/tests/load/pipeline/test_redshift.py +++ b/tests/load/pipeline/test_redshift.py @@ -9,7 +9,11 @@ from tests.pipeline.utils import assert_load_info -@pytest.mark.parametrize("destination_config", destinations_configs(all_staging_configs=True, subset=["redshift"]), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", + destinations_configs(all_staging_configs=True, subset=["redshift"]), + ids=lambda x: x.name, +) def test_redshift_blocks_time_column(destination_config: DestinationTestConfiguration) -> None: pipeline = destination_config.setup_pipeline("athena_" 
+ uniq_id(), full_refresh=True) @@ -19,7 +23,7 @@ def test_redshift_blocks_time_column(destination_config: DestinationTestConfigur @dlt.resource(table_name="data_types", write_disposition="append", columns=column_schemas) def my_resource() -> Iterator[Any]: nonlocal data_types - yield [data_types]*10 + yield [data_types] * 10 @dlt.source(max_table_nesting=0) def my_source() -> Any: @@ -29,4 +33,7 @@ def my_source() -> Any: assert info.has_failed_jobs - assert "Redshift cannot load TIME columns from" in info.load_packages[0].jobs['failed_jobs'][0].failed_message + assert ( + "Redshift cannot load TIME columns from" + in info.load_packages[0].jobs["failed_jobs"][0].failed_message + ) diff --git a/tests/load/pipeline/test_replace_disposition.py b/tests/load/pipeline/test_replace_disposition.py index d39556ab2f..c6db91efff 100644 --- a/tests/load/pipeline/test_replace_disposition.py +++ b/tests/load/pipeline/test_replace_disposition.py @@ -3,38 +3,62 @@ import dlt, os, pytest from dlt.common.utils import uniq_id -from tests.pipeline.utils import assert_load_info -from tests.load.pipeline.utils import drop_active_pipeline_data, load_table_counts, load_tables_to_dicts +from tests.pipeline.utils import assert_load_info +from tests.load.pipeline.utils import ( + drop_active_pipeline_data, + load_table_counts, + load_tables_to_dicts, +) from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration REPLACE_STRATEGIES = ["truncate-and-insert", "insert-from-staging", "staging-optimized"] -@pytest.mark.parametrize("destination_config", destinations_configs(local_filesystem_configs=True, default_staging_configs=True, default_sql_configs=True), ids=lambda x: x.name) -@pytest.mark.parametrize("replace_strategy", REPLACE_STRATEGIES) -def test_replace_disposition(destination_config: DestinationTestConfiguration, replace_strategy: str) -> None: +@pytest.mark.parametrize( + "destination_config", + destinations_configs( + local_filesystem_configs=True, default_staging_configs=True, default_sql_configs=True + ), + ids=lambda x: x.name, +) +@pytest.mark.parametrize("replace_strategy", REPLACE_STRATEGIES) +def test_replace_disposition( + destination_config: DestinationTestConfiguration, replace_strategy: str +) -> None: if not destination_config.supports_merge and replace_strategy != "truncate-and-insert": - pytest.skip(f"Destination {destination_config.name} does not support merge and thus {replace_strategy}") + pytest.skip( + f"Destination {destination_config.name} does not support merge and thus" + f" {replace_strategy}" + ) # only allow 40 items per file - os.environ['DATA_WRITER__FILE_MAX_ITEMS'] = "40" + os.environ["DATA_WRITER__FILE_MAX_ITEMS"] = "40" # use staging tables for replace - os.environ['DESTINATION__REPLACE_STRATEGY'] = replace_strategy + os.environ["DESTINATION__REPLACE_STRATEGY"] = replace_strategy # make duckdb to reuse database in working folder os.environ["DESTINATION__DUCKDB__CREDENTIALS"] = "duckdb:///test_replace_disposition.duckdb" # TODO: start storing _dlt_loads with right json content increase_loads = lambda x: x if destination_config.destination == "filesystem" else x + 1 - increase_state_loads = lambda info: len([job for job in info.load_packages[0].jobs["completed_jobs"] if job.job_file_info.table_name == "_dlt_pipeline_state" and job.job_file_info.file_format not in ["sql", "reference"]]) + increase_state_loads = lambda info: len( + [ + job + for job in info.load_packages[0].jobs["completed_jobs"] + if job.job_file_info.table_name == 
"_dlt_pipeline_state" + and job.job_file_info.file_format not in ["sql", "reference"] + ] + ) # filesystem does not have versions and child tables def norm_table_counts(counts: Dict[str, int], *child_tables: str) -> Dict[str, int]: if destination_config.destination != "filesystem": return counts - return {**{"_dlt_version": 0}, **{t:0 for t in child_tables}, **counts} + return {**{"_dlt_version": 0}, **{t: 0 for t in child_tables}, **counts} dataset_name = "test_replace_strategies_ds" + uniq_id() - pipeline = destination_config.setup_pipeline("test_replace_strategies", dataset_name=dataset_name) + pipeline = destination_config.setup_pipeline( + "test_replace_strategies", dataset_name=dataset_name + ) offset = 1000 @@ -45,36 +69,39 @@ def load_items(): # 6 jobs for the sub_items # 3 jobs for the sub_sub_items nonlocal offset - for _, index in enumerate(range(offset, offset+120), 1): + for _, index in enumerate(range(offset, offset + 120), 1): yield { "id": index, "name": f"item {index}", - "sub_items": [{ - "id": index + 1000, - "name": f"sub item {index + 1000}" - },{ - "id": index + 2000, - "name": f"sub item {index + 2000}", - "sub_sub_items": [{ - "id": index + 3000, - "name": f"sub item {index + 3000}", - }] - }] - } + "sub_items": [ + {"id": index + 1000, "name": f"sub item {index + 1000}"}, + { + "id": index + 2000, + "name": f"sub item {index + 2000}", + "sub_sub_items": [ + { + "id": index + 3000, + "name": f"sub item {index + 3000}", + } + ], + }, + ], + } # append resource to see if we do not drop any tables @dlt.resource(write_disposition="append") def append_items(): nonlocal offset - for _, index in enumerate(range(offset, offset+12), 1): + for _, index in enumerate(range(offset, offset + 12), 1): yield { "id": index, "name": f"item {index}", } - # first run with offset 0 - info = pipeline.run([load_items, append_items], loader_file_format=destination_config.file_format) + info = pipeline.run( + [load_items, append_items], loader_file_format=destination_config.file_format + ) assert_load_info(info) # count state records that got extracted state_records = increase_state_loads(info) @@ -83,7 +110,9 @@ def append_items(): # second run with higher offset so we can check the results offset = 1000 - info = pipeline.run([load_items, append_items], loader_file_format=destination_config.file_format) + info = pipeline.run( + [load_items, append_items], loader_file_format=destination_config.file_format + ) assert_load_info(info) state_records += increase_state_loads(info) dlt_loads = increase_loads(dlt_loads) @@ -97,7 +126,7 @@ def append_items(): "items__sub_items__sub_sub_items": 120, "_dlt_pipeline_state": state_records, "_dlt_loads": dlt_loads, - "_dlt_version": dlt_versions + "_dlt_version": dlt_versions, } # check trace @@ -105,36 +134,49 @@ def append_items(): "append_items": 12, "items": 120, "items__sub_items": 240, - "items__sub_items__sub_sub_items": 120 + "items__sub_items__sub_sub_items": 120, } - # check we really have the replaced data in our destination - table_dicts = load_tables_to_dicts(pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()]) - assert {x for i,x in enumerate(range(1000, 1120), 1)} == {int(x["id"]) for x in table_dicts["items"]} - assert {x for i,x in enumerate(range(2000, 2000+120), 1)}.union({x for i,x in enumerate(range(3000, 3000+120), 1)}) == {int(x["id"]) for x in table_dicts["items__sub_items"]} - assert {x for i,x in enumerate(range(4000, 4120), 1)} == {int(x["id"]) for x in table_dicts["items__sub_items__sub_sub_items"]} + 
table_dicts = load_tables_to_dicts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert {x for i, x in enumerate(range(1000, 1120), 1)} == { + int(x["id"]) for x in table_dicts["items"] + } + assert {x for i, x in enumerate(range(2000, 2000 + 120), 1)}.union( + {x for i, x in enumerate(range(3000, 3000 + 120), 1)} + ) == {int(x["id"]) for x in table_dicts["items__sub_items"]} + assert {x for i, x in enumerate(range(4000, 4120), 1)} == { + int(x["id"]) for x in table_dicts["items__sub_items__sub_sub_items"] + } - # we need to test that destination tables including child tables are cleared when we yield none from the resource + # we need to test that destination tables including child tables are cleared if we do not yield anything @dlt.resource(name="items", write_disposition="replace", primary_key="id") def load_items_none(): - yield + # do not yield even once + if False: + yield - info = pipeline.run([load_items_none, append_items], loader_file_format=destination_config.file_format) + info = pipeline.run( + [load_items_none, append_items], loader_file_format=destination_config.file_format + ) assert_load_info(info) state_records += increase_state_loads(info) dlt_loads = increase_loads(dlt_loads) # table and child tables should be cleared table_counts = load_table_counts(pipeline, *pipeline.default_schema.tables.keys()) - assert norm_table_counts(table_counts, "items__sub_items", "items__sub_items__sub_sub_items") == { + assert norm_table_counts( + table_counts, "items__sub_items", "items__sub_items__sub_sub_items" + ) == { "append_items": 36, "items": 0, "items__sub_items": 0, "items__sub_items__sub_sub_items": 0, "_dlt_pipeline_state": state_records, "_dlt_loads": dlt_loads, - "_dlt_version": dlt_versions + "_dlt_version": dlt_versions, } # check trace assert pipeline.last_trace.last_normalize_info.row_counts == { @@ -146,8 +188,12 @@ def load_items_none(): # drop_active_pipeline_data() # create a pipeline with different name but loading to the same dataset as above - this is to provoke truncating non existing tables - pipeline_2 = destination_config.setup_pipeline("test_replace_strategies_2", dataset_name=dataset_name) - info = pipeline_2.run(load_items, table_name="items_copy", loader_file_format=destination_config.file_format) + pipeline_2 = destination_config.setup_pipeline( + "test_replace_strategies_2", dataset_name=dataset_name + ) + info = pipeline_2.run( + load_items, table_name="items_copy", loader_file_format=destination_config.file_format + ) assert_load_info(info) new_state_records = increase_state_loads(info) assert new_state_records == 1 @@ -158,7 +204,7 @@ def load_items_none(): "items_copy": 120, "items_copy__sub_items": 240, "items_copy__sub_items__sub_sub_items": 120, - "_dlt_pipeline_state": 1 + "_dlt_pipeline_state": 1, } info = pipeline_2.run(append_items, loader_file_format=destination_config.file_format) @@ -176,7 +222,7 @@ def load_items_none(): "items_copy__sub_items__sub_sub_items": 120, "_dlt_pipeline_state": state_records + 1, "_dlt_loads": dlt_loads, - "_dlt_version": increase_loads(dlt_versions) + "_dlt_version": increase_loads(dlt_versions), } # check trace assert pipeline_2.last_trace.last_normalize_info.row_counts == { @@ -185,50 +231,56 @@ def load_items_none(): # old pipeline -> shares completed loads and versions table table_counts = load_table_counts(pipeline, *pipeline.default_schema.tables.keys()) - assert norm_table_counts(table_counts, "items__sub_items", "items__sub_items__sub_sub_items") == { + assert 
norm_table_counts( + table_counts, "items__sub_items", "items__sub_items__sub_sub_items" + ) == { "append_items": 48, "items": 0, "items__sub_items": 0, "items__sub_items__sub_sub_items": 0, "_dlt_pipeline_state": state_records + 1, "_dlt_loads": dlt_loads, # next load - "_dlt_version": increase_loads(dlt_versions) # new table name -> new schema + "_dlt_version": increase_loads(dlt_versions), # new table name -> new schema } -@pytest.mark.parametrize("destination_config", destinations_configs(local_filesystem_configs=True, default_staging_configs=True, default_sql_configs=True), ids=lambda x: x.name) + +@pytest.mark.parametrize( + "destination_config", + destinations_configs( + local_filesystem_configs=True, default_staging_configs=True, default_sql_configs=True + ), + ids=lambda x: x.name, +) @pytest.mark.parametrize("replace_strategy", REPLACE_STRATEGIES) -def test_replace_table_clearing(destination_config: DestinationTestConfiguration,replace_strategy: str) -> None: +def test_replace_table_clearing( + destination_config: DestinationTestConfiguration, replace_strategy: str +) -> None: if not destination_config.supports_merge and replace_strategy != "truncate-and-insert": - pytest.skip(f"Destination {destination_config.name} does not support merge and thus {replace_strategy}") + pytest.skip( + f"Destination {destination_config.name} does not support merge and thus" + f" {replace_strategy}" + ) # use staging tables for replace - os.environ['DESTINATION__REPLACE_STRATEGY'] = replace_strategy + os.environ["DESTINATION__REPLACE_STRATEGY"] = replace_strategy - pipeline = destination_config.setup_pipeline("test_replace_table_clearing", dataset_name="test_replace_table_clearing", full_refresh=True) + pipeline = destination_config.setup_pipeline( + "test_replace_table_clearing", dataset_name="test_replace_table_clearing", full_refresh=True + ) @dlt.resource(name="main_resource", write_disposition="replace", primary_key="id") def items_with_subitems(): data = { "id": 1, "name": "item", - "sub_items": [{ - "id": 101, - "name": "sub item 101" - },{ - "id": 101, - "name": "sub item 102" - }] + "sub_items": [{"id": 101, "name": "sub item 101"}, {"id": 101, "name": "sub item 102"}], } yield dlt.mark.with_table_name(data, "items") yield dlt.mark.with_table_name(data, "other_items") @dlt.resource(name="main_resource", write_disposition="replace", primary_key="id") def items_without_subitems(): - data = [{ - "id": 1, - "name": "item", - "sub_items": [] - }] + data = [{"id": 1, "name": "item", "sub_items": []}] yield dlt.mark.with_table_name(data, "items") yield dlt.mark.with_table_name(data, "other_items") @@ -236,17 +288,16 @@ def items_without_subitems(): def items_with_subitems_yield_none(): yield None yield None - data = [{ - "id": 1, - "name": "item", - "sub_items": [{ - "id": 101, - "name": "sub item 101" - },{ - "id": 101, - "name": "sub item 102" - }] - }] + data = [ + { + "id": 1, + "name": "item", + "sub_items": [ + {"id": 101, "name": "sub item 101"}, + {"id": 101, "name": "sub item 102"}, + ], + } + ] yield dlt.mark.with_table_name(data, "items") yield dlt.mark.with_table_name(data, "other_items") yield None @@ -257,22 +308,30 @@ def static_items(): yield { "id": 1, "name": "item", - "sub_items": [{ - "id": 101, - "name": "sub item 101" - },{ - "id": 101, - "name": "sub item 102" - }] + "sub_items": [{"id": 101, "name": "sub item 101"}, {"id": 101, "name": "sub item 102"}], } @dlt.resource(name="main_resource", write_disposition="replace", primary_key="id") def yield_none(): yield + 
@dlt.resource(name="main_resource", write_disposition="replace", primary_key="id") + def no_yield(): + # this will not yield even once + if False: + yield + + @dlt.resource(name="main_resource", write_disposition="replace", primary_key="id") + def yield_empty_list(): + yield [] + # regular call - pipeline.run([items_with_subitems, static_items], loader_file_format=destination_config.file_format) - table_counts = load_table_counts(pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()]) + pipeline.run( + [items_with_subitems, static_items], loader_file_format=destination_config.file_format + ) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) assert table_counts["items"] == 1 assert table_counts["items__sub_items"] == 2 assert table_counts["other_items"] == 1 @@ -287,12 +346,14 @@ def yield_none(): "other_items__sub_items": 2, "static_items": 1, "static_items__sub_items": 2, - "_dlt_pipeline_state": 1 + "_dlt_pipeline_state": 1, } # see if child table gets cleared pipeline.run(items_without_subitems, loader_file_format=destination_config.file_format) - table_counts = load_table_counts(pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()]) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) assert table_counts["items"] == 1 assert table_counts.get("items__sub_items", 0) == 0 assert table_counts["other_items"] == 1 @@ -300,30 +361,29 @@ def yield_none(): assert table_counts["static_items"] == 1 assert table_counts["static_items__sub_items"] == 2 # check trace - assert pipeline.last_trace.last_normalize_info.row_counts == { - "items": 1, - "other_items": 1 - } + assert pipeline.last_trace.last_normalize_info.row_counts == {"items": 1, "other_items": 1} # see if yield none clears everything - pipeline.run(items_with_subitems, loader_file_format=destination_config.file_format) - pipeline.run(yield_none, loader_file_format=destination_config.file_format) - table_counts = load_table_counts(pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()]) - assert table_counts.get("items", 0) == 0 - assert table_counts.get("items__sub_items", 0) == 0 - assert table_counts.get("other_items", 0) == 0 - assert table_counts.get("other_items__sub_items", 0) == 0 - assert table_counts["static_items"] == 1 - assert table_counts["static_items__sub_items"] == 2 - # check trace - assert pipeline.last_trace.last_normalize_info.row_counts == { - "items": 0, - "other_items": 0 - } + for empty_resource in [yield_none, no_yield, yield_empty_list]: + pipeline.run(items_with_subitems, loader_file_format=destination_config.file_format) + pipeline.run(empty_resource, loader_file_format=destination_config.file_format) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts.get("items", 0) == 0 + assert table_counts.get("items__sub_items", 0) == 0 + assert table_counts.get("other_items", 0) == 0 + assert table_counts.get("other_items__sub_items", 0) == 0 + assert table_counts["static_items"] == 1 + assert table_counts["static_items__sub_items"] == 2 + # check trace + assert pipeline.last_trace.last_normalize_info.row_counts == {"items": 0, "other_items": 0} # see if yielding something next to other none entries still goes into db pipeline.run(items_with_subitems_yield_none, loader_file_format=destination_config.file_format) - table_counts = load_table_counts(pipeline, *[t["name"] for t in 
pipeline.default_schema.data_tables()]) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) assert table_counts["items"] == 1 assert table_counts["items__sub_items"] == 2 assert table_counts["other_items"] == 1 diff --git a/tests/load/pipeline/test_restore_state.py b/tests/load/pipeline/test_restore_state.py index f80dbbd7e6..381068f1e1 100644 --- a/tests/load/pipeline/test_restore_state.py +++ b/tests/load/pipeline/test_restore_state.py @@ -18,10 +18,14 @@ from tests.utils import TEST_STORAGE_ROOT from tests.cases import JSON_TYPED_DICT, JSON_TYPED_DICT_DECODED -from tests.common.utils import IMPORTED_VERSION_HASH_ETH_V6, yml_case_path as common_yml_case_path +from tests.common.utils import IMPORTED_VERSION_HASH_ETH_V8, yml_case_path as common_yml_case_path from tests.common.configuration.utils import environment from tests.load.pipeline.utils import assert_query_data, drop_active_pipeline_data -from tests.load.utils import destinations_configs, DestinationTestConfiguration, get_normalized_dataset_name +from tests.load.utils import ( + destinations_configs, + DestinationTestConfiguration, + get_normalized_dataset_name, +) @pytest.fixture(autouse=True) @@ -31,10 +35,17 @@ def duckdb_pipeline_location() -> None: del os.environ["DESTINATION__DUCKDB__CREDENTIALS"] -@pytest.mark.parametrize("destination_config", destinations_configs(default_staging_configs=True, default_sql_configs=True, default_vector_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", + destinations_configs( + default_staging_configs=True, default_sql_configs=True, default_vector_configs=True + ), + ids=lambda x: x.name, +) def test_restore_state_utils(destination_config: DestinationTestConfiguration) -> None: - - p = destination_config.setup_pipeline(pipeline_name="pipe_" + uniq_id(), dataset_name="state_test_" + uniq_id()) + p = destination_config.setup_pipeline( + pipeline_name="pipe_" + uniq_id(), dataset_name="state_test_" + uniq_id() + ) schema = Schema("state") # inject schema into pipeline, don't do it in production @@ -54,13 +65,16 @@ def test_restore_state_utils(destination_config: DestinationTestConfiguration) - initial_state = p._get_state() # now add table to schema and sync initial_state["_local"]["_last_extracted_at"] = pendulum.now() + initial_state["_local"]["_last_extracted_hash"] = initial_state["_version_hash"] # add _dlt_id and _dlt_load_id resource = state_resource(initial_state) - resource.apply_hints(columns={ - "_dlt_id": {"name": "_dlt_id", "data_type": "text", "nullable": False}, - "_dlt_load_id": {"name": "_dlt_load_id", "data_type": "text", "nullable": False}, - **STATE_TABLE_COLUMNS - }) + resource.apply_hints( + columns={ + "_dlt_id": {"name": "_dlt_id", "data_type": "text", "nullable": False}, + "_dlt_load_id": {"name": "_dlt_load_id", "data_type": "text", "nullable": False}, + **STATE_TABLE_COLUMNS, + } + ) schema.update_table(schema.normalize_table_identifiers(resource.compute_table_schema())) # do not bump version here or in sync_schema, dlt won't recognize that schema changed and it won't update it in storage # so dlt in normalize stage infers _state_version table again but with different column order and the column order in schema is different @@ -114,11 +128,11 @@ def test_restore_state_utils(destination_config: DestinationTestConfiguration) - assert local_state != new_local_state # version increased assert local_state["_state_version"] + 1 == new_local_state["_state_version"] - # last 
extracted timestamp not present - assert "_last_extracted_at" not in new_local_state_local + # last extracted hash does not match current version hash + assert new_local_state_local["_last_extracted_hash"] != new_local_state["_version_hash"] # use the state context manager again but do not change state - # because _last_extracted_at is not present, the version will not change but state will be extracted anyway + # because _last_extracted_hash is not present (or different), the version will not change but state will be extracted anyway with p.managed_state(extract_state=True): pass new_local_state_2 = p._get_state() @@ -126,6 +140,8 @@ def test_restore_state_utils(destination_config: DestinationTestConfiguration) - assert new_local_state == new_local_state_2 # there's extraction timestamp assert "_last_extracted_at" in new_local_state_2_local + # and extract hash is == hash + assert new_local_state_2_local["_last_extracted_hash"] == new_local_state_2["_version_hash"] # but the version didn't change assert new_local_state["_state_version"] == new_local_state_2["_state_version"] p.normalize(loader_file_format=destination_config.file_format) @@ -137,21 +153,37 @@ def test_restore_state_utils(destination_config: DestinationTestConfiguration) - assert new_stored_state["_state_version"] + 1 == new_stored_state_2["_state_version"] -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, default_vector_configs=True), ids=lambda x: x.name) -def test_silently_skip_on_invalid_credentials(destination_config: DestinationTestConfiguration, environment: Any) -> None: +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, default_vector_configs=True), + ids=lambda x: x.name, +) +def test_silently_skip_on_invalid_credentials( + destination_config: DestinationTestConfiguration, environment: Any +) -> None: environment["CREDENTIALS"] = "postgres://loader:password@localhost:5432/dlt_data" - environment["DESTINATION__BIGQUERY__CREDENTIALS"] = '{"project_id": "chat-analytics-","client_email": "loader@chat-analytics-317513","private_key": "-----BEGIN PRIVATE KEY-----\\nMIIEuwIBADANBgkqhkiG9w0BAQEFAASCBKUwggShAgEAAoIBAQCNEN0bL39HmD"}' + environment["DESTINATION__BIGQUERY__CREDENTIALS"] = ( + '{"project_id": "chat-analytics-","client_email":' + ' "loader@chat-analytics-317513","private_key": "-----BEGIN PRIVATE' + ' KEY-----\\nMIIEuwIBADANBgkqhkiG9w0BAQEFAASCBKUwggShAgEAAoIBAQCNEN0bL39HmD"}' + ) pipeline_name = "pipe_" + uniq_id() - dataset_name="state_test_" + uniq_id() + dataset_name = "state_test_" + uniq_id() # NOTE: we are not restoring the state in __init__ anymore but the test should stay: init should not fail on lack of credentials destination_config.setup_pipeline(pipeline_name=pipeline_name, dataset_name=dataset_name) -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, default_vector_configs=True), ids=lambda x: x.name) -@pytest.mark.parametrize('use_single_dataset', [True, False]) -def test_get_schemas_from_destination(destination_config: DestinationTestConfiguration, use_single_dataset: bool) -> None: +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, default_vector_configs=True), + ids=lambda x: x.name, +) +@pytest.mark.parametrize("use_single_dataset", [True, False]) +def test_get_schemas_from_destination( + destination_config: DestinationTestConfiguration, use_single_dataset: bool +) -> None: pipeline_name = "pipe_" + uniq_id() 
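The `_last_extracted_hash` assertions above boil down to a simple comparison: state is re-extracted whenever the hash recorded at the last extraction no longer matches the current state version hash. A minimal sketch of that comparison, using only the state keys visible in the test (`_version_hash`, `_local`, `_last_extracted_hash`); the helper name is the editor's own illustration, not a dlt API:

```python
# Hypothetical helper mirroring the check exercised in test_restore_state_utils:
# re-extract state only when the hash recorded at the last extraction differs
# from the current version hash.
from typing import Any, Dict


def should_extract_state(state: Dict[str, Any]) -> bool:
    local_state = state.get("_local", {})
    return local_state.get("_last_extracted_hash") != state.get("_version_hash")


# right after a successful extraction both hashes match, so there is nothing new to extract
state = {"_version_hash": "abc123", "_local": {"_last_extracted_hash": "abc123"}}
assert should_extract_state(state) is False

# any state change bumps the version hash, so the next managed-state block extracts again
state["_version_hash"] = "def456"
assert should_extract_state(state) is True
```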
- dataset_name="state_test_" + uniq_id() + dataset_name = "state_test_" + uniq_id() p = destination_config.setup_pipeline(pipeline_name=pipeline_name, dataset_name=dataset_name) p.config.use_single_dataset = use_single_dataset @@ -164,23 +196,29 @@ def _make_dn_name(schema_name: str) -> str: default_schema = Schema("state") p._inject_schema(default_schema) - with p.destination_client() as job_client: + with p.destination_client() as job_client: # just sync schema without name - will use default schema p.sync_schema() - assert get_normalized_dataset_name(job_client) == default_schema.naming.normalize_table_identifier(dataset_name) + assert get_normalized_dataset_name( + job_client + ) == default_schema.naming.normalize_table_identifier(dataset_name) schema_two = Schema("two") with p._get_destination_clients(schema_two)[0] as job_client: # use the job_client to do that job_client.initialize_storage() job_client.update_stored_schema() # this may be a separate dataset depending in use_single_dataset setting - assert get_normalized_dataset_name(job_client) == schema_two.naming.normalize_table_identifier(_make_dn_name("two")) + assert get_normalized_dataset_name( + job_client + ) == schema_two.naming.normalize_table_identifier(_make_dn_name("two")) schema_three = Schema("three") p._inject_schema(schema_three) with p._get_destination_clients(schema_three)[0] as job_client: # sync schema with a name p.sync_schema(schema_three.name) - assert get_normalized_dataset_name(job_client) == schema_three.naming.normalize_table_identifier(_make_dn_name("three")) + assert get_normalized_dataset_name( + job_client + ) == schema_three.naming.normalize_table_identifier(_make_dn_name("three")) # wipe and restore p._wipe_working_folder() @@ -217,11 +255,15 @@ def _make_dn_name(schema_name: str) -> str: assert len(restored_schemas) == 3 -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, default_vector_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, default_vector_configs=True), + ids=lambda x: x.name, +) def test_restore_state_pipeline(destination_config: DestinationTestConfiguration) -> None: os.environ["RESTORE_FROM_DESTINATION"] = "True" pipeline_name = "pipe_" + uniq_id() - dataset_name="state_test_" + uniq_id() + dataset_name = "state_test_" + uniq_id() p = destination_config.setup_pipeline(pipeline_name=pipeline_name, dataset_name=dataset_name) def some_data_gen(param: str) -> Any: @@ -283,7 +325,10 @@ def some_data(): assert p.default_schema_name == "default" assert set(p.schema_names) == set(["default", "two", "three", "four"]) assert p.state["sources"] == { - "default": {'state1': 'state1', 'state2': 'state2'}, "two": {'state3': 'state3'}, "three": {'state4': 'state4'}, "four": {"state5": JSON_TYPED_DICT_DECODED} + "default": {"state1": "state1", "state2": "state2"}, + "two": {"state3": "state3"}, + "three": {"state4": "state4"}, + "four": {"state5": JSON_TYPED_DICT_DECODED}, } for schema in p.schemas.values(): normalized_id = schema.naming.normalize_table_identifier("some_data") @@ -294,7 +339,9 @@ def some_data(): # full refresh will not restore pipeline even if requested p._wipe_working_folder() - p = destination_config.setup_pipeline(pipeline_name=pipeline_name, dataset_name=dataset_name, full_refresh=True) + p = destination_config.setup_pipeline( + pipeline_name=pipeline_name, dataset_name=dataset_name, full_refresh=True + ) p.run() assert p.default_schema_name is None 
drop_active_pipeline_data() @@ -314,11 +361,15 @@ def some_data(): assert restored_state["_state_version"] == orig_state["_state_version"] # second run will not restore - p._inject_schema(Schema("second")) # this will modify state, run does not sync if states are identical + p._inject_schema( + Schema("second") + ) # this will modify state, run does not sync if states are identical assert p.state["_state_version"] > orig_state["_state_version"] # print(p.state) p.run() - assert set(p.schema_names) == set(["default", "two", "three", "second", "four"]) # we keep our local copy + assert set(p.schema_names) == set( + ["default", "two", "three", "second", "four"] + ) # we keep our local copy # clear internal flag and decrease state version so restore triggers state = p.state state["_state_version"] -= 1 @@ -328,10 +379,14 @@ def some_data(): assert set(p.schema_names) == set(["default", "two", "three", "four"]) -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, default_vector_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, default_vector_configs=True), + ids=lambda x: x.name, +) def test_ignore_state_unfinished_load(destination_config: DestinationTestConfiguration) -> None: pipeline_name = "pipe_" + uniq_id() - dataset_name="state_test_" + uniq_id() + dataset_name = "state_test_" + uniq_id() p = destination_config.setup_pipeline(pipeline_name=pipeline_name, dataset_name=dataset_name) @dlt.resource @@ -354,18 +409,24 @@ def complete_package_mock(self, load_id: str, schema: Schema, aborted: bool = Fa assert state is None -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, default_vector_configs=True), ids=lambda x: x.name) -def test_restore_schemas_while_import_schemas_exist(destination_config: DestinationTestConfiguration) -> None: +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, default_vector_configs=True), + ids=lambda x: x.name, +) +def test_restore_schemas_while_import_schemas_exist( + destination_config: DestinationTestConfiguration, +) -> None: # restored schema should attach itself to imported schema and it should not get overwritten import_schema_path = os.path.join(TEST_STORAGE_ROOT, "schemas", "import") export_schema_path = os.path.join(TEST_STORAGE_ROOT, "schemas", "export") pipeline_name = "pipe_" + uniq_id() - dataset_name="state_test_" + uniq_id() + dataset_name = "state_test_" + uniq_id() p = destination_config.setup_pipeline( pipeline_name=pipeline_name, dataset_name=dataset_name, import_schema_path=import_schema_path, - export_schema_path=export_schema_path + export_schema_path=export_schema_path, ) prepare_import_folder(p) # make sure schema got imported @@ -395,16 +456,20 @@ def test_restore_schemas_while_import_schemas_exist(destination_config: Destinat p = dlt.pipeline( pipeline_name=pipeline_name, import_schema_path=import_schema_path, - export_schema_path=export_schema_path + export_schema_path=export_schema_path, ) # use run to get changes - p.run(destination=destination_config.destination, staging=destination_config.staging, dataset_name=dataset_name) + p.run( + destination=destination_config.destination, + staging=destination_config.staging, + dataset_name=dataset_name, + ) schema = p.schemas["ethereum"] assert normalized_labels in schema.tables assert normalized_annotations in schema.tables # check if attached to import schema - assert 
schema._imported_version_hash == IMPORTED_VERSION_HASH_ETH_V6 + assert schema._imported_version_hash == IMPORTED_VERSION_HASH_ETH_V8 # extract some data with restored pipeline p.run(["C", "D", "E"], table_name="blacklist") assert normalized_labels in schema.tables @@ -420,11 +485,14 @@ def test_restore_change_dataset_and_destination(destination_name: str) -> None: pass -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, default_vector_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, default_vector_configs=True), + ids=lambda x: x.name, +) def test_restore_state_parallel_changes(destination_config: DestinationTestConfiguration) -> None: - pipeline_name = "pipe_" + uniq_id() - dataset_name="state_test_" + uniq_id() + dataset_name = "state_test_" + uniq_id() destination_config.setup() p = dlt.pipeline(pipeline_name=pipeline_name) @@ -437,16 +505,26 @@ def some_data(param: str) -> Any: data1 = some_data("state1") data1._pipe.name = "state1_data" - p.run([data1, some_data("state2")], schema=Schema("default"), destination=destination_config.destination, staging=destination_config.staging, dataset_name=dataset_name) + p.run( + [data1, some_data("state2")], + schema=Schema("default"), + destination=destination_config.destination, + staging=destination_config.staging, + dataset_name=dataset_name, + ) orig_state = p.state # create a production pipeline in separate pipelines_dir production_p = dlt.pipeline(pipeline_name=pipeline_name, pipelines_dir=TEST_STORAGE_ROOT) - production_p.run(destination=destination_config.destination, staging=destination_config.staging, dataset_name=dataset_name) + production_p.run( + destination=destination_config.destination, + staging=destination_config.staging, + dataset_name=dataset_name, + ) assert production_p.default_schema_name == "default" prod_state = production_p.state - assert prod_state["sources"] == {"default": {'state1': 'state1', 'state2': 'state2'}} + assert prod_state["sources"] == {"default": {"state1": "state1", "state2": "state2"}} assert prod_state["_state_version"] == orig_state["_state_version"] # generate data on production that modifies the schema but not state data2 = some_data("state1") @@ -505,18 +583,22 @@ def some_data(param: str) -> Any: state_table = client.make_qualified_table_name(p.default_schema.state_table_name) assert_query_data( - p, - f"SELECT version FROM {state_table} ORDER BY created_at DESC", - [5, 4, 4, 3, 2] + p, f"SELECT version FROM {state_table} ORDER BY created_at DESC", [5, 4, 4, 3, 2] ) except SqlClientNotAvailable: pytest.skip(f"destination {destination_config.destination} does not support sql client") -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, default_vector_configs=True), ids=lambda x: x.name) -def test_reset_pipeline_on_deleted_dataset(destination_config: DestinationTestConfiguration) -> None: +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, default_vector_configs=True), + ids=lambda x: x.name, +) +def test_reset_pipeline_on_deleted_dataset( + destination_config: DestinationTestConfiguration, +) -> None: pipeline_name = "pipe_" + uniq_id() - dataset_name="state_test_" + uniq_id() + dataset_name = "state_test_" + uniq_id() destination_config.setup() p = dlt.pipeline(pipeline_name=pipeline_name) @@ -527,7 +609,13 @@ def some_data(param: str) -> Any: data4 = some_data("state4") 
data4.apply_hints(table_name="state1_data4") - p.run(data4, schema=Schema("sch1"), destination=destination_config.destination, staging=destination_config.staging, dataset_name=dataset_name) + p.run( + data4, + schema=Schema("sch1"), + destination=destination_config.destination, + staging=destination_config.staging, + dataset_name=dataset_name, + ) data5 = some_data("state4") data5.apply_hints(table_name="state1_data5") p.run(data5, schema=Schema("sch2")) @@ -550,7 +638,13 @@ def some_data(param: str) -> Any: p.config.restore_from_destination = False data4 = some_data("state4") data4.apply_hints(table_name="state1_data4") - p.run(data4, schema=Schema("sch1"), destination=destination_config.destination, staging=destination_config.staging, dataset_name=dataset_name) + p.run( + data4, + schema=Schema("sch1"), + destination=destination_config.destination, + staging=destination_config.staging, + dataset_name=dataset_name, + ) assert p.first_run is False assert p.state["_local"]["first_run"] is False # attach again to make the `run` method check the destination @@ -566,4 +660,7 @@ def some_data(param: str) -> Any: def prepare_import_folder(p: Pipeline) -> None: os.makedirs(p._schema_storage.config.import_schema_path, exist_ok=True) - shutil.copy(common_yml_case_path("schemas/eth/ethereum_schema_v5"), os.path.join(p._schema_storage.config.import_schema_path, "ethereum.schema.yaml")) + shutil.copy( + common_yml_case_path("schemas/eth/ethereum_schema_v5"), + os.path.join(p._schema_storage.config.import_schema_path, "ethereum.schema.yaml"), + ) diff --git a/tests/load/pipeline/test_stage_loading.py b/tests/load/pipeline/test_stage_loading.py index 9e2e28e5d5..de4a7f4c3b 100644 --- a/tests/load/pipeline/test_stage_loading.py +++ b/tests/load/pipeline/test_stage_loading.py @@ -8,16 +8,24 @@ from dlt.common.schema.typing import TDataType from tests.load.pipeline.test_merge_disposition import github -from tests.load.pipeline.utils import load_table_counts -from tests.pipeline.utils import assert_load_info -from tests.load.utils import TABLE_ROW_ALL_DATA_TYPES, TABLE_UPDATE_COLUMNS_SCHEMA, assert_all_data_types_row +from tests.load.pipeline.utils import load_table_counts +from tests.pipeline.utils import assert_load_info +from tests.load.utils import ( + TABLE_ROW_ALL_DATA_TYPES, + TABLE_UPDATE_COLUMNS_SCHEMA, + assert_all_data_types_row, +) from tests.cases import table_update_and_row from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration -@dlt.resource(table_name="issues", write_disposition="merge", primary_key="id", merge_key=("node_id", "url")) +@dlt.resource( + table_name="issues", write_disposition="merge", primary_key="id", merge_key=("node_id", "url") +) def load_modified_issues(): - with open("tests/normalize/cases/github.issues.load_page_5_duck.json", "r", encoding="utf-8") as f: + with open( + "tests/normalize/cases/github.issues.load_page_5_duck.json", "r", encoding="utf-8" + ) as f: issues = json.load(f) # change 2 issues @@ -30,10 +38,13 @@ def load_modified_issues(): yield from issues -@pytest.mark.parametrize("destination_config", destinations_configs(all_staging_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(all_staging_configs=True), ids=lambda x: x.name +) def test_staging_load(destination_config: DestinationTestConfiguration) -> None: - - pipeline = destination_config.setup_pipeline(pipeline_name='test_stage_loading_5', dataset_name="test_staging_load" + uniq_id()) + pipeline = 
destination_config.setup_pipeline( + pipeline_name="test_stage_loading_5", dataset_name="test_staging_load" + uniq_id() + ) info = pipeline.run(github(), loader_file_format=destination_config.file_format) assert_load_info(info) @@ -44,12 +55,41 @@ def test_staging_load(destination_config: DestinationTestConfiguration) -> None: # we have 4 parquet and 4 reference jobs plus one merge job num_jobs = 4 + 4 + 1 if destination_config.supports_merge else 4 + 4 assert len(package_info.jobs["completed_jobs"]) == num_jobs - assert len([x for x in package_info.jobs["completed_jobs"] if x.job_file_info.file_format == "reference"]) == 4 - assert len([x for x in package_info.jobs["completed_jobs"] if x.job_file_info.file_format == destination_config.file_format]) == 4 + assert ( + len( + [ + x + for x in package_info.jobs["completed_jobs"] + if x.job_file_info.file_format == "reference" + ] + ) + == 4 + ) + assert ( + len( + [ + x + for x in package_info.jobs["completed_jobs"] + if x.job_file_info.file_format == destination_config.file_format + ] + ) + == 4 + ) if destination_config.supports_merge: - assert len([x for x in package_info.jobs["completed_jobs"] if x.job_file_info.file_format == "sql"]) == 1 + assert ( + len( + [ + x + for x in package_info.jobs["completed_jobs"] + if x.job_file_info.file_format == "sql" + ] + ) + == 1 + ) - initial_counts = load_table_counts(pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()]) + initial_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) assert initial_counts["issues"] == 100 # check item of first row in db @@ -62,7 +102,9 @@ def test_staging_load(destination_config: DestinationTestConfiguration) -> None: info = pipeline.run(load_modified_issues, loader_file_format=destination_config.file_format) assert_load_info(info) assert pipeline.default_schema.tables["issues"]["write_disposition"] == "merge" - merge_counts = load_table_counts(pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()]) + merge_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) assert merge_counts == initial_counts # check changes where merged in @@ -73,39 +115,62 @@ def test_staging_load(destination_config: DestinationTestConfiguration) -> None: assert rows[0][0] == 300 # test append - info = pipeline.run(github().load_issues, write_disposition="append", loader_file_format=destination_config.file_format) + info = pipeline.run( + github().load_issues, + write_disposition="append", + loader_file_format=destination_config.file_format, + ) assert_load_info(info) assert pipeline.default_schema.tables["issues"]["write_disposition"] == "append" # the counts of all tables must be double - append_counts = load_table_counts(pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()]) - assert {k:v*2 for k, v in initial_counts.items()} == append_counts + append_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert {k: v * 2 for k, v in initial_counts.items()} == append_counts # test replace - info = pipeline.run(github().load_issues, write_disposition="replace", loader_file_format=destination_config.file_format) + info = pipeline.run( + github().load_issues, + write_disposition="replace", + loader_file_format=destination_config.file_format, + ) assert_load_info(info) assert pipeline.default_schema.tables["issues"]["write_disposition"] == "replace" # the counts of all tables must be double - 
replace_counts = load_table_counts(pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()]) + replace_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) assert replace_counts == initial_counts -@pytest.mark.parametrize("destination_config", destinations_configs(all_staging_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(all_staging_configs=True), ids=lambda x: x.name +) def test_all_data_types(destination_config: DestinationTestConfiguration) -> None: - - pipeline = destination_config.setup_pipeline('test_stage_loading', dataset_name="test_all_data_types" + uniq_id()) + pipeline = destination_config.setup_pipeline( + "test_stage_loading", dataset_name="test_all_data_types" + uniq_id() + ) # Redshift parquet -> exclude col7_precision # redshift and athena, parquet and jsonl, exclude time types exclude_types: List[TDataType] = [] exclude_columns: List[str] = [] - if destination_config.destination in ("redshift", "athena") and destination_config.file_format in ('parquet', 'jsonl'): + if destination_config.destination in ( + "redshift", + "athena", + ) and destination_config.file_format in ("parquet", "jsonl"): # Redshift copy doesn't support TIME column exclude_types.append("time") - if destination_config.destination == "redshift" and destination_config.file_format in ("parquet", "jsonl"): + if destination_config.destination == "redshift" and destination_config.file_format in ( + "parquet", + "jsonl", + ): # Redshift can't load fixed width binary columns from parquet exclude_columns.append("col7_precision") - column_schemas, data_types = table_update_and_row(exclude_types=exclude_types, exclude_columns=exclude_columns) + column_schemas, data_types = table_update_and_row( + exclude_types=exclude_types, exclude_columns=exclude_columns + ) # bigquery cannot load into JSON fields from parquet if destination_config.file_format == "parquet": @@ -124,7 +189,7 @@ def test_all_data_types(destination_config: DestinationTestConfiguration) -> Non @dlt.resource(table_name="data_types", write_disposition="merge", columns=column_schemas) def my_resource(): nonlocal data_types - yield [data_types]*10 + yield [data_types] * 10 @dlt.source(max_table_nesting=0) def my_source(): @@ -138,13 +203,19 @@ def my_source(): assert len(db_rows) == 10 db_row = list(db_rows[0]) # parquet is not really good at inserting json, best we get are strings in JSON columns - parse_complex_strings = destination_config.file_format == "parquet" and destination_config.destination in ["redshift", "bigquery", "snowflake"] - allow_base64_binary = destination_config.file_format == "jsonl" and destination_config.destination in ["redshift"] + parse_complex_strings = ( + destination_config.file_format == "parquet" + and destination_config.destination in ["redshift", "bigquery", "snowflake"] + ) + allow_base64_binary = ( + destination_config.file_format == "jsonl" + and destination_config.destination in ["redshift"] + ) # content must equal assert_all_data_types_row( db_row[:-2], parse_complex_strings=parse_complex_strings, allow_base64_binary=allow_base64_binary, timestamp_precision=sql_client.capabilities.timestamp_precision, - schema=column_schemas + schema=column_schemas, ) diff --git a/tests/load/pipeline/test_write_disposition_changes.py b/tests/load/pipeline/test_write_disposition_changes.py index 158993b7c8..11356cdd20 100644 --- a/tests/load/pipeline/test_write_disposition_changes.py +++ 
b/tests/load/pipeline/test_write_disposition_changes.py @@ -1,61 +1,80 @@ import pytest import dlt from typing import Any -from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration, assert_data_table_counts +from tests.load.pipeline.utils import ( + destinations_configs, + DestinationTestConfiguration, + assert_data_table_counts, +) from tests.pipeline.utils import assert_load_info from dlt.pipeline.exceptions import PipelineStepFailed + def data_with_subtables(offset: int) -> Any: - for _, index in enumerate(range(offset, offset+100), 1): + for _, index in enumerate(range(offset, offset + 100), 1): yield { "id": index, "name": f"item {index}", - "sub_items": [{ - "id": index + 1000, - "name": f"sub item {index + 1000}" - }] + "sub_items": [{"id": index + 1000, "name": f"sub item {index + 1000}"}], } -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) + +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_switch_from_merge(destination_config: DestinationTestConfiguration): - pipeline = destination_config.setup_pipeline(pipeline_name='test_switch_from_merge', full_refresh=True) + pipeline = destination_config.setup_pipeline( + pipeline_name="test_switch_from_merge", full_refresh=True + ) - info = (pipeline.run(data_with_subtables(10), table_name="items", write_disposition="merge")) - assert_data_table_counts(pipeline, { - "items": 100, - "items__sub_items": 100 - }) - assert pipeline.default_schema._normalizers_config["json"]["config"]["propagation"]["tables"]["items"] == {'_dlt_id': '_dlt_root_id'} + info = pipeline.run(data_with_subtables(10), table_name="items", write_disposition="merge") + assert_data_table_counts(pipeline, {"items": 100, "items__sub_items": 100}) + assert pipeline.default_schema._normalizers_config["json"]["config"]["propagation"]["tables"][ + "items" + ] == {"_dlt_id": "_dlt_root_id"} - info = (pipeline.run(data_with_subtables(10), table_name="items", write_disposition="merge")) + info = pipeline.run(data_with_subtables(10), table_name="items", write_disposition="merge") assert_load_info(info) - assert_data_table_counts(pipeline, { - "items": 100 if destination_config.supports_merge else 200, - "items__sub_items": 100 if destination_config.supports_merge else 200 - }) - assert pipeline.default_schema._normalizers_config["json"]["config"]["propagation"]["tables"]["items"] == {'_dlt_id': '_dlt_root_id'} - - info = (pipeline.run(data_with_subtables(10), table_name="items", write_disposition="append")) + assert_data_table_counts( + pipeline, + { + "items": 100 if destination_config.supports_merge else 200, + "items__sub_items": 100 if destination_config.supports_merge else 200, + }, + ) + assert pipeline.default_schema._normalizers_config["json"]["config"]["propagation"]["tables"][ + "items" + ] == {"_dlt_id": "_dlt_root_id"} + + info = pipeline.run(data_with_subtables(10), table_name="items", write_disposition="append") assert_load_info(info) - assert_data_table_counts(pipeline, { - "items": 200 if destination_config.supports_merge else 300, - "items__sub_items": 200 if destination_config.supports_merge else 300 - }) - assert pipeline.default_schema._normalizers_config["json"]["config"]["propagation"]["tables"]["items"] == {'_dlt_id': '_dlt_root_id'} - - info = (pipeline.run(data_with_subtables(10), table_name="items", write_disposition="replace")) + assert_data_table_counts( + pipeline, + { + 
"items": 200 if destination_config.supports_merge else 300, + "items__sub_items": 200 if destination_config.supports_merge else 300, + }, + ) + assert pipeline.default_schema._normalizers_config["json"]["config"]["propagation"]["tables"][ + "items" + ] == {"_dlt_id": "_dlt_root_id"} + + info = pipeline.run(data_with_subtables(10), table_name="items", write_disposition="replace") assert_load_info(info) - assert_data_table_counts(pipeline, { - "items": 100, - "items__sub_items": 100 - }) - assert pipeline.default_schema._normalizers_config["json"]["config"]["propagation"]["tables"]["items"] == {'_dlt_id': '_dlt_root_id'} + assert_data_table_counts(pipeline, {"items": 100, "items__sub_items": 100}) + assert pipeline.default_schema._normalizers_config["json"]["config"]["propagation"]["tables"][ + "items" + ] == {"_dlt_id": "_dlt_root_id"} -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) @pytest.mark.parametrize("with_root_key", [True, False]) def test_switch_to_merge(destination_config: DestinationTestConfiguration, with_root_key: bool): - pipeline = destination_config.setup_pipeline(pipeline_name='test_switch_to_merge', full_refresh=True) + pipeline = destination_config.setup_pipeline( + pipeline_name="test_switch_to_merge", full_refresh=True + ) @dlt.resource() def resource(): @@ -68,16 +87,17 @@ def source(): s = source() s.root_key = with_root_key - info = (pipeline.run(s, table_name="items", write_disposition="append")) - assert_data_table_counts(pipeline, { - "items": 100, - "items__sub_items": 100 - }) + info = pipeline.run(s, table_name="items", write_disposition="append") + assert_data_table_counts(pipeline, {"items": 100, "items__sub_items": 100}) if with_root_key: - assert pipeline.default_schema._normalizers_config["json"]["config"]["propagation"]["root"] == {'_dlt_id': '_dlt_root_id'} + assert pipeline.default_schema._normalizers_config["json"]["config"]["propagation"][ + "root" + ] == {"_dlt_id": "_dlt_root_id"} else: - assert "propagation" not in pipeline.default_schema._normalizers_config["json"]["config"] + assert "propagation" not in pipeline.default_schema._normalizers_config["json"].get( + "config", {} + ) # without a root key this will fail, it is expected if not with_root_key and destination_config.supports_merge: @@ -85,11 +105,15 @@ def source(): pipeline.run(s, table_name="items", write_disposition="merge") return - info = (pipeline.run(s, table_name="items", write_disposition="merge")) + info = pipeline.run(s, table_name="items", write_disposition="merge") assert_load_info(info) - assert_data_table_counts(pipeline, { - "items": 100 if destination_config.supports_merge else 200, - "items__sub_items": 100 if destination_config.supports_merge else 200, - }) - assert pipeline.default_schema._normalizers_config["json"]["config"]["propagation"]["tables"]["items"] == {'_dlt_id': '_dlt_root_id'} - + assert_data_table_counts( + pipeline, + { + "items": 100 if destination_config.supports_merge else 200, + "items__sub_items": 100 if destination_config.supports_merge else 200, + }, + ) + assert pipeline.default_schema._normalizers_config["json"]["config"]["propagation"]["tables"][ + "items" + ] == {"_dlt_id": "_dlt_root_id"} diff --git a/tests/load/pipeline/utils.py b/tests/load/pipeline/utils.py index 752571591c..17360e76fd 100644 --- a/tests/load/pipeline/utils.py +++ 
b/tests/load/pipeline/utils.py @@ -1,22 +1,26 @@ -import posixpath, os -from typing import Any, Iterator, List, Sequence, TYPE_CHECKING, Optional, Tuple, Dict, Callable +from typing import Any, Iterator, List, Sequence, TYPE_CHECKING, Callable import pytest import dlt from dlt.common.destination.reference import WithStagingDataset -from dlt.pipeline.pipeline import Pipeline -from dlt.common import json from dlt.common.configuration.container import Container from dlt.common.pipeline import LoadInfo, PipelineContext -from dlt.common.typing import DictStrAny -from dlt.pipeline.exceptions import SqlClientNotAvailable -from dlt.common.schema.typing import LOADS_TABLE_NAME +from tests.pipeline.utils import ( + load_table_counts, + load_data_table_counts, + assert_data_table_counts, + load_file, + load_files, + load_tables_to_dicts, + load_table_distinct_counts, +) from tests.load.utils import DestinationTestConfiguration, destinations_configs if TYPE_CHECKING: - from dlt.destinations.filesystem.filesystem import FilesystemClient + from dlt.destinations.impl.filesystem.filesystem import FilesystemClient + @pytest.fixture(autouse=True) def drop_pipeline(request) -> Iterator[None]: @@ -67,22 +71,47 @@ def _drop_dataset(schema_name: str) -> None: def _is_filesystem(p: dlt.Pipeline) -> bool: if not p.destination: return False - return p.destination.__name__.rsplit('.', 1)[-1] == 'filesystem' + return p.destination.destination_name == "filesystem" -def assert_table(p: dlt.Pipeline, table_name: str, table_data: List[Any], schema_name: str = None, info: LoadInfo = None) -> None: +def assert_table( + p: dlt.Pipeline, + table_name: str, + table_data: List[Any], + schema_name: str = None, + info: LoadInfo = None, +) -> None: func = _assert_table_fs if _is_filesystem(p) else _assert_table_sql func(p, table_name, table_data, schema_name, info) -def _assert_table_sql(p: dlt.Pipeline, table_name: str, table_data: List[Any], schema_name: str = None, info: LoadInfo = None) -> None: +def _assert_table_sql( + p: dlt.Pipeline, + table_name: str, + table_data: List[Any], + schema_name: str = None, + info: LoadInfo = None, +) -> None: with p.sql_client(schema_name=schema_name) as c: table_name = c.make_qualified_table_name(table_name) # Implement NULLS FIRST sort in python - assert_query_data(p, f"SELECT * FROM {table_name} ORDER BY 1", table_data, schema_name, info, sort_key=lambda row: row[0] is not None) - - -def _assert_table_fs(p: dlt.Pipeline, table_name: str, table_data: List[Any], schema_name: str = None, info: LoadInfo = None) -> None: + assert_query_data( + p, + f"SELECT * FROM {table_name} ORDER BY 1", + table_data, + schema_name, + info, + sort_key=lambda row: row[0] is not None, + ) + + +def _assert_table_fs( + p: dlt.Pipeline, + table_name: str, + table_data: List[Any], + schema_name: str = None, + info: LoadInfo = None, +) -> None: """Assert table is loaded to filesystem destination""" client: FilesystemClient = p.destination_client(schema_name) # type: ignore[assignment] # get table directory @@ -102,7 +131,14 @@ def select_data(p: dlt.Pipeline, sql: str, schema_name: str = None) -> List[Sequ return list(cur.fetchall()) -def assert_query_data(p: dlt.Pipeline, sql: str, table_data: List[Any], schema_name: str = None, info: LoadInfo = None, sort_key: Callable[[Any], Any] = None) -> None: +def assert_query_data( + p: dlt.Pipeline, + sql: str, + table_data: List[Any], + schema_name: str = None, + info: LoadInfo = None, + sort_key: Callable[[Any], Any] = None, +) -> None: """Asserts that query selecting 
single column of values matches `table_data`. If `info` is provided, second column must contain one of load_ids in `info` Args: @@ -120,149 +156,3 @@ def assert_query_data(p: dlt.Pipeline, sql: str, table_data: List[Any], schema_n # the second is load id if info: assert row[1] in info.loads_ids - - -def load_file(path: str, file: str) -> Tuple[str, List[Dict[str, Any]]]: - """ - util function to load a filesystem destination file and return parsed content - values may not be cast to the right type, especially for insert_values, please - make sure to do conversions and casting if needed in your tests - """ - result: List[Dict[str, Any]] = [] - - # check if this is a file we want to read - file_name_items = file.split(".") - ext = file_name_items[-1] - if ext not in ["jsonl", "insert_values", "parquet"]: - return "skip", [] - - # table name will be last element of path - table_name = path.split("/")[-1] - - # skip loads table - if table_name == "_dlt_loads": - return table_name, [] - - full_path = posixpath.join(path, file) - - # load jsonl - if ext == "jsonl": - with open(full_path, "rU", encoding="utf-8") as f: - for line in f: - result.append(json.loads(line)) - - # load insert_values (this is a bit volatile if the exact format of the source file changes) - elif ext == "insert_values": - with open(full_path, "rU", encoding="utf-8") as f: - lines = f.readlines() - # extract col names - cols = lines[0][15:-2].split(",") - for line in lines[2:]: - values = line[1:-3].split(",") - result.append(dict(zip(cols, values))) - - # load parquet - elif ext == "parquet": - import pyarrow.parquet as pq - with open(full_path, "rb") as f: - table = pq.read_table(f) - cols = table.column_names - count = 0 - for column in table: - column_name = cols[count] - item_count = 0 - for item in column.to_pylist(): - if len(result) <= item_count: - result.append({column_name: item}) - else: - result[item_count][column_name] = item - item_count += 1 - count += 1 - - return table_name, result - - -def load_files(p: dlt.Pipeline, *table_names: str) -> Dict[str, List[Dict[str, Any]]]: - """For now this will expect the standard layout in the filesystem destination, if changed the results will not be correct""" - client: FilesystemClient = p.destination_client() # type: ignore[assignment] - result: Dict[str, Any] = {} - for basedir, _dirs, files in client.fs_client.walk(client.dataset_path, detail=False, refresh=True): - for file in files: - table_name, items = load_file(basedir, file) - if table_name not in table_names: - continue - if table_name in result: - result[table_name] = result[table_name] + items - else: - result[table_name] = items - - # loads file is special case - if LOADS_TABLE_NAME in table_names and file.find(".{LOADS_TABLE_NAME}."): - result[LOADS_TABLE_NAME] = [] - - return result - - -def load_table_counts(p: dlt.Pipeline, *table_names: str) -> DictStrAny: - """Returns row counts for `table_names` as dict""" - - # try sql, could be other destination though - try: - with p.sql_client() as c: - qualified_names = [c.make_qualified_table_name(name) for name in table_names] - query = "\nUNION ALL\n".join([f"SELECT '{name}' as name, COUNT(1) as c FROM {q_name}" for name, q_name in zip(table_names, qualified_names)]) - with c.execute_query(query) as cur: - rows = list(cur.fetchall()) - return {r[0]: r[1] for r in rows} - except SqlClientNotAvailable: - pass - - # try filesystem - file_tables = load_files(p, *table_names) - result = {} - for table_name, items in file_tables.items(): - result[table_name] = 
len(items) - return result - -def load_data_table_counts(p: dlt.Pipeline) -> DictStrAny: - tables = [table["name"] for table in p.default_schema.data_tables()] - return load_table_counts(p, *tables) - - -def assert_data_table_counts(p: dlt.Pipeline, expected_counts: DictStrAny) -> None: - table_counts = load_data_table_counts(p) - assert table_counts == expected_counts, f"Table counts do not match, expected {expected_counts}, got {table_counts}" - - -def load_tables_to_dicts(p: dlt.Pipeline, *table_names: str) -> Dict[str, List[Dict[str, Any]]]: - - # try sql, could be other destination though - try: - result = {} - for table_name in table_names: - table_rows = [] - columns = p.default_schema.get_table_columns(table_name).keys() - query_columns = ",".join(columns) - - with p.sql_client() as c: - f_q_table_name = c.make_qualified_table_name(table_name) - query = f"SELECT {query_columns} FROM {f_q_table_name}" - with c.execute_query(query) as cur: - for row in list(cur.fetchall()): - table_rows.append(dict(zip(columns, row))) - result[table_name] = table_rows - return result - - except SqlClientNotAvailable: - pass - - # try files - return load_files(p, *table_names) - -def load_table_distinct_counts(p: dlt.Pipeline, distinct_column: str, *table_names: str) -> DictStrAny: - """Returns counts of distinct values for column `distinct_column` for `table_names` as dict""" - query = "\nUNION ALL\n".join([f"SELECT '{name}' as name, COUNT(DISTINCT {distinct_column}) as c FROM {name}" for name in table_names]) - with p.sql_client() as c: - with c.execute_query(query) as cur: - rows = list(cur.fetchall()) - return {r[0]: r[1] for r in rows} diff --git a/tests/load/postgres/test_postgres_client.py b/tests/load/postgres/test_postgres_client.py index dcc242cf50..83b59bddaf 100644 --- a/tests/load/postgres/test_postgres_client.py +++ b/tests/load/postgres/test_postgres_client.py @@ -7,9 +7,9 @@ from dlt.common.storages import FileStorage from dlt.common.utils import uniq_id -from dlt.destinations.postgres.configuration import PostgresCredentials -from dlt.destinations.postgres.postgres import PostgresClient -from dlt.destinations.postgres.sql_client import psycopg2 +from dlt.destinations.impl.postgres.configuration import PostgresCredentials +from dlt.destinations.impl.postgres.postgres import PostgresClient +from dlt.destinations.impl.postgres.sql_client import psycopg2 from tests.utils import TEST_STORAGE_ROOT, delete_test_storage, skipifpypy, preserve_environ from tests.load.utils import expect_load_file, prepare_table, yield_client_with_storage @@ -43,14 +43,20 @@ def test_postgres_credentials_defaults() -> None: def test_postgres_credentials_native_value(environment) -> None: with pytest.raises(ConfigFieldMissingException): - resolve_configuration(PostgresCredentials(), explicit_value="postgres://loader@localhost/dlt_data") + resolve_configuration( + PostgresCredentials(), explicit_value="postgres://loader@localhost/dlt_data" + ) # set password via env os.environ["CREDENTIALS__PASSWORD"] = "pass" - c = resolve_configuration(PostgresCredentials(), explicit_value="postgres://loader@localhost/dlt_data") + c = resolve_configuration( + PostgresCredentials(), explicit_value="postgres://loader@localhost/dlt_data" + ) assert c.is_resolved() assert c.password == "pass" # but if password is specified - it is final - c = resolve_configuration(PostgresCredentials(), explicit_value="postgres://loader:loader@localhost/dlt_data") + c = resolve_configuration( + PostgresCredentials(), 
explicit_value="postgres://loader:loader@localhost/dlt_data" + ) assert c.is_resolved() assert c.password == "loader" @@ -68,14 +74,32 @@ def test_wei_value(client: PostgresClient, file_storage: FileStorage) -> None: user_table_name = prepare_table(client) # postgres supports EVM precisions - insert_sql = "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp, parse_data__metadata__rasa_x_id)\nVALUES\n" - insert_values = f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd', '{str(pendulum.now())}', {Wei.from_int256(2*256-1)});" - expect_load_file(client, file_storage, insert_sql+insert_values, user_table_name) - - insert_sql = "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp, parse_data__metadata__rasa_x_id)\nVALUES\n" - insert_values = f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd', '{str(pendulum.now())}', {Wei.from_int256(2*256-1, 18)});" - expect_load_file(client, file_storage, insert_sql+insert_values, user_table_name) - - insert_sql = "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp, parse_data__metadata__rasa_x_id)\nVALUES\n" - insert_values = f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd', '{str(pendulum.now())}', {Wei.from_int256(2*256-1, 78)});" - expect_load_file(client, file_storage, insert_sql+insert_values, user_table_name) + insert_sql = ( + "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp," + " parse_data__metadata__rasa_x_id)\nVALUES\n" + ) + insert_values = ( + f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd'," + f" '{str(pendulum.now())}', {Wei.from_int256(2*256-1)});" + ) + expect_load_file(client, file_storage, insert_sql + insert_values, user_table_name) + + insert_sql = ( + "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp," + " parse_data__metadata__rasa_x_id)\nVALUES\n" + ) + insert_values = ( + f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd'," + f" '{str(pendulum.now())}', {Wei.from_int256(2*256-1, 18)});" + ) + expect_load_file(client, file_storage, insert_sql + insert_values, user_table_name) + + insert_sql = ( + "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp," + " parse_data__metadata__rasa_x_id)\nVALUES\n" + ) + insert_values = ( + f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd'," + f" '{str(pendulum.now())}', {Wei.from_int256(2*256-1, 78)});" + ) + expect_load_file(client, file_storage, insert_sql + insert_values, user_table_name) diff --git a/tests/load/postgres/test_postgres_table_builder.py b/tests/load/postgres/test_postgres_table_builder.py index 165c62a468..68e6702b75 100644 --- a/tests/load/postgres/test_postgres_table_builder.py +++ b/tests/load/postgres/test_postgres_table_builder.py @@ -5,11 +5,15 @@ from dlt.common.utils import uniq_id from dlt.common.schema import Schema -from dlt.destinations.postgres.postgres import PostgresClient -from dlt.destinations.postgres.configuration import PostgresClientConfiguration, PostgresCredentials +from dlt.destinations.impl.postgres.postgres import PostgresClient +from dlt.destinations.impl.postgres.configuration import ( + PostgresClientConfiguration, + PostgresCredentials, +) from tests.load.utils import TABLE_UPDATE + @pytest.fixture def schema() -> Schema: return Schema("event") @@ -18,7 +22,12 @@ def schema() -> Schema: @pytest.fixture def client(schema: Schema) -> PostgresClient: # return client without opening connection - return PostgresClient(schema, PostgresClientConfiguration(dataset_name="test_" + uniq_id(), 
credentials=PostgresCredentials())) + return PostgresClient( + schema, + PostgresClientConfiguration( + dataset_name="test_" + uniq_id(), credentials=PostgresCredentials() + ), + ) def test_create_table(client: PostgresClient) -> None: @@ -89,7 +98,14 @@ def test_create_table_with_hints(client: PostgresClient) -> None: assert '"col4" timestamp with time zone NOT NULL' in sql # same thing without indexes - client = PostgresClient(client.schema, PostgresClientConfiguration(dataset_name="test_" + uniq_id(), create_indexes=False, credentials=PostgresCredentials())) + client = PostgresClient( + client.schema, + PostgresClientConfiguration( + dataset_name="test_" + uniq_id(), + create_indexes=False, + credentials=PostgresCredentials(), + ), + ) sql = client._get_table_update_sql("event_test_table", mod_update, False)[0] sqlfluff.parse(sql, dialect="postgres") assert '"col2" double precision NOT NULL' in sql diff --git a/tests/load/qdrant/test_pipeline.py b/tests/load/qdrant/test_pipeline.py index 303a5de69f..c24c309ca6 100644 --- a/tests/load/qdrant/test_pipeline.py +++ b/tests/load/qdrant/test_pipeline.py @@ -5,11 +5,12 @@ from dlt.common import json from dlt.common.utils import uniq_id -from dlt.destinations.qdrant.qdrant_adapter import qdrant_adapter, VECTORIZE_HINT -from dlt.destinations.qdrant.qdrant_client import QdrantClient +from dlt.destinations.impl.qdrant.qdrant_adapter import qdrant_adapter, VECTORIZE_HINT +from dlt.destinations.impl.qdrant.qdrant_client import QdrantClient from tests.pipeline.utils import assert_load_info from tests.load.qdrant.utils import drop_active_pipeline_data, assert_collection + @pytest.fixture(autouse=True) def drop_qdrant_data() -> Iterator[None]: yield @@ -146,7 +147,6 @@ def some_data(): def test_pipeline_replace() -> None: - generator_instance1 = sequence_generator() generator_instance2 = sequence_generator() @@ -164,7 +164,8 @@ def some_data(): pipeline = dlt.pipeline( pipeline_name="test_pipeline_replace", destination="qdrant", - dataset_name="test_pipeline_replace_dataset" + uid, # Qdrant doesn't mandate any name normalization + dataset_name="test_pipeline_replace_dataset" + + uid, # Qdrant doesn't mandate any name normalization ) info = pipeline.run( @@ -172,7 +173,9 @@ def some_data(): write_disposition="replace", ) assert_load_info(info) - assert info.dataset_name == "test_pipeline_replace_dataset" + uid # Qdrant doesn't mandate any name normalization + assert ( + info.dataset_name == "test_pipeline_replace_dataset" + uid + ) # Qdrant doesn't mandate any name normalization data = next(generator_instance2) assert_collection(pipeline, "some_data", items=data) @@ -193,16 +196,14 @@ def test_pipeline_merge() -> None: "doc_id": 1, "title": "The Shawshank Redemption", "description": ( - "Two imprisoned men find redemption through acts " - "of decency over the years." + "Two imprisoned men find redemption through acts of decency over the years." ), }, { "doc_id": 2, "title": "The Godfather", "description": ( - "A crime dynasty's aging patriarch transfers " - "control to his reluctant son." + "A crime dynasty's aging patriarch transfers control to his reluctant son." 
), }, { @@ -230,9 +231,7 @@ def movies_data(): dataset_name="TestPipelineAppendDataset" + uniq_id(), ) info = pipeline.run( - movies_data(), - write_disposition="merge", - dataset_name="MoviesDataset" + uniq_id() + movies_data(), write_disposition="merge", dataset_name="MoviesDataset" + uniq_id() ) assert_load_info(info) assert_collection(pipeline, "movies_data", items=data) @@ -308,21 +307,38 @@ def test_merge_github_nested() -> None: p = dlt.pipeline(destination="qdrant", dataset_name="github1", full_refresh=True) assert p.dataset_name.startswith("github1_202") - with open("tests/normalize/cases/github.issues.load_page_5_duck.json", "r", encoding="utf-8") as f: + with open( + "tests/normalize/cases/github.issues.load_page_5_duck.json", "r", encoding="utf-8" + ) as f: data = json.load(f) info = p.run( qdrant_adapter(data[:17], embed=["title", "body"]), table_name="issues", write_disposition="merge", - primary_key="id" + primary_key="id", ) assert_load_info(info) # assert if schema contains tables with right names print(p.default_schema.tables.keys()) - assert set(p.default_schema.tables.keys()) == {'_dlt_version', '_dlt_loads', 'issues', '_dlt_pipeline_state', 'issues__labels', 'issues__assignees'} - assert set([t["name"] for t in p.default_schema.data_tables()]) == {'issues', 'issues__labels', 'issues__assignees'} - assert set([t["name"] for t in p.default_schema.dlt_tables()]) == {'_dlt_version', '_dlt_loads', '_dlt_pipeline_state'} + assert set(p.default_schema.tables.keys()) == { + "_dlt_version", + "_dlt_loads", + "issues", + "_dlt_pipeline_state", + "issues__labels", + "issues__assignees", + } + assert set([t["name"] for t in p.default_schema.data_tables()]) == { + "issues", + "issues__labels", + "issues__assignees", + } + assert set([t["name"] for t in p.default_schema.dlt_tables()]) == { + "_dlt_version", + "_dlt_loads", + "_dlt_pipeline_state", + } issues = p.default_schema.tables["issues"] assert issues["columns"]["id"]["primary_key"] is True # make sure that vectorization is enabled for @@ -345,4 +361,3 @@ def test_empty_dataset_allowed() -> None: assert client.dataset_name is None assert client.sentinel_collection == "DltSentinelCollection" assert_collection(p, "content", expected_items_count=3) - diff --git a/tests/load/qdrant/utils.py b/tests/load/qdrant/utils.py index 96b582a28e..74d5db9715 100644 --- a/tests/load/qdrant/utils.py +++ b/tests/load/qdrant/utils.py @@ -5,7 +5,7 @@ from dlt.common.pipeline import PipelineContext from dlt.common.configuration.container import Container -from dlt.destinations.qdrant.qdrant_client import QdrantClient +from dlt.destinations.impl.qdrant.qdrant_client import QdrantClient def assert_unordered_list_equal(list1: List[Any], list2: List[Any]) -> None: @@ -20,14 +20,16 @@ def assert_collection( expected_items_count: int = None, items: List[Any] = None, ) -> None: - client: QdrantClient = pipeline.destination_client() # type: ignore[assignment] + client: QdrantClient = pipeline.destination_client() # type: ignore[assignment] # Check if collection exists exists = client._collection_exists(collection_name) assert exists qualified_collection_name = client._make_qualified_collection_name(collection_name) - point_records, offset = client.db_client.scroll(qualified_collection_name, with_payload=True, limit=50) + point_records, offset = client.db_client.scroll( + qualified_collection_name, with_payload=True, limit=50 + ) if expected_items_count is not None: assert expected_items_count == len(point_records) @@ -42,8 +44,10 @@ def assert_collection( 
assert_unordered_list_equal(objects_without_dlt_keys, items) + def drop_active_pipeline_data() -> None: print("Dropping active pipeline data for test") + def has_collections(client): schema = client.db_client.get_collections().collections return len(schema) > 0 @@ -51,7 +55,7 @@ def has_collections(client): if Container()[PipelineContext].is_active(): # take existing pipeline p = dlt.pipeline() - client: QdrantClient = p.destination_client() # type: ignore[assignment] + client: QdrantClient = p.destination_client() # type: ignore[assignment] if has_collections(client): client.drop_storage() diff --git a/tests/load/redshift/test_redshift_client.py b/tests/load/redshift/test_redshift_client.py index 9839965b70..f5efc16a47 100644 --- a/tests/load/redshift/test_redshift_client.py +++ b/tests/load/redshift/test_redshift_client.py @@ -12,15 +12,14 @@ from dlt.common.utils import uniq_id from dlt.destinations.exceptions import DatabaseTerminalException -from dlt.destinations.redshift.configuration import RedshiftCredentials -from dlt.destinations.redshift.redshift import RedshiftClient, psycopg2 +from dlt.destinations.impl.redshift.configuration import RedshiftCredentials +from dlt.destinations.impl.redshift.redshift import RedshiftClient, psycopg2 from tests.common.utils import COMMON_TEST_CASES_PATH from tests.utils import TEST_STORAGE_ROOT, autouse_test_storage, skipifpypy from tests.load.utils import expect_load_file, prepare_table, yield_client_with_storage - @pytest.fixture def file_storage() -> FileStorage: return FileStorage(TEST_STORAGE_ROOT, file_type="b", makedirs=True) @@ -50,13 +49,13 @@ def test_text_too_long(client: RedshiftClient, file_storage: FileStorage) -> Non # try some unicode value - redshift checks the max length based on utf-8 representation, not the number of characters # max_len_str = 'उ' * (65535 // 3) + 1 -> does not fit # max_len_str = 'a' * 65535 + 1 -> does not fit - max_len_str = 'उ' * ((caps["max_text_data_type_length"] // 3) + 1) + max_len_str = "उ" * ((caps["max_text_data_type_length"] // 3) + 1) # max_len_str_b = max_len_str.encode("utf-8") # print(len(max_len_str_b)) row_id = uniq_id() insert_values = f"('{row_id}', '{uniq_id()}', '{max_len_str}' , '{str(pendulum.now())}');" with pytest.raises(DatabaseTerminalException) as exv: - expect_load_file(client, file_storage, insert_sql+insert_values, user_table_name) + expect_load_file(client, file_storage, insert_sql + insert_values, user_table_name) assert type(exv.value.dbapi_exception) is psycopg2.errors.StringDataRightTruncation @@ -64,25 +63,36 @@ def test_wei_value(client: RedshiftClient, file_storage: FileStorage) -> None: user_table_name = prepare_table(client) # max redshift decimal is (38, 0) (128 bit) = 10**38 - 1 - insert_sql = "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp, parse_data__metadata__rasa_x_id)\nVALUES\n" - insert_values = f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd', '{str(pendulum.now())}', {10**38});" + insert_sql = ( + "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp," + " parse_data__metadata__rasa_x_id)\nVALUES\n" + ) + insert_values = ( + f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd'," + f" '{str(pendulum.now())}', {10**38});" + ) with pytest.raises(DatabaseTerminalException) as exv: - expect_load_file(client, file_storage, insert_sql+insert_values, user_table_name) + expect_load_file(client, file_storage, insert_sql + insert_values, user_table_name) assert type(exv.value.dbapi_exception) is 
psycopg2.errors.InternalError_ def test_schema_string_exceeds_max_text_length(client: RedshiftClient) -> None: client.update_stored_schema() # schema should be compressed and stored as base64 - schema = SchemaStorage.load_schema_file(os.path.join(COMMON_TEST_CASES_PATH, "schemas/ev1"), "event", ("json",)) + schema = SchemaStorage.load_schema_file( + os.path.join(COMMON_TEST_CASES_PATH, "schemas/ev1"), "event", ("json",) + ) schema_str = json.dumps(schema.to_dict()) assert len(schema_str.encode("utf-8")) > client.capabilities.max_text_data_type_length client._update_schema_in_storage(schema) schema_info = client.get_stored_schema() assert schema_info.schema == schema_str # take base64 from db - with client.sql_client.execute_query(f"SELECT schema FROM {VERSION_TABLE_NAME} WHERE version_hash = '{schema.stored_version_hash}'") as cur: - row = cur.fetchone() + with client.sql_client.execute_query( + f"SELECT schema FROM {VERSION_TABLE_NAME} WHERE version_hash =" + f" '{schema.stored_version_hash}'" + ) as cur: + row = cur.fetchone() # decode base base64.b64decode(row[0], validate=True) @@ -99,7 +109,10 @@ def test_maximum_query_size(client: RedshiftClient, file_storage: FileStorage) - insert_sql = "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp)\nVALUES\n" insert_values = "('{}', '{}', '90238094809sajlkjxoiewjhduuiuehd', '{}'){}" - insert_sql = insert_sql + insert_values.format(uniq_id(), uniq_id(), str(pendulum.now()), ",\n") * 150000 + insert_sql = ( + insert_sql + + insert_values.format(uniq_id(), uniq_id(), str(pendulum.now()), ",\n") * 150000 + ) insert_sql += insert_values.format(uniq_id(), uniq_id(), str(pendulum.now()), ";") user_table_name = prepare_table(client) diff --git a/tests/load/redshift/test_redshift_table_builder.py b/tests/load/redshift/test_redshift_table_builder.py index 8c61ccc1f2..d2adfde403 100644 --- a/tests/load/redshift/test_redshift_table_builder.py +++ b/tests/load/redshift/test_redshift_table_builder.py @@ -6,11 +6,15 @@ from dlt.common.schema import Schema from dlt.common.configuration import resolve_configuration -from dlt.destinations.redshift.redshift import RedshiftClient -from dlt.destinations.redshift.configuration import RedshiftClientConfiguration, RedshiftCredentials +from dlt.destinations.impl.redshift.redshift import RedshiftClient +from dlt.destinations.impl.redshift.configuration import ( + RedshiftClientConfiguration, + RedshiftCredentials, +) from tests.load.utils import TABLE_UPDATE + @pytest.fixture def schema() -> Schema: return Schema("event") @@ -19,12 +23,22 @@ def schema() -> Schema: @pytest.fixture def client(schema: Schema) -> RedshiftClient: # return client without opening connection - return RedshiftClient(schema, RedshiftClientConfiguration(dataset_name="test_" + uniq_id(), credentials=RedshiftCredentials())) + return RedshiftClient( + schema, + RedshiftClientConfiguration( + dataset_name="test_" + uniq_id(), credentials=RedshiftCredentials() + ), + ) def test_redshift_configuration() -> None: # check names normalized - with custom_environ({"DESTINATION__REDSHIFT__CREDENTIALS__DATABASE": "UPPER_CASE_DATABASE", "DESTINATION__REDSHIFT__CREDENTIALS__PASSWORD": " pass\n"}): + with custom_environ( + { + "DESTINATION__REDSHIFT__CREDENTIALS__DATABASE": "UPPER_CASE_DATABASE", + "DESTINATION__REDSHIFT__CREDENTIALS__PASSWORD": " pass\n", + } + ): C = resolve_configuration(RedshiftCredentials(), sections=("destination", "redshift")) assert C.database == "upper_case_database" assert C.password == "pass" @@ -32,13 +46,16 @@ def 
test_redshift_configuration() -> None: # check fingerprint assert RedshiftClientConfiguration().fingerprint() == "" # based on host - c = resolve_configuration(RedshiftCredentials(), explicit_value="postgres://user1:pass@host1/db1?warehouse=warehouse1&role=role1") + c = resolve_configuration( + RedshiftCredentials(), + explicit_value="postgres://user1:pass@host1/db1?warehouse=warehouse1&role=role1", + ) assert RedshiftClientConfiguration(credentials=c).fingerprint() == digest128("host1") def test_create_table(client: RedshiftClient) -> None: # non existing table - sql = ';'.join(client._get_table_update_sql("event_test_table", TABLE_UPDATE, False)) + sql = ";".join(client._get_table_update_sql("event_test_table", TABLE_UPDATE, False)) sqlfluff.parse(sql, dialect="redshift") assert "event_test_table" in sql assert '"col1" bigint NOT NULL' in sql @@ -62,7 +79,7 @@ def test_create_table(client: RedshiftClient) -> None: def test_alter_table(client: RedshiftClient) -> None: # existing table has no columns - sql = ';'.join(client._get_table_update_sql("event_test_table", TABLE_UPDATE, True)) + sql = ";".join(client._get_table_update_sql("event_test_table", TABLE_UPDATE, True)) sqlfluff.parse(sql, dialect="redshift") canonical_name = client.sql_client.make_qualified_table_name("event_test_table") # must have several ALTER TABLE statements @@ -94,7 +111,7 @@ def test_create_table_with_hints(client: RedshiftClient) -> None: mod_update[0]["sort"] = True mod_update[1]["cluster"] = True mod_update[4]["cluster"] = True - sql = ';'.join(client._get_table_update_sql("event_test_table", mod_update, False)) + sql = ";".join(client._get_table_update_sql("event_test_table", mod_update, False)) sqlfluff.parse(sql, dialect="redshift") # PRIMARY KEY will not be present https://heap.io/blog/redshift-pitfalls-avoid assert '"col1" bigint SORTKEY NOT NULL' in sql diff --git a/tests/load/snowflake/test_snowflake_configuration.py b/tests/load/snowflake/test_snowflake_configuration.py index 7108ad06e5..fb8ff925c0 100644 --- a/tests/load/snowflake/test_snowflake_configuration.py +++ b/tests/load/snowflake/test_snowflake_configuration.py @@ -9,7 +9,10 @@ from dlt.common.configuration.exceptions import ConfigurationValueError from dlt.common.utils import digest128 -from dlt.destinations.snowflake.configuration import SnowflakeClientConfiguration, SnowflakeCredentials +from dlt.destinations.impl.snowflake.configuration import ( + SnowflakeClientConfiguration, + SnowflakeCredentials, +) from tests.common.configuration.utils import environment @@ -37,75 +40,87 @@ def test_connection_string_with_all_params() -> None: def test_to_connector_params() -> None: # PEM key - pkey_str = Path('./tests/common/cases/secrets/encrypted-private-key').read_text('utf8') + pkey_str = Path("./tests/common/cases/secrets/encrypted-private-key").read_text("utf8") creds = SnowflakeCredentials() creds.private_key = pkey_str # type: ignore[assignment] - creds.private_key_passphrase = '12345' # type: ignore[assignment] - creds.username = 'user1' - creds.database = 'db1' - creds.host = 'host1' - creds.warehouse = 'warehouse1' - creds.role = 'role1' + creds.private_key_passphrase = "12345" # type: ignore[assignment] + creds.username = "user1" + creds.database = "db1" + creds.host = "host1" + creds.warehouse = "warehouse1" + creds.role = "role1" params = creds.to_connector_params() - assert isinstance(params['private_key'], bytes) - params.pop('private_key') + assert isinstance(params["private_key"], bytes) + params.pop("private_key") assert params == 
dict( - user='user1', - database='db1', - account='host1', + user="user1", + database="db1", + account="host1", password=None, - warehouse='warehouse1', - role='role1', + warehouse="warehouse1", + role="role1", ) # base64 encoded DER key - pkey_str = Path('./tests/common/cases/secrets/encrypted-private-key-base64').read_text('utf8') + pkey_str = Path("./tests/common/cases/secrets/encrypted-private-key-base64").read_text("utf8") creds = SnowflakeCredentials() creds.private_key = pkey_str # type: ignore[assignment] - creds.private_key_passphrase = '12345' # type: ignore[assignment] - creds.username = 'user1' - creds.database = 'db1' - creds.host = 'host1' - creds.warehouse = 'warehouse1' - creds.role = 'role1' + creds.private_key_passphrase = "12345" # type: ignore[assignment] + creds.username = "user1" + creds.database = "db1" + creds.host = "host1" + creds.warehouse = "warehouse1" + creds.role = "role1" params = creds.to_connector_params() - assert isinstance(params['private_key'], bytes) - params.pop('private_key') + assert isinstance(params["private_key"], bytes) + params.pop("private_key") assert params == dict( - user='user1', - database='db1', - account='host1', + user="user1", + database="db1", + account="host1", password=None, - warehouse='warehouse1', - role='role1', + warehouse="warehouse1", + role="role1", ) def test_snowflake_credentials_native_value(environment) -> None: with pytest.raises(ConfigurationValueError): - resolve_configuration(SnowflakeCredentials(), explicit_value="snowflake://user1@host1/db1?warehouse=warehouse1&role=role1") + resolve_configuration( + SnowflakeCredentials(), + explicit_value="snowflake://user1@host1/db1?warehouse=warehouse1&role=role1", + ) # set password via env os.environ["CREDENTIALS__PASSWORD"] = "pass" - c = resolve_configuration(SnowflakeCredentials(), explicit_value="snowflake://user1@host1/db1?warehouse=warehouse1&role=role1") + c = resolve_configuration( + SnowflakeCredentials(), + explicit_value="snowflake://user1@host1/db1?warehouse=warehouse1&role=role1", + ) assert c.is_resolved() assert c.password == "pass" # # but if password is specified - it is final - c = resolve_configuration(SnowflakeCredentials(), explicit_value="snowflake://user1:pass1@host1/db1?warehouse=warehouse1&role=role1") + c = resolve_configuration( + SnowflakeCredentials(), + explicit_value="snowflake://user1:pass1@host1/db1?warehouse=warehouse1&role=role1", + ) assert c.is_resolved() assert c.password == "pass1" # set PK via env del os.environ["CREDENTIALS__PASSWORD"] os.environ["CREDENTIALS__PRIVATE_KEY"] = "pk" - c = resolve_configuration(SnowflakeCredentials(), explicit_value="snowflake://user1@host1/db1?warehouse=warehouse1&role=role1") + c = resolve_configuration( + SnowflakeCredentials(), + explicit_value="snowflake://user1@host1/db1?warehouse=warehouse1&role=role1", + ) assert c.is_resolved() assert c.private_key == "pk" @@ -114,5 +129,8 @@ def test_snowflake_configuration() -> None: # def empty fingerprint assert SnowflakeClientConfiguration().fingerprint() == "" # based on host - c = resolve_configuration(SnowflakeCredentials(), explicit_value="snowflake://user1:pass@host1/db1?warehouse=warehouse1&role=role1") + c = resolve_configuration( + SnowflakeCredentials(), + explicit_value="snowflake://user1:pass@host1/db1?warehouse=warehouse1&role=role1", + ) assert SnowflakeClientConfiguration(credentials=c).fingerprint() == digest128("host1") diff --git a/tests/load/snowflake/test_snowflake_table_builder.py b/tests/load/snowflake/test_snowflake_table_builder.py 
index 81164625f9..e6eaf26c89 100644 --- a/tests/load/snowflake/test_snowflake_table_builder.py +++ b/tests/load/snowflake/test_snowflake_table_builder.py @@ -5,8 +5,11 @@ from dlt.common.utils import uniq_id from dlt.common.schema import Schema -from dlt.destinations.snowflake.snowflake import SnowflakeClient -from dlt.destinations.snowflake.configuration import SnowflakeClientConfiguration, SnowflakeCredentials +from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient +from dlt.destinations.impl.snowflake.configuration import ( + SnowflakeClientConfiguration, + SnowflakeCredentials, +) from dlt.destinations.exceptions import DestinationSchemaWillNotUpdate from tests.load.utils import TABLE_UPDATE @@ -21,14 +24,16 @@ def schema() -> Schema: def snowflake_client(schema: Schema) -> SnowflakeClient: # return client without opening connection creds = SnowflakeCredentials() - return SnowflakeClient(schema, SnowflakeClientConfiguration(dataset_name="test_" + uniq_id(), credentials=creds)) + return SnowflakeClient( + schema, SnowflakeClientConfiguration(dataset_name="test_" + uniq_id(), credentials=creds) + ) def test_create_table(snowflake_client: SnowflakeClient) -> None: statements = snowflake_client._get_table_update_sql("event_test_table", TABLE_UPDATE, False) assert len(statements) == 1 sql = statements[0] - sqlfluff.parse(sql, dialect='snowflake') + sqlfluff.parse(sql, dialect="snowflake") assert sql.strip().startswith("CREATE TABLE") assert "EVENT_TEST_TABLE" in sql diff --git a/tests/load/test_dummy_client.py b/tests/load/test_dummy_client.py index 1216906967..7436023f03 100644 --- a/tests/load/test_dummy_client.py +++ b/tests/load/test_dummy_client.py @@ -1,39 +1,41 @@ -import shutil import os from concurrent.futures import ThreadPoolExecutor from time import sleep -from typing import List, Sequence, Tuple import pytest from unittest.mock import patch +from typing import List from dlt.common.exceptions import TerminalException, TerminalValueError -from dlt.common.schema import Schema -from dlt.common.storages import FileStorage, LoadStorage +from dlt.common.storages import FileStorage, LoadStorage, PackageStorage, ParsedLoadJobFileName from dlt.common.storages.load_storage import JobWithUnsupportedWriterException -from dlt.common.utils import uniq_id -from dlt.common.destination.reference import DestinationReference, LoadJob +from dlt.common.destination.reference import LoadJob, TDestination from dlt.load import Load from dlt.destinations.job_impl import EmptyLoadJob from dlt.destinations import dummy -from dlt.destinations.dummy import dummy as dummy_impl -from dlt.destinations.dummy.configuration import DummyClientConfiguration +from dlt.destinations.impl.dummy import dummy as dummy_impl +from dlt.destinations.impl.dummy.configuration import DummyClientConfiguration from dlt.load.exceptions import LoadClientJobFailed, LoadClientJobRetry from dlt.common.schema.utils import get_top_level_table -from tests.utils import clean_test_storage, init_test_logging, TEST_DICT_CONFIG_PROVIDER, preserve_environ +from tests.utils import ( + clean_test_storage, + init_test_logging, + TEST_DICT_CONFIG_PROVIDER, + preserve_environ, +) from tests.load.utils import prepare_load_package from tests.utils import skip_if_not_active skip_if_not_active("dummy") - NORMALIZED_FILES = [ "event_user.839c6e6b514e427687586ccc65bf133f.0.jsonl", - "event_loop_interrupted.839c6e6b514e427687586ccc65bf133f.0.jsonl" + "event_loop_interrupted.839c6e6b514e427687586ccc65bf133f.0.jsonl", ] + 
@pytest.fixture(autouse=True) def storage() -> FileStorage: return clean_test_storage(init_normalize=True, init_loader=True) @@ -47,18 +49,19 @@ def logger_autouse() -> None: def test_spool_job_started() -> None: # default config keeps the job always running load = setup_loader() - load_id, schema = prepare_load_package( - load.load_storage, - NORMALIZED_FILES - ) - files = load.load_storage.list_new_jobs(load_id) + load_id, schema = prepare_load_package(load.load_storage, NORMALIZED_FILES) + files = load.load_storage.normalized_packages.list_new_jobs(load_id) assert len(files) == 2 jobs: List[LoadJob] = [] for f in files: job = Load.w_spool_job(load, f, load_id, schema) assert type(job) is dummy_impl.LoadDummyJob assert job.state() == "running" - assert load.load_storage.storage.has_file(load.load_storage._get_job_file_path(load_id, LoadStorage.STARTED_JOBS_FOLDER, job.file_name())) + assert load.load_storage.normalized_packages.storage.has_file( + load.load_storage.normalized_packages.get_job_file_path( + load_id, PackageStorage.STARTED_JOBS_FOLDER, job.file_name() + ) + ) jobs.append(job) # still running remaining_jobs = load.complete_jobs(load_id, jobs, schema) @@ -68,8 +71,7 @@ def test_spool_job_started() -> None: def test_unsupported_writer_type() -> None: load = setup_loader() load_id, _ = prepare_load_package( - load.load_storage, - ["event_bot.181291798a78198.0.unsupported_format"] + load.load_storage, ["event_bot.181291798a78198.0.unsupported_format"] ) with pytest.raises(TerminalValueError): load.load_storage.list_new_jobs(load_id) @@ -77,27 +79,28 @@ def test_unsupported_writer_type() -> None: def test_unsupported_write_disposition() -> None: load = setup_loader() - load_id, schema = prepare_load_package( - load.load_storage, - [NORMALIZED_FILES[0]] - ) + load_id, schema = prepare_load_package(load.load_storage, [NORMALIZED_FILES[0]]) # mock unsupported disposition schema.get_table("event_user")["write_disposition"] = "skip" # write back schema - load.load_storage._save_schema(schema, load_id) + load.load_storage.normalized_packages.save_schema(load_id, schema) with ThreadPoolExecutor() as pool: load.run(pool) # job with unsupported write disp. 
is failed - exception = [f for f in load.load_storage.list_failed_jobs(load_id) if f.endswith(".exception")][0] - assert "LoadClientUnsupportedWriteDisposition" in load.load_storage.storage.load(exception) + exception_file = [ + f + for f in load.load_storage.normalized_packages.list_failed_jobs(load_id) + if f.endswith(".exception") + ][0] + assert ( + "LoadClientUnsupportedWriteDisposition" + in load.load_storage.normalized_packages.storage.load(exception_file) + ) def test_get_new_jobs_info() -> None: load = setup_loader() - load_id, schema = prepare_load_package( - load.load_storage, - NORMALIZED_FILES - ) + load_id, schema = prepare_load_package(load.load_storage, NORMALIZED_FILES) # no write disposition specified - get all new jobs assert len(load.get_new_jobs_info(load_id)) == 2 @@ -105,54 +108,75 @@ def test_get_new_jobs_info() -> None: def test_get_completed_table_chain_single_job_per_table() -> None: load = setup_loader() - load_id, schema = prepare_load_package( - load.load_storage, - NORMALIZED_FILES - ) + load_id, schema = prepare_load_package(load.load_storage, NORMALIZED_FILES) top_job_table = get_top_level_table(schema.tables, "event_user") assert load.get_completed_table_chain(load_id, schema, top_job_table) is None # fake being completed - assert len(load.get_completed_table_chain(load_id, schema, top_job_table, "event_user.839c6e6b514e427687586ccc65bf133f.0.jsonl")) == 1 + assert ( + len( + load.get_completed_table_chain( + load_id, + schema, + top_job_table, + "event_user.839c6e6b514e427687586ccc65bf133f.jsonl", + ) + ) + == 1 + ) # actually complete loop_top_job_table = get_top_level_table(schema.tables, "event_loop_interrupted") - load.load_storage.start_job(load_id, "event_loop_interrupted.839c6e6b514e427687586ccc65bf133f.0.jsonl") + load.load_storage.normalized_packages.start_job( + load_id, "event_loop_interrupted.839c6e6b514e427687586ccc65bf133f.0.jsonl" + ) assert load.get_completed_table_chain(load_id, schema, loop_top_job_table) is None - load.load_storage.complete_job(load_id, "event_loop_interrupted.839c6e6b514e427687586ccc65bf133f.0.jsonl") - assert load.get_completed_table_chain(load_id, schema, loop_top_job_table) == [schema.get_table("event_loop_interrupted")] - assert load.get_completed_table_chain(load_id, schema, loop_top_job_table, "event_user.839c6e6b514e427687586ccc65bf133f.0.jsonl") == [schema.get_table("event_loop_interrupted")] + load.load_storage.normalized_packages.complete_job( + load_id, "event_loop_interrupted.839c6e6b514e427687586ccc65bf133f.0.jsonl" + ) + assert load.get_completed_table_chain(load_id, schema, loop_top_job_table) == [ + schema.get_table("event_loop_interrupted") + ] + assert load.get_completed_table_chain( + load_id, schema, loop_top_job_table, "event_user.839c6e6b514e427687586ccc65bf133f.0.jsonl" + ) == [schema.get_table("event_loop_interrupted")] def test_spool_job_failed() -> None: # this config fails job on start load = setup_loader(client_config=DummyClientConfiguration(fail_prob=1.0)) - load_id, schema = prepare_load_package( - load.load_storage, - NORMALIZED_FILES - ) - files = load.load_storage.list_new_jobs(load_id) + load_id, schema = prepare_load_package(load.load_storage, NORMALIZED_FILES) + files = load.load_storage.normalized_packages.list_new_jobs(load_id) jobs: List[LoadJob] = [] for f in files: job = Load.w_spool_job(load, f, load_id, schema) assert type(job) is EmptyLoadJob assert job.state() == "failed" - assert load.load_storage.storage.has_file(load.load_storage._get_job_file_path(load_id, 
LoadStorage.STARTED_JOBS_FOLDER, job.file_name())) + assert load.load_storage.normalized_packages.storage.has_file( + load.load_storage.normalized_packages.get_job_file_path( + load_id, PackageStorage.STARTED_JOBS_FOLDER, job.file_name() + ) + ) jobs.append(job) # complete files remaining_jobs = load.complete_jobs(load_id, jobs, schema) assert len(remaining_jobs) == 0 for job in jobs: - assert load.load_storage.storage.has_file(load.load_storage._get_job_file_path(load_id, LoadStorage.FAILED_JOBS_FOLDER, job.file_name())) - assert load.load_storage.storage.has_file(load.load_storage._get_job_file_path(load_id, LoadStorage.FAILED_JOBS_FOLDER, job.file_name() + ".exception")) - started_files = load.load_storage.list_started_jobs(load_id) + assert load.load_storage.normalized_packages.storage.has_file( + load.load_storage.normalized_packages.get_job_file_path( + load_id, PackageStorage.FAILED_JOBS_FOLDER, job.file_name() + ) + ) + assert load.load_storage.normalized_packages.storage.has_file( + load.load_storage.normalized_packages.get_job_file_path( + load_id, PackageStorage.FAILED_JOBS_FOLDER, job.file_name() + ".exception" + ) + ) + started_files = load.load_storage.normalized_packages.list_started_jobs(load_id) assert len(started_files) == 0 # test the whole flow load = setup_loader(client_config=DummyClientConfiguration(fail_prob=1.0)) - load_id, schema = prepare_load_package( - load.load_storage, - NORMALIZED_FILES - ) + load_id, schema = prepare_load_package(load.load_storage, NORMALIZED_FILES) run_all(load) package_info = load.load_storage.get_load_package_info(load_id) assert package_info.state == "loaded" @@ -165,10 +189,7 @@ def test_spool_job_failed_exception_init() -> None: os.environ["LOAD__RAISE_ON_FAILED_JOBS"] = "true" os.environ["FAIL_IN_INIT"] = "true" load = setup_loader(client_config=DummyClientConfiguration(fail_prob=1.0)) - load_id, _ = prepare_load_package( - load.load_storage, - NORMALIZED_FILES - ) + load_id, _ = prepare_load_package(load.load_storage, NORMALIZED_FILES) with patch.object(dummy_impl.DummyClient, "complete_load") as complete_load: with pytest.raises(LoadClientJobFailed) as py_ex: run_all(load) @@ -187,10 +208,7 @@ def test_spool_job_failed_exception_complete() -> None: os.environ["LOAD__RAISE_ON_FAILED_JOBS"] = "true" os.environ["FAIL_IN_INIT"] = "false" load = setup_loader(client_config=DummyClientConfiguration(fail_prob=1.0)) - load_id, _ = prepare_load_package( - load.load_storage, - NORMALIZED_FILES - ) + load_id, _ = prepare_load_package(load.load_storage, NORMALIZED_FILES) with pytest.raises(LoadClientJobFailed) as py_ex: run_all(load) assert py_ex.value.load_id == load_id @@ -204,22 +222,17 @@ def test_spool_job_failed_exception_complete() -> None: def test_spool_job_retry_new() -> None: # this config retries job on start (transient fail) load = setup_loader(client_config=DummyClientConfiguration(retry_prob=1.0)) - load_id, schema = prepare_load_package( - load.load_storage, - NORMALIZED_FILES - ) - files = load.load_storage.list_new_jobs(load_id) + load_id, schema = prepare_load_package(load.load_storage, NORMALIZED_FILES) + files = load.load_storage.normalized_packages.list_new_jobs(load_id) for f in files: job = Load.w_spool_job(load, f, load_id, schema) assert job.state() == "retry" + def test_spool_job_retry_spool_new() -> None: # this config retries job on start (transient fail) load = setup_loader(client_config=DummyClientConfiguration(retry_prob=1.0)) - load_id, schema = prepare_load_package( - load.load_storage, - NORMALIZED_FILES - ) + 
load_id, schema = prepare_load_package(load.load_storage, NORMALIZED_FILES) # call higher level function that returns jobs and counts with ThreadPoolExecutor() as pool: load.pool = pool @@ -232,33 +245,34 @@ def test_spool_job_retry_started() -> None: # this config keeps the job always running load = setup_loader() # dummy_impl.CLIENT_CONFIG = DummyClientConfiguration - load_id, schema = prepare_load_package( - load.load_storage, - NORMALIZED_FILES - ) - files = load.load_storage.list_new_jobs(load_id) + load_id, schema = prepare_load_package(load.load_storage, NORMALIZED_FILES) + files = load.load_storage.normalized_packages.list_new_jobs(load_id) jobs: List[LoadJob] = [] for f in files: job = Load.w_spool_job(load, f, load_id, schema) assert type(job) is dummy_impl.LoadDummyJob assert job.state() == "running" - assert load.load_storage.storage.has_file(load.load_storage._get_job_file_path(load_id, LoadStorage.STARTED_JOBS_FOLDER, job.file_name())) + assert load.load_storage.normalized_packages.storage.has_file( + load.load_storage.normalized_packages.get_job_file_path( + load_id, PackageStorage.STARTED_JOBS_FOLDER, job.file_name() + ) + ) # mock job config to make it retry job.config.retry_prob = 1.0 jobs.append(job) - files = load.load_storage.list_new_jobs(load_id) + files = load.load_storage.normalized_packages.list_new_jobs(load_id) assert len(files) == 0 # should retry, that moves jobs into new folder remaining_jobs = load.complete_jobs(load_id, jobs, schema) assert len(remaining_jobs) == 0 # clear retry flag dummy_impl.JOBS = {} - files = load.load_storage.list_new_jobs(load_id) + files = load.load_storage.normalized_packages.list_new_jobs(load_id) assert len(files) == 2 # parse the new job names - for fn in load.load_storage.list_new_jobs(load_id): + for fn in load.load_storage.normalized_packages.list_new_jobs(load_id): # we failed when already running the job so retry count will increase - assert LoadStorage.parse_job_file_name(fn).retry_count == 1 + assert ParsedLoadJobFileName.parse(fn).retry_count == 1 for f in files: job = Load.w_spool_job(load, f, load_id, schema) assert job.state() == "running" @@ -266,14 +280,13 @@ def test_spool_job_retry_started() -> None: def test_try_retrieve_job() -> None: load = setup_loader() - load_id, schema = prepare_load_package( - load.load_storage, - NORMALIZED_FILES - ) + load_id, schema = prepare_load_package(load.load_storage, NORMALIZED_FILES) # manually move jobs to started - files = load.load_storage.list_new_jobs(load_id) + files = load.load_storage.normalized_packages.list_new_jobs(load_id) for f in files: - load.load_storage.start_job(load_id, FileStorage.get_file_name_from_file_path(f)) + load.load_storage.normalized_packages.start_job( + load_id, FileStorage.get_file_name_from_file_path(f) + ) # dummy client may retrieve jobs that it created itself, jobs in started folder are unknown # and returned as terminal with load.destination.client(schema, load.initial_client_config) as c: @@ -282,10 +295,7 @@ def test_try_retrieve_job() -> None: for j in jobs: assert j.state() == "failed" # new load package - load_id, schema = prepare_load_package( - load.load_storage, - NORMALIZED_FILES - ) + load_id, schema = prepare_load_package(load.load_storage, NORMALIZED_FILES) load.pool = ThreadPoolExecutor() jobs_count, jobs = load.spool_new_jobs(load_id, schema) assert jobs_count == 2 @@ -299,61 +309,61 @@ def test_try_retrieve_job() -> None: def test_completed_loop() -> None: load = 
setup_loader(client_config=DummyClientConfiguration(completed_prob=1.0)) - assert_complete_job(load, load.load_storage.storage) + assert_complete_job(load) def test_failed_loop() -> None: # ask to delete completed - load = setup_loader(delete_completed_jobs=True, client_config=DummyClientConfiguration(fail_prob=1.0)) + load = setup_loader( + delete_completed_jobs=True, client_config=DummyClientConfiguration(fail_prob=1.0) + ) # actually not deleted because one of the jobs failed - assert_complete_job(load, load.load_storage.storage, should_delete_completed=False) + assert_complete_job(load, should_delete_completed=False) def test_completed_loop_with_delete_completed() -> None: load = setup_loader(client_config=DummyClientConfiguration(completed_prob=1.0)) load.load_storage = load.create_storage(is_storage_owner=False) load.load_storage.config.delete_completed_jobs = True - assert_complete_job(load, load.load_storage.storage, should_delete_completed=True) + assert_complete_job(load, should_delete_completed=True) def test_retry_on_new_loop() -> None: # test job that retries sitting in new jobs load = setup_loader(client_config=DummyClientConfiguration(retry_prob=1.0)) - load_id, schema = prepare_load_package( - load.load_storage, - NORMALIZED_FILES - ) + load_id, schema = prepare_load_package(load.load_storage, NORMALIZED_FILES) with ThreadPoolExecutor() as pool: # 1st retry load.run(pool) - files = load.load_storage.list_new_jobs(load_id) + files = load.load_storage.normalized_packages.list_new_jobs(load_id) assert len(files) == 2 # 2nd retry load.run(pool) - files = load.load_storage.list_new_jobs(load_id) + files = load.load_storage.normalized_packages.list_new_jobs(load_id) assert len(files) == 2 # jobs will be completed load = setup_loader(client_config=DummyClientConfiguration(completed_prob=1.0)) load.run(pool) - files = load.load_storage.list_new_jobs(load_id) + files = load.load_storage.normalized_packages.list_new_jobs(load_id) assert len(files) == 0 # complete package load.run(pool) - assert not load.load_storage.storage.has_folder(load.load_storage.get_normalized_package_path(load_id)) + assert not load.load_storage.normalized_packages.storage.has_folder( + load.load_storage.get_normalized_package_path(load_id) + ) # parse the completed job names - completed_path = load.load_storage.get_completed_package_path(load_id) - for fn in load.load_storage.storage.list_folder_files(os.path.join(completed_path, LoadStorage.COMPLETED_JOBS_FOLDER)): + completed_path = load.load_storage.loaded_packages.get_package_path(load_id) + for fn in load.load_storage.loaded_packages.storage.list_folder_files( + os.path.join(completed_path, PackageStorage.COMPLETED_JOBS_FOLDER) + ): # we update a retry count in each case - assert LoadStorage.parse_job_file_name(fn).retry_count == 2 + assert ParsedLoadJobFileName.parse(fn).retry_count == 2 def test_retry_exceptions() -> None: load = setup_loader(client_config=DummyClientConfiguration(retry_prob=1.0)) - prepare_load_package( - load.load_storage, - NORMALIZED_FILES - ) + prepare_load_package(load.load_storage, NORMALIZED_FILES) with ThreadPoolExecutor() as pool: # 1st retry with pytest.raises(LoadClientJobRetry) as py_ex: @@ -374,23 +384,24 @@ def test_load_single_thread() -> None: os.environ["LOAD__WORKERS"] = "1" load = setup_loader(client_config=DummyClientConfiguration(completed_prob=1.0)) assert load.config.pool_type == "none" - load_id, _ = prepare_load_package( - load.load_storage, - NORMALIZED_FILES - ) + load_id, _ = 
prepare_load_package(load.load_storage, NORMALIZED_FILES) # we do not need pool to complete metrics = load.run(None) while metrics.pending_items > 0: metrics = load.run(None) - assert not load.load_storage.storage.has_folder(load.load_storage.get_normalized_package_path(load_id)) + assert not load.load_storage.storage.has_folder( + load.load_storage.get_normalized_package_path(load_id) + ) def test_wrong_writer_type() -> None: load = setup_loader() load_id, _ = prepare_load_package( load.load_storage, - ["event_bot.b1d32c6660b242aaabbf3fc27245b7e6.0.insert_values", - "event_user.b1d32c6660b242aaabbf3fc27245b7e6.0.insert_values"] + [ + "event_bot.b1d32c6660b242aaabbf3fc27245b7e6.0.insert_values", + "event_user.b1d32c6660b242aaabbf3fc27245b7e6.0.insert_values", + ], ) with ThreadPoolExecutor() as pool: with pytest.raises(JobWithUnsupportedWriterException) as exv: @@ -407,28 +418,34 @@ def test_terminal_exceptions() -> None: raise AssertionError() -def assert_complete_job(load: Load, storage: FileStorage, should_delete_completed: bool = False) -> None: - load_id, _ = prepare_load_package( - load.load_storage, - NORMALIZED_FILES - ) +def assert_complete_job(load: Load, should_delete_completed: bool = False) -> None: + load_id, _ = prepare_load_package(load.load_storage, NORMALIZED_FILES) # will complete all jobs with patch.object(dummy_impl.DummyClient, "complete_load") as complete_load: with ThreadPoolExecutor() as pool: load.run(pool) # did process schema update - assert storage.has_file(os.path.join(load.load_storage.get_normalized_package_path(load_id), LoadStorage.APPLIED_SCHEMA_UPDATES_FILE_NAME)) + assert load.load_storage.storage.has_file( + os.path.join( + load.load_storage.get_normalized_package_path(load_id), + PackageStorage.APPLIED_SCHEMA_UPDATES_FILE_NAME, + ) + ) # will finalize the whole package load.run(pool) # moved to loaded - assert not storage.has_folder(load.load_storage.get_normalized_package_path(load_id)) - completed_path = load.load_storage._get_job_folder_completed_path(load_id, "completed_jobs") + assert not load.load_storage.storage.has_folder( + load.load_storage.get_normalized_package_path(load_id) + ) + completed_path = load.load_storage.loaded_packages.get_job_folder_path( + load_id, "completed_jobs" + ) if should_delete_completed: # package was deleted - assert not storage.has_folder(completed_path) + assert not load.load_storage.loaded_packages.storage.has_folder(completed_path) else: # package not deleted - assert storage.has_folder(completed_path) + assert load.load_storage.loaded_packages.storage.has_folder(completed_path) # complete load on client was called complete_load.assert_called_once_with(load_id) @@ -442,17 +459,16 @@ def run_all(load: Load) -> None: sleep(0.1) -def setup_loader(delete_completed_jobs: bool = False, client_config: DummyClientConfiguration = None) -> Load: +def setup_loader( + delete_completed_jobs: bool = False, client_config: DummyClientConfiguration = None +) -> Load: # reset jobs for a test dummy_impl.JOBS = {} - destination: DestinationReference = dummy # type: ignore[assignment] + destination: TDestination = dummy() # type: ignore[assignment] client_config = client_config or DummyClientConfiguration(loader_file_format="jsonl") # patch destination to provide client_config # destination.client = lambda schema: dummy_impl.DummyClient(schema, client_config) # setup loader with TEST_DICT_CONFIG_PROVIDER().values({"delete_completed_jobs": delete_completed_jobs}): - return Load( - destination, - initial_client_config=client_config - 
) + return Load(destination, initial_client_config=client_config) diff --git a/tests/load/test_insert_job_client.py b/tests/load/test_insert_job_client.py index 95e63a79f2..1c79b733e5 100644 --- a/tests/load/test_insert_job_client.py +++ b/tests/load/test_insert_job_client.py @@ -7,7 +7,11 @@ from dlt.common.storages import FileStorage from dlt.common.utils import uniq_id -from dlt.destinations.exceptions import DatabaseTerminalException, DatabaseTransientException, DatabaseUndefinedRelation +from dlt.destinations.exceptions import ( + DatabaseTerminalException, + DatabaseTransientException, + DatabaseUndefinedRelation, +) from dlt.destinations.insert_job_client import InsertValuesJobClient from tests.utils import TEST_STORAGE_ROOT, autouse_test_storage, skipifpypy @@ -16,22 +20,33 @@ DEFAULT_SUBSET = ["duckdb", "redshift", "postgres"] + @pytest.fixture def file_storage() -> FileStorage: return FileStorage(TEST_STORAGE_ROOT, file_type="b", makedirs=True) + @pytest.fixture(scope="function") def client(request) -> Iterator[InsertValuesJobClient]: yield from yield_client_with_storage(request.param.destination) # type: ignore[misc] -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True, subset=DEFAULT_SUBSET), indirect=True, ids=lambda x: x.name) + +@pytest.mark.parametrize( + "client", + destinations_configs(default_sql_configs=True, subset=DEFAULT_SUBSET), + indirect=True, + ids=lambda x: x.name, +) def test_simple_load(client: InsertValuesJobClient, file_storage: FileStorage) -> None: user_table_name = prepare_table(client) canonical_name = client.sql_client.make_qualified_table_name(user_table_name) # create insert insert_sql = "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp)\nVALUES\n" - insert_values = f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd', '{str(pendulum.now())}')" - expect_load_file(client, file_storage, insert_sql+insert_values+";", user_table_name) + insert_values = ( + f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd'," + f" '{str(pendulum.now())}')" + ) + expect_load_file(client, file_storage, insert_sql + insert_values + ";", user_table_name) rows_count = client.sql_client.execute_sql(f"SELECT COUNT(1) FROM {canonical_name}")[0][0] assert rows_count == 1 # insert 100 more rows @@ -41,100 +56,135 @@ def test_simple_load(client: InsertValuesJobClient, file_storage: FileStorage) - assert rows_count == 101 # insert null value insert_sql_nc = "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp, text)\nVALUES\n" - insert_values_nc = f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd', '{str(pendulum.now())}', NULL);" - expect_load_file(client, file_storage, insert_sql_nc+insert_values_nc, user_table_name) + insert_values_nc = ( + f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd'," + f" '{str(pendulum.now())}', NULL);" + ) + expect_load_file(client, file_storage, insert_sql_nc + insert_values_nc, user_table_name) rows_count = client.sql_client.execute_sql(f"SELECT COUNT(1) FROM {canonical_name}")[0][0] assert rows_count == 102 @skipifpypy -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True, subset=DEFAULT_SUBSET), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", + destinations_configs(default_sql_configs=True, subset=DEFAULT_SUBSET), + indirect=True, + ids=lambda x: x.name, +) def test_loading_errors(client: InsertValuesJobClient, file_storage: FileStorage) -> None: # test expected dbiapi exceptions for 
supported destinations import duckdb - from dlt.destinations.postgres.sql_client import psycopg2 + from dlt.destinations.impl.postgres.sql_client import psycopg2 TNotNullViolation = psycopg2.errors.NotNullViolation TNumericValueOutOfRange = psycopg2.errors.NumericValueOutOfRange TUndefinedColumn = psycopg2.errors.UndefinedColumn TDatatypeMismatch = psycopg2.errors.DatatypeMismatch - if client.config.destination_name == "redshift": + if client.config.destination_type == "redshift": # redshift does not know or psycopg does not recognize those correctly TNotNullViolation = psycopg2.errors.InternalError_ - if client.config.destination_name == "duckdb": + if client.config.destination_type == "duckdb": TUndefinedColumn = duckdb.BinderException TNotNullViolation = duckdb.ConstraintException TNumericValueOutOfRange = TDatatypeMismatch = duckdb.ConversionException - user_table_name = prepare_table(client) # insert into unknown column insert_sql = "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp, _unk_)\nVALUES\n" - insert_values = f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd', '{str(pendulum.now())}', NULL);" + insert_values = ( + f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd'," + f" '{str(pendulum.now())}', NULL);" + ) with pytest.raises(DatabaseTerminalException) as exv: - expect_load_file(client, file_storage, insert_sql+insert_values, user_table_name) + expect_load_file(client, file_storage, insert_sql + insert_values, user_table_name) assert type(exv.value.dbapi_exception) is TUndefinedColumn # insert null value insert_sql = "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp)\nVALUES\n" insert_values = f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd', NULL);" with pytest.raises(DatabaseTerminalException) as exv: - expect_load_file(client, file_storage, insert_sql+insert_values, user_table_name) + expect_load_file(client, file_storage, insert_sql + insert_values, user_table_name) assert type(exv.value.dbapi_exception) is TNotNullViolation # insert wrong type insert_sql = "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp)\nVALUES\n" insert_values = f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd', TRUE);" with pytest.raises(DatabaseTerminalException) as exv: - expect_load_file(client, file_storage, insert_sql+insert_values, user_table_name) + expect_load_file(client, file_storage, insert_sql + insert_values, user_table_name) assert type(exv.value.dbapi_exception) is TDatatypeMismatch # numeric overflow on bigint - insert_sql = "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp, metadata__rasa_x_id)\nVALUES\n" + insert_sql = ( + "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp, metadata__rasa_x_id)\nVALUES\n" + ) # 2**64//2 - 1 is a maximum bigint value - insert_values = f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd', '{str(pendulum.now())}', {2**64//2});" + insert_values = ( + f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd'," + f" '{str(pendulum.now())}', {2**64//2});" + ) with pytest.raises(DatabaseTerminalException) as exv: - expect_load_file(client, file_storage, insert_sql+insert_values, user_table_name) - assert type(exv.value.dbapi_exception) in (TNumericValueOutOfRange, ) + expect_load_file(client, file_storage, insert_sql + insert_values, user_table_name) + assert type(exv.value.dbapi_exception) in (TNumericValueOutOfRange,) # numeric overflow on NUMERIC - insert_sql = "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, 
timestamp, parse_data__intent__id)\nVALUES\n" + insert_sql = ( + "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp," + " parse_data__intent__id)\nVALUES\n" + ) # default decimal is (38, 9) (128 bit), use local context to generate decimals with 38 precision with numeric_default_context(): - below_limit = Decimal(10**29) - Decimal('0.001') + below_limit = Decimal(10**29) - Decimal("0.001") above_limit = Decimal(10**29) # this will pass - insert_values = f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd', '{str(pendulum.now())}', {below_limit});" - expect_load_file(client, file_storage, insert_sql+insert_values, user_table_name) + insert_values = ( + f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd'," + f" '{str(pendulum.now())}', {below_limit});" + ) + expect_load_file(client, file_storage, insert_sql + insert_values, user_table_name) # this will raise - insert_values = f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd', '{str(pendulum.now())}', {above_limit});" + insert_values = ( + f"('{uniq_id()}', '{uniq_id()}', '90238094809sajlkjxoiewjhduuiuehd'," + f" '{str(pendulum.now())}', {above_limit});" + ) with pytest.raises(DatabaseTerminalException) as exv: - expect_load_file(client, file_storage, insert_sql+insert_values, user_table_name) - assert type(exv.value.dbapi_exception) in (TNumericValueOutOfRange, psycopg2.errors.InternalError_) - - - -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True, subset=DEFAULT_SUBSET), indirect=True, ids=lambda x: x.name) + expect_load_file(client, file_storage, insert_sql + insert_values, user_table_name) + assert type(exv.value.dbapi_exception) in ( + TNumericValueOutOfRange, + psycopg2.errors.InternalError_, + ) + + +@pytest.mark.parametrize( + "client", + destinations_configs(default_sql_configs=True, subset=DEFAULT_SUBSET), + indirect=True, + ids=lambda x: x.name, +) def test_query_split(client: InsertValuesJobClient, file_storage: FileStorage) -> None: mocked_caps = client.sql_client.__class__.capabilities insert_sql = prepare_insert_statement(10) # this guarantees that we execute inserts line by line - with patch.object(mocked_caps, "max_query_length", 2), patch.object(client.sql_client, "execute_fragments") as mocked_fragments: + with patch.object(mocked_caps, "max_query_length", 2), patch.object( + client.sql_client, "execute_fragments" + ) as mocked_fragments: user_table_name = prepare_table(client) expect_load_file(client, file_storage, insert_sql, user_table_name) # print(mocked_fragments.mock_calls) # split in 10 lines assert mocked_fragments.call_count == 10 for idx, call in enumerate(mocked_fragments.call_args_list): - fragment:List[str] = call.args[0] + fragment: List[str] = call.args[0] # last elem of fragment is a data list, first element is id, and must end with ;\n assert fragment[-1].startswith(f"'{idx}'") assert fragment[-1].endswith(");") assert_load_with_max_query(client, file_storage, 10, 2) start_idx = insert_sql.find("S\n(") - idx = insert_sql.find("),\n", len(insert_sql)//2) + idx = insert_sql.find("),\n", len(insert_sql) // 2) # set query length so it reads data until "," (followed by \n) query_length = (idx - start_idx - 1) * 2 - with patch.object(mocked_caps, "max_query_length", query_length), patch.object(client.sql_client, "execute_fragments") as mocked_fragments: + with patch.object(mocked_caps, "max_query_length", query_length), patch.object( + client.sql_client, "execute_fragments" + ) as mocked_fragments: user_table_name = 
prepare_table(client) expect_load_file(client, file_storage, insert_sql, user_table_name) # split in 2 on ',' @@ -142,7 +192,9 @@ def test_query_split(client: InsertValuesJobClient, file_storage: FileStorage) - # so it reads until "\n" query_length = (idx - start_idx) * 2 - with patch.object(mocked_caps, "max_query_length", query_length), patch.object(client.sql_client, "execute_fragments") as mocked_fragments: + with patch.object(mocked_caps, "max_query_length", query_length), patch.object( + client.sql_client, "execute_fragments" + ) as mocked_fragments: user_table_name = prepare_table(client) expect_load_file(client, file_storage, insert_sql, user_table_name) # split in 2 on ',' @@ -150,14 +202,21 @@ def test_query_split(client: InsertValuesJobClient, file_storage: FileStorage) - # so it reads till the last ; query_length = (len(insert_sql) - start_idx - 3) * 2 - with patch.object(mocked_caps, "max_query_length", query_length), patch.object(client.sql_client, "execute_fragments") as mocked_fragments: + with patch.object(mocked_caps, "max_query_length", query_length), patch.object( + client.sql_client, "execute_fragments" + ) as mocked_fragments: user_table_name = prepare_table(client) expect_load_file(client, file_storage, insert_sql, user_table_name) # split in 2 on ',' assert mocked_fragments.call_count == 1 -def assert_load_with_max_query(client: InsertValuesJobClient, file_storage: FileStorage, insert_lines: int, max_query_length: int) -> None: +def assert_load_with_max_query( + client: InsertValuesJobClient, + file_storage: FileStorage, + insert_lines: int, + max_query_length: int, +) -> None: # load and check for real mocked_caps = client.sql_client.__class__.capabilities with patch.object(mocked_caps, "max_query_length", max_query_length): @@ -167,7 +226,9 @@ def assert_load_with_max_query(client: InsertValuesJobClient, file_storage: File rows_count = client.sql_client.execute_sql(f"SELECT COUNT(1) FROM {user_table_name}")[0][0] assert rows_count == insert_lines # get all uniq ids in order - with client.sql_client.execute_query(f"SELECT _dlt_id FROM {user_table_name} ORDER BY timestamp ASC;") as c: + with client.sql_client.execute_query( + f"SELECT _dlt_id FROM {user_table_name} ORDER BY timestamp ASC;" + ) as c: rows = list(c.fetchall()) v_ids = list(map(lambda i: i[0], rows)) assert list(map(str, range(0, insert_lines))) == v_ids @@ -177,7 +238,7 @@ def assert_load_with_max_query(client: InsertValuesJobClient, file_storage: File def prepare_insert_statement(lines: int) -> str: insert_sql = "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp)\nVALUES\n" insert_values = "('{}', '{}', '90238094809sajlkjxoiewjhduuiuehd', '{}')" - #ids = [] + # ids = [] for i in range(lines): # id_ = uniq_id() # ids.append(id_) diff --git a/tests/load/test_job_client.py b/tests/load/test_job_client.py index 35394ed1c6..e6aab20d51 100644 --- a/tests/load/test_job_client.py +++ b/tests/load/test_job_client.py @@ -9,20 +9,39 @@ from dlt.common import json, pendulum from dlt.common.schema import Schema -from dlt.common.schema.typing import LOADS_TABLE_NAME, VERSION_TABLE_NAME, TWriteDisposition, TTableSchema +from dlt.common.schema.typing import ( + LOADS_TABLE_NAME, + VERSION_TABLE_NAME, + TWriteDisposition, + TTableSchema, +) from dlt.common.schema.utils import new_table, new_column from dlt.common.storages import FileStorage from dlt.common.schema import TTableSchemaColumns from dlt.common.utils import uniq_id -from dlt.destinations.exceptions import DatabaseException, 
DatabaseTerminalException, DatabaseUndefinedRelation
+from dlt.destinations.exceptions import (
+    DatabaseException,
+    DatabaseTerminalException,
+    DatabaseUndefinedRelation,
+)
 from dlt.destinations.job_client_impl import SqlJobClientBase
 from dlt.common.destination.reference import WithStagingDataset
 from tests.utils import TEST_STORAGE_ROOT, autouse_test_storage
 from tests.common.utils import load_json_case
-from tests.load.utils import (TABLE_UPDATE, TABLE_UPDATE_COLUMNS_SCHEMA, TABLE_ROW_ALL_DATA_TYPES, assert_all_data_types_row , expect_load_file, load_table, yield_client_with_storage,
-    cm_yield_client_with_storage, write_dataset, prepare_table)
+from tests.load.utils import (
+    TABLE_UPDATE,
+    TABLE_UPDATE_COLUMNS_SCHEMA,
+    TABLE_ROW_ALL_DATA_TYPES,
+    assert_all_data_types_row,
+    expect_load_file,
+    load_table,
+    yield_client_with_storage,
+    cm_yield_client_with_storage,
+    write_dataset,
+    prepare_table,
+)
 from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration
@@ -30,17 +49,24 @@ def file_storage() -> FileStorage:
     return FileStorage(TEST_STORAGE_ROOT, file_type="b", makedirs=True)
+
 @pytest.fixture(scope="function")
 def client(request) -> Iterator[SqlJobClientBase]:
     yield from yield_client_with_storage(request.param.destination)
+
 @pytest.mark.order(1)
-@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name)
+@pytest.mark.parametrize(
+    "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name
+)
 def test_initialize_storage(client: SqlJobClientBase) -> None:
     pass
+
 @pytest.mark.order(2)
-@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name)
+@pytest.mark.parametrize(
+    "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name
+)
 def test_get_schema_on_empty_storage(client: SqlJobClientBase) -> None:
     # test getting schema on empty dataset without any tables
     exists, _ = client.get_storage_table(VERSION_TABLE_NAME)
@@ -50,8 +76,11 @@ def test_get_schema_on_empty_storage(client: SqlJobClientBase) -> None:
     schema_info = client.get_stored_schema_by_hash("8a0298298823928939")
     assert schema_info is None
+
 @pytest.mark.order(3)
-@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name)
+@pytest.mark.parametrize(
+    "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name
+)
 def test_get_update_basic_schema(client: SqlJobClientBase) -> None:
     schema = client.schema
     schema_update = client.update_stored_schema()
@@ -104,7 +133,7 @@ def test_get_update_basic_schema(client: SqlJobClientBase) -> None:
     client._update_schema_in_storage(first_schema)
     this_schema = client.get_stored_schema_by_hash(first_schema.version_hash)
     newest_schema = client.get_stored_schema()
-    assert this_schema == newest_schema # error
+    assert this_schema == newest_schema  # error
     assert this_schema.version == first_schema.version == 2
     assert this_schema.version_hash == first_schema.stored_version_hash
@@ -127,7 +156,9 @@ def test_get_update_basic_schema(client: SqlJobClientBase) -> None:
     assert this_schema == newest_schema
-@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name)
+@pytest.mark.parametrize(
+    "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name
+)
 def test_complete_load(client: SqlJobClientBase) -> None:
     client.update_stored_schema()
     load_id = "182879721.182912"
@@ -139,18 +170,28 @@ def test_complete_load(client: SqlJobClientBase) -> None:
     assert load_rows[0][1] == client.schema.name
     assert load_rows[0][2] == 0
     import datetime  # noqa: I251
+
     assert type(load_rows[0][3]) is datetime.datetime
     assert load_rows[0][4] == client.schema.version_hash
     # make sure that hash in loads exists in schema versions table
     versions_table = client.sql_client.make_qualified_table_name(VERSION_TABLE_NAME)
-    version_rows = list(client.sql_client.execute_sql(f"SELECT * FROM {versions_table} WHERE version_hash = %s", load_rows[0][4]))
+    version_rows = list(
+        client.sql_client.execute_sql(
+            f"SELECT * FROM {versions_table} WHERE version_hash = %s", load_rows[0][4]
+        )
+    )
     assert len(version_rows) == 1
     client.complete_load("load2")
     load_rows = list(client.sql_client.execute_sql(f"SELECT * FROM {load_table}"))
     assert len(load_rows) == 2
-@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True, subset=["redshift", "postgres", "duckdb"]), indirect=True, ids=lambda x: x.name)
+@pytest.mark.parametrize(
+    "client",
+    destinations_configs(default_sql_configs=True, subset=["redshift", "postgres", "duckdb"]),
+    indirect=True,
+    ids=lambda x: x.name,
+)
 def test_schema_update_create_table_redshift(client: SqlJobClientBase) -> None:
     # infer typical rasa event schema
     schema = client.schema
@@ -160,7 +201,7 @@ def test_schema_update_create_table_redshift(client: SqlJobClientBase) -> None:
     assert timestamp["sort"] is True
     # this will be destkey
     sender_id = schema._infer_column("sender_id", "982398490809324")
-    assert sender_id["cluster"] is True 
+    assert sender_id["cluster"] is True
     # this will be not null
     record_hash = schema._infer_column("_dlt_id", "m,i0392903jdlkasjdlk")
     assert record_hash["unique"] is True
@@ -176,7 +217,12 @@ def test_schema_update_create_table_redshift(client: SqlJobClientBase) -> None:
     assert exists is True
-@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True, subset=["bigquery"]), indirect=True, ids=lambda x: x.name)
+@pytest.mark.parametrize(
+    "client",
+    destinations_configs(default_sql_configs=True, subset=["bigquery"]),
+    indirect=True,
+    ids=lambda x: x.name,
+)
 def test_schema_update_create_table_bigquery(client: SqlJobClientBase) -> None:
     # infer typical rasa event schema
     schema = client.schema
@@ -203,7 +249,9 @@ def test_schema_update_create_table_bigquery(client: SqlJobClientBase) -> None:
     assert storage_table["version"]["cluster"] is False
-@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name)
+@pytest.mark.parametrize(
+    "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name
+)
 def test_schema_update_alter_table(client: SqlJobClientBase) -> None:
     # force to update schema in chunks by setting the max query size to 10 bytes/chars
     with patch.object(client.capabilities, "max_query_length", new=10):
@@ -241,34 +289,38 @@ def test_schema_update_alter_table(client: SqlJobClientBase) -> None:
     assert storage_table["col4"]["data_type"] == "timestamp"
-@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name)
+@pytest.mark.parametrize(
+    "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name
+)
 def test_drop_tables(client: SqlJobClientBase) -> None:
     schema = client.schema
     # Add columns in all tables
-
schema.tables['event_user']['columns'] = dict(schema.tables['event_slot']['columns']) - schema.tables['event_bot']['columns'] = dict(schema.tables['event_slot']['columns']) + schema.tables["event_user"]["columns"] = dict(schema.tables["event_slot"]["columns"]) + schema.tables["event_bot"]["columns"] = dict(schema.tables["event_slot"]["columns"]) schema.bump_version() client.update_stored_schema() # Create a second schema with 2 hashes sd = schema.to_dict() - sd['name'] = 'event_2' + sd["name"] = "event_2" schema_2 = Schema.from_dict(sd).clone() # type: ignore[arg-type] for tbl_name in list(schema_2.tables): - if tbl_name.startswith('_dlt'): + if tbl_name.startswith("_dlt"): continue - schema_2.tables[tbl_name + '_2'] = schema_2.tables.pop(tbl_name) + # rename the table properly + schema_2.tables[tbl_name + "_2"] = schema_2.tables.pop(tbl_name) + schema_2.tables[tbl_name + "_2"]["name"] = tbl_name + "_2" client.schema = schema_2 client.schema.bump_version() client.update_stored_schema() - client.schema.tables['event_slot_2']['columns']['value']['nullable'] = False + client.schema.tables["event_slot_2"]["columns"]["value"]["nullable"] = False client.schema.bump_version() client.update_stored_schema() # Drop tables from the first schema client.schema = schema - tables_to_drop = ['event_slot', 'event_user'] + tables_to_drop = ["event_slot", "event_user"] for tbl in tables_to_drop: del schema.tables[tbl] schema.bump_version() @@ -291,16 +343,22 @@ def test_drop_tables(client: SqlJobClientBase) -> None: # Verify _dlt_version schema is updated and old versions deleted table_name = client.sql_client.make_qualified_table_name(VERSION_TABLE_NAME) - rows = client.sql_client.execute_sql(f"SELECT version_hash FROM {table_name} WHERE schema_name = %s", schema.name) + rows = client.sql_client.execute_sql( + f"SELECT version_hash FROM {table_name} WHERE schema_name = %s", schema.name + ) assert len(rows) == 1 assert rows[0][0] == schema.version_hash # Other schema is not replaced - rows = client.sql_client.execute_sql(f"SELECT version_hash FROM {table_name} WHERE schema_name = %s", schema_2.name) + rows = client.sql_client.execute_sql( + f"SELECT version_hash FROM {table_name} WHERE schema_name = %s", schema_2.name + ) assert len(rows) == 2 -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) def test_get_storage_table_with_all_types(client: SqlJobClientBase) -> None: schema = client.schema table_name = "event_test_table" + uniq_id() @@ -323,21 +381,28 @@ def test_get_storage_table_with_all_types(client: SqlJobClientBase) -> None: # print(c["data_type"]) assert c["name"] == expected_c["name"] # athena does not know wei data type and has no JSON type, time is not supported with parquet tables - if client.config.destination_name == "athena" and c["data_type"] in ("wei", "complex", "time"): + if client.config.destination_type == "athena" and c["data_type"] in ( + "wei", + "complex", + "time", + ): continue - if client.config.destination_name == "mssql" and c["data_type"] in ("wei", "complex"): + if client.config.destination_type == "mssql" and c["data_type"] in ("wei", "complex"): continue assert c["data_type"] == expected_c["data_type"] -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", 
destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) def test_preserve_column_order(client: SqlJobClientBase) -> None: schema = client.schema table_name = "event_test_table" + uniq_id() import random + columns = deepcopy(TABLE_UPDATE) random.shuffle(columns) - print(columns) + schema.update_table(new_table(table_name, columns=columns)) schema.bump_version() @@ -353,13 +418,15 @@ def _assert_columns_order(sql_: str) -> None: idx = sql_.find(col_name, idx) assert idx > 0, f"column {col_name} not found in script" - sql = ';'.join(client._get_table_update_sql(table_name, columns, generate_alter=False)) + sql = ";".join(client._get_table_update_sql(table_name, columns, generate_alter=False)) _assert_columns_order(sql) - sql = ';'.join(client._get_table_update_sql(table_name, columns, generate_alter=True)) + sql = ";".join(client._get_table_update_sql(table_name, columns, generate_alter=True)) _assert_columns_order(sql) -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) def test_data_writer_load(client: SqlJobClientBase, file_storage: FileStorage) -> None: if not client.capabilities.preferred_loader_file_format: pytest.skip("preferred loader file format not set, destination will only work with staging") @@ -378,12 +445,16 @@ def test_data_writer_load(client: SqlJobClientBase, file_storage: FileStorage) - write_dataset(client, f, [rows[1]], client.schema.get_table(table_name)["columns"]) query = f.getvalue().decode() expect_load_file(client, file_storage, query, table_name) - db_row = client.sql_client.execute_sql(f"SELECT * FROM {canonical_name} WHERE f_int = {rows[1]['f_int']}")[0] + db_row = client.sql_client.execute_sql( + f"SELECT * FROM {canonical_name} WHERE f_int = {rows[1]['f_int']}" + )[0] assert db_row[3] is None assert db_row[5] is None -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) def test_data_writer_string_escape(client: SqlJobClientBase, file_storage: FileStorage) -> None: if not client.capabilities.preferred_loader_file_format: pytest.skip("preferred loader file format not set, destination will only work with staging") @@ -401,8 +472,12 @@ def test_data_writer_string_escape(client: SqlJobClientBase, file_storage: FileS assert list(db_row) == list(row.values()) -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) -def test_data_writer_string_escape_edge(client: SqlJobClientBase, file_storage: FileStorage) -> None: +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) +def test_data_writer_string_escape_edge( + client: SqlJobClientBase, file_storage: FileStorage +) -> None: if not client.capabilities.preferred_loader_file_format: pytest.skip("preferred loader file format not set, destination will only work with staging") rows, table_name = prepare_schema(client, "weird_rows") @@ -411,20 +486,26 @@ def test_data_writer_string_escape_edge(client: SqlJobClientBase, file_storage: write_dataset(client, f, rows, client.schema.get_table(table_name)["columns"]) query = f.getvalue().decode() expect_load_file(client, file_storage, 
query, table_name) - for i in range(1,len(rows) + 1): + for i in range(1, len(rows) + 1): db_row = client.sql_client.execute_sql(f"SELECT str FROM {canonical_name} WHERE idx = {i}") - row_value, expected = db_row[0][0], rows[i-1]["str"] + row_value, expected = db_row[0][0], rows[i - 1]["str"] assert row_value == expected -@pytest.mark.parametrize('write_disposition', ["append", "replace"]) -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) -def test_load_with_all_types(client: SqlJobClientBase, write_disposition: TWriteDisposition, file_storage: FileStorage) -> None: +@pytest.mark.parametrize("write_disposition", ["append", "replace"]) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) +def test_load_with_all_types( + client: SqlJobClientBase, write_disposition: TWriteDisposition, file_storage: FileStorage +) -> None: if not client.capabilities.preferred_loader_file_format: pytest.skip("preferred loader file format not set, destination will only work with staging") table_name = "event_test_table" + uniq_id() # we should have identical content with all disposition types - client.schema.update_table(new_table(table_name, write_disposition=write_disposition, columns=TABLE_UPDATE)) + client.schema.update_table( + new_table(table_name, write_disposition=write_disposition, columns=TABLE_UPDATE) + ) client.schema.bump_version() client.update_stored_schema() @@ -435,7 +516,7 @@ def test_load_with_all_types(client: SqlJobClientBase, write_disposition: TWrite client.update_stored_schema() with client.sql_client.with_staging_dataset( - client.should_load_data_to_staging_dataset(client.schema.tables[table_name]) # type: ignore[attr-defined] + client.should_load_data_to_staging_dataset(client.schema.tables[table_name]) # type: ignore[attr-defined] ): canonical_name = client.sql_client.make_qualified_table_name(table_name) # write row @@ -447,28 +528,39 @@ def test_load_with_all_types(client: SqlJobClientBase, write_disposition: TWrite # content must equal assert_all_data_types_row(db_row) -@pytest.mark.parametrize('write_disposition,replace_strategy', [ - ("append", ""), - ("merge", ""), - ("replace", "truncate-and-insert"), - ("replace", "insert-from-staging"), - ("replace", "staging-optimized") - ]) -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) -def test_write_dispositions(client: SqlJobClientBase, write_disposition: TWriteDisposition, replace_strategy: str, file_storage: FileStorage) -> None: + +@pytest.mark.parametrize( + "write_disposition,replace_strategy", + [ + ("append", ""), + ("merge", ""), + ("replace", "truncate-and-insert"), + ("replace", "insert-from-staging"), + ("replace", "staging-optimized"), + ], +) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) +def test_write_dispositions( + client: SqlJobClientBase, + write_disposition: TWriteDisposition, + replace_strategy: str, + file_storage: FileStorage, +) -> None: if not client.capabilities.preferred_loader_file_format: pytest.skip("preferred loader file format not set, destination will only work with staging") - os.environ['DESTINATION__REPLACE_STRATEGY'] = replace_strategy + os.environ["DESTINATION__REPLACE_STRATEGY"] = replace_strategy table_name = "event_test_table" + uniq_id() client.schema.update_table( new_table(table_name, 
write_disposition=write_disposition, columns=TABLE_UPDATE) - ) + ) child_table = client.schema.naming.make_path(table_name, "child") # add child table without write disposition so it will be inferred from the parent client.schema.update_table( new_table(child_table, columns=TABLE_UPDATE, parent_table_name=table_name) - ) + ) client.schema.bump_version() client.update_stored_schema() @@ -500,7 +592,12 @@ def test_write_dispositions(client: SqlJobClientBase, write_disposition: TWriteD else: # load directly on other expect_load_file(client, file_storage, query, t) - db_rows = list(client.sql_client.execute_sql(f"SELECT * FROM {client.sql_client.make_qualified_table_name(t)} ORDER BY col1 ASC")) + db_rows = list( + client.sql_client.execute_sql( + f"SELECT * FROM {client.sql_client.make_qualified_table_name(t)} ORDER BY" + " col1 ASC" + ) + ) # in case of merge if write_disposition == "append": # we append 1 row to tables in each iteration @@ -513,13 +610,20 @@ def test_write_dispositions(client: SqlJobClientBase, write_disposition: TWriteD assert len(db_rows) == 0 # check staging with client.sql_client.with_staging_dataset(staging=True): - db_rows = list(client.sql_client.execute_sql(f"SELECT * FROM {client.sql_client.make_qualified_table_name(t)} ORDER BY col1 ASC")) + db_rows = list( + client.sql_client.execute_sql( + f"SELECT * FROM {client.sql_client.make_qualified_table_name(t)} ORDER" + " BY col1 ASC" + ) + ) assert len(db_rows) == idx + 1 # last row must have our last idx - make sure we append and overwrite assert db_rows[-1][0] == idx -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) def test_retrieve_job(client: SqlJobClientBase, file_storage: FileStorage) -> None: if not client.capabilities.preferred_loader_file_format: pytest.skip("preferred loader file format not set, destination will only work with staging") @@ -527,8 +631,8 @@ def test_retrieve_job(client: SqlJobClientBase, file_storage: FileStorage) -> No load_json = { "_dlt_id": uniq_id(), "_dlt_root_id": uniq_id(), - "sender_id":'90238094809sajlkjxoiewjhduuiuehd', - "timestamp": str(pendulum.now()) + "sender_id": "90238094809sajlkjxoiewjhduuiuehd", + "timestamp": str(pendulum.now()), } with io.BytesIO() as f: write_dataset(client, f, [load_json], client.schema.get_table(user_table_name)["columns"]) @@ -543,30 +647,50 @@ def test_retrieve_job(client: SqlJobClientBase, file_storage: FileStorage) -> No assert r_job.state() == "completed" -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) def test_default_schema_name_init_storage(destination_config: DestinationTestConfiguration) -> None: - with cm_yield_client_with_storage(destination_config.destination, default_config_values={ - "default_schema_name": "event" # pass the schema that is a default schema. that should create dataset with the name `dataset_name` - }) as client: + with cm_yield_client_with_storage( + destination_config.destination, + default_config_values={ + "default_schema_name": ( # pass the schema that is a default schema. 
that should create dataset with the name `dataset_name` + "event" + ) + }, + ) as client: assert client.sql_client.dataset_name == client.config.dataset_name assert client.sql_client.has_dataset() - with cm_yield_client_with_storage(destination_config.destination, default_config_values={ - "default_schema_name": None # no default_schema. that should create dataset with the name `dataset_name` - }) as client: + with cm_yield_client_with_storage( + destination_config.destination, + default_config_values={ + "default_schema_name": ( + None # no default_schema. that should create dataset with the name `dataset_name` + ) + }, + ) as client: assert client.sql_client.dataset_name == client.config.dataset_name assert client.sql_client.has_dataset() - with cm_yield_client_with_storage(destination_config.destination, default_config_values={ - "default_schema_name": "event_2" # the default schema is not event schema . that should create dataset with the name `dataset_name` with schema suffix - }) as client: + with cm_yield_client_with_storage( + destination_config.destination, + default_config_values={ + "default_schema_name": ( # the default schema is not event schema . that should create dataset with the name `dataset_name` with schema suffix + "event_2" + ) + }, + ) as client: assert client.sql_client.dataset_name == client.config.dataset_name + "_event" assert client.sql_client.has_dataset() -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) -def test_many_schemas_single_dataset(destination_config: DestinationTestConfiguration, file_storage: FileStorage) -> None: - +@pytest.mark.parametrize( + "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name +) +def test_many_schemas_single_dataset( + destination_config: DestinationTestConfiguration, file_storage: FileStorage +) -> None: def _load_something(_client: SqlJobClientBase, expected_rows: int) -> None: # load something to event:user_table user_row = { @@ -575,7 +699,7 @@ def _load_something(_client: SqlJobClientBase, expected_rows: int) -> None: # "_dlt_load_id": "load_id", "event": "user", "sender_id": "sender_id", - "timestamp": str(pendulum.now()) + "timestamp": str(pendulum.now()), } with io.BytesIO() as f: write_dataset(_client, f, [user_row], _client.schema.tables["event_user"]["columns"]) @@ -585,11 +709,14 @@ def _load_something(_client: SqlJobClientBase, expected_rows: int) -> None: db_rows = list(_client.sql_client.execute_sql(f"SELECT * FROM {qual_table_name}")) assert len(db_rows) == expected_rows - with cm_yield_client_with_storage(destination_config.destination, default_config_values={"default_schema_name": None}) as client: - + with cm_yield_client_with_storage( + destination_config.destination, default_config_values={"default_schema_name": None} + ) as client: # event schema with event table if not client.capabilities.preferred_loader_file_format: - pytest.skip("preferred loader file format not set, destination will only work with staging") + pytest.skip( + "preferred loader file format not set, destination will only work with staging" + ) user_table = load_table("event_user")["event_user"] client.schema.update_table(new_table("event_user", columns=list(user_table.values()))) @@ -633,11 +760,17 @@ def _load_something(_client: SqlJobClientBase, expected_rows: int) -> None: _load_something(client, 3) # adding new non null column will generate sync error - event_3_schema.tables["event_user"]["columns"]["mandatory_column"] = 
new_column("mandatory_column", "text", nullable=False) + event_3_schema.tables["event_user"]["columns"]["mandatory_column"] = new_column( + "mandatory_column", "text", nullable=False + ) client.schema.bump_version() with pytest.raises(DatabaseException) as py_ex: client.update_stored_schema() - assert "mandatory_column" in str(py_ex.value).lower() or "NOT NULL" in str(py_ex.value) or "Adding columns with constraints not yet supported" in str(py_ex.value) + assert ( + "mandatory_column" in str(py_ex.value).lower() + or "NOT NULL" in str(py_ex.value) + or "Adding columns with constraints not yet supported" in str(py_ex.value) + ) def prepare_schema(client: SqlJobClientBase, case: str) -> Tuple[List[Dict[str, Any]], str]: diff --git a/tests/load/test_sql_client.py b/tests/load/test_sql_client.py index 5c6e1b9e31..96f0db09bb 100644 --- a/tests/load/test_sql_client.py +++ b/tests/load/test_sql_client.py @@ -9,7 +9,12 @@ from dlt.common.schema.typing import LOADS_TABLE_NAME, VERSION_TABLE_NAME from dlt.common.storages import FileStorage from dlt.common.utils import derives_from_class_of_name, uniq_id -from dlt.destinations.exceptions import DatabaseException, DatabaseTerminalException, DatabaseTransientException, DatabaseUndefinedRelation +from dlt.destinations.exceptions import ( + DatabaseException, + DatabaseTerminalException, + DatabaseTransientException, + DatabaseUndefinedRelation, +) from dlt.destinations.sql_client import DBApiCursor, SqlClientBase from dlt.destinations.job_client_impl import SqlJobClientBase @@ -25,30 +30,43 @@ def file_storage() -> FileStorage: return FileStorage(TEST_STORAGE_ROOT, file_type="b", makedirs=True) + @pytest.fixture(scope="function") def client(request) -> Iterator[SqlJobClientBase]: yield from yield_client_with_storage(request.param.destination) -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True, exclude=["mssql"]), indirect=True, ids=lambda x: x.name) + +@pytest.mark.parametrize( + "client", + destinations_configs(default_sql_configs=True, exclude=["mssql"]), + indirect=True, + ids=lambda x: x.name, +) def test_sql_client_default_dataset_unqualified(client: SqlJobClientBase) -> None: client.update_stored_schema() load_id = "182879721.182912" client.complete_load(load_id) curr: DBApiCursor # get data from unqualified name - with client.sql_client.execute_query(f"SELECT * FROM {LOADS_TABLE_NAME} ORDER BY inserted_at") as curr: + with client.sql_client.execute_query( + f"SELECT * FROM {LOADS_TABLE_NAME} ORDER BY inserted_at" + ) as curr: columns = [c[0] for c in curr.description] data = curr.fetchall() assert len(data) > 0 # get data from qualified name load_table = client.sql_client.make_qualified_table_name(LOADS_TABLE_NAME) - with client.sql_client.execute_query(f"SELECT * FROM {load_table} ORDER BY inserted_at") as curr: + with client.sql_client.execute_query( + f"SELECT * FROM {load_table} ORDER BY inserted_at" + ) as curr: assert [c[0] for c in curr.description] == columns assert curr.fetchall() == data -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) def test_malformed_query_parameters(client: SqlJobClientBase) -> None: client.update_stored_schema() loads_table_name = client.sql_client.make_qualified_table_name(LOADS_TABLE_NAME) @@ -59,25 +77,35 @@ def test_malformed_query_parameters(client: SqlJobClientBase) -> None: # 
parameters for placeholder will not be provided. the placeholder remains in query if is_positional: with pytest.raises(DatabaseTransientException) as term_ex: - with client.sql_client.execute_query(f"SELECT * FROM {loads_table_name} WHERE inserted_at = {placeholder}"): + with client.sql_client.execute_query( + f"SELECT * FROM {loads_table_name} WHERE inserted_at = {placeholder}" + ): pass assert client.sql_client.is_dbapi_exception(term_ex.value.dbapi_exception) # too many parameters with pytest.raises(DatabaseTransientException) as term_ex: - with client.sql_client.execute_query(f"SELECT * FROM {loads_table_name} WHERE inserted_at = {placeholder}", pendulum.now(), 10): + with client.sql_client.execute_query( + f"SELECT * FROM {loads_table_name} WHERE inserted_at = {placeholder}", + pendulum.now(), + 10, + ): pass assert client.sql_client.is_dbapi_exception(term_ex.value.dbapi_exception) # unknown named parameter if client.sql_client.dbapi.paramstyle == "pyformat": with pytest.raises(DatabaseTransientException) as term_ex: - with client.sql_client.execute_query(f"SELECT * FROM {loads_table_name} WHERE inserted_at = %(date)s"): + with client.sql_client.execute_query( + f"SELECT * FROM {loads_table_name} WHERE inserted_at = %(date)s" + ): pass assert client.sql_client.is_dbapi_exception(term_ex.value.dbapi_exception) -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) def test_malformed_execute_parameters(client: SqlJobClientBase) -> None: client.update_stored_schema() loads_table_name = client.sql_client.make_qualified_table_name(LOADS_TABLE_NAME) @@ -88,32 +116,46 @@ def test_malformed_execute_parameters(client: SqlJobClientBase) -> None: # parameters for placeholder will not be provided. 
the placeholder remains in query if is_positional: with pytest.raises(DatabaseTransientException) as term_ex: - client.sql_client.execute_sql(f"SELECT * FROM {loads_table_name} WHERE inserted_at = {placeholder}") + client.sql_client.execute_sql( + f"SELECT * FROM {loads_table_name} WHERE inserted_at = {placeholder}" + ) assert client.sql_client.is_dbapi_exception(term_ex.value.dbapi_exception) # too many parameters with pytest.raises(DatabaseTransientException) as term_ex: - client.sql_client.execute_sql(f"SELECT * FROM {loads_table_name} WHERE inserted_at = {placeholder}", pendulum.now(), 10) + client.sql_client.execute_sql( + f"SELECT * FROM {loads_table_name} WHERE inserted_at = {placeholder}", + pendulum.now(), + 10, + ) assert client.sql_client.is_dbapi_exception(term_ex.value.dbapi_exception) # unknown named parameter if client.sql_client.dbapi.paramstyle == "pyformat": with pytest.raises(DatabaseTransientException) as term_ex: - client.sql_client.execute_sql(f"SELECT * FROM {loads_table_name} WHERE inserted_at = %(date)s") + client.sql_client.execute_sql( + f"SELECT * FROM {loads_table_name} WHERE inserted_at = %(date)s" + ) assert client.sql_client.is_dbapi_exception(term_ex.value.dbapi_exception) -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) def test_execute_sql(client: SqlJobClientBase) -> None: client.update_stored_schema() # ask with datetime # no_rows = client.sql_client.execute_sql(f"SELECT schema_name, inserted_at FROM {VERSION_TABLE_NAME} WHERE inserted_at = %s", pendulum.now().add(seconds=1)) # assert len(no_rows) == 0 version_table_name = client.sql_client.make_qualified_table_name(VERSION_TABLE_NAME) - rows = client.sql_client.execute_sql(f"SELECT schema_name, inserted_at FROM {version_table_name}") + rows = client.sql_client.execute_sql( + f"SELECT schema_name, inserted_at FROM {version_table_name}" + ) assert len(rows) == 1 assert rows[0][0] == "event" - rows = client.sql_client.execute_sql(f"SELECT schema_name, inserted_at FROM {version_table_name} WHERE schema_name = %s", "event") + rows = client.sql_client.execute_sql( + f"SELECT schema_name, inserted_at FROM {version_table_name} WHERE schema_name = %s", "event" + ) assert len(rows) == 1 # print(rows) assert rows[0][0] == "event" @@ -122,18 +164,31 @@ def test_execute_sql(client: SqlJobClientBase) -> None: # print(rows[0][1]) # print(type(rows[0][1])) # convert to pendulum to make sure it is supported by dbapi - rows = client.sql_client.execute_sql(f"SELECT schema_name, inserted_at FROM {version_table_name} WHERE inserted_at = %s", ensure_pendulum_datetime(rows[0][1])) + rows = client.sql_client.execute_sql( + f"SELECT schema_name, inserted_at FROM {version_table_name} WHERE inserted_at = %s", + ensure_pendulum_datetime(rows[0][1]), + ) assert len(rows) == 1 # use rows in subsequent test if client.sql_client.dbapi.paramstyle == "pyformat": - rows = client.sql_client.execute_sql(f"SELECT schema_name, inserted_at FROM {version_table_name} WHERE inserted_at = %(date)s", date=rows[0][1]) + rows = client.sql_client.execute_sql( + f"SELECT schema_name, inserted_at FROM {version_table_name} WHERE inserted_at =" + " %(date)s", + date=rows[0][1], + ) assert len(rows) == 1 assert rows[0][0] == "event" - rows = client.sql_client.execute_sql(f"SELECT schema_name, inserted_at FROM {version_table_name} WHERE inserted_at = %(date)s", 
date=pendulum.now().add(seconds=1)) + rows = client.sql_client.execute_sql( + f"SELECT schema_name, inserted_at FROM {version_table_name} WHERE inserted_at =" + " %(date)s", + date=pendulum.now().add(seconds=1), + ) assert len(rows) == 0 -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) def test_execute_ddl(client: SqlJobClientBase) -> None: uniq_suffix = uniq_id() client.update_stored_schema() @@ -149,38 +204,56 @@ def test_execute_ddl(client: SqlJobClientBase) -> None: assert rows[0][0] == Decimal("1.0") -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) def test_execute_query(client: SqlJobClientBase) -> None: client.update_stored_schema() version_table_name = client.sql_client.make_qualified_table_name(VERSION_TABLE_NAME) - with client.sql_client.execute_query(f"SELECT schema_name, inserted_at FROM {version_table_name}") as curr: + with client.sql_client.execute_query( + f"SELECT schema_name, inserted_at FROM {version_table_name}" + ) as curr: rows = curr.fetchall() assert len(rows) == 1 assert rows[0][0] == "event" - with client.sql_client.execute_query(f"SELECT schema_name, inserted_at FROM {version_table_name} WHERE schema_name = %s", "event") as curr: + with client.sql_client.execute_query( + f"SELECT schema_name, inserted_at FROM {version_table_name} WHERE schema_name = %s", "event" + ) as curr: rows = curr.fetchall() assert len(rows) == 1 assert rows[0][0] == "event" assert isinstance(rows[0][1], datetime.datetime) - with client.sql_client.execute_query(f"SELECT schema_name, inserted_at FROM {version_table_name} WHERE inserted_at = %s", rows[0][1]) as curr: + with client.sql_client.execute_query( + f"SELECT schema_name, inserted_at FROM {version_table_name} WHERE inserted_at = %s", + rows[0][1], + ) as curr: rows = curr.fetchall() assert len(rows) == 1 assert rows[0][0] == "event" - with client.sql_client.execute_query(f"SELECT schema_name, inserted_at FROM {version_table_name} WHERE inserted_at = %s", pendulum.now().add(seconds=1)) as curr: + with client.sql_client.execute_query( + f"SELECT schema_name, inserted_at FROM {version_table_name} WHERE inserted_at = %s", + pendulum.now().add(seconds=1), + ) as curr: rows = curr.fetchall() assert len(rows) == 0 if client.sql_client.dbapi.paramstyle == "pyformat": - with client.sql_client.execute_query(f"SELECT schema_name, inserted_at FROM {version_table_name} WHERE inserted_at = %(date)s", date=pendulum.now().add(seconds=1)) as curr: + with client.sql_client.execute_query( + f"SELECT schema_name, inserted_at FROM {version_table_name} WHERE inserted_at =" + " %(date)s", + date=pendulum.now().add(seconds=1), + ) as curr: rows = curr.fetchall() assert len(rows) == 0 -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) def test_execute_df(client: SqlJobClientBase) -> None: - if client.config.destination_name == "bigquery": + if client.config.destination_type == "bigquery": chunk_size = 50 total_records = 80 - elif client.config.destination_name == "mssql": + elif 
client.config.destination_type == "mssql": chunk_size = 700 total_records = 1000 else: @@ -193,13 +266,17 @@ def test_execute_df(client: SqlJobClientBase) -> None: insert_query = ",".join([f"({idx})" for idx in range(0, total_records)]) client.sql_client.execute_sql(f"INSERT INTO {f_q_table_name} VALUES {insert_query};") - with client.sql_client.execute_query(f"SELECT * FROM {f_q_table_name} ORDER BY col ASC") as curr: + with client.sql_client.execute_query( + f"SELECT * FROM {f_q_table_name} ORDER BY col ASC" + ) as curr: df = curr.df() # Force lower case df columns, snowflake has all cols uppercase df.columns = [dfcol.lower() for dfcol in df.columns] assert list(df["col"]) == list(range(0, total_records)) # get chunked - with client.sql_client.execute_query(f"SELECT * FROM {f_q_table_name} ORDER BY col ASC") as curr: + with client.sql_client.execute_query( + f"SELECT * FROM {f_q_table_name} ORDER BY col ASC" + ) as curr: # be compatible with duckdb vector size df_1 = curr.df(chunk_size=chunk_size) df_2 = curr.df(chunk_size=chunk_size) @@ -214,7 +291,9 @@ def test_execute_df(client: SqlJobClientBase) -> None: assert df_3 is None -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) def test_database_exceptions(client: SqlJobClientBase) -> None: client.update_stored_schema() term_ex: Any @@ -232,11 +311,15 @@ def test_database_exceptions(client: SqlJobClientBase) -> None: pass assert client.sql_client.is_dbapi_exception(term_ex.value.dbapi_exception) with pytest.raises(DatabaseUndefinedRelation) as term_ex: - with client.sql_client.execute_query("DELETE FROM TABLE_XXX WHERE 1=1;DELETE FROM ticket_forms__ticket_field_ids WHERE 1=1;"): + with client.sql_client.execute_query( + "DELETE FROM TABLE_XXX WHERE 1=1;DELETE FROM ticket_forms__ticket_field_ids WHERE 1=1;" + ): pass assert client.sql_client.is_dbapi_exception(term_ex.value.dbapi_exception) with pytest.raises(DatabaseUndefinedRelation) as term_ex: - with client.sql_client.execute_query("DROP TABLE TABLE_XXX;DROP TABLE ticket_forms__ticket_field_ids;"): + with client.sql_client.execute_query( + "DROP TABLE TABLE_XXX;DROP TABLE ticket_forms__ticket_field_ids;" + ): pass # invalid syntax @@ -247,7 +330,9 @@ def test_database_exceptions(client: SqlJobClientBase) -> None: # invalid column with pytest.raises(DatabaseTerminalException) as term_ex: loads_table_name = client.sql_client.make_qualified_table_name(LOADS_TABLE_NAME) - with client.sql_client.execute_query(f"SELECT * FROM {loads_table_name} ORDER BY column_XXX"): + with client.sql_client.execute_query( + f"SELECT * FROM {loads_table_name} ORDER BY column_XXX" + ): pass assert client.sql_client.is_dbapi_exception(term_ex.value.dbapi_exception) # invalid parameters to dbapi @@ -259,7 +344,9 @@ def test_database_exceptions(client: SqlJobClientBase) -> None: with client.sql_client.with_alternative_dataset_name("UNKNOWN"): qualified_name = client.sql_client.make_qualified_table_name(LOADS_TABLE_NAME) with pytest.raises(DatabaseUndefinedRelation) as term_ex: - with client.sql_client.execute_query(f"SELECT * FROM {qualified_name} ORDER BY inserted_at"): + with client.sql_client.execute_query( + f"SELECT * FROM {qualified_name} ORDER BY inserted_at" + ): pass assert client.sql_client.is_dbapi_exception(term_ex.value.dbapi_exception) with pytest.raises(DatabaseUndefinedRelation) as term_ex: @@ -272,28 
+359,40 @@ def test_database_exceptions(client: SqlJobClientBase) -> None: assert client.sql_client.is_dbapi_exception(term_ex.value.dbapi_exception) -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) def test_commit_transaction(client: SqlJobClientBase) -> None: table_name = prepare_temp_table(client) f_q_table_name = client.sql_client.make_qualified_table_name(table_name) with client.sql_client.begin_transaction(): client.sql_client.execute_sql(f"INSERT INTO {f_q_table_name} VALUES (%s)", Decimal("1.0")) # check row still in transaction - rows = client.sql_client.execute_sql(f"SELECT col FROM {f_q_table_name} WHERE col = %s", Decimal("1.0")) + rows = client.sql_client.execute_sql( + f"SELECT col FROM {f_q_table_name} WHERE col = %s", Decimal("1.0") + ) assert len(rows) == 1 # check row after commit - rows = client.sql_client.execute_sql(f"SELECT col FROM {f_q_table_name} WHERE col = %s", Decimal("1.0")) + rows = client.sql_client.execute_sql( + f"SELECT col FROM {f_q_table_name} WHERE col = %s", Decimal("1.0") + ) assert len(rows) == 1 assert rows[0][0] == 1.0 with client.sql_client.begin_transaction() as tx: - client.sql_client.execute_sql(f"DELETE FROM {f_q_table_name} WHERE col = %s", Decimal("1.0")) + client.sql_client.execute_sql( + f"DELETE FROM {f_q_table_name} WHERE col = %s", Decimal("1.0") + ) # explicit commit tx.commit_transaction() - rows = client.sql_client.execute_sql(f"SELECT col FROM {f_q_table_name} WHERE col = %s", Decimal("1.0")) + rows = client.sql_client.execute_sql( + f"SELECT col FROM {f_q_table_name} WHERE col = %s", Decimal("1.0") + ) assert len(rows) == 0 -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) def test_rollback_transaction(client: SqlJobClientBase) -> None: if client.capabilities.supports_transactions is False: pytest.skip("Destination does not support tx") @@ -302,29 +401,43 @@ def test_rollback_transaction(client: SqlJobClientBase) -> None: # test python exception with pytest.raises(RuntimeError): with client.sql_client.begin_transaction(): - client.sql_client.execute_sql(f"INSERT INTO {f_q_table_name} VALUES (%s)", Decimal("1.0")) - rows = client.sql_client.execute_sql(f"SELECT col FROM {f_q_table_name} WHERE col = %s", Decimal("1.0")) + client.sql_client.execute_sql( + f"INSERT INTO {f_q_table_name} VALUES (%s)", Decimal("1.0") + ) + rows = client.sql_client.execute_sql( + f"SELECT col FROM {f_q_table_name} WHERE col = %s", Decimal("1.0") + ) assert len(rows) == 1 # python exception triggers rollback raise RuntimeError("ROLLBACK") - rows = client.sql_client.execute_sql(f"SELECT col FROM {f_q_table_name} WHERE col = %s", Decimal("1.0")) + rows = client.sql_client.execute_sql( + f"SELECT col FROM {f_q_table_name} WHERE col = %s", Decimal("1.0") + ) assert len(rows) == 0 # test rollback on invalid query f_q_wrong_table_name = client.sql_client.make_qualified_table_name(f"{table_name}_X") with pytest.raises(DatabaseException): with client.sql_client.begin_transaction(): - client.sql_client.execute_sql(f"INSERT INTO {f_q_table_name} VALUES (%s)", Decimal("1.0")) + client.sql_client.execute_sql( + f"INSERT INTO {f_q_table_name} VALUES (%s)", Decimal("1.0") + ) # 
table does not exist - client.sql_client.execute_sql(f"SELECT col FROM {f_q_wrong_table_name} WHERE col = %s", Decimal("1.0")) - rows = client.sql_client.execute_sql(f"SELECT col FROM {f_q_table_name} WHERE col = %s", Decimal("1.0")) + client.sql_client.execute_sql( + f"SELECT col FROM {f_q_wrong_table_name} WHERE col = %s", Decimal("1.0") + ) + rows = client.sql_client.execute_sql( + f"SELECT col FROM {f_q_table_name} WHERE col = %s", Decimal("1.0") + ) assert len(rows) == 0 # test explicit rollback with client.sql_client.begin_transaction() as tx: client.sql_client.execute_sql(f"INSERT INTO {f_q_table_name} VALUES (%s)", Decimal("1.0")) tx.rollback_transaction() - rows = client.sql_client.execute_sql(f"SELECT col FROM {f_q_table_name} WHERE col = %s", Decimal("1.0")) + rows = client.sql_client.execute_sql( + f"SELECT col FROM {f_q_table_name} WHERE col = %s", Decimal("1.0") + ) assert len(rows) == 0 # test double rollback - behavior inconsistent across databases (some raise some not) @@ -335,7 +448,9 @@ def test_rollback_transaction(client: SqlJobClientBase) -> None: # tx.rollback_transaction() -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) def test_transaction_isolation(client: SqlJobClientBase) -> None: if client.capabilities.supports_transactions is False: pytest.skip("Destination does not support tx") @@ -346,7 +461,9 @@ def test_transaction_isolation(client: SqlJobClientBase) -> None: def test_thread(thread_id: Decimal) -> None: # make a copy of the sql_client - thread_client = client.sql_client.__class__(client.sql_client.dataset_name, client.sql_client.credentials) + thread_client = client.sql_client.__class__( + client.sql_client.dataset_name, client.sql_client.credentials + ) with thread_client: with thread_client.begin_transaction(): thread_client.execute_sql(f"INSERT INTO {f_q_table_name} VALUES (%s)", thread_id) @@ -374,11 +491,18 @@ def test_thread(thread_id: Decimal) -> None: assert rows[0][0] == Decimal("2.0") -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) def test_max_table_identifier_length(client: SqlJobClientBase) -> None: if client.capabilities.max_identifier_length >= 65536: - pytest.skip(f"destination {client.config.destination_name} has no table name length restriction") - table_name = 8 * "prospects_external_data__data365_member__member__feed_activities_created_post__items__comments__items__comments__items__author_details__educations" + pytest.skip( + f"destination {client.config.destination_type} has no table name length restriction" + ) + table_name = ( + 8 + * "prospects_external_data__data365_member__member__feed_activities_created_post__items__comments__items__comments__items__author_details__educations" + ) with pytest.raises(IdentifierTooLongException) as py_ex: prepare_table(client, "long_table_name", table_name, make_uniq_table=False) assert py_ex.value.identifier_type == "table" @@ -392,17 +516,24 @@ def test_max_table_identifier_length(client: SqlJobClientBase) -> None: # BQ is failing on the HTTP protocol level # exists, _ = client.get_storage_table(long_table_name) - # assert exists is (client.config.destination_name == "postgres") + # assert exists is 
(client.config.destination_type == "postgres") # exists, table_def = client.get_storage_table(long_table_name[:client.capabilities.max_identifier_length]) # assert exists is True -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) def test_max_column_identifier_length(client: SqlJobClientBase) -> None: if client.capabilities.max_column_identifier_length >= 65536: - pytest.skip(f"destination {client.config.destination_name} has no column name length restriction") + pytest.skip( + f"destination {client.config.destination_type} has no column name length restriction" + ) table_name = "prospects_external_data__data365_member__member" - column_name = 7 * "prospects_external_data__data365_member__member__feed_activities_created_post__items__comments__items__comments__items__author_details__educations__school_name" + column_name = ( + 7 + * "prospects_external_data__data365_member__member__feed_activities_created_post__items__comments__items__comments__items__author_details__educations__school_name" + ) with pytest.raises(IdentifierTooLongException) as py_ex: prepare_table(client, "long_column_name", table_name, make_uniq_table=False) assert py_ex.value.identifier_type == "column" @@ -414,7 +545,9 @@ def test_max_column_identifier_length(client: SqlJobClientBase) -> None: # assert long_column_name[:client.capabilities.max_column_identifier_length] in table_def -@pytest.mark.parametrize("client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name) +@pytest.mark.parametrize( + "client", destinations_configs(default_sql_configs=True), indirect=True, ids=lambda x: x.name +) def test_recover_on_explicit_tx(client: SqlJobClientBase) -> None: if client.capabilities.supports_transactions is False: pytest.skip("Destination does not support tx") @@ -441,7 +574,11 @@ def test_recover_on_explicit_tx(client: SqlJobClientBase) -> None: assert_load_id(client.sql_client, "EFG") # wrong value inserted - statements = ["BEGIN TRANSACTION;", f"INSERT INTO {version_table}(version) VALUES(1);", "COMMIT;"] + statements = [ + "BEGIN TRANSACTION;", + f"INSERT INTO {version_table}(version) VALUES(1);", + "COMMIT;", + ] # cannot insert NULL value with pytest.raises(DatabaseTerminalException): client.sql_client.execute_fragments(statements) @@ -466,11 +603,16 @@ def prepare_temp_table(client: SqlJobClientBase) -> str: table_name = f"tmp_{uniq_suffix}" iceberg_table_suffix = "" coltype = "numeric" - if client.config.destination_name == "athena": - iceberg_table_suffix = f"LOCATION '{AWS_BUCKET}/ci/{table_name}' TBLPROPERTIES ('table_type'='ICEBERG', 'format'='parquet');" + if client.config.destination_type == "athena": + iceberg_table_suffix = ( + f"LOCATION '{AWS_BUCKET}/ci/{table_name}' TBLPROPERTIES ('table_type'='ICEBERG'," + " 'format'='parquet');" + ) coltype = "bigint" qualified_table_name = table_name else: qualified_table_name = client.sql_client.make_qualified_table_name(table_name) - client.sql_client.execute_sql(f"CREATE TABLE {qualified_table_name} (col {coltype}) {iceberg_table_suffix};") + client.sql_client.execute_sql( + f"CREATE TABLE {qualified_table_name} (col {coltype}) {iceberg_table_suffix};" + ) return table_name diff --git a/tests/load/utils.py b/tests/load/utils.py index be2097c879..6811ca59a6 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -1,8 +1,7 @@ import 
contextlib -from importlib import import_module import codecs import os -from typing import Any, Iterator, List, Sequence, cast, IO, Tuple, Optional, Dict, Union +from typing import Any, Iterator, List, Sequence, IO, Tuple, Optional, Dict, Union import shutil from pathlib import Path from dataclasses import dataclass @@ -12,13 +11,19 @@ from dlt.common.configuration import resolve_configuration from dlt.common.configuration.container import Container from dlt.common.configuration.specs.config_section_context import ConfigSectionContext -from dlt.common.destination.reference import DestinationClientDwhConfiguration, DestinationReference, JobClientBase, LoadJob, DestinationClientStagingConfiguration, WithStagingDataset, TDestinationReferenceArg -from dlt.common.destination import TLoaderFileFormat +from dlt.common.destination.reference import ( + DestinationClientDwhConfiguration, + JobClientBase, + LoadJob, + DestinationClientStagingConfiguration, + WithStagingDataset, +) +from dlt.common.destination import TLoaderFileFormat, Destination from dlt.common.data_writers import DataWriter -from dlt.common.schema import TColumnSchema, TTableSchemaColumns, Schema +from dlt.common.schema import TTableSchemaColumns, Schema from dlt.common.storages import SchemaStorage, FileStorage, SchemaStorageConfiguration from dlt.common.schema.utils import new_table -from dlt.common.storages.load_storage import ParsedLoadJobFileName, LoadStorage +from dlt.common.storages import ParsedLoadJobFileName, LoadStorage, PackageStorage from dlt.common.typing import StrAny from dlt.common.utils import uniq_id @@ -26,8 +31,18 @@ from dlt.destinations.sql_client import SqlClientBase from dlt.destinations.job_client_impl import SqlJobClientBase -from tests.utils import ACTIVE_DESTINATIONS, IMPLEMENTED_DESTINATIONS, SQL_DESTINATIONS, EXCLUDED_DESTINATION_CONFIGURATIONS -from tests.cases import TABLE_UPDATE_COLUMNS_SCHEMA, TABLE_UPDATE, TABLE_ROW_ALL_DATA_TYPES, assert_all_data_types_row +from tests.utils import ( + ACTIVE_DESTINATIONS, + IMPLEMENTED_DESTINATIONS, + SQL_DESTINATIONS, + EXCLUDED_DESTINATION_CONFIGURATIONS, +) +from tests.cases import ( + TABLE_UPDATE_COLUMNS_SCHEMA, + TABLE_UPDATE, + TABLE_ROW_ALL_DATA_TYPES, + assert_all_data_types_row, +) # bucket urls AWS_BUCKET = dlt.config.get("tests.bucket_url_s3", str) @@ -37,11 +52,20 @@ R2_BUCKET = dlt.config.get("tests.bucket_url_r2", str) MEMORY_BUCKET = dlt.config.get("tests.memory", str) -ALL_FILESYSTEM_DRIVERS = dlt.config.get("ALL_FILESYSTEM_DRIVERS", list) or ["s3", "gs", "az", "file", "memory", "r2"] +ALL_FILESYSTEM_DRIVERS = dlt.config.get("ALL_FILESYSTEM_DRIVERS", list) or [ + "s3", + "gs", + "az", + "file", + "memory", + "r2", +] # Filter out buckets not in all filesystem drivers DEFAULT_BUCKETS = [GCS_BUCKET, AWS_BUCKET, FILE_BUCKET, MEMORY_BUCKET, AZ_BUCKET] -DEFAULT_BUCKETS = [bucket for bucket in DEFAULT_BUCKETS if bucket.split(':')[0] in ALL_FILESYSTEM_DRIVERS] +DEFAULT_BUCKETS = [ + bucket for bucket in DEFAULT_BUCKETS if bucket.split(":")[0] in ALL_FILESYSTEM_DRIVERS +] # Add r2 in extra buckets so it's not run for all tests R2_BUCKET_CONFIG = dict( @@ -51,7 +75,7 @@ aws_access_key_id=dlt.config.get("tests.r2_aws_access_key_id", str), aws_secret_access_key=dlt.config.get("tests.r2_aws_secret_access_key", str), endpoint_url=dlt.config.get("tests.r2_endpoint_url", str), - ) + ), ) EXTRA_BUCKETS: List[Dict[str, Any]] = [] @@ -64,6 +88,7 @@ @dataclass class DestinationTestConfiguration: """Class for defining test setup for one destination.""" + 
destination: str staging: Optional[str] = None file_format: Optional[TLoaderFileFormat] = None @@ -77,7 +102,7 @@ class DestinationTestConfiguration: @property def name(self) -> str: - name: str = self.destination + name: str = self.destination if self.file_format: name += f"-{self.file_format}" if not self.staging: @@ -90,35 +115,42 @@ def name(self) -> str: def setup(self) -> None: """Sets up environment variables for this destination configuration""" - os.environ['DESTINATION__FILESYSTEM__BUCKET_URL'] = self.bucket_url or "" - os.environ['DESTINATION__STAGE_NAME'] = self.stage_name or "" - os.environ['DESTINATION__STAGING_IAM_ROLE'] = self.staging_iam_role or "" - os.environ['DESTINATION__FORCE_ICEBERG'] = str(self.force_iceberg) or "" + os.environ["DESTINATION__FILESYSTEM__BUCKET_URL"] = self.bucket_url or "" + os.environ["DESTINATION__STAGE_NAME"] = self.stage_name or "" + os.environ["DESTINATION__STAGING_IAM_ROLE"] = self.staging_iam_role or "" + os.environ["DESTINATION__FORCE_ICEBERG"] = str(self.force_iceberg) or "" """For the filesystem destinations we disable compression to make analyzing the result easier""" if self.destination == "filesystem": - os.environ['DATA_WRITER__DISABLE_COMPRESSION'] = "True" + os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "True" - - def setup_pipeline(self, pipeline_name: str, dataset_name: str = None, full_refresh: bool = False, **kwargs) -> dlt.Pipeline: + def setup_pipeline( + self, pipeline_name: str, dataset_name: str = None, full_refresh: bool = False, **kwargs + ) -> dlt.Pipeline: """Convenience method to setup pipeline with this configuration""" self.setup() - pipeline = dlt.pipeline(pipeline_name=pipeline_name, destination=self.destination, staging=self.staging, dataset_name=dataset_name or pipeline_name, full_refresh=full_refresh, **kwargs) + pipeline = dlt.pipeline( + pipeline_name=pipeline_name, + destination=self.destination, + staging=self.staging, + dataset_name=dataset_name or pipeline_name, + full_refresh=full_refresh, + **kwargs, + ) return pipeline def destinations_configs( - default_sql_configs: bool = False, - default_vector_configs: bool = False, - default_staging_configs: bool = False, - all_staging_configs: bool = False, - local_filesystem_configs: bool = False, - all_buckets_filesystem_configs: bool = False, - subset: Sequence[str] = (), - exclude: Sequence[str] = (), - file_format: Optional[TLoaderFileFormat] = None, + default_sql_configs: bool = False, + default_vector_configs: bool = False, + default_staging_configs: bool = False, + all_staging_configs: bool = False, + local_filesystem_configs: bool = False, + all_buckets_filesystem_configs: bool = False, + subset: Sequence[str] = (), + exclude: Sequence[str] = (), + file_format: Optional[TLoaderFileFormat] = None, ) -> List[DestinationTestConfiguration]: - # sanity check for item in subset: assert item in IMPLEMENTED_DESTINATIONS, f"Destination {item} is not implemented" @@ -128,11 +160,36 @@ def destinations_configs( # default non staging sql based configs, one per destination if default_sql_configs: - destination_configs += [DestinationTestConfiguration(destination=destination) for destination in SQL_DESTINATIONS if destination != "athena"] - destination_configs += [DestinationTestConfiguration(destination="duckdb", file_format="parquet")] + destination_configs += [ + DestinationTestConfiguration(destination=destination) + for destination in SQL_DESTINATIONS + if destination != "athena" + ] + destination_configs += [ + DestinationTestConfiguration(destination="duckdb", 
file_format="parquet") + ] # athena needs filesystem staging, which will be automatically set, we have to supply a bucket url though - destination_configs += [DestinationTestConfiguration(destination="athena", staging="filesystem", file_format="parquet", supports_merge=False, bucket_url=AWS_BUCKET)] - destination_configs += [DestinationTestConfiguration(destination="athena", staging="filesystem", file_format="parquet", bucket_url=AWS_BUCKET, force_iceberg=True, supports_merge=False, supports_dbt=False, extra_info="iceberg")] + destination_configs += [ + DestinationTestConfiguration( + destination="athena", + staging="filesystem", + file_format="parquet", + supports_merge=False, + bucket_url=AWS_BUCKET, + ) + ] + destination_configs += [ + DestinationTestConfiguration( + destination="athena", + staging="filesystem", + file_format="parquet", + bucket_url=AWS_BUCKET, + force_iceberg=True, + supports_merge=False, + supports_dbt=False, + extra_info="iceberg", + ) + ] if default_vector_configs: # for now only weaviate @@ -140,46 +197,140 @@ def destinations_configs( if default_staging_configs or all_staging_configs: destination_configs += [ - DestinationTestConfiguration(destination="redshift", staging="filesystem", file_format="parquet", bucket_url=AWS_BUCKET, staging_iam_role="arn:aws:iam::267388281016:role/redshift_s3_read", extra_info="s3-role"), - DestinationTestConfiguration(destination="bigquery", staging="filesystem", file_format="parquet", bucket_url=GCS_BUCKET, extra_info="gcs-authorization"), - DestinationTestConfiguration(destination="snowflake", staging="filesystem", file_format="jsonl", bucket_url=GCS_BUCKET, stage_name="PUBLIC.dlt_gcs_stage", extra_info="gcs-integration"), - DestinationTestConfiguration(destination="snowflake", staging="filesystem", file_format="jsonl", bucket_url=AWS_BUCKET, extra_info="s3-integration"), - DestinationTestConfiguration(destination="snowflake", staging="filesystem", file_format="jsonl", bucket_url=AWS_BUCKET, stage_name="PUBLIC.dlt_s3_stage", extra_info="s3-integration"), - DestinationTestConfiguration(destination="snowflake", staging="filesystem", file_format="jsonl", bucket_url=AZ_BUCKET, stage_name="PUBLIC.dlt_az_stage", extra_info="az-integration"), - DestinationTestConfiguration(destination="snowflake", staging="filesystem", file_format="jsonl", bucket_url=AZ_BUCKET, extra_info="az-authorization"), + DestinationTestConfiguration( + destination="redshift", + staging="filesystem", + file_format="parquet", + bucket_url=AWS_BUCKET, + staging_iam_role="arn:aws:iam::267388281016:role/redshift_s3_read", + extra_info="s3-role", + ), + DestinationTestConfiguration( + destination="bigquery", + staging="filesystem", + file_format="parquet", + bucket_url=GCS_BUCKET, + extra_info="gcs-authorization", + ), + DestinationTestConfiguration( + destination="snowflake", + staging="filesystem", + file_format="jsonl", + bucket_url=GCS_BUCKET, + stage_name="PUBLIC.dlt_gcs_stage", + extra_info="gcs-integration", + ), + DestinationTestConfiguration( + destination="snowflake", + staging="filesystem", + file_format="jsonl", + bucket_url=AWS_BUCKET, + extra_info="s3-integration", + ), + DestinationTestConfiguration( + destination="snowflake", + staging="filesystem", + file_format="jsonl", + bucket_url=AWS_BUCKET, + stage_name="PUBLIC.dlt_s3_stage", + extra_info="s3-integration", + ), + DestinationTestConfiguration( + destination="snowflake", + staging="filesystem", + file_format="jsonl", + bucket_url=AZ_BUCKET, + stage_name="PUBLIC.dlt_az_stage", + 
extra_info="az-integration", + ), + DestinationTestConfiguration( + destination="snowflake", + staging="filesystem", + file_format="jsonl", + bucket_url=AZ_BUCKET, + extra_info="az-authorization", + ), ] if all_staging_configs: destination_configs += [ - DestinationTestConfiguration(destination="redshift", staging="filesystem", file_format="parquet", bucket_url=AWS_BUCKET, extra_info="credential-forwarding"), - DestinationTestConfiguration(destination="snowflake", staging="filesystem", file_format="parquet", bucket_url=AWS_BUCKET, extra_info="credential-forwarding"), - DestinationTestConfiguration(destination="redshift", staging="filesystem", file_format="jsonl", bucket_url=AWS_BUCKET, extra_info="credential-forwarding"), - DestinationTestConfiguration(destination="bigquery", staging="filesystem", file_format="jsonl", bucket_url=GCS_BUCKET, extra_info="gcs-authorization"), + DestinationTestConfiguration( + destination="redshift", + staging="filesystem", + file_format="parquet", + bucket_url=AWS_BUCKET, + extra_info="credential-forwarding", + ), + DestinationTestConfiguration( + destination="snowflake", + staging="filesystem", + file_format="parquet", + bucket_url=AWS_BUCKET, + extra_info="credential-forwarding", + ), + DestinationTestConfiguration( + destination="redshift", + staging="filesystem", + file_format="jsonl", + bucket_url=AWS_BUCKET, + extra_info="credential-forwarding", + ), + DestinationTestConfiguration( + destination="bigquery", + staging="filesystem", + file_format="jsonl", + bucket_url=GCS_BUCKET, + extra_info="gcs-authorization", + ), ] # add local filesystem destinations if requested if local_filesystem_configs: - destination_configs += [DestinationTestConfiguration(destination="filesystem", bucket_url=FILE_BUCKET, file_format="insert_values")] - destination_configs += [DestinationTestConfiguration(destination="filesystem", bucket_url=FILE_BUCKET, file_format="parquet")] - destination_configs += [DestinationTestConfiguration(destination="filesystem", bucket_url=FILE_BUCKET, file_format="jsonl")] + destination_configs += [ + DestinationTestConfiguration( + destination="filesystem", bucket_url=FILE_BUCKET, file_format="insert_values" + ) + ] + destination_configs += [ + DestinationTestConfiguration( + destination="filesystem", bucket_url=FILE_BUCKET, file_format="parquet" + ) + ] + destination_configs += [ + DestinationTestConfiguration( + destination="filesystem", bucket_url=FILE_BUCKET, file_format="jsonl" + ) + ] if all_buckets_filesystem_configs: for bucket in DEFAULT_BUCKETS: - destination_configs += [DestinationTestConfiguration(destination="filesystem", bucket_url=bucket, extra_info=bucket)] + destination_configs += [ + DestinationTestConfiguration( + destination="filesystem", bucket_url=bucket, extra_info=bucket + ) + ] # filter out non active destinations - destination_configs = [conf for conf in destination_configs if conf.destination in ACTIVE_DESTINATIONS] + destination_configs = [ + conf for conf in destination_configs if conf.destination in ACTIVE_DESTINATIONS + ] # filter out destinations not in subset if subset: destination_configs = [conf for conf in destination_configs if conf.destination in subset] if exclude: - destination_configs = [conf for conf in destination_configs if conf.destination not in exclude] + destination_configs = [ + conf for conf in destination_configs if conf.destination not in exclude + ] if file_format: - destination_configs = [conf for conf in destination_configs if conf.file_format == file_format] + destination_configs = [ + conf 
for conf in destination_configs if conf.file_format == file_format + ] # filter out excluded configs - destination_configs = [conf for conf in destination_configs if conf.name not in EXCLUDED_DESTINATION_CONFIGURATIONS] + destination_configs = [ + conf for conf in destination_configs if conf.name not in EXCLUDED_DESTINATION_CONFIGURATIONS + ] return destination_configs @@ -188,7 +339,10 @@ def get_normalized_dataset_name(client: JobClientBase) -> str: if isinstance(client.config, DestinationClientDwhConfiguration): return client.config.normalize_dataset_name(client.schema) else: - raise TypeError(f"{type(client)} client has configuration {type(client.config)} that does not support dataset name") + raise TypeError( + f"{type(client)} client has configuration {type(client.config)} that does not support" + " dataset name" + ) def load_table(name: str) -> Dict[str, TTableSchemaColumns]: @@ -196,19 +350,35 @@ def load_table(name: str) -> Dict[str, TTableSchemaColumns]: return json.load(f) -def expect_load_file(client: JobClientBase, file_storage: FileStorage, query: str, table_name: str, status = "completed") -> LoadJob: - file_name = ParsedLoadJobFileName(table_name, uniq_id(), 0, client.capabilities.preferred_loader_file_format).job_id() +def expect_load_file( + client: JobClientBase, + file_storage: FileStorage, + query: str, + table_name: str, + status="completed", +) -> LoadJob: + file_name = ParsedLoadJobFileName( + table_name, + ParsedLoadJobFileName.new_file_id(), + 0, + client.capabilities.preferred_loader_file_format, + ).file_name() file_storage.save(file_name, query.encode("utf-8")) table = client.get_load_table(table_name) job = client.start_file_load(table, file_storage.make_full_path(file_name), uniq_id()) while job.state() == "running": sleep(0.5) assert job.file_name() == file_name - assert job.state() == status + assert job.state() == status return job -def prepare_table(client: JobClientBase, case_name: str = "event_user", table_name: str = "event_user", make_uniq_table: bool = True) -> str: +def prepare_table( + client: JobClientBase, + case_name: str = "event_user", + table_name: str = "event_user", + make_uniq_table: bool = True, +) -> str: client.schema.bump_version() client.update_stored_schema() user_table = load_table(case_name)[table_name] @@ -221,69 +391,78 @@ def prepare_table(client: JobClientBase, case_name: str = "event_user", table_na client.update_stored_schema() return user_table_name + def yield_client( - destination_name: str, + destination_type: str, dataset_name: str = None, default_config_values: StrAny = None, - schema_name: str = "event" + schema_name: str = "event", ) -> Iterator[SqlJobClientBase]: os.environ.pop("DATASET_NAME", None) # import destination reference by name - destination = import_module(f"dlt.destinations.{destination_name}") + destination = Destination.from_reference(destination_type) # create initial config dest_config: DestinationClientDwhConfiguration = None - dest_config = destination.spec()() - dest_config.dataset_name = dataset_name # type: ignore[misc] # TODO: Why is dataset_name final? 
+ dest_config = destination.spec() # type: ignore[assignment] + dest_config.dataset_name = dataset_name # type: ignore[misc] if default_config_values is not None: # apply the values to credentials, if dict is provided it will be used as default - dest_config.credentials = default_config_values # type: ignore[assignment] + # dest_config.credentials = default_config_values # type: ignore[assignment] # also apply to config dest_config.update(default_config_values) # get event default schema - storage_config = resolve_configuration(SchemaStorageConfiguration(), explicit_value={ - "schema_volume_path": "tests/common/cases/schemas/rasa" - }) + storage_config = resolve_configuration( + SchemaStorageConfiguration(), + explicit_value={"schema_volume_path": "tests/common/cases/schemas/rasa"}, + ) schema_storage = SchemaStorage(storage_config) schema = schema_storage.load_schema(schema_name) # create client and dataset client: SqlJobClientBase = None # athena requires staging config to be present, so stick this in there here - if destination_name == "athena": + if destination_type == "athena": staging_config = DestinationClientStagingConfiguration( - destination_name="fake-stage", + destination_type="fake-stage", # type: ignore dataset_name=dest_config.dataset_name, default_schema_name=dest_config.default_schema_name, - bucket_url=AWS_BUCKET + bucket_url=AWS_BUCKET, ) dest_config.staging_config = staging_config # type: ignore[attr-defined] # lookup for credentials in the section that is destination name - with Container().injectable_context(ConfigSectionContext(sections=("destination", destination_name,))): - with destination.client(schema, dest_config) as client: + with Container().injectable_context( + ConfigSectionContext( + sections=( + "destination", + destination_type, + ) + ) + ): + with destination.client(schema, dest_config) as client: # type: ignore[assignment] yield client + @contextlib.contextmanager def cm_yield_client( - destination_name: str, + destination_type: str, dataset_name: str, default_config_values: StrAny = None, - schema_name: str = "event" + schema_name: str = "event", ) -> Iterator[SqlJobClientBase]: - return yield_client(destination_name, dataset_name, default_config_values, schema_name) + return yield_client(destination_type, dataset_name, default_config_values, schema_name) def yield_client_with_storage( - destination_name: str, - default_config_values: StrAny = None, - schema_name: str = "event" + destination_type: str, default_config_values: StrAny = None, schema_name: str = "event" ) -> Iterator[SqlJobClientBase]: - # create dataset with random name dataset_name = "test_" + uniq_id() - with cm_yield_client(destination_name, dataset_name, default_config_values, schema_name) as client: + with cm_yield_client( + destination_type, dataset_name, default_config_values, schema_name + ) as client: client.initialize_storage() yield client # print(dataset_name) @@ -304,44 +483,58 @@ def delete_dataset(client: SqlClientBase[Any], normalized_dataset_name: str) -> @contextlib.contextmanager def cm_yield_client_with_storage( - destination_name: str, - default_config_values: StrAny = None, - schema_name: str = "event" + destination_type: str, default_config_values: StrAny = None, schema_name: str = "event" ) -> Iterator[SqlJobClientBase]: - return yield_client_with_storage(destination_name, default_config_values, schema_name) + return yield_client_with_storage(destination_type, default_config_values, schema_name) -def write_dataset(client: JobClientBase, f: IO[bytes], rows: 
Union[List[Dict[str, Any]], List[StrAny]], columns_schema: TTableSchemaColumns) -> None: - data_format = DataWriter.data_format_from_file_format(client.capabilities.preferred_loader_file_format) +def write_dataset( + client: JobClientBase, + f: IO[bytes], + rows: Union[List[Dict[str, Any]], List[StrAny]], + columns_schema: TTableSchemaColumns, +) -> None: + data_format = DataWriter.data_format_from_file_format( + client.capabilities.preferred_loader_file_format + ) # adapt bytes stream to text file format if not data_format.is_binary_format and isinstance(f.read(0), bytes): f = codecs.getwriter("utf-8")(f) # type: ignore[assignment] writer = DataWriter.from_destination_capabilities(client.capabilities, f) # remove None values for idx, row in enumerate(rows): - rows[idx] = {k:v for k, v in row.items() if v is not None} + rows[idx] = {k: v for k, v in row.items() if v is not None} writer.write_all(columns_schema, rows) -def prepare_load_package(load_storage: LoadStorage, cases: Sequence[str], write_disposition: str='append') -> Tuple[str, Schema]: +def prepare_load_package( + load_storage: LoadStorage, cases: Sequence[str], write_disposition: str = "append" +) -> Tuple[str, Schema]: load_id = uniq_id() - load_storage.create_temp_load_package(load_id) + load_storage.new_packages.create_package(load_id) for case in cases: path = f"./tests/load/cases/loading/{case}" - shutil.copy(path, load_storage.storage.make_full_path(f"{load_id}/{LoadStorage.NEW_JOBS_FOLDER}")) + shutil.copy( + path, + load_storage.new_packages.storage.make_full_path( + load_storage.new_packages.get_job_folder_path(load_id, "new_jobs") + ), + ) schema_path = Path("./tests/load/cases/loading/schema.json") - data = json.loads(schema_path.read_text(encoding='utf8')) - for name, table in data['tables'].items(): - if name.startswith('_dlt'): + # load without migration + data = json.loads(schema_path.read_text(encoding="utf8")) + for name, table in data["tables"].items(): + if name.startswith("_dlt"): continue - table['write_disposition'] = write_disposition - Path( - load_storage.storage.make_full_path(load_id) - ).joinpath(schema_path.name).write_text(json.dumps(data), encoding='utf8') + table["write_disposition"] = write_disposition + full_package_path = load_storage.new_packages.storage.make_full_path( + load_storage.new_packages.get_package_path(load_id) + ) + Path(full_package_path).joinpath(schema_path.name).write_text(json.dumps(data), encoding="utf8") schema_update_path = "./tests/load/cases/loading/schema_updates.json" - shutil.copy(schema_update_path, load_storage.storage.make_full_path(load_id)) + shutil.copy(schema_update_path, full_package_path) - load_storage.commit_temp_load_package(load_id) - schema = load_storage.load_package_schema(load_id) + load_storage.commit_new_load_package(load_id) + schema = load_storage.normalized_packages.load_schema(load_id) return load_id, schema diff --git a/tests/load/weaviate/test_naming.py b/tests/load/weaviate/test_naming.py index a965201425..290879cb67 100644 --- a/tests/load/weaviate/test_naming.py +++ b/tests/load/weaviate/test_naming.py @@ -1,16 +1,19 @@ import dlt, pytest -from dlt.destinations.weaviate.naming import NamingConvention -from dlt.destinations.weaviate.ci_naming import NamingConvention as CINamingConvention +from dlt.destinations.impl.weaviate.naming import NamingConvention +from dlt.destinations.impl.weaviate.ci_naming import NamingConvention as CINamingConvention from tests.common.utils import load_yml_case + @dlt.source def small(): - return 
dlt.resource([1,2,3], name="table") + return dlt.resource([1, 2, 3], name="table") -@pytest.mark.parametrize("n", [NamingConvention(), CINamingConvention()], ids=["naming", "ci_naming"]) +@pytest.mark.parametrize( + "n", [NamingConvention(), CINamingConvention()], ids=["naming", "ci_naming"] +) def test_table_name_normalization(n: NamingConvention) -> None: assert n.normalize_table_identifier("FlatSpace") == "FlatSpace" assert n.normalize_table_identifier("a_snake_case_name") == "ASnakeCaseName" @@ -87,13 +90,13 @@ def test_reserved_property_names() -> None: # print(schema_2.name) # print(schema_2.naming) -# eth_v6 = load_yml_case("schemas/eth/ethereum_schema_v6") -# eth_v6_schema = dlt.Schema.from_dict(eth_v6) +# eth_V8 = load_yml_case("schemas/eth/ethereum_schema_v8") +# eth_V8_schema = dlt.Schema.from_dict(eth_V8) -# pipeline.extract(s, schema=eth_v6_schema) +# pipeline.extract(s, schema=eth_V8_schema) -# print(eth_v6_schema.data_tables()) -# print(eth_v6_schema.dlt_tables()) +# print(eth_V8_schema.data_tables()) +# print(eth_V8_schema.dlt_tables()) # def test_x_schema_naming_normalize() -> None: @@ -101,14 +104,14 @@ def test_reserved_property_names() -> None: # print(pipeline.dataset_name) # s = small() -# eth_v6 = load_yml_case("schemas/eth/ethereum_schema_v6") -# eth_v6_schema = dlt.Schema.from_dict(eth_v6) +# eth_V8 = load_yml_case("schemas/eth/ethereum_schema_v8") +# eth_V8_schema = dlt.Schema.from_dict(eth_V8) -# pipeline.extract(s, schema=eth_v6_schema) -# print(eth_v6_schema.tables.keys()) +# pipeline.extract(s, schema=eth_V8_schema) +# print(eth_V8_schema.tables.keys()) # default_schema = pipeline.default_schema # print(default_schema.name) -# print(eth_v6_schema.tables.keys()) +# print(eth_V8_schema.tables.keys()) # pipeline.run(s, destination="weaviate") # print(default_schema.tables.keys()) diff --git a/tests/load/weaviate/test_pipeline.py b/tests/load/weaviate/test_pipeline.py index 339c94575e..a4b5098fe7 100644 --- a/tests/load/weaviate/test_pipeline.py +++ b/tests/load/weaviate/test_pipeline.py @@ -6,15 +6,16 @@ from dlt.common.schema import Schema from dlt.common.utils import uniq_id -from dlt.destinations.weaviate import weaviate_adapter -from dlt.destinations.weaviate.exceptions import PropertyNameConflict -from dlt.destinations.weaviate.weaviate_adapter import VECTORIZE_HINT, TOKENIZATION_HINT -from dlt.destinations.weaviate.weaviate_client import WeaviateClient +from dlt.destinations.impl.weaviate import weaviate_adapter +from dlt.destinations.impl.weaviate.exceptions import PropertyNameConflict +from dlt.destinations.impl.weaviate.weaviate_adapter import VECTORIZE_HINT, TOKENIZATION_HINT +from dlt.destinations.impl.weaviate.weaviate_client import WeaviateClient from dlt.pipeline.exceptions import PipelineStepFailed from tests.pipeline.utils import assert_load_info from .utils import assert_class, drop_active_pipeline_data + @pytest.fixture(autouse=True) def drop_weaviate_schema() -> Iterator[None]: yield @@ -75,6 +76,7 @@ def some_data(): state = client.get_stored_state("test_pipeline_append") assert state + def test_pipeline_append() -> None: generator_instance1 = sequence_generator() generator_instance2 = sequence_generator() @@ -149,7 +151,6 @@ def some_data(): def test_pipeline_replace() -> None: - generator_instance1 = sequence_generator() generator_instance2 = sequence_generator() @@ -196,16 +197,14 @@ def test_pipeline_merge() -> None: "doc_id": 1, "title": "The Shawshank Redemption", "description": ( - "Two imprisoned men find redemption through acts " - 
"of decency over the years." + "Two imprisoned men find redemption through acts of decency over the years." ), }, { "doc_id": 2, "title": "The Godfather", "description": ( - "A crime dynasty's aging patriarch transfers " - "control to his reluctant son." + "A crime dynasty's aging patriarch transfers control to his reluctant son." ), }, { @@ -310,20 +309,39 @@ def test_merge_github_nested() -> None: p = dlt.pipeline(destination="weaviate", dataset_name="github1", full_refresh=True) assert p.dataset_name.startswith("github1_202") - with open("tests/normalize/cases/github.issues.load_page_5_duck.json", "r", encoding="utf-8") as f: + with open( + "tests/normalize/cases/github.issues.load_page_5_duck.json", "r", encoding="utf-8" + ) as f: data = json.load(f) info = p.run( - weaviate_adapter(data[:17], vectorize=["title", "body"], tokenization={"user__login": "lowercase"}), + weaviate_adapter( + data[:17], vectorize=["title", "body"], tokenization={"user__login": "lowercase"} + ), table_name="issues", write_disposition="merge", - primary_key="id" + primary_key="id", ) assert_load_info(info) # assert if schema contains tables with right names - assert set(p.default_schema.tables.keys()) == {'DltVersion', 'DltLoads', 'Issues', 'DltPipelineState', 'Issues__Labels', 'Issues__Assignees'} - assert set([t["name"] for t in p.default_schema.data_tables()]) == {'Issues', 'Issues__Labels', 'Issues__Assignees'} - assert set([t["name"] for t in p.default_schema.dlt_tables()]) == {'DltVersion', 'DltLoads', 'DltPipelineState'} + assert set(p.default_schema.tables.keys()) == { + "DltVersion", + "DltLoads", + "Issues", + "DltPipelineState", + "Issues__Labels", + "Issues__Assignees", + } + assert set([t["name"] for t in p.default_schema.data_tables()]) == { + "Issues", + "Issues__Labels", + "Issues__Assignees", + } + assert set([t["name"] for t in p.default_schema.dlt_tables()]) == { + "DltVersion", + "DltLoads", + "DltPipelineState", + } issues = p.default_schema.tables["Issues"] # make sure that both "id" column and "primary_key" were changed to __id assert issues["columns"]["__id"]["primary_key"] is True @@ -369,15 +387,23 @@ def test_vectorize_property_without_data() -> None: # here we increase the abuse and try to vectorize a `Value` field, where in the data there's `value` # in standard naming convention this results in property conflict with pytest.raises(PipelineStepFailed) as pipe_ex: - p.run(weaviate_adapter(["a", "b", "c"], vectorize="vAlue"), primary_key="vAlue", columns={"vAlue": {"data_type": "text"}}) + p.run( + weaviate_adapter(["a", "b", "c"], vectorize="vAlue"), + primary_key="vAlue", + columns={"vAlue": {"data_type": "text"}}, + ) assert isinstance(pipe_ex.value.__context__, PropertyNameConflict) # set the naming convention to case insensitive # os.environ["SCHEMA__NAMING"] = "direct" - dlt.config["schema.naming"] = "dlt.destinations.weaviate.ci_naming" + dlt.config["schema.naming"] = "dlt.destinations.impl.weaviate.ci_naming" # create new schema with changed naming convention p = p.drop() - info = p.run(weaviate_adapter(["there are", "no stop", "words in here"], vectorize="vAlue"), primary_key="vALue", columns={"vAlue": {"data_type": "text"}}) + info = p.run( + weaviate_adapter(["there are", "no stop", "words in here"], vectorize="vAlue"), + primary_key="vALue", + columns={"vAlue": {"data_type": "text"}}, + ) # dataset in load info is empty assert_load_info(info) # print(p.default_schema.to_pretty_yaml()) diff --git a/tests/load/weaviate/test_weaviate_client.py 
b/tests/load/weaviate/test_weaviate_client.py index d102610f68..48153f7706 100644 --- a/tests/load/weaviate/test_weaviate_client.py +++ b/tests/load/weaviate/test_weaviate_client.py @@ -9,17 +9,24 @@ from dlt.common.schema.typing import TWriteDisposition, TColumnSchema, TTableSchemaColumns from dlt.destinations import weaviate -from dlt.destinations.weaviate.exceptions import PropertyNameConflict -from dlt.destinations.weaviate.weaviate_client import WeaviateClient +from dlt.destinations.impl.weaviate.exceptions import PropertyNameConflict +from dlt.destinations.impl.weaviate.weaviate_client import WeaviateClient from dlt.common.storages.file_storage import FileStorage from dlt.common.schema.utils import new_table -from tests.load.utils import TABLE_ROW_ALL_DATA_TYPES, TABLE_UPDATE, TABLE_UPDATE_COLUMNS_SCHEMA, expect_load_file, write_dataset +from tests.load.utils import ( + TABLE_ROW_ALL_DATA_TYPES, + TABLE_UPDATE, + TABLE_UPDATE_COLUMNS_SCHEMA, + expect_load_file, + write_dataset, +) from tests.utils import TEST_STORAGE_ROOT from .utils import drop_active_pipeline_data + @pytest.fixture(autouse=True) def drop_weaviate_schema() -> Iterator[None]: yield @@ -27,26 +34,27 @@ def drop_weaviate_schema() -> Iterator[None]: def get_client_instance(schema: Schema) -> WeaviateClient: - config = weaviate.spec()(dataset_name="ClientTest" + uniq_id()) - with Container().injectable_context(ConfigSectionContext(sections=('destination', 'weaviate'))): - return weaviate.client(schema, config) # type: ignore[return-value] + dest = weaviate(dataset_name="ClientTest" + uniq_id()) + return dest.client(schema, dest.spec()) + # with Container().injectable_context(ConfigSectionContext(sections=('destination', 'weaviate'))): + # return dest.client(schema, config) -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def client() -> Iterator[WeaviateClient]: yield from make_client("naming") -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def ci_client() -> Iterator[WeaviateClient]: yield from make_client("ci_naming") def make_client(naming_convention: str) -> Iterator[WeaviateClient]: - schema = Schema('test_schema', { - 'names': f"dlt.destinations.weaviate.{naming_convention}", - 'json': None - }) + schema = Schema( + "test_schema", + {"names": f"dlt.destinations.impl.weaviate.{naming_convention}", "json": None}, + ) _client = get_client_instance(schema) try: yield _client @@ -59,11 +67,15 @@ def file_storage() -> FileStorage: return FileStorage(TEST_STORAGE_ROOT, file_type="b", makedirs=True) -@pytest.mark.parametrize('write_disposition', ["append", "replace", "merge"]) -def test_all_data_types(client: WeaviateClient, write_disposition: TWriteDisposition, file_storage: FileStorage) -> None: +@pytest.mark.parametrize("write_disposition", ["append", "replace", "merge"]) +def test_all_data_types( + client: WeaviateClient, write_disposition: TWriteDisposition, file_storage: FileStorage +) -> None: class_name = "AllTypes" # we should have identical content with all disposition types - client.schema.update_table(new_table(class_name, write_disposition=write_disposition, columns=TABLE_UPDATE)) + client.schema.update_table( + new_table(class_name, write_disposition=write_disposition, columns=TABLE_UPDATE) + ) client.schema.bump_version() client.update_stored_schema() @@ -85,25 +97,22 @@ def test_all_data_types(client: WeaviateClient, write_disposition: TWriteDisposi elif TABLE_UPDATE_COLUMNS_SCHEMA[col_name]["data_type"] == "date": assert table_columns[col_name]["data_type"] == 
"timestamp" else: - assert table_columns[col_name]["data_type"] == TABLE_UPDATE_COLUMNS_SCHEMA[col_name]["data_type"] + assert ( + table_columns[col_name]["data_type"] + == TABLE_UPDATE_COLUMNS_SCHEMA[col_name]["data_type"] + ) def test_case_sensitive_properties_create(client: WeaviateClient) -> None: class_name = "col_class" # we have two properties which will map to the same name in Weaviate table_create: List[TColumnSchema] = [ - { - "name": "col1", - "data_type": "bigint", - "nullable": False - }, - { - "name": "coL1", - "data_type": "double", - "nullable": False - }, + {"name": "col1", "data_type": "bigint", "nullable": False}, + {"name": "coL1", "data_type": "double", "nullable": False}, ] - client.schema.update_table(client.schema.normalize_table_identifiers(new_table(class_name, columns=table_create))) + client.schema.update_table( + client.schema.normalize_table_identifiers(new_table(class_name, columns=table_create)) + ) client.schema.bump_version() with pytest.raises(PropertyNameConflict): client.update_stored_schema() @@ -113,38 +122,25 @@ def test_case_insensitive_properties_create(ci_client: WeaviateClient) -> None: class_name = "col_class" # we have two properties which will map to the same name in Weaviate table_create: List[TColumnSchema] = [ - { - "name": "col1", - "data_type": "bigint", - "nullable": False - }, - { - "name": "coL1", - "data_type": "double", - "nullable": False - }, + {"name": "col1", "data_type": "bigint", "nullable": False}, + {"name": "coL1", "data_type": "double", "nullable": False}, ] - ci_client.schema.update_table(ci_client.schema.normalize_table_identifiers(new_table(class_name, columns=table_create))) + ci_client.schema.update_table( + ci_client.schema.normalize_table_identifiers(new_table(class_name, columns=table_create)) + ) ci_client.schema.bump_version() ci_client.update_stored_schema() _, table_columns = ci_client.get_storage_table("ColClass") # later column overwrites earlier one so: double - assert table_columns == {'col1': {'name': 'col1', 'data_type': 'double'}} + assert table_columns == {"col1": {"name": "col1", "data_type": "double"}} def test_case_sensitive_properties_add(client: WeaviateClient) -> None: class_name = "col_class" # we have two properties which will map to the same name in Weaviate - table_create: List[TColumnSchema] = [{ - "name": "col1", - "data_type": "bigint", - "nullable": False - }] - table_update: List[TColumnSchema] = [{ - "name": "coL1", - "data_type": "double", - "nullable": False - }, + table_create: List[TColumnSchema] = [{"name": "col1", "data_type": "bigint", "nullable": False}] + table_update: List[TColumnSchema] = [ + {"name": "coL1", "data_type": "double", "nullable": False}, ] client.schema.update_table( client.schema.normalize_table_identifiers(new_table(class_name, columns=table_create)) @@ -166,12 +162,9 @@ def test_case_sensitive_properties_add(client: WeaviateClient) -> None: def test_load_case_sensitive_data(client: WeaviateClient, file_storage: FileStorage) -> None: class_name = "col_class" # we have two properties which will map to the same name in Weaviate - table_create: TTableSchemaColumns = {"col1": - { - "name": "col1", - "data_type": "bigint", - "nullable": False - }} + table_create: TTableSchemaColumns = { + "col1": {"name": "col1", "data_type": "bigint", "nullable": False} + } client.schema.update_table(new_table(class_name, columns=[table_create["col1"]])) client.schema.bump_version() client.update_stored_schema() @@ -188,19 +181,18 @@ def test_load_case_sensitive_data(client: 
WeaviateClient, file_storage: FileStor def test_load_case_sensitive_data_ci(ci_client: WeaviateClient, file_storage: FileStorage) -> None: class_name = "col_class" # we have two properties which will map to the same name in Weaviate - table_create: TTableSchemaColumns = {"col1": - { - "name": "col1", - "data_type": "bigint", - "nullable": False - }} + table_create: TTableSchemaColumns = { + "col1": {"name": "col1", "data_type": "bigint", "nullable": False} + } ci_client.schema.update_table(new_table(class_name, columns=[table_create["col1"]])) ci_client.schema.bump_version() ci_client.update_stored_schema() # prepare a data item where is name clash due to Weaviate being CI # but here we normalize the item data_clash = list( - ci_client.schema.normalize_data_item({"col1": 72187328, "coL1": 726171}, "_load_id_", "col_class") + ci_client.schema.normalize_data_item( + {"col1": 72187328, "coL1": 726171}, "_load_id_", "col_class" + ) )[0][1] # write row @@ -211,4 +203,4 @@ def test_load_case_sensitive_data_ci(ci_client: WeaviateClient, file_storage: Fi response = ci_client.query_class(class_name, ["col1"]).do() objects = response["data"]["Get"][ci_client.make_qualified_class_name(class_name)] # the latter of conflicting fields is stored (so data is lost) - assert objects == [{'col1': 726171}] + assert objects == [{"col1": 726171}] diff --git a/tests/load/weaviate/utils.py b/tests/load/weaviate/utils.py index d5568b0598..ed378191e6 100644 --- a/tests/load/weaviate/utils.py +++ b/tests/load/weaviate/utils.py @@ -6,8 +6,8 @@ from dlt.common.configuration.container import Container from dlt.common.schema.utils import get_columns_names_with_prop -from dlt.destinations.weaviate.weaviate_client import WeaviateClient -from dlt.destinations.weaviate.weaviate_adapter import VECTORIZE_HINT, TOKENIZATION_HINT +from dlt.destinations.impl.weaviate.weaviate_client import WeaviateClient +from dlt.destinations.impl.weaviate.weaviate_adapter import VECTORIZE_HINT, TOKENIZATION_HINT def assert_unordered_list_equal(list1: List[Any], list2: List[Any]) -> None: diff --git a/tests/normalize/mock_rasa_json_normalizer.py b/tests/normalize/mock_rasa_json_normalizer.py index c54992dc0b..f911c55493 100644 --- a/tests/normalize/mock_rasa_json_normalizer.py +++ b/tests/normalize/mock_rasa_json_normalizer.py @@ -5,14 +5,21 @@ class DataItemNormalizer(RelationalNormalizer): - - def normalize_data_item(self, source_event: TDataItem, load_id: str, table_name: str) -> TNormalizedRowIterator: + def normalize_data_item( + self, source_event: TDataItem, load_id: str, table_name: str + ) -> TNormalizedRowIterator: if self.schema.name == "event": # this emulates rasa parser on standard parser - event = {"sender_id": source_event["sender_id"], "timestamp": source_event["timestamp"], "type": source_event["event"]} + event = { + "sender_id": source_event["sender_id"], + "timestamp": source_event["timestamp"], + "type": source_event["event"], + } yield from super().normalize_data_item(event, load_id, table_name) # add table name which is "event" field in RASA OSS - yield from super().normalize_data_item(source_event, load_id, table_name + "_" + source_event["event"]) + yield from super().normalize_data_item( + source_event, load_id, table_name + "_" + source_event["event"] + ) else: # will generate tables properly yield from super().normalize_data_item(source_event, load_id, table_name) diff --git a/tests/normalize/test_normalize.py b/tests/normalize/test_normalize.py index 12b6267a59..a345a05ebe 100644 --- 
a/tests/normalize/test_normalize.py +++ b/tests/normalize/test_normalize.py @@ -1,25 +1,39 @@ import pytest from fnmatch import fnmatch from typing import Dict, Iterator, List, Sequence, Tuple + # from multiprocessing import get_start_method, Pool # from multiprocessing.dummy import Pool as ThreadPool from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor from dlt.common import json from dlt.common.schema.schema import Schema +from dlt.common.storages.exceptions import SchemaNotFoundError from dlt.common.utils import uniq_id from dlt.common.typing import StrAny from dlt.common.data_types import TDataType -from dlt.common.storages import NormalizeStorage, LoadStorage +from dlt.common.storages import NormalizeStorage, LoadStorage, ParsedLoadJobFileName, PackageStorage from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.configuration.container import Container -from dlt.extract.extract import ExtractorStorage +from dlt.extract.extract import ExtractStorage from dlt.normalize import Normalize from tests.cases import JSON_TYPED_DICT, JSON_TYPED_DICT_TYPES -from tests.utils import TEST_DICT_CONFIG_PROVIDER, assert_no_dict_key_starts_with, clean_test_storage, init_test_logging -from tests.normalize.utils import json_case_path, INSERT_CAPS, JSONL_CAPS, DEFAULT_CAPS, ALL_CAPABILITIES +from tests.utils import ( + TEST_DICT_CONFIG_PROVIDER, + MockPipeline, + assert_no_dict_key_starts_with, + clean_test_storage, + init_test_logging, +) +from tests.normalize.utils import ( + json_case_path, + INSERT_CAPS, + JSONL_CAPS, + DEFAULT_CAPS, + ALL_CAPABILITIES, +) @pytest.fixture(scope="module", autouse=True) @@ -57,7 +71,9 @@ def rasa_normalize() -> Iterator[Normalize]: def init_normalize(default_schemas_path: str = None) -> Iterator[Normalize]: clean_test_storage() # pass schema config fields to schema storage via dict config provider - with TEST_DICT_CONFIG_PROVIDER().values({"import_schema_path": default_schemas_path, "external_schema_format": "json"}): + with TEST_DICT_CONFIG_PROVIDER().values( + {"import_schema_path": default_schemas_path, "external_schema_format": "json"} + ): # inject the destination capabilities n = Normalize() yield n @@ -74,8 +90,12 @@ def test_initialize(rasa_normalize: Normalize) -> None: @pytest.mark.parametrize("caps", JSONL_CAPS, indirect=True) -def test_normalize_single_user_event_jsonl(caps: DestinationCapabilitiesContext, raw_normalize: Normalize) -> None: - expected_tables, load_files = normalize_event_user(raw_normalize, "event.event.user_load_1", EXPECTED_USER_TABLES) +def test_normalize_single_user_event_jsonl( + caps: DestinationCapabilitiesContext, raw_normalize: Normalize +) -> None: + expected_tables, load_files = normalize_event_user( + raw_normalize, "event.event.user_load_1", EXPECTED_USER_TABLES + ) # load, parse and verify jsonl for expected_table in expected_tables: get_line_from_file(raw_normalize.load_storage, load_files[expected_table]) @@ -86,7 +106,11 @@ def test_normalize_single_user_event_jsonl(caps: DestinationCapabilitiesContext, assert event_json["event"] == "user" assert event_json["parse_data__intent__name"] == "greet" assert event_json["text"] == "hello" - event_text, lines = get_line_from_file(raw_normalize.load_storage, load_files["event__parse_data__response_selector__default__ranking"], 9) + event_text, lines = get_line_from_file( + raw_normalize.load_storage, + load_files["event__parse_data__response_selector__default__ranking"], + 9, + ) assert lines == 10 event_json = json.loads(event_text) 
assert "id" in event_json @@ -95,31 +119,47 @@ def test_normalize_single_user_event_jsonl(caps: DestinationCapabilitiesContext, @pytest.mark.parametrize("caps", INSERT_CAPS, indirect=True) -def test_normalize_single_user_event_insert(caps: DestinationCapabilitiesContext, raw_normalize: Normalize) -> None: +def test_normalize_single_user_event_insert( + caps: DestinationCapabilitiesContext, raw_normalize: Normalize +) -> None: # mock_destination_caps(raw_normalize, caps) - expected_tables, load_files = normalize_event_user(raw_normalize, "event.event.user_load_1", EXPECTED_USER_TABLES) + expected_tables, load_files = normalize_event_user( + raw_normalize, "event.event.user_load_1", EXPECTED_USER_TABLES + ) # verify values line for expected_table in expected_tables: get_line_from_file(raw_normalize.load_storage, load_files[expected_table]) # return first values line from event_user file event_text, lines = get_line_from_file(raw_normalize.load_storage, load_files["event"], 2) assert lines == 3 - assert "'user'" in event_text + assert "'user'" in event_text assert "'greet'" in event_text assert "'hello'" in event_text - event_text, lines = get_line_from_file(raw_normalize.load_storage, load_files["event__parse_data__response_selector__default__ranking"], 11) + event_text, lines = get_line_from_file( + raw_normalize.load_storage, + load_files["event__parse_data__response_selector__default__ranking"], + 11, + ) assert lines == 12 assert "(7005479104644416710," in event_text @pytest.mark.parametrize("caps", JSONL_CAPS, indirect=True) -def test_normalize_filter_user_event(caps: DestinationCapabilitiesContext, rasa_normalize: Normalize) -> None: +def test_normalize_filter_user_event( + caps: DestinationCapabilitiesContext, rasa_normalize: Normalize +) -> None: load_id = extract_and_normalize_cases(rasa_normalize, ["event.event.user_load_v228_1"]) _, load_files = expect_load_package( rasa_normalize.load_storage, load_id, - ["event", "event_user", "event_user__metadata__user_nicknames", - "event_user__parse_data__entities", "event_user__parse_data__entities__processors", "event_user__parse_data__intent_ranking"] + [ + "event", + "event_user", + "event_user__metadata__user_nicknames", + "event_user__parse_data__entities", + "event_user__parse_data__entities__processors", + "event_user__parse_data__intent_ranking", + ], ) event_text, lines = get_line_from_file(rasa_normalize.load_storage, load_files["event_user"], 0) assert lines == 1 @@ -130,9 +170,15 @@ def test_normalize_filter_user_event(caps: DestinationCapabilitiesContext, rasa_ @pytest.mark.parametrize("caps", JSONL_CAPS, indirect=True) -def test_normalize_filter_bot_event(caps: DestinationCapabilitiesContext, rasa_normalize: Normalize) -> None: - load_id = extract_and_normalize_cases(rasa_normalize, ["event.event.bot_load_metadata_2987398237498798"]) - _, load_files = expect_load_package(rasa_normalize.load_storage, load_id, ["event", "event_bot"]) +def test_normalize_filter_bot_event( + caps: DestinationCapabilitiesContext, rasa_normalize: Normalize +) -> None: + load_id = extract_and_normalize_cases( + rasa_normalize, ["event.event.bot_load_metadata_2987398237498798"] + ) + _, load_files = expect_load_package( + rasa_normalize.load_storage, load_id, ["event", "event_bot"] + ) event_text, lines = get_line_from_file(rasa_normalize.load_storage, load_files["event_bot"], 0) assert lines == 1 filtered_row = json.loads(event_text) @@ -141,35 +187,41 @@ def test_normalize_filter_bot_event(caps: DestinationCapabilitiesContext, rasa_n 
@pytest.mark.parametrize("caps", JSONL_CAPS, indirect=True) -def test_preserve_slot_complex_value_json_l(caps: DestinationCapabilitiesContext, rasa_normalize: Normalize) -> None: +def test_preserve_slot_complex_value_json_l( + caps: DestinationCapabilitiesContext, rasa_normalize: Normalize +) -> None: load_id = extract_and_normalize_cases(rasa_normalize, ["event.event.slot_session_metadata_1"]) - _, load_files = expect_load_package(rasa_normalize.load_storage, load_id, ["event", "event_slot"]) + _, load_files = expect_load_package( + rasa_normalize.load_storage, load_id, ["event", "event_slot"] + ) event_text, lines = get_line_from_file(rasa_normalize.load_storage, load_files["event_slot"], 0) assert lines == 1 filtered_row = json.loads(event_text) assert type(filtered_row["value"]) is dict - assert filtered_row["value"] == { - "user_id": "world", - "mitter_id": "hello" - } + assert filtered_row["value"] == {"user_id": "world", "mitter_id": "hello"} @pytest.mark.parametrize("caps", INSERT_CAPS, indirect=True) -def test_preserve_slot_complex_value_insert(caps: DestinationCapabilitiesContext, rasa_normalize: Normalize) -> None: +def test_preserve_slot_complex_value_insert( + caps: DestinationCapabilitiesContext, rasa_normalize: Normalize +) -> None: load_id = extract_and_normalize_cases(rasa_normalize, ["event.event.slot_session_metadata_1"]) - _, load_files = expect_load_package(rasa_normalize.load_storage, load_id, ["event", "event_slot"]) + _, load_files = expect_load_package( + rasa_normalize.load_storage, load_id, ["event", "event_slot"] + ) event_text, lines = get_line_from_file(rasa_normalize.load_storage, load_files["event_slot"], 2) assert lines == 3 - c_val = json.dumps({ - "user_id": "world", - "mitter_id": "hello" - }) + c_val = json.dumps({"user_id": "world", "mitter_id": "hello"}) assert c_val in event_text @pytest.mark.parametrize("caps", INSERT_CAPS, indirect=True) -def test_normalize_many_events_insert(caps: DestinationCapabilitiesContext, rasa_normalize: Normalize) -> None: - load_id = extract_and_normalize_cases(rasa_normalize, ["event.event.many_load_2", "event.event.user_load_1"]) +def test_normalize_many_events_insert( + caps: DestinationCapabilitiesContext, rasa_normalize: Normalize +) -> None: + load_id = extract_and_normalize_cases( + rasa_normalize, ["event.event.many_load_2", "event.event.user_load_1"] + ) expected_tables = EXPECTED_USER_TABLES_RASA_NORMALIZER + ["event_bot", "event_action"] _, load_files = expect_load_package(rasa_normalize.load_storage, load_id, expected_tables) # return first values line from event_user file @@ -180,8 +232,12 @@ def test_normalize_many_events_insert(caps: DestinationCapabilitiesContext, rasa @pytest.mark.parametrize("caps", JSONL_CAPS, indirect=True) -def test_normalize_many_events(caps: DestinationCapabilitiesContext, rasa_normalize: Normalize) -> None: - load_id = extract_and_normalize_cases(rasa_normalize, ["event.event.many_load_2", "event.event.user_load_1"]) +def test_normalize_many_events( + caps: DestinationCapabilitiesContext, rasa_normalize: Normalize +) -> None: + load_id = extract_and_normalize_cases( + rasa_normalize, ["event.event.many_load_2", "event.event.user_load_1"] + ) expected_tables = EXPECTED_USER_TABLES_RASA_NORMALIZER + ["event_bot", "event_action"] _, load_files = expect_load_package(rasa_normalize.load_storage, load_id, expected_tables) # return first values line from event_user file @@ -192,35 +248,58 @@ def test_normalize_many_events(caps: DestinationCapabilitiesContext, rasa_normal 
@pytest.mark.parametrize("caps", ALL_CAPABILITIES, indirect=True) -def test_normalize_raw_no_type_hints(caps: DestinationCapabilitiesContext, raw_normalize: Normalize) -> None: +def test_normalize_raw_no_type_hints( + caps: DestinationCapabilitiesContext, raw_normalize: Normalize +) -> None: normalize_event_user(raw_normalize, "event.event.user_load_1", EXPECTED_USER_TABLES) assert_timestamp_data_type(raw_normalize.load_storage, "double") @pytest.mark.parametrize("caps", ALL_CAPABILITIES, indirect=True) -def test_normalize_raw_type_hints(caps: DestinationCapabilitiesContext, rasa_normalize: Normalize) -> None: +def test_normalize_raw_type_hints( + caps: DestinationCapabilitiesContext, rasa_normalize: Normalize +) -> None: extract_and_normalize_cases(rasa_normalize, ["event.event.user_load_1"]) assert_timestamp_data_type(rasa_normalize.load_storage, "timestamp") + @pytest.mark.parametrize("caps", ALL_CAPABILITIES, indirect=True) -def test_multiprocess_row_counting(caps: DestinationCapabilitiesContext, raw_normalize: Normalize) -> None: - extract_cases( - raw_normalize.normalize_storage, - ["github.events.load_page_1_duck"] - ) +def test_multiprocessing_row_counting( + caps: DestinationCapabilitiesContext, raw_normalize: Normalize +) -> None: + extract_cases(raw_normalize, ["github.events.load_page_1_duck"]) # use real process pool in tests with ProcessPoolExecutor(max_workers=4) as p: raw_normalize.run(p) - - assert raw_normalize._row_counts["events"] == 100 - assert raw_normalize._row_counts["events__payload__pull_request__requested_reviewers"] == 24 + # get step info + step_info = raw_normalize.get_step_info(MockPipeline("multiprocessing_pipeline", True)) # type: ignore[abstract] + assert step_info.row_counts["events"] == 100 + assert step_info.row_counts["events__payload__pull_request__requested_reviewers"] == 24 + # check if single load id + assert len(step_info.loads_ids) == 1 + row_counts = { + t: m.items_count + for t, m in step_info.metrics[step_info.loads_ids[0]][0]["table_metrics"].items() + } + assert row_counts == step_info.row_counts @pytest.mark.parametrize("caps", ALL_CAPABILITIES, indirect=True) -def test_normalize_many_schemas(caps: DestinationCapabilitiesContext, rasa_normalize: Normalize) -> None: +def test_normalize_many_packages( + caps: DestinationCapabilitiesContext, rasa_normalize: Normalize +) -> None: + extract_cases( + rasa_normalize, + [ + "event.event.many_load_2", + "event.event.user_load_1", + ], + ) extract_cases( - rasa_normalize.normalize_storage, - ["event.event.many_load_2", "event.event.user_load_1", "ethereum.blocks.9c1d9b504ea240a482b007788d5cd61c_2"] + rasa_normalize, + [ + "ethereum.blocks.9c1d9b504ea240a482b007788d5cd61c_2", + ], ) # use real process pool in tests with ProcessPoolExecutor(max_workers=4) as p: @@ -231,26 +310,30 @@ def test_normalize_many_schemas(caps: DestinationCapabilitiesContext, rasa_norma schemas = [] # load all schemas for load_id in loads: - schema = rasa_normalize.load_storage.load_package_schema(load_id) + schema = rasa_normalize.load_storage.normalized_packages.load_schema(load_id) schemas.append(schema.name) # expect event tables if schema.name == "event": expected_tables = EXPECTED_USER_TABLES_RASA_NORMALIZER + ["event_bot", "event_action"] expect_load_package(rasa_normalize.load_storage, load_id, expected_tables) if schema.name == "ethereum": - expect_load_package(rasa_normalize.load_storage, load_id, EXPECTED_ETH_TABLES, full_schema_update=False) + expect_load_package( + rasa_normalize.load_storage, load_id, 
EXPECTED_ETH_TABLES, full_schema_update=False + ) assert set(schemas) == set(["ethereum", "event"]) @pytest.mark.parametrize("caps", ALL_CAPABILITIES, indirect=True) -def test_normalize_typed_json(caps: DestinationCapabilitiesContext, raw_normalize: Normalize) -> None: - extract_items(raw_normalize.normalize_storage, [JSON_TYPED_DICT], "special", "special") +def test_normalize_typed_json( + caps: DestinationCapabilitiesContext, raw_normalize: Normalize +) -> None: + extract_items(raw_normalize.normalize_storage, [JSON_TYPED_DICT], Schema("special"), "special") with ThreadPoolExecutor(max_workers=1) as pool: raw_normalize.run(pool) loads = raw_normalize.load_storage.list_normalized_packages() assert len(loads) == 1 # load all schemas - schema = raw_normalize.load_storage.load_package_schema(loads[0]) + schema = raw_normalize.load_storage.normalized_packages.load_schema(loads[0]) assert schema.name == "special" # named as schema - default fallback table = schema.get_table_columns("special", include_incomplete=True) @@ -262,24 +345,24 @@ def test_normalize_typed_json(caps: DestinationCapabilitiesContext, raw_normaliz @pytest.mark.parametrize("caps", ALL_CAPABILITIES, indirect=True) def test_schema_changes(caps: DestinationCapabilitiesContext, raw_normalize: Normalize) -> None: doc = {"str": "text", "int": 1} - extract_items(raw_normalize.normalize_storage, [doc], "evolution", "doc") - load_id = normalize_pending(raw_normalize, "evolution") + extract_items(raw_normalize.normalize_storage, [doc], Schema("evolution"), "doc") + load_id = normalize_pending(raw_normalize) _, table_files = expect_load_package(raw_normalize.load_storage, load_id, ["doc"]) get_line_from_file(raw_normalize.load_storage, table_files["doc"], 0) assert len(table_files["doc"]) == 1 - s: Schema = raw_normalize.load_or_create_schema(raw_normalize.schema_storage, "evolution") - doc_table = s.get_table("doc") + schema = raw_normalize.schema_storage.load_schema("evolution") + doc_table = schema.get_table("doc") assert "str" in doc_table["columns"] assert "int" in doc_table["columns"] # add column to doc in second step doc2 = {"str": "text", "int": 1, "bool": True} - extract_items(raw_normalize.normalize_storage, [doc, doc2, doc], "evolution", "doc") - load_id = normalize_pending(raw_normalize, "evolution") + extract_items(raw_normalize.normalize_storage, [doc, doc2, doc], schema, "doc") + load_id = normalize_pending(raw_normalize) _, table_files = expect_load_package(raw_normalize.load_storage, load_id, ["doc"]) assert len(table_files["doc"]) == 1 - s = raw_normalize.load_or_create_schema(raw_normalize.schema_storage, "evolution") - doc_table = s.get_table("doc") + schema = raw_normalize.schema_storage.load_schema("evolution") + doc_table = schema.get_table("doc") assert "bool" in doc_table["columns"] # add and change several tables in one step @@ -287,25 +370,42 @@ def test_schema_changes(caps: DestinationCapabilitiesContext, raw_normalize: Nor doc_v = {"int": "hundred"} doc3_2v = {"comp": [doc2]} doc3_doc_v = {"comp": [doc_v]} - extract_items(raw_normalize.normalize_storage, [doc3, doc, doc_v], "evolution", "doc") - extract_items(raw_normalize.normalize_storage, [doc3_2v, doc3_doc_v], "evolution", "doc") - load_id = normalize_pending(raw_normalize, "evolution") + extract_items( + raw_normalize.normalize_storage, [doc3, doc, doc_v, doc3_2v, doc3_doc_v], schema, "doc" + ) + # schema = raw_normalize.schema_storage.load_schema("evolution") + # extract_items(raw_normalize.normalize_storage, [doc3_2v, doc3_doc_v], schema, "doc") + 
load_id = normalize_pending(raw_normalize) _, table_files = expect_load_package(raw_normalize.load_storage, load_id, ["doc", "doc__comp"]) assert len(table_files["doc"]) == 1 assert len(table_files["doc__comp"]) == 1 - s = raw_normalize.load_or_create_schema(raw_normalize.schema_storage, "evolution") - doc_table = s.get_table("doc") - assert {"_dlt_load_id", "_dlt_id", "str", "int", "bool", "int__v_text"} == set(doc_table["columns"].keys()) - doc__comp_table = s.get_table("doc__comp") + schema = raw_normalize.schema_storage.load_schema("evolution") + doc_table = schema.get_table("doc") + assert {"_dlt_load_id", "_dlt_id", "str", "int", "bool", "int__v_text"} == set( + doc_table["columns"].keys() + ) + doc__comp_table = schema.get_table("doc__comp") assert doc__comp_table["parent"] == "doc" - assert {"_dlt_id", "_dlt_list_idx", "_dlt_parent_id", "str", "int", "bool", "int__v_text"} == set(doc__comp_table["columns"].keys()) + assert { + "_dlt_id", + "_dlt_list_idx", + "_dlt_parent_id", + "str", + "int", + "bool", + "int__v_text", + } == set(doc__comp_table["columns"].keys()) @pytest.mark.parametrize("caps", ALL_CAPABILITIES, indirect=True) -def test_normalize_twice_with_flatten(caps: DestinationCapabilitiesContext, raw_normalize: Normalize) -> None: +def test_normalize_twice_with_flatten( + caps: DestinationCapabilitiesContext, raw_normalize: Normalize +) -> None: load_id = extract_and_normalize_cases(raw_normalize, ["github.issues.load_page_5_duck"]) - _, table_files = expect_load_package(raw_normalize.load_storage, load_id, ["issues", "issues__labels", "issues__assignees"]) + _, table_files = expect_load_package( + raw_normalize.load_storage, load_id, ["issues", "issues__labels", "issues__assignees"] + ) assert len(table_files["issues"]) == 1 _, lines = get_line_from_file(raw_normalize.load_storage, table_files["issues"], 0) # insert writer adds 2 lines @@ -318,104 +418,192 @@ def assert_schema(_schema: Schema): assert "reactions__x1" in _schema.tables["issues"]["columns"] assert "reactions__1" not in _schema.tables["issues"]["columns"] - - schema = raw_normalize.load_or_create_schema(raw_normalize.schema_storage, "github") + schema = raw_normalize.schema_storage.load_schema("github") assert_schema(schema) load_id = extract_and_normalize_cases(raw_normalize, ["github.issues.load_page_5_duck"]) - _, table_files = expect_load_package(raw_normalize.load_storage, load_id, ["issues", "issues__labels", "issues__assignees"], full_schema_update=False) + _, table_files = expect_load_package( + raw_normalize.load_storage, + load_id, + ["issues", "issues__labels", "issues__assignees"], + full_schema_update=False, + ) assert len(table_files["issues"]) == 1 _, lines = get_line_from_file(raw_normalize.load_storage, table_files["issues"], 0) # insert writer adds 2 lines assert lines in (100, 102) - schema = raw_normalize.load_or_create_schema(raw_normalize.schema_storage, "github") + schema = raw_normalize.schema_storage.load_schema("github") assert_schema(schema) def test_group_worker_files() -> None: - files = ["f%03d" % idx for idx in range(0, 100)] assert Normalize.group_worker_files([], 4) == [] assert Normalize.group_worker_files(["f001"], 1) == [["f001"]] assert Normalize.group_worker_files(["f001"], 100) == [["f001"]] assert Normalize.group_worker_files(files[:4], 4) == [["f000"], ["f001"], ["f002"], ["f003"]] - assert Normalize.group_worker_files(files[:5], 4) == [["f000"], ["f001"], ["f002"], ["f003", "f004"]] - assert Normalize.group_worker_files(files[:8], 4) == [["f000", "f001"], ["f002", 
"f003"], ["f004", "f005"], ["f006", "f007"]] - assert Normalize.group_worker_files(files[:8], 3) == [["f000", "f001"], ["f002", "f003", "f006"], ["f004", "f005", "f007"]] - assert Normalize.group_worker_files(files[:5], 3) == [["f000"], ["f001", "f003"], ["f002", "f004"]] + assert Normalize.group_worker_files(files[:5], 4) == [ + ["f000"], + ["f001"], + ["f002"], + ["f003", "f004"], + ] + assert Normalize.group_worker_files(files[:8], 4) == [ + ["f000", "f001"], + ["f002", "f003"], + ["f004", "f005"], + ["f006", "f007"], + ] + assert Normalize.group_worker_files(files[:8], 3) == [ + ["f000", "f001"], + ["f002", "f003", "f006"], + ["f004", "f005", "f007"], + ] + assert Normalize.group_worker_files(files[:5], 3) == [ + ["f000"], + ["f001", "f003"], + ["f002", "f004"], + ] # check if sorted files = ["tab1.1", "chd.3", "tab1.2", "chd.4", "tab1.3"] - assert Normalize.group_worker_files(files, 3) == [["chd.3"], ["chd.4", "tab1.2"], ["tab1.1", "tab1.3"]] - - -EXPECTED_ETH_TABLES = ["blocks", "blocks__transactions", "blocks__transactions__logs", "blocks__transactions__logs__topics", - "blocks__uncles", "blocks__transactions__access_list", "blocks__transactions__access_list__storage_keys"] - -EXPECTED_USER_TABLES_RASA_NORMALIZER = ["event", "event_user", "event_user__parse_data__intent_ranking"] - - -EXPECTED_USER_TABLES = ["event", "event__parse_data__intent_ranking", "event__parse_data__response_selector__all_retrieval_intents", - "event__parse_data__response_selector__default__ranking", "event__parse_data__response_selector__default__response__response_templates", - "event__parse_data__response_selector__default__response__responses"] - - -def extract_items(normalize_storage: NormalizeStorage, items: Sequence[StrAny], schema_name: str, table_name: str) -> None: - extractor = ExtractorStorage(normalize_storage.config) - extract_id = extractor.create_extract_id() - extractor.write_data_item("puae-jsonl", extract_id, schema_name, table_name, items, None) - extractor.close_writers(extract_id) - extractor.commit_extract_files(extract_id) + assert Normalize.group_worker_files(files, 3) == [ + ["chd.3"], + ["chd.4", "tab1.2"], + ["tab1.1", "tab1.3"], + ] + + +EXPECTED_ETH_TABLES = [ + "blocks", + "blocks__transactions", + "blocks__transactions__logs", + "blocks__transactions__logs__topics", + "blocks__uncles", + "blocks__transactions__access_list", + "blocks__transactions__access_list__storage_keys", +] + +EXPECTED_USER_TABLES_RASA_NORMALIZER = [ + "event", + "event_user", + "event_user__parse_data__intent_ranking", +] + + +EXPECTED_USER_TABLES = [ + "event", + "event__parse_data__intent_ranking", + "event__parse_data__response_selector__all_retrieval_intents", + "event__parse_data__response_selector__default__ranking", + "event__parse_data__response_selector__default__response__response_templates", + "event__parse_data__response_selector__default__response__responses", +] + + +def extract_items( + normalize_storage: NormalizeStorage, items: Sequence[StrAny], schema: Schema, table_name: str +) -> str: + extractor = ExtractStorage(normalize_storage.config) + load_id = extractor.create_load_package(schema) + extractor.write_data_item("puae-jsonl", load_id, schema.name, table_name, items, None) + extractor.close_writers(load_id) + extractor.commit_new_load_package(load_id, schema) + return load_id -def normalize_event_user(normalize: Normalize, case: str, expected_user_tables: List[str] = None) -> Tuple[List[str], Dict[str, List[str]]]: +def normalize_event_user( + normalize: Normalize, case: str, 
expected_user_tables: List[str] = None +) -> Tuple[List[str], Dict[str, List[str]]]: expected_user_tables = expected_user_tables or EXPECTED_USER_TABLES_RASA_NORMALIZER load_id = extract_and_normalize_cases(normalize, [case]) return expect_load_package(normalize.load_storage, load_id, expected_user_tables) def extract_and_normalize_cases(normalize: Normalize, cases: Sequence[str]) -> str: - extract_cases(normalize.normalize_storage, cases) + extract_cases(normalize, cases) return normalize_pending(normalize) -def normalize_pending(normalize: Normalize, schema_name: str = "event") -> str: - load_id = uniq_id() - normalize.load_storage.create_temp_load_package(load_id) +def normalize_pending(normalize: Normalize) -> str: # pool not required for map_single - files = normalize.normalize_storage.list_files_to_normalize_sorted() - # create schema if it does not exist - for schema_name, files_in_schema in normalize.normalize_storage.group_by_schema(files): - normalize.spool_files(schema_name, load_id, normalize.map_single, list(files_in_schema)) + load_ids = normalize.normalize_storage.extracted_packages.list_packages() + assert len(load_ids) == 1, "Only one package allowed or rewrite tests" + for load_id in load_ids: + normalize._step_info_start_load_id(load_id) + normalize.load_storage.new_packages.create_package(load_id) + # read schema from package + schema = normalize.normalize_storage.extracted_packages.load_schema(load_id) + # get files + schema_files = normalize.normalize_storage.extracted_packages.list_new_jobs(load_id) + # normalize without pool + normalize.spool_files(load_id, schema, normalize.map_single, schema_files) + return load_id -def extract_cases(normalize_storage: NormalizeStorage, cases: Sequence[str]) -> None: +def extract_cases(normalize: Normalize, cases: Sequence[str]) -> None: + items: List[StrAny] = [] for case in cases: - schema_name, table_name, _, _ = NormalizeStorage.parse_normalize_file_name(case + ".jsonl") + # our cases have schema and table name encoded in file name + schema_name, table_name, _ = case.split(".", maxsplit=3) with open(json_case_path(case), "rb") as f: - items = json.load(f) - extract_items(normalize_storage, items, schema_name, table_name) + item = json.load(f) + if isinstance(item, list): + items.extend(item) + else: + items.append(item) + # we assume that all items belonged to a single schema + extract_items( + normalize.normalize_storage, + items, + load_or_create_schema(normalize, schema_name), + table_name, + ) + + +def load_or_create_schema(normalize: Normalize, schema_name: str) -> Schema: + try: + schema = normalize.schema_storage.load_schema(schema_name) + schema.update_normalizers() + except SchemaNotFoundError: + schema = Schema(schema_name) + return schema -def expect_load_package(load_storage: LoadStorage, load_id: str, expected_tables: Sequence[str], full_schema_update: bool = True) -> Tuple[List[str], Dict[str, List[str]]]: +def expect_load_package( + load_storage: LoadStorage, + load_id: str, + expected_tables: Sequence[str], + full_schema_update: bool = True, +) -> Tuple[List[str], Dict[str, List[str]]]: # normalize tables as paths (original json is snake case so we may do it without real lineage info) - schema = load_storage.load_package_schema(load_id) + schema = load_storage.normalized_packages.load_schema(load_id) # we are still in destination caps context so schema contains length assert schema.naming.max_length > 0 - expected_tables = [schema.naming.shorten_fragments(*schema.naming.break_path(table)) for table in 
expected_tables] + expected_tables = [ + schema.naming.shorten_fragments(*schema.naming.break_path(table)) + for table in expected_tables + ] # find jobs and processed files files = load_storage.list_new_jobs(load_id) - files_tables = [load_storage.parse_job_file_name(file).table_name for file in files] + files_tables = [ParsedLoadJobFileName.parse(file).table_name for file in files] assert set(files_tables) == set(expected_tables) ofl: Dict[str, List[str]] = {} for expected_table in expected_tables: # find all files for particular table, ignoring file id - file_mask = load_storage.build_job_file_name(expected_table, "*", validate_components=False) + file_mask = PackageStorage.build_job_file_name( + expected_table, + "*", + validate_components=False, + loader_file_format=load_storage.loader_file_format, + ) # files are in normalized//new_jobs - file_path = load_storage._get_job_file_path(load_id, "new_jobs", file_mask) + file_path = load_storage.normalized_packages.get_job_file_path( + load_id, "new_jobs", file_mask + ) candidates = [f for f in files if fnmatch(f, file_path)] # assert len(candidates) == 1 ofl[expected_table] = candidates @@ -428,10 +616,12 @@ def expect_load_package(load_storage: LoadStorage, load_id: str, expected_tables return expected_tables, ofl -def get_line_from_file(load_storage: LoadStorage, loaded_files: List[str], return_line: int = 0) -> Tuple[str, int]: +def get_line_from_file( + load_storage: LoadStorage, loaded_files: List[str], return_line: int = 0 +) -> Tuple[str, int]: lines = [] for file in loaded_files: - with load_storage.storage.open_file(file) as f: + with load_storage.normalized_packages.storage.open_file(file) as f: lines.extend(f.readlines()) return lines[return_line], len(lines) @@ -439,6 +629,6 @@ def get_line_from_file(load_storage: LoadStorage, loaded_files: List[str], retur def assert_timestamp_data_type(load_storage: LoadStorage, data_type: TDataType) -> None: # load generated schema loads = load_storage.list_normalized_packages() - event_schema = load_storage.load_package_schema(loads[0]) + event_schema = load_storage.normalized_packages.load_schema(loads[0]) # in raw normalize timestamp column must not be coerced to timestamp assert event_schema.get_table_columns("event")["timestamp"]["data_type"] == data_type diff --git a/tests/normalize/utils.py b/tests/normalize/utils.py index 3ee14948c1..0ce099d4b6 100644 --- a/tests/normalize/utils.py +++ b/tests/normalize/utils.py @@ -1,10 +1,10 @@ from typing import Mapping, cast -from dlt.destinations.duckdb import capabilities as duck_insert_caps -from dlt.destinations.redshift import capabilities as rd_insert_caps -from dlt.destinations.postgres import capabilities as pg_insert_caps -from dlt.destinations.bigquery import capabilities as jsonl_caps -from dlt.destinations.filesystem import capabilities as filesystem_caps +from dlt.destinations.impl.duckdb import capabilities as duck_insert_caps +from dlt.destinations.impl.redshift import capabilities as rd_insert_caps +from dlt.destinations.impl.postgres import capabilities as pg_insert_caps +from dlt.destinations.impl.bigquery import capabilities as jsonl_caps +from dlt.destinations.impl.filesystem import capabilities as filesystem_caps DEFAULT_CAPS = pg_insert_caps diff --git a/tests/pipeline/cases/github_pipeline/github_extract.py b/tests/pipeline/cases/github_pipeline/github_extract.py index 74ff99033f..6be6643947 100644 --- a/tests/pipeline/cases/github_pipeline/github_extract.py +++ b/tests/pipeline/cases/github_pipeline/github_extract.py @@ 
-5,7 +5,9 @@ from github_pipeline import github # type: ignore[import-not-found] if __name__ == "__main__": - p = dlt.pipeline("dlt_github_pipeline", destination="duckdb", dataset_name="github_3", full_refresh=False) + p = dlt.pipeline( + "dlt_github_pipeline", destination="duckdb", dataset_name="github_3", full_refresh=False + ) github_source = github() if len(sys.argv) > 1: # load only N issues @@ -13,5 +15,3 @@ github_source.add_limit(limit) info = p.extract(github_source) print(info) - # normalize - don't load - p.normalize() diff --git a/tests/pipeline/cases/github_pipeline/github_normalize.py b/tests/pipeline/cases/github_pipeline/github_normalize.py new file mode 100644 index 0000000000..30a45a2631 --- /dev/null +++ b/tests/pipeline/cases/github_pipeline/github_normalize.py @@ -0,0 +1,6 @@ +import dlt + +if __name__ == "__main__": + p = dlt.attach("dlt_github_pipeline") + info = p.normalize() + print(info) diff --git a/tests/pipeline/cases/github_pipeline/github_pipeline.py b/tests/pipeline/cases/github_pipeline/github_pipeline.py index 6d19709947..e7efef73d9 100644 --- a/tests/pipeline/cases/github_pipeline/github_pipeline.py +++ b/tests/pipeline/cases/github_pipeline/github_pipeline.py @@ -4,20 +4,30 @@ from dlt.common import json + @dlt.source(root_key=True) def github(): - - @dlt.resource(table_name="issues", write_disposition="merge", primary_key="id", merge_key=("node_id", "url")) - def load_issues(): + @dlt.resource( + table_name="issues", + write_disposition="merge", + primary_key="id", + merge_key=("node_id", "url"), + ) + def load_issues(created_at=dlt.sources.incremental[str]("created_at")): # noqa: B008 # we should be in TEST_STORAGE folder - with open("../tests/normalize/cases/github.issues.load_page_5_duck.json", "r", encoding="utf-8") as f: - yield from json.load(f) + with open( + "../tests/normalize/cases/github.issues.load_page_5_duck.json", "r", encoding="utf-8" + ) as f: + issues = sorted(json.load(f), key=lambda x: x["created_at"]) + yield from issues return load_issues if __name__ == "__main__": - p = dlt.pipeline("dlt_github_pipeline", destination="duckdb", dataset_name="github_3", full_refresh=False) + p = dlt.pipeline( + "dlt_github_pipeline", destination="duckdb", dataset_name="github_3", full_refresh=False + ) github_source = github() if len(sys.argv) > 1: # load only N issues diff --git a/tests/pipeline/conftest.py b/tests/pipeline/conftest.py index a9a94230a2..f6c47e35b1 100644 --- a/tests/pipeline/conftest.py +++ b/tests/pipeline/conftest.py @@ -1,2 +1,8 @@ -from tests.utils import preserve_environ, autouse_test_storage, patch_home_dir, wipe_pipeline, duckdb_pipeline_location -from tests.pipeline.utils import drop_dataset_from_env \ No newline at end of file +from tests.utils import ( + preserve_environ, + autouse_test_storage, + patch_home_dir, + wipe_pipeline, + duckdb_pipeline_location, +) +from tests.pipeline.utils import drop_dataset_from_env diff --git a/tests/pipeline/test_arrow_sources.py b/tests/pipeline/test_arrow_sources.py index 31d5d001df..4991afa002 100644 --- a/tests/pipeline/test_arrow_sources.py +++ b/tests/pipeline/test_arrow_sources.py @@ -6,19 +6,28 @@ import os import io import pyarrow as pa -from typing import List import dlt +from dlt.common import json, Decimal from dlt.common.utils import uniq_id +from dlt.common.libs.pyarrow import NameNormalizationClash + from dlt.pipeline.exceptions import PipelineStepFailed -from tests.cases import arrow_table_all_data_types, TArrowFormat + +from tests.cases import arrow_format_from_pandas, 
arrow_table_all_data_types, TArrowFormat from tests.utils import preserve_environ -from dlt.common import json -from dlt.common import Decimal @pytest.mark.parametrize( - ("item_type", "is_list"), [("pandas", False), ("table", False), ("record_batch", False), ("pandas", True), ("table", True), ("record_batch", True)] + ("item_type", "is_list"), + [ + ("pandas", False), + ("table", False), + ("record_batch", False), + ("pandas", True), + ("table", True), + ("record_batch", True), + ], ) def test_extract_and_normalize(item_type: TArrowFormat, is_list: bool): item, records = arrow_table_all_data_types(item_type) @@ -32,25 +41,26 @@ def some_data(): else: yield item - pipeline.extract(some_data()) norm_storage = pipeline._get_normalize_storage() - extract_files = [fn for fn in norm_storage.list_files_to_normalize_sorted() if fn.endswith(".parquet")] + extract_files = [ + fn for fn in norm_storage.list_files_to_normalize_sorted() if fn.endswith(".parquet") + ] assert len(extract_files) == 1 - with norm_storage.storage.open_file(extract_files[0], 'rb') as f: + with norm_storage.extracted_packages.storage.open_file(extract_files[0], "rb") as f: extracted_bytes = f.read() info = pipeline.normalize() - assert info.row_counts['some_data'] == len(records) + assert info.row_counts["some_data"] == len(records) load_id = pipeline.list_normalized_load_packages()[0] storage = pipeline._get_load_storage() - jobs = storage.list_new_jobs(load_id) + jobs = storage.normalized_packages.list_new_jobs(load_id) job = [j for j in jobs if "some_data" in j][0] - with storage.storage.open_file(job, 'rb') as f: + with storage.normalized_packages.storage.open_file(job, "rb") as f: normalized_bytes = f.read() # Normalized is linked/copied exactly and should be the same as the extracted file @@ -75,24 +85,31 @@ def some_data(): schema = pipeline.default_schema # Check schema detection - schema_columns = schema.tables['some_data']['columns'] + schema_columns = schema.tables["some_data"]["columns"] assert set(df_tbl.columns) == set(schema_columns) - assert schema_columns['date']['data_type'] == 'date' - assert schema_columns['int']['data_type'] == 'bigint' - assert schema_columns['float']['data_type'] == 'double' - assert schema_columns['decimal']['data_type'] == 'decimal' - assert schema_columns['time']['data_type'] == 'time' - assert schema_columns['binary']['data_type'] == 'binary' - assert schema_columns['string']['data_type'] == 'text' - assert schema_columns['json']['data_type'] == 'complex' - + assert schema_columns["date"]["data_type"] == "date" + assert schema_columns["int"]["data_type"] == "bigint" + assert schema_columns["float"]["data_type"] == "double" + assert schema_columns["decimal"]["data_type"] == "decimal" + assert schema_columns["time"]["data_type"] == "time" + assert schema_columns["binary"]["data_type"] == "binary" + assert schema_columns["string"]["data_type"] == "text" + assert schema_columns["json"]["data_type"] == "complex" @pytest.mark.parametrize( - ("item_type", "is_list"), [("pandas", False), ("table", False), ("record_batch", False), ("pandas", True), ("table", True), ("record_batch", True)] + ("item_type", "is_list"), + [ + ("pandas", False), + ("table", False), + ("record_batch", False), + ("pandas", True), + ("table", True), + ("record_batch", True), + ], ) def test_normalize_jsonl(item_type: TArrowFormat, is_list: bool): - os.environ['DUMMY__LOADER_FILE_FORMAT'] = "jsonl" + os.environ["DUMMY__LOADER_FILE_FORMAT"] = "jsonl" item, records = arrow_table_all_data_types(item_type) @@ -105,25 
+122,24 @@ def some_data(): else: yield item - pipeline.extract(some_data()) pipeline.normalize() load_id = pipeline.list_normalized_load_packages()[0] storage = pipeline._get_load_storage() - jobs = storage.list_new_jobs(load_id) + jobs = storage.normalized_packages.list_new_jobs(load_id) job = [j for j in jobs if "some_data" in j][0] - with storage.storage.open_file(job, 'r') as f: + with storage.normalized_packages.storage.open_file(job, "r") as f: result = [json.loads(line) for line in f] for row in result: - row['decimal'] = Decimal(row['decimal']) + row["decimal"] = Decimal(row["decimal"]) for record in records: - record['datetime'] = record['datetime'].replace(tzinfo=None) + record["datetime"] = record["datetime"].replace(tzinfo=None) expected = json.loads(json.dumps(records)) for record in expected: - record['decimal'] = Decimal(record['decimal']) + record["decimal"] = Decimal(record["decimal"]) assert result == expected @@ -136,7 +152,7 @@ def some_data(): yield item def map_func(item): - return item.filter(pa.compute.greater(item['int'], 80)) + return item.filter(pa.compute.greater(item["int"], 80)) # Add map that filters the table some_data.add_map(map_func) @@ -146,7 +162,7 @@ def map_func(item): result_tbl = result[0] assert len(result_tbl) < len(item) - assert pa.compute.all(pa.compute.greater(result_tbl['int'], 80)).as_py() + assert pa.compute.all(pa.compute.greater(result_tbl["int"], 80)).as_py() @pytest.mark.parametrize("item_type", ["pandas", "table", "record_batch"]) @@ -181,6 +197,44 @@ def data_frames(): assert len(pipeline.get_load_package_info(load_id).jobs["new_jobs"]) == 10 +@pytest.mark.parametrize("item_type", ["pandas", "table", "record_batch"]) +def test_arrow_clashing_names(item_type: TArrowFormat) -> None: + # # use parquet for dummy + os.environ["DESTINATION__LOADER_FILE_FORMAT"] = "parquet" + pipeline_name = "arrow_" + uniq_id() + pipeline = dlt.pipeline(pipeline_name=pipeline_name, destination="dummy") + + item, _ = arrow_table_all_data_types(item_type, include_name_clash=True) + + @dlt.resource + def data_frames(): + for _ in range(10): + yield item + + with pytest.raises(PipelineStepFailed) as py_ex: + pipeline.extract(data_frames()) + assert isinstance(py_ex.value.__context__, NameNormalizationClash) + + +@pytest.mark.parametrize("item_type", ["table", "record_batch"]) +def test_load_arrow_vary_schema(item_type: TArrowFormat) -> None: + pipeline_name = "arrow_" + uniq_id() + pipeline = dlt.pipeline(pipeline_name=pipeline_name, destination="duckdb") + + item, _ = arrow_table_all_data_types(item_type, include_not_normalized_name=False) + pipeline.run(item, table_name="data").raise_on_failed_jobs() + + item, _ = arrow_table_all_data_types(item_type, include_not_normalized_name=False) + # remove int column + try: + item = item.drop("int") + except AttributeError: + names = item.schema.names + names.remove("int") + item = item.select(names) + pipeline.run(item, table_name="data").raise_on_failed_jobs() + + @pytest.mark.parametrize("item_type", ["pandas", "table", "record_batch"]) def test_arrow_as_data_loading(item_type: TArrowFormat) -> None: os.environ["RESTORE_FROM_DESTINATION"] = "False" @@ -199,39 +253,39 @@ def test_arrow_as_data_loading(item_type: TArrowFormat) -> None: assert info.row_counts["items"] == len(rows) -@pytest.mark.parametrize("item_type", ["table", "pandas", "record_batch"]) +@pytest.mark.parametrize("item_type", ["table"]) # , "pandas", "record_batch" def test_normalize_with_dlt_columns(item_type: TArrowFormat): item, records = 
arrow_table_all_data_types(item_type, num_rows=5432) - os.environ['NORMALIZE__PARQUET_NORMALIZER__ADD_DLT_LOAD_ID'] = "True" - os.environ['NORMALIZE__PARQUET_NORMALIZER__ADD_DLT_ID'] = "True" + os.environ["NORMALIZE__PARQUET_NORMALIZER__ADD_DLT_LOAD_ID"] = "True" + os.environ["NORMALIZE__PARQUET_NORMALIZER__ADD_DLT_ID"] = "True" # Test with buffer smaller than the number of batches to be written - os.environ['DATA_WRITER__BUFFER_MAX_ITEMS'] = "100" - os.environ['DATA_WRITER__ROW_GROUP_SIZE'] = "100" + os.environ["DATA_WRITER__BUFFER_MAX_ITEMS"] = "100" + os.environ["DATA_WRITER__ROW_GROUP_SIZE"] = "100" @dlt.resource def some_data(): yield item - pipeline = dlt.pipeline("arrow_" + uniq_id(), destination="filesystem") + pipeline = dlt.pipeline("arrow_" + uniq_id(), destination="duckdb") pipeline.extract(some_data()) - pipeline.normalize() + pipeline.normalize(loader_file_format="parquet") load_id = pipeline.list_normalized_load_packages()[0] storage = pipeline._get_load_storage() - jobs = storage.list_new_jobs(load_id) + jobs = storage.normalized_packages.list_new_jobs(load_id) job = [j for j in jobs if "some_data" in j][0] - with storage.storage.open_file(job, 'rb') as f: + with storage.normalized_packages.storage.open_file(job, "rb") as f: tbl = pa.parquet.read_table(f) assert len(tbl) == 5432 # Test one column matches source data - assert tbl['string'].to_pylist() == [r['string'] for r in records] + assert tbl["string"].to_pylist() == [r["string"] for r in records] - assert pa.compute.all(pa.compute.equal(tbl['_dlt_load_id'], load_id)).as_py() + assert pa.compute.all(pa.compute.equal(tbl["_dlt_load_id"], load_id)).as_py() - all_ids = tbl['_dlt_id'].to_pylist() + all_ids = tbl["_dlt_id"].to_pylist() assert len(all_ids[0]) >= 14 # All ids are unique @@ -239,5 +293,61 @@ def some_data(): # _dlt_id and _dlt_load_id are added to pipeline schema schema = pipeline.default_schema - assert schema.tables['some_data']['columns']['_dlt_id']['data_type'] == 'text' - assert schema.tables['some_data']['columns']['_dlt_load_id']['data_type'] == 'text' + assert schema.tables["some_data"]["columns"]["_dlt_id"]["data_type"] == "text" + assert schema.tables["some_data"]["columns"]["_dlt_load_id"]["data_type"] == "text" + + pipeline.load().raise_on_failed_jobs() + + # should be able to load again + pipeline.run(some_data()).raise_on_failed_jobs() + + # should be able to load arrow without a column + try: + item = item.drop("int") + except AttributeError: + names = item.schema.names + names.remove("int") + item = item.select(names) + pipeline.run(item, table_name="some_data").raise_on_failed_jobs() + + # should be able to load arrow with a new column + # TODO: uncomment when load_id fixed in normalizer + # item, records = arrow_table_all_data_types(item_type, num_rows=200) + # item = item.append_column("static_int", [[0] * 200]) + # pipeline.run(item, table_name="some_data").raise_on_failed_jobs() + + # schema = pipeline.default_schema + # assert schema.tables['some_data']['columns']['static_int']['data_type'] == 'bigint' + + +@pytest.mark.parametrize("item_type", ["pandas", "table", "record_batch"]) +def test_empty_arrow(item_type: TArrowFormat) -> None: + os.environ["RESTORE_FROM_DESTINATION"] = "False" + os.environ["DESTINATION__LOADER_FILE_FORMAT"] = "parquet" + + # always return pandas + item, _ = arrow_table_all_data_types("pandas", num_rows=1) + item_resource = dlt.resource(item, name="items", write_disposition="replace") + + pipeline_name = "arrow_" + uniq_id() + pipeline = 
dlt.pipeline(pipeline_name=pipeline_name, destination="dummy") + # E & L + info = pipeline.extract(item_resource) + load_id = info.loads_ids[0] + assert info.metrics[load_id][0]["table_metrics"]["items"].items_count == 1 + assert len(pipeline.list_extracted_resources()) == 1 + norm_info = pipeline.normalize() + assert norm_info.row_counts["items"] == 1 + + # load 0 elements to replace + empty_df = pd.DataFrame(columns=item.columns) + + item_resource = dlt.resource( + arrow_format_from_pandas(empty_df, item_type), name="items", write_disposition="replace" + ) + info = pipeline.extract(item_resource) + load_id = info.loads_ids[0] + assert info.metrics[load_id][0]["table_metrics"]["items"].items_count == 0 + assert len(pipeline.list_extracted_resources()) == 1 + norm_info = pipeline.normalize() + assert norm_info.row_counts["items"] == 0 diff --git a/tests/pipeline/test_dlt_versions.py b/tests/pipeline/test_dlt_versions.py index 09d8e98d82..5cf1857dfa 100644 --- a/tests/pipeline/test_dlt_versions.py +++ b/tests/pipeline/test_dlt_versions.py @@ -1,3 +1,4 @@ +import pytest import tempfile import shutil from importlib.metadata import version as pkg_version @@ -5,13 +6,19 @@ import dlt from dlt.common import json from dlt.common.runners import Venv +from dlt.common.storages.exceptions import StorageMigrationError from dlt.common.utils import custom_environ, set_working_dir from dlt.common.configuration.paths import get_dlt_data_dir from dlt.common.storages import FileStorage -from dlt.common.schema.typing import LOADS_TABLE_NAME, VERSION_TABLE_NAME, TStoredSchema +from dlt.common.schema.typing import ( + LOADS_TABLE_NAME, + STATE_TABLE_NAME, + VERSION_TABLE_NAME, + TStoredSchema, +) from dlt.common.configuration.resolve import resolve_configuration -from dlt.destinations.duckdb.configuration import DuckDbClientConfiguration -from dlt.destinations.duckdb.sql_client import DuckDbSqlClient +from dlt.destinations.impl.duckdb.configuration import DuckDbClientConfiguration +from dlt.destinations.impl.duckdb.sql_client import DuckDbSqlClient from tests.utils import TEST_STORAGE_ROOT, test_storage @@ -28,37 +35,82 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: # store dlt data in test storage (like patch_home_dir) with custom_environ({"DLT_DATA_DIR": get_dlt_data_dir()}): # save database outside of pipeline dir - with custom_environ({"DESTINATION__DUCKDB__CREDENTIALS": "duckdb:///test_github_3.duckdb"}): + with custom_environ( + {"DESTINATION__DUCKDB__CREDENTIALS": "duckdb:///test_github_3.duckdb"} + ): # create virtual env with (0.3.0) before the current schema upgrade with Venv.create(tempfile.mkdtemp(), ["dlt[duckdb]==0.3.0"]) as venv: # NOTE: we force a newer duckdb into the 0.3.0 dlt version to get compatible duckdb storage venv._install_deps(venv.context, ["duckdb" + "==" + pkg_version("duckdb")]) # load 20 issues - print(venv.run_script("../tests/pipeline/cases/github_pipeline/github_pipeline.py", "20")) + print( + venv.run_script( + "../tests/pipeline/cases/github_pipeline/github_pipeline.py", "20" + ) + ) # load schema and check _dlt_loads definition - github_schema: TStoredSchema = json.loads(test_storage.load(f".dlt/pipelines/{GITHUB_PIPELINE_NAME}/schemas/github.schema.json")) - # print(github_schema["tables"][LOADS_TABLE_NAME]) + github_schema: TStoredSchema = json.loads( + test_storage.load( + f".dlt/pipelines/{GITHUB_PIPELINE_NAME}/schemas/github.schema.json" + ) + ) assert github_schema["engine_version"] == 5 - assert "schema_version_hash" not in 
github_schema["tables"][LOADS_TABLE_NAME]["columns"] + assert ( + "schema_version_hash" + not in github_schema["tables"][LOADS_TABLE_NAME]["columns"] + ) + # check the dlt state table + assert { + "version_hash" not in github_schema["tables"][STATE_TABLE_NAME]["columns"] + } # check loads table without attaching to pipeline - duckdb_cfg = resolve_configuration(DuckDbClientConfiguration(dataset_name=GITHUB_DATASET), sections=("destination", "duckdb")) + duckdb_cfg = resolve_configuration( + DuckDbClientConfiguration(dataset_name=GITHUB_DATASET), + sections=("destination", "duckdb"), + ) with DuckDbSqlClient(GITHUB_DATASET, duckdb_cfg.credentials) as client: rows = client.execute_sql(f"SELECT * FROM {LOADS_TABLE_NAME}") # make sure we have just 4 columns assert len(rows[0]) == 4 rows = client.execute_sql("SELECT * FROM issues") assert len(rows) == 20 + rows = client.execute_sql(f"SELECT * FROM {STATE_TABLE_NAME}") + # only 5 columns + 2 dlt columns + assert len(rows[0]) == 5 + 2 + # inspect old state + state_dict = json.loads( + test_storage.load(f".dlt/pipelines/{GITHUB_PIPELINE_NAME}/state.json") + ) + assert "_version_hash" not in state_dict + assert ( + state_dict["sources"]["github"]["resources"]["load_issues"]["incremental"][ + "created_at" + ]["last_value"] + == "2021-04-16T04:34:05Z" + ) # execute in current version venv = Venv.restore_current() # load all issues print(venv.run_script("../tests/pipeline/cases/github_pipeline/github_pipeline.py")) # hash hash in schema - github_schema = json.loads(test_storage.load(f".dlt/pipelines/{GITHUB_PIPELINE_NAME}/schemas/github.schema.json")) - assert github_schema["engine_version"] == 6 + github_schema = json.loads( + test_storage.load( + f".dlt/pipelines/{GITHUB_PIPELINE_NAME}/schemas/github.schema.json" + ) + ) + assert github_schema["engine_version"] == 8 assert "schema_version_hash" in github_schema["tables"][LOADS_TABLE_NAME]["columns"] + # load state + state_dict = json.loads( + test_storage.load(f".dlt/pipelines/{GITHUB_PIPELINE_NAME}/state.json") + ) + assert "_version_hash" in state_dict + with DuckDbSqlClient(GITHUB_DATASET, duckdb_cfg.credentials) as client: - rows = client.execute_sql(f"SELECT * FROM {LOADS_TABLE_NAME} ORDER BY inserted_at") + rows = client.execute_sql( + f"SELECT * FROM {LOADS_TABLE_NAME} ORDER BY inserted_at" + ) # we have two loads assert len(rows) == 2 assert len(rows[0]) == 5 @@ -66,13 +118,29 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: assert rows[0][4] is None # hash of schema present in current load (2) assert rows[1][4] == github_schema["version_hash"] + # make sure all 100 records loaded (we do incremental load) rows = client.execute_sql("SELECT * FROM issues") assert len(rows) == 100 # two schema versions rows = client.execute_sql(f"SELECT * FROM {VERSION_TABLE_NAME}") assert len(rows) == 2 + rows = client.execute_sql(f"SELECT * FROM {STATE_TABLE_NAME} ORDER BY version") + # we have hash columns + assert len(rows[0]) == 6 + 2 + assert len(rows) == 2 + # none inserted after schema migration in load 1 + assert rows[0][7] is None + # hash of schema present in current load (2) + assert rows[1][7] == state_dict["_version_hash"] + # attach to existing pipeline pipeline = dlt.attach(GITHUB_PIPELINE_NAME, credentials=duckdb_cfg.credentials) + assert ( + pipeline.state["sources"]["github"]["resources"]["load_issues"]["incremental"][ + "created_at" + ]["last_value"] + == "2023-02-17T09:52:12Z" + ) pipeline = pipeline.drop() # print(pipeline.working_dir) assert pipeline.dataset_name == 
GITHUB_DATASET @@ -81,7 +149,7 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: pipeline.sync_destination() # print(pipeline.working_dir) # we have updated schema - assert pipeline.default_schema.ENGINE_VERSION == 6 + assert pipeline.default_schema.ENGINE_VERSION == 8 # make sure that schema hash retrieved from the destination is exactly the same as the schema hash that was in storage before the schema was wiped assert pipeline.default_schema.stored_version_hash == github_schema["version_hash"] @@ -94,26 +162,99 @@ def test_load_package_with_dlt_update(test_storage: FileStorage) -> None: # store dlt data in test storage (like patch_home_dir) with custom_environ({"DLT_DATA_DIR": get_dlt_data_dir()}): # save database outside of pipeline dir - with custom_environ({"DESTINATION__DUCKDB__CREDENTIALS": "duckdb:///test_github_3.duckdb"}): + with custom_environ( + {"DESTINATION__DUCKDB__CREDENTIALS": "duckdb:///test_github_3.duckdb"} + ): # create virtual env with (0.3.0) before the current schema upgrade with Venv.create(tempfile.mkdtemp(), ["dlt[duckdb]==0.3.0"]) as venv: venv._install_deps(venv.context, ["duckdb" + "==" + pkg_version("duckdb")]) # extract and normalize on old version but DO NOT LOAD - print(venv.run_script("../tests/pipeline/cases/github_pipeline/github_extract.py", "70")) + print( + venv.run_script( + "../tests/pipeline/cases/github_pipeline/github_extract.py", "70" + ) + ) + print( + venv.run_script( + "../tests/pipeline/cases/github_pipeline/github_normalize.py", + ) + ) # switch to current version and make sure the load package loads and schema migrates venv = Venv.restore_current() print(venv.run_script("../tests/pipeline/cases/github_pipeline/github_load.py")) - duckdb_cfg = resolve_configuration(DuckDbClientConfiguration(dataset_name=GITHUB_DATASET), sections=("destination", "duckdb")) + duckdb_cfg = resolve_configuration( + DuckDbClientConfiguration(dataset_name=GITHUB_DATASET), + sections=("destination", "duckdb"), + ) with DuckDbSqlClient(GITHUB_DATASET, duckdb_cfg.credentials) as client: rows = client.execute_sql("SELECT * FROM issues") assert len(rows) == 70 - github_schema = json.loads(test_storage.load(f".dlt/pipelines/{GITHUB_PIPELINE_NAME}/schemas/github.schema.json")) + github_schema = json.loads( + test_storage.load( + f".dlt/pipelines/{GITHUB_PIPELINE_NAME}/schemas/github.schema.json" + ) + ) # attach to existing pipeline pipeline = dlt.attach(GITHUB_PIPELINE_NAME, credentials=duckdb_cfg.credentials) # get the schema from schema storage before we sync - github_schema = json.loads(test_storage.load(f".dlt/pipelines/{GITHUB_PIPELINE_NAME}/schemas/github.schema.json")) + github_schema = json.loads( + test_storage.load( + f".dlt/pipelines/{GITHUB_PIPELINE_NAME}/schemas/github.schema.json" + ) + ) pipeline = pipeline.drop() pipeline.sync_destination() - assert pipeline.default_schema.ENGINE_VERSION == 6 + assert pipeline.default_schema.ENGINE_VERSION == 8 # schema version does not match `dlt.attach` does not update to the right schema by itself assert pipeline.default_schema.stored_version_hash != github_schema["version_hash"] + # state has hash + assert pipeline.state["_version_hash"] is not None + # but in db there's no hash - we loaded an old package with backward compatible schema + with pipeline.sql_client() as client: + rows = client.execute_sql(f"SELECT * FROM {STATE_TABLE_NAME}") + # no hash + assert len(rows[0]) == 5 + 2 + assert len(rows) == 1 + # this will extract state and update state schema + with 
pipeline.managed_state(extract_state=True): + pass + # this will sync schema to destination + pipeline.sync_schema() + # we have hash now + rows = client.execute_sql(f"SELECT * FROM {STATE_TABLE_NAME}") + assert len(rows[0]) == 6 + 2 + + +def test_normalize_package_with_dlt_update(test_storage: FileStorage) -> None: + shutil.copytree("tests/pipeline/cases/github_pipeline", TEST_STORAGE_ROOT, dirs_exist_ok=True) + + # execute in test storage + with set_working_dir(TEST_STORAGE_ROOT): + # store dlt data in test storage (like patch_home_dir) + with custom_environ({"DLT_DATA_DIR": get_dlt_data_dir()}): + # save database outside of pipeline dir + with custom_environ( + {"DESTINATION__DUCKDB__CREDENTIALS": "duckdb:///test_github_3.duckdb"} + ): + # create virtual env with (0.3.0) before the current schema upgrade + with Venv.create(tempfile.mkdtemp(), ["dlt[duckdb]==0.3.0"]) as venv: + venv._install_deps(venv.context, ["duckdb" + "==" + pkg_version("duckdb")]) + # extract only + print( + venv.run_script( + "../tests/pipeline/cases/github_pipeline/github_extract.py", "70" + ) + ) + # switch to current version and normalize existing extract package + # here we test possible switch in package format between storages + pipeline = dlt.attach(GITHUB_PIPELINE_NAME) + with pytest.raises(StorageMigrationError) as mig_ex: + pipeline.normalize() + assert mig_ex.value.from_version == "1.0.0" + + # delete all files in extracted folder + for file in pipeline._pipeline_storage.list_folder_files("normalize/extracted"): + pipeline._pipeline_storage.delete(file) + # now we can migrate the storage + pipeline.normalize() + assert pipeline._get_normalize_storage().version == "1.0.1" diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index c778e47cd6..f4f248261b 100644 --- a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -1,41 +1,55 @@ +import asyncio +from concurrent.futures import ThreadPoolExecutor import itertools import logging import os -import random -from typing import Any, Optional, Iterator, Dict, Any, cast +from time import sleep +from typing import Any, Tuple, cast +import threading from tenacity import retry_if_exception, Retrying, stop_after_attempt -from pydantic import BaseModel import pytest import dlt -from dlt.common import json, sleep, pendulum +from dlt.common import json, pendulum from dlt.common.configuration.container import Container +from dlt.common.configuration.exceptions import ConfigFieldMissingException from dlt.common.configuration.specs.aws_credentials import AwsCredentials from dlt.common.configuration.specs.exceptions import NativeValueError from dlt.common.configuration.specs.gcp_credentials import GcpOAuthCredentials from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.capabilities import TLoaderFileFormat -from dlt.common.exceptions import DestinationHasFailedJobs, DestinationTerminalException, PipelineStateNotAvailable, UnknownDestinationModule -from dlt.common.pipeline import PipelineContext -from dlt.common.runtime.collector import AliveCollector, EnlightenCollector, LogCollector, TqdmCollector +from dlt.common.destination.reference import WithStateSync +from dlt.common.exceptions import ( + DestinationHasFailedJobs, + DestinationTerminalException, + PipelineStateNotAvailable, + UnknownDestinationModule, +) +from dlt.common.pipeline import LoadInfo, PipelineContext +from dlt.common.runtime.collector import LogCollector from dlt.common.schema.utils import new_column, new_table +from 
dlt.common.typing import DictStrAny from dlt.common.utils import uniq_id +from dlt.common.schema import Schema +from dlt.destinations import filesystem, redshift, dummy from dlt.extract.exceptions import InvalidResourceDataTypeBasic, PipeGenInvalid, SourceExhausted -from dlt.extract.extract import ExtractorStorage -from dlt.extract.source import DltResource, DltSource +from dlt.extract.extract import ExtractStorage +from dlt.extract import DltResource, DltSource from dlt.load.exceptions import LoadClientJobFailed from dlt.pipeline.exceptions import InvalidPipelineName, PipelineNotActive, PipelineStepFailed from dlt.pipeline.helpers import retry_load -from dlt.pipeline import TCollectorArg from tests.common.utils import TEST_SENTRY_DSN -from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration -from tests.utils import TEST_STORAGE_ROOT from tests.common.configuration.utils import environment +from tests.utils import TEST_STORAGE_ROOT from tests.extract.utils import expect_extracted_file -from tests.pipeline.utils import assert_load_info, airtable_emojis +from tests.pipeline.utils import ( + assert_load_info, + airtable_emojis, + load_data_table_counts, + many_delayed, +) def test_default_pipeline() -> None: @@ -83,7 +97,7 @@ def test_run_full_refresh_default_dataset() -> None: p = dlt.pipeline(full_refresh=True, destination="dummy") assert p.dataset_name is None # simulate set new dataset - p._set_destinations("filesystem", None) + p._set_destinations("filesystem") assert p.dataset_name is None p._set_dataset_name(None) # full refresh is still observed @@ -164,19 +178,41 @@ def test_pipeline_context() -> None: def test_import_unknown_destination() -> None: with pytest.raises(UnknownDestinationModule): - dlt.pipeline(destination="!") + dlt.pipeline(destination="dlt.destinations.unknown") -def test_configured_destination(environment) -> None: - environment["DESTINATION_NAME"] = "postgres" +def test_configured_destination_type(environment) -> None: + environment["DESTINATION_TYPE"] = "dlt.destinations.postgres" environment["PIPELINE_NAME"] = "postgres_pipe" p = dlt.pipeline() assert p.destination is not None - assert p.destination.__name__.endswith("postgres") + assert p.destination.destination_type == "dlt.destinations.postgres" + assert p.destination.destination_name == "postgres" assert p.pipeline_name == "postgres_pipe" +def test_configured_destination_unknown_type(environment) -> None: + environment["DESTINATION_TYPE"] = "dlt.destinations.unknown" + + with pytest.raises(UnknownDestinationModule): + dlt.pipeline() + + +def test_configured_destination_unknown_name(environment) -> None: + environment["DESTINATION_NAME"] = "filesystem-prod" + environment["DESTINATION_TYPE"] = "filesystem" + + p = dlt.pipeline() + assert p.destination is not None + assert p.destination.destination_type == "dlt.destinations.filesystem" + assert p.destination.destination_name == "filesystem-prod" + + # we do not have config for postgres-prod so getting destination client must fail + with pytest.raises(ConfigFieldMissingException): + p.destination_client() + + def test_deterministic_salt(environment) -> None: environment["PIPELINE_NAME"] = "postgres_pipe" p = dlt.pipeline() @@ -188,61 +224,118 @@ def test_deterministic_salt(environment) -> None: assert p.pipeline_salt != p3.pipeline_salt -@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name) -def test_create_pipeline_all_destinations(destination_config: 
DestinationTestConfiguration) -> None: - # create pipelines, extract and normalize. that should be possible without installing any dependencies - p = dlt.pipeline(pipeline_name=destination_config.destination + "_pipeline", destination=destination_config.destination, staging=destination_config.staging) - # are capabilities injected - caps = p._container[DestinationCapabilitiesContext] - print(caps.naming_convention) - # are right naming conventions created - assert p._default_naming.max_length == min(caps.max_column_identifier_length, caps.max_identifier_length) - p.extract([1, "2", 3], table_name="data") - # is default schema with right naming convention - assert p.default_schema.naming.max_length == min(caps.max_column_identifier_length, caps.max_identifier_length) - p.normalize() - assert p.default_schema.naming.max_length == min(caps.max_column_identifier_length, caps.max_identifier_length) - - def test_destination_explicit_credentials(environment: Any) -> None: # test redshift - p = dlt.pipeline(pipeline_name="postgres_pipeline", destination="redshift", credentials="redshift://loader:loader@localhost:5432/dlt_data") + p = dlt.pipeline( + pipeline_name="postgres_pipeline", + destination="redshift", + credentials="redshift://loader:loader@localhost:5432/dlt_data", + ) config = p._get_destination_client_initial_config() assert config.credentials.is_resolved() # with staging - p = dlt.pipeline(pipeline_name="postgres_pipeline", staging="filesystem", destination="redshift", credentials="redshift://loader:loader@localhost:5432/dlt_data") + p = dlt.pipeline( + pipeline_name="postgres_pipeline", + staging="filesystem", + destination="redshift", + credentials="redshift://loader:loader@localhost:5432/dlt_data", + ) config = p._get_destination_client_initial_config(p.destination) assert config.credentials.is_resolved() config = p._get_destination_client_initial_config(p.staging, as_staging=True) assert config.credentials is None p._wipe_working_folder() # try filesystem which uses union of credentials that requires bucket_url to resolve - p = dlt.pipeline(pipeline_name="postgres_pipeline", destination="filesystem", credentials={"aws_access_key_id": "key_id", "aws_secret_access_key": "key"}) + p = dlt.pipeline( + pipeline_name="postgres_pipeline", + destination="filesystem", + credentials={"aws_access_key_id": "key_id", "aws_secret_access_key": "key"}, + ) config = p._get_destination_client_initial_config(p.destination) assert isinstance(config.credentials, AwsCredentials) assert config.credentials.is_resolved() # resolve gcp oauth - p = dlt.pipeline(pipeline_name="postgres_pipeline", destination="filesystem", credentials={"project_id": "pxid", "refresh_token": "123token", "client_id": "cid", "client_secret": "s"}) + p = dlt.pipeline( + pipeline_name="postgres_pipeline", + destination="filesystem", + credentials={ + "project_id": "pxid", + "refresh_token": "123token", + "client_id": "cid", + "client_secret": "s", + }, + ) config = p._get_destination_client_initial_config(p.destination) assert isinstance(config.credentials, GcpOAuthCredentials) assert config.credentials.is_resolved() +def test_destination_staging_config(environment: Any) -> None: + fs_dest = filesystem("file:///testing-bucket") + p = dlt.pipeline( + pipeline_name="staging_pipeline", + destination=redshift(credentials="redshift://loader:loader@localhost:5432/dlt_data"), + staging=fs_dest, + ) + schema = Schema("foo") + p._inject_schema(schema) + initial_config = p._get_destination_client_initial_config(p.staging, as_staging=True) + 
staging_config = fs_dest.configuration(initial_config) # type: ignore[arg-type] + + # Ensure that the as_staging flag is set in the final resolved config + assert staging_config.as_staging is True + + +def test_destination_factory_defaults_resolve_from_config(environment: Any) -> None: + """Params passed explicitly to destination supersede config values. + Env config values supersede default values. + """ + environment["FAIL_PROB"] = "0.3" + environment["RETRY_PROB"] = "0.8" + p = dlt.pipeline(pipeline_name="dummy_pipeline", destination=dummy(retry_prob=0.5)) + + client = p.destination_client() + + assert client.config.fail_prob == 0.3 # type: ignore[attr-defined] + assert client.config.retry_prob == 0.5 # type: ignore[attr-defined] + + +def test_destination_credentials_in_factory(environment: Any) -> None: + os.environ["DESTINATION__REDSHIFT__CREDENTIALS"] = "redshift://abc:123@localhost:5432/some_db" + + redshift_dest = redshift("redshift://abc:123@localhost:5432/other_db") + + p = dlt.pipeline(pipeline_name="dummy_pipeline", destination=redshift_dest) + + initial_config = p._get_destination_client_initial_config(p.destination) + dest_config = redshift_dest.configuration(initial_config) # type: ignore[arg-type] + # Explicit factory arg supersedes config + assert dest_config.credentials.database == "other_db" + + redshift_dest = redshift() + p = dlt.pipeline(pipeline_name="dummy_pipeline", destination=redshift_dest) + + initial_config = p._get_destination_client_initial_config(p.destination) + dest_config = redshift_dest.configuration(initial_config) # type: ignore[arg-type] + assert dest_config.credentials.database == "some_db" + + @pytest.mark.skip(reason="does not work on CI. probably takes right credentials from somewhere....") def test_destination_explicit_invalid_credentials_filesystem(environment: Any) -> None: # if string cannot be parsed - p = dlt.pipeline(pipeline_name="postgres_pipeline", destination="filesystem", credentials="PR8BLEM") + p = dlt.pipeline( + pipeline_name="postgres_pipeline", destination="filesystem", credentials="PR8BLEM" + ) with pytest.raises(NativeValueError): p._get_destination_client_initial_config(p.destination) def test_extract_source_twice() -> None: - def some_data(): yield [1, 2, 3] yield [1, 2, 3] - s = DltSource("source", "module", dlt.Schema("source"), [dlt.resource(some_data())]) + s = DltSource(dlt.Schema("source"), "module", [dlt.resource(some_data())]) dlt.pipeline().extract(s) with pytest.raises(PipelineStepFailed) as py_ex: dlt.pipeline().extract(s) @@ -257,9 +350,9 @@ def test_disable_enable_state_sync(environment: Any) -> None: def some_data(): yield [1, 2, 3] - s = DltSource("default", "module", dlt.Schema("default"), [dlt.resource(some_data())]) + s = DltSource(dlt.Schema("default"), "module", [dlt.resource(some_data())]) dlt.pipeline().extract(s) - storage = ExtractorStorage(p._normalize_storage_config) + storage = ExtractStorage(p._normalize_storage_config()) assert len(storage.list_files_to_normalize_sorted()) == 1 expect_extracted_file(storage, "default", "some_data", json.dumps([1, 2, 3])) with pytest.raises(FileNotFoundError): @@ -267,19 +360,27 @@ def some_data(): p.config.restore_from_destination = True # extract to different schema, state must go to default schema - s = DltSource("default_2", "module", dlt.Schema("default_2"), [dlt.resource(some_data())]) + s = DltSource(dlt.Schema("default_2"), "module", [dlt.resource(some_data())]) dlt.pipeline().extract(s) expect_extracted_file(storage, "default", s.schema.state_table_name,
"***") def test_extract_multiple_sources() -> None: - s1 = DltSource("default", "module", dlt.Schema("default"), [dlt.resource([1, 2, 3], name="resource_1"), dlt.resource([3, 4, 5], name="resource_2")]) - s2 = DltSource("default_2", "module", dlt.Schema("default_2"), [dlt.resource([6, 7, 8], name="resource_3"), dlt.resource([9, 10, 0], name="resource_4")]) + s1 = DltSource( + dlt.Schema("default"), + "module", + [dlt.resource([1, 2, 3], name="resource_1"), dlt.resource([3, 4, 5], name="resource_2")], + ) + s2 = DltSource( + dlt.Schema("default_2"), + "module", + [dlt.resource([6, 7, 8], name="resource_3"), dlt.resource([9, 10, 0], name="resource_4")], + ) p = dlt.pipeline(destination="dummy") p.config.restore_from_destination = False p.extract([s1, s2]) - storage = ExtractorStorage(p._normalize_storage_config) + storage = ExtractStorage(p._normalize_storage_config()) expect_extracted_file(storage, "default", "resource_1", json.dumps([1, 2, 3])) expect_extracted_file(storage, "default", "resource_2", json.dumps([3, 4, 5])) expect_extracted_file(storage, "default_2", "resource_3", json.dumps([6, 7, 8])) @@ -293,11 +394,18 @@ def test_extract_multiple_sources() -> None: def i_fail(): raise NotImplementedError() - s3 = DltSource("default_3", "module", dlt.Schema("default_3"), [dlt.resource([1, 2, 3], name="resource_1"), dlt.resource([3, 4, 5], name="resource_2")]) - s4 = DltSource("default_4", "module", dlt.Schema("default_4"), [dlt.resource([6, 7, 8], name="resource_3"), i_fail]) + s3 = DltSource( + dlt.Schema("default_3"), + "module", + [dlt.resource([1, 2, 3], name="resource_1"), dlt.resource([3, 4, 5], name="resource_2")], + ) + s4 = DltSource( + dlt.Schema("default_4"), "module", [dlt.resource([6, 7, 8], name="resource_3"), i_fail] + ) with pytest.raises(PipelineStepFailed): - p.extract([s3, s4]) + # NOTE: if you swap s3 and s4 the test on list_schemas will fail: s3 will extract normally and update live schemas, s4 will break exec later + p.extract([s4, s3]) # nothing to normalize assert len(storage.list_files_to_normalize_sorted()) == 0 @@ -317,7 +425,7 @@ def test_restore_state_on_dummy() -> None: assert p.first_run is False # no effect p.sync_destination() - assert p.state["_state_version"] == 2 + assert p.state["_state_version"] == 1 # wipe out storage p._wipe_working_folder() @@ -325,7 +433,7 @@ def test_restore_state_on_dummy() -> None: assert p.first_run is True p.sync_destination() assert p.first_run is True - assert p.state["_state_version"] == 1 + assert p.state["_state_version"] == 0 def test_first_run_flag() -> None: @@ -342,7 +450,7 @@ def test_first_run_flag() -> None: # attach again p = dlt.attach(pipeline_name=pipeline_name) assert p.first_run is True - assert len(p.list_extracted_resources()) > 0 + assert len(p.list_extracted_load_packages()) > 0 p.normalize() assert len(p.list_normalized_load_packages()) > 0 assert p.first_run is True @@ -397,7 +505,7 @@ def test_sentry_tracing() -> None: def r_check_sentry(): assert sentry_sdk.Hub.current.scope.span.op == "extract" assert sentry_sdk.Hub.current.scope.span.containing_transaction.name == "run" - yield [1,2,3] + yield [1, 2, 3] p.run(r_check_sentry) assert sentry_sdk.Hub.current.scope.span is None @@ -422,12 +530,10 @@ def r_fail(): assert sentry_sdk.Hub.current.scope.span is None - def test_pipeline_state_on_extract_exception() -> None: pipeline_name = "pipe_" + uniq_id() p = dlt.pipeline(pipeline_name=pipeline_name, destination="dummy") - @dlt.resource def data_piece_1(): yield [1, 2, 3] @@ -438,19 +544,27 @@ def 
data_piece_2(): yield [6, 7, 8] raise NotImplementedError() - with pytest.raises(PipelineStepFailed): + with pytest.raises(PipelineStepFailed) as pip_ex: p.run([data_piece_1, data_piece_2], write_disposition="replace") + # make sure that the exception has the right step info + assert pip_ex.value.load_id in pip_ex.value.step_info.loads_ids + # print(pip_ex.value.load_id) + # print(pip_ex.value.step_info.asdict()) + # print(p._last_trace.last_pipeline_step_trace("extract").exception_traces) # first run didn't really happen assert p.first_run is True assert p.has_data is False - assert p._schema_storage.list_schemas() == [] assert p.default_schema_name is None + # one of the schemas is in memory + # TODO: we may want to fix that + assert len(p._schema_storage.list_schemas()) == 1 # restore the pipeline p = dlt.attach(pipeline_name) assert p.first_run is True assert p.has_data is False + # no schema was saved to storage, the one above was only in memory assert p._schema_storage.list_schemas() == [] assert p.default_schema_name is None @@ -478,12 +592,14 @@ def data_schema_3(): # first run didn't really happen assert p.first_run is True assert p.has_data is False - assert p._schema_storage.list_schemas() == [] + # schemas from two sources are in memory + # TODO: we may want to fix that + assert len(p._schema_storage.list_schemas()) == 2 assert p.default_schema_name is None os.environ["COMPLETED_PROB"] = "1.0" # make it complete immediately p.run([data_schema_1(), data_schema_2()], write_disposition="replace") - assert p.schema_names == p._schema_storage.list_schemas() + assert set(p.schema_names) == set(p._schema_storage.list_schemas()) def test_run_with_table_name_exceeding_path_length() -> None: @@ -493,7 +609,7 @@ def test_run_with_table_name_exceeding_path_length() -> None: # we must fix that with pytest.raises(PipelineStepFailed) as sf_ex: - p.extract([1, 2, 3], table_name="TABLE_" + "a" * 230) + p.extract([1, 2, 3], table_name="TABLE_" + "a" * 267) assert isinstance(sf_ex.value.__context__, OSError) @@ -535,7 +651,7 @@ def test_load_info_raise_on_failed_jobs() -> None: os.environ["RAISE_ON_FAILED_JOBS"] = "true" with pytest.raises(PipelineStepFailed) as py_ex_2: p.run([1, 2, 3], table_name="numbers") - load_info = py_ex_2.value.step_info + load_info = py_ex_2.value.step_info # type: ignore[assignment] assert load_info.has_failed_jobs is True with pytest.raises(DestinationHasFailedJobs) as py_ex: load_info.raise_on_failed_jobs() @@ -549,7 +665,6 @@ def test_run_load_pending() -> None: pipeline_name = "pipe_" + uniq_id() p = dlt.pipeline(pipeline_name=pipeline_name, destination="dummy") - def some_data(): yield from [1, 2, 3] @@ -594,7 +709,11 @@ def fail_extract(): attempt = None - for attempt in Retrying(stop=stop_after_attempt(3), retry=retry_if_exception(retry_load(("load", "extract"))), reraise=True): + for attempt in Retrying( + stop=stop_after_attempt(3), + retry=retry_if_exception(retry_load(("load", "extract"))), + reraise=True, + ): with attempt: p.run(fail_extract()) # it retried @@ -603,7 +722,9 @@ def fail_extract(): # now it fails (extract is terminal exception) retry_count = 2 with pytest.raises(PipelineStepFailed) as py_ex: - for attempt in Retrying(stop=stop_after_attempt(3), retry=retry_if_exception(retry_load(())), reraise=True): + for attempt in Retrying( + stop=stop_after_attempt(3), retry=retry_if_exception(retry_load(())), reraise=True + ): with attempt: p.run(fail_extract()) assert isinstance(py_ex.value, PipelineStepFailed) @@ -613,7 +734,11 @@ def fail_extract():
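# The retry hunks above wrap `p.run()` in tenacity's `Retrying` with dlt's `retry_load`
# predicate. A minimal, self-contained sketch of that pattern follows; the pipeline and
# source names are illustrative only, and the "dummy" destination keeps the run local.
import dlt
from tenacity import Retrying, retry_if_exception, stop_after_attempt
from dlt.pipeline.helpers import retry_load


def flaky_numbers():
    # stand-in source; in the tests above the extract step raises on purpose
    yield from [1, 2, 3]


pipeline = dlt.pipeline(pipeline_name="retry_demo", destination="dummy")

# retry the whole run up to 3 times, but only when the failure happened in the
# "load" or "extract" step (that is what the retry_load predicate checks)
for attempt in Retrying(
    stop=stop_after_attempt(3),
    retry=retry_if_exception(retry_load(("load", "extract"))),
    reraise=True,
):
    with attempt:
        pipeline.run(flaky_numbers())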
os.environ["RAISE_ON_FAILED_JOBS"] = "true" os.environ["FAIL_PROB"] = "1.0" with pytest.raises(PipelineStepFailed) as py_ex: - for attempt in Retrying(stop=stop_after_attempt(3), retry=retry_if_exception(retry_load(("load", "extract"))), reraise=True): + for attempt in Retrying( + stop=stop_after_attempt(3), + retry=retry_if_exception(retry_load(("load", "extract"))), + reraise=True, + ): with attempt: p.run(fail_extract()) assert isinstance(py_ex.value, PipelineStepFailed) @@ -643,6 +768,7 @@ def test_set_get_local_value() -> None: assert p.state["_local"][value] == value # type: ignore[literal-required] new_val = uniq_id() + # check in context manager @dlt.resource def _w_local_state(): @@ -670,29 +796,40 @@ def resource_1(): assert p.default_schema.get_table("resource_1")["write_disposition"] == "append" p.run(resource_1, write_disposition="replace") + print(list(p._schema_storage.live_schemas.values())[0].to_pretty_yaml()) + assert ( + p.schemas[p.default_schema_name].get_table("resource_1")["write_disposition"] == "replace" + ) assert p.default_schema.get_table("resource_1")["write_disposition"] == "replace" -@dlt.transformer(name="github_repo_events", primary_key="id", write_disposition="merge", table_name=lambda i: i['type']) +@dlt.transformer( + name="github_repo_events", + primary_key="id", + write_disposition="merge", + table_name=lambda i: i["type"], +) def github_repo_events(page): yield page @dlt.transformer(name="github_repo_events", primary_key="id", write_disposition="merge") def github_repo_events_table_meta(page): - yield from [dlt.mark.with_table_name(p, p['type']) for p in page] + yield from [dlt.mark.with_table_name(p, p["type"]) for p in page] @dlt.resource -def _get_shuffled_events(): - with open("tests/normalize/cases/github.events.load_page_1_duck.json", "r", encoding="utf-8") as f: - issues = json.load(f) - yield issues +def _get_shuffled_events(repeat: int = 1): + for _ in range(repeat): + with open( + "tests/normalize/cases/github.events.load_page_1_duck.json", "r", encoding="utf-8" + ) as f: + issues = json.load(f) + yield issues -@pytest.mark.parametrize('github_resource', (github_repo_events_table_meta, github_repo_events)) +@pytest.mark.parametrize("github_resource", (github_repo_events_table_meta, github_repo_events)) def test_dispatch_rows_to_tables(github_resource: DltResource): - os.environ["COMPLETED_PROB"] = "1.0" pipeline_name = "pipe_" + uniq_id() p = dlt.pipeline(pipeline_name=pipeline_name, destination="dummy") @@ -702,51 +839,55 @@ def test_dispatch_rows_to_tables(github_resource: DltResource): # get all expected tables events = list(_get_shuffled_events) - expected_tables = set(map(lambda e: p.default_schema.naming.normalize_identifier(e["type"]), events)) + expected_tables = set( + map(lambda e: p.default_schema.naming.normalize_identifier(e["type"]), events) + ) # all the tables present - assert expected_tables.intersection([t["name"] for t in p.default_schema.data_tables()]) == expected_tables + assert ( + expected_tables.intersection([t["name"] for t in p.default_schema.data_tables()]) + == expected_tables + ) # all the columns have primary keys and merge disposition derived from resource - for table in p.default_schema.data_tables(): + for table in p.default_schema.data_tables(): if table.get("parent") is None: assert table["write_disposition"] == "merge" assert table["columns"]["id"]["primary_key"] is True def test_resource_name_in_schema() -> None: - @dlt.resource(table_name='some_table') + @dlt.resource(table_name="some_table") def 
static_data(): - yield {'a': 1, 'b': 2} + yield {"a": 1, "b": 2} - @dlt.resource(table_name=lambda x: 'dynamic_func_table') + @dlt.resource(table_name=lambda x: "dynamic_func_table") def dynamic_func_data(): - yield {'a': 1, 'b': 2} + yield {"a": 1, "b": 2} @dlt.resource def dynamic_mark_data(): - yield dlt.mark.with_table_name({'a': 1, 'b': 2}, 'dynamic_mark_table') + yield dlt.mark.with_table_name({"a": 1, "b": 2}, "dynamic_mark_table") - @dlt.resource(table_name='parent_table') + @dlt.resource(table_name="parent_table") def nested_data(): - yield {'a': 1, 'items': [{'c': 2}, {'c': 3}, {'c': 4}]} + yield {"a": 1, "items": [{"c": 2}, {"c": 3}, {"c": 4}]} @dlt.source def some_source(): return [static_data(), dynamic_func_data(), dynamic_mark_data(), nested_data()] - source = some_source() - p = dlt.pipeline(pipeline_name=uniq_id(), destination='dummy') + p = dlt.pipeline(pipeline_name=uniq_id(), destination="dummy") p.run(source) schema = p.default_schema - assert schema.tables['some_table']['resource'] == 'static_data' - assert schema.tables['dynamic_func_table']['resource'] == 'dynamic_func_data' - assert schema.tables['dynamic_mark_table']['resource'] == 'dynamic_mark_data' - assert schema.tables['parent_table']['resource'] == 'nested_data' - assert 'resource' not in schema.tables['parent_table__items'] + assert schema.tables["some_table"]["resource"] == "static_data" + assert schema.tables["dynamic_func_table"]["resource"] == "dynamic_func_data" + assert schema.tables["dynamic_mark_table"]["resource"] == "dynamic_mark_data" + assert schema.tables["parent_table"]["resource"] == "nested_data" + assert "resource" not in schema.tables["parent_table__items"] def test_preserve_fields_order() -> None: @@ -770,62 +911,29 @@ def reverse_order(item): p.extract(ordered_dict().add_map(reverse_order)) p.normalize() - assert list(p.default_schema.tables["order_1"]["columns"].keys()) == ["col_1", "col_2", "col_3", '_dlt_load_id', '_dlt_id'] - assert list(p.default_schema.tables["order_2"]["columns"].keys()) == ["col_3", "col_2", "col_1", '_dlt_load_id', '_dlt_id'] - - -def run_deferred(iters): - - @dlt.defer - def item(n): - sleep(random.random() / 2) - return n - - for n in range(iters): - yield item(n) - - -@dlt.source -def many_delayed(many, iters): - for n in range(many): - yield dlt.resource(run_deferred(iters), name="resource_" + str(n)) - - -@pytest.mark.parametrize("progress", ["tqdm", "enlighten", "log", "alive_progress"]) -def test_pipeline_progress(progress: TCollectorArg) -> None: - - os.environ["TIMEOUT"] = "3.0" - - p = dlt.pipeline(destination="dummy", progress=progress) - p.extract(many_delayed(5, 10)) - p.normalize() - - collector = p.collector - - # attach pipeline - p = dlt.attach(progress=collector) - p.extract(many_delayed(5, 10)) - p.run(dataset_name="dummy") - - assert collector == p.drop().collector - - # make sure a valid logger was used - if progress == "tqdm": - assert isinstance(collector, TqdmCollector) - if progress == "enlighten": - assert isinstance(collector, EnlightenCollector) - if progress == "alive_progress": - assert isinstance(collector, AliveCollector) - if progress == "log": - assert isinstance(collector, LogCollector) + assert list(p.default_schema.tables["order_1"]["columns"].keys()) == [ + "col_1", + "col_2", + "col_3", + "_dlt_load_id", + "_dlt_id", + ] + assert list(p.default_schema.tables["order_2"]["columns"].keys()) == [ + "col_3", + "col_2", + "col_1", + "_dlt_load_id", + "_dlt_id", + ] def test_pipeline_log_progress() -> None: - os.environ["TIMEOUT"] 
= "3.0" # will attach dlt logger - p = dlt.pipeline(destination="dummy", progress=dlt.progress.log(0.5, logger=None, log_level=logging.WARNING)) + p = dlt.pipeline( + destination="dummy", progress=dlt.progress.log(0.5, logger=None, log_level=logging.WARNING) + ) # collector was created before pipeline so logger is not attached assert cast(LogCollector, p.collector).logger is None p.extract(many_delayed(2, 10)) @@ -839,7 +947,6 @@ def test_pipeline_log_progress() -> None: def test_pipeline_source_state_activation() -> None: - appendix_yielded = None @dlt.source @@ -858,7 +965,7 @@ def appendix(): def writes_state(): dlt.current.source_state()["appendix"] = source_st dlt.current.resource_state()["RX"] = resource_st - yield from [1,2,3] + yield from [1, 2, 3] yield writes_state @@ -869,8 +976,11 @@ def writes_state(): assert s_appendix.state == {} # create state by running extract p_appendix.extract(s_appendix) - assert s_appendix.state == {'appendix': 'appendix', 'resources': {'writes_state': {'RX': 'r_appendix'}}} - assert s_appendix.writes_state.state == {'RX': 'r_appendix'} + assert s_appendix.state == { + "appendix": "appendix", + "resources": {"writes_state": {"RX": "r_appendix"}}, + } + assert s_appendix.writes_state.state == {"RX": "r_appendix"} # change the active pipeline p_postfix = dlt.pipeline(pipeline_name="postfix_p") @@ -878,7 +988,7 @@ def writes_state(): assert s_appendix.state == {} # and back p_appendix.activate() - assert s_appendix.writes_state.state == {'RX': 'r_appendix'} + assert s_appendix.writes_state.state == {"RX": "r_appendix"} # create another source s_w_appendix = reads_state("appendix", "r_appendix") @@ -908,7 +1018,10 @@ def test_extract_add_tables() -> None: assert s.resources["🦚Peacock"].compute_table_schema()["resource"] == "🦚Peacock" # only name will be normalized assert s.resources["🦚Peacock"].compute_table_schema()["name"] == "🦚Peacock" - assert s.resources["💰Budget"].compute_table_schema()["columns"]["🔑book_id"]["name"] == "🔑book_id" + assert ( + s.resources["💰Budget"].compute_table_schema()["columns"]["🔑book_id"]["name"] + == "🔑book_id" + ) pipeline = dlt.pipeline(pipeline_name="emojis", destination="dummy") info = pipeline.extract(s) assert info.extract_data_info[0]["name"] == "airtable_emojis" @@ -955,7 +1068,12 @@ def test_emojis_resource_names() -> None: table = info.load_packages[0].schema_update["_schedule"] assert table["resource"] == "📆 Schedule" # only schedule is added - assert set(info.load_packages[0].schema_update.keys()) == {"_dlt_version", "_dlt_loads", "_schedule", "_dlt_pipeline_state"} + assert set(info.load_packages[0].schema_update.keys()) == { + "_dlt_version", + "_dlt_loads", + "_schedule", + "_dlt_pipeline_state", + } info = pipeline.run(airtable_emojis()) assert_load_info(info) # here we add _peacock with has primary_key (so at least single column) @@ -979,12 +1097,13 @@ def test_apply_hints_infer_hints() -> None: @dlt.source def infer(): - yield dlt.resource([{"id": 1, "timestamp": "NOW"}], name="table1", columns=[new_column("timestamp", nullable=True)]) + yield dlt.resource( + [{"id": 1, "timestamp": "NOW"}], + name="table1", + columns=[new_column("timestamp", nullable=True)], + ) - new_new_hints = { - "not_null": ["timestamp"], - "primary_key": ["id"] - } + new_new_hints = {"not_null": ["timestamp"], "primary_key": ["id"]} s = infer() s.schema.merge_hints(new_new_hints) # type: ignore[arg-type] pipeline = dlt.pipeline(pipeline_name="inf", destination="dummy") @@ -992,18 +1111,41 @@ def infer(): # check schema table = 
pipeline.default_schema.get_table("table1") # nullable True coming from hint overrides inferred hint - assert table["columns"]["timestamp"] == {"name": "timestamp", "data_type": "text", "nullable": True} + assert table["columns"]["timestamp"] == { + "name": "timestamp", + "data_type": "text", + "nullable": True, + } # fully from data - assert table["columns"]["id"] == {"name": "id", "data_type": "bigint", "nullable": True, "primary_key": True} + assert table["columns"]["id"] == { + "name": "id", + "data_type": "bigint", + "nullable": True, + "primary_key": True, + } # remove primary key and change nullable s = infer() - s.table1.apply_hints(columns=[{"name": "timestamp", "nullable": False}, {"name": "id", "nullable": False, "primary_key": False}]) + s.table1.apply_hints( + columns=[ + {"name": "timestamp", "nullable": False}, + {"name": "id", "nullable": False, "primary_key": False}, + ] + ) pipeline.run(s) table = pipeline.default_schema.get_table("table1") # hints overwrite pipeline schema - assert table["columns"]["timestamp"] == {"name": "timestamp", "data_type": "text", "nullable": False} - assert table["columns"]["id"] == {"name": "id", "data_type": "bigint", "nullable": False, "primary_key": False} + assert table["columns"]["timestamp"] == { + "name": "timestamp", + "data_type": "text", + "nullable": False, + } + assert table["columns"]["id"] == { + "name": "id", + "data_type": "bigint", + "nullable": False, + "primary_key": False, + } # print(pipeline.default_schema.to_pretty_yaml()) @@ -1051,61 +1193,17 @@ def res_return_yield(): assert "dlt.resource" in str(pip_ex.value) -@pytest.mark.parametrize('method', ('extract', 'run')) -def test_column_argument_pydantic(method: str) -> None: - """Test columns schema is created from pydantic model""" - p = dlt.pipeline(destination='duckdb') - - @dlt.resource - def some_data() -> Iterator[Dict[str, Any]]: - yield {} - - class Columns(BaseModel): - a: Optional[int] - b: Optional[str] - - if method == 'run': - p.run(some_data(), columns=Columns) - else: - p.extract(some_data(), columns=Columns) - - assert p.default_schema.tables['some_data']['columns']['a']['data_type'] == 'bigint' - assert p.default_schema.tables['some_data']['columns']['a']['nullable'] is True - assert p.default_schema.tables['some_data']['columns']['b']['data_type'] == 'text' - assert p.default_schema.tables['some_data']['columns']['b']['nullable'] is True - - -def test_extract_pydantic_models() -> None: - pipeline = dlt.pipeline(destination='duckdb') - - class User(BaseModel): - user_id: int - name: str - - @dlt.resource - def users() -> Iterator[User]: - yield User(user_id=1, name="a") - yield User(user_id=2, name="b") - - pipeline.extract(users()) - - storage = ExtractorStorage(pipeline._normalize_storage_config) - expect_extracted_file( - storage, pipeline.default_schema_name, "users", json.dumps([{"user_id": 1, "name": "a"}, {"user_id": 2, "name": "b"}]) - ) - - def test_resource_rename_same_table(): @dlt.resource(write_disposition="replace") def generic(start): dlt.current.resource_state()["start"] = start - yield [{"id": idx, "text": "A"*idx} for idx in range(start, start + 10)] + yield [{"id": idx, "text": "A" * idx} for idx in range(start, start + 10)] - pipeline = dlt.pipeline(destination='duckdb') - load_info = pipeline.run([ - generic(10).with_name("state1"), - generic(20).with_name("state2") - ], table_name="single_table") + pipeline = dlt.pipeline(destination="duckdb") + load_info = pipeline.run( + [generic(10).with_name("state1"), 
generic(20).with_name("state2")], + table_name="single_table", + ) assert_load_info(load_info) # both resources loaded assert pipeline.last_trace.last_normalize_info.row_counts["single_table"] == 20 @@ -1122,9 +1220,12 @@ def generic(start): assert pipeline.default_schema.get_table("single_table")["resource"] == "state2" # now load only state1 - load_info = pipeline.run([ - generic(5).with_name("state1"), - ], table_name="single_table") + load_info = pipeline.run( + [ + generic(5).with_name("state1"), + ], + table_name="single_table", + ) assert_load_info(load_info) # both resources loaded assert pipeline.last_trace.last_normalize_info.row_counts["single_table"] == 10 @@ -1133,17 +1234,6 @@ def generic(start): assert pipeline.default_schema.get_table("single_table")["resource"] == "state1" -@pytest.mark.parametrize("file_format", ("parquet", "insert_values", "jsonl")) -def test_columns_hint_with_file_formats(file_format: TLoaderFileFormat) -> None: - - @dlt.resource(write_disposition="replace", columns=[{"name": "text", "data_type": "text"}]) - def generic(start=8): - yield [{"id": idx, "text": "A"*idx} for idx in range(start, start + 10)] - - pipeline = dlt.pipeline(destination='duckdb') - pipeline.run(generic(), loader_file_format=file_format) - - def test_remove_autodetect() -> None: now = pendulum.now() @@ -1151,20 +1241,25 @@ def test_remove_autodetect() -> None: def autodetect(): # add unix ts autodetection to current source schema dlt.current.source_schema().add_type_detection("timestamp") - return dlt.resource([int(now.timestamp()), int(now.timestamp() + 1), int(now.timestamp() + 2)], name="numbers") + return dlt.resource( + [int(now.timestamp()), int(now.timestamp() + 1), int(now.timestamp() + 2)], + name="numbers", + ) - pipeline = dlt.pipeline(destination='duckdb') + pipeline = dlt.pipeline(destination="duckdb") pipeline.run(autodetect()) # unix ts recognized - assert pipeline.default_schema.get_table("numbers")["columns"]["value"]["data_type"] == "timestamp" + assert ( + pipeline.default_schema.get_table("numbers")["columns"]["value"]["data_type"] == "timestamp" + ) pipeline = pipeline.drop() source = autodetect() source.schema.remove_type_detection("timestamp") - pipeline = dlt.pipeline(destination='duckdb') + pipeline = dlt.pipeline(destination="duckdb") pipeline.run(source) assert pipeline.default_schema.get_table("numbers")["columns"]["value"]["data_type"] == "bigint" @@ -1179,7 +1274,10 @@ def flattened_dict(): # dlt.current.source_schema().add_type_detection("timestamp") for delta in range(4): - yield {"delta": delta, "values": [{"Value": {"timestampValue": now.timestamp() + delta}}]} + yield { + "delta": delta, + "values": [{"Value": {"timestampValue": now.timestamp() + delta}}], + } @dlt.source def nested_resource(): @@ -1190,16 +1288,21 @@ def nested_resource(): values_table = new_table( dict_resource.name + "__values", parent_table_name=dict_resource.name, - columns=[{"name": "value__timestamp_value", "data_type": "timestamp"}] + columns=[{"name": "value__timestamp_value", "data_type": "timestamp"}], ) # and child table dlt.current.source_schema().update_table(values_table) return dict_resource - pipeline = dlt.pipeline(destination='duckdb') + pipeline = dlt.pipeline(destination="duckdb") pipeline.run(nested_resource()) # print(pipeline.default_schema.to_pretty_yaml()) - assert pipeline.default_schema.get_table("flattened_dict__values")["columns"]["value__timestamp_value"]["data_type"] == "timestamp" + assert ( + 
pipeline.default_schema.get_table("flattened_dict__values")["columns"][ + "value__timestamp_value" + ]["data_type"] + == "timestamp" + ) # make sure data is there assert pipeline.last_trace.last_normalize_info.row_counts["flattened_dict__values"] == 4 @@ -1208,7 +1311,7 @@ def test_empty_rows_are_included() -> None: """Empty rows where all values are `None` or empty dicts create rows in the dataset with `NULL` in all columns """ - pipeline = dlt.pipeline(destination='duckdb') + pipeline = dlt.pipeline(destination="duckdb") pipeline.run(iter([{}, {}, {}]), table_name="empty_rows") pipeline.run(iter([{"a": 1}, {}, {}]), table_name="empty_rows") @@ -1221,6 +1324,57 @@ def test_empty_rows_are_included() -> None: assert values == [1, None, None, None, None, None, None, None] +def test_resource_state_name_not_normalized() -> None: + pipeline = dlt.pipeline(pipeline_name="emojis", destination="duckdb") + peacock_s = airtable_emojis().with_resources("🦚Peacock") + pipeline.extract(peacock_s) + assert peacock_s.resources["🦚Peacock"].state == {"🦚🦚🦚": "🦚"} + pipeline.normalize() + pipeline.load() + + # get state from destination + from dlt.pipeline.state_sync import load_state_from_destination + + client: WithStateSync + with pipeline.destination_client() as client: # type: ignore[assignment] + state = load_state_from_destination(pipeline.pipeline_name, client) + assert "airtable_emojis" in state["sources"] + assert state["sources"]["airtable_emojis"]["resources"] == {"🦚Peacock": {"🦚🦚🦚": "🦚"}} + + +def test_pipeline_list_packages() -> None: + pipeline = dlt.pipeline(pipeline_name="emojis", destination="dummy") + pipeline.extract(airtable_emojis()) + load_ids = pipeline.list_extracted_load_packages() + assert len(load_ids) == 1 + # two new packages: for emojis schema and emojis_2 + pipeline.extract( + [airtable_emojis(), airtable_emojis(), airtable_emojis().clone(with_name="emojis_2")] + ) + load_ids = pipeline.list_extracted_load_packages() + assert len(load_ids) == 3 + extracted_package = pipeline.get_load_package_info(load_ids[0]) + assert extracted_package.state == "extracted" + # same load id continues till the end + pipeline.normalize() + load_ids_n = pipeline.list_normalized_load_packages() + assert load_ids == load_ids_n + normalized_package = pipeline.get_load_package_info(load_ids[0]) + # same number of new jobs + assert normalized_package.state == "normalized" + assert len(normalized_package.jobs["new_jobs"]) == len(extracted_package.jobs["new_jobs"]) + # load all 3 packages and fail all jobs in them + os.environ["FAIL_PROB"] = "1.0" + pipeline.load() + load_ids_l = pipeline.list_completed_load_packages() + assert load_ids == load_ids_l + loaded_package = pipeline.get_load_package_info(load_ids[0]) + assert len(loaded_package.jobs["failed_jobs"]) == len(extracted_package.jobs["new_jobs"]) + assert loaded_package.state == "loaded" + failed_jobs = pipeline.list_failed_jobs_in_package(load_ids[0]) + assert len(loaded_package.jobs["failed_jobs"]) == len(failed_jobs) + + def test_remove_pending_packages() -> None: pipeline = dlt.pipeline(pipeline_name="emojis", destination="dummy") pipeline.extract(airtable_emojis()) @@ -1245,3 +1399,261 @@ def test_remove_pending_packages() -> None: assert pipeline.has_pending_data pipeline.drop_pending_packages() assert pipeline.has_pending_data is False + + +@pytest.mark.parametrize("workers", (1, 4), ids=("1 norm worker", "4 norm workers")) +def test_parallel_pipelines_threads(workers: int) -> None: + # critical section to control pipeline steps + init_lock = 
threading.Lock() + extract_ev = threading.Event() + normalize_ev = threading.Event() + load_ev = threading.Event() + # control main thread + sem = threading.Semaphore(0) + + # rotate the files frequently so we have parallel normalize and load + os.environ["DATA_WRITER__BUFFER_MAX_ITEMS"] = "10" + os.environ["DATA_WRITER__FILE_MAX_ITEMS"] = "10" + + # force spawn process pool + os.environ["NORMALIZE__START_METHOD"] = "spawn" + + page_repeats = 1 + + # set the extra per pipeline + os.environ["PIPELINE_1__EXTRA"] = "CFG_P_1" + os.environ["PIPELINE_2__EXTRA"] = "CFG_P_2" + + def _run_pipeline(pipeline_name: str) -> Tuple[LoadInfo, PipelineContext, DictStrAny]: + try: + + @dlt.transformer( + name="github_repo_events", + write_disposition="append", + table_name=lambda i: i["type"], + ) + def github_repo_events(page, extra): + # test setting the resource state + dlt.current.resource_state()["extra"] = extra + yield page + + @dlt.transformer + async def slow(items): + await asyncio.sleep(0.1) + return items + + @dlt.transformer + @dlt.defer + def slow_func(items, extra): + # sdd configurable extra to each element + sleep(0.1) + return map(lambda item: {**item, **{"extra": extra}}, items) + + @dlt.source + def github(extra: str = dlt.config.value): + # generate github events, push them through futures and thread pools and then dispatch to separate tables + return ( + _get_shuffled_events(repeat=page_repeats) + | slow + | slow_func(extra) + | github_repo_events(extra) + ) + + # make sure that only one pipeline is created + with init_lock: + pipeline = dlt.pipeline(pipeline_name=pipeline_name, destination="duckdb") + context = Container()[PipelineContext] + finally: + sem.release() + # start every step at the same moment to increase chances of any race conditions to happen + extract_ev.wait() + context_2 = Container()[PipelineContext] + try: + pipeline.extract(github()) + finally: + sem.release() + normalize_ev.wait() + try: + pipeline.normalize(workers=workers) + finally: + sem.release() + load_ev.wait() + info = pipeline.load() + + # get counts in the thread + counts = load_data_table_counts(pipeline) + + assert context is context_2 + return info, context, counts + + with ThreadPoolExecutor(max_workers=4) as pool: + f_1 = pool.submit(_run_pipeline, "pipeline_1") + f_2 = pool.submit(_run_pipeline, "pipeline_2") + + sem.acquire() + sem.acquire() + if f_1.done(): + raise f_1.exception() + if f_2.done(): + raise f_2.exception() + extract_ev.set() + sem.acquire() + sem.acquire() + if f_1.done(): + raise f_1.exception() + if f_2.done(): + raise f_2.exception() + normalize_ev.set() + sem.acquire() + sem.acquire() + if f_1.done(): + raise f_1.exception() + if f_2.done(): + raise f_2.exception() + load_ev.set() + + info_1, context_1, counts_1 = f_1.result() + info_2, context_2, counts_2 = f_2.result() + + assert_load_info(info_1) + assert_load_info(info_2) + + pipeline_1: dlt.Pipeline = context_1.pipeline() # type: ignore + pipeline_2: dlt.Pipeline = context_2.pipeline() # type: ignore + + n_counts_1 = pipeline_1.last_trace.last_normalize_info + assert n_counts_1.row_counts["push_event"] == 8 * page_repeats == counts_1["push_event"] + n_counts_2 = pipeline_2.last_trace.last_normalize_info + assert n_counts_2.row_counts["push_event"] == 8 * page_repeats == counts_2["push_event"] + + assert pipeline_1.pipeline_name == "pipeline_1" + assert pipeline_2.pipeline_name == "pipeline_2" + + # check if resource state has extra + assert pipeline_1.state["sources"]["github"]["resources"]["github_repo_events"] == { + 
"extra": "CFG_P_1" + } + assert pipeline_2.state["sources"]["github"]["resources"]["github_repo_events"] == { + "extra": "CFG_P_2" + } + + # make sure we can still access data + pipeline_1.activate() # activate pipeline to access inner duckdb + assert load_data_table_counts(pipeline_1) == counts_1 + pipeline_2.activate() + assert load_data_table_counts(pipeline_2) == counts_2 + + +@pytest.mark.parametrize("workers", (1, 4), ids=("1 norm worker", "4 norm workers")) +def test_parallel_pipelines_async(workers: int) -> None: + os.environ["NORMALIZE__WORKERS"] = str(workers) + + # create both futures and thread parallel resources + + def async_table(): + async def _gen(idx): + await asyncio.sleep(0.1) + return {"async_gen": idx} + + # just yield futures in a loop + for idx_ in range(10): + yield _gen(idx_) + + def defer_table(): + @dlt.defer + def _gen(idx): + sleep(0.1) + return {"thread_gen": idx} + + # just yield futures in a loop + for idx_ in range(5): + yield _gen(idx_) + + def _run_pipeline(pipeline, gen_) -> LoadInfo: + # run the pipeline in a thread, also instantiate generators here! + # Python does not let you use generators across instances + return pipeline.run(gen_()) + + # declare pipelines in main thread then run them "async" + pipeline_1 = dlt.pipeline("pipeline_1", destination="duckdb", full_refresh=True) + pipeline_2 = dlt.pipeline("pipeline_2", destination="duckdb", full_refresh=True) + + async def _run_async(): + loop = asyncio.get_running_loop() + with ThreadPoolExecutor() as executor: + results = await asyncio.gather( + loop.run_in_executor(executor, _run_pipeline, pipeline_1, async_table), + loop.run_in_executor(executor, _run_pipeline, pipeline_2, defer_table), + ) + assert_load_info(results[0]) + assert_load_info(results[1]) + + asyncio.run(_run_async()) + pipeline_1.activate() # activate pipeline 1 to access inner duckdb + assert load_data_table_counts(pipeline_1) == {"async_table": 10} + pipeline_2.activate() # activate pipeline 2 to access inner duckdb + assert load_data_table_counts(pipeline_2) == {"defer_table": 5} + + +def test_resource_while_stop() -> None: + def product(): + stop = False + + @dlt.defer + def get_page(page_num): + nonlocal stop + + # print(f"Fetching page {page_num}") + items = api_fetch(page_num) + # print(f"Retrieved {len(items)} from page {page_num}") + + if len(items) == 0: + stop = True + return items + + idx = 0 + while not stop: + yield get_page(idx) + idx += 1 + + def api_fetch(page_num): + import time + + time.sleep(1) + if page_num < 12: + return [{"id": page_num}] + else: + return [] + + pipeline = dlt.pipeline("pipeline_1", destination="duckdb", full_refresh=True) + load_info = pipeline.run(product()) + assert_load_info(load_info) + assert pipeline.last_trace.last_normalize_info.row_counts["product"] == 12 + + +@pytest.mark.skip("skipped until async generators are implemented") +def test_async_generator() -> None: + def async_inner_table(): + async def _gen(idx): + for l_ in ["a", "b", "c"]: + await asyncio.sleep(1) + yield {"async_gen": idx, "letter": l_} + + # just yield futures in a loop + for idx_ in range(10): + yield _gen(idx_) + + async def async_gen_table(idx): + for l_ in ["a", "b", "c"]: + await asyncio.sleep(1) + yield {"async_gen": idx, "letter": l_} + + @dlt.resource + async def async_gen_resource(idx): + for l_ in ["a", "b", "c"]: + await asyncio.sleep(1) + yield {"async_gen": idx, "letter": l_} + + pipeline_1 = dlt.pipeline("pipeline_1", destination="duckdb", full_refresh=True) + pipeline_1.run(async_gen_resource(10)) + 
pipeline_1.run(async_gen_table(11))
diff --git a/tests/pipeline/test_pipeline_extra.py b/tests/pipeline/test_pipeline_extra.py
new file mode 100644
index 0000000000..dd60002e6c
--- /dev/null
+++ b/tests/pipeline/test_pipeline_extra.py
@@ -0,0 +1,202 @@
+import os
+from typing import Any, ClassVar, Dict, Iterator, List, Optional
+import pytest
+from pydantic import BaseModel
+
+import dlt
+from dlt.common import json, pendulum
+from dlt.common.destination import DestinationCapabilitiesContext
+from dlt.common.destination.capabilities import TLoaderFileFormat
+from dlt.common.libs.pydantic import DltConfig
+from dlt.common.runtime.collector import (
+    AliveCollector,
+    EnlightenCollector,
+    LogCollector,
+    TqdmCollector,
+)
+from dlt.extract.storage import ExtractStorage
+from dlt.extract.validation import PydanticValidator
+
+from dlt.pipeline import TCollectorArg
+
+from tests.extract.utils import expect_extracted_file
+from tests.load.utils import DestinationTestConfiguration, destinations_configs
+from tests.pipeline.utils import assert_load_info, load_data_table_counts, many_delayed
+
+
+@pytest.mark.parametrize(
+    "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name
+)
+def test_create_pipeline_all_destinations(destination_config: DestinationTestConfiguration) -> None:
+    # create pipelines, extract and normalize. that should be possible without installing any dependencies
+    p = dlt.pipeline(
+        pipeline_name=destination_config.destination + "_pipeline",
+        destination=destination_config.destination,
+        staging=destination_config.staging,
+    )
+    # are capabilities injected
+    caps = p._container[DestinationCapabilitiesContext]
+    print(caps.naming_convention)
+    # are right naming conventions created
+    assert p._default_naming.max_length == min(
+        caps.max_column_identifier_length, caps.max_identifier_length
+    )
+    p.extract([1, "2", 3], table_name="data")
+    # is default schema with right naming convention
+    assert p.default_schema.naming.max_length == min(
+        caps.max_column_identifier_length, caps.max_identifier_length
+    )
+    p.normalize()
+    assert p.default_schema.naming.max_length == min(
+        caps.max_column_identifier_length, caps.max_identifier_length
+    )
+
+
+@pytest.mark.parametrize("progress", ["tqdm", "enlighten", "log", "alive_progress"])
+def test_pipeline_progress(progress: TCollectorArg) -> None:
+    os.environ["TIMEOUT"] = "3.0"
+
+    p = dlt.pipeline(destination="dummy", progress=progress)
+    p.extract(many_delayed(5, 10))
+    p.normalize()
+
+    collector = p.collector
+
+    # attach pipeline
+    p = dlt.attach(progress=collector)
+    p.extract(many_delayed(5, 10))
+    p.run(dataset_name="dummy")
+
+    assert collector == p.drop().collector
+
+    # make sure a valid logger was used
+    if progress == "tqdm":
+        assert isinstance(collector, TqdmCollector)
+    if progress == "enlighten":
+        assert isinstance(collector, EnlightenCollector)
+    if progress == "alive_progress":
+        assert isinstance(collector, AliveCollector)
+    if progress == "log":
+        assert isinstance(collector, LogCollector)
+
+
+@pytest.mark.parametrize("method", ("extract", "run"))
+def test_column_argument_pydantic(method: str) -> None:
+    """Test columns schema is created from pydantic model"""
+    p = dlt.pipeline(destination="duckdb")
+
+    @dlt.resource
+    def some_data() -> Iterator[Dict[str, Any]]:
+        yield {}
+
+    class Columns(BaseModel):
+        a: Optional[int] = None
+        b: Optional[str] = None
+
+    if method == "run":
+        p.run(some_data(), columns=Columns)
+    else:
+        p.extract(some_data(), columns=Columns)
+
+
assert p.default_schema.tables["some_data"]["columns"]["a"]["data_type"] == "bigint" + assert p.default_schema.tables["some_data"]["columns"]["a"]["nullable"] is True + assert p.default_schema.tables["some_data"]["columns"]["b"]["data_type"] == "text" + assert p.default_schema.tables["some_data"]["columns"]["b"]["nullable"] is True + + +@pytest.mark.parametrize("yield_list", [True, False]) +def test_pydantic_columns_with_contracts(yield_list: bool) -> None: + from datetime import datetime # noqa + + class UserLabel(BaseModel): + label: str + + class User(BaseModel): + user_id: int + name: str + created_at: datetime + labels: List[str] + user_label: UserLabel + user_labels: List[UserLabel] + + dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True} + + user = User( + user_id=1, + name="u1", + created_at=pendulum.now(), + labels=["l1", "l2"], + user_label=UserLabel(label="in_l1"), + user_labels=[UserLabel(label="l_l1"), UserLabel(label="l_l1")], + ) + + @dlt.resource(columns=User) + def users(users_list: List[Any]) -> Iterator[Any]: + if yield_list: + yield users_list + else: + yield from users_list + + pipeline = dlt.pipeline(destination="duckdb") + info = pipeline.run(users([user.dict(), user.dict()])) + assert_load_info(info) + print(pipeline.last_trace.last_normalize_info) + # data is passing validation, all filled in + assert load_data_table_counts(pipeline) == { + "users": 2, + "users__labels": 4, + "users__user_labels": 4, + } + + # produce two users with extra attrs in the child model but set the rows to discard so nothing is loaded + u1 = user.dict() + u1["user_labels"][0]["extra_1"] = "extra" + u1["user_labels"][1]["extra_1"] = "extra" + u2 = user.dict() + u2["user_labels"][0]["is_extra"] = True + + r = users([u1, u2]) + r.apply_hints(schema_contract="discard_row") + validator: PydanticValidator[User] = r.validator # type: ignore[assignment] + assert validator.data_mode == "discard_row" + assert validator.column_mode == "discard_row" + pipeline.run(r) + assert load_data_table_counts(pipeline) == { + "users": 2, + "users__labels": 4, + "users__user_labels": 4, + } + print(pipeline.last_trace.last_normalize_info) + + +def test_extract_pydantic_models() -> None: + pipeline = dlt.pipeline(destination="duckdb") + + class User(BaseModel): + user_id: int + name: str + + @dlt.resource + def users() -> Iterator[User]: + yield User(user_id=1, name="a") + yield User(user_id=2, name="b") + + pipeline.extract(users()) + + storage = ExtractStorage(pipeline._normalize_storage_config()) + expect_extracted_file( + storage, + pipeline.default_schema_name, + "users", + json.dumps([{"user_id": 1, "name": "a"}, {"user_id": 2, "name": "b"}]), + ) + + +@pytest.mark.parametrize("file_format", ("parquet", "insert_values", "jsonl")) +def test_columns_hint_with_file_formats(file_format: TLoaderFileFormat) -> None: + @dlt.resource(write_disposition="replace", columns=[{"name": "text", "data_type": "text"}]) + def generic(start=8): + yield [{"id": idx, "text": "A" * idx} for idx in range(start, start + 10)] + + pipeline = dlt.pipeline(destination="duckdb") + pipeline.run(generic(), loader_file_format=file_format) diff --git a/tests/pipeline/test_pipeline_file_format_resolver.py b/tests/pipeline/test_pipeline_file_format_resolver.py index 6602b5f876..49a38c455b 100644 --- a/tests/pipeline/test_pipeline_file_format_resolver.py +++ b/tests/pipeline/test_pipeline_file_format_resolver.py @@ -3,13 +3,20 @@ import dlt import pytest -from dlt.common.exceptions import 
DestinationIncompatibleLoaderFileFormatException, DestinationLoadingViaStagingNotSupported, DestinationNoStagingMode +from dlt.common.exceptions import ( + DestinationIncompatibleLoaderFileFormatException, + DestinationLoadingViaStagingNotSupported, + DestinationNoStagingMode, +) from dlt.common.destination.capabilities import DestinationCapabilitiesContext -def test_file_format_resolution() -> None: + +def test_file_format_resolution() -> None: # raise on destinations that does not support staging with pytest.raises(DestinationLoadingViaStagingNotSupported): - p = dlt.pipeline(pipeline_name="managed_state_pipeline", destination="postgres", staging="filesystem") + p = dlt.pipeline( + pipeline_name="managed_state_pipeline", destination="postgres", staging="filesystem" + ) # raise on staging that does not support staging interface with pytest.raises(DestinationNoStagingMode): @@ -20,7 +27,7 @@ def test_file_format_resolution() -> None: if TYPE_CHECKING: cp = DestinationCapabilitiesContext - class cp(): # type: ignore[no-redef] + class cp: # type: ignore[no-redef] def __init__(self) -> None: self.preferred_loader_file_format: str = None self.supported_loader_file_formats: List[str] = [] diff --git a/tests/pipeline/test_pipeline_state.py b/tests/pipeline/test_pipeline_state.py index 14b881eedc..ee788367e1 100644 --- a/tests/pipeline/test_pipeline_state.py +++ b/tests/pipeline/test_pipeline_state.py @@ -10,46 +10,55 @@ from dlt.common.storages import FileStorage from dlt.common import pipeline as state_module from dlt.common.utils import uniq_id -from dlt.destinations.job_client_impl import SqlJobClientBase +from dlt.common.destination.reference import Destination from dlt.pipeline.exceptions import PipelineStateEngineNoUpgradePathException, PipelineStepFailed from dlt.pipeline.pipeline import Pipeline -from dlt.pipeline.state_sync import migrate_state, STATE_ENGINE_VERSION +from dlt.pipeline.state_sync import generate_version_hash, migrate_state, STATE_ENGINE_VERSION from tests.utils import test_storage -from tests.pipeline.utils import json_case_path, load_json_case, airtable_emojis +from tests.pipeline.utils import json_case_path, load_json_case @dlt.resource() def some_data(): last_value = dlt.current.source_state().get("last_value", 0) - yield [1,2,3] + yield [1, 2, 3] dlt.current.source_state()["last_value"] = last_value + 1 @dlt.resource() def some_data_resource_state(): last_value = dlt.current.resource_state().get("last_value", 0) - yield [1,2,3] + yield [1, 2, 3] dlt.current.resource_state()["last_value"] = last_value + 1 def test_restore_state_props() -> None: - p = dlt.pipeline(pipeline_name="restore_state_props", destination="redshift", staging="filesystem", dataset_name="the_dataset") + p = dlt.pipeline( + pipeline_name="restore_state_props", + destination=Destination.from_reference("redshift", destination_name="redshift_name"), + staging=Destination.from_reference("filesystem", destination_name="filesystem_name"), + dataset_name="the_dataset", + ) p.extract(some_data()) state = p.state assert state["dataset_name"] == "the_dataset" - assert state["destination"].endswith("redshift") - assert state["staging"].endswith("filesystem") + assert state["destination_type"].endswith("redshift") + assert state["staging_type"].endswith("filesystem") + assert state["destination_name"] == "redshift_name" + assert state["staging_name"] == "filesystem_name" p = dlt.pipeline(pipeline_name="restore_state_props") state = p.state assert state["dataset_name"] == "the_dataset" - assert 
state["destination"].endswith("redshift") - assert state["staging"].endswith("filesystem") + assert state["destination_type"].endswith("redshift") + assert state["staging_type"].endswith("filesystem") # also instances are restored - assert p.destination.__name__.endswith("redshift") - assert p.staging.__name__.endswith("filesystem") + assert p.destination.destination_type.endswith("redshift") + assert p.destination.config_params["destination_name"] == "redshift_name" + assert p.staging.destination_type.endswith("filesystem") + assert p.staging.config_params["destination_name"] == "filesystem_name" def test_managed_state() -> None: @@ -78,7 +87,9 @@ def some_source(): sources_state = p.state["sources"] # the source name is the source state key assert sources_state[s.name]["last_value"] == 1 - assert sources_state["managed_state"]["last_value"] == 2 # the state for standalone resource not affected + assert ( + sources_state["managed_state"]["last_value"] == 2 + ) # the state for standalone resource not affected @dlt.source def source_same_section(): @@ -115,7 +126,6 @@ def test_no_active_pipeline_required_for_resource() -> None: def test_active_pipeline_required_for_source() -> None: - @dlt.source def some_source(): dlt.current.source_state().get("last_value", 0) @@ -135,6 +145,7 @@ def some_source(): p.deactivate() list(s) + def test_source_state_iterator(): os.environ["COMPLETED_PROB"] = "1.0" pipeline_name = "pipe_" + uniq_id() @@ -147,7 +158,7 @@ def main(): # increase the multiplier each time state is obtained state["mark"] *= 2 yield [1, 2, 3] - assert dlt.current.source_state()["mark"] == mark*2 + assert dlt.current.source_state()["mark"] == mark * 2 @dlt.transformer(data_from=main) def feeding(item): @@ -155,7 +166,7 @@ def feeding(item): assert dlt.current.source_state()["mark"] > 1 print(f"feeding state {dlt.current.source_state()}") mark = dlt.current.source_state()["mark"] - yield from map(lambda i: i*mark, item) + yield from map(lambda i: i * mark, item) @dlt.source def pass_the_state(): @@ -189,6 +200,7 @@ def test_unmanaged_state() -> None: def _gen_inner(): dlt.state()["gen"] = True yield 1 + list(dlt.resource(_gen_inner)) list(dlt.resource(_gen_inner())) assert state_module._last_full_state["sources"]["unmanaged"]["gen"] is True @@ -237,7 +249,12 @@ def _gen_inner(): def test_resource_state_write() -> None: r = some_data_resource_state() assert list(r) == [1, 2, 3] - assert state_module._last_full_state["sources"]["test_pipeline_state"]["resources"]["some_data_resource_state"]["last_value"] == 1 + assert ( + state_module._last_full_state["sources"]["test_pipeline_state"]["resources"][ + "some_data_resource_state" + ]["last_value"] + == 1 + ) with pytest.raises(ResourceNameNotAvailable): get_current_pipe_name() @@ -248,7 +265,12 @@ def _gen_inner(): p = dlt.pipeline() r = dlt.resource(_gen_inner(), name="name_ovrd") assert list(r) == [1] - assert state_module._last_full_state["sources"][p._make_schema_with_default_name().name]["resources"]["name_ovrd"]["gen"] is True + assert ( + state_module._last_full_state["sources"][p._make_schema_with_default_name().name][ + "resources" + ]["name_ovrd"]["gen"] + is True + ) with pytest.raises(ResourceNameNotAvailable): get_current_pipe_name() @@ -268,20 +290,29 @@ def _gen_inner(tv="df"): r = dlt.resource(_gen_inner("gen_tf"), name="name_ovrd") p.extract(r) assert r.state["gen"] == "gen_tf" - assert state_module._last_full_state["sources"][p.default_schema_name]["resources"]["name_ovrd"]["gen"] == "gen_tf" + assert ( + 
state_module._last_full_state["sources"][p.default_schema_name]["resources"]["name_ovrd"][ + "gen" + ] + == "gen_tf" + ) with pytest.raises(ResourceNameNotAvailable): get_current_pipe_name() r = dlt.resource(_gen_inner, name="pure_function") p.extract(r) assert r.state["gen"] == "df" - assert state_module._last_full_state["sources"][p.default_schema_name]["resources"]["pure_function"]["gen"] == "df" + assert ( + state_module._last_full_state["sources"][p.default_schema_name]["resources"][ + "pure_function" + ]["gen"] + == "df" + ) with pytest.raises(ResourceNameNotAvailable): get_current_pipe_name() # get resource state in defer function def _gen_inner_defer(tv="df"): - @dlt.defer def _run(): dlt.current.resource_state()["gen"] = tv @@ -297,7 +328,6 @@ def _run(): # get resource state in defer explicitly def _gen_inner_defer_explicit_name(resource_name, tv="df"): - @dlt.defer def _run(): dlt.current.resource_state(resource_name)["gen"] = tv @@ -308,11 +338,15 @@ def _run(): r = dlt.resource(_gen_inner_defer_explicit_name, name="defer_function_explicit") p.extract(r("defer_function_explicit", "expl")) assert r.state["gen"] == "expl" - assert state_module._last_full_state["sources"][p.default_schema_name]["resources"]["defer_function_explicit"]["gen"] == "expl" + assert ( + state_module._last_full_state["sources"][p.default_schema_name]["resources"][ + "defer_function_explicit" + ]["gen"] + == "expl" + ) # get resource state in yielding defer (which btw is invalid and will be resolved in main thread) def _gen_inner_defer_yielding(tv="yielding"): - @dlt.defer def _run(): dlt.current.resource_state()["gen"] = tv @@ -323,11 +357,15 @@ def _run(): r = dlt.resource(_gen_inner_defer_yielding, name="defer_function_yielding") p.extract(r) assert r.state["gen"] == "yielding" - assert state_module._last_full_state["sources"][p.default_schema_name]["resources"]["defer_function_yielding"]["gen"] == "yielding" + assert ( + state_module._last_full_state["sources"][p.default_schema_name]["resources"][ + "defer_function_yielding" + ]["gen"] + == "yielding" + ) # get resource state in async function def _gen_inner_async(tv="async"): - async def _run(): dlt.current.resource_state()["gen"] = tv return 1 @@ -352,8 +390,18 @@ def _gen_inner(item): # p = dlt.pipeline() # p.extract(dlt.transformer(_gen_inner, data_from=r, name="tx_other_name")) assert list(dlt.transformer(_gen_inner, data_from=r, name="tx_other_name")) == [2, 4, 6] - assert state_module._last_full_state["sources"]["test_pipeline_state"]["resources"]["some_data_resource_state"]["last_value"] == 1 - assert state_module._last_full_state["sources"]["test_pipeline_state"]["resources"]["tx_other_name"]["gen"] is True + assert ( + state_module._last_full_state["sources"]["test_pipeline_state"]["resources"][ + "some_data_resource_state" + ]["last_value"] + == 1 + ) + assert ( + state_module._last_full_state["sources"]["test_pipeline_state"]["resources"][ + "tx_other_name" + ]["gen"] + is True + ) # returning transformer def _gen_inner_rv(item): @@ -361,8 +409,20 @@ def _gen_inner_rv(item): return item * 2 r = some_data_resource_state() - assert list(dlt.transformer(_gen_inner_rv, data_from=r, name="tx_other_name_rv")) == [1, 2, 3, 1, 2, 3] - assert state_module._last_full_state["sources"]["test_pipeline_state"]["resources"]["tx_other_name_rv"]["gen"] is True + assert list(dlt.transformer(_gen_inner_rv, data_from=r, name="tx_other_name_rv")) == [ + 1, + 2, + 3, + 1, + 2, + 3, + ] + assert ( + 
state_module._last_full_state["sources"]["test_pipeline_state"]["resources"][ + "tx_other_name_rv" + ]["gen"] + is True + ) # deferred transformer @dlt.defer @@ -391,8 +451,17 @@ async def _gen_inner_rv_async_name(item, r_name): return item r = some_data_resource_state() - assert list(dlt.transformer(_gen_inner_rv_async_name, data_from=r, name="tx_other_name_async")("tx_other_name_async")) == [1, 2, 3] - assert state_module._last_full_state["sources"]["test_pipeline_state"]["resources"]["tx_other_name_async"]["gen"] is True + assert list( + dlt.transformer(_gen_inner_rv_async_name, data_from=r, name="tx_other_name_async")( + "tx_other_name_async" + ) + ) == [1, 2, 3] + assert ( + state_module._last_full_state["sources"]["test_pipeline_state"]["resources"][ + "tx_other_name_async" + ]["gen"] + is True + ) def test_transform_function_state_write() -> None: @@ -401,46 +470,103 @@ def test_transform_function_state_write() -> None: # transform executed within the same thread def transform(item): dlt.current.resource_state()["form"] = item - return item*2 + return item * 2 r.add_map(transform) assert list(r) == [2, 4, 6] - assert state_module._last_full_state["sources"]["test_pipeline_state"]["resources"]["some_data_resource_state"]["form"] == 3 + assert ( + state_module._last_full_state["sources"]["test_pipeline_state"]["resources"][ + "some_data_resource_state" + ]["form"] + == 3 + ) def test_migrate_state(test_storage: FileStorage) -> None: + # test generation of version hash on migration to v3 state_v1 = load_json_case("state/state.v1") - state = migrate_state("test_pipeline", state_v1, state_v1["_state_engine_version"], STATE_ENGINE_VERSION) - assert state["_state_engine_version"] == STATE_ENGINE_VERSION + state = migrate_state("test_pipeline", state_v1, state_v1["_state_engine_version"], 3) + assert state["_state_engine_version"] == 3 assert "_local" in state + assert "_version_hash" in state + assert state["_version_hash"] == generate_version_hash(state) + + # full migration + state_v1 = load_json_case("state/state.v1") + state = migrate_state( + "test_pipeline", state_v1, state_v1["_state_engine_version"], STATE_ENGINE_VERSION + ) + assert state["_state_engine_version"] == STATE_ENGINE_VERSION + + # check destination migration + assert state["destination_name"] == "postgres" + assert state["destination_type"] == "dlt.destinations.postgres" + assert "destination" not in state with pytest.raises(PipelineStateEngineNoUpgradePathException) as py_ex: state_v1 = load_json_case("state/state.v1") - migrate_state("test_pipeline", state_v1, state_v1["_state_engine_version"], STATE_ENGINE_VERSION + 1) + migrate_state( + "test_pipeline", state_v1, state_v1["_state_engine_version"], STATE_ENGINE_VERSION + 1 + ) assert py_ex.value.init_engine == state_v1["_state_engine_version"] assert py_ex.value.from_engine == STATE_ENGINE_VERSION assert py_ex.value.to_engine == STATE_ENGINE_VERSION + 1 # also test pipeline init where state is old test_storage.create_folder("debug_pipeline") - shutil.copy(json_case_path("state/state.v1"), test_storage.make_full_path(f"debug_pipeline/{Pipeline.STATE_FILE}")) + shutil.copy( + json_case_path("state/state.v1"), + test_storage.make_full_path(f"debug_pipeline/{Pipeline.STATE_FILE}"), + ) p = dlt.attach(pipeline_name="debug_pipeline", pipelines_dir=test_storage.storage_path) assert p.dataset_name == "debug_pipeline_data" assert p.default_schema_name == "example_source" - - -def test_resource_state_name_not_normalized() -> None: - pipeline = 
dlt.pipeline(pipeline_name="emojis", destination="duckdb") - peacock_s = airtable_emojis().with_resources("🦚Peacock") - pipeline.extract(peacock_s) - assert peacock_s.resources["🦚Peacock"].state == {"🦚🦚🦚": "🦚"} - pipeline.normalize() - pipeline.load() - - # get state from destination - from dlt.pipeline.state_sync import load_state_from_destination - client: SqlJobClientBase - with pipeline.destination_client() as client: # type: ignore[assignment] - state = load_state_from_destination(pipeline.pipeline_name, client) - assert "airtable_emojis" in state["sources"] - assert state["sources"]["airtable_emojis"]["resources"] == {"🦚Peacock": {"🦚🦚🦚": "🦚"}} + state = p.state + assert state["_version_hash"] == generate_version_hash(state) + + # specifically check destination v3 to v4 migration + state_v3 = { + "destination": "dlt.destinations.redshift", + "staging": "dlt.destinations.filesystem", + "_state_engine_version": 3, + } + migrate_state( + "test_pipeline", state_v3, state_v3["_state_engine_version"], STATE_ENGINE_VERSION # type: ignore + ) + assert state_v3["destination_name"] == "redshift" + assert state_v3["destination_type"] == "dlt.destinations.redshift" + assert "destination" not in state_v3 + assert state_v3["staging_name"] == "filesystem" + assert state_v3["staging_type"] == "dlt.destinations.filesystem" + assert "staging" not in state_v3 + + state_v3 = { + "destination": "dlt.destinations.redshift", + "_state_engine_version": 3, + } + migrate_state( + "test_pipeline", state_v3, state_v3["_state_engine_version"], STATE_ENGINE_VERSION # type: ignore + ) + assert state_v3["destination_name"] == "redshift" + assert state_v3["destination_type"] == "dlt.destinations.redshift" + assert "destination" not in state_v3 + assert "staging_name" not in state_v3 + assert "staging_type" not in state_v3 + + state_v3 = {"destination": None, "staging": None, "_state_engine_version": 3} + migrate_state( + "test_pipeline", state_v3, state_v3["_state_engine_version"], STATE_ENGINE_VERSION # type: ignore + ) + assert "destination_name" not in state_v3 + assert "destination_type" not in state_v3 + assert "staging_name" not in state_v3 + assert "staging_type" not in state_v3 + + state_v3 = {"_state_engine_version": 2} + migrate_state( + "test_pipeline", state_v3, state_v3["_state_engine_version"], STATE_ENGINE_VERSION # type: ignore + ) + assert "destination_name" not in state_v3 + assert "destination_type" not in state_v3 + assert "staging_name" not in state_v3 + assert "staging_type" not in state_v3 diff --git a/tests/pipeline/test_pipeline_trace.py b/tests/pipeline/test_pipeline_trace.py index 706644b60e..cec578cb7b 100644 --- a/tests/pipeline/test_pipeline_trace.py +++ b/tests/pipeline/test_pipeline_trace.py @@ -20,24 +20,30 @@ from dlt.pipeline.exceptions import PipelineStepFailed from dlt.pipeline.pipeline import Pipeline -from dlt.pipeline.trace import PipelineTrace, SerializableResolvedValueTrace, describe_extract_data, load_trace +from dlt.pipeline.trace import ( + PipelineTrace, + SerializableResolvedValueTrace, + load_trace, +) from dlt.pipeline.track import slack_notify_load_success -from dlt.extract.source import DltResource, DltSource +from dlt.extract import DltResource, DltSource +from dlt.extract.extract import describe_extract_data from dlt.extract.pipe import Pipe from tests.utils import start_test_telemetry from tests.common.configuration.utils import toml_providers, environment -def test_create_trace(toml_providers: ConfigProvidersContext) -> None: + +def 
test_create_trace(toml_providers: ConfigProvidersContext, environment: Any) -> None: + dlt.secrets["load.delete_completed_jobs"] = True @dlt.source def inject_tomls( - api_type = dlt.config.value, - credentials: CredentialsConfiguration = dlt.secrets.value, - secret_value: TSecretValue = TSecretValue("123") # noqa: B008 + api_type=dlt.config.value, + credentials: CredentialsConfiguration = dlt.secrets.value, + secret_value: TSecretValue = TSecretValue("123"), # noqa: B008 ): - - @dlt.resource + @dlt.resource(write_disposition="replace", primary_key="id") def data(): yield [1, 2, 3] @@ -53,7 +59,7 @@ def data(): extract_info = p.extract(inject_tomls()) trace = p.last_trace assert trace is not None - assert p._trace is None + # assert p._trace is None assert len(trace.steps) == 1 step = trace.steps[0] assert step.step == "extract" @@ -62,7 +68,41 @@ def data(): assert isinstance(step.step_info, ExtractInfo) assert step.step_info.extract_data_info == [{"name": "inject_tomls", "data_type": "source"}] # check infos - assert isinstance(p.last_trace.last_extract_info, ExtractInfo) + extract_info = p.last_trace.last_extract_info + assert isinstance(extract_info, ExtractInfo) + # should have single job and single load id + assert len(extract_info.loads_ids) == 1 + load_id = extract_info.loads_ids[0] + assert len(extract_info.metrics) == 1 + + # extract of data in the first one + metrics = extract_info.metrics[load_id][0] + # inject tomls and dlt state + assert len(metrics["job_metrics"]) == 1 + assert "data" in metrics["table_metrics"] + assert set(metrics["resource_metrics"].keys()) == {"data"} + assert metrics["schema_name"] == "inject_tomls" + # check dag and hints + assert metrics["dag"] == [("data", "data")] + assert metrics["hints"]["data"] == {"write_disposition": "replace", "primary_key": "id"} + + metrics = extract_info.metrics[load_id][1] + # inject tomls and dlt state + assert len(metrics["job_metrics"]) == 1 + assert "_dlt_pipeline_state" in metrics["table_metrics"] + assert set(metrics["resource_metrics"].keys()) == {"_dlt_pipeline_state"} + assert metrics["schema_name"] == "inject_tomls" + # check dag and hints + assert metrics["dag"] == [("_dlt_pipeline_state", "_dlt_pipeline_state")] + # state has explicit columns set + assert metrics["hints"]["_dlt_pipeline_state"]["original_columns"] == "dict" + + # check packages + assert len(extract_info.load_packages) == 1 + # two jobs + print(extract_info.load_packages[0]) + assert len(extract_info.load_packages[0].jobs["new_jobs"]) == 2 + assert extract_info.load_packages[0].state == "extracted" # check config trace resolved = _find_resolved_value(trace.resolved_config_values, "api_type", []) @@ -82,11 +122,12 @@ def data(): assert resolved.is_secret_hint is True assert resolved.value == databricks_creds assert_trace_printable(trace) + # activate pipeline because other was running in assert trace + p.activate() # extract with exception @dlt.source def async_exception(max_range=1): - async def get_val(v): await asyncio.sleep(0.1) if v % 3 == 0: @@ -95,7 +136,7 @@ async def get_val(v): @dlt.resource def data(): - yield from [get_val(v) for v in range(1,max_range)] + yield from [get_val(v) for v in range(1, max_range)] return data() @@ -109,9 +150,21 @@ def data(): assert step.step == "extract" assert isinstance(step.step_exception, str) assert isinstance(step.step_info, ExtractInfo) + assert len(step.exception_traces) > 0 assert step.step_info.extract_data_info == [{"name": "async_exception", "data_type": "source"}] 
assert_trace_printable(trace) + extract_info = step.step_info + # only new (unprocessed) package is present, all other metrics are empty, state won't be extracted + assert len(extract_info.loads_ids) == 1 + load_id = extract_info.loads_ids[0] + package = extract_info.load_packages[0] + assert package.state == "new" + # no jobs + assert len(package.jobs["new_jobs"]) == 0 + # no metrics - exception happened first + assert len(extract_info.metrics[load_id]) == 0 + # normalize norm_info = p.normalize() trace = p.last_trace @@ -122,7 +175,24 @@ def data(): assert step.step_info is norm_info assert_trace_printable(trace) assert isinstance(p.last_trace.last_normalize_info, NormalizeInfo) - assert p.last_trace.last_normalize_info.row_counts == {'_dlt_pipeline_state': 1, 'data': 3} + assert p.last_trace.last_normalize_info.row_counts == {"_dlt_pipeline_state": 1, "data": 3} + + assert len(norm_info.loads_ids) == 1 + load_id = norm_info.loads_ids[0] + assert len(norm_info.metrics) == 1 + + # just one load package with single metrics + assert len(norm_info.metrics[load_id]) == 1 + norm_metrics = norm_info.metrics[load_id][0] + # inject tomls and dlt state + assert len(norm_metrics["job_metrics"]) == 2 + assert "data" in norm_metrics["table_metrics"] + + # check packages + assert len(norm_info.load_packages) == 1 + # two jobs + assert len(norm_info.load_packages[0].jobs["new_jobs"]) == 2 + assert norm_info.load_packages[0].state == "normalized" # load os.environ["COMPLETED_PROB"] = "1.0" # make it complete immediately @@ -133,12 +203,21 @@ def data(): step = trace.steps[3] assert step.step == "load" assert step.step_info is load_info + load_info = step.step_info # type: ignore[assignment] + + # check packages + assert len(load_info.load_packages) == 1 + # two jobs + assert load_info.load_packages[0].state == "loaded" + assert len(load_info.load_packages[0].jobs["completed_jobs"]) == 2 + resolved = _find_resolved_value(trace.resolved_config_values, "completed_prob", []) assert resolved.is_secret_hint is False assert resolved.value == "1.0" assert resolved.config_type_name == "DummyClientConfiguration" assert_trace_printable(trace) assert isinstance(p.last_trace.last_load_info, LoadInfo) + p.activate() # run resets the trace load_info = inject_tomls().run() @@ -162,14 +241,14 @@ def data(): def test_save_load_trace() -> None: os.environ["COMPLETED_PROB"] = "1.0" - info = dlt.pipeline().run([1,2,3], table_name="data", destination="dummy") + info = dlt.pipeline().run([1, 2, 3], table_name="data", destination="dummy") pipeline = dlt.pipeline() # will get trace from working dir trace = pipeline.last_trace assert trace is not None assert pipeline._trace is None assert len(trace.steps) == 4 == len(info.pipeline.last_trace.steps) # type: ignore[attr-defined] - step = trace.steps[-2] # the previoius to last one should be load + step = trace.steps[-2] # the previous to last one should be load assert step.step == "load" resolved = _find_resolved_value(trace.resolved_config_values, "completed_prob", []) assert resolved.is_secret_hint is False @@ -177,7 +256,18 @@ def test_save_load_trace() -> None: assert resolved.config_type_name == "DummyClientConfiguration" assert_trace_printable(trace) # check row counts - assert pipeline.last_trace.last_normalize_info.row_counts == {'_dlt_pipeline_state': 1, 'data': 3} + assert pipeline.last_trace.last_normalize_info.row_counts == { + "_dlt_pipeline_state": 1, + "data": 3, + } + # reactivate the pipeline + pipeline.activate() + + # load trace and check if all elements are 
present + loaded_trace = load_trace(pipeline.working_dir) + print(loaded_trace.asstr(2)) + assert len(trace.steps) == 4 + assert loaded_trace.asdict() == trace.asdict() # exception also saves trace @dlt.resource @@ -207,22 +297,31 @@ def data(): def test_disable_trace(environment: DictStrStr) -> None: environment["ENABLE_RUNTIME_TRACE"] = "false" environment["COMPLETED_PROB"] = "1.0" - dlt.pipeline().run([1,2,3], table_name="data", destination="dummy") + dlt.pipeline().run([1, 2, 3], table_name="data", destination="dummy") assert dlt.pipeline().last_trace is None def test_trace_on_restore_state(environment: DictStrStr) -> None: environment["COMPLETED_PROB"] = "1.0" - def _sync_destination_patch(self: Pipeline, destination: str = None, staging: str = None, dataset_name: str = None): + def _sync_destination_patch( + self: Pipeline, destination: str = None, staging: str = None, dataset_name: str = None + ): # just wipe the pipeline simulating deleted dataset self._wipe_working_folder() - self._configure(self._schema_storage_config.export_schema_path, self._schema_storage_config.import_schema_path, False) - - with patch.object(Pipeline, 'sync_destination', _sync_destination_patch): - dlt.pipeline().run([1,2,3], table_name="data", destination="dummy") + self._configure( + self._schema_storage_config.export_schema_path, + self._schema_storage_config.import_schema_path, + False, + ) + + with patch.object(Pipeline, "sync_destination", _sync_destination_patch): + dlt.pipeline().run([1, 2, 3], table_name="data", destination="dummy") assert len(dlt.pipeline().last_trace.steps) == 4 - assert dlt.pipeline().last_trace.last_normalize_info.row_counts == {'_dlt_pipeline_state': 1, 'data': 3} + assert dlt.pipeline().last_trace.last_normalize_info.row_counts == { + "_dlt_pipeline_state": 1, + "data": 3, + } def test_load_none_trace() -> None: @@ -231,14 +330,18 @@ def test_load_none_trace() -> None: def test_trace_telemetry() -> None: - with patch("dlt.common.runtime.sentry.before_send", _mock_sentry_before_send), patch("dlt.common.runtime.segment.before_send", _mock_segment_before_send): + with patch("dlt.common.runtime.sentry.before_send", _mock_sentry_before_send), patch( + "dlt.common.runtime.segment.before_send", _mock_segment_before_send + ): # os.environ["FAIL_PROB"] = "1.0" # make it complete immediately start_test_telemetry() SEGMENT_SENT_ITEMS.clear() SENTRY_SENT_ITEMS.clear() # default dummy fails all files - load_info = dlt.pipeline().run([1,2,3], table_name="data", destination="dummy", dataset_name="data_data") + load_info = dlt.pipeline().run( + [1, 2, 3], table_name="data", destination="dummy", dataset_name="data_data" + ) # we should have 4 segment items assert len(SEGMENT_SENT_ITEMS) == 4 expected_steps = ["extract", "normalize", "load", "run"] @@ -246,9 +349,16 @@ def test_trace_telemetry() -> None: assert event["event"] == f"pipeline_{step}" assert event["properties"]["success"] is True assert event["properties"]["destination_name"] == "dummy" - assert event["properties"]["pipeline_name_hash"] == digest128(load_info.pipeline.pipeline_name) - assert event["properties"]["dataset_name_hash"] == digest128(load_info.pipeline.dataset_name) - assert event["properties"]["default_schema_name_hash"] == digest128(load_info.pipeline.default_schema_name) + assert event["properties"]["destination_type"] == "dlt.destinations.dummy" + assert event["properties"]["pipeline_name_hash"] == digest128( + load_info.pipeline.pipeline_name + ) + assert event["properties"]["dataset_name_hash"] == digest128( + 
load_info.pipeline.dataset_name + ) + assert event["properties"]["default_schema_name_hash"] == digest128( + load_info.pipeline.default_schema_name + ) assert isinstance(event["properties"]["elapsed"], float) assert isinstance(event["properties"]["transaction_id"], str) # check extract info @@ -275,10 +385,13 @@ def data(): assert event["event"] == "pipeline_extract" assert event["properties"]["success"] is False assert event["properties"]["destination_name"] == "dummy" + assert event["properties"]["destination_type"] == "dlt.destinations.dummy" assert isinstance(event["properties"]["elapsed"], float) # check extract info if step == "extract": - assert event["properties"]["extract_data"] == [{"name": "data", "data_type": "resource"}] + assert event["properties"]["extract_data"] == [ + {"name": "data", "data_type": "resource"} + ] # we didn't log any errors assert len(SENTRY_SENT_ITEMS) == 0 @@ -291,6 +404,7 @@ def data(): assert event["event"] == "pipeline_extract" assert event["properties"]["success"] is True assert event["properties"]["destination_name"] is None + assert event["properties"]["destination_type"] is None assert event["properties"]["pipeline_name_hash"] == digest128("fresh") assert event["properties"]["dataset_name_hash"] == digest128(p.dataset_name) assert event["properties"]["default_schema_name_hash"] == digest128(p.default_schema_name) @@ -298,25 +412,32 @@ def data(): def test_extract_data_describe() -> None: schema = Schema("test") - assert describe_extract_data(DltSource("sss_extract", "sect", schema)) == [{"name": "sss_extract", "data_type": "source"}] - assert describe_extract_data(DltResource(Pipe("rrr_extract"), None, False)) == [{"name": "rrr_extract", "data_type": "resource"}] - assert describe_extract_data([DltSource("sss_extract", "sect", schema)]) == [{"name": "sss_extract", "data_type": "source"}] - assert describe_extract_data([DltResource(Pipe("rrr_extract"), None, False)]) == [{"name": "rrr_extract", "data_type": "resource"}] + assert describe_extract_data(DltSource(schema, "sect")) == [ + {"name": "test", "data_type": "source"} + ] + assert describe_extract_data(DltResource(Pipe("rrr_extract"), None, False)) == [ + {"name": "rrr_extract", "data_type": "resource"} + ] + assert describe_extract_data([DltSource(schema, "sect")]) == [ + {"name": "test", "data_type": "source"} + ] + assert describe_extract_data([DltResource(Pipe("rrr_extract"), None, False)]) == [ + {"name": "rrr_extract", "data_type": "resource"} + ] assert describe_extract_data( - [DltResource(Pipe("rrr_extract"), None, False), DltSource("sss_extract", "sect", schema)] - ) == [ - {"name": "rrr_extract", "data_type": "resource"}, {"name": "sss_extract", "data_type": "source"} - ] + [DltResource(Pipe("rrr_extract"), None, False), DltSource(schema, "sect")] + ) == [{"name": "rrr_extract", "data_type": "resource"}, {"name": "test", "data_type": "source"}] assert describe_extract_data([{"a": "b"}]) == [{"name": "", "data_type": "dict"}] from pandas import DataFrame + # we assume that List content has same type - assert describe_extract_data([DataFrame(), {"a": "b"}]) == [{"name": "", "data_type": "DataFrame"}] + assert describe_extract_data([DataFrame(), {"a": "b"}]) == [ + {"name": "", "data_type": "DataFrame"} + ] # first unnamed element in the list breaks checking info assert describe_extract_data( - [DltResource(Pipe("rrr_extract"), None, False), DataFrame(), DltSource("sss_extract", "sect", schema)] - ) == [ - {"name": "rrr_extract", "data_type": "resource"}, {"name": "", "data_type": 
"DataFrame"} - ] + [DltResource(Pipe("rrr_extract"), None, False), DataFrame(), DltSource(schema, "sect")] + ) == [{"name": "rrr_extract", "data_type": "resource"}, {"name": "", "data_type": "DataFrame"}] def test_slack_hook(environment: DictStrStr) -> None: @@ -328,7 +449,7 @@ def test_slack_hook(environment: DictStrStr) -> None: environment["RUNTIME__SLACK_INCOMING_HOOK"] = hook_url with requests_mock.mock() as m: m.post(hook_url, json={}) - load_info = dlt.pipeline().run([1,2,3], table_name="data", destination="dummy") + load_info = dlt.pipeline().run([1, 2, 3], table_name="data", destination="dummy") assert slack_notify_load_success(load_info.pipeline.runtime_config.slack_incoming_hook, load_info, load_info.pipeline.last_trace) == 200 # type: ignore[attr-defined] assert m.called message = m.last_request.json() @@ -339,7 +460,7 @@ def test_slack_hook(environment: DictStrStr) -> None: def test_broken_slack_hook(environment: DictStrStr) -> None: environment["COMPLETED_PROB"] = "1.0" environment["RUNTIME__SLACK_INCOMING_HOOK"] = "http://localhost:22" - load_info = dlt.pipeline().run([1,2,3], table_name="data", destination="dummy") + load_info = dlt.pipeline().run([1, 2, 3], table_name="data", destination="dummy") # connection error assert slack_notify_load_success(load_info.pipeline.runtime_config.slack_incoming_hook, load_info, load_info.pipeline.last_trace) == -1 # type: ignore[attr-defined] # pipeline = dlt.pipeline() @@ -352,26 +473,43 @@ def test_broken_slack_hook(environment: DictStrStr) -> None: # assert run_step.step_exception is None -def _find_resolved_value(resolved: List[SerializableResolvedValueTrace], key: str, sections: List[str]) -> SerializableResolvedValueTrace: +def _find_resolved_value( + resolved: List[SerializableResolvedValueTrace], key: str, sections: List[str] +) -> SerializableResolvedValueTrace: return next((v for v in resolved if v.key == key and v.sections == sections), None) SEGMENT_SENT_ITEMS = [] + + def _mock_segment_before_send(event: DictStrAny) -> DictStrAny: SEGMENT_SENT_ITEMS.append(event) return event SENTRY_SENT_ITEMS = [] + + def _mock_sentry_before_send(event: DictStrAny, _unused_hint: Any = None) -> DictStrAny: SENTRY_SENT_ITEMS.append(event) return event + def assert_trace_printable(trace: PipelineTrace) -> None: str(trace) trace.asstr(0) trace.asstr(1) - json.dumps(trace) + trace.asdict() with io.BytesIO() as b: - json.typed_dump(trace, b) + json.typed_dump(trace, b, pretty=True) b.getvalue() + json.dumps(trace) + + # load trace to duckdb + from dlt.destinations import duckdb + + trace_pipeline = dlt.pipeline("trace", destination=duckdb(":pipeline:")).drop() + load_info = trace_pipeline.run([trace], table_name="trace_data") + load_info.raise_on_failed_jobs() + + # print(trace_pipeline.default_schema.to_pretty_yaml()) diff --git a/tests/pipeline/test_platform_connection.py b/tests/pipeline/test_platform_connection.py new file mode 100644 index 0000000000..a0893cfc93 --- /dev/null +++ b/tests/pipeline/test_platform_connection.py @@ -0,0 +1,73 @@ +import dlt +import os +import time +import requests_mock + +TRACE_URL_SUFFIX = "/trace" +STATE_URL_SUFFIX = "/state" + + +def test_platform_connection() -> None: + mock_platform_url = "http://platform.com/endpoint" + + os.environ["RUNTIME__DLTHUB_DSN"] = mock_platform_url + + trace_url = mock_platform_url + TRACE_URL_SUFFIX + state_url = mock_platform_url + STATE_URL_SUFFIX + + # simple pipeline + @dlt.source(name="first_source") + def my_source(): + @dlt.resource(name="test_resource") + def data(): + 
yield [1, 2, 3] + + return data() + + @dlt.source(name="second_source") + def my_source_2(): + @dlt.resource(name="test_resource") + def data(): + yield [1, 2, 3] + + return data() + + p = dlt.pipeline( + destination="duckdb", + pipeline_name="platform_test_pipeline", + dataset_name="platform_test_dataset", + ) + + with requests_mock.mock() as m: + m.put(mock_platform_url, json={}, status_code=200) + p.run([my_source(), my_source_2()]) + + # sleep a bit and find trace in mock requests + time.sleep(2) + + trace_result = None + state_result = None + for call in m.request_history: + if call.url == trace_url: + assert not trace_result, "Multiple calls to trace endpoint" + trace_result = call.json() + + if call.url == state_url: + assert not state_result, "Multiple calls to state endpoint" + state_result = call.json() + + # basic check of trace result + assert trace_result, "no trace" + assert trace_result["pipeline_name"] == "platform_test_pipeline" + assert len(trace_result["steps"]) == 4 + assert trace_result["execution_context"]["library"]["name"] == "dlt" + + # basic check of state result + assert state_result, "no state update" + assert state_result["pipeline_name"] == "platform_test_pipeline" + assert state_result["dataset_name"] == "platform_test_dataset" + assert len(state_result["schemas"]) == 2 + assert {state_result["schemas"][0]["name"], state_result["schemas"][1]["name"]} == { + "first_source", + "second_source", + } diff --git a/tests/pipeline/test_schema_contracts.py b/tests/pipeline/test_schema_contracts.py new file mode 100644 index 0000000000..2f2e6b6932 --- /dev/null +++ b/tests/pipeline/test_schema_contracts.py @@ -0,0 +1,624 @@ +import dlt, os, pytest +import contextlib +from typing import Any, Callable, Iterator, Union, Optional + +from dlt.common.schema.typing import TSchemaContract +from dlt.common.utils import uniq_id +from dlt.common.schema.exceptions import DataValidationError + +from dlt.extract import DltResource +from dlt.pipeline.pipeline import Pipeline +from dlt.pipeline.exceptions import PipelineStepFailed + +from tests.load.pipeline.utils import load_table_counts +from tests.utils import ( + TDataItemFormat, + skip_if_not_active, + data_to_item_format, + ALL_DATA_ITEM_FORMATS, +) + +skip_if_not_active("duckdb") + +schema_contract = ["evolve", "discard_value", "discard_row", "freeze"] +LOCATIONS = ["source", "resource", "override"] +SCHEMA_ELEMENTS = ["tables", "columns", "data_type"] + + +@contextlib.contextmanager +def raises_frozen_exception(check_raise: bool = True) -> Any: + if not check_raise: + yield + return + with pytest.raises(PipelineStepFailed) as py_exc: + yield + if py_exc.value.step == "extract": + assert isinstance(py_exc.value.__context__, DataValidationError) + else: + # normalize + assert isinstance(py_exc.value.__context__.__context__, DataValidationError) + + +def items(settings: TSchemaContract) -> Any: + # NOTE: names must be normalizeds + @dlt.resource(name="Items", write_disposition="append", schema_contract=settings) + def load_items(): + for _, index in enumerate(range(0, 10), 1): + yield {"id": index, "SomeInt": 1, "name": f"item {index}"} + + return load_items + + +def items_with_variant(settings: TSchemaContract) -> Any: + @dlt.resource(name="Items", write_disposition="append", schema_contract=settings) + def load_items(): + for _, index in enumerate(range(0, 10), 1): + yield {"id": index, "name": f"item {index}", "SomeInt": "hello"} + + return load_items + + +def items_with_new_column(settings: TSchemaContract) -> Any: + 
@dlt.resource(name="Items", write_disposition="append", schema_contract=settings) + def load_items(): + for _, index in enumerate(range(0, 10), 1): + yield {"id": index, "name": f"item {index}", "New^Col": "hello"} + + return load_items + + +def items_with_subtable(settings: TSchemaContract) -> Any: + @dlt.resource(name="Items", write_disposition="append", schema_contract=settings) + def load_items(): + for _, index in enumerate(range(0, 10), 1): + yield { + "id": index, + "name": f"item {index}", + "sub_items": [{"id": index + 1000, "name": f"sub item {index + 1000}"}], + } + + return load_items + + +def new_items(settings: TSchemaContract) -> Any: + @dlt.resource(name="new_items", write_disposition="append", schema_contract=settings) + def load_items(): + for _, index in enumerate(range(0, 10), 1): + yield {"id": index, "some_int": 1, "name": f"item {index}"} + + return load_items + + +OLD_COLUMN_NAME = "name" +NEW_COLUMN_NAME = "new_col" +VARIANT_COLUMN_NAME = "some_int__v_text" +SUBITEMS_TABLE = "items__sub_items" +NEW_ITEMS_TABLE = "new_items" + + +def run_resource( + pipeline: Pipeline, + resource_fun: Callable[..., DltResource], + settings: Any, + item_format: TDataItemFormat = "json", + duplicates: int = 1, +) -> None: + for item in settings.keys(): + assert item in LOCATIONS + ev_settings = settings[item] + if ev_settings in schema_contract: + continue + for key, val in ev_settings.items(): + assert val in schema_contract + assert key in SCHEMA_ELEMENTS + + @dlt.source(name="freeze_tests", schema_contract=settings.get("source")) + def source() -> Iterator[DltResource]: + for idx in range(duplicates): + resource: DltResource = resource_fun(settings.get("resource")) + if item_format != "json": + resource._pipe.replace_gen(data_to_item_format(item_format, resource._pipe.gen())) # type: ignore + resource.table_name = resource.name + yield resource.with_name(resource.name + str(idx)) + + # run pipeline + pipeline.run(source(), schema_contract=settings.get("override")) + + # check global settings + assert pipeline.default_schema._settings.get("schema_contract", None) == ( + settings.get("override") or settings.get("source") + ) + + # check items table settings + # assert pipeline.default_schema.tables["items"].get("schema_contract", {}) == (settings.get("resource") or {}) + + # check effective table settings + # assert resolve_contract_settings_for_table(None, "items", pipeline.default_schema) == expand_schema_contract_settings(settings.get("resource") or settings.get("override") or "evolve") + + +def get_pipeline(): + import duckdb + + return dlt.pipeline( + pipeline_name=uniq_id(), + destination="duckdb", + credentials=duckdb.connect(":memory:"), + full_refresh=True, + ) + + +@pytest.mark.parametrize("contract_setting", schema_contract) +@pytest.mark.parametrize("setting_location", LOCATIONS) +@pytest.mark.parametrize("item_format", ALL_DATA_ITEM_FORMATS) +def test_new_tables( + contract_setting: str, setting_location: str, item_format: TDataItemFormat +) -> None: + pipeline = get_pipeline() + + full_settings = {setting_location: {"tables": contract_setting}} + run_resource(pipeline, items, {}, item_format) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 10 + assert OLD_COLUMN_NAME in pipeline.default_schema.tables["items"]["columns"] + + run_resource(pipeline, items_with_new_column, full_settings, item_format) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in 
pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 20 + assert NEW_COLUMN_NAME in pipeline.default_schema.tables["items"]["columns"] + + # test adding new table + with raises_frozen_exception(contract_setting == "freeze"): + run_resource(pipeline, new_items, full_settings, item_format) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts.get("new_items", 0) == (10 if contract_setting in ["evolve"] else 0) + # delete extracted files if left after exception + pipeline.drop_pending_packages() + + # NOTE: arrow / pandas do not support variants and subtables so we must skip + if item_format == "json": + # run add variant column + run_resource(pipeline, items_with_variant, full_settings) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 30 + assert VARIANT_COLUMN_NAME in pipeline.default_schema.tables["items"]["columns"] + + # test adding new subtable + with raises_frozen_exception(contract_setting == "freeze"): + run_resource(pipeline, items_with_subtable, full_settings) + + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 30 if contract_setting in ["freeze"] else 40 + assert table_counts.get(SUBITEMS_TABLE, 0) == (10 if contract_setting in ["evolve"] else 0) + + +@pytest.mark.parametrize("contract_setting", schema_contract) +@pytest.mark.parametrize("setting_location", LOCATIONS) +@pytest.mark.parametrize("item_format", ALL_DATA_ITEM_FORMATS) +def test_new_columns( + contract_setting: str, setting_location: str, item_format: TDataItemFormat +) -> None: + full_settings = {setting_location: {"columns": contract_setting}} + + pipeline = get_pipeline() + run_resource(pipeline, items, {}, item_format) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 10 + assert OLD_COLUMN_NAME in pipeline.default_schema.tables["items"]["columns"] + + # new should work + run_resource(pipeline, new_items, full_settings, item_format) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + expected_items_count = 10 + assert table_counts["items"] == expected_items_count + assert table_counts[NEW_ITEMS_TABLE] == 10 + + # test adding new column twice: filter will try to catch it before it is added for the second time + with raises_frozen_exception(contract_setting == "freeze"): + run_resource(pipeline, items_with_new_column, full_settings, item_format, duplicates=2) + # delete extracted files if left after exception + pipeline.drop_pending_packages() + + if contract_setting == "evolve": + assert NEW_COLUMN_NAME in pipeline.default_schema.tables["items"]["columns"] + else: + assert NEW_COLUMN_NAME not in pipeline.default_schema.tables["items"]["columns"] + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + expected_items_count += 20 if contract_setting in ["evolve", "discard_value"] else 0 + assert table_counts["items"] == expected_items_count + + # NOTE: arrow / pandas do not support variants and subtables so we must skip + if item_format == "json": + # subtable should work + run_resource(pipeline, items_with_subtable, full_settings) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in 
pipeline.default_schema.data_tables()] + ) + expected_items_count += 10 + assert table_counts["items"] == expected_items_count + assert table_counts[SUBITEMS_TABLE] == 10 + + # test adding variant column + run_resource(pipeline, items_with_variant, full_settings) + # variants are not new columns and should be able to always evolve + assert VARIANT_COLUMN_NAME in pipeline.default_schema.tables["items"]["columns"] + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + expected_items_count += 10 + assert table_counts["items"] == expected_items_count + + +@pytest.mark.parametrize("contract_setting", schema_contract) +@pytest.mark.parametrize("setting_location", LOCATIONS) +def test_freeze_variants(contract_setting: str, setting_location: str) -> None: + full_settings = {setting_location: {"data_type": contract_setting}} + pipeline = get_pipeline() + run_resource(pipeline, items, {}) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 10 + assert OLD_COLUMN_NAME in pipeline.default_schema.tables["items"]["columns"] + + # subtable should work + run_resource(pipeline, items_with_subtable, full_settings) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 20 + assert table_counts[SUBITEMS_TABLE] == 10 + + # new should work + run_resource(pipeline, new_items, full_settings) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 20 + assert table_counts[NEW_ITEMS_TABLE] == 10 + + # test adding new column + run_resource(pipeline, items_with_new_column, full_settings) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 30 + assert NEW_COLUMN_NAME in pipeline.default_schema.tables["items"]["columns"] + + # test adding variant column + with raises_frozen_exception(contract_setting == "freeze"): + run_resource(pipeline, items_with_variant, full_settings) + + if contract_setting == "evolve": + assert VARIANT_COLUMN_NAME in pipeline.default_schema.tables["items"]["columns"] + else: + assert VARIANT_COLUMN_NAME not in pipeline.default_schema.tables["items"]["columns"] + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == (40 if contract_setting in ["evolve", "discard_value"] else 30) + + +def test_settings_precedence() -> None: + pipeline = get_pipeline() + + # load some data + run_resource(pipeline, items, {}) + + # trying to add new column when forbidden on resource will fail + run_resource(pipeline, items_with_new_column, {"resource": {"columns": "discard_row"}}) + + # when allowed on override it will work + run_resource( + pipeline, + items_with_new_column, + {"resource": {"columns": "freeze"}, "override": {"columns": "evolve"}}, + ) + + +def test_settings_precedence_2() -> None: + pipeline = get_pipeline() + + # load some data + run_resource(pipeline, items, {"source": {"data_type": "discard_row"}}) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 10 + + # trying to add variant when forbidden on source will fail + run_resource(pipeline, items_with_variant, {"source": {"data_type": 
"discard_row"}}) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 10 + + # if allowed on resource it will pass + run_resource( + pipeline, + items_with_variant, + {"resource": {"data_type": "evolve"}, "source": {"data_type": "discard_row"}}, + ) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 20 + + # if allowed on override it will also pass + run_resource( + pipeline, + items_with_variant, + { + "resource": {"data_type": "discard_row"}, + "source": {"data_type": "discard_row"}, + "override": {"data_type": "evolve"}, + }, + ) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 30 + + +@pytest.mark.parametrize("setting_location", LOCATIONS) +def test_change_mode(setting_location: str) -> None: + pipeline = get_pipeline() + + # load some data + run_resource(pipeline, items, {}) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 10 + + # trying to add variant when forbidden will fail + run_resource(pipeline, items_with_variant, {setting_location: {"data_type": "discard_row"}}) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 10 + + # now allow + run_resource(pipeline, items_with_variant, {setting_location: {"data_type": "evolve"}}) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 20 + + +@pytest.mark.parametrize("setting_location", LOCATIONS) +def test_single_settings_value(setting_location: str) -> None: + pipeline = get_pipeline() + + run_resource(pipeline, items, {}) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 10 + + # trying to add variant when forbidden will fail + run_resource(pipeline, items_with_variant, {setting_location: "discard_row"}) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 10 + + # trying to add new column will fail + run_resource(pipeline, items_with_new_column, {setting_location: "discard_row"}) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 10 + + # trying to add new table will fail + run_resource(pipeline, new_items, {setting_location: "discard_row"}) + table_counts = load_table_counts( + pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()] + ) + assert table_counts["items"] == 10 + assert "new_items" not in table_counts + + +def test_data_contract_interaction() -> None: + """ + ensure data contracts with pydantic are enforced properly + """ + from pydantic import BaseModel, Extra + + class Items(BaseModel): + id: int # noqa: A003 + name: Optional[str] + amount: Union[int, str, None] + + class Config: + extra = Extra.forbid + + @dlt.resource(name="items") + def get_items(): + yield from [ + { + "id": 5, + "name": "dave", + "amount": 6, + } + ] + + @dlt.resource(name="items", columns=Items) + def get_items_with_model(): + yield from [ + { + "id": 5, + "name": "dave", + "amount": 6, + } + ] + 
+ @dlt.resource(name="items") + def get_items_new_col(): + yield from [{"id": 5, "name": "dave", "amount": 6, "new_col": "hello"}] + + @dlt.resource(name="items") + def get_items_subtable(): + yield from [{"id": 5, "name": "dave", "amount": 6, "sub": [{"hello": "dave"}]}] + + # test valid object + pipeline = get_pipeline() + # items with model work + pipeline.run([get_items_with_model()]) + assert pipeline.last_trace.last_normalize_info.row_counts.get("items", 0) == 1 + + # loading once with pydantic will freeze the cols + pipeline = get_pipeline() + pipeline.run([get_items_with_model()]) + with raises_frozen_exception(True): + pipeline.run([get_items_new_col()]) + + # it is possible to override contract when there are new columns + # items with model alone does not work, since contract is set to freeze + pipeline = get_pipeline() + pipeline.run([get_items_with_model()]) + pipeline.run([get_items_new_col()], schema_contract="evolve") + assert pipeline.last_trace.last_normalize_info.row_counts.get("items", 0) == 1 + + +def test_different_objects_in_one_load() -> None: + pipeline = get_pipeline() + + @dlt.resource(name="items") + def get_items(): + yield {"id": 1, "name": "dave", "amount": 50} + yield {"id": 2, "name": "dave", "amount": 50, "new_column": "some val"} + + pipeline.run([get_items()], schema_contract={"columns": "freeze", "tables": "evolve"}) + assert pipeline.last_trace.last_normalize_info.row_counts["items"] == 2 + + +@pytest.mark.parametrize("table_mode", ["discard_row", "evolve", "freeze"]) +def test_dynamic_tables(table_mode: str) -> None: + pipeline = get_pipeline() + + # adding columns with a data type makes this columns complete which makes this table complete -> it fails in the normalize because + # the tables is NOT new according to normalizer so the row is not discarded + # remove that and it will pass because the table contains just one incomplete column so it is incomplete so it is treated as new + # if you uncomment update code in the extract the problem probably goes away + @dlt.resource(name="items", table_name=lambda i: i["tables"], columns={"id": {}}) + def get_items(): + yield { + "id": 1, + "tables": "one", + } + yield {"id": 2, "tables": "two", "new_column": "some val"} + + with raises_frozen_exception(table_mode == "freeze"): + pipeline.run([get_items()], schema_contract={"tables": table_mode}) + + if table_mode != "freeze": + assert pipeline.last_trace.last_normalize_info.row_counts.get("one", 0) == ( + 1 if table_mode == "evolve" else 0 + ) + assert pipeline.last_trace.last_normalize_info.row_counts.get("two", 0) == ( + 1 if table_mode == "evolve" else 0 + ) + + +@pytest.mark.parametrize("column_mode", ["discard_row", "evolve", "freeze"]) +def test_defined_column_in_new_table(column_mode: str) -> None: + pipeline = get_pipeline() + + @dlt.resource(name="items", columns=[{"name": "id", "data_type": "bigint", "nullable": False}]) + def get_items(): + yield { + "id": 1, + "key": "value", + } + + pipeline.run([get_items()], schema_contract={"columns": column_mode}) + assert pipeline.last_trace.last_normalize_info.row_counts.get("items", 0) == 1 + + +@pytest.mark.parametrize("column_mode", ["freeze", "discard_row", "evolve"]) +def test_new_column_from_hint_and_data(column_mode: str) -> None: + pipeline = get_pipeline() + + # we define complete column on id, this creates a complete table + # normalizer does not know that it is a new table and discards the row + # and it also excepts on column freeze + + @dlt.resource(name="items", columns=[{"name": "id", 
"data_type": "bigint", "nullable": False}]) + def get_items(): + yield { + "id": 1, + "key": "value", + } + + pipeline.run([get_items()], schema_contract={"columns": column_mode}) + assert pipeline.last_trace.last_normalize_info.row_counts.get("items", 0) == 1 + + +@pytest.mark.parametrize("column_mode", ["freeze", "discard_row", "evolve"]) +def test_two_new_columns_from_two_rows(column_mode: str) -> None: + pipeline = get_pipeline() + + # this creates a complete table in first row + # and adds a new column to complete tables in 2nd row + # the test does not fail only because you clone schema in normalize + + @dlt.resource() + def items(): + yield { + "id": 1, + } + yield { + "id": 1, + "key": "value", + } + + pipeline.run([items()], schema_contract={"columns": column_mode}) + assert pipeline.last_trace.last_normalize_info.row_counts["items"] == 2 + + +@pytest.mark.parametrize("column_mode", ["freeze", "discard_row", "evolve"]) +def test_dynamic_new_columns(column_mode: str) -> None: + pipeline = get_pipeline() + + # fails because dlt is not able to add _dlt_load_id to tables. I think we should do an exception for those + # 1. schema.dlt_tables() - everything evolve + # 2. is_dlt_column (I hope we have helper) - column evolve, data_type freeze + + def dynamic_columns(item): + if item["id"] == 1: + return [{"name": "key", "data_type": "text", "nullable": True}] + if item["id"] == 2: + return [{"name": "id", "data_type": "bigint", "nullable": True}] + + @dlt.resource(name="items", table_name=lambda i: "items", schema_contract={"columns": column_mode}) # type: ignore + def get_items(): + yield { + "id": 1, + "key": "value", + } + yield { + "id": 2, + "key": "value", + } + + items = get_items() + items.apply_hints(columns=dynamic_columns) + # apply hints apply to `items` not the original resource, so doing get_items() below removed them completely + pipeline.run(items) + assert pipeline.last_trace.last_normalize_info.row_counts.get("items", 0) == 2 diff --git a/tests/pipeline/test_schema_updates.py b/tests/pipeline/test_schema_updates.py index 97345061e3..be397f796c 100644 --- a/tests/pipeline/test_schema_updates.py +++ b/tests/pipeline/test_schema_updates.py @@ -1,31 +1,30 @@ +import os import dlt def test_schema_updates() -> None: + os.environ["COMPLETED_PROB"] = "1.0" # make it complete immediately p = dlt.pipeline(pipeline_name="test_schema_updates", full_refresh=True, destination="dummy") @dlt.source() def source(): @dlt.resource() def resource(): - yield [1,2,3] + yield [1, 2, 3] + return resource # test without normalizer attributes s = source() p.run(s, table_name="items", write_disposition="append") - assert p.default_schema._normalizers_config["json"]["config"] == {} + assert "config" not in p.default_schema._normalizers_config["json"] # add table propagation s = source() p.run(s, table_name="items", write_disposition="merge") assert p.default_schema._normalizers_config["json"]["config"] == { - "propagation": { - "tables": { - "items": {'_dlt_id': '_dlt_root_id'} - } - } + "propagation": {"tables": {"items": {"_dlt_id": "_dlt_root_id"}}} } # set root key @@ -34,10 +33,8 @@ def resource(): p.run(s, table_name="items", write_disposition="merge") assert p.default_schema._normalizers_config["json"]["config"] == { "propagation": { - "tables": { - "items": {'_dlt_id': '_dlt_root_id'} - }, - "root": {'_dlt_id': '_dlt_root_id'} + "tables": {"items": {"_dlt_id": "_dlt_root_id"}}, + "root": {"_dlt_id": "_dlt_root_id"}, } } @@ -45,13 +42,9 @@ def resource(): s = source() s.root_key = False 
p.run(s, table_name="items", write_disposition="merge") + # source schema overwrites normalizer settings so `root` propagation is gone assert p.default_schema._normalizers_config["json"]["config"] == { - "propagation": { - "tables": { - "items": {'_dlt_id': '_dlt_root_id'} - }, - "root": {'_dlt_id': '_dlt_root_id'} - } + "propagation": {"tables": {"items": {"_dlt_id": "_dlt_root_id"}}} } # set max nesting @@ -59,13 +52,8 @@ def resource(): s.max_table_nesting = 5 p.run(s, table_name="items", write_disposition="merge") assert p.default_schema._normalizers_config["json"]["config"] == { - "propagation": { - "tables": { - "items": {'_dlt_id': '_dlt_root_id'} - }, - "root": {'_dlt_id': '_dlt_root_id'} - }, - "max_nesting": 5 + "propagation": {"tables": {"items": {"_dlt_id": "_dlt_root_id"}}}, + "max_nesting": 5, } # update max nesting and new table @@ -75,10 +63,9 @@ def resource(): assert p.default_schema._normalizers_config["json"]["config"] == { "propagation": { "tables": { - "items": {'_dlt_id': '_dlt_root_id'}, - "items2": {'_dlt_id': '_dlt_root_id'}, - }, - "root": {'_dlt_id': '_dlt_root_id'} + "items": {"_dlt_id": "_dlt_root_id"}, + "items2": {"_dlt_id": "_dlt_root_id"}, + } }, - "max_nesting": 50 - } \ No newline at end of file + "max_nesting": 50, + } diff --git a/tests/pipeline/utils.py b/tests/pipeline/utils.py index 3e61c9510c..94683e4995 100644 --- a/tests/pipeline/utils.py +++ b/tests/pipeline/utils.py @@ -1,10 +1,16 @@ +import posixpath +from typing import Any, Dict, List, Tuple import pytest +import random from os import environ import dlt -from dlt.common import json -from dlt.common.pipeline import LoadInfo, PipelineContext +from dlt.common import json, sleep +from dlt.common.pipeline import LoadInfo +from dlt.common.schema.typing import LOADS_TABLE_NAME from dlt.common.typing import DictStrAny +from dlt.destinations.impl.filesystem.filesystem import FilesystemClient +from dlt.pipeline.exceptions import SqlClientNotAvailable from tests.utils import TEST_STORAGE_ROOT @@ -35,12 +41,174 @@ def load_json_case(name: str) -> DictStrAny: return json.load(f) +def load_table_counts(p: dlt.Pipeline, *table_names: str) -> DictStrAny: + """Returns row counts for `table_names` as dict""" + + # try sql, could be other destination though + try: + with p.sql_client() as c: + qualified_names = [c.make_qualified_table_name(name) for name in table_names] + query = "\nUNION ALL\n".join( + [ + f"SELECT '{name}' as name, COUNT(1) as c FROM {q_name}" + for name, q_name in zip(table_names, qualified_names) + ] + ) + with c.execute_query(query) as cur: + rows = list(cur.fetchall()) + return {r[0]: r[1] for r in rows} + except SqlClientNotAvailable: + pass + + # try filesystem + file_tables = load_files(p, *table_names) + result = {} + for table_name, items in file_tables.items(): + result[table_name] = len(items) + return result + + +def load_data_table_counts(p: dlt.Pipeline) -> DictStrAny: + tables = [table["name"] for table in p.default_schema.data_tables()] + return load_table_counts(p, *tables) + + +def assert_data_table_counts(p: dlt.Pipeline, expected_counts: DictStrAny) -> None: + table_counts = load_data_table_counts(p) + assert ( + table_counts == expected_counts + ), f"Table counts do not match, expected {expected_counts}, got {table_counts}" + + +def load_file(path: str, file: str) -> Tuple[str, List[Dict[str, Any]]]: + """ + util function to load a filesystem destination file and return parsed content + values may not be cast to the right type, especially for insert_values, please + make 
sure to do conversions and casting if needed in your tests + """ + result: List[Dict[str, Any]] = [] + + # check if this is a file we want to read + file_name_items = file.split(".") + ext = file_name_items[-1] + if ext not in ["jsonl", "insert_values", "parquet"]: + return "skip", [] + + # table name will be last element of path + table_name = path.split("/")[-1] + + # skip loads table + if table_name == "_dlt_loads": + return table_name, [] + + full_path = posixpath.join(path, file) + + # load jsonl + if ext == "jsonl": + with open(full_path, "rU", encoding="utf-8") as f: + for line in f: + result.append(json.loads(line)) + + # load insert_values (this is a bit volatile if the exact format of the source file changes) + elif ext == "insert_values": + with open(full_path, "rU", encoding="utf-8") as f: + lines = f.readlines() + # extract col names + cols = lines[0][15:-2].split(",") + for line in lines[2:]: + values = line[1:-3].split(",") + result.append(dict(zip(cols, values))) + + # load parquet + elif ext == "parquet": + import pyarrow.parquet as pq + + with open(full_path, "rb") as f: + table = pq.read_table(f) + cols = table.column_names + count = 0 + for column in table: + column_name = cols[count] + item_count = 0 + for item in column.to_pylist(): + if len(result) <= item_count: + result.append({column_name: item}) + else: + result[item_count][column_name] = item + item_count += 1 + count += 1 + + return table_name, result + + +def load_files(p: dlt.Pipeline, *table_names: str) -> Dict[str, List[Dict[str, Any]]]: + """For now this will expect the standard layout in the filesystem destination, if changed the results will not be correct""" + client: FilesystemClient = p.destination_client() # type: ignore[assignment] + result: Dict[str, Any] = {} + for basedir, _dirs, files in client.fs_client.walk( + client.dataset_path, detail=False, refresh=True + ): + for file in files: + table_name, items = load_file(basedir, file) + if table_name not in table_names: + continue + if table_name in result: + result[table_name] = result[table_name] + items + else: + result[table_name] = items + + # loads file is special case + if LOADS_TABLE_NAME in table_names and file.find(".{LOADS_TABLE_NAME}."): + result[LOADS_TABLE_NAME] = [] + + return result + + +def load_tables_to_dicts(p: dlt.Pipeline, *table_names: str) -> Dict[str, List[Dict[str, Any]]]: + # try sql, could be other destination though + try: + result = {} + for table_name in table_names: + table_rows = [] + columns = p.default_schema.get_table_columns(table_name).keys() + query_columns = ",".join(columns) + + with p.sql_client() as c: + f_q_table_name = c.make_qualified_table_name(table_name) + query = f"SELECT {query_columns} FROM {f_q_table_name}" + with c.execute_query(query) as cur: + for row in list(cur.fetchall()): + table_rows.append(dict(zip(columns, row))) + result[table_name] = table_rows + return result + + except SqlClientNotAvailable: + pass + + # try files + return load_files(p, *table_names) + + +def load_table_distinct_counts( + p: dlt.Pipeline, distinct_column: str, *table_names: str +) -> DictStrAny: + """Returns counts of distinct values for column `distinct_column` for `table_names` as dict""" + query = "\nUNION ALL\n".join( + [ + f"SELECT '{name}' as name, COUNT(DISTINCT {distinct_column}) as c FROM {name}" + for name in table_names + ] + ) + with p.sql_client() as c: + with c.execute_query(query) as cur: + rows = list(cur.fetchall()) + return {r[0]: r[1] for r in rows} + + @dlt.source def airtable_emojis(): - 
@dlt.resource(name="📆 Schedule") def schedule(): - yield [1, 2, 3] @dlt.resource(name="💰Budget", primary_key=("🔑book_id", "asset_id")) @@ -57,5 +225,20 @@ def peacock(): def wide_peacock(): yield [{"peacock": [1, 2, 3]}] - return budget, schedule, peacock, wide_peacock + + +def run_deferred(iters): + @dlt.defer + def item(n): + sleep(random.random() / 2) + return n + + for n in range(iters): + yield item(n) + + +@dlt.source +def many_delayed(many, iters): + for n in range(many): + yield dlt.resource(run_deferred(iters), name="resource_" + str(n)) diff --git a/tests/reflection/module_cases/__init__.py b/tests/reflection/module_cases/__init__.py index 851514132d..4b792d81c0 100644 --- a/tests/reflection/module_cases/__init__.py +++ b/tests/reflection/module_cases/__init__.py @@ -1,4 +1,4 @@ import xxx.absolutely from xxx.absolutely import a1, a3 -from dlt.common.utils import uniq_id \ No newline at end of file +from dlt.common.utils import uniq_id diff --git a/tests/reflection/module_cases/all_imports.py b/tests/reflection/module_cases/all_imports.py index 0cfde3a9a1..32ca48ec6f 100644 --- a/tests/reflection/module_cases/all_imports.py +++ b/tests/reflection/module_cases/all_imports.py @@ -1 +1 @@ -from dlt.common.utils import uniq_id \ No newline at end of file +from dlt.common.utils import uniq_id diff --git a/tests/reflection/module_cases/executes_resource.py b/tests/reflection/module_cases/executes_resource.py index a2024398fc..3049eb51f9 100644 --- a/tests/reflection/module_cases/executes_resource.py +++ b/tests/reflection/module_cases/executes_resource.py @@ -1,9 +1,10 @@ import dlt + @dlt.resource def aleph(n: int): for i in range(0, n): yield i -print(list(aleph(10))) \ No newline at end of file +print(list(aleph(10))) diff --git a/tests/reflection/module_cases/import_as_type.py b/tests/reflection/module_cases/import_as_type.py index 500a1bf8a0..38604304ba 100644 --- a/tests/reflection/module_cases/import_as_type.py +++ b/tests/reflection/module_cases/import_as_type.py @@ -1,6 +1,8 @@ from xxx.aa import Tx + def create_tx() -> Tx: return Tx() + tx = Tx() diff --git a/tests/reflection/module_cases/no_pkg.py b/tests/reflection/module_cases/no_pkg.py index 62e3377048..497740970c 100644 --- a/tests/reflection/module_cases/no_pkg.py +++ b/tests/reflection/module_cases/no_pkg.py @@ -1 +1 @@ -from . import uniq_id \ No newline at end of file +from . 
import uniq_id diff --git a/tests/reflection/module_cases/raises.py b/tests/reflection/module_cases/raises.py index 2c4cc4daa1..d2f5167716 100644 --- a/tests/reflection/module_cases/raises.py +++ b/tests/reflection/module_cases/raises.py @@ -1,4 +1,4 @@ from xxx.absolutely import a1, a3 from dlt.common.utils import uniq_id -raise NotImplementedError("empty module") \ No newline at end of file +raise NotImplementedError("empty module") diff --git a/tests/reflection/module_cases/stripe_analytics/__init__.py b/tests/reflection/module_cases/stripe_analytics/__init__.py index 6877ef5475..8f0b2ff6b6 100644 --- a/tests/reflection/module_cases/stripe_analytics/__init__.py +++ b/tests/reflection/module_cases/stripe_analytics/__init__.py @@ -1,2 +1,2 @@ from .stripe_analytics import VALUE -from .helpers import HELPERS_VALUE \ No newline at end of file +from .helpers import HELPERS_VALUE diff --git a/tests/reflection/module_cases/stripe_analytics/stripe_analytics.py b/tests/reflection/module_cases/stripe_analytics/stripe_analytics.py index d41cb0c51a..6ee95e6bf8 100644 --- a/tests/reflection/module_cases/stripe_analytics/stripe_analytics.py +++ b/tests/reflection/module_cases/stripe_analytics/stripe_analytics.py @@ -1,3 +1,3 @@ import stripe -VALUE = 1 \ No newline at end of file +VALUE = 1 diff --git a/tests/reflection/module_cases/stripe_analytics_pipeline.py b/tests/reflection/module_cases/stripe_analytics_pipeline.py index 7cb84c9e6e..67002f6ed9 100644 --- a/tests/reflection/module_cases/stripe_analytics_pipeline.py +++ b/tests/reflection/module_cases/stripe_analytics_pipeline.py @@ -1,4 +1,4 @@ from stripe_analytics import VALUE, HELPERS_VALUE print(VALUE) -print(HELPERS_VALUE) \ No newline at end of file +print(HELPERS_VALUE) diff --git a/tests/reflection/test_script_inspector.py b/tests/reflection/test_script_inspector.py index 291c823357..0769a2aa82 100644 --- a/tests/reflection/test_script_inspector.py +++ b/tests/reflection/test_script_inspector.py @@ -1,12 +1,18 @@ from types import SimpleNamespace import pytest -from dlt.reflection.script_inspector import load_script_module, inspect_pipeline_script, DummyModule, PipelineIsRunning +from dlt.reflection.script_inspector import ( + load_script_module, + inspect_pipeline_script, + DummyModule, + PipelineIsRunning, +) from tests.utils import unload_modules MODULE_CASES = "./tests/reflection/module_cases" + def test_import_init_module() -> None: with pytest.raises(ModuleNotFoundError): load_script_module("./tests/reflection/", "module_cases", ignore_missing_imports=False) @@ -27,7 +33,9 @@ def test_import_module() -> None: with pytest.raises(ImportError): load_script_module(MODULE_CASES, "no_pkg", ignore_missing_imports=True) # but with package name in module name it will work - m = load_script_module("./tests/reflection/", "module_cases.no_pkg", ignore_missing_imports=True) + m = load_script_module( + "./tests/reflection/", "module_cases.no_pkg", ignore_missing_imports=True + ) # uniq_id got imported assert isinstance(m.uniq_id(), str) @@ -58,4 +66,4 @@ def test_package_dummy_clash() -> None: m = load_script_module(MODULE_CASES, "stripe_analytics_pipeline", ignore_missing_imports=True) # and those would fails assert m.VALUE == 1 - assert m.HELPERS_VALUE == 3 \ No newline at end of file + assert m.HELPERS_VALUE == 3 diff --git a/tests/sources/helpers/test_requests.py b/tests/sources/helpers/test_requests.py index ea728b92cb..695fa93eca 100644 --- a/tests/sources/helpers/test_requests.py +++ b/tests/sources/helpers/test_requests.py @@ -15,18 
+15,23 @@ from dlt.common.configuration.specs import RunConfiguration from dlt.sources.helpers.requests import Session, Client, client as default_client from dlt.sources.helpers.requests.retry import ( - DEFAULT_RETRY_EXCEPTIONS, DEFAULT_RETRY_STATUS, retry_if_status, retry_any, Retrying, wait_exponential_retry_after + DEFAULT_RETRY_EXCEPTIONS, + DEFAULT_RETRY_STATUS, + retry_if_status, + retry_any, + Retrying, + wait_exponential_retry_after, ) -@pytest.fixture(scope='function', autouse=True) +@pytest.fixture(scope="function", autouse=True) def mock_sleep() -> Iterator[mock.MagicMock]: - with mock.patch('time.sleep') as m: + with mock.patch("time.sleep") as m: yield m def test_default_session_retry_settings() -> None: - retry: Retrying = Client().session.request.retry # type: ignore + retry: Retrying = Client().session.request.retry # type: ignore assert retry.stop.max_attempt_number == 5 # type: ignore assert isinstance(retry.retry, retry_any) retries = retry.retry.retries @@ -36,7 +41,7 @@ def test_default_session_retry_settings() -> None: assert retry.wait.multiplier == 1 -@pytest.mark.parametrize('respect_retry_after_header', (True, False)) +@pytest.mark.parametrize("respect_retry_after_header", (True, False)) def test_custom_session_retry_settings(respect_retry_after_header: bool) -> None: def custom_retry_cond(response, exception): return True @@ -52,14 +57,14 @@ def custom_retry_cond(response, exception): assert retry.stop.max_attempt_number == 14 # type: ignore assert isinstance(retry.retry, retry_any) retries = retry.retry.retries - assert retries[2].predicate == custom_retry_cond # type: ignore + assert retries[2].predicate == custom_retry_cond # type: ignore assert isinstance(retry.wait, wait_exponential) assert retry.wait.multiplier == 2 def test_retry_on_status_all_fails(mock_sleep: mock.MagicMock) -> None: session = Client().session - url = 'https://example.com/data' + url = "https://example.com/data" with requests_mock.mock(session=session) as m: m.get(url, status_code=503) @@ -68,16 +73,16 @@ def test_retry_on_status_all_fails(mock_sleep: mock.MagicMock) -> None: assert m.call_count == RunConfiguration.request_max_attempts + def test_retry_on_status_success_after_2(mock_sleep: mock.MagicMock) -> None: - """Test successful request after 2 retries - """ + """Test successful request after 2 retries""" session = Client().session - url = 'https://example.com/data' + url = "https://example.com/data" responses = [ - dict(text='error', status_code=503), - dict(text='error', status_code=503), - dict(text='error', status_code=200) + dict(text="error", status_code=503), + dict(text="error", status_code=503), + dict(text="error", status_code=200), ] with requests_mock.mock(session=session) as m: @@ -87,8 +92,9 @@ def test_retry_on_status_success_after_2(mock_sleep: mock.MagicMock) -> None: assert resp.status_code == 200 assert m.call_count == 3 + def test_retry_on_status_without_raise_for_status(mock_sleep: mock.MagicMock) -> None: - url = 'https://example.com/data' + url = "https://example.com/data" session = Client(raise_for_status=False).session with requests_mock.mock(session=session) as m: @@ -98,10 +104,16 @@ def test_retry_on_status_without_raise_for_status(mock_sleep: mock.MagicMock) -> assert m.call_count == RunConfiguration.request_max_attempts -@pytest.mark.parametrize('exception_class', [requests.ConnectionError, requests.ConnectTimeout, requests.exceptions.ChunkedEncodingError]) -def test_retry_on_exception_all_fails(exception_class: Type[Exception], mock_sleep: 
mock.MagicMock) -> None: + +@pytest.mark.parametrize( + "exception_class", + [requests.ConnectionError, requests.ConnectTimeout, requests.exceptions.ChunkedEncodingError], +) +def test_retry_on_exception_all_fails( + exception_class: Type[Exception], mock_sleep: mock.MagicMock +) -> None: session = Client().session - url = 'https://example.com/data' + url = "https://example.com/data" with requests_mock.mock(session=session) as m: m.get(url, exc=exception_class) @@ -110,41 +122,44 @@ def test_retry_on_exception_all_fails(exception_class: Type[Exception], mock_sle assert m.call_count == RunConfiguration.request_max_attempts + def test_retry_on_custom_condition(mock_sleep: mock.MagicMock) -> None: def retry_on(response: requests.Response, exception: BaseException) -> bool: - return response.text == 'error' + return response.text == "error" session = Client(retry_condition=retry_on).session - url = 'https://example.com/data' + url = "https://example.com/data" with requests_mock.mock(session=session) as m: - m.get(url, text='error') + m.get(url, text="error") response = session.get(url) assert response.content == b"error" assert m.call_count == RunConfiguration.request_max_attempts + def test_retry_on_custom_condition_success_after_2(mock_sleep: mock.MagicMock) -> None: def retry_on(response: requests.Response, exception: BaseException) -> bool: - return response.text == 'error' + return response.text == "error" session = Client(retry_condition=retry_on).session - url = 'https://example.com/data' - responses = [dict(text='error'), dict(text='error'), dict(text='success')] + url = "https://example.com/data" + responses = [dict(text="error"), dict(text="error"), dict(text="success")] with requests_mock.mock(session=session) as m: m.get(url, responses) resp = session.get(url) - assert resp.text == 'success' + assert resp.text == "success" assert m.call_count == 3 + def test_wait_retry_after_int(mock_sleep: mock.MagicMock) -> None: session = Client(request_backoff_factor=0).session - url = 'https://example.com/data' + url = "https://example.com/data" responses = [ - dict(text='error', headers={'retry-after': '4'}, status_code=429), - dict(text='success') + dict(text="error", headers={"retry-after": "4"}, status_code=429), + dict(text="success"), ] with requests_mock.mock(session=session) as m: @@ -155,46 +170,46 @@ def test_wait_retry_after_int(mock_sleep: mock.MagicMock) -> None: assert 4 <= mock_sleep.call_args[0][0] <= 5 # Adds jitter up to 1s -@pytest.mark.parametrize('existing_session', (False, True)) +@pytest.mark.parametrize("existing_session", (False, True)) def test_init_default_client(existing_session: bool) -> None: """Test that the default client config is updated from runtime configuration. Run twice. 1. Clean start with no existing session attached. 2. 
With session in thread local (session is updated) """ cfg = { - 'RUNTIME__REQUEST_TIMEOUT': random.randrange(1, 100), - 'RUNTIME__REQUEST_MAX_ATTEMPTS': random.randrange(1, 100), - 'RUNTIME__REQUEST_BACKOFF_FACTOR': random.randrange(1, 100), - 'RUNTIME__REQUEST_MAX_RETRY_DELAY': random.randrange(1, 100), + "RUNTIME__REQUEST_TIMEOUT": random.randrange(1, 100), + "RUNTIME__REQUEST_MAX_ATTEMPTS": random.randrange(1, 100), + "RUNTIME__REQUEST_BACKOFF_FACTOR": random.randrange(1, 100), + "RUNTIME__REQUEST_MAX_RETRY_DELAY": random.randrange(1, 100), } os.environ.update({key: str(value) for key, value in cfg.items()}) - dlt.pipeline(pipeline_name='dummy_pipeline') + dlt.pipeline(pipeline_name="dummy_pipeline") session = default_client.session - assert session.timeout == cfg['RUNTIME__REQUEST_TIMEOUT'] + assert session.timeout == cfg["RUNTIME__REQUEST_TIMEOUT"] retry = session.request.retry # type: ignore[attr-defined] - assert retry.wait.multiplier == cfg['RUNTIME__REQUEST_BACKOFF_FACTOR'] - assert retry.stop.max_attempt_number == cfg['RUNTIME__REQUEST_MAX_ATTEMPTS'] - assert retry.wait.max == cfg['RUNTIME__REQUEST_MAX_RETRY_DELAY'] + assert retry.wait.multiplier == cfg["RUNTIME__REQUEST_BACKOFF_FACTOR"] + assert retry.stop.max_attempt_number == cfg["RUNTIME__REQUEST_MAX_ATTEMPTS"] + assert retry.wait.max == cfg["RUNTIME__REQUEST_MAX_RETRY_DELAY"] -@pytest.mark.parametrize('existing_session', (False, True)) +@pytest.mark.parametrize("existing_session", (False, True)) def test_client_instance_with_config(existing_session: bool) -> None: cfg = { - 'RUNTIME__REQUEST_TIMEOUT': random.randrange(1, 100), - 'RUNTIME__REQUEST_MAX_ATTEMPTS': random.randrange(1, 100), - 'RUNTIME__REQUEST_BACKOFF_FACTOR': random.randrange(1, 100), - 'RUNTIME__REQUEST_MAX_RETRY_DELAY': random.randrange(1, 100), + "RUNTIME__REQUEST_TIMEOUT": random.randrange(1, 100), + "RUNTIME__REQUEST_MAX_ATTEMPTS": random.randrange(1, 100), + "RUNTIME__REQUEST_BACKOFF_FACTOR": random.randrange(1, 100), + "RUNTIME__REQUEST_MAX_RETRY_DELAY": random.randrange(1, 100), } os.environ.update({key: str(value) for key, value in cfg.items()}) client = Client() session = client.session - assert session.timeout == cfg['RUNTIME__REQUEST_TIMEOUT'] + assert session.timeout == cfg["RUNTIME__REQUEST_TIMEOUT"] retry = session.request.retry # type: ignore[attr-defined] - assert retry.wait.multiplier == cfg['RUNTIME__REQUEST_BACKOFF_FACTOR'] - assert retry.stop.max_attempt_number == cfg['RUNTIME__REQUEST_MAX_ATTEMPTS'] - assert retry.wait.max == cfg['RUNTIME__REQUEST_MAX_RETRY_DELAY'] + assert retry.wait.multiplier == cfg["RUNTIME__REQUEST_BACKOFF_FACTOR"] + assert retry.stop.max_attempt_number == cfg["RUNTIME__REQUEST_MAX_ATTEMPTS"] + assert retry.wait.max == cfg["RUNTIME__REQUEST_MAX_RETRY_DELAY"] diff --git a/tests/tools/clean_redshift.py b/tests/tools/clean_redshift.py index 7444d69685..f81407f74a 100644 --- a/tests/tools/clean_redshift.py +++ b/tests/tools/clean_redshift.py @@ -1,10 +1,10 @@ -from dlt.destinations.postgres.postgres import PostgresClient -from dlt.destinations.postgres.sql_client import psycopg2 +from dlt.destinations.impl.postgres.postgres import PostgresClient +from dlt.destinations.impl.postgres.sql_client import psycopg2 from psycopg2.errors import InsufficientPrivilege, InternalError_, SyntaxError CONNECTION_STRING = "" -if __name__ == '__main__': +if __name__ == "__main__": # connect connection = psycopg2.connect(CONNECTION_STRING) connection.set_isolation_level(0) diff --git a/tests/tools/create_storages.py 
b/tests/tools/create_storages.py index 4f0abe3512..5b8788f99f 100644 --- a/tests/tools/create_storages.py +++ b/tests/tools/create_storages.py @@ -1,4 +1,11 @@ -from dlt.common.storages import NormalizeStorage, LoadStorage, SchemaStorage, NormalizeStorageConfiguration, LoadStorageConfiguration, SchemaStorageConfiguration +from dlt.common.storages import ( + NormalizeStorage, + LoadStorage, + SchemaStorage, + NormalizeStorageConfiguration, + LoadStorageConfiguration, + SchemaStorageConfiguration, +) # NormalizeStorage(True, NormalizeVolumeConfiguration) diff --git a/tests/utils.py b/tests/utils.py index b3b7f7969b..cf172f9733 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -3,7 +3,7 @@ import platform import sys from os import environ -from typing import Iterator +from typing import Any, Iterable, Iterator, List, Literal, Union, get_args from unittest.mock import patch import pytest @@ -24,8 +24,9 @@ from dlt.common.schema import Schema from dlt.common.storages import FileStorage from dlt.common.storages.versioned_storage import VersionedStorage -from dlt.common.typing import StrAny +from dlt.common.typing import StrAny, TDataItem from dlt.common.utils import custom_environ, uniq_id +from dlt.common.pipeline import PipelineContext, SupportsPipeline TEST_STORAGE_ROOT = "_storage" @@ -55,9 +56,7 @@ # filter out active destinations for current tests -ACTIVE_DESTINATIONS = set( - dlt.config.get("ACTIVE_DESTINATIONS", list) or IMPLEMENTED_DESTINATIONS -) +ACTIVE_DESTINATIONS = set(dlt.config.get("ACTIVE_DESTINATIONS", list) or IMPLEMENTED_DESTINATIONS) ACTIVE_SQL_DESTINATIONS = SQL_DESTINATIONS.intersection(ACTIVE_DESTINATIONS) ACTIVE_NON_SQL_DESTINATIONS = NON_SQL_DESTINATIONS.intersection(ACTIVE_DESTINATIONS) @@ -66,19 +65,19 @@ assert len(ACTIVE_DESTINATIONS) >= 0, "No active destinations selected" for destination in NON_SQL_DESTINATIONS: - assert ( - destination in IMPLEMENTED_DESTINATIONS - ), f"Unknown non sql destination {destination}" + assert destination in IMPLEMENTED_DESTINATIONS, f"Unknown non sql destination {destination}" for destination in SQL_DESTINATIONS: - assert ( - destination in IMPLEMENTED_DESTINATIONS - ), f"Unknown sql destination {destination}" + assert destination in IMPLEMENTED_DESTINATIONS, f"Unknown sql destination {destination}" for destination in ACTIVE_DESTINATIONS: - assert ( - destination in IMPLEMENTED_DESTINATIONS - ), f"Unknown active destination {destination}" + assert destination in IMPLEMENTED_DESTINATIONS, f"Unknown active destination {destination}" + + +# possible TDataItem types +TDataItemFormat = Literal["json", "pandas", "arrow", "arrow-batch"] +ALL_DATA_ITEM_FORMATS = get_args(TDataItemFormat) +"""List with TDataItem formats: json, arrow table/batch / pandas""" def TEST_DICT_CONFIG_PROVIDER(): @@ -101,6 +100,12 @@ def raise_for_status(self) -> None: raise requests.HTTPError(response=self) +class MockPipeline(SupportsPipeline): + def __init__(self, pipeline_name: str, first_run: bool) -> None: + self.pipeline_name = pipeline_name + self.first_run = first_run + + def write_version(storage: FileStorage, version: str) -> None: storage.save(VersionedStorage.VERSION_FILE, str(version)) @@ -163,6 +168,7 @@ def unload_modules() -> Iterator[None]: @pytest.fixture(autouse=True) def wipe_pipeline() -> Iterator[None]: + """Wipes pipeline local state and deactivates it""" container = Container() if container[PipelineContext].is_active(): container[PipelineContext].deactivate() @@ -175,6 +181,28 @@ def wipe_pipeline() -> Iterator[None]: 
container[PipelineContext].deactivate() +def data_to_item_format( + item_format: TDataItemFormat, data: Union[Iterator[TDataItem], Iterable[TDataItem]] +) -> Any: + """Return the given data in the form of pandas, arrow table/batch or json items""" + if item_format == "json": + return data + + import pandas as pd + from dlt.common.libs.pyarrow import pyarrow as pa + + # Make dataframe from the data + df = pd.DataFrame(list(data)) + if item_format == "pandas": + return [df] + elif item_format == "arrow": + return [pa.Table.from_pandas(df)] + elif item_format == "arrow-batch": + return [pa.RecordBatch.from_pandas(df)] + else: + raise ValueError(f"Unknown item format: {item_format}") + + def init_test_logging(c: RunConfiguration = None) -> None: if not c: c = resolve_configuration(RunConfiguration()) @@ -215,33 +243,16 @@ def assert_no_dict_key_starts_with(d: StrAny, key_prefix: str) -> None: def skip_if_not_active(destination: str) -> None: - assert ( - destination in IMPLEMENTED_DESTINATIONS - ), f"Unknown skipped destination {destination}" + assert destination in IMPLEMENTED_DESTINATIONS, f"Unknown skipped destination {destination}" if destination not in ACTIVE_DESTINATIONS: - pytest.skip( - f"{destination} not in ACTIVE_DESTINATIONS", allow_module_level=True - ) + pytest.skip(f"{destination} not in ACTIVE_DESTINATIONS", allow_module_level=True) def is_running_in_github_fork() -> bool: - event_path = os.environ.get("GITHUB_EVENT_PATH") - is_pull_request_from_fork = False - - if event_path: - # Extract necessary information from the GitHub Actions event payload - with open(event_path, encoding="utf-8") as f: - event_data = dlt.common.json.load(f) - - # Check if the pull request is from a fork - is_pull_request_from_fork = ( - event_data.get("pull_request", {}) - .get("head", {}) - .get("repo", {}) - .get("fork", False) - ) - - return is_pull_request_from_fork + """Check if executed by GitHub Actions, in a repo fork.""" + is_github_actions = os.environ.get("GITHUB_ACTIONS") == "true" + is_fork = os.environ.get("IS_FORK") == "true" # custom var set by us in the workflow's YAML + return is_github_actions and is_fork skipifspawn = pytest.mark.skipif( @@ -252,15 +263,12 @@ def is_running_in_github_fork() -> bool: platform.python_implementation() == "PyPy", reason="won't run in PyPy interpreter" ) -skipifnotwindows = pytest.mark.skipif( - platform.system() != "Windows", reason="runs only on windows" -) +skipifnotwindows = pytest.mark.skipif(platform.system() != "Windows", reason="runs only on windows") skipifwindows = pytest.mark.skipif( platform.system() == "Windows", reason="does not runs on windows" ) skipifgithubfork = pytest.mark.skipif( - is_running_in_github_fork(), - reason="Skipping test because it runs on a PR coming from fork", + is_running_in_github_fork(), reason="Skipping test because it runs on a PR coming from fork" ) diff --git a/tox.ini b/tox.ini index 7d16160004..9469001572 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [flake8] -ignore=E1,E2,E3,E4,F401,W391,W292,E501,E731,F811 +ignore=E1,E2,E3,E4,F401,W391,W292,E501,E731,F811,W503,E704, W504 banned-modules = datetime = use dlt.common.pendulum json = use dlt.common.json decimal = use dlt.common.decimal
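
The new tests above center on the schema contract API. As a quick orientation, here is a minimal sketch of the behaviour they exercise, assuming a local duckdb destination; the pipeline and resource names are invented for illustration, while the calls themselves (dlt.resource(schema_contract=...), pipeline.run(..., schema_contract=...), the evolve / discard_row / discard_value / freeze modes and the PipelineStepFailed / DataValidationError chain) follow the usage in tests/pipeline/test_schema_contracts.py:

import dlt
from dlt.pipeline.exceptions import PipelineStepFailed


@dlt.resource(name="items", write_disposition="append")
def items():
    # first load: establishes the `items` table with columns id and name
    yield {"id": 1, "name": "one"}


@dlt.resource(name="items", write_disposition="append")
def items_with_new_column():
    # second load: introduces a column the existing table does not have
    yield {"id": 2, "name": "two", "new_col": "hello"}


pipeline = dlt.pipeline(pipeline_name="contract_demo", destination="duckdb", full_refresh=True)
pipeline.run(items())  # no contract set, schema evolves freely

try:
    # "freeze" on columns rejects the new column on an already known table; the
    # DataValidationError shows up in the PipelineStepFailed __context__ chain
    pipeline.run(items_with_new_column(), schema_contract={"columns": "freeze"})
except PipelineStepFailed as exc:
    print(exc.step, exc.__context__)

# "discard_row" / "discard_value" would instead drop the offending row or value, and the
# same setting can also be attached at resource or source level via
# dlt.resource(..., schema_contract=...) / dlt.source(..., schema_contract=...).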