From fd6f6f084f94271aa12649f7f19864f49ad867b7 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 29 Sep 2023 10:46:42 +0200 Subject: [PATCH 01/83] Add Github action for integration test --- .github/workflows/integration.yml | 86 +++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 .github/workflows/integration.yml diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml new file mode 100644 index 000000000..75203a1cb --- /dev/null +++ b/.github/workflows/integration.yml @@ -0,0 +1,86 @@ +# **what?** +# Runs integration tests. + +# **why?** +# Ensure code for dbt meets a certain quality standard. + +# **when?** +# This will run for all PRs, when code is pushed to a release +# branch, and when manually triggered. + +name: Integration tests + +on: + push: + branches: + - "main" + - "*.latest" + - "releases/*" + pull_request: + workflow_dispatch: + +# explicitly turn off permissions for `GITHUB_TOKEN` +permissions: read-all + +# will cancel previous workflows triggered by the same event and for the same ref for PRs or same SHA otherwise +concurrency: + group: ${{ github.workflow }}-${{ github.event_name }}-${{ contains(github.event_name, 'pull_request') && github.event.pull_request.head.ref || github.sha }} + cancel-in-progress: true + +defaults: + run: + shell: bash + +jobs: + tests: + name: test with python ${{ matrix.python-version }} + + runs-on: ubuntu-latest + timeout-minutes: 10 + + strategy: + fail-fast: false + matrix: + python-version: + - "3.8" + - "3.9" + - "3.10" + - "3.11" + + env: + TOXENV: "unit" + PYTEST_ADDOPTS: "-v --color=yes --csv test_results.csv" + + steps: + - name: Check out the repository + uses: actions/checkout@v3 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - uses: isbang/compose-action@v1.5.1 + with: + compose-file: "./docker/docker-compose.yml" + + - name: Install tox + run: | + python -m pip install --upgrade pip + python -m pip install tox + + - name: Run tox + run: | + tox -e integration-spark-session + tox -e integration-spark-thrift + + - name: Get current date + if: always() + id: date + run: echo "date=$(date +'%Y-%m-%dT%H_%M_%S')" >> $GITHUB_OUTPUT # Colons are not allowed in artifacts name + + - uses: actions/upload-artifact@v3 + if: always() + with: + name: tests_results_${{ matrix.python-version }}-${{ steps.date.outputs.date }}.csv + path: tests_results.csv From 795e40a01cfb1de47168eb0c8d49c231989d2e08 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 29 Sep 2023 11:54:41 +0200 Subject: [PATCH 02/83] Update tox --- tox.ini | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tox.ini b/tox.ini index 97017a926..e456d55d0 100644 --- a/tox.ini +++ b/tox.ini @@ -56,10 +56,7 @@ deps = [testenv:integration-spark-thrift] -allowlist_externals = - /bin/bash -basepython = python3.8 -commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posargs} -n4 tests/functional/adapter/*' +description = run integration tests against a Spark thrift server passenv = DBT_* PYTEST_ADDOPTS @@ -67,12 +64,10 @@ deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev-requirements.txt -e. 
+commands = pytest -v --profile apache_spark {posargs} -n4 tests/functional/adapter/* [testenv:integration-spark-session] -allowlist_externals = - /bin/bash -basepython = python3.10 -commands = /bin/bash -c '{envpython} -m pytest -v --profile spark_session {posargs} -n4 tests/functional/adapter/*' +description = run integration tests against a Spark session passenv = DBT_* PYTEST_* @@ -81,3 +76,4 @@ deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev-requirements.txt -e.[session] +commands = pytest -v --profile spark_session {posargs} -n4 tests/functional/adapter/* From ff39c5d065e8b8ec065e5531e29107e35ccfcd6e Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 29 Sep 2023 14:42:11 +0200 Subject: [PATCH 03/83] Fetch spark from https link --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index bb4d378ed..b310fde4d 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -14,7 +14,7 @@ ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}" RUN apt-get update && \ apt-get install -y wget netcat procps libpostgresql-jdbc-java && \ - wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ + wget -q "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \ From 1505fc6fb4d26245e18e65485e73407c867a3ef3 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 29 Sep 2023 14:42:40 +0200 Subject: [PATCH 04/83] Use Spark version 3.1.2 --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index b310fde4d..d1fd5357f 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,7 +2,7 @@ ARG OPENJDK_VERSION=8 FROM eclipse-temurin:${OPENJDK_VERSION}-jre ARG BUILD_DATE -ARG SPARK_VERSION=3.3.2 +ARG SPARK_VERSION=3.1.2 ARG HADOOP_VERSION=3 LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \ From 44fe33f4bd233f508c59c527a69590de1ec5f463 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 29 Sep 2023 14:50:13 +0200 Subject: [PATCH 05/83] Seperate running Spark session and thrift --- .github/workflows/integration.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 75203a1cb..d455e804b 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -69,10 +69,11 @@ jobs: python -m pip install --upgrade pip python -m pip install tox - - name: Run tox - run: | - tox -e integration-spark-session - tox -e integration-spark-thrift + - name: Run tox for Spark session + run: tox -e integration-spark-session + + - name: Run tox for Spark thrift + run: tox -e integration-spark-thrift - name: Get current date if: always() From 2655631fa3b6db8a7515f11495710675bca0ba4e Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 29 Sep 2023 14:51:40 +0200 Subject: [PATCH 06/83] Use Spark 3.1.2 and Hadoop 3.2 --- docker/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index d1fd5357f..85d01ba8a 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,8 +2,8 @@ ARG OPENJDK_VERSION=8 FROM eclipse-temurin:${OPENJDK_VERSION}-jre ARG 
BUILD_DATE -ARG SPARK_VERSION=3.1.2 -ARG HADOOP_VERSION=3 +ARG SPARK_VERSION=3.1.3 +ARG HADOOP_VERSION=3.2 LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \ org.label-schema.build-date=$BUILD_DATE \ From 915f67e9203dfb891ad4a22f3db7f9251b19ab84 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 29 Sep 2023 14:57:20 +0200 Subject: [PATCH 07/83] Reset tox.ini --- tox.ini | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index e456d55d0..33055a211 100644 --- a/tox.ini +++ b/tox.ini @@ -57,6 +57,9 @@ deps = [testenv:integration-spark-thrift] description = run integration tests against a Spark thrift server +allowlist_externals = + /bin/bash +basepython = python3.8 passenv = DBT_* PYTEST_ADDOPTS @@ -64,10 +67,13 @@ deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev-requirements.txt -e. -commands = pytest -v --profile apache_spark {posargs} -n4 tests/functional/adapter/* +commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posargs} -n4 tests/functional/adapter/*' [testenv:integration-spark-session] description = run integration tests against a Spark session +allowlist_externals = + /bin/bash +basepython = python3.10 passenv = DBT_* PYTEST_* @@ -76,4 +82,4 @@ deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev-requirements.txt -e.[session] -commands = pytest -v --profile spark_session {posargs} -n4 tests/functional/adapter/* +commands = /bin/bash -c '{envpython} -m pytest -v --profile spark_session {posargs} -n4 tests/functional/adapter/*' From f0ef215e1c8186cf4270e695ec8663a5d745d127 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 29 Sep 2023 15:08:48 +0200 Subject: [PATCH 08/83] Remove base pythons in tox.ini --- tox.ini | 2 -- 1 file changed, 2 deletions(-) diff --git a/tox.ini b/tox.ini index 33055a211..31396b5ef 100644 --- a/tox.ini +++ b/tox.ini @@ -59,7 +59,6 @@ deps = description = run integration tests against a Spark thrift server allowlist_externals = /bin/bash -basepython = python3.8 passenv = DBT_* PYTEST_ADDOPTS @@ -73,7 +72,6 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posarg description = run integration tests against a Spark session allowlist_externals = /bin/bash -basepython = python3.10 passenv = DBT_* PYTEST_* From e8457df87d636324aae416c4a8eea363779f0156 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 29 Sep 2023 15:19:19 +0200 Subject: [PATCH 09/83] Fix reference to Docker compose file --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index d455e804b..517815e27 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -62,7 +62,7 @@ jobs: - uses: isbang/compose-action@v1.5.1 with: - compose-file: "./docker/docker-compose.yml" + compose-file: "./docker-compose.yml" - name: Install tox run: | From 842466a2883efd3a13826410f1477a0ff84c5e8f Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 29 Sep 2023 15:42:11 +0200 Subject: [PATCH 10/83] Remove timeout --- .github/workflows/integration.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 517815e27..8eafa5c72 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -36,7 +36,6 @@ jobs: name: test with python ${{ matrix.python-version }} runs-on: ubuntu-latest - timeout-minutes: 10 strategy: fail-fast: false From 
0738f2d0bcc5f30eab1cc92b4c82720ce99e3265 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 29 Sep 2023 15:55:55 +0200 Subject: [PATCH 11/83] Remove artifact steps --- .github/workflows/integration.yml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 8eafa5c72..9f26bd2be 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -73,14 +73,3 @@ jobs: - name: Run tox for Spark thrift run: tox -e integration-spark-thrift - - - name: Get current date - if: always() - id: date - run: echo "date=$(date +'%Y-%m-%dT%H_%M_%S')" >> $GITHUB_OUTPUT # Colons are not allowed in artifacts name - - - uses: actions/upload-artifact@v3 - if: always() - with: - name: tests_results_${{ matrix.python-version }}-${{ steps.date.outputs.date }}.csv - path: tests_results.csv From 277bef1a2a4368d54b2b1ce41b7894c51d4f7ef1 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 29 Sep 2023 15:56:33 +0200 Subject: [PATCH 12/83] Bump Spark and Hadoop versions --- docker/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 85d01ba8a..a9b9e0a2c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,8 +2,8 @@ ARG OPENJDK_VERSION=8 FROM eclipse-temurin:${OPENJDK_VERSION}-jre ARG BUILD_DATE -ARG SPARK_VERSION=3.1.3 -ARG HADOOP_VERSION=3.2 +ARG SPARK_VERSION=3.4.1 +ARG HADOOP_VERSION=3 LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \ org.label-schema.build-date=$BUILD_DATE \ From 8d5853d3049c5e299ab7d824ab33fc374a9894ff Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 29 Sep 2023 16:08:16 +0200 Subject: [PATCH 13/83] Reset Spark and Hadoop version --- docker/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index a9b9e0a2c..85d01ba8a 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,8 +2,8 @@ ARG OPENJDK_VERSION=8 FROM eclipse-temurin:${OPENJDK_VERSION}-jre ARG BUILD_DATE -ARG SPARK_VERSION=3.4.1 -ARG HADOOP_VERSION=3 +ARG SPARK_VERSION=3.1.3 +ARG HADOOP_VERSION=3.2 LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \ org.label-schema.build-date=$BUILD_DATE \ From 919528ab14dd731f9efa913d37b051bda8922e44 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 29 Sep 2023 16:09:09 +0200 Subject: [PATCH 14/83] Update comment --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 9f26bd2be..f4c34c5fb 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -2,7 +2,7 @@ # Runs integration tests. # **why?** -# Ensure code for dbt meets a certain quality standard. +# Ensure code runs as expected. 
# **when?** # This will run for all PRs, when code is pushed to a release From 15e48fd3f1f8d421f7f079a20ca8ba5fd5995d69 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 29 Sep 2023 16:12:25 +0200 Subject: [PATCH 15/83] Add changie --- .changes/unreleased/Under the Hood-20230929-161218.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Under the Hood-20230929-161218.yaml diff --git a/.changes/unreleased/Under the Hood-20230929-161218.yaml b/.changes/unreleased/Under the Hood-20230929-161218.yaml new file mode 100644 index 000000000..c82e8252e --- /dev/null +++ b/.changes/unreleased/Under the Hood-20230929-161218.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Add Github action for integration testing +time: 2023-09-29T16:12:18.968755+02:00 +custom: + Author: JCZuurmond + Issue: "719" From 31cb05e7d7dc6e5e63b3027a66428f22d40f86ce Mon Sep 17 00:00:00 2001 From: Colin Date: Wed, 18 Oct 2023 16:54:42 -0700 Subject: [PATCH 16/83] add databricks and PR execution protections --- .github/scripts/update_dbt_core_branch.sh | 20 +++ .github/scripts/update_release_branch.sh | 11 ++ .github/workflows/integration.yml | 193 +++++++++++++++++++++- 3 files changed, 215 insertions(+), 9 deletions(-) create mode 100755 .github/scripts/update_dbt_core_branch.sh create mode 100644 .github/scripts/update_release_branch.sh diff --git a/.github/scripts/update_dbt_core_branch.sh b/.github/scripts/update_dbt_core_branch.sh new file mode 100755 index 000000000..d28a40c35 --- /dev/null +++ b/.github/scripts/update_dbt_core_branch.sh @@ -0,0 +1,20 @@ +#!/bin/bash -e +set -e + +git_branch=$1 +target_req_file="dev-requirements.txt" +core_req_sed_pattern="s|dbt-core.git.*#egg=dbt-core|dbt-core.git@${git_branch}#egg=dbt-core|g" +postgres_req_sed_pattern="s|dbt-core.git.*#egg=dbt-postgres|dbt-core.git@${git_branch}#egg=dbt-postgres|g" +tests_req_sed_pattern="s|dbt-core.git.*#egg=dbt-tests|dbt-core.git@${git_branch}#egg=dbt-tests|g" +if [[ "$OSTYPE" == darwin* ]]; then + # mac ships with a different version of sed that requires a delimiter arg + sed -i "" "$core_req_sed_pattern" $target_req_file + sed -i "" "$postgres_req_sed_pattern" $target_req_file + sed -i "" "$tests_req_sed_pattern" $target_req_file +else + sed -i "$core_req_sed_pattern" $target_req_file + sed -i "$postgres_req_sed_pattern" $target_req_file + sed -i "$tests_req_sed_pattern" $target_req_file +fi +core_version=$(curl "https://raw.githubusercontent.com/dbt-labs/dbt-core/${git_branch}/core/dbt/version.py" | grep "__version__ = *"|cut -d'=' -f2) +bumpversion --allow-dirty --new-version "$core_version" major diff --git a/.github/scripts/update_release_branch.sh b/.github/scripts/update_release_branch.sh new file mode 100644 index 000000000..75b9ccef6 --- /dev/null +++ b/.github/scripts/update_release_branch.sh @@ -0,0 +1,11 @@ +#!/bin/bash -e +set -e + +release_branch=$1 +target_req_file=".github/workflows/nightly-release.yml" +if [[ "$OSTYPE" == darwin* ]]; then + # mac ships with a different version of sed that requires a delimiter arg + sed -i "" "s|[0-9].[0-9].latest|$release_branch|" $target_req_file +else + sed -i "s|[0-9].[0-9].latest|$release_branch|" $target_req_file +fi diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index f4c34c5fb..684bcfab5 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -18,6 +18,11 @@ on: - "releases/*" pull_request: workflow_dispatch: + inputs: + dbt-core-branch: + description: "branch of dbt-core to use in 
dev-requirements.txt" + required: false + type: string # explicitly turn off permissions for `GITHUB_TOKEN` permissions: read-all @@ -32,8 +37,60 @@ defaults: shell: bash jobs: - tests: - name: test with python ${{ matrix.python-version }} + # generate test metadata about what files changed and the testing matrix to use + test-metadata: + # run if not a PR from a forked repository or has a label to mark as safe to test + if: >- + github.event_name != 'pull_request_target' || + github.event.pull_request.head.repo.full_name == github.repository || + contains(github.event.pull_request.labels.*.name, 'ok to test') + runs-on: ubuntu-latest + + outputs: + matrix: ${{ steps.generate-matrix.outputs.result }} + run-python-tests: ${{ steps.filter.outputs.bigquery-python }} + + steps: + - name: Check out the repository (non-PR) + if: github.event_name != 'pull_request_target' + uses: actions/checkout@v3 + with: + persist-credentials: false + + - name: Check out the repository (PR) + if: github.event_name == 'pull_request_target' + uses: actions/checkout@v3 + with: + persist-credentials: false + ref: ${{ github.event.pull_request.head.sha }} + - name: Check if relevant files changed + if: github.event_name == 'pull_request_target' + # https://github.com/marketplace/actions/paths-changes-filter + # For each filter, it sets output variable named by the filter to the text: + # 'true' - if any of changed files matches any of filter rules + # 'false' - if none of changed files matches any of filter rules + # also, returns: + # `changes` - JSON array with names of all filters matching any of the changed files + uses: dorny/paths-filter@v2 + id: get-changes + with: + token: ${{ secrets.GITHUB_TOKEN }} + filters: | + spark: + - 'dbt/**' + - 'tests/**' + - 'dev-requirements.txt' + local-tests: + name: test spark local against python ${{ matrix.python-version }} + + # run if not a PR from a forked repository or has a label to mark as safe to test + # also checks that the matrix generated is not empty + if: >- + ( + github.event_name != 'pull_request_target' || + github.event.pull_request.head.repo.full_name == github.repository || + contains(github.event.pull_request.labels.*.name, 'ok to test') + ) runs-on: ubuntu-latest @@ -45,31 +102,149 @@ jobs: - "3.9" - "3.10" - "3.11" + test: + - "spark-thrift" + - "spark-session" env: - TOXENV: "unit" PYTEST_ADDOPTS: "-v --color=yes --csv test_results.csv" + DBT_INVOCATION_ENV: github-actions + DD_CIVISIBILITY_AGENTLESS_ENABLED: true + DD_API_KEY: ${{ secrets.DATADOG_API_KEY }} + DD_SITE: datadoghq.com + DD_ENV: ci + DD_SERVICE: ${{ github.event.repository.name }} steps: - name: Check out the repository + if: github.event_name != 'pull_request_target' uses: actions/checkout@v3 + with: + persist-credentials: false + + # explicity checkout the branch for the PR, + # this is necessary for the `pull_request_target` event + - name: Check out the repository (PR) + if: github.event_name == 'pull_request_target' + uses: actions/checkout@v3 + with: + persist-credentials: false + ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + - name: Install python dependencies + run: | + python -m pip install --user --upgrade pip + python -m pip install tox + python -m pip --version + tox --version + + - name: Update dev_requirements.txt + if: inputs.dbt-core-branch != '' + run: | + pip install bumpversion + ./.github/scripts/update_dbt_core_branch.sh ${{ 
inputs.dbt-core-branch }} + - uses: isbang/compose-action@v1.5.1 + if: ${{ matrix.test == 'spark-thrift'}} with: compose-file: "./docker-compose.yml" - - name: Install tox + - name: Run tox for Spark ${{ matrix.test }} + run: tox -e integration-${{ matrix.test }} + + databricks-tests: + name: test spark databricks against python ${{ matrix.python-version }} + # run if not a PR from a forked repository or has a label to mark as safe to test + # also checks that the matrix generated is not empty + if: >- + ( + github.event_name != 'pull_request_target' || + github.event.pull_request.head.repo.full_name == github.repository || + contains(github.event.pull_request.labels.*.name, 'ok to test') + ) + + runs-on: ubuntu-latest + container: + image: "fishtownanalytics/test-container:10" + strategy: + fail-fast: false + matrix: + python-version: + - "3.8" + - "3.9" + - "3.10" + - "3.11" + test: + - "databricks-odbc-sql-endpoint" + - "databricks-odbc-cluster" + - "spark-databricks-http" + + env: + PYTEST_ADDOPTS: "-v --color=yes --csv test_results.csv" + DBT_INVOCATION_ENV: github-actions + DD_CIVISIBILITY_AGENTLESS_ENABLED: true + DD_API_KEY: ${{ secrets.DATADOG_API_KEY }} + DD_SITE: datadoghq.com + DD_ENV: ci + DD_SERVICE: ${{ github.event.repository.name }} + DBT_DATABRICKS_CLUSTER_NAME: ${{ secrets.DBT_DATABRICKS_CLUSTER_NAME }} + DBT_DATABRICKS_HOSTNAME: ${{ secrets.DBT_DATABRICKS_HOST }} + DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }} + DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }} + DBT_DATABRICS_USER: ${{ secrets.DBT_DATABRICKS_USER }} + + steps: + - name: Check out the repository + if: github.event_name != 'pull_request_target' + uses: actions/checkout@v3 + with: + persist-credentials: false + + # explicity checkout the branch for the PR, + # this is necessary for the `pull_request_target` event + - name: Check out the repository (PR) + if: github.event_name == 'pull_request_target' + uses: actions/checkout@v3 + with: + persist-credentials: false + ref: ${{ github.event.pull_request.head.sha }} + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install python dependencies run: | - python -m pip install --upgrade pip + python -m pip install --user --upgrade pip python -m pip install tox + python -m pip --version + tox --version - - name: Run tox for Spark session - run: tox -e integration-spark-session + - name: Update dev_requirements.txt + if: inputs.dbt-core-branch != '' + run: | + pip install bumpversion + ./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }} - - name: Run tox for Spark thrift - run: tox -e integration-spark-thrift + - name: Configure ODBC + if: ${{ matrix.test != "spark-databricks-http" }} + run: | + apt-get update && apt-get install -y --no-install-recommends \ + g++ \ + unixodbc-dev \ + unzip + + unzip /tmp/simba_odbc.zip -d /tmp/ \ + && dpkg -i /tmp/SimbaSparkODBC-*/*.deb \ + && echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ + && rm /tmp/simba_odbc.zip \ + && rm -rf /tmp/SimbaSparkODBC* + + - name: Run tox for Spark ${{ matrix.test }} + run: tox -e integration-${{ matrix.test }} \ No newline at end of file From fd54d7f78ccc3b42ac12d7b3f95b99992996e606 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 09:47:53 -0700 Subject: [PATCH 17/83] use single quotes --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 684bcfab5..a37744ca2 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -233,7 +233,7 @@ jobs: ./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }} - name: Configure ODBC - if: ${{ matrix.test != "spark-databricks-http" }} + if: ${{ matrix.test != 'spark-databricks-http' }} run: | apt-get update && apt-get install -y --no-install-recommends \ g++ \ From 8de83390c8b7c4a169df33982cc61b59337e1dc2 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 09:53:06 -0700 Subject: [PATCH 18/83] remove `_target` suffix --- .github/workflows/integration.yml | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index a37744ca2..f33ade986 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -41,7 +41,7 @@ jobs: test-metadata: # run if not a PR from a forked repository or has a label to mark as safe to test if: >- - github.event_name != 'pull_request_target' || + github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository || contains(github.event.pull_request.labels.*.name, 'ok to test') runs-on: ubuntu-latest @@ -52,19 +52,19 @@ jobs: steps: - name: Check out the repository (non-PR) - if: github.event_name != 'pull_request_target' + if: github.event_name != 'pull_request' uses: actions/checkout@v3 with: persist-credentials: false - name: Check out the repository (PR) - if: github.event_name == 'pull_request_target' + if: github.event_name == 'pull_request' uses: actions/checkout@v3 with: persist-credentials: false ref: ${{ github.event.pull_request.head.sha }} - name: Check if relevant files changed - if: github.event_name == 'pull_request_target' + if: github.event_name == 'pull_request' # https://github.com/marketplace/actions/paths-changes-filter # For each filter, it sets output variable named by the filter to the text: # 'true' - if any of changed files matches any of filter rules @@ -87,7 +87,7 @@ jobs: # also checks that the matrix generated is not empty if: >- ( - github.event_name != 'pull_request_target' || + github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository || contains(github.event.pull_request.labels.*.name, 'ok to test') ) @@ -99,9 +99,6 @@ jobs: matrix: python-version: - "3.8" - - "3.9" - - "3.10" - - "3.11" test: - "spark-thrift" - "spark-session" @@ -117,15 +114,15 @@ jobs: steps: - name: Check out the repository - if: github.event_name != 'pull_request_target' + if: github.event_name != 'pull_request' uses: actions/checkout@v3 with: persist-credentials: false # explicity checkout the branch for the PR, - # this is necessary for the `pull_request_target` event + # this is necessary for the `pull_request` event - name: Check out the repository (PR) - if: github.event_name == 'pull_request_target' + if: github.event_name == 'pull_request' uses: actions/checkout@v3 with: persist-credentials: false @@ -163,7 +160,7 @@ jobs: # also checks that the matrix generated is not empty if: >- ( - github.event_name != 'pull_request_target' || + github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository || contains(github.event.pull_request.labels.*.name, 'ok to test') ) @@ -176,9 +173,6 @@ jobs: matrix: python-version: - "3.8" - - "3.9" - - "3.10" - - "3.11" test: - 
"databricks-odbc-sql-endpoint" - "databricks-odbc-cluster" @@ -200,15 +194,15 @@ jobs: steps: - name: Check out the repository - if: github.event_name != 'pull_request_target' + if: github.event_name != 'pull_request' uses: actions/checkout@v3 with: persist-credentials: false # explicity checkout the branch for the PR, - # this is necessary for the `pull_request_target` event + # this is necessary for the `pull_request` event - name: Check out the repository (PR) - if: github.event_name == 'pull_request_target' + if: github.event_name == 'pull_request' uses: actions/checkout@v3 with: persist-credentials: false From e85232f3e476f4f80dfe188f3395612589245f7b Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 10:33:28 -0700 Subject: [PATCH 19/83] add comment to test --- .github/workflows/integration.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index f33ade986..b4f78a1c9 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -63,6 +63,7 @@ jobs: with: persist-credentials: false ref: ${{ github.event.pull_request.head.sha }} + - name: Check if relevant files changed if: github.event_name == 'pull_request' # https://github.com/marketplace/actions/paths-changes-filter @@ -80,6 +81,7 @@ jobs: - 'dbt/**' - 'tests/**' - 'dev-requirements.txt' + local-tests: name: test spark local against python ${{ matrix.python-version }} From fe3300e22b830b4f78c6e9877ff8521ccc838019 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 10:58:39 -0700 Subject: [PATCH 20/83] specify container user as root --- .github/workflows/integration.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index b4f78a1c9..4f45fc6ae 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -170,6 +170,7 @@ jobs: runs-on: ubuntu-latest container: image: "fishtownanalytics/test-container:10" + options: --user root strategy: fail-fast: false matrix: From b37e14b9dc2c0279d669c2a8fcb8b098834cd27b Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 10:59:08 -0700 Subject: [PATCH 21/83] formatting --- .github/workflows/integration.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 4f45fc6ae..72a86c92e 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -236,7 +236,7 @@ jobs: g++ \ unixodbc-dev \ unzip - + unzip /tmp/simba_odbc.zip -d /tmp/ \ && dpkg -i /tmp/SimbaSparkODBC-*/*.deb \ && echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ @@ -244,4 +244,4 @@ jobs: && rm -rf /tmp/SimbaSparkODBC* - name: Run tox for Spark ${{ matrix.test }} - run: tox -e integration-${{ matrix.test }} \ No newline at end of file + run: tox -e integration-${{ matrix.test }} From 51511ecfee08958080dbb0a9c8dbe881bec7c9b3 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 11:01:03 -0700 Subject: [PATCH 22/83] remove python setup for pre-existing container --- .github/workflows/integration.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 72a86c92e..288c7ea18 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -211,11 +211,6 @@ jobs: persist-credentials: false ref: ${{ github.event.pull_request.head.sha }} - - name: Set up 
Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install python dependencies run: | python -m pip install --user --upgrade pip From 98607b61458199d006ce8526e763bcc89f5426a6 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 11:21:38 -0700 Subject: [PATCH 23/83] download simba --- .github/workflows/integration.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 288c7ea18..5f6e4b45b 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -230,8 +230,13 @@ jobs: apt-get update && apt-get install -y --no-install-recommends \ g++ \ unixodbc-dev \ + curl \ unzip + curl -OL \ + https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip \ + /tmp/simba_odbc.zip + unzip /tmp/simba_odbc.zip -d /tmp/ \ && dpkg -i /tmp/SimbaSparkODBC-*/*.deb \ && echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ From e6ec41460d986cc552fa46024be471147f152920 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 11:31:45 -0700 Subject: [PATCH 24/83] fix curl call --- .github/workflows/integration.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 5f6e4b45b..764038394 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -233,9 +233,9 @@ jobs: curl \ unzip - curl -OL \ - https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip \ - /tmp/simba_odbc.zip + curl --create-dirs -OL \ + --output-dir "/tmp/simba_odbc.zip" \ + "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" unzip /tmp/simba_odbc.zip -d /tmp/ \ && dpkg -i /tmp/SimbaSparkODBC-*/*.deb \ From 05a2c0858434686ecc5f64ac4dd3d0bc3344c325 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 11:33:31 -0700 Subject: [PATCH 25/83] fix curl call --- .github/workflows/integration.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 764038394..9fcd701fe 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -230,8 +230,9 @@ jobs: apt-get update && apt-get install -y --no-install-recommends \ g++ \ unixodbc-dev \ - curl \ unzip + + apt-get install curl curl --create-dirs -OL \ --output-dir "/tmp/simba_odbc.zip" \ From a89ec581eff88b1c24a1da3cebd19c8981b6cd88 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 11:35:13 -0700 Subject: [PATCH 26/83] fix curl call --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 9fcd701fe..f8fa81ceb 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -232,7 +232,7 @@ jobs: unixodbc-dev \ unzip - apt-get install curl + apt-get install -y curl curl --create-dirs -OL \ --output-dir "/tmp/simba_odbc.zip" \ From 2a18fad185a748cb9ac82198653d97b7f3a5b597 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 11:39:53 -0700 Subject: [PATCH 27/83] fix curl call --- .github/workflows/integration.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml 
b/.github/workflows/integration.yml index f8fa81ceb..be6443a13 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -226,6 +226,7 @@ jobs: - name: Configure ODBC if: ${{ matrix.test != 'spark-databricks-http' }} + shell: bash run: | apt-get update && apt-get install -y --no-install-recommends \ g++ \ @@ -235,7 +236,7 @@ jobs: apt-get install -y curl curl --create-dirs -OL \ - --output-dir "/tmp/simba_odbc.zip" \ + --output "/tmp/simba_odbc.zip" \ "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" unzip /tmp/simba_odbc.zip -d /tmp/ \ From 1481396d6307b93f0b21aed722a6299bb50d29ba Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 11:44:48 -0700 Subject: [PATCH 28/83] fix curl call --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index be6443a13..a47d5271f 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -235,7 +235,7 @@ jobs: apt-get install -y curl - curl --create-dirs -OL \ + curl --create-dirs \ --output "/tmp/simba_odbc.zip" \ "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" From 31b427c47b6c064ba284b91818d964b3b03eff3a Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 11:50:51 -0700 Subject: [PATCH 29/83] fix curl call --- .github/workflows/integration.yml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index a47d5271f..d9e71d5e6 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -234,16 +234,14 @@ jobs: unzip apt-get install -y curl + rm -rf /tmp && mkdir /tmp + + curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" \ - curl --create-dirs \ - --output "/tmp/simba_odbc.zip" \ - "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" - - unzip /tmp/simba_odbc.zip -d /tmp/ \ + unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ \ && dpkg -i /tmp/SimbaSparkODBC-*/*.deb \ && echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ - && rm /tmp/simba_odbc.zip \ - && rm -rf /tmp/SimbaSparkODBC* + && rm -rf /tmp - name: Run tox for Spark ${{ matrix.test }} run: tox -e integration-${{ matrix.test }} From 15ba1da4adcb33dedec541dcdda6e0e1de1728a2 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 12:00:02 -0700 Subject: [PATCH 30/83] fix db test naming --- .github/workflows/integration.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index d9e71d5e6..ff48a9b30 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -177,8 +177,8 @@ jobs: python-version: - "3.8" test: - - "databricks-odbc-sql-endpoint" - - "databricks-odbc-cluster" + - "spark-databricks-odbc-sql-endpoint" + - "spark-databricks-odbc-cluster" - "spark-databricks-http" env: @@ -190,7 +190,7 @@ jobs: DD_ENV: ci DD_SERVICE: ${{ github.event.repository.name }} DBT_DATABRICKS_CLUSTER_NAME: ${{ secrets.DBT_DATABRICKS_CLUSTER_NAME }} - DBT_DATABRICKS_HOSTNAME: ${{ secrets.DBT_DATABRICKS_HOST }} + DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST }} DBT_DATABRICKS_ENDPOINT: ${{ 
secrets.DBT_DATABRICKS_ENDPOINT }} DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }} DBT_DATABRICS_USER: ${{ secrets.DBT_DATABRICKS_USER }} From ca33a236ebbdd9fa9cef5b1a703b0002b03257fe Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 13:52:37 -0700 Subject: [PATCH 31/83] confirm ODBC driver installed --- .github/workflows/integration.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index ff48a9b30..a8a131a61 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -241,7 +241,9 @@ jobs: unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ \ && dpkg -i /tmp/SimbaSparkODBC-*/*.deb \ && echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ - && rm -rf /tmp + && rm -rf /tmp \ + && dpkg -l | grep Simba # confirm that the driver is installed + - name: Run tox for Spark ${{ matrix.test }} run: tox -e integration-${{ matrix.test }} From 6274d77151ba32cb4b45abddb300603d88d860c6 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 14:17:52 -0700 Subject: [PATCH 32/83] add odbc driver env var --- .github/workflows/integration.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index a8a131a61..27f5d6bda 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -194,7 +194,7 @@ jobs: DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }} DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }} DBT_DATABRICS_USER: ${{ secrets.DBT_DATABRICKS_USER }} - + ODBC_DRIVER: "Simba" steps: - name: Check out the repository if: github.event_name != 'pull_request' @@ -240,7 +240,7 @@ jobs: unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ \ && dpkg -i /tmp/SimbaSparkODBC-*/*.deb \ - && echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ + && echo "[Simba]\nDriver = $HOME/opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ && rm -rf /tmp \ && dpkg -l | grep Simba # confirm that the driver is installed From 0ba91a2ebc553e322fd20ff3ebb49c9aa810e656 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 14:40:02 -0700 Subject: [PATCH 33/83] add odbc driver env var --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 27f5d6bda..1dd657085 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -240,7 +240,7 @@ jobs: unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ \ && dpkg -i /tmp/SimbaSparkODBC-*/*.deb \ - && echo "[Simba]\nDriver = $HOME/opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ + && echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ && rm -rf /tmp \ && dpkg -l | grep Simba # confirm that the driver is installed From f09202681f49ac144508d4bc4c0f72460767455c Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 15:11:32 -0700 Subject: [PATCH 34/83] specify platform --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 1dd657085..38b8faa35 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -170,7 +170,7 @@ jobs: runs-on: 
ubuntu-latest container: image: "fishtownanalytics/test-container:10" - options: --user root + options: --user root --platform linux/amd64 strategy: fail-fast: false matrix: From b968985be43080580252b9ac38e410248103e4e6 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 15:47:48 -0700 Subject: [PATCH 35/83] check odbc driver integrity --- .github/workflows/integration.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 38b8faa35..61cf5a634 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -157,7 +157,7 @@ jobs: run: tox -e integration-${{ matrix.test }} databricks-tests: - name: test spark databricks against python ${{ matrix.python-version }} + name: run ${{ matrix.test }} against python ${{ matrix.python-version }} # run if not a PR from a forked repository or has a label to mark as safe to test # also checks that the matrix generated is not empty if: >- @@ -193,7 +193,6 @@ jobs: DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST }} DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }} DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }} - DBT_DATABRICS_USER: ${{ secrets.DBT_DATABRICKS_USER }} ODBC_DRIVER: "Simba" steps: - name: Check out the repository @@ -244,6 +243,8 @@ jobs: && rm -rf /tmp \ && dpkg -l | grep Simba # confirm that the driver is installed + ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so + - name: Run tox for Spark ${{ matrix.test }} run: tox -e integration-${{ matrix.test }} From 8a49567fcf3c9748dd75e6ff9c629759b92a4bbd Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 15:53:42 -0700 Subject: [PATCH 36/83] add dbt user env var --- .github/workflows/integration.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 61cf5a634..41177f054 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -193,6 +193,10 @@ jobs: DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST }} DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }} DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }} + DBT_DATABRICKS_USERNAME: ${{ secrets.DBT_DATABRICKS_USERNAME }} + DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com" + DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com" + DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com" ODBC_DRIVER: "Simba" steps: - name: Check out the repository From 7723e8d90e7af6c2513b8e435ca40805591fcedc Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 15:58:29 -0700 Subject: [PATCH 37/83] add dbt user env var --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 41177f054..c91dc9bbb 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -193,7 +193,7 @@ jobs: DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST }} DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }} DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }} - DBT_DATABRICKS_USERNAME: ${{ secrets.DBT_DATABRICKS_USERNAME }} + DBT_DATABRICKS_USER: ${{ secrets.DBT_DATABRICKS_USERNAME }} DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com" DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com" DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com" From ea5ebfa32a90c909cbbc87e79bd094eb16030a1d Mon Sep 
17 00:00:00 2001 From: Colin Date: Mon, 23 Oct 2023 16:42:13 -0700 Subject: [PATCH 38/83] fix host_name env var --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index c91dc9bbb..5ee981c45 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -190,7 +190,7 @@ jobs: DD_ENV: ci DD_SERVICE: ${{ github.event.repository.name }} DBT_DATABRICKS_CLUSTER_NAME: ${{ secrets.DBT_DATABRICKS_CLUSTER_NAME }} - DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST }} + DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST_NAME }} DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }} DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }} DBT_DATABRICKS_USER: ${{ secrets.DBT_DATABRICKS_USERNAME }} From 610e5e912bebdcf105fcd64f777a035983fbffcb Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 24 Oct 2023 09:55:55 -0700 Subject: [PATCH 39/83] try removing architecture arg --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 5ee981c45..631e8a6de 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -170,7 +170,7 @@ jobs: runs-on: ubuntu-latest container: image: "fishtownanalytics/test-container:10" - options: --user root --platform linux/amd64 + options: --user root strategy: fail-fast: false matrix: From b4411ab011bb285cf2d07bf0be2ff90ee185f682 Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 24 Oct 2023 10:01:01 -0700 Subject: [PATCH 40/83] swap back to pull_request_target --- .github/workflows/integration.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 631e8a6de..62e276cc1 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -16,7 +16,7 @@ on: - "main" - "*.latest" - "releases/*" - pull_request: + pull_request_target: workflow_dispatch: inputs: dbt-core-branch: @@ -29,7 +29,7 @@ permissions: read-all # will cancel previous workflows triggered by the same event and for the same ref for PRs or same SHA otherwise concurrency: - group: ${{ github.workflow }}-${{ github.event_name }}-${{ contains(github.event_name, 'pull_request') && github.event.pull_request.head.ref || github.sha }} + group: ${{ github.workflow }}-${{ github.event_name }}-${{ contains(github.event_name, 'pull_request_target') && github.event.pull_request.head.ref || github.sha }} cancel-in-progress: true defaults: @@ -41,7 +41,7 @@ jobs: test-metadata: # run if not a PR from a forked repository or has a label to mark as safe to test if: >- - github.event_name != 'pull_request' || + github.event_name != 'pull_request_target' || github.event.pull_request.head.repo.full_name == github.repository || contains(github.event.pull_request.labels.*.name, 'ok to test') runs-on: ubuntu-latest @@ -52,20 +52,20 @@ jobs: steps: - name: Check out the repository (non-PR) - if: github.event_name != 'pull_request' + if: github.event_name != 'pull_request_target' uses: actions/checkout@v3 with: persist-credentials: false - name: Check out the repository (PR) - if: github.event_name == 'pull_request' + if: github.event_name == 'pull_request_target' uses: actions/checkout@v3 with: persist-credentials: false ref: ${{ github.event.pull_request.head.sha }} - name: Check if 
relevant files changed - if: github.event_name == 'pull_request' + if: github.event_name == 'pull_request_target' # https://github.com/marketplace/actions/paths-changes-filter # For each filter, it sets output variable named by the filter to the text: # 'true' - if any of changed files matches any of filter rules @@ -89,7 +89,7 @@ jobs: # also checks that the matrix generated is not empty if: >- ( - github.event_name != 'pull_request' || + github.event_name != 'pull_request_target' || github.event.pull_request.head.repo.full_name == github.repository || contains(github.event.pull_request.labels.*.name, 'ok to test') ) @@ -116,7 +116,7 @@ jobs: steps: - name: Check out the repository - if: github.event_name != 'pull_request' + if: github.event_name != 'pull_request_target' uses: actions/checkout@v3 with: persist-credentials: false @@ -124,7 +124,7 @@ jobs: # explicity checkout the branch for the PR, # this is necessary for the `pull_request` event - name: Check out the repository (PR) - if: github.event_name == 'pull_request' + if: github.event_name == 'pull_request_target' uses: actions/checkout@v3 with: persist-credentials: false @@ -162,7 +162,7 @@ jobs: # also checks that the matrix generated is not empty if: >- ( - github.event_name != 'pull_request' || + github.event_name != 'pull_request_target' || github.event.pull_request.head.repo.full_name == github.repository || contains(github.event.pull_request.labels.*.name, 'ok to test') ) @@ -200,7 +200,7 @@ jobs: ODBC_DRIVER: "Simba" steps: - name: Check out the repository - if: github.event_name != 'pull_request' + if: github.event_name != 'pull_request_target' uses: actions/checkout@v3 with: persist-credentials: false @@ -208,7 +208,7 @@ jobs: # explicity checkout the branch for the PR, # this is necessary for the `pull_request` event - name: Check out the repository (PR) - if: github.event_name == 'pull_request' + if: github.event_name == 'pull_request_target' uses: actions/checkout@v3 with: persist-credentials: false From cae6c8abc0abfc57d9a17dba3c0abb0495841249 Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 24 Oct 2023 12:13:18 -0700 Subject: [PATCH 41/83] try running on host instead of container --- .github/workflows/integration.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 62e276cc1..10f9ce6f0 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -168,9 +168,6 @@ jobs: ) runs-on: ubuntu-latest - container: - image: "fishtownanalytics/test-container:10" - options: --user root strategy: fail-fast: false matrix: @@ -214,6 +211,11 @@ jobs: persist-credentials: false ref: ${{ github.event.pull_request.head.sha }} + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install python dependencies run: | python -m pip install --user --upgrade pip From 0c689720b96d592ff2f8e8267bb5ef0e1e0a9736 Mon Sep 17 00:00:00 2001 From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com> Date: Tue, 24 Oct 2023 12:13:43 -0700 Subject: [PATCH 42/83] Update .github/workflows/integration.yml Co-authored-by: Emily Rockman --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 10f9ce6f0..d1829197b 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml 
@@ -203,7 +203,7 @@ jobs: persist-credentials: false # explicity checkout the branch for the PR, - # this is necessary for the `pull_request` event + # this is necessary for the `pull_request_target` event - name: Check out the repository (PR) if: github.event_name == 'pull_request_target' uses: actions/checkout@v3 From b2f63bd09fb59ba9f751bc425f81242afeef8bd6 Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 24 Oct 2023 14:37:41 -0700 Subject: [PATCH 43/83] try running odbcinst -j --- .github/workflows/integration.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index d1829197b..f3368d11a 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -250,7 +250,8 @@ jobs: && dpkg -l | grep Simba # confirm that the driver is installed ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so - + echo "--------------------------------------------" + odbcinst -j - name: Run tox for Spark ${{ matrix.test }} run: tox -e integration-${{ matrix.test }} From 80eb7e45e25316dfa539786975c34b6655d77e88 Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 24 Oct 2023 14:51:32 -0700 Subject: [PATCH 44/83] remove bash --- .github/workflows/integration.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index f3368d11a..45e313482 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -17,6 +17,10 @@ on: - "*.latest" - "releases/*" pull_request_target: + types: + - opened + - synchronize + - labeled workflow_dispatch: inputs: dbt-core-branch: @@ -231,7 +235,6 @@ jobs: - name: Configure ODBC if: ${{ matrix.test != 'spark-databricks-http' }} - shell: bash run: | apt-get update && apt-get install -y --no-install-recommends \ g++ \ From 4bbfa71b2c80f056a1e67c1587dbe06ac8fa3613 Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 24 Oct 2023 14:54:33 -0700 Subject: [PATCH 45/83] add sudo --- .github/workflows/integration.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 45e313482..90e2782a8 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -236,12 +236,12 @@ jobs: - name: Configure ODBC if: ${{ matrix.test != 'spark-databricks-http' }} run: | - apt-get update && apt-get install -y --no-install-recommends \ + sudo apt-get update && sudo apt-get install -y --no-install-recommends \ g++ \ unixodbc-dev \ unzip - apt-get install -y curl + sudo apt-get install -y curl rm -rf /tmp && mkdir /tmp curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" \ From b1d202023f10aaeb5b7742996ddcdf7ca4bc7abf Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 24 Oct 2023 14:55:46 -0700 Subject: [PATCH 46/83] add sudo --- .github/workflows/integration.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 90e2782a8..142752b66 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -247,12 +247,12 @@ jobs: curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" \ unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ \ - && dpkg -i /tmp/SimbaSparkODBC-*/*.deb \ + && sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb \ && echo "[Simba]\nDriver = 
/opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ && rm -rf /tmp \ - && dpkg -l | grep Simba # confirm that the driver is installed + && sudo dpkg -l | grep Simba # confirm that the driver is installed - ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so + sudo ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so echo "--------------------------------------------" odbcinst -j From 38fda3d22f8103c07ce0091a1b3b530c5d36d26f Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 24 Oct 2023 14:59:11 -0700 Subject: [PATCH 47/83] update odbc.ini --- .github/workflows/integration.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 142752b66..08f55f848 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -248,6 +248,7 @@ jobs: unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ \ && sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb \ + && echo "[ODBC Data Sources]\nSimba=Databricks ODBC Connector" >> /etc/odbc.ini \ && echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ && rm -rf /tmp \ && sudo dpkg -l | grep Simba # confirm that the driver is installed From 6b599a1eceb755a5ef5b91d95760b01a364f648c Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 24 Oct 2023 15:02:54 -0700 Subject: [PATCH 48/83] install libsasl2-modules-gssapi-mit --- .github/workflows/integration.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 08f55f848..6dfd716b2 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -239,6 +239,7 @@ jobs: sudo apt-get update && sudo apt-get install -y --no-install-recommends \ g++ \ unixodbc-dev \ + libsasl2-modules-gssapi-mit \ unzip sudo apt-get install -y curl From 0976c4f70fe8e36169dfb34b922c4e5cdc1f2238 Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 24 Oct 2023 15:08:07 -0700 Subject: [PATCH 49/83] install libsasl2-modules-gssapi-mit --- .github/workflows/integration.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 6dfd716b2..6807507df 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -249,8 +249,8 @@ jobs: unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ \ && sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb \ - && echo "[ODBC Data Sources]\nSimba=Databricks ODBC Connector" >> /etc/odbc.ini \ - && echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ + && sudo echo "[ODBC Data Sources]\nSimba=Databricks ODBC Connector" >> /etc/odbc.ini \ + && sudo echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ && rm -rf /tmp \ && sudo dpkg -l | grep Simba # confirm that the driver is installed From 42f2784210514349c14b54dcba673139f0226470 Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 24 Oct 2023 15:09:23 -0700 Subject: [PATCH 50/83] set -e on odbc install --- .github/workflows/integration.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 6807507df..235fb49e2 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -236,6 +236,7 @@ jobs: - name: Configure ODBC if: ${{ matrix.test != 'spark-databricks-http' }} run: | + set -e sudo apt-get update && sudo apt-get install -y --no-install-recommends \ 
g++ \ unixodbc-dev \ From 4f11291045081be0c2975772475b917ee24e4173 Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 24 Oct 2023 15:13:07 -0700 Subject: [PATCH 51/83] set -e on odbc install --- .github/workflows/integration.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 235fb49e2..92794b427 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -248,12 +248,12 @@ jobs: curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" \ - unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ \ - && sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb \ - && sudo echo "[ODBC Data Sources]\nSimba=Databricks ODBC Connector" >> /etc/odbc.ini \ - && sudo echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ - && rm -rf /tmp \ - && sudo dpkg -l | grep Simba # confirm that the driver is installed + unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ + sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb \ + echo "--------------------------------------------" + sudo echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ + rm -rf /tmp \ + sudo dpkg -l | grep Simba # confirm that the driver is installed sudo ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so echo "--------------------------------------------" From 1384084e4d08c3b3c9b449229192685eb90c96e0 Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 24 Oct 2023 15:14:20 -0700 Subject: [PATCH 52/83] set -e on odbc install --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 92794b427..e76a5d9ac 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -249,7 +249,7 @@ jobs: curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" \ unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ - sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb \ + sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb echo "--------------------------------------------" sudo echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ rm -rf /tmp \ From 543e321077ed193d05e60a3c3acaba7aca2c0e37 Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 24 Oct 2023 15:21:08 -0700 Subject: [PATCH 53/83] sudo echo odbc.inst --- .github/workflows/integration.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index e76a5d9ac..da40dde86 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -251,8 +251,9 @@ jobs: unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb echo "--------------------------------------------" - sudo echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ - rm -rf /tmp \ + sudo sh -c echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ + + rm -rf /tmp sudo dpkg -l | grep Simba # confirm that the driver is installed sudo ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so From f380d46a99205051d1bac84d4741009fb5f1de77 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 1 Nov 2023 20:19:17 -0400 Subject: [PATCH 54/83] remove postgres components --- 
.github/scripts/update_dbt_core_branch.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/scripts/update_dbt_core_branch.sh b/.github/scripts/update_dbt_core_branch.sh index d28a40c35..1a5a5c2d7 100755 --- a/.github/scripts/update_dbt_core_branch.sh +++ b/.github/scripts/update_dbt_core_branch.sh @@ -4,16 +4,13 @@ set -e git_branch=$1 target_req_file="dev-requirements.txt" core_req_sed_pattern="s|dbt-core.git.*#egg=dbt-core|dbt-core.git@${git_branch}#egg=dbt-core|g" -postgres_req_sed_pattern="s|dbt-core.git.*#egg=dbt-postgres|dbt-core.git@${git_branch}#egg=dbt-postgres|g" tests_req_sed_pattern="s|dbt-core.git.*#egg=dbt-tests|dbt-core.git@${git_branch}#egg=dbt-tests|g" if [[ "$OSTYPE" == darwin* ]]; then # mac ships with a different version of sed that requires a delimiter arg sed -i "" "$core_req_sed_pattern" $target_req_file - sed -i "" "$postgres_req_sed_pattern" $target_req_file sed -i "" "$tests_req_sed_pattern" $target_req_file else sed -i "$core_req_sed_pattern" $target_req_file - sed -i "$postgres_req_sed_pattern" $target_req_file sed -i "$tests_req_sed_pattern" $target_req_file fi core_version=$(curl "https://raw.githubusercontent.com/dbt-labs/dbt-core/${git_branch}/core/dbt/version.py" | grep "__version__ = *"|cut -d'=' -f2) From c334f3273bd7dda434d9bb4dac0f57579c2117d7 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 1 Nov 2023 20:23:18 -0400 Subject: [PATCH 55/83] remove release related items --- .github/scripts/update_release_branch.sh | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 .github/scripts/update_release_branch.sh diff --git a/.github/scripts/update_release_branch.sh b/.github/scripts/update_release_branch.sh deleted file mode 100644 index 75b9ccef6..000000000 --- a/.github/scripts/update_release_branch.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -e -set -e - -release_branch=$1 -target_req_file=".github/workflows/nightly-release.yml" -if [[ "$OSTYPE" == darwin* ]]; then - # mac ships with a different version of sed that requires a delimiter arg - sed -i "" "s|[0-9].[0-9].latest|$release_branch|" $target_req_file -else - sed -i "s|[0-9].[0-9].latest|$release_branch|" $target_req_file -fi From 19dcff3f4f44c99ab4c4e3ad8872597a5185cefa Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 1 Nov 2023 20:33:12 -0400 Subject: [PATCH 56/83] remove irrelevant output --- .github/workflows/integration.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index da40dde86..b85e058e2 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -52,7 +52,6 @@ jobs: outputs: matrix: ${{ steps.generate-matrix.outputs.result }} - run-python-tests: ${{ steps.filter.outputs.bigquery-python }} steps: - name: Check out the repository (non-PR) @@ -242,20 +241,20 @@ jobs: unixodbc-dev \ libsasl2-modules-gssapi-mit \ unzip - + sudo apt-get install -y curl rm -rf /tmp && mkdir /tmp - + curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" \ unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb echo "--------------------------------------------" sudo sh -c echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ - + rm -rf /tmp sudo dpkg -l | grep Simba # confirm that the driver is installed - + sudo ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so echo "--------------------------------------------" 
odbcinst -j From 01b0c0cdd74b88e92c7f44d58e092e356ed01b00 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 1 Nov 2023 20:37:19 -0400 Subject: [PATCH 57/83] move long bash script into its own file --- .github/scripts/configure_odbc.sh | 23 +++++++++++++++++++++++ .github/workflows/integration.yml | 24 +----------------------- 2 files changed, 24 insertions(+), 23 deletions(-) create mode 100644 .github/scripts/configure_odbc.sh diff --git a/.github/scripts/configure_odbc.sh b/.github/scripts/configure_odbc.sh new file mode 100644 index 000000000..e2bad8886 --- /dev/null +++ b/.github/scripts/configure_odbc.sh @@ -0,0 +1,23 @@ +set -e +sudo apt-get update && sudo apt-get install -y --no-install-recommends \ + g++ \ + unixodbc-dev \ + libsasl2-modules-gssapi-mit \ + unzip + +sudo apt-get install -y curl +rm -rf /tmp && mkdir /tmp + +curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" + +unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ +sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb +echo "--------------------------------------------" +sudo sh -c echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini + +rm -rf /tmp +sudo dpkg -l | grep Simba # confirm that the driver is installed + +sudo ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so +echo "--------------------------------------------" +odbcinst -j diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index b85e058e2..b9d6ddcbe 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -235,29 +235,7 @@ jobs: - name: Configure ODBC if: ${{ matrix.test != 'spark-databricks-http' }} run: | - set -e - sudo apt-get update && sudo apt-get install -y --no-install-recommends \ - g++ \ - unixodbc-dev \ - libsasl2-modules-gssapi-mit \ - unzip - - sudo apt-get install -y curl - rm -rf /tmp && mkdir /tmp - - curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" \ - - unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ - sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb - echo "--------------------------------------------" - sudo sh -c echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini \ - - rm -rf /tmp - sudo dpkg -l | grep Simba # confirm that the driver is installed - - sudo ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so - echo "--------------------------------------------" - odbcinst -j + ./.github/scripts/configure_odbc.sh - name: Run tox for Spark ${{ matrix.test }} run: tox -e integration-${{ matrix.test }} From d3d28446b87595580380136c4cc42a369e38a069 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 1 Nov 2023 20:58:25 -0400 Subject: [PATCH 58/83] update integration.yml to align with other adapters --- .github/workflows/integration.yml | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index b9d6ddcbe..6bdee8c32 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -8,7 +8,7 @@ # This will run for all PRs, when code is pushed to a release # branch, and when manually triggered. 
-name: Integration tests +name: Adapter Integration Tests on: push: @@ -49,7 +49,6 @@ jobs: github.event.pull_request.head.repo.full_name == github.repository || contains(github.event.pull_request.labels.*.name, 'ok to test') runs-on: ubuntu-latest - outputs: matrix: ${{ steps.generate-matrix.outputs.result }} @@ -86,7 +85,7 @@ jobs: - 'dev-requirements.txt' local-tests: - name: test spark local against python ${{ matrix.python-version }} + name: ${{ matrix.test }} / python ${{ matrix.python-version }} / ubuntu-latest # run if not a PR from a forked repository or has a label to mark as safe to test # also checks that the matrix generated is not empty @@ -96,7 +95,6 @@ jobs: github.event.pull_request.head.repo.full_name == github.repository || contains(github.event.pull_request.labels.*.name, 'ok to test') ) - runs-on: ubuntu-latest strategy: @@ -109,7 +107,8 @@ jobs: - "spark-session" env: - PYTEST_ADDOPTS: "-v --color=yes --csv test_results.csv" + TOXENV: integration-${{ matrix.test }} + PYTEST_ADDOPTS: "-v --color=yes --csv integration_results.csv" DBT_INVOCATION_ENV: github-actions DD_CIVISIBILITY_AGENTLESS_ENABLED: true DD_API_KEY: ${{ secrets.DATADOG_API_KEY }} @@ -124,7 +123,7 @@ jobs: with: persist-credentials: false - # explicity checkout the branch for the PR, + # explicitly checkout the branch for the PR, # this is necessary for the `pull_request` event - name: Check out the repository (PR) if: github.event_name == 'pull_request_target' @@ -156,11 +155,12 @@ jobs: with: compose-file: "./docker-compose.yml" - - name: Run tox for Spark ${{ matrix.test }} - run: tox -e integration-${{ matrix.test }} + - name: Run tox for ${{ matrix.test }} + run: tox -- --ddtrace databricks-tests: - name: run ${{ matrix.test }} against python ${{ matrix.python-version }} + name: ${{ matrix.test }} / python ${{ matrix.python-version }} / ubuntu-latest + # run if not a PR from a forked repository or has a label to mark as safe to test # also checks that the matrix generated is not empty if: >- @@ -182,6 +182,7 @@ jobs: - "spark-databricks-http" env: + TOXENV: integration-${{ matrix.test }} PYTEST_ADDOPTS: "-v --color=yes --csv test_results.csv" DBT_INVOCATION_ENV: github-actions DD_CIVISIBILITY_AGENTLESS_ENABLED: true @@ -205,7 +206,7 @@ jobs: with: persist-credentials: false - # explicity checkout the branch for the PR, + # explicitly checkout the branch for the PR, # this is necessary for the `pull_request_target` event - name: Check out the repository (PR) if: github.event_name == 'pull_request_target' @@ -237,5 +238,5 @@ jobs: run: | ./.github/scripts/configure_odbc.sh - - name: Run tox for Spark ${{ matrix.test }} - run: tox -e integration-${{ matrix.test }} + - name: Run tox for ${{ matrix.test }} + run: tox -- --ddtrace From 72daf90d0a5a20534e2b9c5b97f79cb50ca7742c Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 1 Nov 2023 21:08:36 -0400 Subject: [PATCH 59/83] revert name change --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 6bdee8c32..37449d892 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -8,7 +8,7 @@ # This will run for all PRs, when code is pushed to a release # branch, and when manually triggered. 
-name: Adapter Integration Tests +name: Integration tests on: push: From b43c9d1a2e7a97ed1c59e28a74e36769de69616c Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 1 Nov 2023 21:11:00 -0400 Subject: [PATCH 60/83] revert name change --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 37449d892..6bdee8c32 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -8,7 +8,7 @@ # This will run for all PRs, when code is pushed to a release # branch, and when manually triggered. -name: Integration tests +name: Adapter Integration Tests on: push: From 91715d23a01f0a1039d961b2c24790c8f1ded30e Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 1 Nov 2023 21:23:31 -0400 Subject: [PATCH 61/83] combine databricks and spark tests --- .github/workflows/integration.yml | 86 ++++--------------------------- 1 file changed, 10 insertions(+), 76 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 6bdee8c32..1e60aee1b 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -84,7 +84,7 @@ jobs: - 'tests/**' - 'dev-requirements.txt' - local-tests: + test: name: ${{ matrix.test }} / python ${{ matrix.python-version }} / ubuntu-latest # run if not a PR from a forked repository or has a label to mark as safe to test @@ -105,85 +105,13 @@ jobs: test: - "spark-thrift" - "spark-session" - - env: - TOXENV: integration-${{ matrix.test }} - PYTEST_ADDOPTS: "-v --color=yes --csv integration_results.csv" - DBT_INVOCATION_ENV: github-actions - DD_CIVISIBILITY_AGENTLESS_ENABLED: true - DD_API_KEY: ${{ secrets.DATADOG_API_KEY }} - DD_SITE: datadoghq.com - DD_ENV: ci - DD_SERVICE: ${{ github.event.repository.name }} - - steps: - - name: Check out the repository - if: github.event_name != 'pull_request_target' - uses: actions/checkout@v3 - with: - persist-credentials: false - - # explicitly checkout the branch for the PR, - # this is necessary for the `pull_request` event - - name: Check out the repository (PR) - if: github.event_name == 'pull_request_target' - uses: actions/checkout@v3 - with: - persist-credentials: false - ref: ${{ github.event.pull_request.head.sha }} - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - - name: Install python dependencies - run: | - python -m pip install --user --upgrade pip - python -m pip install tox - python -m pip --version - tox --version - - - name: Update dev_requirements.txt - if: inputs.dbt-core-branch != '' - run: | - pip install bumpversion - ./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }} - - - uses: isbang/compose-action@v1.5.1 - if: ${{ matrix.test == 'spark-thrift'}} - with: - compose-file: "./docker-compose.yml" - - - name: Run tox for ${{ matrix.test }} - run: tox -- --ddtrace - - databricks-tests: - name: ${{ matrix.test }} / python ${{ matrix.python-version }} / ubuntu-latest - - # run if not a PR from a forked repository or has a label to mark as safe to test - # also checks that the matrix generated is not empty - if: >- - ( - github.event_name != 'pull_request_target' || - github.event.pull_request.head.repo.full_name == github.repository || - contains(github.event.pull_request.labels.*.name, 'ok to test') - ) - - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: - - "3.8" - 
test: - "spark-databricks-odbc-sql-endpoint" - "spark-databricks-odbc-cluster" - "spark-databricks-http" env: TOXENV: integration-${{ matrix.test }} - PYTEST_ADDOPTS: "-v --color=yes --csv test_results.csv" + PYTEST_ADDOPTS: "-v --color=yes --csv integration_results.csv" DBT_INVOCATION_ENV: github-actions DD_CIVISIBILITY_AGENTLESS_ENABLED: true DD_API_KEY: ${{ secrets.DATADOG_API_KEY }} @@ -199,6 +127,7 @@ jobs: DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com" DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com" ODBC_DRIVER: "Simba" + steps: - name: Check out the repository if: github.event_name != 'pull_request_target' @@ -207,7 +136,7 @@ jobs: persist-credentials: false # explicitly checkout the branch for the PR, - # this is necessary for the `pull_request_target` event + # this is necessary for the `pull_request` event - name: Check out the repository (PR) if: github.event_name == 'pull_request_target' uses: actions/checkout@v3 @@ -233,8 +162,13 @@ jobs: pip install bumpversion ./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }} + - uses: isbang/compose-action@v1.5.1 + if: ${{ matrix.test == 'spark-thrift'}} + with: + compose-file: "./docker-compose.yml" + - name: Configure ODBC - if: ${{ matrix.test != 'spark-databricks-http' }} + if: ${{ matrix.test == 'spark-databricks-odbc-sql-endpoint' || matrix.test == 'spark-databricks-odbc-cluster' }} run: | ./.github/scripts/configure_odbc.sh From 943a8dc3030a4fbff9a1f401133a1ef382bb538a Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 1 Nov 2023 21:26:27 -0400 Subject: [PATCH 62/83] combine databricks and spark tests --- .github/workflows/integration.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 1e60aee1b..1389550a2 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -17,10 +17,6 @@ on: - "*.latest" - "releases/*" pull_request_target: - types: - - opened - - synchronize - - labeled workflow_dispatch: inputs: dbt-core-branch: From 3d0decefb6a2a453c6a806cc467a2763f02a9ade Mon Sep 17 00:00:00 2001 From: Colin Date: Thu, 30 Nov 2023 15:14:17 -0800 Subject: [PATCH 63/83] Add dagger --- .github/scripts/configure_odbc.sh | 23 ------------------- dagger/configure_odbc.sh | 20 ++++++++++++++++ dagger/run_dbt_spark_tests.py | 38 +++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 23 deletions(-) delete mode 100644 .github/scripts/configure_odbc.sh create mode 100755 dagger/configure_odbc.sh create mode 100644 dagger/run_dbt_spark_tests.py diff --git a/.github/scripts/configure_odbc.sh b/.github/scripts/configure_odbc.sh deleted file mode 100644 index e2bad8886..000000000 --- a/.github/scripts/configure_odbc.sh +++ /dev/null @@ -1,23 +0,0 @@ -set -e -sudo apt-get update && sudo apt-get install -y --no-install-recommends \ - g++ \ - unixodbc-dev \ - libsasl2-modules-gssapi-mit \ - unzip - -sudo apt-get install -y curl -rm -rf /tmp && mkdir /tmp - -curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" - -unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ -sudo dpkg -i /tmp/SimbaSparkODBC-*/*.deb -echo "--------------------------------------------" -sudo sh -c echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini - -rm -rf /tmp -sudo dpkg -l | grep Simba # confirm that the driver is installed - -sudo ldd /opt/simba/spark/lib/64/libsparkodbc_sb64.so -echo 
"--------------------------------------------" -odbcinst -j diff --git a/dagger/configure_odbc.sh b/dagger/configure_odbc.sh new file mode 100755 index 000000000..7126298c0 --- /dev/null +++ b/dagger/configure_odbc.sh @@ -0,0 +1,20 @@ +#!/bin/bash +set -e +apt update && apt install -y --no-install-recommends \ + g++ \ + git \ + curl \ + unixodbc-dev \ + libsasl2-modules-gssapi-mit \ + unzip + +rm -rf /tmp && mkdir /tmp + +curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" + +unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ +dpkg -i /tmp/*/simbaspark_2.6.16.1019-2_amd64.deb +echo "--------------------------------------------" +echo sh -c echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini +dpkg -l | grep Simba # confirm that the driver is installed +rm -rf /tmp diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py new file mode 100644 index 000000000..85a3b8246 --- /dev/null +++ b/dagger/run_dbt_spark_tests.py @@ -0,0 +1,38 @@ +import argparse +import sys + +import anyio as anyio +import dagger as dagger + + +async def test_spark(test_args): + async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client: + install_dir = client.host().directory("./", exclude=["\\.pytest_cache/*", ".idea/*"]) + platform = dagger.Platform("linux/amd64") + tst_container = ( + client.container() + .from_("python:3.8-slim") + .with_directory("/dbt_spark", install_dir) + .with_workdir("/dbt_spark") + .with_exec("./dagger/configure_odbc.sh") + .with_exec(["pip", "install", "-r", "requirements.txt"]) + .with_exec(["pip", "install", "-r", "dev-requirements.txt"]) + ) + + result = await (tst_container + .with_workdir("/dbt_spark") + .with_exec(["python", '-m', 'pytest', '-v', + '--profile', test_args.profile, + '-n', 'auto', + 'tests/functional/'] + ) + ).stdout() + + return result + + +parser = argparse.ArgumentParser() +parser.add_argument("--profile", required=True, type=str) +args = parser.parse_args() + +anyio.run(test_spark, args) From 080b816731708bc2bdae8f648588799b358b939c Mon Sep 17 00:00:00 2001 From: Colin Date: Thu, 30 Nov 2023 15:15:07 -0800 Subject: [PATCH 64/83] remove platform --- dagger/run_dbt_spark_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py index 85a3b8246..a2125a310 100644 --- a/dagger/run_dbt_spark_tests.py +++ b/dagger/run_dbt_spark_tests.py @@ -8,7 +8,7 @@ async def test_spark(test_args): async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client: install_dir = client.host().directory("./", exclude=["\\.pytest_cache/*", ".idea/*"]) - platform = dagger.Platform("linux/amd64") + tst_container = ( client.container() .from_("python:3.8-slim") From c8477ced3779879a40db2beca2135de38d9c3a87 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 8 Jan 2024 11:14:35 -0800 Subject: [PATCH 65/83] add dagger setup --- .github/workflows/integration.yml | 30 +++---- .gitignore | 2 + dagger/run_dbt_spark_tests.py | 105 +++++++++++++++++++++---- dagger/{ => scripts}/configure_odbc.sh | 16 +--- dev-requirements.txt | 4 +- tests/conftest.py | 2 +- 6 files changed, 109 insertions(+), 50 deletions(-) rename dagger/{ => scripts}/configure_odbc.sh (51%) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 1389550a2..88a73884f 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -96,14 +96,12 @@ 
jobs: strategy: fail-fast: false matrix: - python-version: - - "3.8" test: - - "spark-thrift" - - "spark-session" - - "spark-databricks-odbc-sql-endpoint" - - "spark-databricks-odbc-cluster" - - "spark-databricks-http" + - "apache_spark" + - "spark_session" + - "databricks_sql_endpoint" + - "databricks_cluster" + - "databricks_http_cluster" env: TOXENV: integration-${{ matrix.test }} @@ -143,14 +141,13 @@ jobs: - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: - python-version: ${{ matrix.python-version }} + python-version: "3.11" - name: Install python dependencies run: | python -m pip install --user --upgrade pip - python -m pip install tox python -m pip --version - tox --version + python -m pip install dagger-io~=0.8.0 - name: Update dev_requirements.txt if: inputs.dbt-core-branch != '' @@ -158,15 +155,6 @@ jobs: pip install bumpversion ./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }} - - uses: isbang/compose-action@v1.5.1 - if: ${{ matrix.test == 'spark-thrift'}} - with: - compose-file: "./docker-compose.yml" - - - name: Configure ODBC - if: ${{ matrix.test == 'spark-databricks-odbc-sql-endpoint' || matrix.test == 'spark-databricks-odbc-cluster' }} - run: | - ./.github/scripts/configure_odbc.sh - - name: Run tox for ${{ matrix.test }} - run: tox -- --ddtrace + - name: Run tests for ${{ matrix.test }} + run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }} diff --git a/.gitignore b/.gitignore index 33a83848c..1e8ff7411 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,5 @@ test.env .hive-metastore/ .spark-warehouse/ dbt-integration-tests +/.tool-versions +/.hypothesis/* diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py index a2125a310..a5be95dd4 100644 --- a/dagger/run_dbt_spark_tests.py +++ b/dagger/run_dbt_spark_tests.py @@ -4,29 +4,106 @@ import anyio as anyio import dagger as dagger +PG_PORT = 5432 + + +async def get_postgres_container(client: dagger.Client) -> (dagger.Container, str): + ctr = await ( + client.container() + .from_("postgres:13") + .with_env_variable("POSTGRES_PASSWORD", "postgres") + .with_exposed_port(PG_PORT) + ) + + return ctr, "postgres_db" + + +async def get_spark_container(client: dagger.Client) -> (dagger.Container, str): + spark_dir = client.host().directory("./dagger/spark-container") + spark_ctr = ( + client.container() + .from_("eclipse-temurin:8-jre") + .with_directory("/spark_setup", spark_dir) + .with_env_variable("SPARK_HOME", "/usr/spark") + .with_env_variable("PATH", "/usr/spark/bin:/usr/spark/sbin:$PATH", expand=True) + .with_file( + "/scripts/entrypoint.sh", + client.host().file("./dagger/spark-container/entrypoint.sh"), + permissions=755, + ) + .with_file( + "/scripts/install_spark.sh", + client.host().file("./dagger/spark-container/install_spark.sh"), + permissions=755, + ) + .with_exec(["./spark_setup/install_spark.sh"]) + .with_file("/usr/spark/conf/hive-site.xml", spark_dir.file("/hive-site.xml")) + .with_file("/usr/spark/conf/spark-defaults.conf", spark_dir.file("spark-defaults.conf")) + ) + + # postgres is the metastore here + pg_ctr, pg_host = await get_postgres_container(client) + + spark_ctr = ( + spark_ctr.with_service_binding(alias=pg_host, service=pg_ctr) + .with_exec( + [ + "/scripts/entrypoint.sh", + "--class", + "org.apache.spark.sql.hive.thriftserver.HiveThriftServer2", + "--name", + "Thrift JDBC/ODBC Server", + ] + ) + .with_exposed_port(10000) + ) + + return spark_ctr, "spark_db" + async def test_spark(test_args): async 
with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client: - install_dir = client.host().directory("./", exclude=["\\.pytest_cache/*", ".idea/*"]) - + req_files = client.host().directory("./", include=["*.txt", "*.env", "*.ini"]) + dbt_spark_dir = client.host().directory("./dbt") + test_dir = client.host().directory("./tests") + scripts = client.host().directory("./dagger/scripts") + platform = dagger.Platform("linux/amd64") tst_container = ( - client.container() + client.container(platform=platform) .from_("python:3.8-slim") - .with_directory("/dbt_spark", install_dir) - .with_workdir("/dbt_spark") - .with_exec("./dagger/configure_odbc.sh") + .with_directory("/.", req_files) + .with_directory("/dbt", dbt_spark_dir) + .with_directory("/tests", test_dir) + .with_directory("/scripts", scripts) + .with_exec("./scripts/install_os_reqs.sh") .with_exec(["pip", "install", "-r", "requirements.txt"]) .with_exec(["pip", "install", "-r", "dev-requirements.txt"]) ) - result = await (tst_container - .with_workdir("/dbt_spark") - .with_exec(["python", '-m', 'pytest', '-v', - '--profile', test_args.profile, - '-n', 'auto', - 'tests/functional/'] - ) - ).stdout() + if test_args.profile == "apache_spark": + spark_ctr, spark_host = await get_spark_container(client) + tst_container = tst_container.with_service_binding(alias=spark_host, service=spark_ctr) + + elif test_args.profile in ["databricks_cluster", "databricks_sql_endpoint"]: + tst_container = tst_container.with_exec("./scripts/configure_odbc.sh") + + elif test_args.profile == "spark_session": + tst_container = tst_container.with_exec(["pip", "install", "pyspark"]) + tst_container = tst_container.with_exec(["apt-get", "install", "openjdk-17-jre", "-y"]) + + result = await tst_container.with_exec( + [ + "python", + "-m", + "pytest", + "-v", + "--profile", + test_args.profile, + "-n", + "auto", + "tests/functional/", + ] + ).stdout() return result diff --git a/dagger/configure_odbc.sh b/dagger/scripts/configure_odbc.sh similarity index 51% rename from dagger/configure_odbc.sh rename to dagger/scripts/configure_odbc.sh index 7126298c0..50e80914d 100755 --- a/dagger/configure_odbc.sh +++ b/dagger/scripts/configure_odbc.sh @@ -1,20 +1,12 @@ #!/bin/bash -set -e -apt update && apt install -y --no-install-recommends \ - g++ \ - git \ - curl \ - unixodbc-dev \ - libsasl2-modules-gssapi-mit \ - unzip - +set -eo rm -rf /tmp && mkdir /tmp curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" - unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ -dpkg -i /tmp/*/simbaspark_2.6.16.1019-2_amd64.deb +dpkg -i /tmp/SimbaSparkODBC-2.6.16.1019-Debian-64bit/simbaspark_2.6.16.1019-2_amd64.deb echo "--------------------------------------------" -echo sh -c echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini +echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini dpkg -l | grep Simba # confirm that the driver is installed +export ODBC_DRIVER="/opt/simba/spark/lib/64/libsparkodbc_sb64.so" rm -rf /tmp diff --git a/dev-requirements.txt b/dev-requirements.txt index 8f94d509d..89c55d3f9 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,7 +1,7 @@ # install latest changes in dbt-core # TODO: how to automate switching from develop to version branches? 
-git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core -git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter +git+https://github.com/dbt-labs/dbt-core.git@c2bc2f009bbeeb46b3c69d082ab4d485597898af#egg=dbt-core&subdirectory=core +git+https://github.com/dbt-labs/dbt-core.git@c2bc2f009bbeeb46b3c69d082ab4d485597898af#egg=dbt-tests-adapter&subdirectory=tests/adapter # if version 1.x or greater -> pin to major version # if version 0.x -> pin to minor diff --git a/tests/conftest.py b/tests/conftest.py index 94969e406..700ade4d3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -38,7 +38,7 @@ def dbt_profile_target(request): def apache_spark_target(): return { "type": "spark", - "host": "localhost", + "host": "spark_db", "user": "dbt", "method": "thrift", "port": 10000, From c0a37aeff43c549131299ea4b5a487baf06634ae Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 8 Jan 2024 11:15:00 -0800 Subject: [PATCH 66/83] add dagger setup --- dagger/scripts/install_os_reqs.sh | 10 +++++ dagger/spark-container/entrypoint.sh | 15 +++++++ dagger/spark-container/hive-site.xml | 46 ++++++++++++++++++++++ dagger/spark-container/install_spark.sh | 15 +++++++ dagger/spark-container/spark-defaults.conf | 9 +++++ 5 files changed, 95 insertions(+) create mode 100755 dagger/scripts/install_os_reqs.sh create mode 100644 dagger/spark-container/entrypoint.sh create mode 100644 dagger/spark-container/hive-site.xml create mode 100755 dagger/spark-container/install_spark.sh create mode 100644 dagger/spark-container/spark-defaults.conf diff --git a/dagger/scripts/install_os_reqs.sh b/dagger/scripts/install_os_reqs.sh new file mode 100755 index 000000000..47457b8d6 --- /dev/null +++ b/dagger/scripts/install_os_reqs.sh @@ -0,0 +1,10 @@ +#!/bin/bash +set -eo +apt-get update && apt-get install -y --no-install-recommends \ + g++ \ + git \ + curl \ + unixodbc \ + unixodbc-dev \ + libsasl2-modules-gssapi-mit \ + unzip \ No newline at end of file diff --git a/dagger/spark-container/entrypoint.sh b/dagger/spark-container/entrypoint.sh new file mode 100644 index 000000000..4b15cab61 --- /dev/null +++ b/dagger/spark-container/entrypoint.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +if [ -n "$WAIT_FOR" ]; then + IFS=';' read -a HOSTPORT_ARRAY <<< "$WAIT_FOR" + for HOSTPORT in "${HOSTPORT_ARRAY[@]}" + do + WAIT_FOR_HOST=${HOSTPORT%:*} + WAIT_FOR_PORT=${HOSTPORT#*:} + + echo Waiting for $WAIT_FOR_HOST to listen on $WAIT_FOR_PORT... + while ! 
nc -z $WAIT_FOR_HOST $WAIT_FOR_PORT; do echo sleeping; sleep 2; done + done +fi +echo "$PATH" +exec spark-submit "$@" diff --git a/dagger/spark-container/hive-site.xml b/dagger/spark-container/hive-site.xml new file mode 100644 index 000000000..93e966fb7 --- /dev/null +++ b/dagger/spark-container/hive-site.xml @@ -0,0 +1,46 @@ + + + + + + + + javax.jdo.option.ConnectionURL + jdbc:postgresql://postgres_db/postgres + + + + javax.jdo.option.ConnectionDriverName + org.postgresql.Driver + + + + javax.jdo.option.ConnectionUserName + postgres + + + + javax.jdo.option.ConnectionPassword + postgres + + + + hive.metastore.schema.verification + false + + diff --git a/dagger/spark-container/install_spark.sh b/dagger/spark-container/install_spark.sh new file mode 100755 index 000000000..476f362a9 --- /dev/null +++ b/dagger/spark-container/install_spark.sh @@ -0,0 +1,15 @@ +set -e + +SPARK_VERSION=3.1.3 +HADOOP_VERSION=3.2 + +apt-get update && \ +apt-get install -y wget netcat procps libpostgresql-jdbc-java && \ +wget -q "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ +tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ +rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ +mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \ +ln -s /usr/share/java/postgresql-jdbc4.jar /usr/spark/jars/postgresql-jdbc4.jar && \ +apt-get remove -y wget && \ +apt-get autoremove -y && \ +apt-get clean diff --git a/dagger/spark-container/spark-defaults.conf b/dagger/spark-container/spark-defaults.conf new file mode 100644 index 000000000..30ec59591 --- /dev/null +++ b/dagger/spark-container/spark-defaults.conf @@ -0,0 +1,9 @@ +spark.driver.memory 2g +spark.executor.memory 2g +spark.hadoop.datanucleus.autoCreateTables true +spark.hadoop.datanucleus.schema.autoCreateTables true +spark.hadoop.datanucleus.fixedDatastore false +spark.serializer org.apache.spark.serializer.KryoSerializer +spark.jars.packages org.apache.hudi:hudi-spark3-bundle_2.12:0.10.0 +spark.sql.extensions org.apache.spark.sql.hudi.HoodieSparkSessionExtension +spark.driver.userClassPathFirst true From 8c6a7455a411d8573005ff555491ef438c0aea3d Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 8 Jan 2024 11:39:10 -0800 Subject: [PATCH 67/83] set env vars --- dagger/run_dbt_spark_tests.py | 38 ++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py index a5be95dd4..ca7cffd3b 100644 --- a/dagger/run_dbt_spark_tests.py +++ b/dagger/run_dbt_spark_tests.py @@ -1,10 +1,38 @@ +import os + import argparse import sys import anyio as anyio import dagger as dagger +from dotenv import find_dotenv, load_dotenv PG_PORT = 5432 +load_dotenv(find_dotenv("test.env")) +DEFAULT_ENV_VARS = { +"DBT_TEST_USER_1": "buildbot+dbt_test_user_1@dbtlabs.com", +"DBT_TEST_USER_2":"buildbot+dbt_test_user_2@dbtlabs.com", +"DBT_TEST_USER_3": "buildbot+dbt_test_user_3@dbtlabs.com", +} + +def env_variables(envs: dict[str, str]): + def env_variables_inner(ctr: dagger.Container): + for key, value in envs.items(): + ctr = ctr.with_env_variable(key, value) + return ctr + + return env_variables_inner + + +def get_databricks_env_vars(): + + return { + "DBT_DATABRICKS_TOKEN": os.environ["DBT_DATABRICKS_TOKEN"], + "DBT_DATABRICKS_HOST_NAME": os.environ["DBT_DATABRICKS_HOST_NAME"], + "DBT_DATABRICKS_ENDPOINT": os.environ["DBT_DATABRICKS_ENDPOINT"], + "DBT_DATABRICKS_CLUSTER_NAME": 
os.environ["DBT_DATABRICKS_CLUSTER_NAME"], + "ODBC_DRIVER": "/opt/simba/spark/lib/64/libsparkodbc_sb64.so", + } async def get_postgres_container(client: dagger.Client) -> (dagger.Container, str): @@ -63,6 +91,7 @@ async def get_spark_container(client: dagger.Client) -> (dagger.Container, str): async def test_spark(test_args): async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client: + test_profile = test_args.profile req_files = client.host().directory("./", include=["*.txt", "*.env", "*.ini"]) dbt_spark_dir = client.host().directory("./dbt") test_dir = client.host().directory("./tests") @@ -80,17 +109,20 @@ async def test_spark(test_args): .with_exec(["pip", "install", "-r", "dev-requirements.txt"]) ) - if test_args.profile == "apache_spark": + if test_profile == "apache_spark": spark_ctr, spark_host = await get_spark_container(client) tst_container = tst_container.with_service_binding(alias=spark_host, service=spark_ctr) - elif test_args.profile in ["databricks_cluster", "databricks_sql_endpoint"]: + elif test_profile in ["databricks_cluster", "databricks_sql_endpoint"]: tst_container = tst_container.with_exec("./scripts/configure_odbc.sh") - elif test_args.profile == "spark_session": + elif test_profile == "spark_session": tst_container = tst_container.with_exec(["pip", "install", "pyspark"]) tst_container = tst_container.with_exec(["apt-get", "install", "openjdk-17-jre", "-y"]) + if "databricks" in test_profile: + tst_container = tst_container.with_(env_variables(get_databricks_env_vars())) + tst_container = tst_container.with_(env_variables(DEFAULT_ENV_VARS)) result = await tst_container.with_exec( [ "python", From 1ae321a264a1ebefa76ce1cb777ed2c9732bedc6 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 8 Jan 2024 11:41:10 -0800 Subject: [PATCH 68/83] install requirements --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 88a73884f..67b6ed8e3 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -147,7 +147,7 @@ jobs: run: | python -m pip install --user --upgrade pip python -m pip --version - python -m pip install dagger-io~=0.8.0 + python -m pip install -r dagger/requirements.txt - name: Update dev_requirements.txt if: inputs.dbt-core-branch != '' From 6361429e44b7e8bb0182a629850ca2db922e0ab6 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 8 Jan 2024 11:41:18 -0800 Subject: [PATCH 69/83] install requirements --- dagger/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 dagger/requirements.txt diff --git a/dagger/requirements.txt b/dagger/requirements.txt new file mode 100644 index 000000000..3634ceeb7 --- /dev/null +++ b/dagger/requirements.txt @@ -0,0 +1,2 @@ +dagger-io~=0.8.0 +python-dotenv \ No newline at end of file From 6bca5dc715f7b142bc35c6e64c8bef7a89edbdee Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 8 Jan 2024 14:51:42 -0800 Subject: [PATCH 70/83] add DEFAULT_ENV_VARS and test_path arg --- dagger/run_dbt_spark_tests.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py index ca7cffd3b..864d9cad6 100644 --- a/dagger/run_dbt_spark_tests.py +++ b/dagger/run_dbt_spark_tests.py @@ -10,11 +10,12 @@ PG_PORT = 5432 load_dotenv(find_dotenv("test.env")) DEFAULT_ENV_VARS = { -"DBT_TEST_USER_1": "buildbot+dbt_test_user_1@dbtlabs.com", 
-"DBT_TEST_USER_2":"buildbot+dbt_test_user_2@dbtlabs.com", -"DBT_TEST_USER_3": "buildbot+dbt_test_user_3@dbtlabs.com", + "DBT_TEST_USER_1": os.getenv("DBT_TEST_USER_1", "buildbot+dbt_test_user_1@dbtlabs.com"), + "DBT_TEST_USER_2": os.getenv("DBT_TEST_USER_2","buildbot+dbt_test_user_2@dbtlabs.com"), + "DBT_TEST_USER_3": os.getenv("DBT_TEST_USER_3", "buildbot+dbt_test_user_3@dbtlabs.com"), } + def env_variables(envs: dict[str, str]): def env_variables_inner(ctr: dagger.Container): for key, value in envs.items(): @@ -25,7 +26,6 @@ def env_variables_inner(ctr: dagger.Container): def get_databricks_env_vars(): - return { "DBT_DATABRICKS_TOKEN": os.environ["DBT_DATABRICKS_TOKEN"], "DBT_DATABRICKS_HOST_NAME": os.environ["DBT_DATABRICKS_HOST_NAME"], @@ -123,18 +123,14 @@ async def test_spark(test_args): if "databricks" in test_profile: tst_container = tst_container.with_(env_variables(get_databricks_env_vars())) tst_container = tst_container.with_(env_variables(DEFAULT_ENV_VARS)) + test_path = test_args.test_path if test_args.test_path else "tests/functional/adapter" result = await tst_container.with_exec( - [ - "python", - "-m", - "pytest", - "-v", - "--profile", - test_args.profile, - "-n", - "auto", - "tests/functional/", - ] + ["python", "-m", "pytest", + "-v", + "--profile", test_args.profile, + "-n", "auto", + test_path, + ] ).stdout() return result @@ -142,6 +138,7 @@ async def test_spark(test_args): parser = argparse.ArgumentParser() parser.add_argument("--profile", required=True, type=str) +parser.add_argument("--test-path", required=False, type=str) args = parser.parse_args() anyio.run(test_spark, args) From f4293e0999276393d7ce4e288dbd87c58d3adc32 Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 8 Jan 2024 15:00:54 -0800 Subject: [PATCH 71/83] remove circle ci --- .circleci/config.yml | 136 ------------------------------------------- README.md | 3 - 2 files changed, 139 deletions(-) delete mode 100644 .circleci/config.yml diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index f2a3b6357..000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,136 +0,0 @@ -version: 2.1 - -jobs: - unit: - environment: - DBT_INVOCATION_ENV: circle - docker: - - image: fishtownanalytics/test-container:10 - steps: - - checkout - - run: tox -e flake8,unit - -# Turning off for now due to flaky runs of tests will turn back on at later date. 
- integration-spark-session: - environment: - DBT_INVOCATION_ENV: circle - docker: - - image: godatadriven/pyspark:3.1 - steps: - - checkout - - run: apt-get update - - run: conda install python=3.10 - - run: python3 -m pip install --upgrade pip - - run: apt-get install -y git gcc g++ unixodbc-dev libsasl2-dev libxml2-dev libxslt-dev - - run: python3 -m pip install tox - - run: - name: Run integration tests - command: tox -e integration-spark-session - no_output_timeout: 1h - - store_artifacts: - path: ./logs - - integration-spark-thrift: - environment: - DBT_INVOCATION_ENV: circle - docker: - - image: fishtownanalytics/test-container:10 - - image: godatadriven/spark:3.1.1 - environment: - WAIT_FOR: localhost:5432 - command: > - --class org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 - --name Thrift JDBC/ODBC Server - - image: postgres:9.6.17-alpine - environment: - POSTGRES_USER: dbt - POSTGRES_PASSWORD: dbt - POSTGRES_DB: metastore - - steps: - - checkout - - - run: - name: Wait for Spark-Thrift - command: dockerize -wait tcp://localhost:10000 -timeout 15m -wait-retry-interval 5s - - - run: - name: Run integration tests - command: tox -e integration-spark-thrift - no_output_timeout: 1h - - store_artifacts: - path: ./logs - - integration-spark-databricks-http: - environment: - DBT_INVOCATION_ENV: circle - DBT_DATABRICKS_RETRY_ALL: True - DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com" - DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com" - DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com" - docker: - - image: fishtownanalytics/test-container:10 - steps: - - checkout - - run: - name: Run integration tests - command: tox -e integration-spark-databricks-http - no_output_timeout: 1h - - store_artifacts: - path: ./logs - - integration-spark-databricks-odbc-cluster: &databricks-odbc - environment: - DBT_INVOCATION_ENV: circle - ODBC_DRIVER: Simba # TODO: move env var to Docker image - DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com" - DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com" - DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com" - docker: - # image based on `fishtownanalytics/test-container` w/ Simba ODBC Spark driver installed - - image: 828731156495.dkr.ecr.us-east-1.amazonaws.com/dbt-spark-odbc-test-container:latest - aws_auth: - aws_access_key_id: $AWS_ACCESS_KEY_ID_STAGING - aws_secret_access_key: $AWS_SECRET_ACCESS_KEY_STAGING - steps: - - checkout - - run: - name: Run integration tests - command: tox -e integration-spark-databricks-odbc-cluster - no_output_timeout: 1h - - store_artifacts: - path: ./logs - - integration-spark-databricks-odbc-endpoint: - <<: *databricks-odbc - steps: - - checkout - - run: - name: Run integration tests - command: tox -e integration-spark-databricks-odbc-sql-endpoint - no_output_timeout: 1h - - store_artifacts: - path: ./logs - -workflows: - version: 2 - test-everything: - jobs: - - unit - - integration-spark-session: - requires: - - unit - - integration-spark-thrift: - requires: - - unit - - integration-spark-databricks-http: - requires: - - integration-spark-thrift - - integration-spark-databricks-odbc-cluster: - context: aws-credentials - requires: - - integration-spark-thrift - - integration-spark-databricks-odbc-endpoint: - context: aws-credentials - requires: - - integration-spark-thrift diff --git a/README.md b/README.md index 2d2586795..7e95b1fc3 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,6 @@ Unit Tests Badge - - Integration Tests Badge -

**[dbt](https://www.getdbt.com/)** enables data analysts and engineers to transform their data using the same practices that software engineers use to build applications. From d39806558844a5babd6c1c0ad8e4712be7b89a4f Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 8 Jan 2024 19:45:29 -0800 Subject: [PATCH 72/83] formatting --- dagger/requirements.txt | 2 +- dagger/run_dbt_spark_tests.py | 9 ++------- dagger/scripts/install_os_reqs.sh | 2 +- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/dagger/requirements.txt b/dagger/requirements.txt index 3634ceeb7..df36543c2 100644 --- a/dagger/requirements.txt +++ b/dagger/requirements.txt @@ -1,2 +1,2 @@ dagger-io~=0.8.0 -python-dotenv \ No newline at end of file +python-dotenv diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py index 864d9cad6..c9455bdde 100644 --- a/dagger/run_dbt_spark_tests.py +++ b/dagger/run_dbt_spark_tests.py @@ -11,7 +11,7 @@ load_dotenv(find_dotenv("test.env")) DEFAULT_ENV_VARS = { "DBT_TEST_USER_1": os.getenv("DBT_TEST_USER_1", "buildbot+dbt_test_user_1@dbtlabs.com"), - "DBT_TEST_USER_2": os.getenv("DBT_TEST_USER_2","buildbot+dbt_test_user_2@dbtlabs.com"), + "DBT_TEST_USER_2": os.getenv("DBT_TEST_USER_2", "buildbot+dbt_test_user_2@dbtlabs.com"), "DBT_TEST_USER_3": os.getenv("DBT_TEST_USER_3", "buildbot+dbt_test_user_3@dbtlabs.com"), } @@ -125,12 +125,7 @@ async def test_spark(test_args): tst_container = tst_container.with_(env_variables(DEFAULT_ENV_VARS)) test_path = test_args.test_path if test_args.test_path else "tests/functional/adapter" result = await tst_container.with_exec( - ["python", "-m", "pytest", - "-v", - "--profile", test_args.profile, - "-n", "auto", - test_path, - ] + ["pytest", "-v", "--profile", test_profile, "-n", "auto", test_path] ).stdout() return result diff --git a/dagger/scripts/install_os_reqs.sh b/dagger/scripts/install_os_reqs.sh index 47457b8d6..b50027f52 100755 --- a/dagger/scripts/install_os_reqs.sh +++ b/dagger/scripts/install_os_reqs.sh @@ -7,4 +7,4 @@ apt-get update && apt-get install -y --no-install-recommends \ unixodbc \ unixodbc-dev \ libsasl2-modules-gssapi-mit \ - unzip \ No newline at end of file + unzip From 6108d4405630639022346d48f5e8a9e39286757e Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 8 Jan 2024 19:52:59 -0800 Subject: [PATCH 73/83] update changie --- .changes/unreleased/Under the Hood-20230929-161218.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.changes/unreleased/Under the Hood-20230929-161218.yaml b/.changes/unreleased/Under the Hood-20230929-161218.yaml index c82e8252e..4dc54ae5c 100644 --- a/.changes/unreleased/Under the Hood-20230929-161218.yaml +++ b/.changes/unreleased/Under the Hood-20230929-161218.yaml @@ -1,6 +1,6 @@ kind: Under the Hood -body: Add Github action for integration testing +body: Add Github action for integration testing, use dagger-io to run tests. Remove circle ci workflow. 
time: 2023-09-29T16:12:18.968755+02:00 custom: - Author: JCZuurmond + Author: JCZuurmond, colin-rogers-dbt Issue: "719" From d472f3b61a4d84bc93323431638869a8ed1687b5 Mon Sep 17 00:00:00 2001 From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com> Date: Tue, 9 Jan 2024 09:33:57 -0800 Subject: [PATCH 74/83] Update .changes/unreleased/Under the Hood-20230929-161218.yaml Co-authored-by: Emily Rockman --- .changes/unreleased/Under the Hood-20230929-161218.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changes/unreleased/Under the Hood-20230929-161218.yaml b/.changes/unreleased/Under the Hood-20230929-161218.yaml index 4dc54ae5c..9b5c6818b 100644 --- a/.changes/unreleased/Under the Hood-20230929-161218.yaml +++ b/.changes/unreleased/Under the Hood-20230929-161218.yaml @@ -1,5 +1,5 @@ kind: Under the Hood -body: Add Github action for integration testing, use dagger-io to run tests. Remove circle ci workflow. +body: Add GitHub action for integration testing and use dagger-io to run tests. Remove CircleCI workflow. time: 2023-09-29T16:12:18.968755+02:00 custom: Author: JCZuurmond, colin-rogers-dbt From ce92bcf4a9063d75beed734d9009a3e8f4be1dd0 Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 9 Jan 2024 09:50:03 -0800 Subject: [PATCH 75/83] formatting fixes and simplify env_var handling --- dagger/run_dbt_spark_tests.py | 26 ++++++++------------------ dagger/scripts/configure_odbc.sh | 1 - docker/Dockerfile | 6 +++--- 3 files changed, 11 insertions(+), 22 deletions(-) diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py index c9455bdde..3e4c8347f 100644 --- a/dagger/run_dbt_spark_tests.py +++ b/dagger/run_dbt_spark_tests.py @@ -9,11 +9,13 @@ PG_PORT = 5432 load_dotenv(find_dotenv("test.env")) -DEFAULT_ENV_VARS = { - "DBT_TEST_USER_1": os.getenv("DBT_TEST_USER_1", "buildbot+dbt_test_user_1@dbtlabs.com"), - "DBT_TEST_USER_2": os.getenv("DBT_TEST_USER_2", "buildbot+dbt_test_user_2@dbtlabs.com"), - "DBT_TEST_USER_3": os.getenv("DBT_TEST_USER_3", "buildbot+dbt_test_user_3@dbtlabs.com"), -} +# if env vars aren't specified in test.env (i.e. 
in github actions worker), use the ones from the host +TESTING_ENV_VARS = {env_name: os.environ[env_name] for env_name in os.environ + if env_name.startswith(("DD_", "DBT_"))} + +TESTING_ENV_VARS.update({ + "ODBC_DRIVER": "/opt/simba/spark/lib/64/libsparkodbc_sb64.so", +}) def env_variables(envs: dict[str, str]): @@ -25,16 +27,6 @@ def env_variables_inner(ctr: dagger.Container): return env_variables_inner -def get_databricks_env_vars(): - return { - "DBT_DATABRICKS_TOKEN": os.environ["DBT_DATABRICKS_TOKEN"], - "DBT_DATABRICKS_HOST_NAME": os.environ["DBT_DATABRICKS_HOST_NAME"], - "DBT_DATABRICKS_ENDPOINT": os.environ["DBT_DATABRICKS_ENDPOINT"], - "DBT_DATABRICKS_CLUSTER_NAME": os.environ["DBT_DATABRICKS_CLUSTER_NAME"], - "ODBC_DRIVER": "/opt/simba/spark/lib/64/libsparkodbc_sb64.so", - } - - async def get_postgres_container(client: dagger.Client) -> (dagger.Container, str): ctr = await ( client.container() @@ -120,9 +112,7 @@ async def test_spark(test_args): tst_container = tst_container.with_exec(["pip", "install", "pyspark"]) tst_container = tst_container.with_exec(["apt-get", "install", "openjdk-17-jre", "-y"]) - if "databricks" in test_profile: - tst_container = tst_container.with_(env_variables(get_databricks_env_vars())) - tst_container = tst_container.with_(env_variables(DEFAULT_ENV_VARS)) + tst_container = tst_container.with_(env_variables(TESTING_ENV_VARS)) test_path = test_args.test_path if test_args.test_path else "tests/functional/adapter" result = await tst_container.with_exec( ["pytest", "-v", "--profile", test_profile, "-n", "auto", test_path] diff --git a/dagger/scripts/configure_odbc.sh b/dagger/scripts/configure_odbc.sh index 50e80914d..ddf020ad2 100755 --- a/dagger/scripts/configure_odbc.sh +++ b/dagger/scripts/configure_odbc.sh @@ -8,5 +8,4 @@ dpkg -i /tmp/SimbaSparkODBC-2.6.16.1019-Debian-64bit/simbaspark_2.6.16.1019-2_am echo "--------------------------------------------" echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini dpkg -l | grep Simba # confirm that the driver is installed -export ODBC_DRIVER="/opt/simba/spark/lib/64/libsparkodbc_sb64.so" rm -rf /tmp diff --git a/docker/Dockerfile b/docker/Dockerfile index 85d01ba8a..bb4d378ed 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,8 +2,8 @@ ARG OPENJDK_VERSION=8 FROM eclipse-temurin:${OPENJDK_VERSION}-jre ARG BUILD_DATE -ARG SPARK_VERSION=3.1.3 -ARG HADOOP_VERSION=3.2 +ARG SPARK_VERSION=3.3.2 +ARG HADOOP_VERSION=3 LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \ org.label-schema.build-date=$BUILD_DATE \ @@ -14,7 +14,7 @@ ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}" RUN apt-get update && \ apt-get install -y wget netcat procps libpostgresql-jdbc-java && \ - wget -q "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ + wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \ From 56b14bcd3702cfe85de73d3c8bdf6b794aeb1664 Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 9 Jan 2024 13:30:42 -0800 Subject: [PATCH 76/83] remove tox, update CONTRIBUTING.md and cleanup GHA workflows --- .github/workflows/integration.yml | 66 ++++-------------------- .github/workflows/main.yml | 12 ++--- CONTRIBUTING.md | 24 
+++++++-- dagger/run_dbt_spark_tests.py | 2 +- tox.ini | 83 ------------------------------- 5 files changed, 33 insertions(+), 154 deletions(-) delete mode 100644 tox.ini diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 67b6ed8e3..53fb9c2ac 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -15,8 +15,14 @@ on: branches: - "main" - "*.latest" - - "releases/*" + pull_request_target: + paths-ignore: + - ".changes/**" + - ".flake8" + - ".gitignore" + - "**.md" + workflow_dispatch: inputs: dbt-core-branch: @@ -37,60 +43,9 @@ defaults: shell: bash jobs: - # generate test metadata about what files changed and the testing matrix to use - test-metadata: - # run if not a PR from a forked repository or has a label to mark as safe to test - if: >- - github.event_name != 'pull_request_target' || - github.event.pull_request.head.repo.full_name == github.repository || - contains(github.event.pull_request.labels.*.name, 'ok to test') - runs-on: ubuntu-latest - outputs: - matrix: ${{ steps.generate-matrix.outputs.result }} - - steps: - - name: Check out the repository (non-PR) - if: github.event_name != 'pull_request_target' - uses: actions/checkout@v3 - with: - persist-credentials: false - - - name: Check out the repository (PR) - if: github.event_name == 'pull_request_target' - uses: actions/checkout@v3 - with: - persist-credentials: false - ref: ${{ github.event.pull_request.head.sha }} - - - name: Check if relevant files changed - if: github.event_name == 'pull_request_target' - # https://github.com/marketplace/actions/paths-changes-filter - # For each filter, it sets output variable named by the filter to the text: - # 'true' - if any of changed files matches any of filter rules - # 'false' - if none of changed files matches any of filter rules - # also, returns: - # `changes` - JSON array with names of all filters matching any of the changed files - uses: dorny/paths-filter@v2 - id: get-changes - with: - token: ${{ secrets.GITHUB_TOKEN }} - filters: | - spark: - - 'dbt/**' - - 'tests/**' - - 'dev-requirements.txt' test: - name: ${{ matrix.test }} / python ${{ matrix.python-version }} / ubuntu-latest - - # run if not a PR from a forked repository or has a label to mark as safe to test - # also checks that the matrix generated is not empty - if: >- - ( - github.event_name != 'pull_request_target' || - github.event.pull_request.head.repo.full_name == github.repository || - contains(github.event.pull_request.labels.*.name, 'ok to test') - ) + name: ${{ matrix.test }} runs-on: ubuntu-latest strategy: @@ -104,8 +59,6 @@ jobs: - "databricks_http_cluster" env: - TOXENV: integration-${{ matrix.test }} - PYTEST_ADDOPTS: "-v --color=yes --csv integration_results.csv" DBT_INVOCATION_ENV: github-actions DD_CIVISIBILITY_AGENTLESS_ENABLED: true DD_API_KEY: ${{ secrets.DATADOG_API_KEY }} @@ -138,7 +91,7 @@ jobs: persist-credentials: false ref: ${{ github.event.pull_request.head.sha }} - - name: Set up Python ${{ matrix.python-version }} + - name: Set up Python uses: actions/setup-python@v4 with: python-version: "3.11" @@ -155,6 +108,5 @@ jobs: pip install bumpversion ./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }} - - name: Run tests for ${{ matrix.test }} run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }} diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 30126325e..338413116 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -19,7 +19,6 @@ on: 
     branches:
       - "main"
       - "*.latest"
-      - "releases/*"
   pull_request:
   workflow_dispatch:
@@ -81,10 +80,6 @@ jobs:
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11"]

-    env:
-      TOXENV: "unit"
-      PYTEST_ADDOPTS: "-v --color=yes --csv unit_results.csv"
-
    steps:
      - name: Check out the repository
        uses: actions/checkout@v3
@@ -100,10 +95,9 @@ jobs:
          sudo apt-get install libsasl2-dev
          python -m pip install --user --upgrade pip
          python -m pip --version
-          python -m pip install tox
-          tox --version
-      - name: Run tox
-        run: tox
+
+      - name: Run unit tests
+        run: python -m pytest --color=yes --csv unit_results.csv -v tests/unit

      - name: Get current date
        if: always()

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a61306ea5..9145436b6 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -65,11 +65,27 @@ $EDITOR test.env
 ### Test commands
 There are a few methods for running tests locally.

-#### `tox`
-`tox` takes care of managing Python virtualenvs and installing dependencies in order to run tests. You can also run tests in parallel, for example you can run unit tests for Python 3.8, Python 3.9, and `flake8` checks in parallel with `tox -p`. Also, you can run unit tests for specific python versions with `tox -e py38`. The configuration of these tests are located in `tox.ini`.
+#### dagger
+To run functional tests we rely on [dagger](https://dagger.io/). This launches a virtual container or containers to test against.

-#### `pytest`
-Finally, you can also run a specific test or group of tests using `pytest` directly. With a Python virtualenv active and dev dependencies installed you can do things like:
+```sh
+pip install -r dagger/requirements.txt
+python dagger/run_dbt_spark_tests.py --profile databricks_sql_endpoint --test-path tests/functional/adapter/test_basic.py::TestSimpleMaterializationsSpark::test_base
+```
+
+`--profile`: required, this is the kind of spark connection to test against
+
+_options_:
+  - "apache_spark"
+  - "spark_session"
+  - "databricks_sql_endpoint"
+  - "databricks_cluster"
+  - "databricks_http_cluster"
+
+`--test-path`: optional, this is the path to the test file you want to run. If not specified, all tests will be run.
+
+#### pytest
+Finally, you can also run a specific test or group of tests using `pytest` directly (if you have all the dependencies set up on your machine).
With a Python virtualenv active and dev dependencies installed you can do things like:

 ```sh
 # run all functional tests

diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index 3e4c8347f..4cb16f7a0 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -14,7 +14,7 @@
                     if env_name.startswith(("DD_", "DBT_"))}

 TESTING_ENV_VARS.update({
-    "ODBC_DRIVER": "/opt/simba/spark/lib/64/libsparkodbc_sb64.so",
+    "ODBC_DRIVER": "Simba",
 })

diff --git a/tox.ini b/tox.ini
deleted file mode 100644
index 31396b5ef..000000000
--- a/tox.ini
+++ /dev/null
@@ -1,83 +0,0 @@
-[tox]
-skipsdist = True
-envlist = unit, flake8, integration-spark-thrift
-
-[testenv:{unit,py38,py39,py310,py}]
-allowlist_externals =
-    /bin/bash
-commands = /bin/bash -c '{envpython} -m pytest -v {posargs} tests/unit'
-passenv =
-    DBT_*
-    PYTEST_ADDOPTS
-deps =
-    -r{toxinidir}/requirements.txt
-    -r{toxinidir}/dev-requirements.txt
-
-[testenv:integration-spark-databricks-http]
-allowlist_externals =
-    /bin/bash
-basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_http_cluster {posargs} -n4 tests/functional/adapter/*'
-passenv =
-    DBT_*
-    PYTEST_ADDOPTS
-deps =
-    -r{toxinidir}/requirements.txt
-    -r{toxinidir}/dev-requirements.txt
-    -e.
-
-[testenv:integration-spark-databricks-odbc-cluster]
-allowlist_externals =
-    /bin/bash
-basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_cluster {posargs} -n4 tests/functional/adapter/*'
-passenv =
-    DBT_*
-    PYTEST_ADDOPTS
-    ODBC_DRIVER
-deps =
-    -r{toxinidir}/requirements.txt
-    -r{toxinidir}/dev-requirements.txt
-    -e.
-
-[testenv:integration-spark-databricks-odbc-sql-endpoint]
-allowlist_externals =
-    /bin/bash
-basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpoint {posargs} -n4 tests/functional/adapter/*'
-passenv =
-    DBT_*
-    PYTEST_ADDOPTS
-    ODBC_DRIVER
-deps =
-    -r{toxinidir}/requirements.txt
-    -r{toxinidir}/dev-requirements.txt
-    -e.
-
-[testenv:integration-spark-thrift]
-description = run integration tests against a Spark thrift server
-allowlist_externals =
-    /bin/bash
-passenv =
-    DBT_*
-    PYTEST_ADDOPTS
-deps =
-    -r{toxinidir}/requirements.txt
-    -r{toxinidir}/dev-requirements.txt
-    -e.
-commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posargs} -n4 tests/functional/adapter/*'
-
-[testenv:integration-spark-session]
-description = run integration tests against a Spark session
-allowlist_externals =
-    /bin/bash
-passenv =
-    DBT_*
-    PYTEST_*
-    PIP_CACHE_DIR
-deps =
-    -r{toxinidir}/requirements.txt
-    -r{toxinidir}/dev-requirements.txt
-    -e.[session]
-commands = /bin/bash -c '{envpython} -m pytest -v --profile spark_session {posargs} -n4 tests/functional/adapter/*'

From 9849c1c2b4e3c14a772ef59b5f331e0b5785d673 Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 9 Jan 2024 13:34:17 -0800
Subject: [PATCH 77/83] remove tox, update CONTRIBUTING.md and cleanup GHA workflows

---
 .github/workflows/integration.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 53fb9c2ac..e2f0dcfdc 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -73,7 +73,6 @@ jobs:
      DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com"
      DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
      DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
-      ODBC_DRIVER: "Simba"

    steps:
      - name: Check out the repository

From f9a4c585a263d0a76c009ba1c9c7acc30f3bf462 Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 9 Jan 2024 13:42:48 -0800
Subject: [PATCH 78/83] install test reqs in main.yml

---
 .github/workflows/main.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 338413116..c16a16206 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -95,6 +95,8 @@ jobs:
          sudo apt-get install libsasl2-dev
          python -m pip install --user --upgrade pip
          python -m pip --version
+          python -m pip install -e .
+          python -m pip install -r dev-requirements.txt

      - name: Run unit tests
        run: python -m pytest --color=yes --csv unit_results.csv -v tests/unit

From bbe17a8fa8a2c181d5d98aafdf12eba9c371d96e Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 9 Jan 2024 13:45:37 -0800
Subject: [PATCH 79/83] install test reqs in main.yml

---
 .github/workflows/main.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index c16a16206..20f3f88f4 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -95,8 +95,9 @@ jobs:
          sudo apt-get install libsasl2-dev
          python -m pip install --user --upgrade pip
          python -m pip --version
-          python -m pip install -e .
+          python -m pip install -r requirements.txt
          python -m pip install -r dev-requirements.txt
+          python -m pip install -e .

      - name: Run unit tests
        run: python -m pytest --color=yes --csv unit_results.csv -v tests/unit

From 3f44e9663db6606a9fe0c5d5208ab2c2d31a791b Mon Sep 17 00:00:00 2001
From: Colin
Date: Tue, 9 Jan 2024 13:51:23 -0800
Subject: [PATCH 80/83] formatting

---
 CONTRIBUTING.md               |  4 ++--
 dagger/run_dbt_spark_tests.py | 11 ++++++-----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 9145436b6..6fcaacea8 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -66,14 +66,14 @@ $EDITOR test.env
 There are a few methods for running tests locally.

 #### dagger
-To run functional tests we rely on [dagger](https://dagger.io/). This launches a virtual container or containers to test against.
+To run functional tests we rely on [dagger](https://dagger.io/). This launches a virtual container or containers to test against.
 ```sh
 pip install -r dagger/requirements.txt
 python dagger/run_dbt_spark_tests.py --profile databricks_sql_endpoint --test-path tests/functional/adapter/test_basic.py::TestSimpleMaterializationsSpark::test_base
 ```

-`--profile`: required, this is the kind of spark connection to test against
+`--profile`: required, this is the kind of spark connection to test against

 _options_:
   - "apache_spark"
   - "spark_session"
   - "databricks_sql_endpoint"
   - "databricks_cluster"
   - "databricks_http_cluster"

diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index 4cb16f7a0..dd1a4395d 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -10,12 +10,13 @@ PG_PORT = 5432
 load_dotenv(find_dotenv("test.env"))

 # if env vars aren't specified in test.env (i.e. in github actions worker), use the ones from the host
-TESTING_ENV_VARS = {env_name: os.environ[env_name] for env_name in os.environ
-                    if env_name.startswith(("DD_", "DBT_"))}
+TESTING_ENV_VARS = {
+    env_name: os.environ[env_name]
+    for env_name in os.environ
+    if env_name.startswith(("DD_", "DBT_"))
+}

-TESTING_ENV_VARS.update({
-    "ODBC_DRIVER": "Simba",
-})
+TESTING_ENV_VARS.update({"ODBC_DRIVER": "Simba"})


 def env_variables(envs: dict[str, str]):

From afd3866a4b39c0df0999bbcbc333d78eff9927eb Mon Sep 17 00:00:00 2001
From: Colin
Date: Wed, 10 Jan 2024 09:59:30 -0800
Subject: [PATCH 81/83] remove tox from dev-requirements.txt and Makefile

---
 Makefile             | 7 ++++---
 dev-requirements.txt | 1 -
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index cc1d9f75d..2bd1055fa 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@
 .PHONY: dev
 dev: ## Installs adapter in develop mode along with development dependencies
 	@\
-	pip install -e . -r requirements.txt -r dev-requirements.txt && pre-commit install
+	pip install -e . -r requirements.txt -r dev-requirements.txt -r dagger/requirements.txt && pre-commit install

 .PHONY: dev-uninstall
 dev-uninstall: ## Uninstalls all packages while maintaining the virtual environment
@@ -40,12 +40,13 @@ linecheck: ## Checks for all Python lines 100 characters or more
 .PHONY: unit
 unit: ## Runs unit tests with py38.
 	@\
-	tox -e py38
+	python -m pytest tests/unit

 .PHONY: test
 test: ## Runs unit tests with py38 and code checks against staged changes.
 	@\
-	tox -p -e py38; \
+	python -m pytest tests/unit; \
+	python dagger/run_dbt_spark_tests.py --profile spark_session \
 	pre-commit run black-check --hook-stage manual | grep -v "INFO"; \
 	pre-commit run flake8-check --hook-stage manual | grep -v "INFO"; \
 	pre-commit run mypy-check --hook-stage manual | grep -v "INFO"

diff --git a/dev-requirements.txt b/dev-requirements.txt
index bb3282b44..765482e25 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -22,7 +22,6 @@ pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
 pytest-xdist~=3.5
 pytz~=2023.3
-tox~=4.11
 types-pytz~=2023.3
 types-requests~=2.31
 twine~=4.0

From 259ebc7cbe75a7f22bff8075e7c7bba0581cd585 Mon Sep 17 00:00:00 2001
From: Colin
Date: Wed, 10 Jan 2024 10:33:50 -0800
Subject: [PATCH 82/83] clarify spark crt instantiation

---
 dagger/run_dbt_spark_tests.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index dd1a4395d..718519909 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -41,7 +41,7 @@ async def get_postgres_container(client: dagger.Client) -> (dagger.Container, st

 async def get_spark_container(client: dagger.Client) -> (dagger.Container, str):
     spark_dir = client.host().directory("./dagger/spark-container")
-    spark_ctr = (
+    spark_ctr_base = (
         client.container()
         .from_("eclipse-temurin:8-jre")
         .with_directory("/spark_setup", spark_dir)
@@ -66,7 +66,7 @@ async def get_spark_container(client: dagger.Client) -> (dagger.Container, str):
     pg_ctr, pg_host = await get_postgres_container(client)

     spark_ctr = (
-        spark_ctr.with_service_binding(alias=pg_host, service=pg_ctr)
+        spark_ctr_base.with_service_binding(alias=pg_host, service=pg_ctr)
         .with_exec(
             [
                 "/scripts/entrypoint.sh",

From a8a7010d934c951512cd66f8b8cbf13d71c45176 Mon Sep 17 00:00:00 2001
From: Colin
Date: Wed, 10 Jan 2024 11:52:57 -0800
Subject: [PATCH 83/83] add comments on python-version

---
 .github/workflows/integration.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index e2f0dcfdc..94dece350 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -90,7 +90,8 @@
          persist-credentials: false
          ref: ${{ github.event.pull_request.head.sha }}

+      # the python version used here is not what is used in the tests themselves
-      - name: Set up Python
+      - name: Set up Python for dagger
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"
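A minimal sketch of a local run that mirrors the commands this series lands in the Makefile, main.yml, and CONTRIBUTING.md; the `apache_spark` profile and the test path below are illustrative choices, not the only supported ones, and this assumes `test.env` has been populated as described earlier in CONTRIBUTING.md:

```sh
# install the dagger test-runner requirements added in this series
pip install -r dagger/requirements.txt

# unit tests now run through pytest directly instead of tox
python -m pytest --color=yes -v tests/unit

# functional tests run inside dagger-managed containers;
# --profile is required, --test-path is optional (all tests run if omitted)
python dagger/run_dbt_spark_tests.py --profile apache_spark \
    --test-path tests/functional/adapter/test_basic.py
```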