# Create aws.yml (#19)
# Workflow file for this run:

# On every pull request: assume an AWS role through OIDC, list the S3 test
# bucket, start a Spark cluster with docker-compose, and run the dbt
# integration tests against the spark_iceberg target.
name: List S3 Objects - AWS

on:
  pull_request:

env:
  AWS_REGION: eu-west-1
  AWS_ROLE_ARN: "arn:aws:iam::719197435995:role/DbtSparkTestingActions"
  S3_BUCKET: "dbt-spark-iceberg/github-integration-testing"
  DBT_PROFILES_DIR: ./ci

permissions:
  id-token: write  # This is required for requesting the JWT
  contents: read  # This is required for actions/checkout

jobs:
  list_s3_objects:
    name: list_s3_objects
    runs-on: ubuntu-latest
    defaults:
      run:
        # Default directory for `run` steps; it holds docker-compose.yml.
        # Steps that invoke dbt override it with ./integration_tests.
        working-directory: .github/workflows/spark_deployment
    steps:
      # Check out first so the later steps can use files from the repository.
      - name: Check out repository
        uses: actions/checkout@v4

      # Configure AWS credentials using OIDC
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ env.AWS_ROLE_ARN }}
          aws-region: ${{ env.AWS_REGION }}
          mask-aws-account-id: true
          # NOTE(review): confirm this input is supported by the v4 action.
          mask-aws-role-arn: true
          role-session-name: GithubActionsSession
          role-duration-seconds: 3600
          output-credentials: true

      - name: Install Docker Compose
        run: |
          # -f makes curl fail on an HTTP error instead of saving the error
          # page as the docker-compose binary.
          sudo curl -fL "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
          sudo chmod +x /usr/local/bin/docker-compose

      # Verify AWS credentials are working
      - name: Verify AWS CLI configuration
        run: |
          aws sts get-caller-identity
          aws configure list

      - name: List objects from S3 bucket
        run: |
          aws s3 ls s3://${{ env.S3_BUCKET }} --summarize

      # Set up Docker environment with AWS credentials
      - name: Configure Docker environment
        run: |
          # The credentials step exports the assumed-role credentials as
          # environment variables. `aws configure get` only reads the AWS
          # config file, so it would yield empty values here; use the
          # exported variables directly and fail fast if any is missing.
          : "${AWS_ACCESS_KEY_ID:?AWS_ACCESS_KEY_ID is not set}"
          : "${AWS_SECRET_ACCESS_KEY:?AWS_SECRET_ACCESS_KEY is not set}"
          : "${AWS_SESSION_TOKEN:?AWS_SESSION_TOKEN is not set}"
          # Create Docker environment file
          {
            echo "AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}"
            echo "AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}"
            echo "AWS_SESSION_TOKEN=${AWS_SESSION_TOKEN}"
            echo "AWS_REGION=${{ env.AWS_REGION }}"
          } >> .env

      - name: Configure Docker credentials
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_SNOWPLOWCI_READ_USERNAME }}
          password: ${{ secrets.DOCKERHUB_SNOWPLOWCI_READ_PASSWORD }}

      - name: Build and start Spark cluster
        run: |
          # docker-compose reads the .env file in this directory for
          # variable substitution, so it does not need to be sourced.
          docker-compose up -d
          echo "Waiting for Spark services to start..."
          sleep 90

      - name: Python setup
        uses: actions/setup-python@v4
        with:
          python-version: "3.8.x"

      - name: Install spark dependencies
        working-directory: ./integration_tests
        run: |
          pip install --upgrade pip wheel setuptools
          pip install "dbt-spark[PyHive]~=1.7.0" --upgrade
          dbt deps

      # Verify Spark cluster is running
      - name: Verify Spark cluster
        run: |
          docker ps
          docker logs spark_deployment_thrift-server_1

      - name: "DBT Debug"
        working-directory: ./integration_tests
        run: |
          dbt debug --target spark_iceberg

      # Always dump container state and logs, even when dbt debug failed.
      - name: "After DBT Debug logs"
        if: always()
        run: |
          docker ps
          docker logs spark_deployment_thrift-server_1
          docker exec spark_deployment_thrift-server_1 beeline -u "jdbc:hive2://localhost:10000"
          docker exec spark_deployment_thrift-server_1 ls -l /spark/logs

      - name: "Pre-test: Drop ci schemas"
        working-directory: ./integration_tests
        run: |
          dbt run-operation post_ci_cleanup --target spark_iceberg

      - name: Run tests
        working-directory: ./integration_tests
        run: ./.scripts/integration_test.sh -d spark_iceberg

      - name: "Post-test: Drop ci schemas"
        working-directory: ./integration_tests
        run: |
          dbt run-operation post_ci_cleanup --target spark_iceberg

      - name: Cleanup
        if: always()
        run: |
          # This step already runs in the directory that contains
          # docker-compose.yml, so no -f path is passed; a repo-root-relative
          # path would resolve to a non-existent nested location.
          docker-compose down
          rm -f .env  # Remove the environment file with credentials