Skip to content

Commit

Permalink
chore: split metadata extract across several workflows (#1849)
Browse files Browse the repository at this point in the history
  • Loading branch information
pnadolny13 authored Sep 19, 2024
1 parent 20bfbbf commit 5ea6e6e
Show file tree
Hide file tree
Showing 3 changed files with 401 additions and 0 deletions.
89 changes: 89 additions & 0 deletions .github/workflows/metadata-extract-airbyte.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Workflow header for the Airbyte metadata extraction.
# NOTE(review): the sibling extractor workflow added in the same commit uses
# the identical display name "Metadata Extract"; consider distinct names so
# the two can be told apart in the Actions UI.
name: Metadata Extract

on:
  # Manual trigger: the caller chooses 'preview' (default) or 'production'.
  workflow_dispatch:
    inputs:
      environment:
        description: 'The environment to run the workflow in'
        required: true
        type: choice
        default: 'preview'
        options:
          - 'preview'
          - 'production'
  # Weekly trigger. GitHub evaluates cron expressions in UTC.
  schedule:
    - cron: '0 9 * * 1'  # Run at 09:00 UTC on Mondays

jobs:
# Builds the dynamic matrix of Airbyte variants and exposes it as the job
# output `airbyte_matrix` for jobs that declare `needs: get_variants_list`.
get_variants_list:
  runs-on: ubuntu-latest
  outputs:
    airbyte_matrix: ${{ steps.setmatrix_airbyte.outputs.airbyte_matrix }}

  steps:
    # NOTE(review): the version tag of this action was unreadable in the
    # reviewed copy ("[email protected]"); the v4 major tag is assumed —
    # confirm the intended pin.
    - uses: actions/checkout@v4

    - name: Install hub-utils
      run: pipx install git+https://github.com/meltano/hub-utils.git

    # Stores the raw variant list printed by hub-utils as a step output.
    - name: Get Variants List - Airbyte
      id: get-variants-list-airbyte
      run: echo "VARIANTS_AIRBYTE=$(hub-utils get-variant-names "$(pwd)" --metadata-type=airbyte)" >> "$GITHUB_OUTPUT"

    # Wraps the variant list in {"include": ...} so it can be passed to
    # `strategy.matrix` through fromJson(). The value is written to
    # $GITHUB_OUTPUT because the `::set-output` workflow command is deprecated.
    - name: Set Dynamic Airbyte Matrix
      id: setmatrix_airbyte
      run: |
        matrixStringifiedObject="{\"include\": ${{ steps.get-variants-list-airbyte.outputs.VARIANTS_AIRBYTE }}}"
        echo "airbyte_matrix=$matrixStringifiedObject" >> "$GITHUB_OUTPUT"
# Part 1: for every matrix entry, run the connector's `spec` command inside
# its own airbyte/<source-name>:latest container and save the SPEC message
# line as a workflow artifact.
metadata_extract_airbyte_p1:
  name: Airbyte - Part 1 Metadata Extract
  needs: get_variants_list
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix: ${{ fromJson(needs.get_variants_list.outputs.airbyte_matrix) }}
  container:
    image: airbyte/${{ matrix.source-name }}:latest
  steps:
    - name: Get Airbyte Spec
      id: get-airbyte-stdout
      # Keeps only output lines that contain a `"type": "SPEC"` field.
      run: |
        $AIRBYTE_ENTRYPOINT spec | grep '^.*"type":\s*"SPEC".*$' > extractor--${{ matrix.source-name }}--airbyte.json
    # upload-artifact v3 is deprecated; v4 must be paired with
    # download-artifact v4 in the Part 2 job below.
    - uses: actions/upload-artifact@v4
      with:
        name: extractor--${{ matrix.source-name }}--airbyte.json
        path: extractor--${{ matrix.source-name }}--airbyte.json

# Part 2: download each spec artifact produced by Part 1 and hand it to
# `hub-utils upload-airbyte` together with the plugin's definition path.
metadata_extract_airbyte_p2:
  name: Airbyte - Part 2 Metadata Extract
  # Run even when some Part 1 matrix legs failed, but not when the run was
  # cancelled or when the matrix itself could not be produced (fromJson()
  # would fail on an empty output).
  if: ${{ !cancelled() && needs.get_variants_list.result == 'success' }}
  needs: [metadata_extract_airbyte_p1, get_variants_list]
  runs-on: ubuntu-latest
  # Manual runs use the selected input; scheduled runs target 'production';
  # anything else falls back to 'preview'.
  environment: ${{ github.event.inputs.environment || github.event_name == 'schedule' && 'production' || 'preview' }}
  env:
    AWS_S3_BUCKET: "${{secrets.HUB_METADATA_S3_BUCKET }}"
  strategy:
    fail-fast: false
    matrix: ${{ fromJson(needs.get_variants_list.outputs.airbyte_matrix) }}
  permissions:
    id-token: write  # This is required for requesting the JWT
    contents: read  # Declaring `permissions` drops unlisted scopes; checkout needs this one
  steps:
    # NOTE(review): the version tags of the checkout and AWS credential
    # actions were unreadable in the reviewed copy ("[email protected]");
    # the v4 major tags are assumed — confirm the intended pins.
    - uses: actions/checkout@v4

    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
        aws-region: us-west-2
        role-session-name: "GitHubActions"

    - uses: actions/download-artifact@v4
      with:
        name: extractor--${{ matrix.source-name }}--airbyte.json

    - name: Install hub-utils
      run: pipx install git+https://github.com/meltano/hub-utils.git

    # github.workspace replaces the hard-coded /home/runner/work/hub/hub so
    # the path does not depend on the repository being named "hub".
    - name: Upload Metadata
      run: hub-utils upload-airbyte "${{ github.workspace }}/_data/meltano/${{ matrix.plugin-name }}" extractor--${{ matrix.source-name }}--airbyte.json
190 changes: 190 additions & 0 deletions .github/workflows/metadata-extract-extractors.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
# Workflow header for the SDK extractor metadata extraction.
# NOTE(review): the sibling Airbyte workflow added in the same commit uses
# the identical display name "Metadata Extract"; consider distinct names so
# the two can be told apart in the Actions UI.
name: Metadata Extract

on:
  # Manual trigger: the caller chooses 'preview' (default) or 'production'.
  workflow_dispatch:
    inputs:
      environment:
        description: 'The environment to run the workflow in'
        required: true
        type: choice
        default: 'preview'
        options:
          - 'preview'
          - 'production'
  # Weekly trigger. GitHub evaluates cron expressions in UTC.
  schedule:
    - cron: '0 9 * * 1'  # Run at 09:00 UTC on Mondays

jobs:
# Builds two dynamic matrices of SDK extractor variants (the first 200 and
# everything after them) and exposes them as the job outputs that the
# `metadata_extract_sdk_ex_p1` / `_p2` jobs read.
# NOTE(review): the 200-entry split is presumably there to stay under the
# per-matrix job limit — confirm.
get_variants_list:
  runs-on: ubuntu-latest
  outputs:
    # Each output must point at the step id and output name that actually
    # set it; the consumers read exactly these two names.
    sdk_ex_p1_matrix: ${{ steps.setmatrix_sdk_ex_p1.outputs.sdk_ex_p1_matrix }}
    sdk_ex_p2_matrix: ${{ steps.setmatrix_sdk_ex_p2.outputs.sdk_ex_p2_matrix }}

  steps:
    # NOTE(review): the version tag of this action was unreadable in the
    # reviewed copy ("[email protected]"); the v4 major tag is assumed —
    # confirm the intended pin.
    - uses: actions/checkout@v4

    - name: Install hub-utils
      run: pipx install git+https://github.com/meltano/hub-utils.git

    - name: Get Variants List - SDK Extractors (p1)
      id: get-variants-list-sdk-ex-p1
      run: echo "VARIANTS_SDK_EX_P1=$(hub-utils get-variant-names "$(pwd)" --plugin-type=extractors --metadata-type=sdk --limit=200)" >> "$GITHUB_OUTPUT"

    # Wraps the list in {"include": ...} for use with fromJson(). Written to
    # $GITHUB_OUTPUT because the `::set-output` workflow command is deprecated.
    - name: Set Dynamic Matrix - SDK Extractors (p1)
      id: setmatrix_sdk_ex_p1
      run: |
        matrixStringifiedObject="{\"include\": ${{ steps.get-variants-list-sdk-ex-p1.outputs.VARIANTS_SDK_EX_P1 }}}"
        echo "sdk_ex_p1_matrix=$matrixStringifiedObject" >> "$GITHUB_OUTPUT"

    - name: Get Variants List - SDK Extractors (p2)
      id: get-variants-list-sdk-ex-p2
      run: echo "VARIANTS_SDK_EX_P2=$(hub-utils get-variant-names "$(pwd)" --plugin-type=extractors --metadata-type=sdk --skip=200)" >> "$GITHUB_OUTPUT"

    - name: Set Dynamic Matrix - SDK Extractors (p2)
      id: setmatrix_sdk_ex_p2
      run: |
        matrixStringifiedObject="{\"include\": ${{ steps.get-variants-list-sdk-ex-p2.outputs.VARIANTS_SDK_EX_P2 }}}"
        echo "sdk_ex_p2_matrix=$matrixStringifiedObject" >> "$GITHUB_OUTPUT"
# Runs `hub-utils extract-sdk-metadata-to-s3` once per entry of the first
# SDK extractor matrix, retrying on progressively older Python versions.
metadata_extract_sdk_ex_p1:
  name: SDK Extractors - Metadata Extract (p1)
  needs: get_variants_list
  runs-on: ubuntu-latest
  # Manual runs use the selected input; scheduled runs target 'production';
  # anything else falls back to 'preview'.
  environment: ${{ github.event.inputs.environment || github.event_name == 'schedule' && 'production' || 'preview' }}
  env:
    AWS_S3_BUCKET: "${{secrets.HUB_METADATA_S3_BUCKET }}"
  strategy:
    fail-fast: false
    matrix: ${{ fromJson(needs.get_variants_list.outputs.sdk_ex_p1_matrix) }}
  permissions:
    id-token: write  # This is required for requesting the JWT
    contents: read  # Declaring `permissions` drops unlisted scopes; checkout needs this one
  steps:
    # NOTE(review): the version tags of the checkout and AWS credential
    # actions were unreadable in the reviewed copy ("[email protected]");
    # the v4 major tags are assumed — confirm the intended pins.
    - uses: actions/checkout@v4

    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
        aws-region: us-west-2
        role-session-name: "GitHubActions"

    - name: Install hub-utils
      run: pipx install git+https://github.com/meltano/hub-utils.git

    # This starts the attempts to install and extract metadata. Since we don't
    # know what python versions are accepted we start with the latest and continue
    # decreasing until we have a success or run out of python versions to attempt.
    # Every attempt except the last sets continue-on-error so a failure only
    # triggers the next version; each fallback is gated on the previous
    # attempt's `outcome` being 'failure', so it is skipped after a success.
    # github.workspace replaces the hard-coded /home/runner/work/hub/hub so
    # the path does not depend on the repository being named "hub".
    - name: Install Python 3.12
      uses: actions/setup-python@v5
      with:
        python-version: '3.12'

    - name: Extract Metadata (3.12)
      id: extract_312
      run: hub-utils extract-sdk-metadata-to-s3 "${{ github.workspace }}/_data/meltano/${{ matrix.plugin-name }}" "$(pwd)/extract_data"
      continue-on-error: true

    - name: Install Python 3.11 (Only if 3.12 failed)
      if: always() && (steps.extract_312.outcome == 'failure')
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    - name: Extract Metadata (3.11)
      if: always() && (steps.extract_312.outcome == 'failure')
      id: extract_311
      run: hub-utils extract-sdk-metadata-to-s3 "${{ github.workspace }}/_data/meltano/${{ matrix.plugin-name }}" "$(pwd)/extract_data"
      continue-on-error: true

    - name: Install Python 3.10 (Only if 3.11 failed)
      if: always() && (steps.extract_311.outcome == 'failure')
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Extract Metadata (3.10)
      if: always() && (steps.extract_311.outcome == 'failure')
      id: extract_310
      run: hub-utils extract-sdk-metadata-to-s3 "${{ github.workspace }}/_data/meltano/${{ matrix.plugin-name }}" "$(pwd)/extract_data"
      continue-on-error: true

    - name: Install Python 3.9 (Only if 3.10 failed)
      if: always() && (steps.extract_310.outcome == 'failure')
      uses: actions/setup-python@v5
      with:
        python-version: '3.9'

    - name: Extract Metadata (3.9)
      if: always() && (steps.extract_310.outcome == 'failure')
      id: extract_39
      run: hub-utils extract-sdk-metadata-to-s3 "${{ github.workspace }}/_data/meltano/${{ matrix.plugin-name }}" "$(pwd)/extract_data"
      continue-on-error: true

    - name: Install Python 3.8 (Only if 3.9 failed)
      if: always() && (steps.extract_39.outcome == 'failure')
      uses: actions/setup-python@v5
      with:
        python-version: '3.8'

    # Last attempt: no continue-on-error, so a failure here fails the job.
    - name: Extract Metadata (3.8)
      if: always() && (steps.extract_39.outcome == 'failure')
      id: extract_38
      run: hub-utils extract-sdk-metadata-to-s3 "${{ github.workspace }}/_data/meltano/${{ matrix.plugin-name }}" "$(pwd)/extract_data"

# Runs `hub-utils extract-sdk-metadata-to-s3` once per entry of the second
# SDK extractor matrix, retrying on progressively older Python versions.
metadata_extract_sdk_ex_p2:
  name: SDK Extractors - Metadata Extract (p2)
  needs: get_variants_list
  runs-on: ubuntu-latest
  # Manual runs use the selected input; scheduled runs target 'production';
  # anything else falls back to 'preview'.
  environment: ${{ github.event.inputs.environment || github.event_name == 'schedule' && 'production' || 'preview' }}
  env:
    AWS_S3_BUCKET: "${{secrets.HUB_METADATA_S3_BUCKET }}"
  strategy:
    fail-fast: false
    matrix: ${{ fromJson(needs.get_variants_list.outputs.sdk_ex_p2_matrix) }}
  permissions:
    id-token: write  # This is required for requesting the JWT
    contents: read  # Declaring `permissions` drops unlisted scopes; checkout needs this one
  steps:
    # NOTE(review): the version tags of the checkout and AWS credential
    # actions were unreadable in the reviewed copy ("[email protected]");
    # the v4 major tags are assumed — confirm the intended pins.
    - uses: actions/checkout@v4

    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
        aws-region: us-west-2
        role-session-name: "GitHubActions"

    - name: Install hub-utils
      run: pipx install git+https://github.com/meltano/hub-utils.git

    # The accepted Python versions are not known up front, so extraction is
    # attempted on the newest version first and falls back one version at a
    # time. The ladder now starts at 3.12, matching the p1 job, which handles
    # the same plugin type; previously this job started at 3.10.
    # Every attempt except the last sets continue-on-error so a failure only
    # triggers the next version; each fallback is gated on the previous
    # attempt's `outcome` being 'failure', so it is skipped after a success.
    # github.workspace replaces the hard-coded /home/runner/work/hub/hub so
    # the path does not depend on the repository being named "hub".
    - name: Install Python 3.12
      uses: actions/setup-python@v5
      with:
        python-version: '3.12'

    - name: Extract Metadata (3.12)
      id: extract_312
      run: hub-utils extract-sdk-metadata-to-s3 "${{ github.workspace }}/_data/meltano/${{ matrix.plugin-name }}" "$(pwd)/extract_data"
      continue-on-error: true

    - name: Install Python 3.11 (Only if 3.12 failed)
      if: always() && (steps.extract_312.outcome == 'failure')
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    - name: Extract Metadata (3.11)
      if: always() && (steps.extract_312.outcome == 'failure')
      id: extract_311
      run: hub-utils extract-sdk-metadata-to-s3 "${{ github.workspace }}/_data/meltano/${{ matrix.plugin-name }}" "$(pwd)/extract_data"
      continue-on-error: true

    - name: Install Python 3.10 (Only if 3.11 failed)
      if: always() && (steps.extract_311.outcome == 'failure')
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Extract Metadata (3.10)
      if: always() && (steps.extract_311.outcome == 'failure')
      id: extract_310
      run: hub-utils extract-sdk-metadata-to-s3 "${{ github.workspace }}/_data/meltano/${{ matrix.plugin-name }}" "$(pwd)/extract_data"
      continue-on-error: true

    - name: Install Python 3.9 (Only if 3.10 failed)
      if: always() && (steps.extract_310.outcome == 'failure')
      uses: actions/setup-python@v5
      with:
        python-version: '3.9'

    - name: Extract Metadata (3.9)
      if: always() && (steps.extract_310.outcome == 'failure')
      id: extract_39
      run: hub-utils extract-sdk-metadata-to-s3 "${{ github.workspace }}/_data/meltano/${{ matrix.plugin-name }}" "$(pwd)/extract_data"
      continue-on-error: true

    - name: Install Python 3.8 (Only if 3.9 failed)
      if: always() && (steps.extract_39.outcome == 'failure')
      uses: actions/setup-python@v5
      with:
        python-version: '3.8'

    # Last attempt: no continue-on-error, so a failure here fails the job.
    - name: Extract Metadata (3.8)
      if: always() && (steps.extract_39.outcome == 'failure')
      id: extract_38
      run: hub-utils extract-sdk-metadata-to-s3 "${{ github.workspace }}/_data/meltano/${{ matrix.plugin-name }}" "$(pwd)/extract_data"
Loading

0 comments on commit 5ea6e6e

Please sign in to comment.