
Commit 91baa0b

Merge branch 'main' into inspect

ranchodeluxe committed Mar 6, 2024
2 parents b883c8f + 34cb432
Showing 2 changed files with 26 additions and 45 deletions.
21 changes: 3 additions & 18 deletions .github/workflows/config.py
@@ -1,17 +1,5 @@
 import os
 
-bucket_choice = os.environ.get("S3_BUCKET")
-bucket_options = {
-    "default": "s3://gcorradini-forge-runner-test",
-    "test": "s3://gcorradini-forge-runner-test",
-}
-s3_uri = bucket_options.get(bucket_choice)
-if not s3_uri:
-    raise ValueError(
-        f"'S3_BUCKET_OPTIONS_MAP' did not have a key for '{bucket_choice}'. Options are {bucket_options}"
-    )
-
-
 def calc_task_manager_resources(task_manager_process_memory):
     """
     illustration of Flink memory model:
@@ -80,8 +68,8 @@ def calc_task_manager_resources(task_manager_process_memory):
     # calculate dynamic values
     return {
         "total_flink": int(total_flink_memory),
-        "task_heap": int(remaining_memory * 0.90),
-        "task_off_heap": int(remaining_memory * 0.10),
+        "task_heap": int(remaining_memory * 0.75),
+        "task_off_heap": int(remaining_memory * 0.25),
         "task_memory_managed_fraction": managed_memory_ratio
     }
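For a sense of scale: with a hypothetical remaining_memory of 4096M (an illustrative figure, not one taken from this commit), the new split moves roughly a gigabyte from heap to off-heap:

    # Illustrative only: how the heap/off-heap split changes for an assumed
    # remaining_memory of 4096 MB.
    remaining_memory = 4096
    old = (int(remaining_memory * 0.90), int(remaining_memory * 0.10))  # (3686, 409)
    new = (int(remaining_memory * 0.75), int(remaining_memory * 0.25))  # (3072, 1024)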

@@ -104,8 +92,6 @@ def calc_task_manager_resources(task_manager_process_memory):
 )
 print(f"[ CALCULATED TASK MANAGER RESOURCES ]: {task_manager_resources}")
 
-
-BUCKET_PREFIX = s3_uri
 c.Bake.prune = bool(int(os.environ.get("PRUNE_OPTION")))
 c.Bake.container_image = "apache/beam_python3.10_sdk:2.52.0"
 c.Bake.bakery_class = "pangeo_forge_runner.bakery.flink.FlinkOperatorBakery"
@@ -127,11 +113,10 @@ def calc_task_manager_resources(task_manager_process_memory):
     "taskmanager.memory.managed.fraction": f"{task_manager_resources['task_memory_managed_fraction']}"
 }
 
+BUCKET_PREFIX = os.environ.get("OUTPUT_BUCKET")
 c.TargetStorage.fsspec_class = "s3fs.S3FileSystem"
 c.TargetStorage.root_path = f"{BUCKET_PREFIX}/{{job_name}}/output"
 c.TargetStorage.fsspec_args = {
-    "key": os.environ.get("S3_DEFAULT_AWS_ACCESS_KEY_ID"),
-    "secret": os.environ.get("S3_DEFAULT_AWS_SECRET_ACCESS_KEY"),
     "anon": False,
     "client_kwargs": {"region_name": "us-west-2"},
 }
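The TargetStorage change above replaces the old bucket-options map with a single OUTPUT_BUCKET environment variable, while dropping the static key/secret pair from fsspec_args. A minimal sketch of how the root path resolves, with the bucket and job name as hypothetical values (only the f-string pattern comes from config.py above):

    import os

    os.environ["OUTPUT_BUCKET"] = "s3://example-output"  # assumed value
    BUCKET_PREFIX = os.environ.get("OUTPUT_BUCKET")
    root_path = f"{BUCKET_PREFIX}/{{job_name}}/output"   # literal {job_name} survives the f-string
    print(root_path.format(job_name="my-recipe"))        # s3://example-output/my-recipe/output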
50 changes: 23 additions & 27 deletions .github/workflows/job-runner.yaml
@@ -14,27 +14,27 @@ on:
         description: 'The subdir of the feedstock directory in the repo'
         required: true
         default: 'feedstock'
-      bucket:
-        description: 'This job runner leverages s3fs.S3FileSystem for your recipe cache and output. Choices currently are: "default"'
-        required: true
-        default: 'default'
       prune:
         description: 'Only run the first two time steps'
         required: true
         default: '0'
       parallelism:
-        description: 'Number of task managers to spin up'
+        description: 'Number of workers to run in parallel'
         required: true
         default: '1'
-      protocol:
-        description: 'What protocol to use when accessing files (s3 or https).'
+      auth_mode:
+        description: 'What auth mode (edl or iamrole) to use when accessing files.'
         required: false
-        default: 's3'
+        default: 'iamrole'
+      resource_profile:
+        description: 'jobs have different memory requirements so choose (small[7168M], medium[10240M], large[15360M], xlarge[20480M])'
+        required: false
+        default: 'small'
 
 permissions:
   id-token: write # This is required for requesting the JWT
   contents: read # This is required for actions/checkout
 
 jobs:
   name-job:
     runs-on: ubuntu-latest
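The new resource_profile input names four memory tiers, but this diff does not show how the runner turns a tier into Flink task manager memory. A plausible sketch, where the MB figures come from the input description above and everything else (names, lookup, default) is an assumption for illustration:

    import os

    RESOURCE_PROFILES = {
        "small": "7168M",
        "medium": "10240M",
        "large": "15360M",
        "xlarge": "20480M",
    }
    task_manager_process_memory = RESOURCE_PROFILES.get(
        os.environ.get("RESOURCE_PROFILE", "small"), "7168M"
    )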
@@ -50,6 +50,7 @@ jobs:
     if: contains('["ranchodeluxe","abarciauskas-bgse", "norlandrhagen", "sharkinsspatial", "moradology", "thodson-usgs"]', github.actor)
     name: kickoff job ${{ needs.name-job.outputs.repo_name }}@${{ github.event.inputs.ref }}
     needs: name-job
+    environment: veda-smce
     outputs:
       job_name: ${{ steps.report_ids.outputs.job_name }}
       job_id: ${{ steps.report_ids.outputs.job_id }}
@@ -59,6 +60,12 @@ jobs:
       - name: checkout repository
         uses: actions/checkout@v3
 
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v2
+        with:
+          role-to-assume: arn:aws:iam::444055461661:role/github-actions-role-eodc
+          role-session-name: veda-pforge-run-job
+          aws-region: us-west-2
 
       - name: set up python 3.10
         uses: actions/setup-python@v3
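The Configure AWS credentials step added above swaps long-lived access keys for GitHub's OIDC flow: the job requests a JWT (hence the id-token: write permission) and exchanges it for short-lived credentials by assuming the IAM role. A quick way to confirm the assumed identity from Python, as a sketch (boto3 is an assumption here; the workflow itself only uses the aws CLI):

    import boto3  # assumed dependency, not installed by this workflow

    identity = boto3.client("sts").get_caller_identity()
    print(identity["Arn"])  # expect an assumed-role ARN for github-actions-role-eodc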
@@ -70,7 +77,6 @@
           echo "Manually triggered workflow: \
             ${{ github.event.inputs.repo }} \
             ${{ github.event.inputs.ref }} \
-            ${{ github.event.inputs.bucket }} \
             ${{ github.event.inputs.parallelism }} \
             ${{ github.event.inputs.prune }}"
@@ -79,13 +85,6 @@
           python -m pip install --upgrade pip
           pip install pangeo-forge-runner>=0.10.0
-      - name: set up aws credentials for job runner user
-        uses: aws-actions/configure-aws-credentials@v2
-        with:
-          aws-access-key-id: ${{ secrets.GH_ACTIONS_AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ secrets.GH_ACTIONS_AWS_SECRET_ACCESS_KEY }}
-          aws-region: ${{ secrets.GH_ACTIONS_AWS_REGION }}
 
       - name: install kubectl
         run: |
           curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
@@ -94,7 +93,7 @@
       - name: update kubeconfig with cluster
         run: |
-          aws eks update-kubeconfig --name pangeo-forge-v3 --region ${{ secrets.GH_ACTIONS_AWS_REGION }}
+          aws eks update-kubeconfig --name ${{ vars.EKS_CLUSTER_NAME }} --region us-west-2
 
       - name: execute recipe on k8s cluster
         id: executejob
@@ -125,18 +124,15 @@
           FLINK_DASH=$(cat execute.log | grep -oP "You can run '\K[^']+(?=')")
           echo "FLINK_DASH=$FLINK_DASH" >> $GITHUB_ENV
         env:
           EARTHDATA_TOKEN: ${{ secrets.EARTHDATA_TOKEN }}
           EARTHDATA_USERNAME: ${{ secrets.EARTHDATA_USERNAME }}
           EARTHDATA_PASSWORD: ${{ secrets.EARTHDATA_PASSWORD }}
           REPO: ${{ github.event.inputs.repo }}
           REF: ${{ github.event.inputs.ref }}
           FEEDSTOCK_SUBDIR: ${{ github.event.inputs.feedstock_subdir }}
           PRUNE_OPTION: ${{ github.event.inputs.prune }}
           PARALLELISM_OPTION: ${{ github.event.inputs.parallelism }}
-          S3_BUCKET: ${{ github.event.inputs.bucket }}
-          S3_DEFAULT_AWS_ACCESS_KEY_ID: ${{ secrets.S3_DEFAULT_AWS_ACCESS_KEY_ID }}
-          S3_DEFAULT_AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_DEFAULT_AWS_SECRET_ACCESS_KEY }}
-          PROTOCOL: ${{ github.event.inputs.protocol }}
+          OUTPUT_BUCKET: ${{ vars.OUTPUT_BUCKET }}
+          AUTH_MODE: ${{ github.event.inputs.auth_mode }}
+          RESOURCE_PROFILE: ${{ github.event.inputs.resource_profile }}
 
       - name: cleanup if "pangeo-forge-runner bake" failed
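For readers unpicking the grep -oP line above (it captures the quoted command from the "You can run '...'" message in execute.log), an equivalent extraction in Python, with the log line assumed:

    import re

    log_line = "You can run 'kubectl port-forward ...' to access the dashboard"  # assumed
    match = re.search(r"You can run '([^']+)'", log_line)
    flink_dash = match.group(1) if match else None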
@@ -201,12 +197,12 @@
     name: monitor job ${{ needs.name-job.outputs.repo_name }}@${{ github.event.inputs.ref }}
     needs: [name-job, run-job]
     steps:
-      - name: set up aws credentials for job runner user
+      - name: Configure AWS credentials
         uses: aws-actions/configure-aws-credentials@v2
         with:
-          aws-access-key-id: ${{ secrets.GH_ACTIONS_AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ secrets.GH_ACTIONS_AWS_SECRET_ACCESS_KEY }}
-          aws-region: ${{ secrets.GH_ACTIONS_AWS_REGION }}
+          role-to-assume: arn:aws:iam::444055461661:role/github-actions-role-eodc
+          role-session-name: veda-pforge-monitor-job
+          aws-region: us-west-2
 
       - name: install kubectl
         run: |
@@ -216,7 +212,7 @@
       - name: update kubeconfig with cluster
         run: |
-          aws eks update-kubeconfig --name pangeo-forge-v3 --region ${{ secrets.GH_ACTIONS_AWS_REGION }}
+          aws eks update-kubeconfig --name ${{ vars.EKS_CLUSTER_NAME }} --region us-west-2
       # - name: Setup upterm session
       #   uses: lhotari/action-upterm@v1
