dispatch job #388

Workflow file for this run

name: dispatch job
on:
  workflow_dispatch:
    inputs:
      repo:
        description: 'The HTTPS GitHub URL for the recipe feedstock'
        required: true
      ref:
        description: 'The tag or branch to target in your recipe repo'
        required: true
        default: 'main'
      feedstock_subdir:
        description: 'The subdir of the feedstock directory in the repo'
        required: true
        default: 'feedstock'
      spark_params:
        description: 'Space-delimited --conf values: https://docs.aws.amazon.com/emr/latest/EMR-Serverless-UserGuide/jobs-spark.html'
        required: true
        default: '--conf spark.executor.cores=16 --conf spark.executor.memory=60G --conf spark.executor.memoryOverhead=60G --conf spark.driver.memory=10G --conf spark.driver.memoryOverhead=4G --conf spark.shuffle.file.buffer=64k --conf spark.default.parallelism=1280 --conf spark.emr-serverless.executor.disk=200G --conf spark.hadoop.hive.metastore.client.factory.class=com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory --conf spark.emr-serverless.driverEnv.JAVA_HOME=/usr/lib/jvm/java-17-amazon-corretto.x86_64/ --conf spark.executorEnv.JAVA_HOME=/usr/lib/jvm/java-17-amazon-corretto.x86_64/'
      job_name:
        description: 'Name the EMR job'
        required: true
permissions:
  id-token: write  # This is required for requesting the JWT
  contents: read   # This is required for actions/checkout
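# name-job derives the bare repository name from the repo URL input; run-job
# assumes the AWS role via OIDC and submits the recipe to EMR Serverless.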
jobs:
  name-job:
    runs-on: ubuntu-latest
    outputs:
      repo_name: ${{ steps.string_manipulation.outputs.result }}
    steps:
      - name: manipulate strings
        id: string_manipulation
        run: |
          repo_name=$(basename -s .git "${{ github.event.inputs.repo }}")
          echo "result=$repo_name" >> $GITHUB_OUTPUT
  run-job:
    if: contains(fromJSON('["ranchodeluxe", "abarciauskas-bgse", "norlandrhagen", "sharkinsspatial", "moradology", "thodson-usgs"]'), github.actor)
    name: kickoff job ${{ needs.name-job.outputs.repo_name }}@${{ github.event.inputs.ref }}
    needs: name-job
    environment: veda-smce
    outputs:
      job_name: ${{ steps.report_ids.outputs.job_name }}
      job_id: ${{ steps.report_ids.outputs.job_id }}
    runs-on: ubuntu-latest
    steps:
      - name: checkout repository
        uses: actions/checkout@v3
      - name: configure aws credentials
        uses: aws-actions/configure-aws-credentials@v3
        with:
          role-to-assume: arn:aws:iam::444055461661:role/github-actions-role-eodc
          role-session-name: veda-pforge-run-job
          role-duration-seconds: 3600
          aws-region: us-west-2
      - name: set up python 3.10 and cache pip deps
        uses: actions/setup-python@v3
        with:
          python-version: '3.10'
          cache: 'pip'  # caching pip dependencies
      - run: pip install -r .github/workflows/requirements.txt
      - name: echo inputs to user
        run: |
          echo "Manually triggered workflow:" \
            ${{ github.event.inputs.repo }} \
            ${{ github.event.inputs.ref }} \
            ${{ github.event.inputs.feedstock_subdir }} \
            ${{ github.event.inputs.spark_params }}
      - name: install zip
        uses: montudor/action-zip@v1
      - name: submit job to emr serverless
        id: executejob
        continue-on-error: true
        run: |
          # TODO: make submit_spark_job.py or some other config.py checkout, build env
          # and package on s3 before submission
          python .github/workflows/submit_spark_job.py \
            --name=${{ github.event.inputs.job_name }} \
            --application-id="00firgpmjusj5e0l" \
            --execution-role-arn="arn:aws:iam::444055461661:role/veda-data-reader-dev" \
            --entry-point="s3://veda-pforge-emr-input-scripts-v4/runwrapper.py" \
            --entry-point-arguments="${{ github.event.inputs.repo }} ${{ github.event.inputs.ref }} ${{ github.event.inputs.feedstock_subdir }}" \
            --spark-submit-parameters="${{ github.event.inputs.spark_params }}"
        env:
          REPO: ${{ github.event.inputs.repo }}
          REF: ${{ github.event.inputs.ref }}
          FEEDSTOCK_SUBDIR: ${{ github.event.inputs.feedstock_subdir }}
          SPARK_PARAMS: ${{ github.event.inputs.spark_params }}
          JOB_NAME: ${{ github.event.inputs.job_name }}
      - name: cleanup if submission failed
        if: steps.executejob.outcome == 'failure'
        run: |
          echo "The previous command failed. Running cleanup logic..."
          # force GH action to show failed result
          exit 128
      - name: echo job metadata
        id: report_ids
        run: |
          echo '############ CW DASHBOARD ################'
          echo "https://us-west-2.console.aws.amazon.com/cloudwatch/home?region=us-west-2#dashboards/dashboard/veda-pangeo-forge-emr-jobs"
          echo '############ JOB ID and JOB DASHBOARD ################'
          python .github/workflows/submit_spark_job.py \
            --name="whatever" \
            --application-id="00firgpmjusj5e0l" \
            --execution-role-arn="arn:aws:iam::444055461661:role/veda-data-reader-dev" \
            --entry-point="s3://veda-pforge-emr-input-scripts-v4/runwrapper.py" \
            --workflow="getjob"
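  # The monitor-job below is intentionally left commented out; it sketches a
  # follow-up job for monitoring the submitted EMR job (the 12-hour role
  # session is sized to match a monitoring timeout).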
  # monitor-job:
  #   runs-on: ubuntu-latest
  #   name: monitor job ${{ needs.name-job.outputs.repo_name }}@${{ github.event.inputs.ref }}
  #   environment: veda-smce
  #   needs: [name-job, run-job]
  #   steps:
  #     - name: Configure AWS credentials
  #       uses: aws-actions/configure-aws-credentials@v3
  #       with:
  #         role-to-assume: arn:aws:iam::444055461661:role/github-actions-role-eodc
  #         role-session-name: veda-pforge-monitor-job
  #         role-duration-seconds: 43200  # note this has to match our timeout-minutes below for monitoring
  #         aws-region: us-west-2