
download and cache data #1

Workflow file for this run

name: check and update webbpsf and crds cache

Check failure on line 1 in .github/workflows/data.yml

GitHub Actions / .github/workflows/data.yml

Invalid workflow file

No steps defined in `steps` and no workflow called in `uses` for the following jobs: path
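
(The parser rejects the whole file because the `path` job below declares only `outputs`: every job must either define `steps` or call a reusable workflow via `uses`. A minimal fix is sketched inside the `path` job below.)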
on:
  workflow_call:
    outputs:
      path:
        value: ${{ jobs.path.outputs.path }}
      crds_path:
        value: ${{ jobs.crds.outputs.path }}
      crds_server:
        value: ${{ jobs.crds.outputs.server }}
      crds_context:
        value: ${{ jobs.crds.outputs.context }}
      webbpsf_path:
        value: ${{ jobs.download_webbpsf_data.outputs.path }}
      webbpsf_hash:
        value: ${{ jobs.retrieve_webbpsf_data_hash.outputs.hash }}
  workflow_dispatch:
  schedule:
    - cron: "42 4 * * 3"
jobs:
  path:
    name: set data path
    runs-on: ubuntu-latest
    outputs:
      path: /tmp/data
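    # The failure annotation above points at this job: every job must either
    # define `steps` or call a reusable workflow via `uses`. A minimal fix
    # (an assumption on our part, not part of the file as it ran) is a single
    # no-op step; the static `outputs.path` above is still exposed as-is:
    steps:
      - run: echo "data path is /tmp/data"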
  crds:
    needs: [ path ]
    name: retrieve current CRDS context
    runs-on: ubuntu-latest
    env:
      CRDS_PATH: ${{ needs.path.outputs.path }}/crds
      CRDS_SERVER_URL: https://roman-crds.stsci.edu
      OBSERVATORY: roman
    outputs:
      path: ${{ env.CRDS_PATH }}
      server: ${{ env.CRDS_SERVER_URL }}
      context: ${{ steps.crds_context.outputs.pmap }}
    steps:
      # get the default CRDS context without installing the crds client;
      # see https://hst-crds.stsci.edu/static/users_guide/web_services.html#generic-request
      - id: crds_context
        run: >
          echo "pmap=$(
          curl -s -X POST -d '{"jsonrpc": "1.0", "method": "get_default_context", "params": ["${{ env.OBSERVATORY }}"], "id": 1}' ${{ env.CRDS_SERVER_URL }}/json/ |
          python -c "import sys, json; print(json.load(sys.stdin)['result'])"
          )" >> $GITHUB_OUTPUT
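      # for reference (illustrative values, not captured from this run), the
      # JSON-RPC response looks like:
      #   {"jsonrpc": "1.0", "result": "roman_0042.pmap", "id": 1}
      # so the Python one-liner prints only the pmap name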
  download_webbpsf_data:
    needs: [ path ]
    if: (github.repository == 'spacetelescope/romancal' && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'update webbpsf data')))
    name: download and cache WebbPSF data
    runs-on: ubuntu-latest
    env:
      WEBBPSF_DATA_URL: https://stsci.box.com/shared/static/qxpiaxsjwo15ml6m4pkhtk36c9jgj70k.gz
    outputs:
      path: ${{ steps.cache_path.outputs.path }}
      hash: ${{ steps.download.outputs.hash }}
    steps:
      - id: download
        name: download data from URL
        # hash in shell rather than with hashFiles(): a ${{ }} expression in
        # `run` is evaluated before the script executes, i.e. before the
        # archive exists (the original also dropped hashFiles's closing paren)
        run: |
          mkdir -p tmp
          wget ${{ env.WEBBPSF_DATA_URL }} -O tmp/webbpsf-data.tar.gz
          echo "hash=$( sha256sum tmp/webbpsf-data.tar.gz | cut -d ' ' -f 1 )" >> $GITHUB_OUTPUT
      - id: cache_path
        run: echo "path=${{ needs.path.outputs.path }}/webbpsf-data" >> $GITHUB_OUTPUT
      - id: cache_download
        name: check downloaded data against the existing cache
        uses: actions/cache@v3
        with:
          path: ${{ steps.cache_path.outputs.path }}
          # key prefix matches the `--key webbpsf-` filter used by the
          # retrieval job below (the original `data-` prefix would never match)
          key: webbpsf-${{ steps.download.outputs.hash }}
      - if: ${{ steps.cache_download.outputs.cache-hit != 'true' }}
        name: extract data to cache directory
        run: |
          mkdir -p ${{ needs.path.outputs.path }}
          tar -xzvf tmp/webbpsf-data.tar.gz -C ${{ needs.path.outputs.path }}
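      # on a cache hit, actions/cache restores webbpsf-data into the cache
      # path and the extraction step above is skipped; on a miss, the action's
      # post-job phase saves the freshly extracted directory under the new key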
  retrieve_webbpsf_data_hash:
    needs: [ path, download_webbpsf_data ]
    # run regardless of whether `download_webbpsf_data` succeeded or was skipped
    if: always() && (needs.download_webbpsf_data.result == 'success' || needs.download_webbpsf_data.result == 'skipped')
    name: retrieve hash of cached WebbPSF data
    runs-on: ubuntu-latest
    env:
      GH_TOKEN: ${{ github.token }}
    outputs:
      hash: ${{ steps.retrieve_hash.outputs.hash }}
    steps:
      - id: retrieve_hash
        name: retrieve data hash of latest cache key
        run: |
          # use actions/gh-actions-cache to allow filtering by key
          gh extension install actions/gh-actions-cache
          LATEST_CACHE=$(gh actions-cache list -R spacetelescope/romancal --key webbpsf- --sort created-at | cut -f 1 | head -n 1)
          echo "LATEST_CACHE=$LATEST_CACHE"
          # cache keys look like `webbpsf-<hash>`, so field 2 is the hash
          HASH=$(echo "$LATEST_CACHE" | cut -d '-' -f 2)
          # fail the job if no matching cache entry exists yet
          if [ -z "$HASH" ]; then exit 1; fi
          echo "hash=$HASH" >> $GITHUB_OUTPUT