# Fetch pathway figures and metadata from date range (workflow run #1)

name: Fetch pathway figures and metadata from date range

# Triggers: manual dispatch from the Actions tab, plus a quarterly schedule.
on:
  workflow_dispatch:
  schedule:
    # Hourly from 01:00 to 07:00 UTC on the first 3 days of every Jan, Apr,
    # Jul and Oct. Cron is evaluated in UTC, so this is 9PM-3AM US Eastern
    # during daylight time (Apr, Jul, Oct) and 8PM-2AM during standard time
    # (Jan).
    - cron: "0 1-7 1-3 1,4,7,10 *"

# One concurrency group per workflow and ref, so runs on the same ref never
# overlap.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  # false: a new run waits for the in-progress run instead of cancelling it.
  # GitHub keeps at most one pending run per group; an older pending run is
  # cancelled when a newer one is queued.
  cancel-in-progress: false
jobs:
  # Gate job: compares the last_run date stored in query_config.yml with
  # today's date and tells the scrape job whether to run.
  check:
    runs-on: ubuntu-latest
    outputs:
      # Either "proceed" or "quit"; consumed by the `if` of the scrape job.
      status: ${{ steps.early.outputs.status }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - id: early
        name: Date check
        run: |
          # Extract the YYYY/MM/DD value of the `last_run:` line.
          last_run_date=$(sed -n 's/^last_run:[[:space:]]*\([[:digit:]]\{4\}\/[[:digit:]]\{2\}\/[[:digit:]]\{2\}\).*$/\1/p' query_config.yml)
          today_date="$(date +'%Y/%m/%d')"
          # Zero-padded YYYY/MM/DD strings sort lexicographically in date
          # order. An empty value (no last_run line matched) also compares as
          # earlier, so the run proceeds in that case.
          if [[ "$last_run_date" < "$today_date" ]]; then
            echo "status=proceed" >> "$GITHUB_OUTPUT"
          else
            echo "status=quit" >> "$GITHUB_OUTPUT"
            echo "Nothing more to collect at this time (see last_run date). Skipping other steps..."
          fi

  # Worker job: runs the R fetch script and commits whatever it produced.
  scrape:
    needs: check
    if: ${{needs.check.outputs.status == 'proceed' }}
    runs-on: ubuntu-latest
    # The final step pushes to the repository, which needs write access even
    # when the repository's default GITHUB_TOKEN permission is read-only.
    permissions:
      contents: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup R
        uses: r-lib/actions/setup-r@v2
      - name: Install Packages
        run: |
          sudo apt-get update
          # -y: there is no terminal to answer apt's confirmation prompt.
          sudo apt-get install -y libcurl4-openssl-dev
          Rscript -e 'install.packages(c("rentrez", "xml2", "httr", "yaml", "stringr", "lubridate"))'
      - name: Run R Script to Fetch Figures
        # Best effort: a crash in the script must not prevent the next step
        # from committing the files already written, so the failure is
        # surfaced as a warning annotation instead of failing the step.
        run: |
          Rscript scripts/fetch_figures_v2.R ||
            echo "::warning::fetch_figures_v2.R exited with an error; committing whatever it produced"
      - name: Commit, pull with rebase (any remote changes during this run), and then push
        run: |
          git config --global user.name 'GitHub Action'
          # NOTE(review): the address here had been reduced to the placeholder
          # "[email protected]" (e-mail obfuscation residue). The github-actions
          # bot noreply address is used instead; confirm against the
          # repository's original value.
          git config --global user.email '41898282+github-actions[bot]@users.noreply.github.com'
          git add .
          # `git diff --exit-code --staged` exits 0 when nothing is staged.
          if git diff --exit-code --staged; then
            echo "No changes"
          else
            git commit -m 'new figures from pmc; updated config and log'
            # Replay the new commit on top of anything pushed while this job
            # was running, then publish it.
            git pull --rebase
            git push --force-with-lease
          fi