# Workflow file for run "bb #406" of the "bb" workflow.

# Crawl workflow for the "bb" scraper: pulls runtime data, runs the crawler,
# then generates mmmeta and uploads the results.
name: bb

on:
  # Manual trigger. All inputs are optional; they are forwarded to the
  # "Run crawler" step as environment variables.
  workflow_dispatch:
    inputs:
      start_date:
        description: 'Start date'
      end_date:
        description: 'End date'
      legislative_term:
        description: 'Legislative term'
      document_type:
        description: 'Document type'
  # Daily run at 03:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: "0 3 * * *"

jobs:
  scrape:
    runs-on: ubuntu-latest
    # Every step executes inside this image.
    container: ghcr.io/okfde/dokukratie:main
    defaults:
      run:
        # The make targets below are invoked from this directory.
        working-directory: /opt/dokukratie
    services:
      # NOTE(review): presumably the crawler's queue/cache backend - confirm.
      redis:
        image: redis:alpine
        # Folded into one line of docker options: the service counts as
        # healthy once `redis-cli ping` succeeds.
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          # Quoted so the mapping is always read as a string.
          - "6379:6379"
    # Bucket names and credentials from repository secrets, visible to
    # every step of the job.
    env:
      ARCHIVE_BUCKET: ${{ secrets.ARCHIVE_BUCKET }}
      ARCHIVE_ENDPOINT_URL: ${{ secrets.ARCHIVE_ENDPOINT_URL }}
      DATA_BUCKET: ${{ secrets.DATA_BUCKET }}
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    steps:
      - name: Pull crawler runtime data
        run: make bb.pull
      - name: Run crawler
        # The inputs only exist on manual (workflow_dispatch) runs; on
        # scheduled runs these expressions evaluate to empty strings.
        env:
          START_DATE: ${{ github.event.inputs.start_date }}
          END_DATE: ${{ github.event.inputs.end_date }}
          LEGISLATIVE_TERMS: ${{ github.event.inputs.legislative_term }}
          DOCUMENT_TYPES: ${{ github.event.inputs.document_type }}
        run: make bb.run_prod
      # `always()` keeps the two steps below running even when an earlier
      # step has failed.
      - name: Generate mmmeta
        if: always()
        run: make bb.mmmeta
      - name: Upload data
        if: always()
        run: make bb.upload