Skip to content

Scrape on Target Date #20

Scrape on Target Date

Scrape on Target Date #20

Workflow file for this run

# .github/workflows/scraping.yml
# Runs once a day (or on manual dispatch). When today's date equals the date
# stored in next_date.txt, it runs the scraper and commits whatever changed
# under data/.
name: Scrape on Target Date

on:
  schedule:
    # Check every day at 23:59 UTC.
    # NOTE(review): scheduled runs can start late; a run that begins after
    # 00:00 UTC would see the next day's date and skip the scrape. Confirm
    # whether an earlier time of day would be safer.
    - cron: '59 23 * * *'
  workflow_dispatch:

jobs:
  check_and_scrape:
    runs-on: ubuntu-latest
    # The last step pushes to the repository, so the job token needs write
    # access to repository contents.
    permissions:
      contents: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python 3.8
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: '3.8'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          # Install the Playwright browsers
          python -m playwright install

      - name: Leer fecha guardada
        id: read_date
        run: |
          # Strip all whitespace (including a stray carriage return) so the
          # comparison in the next step is not defeated by line endings.
          next_date="$(tr -d '[:space:]' < next_date.txt)"
          echo "next_date=${next_date}" >> "$GITHUB_OUTPUT"

      - name: Verificar fecha
        id: check_date
        env:
          # Passed through the environment rather than expanded inside the
          # script, so the file's content is never interpreted as shell code.
          NEXT_DATE: ${{ steps.read_date.outputs.next_date }}
        run: |
          TODAY=$(date +'%Y-%m-%d')
          if [[ "$NEXT_DATE" == "$TODAY" ]]; then
            echo "should_scrape=true" >> "$GITHUB_OUTPUT"
          else
            echo "should_scrape=false" >> "$GITHUB_OUTPUT"
          fi

      # Step to pretend the report date is today (for testing). To use it,
      # uncomment it and disable the "Verificar fecha" step above, since both
      # share the id check_date.
      # - name: Simular fecha de informe
      #   id: check_date
      #   run: echo "should_scrape=true" >> $GITHUB_OUTPUT

      - name: Ejecutar scraping
        if: ${{ steps.check_date.outputs.should_scrape == 'true' }}
        env:
          API_KEY: ${{ secrets.API_KEY }}
          API_SECRET: ${{ secrets.API_SECRET }}
          ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }}
          ACCESS_SECRET: ${{ secrets.ACCESS_SECRET }}
        run: python -m src.main

      - name: Commit y push de datos
        if: ${{ steps.check_date.outputs.should_scrape == 'true' }}
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add data/
          # Nothing staged means data/ did not change; skip the commit
          # instead of failing the job on git's non-zero exit status.
          if git diff --cached --quiet; then
            echo "No changes to commit."
            exit 0
          fi
          git commit -m "Actualizar datos: $(date +'%Y-%m-%d')"
          git push