Skip to content

Scrape on Target Date #21

Scrape on Target Date

Scrape on Target Date #21

Workflow file for this run

# .github/workflows/scraping.yml
name: Scrape on Target Date

# Triggers: a daily schedule plus a manual run from the Actions tab.
on:
  schedule:
    # Runs every day at 22:00 UTC (19:00 Argentina time).
    - cron: '0 22 * * *'
  workflow_dispatch:

# The last step pushes a commit, so grant the job's GITHUB_TOKEN write access
# to repository contents explicitly instead of relying on the repository's
# default token permissions (which may be read-only).
permissions:
  contents: write

jobs:
  check_and_scrape:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python 3.8
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the version as a string, not a float.
          python-version: '3.8'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          # Install the Playwright browsers
          python -m playwright install

      # Expose the contents of next_date.txt as the step output `next_date`.
      # Only the commented-out date gate below consumes it at the moment.
      - name: Leer fecha guardada
        id: read_date
        run: |
          echo "next_date=$(cat next_date.txt)" >> "$GITHUB_OUTPUT"

      # Real date gate, currently disabled: sets should_scrape=true only when
      # the stored date equals today's date.
      # - name: Verificar fecha
      #   id: check_date
      #   run: |
      #     TODAY=$(date +'%Y-%m-%d')
      #     if [[ "${{ steps.read_date.outputs.next_date }}" == "$TODAY" ]]; then
      #       echo "should_scrape=true" >> "$GITHUB_OUTPUT"
      #     else
      #       echo "should_scrape=false" >> "$GITHUB_OUTPUT"
      #     fi

      # Test stand-in for the date gate: pretends the report date is today, so
      # every run scrapes.
      # NOTE(review): while this step is active the workflow scrapes on every
      # trigger regardless of next_date.txt - confirm that is still intended.
      - name: Simular fecha de informe
        id: check_date
        run: echo "should_scrape=true" >> "$GITHUB_OUTPUT"

      # Credentials reach the scraper through environment variables; what they
      # authenticate against is not visible in this file.
      - name: Ejecutar scraping
        if: steps.check_date.outputs.should_scrape == 'true'
        env:
          API_KEY: ${{ secrets.API_KEY }}
          API_SECRET: ${{ secrets.API_SECRET }}
          ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }}
          ACCESS_SECRET: ${{ secrets.ACCESS_SECRET }}
        run: python -m src.main

      - name: Commit y push de datos
        if: steps.check_date.outputs.should_scrape == 'true'
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add data/
          # `git commit` exits non-zero when nothing is staged, which would
          # fail the job on runs that produce no new data. `git diff --cached
          # --quiet` exits 0 when the index matches HEAD, so only commit and
          # push when something under data/ actually changed.
          if git diff --cached --quiet; then
            echo "No changes under data/; skipping commit and push."
          else
            git commit -m "Actualizar datos: $(date +'%Y-%m-%d')"
            git push
          fi