# Scrape on Target Date #20
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# .github/workflows/scraping.yml
#
# Runs the scraper only on the date stored in next_date.txt (YYYY-MM-DD).
# The workflow wakes up once a day (or on manual dispatch), compares that date
# with today's UTC date and, on a match, runs `python -m src.main` and commits
# whatever changed under data/.
name: Scrape on Target Date

on:
  schedule:
    # Check every day at 23:59 UTC.
    # NOTE(review): scheduled runs may start late under load; a run delayed
    # past 00:00 UTC compares against the next day's date and skips the
    # scrape. Consider an earlier time slot if that matters.
    - cron: '59 23 * * *'
  workflow_dispatch:

# The final step pushes a commit, so the job token needs write access to the
# repository contents (the default token may be read-only).
permissions:
  contents: write

jobs:
  check_and_scrape:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # The date check only needs bash, so it runs before the (slow) Python
      # and Playwright setup; those steps are skipped on non-target days.
      - name: Leer fecha guardada
        id: read_date
        run: |
          # Strip all whitespace (trailing newline, CR, blank lines) so the
          # value is a single token and cannot corrupt the output file.
          if [[ -r next_date.txt ]]; then
            next_date="$(tr -d '[:space:]' < next_date.txt)"
          else
            echo "::warning::next_date.txt not found; scraping will be skipped"
            next_date=""
          fi
          echo "next_date=${next_date}" >> "$GITHUB_OUTPUT"

      - name: Verificar fecha
        id: check_date
        env:
          # Passed through the environment instead of being expanded inside
          # the script, so file contents are never interpreted as shell code.
          NEXT_DATE: ${{ steps.read_date.outputs.next_date }}
        run: |
          TODAY="$(date -u +'%Y-%m-%d')"
          if [[ "$NEXT_DATE" == "$TODAY" ]]; then
            echo "should_scrape=true" >> "$GITHUB_OUTPUT"
          else
            echo "should_scrape=false" >> "$GITHUB_OUTPUT"
          fi

      # Step to simulate that the report date is today (for testing).
      # Enable it INSTEAD of "Verificar fecha" (both use id check_date).
      # - name: Simular fecha de informe
      #   id: check_date
      #   run: echo "should_scrape=true" >> $GITHUB_OUTPUT

      - name: Set up Python 3.8
        if: ${{ steps.check_date.outputs.should_scrape == 'true' }}
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML keeps it a string rather than a float.
          python-version: '3.8'

      - name: Install dependencies
        if: ${{ steps.check_date.outputs.should_scrape == 'true' }}
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          # Install the Playwright browsers
          python -m playwright install

      - name: Ejecutar scraping
        if: ${{ steps.check_date.outputs.should_scrape == 'true' }}
        env:
          API_KEY: ${{ secrets.API_KEY }}
          API_SECRET: ${{ secrets.API_SECRET }}
          ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }}
          ACCESS_SECRET: ${{ secrets.ACCESS_SECRET }}
        run: python -m src.main

      - name: Commit y push de datos
        if: ${{ steps.check_date.outputs.should_scrape == 'true' }}
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add data/
          # NOTE(review): only data/ is staged. If the scraper also rewrites
          # next_date.txt (repo root), that change is never committed and the
          # target date will not advance — confirm against src.main.
          # `git commit` exits non-zero when nothing is staged, which would
          # fail the job on a run that produced no new data; skip it instead.
          if git diff --cached --quiet; then
            echo "No changes under data/; nothing to commit."
          else
            git commit -m "Actualizar datos: $(date +'%Y-%m-%d')"
            git push
          fi