openml · PGijsbers · Jun 17, 2024 · Jun 18, 2024 · Jun 18, 2024 · Jun 18, 2024
diff --git a/.github/actions/runbenchmark/action.yml b/.github/actions/runbenchmark/action.yml
@@ -6,21 +6,55 @@ inputs:
     description: 'Framework to run'
     default: 'constantpredictor'
     required: true
-  task:
-    description: 'Task to run the framework on'
-    default: 'openml/t/59'
-    required: true
+
+  benchmark:
+    description: 'benchmark to run'
+    default: 'test'
+    required: false
+
+  store-results:
+    description: "If set (default), store the `results` directory as run artifact."
+    default: 'true'  # action inputs are always delivered as strings; quoted to make that explicit
+    required: false
+
+outputs:
+  failure:
+    description: "Indicates which step failed, if any. One of 'install', 'run', or ''."
+    value: ${{ steps.output-failure.outputs.failure }}
 
 runs:
   using: "composite"
   steps:
-    - name: Install Requirements
-      run: |
-        python -m pip install --upgrade pip
-        pip install -r requirements.txt
+    - name: Install ${{ inputs.framework }}
+      id: install  # id is read by the "Output Failures" step via steps.install.conclusion
+      run: python runbenchmark.py ${{ inputs.framework }} -s only
       shell: bash
-    - name: Run ${{ inputs.framework }} on ${{ inputs.task }}
+    - name: Benchmark ${{ inputs.framework }}
+      id: run  # id is read by the "Output Failures" step via steps.run.conclusion
       run: |
-        python runbenchmark.py ${{ inputs.framework }} validation test -f 0 -t ${{ inputs.task }} -e
-        echo "Exit with status $?"
+        python runbenchmark.py ${{ inputs.framework }} ${{ inputs.benchmark }} -f 0
+        if grep -q "Error" "results/results.csv"; then
+          exit 1
+        fi
+      shell: bash
+    - name: Save Results
+      if: always() && inputs.store-results == 'true'  # inputs are strings; a bare 'false' would be truthy
+      uses: actions/upload-artifact@v4
+      with:
+        name: ${{ inputs.framework }}-results
+        path: results
+    - name: Output Failures
+      if: always()  # must run after a failed step, otherwise the failure could not be reported
+      id: output-failure  # referenced by the action-level `failure` output
       shell: bash
+      run: |
+        FAILURE=''
+        if [[ "$INSTALL_FAILURE" = "failure" ]]; then
+          FAILURE='install'
+        elif [[ "$RUN_FAILURE" = "failure" ]]; then
+          FAILURE='run'
+        fi
+        echo "failure=$FAILURE" >> "$GITHUB_OUTPUT"
+      env:
+        RUN_FAILURE: ${{ steps.run.conclusion }}
+        INSTALL_FAILURE: ${{ steps.install.conclusion }}
diff --git a/.github/workflows/runbenchmark.yml b/.github/workflows/runbenchmark.yml
@@ -1,6 +1,16 @@
+# This workflow periodically runs the automl benchmark for the supported frameworks to report
+# any errors that may arise from updated versions.
+# It can also be invoked manually to call `runbenchmark` for a single framework.
 name: Run `runbenchmark.py`
 
+permissions:
+  contents: read
+  issues: write
+
 on:
+  schedule:
+    - cron: '3 3 * * 1'
+
   workflow_dispatch:
     inputs:
       python_version:
@@ -13,26 +23,103 @@ on:
         default: ''
 
 jobs:
-  run_benchmark:
-    runs-on: ${{ inputs.os }}
+  set_os:
+    # The matrix of the next job includes an extra job that is defined by the workflow dispatch input.
+    # However, on a scheduled run this input is an empty string, which means that it cannot be
+    # used directly in the `runs-on` field. So we set up an output to use for `runs-on` that
+    # is always populated. Inputs reach the scripts through `env`, never by direct interpolation.
+    runs-on: 'ubuntu-latest'
+    outputs:
+      os: ${{ steps.os.outputs.os }}
+    steps:
+      - id: os
+        run: echo "os=${INPUT_OS:-ubuntu-latest}" >> "$GITHUB_OUTPUT"
+        env:
+          INPUT_OS: ${{ inputs.os }}
+      - name: Echo Inputs # Just for debugging purposes
+        run: echo "$DISPATCH_INPUTS"
+        env:
+          DISPATCH_INPUTS: ${{ toJSON(github.event.inputs) }}
+
+  run:
+    needs: [set_os]  # supplies the always-populated `os` output used by the include entry
+    strategy:
+      fail-fast: false  # a failing framework must not cancel the remaining matrix jobs
+      matrix:
+        os: ['ubuntu-latest']
+        python: ['3.9']
+        framework: ['autogluon', 'autosklearn', 'flaml', 'gama', 'h2oautoml','lightautoml', 'naiveautoml', 'randomforest', 'tpot']
+        include:  # extra entry built from the workflow_dispatch inputs
+          - python: ${{ inputs.python_version }}
+            os: ${{ needs.set_os.outputs.os }}
+            framework: ${{ inputs.framework }}
+            options: ${{ inputs.options }}
+
+    runs-on: ${{ matrix.os }}
     steps:
-      - name: Runs on ${{ inputs.os }}
-        run: echo Preparing to benchmark ${{ inputs.framework }} ${{ inputs.options }}
+      - name: Stop Matrix Jobs on Dispatch  # on manual dispatch, fail every job except the requested configuration
+        if: github.event_name == 'workflow_dispatch' && (matrix.framework != inputs.framework || matrix.os != inputs.os || matrix.python != inputs.python_version)
+        run: |
+          echo "Workflow triggered by workflow_dispatch, and current configuration is not the one specified in dispatch."
+          exit 1
+      - name: Runs on ${{ matrix.os }}
+        run: |
+          echo Preparing to benchmark ${{ matrix.framework }} ${{ matrix.options }}
       - name: Checkout ${{ github.ref_name }}
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           ref: ${{ github.ref_name }}
-      - name: Install Py${{ inputs.python_version }}
-        uses: actions/setup-python@v4
+      - name: Install Py${{ matrix.python }}
+        uses: actions/setup-python@v5
         with:
-          python-version: ${{ inputs.python_version }}
+          python-version: ${{ matrix.python }}
       - name: Install benchmark dependencies
+        if: matrix.framework != ''  # the dispatch-derived matrix entry has an empty framework when the inputs are empty
         run: |
           python -m pip install --upgrade pip
           python -m pip install -r requirements.txt
-      - name: Install ${{ inputs.framework }}
-        run: python runbenchmark.py ${{ inputs.framework }} -s only
+      - uses: ./.github/actions/runbenchmark
+        id: benchmark  # the reporting steps read steps.benchmark.outputs.failure
+        with:
+          framework: ${{ matrix.framework }}
+      - name: Report Installation Failure  # comments on the open issue with this title, or creates one; --limit lifts gh's default cap of 30
+        if: failure() && steps.benchmark.outputs.failure == 'install' && github.event_name != 'workflow_dispatch'
+        run: |  # https://github.com/orgs/community/discussions/8945 can't link to job in matrix...
+          LOG_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}/"
+          TITLE="${{ matrix.framework }} failed to install"
+          NUMBERS=$(gh issue list --limit 1000 --json number,title,closed --jq '.[] | select( .title == "'"$TITLE"'") | select( .closed == false ) | {number: .number}')
+          echo "Found " $(echo "$NUMBERS" | grep -c .) " issues: $NUMBERS"
+          NUMBER=$(echo "$NUMBERS" | sed -E 's/[^0-9]//g' | head -n1)
+
+          echo "During a routine test, installation of ${{ matrix.framework }} failed."\
+               "For more information, see the logs: $LOG_URL" > error.txt
+
+          if [[ -z "$NUMBER" ]]; then
+            gh issue create --title "$TITLE" --label "bot" --body-file error.txt
+          else
+            gh issue comment "$NUMBER" --body-file error.txt
+          fi
+        env:
+          GH_TOKEN: ${{ github.token }}
+      - name: Report Running Failure  # comments on the open issue with this title, or creates one; --limit lifts gh's default cap of 30
+        if: failure() && steps.benchmark.outputs.failure == 'run' && github.event_name != 'workflow_dispatch'
+        run: |
+          python -m pip install tabulate
+
+          LOG_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}/"
+          TITLE="${{ matrix.framework }} fails on test"
+          NUMBERS=$(gh issue list --limit 1000 --json number,title,closed --jq '.[] | select( .title == "'"$TITLE"'") | select( .closed == false ) | {number: .number}')
+          echo "Found " $(echo "$NUMBERS" | grep -c .) " issues: $NUMBERS"
+          NUMBER=$(echo "$NUMBERS" | sed -E 's/[^0-9]//g' | head -n1)
+
+          echo "During a routine test ${{ matrix.framework }} installed successfully but failed to pass the test benchmark." > error.txt
+          if [[ -f results/results.csv ]]; then python -c "import pandas as pd; df = pd.read_csv('results/results.csv'); print('\n'+df.to_markdown()+'\n')" >> error.txt; fi
+          echo "For more information, see the logs: $LOG_URL" >> error.txt
+
+          if [[ -z "$NUMBER" ]]; then
+            gh issue create --title "$TITLE" --label "bot" --body-file error.txt
+          else
+            gh issue comment "$NUMBER" --body-file error.txt
+          fi
         env:
-          GITHUB_PAT: ${{ secrets.PUBLIC_ACCESS_GITHUB_PAT }}
-      - name: Benchmark ${{ inputs.framework }}
-        run: python runbenchmark.py ${{ inputs.framework }} ${{ inputs.options }}
+          GH_TOKEN: ${{ github.token }}