From 8a97fdac1fc9285cc8ee7dd644de35b00a086519 Mon Sep 17 00:00:00 2001
From: Andrei Kashchikhin
Date: Tue, 16 Apr 2024 15:52:12 +0100
Subject: [PATCH] [CI] [GHA] Introduce GHA pipeline rerunner (#23865)

### Details:
This PR introduces a GHA pipeline rerunner. It scans the logs of failed workflow runs and re-runs those that failed with known sporadic errors. The rerunner is a Python script driven by a dedicated workflow. The workflow will not run from this PR; it needs to be in `master` first. I have verified both the workflow and the script in the private repo.

### Tickets:
 - *136935*
---
 .gitattributes                                |   2 +
 .github/scripts/workflow_rerun/__init__.py    |   0
 .../scripts/workflow_rerun/argument_parser.py |  20 +++
 .github/scripts/workflow_rerun/constants.py   |  17 +++
 .../workflow_rerun/errors_to_look_for.json    |  42 ++++++
 .../scripts/workflow_rerun/log_analyzer.py    | 132 ++++++++++++++++++
 .../scripts/workflow_rerun/log_collector.py   |  21 +++
 .github/scripts/workflow_rerun/rerunner.py    |  53 +++++++
 .../scripts/workflow_rerun/tests/__init__.py  |   0
 .../tests/data/log_archive_with_error.zip     |   3 +
 .../tests/data/log_archive_wo_error.zip       |   3 +
 .../workflow_rerun/tests/integration_test.py  |  52 +++++++
 .../workflow_rerun/tests/log_analyzer_test.py | 101 ++++++++++++++
 .../tests/log_collector_test.py               |  38 +++++
 .github/workflows/workflow_rerunner.yml       |  72 ++++++++++
 15 files changed, 556 insertions(+)
 create mode 100644 .github/scripts/workflow_rerun/__init__.py
 create mode 100644 .github/scripts/workflow_rerun/argument_parser.py
 create mode 100644 .github/scripts/workflow_rerun/constants.py
 create mode 100644 .github/scripts/workflow_rerun/errors_to_look_for.json
 create mode 100644 .github/scripts/workflow_rerun/log_analyzer.py
 create mode 100644 .github/scripts/workflow_rerun/log_collector.py
 create mode 100644 .github/scripts/workflow_rerun/rerunner.py
 create mode 100644 .github/scripts/workflow_rerun/tests/__init__.py
 create mode 100644 .github/scripts/workflow_rerun/tests/data/log_archive_with_error.zip
 create mode 100644 .github/scripts/workflow_rerun/tests/data/log_archive_wo_error.zip
 create mode 100644 .github/scripts/workflow_rerun/tests/integration_test.py
 create mode 100644 .github/scripts/workflow_rerun/tests/log_analyzer_test.py
 create mode 100644 .github/scripts/workflow_rerun/tests/log_collector_test.py
 create mode 100644 .github/workflows/workflow_rerunner.yml

diff --git a/.gitattributes b/.gitattributes
index dfeac125fb1c3f..a0f976d8fb1fbe 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -65,3 +65,5 @@
 *.vsdx filter=lfs diff=lfs merge=lfs -text
 *.bmp filter=lfs diff=lfs merge=lfs -text
 *.svg filter=lfs diff=lfs merge=lfs -text
+.github/scripts/workflow_rerun/tests/data/log_archive_with_error.zip filter=lfs diff=lfs merge=lfs -text
+.github/scripts/workflow_rerun/tests/data/log_archive_wo_error.zip filter=lfs diff=lfs merge=lfs -text
diff --git a/.github/scripts/workflow_rerun/__init__.py b/.github/scripts/workflow_rerun/__init__.py
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/.github/scripts/workflow_rerun/argument_parser.py b/.github/scripts/workflow_rerun/argument_parser.py
new file mode 100644
index 00000000000000..e73485dafd09c0
--- /dev/null
+++ b/.github/scripts/workflow_rerun/argument_parser.py
@@ -0,0 +1,20 @@
+import argparse
+from pathlib import Path
+
+
+def get_arguments() -> argparse.Namespace:
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-r', '--repository-name',
+                        type=str,
+                        required=True,
+                        help='Repository name in the OWNER/REPOSITORY format')
+    parser.add_argument('--run-id',
+                        type=int,
+                        required=True,
+                        help='Workflow Run ID')
+    parser.add_argument('--errors-to-look-for-file',
+                        type=str,
+                        required=False,
+                        help='.json file with the errors to look for in logs',
+                        default=Path(__file__).resolve().parent.joinpath('errors_to_look_for.json'))
+    return parser.parse_args()
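For reference, a minimal sketch (not part of the patch) of the namespace this parser produces; argparse derives the attribute names from the long flags, and the run id shown is hypothetical:

```python
# Hypothetical invocation: python3 rerunner.py -r openvinotoolkit/openvino --run-id 123456789
from workflow_rerun.argument_parser import get_arguments

args = get_arguments()
print(args.repository_name)          # 'openvinotoolkit/openvino'
print(args.run_id)                   # 123456789 (an int, thanks to type=int)
print(args.errors_to_look_for_file)  # defaults to errors_to_look_for.json next to the script
```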
diff --git a/.github/scripts/workflow_rerun/constants.py b/.github/scripts/workflow_rerun/constants.py
new file mode 100644
index 00000000000000..174ffd74e6371f
--- /dev/null
+++ b/.github/scripts/workflow_rerun/constants.py
@@ -0,0 +1,17 @@
+import logging
+import os
+
+
+GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN')
+
+
+def init_logger():
+    LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO').upper()
+    logging.basicConfig(level=LOGLEVEL,
+                        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
+                        datefmt='%m-%d-%Y %H:%M:%S')
+
+
+init_logger()
+
+LOGGER = logging.getLogger('rerunner')
diff --git a/.github/scripts/workflow_rerun/errors_to_look_for.json b/.github/scripts/workflow_rerun/errors_to_look_for.json
new file mode 100644
index 00000000000000..836b21a0042bfd
--- /dev/null
+++ b/.github/scripts/workflow_rerun/errors_to_look_for.json
@@ -0,0 +1,42 @@
+[
+    {
+        "error_text": "This is a problem related to network connectivity",
+        "ticket": 135929
+    },
+    {
+        "error_text": "Unable to make request",
+        "ticket": 135715
+    },
+    {
+        "error_text": "GnuTLS recv error",
+        "ticket": 131918
+    },
+    {
+        "error_text": "Connection was reset",
+        "ticket": 131818
+    },
+    {
+        "error_text": "Failed to connect to github.com",
+        "ticket": 131657
+    },
+    {
+        "error_text": "Could not resolve host: github.com",
+        "ticket": 131546
+    },
+    {
+        "error_text": "retrieving gpg key timed out",
+        "ticket": 131538
+    },
+    {
+        "error_text": "Retry limit has been reached for chunk",
+        "ticket": 131537
+    },
+    {
+        "error_text": "fatal error: downloading",
+        "ticket": 131424
+    },
+    {
+        "error_text": "Failure when receiving data from the peer",
+        "ticket": 137121
+    }
+]
\ No newline at end of file
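A quick sanity check one could run against this file (a sketch, not part of the patch, assuming the schema stays flat): every entry must carry the two fields `LogAnalyzer` reads.

```python
import json
from pathlib import Path

errors_file = Path('.github/scripts/workflow_rerun/errors_to_look_for.json')
for entry in json.loads(errors_file.read_text(encoding='utf-8')):
    # LogAnalyzer expects a non-empty error string and an integer ticket number
    assert isinstance(entry['error_text'], str) and entry['error_text']
    assert isinstance(entry['ticket'], int)
```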
diff --git a/.github/scripts/workflow_rerun/log_analyzer.py b/.github/scripts/workflow_rerun/log_analyzer.py
new file mode 100644
index 00000000000000..73f73ee53e1efd
--- /dev/null
+++ b/.github/scripts/workflow_rerun/log_analyzer.py
@@ -0,0 +1,132 @@
+import json
+import re
+import tempfile
+from pathlib import Path
+from typing import TypedDict
+from zipfile import ZipFile
+
+from workflow_rerun.constants import LOGGER
+
+
+class LogFile(TypedDict):
+    file_name: str
+    path: Path
+
+
+class ErrorData(TypedDict):
+    error_text: str
+    ticket: int
+
+
+class LogAnalyzer:
+    def __init__(self,
+                 path_to_log_archive: Path,
+                 path_to_errors_file: Path) -> None:
+        self._path_to_log_archive = path_to_log_archive
+        self._path_to_errors_file = path_to_errors_file
+
+        self._errors_to_look_for: list[ErrorData] = []
+        self._collect_errors_to_look_for()
+
+        self._log_dir = tempfile.mkdtemp()  # mkdtemp persists; TemporaryDirectory().name would be removed once the object is garbage-collected
+
+        self._log_files: list[LogFile] = []
+        self._collect_log_files()
+
+        all_txt_log_files_pretty = '\n'.join(map(lambda item: str(item['path']), self._log_files))
+        LOGGER.info(f'ALL .txt LOG FILES: \n{all_txt_log_files_pretty}')
+
+        self.found_matching_error = False
+
+    def _collect_errors_to_look_for(self) -> None:
+        with open(file=self._path_to_errors_file,
+                  mode='r',
+                  encoding='utf-8') as errors_file:
+            errors_data = json.load(errors_file)
+            for error_data in errors_data:
+                self._errors_to_look_for.append(
+                    ErrorData(error_text=error_data['error_text'],
+                              ticket=error_data['ticket'])
+                )
+
+    def _collect_log_files(self) -> None:
+        """
+        Collects the .txt log files from the log archive
+
+        The GitHub Actions pipeline log archive has the following structure:
+            > Job_name_0
+                > step_name_0.txt
+                > step_name_1.txt
+                ...
+            > Job_name_1
+                > step_name_0.txt
+                > step_name_1.txt
+                ...
+            > Job_name_2
+                ...
+            ...
+
+        Only the `*.txt` files need to be analyzed
+        """
+        with ZipFile(file=self._path_to_log_archive,
+                     mode='r') as zip_file:
+            zip_file.extractall(self._log_dir)
+
+        for _file in Path(self._log_dir).iterdir():
+            if _file.is_dir():
+                for log_file in _file.iterdir():
+                    self._log_files.append(LogFile(file_name=log_file.name,
+                                                   path=log_file.resolve()))
+
+    def _is_error_in_log(self,
+                         error_to_look_for: str,
+                         log_file_path: Path) -> bool:
+        """
+        Searches for the error in the provided log file
+        """
+        error_to_look_for = self._clean_up_string(error_to_look_for)
+
+        with open(file=log_file_path,
+                  mode='r',
+                  encoding='utf-8') as log_file:
+            for line in log_file:
+                if error_to_look_for in self._clean_up_string(line):
+                    return True
+        return False
+
+    @staticmethod
+    def _clean_up_string(string: str) -> str:
+        """
+        Replaces special characters with spaces, strips leading and trailing spaces, and lower-cases the string
+
+        For example, "Could not resolve host: github.com" becomes "could not resolve host github com"
+
+        This cleanup is applied to both the errors to look for and the log lines so that they can be matched
+        """
+        return re.sub(r'[^A-Za-z0-9]+', ' ', string).lower().strip()
+
+    def analyze(self) -> None:
+        """
+        Iterates over the known errors and tries to find them in the collected log files
+        """
+        for error in self._errors_to_look_for:
+            LOGGER.info(f'LOOKING FOR "{error["error_text"]}" ERROR...')
+            for log_file in self._log_files:
+                if self._is_error_in_log(error_to_look_for=error['error_text'],
+                                         log_file_path=log_file['path']):
+                    LOGGER.info(f'FOUND "{error["error_text"]}" ERROR IN {log_file["path"]}. TICKET: {error["ticket"]}')
+                    self.found_matching_error = True
+                    return
+
+
+if __name__ == '__main__':
+    # Usage example
+    log_analyzer = LogAnalyzer(path_to_log_archive=Path('/tmp/logs/log.zip'),
+                               path_to_errors_file=Path('/tmp/errors_to_look_for.json'))
+    log_analyzer.analyze()
+    if log_analyzer.found_matching_error:
+        print('found matching error, see logs above')
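To illustrate why `_clean_up_string` is applied to both sides of the comparison, here is a small self-contained sketch of the matching (same regex as above; the log line is made up):

```python
import re

def clean(string: str) -> str:
    # same normalization as LogAnalyzer._clean_up_string
    return re.sub(r'[^A-Za-z0-9]+', ' ', string).lower().strip()

log_line = '2024-04-16T12:00:00 ##[error]curl: (6) Could not resolve host: github.com'
known_error = 'Could not resolve host: github.com'
assert clean(known_error) in clean(log_line)  # matches despite punctuation, case, and timestamps
```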
diff --git a/.github/scripts/workflow_rerun/log_collector.py b/.github/scripts/workflow_rerun/log_collector.py
new file mode 100644
index 00000000000000..6045a4750b824b
--- /dev/null
+++ b/.github/scripts/workflow_rerun/log_collector.py
@@ -0,0 +1,21 @@
+from pathlib import Path
+
+import requests
+from github.WorkflowRun import WorkflowRun
+from workflow_rerun.constants import GITHUB_TOKEN, LOGGER
+
+
+def collect_logs_for_run(run: WorkflowRun,
+                         log_archive_path: Path) -> Path:
+    """
+    Collects the log archive for a pipeline
+    """
+    with open(file=log_archive_path,
+              mode='wb') as log_archive:
+        LOGGER.info(f'STARTED LOG COLLECTION FOR {run.id} IN {log_archive_path}')
+        # PyGitHub does not expose the "/repos/{owner}/{repo}/actions/runs/{run_id}/logs" endpoint, so we have to use requests
+        log_archive.write(requests.get(url=run.logs_url,
+                                       headers={'Authorization': f'Bearer {GITHUB_TOKEN}'}).content)
+        LOGGER.info(f'COLLECTED LOGS FOR {run.id} IN {log_archive_path}')
+
+    return log_archive_path
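A usage sketch for the collector (assuming a valid `GITHUB_TOKEN` in the environment; the repository and run selection are illustrative, not part of the patch):

```python
import os
import tempfile
from pathlib import Path

from github import Auth, Github

from workflow_rerun.log_collector import collect_logs_for_run

github = Github(auth=Auth.Token(token=os.environ['GITHUB_TOKEN']))
repo = github.get_repo(full_name_or_id='openvinotoolkit/openvino')
run = repo.get_workflow_runs(status='failure')[0]  # most recent failed run
archive = collect_logs_for_run(run=run,
                               log_archive_path=Path(tempfile.mkdtemp()) / 'logs.zip')
print(archive)  # path to the downloaded zip
```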
diff --git a/.github/scripts/workflow_rerun/rerunner.py b/.github/scripts/workflow_rerun/rerunner.py
new file mode 100644
index 00000000000000..49ba2031a1e747
--- /dev/null
+++ b/.github/scripts/workflow_rerun/rerunner.py
@@ -0,0 +1,53 @@
+import sys
+import tempfile
+from pathlib import Path
+
+from github import Github, Auth
+from workflow_rerun.argument_parser import get_arguments
+from workflow_rerun.constants import GITHUB_TOKEN, LOGGER
+from workflow_rerun.log_analyzer import LogAnalyzer
+from workflow_rerun.log_collector import collect_logs_for_run
+
+if __name__ == '__main__':
+
+    args = get_arguments()
+    run_id = args.run_id
+    repository_name = args.repository_name
+
+    github = Github(auth=Auth.Token(token=GITHUB_TOKEN))
+    gh_repo = github.get_repo(full_name_or_id=repository_name)
+    run = gh_repo.get_workflow_run(id_=run_id)
+
+    LOGGER.info(f'CHECKING IF RERUN IS NEEDED FOR {run.html_url} RUN IN {repository_name}.')
+
+    # Check if the run has already been retriggered:
+    # we do not want to fall into a retrigger loop
+    if run.run_attempt > 1:
+        LOGGER.info(f'THERE ARE {run.run_attempt} ATTEMPTS ALREADY. NOT CHECKING LOGS AND NOT RETRIGGERING. EXITING')
+        sys.exit(0)
+
+    log_archive_path = Path(tempfile.NamedTemporaryFile(suffix='.zip').name)
+
+    collect_logs_for_run(
+        run=run,
+        log_archive_path=log_archive_path,
+    )
+
+    log_analyzer = LogAnalyzer(
+        path_to_log_archive=log_archive_path,
+        path_to_errors_file=args.errors_to_look_for_file,
+    )
+    log_analyzer.analyze()
+
+    if log_analyzer.found_matching_error:
+        LOGGER.info(f'FOUND MATCHING ERROR, RETRIGGERING {run.html_url}')
+        status = run.rerun()
+        if status:
+            LOGGER.info(f'RUN RETRIGGERED SUCCESSFULLY: {run.html_url}')
+        else:
+            LOGGER.info('RUN WAS NOT RETRIGGERED, SEE ABOVE')
+
+        # "status" is True if the rerun request succeeded; sys.exit(not status) maps that to exit code 0
+        sys.exit(not status)
+    else:
+        LOGGER.info('NO ERROR WAS FOUND, NOT RETRIGGERING')
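The exit-code logic at the bottom is worth spelling out: per the script's own comment, `run.rerun()` returns a boolean, and `sys.exit(not status)` maps it to shell conventions. A trivial sketch in isolation:

```python
import sys

status = True         # what run.rerun() returns when the rerun API call succeeds
sys.exit(not status)  # not True -> False -> exit code 0 (success); a failed call exits 1
```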
diff --git a/.github/scripts/workflow_rerun/tests/__init__.py b/.github/scripts/workflow_rerun/tests/__init__.py
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/.github/scripts/workflow_rerun/tests/data/log_archive_with_error.zip b/.github/scripts/workflow_rerun/tests/data/log_archive_with_error.zip
new file mode 100644
index 00000000000000..c02b478af0076a
--- /dev/null
+++ b/.github/scripts/workflow_rerun/tests/data/log_archive_with_error.zip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:894d636bcf156a7f3fae09f3c1d61df6b3db89117a917a3079995805c29115b3
+size 89247
diff --git a/.github/scripts/workflow_rerun/tests/data/log_archive_wo_error.zip b/.github/scripts/workflow_rerun/tests/data/log_archive_wo_error.zip
new file mode 100644
index 00000000000000..42be8d16787555
--- /dev/null
+++ b/.github/scripts/workflow_rerun/tests/data/log_archive_wo_error.zip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f094a737d7ea40dba8d3fb13493275cae243d08e5f1dabce90c316c951a6ac2
+size 52047
diff --git a/.github/scripts/workflow_rerun/tests/integration_test.py b/.github/scripts/workflow_rerun/tests/integration_test.py
new file mode 100644
index 00000000000000..6e950772d486f6
--- /dev/null
+++ b/.github/scripts/workflow_rerun/tests/integration_test.py
@@ -0,0 +1,52 @@
+"""
+Integration tests
+"""
+
+import os
+import tempfile
+import unittest
+from pathlib import Path
+
+from github import Github, Auth
+
+from workflow_rerun.log_analyzer import LogAnalyzer
+from workflow_rerun.log_collector import collect_logs_for_run
+
+
+class IntegrationTest(unittest.TestCase):
+    """
+    A class for testing the integration between LogAnalyzer and log collection
+    """
+
+    def setUp(self) -> None:
+        print(f'\nIn test: "{self._testMethodName}"', flush=True)
+        self._cwd = Path(__file__).parent
+        self.errors_to_look_for_file = self._cwd.parent.joinpath(
+            'errors_to_look_for.json'
+        )
+        self.github = Github(auth=Auth.Token(token=os.environ.get('GITHUB_TOKEN')))
+        self.gh_repo = self.github.get_repo(full_name_or_id='openvinotoolkit/openvino')
+
+        # Even if we used "failure" for the status, we could not guarantee that the logs contain any of the known errors,
+        # so these tests use the logs of the most recent successful pipeline
+        self.wf_run = self.gh_repo.get_workflow_runs(status='success')[0]
+        print(f'Workflow run for testing: {self.wf_run}', flush=True)
+
+    def test_log_collection_and_analysis(self) -> None:
+        """
+        Ensure logs collected by collect_logs_for_run are analyzed by LogAnalyzer
+        """
+        log_archive_path = Path(tempfile.NamedTemporaryFile(suffix='.zip').name)
+        collect_logs_for_run(run=self.wf_run,
+                             log_archive_path=log_archive_path)
+
+        analyzer = LogAnalyzer(
+            path_to_log_archive=log_archive_path,
+            path_to_errors_file=self.errors_to_look_for_file,
+        )
+        analyzer.analyze()
+        self.assertFalse(analyzer.found_matching_error)
+
+    def tearDown(self) -> None:
+        self.github.close()
diff --git a/.github/scripts/workflow_rerun/tests/log_analyzer_test.py b/.github/scripts/workflow_rerun/tests/log_analyzer_test.py
new file mode 100644
index 00000000000000..b10e4166bb038f
--- /dev/null
+++ b/.github/scripts/workflow_rerun/tests/log_analyzer_test.py
@@ -0,0 +1,101 @@
+"""
+LogAnalyzer tests
+"""
+
+import unittest
+from pathlib import Path
+
+from workflow_rerun.log_analyzer import LogAnalyzer
+
+
+class LogAnalyzerTest(unittest.TestCase):
+    """
+    A class for testing LogAnalyzer
+    """
+
+    def setUp(self) -> None:
+        print(f'\nIn test: "{self._testMethodName}"', flush=True)
+        self._cwd = Path(__file__).parent
+        self.log_archive_with_error = self._cwd.joinpath('data').joinpath(
+            'log_archive_with_error.zip'
+        )
+        self.log_archive_wo_error = self._cwd.joinpath('data').joinpath(
+            'log_archive_wo_error.zip'
+        )
+        self.errors_to_look_for_file = self._cwd.parent.joinpath(
+            'errors_to_look_for.json'
+        )
+
+    def test_log_analyzer_instantiation(self) -> None:
+        """
+        Ensure LogAnalyzer is instantiated correctly
+        """
+        analyzer = LogAnalyzer(
+            path_to_log_archive=self.log_archive_wo_error,
+            path_to_errors_file=self.errors_to_look_for_file,
+        )
+        self.assertTrue(
+            hasattr(analyzer, '_errors_to_look_for'),
+            'Analyzer should have _errors_to_look_for',
+        )
+        self.assertTrue(
+            hasattr(analyzer, '_log_files'), 'Analyzer should have _log_files'
+        )
+
+        for error_data in analyzer._errors_to_look_for:
+            self.assertTrue(
+                error_data['error_text'], 'Each error_data should have text'
+            )
+            self.assertTrue(error_data['ticket'], 'Each error_data should have ticket')
+
+        for log_file in analyzer._log_files:
+            self.assertTrue(
+                log_file['file_name'], 'Each log_file should have file_name'
+            )
+            self.assertTrue(log_file['path'], 'Each log_file should have path')
+
+    def test_string_cleanup(self) -> None:
+        """
+        Ensure the log cleanup function returns correct results
+        """
+        analyzer = LogAnalyzer(
+            path_to_log_archive=self.log_archive_wo_error,
+            path_to_errors_file=self.errors_to_look_for_file,
+        )
+
+        data = (
+            'Connection was reset',
+            'Failed to connect to github.com',
+            'Could not resolve host: github.com',
+        )
+        expected = (
+            'connection was reset',
+            'failed to connect to github com',
+            'could not resolve host github com',
+        )
+
+        for input_str, expected_str in zip(data, expected):
+            self.assertEqual(analyzer._clean_up_string(string=input_str), expected_str)
+
+    def test_analyzer_with_error(self) -> None:
+        """
+        Ensure LogAnalyzer can find an error
+        """
+        analyzer = LogAnalyzer(
+            path_to_log_archive=self.log_archive_with_error,
+            path_to_errors_file=self.errors_to_look_for_file,
+        )
+        analyzer.analyze()
+        self.assertTrue(analyzer.found_matching_error)
+
+    def test_analyzer_wo_error(self) -> None:
+        """
+        Ensure LogAnalyzer does not find an error in the log files w/o errors
+        """
+        analyzer = LogAnalyzer(
+            path_to_log_archive=self.log_archive_wo_error,
+            path_to_errors_file=self.errors_to_look_for_file,
+        )
+        analyzer.analyze()
+        self.assertFalse(analyzer.found_matching_error)
diff --git a/.github/scripts/workflow_rerun/tests/log_collector_test.py b/.github/scripts/workflow_rerun/tests/log_collector_test.py
new file mode 100644
index 00000000000000..f325576262c203
--- /dev/null
+++ b/.github/scripts/workflow_rerun/tests/log_collector_test.py
@@ -0,0 +1,38 @@
+"""
+Log collector tests
+"""
+
+import os
+import tempfile
+import unittest
+from pathlib import Path
+
+from github import Github, Auth
+
+from workflow_rerun.log_collector import collect_logs_for_run
+
+
+class LogCollectorTest(unittest.TestCase):
+    """
+    A class for testing log collection
+    """
+
+    def setUp(self) -> None:
+        print(f'\nIn test: "{self._testMethodName}"', flush=True)
+        self._cwd = Path(__file__).parent
+        self.github = Github(auth=Auth.Token(token=os.environ.get('GITHUB_TOKEN')))
+        self.gh_repo = self.github.get_repo(full_name_or_id='openvinotoolkit/openvino')
+        # Use the logs of the most recent successful pipeline
+        self.wf_run = self.gh_repo.get_workflow_runs(status='success')[0]
+        print(f'Workflow run for testing: {self.wf_run}', flush=True)
+
+    def test_log_collection(self) -> None:
+        """
+        Ensure log collection is working
+        """
+        log_archive_path = Path(tempfile.NamedTemporaryFile(suffix='.zip').name)
+        collect_logs_for_run(run=self.wf_run, log_archive_path=log_archive_path)
+        self.assertTrue(Path(log_archive_path).exists())
+
+    def tearDown(self) -> None:
+        self.github.close()
diff --git a/.github/workflows/workflow_rerunner.yml b/.github/workflows/workflow_rerunner.yml
new file mode 100644
index 00000000000000..77b8b68f25b359
--- /dev/null
+++ b/.github/workflows/workflow_rerunner.yml
@@ -0,0 +1,72 @@
+name: Rerun Workflow with Known Errors
+
+on:
+  workflow_run:
+    workflows:
+      - Linux (Ubuntu 20.04, Python 3.11)
+      - Linux ARM64 (Ubuntu 20.04, Python 3.11)
+      - Linux Static CC (Ubuntu 22.04, Python 3.11, Clang)
+      - Linux RISC-V with Conan (Ubuntu 22.04, Python 3.10)
+      - Windows (VS 2019, Python 3.11)
+      - Windows Conditional Compilation (VS 2022, Python 3.11)
+    types:
+      - completed
+  pull_request:
+    paths:
+      - '.github/workflows/workflow_rerunner.yml'
+      - '.github/scripts/workflow_rerun/**'
+
+jobs:
+  rerun:
+    name: Rerun Workflow
+    if: ${{ github.event.workflow_run.conclusion == 'failure' }} # Run only for failed workflow runs
+    runs-on: aks-linux-2-cores-8gb
+    permissions:
+      actions: write
+      contents: read
+      statuses: read
+      checks: read
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          sparse-checkout: '.github/scripts/workflow_rerun'
+
+      - name: Install deps
+        run: pip3 install PyGithub==2.2.0 requests==2.31.0
+
+      - name: Dump GitHub context
+        env:
+          GITHUB_CONTEXT: ${{ toJson(github) }}
+        run: echo "$GITHUB_CONTEXT"
+
+      - name: Rerun
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          export PYTHONPATH=${{ github.workspace }}/.github/scripts/workflow_rerun:${{ github.workspace }}/.github/scripts:$PYTHONPATH
+          python3 ${{ github.workspace }}/.github/scripts/workflow_rerun/rerunner.py \
+            --run-id ${{ github.event.workflow_run.id }} \
+            --repository-name ${GITHUB_REPOSITORY}
+
+  rerunner_tests:
+    name: Rerunner Tests
+    if: ${{ github.event_name == 'pull_request' }}
+    runs-on: aks-linux-2-cores-8gb
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          sparse-checkout: '.github/scripts/workflow_rerun'
+          lfs: true
+
+      - name: Install deps
+        run: pip3 install PyGithub==2.2.0 requests==2.31.0
+
+      - name: Test Rerunner
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        working-directory: ${{ github.workspace }}/.github/scripts/workflow_rerun
+        run: |
+          export PYTHONPATH=${{ github.workspace }}/.github/scripts/workflow_rerun:${{ github.workspace }}/.github/scripts:$PYTHONPATH
+          python3 -m unittest tests/*_test.py
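For completeness, the run id that the `Rerun` step passes via `${{ github.event.workflow_run.id }}` is also available to any step from the event payload on disk. A sketch of how the two views line up (the keys shown are standard `workflow_run` payload fields):

```python
import json
import os

# GITHUB_EVENT_PATH points at the webhook payload that triggered the workflow
with open(os.environ['GITHUB_EVENT_PATH'], encoding='utf-8') as event_file:
    event = json.load(event_file)

print(event['workflow_run']['id'])          # == ${{ github.event.workflow_run.id }}
print(event['workflow_run']['conclusion'])  # 'failure' is what gates the rerun job
```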