|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# SPDX-License-Identifier: MIT |
| 3 | + |
| 4 | +# Provide a markdown-formatted message summarizing the reasons why a pipeline failed. |
| 5 | +# Marge bot can use this script to provide more helpful comments when CI fails. |
| 6 | +# Example for running locally: |
# ./bin/ci/pipeline_message.py --project-id 176 --pipeline-id 1310098
| 8 | + |
| 9 | + |
| 10 | +import argparse |
| 11 | +import asyncio |
| 12 | +import logging |
| 13 | +from typing import Any |
| 14 | + |
| 15 | +import aiohttp |
| 16 | + |
| 17 | +PER_PAGE: int = 6000 |
| 18 | + |
| 19 | + |
async def get_pipeline_status(
    session: aiohttp.ClientSession, project_id: str, pipeline_id: str
):
    """Fetch one pipeline from the GitLab API and return its "status" field.

    Raises aiohttp.ClientResponseError on a non-2xx response.
    """
    url = f"https://gitlab.freedesktop.org/api/v4/projects/{project_id}/pipelines/{pipeline_id}"
    logging.info(f"Fetching pipeline status from {url}")
    async with session.get(url) as response:
        response.raise_for_status()
        payload = await response.json()
    return payload.get("status")
| 29 | + |
| 30 | + |
async def get_jobs_for_pipeline(
    session: aiohttp.ClientSession, project_id: str, pipeline_id: str
):
    """Return the list of job dicts for a pipeline.

    Raises aiohttp.ClientResponseError on a non-2xx response.
    """
    url = f"https://gitlab.freedesktop.org/api/v4/projects/{project_id}/pipelines/{pipeline_id}/jobs"
    logging.info(url)
    # NOTE(review): GitLab typically caps per_page far below 6000 (usually at
    # 100); if a pipeline has more jobs than one page returns, the rest are
    # silently missed — confirm whether pagination is needed here.
    params = {"per_page": PER_PAGE}
    async with session.get(url, params=params) as response:
        response.raise_for_status()
        # The original pre-initialized `jobs = []`, but that value was dead:
        # either the request raises or the JSON body replaces it.
        return await response.json()
| 42 | + |
| 43 | + |
def get_problem_jobs(jobs: list[dict[str, Any]]):
    """Filter pipeline jobs down to the ones worth reporting.

    Jobs whose stage matches an ignored stage (post-merge / performance)
    are skipped; of the rest, only "failed" and "canceled" jobs are kept.
    Returns a (possibly empty) list of job dicts.
    """
    ignore_stage_list = [
        "postmerge",
        "performance",
    ]
    problem_jobs = []
    for job in jobs:
        # Lowercase the stage too, so stages such as "Performance" are also
        # ignored (the original only lowercased the already-lowercase
        # ignore-list entries, making the match effectively case-sensitive).
        stage = job["stage"].lower()
        if any(ignore in stage for ignore in ignore_stage_list):
            continue
        if job["status"] in {"failed", "canceled"}:
            problem_jobs.append(job)
    return problem_jobs
| 56 | + |
| 57 | + |
def unexpected_improvements(failed_test_array):
    """Return a phrase counting unexpectedly improved tests, or "" if none."""
    improved = failed_test_array["unexpected_improvements"]
    if not improved:
        return ""
    count = len(improved)
    plural = "s" if count != 1 else ""
    return f" {count} improved test{plural}"
| 65 | + |
| 66 | + |
def fails(failed_test_array):
    """Return a phrase counting failed tests, or "" if none."""
    failed = failed_test_array["fails"]
    if not failed:
        return ""
    count = len(failed)
    plural = "s" if count != 1 else ""
    return f" {count} failed test{plural}"
| 72 | + |
| 73 | + |
def crashes(failed_test_array):
    """Return a phrase counting crashed tests, or "" if none."""
    crashed = failed_test_array["crashes"]
    if not crashed:
        return ""
    count = len(crashed)
    plural = "s" if count != 1 else ""
    return f" {count} crashed test{plural}"
| 79 | + |
| 80 | + |
def get_failed_test_details(failed_test_array):
    """List the problem tests as an HTML fragment, one `<br>`-separated per line.

    At most ``max_tests_to_display`` entries are shown per category, followed
    by an "and more..." marker when a category is truncated. The "timeouts"
    bucket is intentionally not listed (matching the summary, which does not
    count timeouts either).
    """
    max_tests_to_display = 5
    message = ""
    for category in ("unexpected_improvements", "fails", "crashes"):
        for i, test in enumerate(failed_test_array[category]):
            # Use >= so exactly max_tests_to_display entries are shown;
            # the original `i > max` let one extra test slip through.
            if i >= max_tests_to_display:
                message += " \nand more...<br>"
                break
            message += f"{test}<br>"
    return message
| 107 | + |
| 108 | + |
def get_failed_test_summary_message(failed_test_array):
    """Wrap the per-category failure counts in a `<summary>` element."""
    parts = [
        "<summary>",
        unexpected_improvements(failed_test_array),
        fails(failed_test_array),
        crashes(failed_test_array),
        "</summary>",
    ]
    return "".join(parts)
| 116 | + |
| 117 | + |
def sort_failed_tests_by_status(failures_csv):
    """Bucket each line of a failures.csv blob by its result status.

    Returns a dict with the keys "unexpected_improvements", "fails",
    "crashes" and "timeouts"; lines matching none of those markers are
    dropped.
    """
    buckets = {
        "unexpected_improvements": [],
        "fails": [],
        "crashes": [],
        "timeouts": [],
    }
    # First substring match wins, mirroring the original if/elif ordering.
    categories = (
        ("UnexpectedImprovement", "unexpected_improvements"),
        ("Fail", "fails"),
        ("Crash", "crashes"),
        ("Timeout", "timeouts"),
    )
    for line in failures_csv.splitlines():
        for marker, bucket in categories:
            if marker in line:
                buckets[bucket].append(line)
                break
    return buckets
| 137 | + |
| 138 | + |
async def get_failures_csv(session, project_id, job):
    """Download a job's results/failures.csv artifact; "" if unavailable."""
    job_id = job["id"]
    url = f"https://gitlab.freedesktop.org/api/v4/projects/{project_id}/jobs/{job_id}/artifacts/results/failures.csv"
    async with session.get(url) as response:
        if response.status != 200:
            logging.debug(f"No response from: {url}")
            return ""
        return await response.text()
| 149 | + |
| 150 | + |
async def get_test_failures(session, project_id, job):
    """Summarize a job's failing tests as an HTML fragment ("" if none).

    A single failing test is reported inline; multiple failures get a
    collapsible <details> block with counts and the test names.
    """
    failures_csv = await get_failures_csv(session, project_id, job)
    if not failures_csv:
        return ""

    # If just one test failed, don't bother with more complicated sorting
    lines = failures_csv.splitlines()
    if len(lines) == 1:
        return f": {lines[0]}<br>"

    buckets = sort_failed_tests_by_status(failures_csv)
    return (
        "<details>"
        + get_failed_test_summary_message(buckets)
        + get_failed_test_details(buckets)
        + "</details>"
    )
| 168 | + |
| 169 | + |
async def get_trace_failures(session, project_id, job):
    """Return the URL of a job's trace-problems page, or "" if absent."""
    project_json = await get_project_json(session, project_id)
    path = project_json.get("path", "")
    if not path:
        return ""

    job_id = job["id"]
    url = f"https://mesa.pages.freedesktop.org/-/{path}/-/jobs/{job_id}/artifacts/results/summary/problems.html"
    # Only report the URL if the page actually exists.
    async with session.get(url) as response:
        if response.status != 200:
            logging.debug(f"No response from: {url}")
            return ""
    return url
| 184 | + |
| 185 | + |
async def get_project_json(session, project_id):
    """Fetch project metadata from the GitLab API.

    Returns the decoded JSON dict, or {} on any non-200 response. The
    original returned "" on failure, which broke both callers: they
    immediately call `.get(...)` on the result, and `"".get` raises
    AttributeError. An empty dict keeps those lookups safe.
    """
    url_project_id = f"https://gitlab.freedesktop.org/api/v4/projects/{project_id}"
    async with session.get(url_project_id) as response:
        if response.status == 200:
            return await response.json()
        logging.debug(f"No response from: {url_project_id}")
        return {}
| 194 | + |
| 195 | + |
async def get_job_log(session: aiohttp.ClientSession, project_id: str, job_id: int):
    """Download the raw log for a job; "" when it can't be fetched."""
    project_json = await get_project_json(session, project_id)
    namespace_path = project_json.get("path_with_namespace", "")
    if not namespace_path:
        return ""

    url_job_log = (
        f"https://gitlab.freedesktop.org/{namespace_path}/-/jobs/{job_id}/raw"
    )
    async with session.get(url_job_log) as response:
        if response.status != 200:
            logging.debug(f"No response from job log: {url_job_log}")
            return ""
        return await response.text()
| 211 | + |
| 212 | + |
async def search_job_log_for_errors(session, project_id, job):
    """Scan a job's raw log from the bottom up for the most relevant error.

    Returns the text of the newest non-generic error line, truncated to
    start at the word "error", or a LAVA timeout message, or "" when
    nothing useful is found.
    """
    log_error_message = ""

    # Bypass these generic error messages in hopes of finding a more specific error.
    # The entries are case insensitive. Keep them in alphabetical order and don't
    # forget to add a comma after each entry
    ignore_list = [
        "aborting",
        "error_msg : None",
        "error_type : None",
        "exit code",
        "exit status",
        "exiting now",
        "job failed",
        "no files to upload",
        "ret code",
        "retry",
        "retry-all-errors",
        "unknown-section",
    ]
    job_log = await get_job_log(session, project_id, job["id"])

    # Walk the log in reverse: the last error printed is usually closest to
    # the root cause.
    for line in reversed(job_log.splitlines()):
        if "error" in line.lower():
            if any(ignore.lower() in line.lower() for ignore in ignore_list):
                continue
            # remove date and formatting before error message
            log_error_message = line[line.lower().find("error") :]
            # if there is no further info after the word error then it's not helpful
            if log_error_message.lower() == "error":
                continue
            if log_error_message.lower() == "errors":
                continue
            # NOTE(review): if a bare "error"/"errors" line is skipped by the
            # two checks above and no better line follows, that bare value is
            # still what gets returned — confirm this is intended.
            break

        # timeout msg from .gitlab-ci/lava/lava_job_submitter.py
        if "expected to take at least" in line.lower():
            log_error_message = line
            break

    return log_error_message
| 254 | + |
| 255 | + |
async def process_single_job(session, project_id, job):
    """Build a one-line HTML summary (job link + reason) for a problem job."""
    job_url = job.get("web_url", "")
    if not job_url:
        logging.info(f"Job {job['name']} is missing a web_url")

    job_name = job.get("name", "Unnamed Job")
    summary = f"[{job_name}]({job_url})"

    # if a job times out it's cancelled, so worth mentioning here
    if job["status"] == "canceled":
        return f"{summary}: canceled<br>"

    # if it's not a script failure then all we can do is give the gitlab assigned reason
    if job["failure_reason"] != "script_failure":
        return f"{summary}: {job['failure_reason']}<br>"

    # Try increasingly generic sources of detail: failing tests first, then
    # trace diffs, then grepping the raw log.
    test_failures = await get_test_failures(session, project_id, job)
    if test_failures:
        return f"{summary}{test_failures}"

    trace_failures = await get_trace_failures(session, project_id, job)
    if trace_failures:
        return f"{summary}: has a [trace failure]({trace_failures})<br>"

    log_error = await search_job_log_for_errors(session, project_id, job)
    if log_error:
        return f"{summary}: {log_error}<br>"

    return summary
| 285 | + |
| 286 | + |
async def process_job_with_limit(session, project_id, job):
    """Run process_single_job with at most 10 jobs in flight at once.

    The original created a fresh Semaphore(10) on every call, so each task
    had its own semaphore and concurrency was never actually limited. Share
    one semaphore across all calls instead; it is created lazily (on first
    use, inside the running event loop) and cached on the function object so
    this fix is self-contained.
    """
    semaphore = getattr(process_job_with_limit, "_semaphore", None)
    if semaphore is None:
        # Use at most 10 concurrent tasks
        semaphore = asyncio.Semaphore(10)
        process_job_with_limit._semaphore = semaphore
    async with semaphore:
        return await process_single_job(session, project_id, job)
| 292 | + |
| 293 | + |
async def process_problem_jobs(session, project_id, problem_jobs):
    """Summarize all problem jobs into one HTML message.

    A single job is reported inline; multiple jobs are processed
    concurrently (rate-limited by process_job_with_limit) and wrapped in a
    collapsible <details> block.
    """
    problem_jobs_count = len(problem_jobs)

    if problem_jobs_count == 1:
        # Fixed the spurious f-prefix on this placeholder-free literal (F541);
        # same applies to the <details>/<summary> pieces below.
        message = "<br>There were problems with job: "
        message += await process_single_job(session, project_id, problem_jobs[0])
        return message

    message = (
        "<details>"
        "<summary>"
        f"There were problems with {problem_jobs_count} jobs: "
        "</summary>"
    )

    tasks = [process_job_with_limit(session, project_id, job) for job in problem_jobs]
    results = await asyncio.gather(*tasks)

    message += "".join(results)
    message += "</details>"

    return message
| 318 | + |
| 319 | + |
async def main(pipeline_id: str, project_id: str = "176") -> str:
    """Build the markdown failure summary for one pipeline.

    Returns "" when the pipeline did not fail, when there are no
    reportable jobs, or when any error occurs while talking to GitLab
    (this is a best-effort reporting tool, so failures are swallowed).
    """
    message = ""
    timeout = aiohttp.ClientTimeout(total=120)
    logging.basicConfig(level=logging.INFO)

    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            pipeline_status = await get_pipeline_status(
                session, project_id, pipeline_id
            )
            logging.debug(f"Pipeline status: {pipeline_status}")
            if pipeline_status != "failed":
                return message

            jobs = await get_jobs_for_pipeline(session, project_id, pipeline_id)
            problem_jobs = get_problem_jobs(jobs)

            if len(problem_jobs) == 0:
                return message

            message = await process_problem_jobs(session, project_id, problem_jobs)
    except Exception:
        # Top-level boundary: keep the best-effort behavior, but use
        # logging.exception so the traceback is preserved instead of the
        # original logging.error, which discarded it.
        logging.exception("An error occurred")
        return ""

    return message
| 347 | + |
| 348 | + |
if __name__ == "__main__":
    # Command-line entry point: parse args, run the async summarizer, and
    # print the resulting markdown (empty when there is nothing to report).
    parser = argparse.ArgumentParser(description="Fetch GitLab pipeline details")
    parser.add_argument(
        "--project-id", default="176", help="Project ID (default: 176 i.e. mesa/mesa)"
    )
    parser.add_argument("--pipeline-id", required=True, help="Pipeline ID")
    args = parser.parse_args()

    print(asyncio.run(main(args.pipeline_id, args.project_id)))
0 commit comments