towards unified report code

ngc92 · ngc92 · commit cfe0eb0b4aaa · 2025-01-13T13:47:21.000+02:00
diff --git a/src/discord-cluster-manager/cogs/github_cog.py b/src/discord-cluster-manager/cogs/github_cog.py
@@ -14,7 +14,9 @@
 from env import GITHUB_REPO, GITHUB_TOKEN
 from github import Github
 from leaderboard_eval import cu_eval, py_eval
+from run_eval import RunResult, CompileResult
 from utils import get_github_branch_name, send_discord_message, setup_logging
+from report import generate_report
 
 logger = setup_logging()
 
@@ -92,10 +94,20 @@ async def run_github(
 
                 await thread.send(f"Training completed with status: {status}")
 
-                if len(logs) > 1900:
-                    await self.bot.send_chunked_message(thread, logs, code_block=True)
+                if expect_result and isinstance(logs, dict):
+                    # dict from download_results; on errors check_workflow_status yields a str
+                    if logs['success']:
+                        await generate_report(thread,
+                                              CompileResult(**logs['compile']),
+                                              RunResult(**logs['run']))
+                    else:
+                        await thread.send(logs['error'])
+
                 else:
-                    await thread.send(f"```\n!!Logs!!:\n{logs}\n```")
+                    if len(logs) > 1900:
+                        await self.bot.send_chunked_message(thread, logs, code_block=True)
+                    else:
+                        await thread.send(f"```\nLogs:\n{logs}\n```")
 
                 if url:
                     await thread.send(f"View the full run at: <{url}>")
@@ -233,8 +245,7 @@ async def check_workflow_status(self, run_id, thread, expect_result: bool=False)
 
                 if run.status == "completed":
                     if expect_result:
-                        result = await self.download_results(run_id)
-                        logs = self.make_logs(result)
+                        logs = await self.download_results(run_id)
                     else:
                         logs = await self.handle_training_log(run_id)
                     return run.conclusion, logs, run.html_url
@@ -248,9 +259,6 @@ async def check_workflow_status(self, run_id, thread, expect_result: bool=False)
             except Exception as e:
                 return "error", str(e), None
 
-    def make_logs(self, result: dict):
-        return pprint.pformat(result)
-
     async def download_results(self, run_id):
         try:
             data = await self.download_artifact(run_id, name="run-result")
diff --git a/src/discord-cluster-manager/report.py b/src/discord-cluster-manager/report.py
@@ -0,0 +1,131 @@
+import pprint
+
+import discord
+from run_eval import CompileResult, RunResult
+
+# Discord rejects messages longer than 2000 characters; stay safely below that.
+_MAX_MESSAGE_LENGTH = 1900
+
+
+def _limit_length(text: str, maxlen: int):
+    """Return `text` cut down to at most `maxlen` characters, ending in " [...]" if cut."""
+    if len(text) > maxlen:
+        return text[:max(maxlen - 6, 0)] + " [...]"
+    return text
+
+
+def _split_log(log: str, maxlen: int) -> list[str]:
+    """Split `log` into chunks of at most `maxlen` characters, preferring line boundaries."""
+    chunks = []
+    current = ""
+    for line in log.splitlines(keepends=True):
+        # a single line longer than a whole chunk has to be cut mid-line
+        while len(line) > maxlen:
+            if current:
+                chunks.append(current)
+                current = ""
+            chunks.append(line[:maxlen])
+            line = line[maxlen:]
+        if len(current) + len(line) > maxlen:
+            chunks.append(current)
+            current = ""
+        current += line
+    if current:
+        chunks.append(current)
+    return chunks
+
+
+async def _send_split_log(thread: discord.Thread, partial_message: str, header: str, log: str):
+    """
+    Append a `header` section containing `log` to `partial_message`.
+
+    If the combined text still fits into one Discord message, nothing is sent and the
+    extended message is returned so further sections can be added. Otherwise the pending
+    `partial_message` is sent, `log` is sent as a series of numbered chunks, and an empty
+    string is returned.
+    """
+    section = f"\n\n## {header}:\n```\n{log}```"
+    if len(partial_message) + len(section) < _MAX_MESSAGE_LENGTH:
+        return partial_message + section
+
+    # flush what was collected so far; Discord refuses empty messages
+    if partial_message:
+        await thread.send(partial_message)
+
+    # reserve room for the heading and the code fences around every chunk
+    chunks = _split_log(log, max(1, _MAX_MESSAGE_LENGTH - len(header) - 50))
+    total = len(chunks)
+    for i, chunk in enumerate(chunks, start=1):
+        await thread.send(f"## {header} ({i}/{total}):\n```\n{chunk}```")
+
+    return ""
+
+
+async def _send_outputs(thread: discord.Thread, message: str, source: str,
+                        stderr: str, stdout: str, always_stderr: bool = False):
+    """
+    Add the `source` stderr/stdout sections to `message` and send what remains.
+
+    Empty streams are skipped, except that stderr is always shown if `always_stderr` is set.
+    """
+    stderr = stderr.strip()
+    stdout = stdout.strip()
+    if stderr or always_stderr:
+        message = await _send_split_log(thread, message, f"{source} stderr", stderr)
+    if stdout:
+        message = await _send_split_log(thread, message, f"{source} stdout", stdout)
+    if message:
+        await thread.send(message)
+
+
+async def generate_report(thread: discord.Thread, comp: CompileResult, run: RunResult):
+    """
+    Send a human-readable summary of a compile + run result to `thread`.
+
+    This is a coroutine and must be awaited. Depending on the outcome, it reports a
+    missing nvcc, a compilation failure, a runtime failure, failed tests, or success
+    with the parsed result; captured stdout/stderr are attached where non-empty.
+    """
+    if not comp.success or not run.success:
+        message = "# Run was not successful\n"
+    else:
+        message = "# Run was successful\n"
+
+    if not comp.success:
+        if not comp.nvcc_found:
+            message += "**Compilation failed**\nNVCC could not be found.\n"
+            message += "This indicates a bug in the runner configuration, _not in your code_.\n"
+            message += "Please notify the server admins of this problem"
+            await thread.send(message)
+            return
+
+        # ok, we found nvcc
+        message += "**Compilation failed**\n"
+        message += "Command "
+        message += f"```bash\n>{_limit_length(comp.command, 1000)}```\n"
+        message += f"exited with code **{comp.exit_code}**."
+        await _send_outputs(thread, message, "Compiler", comp.stderr, comp.stdout,
+                            always_stderr=True)
+        return
+
+    if not run.success:
+        message += "**Running failed**\n"
+        message += "Command "
+        message += f"```bash\n>{_limit_length(run.command, 1000)}```\n"
+        message += f"exited with error code **{run.exit_code}** after {run.duration} seconds."
+        await _send_outputs(thread, message, "Program", run.stderr, run.stdout)
+        return
+
+    if not run.passed:
+        message += "**Testing failed**\n"
+        message += "Command "
+        message += f"```bash\n>{_limit_length(run.command, 1000)}```\n"
+        message += f"ran successfully in {run.duration} seconds, but did not pass all tests.\n"
+        # TODO dedicated "error" entry in our results dict that gets populated by check_implementation
+        await _send_outputs(thread, message, "Program", run.stderr, run.stdout)
+        return
+
+    # OK, we were successful
+    message += "**Success!**\n"
+    message = await _send_split_log(thread, message, "Result", pprint.pformat(run.result))
+    await _send_outputs(thread, message, "Program", run.stderr, run.stdout)
diff --git a/src/discord-cluster-manager/run_eval.py b/src/discord-cluster-manager/run_eval.py
@@ -25,6 +25,7 @@ class CompileResult:
 class RunResult:
     # fmt: off
     success: bool       # did the compiled executable run successfully
+    passed: bool        # did it pass all tests
     command: str        # the command that was run to compile the code
     stdout: str         # standard output produced by the compiler
     stderr: str         # standard error produced by the compiler
@@ -142,7 +143,9 @@ def run_cuda_program(args: list[str]) -> RunResult:
         result_dict[key.strip()] = value.strip()
 
     return RunResult(
-        success=run_process.returncode == 0,
+        # TODO should we return 0 also on test failure?
+        success=(run_process.returncode == 0 or run_process.returncode == 1),
+        passed=result_dict.get("check", None) == "pass",
         command=_make_cmd(run_process.args),
         stdout=run_process.stdout,
         stderr=run_process.stderr,