From 8c25286447e595bc945071ecf033f8b0ba7c1503 Mon Sep 17 00:00:00 2001 From: Oliver Chang Date: Wed, 30 Oct 2024 14:11:14 +1100 Subject: [PATCH] html formatting --- agent/base_agent.py | 4 +- report/common.py | 80 ++++++++++++++++++++++++++++++++++-- report/templates/base.html | 13 ++++++ report/templates/sample.html | 6 ++- 4 files changed, 95 insertions(+), 8 deletions(-) diff --git a/agent/base_agent.py b/agent/base_agent.py index 800671b470..4eaf19a3a0 100644 --- a/agent/base_agent.py +++ b/agent/base_agent.py @@ -44,8 +44,8 @@ def get_tool(self, tool_name: str) -> Optional[BaseTool]: def chat_llm(self, cur_round: int, client: Any, prompt: Prompt) -> str: """Chat with LLM.""" - logger.info('%s', cur_round, - prompt.get(), cur_round) + logger.info('%s', + cur_round, prompt.get(), cur_round) response = self.llm.chat_llm(client=client, prompt=prompt) logger.info('%s', cur_round, response, cur_round) diff --git a/report/common.py b/report/common.py index 78a72dcece..5572b2a568 100644 --- a/report/common.py +++ b/report/common.py @@ -129,6 +129,13 @@ class Triage: triager_prompt: str +@dataclasses.dataclass +class LogPart: + chat_prompt: bool = False + chat_response: bool = False + content: str = '' + + class FileSystem: """ FileSystem provides a wrapper over standard library and GCS client and @@ -267,6 +274,9 @@ def match_benchmark(self, benchmark_id: str, results: list[evaluator.Result], def get_final_target_code(self, benchmark: str, sample: str) -> str: """Gets the targets of benchmark |benchmark| with sample ID |sample|.""" targets_dir = os.path.join(self._results_dir, benchmark, 'fixed_targets') + # TODO(donggeliu): Make this consistent with agent output. 
+ if not os.path.exists(targets_dir): + return '' for name in sorted(FileSystem(targets_dir).listdir()): path = os.path.join(targets_dir, name) @@ -277,14 +287,14 @@ def get_final_target_code(self, benchmark: str, sample: str) -> str: return code return '' - def get_logs(self, benchmark: str, sample: str) -> str: + def get_logs(self, benchmark: str, sample: str) -> list[LogPart]: status_dir = os.path.join(self._results_dir, benchmark, 'status') results_path = os.path.join(status_dir, sample, 'log.txt') if not FileSystem(results_path).exists(): - return '' + return [] with FileSystem(results_path).open() as f: - return f.read() + return _parse_log_parts(f.read()) def get_run_logs(self, benchmark: str, sample: str) -> str: """Returns the content of the last run log.""" @@ -353,6 +363,10 @@ def _get_targets(self, benchmark: str, sample: str) -> list[Target]: """Gets the targets of benchmark |benchmark| with sample ID |sample| from the OFG version 1 (single prompt).""" targets_dir = os.path.join(self._results_dir, benchmark, 'fixed_targets') + # TODO(donggeliu): Make this consistent with agent output. + if not os.path.exists(targets_dir): + return [] + targets = [] for name in sorted(FileSystem(targets_dir).listdir()): @@ -533,7 +547,11 @@ def _is_valid_benchmark_dir(self, cur_dir: str) -> bool: return True # Check sub-directories. - expected_dirs = ['raw_targets', 'status', 'fixed_targets'] + # TODO(donggeliu): Make this consistent with agent output. + # We used to expect 'fixed_targets' and 'raw_targets' here, but the agent + # workflow doesn't populate them. As a result, these directories don't get + # uploaded to GCS. 
+ expected_dirs = ['status'] return all( FileSystem(os.path.join(self._results_dir, cur_dir, expected_dir)).isdir() @@ -545,6 +563,10 @@ def _get_generated_targets(self, benchmark: str) -> list[str]: prompt).""" targets = [] raw_targets_dir = os.path.join(self._results_dir, benchmark, 'raw_targets') + # TODO(donggeliu): Make this consistent with agent output. + if not os.path.exists(raw_targets_dir): + return [] + for filename in sorted(FileSystem(raw_targets_dir).listdir()): if os.path.splitext(filename)[1] in TARGET_EXTS: targets.append(os.path.join(raw_targets_dir, filename)) @@ -623,3 +645,53 @@ def _find_benchmark_signature(self, project: str, matched_prefix_signature = function_signature return matched_prefix_signature + + +def _parse_log_parts(log: str) -> list[LogPart]: + """Parse log into parts.""" + _CHAT_PROMPT_START_MARKER = re.compile(r'') + _CHAT_PROMPT_END_MARKER = re.compile(r'') + _CHAT_RESPONSE_START_MARKER = re.compile(r'') + _CHAT_RESPONSE_END_MARKER = re.compile(r'') + parts = [] + idx = 0 + next_marker = _CHAT_PROMPT_START_MARKER + + while idx < len(log): + match = next_marker.search(log, idx) + if not match: + parts.append(LogPart(content=log[idx:])) + break + + if match.start() > idx: + # Log content in between chat logs. + parts.append(LogPart(content=log[idx:match.start()])) + + # Read up to the start of the corresponding end marker. + end_idx = len(log) + + chat_prompt = False + chat_response = False + if next_marker == _CHAT_PROMPT_START_MARKER: + end = _CHAT_PROMPT_END_MARKER.search(log, match.end()) + chat_prompt = True + next_marker = _CHAT_RESPONSE_START_MARKER + else: + assert next_marker == _CHAT_RESPONSE_START_MARKER + end = _CHAT_RESPONSE_END_MARKER.search(log, match.end()) + chat_response = True + next_marker = _CHAT_PROMPT_START_MARKER + + if end: + end_idx = end.start() + # Skip past the end tag. + idx = end.end() + else: + # No corresponding end tag, just read till the end of the log. 
+ end_idx = len(log) + idx = end_idx + + parts.append(LogPart(chat_prompt=chat_prompt, chat_response=chat_response, content=log[match.end():end_idx])) + + + return parts \ No newline at end of file diff --git a/report/templates/base.html b/report/templates/base.html index 185e69eeb4..c31f3cee22 100644 --- a/report/templates/base.html +++ b/report/templates/base.html @@ -53,6 +53,19 @@ tbody tr:nth-child(odd) { background-color: #f4f5ff; } + +.chat_prompt { + background-color: #fff7f2; + max-width: 50%; + overflow: scroll; +} + +.chat_response { + background-color: #fcfff2; + max-width: 50%; + overflow: scroll; + margin-left: auto; +} LLM: {{ model }} diff --git a/report/templates/sample.html b/report/templates/sample.html index 297fce10d9..e654cc2680 100644 --- a/report/templates/sample.html +++ b/report/templates/sample.html @@ -58,9 +58,11 @@

Code #{{ loop.index - 1}}

{% endfor %}

Logs

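-<pre>
-{{ logs }}
+{% for part in logs %}
+<pre {% if part.chat_prompt %}class="chat_prompt"{% elif part.chat_response %}class="chat_response"{% endif %}>
+{{ part.content }}
 </pre>
+{% endfor %}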

Run logs