Skip to content

Commit

Permalink
html formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
oliverchang committed Oct 30, 2024
1 parent 01f2e1a commit 8c25286
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 8 deletions.
4 changes: 2 additions & 2 deletions agent/base_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def get_tool(self, tool_name: str) -> Optional[BaseTool]:

def chat_llm(self, cur_round: int, client: Any, prompt: Prompt) -> str:
"""Chat with LLM."""
logger.info('<CHAT PROMPT:ROUND %02d>%s</CHAT PROMPT:ROUND %2d>', cur_round,
prompt.get(), cur_round)
logger.info('<CHAT PROMPT:ROUND %02d>%s</CHAT PROMPT:ROUND %02d>',
cur_round, prompt.get(), cur_round)
response = self.llm.chat_llm(client=client, prompt=prompt)
logger.info('<CHAT RESPONSE:ROUND %02d>%s</CHAT RESPONSE:ROUND %02d>',
cur_round, response, cur_round)
Expand Down
80 changes: 76 additions & 4 deletions report/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,13 @@ class Triage:
triager_prompt: str


@dataclasses.dataclass
class LogPart:
  """One contiguous piece of a sample log.

  A part is either plain log output (both flags False) or the text that was
  logged between a pair of chat prompt / chat response markers.
  """
  # True when this part is the text found between CHAT PROMPT markers.
  chat_prompt: bool = False
  # True when this part is the text found between CHAT RESPONSE markers.
  chat_response: bool = False
  # The text of this part, with the surrounding marker tags excluded.
  content: str = ''


class FileSystem:
"""
FileSystem provides a wrapper over standard library and GCS client and
Expand Down Expand Up @@ -267,6 +274,9 @@ def match_benchmark(self, benchmark_id: str, results: list[evaluator.Result],
def get_final_target_code(self, benchmark: str, sample: str) -> str:
"""Gets the targets of benchmark |benchmark| with sample ID |sample|."""
targets_dir = os.path.join(self._results_dir, benchmark, 'fixed_targets')
# TODO(donggeliu): Make this consistent with agent output.
if not os.path.exists(targets_dir):
return ''

for name in sorted(FileSystem(targets_dir).listdir()):
path = os.path.join(targets_dir, name)
Expand All @@ -277,14 +287,14 @@ def get_final_target_code(self, benchmark: str, sample: str) -> str:
return code
return ''

def get_logs(self, benchmark: str, sample: str) -> str:
def get_logs(self, benchmark: str, sample: str) -> list[LogPart]:
status_dir = os.path.join(self._results_dir, benchmark, 'status')
results_path = os.path.join(status_dir, sample, 'log.txt')
if not FileSystem(results_path).exists():
return ''
return []

with FileSystem(results_path).open() as f:
return f.read()
return _parse_log_parts(f.read())

def get_run_logs(self, benchmark: str, sample: str) -> str:
"""Returns the content of the last run log."""
Expand Down Expand Up @@ -353,6 +363,10 @@ def _get_targets(self, benchmark: str, sample: str) -> list[Target]:
"""Gets the targets of benchmark |benchmark| with sample ID |sample| from
the OFG version 1 (single prompt)."""
targets_dir = os.path.join(self._results_dir, benchmark, 'fixed_targets')
# TODO(donggeliu): Make this consistent with agent output.
if not os.path.exists(targets_dir):
return []

targets = []

for name in sorted(FileSystem(targets_dir).listdir()):
Expand Down Expand Up @@ -533,7 +547,11 @@ def _is_valid_benchmark_dir(self, cur_dir: str) -> bool:
return True

# Check sub-directories.
expected_dirs = ['raw_targets', 'status', 'fixed_targets']
# TODO(donggeliu): Make this consistent with agent output.
# We used to expect 'fixed_targets' and 'raw_targets' here, but the agent
# workflow doesn't populate them. As a result, these directories don't get
# uploaded to GCS.
expected_dirs = ['status']
return all(
FileSystem(os.path.join(self._results_dir, cur_dir,
expected_dir)).isdir()
Expand All @@ -545,6 +563,10 @@ def _get_generated_targets(self, benchmark: str) -> list[str]:
prompt)."""
targets = []
raw_targets_dir = os.path.join(self._results_dir, benchmark, 'raw_targets')
# TODO(donggeliu): Make this consistent with agent output.
if not os.path.exists(raw_targets_dir):
return []

for filename in sorted(FileSystem(raw_targets_dir).listdir()):
if os.path.splitext(filename)[1] in TARGET_EXTS:
targets.append(os.path.join(raw_targets_dir, filename))
Expand Down Expand Up @@ -623,3 +645,53 @@ def _find_benchmark_signature(self, project: str,
matched_prefix_signature = function_signature

return matched_prefix_signature


# Matches the opening tag of either chat section; group 1 captures which kind
# of section it is ('PROMPT' or 'RESPONSE').
_CHAT_START_MARKER = re.compile(r'<CHAT (PROMPT|RESPONSE):ROUND\s+\d+>')
# Closing tags, keyed by the kind captured from the opening tag.
_CHAT_END_MARKERS = {
    'PROMPT': re.compile(r'</CHAT PROMPT:ROUND\s+\d+>'),
    'RESPONSE': re.compile(r'</CHAT RESPONSE:ROUND\s+\d+>'),
}


def _parse_log_parts(log: str) -> list[LogPart]:
  """Splits a log into plain, chat-prompt and chat-response parts.

  The log is scanned left to right for `<CHAT PROMPT:ROUND N>` and
  `<CHAT RESPONSE:ROUND N>` opening tags. Text outside any tag pair becomes a
  plain `LogPart`; text between an opening tag and the next closing tag of the
  same kind becomes a `LogPart` flagged as `chat_prompt` or `chat_response`.
  The tags themselves are not included in any part's content.

  Prompts and responses are not required to alternate: whichever opening tag
  comes next is honoured, so a prompt with no logged response does not cause
  the following prompt to be reported as plain text.

  Args:
    log: The full log text.

  Returns:
    The parts in the order they appear in the log. Empty if `log` is empty.
  """
  parts = []
  idx = 0

  while idx < len(log):
    start = _CHAT_START_MARKER.search(log, idx)
    if not start:
      # No more chat sections; the remainder is plain log output.
      parts.append(LogPart(content=log[idx:]))
      break

    if start.start() > idx:
      # Log content in between chat sections.
      parts.append(LogPart(content=log[idx:start.start()]))

    kind = start.group(1)
    end = _CHAT_END_MARKERS[kind].search(log, start.end())
    if end:
      content_end = end.start()
      # Resume scanning just past the closing tag.
      idx = end.end()
    else:
      # No corresponding closing tag, so read until the end of the log.
      content_end = len(log)
      idx = content_end

    parts.append(
        LogPart(chat_prompt=kind == 'PROMPT',
                chat_response=kind == 'RESPONSE',
                content=log[start.end():content_end]))

  return parts
13 changes: 13 additions & 0 deletions report/templates/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,19 @@
tbody tr:nth-child(odd) {
background-color: #f4f5ff;
}

.chat_prompt {
background-color: #fff7f2;
max-width: 50%;
overflow: scroll;
}

.chat_response {
background-color: #fcfff2;
max-width: 50%;
overflow: scroll;
margin-left: auto;
}
</style>
<body>
LLM: {{ model }}
Expand Down
6 changes: 4 additions & 2 deletions report/templates/sample.html
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,11 @@ <h3>Code #{{ loop.index - 1}}</h3>
{% endfor %}

<h2>Logs</h2>
<pre>
{{ logs }}
{% for part in logs %}
<pre {% if part.chat_prompt %}class="chat_prompt"{% elif part.chat_response %}class="chat_response"{% endif %}>
{{ part.content }}
</pre>
{% endfor %}

<h2>Run logs</h2>
<pre>
Expand Down

0 comments on commit 8c25286

Please sign in to comment.