Skip to content

Commit

Permalink
cleanup prompts (#4052)
Browse files Browse the repository at this point in the history
  • Loading branch information
wwzeng1 authored Jun 19, 2024
1 parent 21e9c8b commit 076aa9e
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 22 deletions.
1 change: 0 additions & 1 deletion sweepai/agents/modify.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ def modify(
) -> dict[str, dict[str, str]]:
# join fcr in case of duplicates
use_openai = True

# handles renames in cloned_repo
# TODO: handle deletions here - it can cause crashes
for file_path, new_file_path in renames_dict.items():
Expand Down
9 changes: 4 additions & 5 deletions sweepai/chat/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,13 @@

from sweepai.agents.modify_utils import validate_and_parse_function_call
from sweepai.agents.search_agent import extract_xml_tag
from sweepai.chat.search_prompts import relevant_snippets_message, relevant_snippet_template, anthropic_system_message, function_response, anthropic_format_message, pr_format, relevant_snippets_message_for_pr, openai_format_message, openai_system_message, query_optimizer_system_prompt, query_optimizer_user_prompt
from sweepai.chat.search_prompts import relevant_snippets_message, relevant_snippet_template, anthropic_system_message, function_response, pr_format, relevant_snippets_message_for_pr, openai_system_message, query_optimizer_system_prompt, query_optimizer_user_prompt
from sweepai.config.client import SweepConfig
from sweepai.config.server import CACHE_DIRECTORY, GITHUB_APP_ID, GITHUB_APP_PEM
from sweepai.core.chat import ChatGPT, call_llm
from sweepai.core.entities import FileChangeRequest, Message, Snippet
from sweepai.core.pull_request_bot import get_pr_summary_for_chat
from sweepai.core.review_utils import split_diff_into_patches
from sweepai.core.viz_utils import save_messages_for_visualization
from sweepai.dataclasses.code_suggestions import CodeSuggestion
from sweepai.utils.convert_openai_anthropic import AnthropicFunctionCall
from sweepai.utils.github_utils import ClonedRepo, CustomGithub, MockClonedRepo, clean_branch_name, commit_multi_file_changes, create_branch, get_github_client, get_installation_id
Expand Down Expand Up @@ -503,7 +502,7 @@ def chat_codebase_stream(
message.content += "\n\nPull requests:\n" + pulls_messages + f"\n\nBe sure to summarize the contents of the pull request during the analysis phase separately from other relevant files.\n\nRemember, the user's request was:\n\n<message>\n{message.content}\n</message>"

if pr_snippets:
relevant_pr_snippets = []
relevant_pr_snippets: list[Snippet] = []
other_relevant_snippets = []
for snippet in snippets:
if snippet.file_path in [pr_snippet.file_path for pr_snippet in pr_snippets]:
Expand All @@ -517,15 +516,15 @@ def chat_codebase_stream(
relevant_snippet_template.format(
i=i,
file_path=snippet.file_denotation,
content=snippet.expand(EXPAND_SIZE).get_snippet(add_lines=False)
content=snippet.expand(EXPAND_SIZE).get_snippet(add_lines=False, add_ellipsis=False)
)
for i, snippet in enumerate(relevant_pr_snippets)
]),
joined_relevant_snippets="\n".join([
relevant_snippet_template.format(
i=i,
file_path=snippet.file_denotation,
content=snippet.expand(EXPAND_SIZE).get_snippet(add_lines=False)
content=snippet.expand(EXPAND_SIZE).get_snippet(add_lines=False, add_ellipsis=False)
)
for i, snippet in enumerate(other_relevant_snippets)
]),
Expand Down
6 changes: 5 additions & 1 deletion sweepai/chat/search_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,11 @@ def area(self):
- Focus on providing high-quality explanations. Start with a high-level overview.
- Only show code as supplementary evidence or to enhance the explanations. When doing so, only show MINIMAL excerpts of code that address the user's question. Do NOT copy the whole file, but only the lines that are relevant to the user's question.
- Use markdown for your responses, using headers where applicable to improve clarity and lists to enumerate examples.
- Wherever possible, you should suggest code changes. To do so, you must add <code_change> blocks to the <user_response> block. First, indicate whether you want to modify an existing file or create a new file, then write in the following format:
- Wherever possible, you should suggest code changes. To do so, you must add <code_change> blocks to the <user_response> block following the format provided below.
- Code changes must be atomic. Each code change must be in its own block, unless they are contiguous changes in the same file.
# <code_change> Format
First, indicate whether you want to modify an existing file or create a new file, then write in the following format:
<code_change>
<file_path>
Expand Down
3 changes: 2 additions & 1 deletion sweepai/handlers/on_comment.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ def on_comment(
issue_number_match = re.search(r"Fixes #(?P<issue_number>\d+).", pr_body or "")
original_issue = None
if issue_number_match or assignee:
issue_number = issue_number_match.group("issue_number")
if issue_number_match:
issue_number = issue_number_match.group("issue_number")
if not assignee:
original_issue = repo.get_issue(int(issue_number))
author = original_issue.user.login
Expand Down
26 changes: 12 additions & 14 deletions sweepai/handlers/on_failing_github_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,29 +196,27 @@ def update_pr_status():
not failing_logs:
continue
failed_runs = [run for run in suite_runs if run.conclusion == "failure"]
failed_gha_logs = get_failing_gha_logs(
failed_runs,
installation_id,
)
if not failing_logs:
failing_logs = get_failing_gha_logs(
failed_runs,
installation_id,
)
if failing_logs:
# if circleci failed and is enabled, it has priority
failed_gha_logs = failing_logs + "\n" + failed_gha_logs
if failed_gha_logs:
# cleanup the gha logs
chat_gpt = ChatGPT()
chat_gpt.messages = [
Message(role="system", content=gha_context_cleanup_system_prompt)
]
formatted_gha_context_prompt = gha_context_cleanup_user_prompt.format(
github_actions_logs=failed_gha_logs
github_actions_logs=failing_logs
)
# we can also gate github actions fixes here
failed_gha_logs = chat_gpt.chat_anthropic(
failing_logs = chat_gpt.chat_anthropic(
content=formatted_gha_context_prompt,
temperature=0.2,
use_openai=True,
)
failed_gha_logs = strip_triple_quotes(failed_gha_logs)
failing_logs = strip_triple_quotes(failing_logs)
# make edits to the PR
# TODO: look into rollbacks so we don't continue adding onto errors
cloned_repo = ClonedRepo( # reinitialize cloned_repo to avoid conflicts
Expand All @@ -228,19 +226,19 @@ def update_pr_status():
repo=repo,
branch=pull_request.head.ref,
)
failed_gha_logs, _ = get_error_locations_from_error_logs(failed_gha_logs, cloned_repo=cloned_repo)
failing_logs, _ = get_error_locations_from_error_logs(failing_logs, cloned_repo=cloned_repo)
diffs = get_branch_diff_text(repo=repo, branch=pull_request.head.ref, base_branch=pull_request.base.ref)
# problem_statement = f"{title}\n{internal_message_summary}\n{replies_text}"
all_information_prompt = GHA_PROMPT.format(
problem_statement=problem_statement,
github_actions_logs=failed_gha_logs,
github_actions_logs=failing_logs,
changes_made=diffs,
)
if gha_history:
previous_gha_logs = gha_history[-1]
all_information_prompt = GHA_PROMPT_WITH_HISTORY.format(
problem_statement=problem_statement,
current_github_actions_logs=failed_gha_logs,
current_github_actions_logs=failing_logs,
changes_made=diffs,
previous_github_actions_logs=previous_gha_logs,
)
Expand Down Expand Up @@ -309,7 +307,7 @@ def update_pr_status():
logger.info(f"Error in updating file{e}")
raise e
total_edit_attempts += 1
gha_history.append(failed_gha_logs)
gha_history.append(failing_logs)
if total_edit_attempts >= GHA_MAX_EDIT_ATTEMPTS:
logger.info(f"Tried to edit PR {GHA_MAX_EDIT_ATTEMPTS} times, giving up.")
break
Expand Down

0 comments on commit 076aa9e

Please sign in to comment.