use prompts.yaml instead of .py, add exclude folders

microsoft · Jun 6, 2024 · 4c49002 · 4c49002
1 parent d257106
commit 4c49002
Show file tree

Hide file tree

Showing 4 changed files with 137 additions and 171 deletions.
diff --git a/rdagent/app/CI/prompts.py b/rdagent/app/CI/prompts.py
diff --git a/rdagent/app/CI/prompts.yaml b/rdagent/app/CI/prompts.yaml
@@ -0,0 +1,117 @@
+generate_lint_command_template: |
+  Please generate a command to lint or format a {language} repository.
+  Here is some information about different linting tools: ```{linting_tools}```
+linting_system_prompt_template: |
+  You are a software engineer. You can write code to a high standard and are adept at solving {language} linting problems.
+session_manual_template: |
+  There are some problems with the code you provided, please modify the code again according to the instruction and return the errors list you modified.
+  
+  Instruction:
+  {operation}
+  
+  Your response format should be like this:
+  
+  ```python
+  <modified code>
+  ```
+  
+  ```json
+  {{
+      "errors": ["<Line Number>:<Error Start Position> <Error Code>", ...]
+  }}
+  ```
+session_normal_template: |
+  Please modify this code snippet based on the lint info. Here is the code snippet:
+  ```Python
+  {code}
+  ```
+
+  -----Lint info-----
+  {lint_info}
+  -------------------
+
+  The lint info contains one or more errors. Different errors are separated by blank lines. Each error follows this format:
+  -----Lint info format-----
+  <Line Number>:<Error Start Position> <Error Code> <Error Message>
+  <Error Position (maybe multiple lines)>
+  <Helpful Information (sometimes have)>
+  --------------------------
+  The error code is an abbreviation set by the checker for ease of describing the error. The error position includes the relevant code around the error, and the helpful information provides useful information or possible fix method.
+
+  Please simply reply with the code after you fix all linting errors. You should be aware of the following:
+  1. The indentation of the code should be consistent with the original code.
+  2. You should just replace the code I provided you, which starts from line {start_line} to line {end_line}.
+  3. You'll need to add line numbers to the modified code which starts from {start_lineno}.
+  4. You don't need to add comments to explain your changes.
+  Please wrap your code with the following format:
+
+  ```python
+  <your code..>
+  ```
+session_start_template: |
+  Please modify the Python code based on the lint info.
+  Due to the length of the code, I will first tell you the entire code, and then each time I ask a question, I will extract a portion of the code and tell you the error information contained in this code segment.
+  You need to fix the corresponding error in the code segment and return the code that can replace the corresponding code segment.
+
+  The Python code is from a complete Python project file. Each line of the code is annotated with a line number, separated from the original code by three characters ("<white space>|<white space>"). The vertical bars are aligned.
+  Here is the complete code, please be prepared to fix it:
+  ```Python
+  {code}
+  ```
+suffix2language_template: |
+  Here are the file suffixes in one code repo: {suffix}.
+  Please tell me the programming languages used in this repo and which of these languages have linting tools.
+  Your response should follow this template:
+  {{
+      "languages": <languages list>,
+      "languages_with_linting_tools": <languages with linting tools list>
+  }}
+user_get_files_contain_lint_commands_template: |
+  You get a file list of a repository. Some files may contain linting rules or linting commands defined by repo authors.
+  Here is the file list:
+  ```
+  {file_list}
+  ```
+  
+  Please find all files that may correspond to linting from it.
+  Please respond with the following JSON template:
+  {{
+      "files": </path/to/file>,
+  }}
+user_get_makefile_lint_commands_template: |
+  You get a Makefile which contains some linting rules. Here is its content:
+  ```
+  {file_text}
+  ```
+  Please find executable commands about linting from it.
+  Please respond with the following JSON template:
+  {{
+      "commands": ["python -m xxx --params"...],
+  }}
+user_template_for_code_snippet: |
+  Please modify the Python code based on the lint info.
+  -----Python Code-----
+  {code}
+  ---------------------
+
+  -----Lint info-----
+  {lint_info}
+  -------------------
+
+  The Python code is a snippet from a complete Python project file. Each line of the code is annotated with a line number, separated from the original code by three characters ("<white space>|<white space>"). The vertical bars are aligned.
+
+  The lint info contains one or more errors. Different errors are separated by blank lines. Each error follows this format:
+  -----Lint info format-----
+  <Line Number>:<Error Start Position> <Error Code> <Error Message>
+  <Error Context (multiple lines)>
+  <Helpful Information (last line)>
+  --------------------------
+  The error code is an abbreviation set by the checker for ease of describing the error. The error context includes the relevant code around the error, and the helpful information suggests possible fixes.
+
+  Please simply reply with the code after you fix all linting errors.
+  The code you return does not require line numbers, and should just replace the code I provided you, and does not require comments.
+  Please wrap your code with the following format:
+
+  ```python
+  <your code..>
+  ```
diff --git a/rdagent/app/CI/run.py b/rdagent/app/CI/run.py
@@ -21,6 +21,7 @@
     Feedback,
     Knowledge,
 )
+from rdagent.core.prompts import Prompts
 from rdagent.oai.llm_utils import APIBackend
 from rich import print
 from rich.panel import Panel
@@ -32,15 +33,8 @@
 from rich.text import Text
 from tree_sitter import Language, Node, Parser
 
-from .prompts import (
-    linting_system_prompt_template,
-    session_manual_template,
-    session_normal_template,
-    session_start_template,
-)
-
 py_parser = Parser(Language(tree_sitter_python.language()))
-
+CI_prompts = Prompts(file_path=Path(__file__).parent / "prompts.yaml")
 
 @dataclass
 class CIError:
@@ -213,9 +207,12 @@ def __str__(self) -> str:
 
 
 class Repo(EvolvableSubjects):
-    def __init__(self, project_path: Path | str, **kwargs: Any) -> None:
+    def __init__(self, project_path: Path | str, excludes: list[Path] = [], **kwargs: Any) -> None:
         self.params = kwargs
         self.project_path = Path(project_path)
+
+        excludes = [self.project_path / path for path in excludes]
+
         git_ignored_output = subprocess.check_output(
             ["git", "status", "--ignored", "-s"],
             cwd=project_path,
@@ -228,11 +225,13 @@ def __init__(self, project_path: Path | str, **kwargs: Any) -> None:
             if line.startswith("!!")
         ]
 
+        excludes.extend(git_ignored_files)
+
         files = [
             file
             for file in self.project_path.glob("**/*")
             if file.is_file()
-            and not any(str(file).startswith(str(path)) for path in git_ignored_files)
+            and not any(str(file).startswith(str(path)) for path in excludes)
             and ".git/" not in str(file)
             and file.suffix == ".py"
         ]
@@ -428,7 +427,7 @@ class CodeFixGroup:
             responses: list[str]
 
         api = APIBackend()
-        system_prompt = linting_system_prompt_template.format(language="Python")
+        system_prompt = CI_prompts["linting_system_prompt_template"].format(language="Python")
 
         if len(evolving_trace) > 0:
             last_feedback: CIFeedback = evolving_trace[-1].feedback
@@ -470,7 +469,7 @@ class CodeFixGroup:
                         session = api.build_chat_session(session_system_prompt=system_prompt)
                         session_id = session.get_conversation_id()
                         session.build_chat_completion(
-                            session_start_template.format(code=file.get(add_line_number=True)),
+                            CI_prompts["session_start_template"].format(code=file.get(add_line_number=True)),
                         )
 
                         fix_groups[file_path].append(
@@ -493,7 +492,7 @@ class CodeFixGroup:
                         errors_str = "\n\n".join(str(e) for e in group_errors)
 
                         # ask LLM to repair current code snippet
-                        user_prompt = session_normal_template.format(
+                        user_prompt = CI_prompts["session_normal_template"].format(
                             code=code_snippet_with_lineno,
                             lint_info=errors_str,
                             start_line=start_line,
@@ -621,7 +620,7 @@ class CodeFixGroup:
                             break
 
                         fix_records[file_path].manual_instructions[operation].extend(group_errors)
-                        res = session.build_chat_completion(session_manual_template.format(operation=operation))
+                        res = session.build_chat_completion(CI_prompts["session_manual_template"].format(operation=operation))
                         code_fix_g.responses.append(res)
 
                 # apply changes
@@ -633,18 +632,17 @@ class CodeFixGroup:
 
 
 DIR = None
-while True:
-    DIR = Prompt.ask("Please input the project directory")
+while DIR is None or not DIR.exists():
+    DIR = Prompt.ask("Please input the [cyan]project directory[/cyan]")
     DIR = Path(DIR)
-    if DIR.exists():
-        break
-    else:
-        print("Invalid directory. Please try again.")
+
+excludes = Prompt.ask("Input the [dark_orange]excluded directories[/dark_orange] (relative to [cyan]project path[/cyan] and separated by whitespace)").split(" ")
+excludes = [Path(exclude.strip()) for exclude in excludes if exclude.strip() != ""]
 
 start_time = time.time()
 start_timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%m%d%H%M")
 
-repo = Repo(DIR)
+repo = Repo(DIR, excludes=excludes)
 evaluator = MultiEvaluator(MypyEvaluator(), RuffEvaluator())
 estr = CIEvoStr()
 rag = None  # RAG is not enable firstly.

diff --git a/rdagent/core/prompts.py b/rdagent/core/prompts.py
@@ -5,7 +5,7 @@
 from rdagent.core.utils import SingletonBaseClass
 
 
-class Prompts(Dict, SingletonBaseClass):
+class Prompts(Dict[str, str], SingletonBaseClass):
     def __init__(self, file_path: Path):
         prompt_yaml_dict = yaml.load(
             open(