From b6a1f900346a38885229a2c964b0c86e6c768c7b Mon Sep 17 00:00:00 2001 From: bowen xian Date: Fri, 24 May 2024 09:45:07 +0000 Subject: [PATCH] add CI fix tool to app --- Makefile | 4 +- constraints/3.10.txt | 2 +- constraints/3.11.txt | 2 +- constraints/3.8.txt | 2 +- constraints/3.9.txt | 2 +- pyproject.toml | 26 +-- rdagent/app/CI/prompts.py | 99 +++++++++ rdagent/app/CI/run.py | 422 ++++++++++++++++++++++++++++++++++++++ rdagent/core/conf.py | 45 ++++ rdagent/oai/llm_utils.py | 6 +- 10 files changed, 586 insertions(+), 24 deletions(-) create mode 100644 rdagent/app/CI/prompts.py create mode 100644 rdagent/app/CI/run.py diff --git a/Makefile b/Makefile index 8c2b4cf8..a0726a32 100644 --- a/Makefile +++ b/Makefile @@ -93,14 +93,14 @@ mypy: # Check lint with ruff. ruff: - $(PIPRUN) python -m ruff . --exclude FinCo,finco,rdagent/scripts,test/scripts,git_ignore_folder --ignore ANN101,ANN401,TCH003,D,ERA001,PLR0913,S101 --line-length 120 + $(PIPRUN) ruff check . --exclude FinCo,finco,rdagent/scripts,test/scripts,git_ignore_folder # Check lint with toml-sort. toml-sort: $(PIPRUN) toml-sort --check pyproject.toml # Check lint with all linters. -lint: black isort mypy ruff toml-sort +lint: mypy ruff toml-sort # Run pre-commit with autofix against all files. 
pre-commit: diff --git a/constraints/3.10.txt b/constraints/3.10.txt index e11a59f0..9441e17c 100644 --- a/constraints/3.10.txt +++ b/constraints/3.10.txt @@ -111,7 +111,7 @@ rfc3986==2.0.0 rich==13.7.0 ruamel.yaml==0.18.5 ruamel.yaml.clib==0.2.8 -ruff==0.1.9 +ruff==0.4.5 scipy==1.11.4 SecretStorage==3.3.3 semver==3.0.2 diff --git a/constraints/3.11.txt b/constraints/3.11.txt index f8da2921..d4914162 100644 --- a/constraints/3.11.txt +++ b/constraints/3.11.txt @@ -109,7 +109,7 @@ rfc3986==2.0.0 rich==13.7.0 ruamel.yaml==0.18.5 ruamel.yaml.clib==0.2.8 -ruff==0.1.9 +ruff==0.4.5 scipy==1.11.4 SecretStorage==3.3.3 semver==3.0.2 diff --git a/constraints/3.8.txt b/constraints/3.8.txt index bd6a80d1..f022bd1f 100644 --- a/constraints/3.8.txt +++ b/constraints/3.8.txt @@ -112,7 +112,7 @@ rfc3986==2.0.0 rich==13.7.0 ruamel.yaml==0.18.5 ruamel.yaml.clib==0.2.8 -ruff==0.1.9 +ruff==0.4.5 scipy==1.10.1 SecretStorage==3.3.3 semver==3.0.2 diff --git a/constraints/3.9.txt b/constraints/3.9.txt index e11a59f0..9441e17c 100644 --- a/constraints/3.9.txt +++ b/constraints/3.9.txt @@ -111,7 +111,7 @@ rfc3986==2.0.0 rich==13.7.0 ruamel.yaml==0.18.5 ruamel.yaml.clib==0.2.8 -ruff==0.1.9 +ruff==0.4.5 scipy==1.11.4 SecretStorage==3.3.3 semver==3.0.2 diff --git a/pyproject.toml b/pyproject.toml index 2122ffdb..c0b19a0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,32 +73,28 @@ log_format = "%(asctime)s %(levelname)s %(message)s" minversion = "6.0" [tool.ruff] -fix = true +line-length = 120 +src = ["src"] + +[tool.ruff.lint] ignore = [ # https://docs.astral.sh/ruff/rules/#pydocstyle-d - "D203", - "D204", - "D213", - "D215", - "D400", - "D404", - "D406", - "D407", - "D408", - "D409", - "D413", "ANN101", "ANN401", - "TCH003", "D", "ERA001", + "FIX", + "INP001", + "PGH", "PLR0913", "S101", + "T20", + "TCH003", + "TD", ] select = ["ALL"] -src = ["rdagent"] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "docs/conf.py" = ["INP001"] "test/*" = ["S101"] diff --git 
a/rdagent/app/CI/prompts.py b/rdagent/app/CI/prompts.py new file mode 100644 index 00000000..baa3e4b2 --- /dev/null +++ b/rdagent/app/CI/prompts.py @@ -0,0 +1,99 @@ +linting_system_prompt_template = "You are a software engineer. You can write code to a high standard and are adept at solving {language} linting problems." + + +user_get_makefile_lint_commands_template = """You get a Makefile which contains some linting rules. Here are its content ```{file_text}``` +Please find executable commands about linting from it. + +Please response with following json template: +{{ + "commands": , +}} +""" + +user_get_files_contain_lint_commands_template = """You get a file list of a repository. Some file maybe contain linting rules or linting commands which defined by repo authors. Here are the file list ```{file_list}``` +Please find all files maybe correspond to linting from it. + +Please response with following json template: +{{ + "files": , +}} +""" + + +generate_lint_command_template = """Please generate a command to lint or format a {language} repository. Here are some information about different linting tools ```{linting_tools}``` +""" + + +suffix2language_template = """Here are the files suffix in one code repo: {suffix}. Please tell me the programming language used in this repo and which language has linting-tools. +Your response should follow this template: +{{ + "languages": , + "languages_with_linting_tools": +}} +""" + + +session_start_template = """Please modify the Python code based on the lint info. Due to the length of the code, I will first tell you the entire code, and then each time I ask a question, I will extract a portion of the code and tell you the error information contained in this code segment. You need to fix the corresponding error in the code segment and return the code that can replace the corresponding code segment. + +The Python code is from a complete Python project file. 
Each line of the code is annotated with a line number, separated from the original code by three characters ("|"). The vertical bars are aligned. +Here is the complete code, please be prepared to fix it: +```Python +{code} +``` +""" + + +session_normal_template = """Please modify this code snippet based on the lint info. Here is the code snippet: +```Python +{code} +``` + +-----Lint info----- +{lint_info} +------------------- + +The lint info contains one or more errors. Different errors are separated by blank lines. Each error follows this format: +-----Lint info format----- +: + + +-------------------------- +The error code is an abbreviation set by the checker for ease of describing the error. The error context includes the relevant code around the error, and the helpful information suggests possible fixes. + +Please simply reply the code after you fix all linting errors. +The code you return does not require line numbers, and should just replace the code I provided you, and does not require comments. +Please wrap your code with following format: + +```python + +``` +""" + + +user_template_for_code_snippet = """Please modify the Python code based on the lint info. +-----Python Code----- +{code} +--------------------- + +-----Lint info----- +{lint_info} +------------------- + +The Python code is a snippet from a complete Python project file. Each line of the code is annotated with a line number, separated from the original code by three characters ("|"). The vertical bars are aligned. + +The lint info contains one or more errors. Different errors are separated by blank lines. Each error follows this format: +-----Lint info format----- +: + + +-------------------------- +The error code is an abbreviation set by the checker for ease of describing the error. The error context includes the relevant code around the error, and the helpful information suggests possible fixes. + +Please simply reply the code after you fix all linting errors. 
class CodeFile:
    """Line-oriented view of a UTF-8 text file that can be sliced and patched in place.

    Line numbers accepted by :meth:`get` and :meth:`apply_changes` are 1-based and
    the ``end`` bound is exclusive: ``(start, end)`` addresses lines
    ``start .. end - 1``. A ``start`` below 1 is clamped to the first line.

    Attributes (rebuilt by :meth:`load`):
        path: location of the file on disk.
        code_lines: file content split on newline characters.
        lineno: number of entries in ``code_lines``.
        lineno_width: number of digits in ``lineno`` (pad width of the prefixes).
        code_lines_with_lineno: ``code_lines`` with a right-aligned ``"<n> | "`` prefix.
    """

    def __init__(self, path: Union[Path, str]) -> None:
        self.path = Path(path)
        self.load()

    def load(self) -> None:
        """(Re)read the file from disk and rebuild the cached line lists."""
        code = self.path.read_text(encoding="utf-8")
        self.code_lines: List[str] = code.split("\n")

        # Pre-render the numbered variant once so `get` is a plain slice.
        self.lineno = len(self.code_lines)
        self.lineno_width = len(str(self.lineno))
        self.code_lines_with_lineno: List[str] = [
            f"{number: >{self.lineno_width}} | {code_line}"
            for number, code_line in enumerate(self.code_lines, start=1)
        ]

    def get(
        self,
        start: int = 0,
        end: Union[int, None] = None,
        add_line_number: bool = False,
        return_list: bool = False,
    ) -> Union[List[str], str]:
        """Return lines ``start .. end - 1`` (1-based, ``end`` exclusive).

        Args:
            start: first line to return; values below 1 mean "from the first line".
            end: line after the last one to return; ``None`` means "to the end of file".
            add_line_number: return the ``"<n> | <code>"`` rendering instead of raw lines.
            return_list: return a list of lines instead of a newline-joined string.
        """
        first = max(start - 1, 0)
        # Clamp the upper bound: a non-positive ``end`` must select nothing rather
        # than be interpreted by the slice as an index counted from the end.
        last = self.lineno if end is None else max(end - 1, first)

        lines = self.code_lines_with_lineno if add_line_number else self.code_lines
        selected = lines[first:last]
        return selected if return_list else "\n".join(selected)

    def apply_changes(self, changes: List[Tuple[int, int, str]]) -> None:
        """Replace line ranges with new code, write the file and reload it.

        Args:
            changes: ``(start, end, code)`` triples expressed in the line numbers of
                the file as currently loaded (1-based, ``end`` exclusive). The ranges
                may be given in any order but must not overlap.
        """
        if not changes:
            return

        # Patch from the bottom of the file upwards: replacing a later range never
        # shifts the line numbers of an earlier one, so no running offset is needed
        # and the caller does not have to pre-sort the changes.
        for start, end, code in sorted(changes, key=lambda change: change[0], reverse=True):
            first = max(start - 1, 0)
            last = max(end - 1, first)
            self.code_lines[first:last] = code.split("\n")

        self.path.write_text("\n".join(self.code_lines), encoding="utf-8")
        self.load()

    def __str__(self) -> str:
        return str(self.path)
class RuffEvaluator(Evaluator):
    """Evaluator that lints a repository with ruff and parses the findings.

    The default command is ``ruff check . --no-fix --output-format full``;
    :meth:`evaluate` parses that "full" output format.
    """

    # One diagnostic of ruff's "full" output, e.g.
    #
    #   src/finco/cli.py:9:5: ANN201 Missing return type annotation for public function `main`
    #      |
    #    9 | def main(prompt=None):
    #      |     ^^^^ ANN201
    #   10 |     load_dotenv(verbose=True, override=True)
    #      |
    #      = help: Add return type annotation: `None`
    #
    # Groups: whole diagnostic, path, line, column, rule code, message, context/hint.
    # A diagnostic ends at the first blank line.
    _ERROR_PATTERN = re.compile(r"(([^\n]*):(\d+):(\d+): (\w+) ([^\n]*)\n(.*?))\n\n", re.DOTALL)

    def __init__(self, command: Union[str, None] = None):
        """Store the lint command; ``None`` selects the default ruff invocation."""
        self.command = "ruff check . --no-fix --output-format full" if command is None else command

    def explain_rule(self, error_code: str) -> RuffRule:
        """Return ruff's description of ``error_code`` as a :class:`RuffRule`.

        Runs ``ruff rule <error_code> --output-format json``.

        Raises:
            json.JSONDecodeError: if ruff did not print a JSON document
                (for example because the rule code is unknown).
        """
        from dataclasses import fields  # local import: the module only imports `dataclass`

        # Argument list instead of a shell string, and stderr kept apart from
        # stdout so that warnings cannot corrupt the JSON payload.
        result = subprocess.run(
            ["ruff", "rule", error_code, "--output-format", "json"],
            capture_output=True,
            check=False,
        )
        rule_info = json.loads(result.stdout.decode())

        # Build the dataclass the signature promises (callers read attributes such
        # as `.summary`); ignore keys RuffRule does not declare.
        known_fields = {field.name for field in fields(RuffRule)}
        return RuffRule(**{key: value for key, value in rule_info.items() if key in known_fields})

    def evaluate(self, evo: Repo, **kwargs) -> CIFeedback:
        """Run the lint command in ``evo.project_path`` and group the diagnostics by file.

        Returns:
            A ``CIFeedback`` whose ``errors`` maps the file path printed by ruff to
            the ``CIError`` objects found in that file, in output order.
        """
        try:
            out = subprocess.check_output(
                self.command,
                shell=True,
                cwd=evo.project_path,
                stderr=subprocess.STDOUT,
            )
        except subprocess.CalledProcessError as e:
            # A non-zero exit status still carries the report in its output.
            out = e.output

        errors = defaultdict(list)
        for match in self._ERROR_PATTERN.findall(out.decode()):
            raw_str, file_path, line_number, column_number, error_code, error_message, error_hint = match
            errors[file_path].append(
                CIError(
                    raw_str=raw_str,
                    file_path=file_path,
                    line=int(line_number),
                    column=int(column_number),
                    code=error_code,
                    msg=error_message,
                    hint=error_hint,
                )
            )

        return CIFeedback(errors=errors)


class MypyEvaluator(Evaluator):
    """Evaluator that type-checks a repository with mypy.

    The default command is ``mypy . --explicit-package-bases``.
    """

    # `path:line[:column]: error: message  [code]` -- column and code are optional
    # because they depend on the mypy options in use.
    _ERROR_PATTERN = re.compile(
        r"^(?P<path>[^:\n]+):(?P<line>\d+):(?:(?P<column>\d+):)? error: (?P<msg>.*?)(?:\s+\[(?P<code>[\w-]+)\])?$"
    )
    # `path:line[:column]: note: message` -- extra information attached to an error.
    _NOTE_PATTERN = re.compile(r"^(?P<path>[^:\n]+):(?P<line>\d+):(?:(?P<column>\d+):)? note: (?P<msg>.*)$")

    def __init__(self, command: Union[str, None] = None):
        """Store the type-check command; ``None`` selects the default mypy invocation."""
        self.command = "mypy . --explicit-package-bases" if command is None else command

    def evaluate(self, evo: Repo, **kwargs) -> CIFeedback:
        """Run the type-check command in ``evo.project_path`` and group the errors by file.

        Returns:
            A ``CIFeedback`` whose ``errors`` maps the file path printed by mypy to
            its ``CIError`` objects. ``column`` is 0 and ``code`` is ``""`` when mypy
            did not print them; ``note`` lines following an error of the same file
            are collected in that error's ``hint``.
        """
        try:
            out = subprocess.check_output(
                self.command,
                shell=True,
                cwd=evo.project_path,
                stderr=subprocess.STDOUT,
            )
        except subprocess.CalledProcessError as e:
            # A non-zero exit status still carries the report in its output.
            out = e.output

        errors = defaultdict(list)
        last_error = None
        for raw_line in out.decode("utf-8").splitlines():
            error_match = self._ERROR_PATTERN.match(raw_line)
            if error_match:
                last_error = CIError(
                    raw_str=raw_line,
                    file_path=error_match["path"],
                    line=int(error_match["line"]),
                    column=int(error_match["column"] or 0),
                    code=error_match["code"] or "",
                    msg=error_match["msg"],
                    hint="",
                )
                errors[last_error.file_path].append(last_error)
                continue

            note_match = self._NOTE_PATTERN.match(raw_line)
            if note_match and last_error is not None and note_match["path"] == last_error.file_path:
                last_error.raw_str = f"{last_error.raw_str}\n{raw_line}"
                last_error.hint = f"{last_error.hint}\n{note_match['msg']}".lstrip("\n")

        return CIFeedback(errors=errors)
api.build_chat_session(session_system_prompt=system_prompt) + session.build_chat_completion(session_start_template.format(code=file.get(add_line_number=True))) + + print(f"[yellow]Fixing part {group_id}...[/yellow]\n") + + start_line = group[0].line - 3 + end_line = group[-1].line + 3 + 1 + code_snippet_with_lineno = file.get(start_line, end_line, add_line_number=True, return_list=False) + code_snippet_lines = file.get(start_line, end_line, add_line_number=False, return_list=True) + + # front_anchor_code = file.get(start_line-3, start_line, add_line_number=False, return_list=False) + # rear_anchor_code = file.get(end_line+1, end_line+3+1, add_line_number=False, return_list=False) + + errors_str = "\n".join([f"{error.raw_str}\n" for error in group]) + + print(Panel.fit(Syntax("\n".join([f"{error.line}: {error.msg}" for error in group]), lexer="python", background_color="default"), title=f"{len(group)} Errors")) + # print(f"[bold yellow]original code:[/bold yellow]\n\n{code_snippet_with_lineno}") + print(Panel.fit(Syntax(code_snippet_with_lineno, lexer="python", background_color="default"), title="Original Code")) + user_prompt = session_normal_template.format( + code=code_snippet_with_lineno, + lint_info=errors_str, + ) + res = session.build_chat_completion(user_prompt) + + manual_fix_flag = False + + while True: + new_code = re.search(r".*```[Pp]ython\n(.*)\n```.*", res, re.DOTALL).group(1) + + # print repair status (code diff) + diff = ndiff(code_snippet_lines, new_code.split("\n"), linejunk=IS_LINE_JUNK) + table = Table(show_header=False, box=None) + table.add_column() + for i in diff: + if i.startswith("+"): table.add_row(Text(i, style="green")) + elif i.startswith("-"): table.add_row(Text(i, style="red")) + elif i.startswith("?"): table.add_row(Text(i, style="yellow")) + else: table.add_row(Syntax(i, lexer="python", background_color="default")) + print(Panel.fit(table, title="Repair Status")) + + operation = input("Input your operation: ") + if operation == "s" 
# Default target repository and interpreter used when this module is run as a script.
DIR = "/home/bowen/workspace/fincov2_test/"
PY = "/home/bowen/miniconda3/envs/cr/bin/python"


def _rule_summary(evaluator, error_code):
    """Return the one-line summary of a lint rule.

    Accepts both shapes `explain_rule` may yield: a mapping (raw JSON) or an
    object exposing a `summary` attribute.
    """
    rule = evaluator.explain_rule(error_code)
    return rule["summary"] if isinstance(rule, dict) else rule.summary


def _code_statistics(evaluator, errors):
    """Render one `count code summary` line per rule code, most frequent first."""
    counts = defaultdict(int)
    for error in errors:
        counts[error.code] += 1
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return "".join(f"{count: >5} {code: >10} {_rule_summary(evaluator, code)}\n" for code, count in ranked)


def _ratio(count, total):
    """Format `count / total` as a percentage; "N/A" when there is nothing to divide by."""
    return f"{count / total:.2%}" if total else "N/A"


def _build_statistics_table(evaluator, fix_records):
    """Summarise skipped / directly fixed / manually fixed errors of one round as a table."""
    skipped, directly_fixed, manually_fixed = [], [], []
    for record in fix_records.values():
        skipped.extend(record.skipped_errors)
        directly_fixed.extend(record.directly_fixed_errors)
        manually_fixed.extend(record.manually_fixed_errors)
    total_errors_count = len(skipped) + len(directly_fixed) + len(manually_fixed)

    table = Table(title="Error Fix Statistics")
    for header in ("Type", "Statistics", "Count", "Ratio"):
        table.add_column(header)

    table.add_row("Total Errors", "", str(total_errors_count), "")
    categories = (
        ("Skipped Errors", skipped),
        ("Directly Fixed Errors", directly_fixed),
        ("Manually Fixed Errors", manually_fixed),
    )
    for label, errors in categories:
        table.add_row(
            label,
            _code_statistics(evaluator, errors),
            str(len(errors)),
            _ratio(len(errors), total_errors_count),
        )
    return table


def main(project_dir=DIR, python_path=PY) -> None:
    """Interactively repair lint errors of `project_dir`, one round at a time.

    Each round runs one evolving step, prints the fix statistics and asks
    whether to continue.
    """
    start_time = time.time()

    evo = Repo(project_dir, python_path=python_path)
    evaluator = RuffEvaluator()
    agent = EvoAgent(CIEvoStr(), rag=None)  # RAG is not enabled yet.

    # Warm-up step on an empty evolving trace.
    # NOTE(review): presumably this only records the first lint feedback -- confirm against EvoAgent.
    agent.step_evolving(evo, evaluator)

    while True:
        print(Rule(f"Round {len(agent.evolving_trace)} repair", style="blue"))
        evo = agent.step_evolving(evo, evaluator)

        # `fix_records` stays None until a round actually processed feedback.
        print(_build_statistics_table(evaluator, evo.fix_records or {}))

        if Prompt.ask("Start next round? (y/n): ", choices=["y", "n"]) == "n":
            break

    print(f"Execution time: {time.time() - start_time} seconds")

    # Please commit the result by hand and then run the next round:
    #   git add -u
    #   git commit --no-verify -v


if __name__ == "__main__":
    main()
max_input_duplicate_factor_group: int = 600 + max_output_duplicate_factor_group: int = 20 \ No newline at end of file diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py index 73eecf8f..6987d1ab 100644 --- a/rdagent/oai/llm_utils.py +++ b/rdagent/oai/llm_utils.py @@ -17,9 +17,9 @@ import tiktoken from scipy.spatial.distance import cosine -from core.conf import FincoSettings as Config -from core.log import FinCoLog, LogColors -from core.utils import SingletonBaseClass +from rdagent.core.conf import FincoSettings as Config +from rdagent.core.log import FinCoLog, LogColors +from rdagent.core.utils import SingletonBaseClass DEFAULT_QLIB_DOT_PATH = Path("./")