Unleash the power of AI to find and squash bugs before they reach your customers.
@@ -153,4 +153,4 @@ Need help or have questions? Reach out to us at support@cloudcode.ai.
Made with ❤️ by the Kaizen team
-
\ No newline at end of file
+
From 8ade2693659ca9f9be0152bb60bf7d4c274f4592 Mon Sep 17 00:00:00 2001
From: Saurav Panda
Date: Thu, 22 Aug 2024 19:10:03 -0700
Subject: [PATCH 09/19] feat: added support for running single file tests
---
examples/unittest/main.py | 4 ++++
kaizen/actors/unit_test_runner.py | 5 ++++-
kaizen/generator/unit_test.py | 4 ++--
kaizen/llms/provider.py | 2 +-
4 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/examples/unittest/main.py b/examples/unittest/main.py
index 7e586356..23b14af6 100644
--- a/examples/unittest/main.py
+++ b/examples/unittest/main.py
@@ -17,8 +17,12 @@
)
print(result)
+# Run all tests
test_results = generator.run_tests()
+# Run a single test file:
+# test_results = generator.run_tests(test_file="test_create_folder.py")
+
for file_path, result in test_results.items():
print(f"Results for {file_path}:")
if "error" in result:
diff --git a/kaizen/actors/unit_test_runner.py b/kaizen/actors/unit_test_runner.py
index 5c0b50aa..3e6ace9d 100644
--- a/kaizen/actors/unit_test_runner.py
+++ b/kaizen/actors/unit_test_runner.py
@@ -70,7 +70,7 @@ def find_project_root(self, file_path):
self.logger.warning("Project root not found")
return None
- def discover_and_run_tests(self):
+ def discover_and_run_tests(self, test_file=None):
self.logger.info("Starting test discovery and execution")
results = {}
for root, dirs, files in os.walk(self.test_directory):
@@ -80,6 +80,9 @@ def discover_and_run_tests(self):
extension = file.split(".")[-1]
self.logger.debug(f"Found test file: {file_path}")
if extension in self.supported_extensions:
+ if file_path and file not in test_file:
+ self.logger.debug("Skipping file test")
+ continue
self.logger.info(f"Running tests for: {file_path}")
result = self.supported_extensions[extension](file_path)
results[str(file_path)] = result
diff --git a/kaizen/generator/unit_test.py b/kaizen/generator/unit_test.py
index 250de3d6..9e414d12 100644
--- a/kaizen/generator/unit_test.py
+++ b/kaizen/generator/unit_test.py
@@ -277,9 +277,9 @@ def generate_tests_with_feedback(self, test_code, feedback):
def _create_output_folder(self, folder_name):
os.makedirs(folder_name, exist_ok=True)
- def run_tests(self):
+ def run_tests(self, test_file=None):
runner = UnitTestRunner(self.output_folder)
- return runner.discover_and_run_tests()
+ return runner.discover_and_run_tests(test_file=test_file)
def format_test_scenarios(self, scenarios):
formatted_scenarios = ""
diff --git a/kaizen/llms/provider.py b/kaizen/llms/provider.py
index 4567d4ef..e10c8a0f 100644
--- a/kaizen/llms/provider.py
+++ b/kaizen/llms/provider.py
@@ -169,7 +169,7 @@ def raw_chat_completion(
model="default",
custom_model=None,
messages=None,
- n_choices=1
+ n_choices=1,
):
custom_model["n"] = n_choices
if not messages:
From bc9bef45e8850ae390b239a942c28715283b14a4 Mon Sep 17 00:00:00 2001
From: Saurav Panda
Date: Thu, 22 Aug 2024 19:15:10 -0700
Subject: [PATCH 10/19] fix: kaizen suggested bug fix
---
kaizen/actors/unit_test_runner.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kaizen/actors/unit_test_runner.py b/kaizen/actors/unit_test_runner.py
index 3e6ace9d..7864baf8 100644
--- a/kaizen/actors/unit_test_runner.py
+++ b/kaizen/actors/unit_test_runner.py
@@ -80,7 +80,7 @@ def discover_and_run_tests(self, test_file=None):
extension = file.split(".")[-1]
self.logger.debug(f"Found test file: {file_path}")
if extension in self.supported_extensions:
- if file_path and file not in test_file:
+ if test_file and file not in test_file:
self.logger.debug("Skipping file test")
continue
self.logger.info(f"Running tests for: {file_path}")
From db3dab8ff7dcd213872648ad6f6f5907fed4576e Mon Sep 17 00:00:00 2001
From: Saurav Panda
Date: Thu, 22 Aug 2024 22:20:37 -0400
Subject: [PATCH 11/19] Update kaizen/actors/unit_test_runner.py
Co-authored-by: kaizen-bot[bot] <150987473+kaizen-bot[bot]@users.noreply.github.com>
---
kaizen/actors/unit_test_runner.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/kaizen/actors/unit_test_runner.py b/kaizen/actors/unit_test_runner.py
index 7864baf8..982fa493 100644
--- a/kaizen/actors/unit_test_runner.py
+++ b/kaizen/actors/unit_test_runner.py
@@ -71,6 +71,8 @@ def find_project_root(self, file_path):
return None
def discover_and_run_tests(self, test_file=None):
+ if test_file is None:
+ self.logger.warning('No test file specified. Running all tests.')
self.logger.info("Starting test discovery and execution")
results = {}
for root, dirs, files in os.walk(self.test_directory):
From 2a33d37d345a6a727104f997454bd46d8bb7e569 Mon Sep 17 00:00:00 2001
From: Saurav Panda
Date: Thu, 22 Aug 2024 19:36:17 -0700
Subject: [PATCH 12/19] bugfix: code review issue when files are skipped
---
examples/code_review/main.py | 4 +++-
kaizen/reviewer/code_review.py | 41 ++++++++++++++++++----------------
pyproject.toml | 2 +-
3 files changed, 26 insertions(+), 21 deletions(-)
diff --git a/examples/code_review/main.py b/examples/code_review/main.py
index 32794105..5c5e8903 100644
--- a/examples/code_review/main.py
+++ b/examples/code_review/main.py
@@ -33,7 +33,9 @@
)
topics = clean_keys(review_data.topics, "important")
-review_desc = create_pr_review_text(topics)
+review_desc = create_pr_review_text(
+ review_data.issues, code_quality=review_data.code_quality
+)
comments, topics = create_review_comments(topics)
print(f"Raw Topics: \n {json.dumps(topics, indent=2)}\n")
diff --git a/kaizen/reviewer/code_review.py b/kaizen/reviewer/code_review.py
index 47dea566..176bbbd7 100644
--- a/kaizen/reviewer/code_review.py
+++ b/kaizen/reviewer/code_review.py
@@ -1,4 +1,4 @@
-from typing import Optional, List, Dict, Generator
+from typing import Optional, List, Dict, Generator, Tuple
from dataclasses import dataclass
import logging
from kaizen.helpers import parser
@@ -197,7 +197,7 @@ def _process_files(
pull_request_desc: str,
user: Optional[str],
reeval_response: bool,
- ) -> List[Dict]:
+ ) -> Tuple[List[Dict], Optional[float]]:
self.logger.debug("Processing based on files")
reviews = []
code_quality = None
@@ -208,13 +208,13 @@ def _process_files(
user,
reeval_response,
)
- for file_review, quality in file_chunks_generator:
- reviews.extend(file_review)
- if quality:
- if code_quality and code_quality > quality:
- code_quality = quality
- else:
- code_quality = quality
+ for result in file_chunks_generator:
+ if result: # Check if the result is not None
+ file_review, quality = result
+ reviews.extend(file_review)
+ if quality:
+ if code_quality is None or quality < code_quality:
+ code_quality = quality
return reviews, code_quality
def _process_files_generator(
@@ -224,7 +224,7 @@ def _process_files_generator(
pull_request_desc: str,
user: Optional[str],
reeval_response: bool,
- ) -> Generator[List[Dict], None, None]:
+ ) -> Generator[Optional[Tuple[List[Dict], Optional[float]]], None, None]:
combined_diff_data = ""
available_tokens = self.provider.available_tokens(FILE_CODE_REVIEW_PROMPT)
for file in pull_request_files:
@@ -253,13 +253,16 @@ def _process_files_generator(
)
combined_diff_data = f"\n---->\nFile Name: {filename}\nPatch Details: {parser.patch_to_combined_chunks(patch_details, self.ignore_deletions)}"
- yield self._process_file_chunk(
- combined_diff_data,
- pull_request_title,
- pull_request_desc,
- user,
- reeval_response,
- )
+ if combined_diff_data:
+ yield self._process_file_chunk(
+ combined_diff_data,
+ pull_request_title,
+ pull_request_desc,
+ user,
+ reeval_response,
+ )
+ else:
+ yield None # Yield None if there's no data to process
def _process_file_chunk(
self,
@@ -268,9 +271,9 @@ def _process_file_chunk(
pull_request_desc: str,
user: Optional[str],
reeval_response: bool,
- ) -> List[Dict]:
+ ) -> Optional[Tuple[List[Dict], Optional[float]]]:
if not diff_data:
- return []
+ return None
prompt = FILE_CODE_REVIEW_PROMPT.format(
FILE_PATCH=diff_data,
)
diff --git a/pyproject.toml b/pyproject.toml
index 40efde48..273795bd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "kaizen-cloudcode"
-version = "0.4.4"
+version = "0.4.5"
description = "An intelligent coding companion that accelerates your development workflow by providing efficient assistance, enabling you to craft high-quality code more rapidly."
authors = ["Saurav Panda "]
license = "Apache2.0"
From 2ab2f486d3f708a19282afd061a172a29223f85c Mon Sep 17 00:00:00 2001
From: Saurav Panda
Date: Thu, 22 Aug 2024 21:46:39 -0700
Subject: [PATCH 13/19] feat: remove temp file path
---
kaizen/actors/unit_test_runner.py | 2 +-
kaizen/generator/unit_test.py | 3 +++
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/kaizen/actors/unit_test_runner.py b/kaizen/actors/unit_test_runner.py
index 982fa493..0a73454c 100644
--- a/kaizen/actors/unit_test_runner.py
+++ b/kaizen/actors/unit_test_runner.py
@@ -72,7 +72,7 @@ def find_project_root(self, file_path):
def discover_and_run_tests(self, test_file=None):
if test_file is None:
- self.logger.warning('No test file specified. Running all tests.')
+ self.logger.warning("No test file specified. Running all tests.")
self.logger.info("Starting test discovery and execution")
results = {}
for root, dirs, files in os.walk(self.test_directory):
diff --git a/kaizen/generator/unit_test.py b/kaizen/generator/unit_test.py
index 9e414d12..df024a63 100644
--- a/kaizen/generator/unit_test.py
+++ b/kaizen/generator/unit_test.py
@@ -119,11 +119,13 @@ def generate_tests(
output_path: str = None,
verbose: bool = False,
enable_critique: bool = False,
+ temp_dir: str = "",
):
self.max_critique = max_critique
self.enable_critique = enable_critique
self.verbose = verbose if verbose else self.verbose
self.output_folder = output_path if output_path else self.output_folder
+ self.temp_dir = temp_dir
file_extension = file_path.split(".")[-1]
if file_extension not in self.SUPPORTED_LANGUAGES or file_extension == "pyc":
@@ -172,6 +174,7 @@ def _process_item(self, item, file_extension, file_path, folder_path):
item["full_path"] = file_path
test_code = self.generate_ai_tests(item, item["source"], file_extension)
+ test_code = test_code.replace(self.temp_dir, "")
self._write_test_file(test_file_path, test_code)
From 00baf6979f8d199ea0b0382bdec01004a7a5b3d4 Mon Sep 17 00:00:00 2001
From: Saurav Panda
Date: Sat, 24 Aug 2024 01:59:35 -0700
Subject: [PATCH 14/19] feat: moved code from pg vector to qdrant
---
config.json | 5 +-
docker-compose-dev.yml | 24 +-
docker-compose.yml | 23 +-
examples/ragify_codebase/main.py | 18 +-
kaizen/llms/provider.py | 1 +
kaizen/retriever/custom_vector_store.py | 59 ----
kaizen/retriever/llama_index_retriever.py | 149 +++++----
kaizen/retriever/qdrant_vector_store.py | 26 ++
poetry.lock | 351 ++++++++++++++++------
pyproject.toml | 3 +-
10 files changed, 436 insertions(+), 223 deletions(-)
delete mode 100644 kaizen/retriever/custom_vector_store.py
create mode 100644 kaizen/retriever/qdrant_vector_store.py
diff --git a/config.json b/config.json
index e1e639da..8aefb0fe 100644
--- a/config.json
+++ b/config.json
@@ -7,12 +7,11 @@
{
"model_name": "embedding",
"litellm_params": {
- "model": "azure/text-embedding-small",
+ "model": "azure/text-embedding-3-small",
"input_cost_per_token": 0.000000015,
"output_cost_per_token": 0.0000006,
"api_key": "os.environ/AZURE_API_KEY",
- "api_base": "os.environ/AZURE_API_BASE",
- "base_model": "text-embedding-3-small"
+ "api_base": "os.environ/AZURE_API_BASE"
},
"model_info": {
"max_tokens": 8191,
diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml
index 014bd59a..1f2a5bbc 100644
--- a/docker-compose-dev.yml
+++ b/docker-compose-dev.yml
@@ -14,6 +14,11 @@ services:
- github_app_pem
networks:
- app-network
+ depends_on:
+ - redis
+ - postgres
+ - qdrant
+
redis:
image: "redis:alpine"
@@ -39,10 +44,27 @@ services:
networks:
- app-network
+ qdrant:
+ image: qdrant/qdrant:latest
+ ports:
+ - "6333:6333"
+ - "6334:6334"
+ volumes:
+ - qdrant_data:/qdrant/storage
+ environment:
+ - QDRANT__SERVICE__GRPC_PORT=6334
+ restart: always
+ networks:
+ - app-network
+
+volumes:
+ qdrant_data:
+ driver: local
+
secrets:
github_app_pem:
file: ./GITHUB_APP_NIGHTLY.pem
networks:
app-network:
- driver: bridge
\ No newline at end of file
+ driver: bridge
diff --git a/docker-compose.yml b/docker-compose.yml
index c9af20f7..4b249b8f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -14,7 +14,9 @@ services:
- github_app_pem
depends_on:
- redis
-
+ - postgres
+ - qdrant
+
postgres:
image: postgres:16-bullseye
env_file:
@@ -23,7 +25,7 @@ services:
- ./init.sql:/docker-entrypoint-initdb.d/init.sql
ports:
- "5432:5432"
-
+
redis:
image: "redis:alpine"
environment:
@@ -31,8 +33,21 @@ services:
ports:
- "6379:6379"
+ qdrant:
+ image: qdrant/qdrant:latest
+ ports:
+ - "6333:6333"
+ - "6334:6334"
+ volumes:
+ - qdrant_data:/qdrant/storage
+ environment:
+ - QDRANT__SERVICE__GRPC_PORT=6334
+ restart: always
+
+volumes:
+ qdrant_data:
+ driver: local
secrets:
github_app_pem:
- file: ./GITHUB_APP_NIGHTLY.pem
-
+ file: ./GITHUB_APP_NIGHTLY.pem
\ No newline at end of file
diff --git a/examples/ragify_codebase/main.py b/examples/ragify_codebase/main.py
index a0d6057d..dcff7207 100644
--- a/examples/ragify_codebase/main.py
+++ b/examples/ragify_codebase/main.py
@@ -7,11 +7,21 @@
analyzer.setup_repository("./github_app/")
# Perform queries (you can do this as many times as you want without calling setup_repository again)
-results = analyzer.query("Find functions that handle authentication")
+results = analyzer.query("jwt token generation")
for result in results:
print(f"File: {result['file_path']}")
- print(f"Abstraction: {result['abstraction']}")
- print(f"result:\n{result}")
+ # print(f"Abstraction: {result['abstraction']}")
+ # print(f"result:\n{result}")
+ print(f"Relevance Score: {result['relevance_score']}")
+ print("---")
+
+print("....... \n\n")
+
+results = analyzer.query("How do you filter the results?")
+for result in results:
+ print(f"File: {result['file_path']}")
+ # print(f"Abstraction: {result['abstraction']}")
+ # print(f"result:\n{result}")
print(f"Relevance Score: {result['relevance_score']}")
print("---")
@@ -19,4 +29,4 @@
# analyzer.setup_repository("/path/to/your/repo")
# Then you can query again with the updated data
-results = analyzer.query("authentication")
+# results = analyzer.query("authentication")
diff --git a/kaizen/llms/provider.py b/kaizen/llms/provider.py
index e10c8a0f..4274d736 100644
--- a/kaizen/llms/provider.py
+++ b/kaizen/llms/provider.py
@@ -83,6 +83,7 @@ def _setup_provider(self) -> None:
"model_list": self.models,
"allowed_fails": 1,
"enable_pre_call_checks": True,
+ "routing_strategy": "simple-shuffle",
}
if self.config["language_model"].get("redis_enabled", False):
diff --git a/kaizen/retriever/custom_vector_store.py b/kaizen/retriever/custom_vector_store.py
deleted file mode 100644
index dfb2f061..00000000
--- a/kaizen/retriever/custom_vector_store.py
+++ /dev/null
@@ -1,59 +0,0 @@
-from llama_index.vector_stores.postgres import PGVectorStore
-from typing import List
-import numpy as np
-from psycopg2.extras import Json
-
-
-class CustomPGVectorStore(PGVectorStore):
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- # Store the table name in a new attribute
- self.table_name = kwargs.get("table_name", "embeddings")
-
- def custom_query(
- self, query_embedding: List[float], repo_id: int, similarity_top_k: int
- ) -> List[dict]:
- # Normalize the query embedding
- query_embedding_np = np.array(query_embedding)
- query_embedding_normalized = query_embedding_np / np.linalg.norm(
- query_embedding_np
- )
-
- # SQL query with repo_id filter and cosine similarity
- query = f"""
- SELECT
- e.node_id,
- e.text,
- e.metadata,
- 1 - (e.embedding <=> %s::vector) as similarity
- FROM
- {self.table_name} e
- JOIN
- function_abstractions fa ON e.node_id = fa.function_id::text
- JOIN
- files f ON fa.file_id = f.file_id
- WHERE
- f.repo_id = %s
- ORDER BY
- similarity DESC
- LIMIT
- %s
- """
-
- with self.get_client() as client:
- with client.cursor() as cur:
- cur.execute(
- query,
- (query_embedding_normalized.tolist(), repo_id, similarity_top_k),
- )
- results = cur.fetchall()
-
- return [
- {
- "id": row[0],
- "text": row[1],
- "metadata": row[2] if isinstance(row[2], dict) else Json(row[2]),
- "similarity": row[3],
- }
- for row in results
- ]
diff --git a/kaizen/retriever/llama_index_retriever.py b/kaizen/retriever/llama_index_retriever.py
index 46e020dd..bd345782 100644
--- a/kaizen/retriever/llama_index_retriever.py
+++ b/kaizen/retriever/llama_index_retriever.py
@@ -2,10 +2,11 @@
import logging
from llama_index.core import (
StorageContext,
- VectorStoreIndex,
)
+from uuid import uuid4
+
from llama_index.core.schema import TextNode
-from kaizen.retriever.custom_vector_store import CustomPGVectorStore
+from kaizen.retriever.qdrant_vector_store import QdrantVectorStore
from sqlalchemy import create_engine, text
from llama_index.llms.litellm import LiteLLM
import networkx as nx
@@ -16,7 +17,6 @@
from kaizen.retriever.code_chunker import chunk_code
import traceback
from llama_index.embeddings.litellm import LiteLLMEmbedding
-from llama_index.core import QueryBundle
# Set up logging
@@ -39,15 +39,7 @@ def __init__(self, repo_id=1):
)
self.repo_id = repo_id
self.graph = nx.DiGraph()
- self.vector_store = CustomPGVectorStore.from_params(
- database=os.environ["POSTGRES_DB"],
- host=os.environ["POSTGRES_HOST"],
- password=os.environ["POSTGRES_PASSWORD"],
- port=os.environ["POSTGRES_PORT"],
- user=os.environ["POSTGRES_USER"],
- table_name="embeddings",
- embed_dim=1536,
- )
+ self.vector_store = QdrantVectorStore("embeddings", vector_size=1536)
self.llm_provider = LLMProvider()
self.llm = LiteLLM(model_name="small", router=self.llm_provider.provider)
# embed_llm = LiteLLM(model_name="embedding", router=self.llm_provider.provider)
@@ -173,10 +165,11 @@ def store_abstraction_and_embedding(self, function_id: int, abstraction: str):
# Create a TextNode for the vector store
# Include repo_id in the metadata
- metadata = {"repo_id": self.repo_id}
+ metadata = {"repo_id": self.repo_id, "function_id": function_id}
+ node_id = str(uuid4())
node = TextNode(
text=abstraction,
- id_=str(function_id),
+ id_=node_id,
embedding=embedding,
metadata=metadata,
)
@@ -189,19 +182,61 @@ def store_abstraction_and_embedding(self, function_id: int, abstraction: str):
def generate_abstraction(
self, code_block: str, language: str, max_tokens: int = 300
) -> str:
- prompt = f"""Generate a concise yet comprehensive abstract description of the following {language} code block.
- Include information about:
- 1. The purpose or functionality of the code
- 2. Input parameters and return values (if applicable)
- 3. Any important algorithms or data structures used
- 4. Key dependencies or external libraries used
- 5. Any notable design patterns or architectural choices
- 6. Potential edge cases or error handling
-
- Code:
- ```{language}
- {code_block}
- ```
+ prompt = f"""Analyze the following {language} code block and generate a structured abstraction.
+Your response should be in YAML format and include the following sections:
+
+summary: A concise one-sentence summary of the function's primary purpose.
+
+functionality: |
+ A detailed explanation of what the function does, including its main steps and logic.
+ Use multiple lines if needed for clarity.
+
+inputs:
+ - name: The parameter name
+ type: The parameter type
+ description: A brief description of the parameter's purpose
+ default_value: The default value, if any (or null if not applicable)
+
+output:
+ type: The return type of the function
+ description: |
+ A description of what is returned and under what conditions.
+ Use multiple lines if needed.
+
+dependencies:
+ - name: Name of the external library or module
+ purpose: Brief explanation of its use in this function
+
+algorithms:
+ - name: Name of the algorithm or data structure
+ description: Brief explanation of its use and importance
+
+edge_cases:
+ - A list of potential edge cases or special conditions the function handles or should handle
+
+error_handling: |
+ A description of how errors are handled or propagated.
+ Include specific error types if applicable.
+
+usage_context: |
+ A brief explanation of how this function might be used by parent functions or in a larger system.
+ Include typical scenarios and any important considerations for its use.
+
+complexity:
+ time: Estimated time complexity (e.g., O(n))
+ space: Estimated space complexity (e.g., O(1))
+
+code_snippet: |
+ ```{language}
+ {code_block}
+ ```
+
+Provide your analysis in this clear, structured YAML format. If any section is not applicable, use an empty list [] or null value as appropriate. Ensure that multi-line descriptions are properly indented under their respective keys.
+
+Code to analyze:
+```{language}
+{code_block}
+```
"""
estimated_prompt_tokens = len(tokenizer.encode(prompt))
@@ -340,57 +375,47 @@ def store_function_relationships(self):
# logger.info(f"Query completed. Found {len(processed_results)} results.")
# return processed_results
- def query(self, query_text: str, num_results: int = 5) -> List[Dict[str, Any]]:
- logger.info(f"Performing query: '{query_text}' for repo_id: {self.repo_id}")
-
- index = VectorStoreIndex.from_vector_store(
- self.vector_store, embed_model=self.embed_model, llm=self.llm
- )
-
+ def query(
+ self, query_text: str, num_results: int = 5, repo_id=None
+ ) -> List[Dict[str, Any]]:
embedding, emb_usage = self.llm_provider.get_text_embedding(query_text)
embedding = embedding[0]["embedding"]
- # Create a filter to only search within the current repository
- # filter_dict = {"repo_id": self.repo_id}
+ results = self.vector_store.search(embedding, limit=num_results)
- query_bundle = QueryBundle(query_str=query_text, embedding=embedding)
- retriever = index.as_retriever(similarity_top_k=num_results)
-
- # Apply the filter during retrieval
- nodes = retriever.retrieve(query_bundle) # Add potential filtering
+ processed_results = []
+ for result in results:
+ processed_results.append(
+ {
+ "function_id": result.payload["function_id"],
+ "relevance_score": result.score,
+ }
+ )
- results = []
+ # Fetch additional data from the database
with self.engine.connect() as connection:
- for node in nodes:
- function_id = (
- node.node.id_
- ) # Assuming we stored function_id as the node id
+ for result in processed_results:
query = text(
"""
SELECT fa.function_name, fa.abstract_functionality, f.file_path, fa.function_signature
FROM function_abstractions fa
JOIN files f ON fa.file_id = f.file_id
WHERE fa.function_id = :function_id
- """
+ """
)
- result = connection.execute(
- query, {"function_id": function_id}
+ db_result = connection.execute(
+ query, {"function_id": result["function_id"]}
).fetchone()
- if result:
- results.append(
+ if db_result:
+ result.update(
{
- "function_name": result[0],
- "abstraction": result[1],
- "file_path": result[2],
- "function_signature": result[3],
- "relevance_score": (
- node.score if hasattr(node, "score") else 1.0
- ),
+ "function_name": db_result[0],
+ "abstraction": db_result[1],
+ "file_path": db_result[2],
+ "function_signature": db_result[3],
}
)
- sorted_results = sorted(
- results, key=lambda x: x["relevance_score"], reverse=True
+ return sorted(
+ processed_results, key=lambda x: x["relevance_score"], reverse=True
)
- logger.info(f"Query completed. Found {len(sorted_results)} results.")
- return sorted_results
diff --git a/kaizen/retriever/qdrant_vector_store.py b/kaizen/retriever/qdrant_vector_store.py
new file mode 100644
index 00000000..86a424d9
--- /dev/null
+++ b/kaizen/retriever/qdrant_vector_store.py
@@ -0,0 +1,26 @@
+from qdrant_client import QdrantClient
+from qdrant_client.models import Distance, VectorParams
+from qdrant_client.http.models import PointStruct
+
+
+class QdrantVectorStore:
+ def __init__(self, collection_name, vector_size):
+ self.client = QdrantClient("localhost", port=6333)
+ self.collection_name = collection_name
+ self.client.recreate_collection(
+ collection_name=self.collection_name,
+ vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
+ )
+
+ def add(self, nodes):
+ points = [
+ PointStruct(id=node.id_, vector=node.embedding, payload=node.metadata)
+ for node in nodes
+ ]
+ self.client.upsert(collection_name=self.collection_name, points=points)
+
+ def search(self, query_vector, limit=10):
+ results = self.client.search(
+ collection_name=self.collection_name, query_vector=query_vector, limit=limit
+ )
+ return results
diff --git a/poetry.lock b/poetry.lock
index 1b3c4d2b..9b2c9127 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -166,63 +166,6 @@ files = [
{file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"},
]
-[[package]]
-name = "asyncpg"
-version = "0.29.0"
-description = "An asyncio PostgreSQL driver"
-optional = false
-python-versions = ">=3.8.0"
-files = [
- {file = "asyncpg-0.29.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72fd0ef9f00aeed37179c62282a3d14262dbbafb74ec0ba16e1b1864d8a12169"},
- {file = "asyncpg-0.29.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52e8f8f9ff6e21f9b39ca9f8e3e33a5fcdceaf5667a8c5c32bee158e313be385"},
- {file = "asyncpg-0.29.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9e6823a7012be8b68301342ba33b4740e5a166f6bbda0aee32bc01638491a22"},
- {file = "asyncpg-0.29.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:746e80d83ad5d5464cfbf94315eb6744222ab00aa4e522b704322fb182b83610"},
- {file = "asyncpg-0.29.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ff8e8109cd6a46ff852a5e6bab8b0a047d7ea42fcb7ca5ae6eaae97d8eacf397"},
- {file = "asyncpg-0.29.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:97eb024685b1d7e72b1972863de527c11ff87960837919dac6e34754768098eb"},
- {file = "asyncpg-0.29.0-cp310-cp310-win32.whl", hash = "sha256:5bbb7f2cafd8d1fa3e65431833de2642f4b2124be61a449fa064e1a08d27e449"},
- {file = "asyncpg-0.29.0-cp310-cp310-win_amd64.whl", hash = "sha256:76c3ac6530904838a4b650b2880f8e7af938ee049e769ec2fba7cd66469d7772"},
- {file = "asyncpg-0.29.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4900ee08e85af01adb207519bb4e14b1cae8fd21e0ccf80fac6aa60b6da37b4"},
- {file = "asyncpg-0.29.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a65c1dcd820d5aea7c7d82a3fdcb70e096f8f70d1a8bf93eb458e49bfad036ac"},
- {file = "asyncpg-0.29.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b52e46f165585fd6af4863f268566668407c76b2c72d366bb8b522fa66f1870"},
- {file = "asyncpg-0.29.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc600ee8ef3dd38b8d67421359779f8ccec30b463e7aec7ed481c8346decf99f"},
- {file = "asyncpg-0.29.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:039a261af4f38f949095e1e780bae84a25ffe3e370175193174eb08d3cecab23"},
- {file = "asyncpg-0.29.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6feaf2d8f9138d190e5ec4390c1715c3e87b37715cd69b2c3dfca616134efd2b"},
- {file = "asyncpg-0.29.0-cp311-cp311-win32.whl", hash = "sha256:1e186427c88225ef730555f5fdda6c1812daa884064bfe6bc462fd3a71c4b675"},
- {file = "asyncpg-0.29.0-cp311-cp311-win_amd64.whl", hash = "sha256:cfe73ffae35f518cfd6e4e5f5abb2618ceb5ef02a2365ce64f132601000587d3"},
- {file = "asyncpg-0.29.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6011b0dc29886ab424dc042bf9eeb507670a3b40aece3439944006aafe023178"},
- {file = "asyncpg-0.29.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b544ffc66b039d5ec5a7454667f855f7fec08e0dfaf5a5490dfafbb7abbd2cfb"},
- {file = "asyncpg-0.29.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d84156d5fb530b06c493f9e7635aa18f518fa1d1395ef240d211cb563c4e2364"},
- {file = "asyncpg-0.29.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54858bc25b49d1114178d65a88e48ad50cb2b6f3e475caa0f0c092d5f527c106"},
- {file = "asyncpg-0.29.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bde17a1861cf10d5afce80a36fca736a86769ab3579532c03e45f83ba8a09c59"},
- {file = "asyncpg-0.29.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:37a2ec1b9ff88d8773d3eb6d3784dc7e3fee7756a5317b67f923172a4748a175"},
- {file = "asyncpg-0.29.0-cp312-cp312-win32.whl", hash = "sha256:bb1292d9fad43112a85e98ecdc2e051602bce97c199920586be83254d9dafc02"},
- {file = "asyncpg-0.29.0-cp312-cp312-win_amd64.whl", hash = "sha256:2245be8ec5047a605e0b454c894e54bf2ec787ac04b1cb7e0d3c67aa1e32f0fe"},
- {file = "asyncpg-0.29.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0009a300cae37b8c525e5b449233d59cd9868fd35431abc470a3e364d2b85cb9"},
- {file = "asyncpg-0.29.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5cad1324dbb33f3ca0cd2074d5114354ed3be2b94d48ddfd88af75ebda7c43cc"},
- {file = "asyncpg-0.29.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:012d01df61e009015944ac7543d6ee30c2dc1eb2f6b10b62a3f598beb6531548"},
- {file = "asyncpg-0.29.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:000c996c53c04770798053e1730d34e30cb645ad95a63265aec82da9093d88e7"},
- {file = "asyncpg-0.29.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e0bfe9c4d3429706cf70d3249089de14d6a01192d617e9093a8e941fea8ee775"},
- {file = "asyncpg-0.29.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:642a36eb41b6313ffa328e8a5c5c2b5bea6ee138546c9c3cf1bffaad8ee36dd9"},
- {file = "asyncpg-0.29.0-cp38-cp38-win32.whl", hash = "sha256:a921372bbd0aa3a5822dd0409da61b4cd50df89ae85150149f8c119f23e8c408"},
- {file = "asyncpg-0.29.0-cp38-cp38-win_amd64.whl", hash = "sha256:103aad2b92d1506700cbf51cd8bb5441e7e72e87a7b3a2ca4e32c840f051a6a3"},
- {file = "asyncpg-0.29.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5340dd515d7e52f4c11ada32171d87c05570479dc01dc66d03ee3e150fb695da"},
- {file = "asyncpg-0.29.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e17b52c6cf83e170d3d865571ba574577ab8e533e7361a2b8ce6157d02c665d3"},
- {file = "asyncpg-0.29.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f100d23f273555f4b19b74a96840aa27b85e99ba4b1f18d4ebff0734e78dc090"},
- {file = "asyncpg-0.29.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48e7c58b516057126b363cec8ca02b804644fd012ef8e6c7e23386b7d5e6ce83"},
- {file = "asyncpg-0.29.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f9ea3f24eb4c49a615573724d88a48bd1b7821c890c2effe04f05382ed9e8810"},
- {file = "asyncpg-0.29.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8d36c7f14a22ec9e928f15f92a48207546ffe68bc412f3be718eedccdf10dc5c"},
- {file = "asyncpg-0.29.0-cp39-cp39-win32.whl", hash = "sha256:797ab8123ebaed304a1fad4d7576d5376c3a006a4100380fb9d517f0b59c1ab2"},
- {file = "asyncpg-0.29.0-cp39-cp39-win_amd64.whl", hash = "sha256:cce08a178858b426ae1aa8409b5cc171def45d4293626e7aa6510696d46decd8"},
- {file = "asyncpg-0.29.0.tar.gz", hash = "sha256:d1c49e1f44fffafd9a55e1a9b101590859d881d639ea2922516f5d9c512d354e"},
-]
-
-[package.dependencies]
-async-timeout = {version = ">=4.0.3", markers = "python_version < \"3.12.0\""}
-
-[package.extras]
-docs = ["Sphinx (>=5.3.0,<5.4.0)", "sphinx-rtd-theme (>=1.2.2)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"]
-test = ["flake8 (>=6.1,<7.0)", "uvloop (>=0.15.3)"]
-
[[package]]
name = "attrs"
version = "24.1.0"
@@ -960,6 +903,124 @@ files = [
docs = ["Sphinx", "furo"]
test = ["objgraph", "psutil"]
+[[package]]
+name = "grpcio"
+version = "1.66.0"
+description = "HTTP/2-based RPC framework"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "grpcio-1.66.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:ad7256f224437b2c29c2bef98ddd3130454c5b1ab1f0471fc11794cefd4dbd3d"},
+ {file = "grpcio-1.66.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:5f4b3357e59dfba9140a51597287297bc638710d6a163f99ee14efc19967a821"},
+ {file = "grpcio-1.66.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:e8d20308eeae15b3e182f47876f05acbdec1eebd9473a9814a44e46ec4a84c04"},
+ {file = "grpcio-1.66.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1eb03524d0f55b965d6c86aa44e5db9e5eaa15f9ed3b164621e652e5b927f4b8"},
+ {file = "grpcio-1.66.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37514b68a42e9cf24536345d3cf9e580ffd29117c158b4eeea34625200256067"},
+ {file = "grpcio-1.66.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:516fdbc8e156db71a004bc431a6303bca24cfde186babe96dde7bd01e8f0cc70"},
+ {file = "grpcio-1.66.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d0439a970d65327de21c299ea0e0c2ad0987cdaf18ba5066621dea5f427f922b"},
+ {file = "grpcio-1.66.0-cp310-cp310-win32.whl", hash = "sha256:5f93fc84b72bbc7b84a42f3ca9dc055fa00d2303d9803be011ebf7a10a4eb833"},
+ {file = "grpcio-1.66.0-cp310-cp310-win_amd64.whl", hash = "sha256:8fc5c710ddd51b5a0dc36ef1b6663430aa620e0ce029b87b150dafd313b978c3"},
+ {file = "grpcio-1.66.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:dd614370e939f9fceeeb2915111a0795271b4c11dfb5fc0f58449bee40c726a5"},
+ {file = "grpcio-1.66.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:245b08f9b3c645a6a623f3ed4fa43dcfcd6ad701eb9c32511c1bb7380e8c3d23"},
+ {file = "grpcio-1.66.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:aaf30c75cbaf30e561ca45f21eb1f729f0fab3f15c592c1074795ed43e3ff96f"},
+ {file = "grpcio-1.66.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49234580a073ce7ac490112f6c67c874cbcb27804c4525978cdb21ba7f3f193c"},
+ {file = "grpcio-1.66.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de9e20a0acb709dcfa15a622c91f584f12c9739a79c47999f73435d2b3cc8a3b"},
+ {file = "grpcio-1.66.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bc008c6afa1e7c8df99bd9154abc4f0470d26b7730ca2521122e99e771baa8c7"},
+ {file = "grpcio-1.66.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:50cea8ce2552865b87e3dffbb85eb21e6b98d928621600c0feda2f02449cd837"},
+ {file = "grpcio-1.66.0-cp311-cp311-win32.whl", hash = "sha256:508411df1f2b7cfa05d4d7dbf3d576fe4f949cd61c03f3a6f0378c84e3d7b963"},
+ {file = "grpcio-1.66.0-cp311-cp311-win_amd64.whl", hash = "sha256:6d586a95c05c82a5354be48bb4537e1accaf2472d8eb7e9086d844cbff934482"},
+ {file = "grpcio-1.66.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:5ea27f4ce8c0daccfdd2c7961e6ba404b6599f47c948415c4cca5728739107a3"},
+ {file = "grpcio-1.66.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:296a45ea835e12a1cc35ab0c57e455346c272af7b0d178e29c67742167262b4c"},
+ {file = "grpcio-1.66.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:e36fa838ac1d6c87198ca149cbfcc92e1af06bb8c8cd852622f8e58f33ea3324"},
+ {file = "grpcio-1.66.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:684a4c07883cbd4ac864f0d08d927267404f5f0c76f31c85f9bbe05f2daae2f2"},
+ {file = "grpcio-1.66.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3084e590e857ba7585ae91078e4c9b6ef55aaf1dc343ce26400ba59a146eada"},
+ {file = "grpcio-1.66.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:526d4f6ca19f31b25606d5c470ecba55c0b22707b524e4de8987919e8920437d"},
+ {file = "grpcio-1.66.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:423ae18637cd99ddcf2e5a6851c61828c49e9b9d022d0442d979b4f230109787"},
+ {file = "grpcio-1.66.0-cp312-cp312-win32.whl", hash = "sha256:7bc9d823e05d63a87511fb456dcc48dc0fced86c282bf60229675e7ee7aac1a1"},
+ {file = "grpcio-1.66.0-cp312-cp312-win_amd64.whl", hash = "sha256:230cdd696751e7eb1395718cd308234749daa217bb8d128f00357dc4df102558"},
+ {file = "grpcio-1.66.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:0f3010bf46b2a01c9e40644cb9ed91b4b8435e5c500a275da5f9f62580e31e80"},
+ {file = "grpcio-1.66.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ba18cfdc09312eb2eea6fa0ce5d2eec3cf345ea78f6528b2eaed6432105e0bd0"},
+ {file = "grpcio-1.66.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:53d4c6706b49e358a2a33345dbe9b6b3bb047cecd7e8c07ba383bd09349bfef8"},
+ {file = "grpcio-1.66.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:643d8d9632a688ae69661e924b862e23c83a3575b24e52917ec5bcc59543d212"},
+ {file = "grpcio-1.66.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba60ae3b465b3e85080ae3bfbc36fd0305ae495ab16fcf8022fc7d7a23aac846"},
+ {file = "grpcio-1.66.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:9d5251578767fe44602688c851c2373b5513048ac84c21a0fe946590a8e7933d"},
+ {file = "grpcio-1.66.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5e8140b39f10d7be2263afa2838112de29374c5c740eb0afd99146cb5bdbd990"},
+ {file = "grpcio-1.66.0-cp38-cp38-win32.whl", hash = "sha256:5b15ef1b296c4e78f15f64fc65bf8081f8774480ffcac45642f69d9d753d9c6b"},
+ {file = "grpcio-1.66.0-cp38-cp38-win_amd64.whl", hash = "sha256:c072f90a1f0409f827ae86266984cba65e89c5831a0726b9fc7f4b5fb940b853"},
+ {file = "grpcio-1.66.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:a639d3866bfb5a678b5c0b92cd7ab543033ed8988854290fd86145e71731fd4c"},
+ {file = "grpcio-1.66.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6ed35bf7da3fb3b1949e32bdf47a8b5ffe0aed11722d948933bd068531cd4682"},
+ {file = "grpcio-1.66.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:1c5466222470cb7fbc9cc898af1d48eefd297cb2e2f59af6d4a851c862fa90ac"},
+ {file = "grpcio-1.66.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:921b8f7f25d5300d7c6837a1e0639ef145fbdbfb728e0a5db2dbccc9fc0fd891"},
+ {file = "grpcio-1.66.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3f6feb0dc8456d025e566709f7dd02885add99bedaac50229013069242a1bfd"},
+ {file = "grpcio-1.66.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748452dbd5a047475d5413bdef08b0b9ceb2c0c0e249d4ee905a5fb82c6328dc"},
+ {file = "grpcio-1.66.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:832945e64176520520317b50d64ec7d79924429528d5747669b52d0bf2c7bd78"},
+ {file = "grpcio-1.66.0-cp39-cp39-win32.whl", hash = "sha256:8096a922eb91bc97c839f675c3efa1257c6ef181ae1b25d3fb97f2cae4c57c01"},
+ {file = "grpcio-1.66.0-cp39-cp39-win_amd64.whl", hash = "sha256:375b58892301a5fc6ca7d7ff689c9dc9d00895f5d560604ace9f4f0573013c63"},
+ {file = "grpcio-1.66.0.tar.gz", hash = "sha256:c1ea4c528e7db6660718e4165fd1b5ac24b79a70c870a7bc0b7bdb9babab7c1e"},
+]
+
+[package.extras]
+protobuf = ["grpcio-tools (>=1.66.0)"]
+
+[[package]]
+name = "grpcio-tools"
+version = "1.66.0"
+description = "Protobuf code generator for gRPC"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "grpcio_tools-1.66.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:e0841fe0aa865694468243b682792d6649a9eaaeec103984a74fcf4289851a83"},
+ {file = "grpcio_tools-1.66.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:30261ab79e460e93002117627ec42a960c0d3d6292e3fd44a43eae94aedbae9a"},
+ {file = "grpcio_tools-1.66.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:879a70a153f05d61fae8e7dd88ad67c63c1a30ee22c344509ec2b898f1e29250"},
+ {file = "grpcio_tools-1.66.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff704d5b2c66e15aee1f34c74d8a44f0b613e9205d69c22172ffa056f9791db4"},
+ {file = "grpcio_tools-1.66.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24773294210f554cdf282feaa3f95b79e22de56f78ec7a2e66c990266100480b"},
+ {file = "grpcio_tools-1.66.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:2da55cab0569eb2bae8fc445cb9eaafad488918e4a443f831dbdd2ce60c47684"},
+ {file = "grpcio_tools-1.66.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:72e86d15d5dab2f25385e40608f5dc6b512172c3b10d01952d3d25f2d0648b7c"},
+ {file = "grpcio_tools-1.66.0-cp310-cp310-win32.whl", hash = "sha256:923c60602e2025e1082cd3a1d7a5f74314f945ebb4763a939cc3f5a667d48d7f"},
+ {file = "grpcio_tools-1.66.0-cp310-cp310-win_amd64.whl", hash = "sha256:95edac51be6cd1391726024dea3a2a852c0a4c63e90de1ec52b5857d1ad5fef1"},
+ {file = "grpcio_tools-1.66.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:81123f93a4f93f8e2bd7ba4a106c1eb1529e0336368c3b93c077f7649b48d784"},
+ {file = "grpcio_tools-1.66.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:95e3d1506bb3c6574c9d359ac78eaaad18276a3aaa328852796ee10d28a10656"},
+ {file = "grpcio_tools-1.66.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:51cdcdf9dc9087bfc5d7aa03c4c76614350e0f7ef0689763f69938d1a7ebfac4"},
+ {file = "grpcio_tools-1.66.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5ef97b6e945e77575d07dc2158773313aa1b36ddab41c59a1c51803b4620abd"},
+ {file = "grpcio_tools-1.66.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc188a5fbaf25e3a5f91f815d3928b1e40ba38f5a5f5b5e86f640c575f7db1c9"},
+ {file = "grpcio_tools-1.66.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:fddc8f3216199f47f2370f8a22ecc10a4e0b5c434eeab0ec47a79fb292e5a6f8"},
+ {file = "grpcio_tools-1.66.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:87a654381cdc43a64f890e1f68ca14f09c5bcafe9fe2481f50029a220b748d15"},
+ {file = "grpcio_tools-1.66.0-cp311-cp311-win32.whl", hash = "sha256:ecb781e41b08b094742137f56740acebedc29a18480a37c16d5dfed2aef0597a"},
+ {file = "grpcio_tools-1.66.0-cp311-cp311-win_amd64.whl", hash = "sha256:cf5906367329121b90942de6a2f77b316090ce15980254c61ecd5043526dc03d"},
+ {file = "grpcio_tools-1.66.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:bcb7f09c1569c2e5f1600e5b1eb6a8321e789a3e1d2f9ec5c236c62d61d22879"},
+ {file = "grpcio_tools-1.66.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ca654c732029483a0355164f551b4531eae1d1f64e269d389d97d79a0b087966"},
+ {file = "grpcio_tools-1.66.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:b117868e2040489d8d542348a45cce6225fc87e1bc5e6092ad05bea343d4723d"},
+ {file = "grpcio_tools-1.66.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d72c6a8e1470832199764a4ac4aa999def0ccfb0fe0266c73aae003812acb957"},
+ {file = "grpcio_tools-1.66.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7055599f250713662022f5096956c220ff0f43a7ab500d080b0f343ba8d98e14"},
+ {file = "grpcio_tools-1.66.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4ecd2caa15c2070182e49aa1771cbf8e6181e5072833222401d965c6338a075c"},
+ {file = "grpcio_tools-1.66.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b7da029e5a1270a0342c01f897436ab690677502e12f18664b7387a5e6938134"},
+ {file = "grpcio_tools-1.66.0-cp312-cp312-win32.whl", hash = "sha256:bde2aca5fd16e5ab37cf83a8a7b805ccb7faceb804c562387852a3146bfd7eaf"},
+ {file = "grpcio_tools-1.66.0-cp312-cp312-win_amd64.whl", hash = "sha256:e5507e1fee9caa19e2525d280016af8f4404affaad1a7c08beb7060797bd7972"},
+ {file = "grpcio_tools-1.66.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:01449e9b20347fc7661f79090a9c0317e6de2759748170ac04cc0a4db74a681f"},
+ {file = "grpcio_tools-1.66.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:9c026adf37d1dacc3270c60ef479945c68756a251c362aef51c250e1f69f6a18"},
+ {file = "grpcio_tools-1.66.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:2e31ac9a93feb5a4fbbb72de7a9a39709f28eea8183bab5e88f90a7facccf00b"},
+ {file = "grpcio_tools-1.66.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:63897f679ea55bc25accc825329b53acef2ad1266237d90be63c5aeaaa5bf175"},
+ {file = "grpcio_tools-1.66.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d38a0b97d16343b3389228edc58c9dfea69bd3833fe458681f9cf66d13bb2e0"},
+ {file = "grpcio_tools-1.66.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8e197458cc1747f56a5b6bddd635247f86d3eb2a8a191e3f43ce0e6f2bf374c5"},
+ {file = "grpcio_tools-1.66.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fd70b60d6b62df3d232e6c4f6c061c6bb5e071af88fe6323487d0b3b97ac87d2"},
+ {file = "grpcio_tools-1.66.0-cp38-cp38-win32.whl", hash = "sha256:65dfc1019a6dc3343161360a9436ca34f4aa4ffc40f4cdcd98e1e887dbe87cf8"},
+ {file = "grpcio_tools-1.66.0-cp38-cp38-win_amd64.whl", hash = "sha256:2a76db15aea734e583158c7190615f9e82de19fbb1f8d15f7a34fa9e4c3938a5"},
+ {file = "grpcio_tools-1.66.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:95f1d076a310007fff710b4eea648a98ec75e0eb755b9df9af03b38a120ed8ac"},
+ {file = "grpcio_tools-1.66.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eaf20f8141646b1db73f36711960d1bdf96435fbce670417e0754b15fbc52e76"},
+ {file = "grpcio_tools-1.66.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:d84db86038507c86bfa148c9b6dde5a17b8b2e529eecbf1ca427c367043a56e8"},
+ {file = "grpcio_tools-1.66.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ca7080ac2aed6d303fab162c5945d920c0243a7a393df71c9f98882583dcda5"},
+ {file = "grpcio_tools-1.66.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af2f8f944e779cb8dd5b5e8a689514775c745068cd564df662e00cab45430d40"},
+ {file = "grpcio_tools-1.66.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:e67a36da1ca3501933f26bd65589b7a5abdf5cfed79fd419054a0924f79fa760"},
+ {file = "grpcio_tools-1.66.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2e78e94d9db3d686bc76f0ecedf5634ca3fad2d94e50c564a7d87630326719e8"},
+ {file = "grpcio_tools-1.66.0-cp39-cp39-win32.whl", hash = "sha256:00aafd7714f2e2f618ec75b0f13df6a6f174f2bc50ad70c79443d8f5aa60df96"},
+ {file = "grpcio_tools-1.66.0-cp39-cp39-win_amd64.whl", hash = "sha256:a236df9ac2dd1f6009adc94bce1da10ac46dd87a04dea86bfbeadaa261c7adea"},
+ {file = "grpcio_tools-1.66.0.tar.gz", hash = "sha256:6e111f73f400d64b8dc32f5dab67c5e806c290eb2658fecdbfc44c2bb1020efc"},
+]
+
+[package.dependencies]
+grpcio = ">=1.66.0"
+protobuf = ">=5.26.1,<6.0dev"
+setuptools = "*"
+
[[package]]
name = "h11"
version = "0.14.0"
@@ -971,6 +1032,32 @@ files = [
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
]
+[[package]]
+name = "h2"
+version = "4.1.0"
+description = "HTTP/2 State-Machine based protocol implementation"
+optional = false
+python-versions = ">=3.6.1"
+files = [
+ {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"},
+ {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"},
+]
+
+[package.dependencies]
+hpack = ">=4.0,<5"
+hyperframe = ">=6.0,<7"
+
+[[package]]
+name = "hpack"
+version = "4.0.0"
+description = "Pure-Python HPACK header compression"
+optional = false
+python-versions = ">=3.6.1"
+files = [
+ {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"},
+ {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"},
+]
+
[[package]]
name = "httpcore"
version = "1.0.5"
@@ -1006,6 +1093,7 @@ files = [
[package.dependencies]
anyio = "*"
certifi = "*"
+h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""}
httpcore = "==1.*"
idna = "*"
sniffio = "*"
@@ -1050,6 +1138,17 @@ testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gr
torch = ["safetensors[torch]", "torch"]
typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"]
+[[package]]
+name = "hyperframe"
+version = "6.0.1"
+description = "HTTP/2 framing layer for Python"
+optional = false
+python-versions = ">=3.6.1"
+files = [
+ {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"},
+ {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"},
+]
+
[[package]]
name = "identify"
version = "2.6.0"
@@ -1568,24 +1667,6 @@ files = [
llama-index-core = ">=0.10.7,<0.11.0"
llama-parse = ">=0.4.0"
-[[package]]
-name = "llama-index-vector-stores-postgres"
-version = "0.1.11"
-description = "llama-index vector_stores postgres integration"
-optional = false
-python-versions = "<4.0,>=3.8.1"
-files = [
- {file = "llama_index_vector_stores_postgres-0.1.11-py3-none-any.whl", hash = "sha256:a3856372579c541457dfde295858cb9d8719f588d559f55d57bc6174f93a8293"},
- {file = "llama_index_vector_stores_postgres-0.1.11.tar.gz", hash = "sha256:ce23ff9549c5269bdccba638875b921faaa4a581cefb753e99f8365c82487a0e"},
-]
-
-[package.dependencies]
-asyncpg = ">=0.29.0,<0.30.0"
-llama-index-core = ">=0.10.20,<0.11.0"
-pgvector = ">=0.2.4,<0.3.0"
-psycopg2-binary = ">=2.9.9,<3.0.0"
-sqlalchemy = {version = ">=1.4.49,<2.1", extras = ["asyncio"]}
-
[[package]]
name = "llama-parse"
version = "0.4.9"
@@ -2038,19 +2119,6 @@ files = [
{file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
]
-[[package]]
-name = "pgvector"
-version = "0.2.5"
-description = "pgvector support for Python"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "pgvector-0.2.5-py2.py3-none-any.whl", hash = "sha256:5e5e93ec4d3c45ab1fa388729d56c602f6966296e19deee8878928c6d567e41b"},
-]
-
-[package.dependencies]
-numpy = "*"
-
[[package]]
name = "pillow"
version = "10.3.0"
@@ -2199,6 +2267,25 @@ files = [
dev = ["pre-commit", "tox"]
testing = ["pytest", "pytest-benchmark"]
+[[package]]
+name = "portalocker"
+version = "2.10.1"
+description = "Wraps the portalocker recipe for easy usage"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "portalocker-2.10.1-py3-none-any.whl", hash = "sha256:53a5984ebc86a025552264b459b46a2086e269b21823cb572f8f28ee759e45bf"},
+ {file = "portalocker-2.10.1.tar.gz", hash = "sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f"},
+]
+
+[package.dependencies]
+pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""}
+
+[package.extras]
+docs = ["sphinx (>=1.7.1)"]
+redis = ["redis"]
+tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)", "types-redis"]
+
[[package]]
name = "pre-commit"
version = "3.8.0"
@@ -2217,6 +2304,26 @@ nodeenv = ">=0.11.1"
pyyaml = ">=5.1"
virtualenv = ">=20.10.0"
+[[package]]
+name = "protobuf"
+version = "5.27.3"
+description = ""
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "protobuf-5.27.3-cp310-abi3-win32.whl", hash = "sha256:dcb307cd4ef8fec0cf52cb9105a03d06fbb5275ce6d84a6ae33bc6cf84e0a07b"},
+ {file = "protobuf-5.27.3-cp310-abi3-win_amd64.whl", hash = "sha256:16ddf3f8c6c41e1e803da7abea17b1793a97ef079a912e42351eabb19b2cffe7"},
+ {file = "protobuf-5.27.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:68248c60d53f6168f565a8c76dc58ba4fa2ade31c2d1ebdae6d80f969cdc2d4f"},
+ {file = "protobuf-5.27.3-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:b8a994fb3d1c11156e7d1e427186662b64694a62b55936b2b9348f0a7c6625ce"},
+ {file = "protobuf-5.27.3-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:a55c48f2a2092d8e213bd143474df33a6ae751b781dd1d1f4d953c128a415b25"},
+ {file = "protobuf-5.27.3-cp38-cp38-win32.whl", hash = "sha256:043853dcb55cc262bf2e116215ad43fa0859caab79bb0b2d31b708f128ece035"},
+ {file = "protobuf-5.27.3-cp38-cp38-win_amd64.whl", hash = "sha256:c2a105c24f08b1e53d6c7ffe69cb09d0031512f0b72f812dd4005b8112dbe91e"},
+ {file = "protobuf-5.27.3-cp39-cp39-win32.whl", hash = "sha256:c84eee2c71ed83704f1afbf1a85c3171eab0fd1ade3b399b3fad0884cbcca8bf"},
+ {file = "protobuf-5.27.3-cp39-cp39-win_amd64.whl", hash = "sha256:af7c0b7cfbbb649ad26132e53faa348580f844d9ca46fd3ec7ca48a1ea5db8a1"},
+ {file = "protobuf-5.27.3-py3-none-any.whl", hash = "sha256:8572c6533e544ebf6899c360e91d6bcbbee2549251643d32c52cf8a5de295ba5"},
+ {file = "protobuf-5.27.3.tar.gz", hash = "sha256:82460903e640f2b7e34ee81a947fdaad89de796d324bcbc38ff5430bcdead82c"},
+]
+
[[package]]
name = "psycopg2-binary"
version = "2.9.9"
@@ -2622,6 +2729,29 @@ files = [
{file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"},
]
+[[package]]
+name = "pywin32"
+version = "306"
+description = "Python for Window Extensions"
+optional = false
+python-versions = "*"
+files = [
+ {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"},
+ {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"},
+ {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"},
+ {file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"},
+ {file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"},
+ {file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"},
+ {file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"},
+ {file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"},
+ {file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"},
+ {file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"},
+ {file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"},
+ {file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"},
+ {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"},
+ {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"},
+]
+
[[package]]
name = "pyyaml"
version = "6.0.1"
@@ -2682,6 +2812,33 @@ files = [
{file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
]
+[[package]]
+name = "qdrant-client"
+version = "1.11.0"
+description = "Client library for the Qdrant vector search engine"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "qdrant_client-1.11.0-py3-none-any.whl", hash = "sha256:1f574ccebb91c0bc8a620c9a41a5a010084fbc4d8c6f1cd0ab7b2eeb97336fc0"},
+ {file = "qdrant_client-1.11.0.tar.gz", hash = "sha256:7c1d4d7a96cfd1ee0cde2a21c607e9df86bcca795ad8d1fd274d295ab64b8458"},
+]
+
+[package.dependencies]
+grpcio = ">=1.41.0"
+grpcio-tools = ">=1.41.0"
+httpx = {version = ">=0.20.0", extras = ["http2"]}
+numpy = [
+ {version = ">=1.21", markers = "python_version >= \"3.8\" and python_version < \"3.12\""},
+ {version = ">=1.26", markers = "python_version >= \"3.12\""},
+]
+portalocker = ">=2.7.0,<3.0.0"
+pydantic = ">=1.10.8"
+urllib3 = ">=1.26.14,<3"
+
+[package.extras]
+fastembed = ["fastembed (==0.3.4)"]
+fastembed-gpu = ["fastembed-gpu (==0.3.4)"]
+
[[package]]
name = "redis"
version = "5.0.8"
@@ -2936,6 +3093,22 @@ files = [
{file = "rpds_py-0.19.1.tar.gz", hash = "sha256:31dd5794837f00b46f4096aa8ccaa5972f73a938982e32ed817bb520c465e520"},
]
+[[package]]
+name = "setuptools"
+version = "73.0.1"
+description = "Easily download, build, install, upgrade, and uninstall Python packages"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "setuptools-73.0.1-py3-none-any.whl", hash = "sha256:b208925fcb9f7af924ed2dc04708ea89791e24bde0d3020b27df0e116088b34e"},
+ {file = "setuptools-73.0.1.tar.gz", hash = "sha256:d59a3e788ab7e012ab2c4baed1b376da6366883ee20d7a5fc426816e3d7b1193"},
+]
+
+[package.extras]
+core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.text (>=3.7)", "more-itertools (>=8.8)", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"]
+doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"]
+test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.11.*)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (<0.4)", "pytest-ruff (>=0.2.1)", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"]
+
[[package]]
name = "six"
version = "1.16.0"
@@ -3731,4 +3904,4 @@ typescript = ["tree-sitter-typescript"]
[metadata]
lock-version = "2.0"
python-versions = "^3.9.0"
-content-hash = "19b210560f02e7e1b4210a4cca0e50df1f6669e3b9586e24ca20a39265170490"
+content-hash = "b8b1f2e9a8da22e59f874abab8847f1096da0c2782924889778f48722c7f4e44"
diff --git a/pyproject.toml b/pyproject.toml
index 273795bd..848c9d62 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,6 @@ pip = "^24.0"
fuzzywuzzy = "^0.18.0"
llama-index-core = "0.10.65"
llama-index-readers-file = "^0.1.25"
-llama-index-vector-stores-postgres = "^0.1.11"
sqlalchemy = "^2.0.31"
esprima = "^4.0.1"
escodegen = "^1.0.11"
@@ -41,6 +40,8 @@ tree-sitter-rust = "^0.21.2"
llama-index-llms-litellm = "^0.1.4"
llama-index-embeddings-litellm = "^0.1.1"
pre-commit = "^3.8.0"
+qdrant-client = "^1.11.0"
+psycopg2-binary = "^2.9.9"
[tool.poetry.extras]
python = ["tree-sitter-python"]
From 8f3cd2005d04d51ab9eb61812f22e320657bd2b4 Mon Sep 17 00:00:00 2001
From: Saurav Panda
Date: Tue, 27 Aug 2024 02:34:00 -0700
Subject: [PATCH 15/19] feat: added experimental eval
---
.../no_eval/pr_335_comments.json | 17 -
.../no_eval/pr_335_review.md | 98 --
.../with_eval/pr_335_comments.json | 17 -
.../with_eval/pr_335_review.md | 114 ---
.../code_review/dataset/pr_222/issues.json | 152 +++
.../code_review/dataset/pr_232/issues.json | 107 ++
.../code_review/dataset/pr_252/issues.json | 92 ++
.../code_review/dataset/pr_335/issues.json | 92 ++
.../code_review/dataset/pr_400/issues.json | 52 +
.../code_review/dataset/pr_476/issues.json | 68 ++
.../code_review/dataset/pr_5/issues.json | 93 ++
.experiments/code_review/evaluate.py | 179 ++++
.../gpt-4o-mini/no_eval/pr_222/comments.json | 102 ++
.../gpt-4o-mini/no_eval/pr_222/issues.json | 342 +++++++
.../gpt-4o-mini/no_eval/pr_222/review.md | 402 ++++++++
.../gpt-4o-mini/no_eval/pr_335/comments.json | 17 +
.../gpt-4o-mini/no_eval/pr_335/issues.json | 77 ++
.../gpt-4o-mini/no_eval/pr_335/review.md | 139 +++
.../gpt-4o-mini/no_eval/pr_440/comments.json | 1 +
.../gpt-4o-mini/no_eval/pr_440/issues.json | 47 +
.../gpt-4o-mini/no_eval/pr_440/review.md | 100 ++
.../gpt-4o-mini/no_eval/pr_476/comments.json | 16 +
.../gpt-4o-mini/no_eval/pr_476/issues.json | 91 ++
.../gpt-4o-mini/no_eval/pr_476/review.md | 145 +++
.../gpt-4o-mini/no_eval/pr_5/comments.json | 47 +
.../gpt-4o-mini/no_eval/pr_5/issues.json | 107 ++
.../gpt-4o-mini/no_eval/pr_5/review.md | 155 +++
.../gpt-4o/no_eval/pr_222/comments.json | 132 +++
.../gpt-4o/no_eval/pr_222/issues.json | 387 +++++++
.../gpt-4o/no_eval/pr_222/review.md | 560 ++++++++++
.../gpt-4o/no_eval/pr_335/comments.json | 32 +
.../gpt-4o/no_eval/pr_335/issues.json | 92 ++
.../gpt-4o/no_eval/pr_335/review.md | 151 +++
.../gpt-4o/no_eval/pr_440/comments.json | 1 +
.../gpt-4o/no_eval/pr_440/issues.json | 47 +
.../gpt-4o/no_eval/pr_440/review.md | 92 ++
.../gpt-4o/no_eval/pr_476/comments.json | 31 +
.../gpt-4o/no_eval/pr_476/issues.json | 91 ++
.../gpt-4o/no_eval/pr_476/review.md | 144 +++
.../gpt-4o/no_eval/pr_5/comments.json | 47 +
.../gpt-4o/no_eval/pr_5/issues.json | 107 ++
.../code_review/gpt-4o/no_eval/pr_5/review.md | 182 ++++
.../haiku/no_eval/pr_222/comments.json | 72 ++
.../haiku/no_eval/pr_222/issues.json | 312 ++++++
.../haiku/no_eval/pr_222/review.md | 247 +++++
.../haiku/no_eval/pr_252/comments.json | 1 +
.../haiku/no_eval/pr_252/issues.json | 32 +
.../haiku/no_eval/pr_252/review.md | 70 ++
.../haiku/no_eval/pr_335/comments.json | 1 +
.../haiku/no_eval/pr_335/issues.json | 62 ++
.../haiku/no_eval/pr_335/review.md | 78 ++
.../haiku/no_eval/pr_400/comments.json | 16 +
.../haiku/no_eval/pr_400/issues.json | 406 ++++++++
.../haiku/no_eval/pr_400/review.md | 177 ++++
.../haiku/no_eval/pr_440/comments.json | 1 +
.../haiku/no_eval/pr_440/issues.json | 62 ++
.../haiku/no_eval/pr_440/review.md | 118 +++
.../haiku/no_eval/pr_476/comments.json | 16 +
.../haiku/no_eval/pr_476/issues.json | 106 ++
.../haiku/no_eval/pr_476/review.md | 103 ++
.../haiku/no_eval/pr_5/comments.json | 32 +
.../haiku/no_eval/pr_5/issues.json | 107 ++
.../code_review/haiku/no_eval/pr_5/review.md | 211 ++++
.../llama-405b/no_eval/pr_222/comments.json | 117 +++
.../llama-405b/no_eval/pr_222/issues.json | 282 ++++++
.../llama-405b/no_eval/pr_222/review.md | 195 ++++
.../llama-405b/no_eval/pr_232/comments.json | 1 +
.../llama-405b/no_eval/pr_232/issues.json | 137 +++
.../llama-405b/no_eval/pr_232/review.md | 100 ++
.../llama-405b/no_eval/pr_252/comments.json | 1 +
.../llama-405b/no_eval/pr_252/issues.json | 62 ++
.../llama-405b/no_eval/pr_252/review.md | 78 ++
.../llama-405b/no_eval/pr_335/comments.json | 1 +
.../llama-405b/no_eval/pr_335/issues.json | 47 +
.../llama-405b/no_eval/pr_335/review.md | 62 ++
.../llama-405b/no_eval/pr_400/comments.json | 16 +
.../llama-405b/no_eval/pr_400/issues.json | 316 ++++++
.../llama-405b/no_eval/pr_400/review.md | 197 ++++
.../llama-405b/no_eval/pr_440/comments.json | 1 +
.../llama-405b/no_eval/pr_440/issues.json | 47 +
.../llama-405b/no_eval/pr_440/review.md | 92 ++
.../llama-405b/no_eval/pr_476/comments.json | 16 +
.../llama-405b/no_eval/pr_476/issues.json | 61 ++
.../llama-405b/no_eval/pr_476/review.md | 115 +++
.../llama-405b/no_eval/pr_5/comments.json | 47 +
.../llama-405b/no_eval/pr_5/issues.json | 107 ++
.../llama-405b/no_eval/pr_5/review.md | 193 ++++
.experiments/code_review/main.py | 36 +-
.experiments/code_review/print_info.py | 40 +
.../sonnet-3.5/no_eval/pr_222/comments.json | 72 ++
.../sonnet-3.5/no_eval/pr_222/issues.json | 312 ++++++
.../sonnet-3.5/no_eval/pr_222/review.md | 481 +++++++++
.../sonnet-3.5/no_eval/pr_232/comments.json | 1 +
.../sonnet-3.5/no_eval/pr_232/issues.json | 227 +++++
.../sonnet-3.5/no_eval/pr_232/review.md | 253 +++++
.../sonnet-3.5/no_eval/pr_252/comments.json | 1 +
.../sonnet-3.5/no_eval/pr_252/issues.json | 62 ++
.../sonnet-3.5/no_eval/pr_252/review.md | 126 +++
.../sonnet-3.5/no_eval/pr_335/comments.json | 1 +
.../sonnet-3.5/no_eval/pr_335/issues.json | 62 ++
.../sonnet-3.5/no_eval/pr_335/review.md | 89 ++
.../sonnet-3.5/no_eval/pr_400/comments.json | 31 +
.../sonnet-3.5/no_eval/pr_400/issues.json | 406 ++++++++
.../sonnet-3.5/no_eval/pr_400/review.md | 435 ++++++++
.../sonnet-3.5/no_eval/pr_476/comments.json | 16 +
.../sonnet-3.5/no_eval/pr_476/issues.json | 106 ++
.../sonnet-3.5/no_eval/pr_476/review.md | 154 +++
.../sonnet-3.5/no_eval/pr_5/comments.json | 47 +
.../sonnet-3.5/no_eval/pr_5/issues.json | 107 ++
.../sonnet-3.5/no_eval/pr_5/review.md | 234 +++++
github_app/github_helper/pull_requests.py | 9 +-
kaizen/formatters/code_review_formatter.py | 23 +-
kaizen/llms/prompts/code_review_prompts.py | 45 +-
kaizen/llms/provider.py | 11 +-
kaizen/reviewer/code_review.py | 2 +-
poetry.lock | 323 +++++-
pyproject.toml | 3 +
test.txt | 956 ++++++++++++++++++
118 files changed, 13866 insertions(+), 291 deletions(-)
delete mode 100644 .experiments/code_review/code_reviews_20240815_142254/no_eval/pr_335_comments.json
delete mode 100644 .experiments/code_review/code_reviews_20240815_142254/no_eval/pr_335_review.md
delete mode 100644 .experiments/code_review/code_reviews_20240815_142254/with_eval/pr_335_comments.json
delete mode 100644 .experiments/code_review/code_reviews_20240815_142254/with_eval/pr_335_review.md
create mode 100644 .experiments/code_review/dataset/pr_222/issues.json
create mode 100644 .experiments/code_review/dataset/pr_232/issues.json
create mode 100644 .experiments/code_review/dataset/pr_252/issues.json
create mode 100644 .experiments/code_review/dataset/pr_335/issues.json
create mode 100644 .experiments/code_review/dataset/pr_400/issues.json
create mode 100644 .experiments/code_review/dataset/pr_476/issues.json
create mode 100644 .experiments/code_review/dataset/pr_5/issues.json
create mode 100644 .experiments/code_review/evaluate.py
create mode 100644 .experiments/code_review/gpt-4o-mini/no_eval/pr_222/comments.json
create mode 100644 .experiments/code_review/gpt-4o-mini/no_eval/pr_222/issues.json
create mode 100644 .experiments/code_review/gpt-4o-mini/no_eval/pr_222/review.md
create mode 100644 .experiments/code_review/gpt-4o-mini/no_eval/pr_335/comments.json
create mode 100644 .experiments/code_review/gpt-4o-mini/no_eval/pr_335/issues.json
create mode 100644 .experiments/code_review/gpt-4o-mini/no_eval/pr_335/review.md
create mode 100644 .experiments/code_review/gpt-4o-mini/no_eval/pr_440/comments.json
create mode 100644 .experiments/code_review/gpt-4o-mini/no_eval/pr_440/issues.json
create mode 100644 .experiments/code_review/gpt-4o-mini/no_eval/pr_440/review.md
create mode 100644 .experiments/code_review/gpt-4o-mini/no_eval/pr_476/comments.json
create mode 100644 .experiments/code_review/gpt-4o-mini/no_eval/pr_476/issues.json
create mode 100644 .experiments/code_review/gpt-4o-mini/no_eval/pr_476/review.md
create mode 100644 .experiments/code_review/gpt-4o-mini/no_eval/pr_5/comments.json
create mode 100644 .experiments/code_review/gpt-4o-mini/no_eval/pr_5/issues.json
create mode 100644 .experiments/code_review/gpt-4o-mini/no_eval/pr_5/review.md
create mode 100644 .experiments/code_review/gpt-4o/no_eval/pr_222/comments.json
create mode 100644 .experiments/code_review/gpt-4o/no_eval/pr_222/issues.json
create mode 100644 .experiments/code_review/gpt-4o/no_eval/pr_222/review.md
create mode 100644 .experiments/code_review/gpt-4o/no_eval/pr_335/comments.json
create mode 100644 .experiments/code_review/gpt-4o/no_eval/pr_335/issues.json
create mode 100644 .experiments/code_review/gpt-4o/no_eval/pr_335/review.md
create mode 100644 .experiments/code_review/gpt-4o/no_eval/pr_440/comments.json
create mode 100644 .experiments/code_review/gpt-4o/no_eval/pr_440/issues.json
create mode 100644 .experiments/code_review/gpt-4o/no_eval/pr_440/review.md
create mode 100644 .experiments/code_review/gpt-4o/no_eval/pr_476/comments.json
create mode 100644 .experiments/code_review/gpt-4o/no_eval/pr_476/issues.json
create mode 100644 .experiments/code_review/gpt-4o/no_eval/pr_476/review.md
create mode 100644 .experiments/code_review/gpt-4o/no_eval/pr_5/comments.json
create mode 100644 .experiments/code_review/gpt-4o/no_eval/pr_5/issues.json
create mode 100644 .experiments/code_review/gpt-4o/no_eval/pr_5/review.md
create mode 100644 .experiments/code_review/haiku/no_eval/pr_222/comments.json
create mode 100644 .experiments/code_review/haiku/no_eval/pr_222/issues.json
create mode 100644 .experiments/code_review/haiku/no_eval/pr_222/review.md
create mode 100644 .experiments/code_review/haiku/no_eval/pr_252/comments.json
create mode 100644 .experiments/code_review/haiku/no_eval/pr_252/issues.json
create mode 100644 .experiments/code_review/haiku/no_eval/pr_252/review.md
create mode 100644 .experiments/code_review/haiku/no_eval/pr_335/comments.json
create mode 100644 .experiments/code_review/haiku/no_eval/pr_335/issues.json
create mode 100644 .experiments/code_review/haiku/no_eval/pr_335/review.md
create mode 100644 .experiments/code_review/haiku/no_eval/pr_400/comments.json
create mode 100644 .experiments/code_review/haiku/no_eval/pr_400/issues.json
create mode 100644 .experiments/code_review/haiku/no_eval/pr_400/review.md
create mode 100644 .experiments/code_review/haiku/no_eval/pr_440/comments.json
create mode 100644 .experiments/code_review/haiku/no_eval/pr_440/issues.json
create mode 100644 .experiments/code_review/haiku/no_eval/pr_440/review.md
create mode 100644 .experiments/code_review/haiku/no_eval/pr_476/comments.json
create mode 100644 .experiments/code_review/haiku/no_eval/pr_476/issues.json
create mode 100644 .experiments/code_review/haiku/no_eval/pr_476/review.md
create mode 100644 .experiments/code_review/haiku/no_eval/pr_5/comments.json
create mode 100644 .experiments/code_review/haiku/no_eval/pr_5/issues.json
create mode 100644 .experiments/code_review/haiku/no_eval/pr_5/review.md
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_222/comments.json
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_222/issues.json
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_222/review.md
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_232/comments.json
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_232/issues.json
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_232/review.md
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_252/comments.json
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_252/issues.json
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_252/review.md
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_335/comments.json
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_335/issues.json
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_335/review.md
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_400/comments.json
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_400/issues.json
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_400/review.md
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_440/comments.json
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_440/issues.json
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_440/review.md
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_476/comments.json
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_476/issues.json
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_476/review.md
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_5/comments.json
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_5/issues.json
create mode 100644 .experiments/code_review/llama-405b/no_eval/pr_5/review.md
create mode 100644 .experiments/code_review/print_info.py
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_222/comments.json
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_222/issues.json
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_222/review.md
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_232/comments.json
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_232/issues.json
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_232/review.md
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_252/comments.json
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_252/issues.json
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_252/review.md
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_335/comments.json
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_335/issues.json
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_335/review.md
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_400/comments.json
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_400/issues.json
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_400/review.md
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_476/comments.json
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_476/issues.json
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_476/review.md
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_5/comments.json
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_5/issues.json
create mode 100644 .experiments/code_review/sonnet-3.5/no_eval/pr_5/review.md
create mode 100644 test.txt
diff --git a/.experiments/code_review/code_reviews_20240815_142254/no_eval/pr_335_comments.json b/.experiments/code_review/code_reviews_20240815_142254/no_eval/pr_335_comments.json
deleted file mode 100644
index 1f6094e7..00000000
--- a/.experiments/code_review/code_reviews_20240815_142254/no_eval/pr_335_comments.json
+++ /dev/null
@@ -1,17 +0,0 @@
-[
- {
- "topic": "Error Handling",
- "comment": "Removed parameter 'reeval_response' without handling its previous functionality.",
- "confidence": "critical",
- "reason": "The removal of 'reeval_response' may lead to unexpected behavior if the function relies on it.",
- "solution": "Evaluate the necessity of the 'reeval_response' parameter and ensure that its removal does not affect the logic of the code.",
- "actual_code": "desc = self._process_full_diff(prompt, user, reeval_response)",
- "fixed_code": "desc = self._process_full_diff(prompt, user)",
- "file_name": "kaizen/generator/pr_description.py",
- "start_line": 54,
- "end_line": 54,
- "side": "LEFT",
- "sentiment": "negative",
- "severity_level": 8
- }
-]
\ No newline at end of file
diff --git a/.experiments/code_review/code_reviews_20240815_142254/no_eval/pr_335_review.md b/.experiments/code_review/code_reviews_20240815_142254/no_eval/pr_335_review.md
deleted file mode 100644
index 4f72039b..00000000
--- a/.experiments/code_review/code_reviews_20240815_142254/no_eval/pr_335_review.md
+++ /dev/null
@@ -1,98 +0,0 @@
-# 🔍 Code Review Summary
-
-## 📊 Stats
-- Total Issues: 4
-- Critical: 1
-- Important: 2
-- Minor: 1
-- Files Affected: 1
-## 🏆 Code Quality
-[█████████████████░░░] 85% (Good)
-
-## 🚨 Critical Issues
-
-
-Error Handling (1 issues)
-
-### 1. Removed parameter 'reeval_response' without handling its previous functionality.
-📁 **File:** `kaizen/generator/pr_description.py:54`
-⚖️ **Severity:** 8/10
-🔍 **Description:** The removal of 'reeval_response' may lead to unexpected behavior if the function relies on it.
-💡 **Solution:** Evaluate the necessity of the 'reeval_response' parameter and ensure that its removal does not affect the logic of the code.
-
-**Current Code:**
-```python
-desc = self._process_full_diff(prompt, user, reeval_response)
-```
-
-**Suggested Code:**
-```python
-desc = self._process_full_diff(prompt, user)
-```
-
-
-
-## 🟠 Important Issues
-
-
-Imports (2 issues)
-
-### 1. Inconsistent naming of imported prompts.
-📁 **File:** `kaizen/generator/pr_description.py:8`
-⚖️ **Severity:** 5/10
-🔍 **Description:** The change from `code_review_prompts` to `pr_desc_prompts` may lead to confusion if not documented properly.
-💡 **Solution:** Ensure that the new prompt names are well-documented and consistent across the codebase.
-
-**Current Code:**
-```python
-from kaizen.llms.prompts.code_review_prompts import (
- PR_DESCRIPTION_PROMPT,
- MERGE_PR_DESCRIPTION_PROMPT,
- PR_FILE_DESCRIPTION_PROMPT,
- PR_DESC_EVALUATION_PROMPT,
- CODE_REVIEW_SYSTEM_PROMPT,
-)
-```
-
-**Suggested Code:**
-```python
-from kaizen.llms.prompts.pr_desc_prompts import (
- PR_DESCRIPTION_PROMPT,
- MERGE_PR_DESCRIPTION_PROMPT,
- PR_FILE_DESCRIPTION_PROMPT,
- PR_DESCRIPTION_SYSTEM_PROMPT,
-)
-```
-
-### 2. Inconsistent handling of response extraction.
-📁 **File:** `kaizen/generator/pr_description.py:110`
-⚖️ **Severity:** 7/10
-🔍 **Description:** The change from 'chat_completion_with_json' to 'chat_completion' may alter the expected response format.
-💡 **Solution:** Ensure that the new method returns the same structure as the previous one or update the handling logic accordingly.
-
-**Current Code:**
-```python
-resp, usage = self.provider.chat_completion_with_json(prompt, user=user)
-```
-
-**Suggested Code:**
-```python
-resp, usage = self.provider.chat_completion(prompt, user=user)
-```
-
-
-
----
-
-> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
-
-
-Useful Commands
-
-- **Feedback:** Reply with `!feedback [your message]`
-- **Ask PR:** Reply with `!ask-pr [your question]`
-- **Review:** Reply with `!review`
-- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
-- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
-- **Update Tests:** Reply with `!unittest` to create a PR with test changes
-
diff --git a/.experiments/code_review/code_reviews_20240815_142254/with_eval/pr_335_comments.json b/.experiments/code_review/code_reviews_20240815_142254/with_eval/pr_335_comments.json
deleted file mode 100644
index 2eaf7ef9..00000000
--- a/.experiments/code_review/code_reviews_20240815_142254/with_eval/pr_335_comments.json
+++ /dev/null
@@ -1,17 +0,0 @@
-[
- {
- "topic": "Functionality",
- "comment": "Changing the method from 'chat_completion_with_json' to 'chat_completion' may alter expected behavior.",
- "confidence": "critical",
- "reason": "If 'chat_completion_with_json' was designed to handle specific JSON formatting, switching to 'chat_completion' may lead to data handling issues.",
- "solution": "Review the implementation of 'chat_completion' to ensure it meets the requirements previously handled by 'chat_completion_with_json'.",
- "actual_code": "resp, usage = self.provider.chat_completion_with_json(prompt, user=user)",
- "fixed_code": "resp, usage = self.provider.chat_completion(prompt, user=user)",
- "file_name": "kaizen/generator/pr_description.py",
- "start_line": 83,
- "end_line": 83,
- "side": "LEFT",
- "sentiment": "negative",
- "severity_level": 8
- }
-]
\ No newline at end of file
diff --git a/.experiments/code_review/code_reviews_20240815_142254/with_eval/pr_335_review.md b/.experiments/code_review/code_reviews_20240815_142254/with_eval/pr_335_review.md
deleted file mode 100644
index 8eefd8ac..00000000
--- a/.experiments/code_review/code_reviews_20240815_142254/with_eval/pr_335_review.md
+++ /dev/null
@@ -1,114 +0,0 @@
-# 🔍 Code Review Summary
-
-## 📊 Stats
-- Total Issues: 5
-- Critical: 1
-- Important: 3
-- Minor: 1
-- Files Affected: 1
-## 🏆 Code Quality
-[████████████████░░░░] 80% (Good)
-
-## 🚨 Critical Issues
-
-
-Functionality (1 issues)
-
-### 1. Changing the method from 'chat_completion_with_json' to 'chat_completion' may alter expected behavior.
-📁 **File:** `kaizen/generator/pr_description.py:83`
-⚖️ **Severity:** 8/10
-🔍 **Description:** If 'chat_completion_with_json' was designed to handle specific JSON formatting, switching to 'chat_completion' may lead to data handling issues.
-💡 **Solution:** Review the implementation of 'chat_completion' to ensure it meets the requirements previously handled by 'chat_completion_with_json'.
-
-**Current Code:**
-```python
-resp, usage = self.provider.chat_completion_with_json(prompt, user=user)
-```
-
-**Suggested Code:**
-```python
-resp, usage = self.provider.chat_completion(prompt, user=user)
-```
-
-
-
-## 🟠 Important Issues
-
-
-Imports (3 issues)
-
-### 1. Updated import statements may lead to confusion regarding the source of prompts.
-📁 **File:** `kaizen/generator/pr_description.py:8`
-⚖️ **Severity:** 5/10
-🔍 **Description:** Changing the import path for prompts can lead to issues if the new module does not contain the expected constants.
-💡 **Solution:** Ensure that the new import path is correct and that all necessary constants are defined in the new module.
-
-**Current Code:**
-```python
-from kaizen.llms.prompts.code_review_prompts import (
- PR_DESCRIPTION_PROMPT,
- MERGE_PR_DESCRIPTION_PROMPT,
- PR_FILE_DESCRIPTION_PROMPT,
- PR_DESC_EVALUATION_PROMPT,
- CODE_REVIEW_SYSTEM_PROMPT,
-)
-```
-
-**Suggested Code:**
-```python
-from kaizen.llms.prompts.pr_desc_prompts import (
- PR_DESCRIPTION_PROMPT,
- MERGE_PR_DESCRIPTION_PROMPT,
- PR_FILE_DESCRIPTION_PROMPT,
- PR_DESCRIPTION_SYSTEM_PROMPT,
-)
-```
-
-### 2. Raising a generic Exception can obscure the cause of errors.
-📁 **File:** `kaizen/generator/pr_description.py:51`
-⚖️ **Severity:** 7/10
-🔍 **Description:** Using a generic Exception does not provide specific information about the error, making debugging difficult.
-💡 **Solution:** Use a more specific exception type or create a custom exception class to provide better context.
-
-**Current Code:**
-```python
-raise Exception("Both diff_text and pull_request_files are empty!")
-```
-
-**Suggested Code:**
-```python
-raise ValueError("Both diff_text and pull_request_files are empty!")
-```
-
-### 3. Removing 'reeval_response' from multiple function signatures may lead to loss of intended functionality.
-📁 **File:** `kaizen/generator/pr_description.py:40`
-⚖️ **Severity:** 6/10
-🔍 **Description:** If 'reeval_response' was previously used to control logic, its removal could lead to unintended behavior.
-💡 **Solution:** Carefully assess the logic that relies on 'reeval_response' to determine if it should be retained.
-
-**Current Code:**
-```python
-def _process_full_diff(self, prompt: str, user: Optional[str], reeval_response: bool) -> str:
-```
-
-**Suggested Code:**
-```python
-def _process_full_diff(self, prompt: str, user: Optional[str]) -> str:
-```
-
-
-
----
-
-> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
-
-
-Useful Commands
-
-- **Feedback:** Reply with `!feedback [your message]`
-- **Ask PR:** Reply with `!ask-pr [your question]`
-- **Review:** Reply with `!review`
-- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
-- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
-- **Update Tests:** Reply with `!unittest` to create a PR with test changes
-
diff --git a/.experiments/code_review/dataset/pr_222/issues.json b/.experiments/code_review/dataset/pr_222/issues.json
new file mode 100644
index 00000000..c50260a4
--- /dev/null
+++ b/.experiments/code_review/dataset/pr_222/issues.json
@@ -0,0 +1,152 @@
+[
+ {
+ "topic": "Security",
+ "comment": "Hardcoding API keys in `config.json` can lead to security vulnerabilities.",
+ "confidence": "critical",
+ "reason": "Exposing API keys in the codebase can lead to unauthorized access. This was highlighted by multiple models and is a critical security issue.",
+ "solution": "Use environment variables to store API keys instead of hardcoding them.",
+ "actual_code": "\"api_key\": \"os.environ/AZURE_API_KEY\"",
+ "fixed_code": "\"api_key\": \"${AZURE_API_KEY}\"",
+ "file_name": "config.json",
+ "start_line": 13,
+ "end_line": 13,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "SQL Injection",
+ "comment": "Potential SQL injection vulnerability in the query construction.",
+ "confidence": "critical",
+ "reason": "Using string interpolation for SQL queries can lead to SQL injection attacks. This was identified by multiple models as a critical issue.",
+ "solution": "Use parameterized queries to avoid SQL injection vulnerabilities.",
+ "actual_code": "query = f\"\"\"\nSELECT \n e.node_id,\n e.text,\n e.metadata,\n 1 - (e.embedding <=> %s::vector) as similarity\nFROM \n{self.table_name} e\nJOIN \n function_abstractions fa ON e.node_id = fa.function_id::text\nJOIN \n files f ON fa.file_id = f.file_id\nWHERE \n f.repo_id = %s\nORDER BY \n similarity DESC\nLIMIT \n %s\n\"\"\"",
+ "fixed_code": "query = \"\"\"\nSELECT \n e.node_id,\n e.text,\n e.metadata,\n 1 - (e.embedding <=> %s::vector) as similarity\nFROM \n %s e\nJOIN \n function_abstractions fa ON e.node_id = fa.function_id::text\nJOIN \n files f ON fa.file_id = f.file_id\nWHERE \n f.repo_id = %s\nORDER BY \n similarity DESC\nLIMIT \n %s\n\"\"\"",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 19,
+ "end_line": 37,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Lack of error handling in database operations.",
+ "confidence": "important",
+ "reason": "Multiple models identified the need for better error handling in database operations to prevent crashes and improve debugging.",
+ "solution": "Add try-except blocks to handle potential database errors.",
+ "actual_code": "",
+ "fixed_code": "try:\n with self.get_client() as client:\n with client.cursor() as cur:\n cur.execute(query, (query_embedding_normalized.tolist(), repo_id, similarity_top_k))\n results = cur.fetchall()\nexcept Exception as e:\n # Handle exception (e.g., log the error, re-raise, etc.)\n raise e",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 39,
+ "end_line": 42,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Code Readability",
+ "comment": "The `chunk_code` function in `code_chunker.py` has nested functions and complex logic that can be refactored for better readability.",
+ "confidence": "important",
+ "reason": "Complex functions with nested logic can be hard to maintain and understand. This was noted by multiple models.",
+ "solution": "Refactor the `chunk_code` function to extract nested functions into separate helper functions.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/code_chunker.py",
+ "start_line": 7,
+ "end_line": 62,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Type Annotations",
+ "comment": "Missing or incomplete type annotations for method parameters and return types.",
+ "confidence": "important",
+ "reason": "Type annotations improve code readability and help with static analysis. This was mentioned by several models.",
+ "solution": "Add or improve type annotations to method parameters and return types.",
+ "actual_code": "def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[dict]:",
+ "fixed_code": "def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[Dict[str, Any]]:",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 13,
+ "end_line": 13,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Documentation",
+ "comment": "Lack of comments and docstrings in complex functions and methods.",
+ "confidence": "important",
+ "reason": "Several models noted the need for better documentation to improve code understanding and maintainability.",
+ "solution": "Add comments explaining complex logic and docstrings to functions and methods.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 1,
+ "end_line": 1,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Code Duplication",
+ "comment": "Duplicate code found in test cases and database connection string creation.",
+ "confidence": "important",
+ "reason": "Code duplication was identified by multiple models as an issue that can lead to maintenance problems.",
+ "solution": "Refactor duplicate code into reusable functions or constants.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "tests/retriever/test_chunker.py",
+ "start_line": 98,
+ "end_line": 101,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Dependency Management",
+ "comment": "New dependencies added without justification.",
+ "confidence": "important",
+ "reason": "Adding dependencies increases the attack surface and maintenance burden. This was noted as an important consideration by multiple models.",
+ "solution": "Review and justify new dependencies, ensuring they are necessary and compatible.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "pyproject.toml",
+ "start_line": 27,
+ "end_line": 49,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Performance",
+ "comment": "Potential performance issues in database operations and code parsing.",
+ "confidence": "moderate",
+ "reason": "Several models identified areas where performance could be improved, particularly in database operations and file parsing.",
+ "solution": "Optimize database queries, consider batching operations, and review file parsing logic for potential improvements.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 1,
+ "end_line": 1,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Improve error handling in the parse_file method and LanguageLoader class.",
+ "confidence": "important",
+ "reason": "Better error handling was suggested by multiple models to improve debugging and prevent unexpected behavior.",
+ "solution": "Implement more specific exception handling and provide detailed error messages.",
+ "actual_code": "except Exception as e:\n logger.error(f\"Error processing file {file_path}: {str(e)}\")\n logger.error(traceback.format_exc())",
+ "fixed_code": "except Exception as e:\n logger.error(f\"Error processing file {file_path}: {str(e)}\")\n logger.error(traceback.format_exc())\n raise",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 108,
+ "end_line": 110,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 7
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/dataset/pr_232/issues.json b/.experiments/code_review/dataset/pr_232/issues.json
new file mode 100644
index 00000000..8ddf3398
--- /dev/null
+++ b/.experiments/code_review/dataset/pr_232/issues.json
@@ -0,0 +1,107 @@
+[
+ {
+ "topic": "Unused Imports",
+ "comment": "There are several unused imports across multiple files that should be removed.",
+ "confidence": "important",
+ "reason": "Removing unused imports improves code cleanliness, readability, and potentially reduces bundle size. This issue was identified by both models across multiple files.",
+ "solution": "Remove all unused imports from the affected files.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "page.tsx, queryinput.tsx, apps/web/app/(dash)/home/page.tsx, apps/web/app/(dash)/home/queryinput.tsx, packages/ui/shadcn/combobox.tsx",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Type Annotations and Definitions",
+ "comment": "Some variables, functions, and components are missing proper type annotations or definitions.",
+ "confidence": "important",
+ "reason": "Proper type annotations improve code readability, maintainability, and help catch type-related errors at compile-time. This issue was noted by both models.",
+ "solution": "Add or improve type annotations for variables, functions, and components where they are missing or inadequate.",
+ "actual_code": "const ComboboxWithCreate = ({",
+ "fixed_code": "const ComboboxWithCreate: React.FC = ({",
+ "file_name": "queryinput.tsx, packages/ui/shadcn/combobox.tsx, apps/web/app/(dash)/(memories)/content.tsx",
+ "start_line": 32,
+ "end_line": 32,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Code Organization and Structure",
+ "comment": "Some files contain multiple unrelated components or have poor code organization.",
+ "confidence": "important",
+ "reason": "Proper code organization improves readability, maintainability, and reusability. This issue was identified by both models.",
+ "solution": "Separate unrelated components into their own files and improve overall code structure.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "page.tsx, apps/web/app/(dash)/menu.tsx",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Improve error handling in various parts of the code, particularly in the handleSubmit function.",
+ "confidence": "important",
+ "reason": "Proper error handling is crucial for preventing crashes and providing useful feedback. This issue was highlighted by both models.",
+ "solution": "Implement robust error handling, especially in critical functions like handleSubmit.",
+ "actual_code": "throw new Error(`Memory creation failed: ${cont.error}`);\nreturn cont;",
+ "fixed_code": "throw new Error(`Memory creation failed: ${cont.error}`);",
+ "file_name": "apps/web/app/(dash)/menu.tsx",
+ "start_line": 230,
+ "end_line": 231,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "State Management",
+ "comment": "Consider improving state management to avoid prop drilling and improve component encapsulation.",
+ "confidence": "moderate",
+ "reason": "Better state management can improve code maintainability and reduce complexity. This was suggested by the Sonnet model.",
+ "solution": "Consider using React Context or a state management library for managing global state.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "apps/web/app/(dash)/menu.tsx",
+ "start_line": 163,
+ "end_line": 167,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Performance Optimization",
+ "comment": "Some computations, like filtering options, could be optimized to improve performance.",
+ "confidence": "moderate",
+ "reason": "Optimizing expensive computations can lead to better performance, especially for larger datasets.",
+ "solution": "Use memoization techniques like useMemo for expensive computations that don't need to be recalculated on every render.",
+ "actual_code": "const filteredOptions = options.filter(\n\t\t(option) => !selectedSpaces.includes(parseInt(option.value)),\n\t);",
+ "fixed_code": "const filteredOptions = useMemo(() => options.filter(\n\t\t(option) => !selectedSpaces.includes(parseInt(option.value)),\n\t),[options, selectedSpaces]);",
+ "file_name": "packages/ui/shadcn/combobox.tsx",
+ "start_line": 55,
+ "end_line": 57,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Accessibility",
+ "comment": "Some UI elements lack proper accessibility attributes.",
+ "confidence": "moderate",
+ "reason": "Improving accessibility ensures the application is usable by all users, including those with disabilities.",
+ "solution": "Add appropriate aria-labels and other accessibility attributes to interactive elements.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "packages/ui/shadcn/combobox.tsx",
+ "start_line": 65,
+ "end_line": 72,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/dataset/pr_252/issues.json b/.experiments/code_review/dataset/pr_252/issues.json
new file mode 100644
index 00000000..568ce90d
--- /dev/null
+++ b/.experiments/code_review/dataset/pr_252/issues.json
@@ -0,0 +1,92 @@
+[
+ {
+ "topic": "Code Structure and Consistency",
+ "comment": "There are inconsistencies in code formatting and structure across different function calls.",
+ "confidence": "moderate",
+ "reason": "Consistent code structure and formatting improves readability and maintainability. This issue was noted by multiple models.",
+ "solution": "Standardize the formatting of function calls, particularly for `generate_twitter_post` and `generate_linkedin_post`. Consider using multi-line formatting for both for consistency.",
+ "actual_code": "twitter_post = work_summary_generator.generate_twitter_post(summary, user=\"oss_example\")\n\nlinkedin_post = work_summary_generator.generate_linkedin_post(\n summary, user=\"oss_example\"\n)",
+ "fixed_code": "twitter_post = work_summary_generator.generate_twitter_post(\n summary, user=\"oss_example\"\n)\n\nlinkedin_post = work_summary_generator.generate_linkedin_post(\n summary, user=\"oss_example\"\n)",
+ "file_name": "examples/work_summarizer/main.py",
+ "start_line": 59,
+ "end_line": 62,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Code Organization",
+ "comment": "The `WorkSummaryGenerator` class has multiple responsibilities and could be refactored for better organization.",
+ "confidence": "important",
+ "reason": "Separation of Concerns (SoC) principle improves code maintainability and readability.",
+ "solution": "Refactor the `WorkSummaryGenerator` class into separate classes or functions for each responsibility (e.g., summary generation, Twitter post generation, LinkedIn post generation).",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/reviewer/work_summarizer.py",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "The `generate_twitter_post` and `generate_linkedin_post` methods lack error handling.",
+ "confidence": "important",
+ "reason": "Proper error handling improves code robustness and helps with debugging.",
+ "solution": "Add try-except blocks to handle and log any exceptions during the post generation process.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/reviewer/work_summarizer.py",
+ "start_line": 58,
+ "end_line": 74,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 7
+ },
+ {
+ "topic": "Code Duplication",
+ "comment": "There is code duplication in the `generate_twitter_post` and `generate_linkedin_post` methods, and a duplicated print statement for LinkedIn post.",
+ "confidence": "important",
+ "reason": "Code duplication violates the DRY principle and can lead to maintenance issues.",
+ "solution": "Extract common code from `generate_twitter_post` and `generate_linkedin_post` into a shared method. Remove the duplicated print statement for the LinkedIn post.",
+ "actual_code": "print(f\" LinkedIn Post: \\n{linkedin_post}\\n\")",
+ "fixed_code": "",
+ "file_name": "kaizen/reviewer/work_summarizer.py, examples/work_summarizer/main.py",
+ "start_line": 58,
+ "end_line": 74,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Code Documentation",
+ "comment": "The `severity_level` field in the code review prompt lacks detailed explanation.",
+ "confidence": "moderate",
+ "reason": "Clear documentation helps users understand how to use features correctly.",
+ "solution": "Add a more detailed explanation of what each severity level represents in the code review prompt.",
+ "actual_code": "For \"severity_level\" score in range of 1 to 10, 1 being not severe and 10 being critical.",
+ "fixed_code": "For \"severity_level\" score in range of 1 to 10:\n1-3: Minor issues (style, small optimizations)\n4-6: Moderate issues (potential bugs, performance concerns)\n7-8: Major issues (definite bugs, security vulnerabilities)\n9-10: Critical issues (severe security risks, system-breaking bugs)",
+ "file_name": "kaizen/llms/prompts/code_review_prompts.py",
+ "start_line": 100,
+ "end_line": 100,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Prompt Formatting",
+ "comment": "The TWITTER_POST_PROMPT and LINKEDIN_POST_PROMPT could be improved for better readability.",
+ "confidence": "moderate",
+ "reason": "Well-formatted prompts are easier to read and maintain.",
+ "solution": "Break the prompts into multiple lines, use string formatting, and add comments to explain different sections.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/llms/prompts/work_summary_prompts.py",
+ "start_line": 44,
+ "end_line": 65,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 4
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/dataset/pr_335/issues.json b/.experiments/code_review/dataset/pr_335/issues.json
new file mode 100644
index 00000000..cb9843ab
--- /dev/null
+++ b/.experiments/code_review/dataset/pr_335/issues.json
@@ -0,0 +1,92 @@
+[
+ {
+ "topic": "Import Changes",
+ "comment": "Import statements have been changed and some may be unused.",
+ "confidence": "important",
+ "reason": "Changing import paths can lead to runtime errors and unused imports clutter the code. This issue was identified by multiple models.",
+ "solution": "Verify that all new import paths are correct, remove any unused imports, and ensure consistency across the codebase.",
+ "actual_code": "from kaizen.llms.prompts.pr_desc_prompts import (",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 8,
+ "end_line": 8,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Removal of Reevaluation Logic",
+ "comment": "The 'reeval_response' parameter and associated logic have been removed from multiple functions.",
+ "confidence": "critical",
+ "reason": "Removing this parameter and logic could significantly change the behavior of the PR description generation. This was noted as a critical issue by multiple models.",
+ "solution": "Carefully review the impact of removing the reevaluation logic. Ensure that the quality of PR descriptions is maintained without this feature. Consider adding unit tests to verify the new behavior.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 43,
+ "end_line": 96,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "API Change",
+ "comment": "Changed from 'chat_completion_with_json' to 'chat_completion'",
+ "confidence": "important",
+ "reason": "This API change could affect the format of the response and how it's processed. Multiple models highlighted this as an important change.",
+ "solution": "Ensure that the new chat_completion method returns the expected format. Update any dependent code that might be affected by this change. Verify that the response parsing is adjusted accordingly.",
+ "actual_code": "resp, usage = self.provider.chat_completion(prompt, user=user)",
+ "fixed_code": "resp, usage = self.provider.chat_completion(prompt, user=user)\ndesc = parser.extract_code_from_markdown(resp)",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 79,
+ "end_line": 80,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 7
+ },
+ {
+ "topic": "Prompt Changes",
+ "comment": "Significant changes to PR description prompts and system prompts.",
+ "confidence": "important",
+ "reason": "The prompts have been restructured and moved to a new file. This could impact the quality and structure of generated PR descriptions.",
+ "solution": "Review the new prompt structure to ensure it meets all requirements. Test thoroughly to verify that the generated PR descriptions maintain or improve quality. Update any related documentation.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/llms/prompts/pr_desc_prompts.py",
+ "start_line": 1,
+ "end_line": 92,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 7
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Potential lack of error handling for exceptions in PR description generation.",
+ "confidence": "important",
+ "reason": "Proper error handling is crucial for preventing unexpected crashes and providing useful feedback.",
+ "solution": "Implement try-except blocks where appropriate to handle potential exceptions gracefully. Consider using more specific exception types.",
+ "actual_code": "raise Exception(\"Both diff_text and pull_request_files are empty!\")",
+ "fixed_code": "raise ValueError(\"Both diff_text and pull_request_files are empty!\")",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 51,
+ "end_line": 51,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Code Style and Documentation",
+ "comment": "Various minor issues with code style, variable naming, and documentation.",
+ "confidence": "moderate",
+ "reason": "Consistent code style and proper documentation improve readability and maintainability.",
+ "solution": "Review and update variable names to follow PEP 8 conventions. Add docstrings or comments explaining the purpose of new prompts and significant changes.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/llms/prompts/pr_desc_prompts.py, kaizen/generator/pr_description.py",
+ "start_line": 1,
+ "end_line": 1,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/dataset/pr_400/issues.json b/.experiments/code_review/dataset/pr_400/issues.json
new file mode 100644
index 00000000..1bc6c452
--- /dev/null
+++ b/.experiments/code_review/dataset/pr_400/issues.json
@@ -0,0 +1,52 @@
+[
+ {
+ "topic": "Test Coverage",
+ "comment": "Improved test coverage with more comprehensive test cases, including edge cases and error handling scenarios.",
+ "confidence": "important",
+ "reason": "Comprehensive test coverage is crucial for maintaining code quality, catching potential bugs, and ensuring the reliability of the code.",
+ "solution": "Continue to add test cases for edge cases and ensure all new functionality is covered by tests.",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_create_pr_description.py, .kaizen/unit_test/kaizen/helpers/test_create_pr_review_text.py, .kaizen/unit_test/kaizen/helpers/test_create_test_files.py, .kaizen/unit_test/kaizen/helpers/test_get_web_html.py",
+ "sentiment": "positive",
+ "severity_level": 7
+ },
+ {
+ "topic": "Code Structure and Organization",
+ "comment": "Improved code structure with better modularity, organization, and use of pytest fixtures.",
+ "confidence": "important",
+ "reason": "Good organization improves readability, maintainability, and allows for better separation of concerns.",
+ "solution": "Continue to monitor for potential further improvements in code structure and organization.",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_create_test_files.py, kaizen/generator/unit_test.py",
+ "sentiment": "positive",
+ "severity_level": 3
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Insufficient error handling in some methods, particularly for file operations.",
+ "confidence": "important",
+ "reason": "Proper error handling ensures the program remains stable and provides useful error messages.",
+ "solution": "Implement try-except blocks to handle potential errors, especially in file operations and network requests.",
+ "file_name": "kaizen/generator/unit_test.py",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Logging Configuration",
+ "comment": "The logging configuration might override existing setups and is mixed with import statements.",
+ "confidence": "important",
+ "reason": "Inconsistent logging configuration can lead to loss of important log information and poor code organization.",
+ "solution": "Adjust the logging configuration to respect the LOGLEVEL environment variable and move it to a separate section after all imports.",
+ "file_name": "kaizen/llms/provider.py",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Configuration Update",
+ "comment": "Changes made to sensitive configuration file",
+ "confidence": "critical",
+ "reason": "Changes to config.json may affect system functionality and security",
+ "solution": "Review the changes in config.json carefully, especially the new 'base_model' fields, and ensure that the code using this configuration is updated accordingly.",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/dataset/pr_476/issues.json b/.experiments/code_review/dataset/pr_476/issues.json
new file mode 100644
index 00000000..c426f116
--- /dev/null
+++ b/.experiments/code_review/dataset/pr_476/issues.json
@@ -0,0 +1,68 @@
+[
+ {
+ "topic": "Error Handling",
+ "comment": "Broad exception handling with generic error message",
+ "confidence": "critical",
+ "reason": "Using a generic 'except Exception' block with a non-specific error message can mask important errors and make debugging difficult.",
+ "solution": "Catch specific exceptions where possible and provide more informative error messages. Consider using proper logging instead of print statements.",
+ "actual_code": "except Exception:\n print(\"Error\")",
+ "fixed_code": "except KeyError as e:\n logger.error(f\"Invalid confidence level: {e}\")\nexcept Exception as e:\n logger.error(f\"Unexpected error: {e}\")",
+ "file_name": "github_app/github_helper/pull_requests.py",
+ "start_line": 140,
+ "end_line": 141,
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Code Efficiency",
+ "comment": "Inefficient sorting implementation",
+ "confidence": "important",
+ "reason": "The custom sorting logic in 'sort_files' function is unnecessarily complex and inefficient for large lists.",
+ "solution": "Use Python's built-in sorted() function with a key function for better performance and readability.",
+ "actual_code": "def sort_files(files):\n sorted_files = []\n for file in files:\n min_index = len(sorted_files)\n file_name = file[\"filename\"]\n for i, sorted_file in enumerate(sorted_files):\n if file_name < sorted_file[\"filename\"]:\n min_index = i\n break\n sorted_files.insert(min_index, file)\n return sorted_files",
+ "fixed_code": "def sort_files(files):\n return sorted(files, key=lambda x: x[\"filename\"])",
+ "file_name": "github_app/github_helper/pull_requests.py",
+ "start_line": 184,
+ "end_line": 194,
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Code Simplification",
+ "comment": "Overly verbose implementation of generate_tests function",
+ "confidence": "moderate",
+ "reason": "The current implementation of generate_tests function can be simplified using a list comprehension.",
+ "solution": "Use a list comprehension to create the list of filenames.",
+ "actual_code": "def generate_tests(pr_files):\n return [f[\"filename\"] for f in pr_files]",
+ "fixed_code": "def generate_tests(pr_files):\n return [f[\"filename\"] for f in pr_files]",
+ "file_name": "github_app/github_helper/pull_requests.py",
+ "start_line": 199,
+ "end_line": 200,
+ "sentiment": "positive",
+ "severity_level": 3
+ },
+ {
+ "topic": "Logging and Debugging",
+ "comment": "Inconsistent use of print statements for debugging",
+ "confidence": "important",
+ "reason": "Using print statements for debugging can clutter the code and make it difficult to control log levels in different environments.",
+ "solution": "Replace print statements with proper logging calls using Python's logging module.",
+ "actual_code": "print(\"diff: \", diff_text)\nprint(\"pr_files\", pr_files)",
+ "fixed_code": "import logging\n\nlogger = logging.getLogger(__name__)\nlogger.debug(f\"diff: {diff_text}\")\nlogger.debug(f\"pr_files: {pr_files}\")",
+ "file_name": "examples/code_review/main.py",
+ "start_line": 21,
+ "end_line": 22,
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Configuration Changes",
+ "comment": "Changes made to sensitive configuration file",
+ "confidence": "critical",
+ "reason": "Changes to config.json may affect system functionality and security. The removal of 'enable_observability_logging' option needs to be properly documented.",
+ "solution": "Review all changes in config.json carefully. If removing features, provide a migration guide or deprecation notice for existing users.",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/dataset/pr_5/issues.json b/.experiments/code_review/dataset/pr_5/issues.json
new file mode 100644
index 00000000..693b3000
--- /dev/null
+++ b/.experiments/code_review/dataset/pr_5/issues.json
@@ -0,0 +1,93 @@
+[
+ {
+ "topic": "Unused Import",
+ "comment": "The 'random' module is imported but never used in the code.",
+ "confidence": "trivial",
+ "reason": "The 'random' module is imported but not utilized in the code.",
+ "solution": "Remove the unused import statement for 'random'.",
+ "actual_code": "import random # Unused import",
+ "fixed_code": "",
+ "file_name": "main.py",
+ "start_line": 8,
+ "end_line": 8,
+ "severity_level": 1
+ },
+ {
+ "topic": "API Call Error Handling",
+ "comment": "The API call to 'completion' lacks a retry mechanism.",
+ "confidence": "critical",
+ "reason": "API calls can fail due to network issues or server errors, and without a retry mechanism, the function may fail unexpectedly.",
+ "solution": "Implement a retry mechanism with exponential backoff for the API call.",
+ "actual_code": "response = completion(\n model=os.environ.get(\"model\", \"anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1\"), messages=messages\n)",
+ "fixed_code": "import time\n\nfor attempt in range(3):\n try:\n response = completion(\n model=os.environ.get(\"model\", \"anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1\"), messages=messages\n )\n break\n except Exception as e:\n if attempt < 2:\n time.sleep(2 ** attempt)\n else:\n raise e",
+ "file_name": "main.py",
+ "start_line": 66,
+ "end_line": 68,
+ "severity_level": 9
+ },
+ {
+ "topic": "Silent Failure in JSON Parsing",
+ "comment": "The exception handling for JSON decoding fails silently without logging.",
+ "confidence": "critical",
+ "reason": "Silent failures make it difficult to diagnose issues when they occur.",
+ "solution": "Add logging to capture the exception details.",
+ "actual_code": "except json.JSONDecodeError:\n result = {",
+ "fixed_code": "except json.JSONDecodeError as e:\n print(f\"Failed to parse content for applicant: {e}\")\n result = {",
+ "file_name": "main.py",
+ "start_line": 82,
+ "end_line": 84,
+ "severity_level": 8
+ },
+ {
+ "topic": "Inefficient Progress Printing",
+ "comment": "The progress printing method is inefficient.",
+ "confidence": "important",
+ "reason": "Printing progress in this manner can be slow and resource-intensive.",
+ "solution": "Use a more efficient method for printing progress, such as updating the progress less frequently or using a dedicated progress reporting library like tqdm.",
+ "actual_code": "print(f\"\\rProgress:[{'=' * int(50 * progress):<50}]{progress:.0%}\", end=\"\", flush=True)",
+ "fixed_code": "if index % max(1, len(df) // 100) == 0: # Update every 1%\n print(f\"\\rProgress:[{'=' * int(50 * progress):<50}]{progress:.0%}\", end=\"\", flush=True)",
+ "file_name": "main.py",
+ "start_line": 121,
+ "end_line": 122,
+ "severity_level": 5
+ },
+ {
+ "topic": "Redundant Code",
+ "comment": "The check for an empty DataFrame is redundant.",
+ "confidence": "moderate",
+ "reason": "The code already handles an empty DataFrame gracefully, so this check is unnecessary.",
+ "solution": "Remove the redundant check for an empty DataFrame.",
+ "actual_code": "if len(df) == 0:\n return",
+ "fixed_code": "",
+ "file_name": "main.py",
+ "start_line": 142,
+ "end_line": 143,
+ "severity_level": 3
+ },
+ {
+ "topic": "Division by Zero",
+ "comment": "Potential division by zero when calculating total tokens.",
+ "confidence": "critical",
+ "reason": "If 'total_tokens' is zero, it will cause a division by zero error.",
+ "solution": "Add a check to ensure 'total_tokens' is not zero before performing the division.",
+ "actual_code": "print(f\"Total tokens used: {total_tokens:,}\")\nprint(f\" - Input tokens: {total_input_tokens:,}\")\nprint(f\" - Output tokens: {total_output_tokens:,}\")",
+ "fixed_code": "print(f\"Total tokens used: {total_tokens:,}\")\nif total_tokens > 0:\n print(f\" - Input tokens: {total_input_tokens:,} ({total_input_tokens/total_tokens:.2%})\")\n print(f\" - Output tokens: {total_output_tokens:,} ({total_output_tokens/total_tokens:.2%})\")\nelse:\n print(\" - No tokens used.\")",
+ "file_name": "main.py",
+ "start_line": 158,
+ "end_line": 163,
+ "severity_level": 7
+ },
+ {
+ "topic": "File Not Found Handling",
+ "comment": "No error handling for file not found.",
+ "confidence": "important",
+ "reason": "If the specified file does not exist, the program will crash.",
+ "solution": "Add error handling to check if the file exists before processing.",
+ "actual_code": "main(input_file)",
+ "fixed_code": "try:\n main(input_file)\nexcept FileNotFoundError:\n print(f\"Error: The file '{input_file}' does not exist. Please check the file path and try again.\")\nexcept Exception as e:\n print(f\"An error occurred: {e}\")",
+ "file_name": "main.py",
+ "start_line": 174,
+ "end_line": 175,
+ "severity_level": 6
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/evaluate.py b/.experiments/code_review/evaluate.py
new file mode 100644
index 00000000..d36faecb
--- /dev/null
+++ b/.experiments/code_review/evaluate.py
@@ -0,0 +1,179 @@
+import json
+import os
+from fuzzywuzzy import fuzz
+
+
+def load_json(file_path):  # Return the object parsed from the JSON file at file_path.
+    with open(file_path, "r", encoding="utf-8") as f:  # explicit: not the locale default
+        return json.load(f)
+
+
+def fuzzy_match(str1, str2, threshold=50):  # lower-cased partial-ratio score vs. threshold
+    return threshold <= fuzz.partial_ratio(str1.lower(), str2.lower())
+
+
+def compare_issues(ground_truth, model_issues):
+    """Pair ground-truth issues with model issues, one-to-one.
+
+    Two issues match when topic and comment fuzzy-match, file_name and
+    sentiment are equal, and start_line, end_line and severity_level each
+    differ by at most 1. Returns (matched, unmatched_ground_truth,
+    unmatched_model), where matched holds (gt_issue, model_issue) tuples.
+    """
+
+    def within_one(gt_issue, model_issue, key):
+        # Mismatched defaults (0 vs -10) make a key absent from both sides fail.
+        return abs(int(gt_issue.get(key, 0)) - int(model_issue.get(key, -10))) <= 1
+
+    def is_match(gt_issue, model_issue):
+        return (
+            fuzzy_match(gt_issue["topic"], model_issue["topic"])
+            and fuzzy_match(gt_issue["comment"], model_issue["comment"])
+            and gt_issue["file_name"] == model_issue["file_name"]
+            and within_one(gt_issue, model_issue, "start_line")
+            and within_one(gt_issue, model_issue, "end_line")
+            and within_one(gt_issue, model_issue, "severity_level")
+            and gt_issue.get("sentiment", "bad")
+            == model_issue.get("sentiment", "hmm")
+        )
+
+    matched = []
+    unmatched_ground_truth = []
+    used = set()  # indexes of model issues already paired with a ground-truth issue
+
+    for gt_issue in ground_truth:
+        for idx, model_issue in enumerate(model_issues):
+            # Each model issue may satisfy at most one ground-truth issue;
+            # otherwise a single finding could inflate the correct count.
+            if idx not in used and is_match(gt_issue, model_issue):
+                matched.append((gt_issue, model_issue))
+                used.add(idx)
+                break
+        else:
+            unmatched_ground_truth.append(gt_issue)
+
+    unmatched_model = [m for i, m in enumerate(model_issues) if i not in used]
+    return matched, unmatched_ground_truth, unmatched_model
+
+
+def evaluate_model(ground_truth, model_issues):
+    """Score model_issues against ground_truth using compare_issues.
+
+    Returns a dict of counts (total_issues, issues_found, correct_issues,
+    false_positives, false_negatives) and the derived precision, recall and
+    f1_score. A ratio whose denominator is zero is reported as 0.
+    """
+    matched, unmatched_gt, unmatched_model = compare_issues(ground_truth, model_issues)
+
+    total_issues = len(ground_truth)
+    issues_found = len(model_issues)
+    correct_issues = len(matched)
+    false_positives = len(unmatched_model)
+    predicted = correct_issues + false_positives
+
+    precision = correct_issues / predicted if predicted > 0 else 0
+    # Guard: an empty ground-truth list would otherwise divide by zero.
+    recall = correct_issues / total_issues if total_issues > 0 else 0
+    denominator = precision + recall
+    f1_score = 2 * (precision * recall) / denominator if denominator > 0 else 0
+
+    return {
+        "total_issues": total_issues,
+        "issues_found": issues_found,
+        "correct_issues": correct_issues,
+        "false_positives": false_positives,
+        "false_negatives": len(unmatched_gt),
+        "precision": precision,
+        "recall": recall,
+        "f1_score": f1_score,
+    }
+
+
+def main(folder_name):
+    """Evaluate one model's review output against the ground-truth dataset.
+
+    For every pr_* folder under the dataset directory, loads its issues.json
+    and the matching file under .experiments/code_review/<folder_name>/no_eval,
+    prints per-PR metrics, then prints totals across all evaluated PRs.
+    """
+    dataset_path = ".experiments/code_review/dataset"
+    model_base_path = os.path.join(".experiments", "code_review", folder_name, "no_eval")
+    summed_keys = ("total_issues", "correct_issues", "false_positives", "false_negatives")
+    overall_results = dict.fromkeys(summed_keys, 0)
+
+    pr_count = 0
+
+    # sorted(): os.listdir order is arbitrary; keep the report order stable.
+    for pr_folder in sorted(os.listdir(dataset_path)):
+        if not pr_folder.startswith("pr_"):
+            continue
+        pr_number = pr_folder.split("_")[1]
+        ground_truth_path = os.path.join(dataset_path, pr_folder, "issues.json")
+        model_path = os.path.join(model_base_path, f"pr_{pr_number}", "issues.json")
+
+        if not os.path.exists(ground_truth_path):
+            print(f"Ground truth file not found for PR {pr_number}")
+            continue
+        if not os.path.exists(model_path):
+            print(f"Model output file not found for {folder_name} on PR {pr_number}")
+            continue
+
+        ground_truth = load_json(ground_truth_path)
+        model_issues = load_json(model_path)
+
+        results = evaluate_model(ground_truth, model_issues)
+
+        print(f"\nEvaluation Results for {folder_name} on PR {pr_number}:")
+        print(f" Issues Found: {results['issues_found']}")
+        print(
+            f" Correct issues: {results['correct_issues']}/{results['total_issues']}"
+        )
+        print(f" False positives: {results['false_positives']}")
+        print(f" False negatives: {results['false_negatives']}")
+        print(f" Precision: {results['precision']:.2f}")
+        print(f" Recall: {results['recall']:.2f}")
+        print(f" F1 Score: {results['f1_score']:.2f}")
+
+        for key in summed_keys:
+            overall_results[key] += results[key]
+
+        pr_count += 1
+
+    if pr_count == 0:
+        print(f"No valid PRs found for evaluation of {folder_name}")
+        return
+
+    # A zero denominator (nothing reported, or empty ground truth) scores 0
+    # rather than raising ZeroDivisionError.
+    reported = overall_results["correct_issues"] + overall_results["false_positives"]
+    overall_precision = (
+        overall_results["correct_issues"] / reported if reported > 0 else 0
+    )
+    overall_recall = (
+        overall_results["correct_issues"] / overall_results["total_issues"]
+        if overall_results["total_issues"] > 0
+        else 0
+    )
+    overall_f1 = (
+        2
+        * (overall_precision * overall_recall)
+        / (overall_precision + overall_recall)
+        if (overall_precision + overall_recall) > 0
+        else 0
+    )
+
+    print(f"\nOverall Results for {folder_name}:")
+    print(f" Total PRs evaluated: {pr_count}")
+    print(
+        f" Correct issues: {overall_results['correct_issues']}/{overall_results['total_issues']}"
+    )
+    print(f" False positives: {overall_results['false_positives']}")
+    print(f" False negatives: {overall_results['false_negatives']}")
+    print(f" Precision: {overall_precision:.2f}")
+    print(f" Recall: {overall_recall:.2f}")
+    print(f" F1 Score: {overall_f1:.2f}")
+
+
+if __name__ == "__main__":
+    # Ask which model's results folder to score, then run the evaluation on it.
+    main(input("Enter the model name (e.g., gpt-4o): "))
diff --git a/.experiments/code_review/gpt-4o-mini/no_eval/pr_222/comments.json b/.experiments/code_review/gpt-4o-mini/no_eval/pr_222/comments.json
new file mode 100644
index 00000000..e09425aa
--- /dev/null
+++ b/.experiments/code_review/gpt-4o-mini/no_eval/pr_222/comments.json
@@ -0,0 +1,102 @@
+[
+ {
+ "topic": "Environment Variables",
+ "comment": "Using 'os.environ' directly in JSON is not valid.",
+ "confidence": "critical",
+ "reason": "Environment variables should be accessed in the code, not hardcoded in configuration files.",
+ "solution": "Use a placeholder or variable in the code to fetch environment variables.",
+ "actual_code": " \"api_key\": \"os.environ/AZURE_API_KEY\",",
+ "fixed_code": " \"api_key\": \"AZURE_API_KEY\", // Access this in the code",
+ "file_name": "config.json",
+ "start_line": 13,
+ "end_line": 13,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Lack of error handling in database operations.",
+ "confidence": "critical",
+ "reason": "Database operations can fail due to various reasons (e.g., connection issues, SQL errors). Without error handling, the application may crash or behave unexpectedly.",
+ "solution": "Wrap database operations in try-except blocks to handle potential exceptions gracefully.",
+ "actual_code": "41 +1:[+] cur.execute(query, (query_embedding_normalized.tolist(), repo_id, similarity_top_k))",
+ "fixed_code": "41 +1:[+] try:\n41.1 +1:[+] cur.execute(query, (query_embedding_normalized.tolist(), repo_id, similarity_top_k))\n41.2 +1:[+] except Exception as e:\n41.3 +1:[+] # Handle the exception (e.g., log it, raise a custom error, etc.)\n41.4 +1:[+] raise RuntimeError('Database query failed') from e",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 39,
+ "end_line": 41,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to Dockerfile, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "Dockerfile",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to docker-compose.yml, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "15",
+ "end_line": "15",
+ "side": "RIGHT",
+ "file_name": "docker-compose.yml",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Version Control",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to .gitignore, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "164",
+ "end_line": "164",
+ "side": "RIGHT",
+ "file_name": ".gitignore",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Database",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to db_setup/init.sql, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "1",
+ "end_line": "1",
+ "side": "RIGHT",
+ "file_name": "db_setup/init.sql",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o-mini/no_eval/pr_222/issues.json b/.experiments/code_review/gpt-4o-mini/no_eval/pr_222/issues.json
new file mode 100644
index 00000000..061bf872
--- /dev/null
+++ b/.experiments/code_review/gpt-4o-mini/no_eval/pr_222/issues.json
@@ -0,0 +1,342 @@
+[
+ {
+ "topic": "Environment Variables",
+ "comment": "Using 'os.environ' directly in JSON is not valid.",
+ "confidence": "critical",
+ "reason": "Environment variables should be accessed in the code, not hardcoded in configuration files.",
+ "solution": "Use a placeholder or variable in the code to fetch environment variables.",
+ "actual_code": " \"api_key\": \"os.environ/AZURE_API_KEY\",",
+ "fixed_code": " \"api_key\": \"AZURE_API_KEY\", // Access this in the code",
+ "file_name": "config.json",
+ "start_line": 13,
+ "end_line": 13,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Dockerfile",
+ "comment": "Consider using multi-stage builds to reduce image size.",
+ "confidence": "important",
+ "reason": "Multi-stage builds can help keep the final image smaller by excluding build dependencies.",
+ "solution": "Use a multi-stage build pattern to install dependencies and copy only necessary files.",
+ "actual_code": "RUN apt-get update && apt-get install -y \\",
+ "fixed_code": "FROM python:3.9 AS builder\nRUN apt-get update && apt-get install -y build-essential git\n\nFROM python:3.9\nCOPY --from=builder /app /app",
+ "file_name": "Dockerfile",
+ "start_line": 8,
+ "end_line": 12,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Docker Compose",
+ "comment": "Ensure proper indentation for YAML files.",
+ "confidence": "important",
+ "reason": "Improper indentation can lead to YAML parsing errors.",
+ "solution": "Review the indentation levels for all entries in the YAML file.",
+ "actual_code": " networks:\n - app-network",
+ "fixed_code": " networks:\n - app-network",
+ "file_name": "docker-compose-dev.yml",
+ "start_line": 15,
+ "end_line": 16,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "SQL Initialization",
+ "comment": "Add comments to SQL statements for clarity.",
+ "confidence": "moderate",
+ "reason": "Comments can help future developers understand the purpose of each SQL command.",
+ "solution": "Add comments above each CREATE TABLE statement.",
+ "actual_code": "CREATE TABLE repositories (",
+ "fixed_code": "-- Table to store repository information\nCREATE TABLE repositories (",
+ "file_name": "db_setup/init.sql",
+ "start_line": 4,
+ "end_line": 4,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 4
+ },
+ {
+ "topic": "Shell Script",
+ "comment": "Add error handling to the shell script.",
+ "confidence": "important",
+ "reason": "Error handling can prevent the script from failing silently.",
+ "solution": "Use 'set -e' at the beginning of the script to exit on errors.",
+ "actual_code": "#!/bin/bash",
+ "fixed_code": "#!/bin/bash\nset -e",
+ "file_name": "install_tree_sitter_languages.sh",
+ "start_line": 1,
+ "end_line": 1,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 7
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Lack of error handling in database operations.",
+ "confidence": "critical",
+ "reason": "Database operations can fail due to various reasons (e.g., connection issues, SQL errors). Without error handling, the application may crash or behave unexpectedly.",
+ "solution": "Wrap database operations in try-except blocks to handle potential exceptions gracefully.",
+ "actual_code": "41 +1:[+] cur.execute(query, (query_embedding_normalized.tolist(), repo_id, similarity_top_k))",
+ "fixed_code": "41 +1:[+] try:\n41.1 +1:[+] cur.execute(query, (query_embedding_normalized.tolist(), repo_id, similarity_top_k))\n41.2 +1:[+] except Exception as e:\n41.3 +1:[+] # Handle the exception (e.g., log it, raise a custom error, etc.)\n41.4 +1:[+] raise RuntimeError('Database query failed') from e",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 39,
+ "end_line": 41,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Code Readability",
+ "comment": "Consider adding docstrings to methods for better understanding.",
+ "confidence": "important",
+ "reason": "Docstrings help other developers understand the purpose and usage of methods, especially in larger codebases.",
+ "solution": "Add docstrings to the `__init__` and `custom_query` methods to describe their parameters and return values.",
+ "actual_code": "8 +1:[+] def __init__(self, *args, **kwargs):",
+ "fixed_code": "8 +1:[+] def __init__(self, *args, **kwargs):\n8.1 +1:[+] \"\"\"Initialize the CustomPGVectorStore with table name and other parameters.\"\"\"",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 8,
+ "end_line": 8,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Code Readability",
+ "comment": "Consider adding type hints for method parameters and return types.",
+ "confidence": "important",
+ "reason": "Type hints improve code clarity and help with static type checking, making it easier for developers to understand expected types.",
+ "solution": "Add type hints to the `custom_query` method parameters and return type.",
+ "actual_code": "13 +1:[+] def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[dict]:",
+ "fixed_code": "13 +1:[+] def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[Dict[str, Any]]:",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 13,
+ "end_line": 13,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Code Readability",
+ "comment": "Consider using f-strings for consistency in SQL query construction.",
+ "confidence": "moderate",
+ "reason": "Using f-strings consistently improves readability and reduces the risk of SQL injection if not handled properly.",
+ "solution": "Use f-strings for constructing the SQL query instead of concatenation.",
+ "actual_code": "19 +1:[+] query = f\"\"\"\n26 +1:[+]{self.table_name}e\n32 +1:[+] f.repo_id = %s\n36 +1:[+] %s\n\"\"\"",
+ "fixed_code": "19 +1:[+] query = f\"\"\"\n19.1 +1:[+] SELECT \n19.2 +1:[+] e.node_id,\n19.3 +1:[+] e.text,\n19.4 +1:[+] e.metadata,\n19.5 +1:[+] 1 - (e.embedding <=> %s::vector) as similarity\n19.6 +1:[+] FROM \n19.7 +1:[+]{self.table_name}e\n19.8 +1:[+] JOIN \n19.9 +1:[+] function_abstractions fa ON e.node_id = fa.function_id::text\n19.10 +1:[+] JOIN \n19.11 +1:[+] files f ON fa.file_id = f.file_id\n19.12 +1:[+] WHERE \n19.13 +1:[+] f.repo_id = %s\n19.14 +1:[+] ORDER BY \n19.15 +1:[+] similarity DESC\n19.16 +1:[+] LIMIT \n19.17 +1:[+] %s\n\"\"\"",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 19,
+ "end_line": 37,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Potential unhandled exceptions in `generate_abstraction` method.",
+ "confidence": "important",
+ "reason": "Raising the exception without handling it can lead to application crashes.",
+ "solution": "Wrap the call to `self.llm_provider.chat_completion` in a try-except block to handle specific exceptions gracefully.",
+ "actual_code": " raise e",
+ "fixed_code": " logger.error(f'Error in generating abstraction:{str(e)}')\n return None, None",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 218,
+ "end_line": 219,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Logging",
+ "comment": "Inconsistent logging levels for errors and debugging.",
+ "confidence": "important",
+ "reason": "Using `logger.debug` for important errors can lead to missed critical information in production logs.",
+ "solution": "Use `logger.error` for logging errors and `logger.debug` for detailed debugging information.",
+ "actual_code": " logger.debug(f\"Successfully parsed file:{file_path}\")",
+ "fixed_code": " logger.info(f\"Successfully parsed file:{file_path}\")",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 107,
+ "end_line": 107,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Code Readability",
+ "comment": "Lack of comments explaining complex logic in methods.",
+ "confidence": "moderate",
+ "reason": "While the code is mostly clear, some complex sections could benefit from additional comments for future maintainability.",
+ "solution": "Add comments to explain the purpose and logic of complex code blocks, especially in `store_code_in_db` and `query` methods.",
+ "actual_code": "",
+ "fixed_code": " # This method stores the code and its abstraction in the database.\n # Ensure to handle potential conflicts and return the function ID.",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 246,
+ "end_line": 246,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Environment Variables",
+ "comment": "Direct use of environment variables without validation.",
+ "confidence": "important",
+ "reason": "Using environment variables directly can lead to runtime errors if they are not set or misspelled.",
+ "solution": "Implement checks to ensure that required environment variables are set before using them.",
+ "actual_code": " self.engine = create_engine(\n f\"postgresql://{os.environ['POSTGRES_USER']}:{os.environ['POSTGRES_PASSWORD']}@{os.environ['POSTGRES_HOST']}:{os.environ['POSTGRES_PORT']}/{os.environ['POSTGRES_DB']}\",",
+ "fixed_code": " required_env_vars =['POSTGRES_USER', 'POSTGRES_PASSWORD', 'POSTGRES_HOST', 'POSTGRES_PORT', 'POSTGRES_DB']\n for var in required_env_vars:\n if var not in os.environ:\n raise EnvironmentError(f'Missing required environment variable:{var}')",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 35,
+ "end_line": 39,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Broad exception handling in load_language and get_parser methods.",
+ "confidence": "important",
+ "reason": "Using a generic Exception can obscure the root cause of issues and make debugging difficult.",
+ "solution": "Catch specific exceptions where possible, and log the relevant error messages.",
+ "actual_code": "except Exception as e:",
+ "fixed_code": "except (ImportError, ValueError) as e:",
+ "file_name": "kaizen/retriever/tree_sitter_utils.py",
+ "start_line": 28,
+ "end_line": 28,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Logging",
+ "comment": "Consider adding more context to log messages.",
+ "confidence": "moderate",
+ "reason": "Current log messages may not provide enough context for troubleshooting.",
+ "solution": "Include the function name or additional context in the log messages.",
+ "actual_code": "logger.error(f\"Failed to load language{language}:{str(e)}\")",
+ "fixed_code": "logger.error(f\"{__name__}.load_language failed for{language}:{str(e)}\")",
+ "file_name": "kaizen/retriever/tree_sitter_utils.py",
+ "start_line": 29,
+ "end_line": 29,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Code Duplication",
+ "comment": "Duplicated code in traverse_tree for handling different node types.",
+ "confidence": "important",
+ "reason": "Code duplication can lead to maintenance challenges and potential inconsistencies.",
+ "solution": "Consider refactoring to a helper function that handles common logic.",
+ "actual_code": "return{\"type\": \"function\", \"name\": (node.child_by_field_name(\"name\").text.decode(\"utf8\") if node.child_by_field_name(\"name\") else \"anonymous\"), \"code\": code_bytes[node.start_byte : node.end_byte].decode(\"utf8\")}",
+ "fixed_code": "def extract_node_info(node, code_bytes): ...",
+ "file_name": "kaizen/retriever/tree_sitter_utils.py",
+ "start_line": 55,
+ "end_line": 68,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Function Naming",
+ "comment": "Function names could be more descriptive.",
+ "confidence": "low",
+ "reason": "Descriptive names improve code readability and maintainability.",
+ "solution": "Consider renaming functions to reflect their purpose more clearly.",
+ "actual_code": "def parse_code(code: str, language: str) -> Dict[str, Any]:",
+ "fixed_code": "def parse_source_code(source_code: str, language: str) -> Dict[str, Any]:",
+ "file_name": "kaizen/retriever/tree_sitter_utils.py",
+ "start_line": 91,
+ "end_line": 91,
+ "side": "LEFT",
+ "sentiment": "positive",
+ "severity_level": 4
+ },
+ {
+ "topic": "Dependency Management",
+ "comment": "Ensure that dependencies in pyproject.toml are up-to-date.",
+ "confidence": "important",
+ "reason": "Using outdated dependencies can lead to security vulnerabilities and compatibility issues.",
+ "solution": "Regularly review and update dependencies to the latest stable versions.",
+ "actual_code": "python = \"^3.8.1\"",
+ "fixed_code": "python = \"^3.9.0\"",
+ "file_name": "pyproject.toml",
+ "start_line": 13,
+ "end_line": 13,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to Dockerfile, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "Dockerfile",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to docker-compose.yml, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "15",
+ "end_line": "15",
+ "side": "RIGHT",
+ "file_name": "docker-compose.yml",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Version Control",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to .gitignore, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "164",
+ "end_line": "164",
+ "side": "RIGHT",
+ "file_name": ".gitignore",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Database",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to db_setup/init.sql, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "1",
+ "end_line": "1",
+ "side": "RIGHT",
+ "file_name": "db_setup/init.sql",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o-mini/no_eval/pr_222/review.md b/.experiments/code_review/gpt-4o-mini/no_eval/pr_222/review.md
new file mode 100644
index 00000000..a5f856a0
--- /dev/null
+++ b/.experiments/code_review/gpt-4o-mini/no_eval/pr_222/review.md
@@ -0,0 +1,402 @@
+# 🔍 Code Review Summary
+
+❗ **Attention Required:** This push has potential issues. 🚨
+
+## 📊 Stats
+- Total Issues: 23
+- Critical: 7
+- Important: 11
+- Minor: 4
+- Files Affected: 11
+## 🏆 Code Quality
+[█████████████████░░░] 85% (Good)
+
+## 🚨 Critical Issues
+
+
+Environment Variables (7 issues)
+
+### 1. Using 'os.environ' directly in JSON is not valid.
+📁 **File:** `config.json:13`
+⚖️ **Severity:** 8/10
+🔍 **Description:** Environment variables should be accessed in the code, not hardcoded in configuration files.
+💡 **Solution:** Use a placeholder or variable in the code to fetch environment variables.
+
+**Current Code:**
+```python
+ "api_key": "os.environ/AZURE_API_KEY",
+```
+
+**Suggested Code:**
+```python
+ "api_key": "AZURE_API_KEY", // Access this in the code
+```
+
+### 2. Lack of error handling in database operations.
+📁 **File:** `kaizen/retriever/custom_vector_store.py:39`
+⚖️ **Severity:** 9/10
+🔍 **Description:** Database operations can fail due to various reasons (e.g., connection issues, SQL errors). Without error handling, the application may crash or behave unexpectedly.
+💡 **Solution:** Wrap database operations in try-except blocks to handle potential exceptions gracefully.
+
+**Current Code:**
+```python
+41 +1:[+] cur.execute(query, (query_embedding_normalized.tolist(), repo_id, similarity_top_k))
+```
+
+**Suggested Code:**
+```python
+41 +1:[+] try:
+41.1 +1:[+] cur.execute(query, (query_embedding_normalized.tolist(), repo_id, similarity_top_k))
+41.2 +1:[+] except Exception as e:
+41.3 +1:[+] # Handle the exception (e.g., log it, raise a custom error, etc.)
+41.4 +1:[+] raise RuntimeError('Database query failed') from e
+```
+
+### 3. Changes made to sensitive file
+📁 **File:** `config.json:4`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to config.json, which needs review
+💡 **Solution:** NA
+
+### 4. Changes made to sensitive file
+📁 **File:** `Dockerfile:4`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to Dockerfile, which needs review
+💡 **Solution:** NA
+
+### 5. Changes made to sensitive file
+📁 **File:** `docker-compose.yml:15`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to docker-compose.yml, which needs review
+💡 **Solution:** NA
+
+### 6. Changes made to sensitive file
+📁 **File:** `.gitignore:164`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to .gitignore, which needs review
+💡 **Solution:** NA
+
+### 7. Changes made to sensitive file
+📁 **File:** `db_setup/init.sql:1`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to db_setup/init.sql, which needs review
+💡 **Solution:** NA
+
+
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Dockerfile (11 issues)
+
+### 1. Consider using multi-stage builds to reduce image size.
+📁 **File:** `Dockerfile:8`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Multi-stage builds can help keep the final image smaller by excluding build dependencies.
+💡 **Solution:** Use a multi-stage build pattern to install dependencies and copy only necessary files.
+
+**Current Code:**
+```python
+RUN apt-get update && apt-get install -y \
+```
+
+**Suggested Code:**
+```python
+FROM python:3.9 AS builder
+RUN apt-get update && apt-get install -y build-essential git
+
+FROM python:3.9
+COPY --from=builder /app /app
+```
+
+### 2. Ensure proper indentation for YAML files.
+📁 **File:** `docker-compose-dev.yml:15`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Improper indentation can lead to YAML parsing errors.
+💡 **Solution:** Review the indentation levels for all entries in the YAML file.
+
+**Current Code:**
+```python
+ networks:
+ - app-network
+```
+
+**Suggested Code:**
+```python
+ networks:
+ - app-network
+```
+
+### 3. Add error handling to the shell script.
+📁 **File:** `install_tree_sitter_languages.sh:1`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Error handling can prevent the script from failing silently.
+💡 **Solution:** Use 'set -e' at the beginning of the script to exit on errors.
+
+**Current Code:**
+```python
+#!/bin/bash
+```
+
+**Suggested Code:**
+```python
+#!/bin/bash
+set -e
+```
+
+### 4. Consider adding docstrings to methods for better understanding.
+📁 **File:** `kaizen/retriever/custom_vector_store.py:8`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Docstrings help other developers understand the purpose and usage of methods, especially in larger codebases.
+💡 **Solution:** Add docstrings to the `__init__` and `custom_query` methods to describe their parameters and return values.
+
+**Current Code:**
+```python
+8 +1:[+] def __init__(self, *args, **kwargs):
+```
+
+**Suggested Code:**
+```python
+8 +1:[+] def __init__(self, *args, **kwargs):
+8.1 +1:[+] """Initialize the CustomPGVectorStore with table name and other parameters."""
+```
+
+### 5. Consider adding type hints for method parameters and return types.
+📁 **File:** `kaizen/retriever/custom_vector_store.py:13`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Type hints improve code clarity and help with static type checking, making it easier for developers to understand expected types.
+💡 **Solution:** Add type hints to the `custom_query` method parameters and return type.
+
+**Current Code:**
+```python
+13 +1:[+] def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[dict]:
+```
+
+**Suggested Code:**
+```python
+13 +1:[+] def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[Dict[str, Any]]:
+```
+
+### 6. Potential unhandled exceptions in `generate_abstraction` method.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:218`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Raising the exception without handling it can lead to application crashes.
+💡 **Solution:** Wrap the call to `self.llm_provider.chat_completion` in a try-except block to handle specific exceptions gracefully.
+
+**Current Code:**
+```python
+ raise e
+```
+
+**Suggested Code:**
+```python
+ logger.error(f'Error in generating abstraction:{str(e)}')
+ return None, None
+```
+
+### 7. Inconsistent logging levels for errors and debugging.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:107`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Using `logger.debug` for important errors can lead to missed critical information in production logs.
+💡 **Solution:** Use `logger.error` for logging errors and `logger.debug` for detailed debugging information.
+
+**Current Code:**
+```python
+ logger.debug(f"Successfully parsed file:{file_path}")
+```
+
+**Suggested Code:**
+```python
+ logger.info(f"Successfully parsed file:{file_path}")
+```
+
+### 8. Direct use of environment variables without validation.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:35`
+⚖️ **Severity:** 8/10
+🔍 **Description:** Using environment variables directly can lead to runtime errors if they are not set or misspelled.
+💡 **Solution:** Implement checks to ensure that required environment variables are set before using them.
+
+**Current Code:**
+```python
+ self.engine = create_engine(
+ f"postgresql://{os.environ['POSTGRES_USER']}:{os.environ['POSTGRES_PASSWORD']}@{os.environ['POSTGRES_HOST']}:{os.environ['POSTGRES_PORT']}/{os.environ['POSTGRES_DB']}",
+```
+
+**Suggested Code:**
+```python
+ required_env_vars =['POSTGRES_USER', 'POSTGRES_PASSWORD', 'POSTGRES_HOST', 'POSTGRES_PORT', 'POSTGRES_DB']
+ for var in required_env_vars:
+ if var not in os.environ:
+ raise EnvironmentError(f'Missing required environment variable:{var}')
+```
+
+### 9. Broad exception handling in load_language and get_parser methods.
+📁 **File:** `kaizen/retriever/tree_sitter_utils.py:28`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Using a generic Exception can obscure the root cause of issues and make debugging difficult.
+💡 **Solution:** Catch specific exceptions where possible, and log the relevant error messages.
+
+**Current Code:**
+```python
+except Exception as e:
+```
+
+**Suggested Code:**
+```python
+except (ImportError, ValueError) as e:
+```
+
+### 10. Duplicated code in traverse_tree for handling different node types.
+📁 **File:** `kaizen/retriever/tree_sitter_utils.py:55`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Code duplication can lead to maintenance challenges and potential inconsistencies.
+💡 **Solution:** Consider refactoring to a helper function that handles common logic.
+
+**Current Code:**
+```python
+return{"type": "function", "name": (node.child_by_field_name("name").text.decode("utf8") if node.child_by_field_name("name") else "anonymous"), "code": code_bytes[node.start_byte : node.end_byte].decode("utf8")}
+```
+
+**Suggested Code:**
+```python
+def extract_node_info(node, code_bytes): ...
+```
+
+### 11. Ensure that dependencies in pyproject.toml are up-to-date.
+📁 **File:** `pyproject.toml:13`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Using outdated dependencies can lead to security vulnerabilities and compatibility issues.
+💡 **Solution:** Regularly review and update dependencies to the latest stable versions.
+
+**Current Code:**
+```python
+python = "^3.8.1"
+```
+
+**Suggested Code:**
+```python
+python = "^3.9.0"
+```
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (5 issues)
+
+
+SQL Initialization (4 issues)
+
+### 1. Add comments to SQL statements for clarity.
+📁 **File:** `db_setup/init.sql:4`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Comments can help future developers understand the purpose of each SQL command.
+💡 **Solution:** Add comments above each CREATE TABLE statement.
+
+**Current Code:**
+```python
+CREATE TABLE repositories (
+```
+
+**Suggested Code:**
+```python
+-- Table to store repository information
+CREATE TABLE repositories (
+```
+
+### 2. Consider using f-strings for consistency in SQL query construction.
+📁 **File:** `kaizen/retriever/custom_vector_store.py:19`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Using f-strings consistently improves readability and reduces the risk of SQL injection if not handled properly.
+💡 **Solution:** Use f-strings for constructing the SQL query instead of concatenation.
+
+**Current Code:**
+```python
+19 +1:[+] query = f"""
+26 +1:[+]{self.table_name}e
+32 +1:[+] f.repo_id = %s
+36 +1:[+] %s
+"""
+```
+
+**Suggested Code:**
+```python
+19 +1:[+] query = f"""
+19.1 +1:[+] SELECT
+19.2 +1:[+] e.node_id,
+19.3 +1:[+] e.text,
+19.4 +1:[+] e.metadata,
+19.5 +1:[+] 1 - (e.embedding <=> %s::vector) as similarity
+19.6 +1:[+] FROM
+19.7 +1:[+]{self.table_name}e
+19.8 +1:[+] JOIN
+19.9 +1:[+] function_abstractions fa ON e.node_id = fa.function_id::text
+19.10 +1:[+] JOIN
+19.11 +1:[+] files f ON fa.file_id = f.file_id
+19.12 +1:[+] WHERE
+19.13 +1:[+] f.repo_id = %s
+19.14 +1:[+] ORDER BY
+19.15 +1:[+] similarity DESC
+19.16 +1:[+] LIMIT
+19.17 +1:[+] %s
+"""
+```
+
+### 3. Lack of comments explaining complex logic in methods.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:246`
+⚖️ **Severity:** 4/10
+🔍 **Description:** While the code is mostly clear, some complex sections could benefit from additional comments for future maintainability.
+💡 **Solution:** Add comments to explain the purpose and logic of complex code blocks, especially in `store_code_in_db` and `query` methods.
+
+**Current Code:**
+```python
+
+```
+
+**Suggested Code:**
+```python
+ # This method stores the code and its abstraction in the database.
+ # Ensure to handle potential conflicts and return the function ID.
+```
+
+### 4. Consider adding more context to log messages.
+📁 **File:** `kaizen/retriever/tree_sitter_utils.py:29`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Current log messages may not provide enough context for troubleshooting.
+💡 **Solution:** Include the function name or additional context in the log messages.
+
+**Current Code:**
+```python
+logger.error(f"Failed to load language{language}:{str(e)}")
+```
+
+**Suggested Code:**
+```python
+logger.error(f"{__name__}.load_language failed for{language}:{str(e)}")
+```
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage
+{"prompt_tokens": 21545, "completion_tokens": 3763, "total_tokens": 25308}
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o-mini/no_eval/pr_335/comments.json b/.experiments/code_review/gpt-4o-mini/no_eval/pr_335/comments.json
new file mode 100644
index 00000000..dfddeae0
--- /dev/null
+++ b/.experiments/code_review/gpt-4o-mini/no_eval/pr_335/comments.json
@@ -0,0 +1,17 @@
+[
+ {
+ "topic": "Parameter Removal",
+ "comment": "Removal of 'reeval_response' parameter may affect functionality.",
+ "confidence": "critical",
+ "reason": "This parameter is used in multiple methods, and its removal could lead to unexpected behavior.",
+ "solution": "Evaluate the necessity of this parameter and ensure that its removal does not break existing functionality.",
+ "actual_code": "reeval_response: bool = False,",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 43,
+ "end_line": 43,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 9
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o-mini/no_eval/pr_335/issues.json b/.experiments/code_review/gpt-4o-mini/no_eval/pr_335/issues.json
new file mode 100644
index 00000000..b93239cf
--- /dev/null
+++ b/.experiments/code_review/gpt-4o-mini/no_eval/pr_335/issues.json
@@ -0,0 +1,77 @@
+[
+ {
+ "topic": "Imports",
+ "comment": "Inconsistent import statements.",
+ "confidence": "important",
+ "reason": "Changing import paths can lead to confusion and potential import errors.",
+ "solution": "Ensure that all import paths are updated consistently throughout the codebase.",
+ "actual_code": "from kaizen.llms.prompts.code_review_prompts import (",
+ "fixed_code": "from kaizen.llms.prompts.pr_desc_prompts import (",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 8,
+ "end_line": 8,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Parameter Removal",
+ "comment": "Removal of 'reeval_response' parameter may affect functionality.",
+ "confidence": "critical",
+ "reason": "This parameter is used in multiple methods, and its removal could lead to unexpected behavior.",
+ "solution": "Evaluate the necessity of this parameter and ensure that its removal does not break existing functionality.",
+ "actual_code": "reeval_response: bool = False,",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 43,
+ "end_line": 43,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Lack of error handling for potential exceptions.",
+ "confidence": "important",
+ "reason": "The absence of error handling can cause the application to crash unexpectedly.",
+ "solution": "Implement try-except blocks where appropriate to handle potential exceptions gracefully.",
+ "actual_code": "raise Exception(\"Both diff_text and pull_request_files are empty!\")",
+ "fixed_code": "raise ValueError(\"Both diff_text and pull_request_files are empty!\")",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 51,
+ "end_line": 51,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Functionality Changes",
+ "comment": "Changes to function calls may alter expected behavior.",
+ "confidence": "important",
+ "reason": "Switching from 'chat_completion_with_json' to 'chat_completion' may change the output format.",
+ "solution": "Review the expected output of the new function and ensure compatibility with existing code.",
+ "actual_code": "resp, usage = self.provider.chat_completion_with_json(prompt, user=user)",
+ "fixed_code": "resp, usage = self.provider.chat_completion(prompt, user=user)",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 83,
+ "end_line": 83,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 8
+ },
+ {
+ "topic": "Documentation",
+ "comment": "Missing documentation for new prompts.",
+ "confidence": "moderate",
+ "reason": "Lack of documentation can make it harder for other developers to understand the purpose of new prompts.",
+ "solution": "Add docstrings or comments explaining the purpose of each new prompt.",
+ "actual_code": "",
+ "fixed_code": "# This prompt is used to generate a PR description based on the provided details.",
+ "file_name": "kaizen/llms/prompts/pr_desc_prompts.py",
+ "start_line": 1,
+ "end_line": 1,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o-mini/no_eval/pr_335/review.md b/.experiments/code_review/gpt-4o-mini/no_eval/pr_335/review.md
new file mode 100644
index 00000000..7fdf7ed4
--- /dev/null
+++ b/.experiments/code_review/gpt-4o-mini/no_eval/pr_335/review.md
@@ -0,0 +1,139 @@
+# 🔍 Code Review Summary
+
+❗ **Attention Required:** This push has potential issues. 🚨
+
+## 📊 Stats
+- Total Issues: 5
+- Critical: 1
+- Important: 3
+- Minor: 1
+- Files Affected: 2
+## 🏆 Code Quality
+[█████████████████░░░] 85% (Good)
+
+## 🚨 Critical Issues
+
+
+Parameter Removal (1 issue)
+
+### 1. Removal of 'reeval_response' parameter may affect functionality.
+📁 **File:** `kaizen/generator/pr_description.py:43`
+⚖️ **Severity:** 9/10
+🔍 **Description:** This parameter is used in multiple methods, and its removal could lead to unexpected behavior.
+💡 **Solution:** Evaluate the necessity of this parameter and ensure that its removal does not break existing functionality.
+
+**Current Code:**
+```python
+reeval_response: bool = False,
+```
+
+**Suggested Code:**
+```python
+
+```
+
+
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Imports (3 issues)
+
+### 1. Inconsistent import statements.
+📁 **File:** `kaizen/generator/pr_description.py:8`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Changing import paths can lead to confusion and potential import errors.
+💡 **Solution:** Ensure that all import paths are updated consistently throughout the codebase.
+
+**Current Code:**
+```python
+from kaizen.llms.prompts.code_review_prompts import (
+```
+
+**Suggested Code:**
+```python
+from kaizen.llms.prompts.pr_desc_prompts import (
+```
+
+### 2. Lack of error handling for potential exceptions.
+📁 **File:** `kaizen/generator/pr_description.py:51`
+⚖️ **Severity:** 7/10
+🔍 **Description:** The absence of error handling can cause the application to crash unexpectedly.
+💡 **Solution:** Implement try-except blocks where appropriate to handle potential exceptions gracefully.
+
+**Current Code:**
+```python
+raise Exception("Both diff_text and pull_request_files are empty!")
+```
+
+**Suggested Code:**
+```python
+raise ValueError("Both diff_text and pull_request_files are empty!")
+```
+
+### 3. Changes to function calls may alter expected behavior.
+📁 **File:** `kaizen/generator/pr_description.py:83`
+⚖️ **Severity:** 8/10
+🔍 **Description:** Switching from 'chat_completion_with_json' to 'chat_completion' may change the output format.
+💡 **Solution:** Review the expected output of the new function and ensure compatibility with existing code.
+
+**Current Code:**
+```python
+resp, usage = self.provider.chat_completion_with_json(prompt, user=user)
+```
+
+**Suggested Code:**
+```python
+resp, usage = self.provider.chat_completion(prompt, user=user)
+```
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (1 issue)
+
+
+Documentation (1 issue)
+
+### 1. Missing documentation for new prompts.
+📁 **File:** `kaizen/llms/prompts/pr_desc_prompts.py:1`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Lack of documentation can make it harder for other developers to understand the purpose of new prompts.
+💡 **Solution:** Add docstrings or comments explaining the purpose of each new prompt.
+
+**Current Code:**
+```python
+
+```
+
+**Suggested Code:**
+```python
+# This prompt is used to generate a PR description based on the provided details.
+```
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage
+{"prompt_tokens": 6751, "completion_tokens": 826, "total_tokens": 7577}
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o-mini/no_eval/pr_440/comments.json b/.experiments/code_review/gpt-4o-mini/no_eval/pr_440/comments.json
new file mode 100644
index 00000000..0637a088
--- /dev/null
+++ b/.experiments/code_review/gpt-4o-mini/no_eval/pr_440/comments.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o-mini/no_eval/pr_440/issues.json b/.experiments/code_review/gpt-4o-mini/no_eval/pr_440/issues.json
new file mode 100644
index 00000000..7e262d38
--- /dev/null
+++ b/.experiments/code_review/gpt-4o-mini/no_eval/pr_440/issues.json
@@ -0,0 +1,47 @@
+[
+ {
+ "topic": "Repository Setup",
+ "comment": "The setup_repository method is commented out in the previous version, which may lead to confusion about its necessity.",
+ "confidence": "important",
+ "reason": "Commenting out essential setup code can lead to runtime errors if the repository is not properly initialized.",
+ "solution": "Ensure that the setup_repository method is called appropriately to avoid potential issues.",
+ "actual_code": "analyzer.setup_repository(\"./github_app/\")",
+ "fixed_code": "analyzer.setup_repository(\"./github_app/\")",
+ "file_name": "examples/ragify_codebase/main.py",
+ "start_line": 7,
+ "end_line": 7,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Code Comment",
+ "comment": "The TODO comment lacks specificity regarding how to handle duplicates.",
+ "confidence": "moderate",
+ "reason": "Vague comments can lead to misunderstandings and may not provide enough guidance for future developers.",
+ "solution": "Specify the conditions under which duplicates should be checked and how to handle them.",
+ "actual_code": "# TODO: DONT PUSH DUPLICATE",
+ "fixed_code": "# TODO: Implement a check to prevent pushing duplicate embeddings based on a unique identifier.",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 157,
+ "end_line": 157,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Dependency Update",
+ "comment": "The dependency version for llama-index-core has been updated without a clear reason.",
+ "confidence": "important",
+ "reason": "Updating dependencies can introduce breaking changes; it's important to ensure compatibility.",
+ "solution": "Review the changelog for llama-index-core to confirm that the new version does not introduce breaking changes.",
+ "actual_code": "llama-index-core = \"^0.10.47\"",
+ "fixed_code": "llama-index-core = \"0.10.65\"",
+ "file_name": "pyproject.toml",
+ "start_line": 27,
+ "end_line": 27,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o-mini/no_eval/pr_440/review.md b/.experiments/code_review/gpt-4o-mini/no_eval/pr_440/review.md
new file mode 100644
index 00000000..08c9e2bc
--- /dev/null
+++ b/.experiments/code_review/gpt-4o-mini/no_eval/pr_440/review.md
@@ -0,0 +1,100 @@
+# 🔍 Code Review Summary
+
+✅ **All Clear:** This commit looks good! 👍
+
+## 📊 Stats
+- Total Issues: 3
+- Critical: 0
+- Important: 2
+- Minor: 1
+- Files Affected: 3
+## 🏆 Code Quality
+[█████████████████░░░] 85% (Good)
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Repository Setup (2 issues)
+
+### 1. The setup_repository method is commented out in the previous version, which may lead to confusion about its necessity.
+📁 **File:** `examples/ragify_codebase/main.py:7`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Commenting out essential setup code can lead to runtime errors if the repository is not properly initialized.
+💡 **Solution:** Ensure that the setup_repository method is called appropriately to avoid potential issues.
+
+**Current Code:**
+```python
+analyzer.setup_repository("./github_app/")
+```
+
+**Suggested Code:**
+```python
+analyzer.setup_repository("./github_app/")
+```
+
+### 2. The dependency version for llama-index-core has been updated without a clear reason.
+📁 **File:** `pyproject.toml:27`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Updating dependencies can introduce breaking changes; it's important to ensure compatibility.
+💡 **Solution:** Review the changelog for llama-index-core to confirm that the new version does not introduce breaking changes.
+
+**Current Code:**
+```python
+llama-index-core = "^0.10.47"
+```
+
+**Suggested Code:**
+```python
+llama-index-core = "0.10.65"
+```
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (1 issue)
+
+
+Code Comment (1 issue)
+
+### 1. The TODO comment lacks specificity regarding how to handle duplicates.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:157`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Vague comments can lead to misunderstandings and may not provide enough guidance for future developers.
+💡 **Solution:** Specify the conditions under which duplicates should be checked and how to handle them.
+
+**Current Code:**
+```python
+# TODO: DONT PUSH DUPLICATE
+```
+
+**Suggested Code:**
+```python
+# TODO: Implement a check to prevent pushing duplicate embeddings based on a unique identifier.
+```
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage
+{"prompt_tokens": 1364, "completion_tokens": 544, "total_tokens": 1908}
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o-mini/no_eval/pr_476/comments.json b/.experiments/code_review/gpt-4o-mini/no_eval/pr_476/comments.json
new file mode 100644
index 00000000..a9d40eac
--- /dev/null
+++ b/.experiments/code_review/gpt-4o-mini/no_eval/pr_476/comments.json
@@ -0,0 +1,16 @@
+[
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "1",
+ "end_line": "1",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o-mini/no_eval/pr_476/issues.json b/.experiments/code_review/gpt-4o-mini/no_eval/pr_476/issues.json
new file mode 100644
index 00000000..498888fb
--- /dev/null
+++ b/.experiments/code_review/gpt-4o-mini/no_eval/pr_476/issues.json
@@ -0,0 +1,91 @@
+[
+ {
+ "topic": "Error Handling",
+ "comment": "Broad exception handling can obscure specific errors.",
+ "confidence": "important",
+ "reason": "Using a generic Exception can make debugging difficult and hide underlying issues.",
+ "solution": "Catch specific exceptions where possible.",
+ "actual_code": "except Exception:",
+ "fixed_code": "except KeyError:",
+ "file_name": "github_app/github_helper/pull_requests.py",
+ "start_line": 140,
+ "end_line": 141,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Function Signature",
+ "comment": "The function 'post_pull_request' has an additional parameter that should be documented.",
+ "confidence": "important",
+ "reason": "New parameters should be documented to ensure clarity for future maintainers.",
+ "solution": "Update the function docstring to include the 'tests' parameter.",
+ "actual_code": "def post_pull_request(url, data, installation_id, tests=None):",
+ "fixed_code": "def post_pull_request(url, data, installation_id, tests=None): # tests: List of test files",
+ "file_name": "github_app/github_helper/pull_requests.py",
+ "start_line": 106,
+ "end_line": 106,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Code Readability",
+ "comment": "The new function 'sort_files' lacks a docstring.",
+ "confidence": "important",
+ "reason": "Docstrings are essential for understanding the purpose and usage of functions.",
+ "solution": "Add a docstring to describe the function's purpose and parameters.",
+ "actual_code": "def sort_files(files):",
+ "fixed_code": "def sort_files(files): # Sorts a list of file dictionaries by filename.",
+ "file_name": "github_app/github_helper/pull_requests.py",
+ "start_line": 184,
+ "end_line": 184,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Variable Naming",
+ "comment": "The variable 'tests' could be more descriptive.",
+ "confidence": "moderate",
+ "reason": "Descriptive variable names improve code readability and maintainability.",
+ "solution": "Consider renaming 'tests' to 'generated_tests' for clarity.",
+ "actual_code": "tests = generate_tests(pr_files)",
+ "fixed_code": "generated_tests = generate_tests(pr_files)",
+ "file_name": "github_app/github_helper/pull_requests.py",
+ "start_line": 58,
+ "end_line": 58,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 3
+ },
+ {
+ "topic": "Logging",
+ "comment": "Consider using logging instead of print statements for error reporting.",
+ "confidence": "important",
+ "reason": "Using logging allows for better control over the output and can be configured for different environments.",
+ "solution": "Replace print statements with appropriate logging calls.",
+ "actual_code": "print(\"Error\")",
+ "fixed_code": "logger.error(\"Error occurred\")",
+ "file_name": "github_app/github_helper/pull_requests.py",
+ "start_line": 141,
+ "end_line": 141,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "1",
+ "end_line": "1",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o-mini/no_eval/pr_476/review.md b/.experiments/code_review/gpt-4o-mini/no_eval/pr_476/review.md
new file mode 100644
index 00000000..5d27ff4f
--- /dev/null
+++ b/.experiments/code_review/gpt-4o-mini/no_eval/pr_476/review.md
@@ -0,0 +1,145 @@
+# 🔍 Code Review Summary
+
+❗ **Attention Required:** This push has potential issues. 🚨
+
+## 📊 Stats
+- Total Issues: 6
+- Critical: 1
+- Important: 4
+- Minor: 1
+- Files Affected: 2
+## 🏆 Code Quality
+[█████████████████░░░] 85% (Good)
+
+## 🚨 Critical Issues
+
+
+Configuration (1 issue)
+
+### 1. Changes made to sensitive file
+📁 **File:** `config.json:1`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to config.json, which needs review
+💡 **Solution:** NA
+
+
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Error Handling (4 issues)
+
+### 1. Broad exception handling can obscure specific errors.
+📁 **File:** `github_app/github_helper/pull_requests.py:140`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Using a generic Exception can make debugging difficult and hide underlying issues.
+💡 **Solution:** Catch specific exceptions where possible.
+
+**Current Code:**
+```python
+except Exception:
+```
+
+**Suggested Code:**
+```python
+except KeyError:
+```
+
+### 2. The function 'post_pull_request' has an additional parameter that should be documented.
+📁 **File:** `github_app/github_helper/pull_requests.py:106`
+⚖️ **Severity:** 5/10
+🔍 **Description:** New parameters should be documented to ensure clarity for future maintainers.
+💡 **Solution:** Update the function docstring to include the 'tests' parameter.
+
+**Current Code:**
+```python
+def post_pull_request(url, data, installation_id, tests=None):
+```
+
+**Suggested Code:**
+```python
+def post_pull_request(url, data, installation_id, tests=None): # tests: List of test files
+```
+
+### 3. The new function 'sort_files' lacks a docstring.
+📁 **File:** `github_app/github_helper/pull_requests.py:184`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Docstrings are essential for understanding the purpose and usage of functions.
+💡 **Solution:** Add a docstring to describe the function's purpose and parameters.
+
+**Current Code:**
+```python
+def sort_files(files):
+```
+
+**Suggested Code:**
+```python
+def sort_files(files): # Sorts a list of file dictionaries by filename.
+```
+
+### 4. Consider using logging instead of print statements for error reporting.
+📁 **File:** `github_app/github_helper/pull_requests.py:141`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Using logging allows for better control over the output and can be configured for different environments.
+💡 **Solution:** Replace print statements with appropriate logging calls.
+
+**Current Code:**
+```python
+print("Error")
+```
+
+**Suggested Code:**
+```python
+logger.error("Error occurred")
+```
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (1 issue)
+
+
+Variable Naming (1 issue)
+
+### 1. The variable 'tests' could be more descriptive.
+📁 **File:** `github_app/github_helper/pull_requests.py:58`
+⚖️ **Severity:** 3/10
+🔍 **Description:** Descriptive variable names improve code readability and maintainability.
+💡 **Solution:** Consider renaming 'tests' to 'generated_tests' for clarity.
+
+**Current Code:**
+```python
+tests = generate_tests(pr_files)
+```
+
+**Suggested Code:**
+```python
+generated_tests = generate_tests(pr_files)
+```
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage
+{"prompt_tokens": 4007, "completion_tokens": 796, "total_tokens": 4803}
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o-mini/no_eval/pr_5/comments.json b/.experiments/code_review/gpt-4o-mini/no_eval/pr_5/comments.json
new file mode 100644
index 00000000..c38f9fea
--- /dev/null
+++ b/.experiments/code_review/gpt-4o-mini/no_eval/pr_5/comments.json
@@ -0,0 +1,47 @@
+[
+ {
+ "topic": "Error Handling",
+ "comment": "API call may fail without a retry mechanism.",
+ "confidence": "critical",
+ "reason": "The completion function call does not handle potential API failures.",
+ "solution": "Implement a retry mechanism or error handling for API calls.",
+ "actual_code": "response = completion(",
+ "fixed_code": "# Implement retry logic here",
+ "file_name": "main.py",
+ "start_line": 66,
+ "end_line": 68,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Silent Failure",
+ "comment": "Silent failure without logging in case of JSON parsing error.",
+ "confidence": "critical",
+ "reason": "The code does not log the error, making debugging difficult.",
+ "solution": "Log the error message to provide feedback in case of failure.",
+ "actual_code": "print(f\"Failed to parse content for applicant\")",
+ "fixed_code": "print(f\"Failed to parse content for applicant:{e}\")",
+ "file_name": "main.py",
+ "start_line": 86,
+ "end_line": 86,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Division by Zero",
+ "comment": "Potential division by zero error.",
+ "confidence": "critical",
+ "reason": "If total_tokens is zero, this will raise an error.",
+ "solution": "Add a check to prevent division by zero.",
+ "actual_code": "print(f\"Total tokens used:{total_tokens:,}\")",
+ "fixed_code": "if total_tokens > 0: print(f\"Total tokens used:{total_tokens:,}\")",
+ "file_name": "main.py",
+ "start_line": 159,
+ "end_line": 159,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 9
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o-mini/no_eval/pr_5/issues.json b/.experiments/code_review/gpt-4o-mini/no_eval/pr_5/issues.json
new file mode 100644
index 00000000..5dd7c900
--- /dev/null
+++ b/.experiments/code_review/gpt-4o-mini/no_eval/pr_5/issues.json
@@ -0,0 +1,107 @@
+[
+ {
+ "topic": "Imports",
+ "comment": "Unused import detected.",
+ "confidence": "trivial",
+ "reason": "The import of 'random' is not utilized in the code.",
+ "solution": "Remove the unused import to clean up the code.",
+ "actual_code": "import random # Unused import",
+ "fixed_code": "",
+ "file_name": "main.py",
+ "start_line": 8,
+ "end_line": 8,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 2
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "API call may fail without a retry mechanism.",
+ "confidence": "critical",
+ "reason": "The completion function call does not handle potential API failures.",
+ "solution": "Implement a retry mechanism or error handling for API calls.",
+ "actual_code": "response = completion(",
+ "fixed_code": "# Implement retry logic here",
+ "file_name": "main.py",
+ "start_line": 66,
+ "end_line": 68,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Silent Failure",
+ "comment": "Silent failure without logging in case of JSON parsing error.",
+ "confidence": "critical",
+ "reason": "The code does not log the error, making debugging difficult.",
+ "solution": "Log the error message to provide feedback in case of failure.",
+ "actual_code": "print(f\"Failed to parse content for applicant\")",
+ "fixed_code": "print(f\"Failed to parse content for applicant:{e}\")",
+ "file_name": "main.py",
+ "start_line": 86,
+ "end_line": 86,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Inefficient Progress Reporting",
+ "comment": "Inefficient way to print progress.",
+ "confidence": "important",
+ "reason": "The current method of printing progress can be improved for better performance.",
+ "solution": "Consider using a logging library or a more efficient progress reporting method.",
+ "actual_code": "print(f\"\\rProgress:[{('=' * int(50 * progress)):<50}]{progress:.0%}\", end=\"\", flush=True)",
+ "fixed_code": "# Use logging or a more efficient progress reporting",
+ "file_name": "main.py",
+ "start_line": 121,
+ "end_line": 121,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Redundant Code",
+ "comment": "Redundant check for empty DataFrame.",
+ "confidence": "moderate",
+ "reason": "The check for an empty DataFrame is unnecessary as the process will handle it.",
+ "solution": "Remove the redundant check to simplify the code.",
+ "actual_code": "if len(df) == 0:",
+ "fixed_code": "",
+ "file_name": "main.py",
+ "start_line": 142,
+ "end_line": 143,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Division by Zero",
+ "comment": "Potential division by zero error.",
+ "confidence": "critical",
+ "reason": "If total_tokens is zero, this will raise an error.",
+ "solution": "Add a check to prevent division by zero.",
+ "actual_code": "print(f\"Total tokens used:{total_tokens:,}\")",
+ "fixed_code": "if total_tokens > 0: print(f\"Total tokens used:{total_tokens:,}\")",
+ "file_name": "main.py",
+ "start_line": 159,
+ "end_line": 159,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "File Handling",
+ "comment": "No error handling for file not found.",
+ "confidence": "important",
+ "reason": "If the file does not exist, it will raise an unhandled exception.",
+ "solution": "Add error handling to manage file not found scenarios.",
+ "actual_code": "main(input_file)",
+ "fixed_code": "# Add error handling for file not found",
+ "file_name": "main.py",
+ "start_line": 175,
+ "end_line": 175,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 6
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o-mini/no_eval/pr_5/review.md b/.experiments/code_review/gpt-4o-mini/no_eval/pr_5/review.md
new file mode 100644
index 00000000..fcc46911
--- /dev/null
+++ b/.experiments/code_review/gpt-4o-mini/no_eval/pr_5/review.md
@@ -0,0 +1,155 @@
+# 🔍 Code Review Summary
+
+❗ **Attention Required:** This push has potential issues. 🚨
+
+## 📊 Stats
+- Total Issues: 7
+- Critical: 3
+- Important: 2
+- Minor: 1
+- Files Affected: 1
+## 🏆 Code Quality
+[███████████████░░░░░] 75% (Fair)
+
+## 🚨 Critical Issues
+
+
+Error Handling (3 issues)
+
+### 1. API call may fail without a retry mechanism.
+📁 **File:** `main.py:66`
+⚖️ **Severity:** 8/10
+🔍 **Description:** The completion function call does not handle potential API failures.
+💡 **Solution:** Implement a retry mechanism or error handling for API calls.
+
+**Current Code:**
+```python
+response = completion(
+```
+
+**Suggested Code:**
+```python
+# Implement retry logic here
+```
+
+### 2. Silent failure without logging in case of JSON parsing error.
+📁 **File:** `main.py:86`
+⚖️ **Severity:** 7/10
+🔍 **Description:** The code does not log the error, making debugging difficult.
+💡 **Solution:** Log the error message to provide feedback in case of failure.
+
+**Current Code:**
+```python
+print(f"Failed to parse content for applicant")
+```
+
+**Suggested Code:**
+```python
+print(f"Failed to parse content for applicant:{e}")
+```
+
+### 3. Potential division by zero error.
+📁 **File:** `main.py:159`
+⚖️ **Severity:** 9/10
+🔍 **Description:** If total_tokens is zero, this will raise an error.
+💡 **Solution:** Add a check to prevent division by zero.
+
+**Current Code:**
+```python
+print(f"Total tokens used:{total_tokens:,}")
+```
+
+**Suggested Code:**
+```python
+if total_tokens > 0: print(f"Total tokens used:{total_tokens:,}")
+```
+
+
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Inefficient Progress Reporting (2 issues)
+
+### 1. Inefficient way to print progress.
+📁 **File:** `main.py:121`
+⚖️ **Severity:** 5/10
+🔍 **Description:** The current method of printing progress can be improved for better performance.
+💡 **Solution:** Consider using a logging library or a more efficient progress reporting method.
+
+**Current Code:**
+```python
+print(f"\rProgress:[{('=' * int(50 * progress)):<50}]{progress:.0%}", end="", flush=True)
+```
+
+**Suggested Code:**
+```python
+# Use logging or a more efficient progress reporting
+```
+
+### 2. No error handling for file not found.
+📁 **File:** `main.py:175`
+⚖️ **Severity:** 6/10
+🔍 **Description:** If the file does not exist, it will raise an unhandled exception.
+💡 **Solution:** Add error handling to manage file not found scenarios.
+
+**Current Code:**
+```python
+main(input_file)
+```
+
+**Suggested Code:**
+```python
+# Add error handling for file not found
+```
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (2 issues)
+
+
+Redundant Code (1 issue)
+
+### 1. Redundant check for empty DataFrame.
+📁 **File:** `main.py:142`
+⚖️ **Severity:** 4/10
+🔍 **Description:** The check for an empty DataFrame is unnecessary as the process will handle it.
+💡 **Solution:** Remove the redundant check to simplify the code.
+
+**Current Code:**
+```python
+if len(df) == 0:
+```
+
+**Suggested Code:**
+```python
+
+```
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage
+{"prompt_tokens": 6154, "completion_tokens": 1046, "total_tokens": 7200}
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o/no_eval/pr_222/comments.json b/.experiments/code_review/gpt-4o/no_eval/pr_222/comments.json
new file mode 100644
index 00000000..5ebff860
--- /dev/null
+++ b/.experiments/code_review/gpt-4o/no_eval/pr_222/comments.json
@@ -0,0 +1,132 @@
+[
+ {
+ "topic": "Security",
+ "comment": "Hardcoding API keys in `config.json` can lead to security vulnerabilities.",
+ "confidence": "critical",
+ "reason": "Exposing API keys in the codebase can lead to unauthorized access.",
+ "solution": "Use environment variables to store API keys instead of hardcoding them.",
+ "actual_code": "\"api_key\": \"os.environ/AZURE_API_KEY\"",
+ "fixed_code": "\"api_key\": \"${AZURE_API_KEY}\"",
+ "file_name": "config.json",
+ "start_line": 13,
+ "end_line": 13,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Security",
+ "comment": "Database connection strings should not be constructed using string interpolation.",
+ "confidence": "critical",
+ "reason": "Using string interpolation for connection strings can expose the application to SQL injection attacks.",
+ "solution": "Use parameterized queries or a configuration management tool to handle sensitive information.",
+ "actual_code": "self.engine = create_engine(\n f\"postgresql://{os.environ['POSTGRES_USER']}:{os.environ['POSTGRES_PASSWORD']}@{os.environ['POSTGRES_HOST']}:{os.environ['POSTGRES_PORT']}/{os.environ['POSTGRES_DB']}\",\n pool_size=10,\n max_overflow=20,\n)",
+ "fixed_code": "self.engine = create_engine(\n 'postgresql://{user}:{password}@{host}:{port}/{db}'.format(\n user=os.environ['POSTGRES_USER'],\n password=os.environ['POSTGRES_PASSWORD'],\n host=os.environ['POSTGRES_HOST'],\n port=os.environ['POSTGRES_PORT'],\n db=os.environ['POSTGRES_DB']\n ),\n pool_size=10,\n max_overflow=20,\n)",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 35,
+ "end_line": 39,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "SQL Injection",
+ "comment": "Potential SQL injection vulnerability in the query construction.",
+ "confidence": "critical",
+ "reason": "Using f-strings to construct SQL queries can lead to SQL injection attacks if user input is not properly sanitized.",
+ "solution": "Use parameterized queries to avoid SQL injection vulnerabilities.",
+ "actual_code": "query = f\"\"\"\nSELECT \n e.node_id,\n e.text,\n e.metadata,\n 1 - (e.embedding <=> %s::vector) as similarity\nFROM \n{self.table_name}e\nJOIN \n function_abstractions fa ON e.node_id = fa.function_id::text\nJOIN \n files f ON fa.file_id = f.file_id\nWHERE \n f.repo_id = %s\nORDER BY \n similarity DESC\nLIMIT \n %s\n\"\"\"",
+ "fixed_code": "query = \"\"\"\nSELECT \n e.node_id,\n e.text,\n e.metadata,\n 1 - (e.embedding <=> %s::vector) as similarity\nFROM \n %s e\nJOIN \n function_abstractions fa ON e.node_id = fa.function_id::text\nJOIN \n files f ON fa.file_id = f.file_id\nWHERE \n f.repo_id = %s\nORDER BY \n similarity DESC\nLIMIT \n %s\n\"\"\"",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 19,
+ "end_line": 37,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Exception Handling",
+ "comment": "Broad exception handling should be avoided.",
+ "confidence": "critical",
+ "reason": "Catching all exceptions can hide bugs and make debugging difficult.",
+ "solution": "Catch specific exceptions instead of using a broad except clause.",
+ "actual_code": "except Exception as e:",
+ "fixed_code": "except ImportError as e:\n logger.error(f\"Failed to import module:{str(e)}\")\n raise\nexcept ValueError as e:\n logger.error(f\"Invalid value:{str(e)}\")\n raise",
+ "file_name": "kaizen/retriever/tree_sitter_utils.py",
+ "start_line": 28,
+ "end_line": 30,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to Dockerfile, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "Dockerfile",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to docker-compose.yml, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "15",
+ "end_line": "15",
+ "side": "RIGHT",
+ "file_name": "docker-compose.yml",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Version Control",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to .gitignore, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "164",
+ "end_line": "164",
+ "side": "RIGHT",
+ "file_name": ".gitignore",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Database",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to db_setup/init.sql, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "1",
+ "end_line": "1",
+ "side": "RIGHT",
+ "file_name": "db_setup/init.sql",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o/no_eval/pr_222/issues.json b/.experiments/code_review/gpt-4o/no_eval/pr_222/issues.json
new file mode 100644
index 00000000..a46082c3
--- /dev/null
+++ b/.experiments/code_review/gpt-4o/no_eval/pr_222/issues.json
@@ -0,0 +1,387 @@
+[
+ {
+ "topic": "Documentation",
+ "comment": "The `install_tree_sitter_languages.sh` script lacks comments explaining the purpose of each step.",
+ "confidence": "moderate",
+ "reason": "Comments improve readability and maintainability, especially for complex scripts.",
+ "solution": "Add comments explaining the purpose of each step in the script.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "install_tree_sitter_languages.sh",
+ "start_line": 1,
+ "end_line": 47,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "The `install_tree_sitter_languages.sh` script does not handle errors during the installation process.",
+ "confidence": "important",
+ "reason": "Error handling ensures that the script fails gracefully and provides useful error messages.",
+ "solution": "Add error handling for each critical step in the script.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "install_tree_sitter_languages.sh",
+ "start_line": 1,
+ "end_line": 47,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Security",
+ "comment": "Hardcoding API keys in `config.json` can lead to security vulnerabilities.",
+ "confidence": "critical",
+ "reason": "Exposing API keys in the codebase can lead to unauthorized access.",
+ "solution": "Use environment variables to store API keys instead of hardcoding them.",
+ "actual_code": "\"api_key\": \"os.environ/AZURE_API_KEY\"",
+ "fixed_code": "\"api_key\": \"${AZURE_API_KEY}\"",
+ "file_name": "config.json",
+ "start_line": 13,
+ "end_line": 13,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Performance",
+ "comment": "The Dockerfile installs build dependencies and then removes them, which is good practice but can be optimized.",
+ "confidence": "moderate",
+ "reason": "Optimizing Dockerfile layers can reduce image size and build time.",
+ "solution": "Combine RUN commands to reduce the number of layers.",
+ "actual_code": "RUN apt-get update && apt-get install -y \\\n build-essential \\\n git \\\n postgresql-server-dev-16\nRUN apt-get remove -y build-essential git postgresql-server-dev-16 \\\n && apt-get autoremove -y \\\n && rm -rf /var/lib/apt/lists/* /pgvector",
+ "fixed_code": "RUN apt-get update && apt-get install -y \\\n build-essential \\\n git \\\n postgresql-server-dev-16 \\\n && git clone https://github.com/pgvector/pgvector.git \\\n && cd pgvector \\\n && make \\\n && make install \\\n && apt-get remove -y build-essential git postgresql-server-dev-16 \\\n && apt-get autoremove -y \\\n && rm -rf /var/lib/apt/lists/* /pgvector",
+ "file_name": "Dockerfile-postgres",
+ "start_line": 4,
+ "end_line": 18,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 5
+ },
+ {
+ "topic": "Code Readability",
+ "comment": "The `chunk_code` function in `code_chunker.py` has nested functions and complex logic that can be refactored for better readability.",
+ "confidence": "important",
+ "reason": "Refactoring complex functions into smaller, well-named functions improves readability and maintainability.",
+ "solution": "Refactor the `chunk_code` function to extract nested functions into separate helper functions.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/code_chunker.py",
+ "start_line": 7,
+ "end_line": 62,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "SQL Injection",
+ "comment": "Potential SQL injection vulnerability in the query construction.",
+ "confidence": "critical",
+ "reason": "Using f-strings to construct SQL queries can lead to SQL injection attacks if user input is not properly sanitized.",
+ "solution": "Use parameterized queries to avoid SQL injection vulnerabilities.",
+ "actual_code": "query = f\"\"\"\nSELECT \n e.node_id,\n e.text,\n e.metadata,\n 1 - (e.embedding <=> %s::vector) as similarity\nFROM \n{self.table_name}e\nJOIN \n function_abstractions fa ON e.node_id = fa.function_id::text\nJOIN \n files f ON fa.file_id = f.file_id\nWHERE \n f.repo_id = %s\nORDER BY \n similarity DESC\nLIMIT \n %s\n\"\"\"",
+ "fixed_code": "query = \"\"\"\nSELECT \n e.node_id,\n e.text,\n e.metadata,\n 1 - (e.embedding <=> %s::vector) as similarity\nFROM \n %s e\nJOIN \n function_abstractions fa ON e.node_id = fa.function_id::text\nJOIN \n files f ON fa.file_id = f.file_id\nWHERE \n f.repo_id = %s\nORDER BY \n similarity DESC\nLIMIT \n %s\n\"\"\"",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 19,
+ "end_line": 37,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Code Readability",
+ "comment": "The normalization of the query embedding can be simplified for better readability.",
+ "confidence": "moderate",
+ "reason": "Simplifying code improves readability and maintainability.",
+ "solution": "Combine the normalization steps into a single line.",
+ "actual_code": "query_embedding_np = np.array(query_embedding)\nquery_embedding_normalized = query_embedding_np / np.linalg.norm(query_embedding_np)",
+ "fixed_code": "query_embedding_normalized = np.array(query_embedding) / np.linalg.norm(query_embedding)",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 15,
+ "end_line": 16,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 3
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Lack of error handling in database operations.",
+ "confidence": "important",
+ "reason": "Database operations can fail; it's important to handle exceptions to avoid crashes.",
+ "solution": "Add try-except blocks to handle potential database errors.",
+ "actual_code": "",
+ "fixed_code": "try:\n with self.get_client() as client:\n with client.cursor() as cur:\n cur.execute(query, (query_embedding_normalized.tolist(), repo_id, similarity_top_k))\n results = cur.fetchall()\nexcept Exception as e:\n # Handle exception (e.g., log the error, re-raise, etc.)\n raise e",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 39,
+ "end_line": 42,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Type Annotations",
+ "comment": "Missing type annotations for method parameters and return types.",
+ "confidence": "moderate",
+ "reason": "Type annotations improve code readability and help with static analysis.",
+ "solution": "Add type annotations to the method parameters and return types.",
+ "actual_code": "def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[dict]:",
+ "fixed_code": "def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[Dict[str, Any]]:",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 13,
+ "end_line": 13,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Dictionary Default Values",
+ "comment": "Using `None` as a default return value for `get_feedback` method.",
+ "confidence": "low",
+ "reason": "Returning `None` can lead to potential `NoneType` errors if not handled properly.",
+ "solution": "Return an empty dictionary instead of `None`.",
+ "actual_code": "return self.feedback_store.get(code_id, None)",
+ "fixed_code": "return self.feedback_store.get(code_id,{})",
+ "file_name": "kaizen/retriever/feedback_system.py",
+ "start_line": 18,
+ "end_line": 18,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 2
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Exception handling in `generate_abstraction` method is too generic.",
+ "confidence": "important",
+ "reason": "Catching all exceptions without specific handling can obscure the root cause of errors and make debugging difficult.",
+ "solution": "Catch specific exceptions and handle them appropriately.",
+ "actual_code": "except Exception as e:\n raise e",
+ "fixed_code": "except SomeSpecificException as e:\n logger.error(f\"Specific error occurred:{str(e)}\")\n raise e",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 218,
+ "end_line": 219,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 7
+ },
+ {
+ "topic": "Security",
+ "comment": "Database connection strings should not be constructed using string interpolation.",
+ "confidence": "critical",
+ "reason": "Using string interpolation for connection strings can expose the application to SQL injection attacks.",
+ "solution": "Use parameterized queries or a configuration management tool to handle sensitive information.",
+ "actual_code": "self.engine = create_engine(\n f\"postgresql://{os.environ['POSTGRES_USER']}:{os.environ['POSTGRES_PASSWORD']}@{os.environ['POSTGRES_HOST']}:{os.environ['POSTGRES_PORT']}/{os.environ['POSTGRES_DB']}\",\n pool_size=10,\n max_overflow=20,\n)",
+ "fixed_code": "self.engine = create_engine(\n 'postgresql://{user}:{password}@{host}:{port}/{db}'.format(\n user=os.environ['POSTGRES_USER'],\n password=os.environ['POSTGRES_PASSWORD'],\n host=os.environ['POSTGRES_HOST'],\n port=os.environ['POSTGRES_PORT'],\n db=os.environ['POSTGRES_DB']\n ),\n pool_size=10,\n max_overflow=20,\n)",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 35,
+ "end_line": 39,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Performance",
+ "comment": "Using `os.walk` and `ThreadPoolExecutor` for file parsing can be optimized.",
+ "confidence": "important",
+ "reason": "The current implementation may not efficiently utilize available CPU cores and can be improved for better performance.",
+ "solution": "Consider using asynchronous I/O operations or more efficient file traversal methods.",
+ "actual_code": "with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:\n futures =[]\n for root, _, files in os.walk(repo_path):\n for file in files:\n if file.endswith((\".py\", \".js\", \".ts\", \".rs\")):\n file_path = os.path.join(root, file)\n futures.append(executor.submit(self.parse_file, file_path))",
+ "fixed_code": "import asyncio\nfrom aiofiles import open as aio_open\n\nasync def parse_repository_async(self, repo_path: str):\n self.total_usage = self.llm_provider.DEFAULT_USAGE\n logger.info(f\"Starting repository setup for:{repo_path}\")\n await self.parse_repository(repo_path)\n self.store_function_relationships()\n logger.info(\"Repository setup completed successfully\")\n\nasync def parse_file_async(self, file_path: str):\n logger.debug(f\"Parsing file:{file_path}\")\n try:\n async with aio_open(file_path, \"r\", encoding=\"utf-8\") as file:\n content = await file.read()\n language = self.get_language_from_extension(file_path)\n chunked_code = chunk_code(content, language)\n for section, items in chunked_code.items():\n if isinstance(items, dict):\n for name, code_info in items.items():\n await self.process_code_block_async(code_info, file_path, section, name)\n elif isinstance(items, list):\n for i, code_info in enumerate(items):\n await self.process_code_block_async(code_info, file_path, section, f\"{section}_{i}\")\n logger.debug(f\"Successfully parsed file:{file_path}\")\n except Exception as e:\n logger.error(f\"Error processing file{file_path}:{str(e)}\")\n logger.error(traceback.format_exc())",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 71,
+ "end_line": 79,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Logging",
+ "comment": "Logging level for parsing files should be more granular.",
+ "confidence": "moderate",
+ "reason": "Using `logger.debug` for file parsing can help in better debugging without cluttering the log files.",
+ "solution": "Change logging level to `debug` for detailed logs during file parsing.",
+ "actual_code": "logger.info(f\"Parsing repository:{repo_path}\")",
+ "fixed_code": "logger.debug(f\"Parsing repository:{repo_path}\")",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 70,
+ "end_line": 70,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Code Readability",
+ "comment": "The `generate_abstraction` method is too long and complex.",
+ "confidence": "important",
+ "reason": "Long methods can be difficult to read and maintain. They should be broken down into smaller, more manageable functions.",
+ "solution": "Refactor the `generate_abstraction` method into smaller helper methods.",
+ "actual_code": "def generate_abstraction(\n self, code_block: str, language: str, max_tokens: int = 300\n) -> str:\n prompt = f\"\"\"Generate a concise yet comprehensive abstract description of the following{language}code block. \n Include information about:\n 1. The purpose or functionality of the code\n 2. Input parameters and return values (if applicable)\n 3. Any important algorithms or data structures used\n 4. Key dependencies or external libraries used\n 5. Any notable design patterns or architectural choices\n 6. Potential edge cases or error handling\n\n Code:\n ```{language}\n{code_block}\n ```\n \"\"\"\n\n estimated_prompt_tokens = len(tokenizer.encode(prompt))\n adjusted_max_tokens = min(max(150, estimated_prompt_tokens), 1000)\n\n try:\n abstraction, usage = self.llm_provider.chat_completion(\n prompt=\"\",\n messages=[\n{\n \"role\": \"system\",\n \"content\": \"You are an expert programmer tasked with generating comprehensive and accurate abstractions of code snippets.\",\n},\n{\"role\": \"user\", \"content\": prompt},\n ],\n custom_model={\"max_tokens\": adjusted_max_tokens, \"model\": \"small\"},\n )\n return abstraction, usage\n\n except Exception as e:\n raise e",
+ "fixed_code": "def generate_abstraction(self, code_block: str, language: str, max_tokens: int = 300) -> str:\n prompt = self._create_prompt(code_block, language)\n estimated_prompt_tokens = len(tokenizer.encode(prompt))\n adjusted_max_tokens = min(max(150, estimated_prompt_tokens), 1000)\n\n try:\n abstraction, usage = self._get_abstraction_from_llm(prompt, adjusted_max_tokens)\n return abstraction, usage\n except Exception as e:\n raise e\n\n def _create_prompt(self, code_block: str, language: str) -> str:\n return f\"\"\"Generate a concise yet comprehensive abstract description of the following{language}code block. \n Include information about:\n 1. The purpose or functionality of the code\n 2. Input parameters and return values (if applicable)\n 3. Any important algorithms or data structures used\n 4. Key dependencies or external libraries used\n 5. Any notable design patterns or architectural choices\n 6. Potential edge cases or error handling\n\n Code:\n ```{language}\n{code_block}\n ```\n \"\"\"\n\n def _get_abstraction_from_llm(self, prompt: str, max_tokens: int) -> str:\n return self.llm_provider.chat_completion(\n prompt=\"\",\n messages=[\n{\n \"role\": \"system\",\n \"content\": \"You are an expert programmer tasked with generating comprehensive and accurate abstractions of code snippets.\",\n},\n{\"role\": \"user\", \"content\": prompt},\n ],\n custom_model={\"max_tokens\": max_tokens, \"model\": \"small\"},\n )",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 184,
+ "end_line": 219,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Logging Configuration",
+ "comment": "Logging configuration should be done in the main entry point of the application, not in the module.",
+ "confidence": "important",
+ "reason": "Configuring logging in a module can lead to unexpected behavior if the module is imported multiple times.",
+ "solution": "Move the logging configuration to the main entry point of the application.",
+ "actual_code": "logging.basicConfig(level=logging.INFO)",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/tree_sitter_utils.py",
+ "start_line": 8,
+ "end_line": 8,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Exception Handling",
+ "comment": "Broad exception handling should be avoided.",
+ "confidence": "critical",
+ "reason": "Catching all exceptions can hide bugs and make debugging difficult.",
+ "solution": "Catch specific exceptions instead of using a broad except clause.",
+ "actual_code": "except Exception as e:",
+ "fixed_code": "except ImportError as e:\n logger.error(f\"Failed to import module:{str(e)}\")\n raise\nexcept ValueError as e:\n logger.error(f\"Invalid value:{str(e)}\")\n raise",
+ "file_name": "kaizen/retriever/tree_sitter_utils.py",
+ "start_line": 28,
+ "end_line": 30,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Function Documentation",
+ "comment": "Public functions should have docstrings.",
+ "confidence": "moderate",
+ "reason": "Docstrings provide a convenient way of associating documentation with functions.",
+ "solution": "Add docstrings to public functions.",
+ "actual_code": "",
+ "fixed_code": "def load_language(language: str) -> Language:\n \"\"\"\n Load the specified language.\n :param language: The name of the language to load.\n :return: The loaded Language object.\n \"\"\"",
+ "file_name": "kaizen/retriever/tree_sitter_utils.py",
+ "start_line": 15,
+ "end_line": 15,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Code Duplication",
+ "comment": "Duplicate code found in test cases.",
+ "confidence": "important",
+ "reason": "Duplicate code can lead to maintenance issues and bugs.",
+ "solution": "Refactor the duplicate code into a helper function.",
+ "actual_code": "print_chunks(\"JavaScript\", chunk_code(javascript_code, \"javascript\"))",
+ "fixed_code": "def test_chunk(language, code):\n print_chunks(language, chunk_code(code, language))\n\ntest_chunk(\"Python\", python_code)\ntest_chunk(\"JavaScript\", javascript_code)\ntest_chunk(\"React\", react_nextjs_code)",
+ "file_name": "tests/retriever/test_chunker.py",
+ "start_line": 98,
+ "end_line": 101,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Versioning",
+ "comment": "Version bump should be accompanied by a changelog update.",
+ "confidence": "moderate",
+ "reason": "A changelog helps track changes and improvements in the project.",
+ "solution": "Update the changelog to reflect the changes made in this version.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "pyproject.toml",
+ "start_line": 3,
+ "end_line": 3,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 3
+ },
+ {
+ "topic": "Dependency Management",
+ "comment": "New dependencies added without justification.",
+ "confidence": "important",
+ "reason": "Adding dependencies increases the attack surface and maintenance burden.",
+ "solution": "Provide justification for new dependencies in the pull request description.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "pyproject.toml",
+ "start_line": 27,
+ "end_line": 49,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to Dockerfile, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "Dockerfile",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to docker-compose.yml, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "15",
+ "end_line": "15",
+ "side": "RIGHT",
+ "file_name": "docker-compose.yml",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Version Control",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to .gitignore, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "164",
+ "end_line": "164",
+ "side": "RIGHT",
+ "file_name": ".gitignore",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Database",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to db_setup/init.sql, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "1",
+ "end_line": "1",
+ "side": "RIGHT",
+ "file_name": "db_setup/init.sql",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o/no_eval/pr_222/review.md b/.experiments/code_review/gpt-4o/no_eval/pr_222/review.md
new file mode 100644
index 00000000..5a796ee4
--- /dev/null
+++ b/.experiments/code_review/gpt-4o/no_eval/pr_222/review.md
@@ -0,0 +1,560 @@
+PR URL: https://github.com/Cloud-Code-AI/kaizen/pull/222
+
+# 🔍 Code Review Summary
+
+❗ **Attention Required:** This push has potential issues. 🚨
+
+## 📊 Stats
+- Total Issues: 26
+- Critical: 9
+- Important: 9
+- Minor: 7
+- Files Affected: 14
+## 🏆 Code Quality
+[█████████████████░░░] 85% (Good)
+
+## 🚨 Critical Issues
+
+
+Security (9 issues)
+
+### 1. Hardcoding API keys in `config.json` can lead to security vulnerabilities.
+📁 **File:** `config.json:13`
+⚖️ **Severity:** 9/10
+🔍 **Description:** Exposing API keys in the codebase can lead to unauthorized access.
+💡 **Solution:** Use environment variables to store API keys instead of hardcoding them.
+
+**Current Code:**
+```python
+"api_key": "os.environ/AZURE_API_KEY"
+```
+
+**Suggested Code:**
+```python
+"api_key": "${AZURE_API_KEY}"
+```
+
+### 2. Potential SQL injection vulnerability in the query construction.
+📁 **File:** `kaizen/retriever/custom_vector_store.py:19`
+⚖️ **Severity:** 9/10
+🔍 **Description:** Using f-strings to construct SQL queries can lead to SQL injection attacks if user input is not properly sanitized.
+💡 **Solution:** Use parameterized queries to avoid SQL injection vulnerabilities.
+
+**Current Code:**
+```python
+query = f"""
+SELECT
+ e.node_id,
+ e.text,
+ e.metadata,
+ 1 - (e.embedding <=> %s::vector) as similarity
+FROM
+{self.table_name}e
+JOIN
+ function_abstractions fa ON e.node_id = fa.function_id::text
+JOIN
+ files f ON fa.file_id = f.file_id
+WHERE
+ f.repo_id = %s
+ORDER BY
+ similarity DESC
+LIMIT
+ %s
+"""
+```
+
+**Suggested Code:**
+```python
+query = """
+SELECT
+ e.node_id,
+ e.text,
+ e.metadata,
+ 1 - (e.embedding <=> %s::vector) as similarity
+FROM
+ %s e
+JOIN
+ function_abstractions fa ON e.node_id = fa.function_id::text
+JOIN
+ files f ON fa.file_id = f.file_id
+WHERE
+ f.repo_id = %s
+ORDER BY
+ similarity DESC
+LIMIT
+ %s
+"""
+```
+
+### 3. Database connection strings should not be constructed using string interpolation.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:35`
+⚖️ **Severity:** 9/10
+🔍 **Description:** Using string interpolation for connection strings can expose the application to SQL injection attacks.
+💡 **Solution:** Use parameterized queries or a configuration management tool to handle sensitive information.
+
+**Current Code:**
+```python
+self.engine = create_engine(
+ f"postgresql://{os.environ['POSTGRES_USER']}:{os.environ['POSTGRES_PASSWORD']}@{os.environ['POSTGRES_HOST']}:{os.environ['POSTGRES_PORT']}/{os.environ['POSTGRES_DB']}",
+ pool_size=10,
+ max_overflow=20,
+)
+```
+
+**Suggested Code:**
+```python
+self.engine = create_engine(
+ 'postgresql://{user}:{password}@{host}:{port}/{db}'.format(
+ user=os.environ['POSTGRES_USER'],
+ password=os.environ['POSTGRES_PASSWORD'],
+ host=os.environ['POSTGRES_HOST'],
+ port=os.environ['POSTGRES_PORT'],
+ db=os.environ['POSTGRES_DB']
+ ),
+ pool_size=10,
+ max_overflow=20,
+)
+```
+
+### 4. Broad exception handling should be avoided.
+📁 **File:** `kaizen/retriever/tree_sitter_utils.py:28`
+⚖️ **Severity:** 8/10
+🔍 **Description:** Catching all exceptions can hide bugs and make debugging difficult.
+💡 **Solution:** Catch specific exceptions instead of using a broad except clause.
+
+**Current Code:**
+```python
+except Exception as e:
+```
+
+**Suggested Code:**
+```python
+except ImportError as e:
+ logger.error(f"Failed to import module:{str(e)}")
+ raise
+except ValueError as e:
+ logger.error(f"Invalid value:{str(e)}")
+ raise
+```
+
+### 5. Changes made to sensitive file
+📁 **File:** `config.json:4`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to config.json, which needs review
+💡 **Solution:** NA
+
+### 6. Changes made to sensitive file
+📁 **File:** `Dockerfile:4`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to Dockerfile, which needs review
+💡 **Solution:** NA
+
+### 7. Changes made to sensitive file
+📁 **File:** `docker-compose.yml:15`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to docker-compose.yml, which needs review
+💡 **Solution:** NA
+
+### 8. Changes made to sensitive file
+📁 **File:** `.gitignore:164`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to .gitignore, which needs review
+💡 **Solution:** NA
+
+### 9. Changes made to sensitive file
+📁 **File:** `db_setup/init.sql:1`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to db_setup/init.sql, which needs review
+💡 **Solution:** NA
+
+
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Error Handling (9 issues)
+
+### 1. The `install_tree_sitter_languages.sh` script does not handle errors during the installation process.
+📁 **File:** `install_tree_sitter_languages.sh:1`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Error handling ensures that the script fails gracefully and provides useful error messages.
+💡 **Solution:** Add error handling for each critical step in the script.
+
+### 2. The `chunk_code` function in `code_chunker.py` has nested functions and complex logic that can be refactored for better readability.
+📁 **File:** `kaizen/retriever/code_chunker.py:7`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Refactoring complex functions into smaller, well-named functions improves readability and maintainability.
+💡 **Solution:** Refactor the `chunk_code` function to extract nested functions into separate helper functions.
+
+### 3. Lack of error handling in database operations.
+📁 **File:** `kaizen/retriever/custom_vector_store.py:39`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Database operations can fail; it's important to handle exceptions to avoid crashes.
+💡 **Solution:** Add try-except blocks to handle potential database errors.
+
+**Current Code:**
+```python
+
+```
+
+**Suggested Code:**
+```python
+try:
+ with self.get_client() as client:
+ with client.cursor() as cur:
+ cur.execute(query, (query_embedding_normalized.tolist(), repo_id, similarity_top_k))
+ results = cur.fetchall()
+except Exception as e:
+ # Handle exception (e.g., log the error, re-raise, etc.)
+ raise e
+```
+
+### 4. Exception handling in `generate_abstraction` method is too generic.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:218`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Catching all exceptions without specific handling can obscure the root cause of errors and make debugging difficult.
+💡 **Solution:** Catch specific exceptions and handle them appropriately.
+
+**Current Code:**
+```python
+except Exception as e:
+ raise e
+```
+
+**Suggested Code:**
+```python
+except SomeSpecificException as e:
+ logger.error(f"Specific error occurred:{str(e)}")
+ raise e
+```
+
+### 5. Using `os.walk` and `ThreadPoolExecutor` for file parsing can be optimized.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:71`
+⚖️ **Severity:** 6/10
+🔍 **Description:** The current implementation may not efficiently utilize available CPU cores and can be improved for better performance.
+💡 **Solution:** Consider using asynchronous I/O operations or more efficient file traversal methods.
+
+**Current Code:**
+```python
+with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
+ futures =[]
+ for root, _, files in os.walk(repo_path):
+ for file in files:
+ if file.endswith((".py", ".js", ".ts", ".rs")):
+ file_path = os.path.join(root, file)
+ futures.append(executor.submit(self.parse_file, file_path))
+```
+
+**Suggested Code:**
+```python
+import asyncio
+from aiofiles import open as aio_open
+
+async def parse_repository_async(self, repo_path: str):
+ self.total_usage = self.llm_provider.DEFAULT_USAGE
+ logger.info(f"Starting repository setup for:{repo_path}")
+ await self.parse_repository(repo_path)
+ self.store_function_relationships()
+ logger.info("Repository setup completed successfully")
+
+async def parse_file_async(self, file_path: str):
+ logger.debug(f"Parsing file:{file_path}")
+ try:
+ async with aio_open(file_path, "r", encoding="utf-8") as file:
+ content = await file.read()
+ language = self.get_language_from_extension(file_path)
+ chunked_code = chunk_code(content, language)
+ for section, items in chunked_code.items():
+ if isinstance(items, dict):
+ for name, code_info in items.items():
+ await self.process_code_block_async(code_info, file_path, section, name)
+ elif isinstance(items, list):
+ for i, code_info in enumerate(items):
+ await self.process_code_block_async(code_info, file_path, section, f"{section}_{i}")
+ logger.debug(f"Successfully parsed file:{file_path}")
+ except Exception as e:
+ logger.error(f"Error processing file{file_path}:{str(e)}")
+ logger.error(traceback.format_exc())
+```
+
+### 6. The `generate_abstraction` method is too long and complex.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:184`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Long methods can be difficult to read and maintain. They should be broken down into smaller, more manageable functions.
+💡 **Solution:** Refactor the `generate_abstraction` method into smaller helper methods.
+
+**Current Code:**
+```python
+def generate_abstraction(
+ self, code_block: str, language: str, max_tokens: int = 300
+) -> str:
+ prompt = f"""Generate a concise yet comprehensive abstract description of the following{language}code block.
+ Include information about:
+ 1. The purpose or functionality of the code
+ 2. Input parameters and return values (if applicable)
+ 3. Any important algorithms or data structures used
+ 4. Key dependencies or external libraries used
+ 5. Any notable design patterns or architectural choices
+ 6. Potential edge cases or error handling
+
+ Code:
+ ```{language}
+{code_block}
+ ```
+ """
+
+ estimated_prompt_tokens = len(tokenizer.encode(prompt))
+ adjusted_max_tokens = min(max(150, estimated_prompt_tokens), 1000)
+
+ try:
+ abstraction, usage = self.llm_provider.chat_completion(
+ prompt="",
+ messages=[
+{
+ "role": "system",
+ "content": "You are an expert programmer tasked with generating comprehensive and accurate abstractions of code snippets.",
+},
+{"role": "user", "content": prompt},
+ ],
+ custom_model={"max_tokens": adjusted_max_tokens, "model": "small"},
+ )
+ return abstraction, usage
+
+ except Exception as e:
+ raise e
+```
+
+**Suggested Code:**
+```python
+def generate_abstraction(self, code_block: str, language: str, max_tokens: int = 300) -> str:
+ prompt = self._create_prompt(code_block, language)
+ estimated_prompt_tokens = len(tokenizer.encode(prompt))
+ adjusted_max_tokens = min(max(150, estimated_prompt_tokens), 1000)
+
+ try:
+ abstraction, usage = self._get_abstraction_from_llm(prompt, adjusted_max_tokens)
+ return abstraction, usage
+ except Exception as e:
+ raise e
+
+ def _create_prompt(self, code_block: str, language: str) -> str:
+ return f"""Generate a concise yet comprehensive abstract description of the following{language}code block.
+ Include information about:
+ 1. The purpose or functionality of the code
+ 2. Input parameters and return values (if applicable)
+ 3. Any important algorithms or data structures used
+ 4. Key dependencies or external libraries used
+ 5. Any notable design patterns or architectural choices
+ 6. Potential edge cases or error handling
+
+ Code:
+ ```{language}
+{code_block}
+ ```
+ """
+
+ def _get_abstraction_from_llm(self, prompt: str, max_tokens: int) -> str:
+ return self.llm_provider.chat_completion(
+ prompt="",
+ messages=[
+{
+ "role": "system",
+ "content": "You are an expert programmer tasked with generating comprehensive and accurate abstractions of code snippets.",
+},
+{"role": "user", "content": prompt},
+ ],
+ custom_model={"max_tokens": max_tokens, "model": "small"},
+ )
+```
+
+### 7. Logging configuration should be done in the main entry point of the application, not in the module.
+📁 **File:** `kaizen/retriever/tree_sitter_utils.py:8`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Configuring logging in a module can lead to unexpected behavior if the module is imported multiple times.
+💡 **Solution:** Move the logging configuration to the main entry point of the application.
+
+**Current Code:**
+```python
+logging.basicConfig(level=logging.INFO)
+```
+
+**Suggested Code:**
+```python
+
+```
+
+### 8. Duplicate code found in test cases.
+📁 **File:** `tests/retriever/test_chunker.py:98`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Duplicate code can lead to maintenance issues and bugs.
+💡 **Solution:** Refactor the duplicate code into a helper function.
+
+**Current Code:**
+```python
+print_chunks("JavaScript", chunk_code(javascript_code, "javascript"))
+```
+
+**Suggested Code:**
+```python
+def test_chunk(language, code):
+ print_chunks(language, chunk_code(code, language))
+
+test_chunk("Python", python_code)
+test_chunk("JavaScript", javascript_code)
+test_chunk("React", react_nextjs_code)
+```
+
+### 9. New dependencies added without justification.
+📁 **File:** `pyproject.toml:27`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Adding dependencies increases the attack surface and maintenance burden.
+💡 **Solution:** Provide justification for new dependencies in the pull request description.
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (8 issues)
+
+
+Documentation (7 issues)
+
+### 1. The `install_tree_sitter_languages.sh` script lacks comments explaining the purpose of each step.
+📁 **File:** `install_tree_sitter_languages.sh:1`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Comments improve readability and maintainability, especially for complex scripts.
+💡 **Solution:** Add comments explaining the purpose of each step in the script.
+
+### 2. The Dockerfile installs build dependencies and then removes them, which is good practice but can be optimized.
+📁 **File:** `Dockerfile-postgres:4`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Optimizing Dockerfile layers can reduce image size and build time.
+💡 **Solution:** Combine RUN commands to reduce the number of layers.
+
+**Current Code:**
+```dockerfile
+RUN apt-get update && apt-get install -y \
+ build-essential \
+ git \
+ postgresql-server-dev-16
+RUN apt-get remove -y build-essential git postgresql-server-dev-16 \
+ && apt-get autoremove -y \
+ && rm -rf /var/lib/apt/lists/* /pgvector
+```
+
+**Suggested Code:**
+```dockerfile
+RUN apt-get update && apt-get install -y \
+ build-essential \
+ git \
+ postgresql-server-dev-16 \
+ && git clone https://github.com/pgvector/pgvector.git \
+ && cd pgvector \
+ && make \
+ && make install \
+ && apt-get remove -y build-essential git postgresql-server-dev-16 \
+ && apt-get autoremove -y \
+ && rm -rf /var/lib/apt/lists/* /pgvector
+```
+
+### 3. The normalization of the query embedding can be simplified for better readability.
+📁 **File:** `kaizen/retriever/custom_vector_store.py:15`
+⚖️ **Severity:** 3/10
+🔍 **Description:** Simplifying code improves readability and maintainability.
+💡 **Solution:** Combine the normalization steps into a single line.
+
+**Current Code:**
+```python
+query_embedding_np = np.array(query_embedding)
+query_embedding_normalized = query_embedding_np / np.linalg.norm(query_embedding_np)
+```
+
+**Suggested Code:**
+```python
+query_embedding_normalized = np.array(query_embedding) / np.linalg.norm(query_embedding)
+```
+
+### 4. Missing type annotations for method parameters and return types.
+📁 **File:** `kaizen/retriever/custom_vector_store.py:13`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Type annotations improve code readability and help with static analysis.
+💡 **Solution:** Add type annotations to the method parameters and return types.
+
+**Current Code:**
+```python
+def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[dict]:
+```
+
+**Suggested Code:**
+```python
+def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[Dict[str, Any]]:
+```
+
+### 5. Logging level for parsing files should be more granular.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:70`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Using `logger.debug` for file parsing can help in better debugging without cluttering the log files.
+💡 **Solution:** Change logging level to `debug` for detailed logs during file parsing.
+
+**Current Code:**
+```python
+logger.info(f"Parsing repository:{repo_path}")
+```
+
+**Suggested Code:**
+```python
+logger.debug(f"Parsing repository:{repo_path}")
+```
+
+### 6. Public functions should have docstrings.
+📁 **File:** `kaizen/retriever/tree_sitter_utils.py:15`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Docstrings provide a convenient way of associating documentation with functions.
+💡 **Solution:** Add docstrings to public functions.
+
+**Current Code:**
+```python
+
+```
+
+**Suggested Code:**
+```python
+def load_language(language: str) -> Language:
+ """
+ Load the specified language.
+ :param language: The name of the language to load.
+ :return: The loaded Language object.
+ """
+```
+
+### 7. Version bump should be accompanied by a changelog update.
+📁 **File:** `pyproject.toml:3`
+⚖️ **Severity:** 3/10
+🔍 **Description:** A changelog helps track changes and improvements in the project.
+💡 **Solution:** Update the changelog to reflect the changes made in this version.
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (gpt-4o-2024-05-13)
+{"prompt_tokens": 21545, "completion_tokens": 5255, "total_tokens": 26800}
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o/no_eval/pr_335/comments.json b/.experiments/code_review/gpt-4o/no_eval/pr_335/comments.json
new file mode 100644
index 00000000..7ab211a4
--- /dev/null
+++ b/.experiments/code_review/gpt-4o/no_eval/pr_335/comments.json
@@ -0,0 +1,32 @@
+[
+ {
+ "topic": "Function Parameters",
+ "comment": "Removed `reeval_response` parameter from multiple functions. Ensure that this parameter is no longer needed and does not affect the functionality.",
+ "confidence": "critical",
+ "reason": "Removing a parameter can lead to missing functionality if the parameter was being used elsewhere in the code.",
+ "solution": "Verify that `reeval_response` is not required for the functions to operate correctly.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 43,
+ "end_line": 43,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Function Implementation",
+ "comment": "Changed the implementation of `_process_full_diff` to remove `reeval_response` logic. Ensure that the re-evaluation logic is no longer needed.",
+ "confidence": "critical",
+ "reason": "Removing logic can lead to missing functionality if the logic was essential.",
+ "solution": "Verify that the re-evaluation logic is not required for the function to operate correctly.",
+ "actual_code": "resp, usage = self.provider.chat_completion(prompt, user=user)\ndesc = parser.extract_code_from_markdown(resp)\nreturn desc",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 79,
+ "end_line": 83,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 8
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o/no_eval/pr_335/issues.json b/.experiments/code_review/gpt-4o/no_eval/pr_335/issues.json
new file mode 100644
index 00000000..49da45e6
--- /dev/null
+++ b/.experiments/code_review/gpt-4o/no_eval/pr_335/issues.json
@@ -0,0 +1,92 @@
+[
+ {
+ "topic": "Imports",
+ "comment": "The import statement was changed to import from a different module. Ensure that the new module contains the required constants.",
+ "confidence": "important",
+ "reason": "Changing import paths can lead to runtime errors if the new module does not contain the expected constants.",
+ "solution": "Verify that `PR_DESCRIPTION_SYSTEM_PROMPT` exists in `pr_desc_prompts` and is correctly defined.",
+ "actual_code": "from kaizen.llms.prompts.pr_desc_prompts import (",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 8,
+ "end_line": 8,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Initialization",
+ "comment": "The system prompt was changed from `CODE_REVIEW_SYSTEM_PROMPT` to `PR_DESCRIPTION_SYSTEM_PROMPT`. Ensure this change aligns with the intended functionality.",
+ "confidence": "important",
+ "reason": "Changing the system prompt can alter the behavior of the LLMProvider, potentially affecting the output.",
+ "solution": "Confirm that `PR_DESCRIPTION_SYSTEM_PROMPT` is the correct prompt for the intended functionality.",
+ "actual_code": "self.provider.system_prompt = PR_DESCRIPTION_SYSTEM_PROMPT",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 28,
+ "end_line": 28,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Function Parameters",
+ "comment": "Removed `reeval_response` parameter from multiple functions. Ensure that this parameter is no longer needed and does not affect the functionality.",
+ "confidence": "critical",
+ "reason": "Removing a parameter can lead to missing functionality if the parameter was being used elsewhere in the code.",
+ "solution": "Verify that `reeval_response` is not required for the functions to operate correctly.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 43,
+ "end_line": 43,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Function Calls",
+ "comment": "Changed function calls to remove `reeval_response` parameter. Ensure that the new function signatures match the updated calls.",
+ "confidence": "important",
+ "reason": "Mismatch in function signatures can lead to runtime errors.",
+ "solution": "Ensure that `_process_full_diff` and `_process_files` functions do not require `reeval_response` parameter.",
+ "actual_code": "desc = self._process_full_diff(prompt, user)",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 52,
+ "end_line": 52,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Function Implementation",
+ "comment": "Changed the implementation of `_process_full_diff` to remove `reeval_response` logic. Ensure that the re-evaluation logic is no longer needed.",
+ "confidence": "critical",
+ "reason": "Removing logic can lead to missing functionality if the logic was essential.",
+ "solution": "Verify that the re-evaluation logic is not required for the function to operate correctly.",
+ "actual_code": "resp, usage = self.provider.chat_completion(prompt, user=user)\ndesc = parser.extract_code_from_markdown(resp)\nreturn desc",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 79,
+ "end_line": 83,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Prompt Definitions",
+ "comment": "Added new prompt definitions in `pr_desc_prompts.py`. Ensure that these new prompts are correctly formatted and used in the code.",
+ "confidence": "moderate",
+ "reason": "New prompt definitions need to be correctly formatted and integrated into the existing codebase.",
+ "solution": "Review the new prompt definitions for correctness and ensure they are used appropriately.",
+ "actual_code": "PR_DESCRIPTION_SYSTEM_PROMPT = \"\"\"\nAs a senior software developer reviewing code submissions, provide thorough, constructive feedback and suggestions for improvements. Consider best practices, error handling, performance, readability, and maintainability. Offer objective and respectful reviews that help developers enhance their skills and code quality. Use your expertise to provide comprehensive feedback without asking clarifying questions.\n\"\"\"",
+ "fixed_code": "",
+ "file_name": "kaizen/llms/prompts/pr_desc_prompts.py",
+ "start_line": 1,
+ "end_line": 3,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 4
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o/no_eval/pr_335/review.md b/.experiments/code_review/gpt-4o/no_eval/pr_335/review.md
new file mode 100644
index 00000000..e13a98d6
--- /dev/null
+++ b/.experiments/code_review/gpt-4o/no_eval/pr_335/review.md
@@ -0,0 +1,151 @@
+PR URL: https://github.com/Cloud-Code-AI/kaizen/pull/335
+
+# 🔍 Code Review Summary
+
+❗ **Attention Required:** This push has potential issues. 🚨
+
+## 📊 Stats
+- Total Issues: 6
+- Critical: 2
+- Important: 3
+- Minor: 1
+- Files Affected: 2
+## 🏆 Code Quality
+[█████████████████░░░] 85% (Good)
+
+## 🚨 Critical Issues
+
+
+Function Parameters (2 issues)
+
+### 1. Removed `reeval_response` parameter from multiple functions. Ensure that this parameter is no longer needed and does not affect the functionality.
+📁 **File:** `kaizen/generator/pr_description.py:43`
+⚖️ **Severity:** 8/10
+🔍 **Description:** Removing a parameter can lead to missing functionality if the parameter was being used elsewhere in the code.
+💡 **Solution:** Verify that `reeval_response` is not required for the functions to operate correctly.
+
+### 2. Changed the implementation of `_process_full_diff` to remove `reeval_response` logic. Ensure that the re-evaluation logic is no longer needed.
+📁 **File:** `kaizen/generator/pr_description.py:79`
+⚖️ **Severity:** 8/10
+🔍 **Description:** Removing logic can lead to missing functionality if the logic was essential.
+💡 **Solution:** Verify that the re-evaluation logic is not required for the function to operate correctly.
+
+**Current Code:**
+```python
+resp, usage = self.provider.chat_completion(prompt, user=user)
+desc = parser.extract_code_from_markdown(resp)
+return desc
+```
+
+**Suggested Code:**
+```python
+
+```
+
+
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Imports (3 issues)
+
+### 1. The import statement was changed to import from a different module. Ensure that the new module contains the required constants.
+📁 **File:** `kaizen/generator/pr_description.py:8`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Changing import paths can lead to runtime errors if the new module does not contain the expected constants.
+💡 **Solution:** Verify that `PR_DESCRIPTION_SYSTEM_PROMPT` exists in `pr_desc_prompts` and is correctly defined.
+
+**Current Code:**
+```python
+from kaizen.llms.prompts.pr_desc_prompts import (
+```
+
+**Suggested Code:**
+```python
+
+```
+
+### 2. The system prompt was changed from `CODE_REVIEW_SYSTEM_PROMPT` to `PR_DESCRIPTION_SYSTEM_PROMPT`. Ensure this change aligns with the intended functionality.
+📁 **File:** `kaizen/generator/pr_description.py:28`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Changing the system prompt can alter the behavior of the LLMProvider, potentially affecting the output.
+💡 **Solution:** Confirm that `PR_DESCRIPTION_SYSTEM_PROMPT` is the correct prompt for the intended functionality.
+
+**Current Code:**
+```python
+self.provider.system_prompt = PR_DESCRIPTION_SYSTEM_PROMPT
+```
+
+**Suggested Code:**
+```python
+
+```
+
+### 3. Changed function calls to remove `reeval_response` parameter. Ensure that the new function signatures match the updated calls.
+📁 **File:** `kaizen/generator/pr_description.py:52`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Mismatch in function signatures can lead to runtime errors.
+💡 **Solution:** Ensure that `_process_full_diff` and `_process_files` functions do not require `reeval_response` parameter.
+
+**Current Code:**
+```python
+desc = self._process_full_diff(prompt, user)
+```
+
+**Suggested Code:**
+```python
+
+```
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (1 issue)
+
+
+Prompt Definitions (1 issue)
+
+### 1. Added new prompt definitions in `pr_desc_prompts.py`. Ensure that these new prompts are correctly formatted and used in the code.
+📁 **File:** `kaizen/llms/prompts/pr_desc_prompts.py:1`
+⚖️ **Severity:** 4/10
+🔍 **Description:** New prompt definitions need to be correctly formatted and integrated into the existing codebase.
+💡 **Solution:** Review the new prompt definitions for correctness and ensure they are used appropriately.
+
+**Current Code:**
+```python
+PR_DESCRIPTION_SYSTEM_PROMPT = """
+As a senior software developer reviewing code submissions, provide thorough, constructive feedback and suggestions for improvements. Consider best practices, error handling, performance, readability, and maintainability. Offer objective and respectful reviews that help developers enhance their skills and code quality. Use your expertise to provide comprehensive feedback without asking clarifying questions.
+"""
+```
+
+**Suggested Code:**
+```python
+
+```
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (gpt-4o-2024-05-13)
+{"prompt_tokens": 6751, "completion_tokens": 1126, "total_tokens": 7877}
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o/no_eval/pr_440/comments.json b/.experiments/code_review/gpt-4o/no_eval/pr_440/comments.json
new file mode 100644
index 00000000..0637a088
--- /dev/null
+++ b/.experiments/code_review/gpt-4o/no_eval/pr_440/comments.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o/no_eval/pr_440/issues.json b/.experiments/code_review/gpt-4o/no_eval/pr_440/issues.json
new file mode 100644
index 00000000..ccaaf465
--- /dev/null
+++ b/.experiments/code_review/gpt-4o/no_eval/pr_440/issues.json
@@ -0,0 +1,47 @@
+[
+ {
+ "topic": "Comment Clarity",
+ "comment": "The comment 'TODO: DONT PUSH DUPLICATE' is ambiguous and lacks context.",
+ "confidence": "moderate",
+ "reason": "Comments should provide clear guidance or context for future developers.",
+ "solution": "Provide a more descriptive comment explaining what needs to be done to avoid pushing duplicates.",
+ "actual_code": "# TODO: DONT PUSH DUPLICATE",
+ "fixed_code": "# TODO: Ensure that duplicate embeddings are not pushed to the database.",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 157,
+ "end_line": 157,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 3
+ },
+ {
+ "topic": "Code Removal Impact",
+ "comment": "The removal of the llama-index-llms-openai dependency might cause issues if any part of the codebase relies on it.",
+ "confidence": "important",
+ "reason": "Removing dependencies without ensuring they are not used elsewhere can lead to runtime errors.",
+ "solution": "Verify that no part of the codebase uses llama-index-llms-openai before removing it.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "pyproject.toml",
+ "start_line": 27,
+ "end_line": 28,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Hardcoded Paths",
+ "comment": "The path './github_app/' is hardcoded, which can cause issues in different environments.",
+ "confidence": "important",
+ "reason": "Hardcoded paths can lead to errors when the code is run in different environments or directories.",
+ "solution": "Use a configuration file or environment variables to manage paths.",
+ "actual_code": "analyzer.setup_repository(\"./github_app/\")",
+ "fixed_code": "analyzer.setup_repository(os.getenv('GITHUB_APP_PATH', './github_app/'))",
+ "file_name": "examples/ragify_codebase/main.py",
+ "start_line": 7,
+ "end_line": 7,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o/no_eval/pr_440/review.md b/.experiments/code_review/gpt-4o/no_eval/pr_440/review.md
new file mode 100644
index 00000000..ab3069eb
--- /dev/null
+++ b/.experiments/code_review/gpt-4o/no_eval/pr_440/review.md
@@ -0,0 +1,92 @@
+PR URL: https://github.com/Cloud-Code-AI/kaizen/pull/440
+
+# 🔍 Code Review Summary
+
+✅ **All Clear:** This commit looks good! 👍
+
+## 📊 Stats
+- Total Issues: 3
+- Critical: 0
+- Important: 2
+- Minor: 1
+- Files Affected: 3
+## 🏆 Code Quality
+[█████████████████░░░] 85% (Good)
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Code Removal Impact (2 issues)
+
+### 1. The removal of the llama-index-llms-openai dependency might cause issues if any part of the codebase relies on it.
+📁 **File:** `pyproject.toml:27`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Removing dependencies without ensuring they are not used elsewhere can lead to runtime errors.
+💡 **Solution:** Verify that no part of the codebase uses llama-index-llms-openai before removing it.
+
+### 2. The path './github_app/' is hardcoded, which can cause issues in different environments.
+📁 **File:** `examples/ragify_codebase/main.py:7`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Hardcoded paths can lead to errors when the code is run in different environments or directories.
+💡 **Solution:** Use a configuration file or environment variables to manage paths.
+
+**Current Code:**
+```python
+analyzer.setup_repository("./github_app/")
+```
+
+**Suggested Code:**
+```python
+analyzer.setup_repository(os.getenv('GITHUB_APP_PATH', './github_app/'))
+```
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (1 issue)
+
+
+Comment Clarity (1 issue)
+
+### 1. The comment 'TODO: DONT PUSH DUPLICATE' is ambiguous and lacks context.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:157`
+⚖️ **Severity:** 3/10
+🔍 **Description:** Comments should provide clear guidance or context for future developers.
+💡 **Solution:** Provide a more descriptive comment explaining what needs to be done to avoid pushing duplicates.
+
+**Current Code:**
+```python
+# TODO: DONT PUSH DUPLICATE
+```
+
+**Suggested Code:**
+```python
+# TODO: Ensure that duplicate embeddings are not pushed to the database.
+```
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (gpt-4o-2024-05-13)
+{"prompt_tokens": 1364, "completion_tokens": 531, "total_tokens": 1895}
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o/no_eval/pr_476/comments.json b/.experiments/code_review/gpt-4o/no_eval/pr_476/comments.json
new file mode 100644
index 00000000..f9fab36c
--- /dev/null
+++ b/.experiments/code_review/gpt-4o/no_eval/pr_476/comments.json
@@ -0,0 +1,31 @@
+[
+ {
+ "topic": "Error Handling",
+ "comment": "Generic exception handling without logging specific error details.",
+ "confidence": "critical",
+ "reason": "Using a generic `except Exception` block without logging the specific error details can make debugging difficult.",
+ "solution": "Log the specific error message in the exception block.",
+ "actual_code": "except Exception:\n print(\"Error\")",
+ "fixed_code": "except Exception as e:\n print(f\"Error:{e}\")",
+ "file_name": "github_app/github_helper/pull_requests.py",
+ "start_line": 140,
+ "end_line": 141,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "1",
+ "end_line": "1",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o/no_eval/pr_476/issues.json b/.experiments/code_review/gpt-4o/no_eval/pr_476/issues.json
new file mode 100644
index 00000000..e5c492a5
--- /dev/null
+++ b/.experiments/code_review/gpt-4o/no_eval/pr_476/issues.json
@@ -0,0 +1,91 @@
+[
+ {
+ "topic": "Error Handling",
+ "comment": "Generic exception handling without logging specific error details.",
+ "confidence": "critical",
+ "reason": "Using a generic `except Exception` block without logging the specific error details can make debugging difficult.",
+ "solution": "Log the specific error message in the exception block.",
+ "actual_code": "except Exception:\n print(\"Error\")",
+ "fixed_code": "except Exception as e:\n print(f\"Error:{e}\")",
+ "file_name": "github_app/github_helper/pull_requests.py",
+ "start_line": 140,
+ "end_line": 141,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Code Readability",
+ "comment": "Unnecessary print statements left in the code.",
+ "confidence": "important",
+ "reason": "Leaving print statements in production code can clutter the output and is generally not recommended.",
+ "solution": "Remove or replace print statements with proper logging.",
+ "actual_code": "print(\"diff: \", diff_text)\nprint(\"pr_files\", pr_files)",
+ "fixed_code": "",
+ "file_name": "examples/code_review/main.py",
+ "start_line": 21,
+ "end_line": 22,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Function Signature",
+ "comment": "Modified function signature without updating all references.",
+ "confidence": "important",
+ "reason": "Changing a function signature without updating all references can lead to runtime errors.",
+ "solution": "Ensure all references to `post_pull_request` are updated to include the new `tests` parameter.",
+ "actual_code": "def post_pull_request(url, data, installation_id, tests=None):",
+ "fixed_code": "",
+ "file_name": "github_app/github_helper/pull_requests.py",
+ "start_line": 107,
+ "end_line": 107,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 7
+ },
+ {
+ "topic": "Code Maintainability",
+ "comment": "Redundant code for sorting files.",
+ "confidence": "moderate",
+ "reason": "The custom sorting logic can be replaced with Python's built-in sorting functions for better readability and maintainability.",
+ "solution": "Use Python's `sorted` function with a key parameter.",
+ "actual_code": "sorted_files =[]\nfor file in files:\n min_index = len(sorted_files)\n file_name = file[\"filename\"]\n for i, sorted_file in enumerate(sorted_files):\n if file_name < sorted_file[\"filename\"]:\n min_index = i\n break\n sorted_files.insert(min_index, file)\nreturn sorted_files",
+ "fixed_code": "sorted_files = sorted(files, key=lambda x: x[\"filename\"])\nreturn sorted_files",
+ "file_name": "github_app/github_helper/pull_requests.py",
+ "start_line": 185,
+ "end_line": 194,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 5
+ },
+ {
+ "topic": "Code Quality",
+ "comment": "Unnecessary variable assignment.",
+ "confidence": "low",
+ "reason": "Assigning `issues` in the loop is unnecessary and can be removed.",
+ "solution": "Remove the assignment of `issues` within the loop.",
+ "actual_code": "issues = review",
+ "fixed_code": "",
+ "file_name": "github_app/github_helper/pull_requests.py",
+ "start_line": 153,
+ "end_line": 153,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 3
+ },
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "1",
+ "end_line": "1",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o/no_eval/pr_476/review.md b/.experiments/code_review/gpt-4o/no_eval/pr_476/review.md
new file mode 100644
index 00000000..06252bed
--- /dev/null
+++ b/.experiments/code_review/gpt-4o/no_eval/pr_476/review.md
@@ -0,0 +1,144 @@
+PR URL: https://github.com/Cloud-Code-AI/kaizen/pull/476
+
+# 🔍 Code Review Summary
+
+❗ **Attention Required:** This push has potential issues. 🚨
+
+## 📊 Stats
+- Total Issues: 6
+- Critical: 2
+- Important: 2
+- Minor: 1
+- Files Affected: 3
+## 🏆 Code Quality
+[█████████████████░░░] 85% (Good)
+
+## 🚨 Critical Issues
+
+
+Error Handling (2 issues)
+
+### 1. Generic exception handling without logging specific error details.
+📁 **File:** `github_app/github_helper/pull_requests.py:140`
+⚖️ **Severity:** 9/10
+🔍 **Description:** Using a generic `except Exception` block without logging the specific error details can make debugging difficult.
+💡 **Solution:** Log the specific error message in the exception block.
+
+**Current Code:**
+```python
+except Exception:
+ print("Error")
+```
+
+**Suggested Code:**
+```python
+except Exception as e:
+ print(f"Error:{e}")
+```
+
+### 2. Changes made to sensitive file
+📁 **File:** `config.json:1`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to config.json, which needs review
+💡 **Solution:** NA
+
+
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Code Readability (2 issues)
+
+### 1. Unnecessary print statements left in the code.
+📁 **File:** `examples/code_review/main.py:21`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Leaving print statements in production code can clutter the output and is generally not recommended.
+💡 **Solution:** Remove or replace print statements with proper logging.
+
+**Current Code:**
+```python
+print("diff: ", diff_text)
+print("pr_files", pr_files)
+```
+
+**Suggested Code:**
+```python
+
+```
+
+### 2. Modified function signature without updating all references.
+📁 **File:** `github_app/github_helper/pull_requests.py:107`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Changing a function signature without updating all references can lead to runtime errors.
+💡 **Solution:** Ensure all references to `post_pull_request` are updated to include the new `tests` parameter.
+
+**Current Code:**
+```python
+def post_pull_request(url, data, installation_id, tests=None):
+```
+
+**Suggested Code:**
+```python
+
+```
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (2 issues)
+
+
+Code Maintainability (1 issue)
+
+### 1. Redundant code for sorting files.
+📁 **File:** `github_app/github_helper/pull_requests.py:185`
+⚖️ **Severity:** 5/10
+🔍 **Description:** The custom sorting logic can be replaced with Python's built-in sorting functions for better readability and maintainability.
+💡 **Solution:** Use Python's `sorted` function with a key parameter.
+
+**Current Code:**
+```python
+sorted_files =[]
+for file in files:
+ min_index = len(sorted_files)
+ file_name = file["filename"]
+ for i, sorted_file in enumerate(sorted_files):
+ if file_name < sorted_file["filename"]:
+ min_index = i
+ break
+ sorted_files.insert(min_index, file)
+return sorted_files
+```
+
+**Suggested Code:**
+```python
+sorted_files = sorted(files, key=lambda x: x["filename"])
+return sorted_files
+```
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (gpt-4o-2024-05-13)
+{"prompt_tokens": 4007, "completion_tokens": 873, "total_tokens": 4880}
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o/no_eval/pr_5/comments.json b/.experiments/code_review/gpt-4o/no_eval/pr_5/comments.json
new file mode 100644
index 00000000..b67c7510
--- /dev/null
+++ b/.experiments/code_review/gpt-4o/no_eval/pr_5/comments.json
@@ -0,0 +1,47 @@
+[
+ {
+ "topic": "API Call Error Handling",
+ "comment": "The API call to 'completion' lacks a retry mechanism.",
+ "confidence": "critical",
+ "reason": "API calls can fail due to network issues or server errors, and without a retry mechanism, the function may fail unexpectedly.",
+ "solution": "Implement a retry mechanism with exponential backoff for the API call.",
+ "actual_code": "response = completion(\n model=os.environ.get(\"model\", \"anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1\"), messages=messages\n)",
+ "fixed_code": "import time\n\nfor attempt in range(3):\n try:\n response = completion(\n model=os.environ.get(\"model\", \"anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1\"), messages=messages\n )\n break\n except Exception as e:\n if attempt < 2:\n time.sleep(2 ** attempt)\n else:\n raise e",
+ "file_name": "main.py",
+ "start_line": 66,
+ "end_line": 68,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Silent Failure",
+ "comment": "The exception handling for JSON decoding fails silently without logging.",
+ "confidence": "critical",
+ "reason": "Silent failures make it difficult to diagnose issues when they occur.",
+ "solution": "Add logging to capture the exception details.",
+ "actual_code": "except json.JSONDecodeError:\n result ={",
+ "fixed_code": "except json.JSONDecodeError as e:\n print(f\"Failed to parse content for applicant:{e}\")\n result ={",
+ "file_name": "main.py",
+ "start_line": 82,
+ "end_line": 84,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Division by Zero",
+ "comment": "Potential division by zero when calculating total tokens.",
+ "confidence": "critical",
+ "reason": "If 'total_tokens' is zero, it will cause a division by zero error.",
+ "solution": "Add a check to ensure 'total_tokens' is not zero before performing the division.",
+ "actual_code": "total_tokens = total_input_tokens + total_output_tokens",
+ "fixed_code": "total_tokens = total_input_tokens + total_output_tokens\nif total_tokens == 0:\n print(\"No tokens were used.\")\n return",
+ "file_name": "main.py",
+ "start_line": 156,
+ "end_line": 158,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 7
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o/no_eval/pr_5/issues.json b/.experiments/code_review/gpt-4o/no_eval/pr_5/issues.json
new file mode 100644
index 00000000..0847dab5
--- /dev/null
+++ b/.experiments/code_review/gpt-4o/no_eval/pr_5/issues.json
@@ -0,0 +1,107 @@
+[
+ {
+ "topic": "Unused Import",
+ "comment": "The import statement for the 'random' module is unnecessary.",
+ "confidence": "trivial",
+ "reason": "The 'random' module is imported but never used in the code.",
+ "solution": "Remove the import statement for 'random'.",
+ "actual_code": "import random # Unused import",
+ "fixed_code": "",
+ "file_name": "main.py",
+ "start_line": 8,
+ "end_line": 8,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 1
+ },
+ {
+ "topic": "API Call Error Handling",
+ "comment": "The API call to 'completion' lacks a retry mechanism.",
+ "confidence": "critical",
+ "reason": "API calls can fail due to network issues or server errors, and without a retry mechanism, the function may fail unexpectedly.",
+ "solution": "Implement a retry mechanism with exponential backoff for the API call.",
+ "actual_code": "response = completion(\n model=os.environ.get(\"model\", \"anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1\"), messages=messages\n)",
+ "fixed_code": "import time\n\nfor attempt in range(3):\n try:\n response = completion(\n model=os.environ.get(\"model\", \"anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1\"), messages=messages\n )\n break\n except Exception as e:\n if attempt < 2:\n time.sleep(2 ** attempt)\n else:\n raise e",
+ "file_name": "main.py",
+ "start_line": 66,
+ "end_line": 68,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Silent Failure",
+ "comment": "The exception handling for JSON decoding fails silently without logging.",
+ "confidence": "critical",
+ "reason": "Silent failures make it difficult to diagnose issues when they occur.",
+ "solution": "Add logging to capture the exception details.",
+ "actual_code": "except json.JSONDecodeError:\n result ={",
+ "fixed_code": "except json.JSONDecodeError as e:\n print(f\"Failed to parse content for applicant:{e}\")\n result ={",
+ "file_name": "main.py",
+ "start_line": 82,
+ "end_line": 84,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Inefficient Progress Printing",
+ "comment": "The progress printing method is inefficient.",
+ "confidence": "important",
+ "reason": "Printing progress in this manner can be slow and resource-intensive.",
+ "solution": "Use a more efficient method for printing progress, such as updating the progress less frequently.",
+ "actual_code": "print(f\"\\rProgress:[{'=' * int(50 * progress):<50}]{progress:.0%}\", end=\"\", flush=True)",
+ "fixed_code": "if index % 10 == 0 or index == total - 1:\n print(f\"\\rProgress:[{'=' * int(50 * progress):<50}]{progress:.0%}\", end=\"\", flush=True)",
+ "file_name": "main.py",
+ "start_line": 121,
+ "end_line": 122,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Redundant Code",
+ "comment": "The check for an empty DataFrame is redundant.",
+ "confidence": "moderate",
+ "reason": "The code already handles an empty DataFrame gracefully, so this check is unnecessary.",
+ "solution": "Remove the redundant check for an empty DataFrame.",
+ "actual_code": "if len(df) == 0:\n return",
+ "fixed_code": "",
+ "file_name": "main.py",
+ "start_line": 142,
+ "end_line": 143,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 3
+ },
+ {
+ "topic": "Division by Zero",
+ "comment": "Potential division by zero when calculating total tokens.",
+ "confidence": "critical",
+ "reason": "If 'total_tokens' is zero, it will cause a division by zero error.",
+ "solution": "Add a check to ensure 'total_tokens' is not zero before performing the division.",
+ "actual_code": "total_tokens = total_input_tokens + total_output_tokens",
+ "fixed_code": "total_tokens = total_input_tokens + total_output_tokens\nif total_tokens == 0:\n print(\"No tokens were used.\")\n return",
+ "file_name": "main.py",
+ "start_line": 156,
+ "end_line": 158,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "File Not Found Handling",
+ "comment": "No error handling for file not found.",
+ "confidence": "important",
+ "reason": "If the specified file does not exist, the program will crash.",
+ "solution": "Add error handling to check if the file exists before processing.",
+ "actual_code": "main(input_file)",
+ "fixed_code": "if not os.path.isfile(input_file):\n print(f\"File not found:{input_file}\")\n return\nmain(input_file)",
+ "file_name": "main.py",
+ "start_line": 174,
+ "end_line": 175,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 6
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/gpt-4o/no_eval/pr_5/review.md b/.experiments/code_review/gpt-4o/no_eval/pr_5/review.md
new file mode 100644
index 00000000..f36368e6
--- /dev/null
+++ b/.experiments/code_review/gpt-4o/no_eval/pr_5/review.md
@@ -0,0 +1,182 @@
+PR URL: https://github.com/sauravpanda/applicant-screening/pull/5
+
+# 🔍 Code Review Summary
+
+❗ **Attention Required:** This push has potential issues. 🚨
+
+## 📊 Stats
+- Total Issues: 7
+- Critical: 3
+- Important: 2
+- Minor: 1
+- Files Affected: 1
+## 🏆 Code Quality
+[█████████████████░░░] 85% (Good)
+
+## 🚨 Critical Issues
+
+
+API Call Error Handling (3 issues)
+
+### 1. The API call to 'completion' lacks a retry mechanism.
+📁 **File:** `main.py:66`
+⚖️ **Severity:** 9/10
+🔍 **Description:** API calls can fail due to network issues or server errors, and without a retry mechanism, the function may fail unexpectedly.
+💡 **Solution:** Implement a retry mechanism with exponential backoff for the API call.
+
+**Current Code:**
+```python
+response = completion(
+ model=os.environ.get("model", "anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1"), messages=messages
+)
+```
+
+**Suggested Code:**
+```python
+import time
+
+for attempt in range(3):
+ try:
+ response = completion(
+ model=os.environ.get("model", "anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1"), messages=messages
+ )
+ break
+ except Exception as e:
+ if attempt < 2:
+ time.sleep(2 ** attempt)
+ else:
+ raise e
+```
+
+### 2. The exception handling for JSON decoding fails silently without logging.
+📁 **File:** `main.py:82`
+⚖️ **Severity:** 8/10
+🔍 **Description:** Silent failures make it difficult to diagnose issues when they occur.
+💡 **Solution:** Add logging to capture the exception details.
+
+**Current Code:**
+```python
+except json.JSONDecodeError:
+ result ={
+```
+
+**Suggested Code:**
+```python
+except json.JSONDecodeError as e:
+ print(f"Failed to parse content for applicant:{e}")
+ result ={
+```
+
+### 3. Potential division by zero when calculating total tokens.
+📁 **File:** `main.py:156`
+⚖️ **Severity:** 7/10
+🔍 **Description:** If 'total_tokens' is zero, it will cause a division by zero error.
+💡 **Solution:** Add a check to ensure 'total_tokens' is not zero before performing the division.
+
+**Current Code:**
+```python
+total_tokens = total_input_tokens + total_output_tokens
+```
+
+**Suggested Code:**
+```python
+total_tokens = total_input_tokens + total_output_tokens
+if total_tokens == 0:
+ print("No tokens were used.")
+ return
+```
+
+
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Inefficient Progress Printing (2 issues)
+
+### 1. The progress printing method is inefficient.
+📁 **File:** `main.py:121`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Printing progress in this manner can be slow and resource-intensive.
+💡 **Solution:** Use a more efficient method for printing progress, such as updating the progress less frequently.
+
+**Current Code:**
+```python
+print(f"\rProgress:[{'=' * int(50 * progress):<50}]{progress:.0%}", end="", flush=True)
+```
+
+**Suggested Code:**
+```python
+if index % 10 == 0 or index == total - 1:
+ print(f"\rProgress:[{'=' * int(50 * progress):<50}]{progress:.0%}", end="", flush=True)
+```
+
+### 2. No error handling for file not found.
+📁 **File:** `main.py:174`
+⚖️ **Severity:** 6/10
+🔍 **Description:** If the specified file does not exist, the program will crash.
+💡 **Solution:** Add error handling to check if the file exists before processing.
+
+**Current Code:**
+```python
+main(input_file)
+```
+
+**Suggested Code:**
+```python
+if not os.path.isfile(input_file):
+ print(f"File not found:{input_file}")
+ return
+main(input_file)
+```
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (2 issues)
+
+
+Redundant Code (1 issue)
+
+### 1. The check for an empty DataFrame is redundant.
+📁 **File:** `main.py:142`
+⚖️ **Severity:** 3/10
+🔍 **Description:** The code already handles an empty DataFrame gracefully, so this check is unnecessary.
+💡 **Solution:** Remove the redundant check for an empty DataFrame.
+
+**Current Code:**
+```python
+if len(df) == 0:
+ return
+```
+
+**Suggested Code:**
+```python
+
+```
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (gpt-4o-2024-05-13)
+{"prompt_tokens": 6154, "completion_tokens": 1315, "total_tokens": 7469}
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_222/comments.json b/.experiments/code_review/haiku/no_eval/pr_222/comments.json
new file mode 100644
index 00000000..7edfffbe
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_222/comments.json
@@ -0,0 +1,72 @@
+[
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to Dockerfile, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "Dockerfile",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to docker-compose.yml, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "15",
+ "end_line": "15",
+ "side": "RIGHT",
+ "file_name": "docker-compose.yml",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Version Control",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to .gitignore, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "164",
+ "end_line": "164",
+ "side": "RIGHT",
+ "file_name": ".gitignore",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Database",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to db_setup/init.sql, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "1",
+ "end_line": "1",
+ "side": "RIGHT",
+ "file_name": "db_setup/init.sql",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_222/issues.json b/.experiments/code_review/haiku/no_eval/pr_222/issues.json
new file mode 100644
index 00000000..b3bb8f78
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_222/issues.json
@@ -0,0 +1,312 @@
+[
+ {
+ "topic": "Dockerfile",
+ "comment": "The Dockerfile should install system dependencies before installing Poetry.",
+ "confidence": "important",
+ "reason": "Installing system dependencies before Poetry ensures that the necessary build tools are available for the Poetry installation process.",
+ "solution": "Move the system dependency installation block before the Poetry installation step.",
+ "actual_code": "",
+ "fixed_code": "# Install system dependencies\nRUN apt-get update && apt-get install -y \\\n git \\\n build-essential \\\n && rm -rf /var/lib/apt/lists/*\n\n# Install Poetry\nRUN pip install --no-cache-dir poetry",
+ "file_name": "Dockerfile",
+ "start_line": 7,
+ "end_line": 11,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Dockerfile",
+ "comment": "The Dockerfile should make the Tree-sitter language installation script executable before running it.",
+ "confidence": "important",
+ "reason": "Making the script executable ensures that it can be properly executed during the build process.",
+ "solution": "Add a step to make the script executable before running it.",
+ "actual_code": "",
+ "fixed_code": "# Make the installation script executable\nRUN chmod +x install_tree_sitter_languages.sh\n\n# Run the Tree-sitter language installation script\nRUN ./install_tree_sitter_languages.sh",
+ "file_name": "Dockerfile",
+ "start_line": 25,
+ "end_line": 29,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "config.json",
+ "comment": "The config.json file should use environment variables for sensitive information like API keys.",
+ "confidence": "important",
+ "reason": "Using environment variables instead of hardcoding sensitive information in the config file improves security and makes the configuration more flexible.",
+ "solution": "Replace the API key and API base values with environment variable references, e.g., `os.environ['AZURE_API_KEY']`.",
+ "actual_code": "\"api_key\": \"azure/text-embedding-small\",\n\"api_base\": \"azure/gpt-4o-mini\"",
+ "fixed_code": "\"api_key\": \"os.environ['AZURE_API_KEY']\",\n\"api_base\": \"os.environ['AZURE_API_BASE']\"",
+ "file_name": "config.json",
+ "start_line": 13,
+ "end_line": 14,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "docker-compose.yml",
+ "comment": "The docker-compose.yml file should use the same Postgres image as the Dockerfile-postgres file.",
+ "confidence": "important",
+ "reason": "Using the same Postgres image ensures consistency and reduces potential issues with different versions or configurations.",
+ "solution": "Replace the Postgres image in the docker-compose.yml file with the one used in the Dockerfile-postgres file.",
+ "actual_code": "image: postgres:16-bullseye",
+ "fixed_code": "build:\n context: .\n dockerfile: Dockerfile-postgres",
+ "file_name": "docker-compose.yml",
+ "start_line": 18,
+ "end_line": 26,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "examples/ragify_codebase/main.py",
+ "comment": "The main.py file should provide more context and examples for using the RepositoryAnalyzer.",
+ "confidence": "moderate",
+ "reason": "Adding more context and examples will help users understand how to effectively use the RepositoryAnalyzer in their own projects.",
+ "solution": "Expand the example code to include more comments and explanations, such as how to set up the repository, how to perform different types of queries, and how to handle the results.",
+ "actual_code": "",
+ "fixed_code": "# Initialize the analyzer\nanalyzer = RepositoryAnalyzer()\n\n# Set up the repository (do this when you first analyze a repo or when you want to update it)\nanalyzer.setup_repository(\"./github_app/\")\n\n# Perform queries (you can do this as many times as you want without calling setup_repository again)\nresults = analyzer.query(\"Find functions that handle authentication\")\nfor result in results:\n print(f\"File:{result['file_path']}\")\n print(f\"Abstraction:{result['abstraction']}\")\n print(f\"result:\\n{result}\")\n print(f\"Relevance Score:{result['relevance_score']}\")\n print(\"---\")\n\n# If you make changes to the repository and want to update the analysis:\nanalyzer.setup_repository(\"/path/to/your/repo\")\n\n# Then you can query again with the updated data\nresults = analyzer.query(\"authentication\")",
+ "file_name": "examples/ragify_codebase/main.py",
+ "start_line": 1,
+ "end_line": 22,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 5
+ },
+ {
+ "topic": "Normalization of query embedding",
+ "comment": "The query embedding is normalized correctly by dividing it by its L2 norm. This ensures that the query embedding has a unit length, which is important for cosine similarity calculations.",
+ "confidence": "positive",
+ "reason": "Normalizing the query embedding is a common best practice in vector similarity search to ensure that the magnitude of the vector does not affect the similarity calculation.",
+ "solution": "The current implementation of normalizing the query embedding is appropriate and does not require any changes.",
+ "actual_code": "query_embedding_normalized = query_embedding_np / np.linalg.norm(query_embedding_np)",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 15,
+ "end_line": 16,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 5
+ },
+ {
+ "topic": "SQL query construction",
+ "comment": "The SQL query is well-constructed and includes the necessary filters and ordering to retrieve the top-k most similar results based on the cosine similarity.",
+ "confidence": "positive",
+ "reason": "The query includes a join between the `embeddings` table and the `function_abstractions` and `files` tables to filter the results by the repository ID. The `ORDER BY` and `LIMIT` clauses ensure that only the top-k most similar results are returned.",
+ "solution": "The current implementation of the SQL query is appropriate and does not require any changes.",
+ "actual_code": "```sql\nSELECT \n e.node_id,\n e.text,\n e.metadata,\n 1 - (e.embedding <=> %s::vector) as similarity\nFROM \n{self.table_name}e\nJOIN \n function_abstractions fa ON e.node_id = fa.function_id::text\nJOIN \n files f ON fa.file_id = f.file_id\nWHERE \n f.repo_id = %s\nORDER BY \n similarity DESC\nLIMIT \n %s\n```",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 19,
+ "end_line": 36,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 5
+ },
+ {
+ "topic": "Metadata handling",
+ "comment": "The code correctly handles the metadata column, converting it to a dictionary if it is not already in that format.",
+ "confidence": "positive",
+ "reason": "The `row[2]` value is checked to see if it is already a dictionary, and if not, it is converted to a dictionary using the `Json` class from `psycopg2.extras`.",
+ "solution": "The current implementation of metadata handling is appropriate and does not require any changes.",
+ "actual_code": "\"metadata\": row[2] if isinstance(row[2], dict) else Json(row[2]),",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 48,
+ "end_line": 48,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 4
+ },
+ {
+ "topic": "Feedback system implementation",
+ "comment": "The `AbstractionFeedback` class provides a simple and effective way to store and retrieve feedback for code abstractions.",
+ "confidence": "positive",
+ "reason": "The class uses a dictionary to store the feedback, with the code ID as the key and the feedback details as the value. The `add_feedback` and `get_feedback` methods provide a clear interface for managing the feedback.",
+ "solution": "The current implementation of the `AbstractionFeedback` class is appropriate and does not require any changes.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/feedback_system.py",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 4
+ },
+ {
+ "topic": "Unused Imports",
+ "comment": "The following imports are not used in the code and can be removed: `from llama_index.core.schema import TextNode`, `from concurrent.futures import ThreadPoolExecutor, as_completed`, `from llama_index.embeddings.litellm import LiteLLMEmbedding`, `from llama_index.core import QueryBundle`.",
+ "confidence": "moderate",
+ "reason": "Unused imports can clutter the codebase and make it harder to maintain.",
+ "solution": "Remove the unused imports to improve code readability and maintainability.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 7,
+ "end_line": 19,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 3
+ },
+ {
+ "topic": "Logging Configuration",
+ "comment": "The logging configuration is set up in the global scope, which can lead to issues if the module is imported in multiple places. Consider moving the logging setup to a function or class initialization to ensure it's only configured once.",
+ "confidence": "moderate",
+ "reason": "Global logging configuration can cause conflicts if the module is used in multiple places.",
+ "solution": "Move the logging setup to a function or class initialization to ensure it's only configured once.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 22,
+ "end_line": 26,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 4
+ },
+ {
+ "topic": "Tokenizer Initialization",
+ "comment": "The tokenizer is initialized in the global scope, which can lead to issues if the module is imported in multiple places. Consider moving the tokenizer initialization to a function or class initialization to ensure it's only initialized once.",
+ "confidence": "moderate",
+ "reason": "Global tokenizer initialization can cause conflicts if the module is used in multiple places.",
+ "solution": "Move the tokenizer initialization to a function or class initialization to ensure it's only initialized once.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 28,
+ "end_line": 29,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 4
+ },
+ {
+ "topic": "Unused Code",
+ "comment": "The commented-out code block starting from line 315 appears to be an unused query method. Consider removing this code if it's no longer needed.",
+ "confidence": "important",
+ "reason": "Unused code can make the codebase harder to maintain and understand.",
+ "solution": "Remove the commented-out code block if it's no longer needed.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 315,
+ "end_line": 335,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 5
+ },
+ {
+ "topic": "Potential Performance Optimization",
+ "comment": "The `query` method retrieves the function details from the database for each result node. Consider optimizing this by fetching all the required information in a single database query.",
+ "confidence": "important",
+ "reason": "Fetching data from the database for each result node can be inefficient, especially for larger result sets.",
+ "solution": "Modify the `query` method to fetch all the required information in a single database query to improve performance.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 337,
+ "end_line": 390,
+ "side": "LEFT",
+ "sentiment": "positive",
+ "severity_level": 6
+ },
+ {
+ "topic": "Dependencies",
+ "comment": "The project dependencies have been updated to use newer versions of some libraries, such as Python 3.9 and various tree-sitter language parsers.",
+ "confidence": "important",
+ "reason": "Keeping dependencies up-to-date is important for security, performance, and access to new features.",
+ "solution": "The changes look good and should help improve the project's overall maintainability.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "pyproject.toml",
+ "start_line": 3,
+ "end_line": 13,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 7
+ },
+ {
+ "topic": "Code Chunking",
+ "comment": "The new `chunk_code` function in `test_chunker.py` looks like a useful utility for testing the code chunking functionality.",
+ "confidence": "moderate",
+ "reason": "The function provides a clear way to test the code chunking behavior for different programming languages.",
+ "solution": "Consider adding more test cases to cover edge cases and ensure the chunking works as expected for a variety of code samples.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "tests/retriever/test_chunker.py",
+ "start_line": 1,
+ "end_line": 101,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 6
+ },
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to Dockerfile, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "Dockerfile",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to docker-compose.yml, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "15",
+ "end_line": "15",
+ "side": "RIGHT",
+ "file_name": "docker-compose.yml",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Version Control",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to .gitignore, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "164",
+ "end_line": "164",
+ "side": "RIGHT",
+ "file_name": ".gitignore",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Database",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to db_setup/init.sql, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "1",
+ "end_line": "1",
+ "side": "RIGHT",
+ "file_name": "db_setup/init.sql",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_222/review.md b/.experiments/code_review/haiku/no_eval/pr_222/review.md
new file mode 100644
index 00000000..de7eb626
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_222/review.md
@@ -0,0 +1,247 @@
+PR URL: https://github.com/Cloud-Code-AI/kaizen/pull/222
+
+# 🔍 Code Review Summary
+
+❗ **Attention Required:** This push has potential issues. 🚨
+
+## 📊 Stats
+- Total Issues: 21
+- Critical: 5
+- Important: 7
+- Minor: 5
+- Files Affected: 11
+## 🏆 Code Quality
+[██████████████████░░] 90% (Excellent)
+
+## 🚨 Critical Issues
+
+
+Configuration (5 issues)
+
+### 1. Changes made to sensitive file
+📁 **File:** `config.json:4`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to config.json, which needs review
+💡 **Solution:** NA
+
+### 2. Changes made to sensitive file
+📁 **File:** `Dockerfile:4`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to Dockerfile, which needs review
+💡 **Solution:** NA
+
+### 3. Changes made to sensitive file
+📁 **File:** `docker-compose.yml:15`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to docker-compose.yml, which needs review
+💡 **Solution:** NA
+
+### 4. Changes made to sensitive file
+📁 **File:** `.gitignore:164`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to .gitignore, which needs review
+💡 **Solution:** NA
+
+### 5. Changes made to sensitive file
+📁 **File:** `db_setup/init.sql:1`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to db_setup/init.sql, which needs review
+💡 **Solution:** NA
+
+
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Dockerfile (7 issues)
+
+### 1. The Dockerfile should install system dependencies before installing Poetry.
+📁 **File:** `Dockerfile:7`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Installing system dependencies before Poetry ensures that the necessary build tools are available for the Poetry installation process.
+💡 **Solution:** Move the system dependency installation block before the Poetry installation step.
+
+**Current Code:**
+```python
+
+```
+
+**Suggested Code:**
+```dockerfile
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+ git \
+ build-essential \
+ && rm -rf /var/lib/apt/lists/*
+
+# Install Poetry
+RUN pip install --no-cache-dir poetry
+```
+
+### 2. The Dockerfile should make the Tree-sitter language installation script executable before running it.
+📁 **File:** `Dockerfile:25`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Making the script executable ensures that it can be properly executed during the build process.
+💡 **Solution:** Add a step to make the script executable before running it.
+
+**Current Code:**
+```python
+
+```
+
+**Suggested Code:**
+```dockerfile
+# Make the installation script executable
+RUN chmod +x install_tree_sitter_languages.sh
+
+# Run the Tree-sitter language installation script
+RUN ./install_tree_sitter_languages.sh
+```
+
+### 3. The config.json file should use environment variables for sensitive information like API keys.
+📁 **File:** `config.json:13`
+⚖️ **Severity:** 8/10
+🔍 **Description:** Using environment variables instead of hardcoding sensitive information in the config file improves security and makes the configuration more flexible.
+💡 **Solution:** Replace the API key and API base values with environment variable references, e.g., `os.environ['AZURE_API_KEY']`.
+
+**Current Code:**
+```json
+"api_key": "azure/text-embedding-small",
+"api_base": "azure/gpt-4o-mini"
+```
+
+**Suggested Code:**
+```json
+"api_key": "os.environ['AZURE_API_KEY']",
+"api_base": "os.environ['AZURE_API_BASE']"
+```
+
+### 4. The docker-compose.yml file should use the same Postgres image as the Dockerfile-postgres file.
+📁 **File:** `docker-compose.yml:18`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Using the same Postgres image ensures consistency and reduces potential issues with different versions or configurations.
+💡 **Solution:** Replace the Postgres image in the docker-compose.yml file with the one used in the Dockerfile-postgres file.
+
+**Current Code:**
+```yaml
+image: postgres:16-bullseye
+```
+
+**Suggested Code:**
+```yaml
+build:
+ context: .
+ dockerfile: Dockerfile-postgres
+```
+
+### 5. The commented-out code block starting from line 315 appears to be an unused query method. Consider removing this code if it's no longer needed.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:315`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Unused code can make the codebase harder to maintain and understand.
+💡 **Solution:** Remove the commented-out code block if it's no longer needed.
+
+### 6. The `query` method retrieves the function details from the database for each result node. Consider optimizing this by fetching all the required information in a single database query.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:337`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Fetching data from the database for each result node can be inefficient, especially for larger result sets.
+💡 **Solution:** Modify the `query` method to fetch all the required information in a single database query to improve performance.
+
+### 7. The project dependencies have been updated to use newer versions of some libraries, such as Python 3.9 and various tree-sitter language parsers.
+📁 **File:** `pyproject.toml:3`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Keeping dependencies up-to-date is important for security, performance, and access to new features.
+💡 **Solution:** The changes look good and should help improve the project's overall maintainability.
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (5 issues)
+
+
+examples/ragify_codebase/main.py (5 issues)
+
+### 1. The main.py file should provide more context and examples for using the RepositoryAnalyzer.
+📁 **File:** `examples/ragify_codebase/main.py:1`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Adding more context and examples will help users understand how to effectively use the RepositoryAnalyzer in their own projects.
+💡 **Solution:** Expand the example code to include more comments and explanations, such as how to set up the repository, how to perform different types of queries, and how to handle the results.
+
+**Current Code:**
+```python
+
+```
+
+**Suggested Code:**
+```python
+# Initialize the analyzer
+analyzer = RepositoryAnalyzer()
+
+# Set up the repository (do this when you first analyze a repo or when you want to update it)
+analyzer.setup_repository("./github_app/")
+
+# Perform queries (you can do this as many times as you want without calling setup_repository again)
+results = analyzer.query("Find functions that handle authentication")
+for result in results:
+ print(f"File:{result['file_path']}")
+ print(f"Abstraction:{result['abstraction']}")
+ print(f"result:\n{result}")
+ print(f"Relevance Score:{result['relevance_score']}")
+ print("---")
+
+# If you make changes to the repository and want to update the analysis:
+analyzer.setup_repository("/path/to/your/repo")
+
+# Then you can query again with the updated data
+results = analyzer.query("authentication")
+```
+
+### 2. The following imports are not used in the code and can be removed: `from llama_index.core.schema import TextNode`, `from concurrent.futures import ThreadPoolExecutor, as_completed`, `from llama_index.embeddings.litellm import LiteLLMEmbedding`, `from llama_index.core import QueryBundle`.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:7`
+⚖️ **Severity:** 3/10
+🔍 **Description:** Unused imports can clutter the codebase and make it harder to maintain.
+💡 **Solution:** Remove the unused imports to improve code readability and maintainability.
+
+### 3. The logging configuration is set up in the global scope, which can lead to issues if the module is imported in multiple places. Consider moving the logging setup to a function or class initialization to ensure it's only configured once.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:22`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Global logging configuration can cause conflicts if the module is used in multiple places.
+💡 **Solution:** Move the logging setup to a function or class initialization to ensure it's only configured once.
+
+### 4. The tokenizer is initialized in the global scope, which can lead to issues if the module is imported in multiple places. Consider moving the tokenizer initialization to a function or class initialization to ensure it's only initialized once.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:28`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Global tokenizer initialization can cause conflicts if the module is used in multiple places.
+💡 **Solution:** Move the tokenizer initialization to a function or class initialization to ensure it's only initialized once.
+
+### 5. The new `chunk_code` function in `test_chunker.py` looks like a useful utility for testing the code chunking functionality.
+📁 **File:** `tests/retriever/test_chunker.py:1`
+⚖️ **Severity:** 6/10
+🔍 **Description:** The function provides a clear way to test the code chunking behavior for different programming languages.
+💡 **Solution:** Consider adding more test cases to cover edge cases and ensure the chunking works as expected for a variety of code samples.
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (anthropic.claude-3-haiku-20240307-v1:0)
+{"prompt_tokens": 24844, "completion_tokens": 4069, "total_tokens": 28913}
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_252/comments.json b/.experiments/code_review/haiku/no_eval/pr_252/comments.json
new file mode 100644
index 00000000..0637a088
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_252/comments.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_252/issues.json b/.experiments/code_review/haiku/no_eval/pr_252/issues.json
new file mode 100644
index 00000000..38344bd0
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_252/issues.json
@@ -0,0 +1,32 @@
+[
+ {
+ "topic": "LinkedIn Post Generation",
+ "comment": "The LinkedIn post generation code is not formatted correctly.",
+ "confidence": "moderate",
+ "reason": "The LinkedIn post generation code is spread across multiple lines, making it less readable and maintainable.",
+ "solution": "Condense the LinkedIn post generation code into a single line, similar to the Twitter post generation.",
+ "actual_code": "linkedin_post = work_summary_generator.generate_linkedin_post(\n summary, user=\"oss_example\"\n)",
+ "fixed_code": "linkedin_post = work_summary_generator.generate_linkedin_post(summary, user=\"oss_example\")",
+ "file_name": "examples/work_summarizer/main.py",
+ "start_line": 60,
+ "end_line": 62,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 3
+ },
+ {
+ "topic": "Prompt Formatting",
+ "comment": "The TWITTER_POST_PROMPT and LINKEDIN_POST_PROMPT could be improved for better readability.",
+ "confidence": "moderate",
+ "reason": "The prompts are currently formatted as a single long string, making them difficult to read and maintain.",
+ "solution": "Consider breaking the prompts into multiple lines, using string formatting, and adding comments to explain the different sections.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/llms/prompts/work_summary_prompts.py",
+ "start_line": 44,
+ "end_line": 65,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 4
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_252/review.md b/.experiments/code_review/haiku/no_eval/pr_252/review.md
new file mode 100644
index 00000000..e28ec0f5
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_252/review.md
@@ -0,0 +1,70 @@
+PR URL: https://github.com/Cloud-Code-AI/kaizen/pull/252
+
+# 🔍 Code Review Summary
+
+✅ **All Clear:** This commit looks good! 👍
+
+## 📊 Stats
+- Total Issues: 2
+- Critical: 0
+- Important: 0
+- Minor: 2
+- Files Affected: 2
+## 🏆 Code Quality
+[██████████████████░░] 90% (Excellent)
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (2 issues)
+
+
+LinkedIn Post Generation (2 issues)
+
+### 1. The LinkedIn post generation code is not formatted correctly.
+📁 **File:** `examples/work_summarizer/main.py:60`
+⚖️ **Severity:** 3/10
+🔍 **Description:** The LinkedIn post generation code is spread across multiple lines, making it less readable and maintainable.
+💡 **Solution:** Condense the LinkedIn post generation code into a single line, similar to the Twitter post generation.
+
+**Current Code:**
+```python
+linkedin_post = work_summary_generator.generate_linkedin_post(
+ summary, user="oss_example"
+)
+```
+
+**Suggested Code:**
+```python
+linkedin_post = work_summary_generator.generate_linkedin_post(summary, user="oss_example")
+```
+
+### 2. The TWITTER_POST_PROMPT and LINKEDIN_POST_PROMPT could be improved for better readability.
+📁 **File:** `kaizen/llms/prompts/work_summary_prompts.py:44`
+⚖️ **Severity:** 4/10
+🔍 **Description:** The prompts are currently formatted as a single long string, making them difficult to read and maintain.
+💡 **Solution:** Consider breaking the prompts into multiple lines, using string formatting, and adding comments to explain the different sections.
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (anthropic.claude-3-haiku-20240307-v1:0)
+{"prompt_tokens": 4436, "completion_tokens": 465, "total_tokens": 4901}
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_335/comments.json b/.experiments/code_review/haiku/no_eval/pr_335/comments.json
new file mode 100644
index 00000000..0637a088
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_335/comments.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_335/issues.json b/.experiments/code_review/haiku/no_eval/pr_335/issues.json
new file mode 100644
index 00000000..d941a81f
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_335/issues.json
@@ -0,0 +1,62 @@
+[
+ {
+ "topic": "Prompt Changes",
+ "comment": "The `PR_DESCRIPTION_PROMPT` and `PR_FILE_DESCRIPTION_PROMPT` have been updated to use a more concise and structured format for the output.",
+ "confidence": "important",
+ "reason": "The new prompts provide a clearer and more organized structure for the generated pull request description, making it easier to read and understand.",
+ "solution": "The changes look good and should improve the overall quality and readability of the generated pull request descriptions.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/llms/prompts/pr_desc_prompts.py",
+ "start_line": 1,
+ "end_line": 92,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 7
+ },
+ {
+ "topic": "Removal of `PR_DESC_EVALUATION_PROMPT`",
+ "comment": "The `PR_DESC_EVALUATION_PROMPT` has been removed from the `code_review_prompts.py` file.",
+ "confidence": "moderate",
+ "reason": "The `PR_DESC_EVALUATION_PROMPT` was previously used for re-evaluating the generated pull request description, but it has been removed in the current changes.",
+ "solution": "Ensure that the functionality for re-evaluating the pull request description is still maintained, either through a different mechanism or by incorporating the evaluation logic directly into the main description generation process.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/llms/prompts/code_review_prompts.py",
+ "start_line": 190,
+ "end_line": 201,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Removal of `reeval_response` parameter",
+ "comment": "The `reeval_response` parameter has been removed from several method calls in the `pr_description.py` file.",
+ "confidence": "moderate",
+ "reason": "The `reeval_response` parameter was previously used for re-evaluating the generated pull request description, but it has been removed in the current changes.",
+ "solution": "Ensure that the functionality for re-evaluating the pull request description is still maintained, either through a different mechanism or by incorporating the evaluation logic directly into the main description generation process.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 43,
+ "end_line": 96,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Removal of `CODE_REVIEW_SYSTEM_PROMPT`",
+ "comment": "The `CODE_REVIEW_SYSTEM_PROMPT` has been removed and replaced with `PR_DESCRIPTION_SYSTEM_PROMPT` in the `pr_description.py` file.",
+ "confidence": "important",
+ "reason": "The `CODE_REVIEW_SYSTEM_PROMPT` was previously used as the system prompt for the code review process, but it has been replaced with a new prompt specifically for generating pull request descriptions.",
+ "solution": "The changes look good and should help align the system prompt with the updated pull request description generation process.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 29,
+ "end_line": 29,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 7
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_335/review.md b/.experiments/code_review/haiku/no_eval/pr_335/review.md
new file mode 100644
index 00000000..c4f147c7
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_335/review.md
@@ -0,0 +1,78 @@
+PR URL: https://github.com/Cloud-Code-AI/kaizen/pull/335
+
+# 🔍 Code Review Summary
+
+✅ **All Clear:** This commit looks good! 👍
+
+## 📊 Stats
+- Total Issues: 4
+- Critical: 0
+- Important: 2
+- Minor: 2
+- Files Affected: 3
+## 🏆 Code Quality
+[██████████████████░░] 90% (Excellent)
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Prompt Changes (2 issues)
+
+### 1. The `PR_DESCRIPTION_PROMPT` and `PR_FILE_DESCRIPTION_PROMPT` have been updated to use a more concise and structured format for the output.
+📁 **File:** `kaizen/llms/prompts/pr_desc_prompts.py:1`
+⚖️ **Severity:** 7/10
+🔍 **Description:** The new prompts provide a clearer and more organized structure for the generated pull request description, making it easier to read and understand.
+💡 **Solution:** The changes look good and should improve the overall quality and readability of the generated pull request descriptions.
+
+### 2. The `CODE_REVIEW_SYSTEM_PROMPT` has been removed and replaced with `PR_DESCRIPTION_SYSTEM_PROMPT` in the `pr_description.py` file.
+📁 **File:** `kaizen/generator/pr_description.py:29`
+⚖️ **Severity:** 7/10
+🔍 **Description:** The `CODE_REVIEW_SYSTEM_PROMPT` was previously used as the system prompt for the code review process, but it has been replaced with a new prompt specifically for generating pull request descriptions.
+💡 **Solution:** The changes look good and should help align the system prompt with the updated pull request description generation process.
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (2 issues)
+
+
+Removal of `PR_DESC_EVALUATION_PROMPT` (2 issues)
+
+### 1. The `PR_DESC_EVALUATION_PROMPT` has been removed from the `code_review_prompts.py` file.
+📁 **File:** `kaizen/llms/prompts/code_review_prompts.py:190`
+⚖️ **Severity:** 5/10
+🔍 **Description:** The `PR_DESC_EVALUATION_PROMPT` was previously used for re-evaluating the generated pull request description, but it has been removed in the current changes.
+💡 **Solution:** Ensure that the functionality for re-evaluating the pull request description is still maintained, either through a different mechanism or by incorporating the evaluation logic directly into the main description generation process.
+
+### 2. The `reeval_response` parameter has been removed from several method calls in the `pr_description.py` file.
+📁 **File:** `kaizen/generator/pr_description.py:43`
+⚖️ **Severity:** 5/10
+🔍 **Description:** The `reeval_response` parameter was previously used for re-evaluating the generated pull request description, but it has been removed in the current changes.
+💡 **Solution:** Ensure that the functionality for re-evaluating the pull request description is still maintained, either through a different mechanism or by incorporating the evaluation logic directly into the main description generation process.
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (anthropic.claude-3-haiku-20240307-v1:0)
+{"prompt_tokens": 7614, "completion_tokens": 945, "total_tokens": 8559}
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_400/comments.json b/.experiments/code_review/haiku/no_eval/pr_400/comments.json
new file mode 100644
index 00000000..33197e40
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_400/comments.json
@@ -0,0 +1,16 @@
+[
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "11",
+ "end_line": "11",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_400/issues.json b/.experiments/code_review/haiku/no_eval/pr_400/issues.json
new file mode 100644
index 00000000..03fa0f89
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_400/issues.json
@@ -0,0 +1,406 @@
+[
+ {
+ "topic": "Boundary Conditions",
+ "comment": "The test cases for boundary conditions (very long descriptions) look good. The execution time is also printed, which is a nice addition.",
+ "confidence": "important",
+ "reason": "Handling large inputs is an important aspect of the function's robustness.",
+ "solution": "No changes needed.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_create_pr_description.py",
+ "start_line": 45,
+ "end_line": 61,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 8
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "The error handling tests cover various invalid input scenarios, which is good.",
+ "confidence": "important",
+ "reason": "Proper error handling is crucial for the function's reliability.",
+ "solution": "No changes needed.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_create_pr_description.py",
+ "start_line": 31,
+ "end_line": 43,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 8
+ },
+ {
+ "topic": "Collapsible Template",
+ "comment": "The collapsible template for the original description has been improved to include newlines for better readability.",
+ "confidence": "moderate",
+ "reason": "The previous template did not have proper newline formatting.",
+ "solution": "No changes needed.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_create_pr_description.py",
+ "start_line": 5,
+ "end_line": 6,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 5
+ },
+ {
+ "topic": "Readability",
+ "comment": "The updated PR_COLLAPSIBLE_TEMPLATE is more readable and follows better formatting.",
+ "confidence": "high",
+ "reason": "The previous template used a mix of string concatenation and formatting, which made it less readable and maintainable. The new template uses a multi-line string with proper indentation and formatting.",
+ "solution": "Keep the updated PR_COLLAPSIBLE_TEMPLATE, as it improves the overall readability of the code.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_create_pr_review_text.py",
+ "start_line": 4,
+ "end_line": 16,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 3
+ },
+ {
+ "topic": "Test Coverage",
+ "comment": "The new test cases cover a wider range of scenarios, including empty topics, single topic with single review, and multiple topics with multiple reviews.",
+ "confidence": "high",
+ "reason": "The additional test cases ensure the `create_pr_review_text` function handles different input scenarios correctly.",
+ "solution": "Keep the new test cases, as they improve the overall test coverage and ensure the function's robustness.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_create_pr_review_text.py",
+ "start_line": 74,
+ "end_line": 270,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 4
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "The new test cases cover scenarios where the review data is missing required fields, such as 'solution', 'reason', 'confidence', and 'severity_level'.",
+ "confidence": "high",
+ "reason": "Handling missing fields is important to ensure the function can gracefully handle incomplete review data.",
+ "solution": "Keep the new test cases, as they ensure the function can handle missing fields in the review data without crashing or producing unexpected output.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_create_pr_review_text.py",
+ "start_line": 142,
+ "end_line": 234,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 4
+ },
+ {
+ "topic": "Maintainability",
+ "comment": "The new test case for 'reviews_with_missing_comment' ensures the function can handle missing 'comment' field in the review data.",
+ "confidence": "high",
+ "reason": "Handling missing fields, such as 'comment', is important for the function's robustness and maintainability.",
+ "solution": "Keep the new test case, as it ensures the function can handle missing 'comment' field without crashing or producing unexpected output.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_create_pr_review_text.py",
+ "start_line": 238,
+ "end_line": 269,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 4
+ },
+ {
+ "topic": "Efficiency",
+ "comment": "The new test case for 'empty_list_in_topics' ensures the function can handle an empty list of reviews for a given topic.",
+ "confidence": "high",
+ "reason": "Handling empty lists of reviews is important for the function's efficiency and edge case handling.",
+ "solution": "Keep the new test case, as it ensures the function can handle empty lists of reviews without producing unexpected output.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_create_pr_review_text.py",
+ "start_line": 271,
+ "end_line": 276,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 3
+ },
+ {
+ "topic": "Unused imports",
+ "comment": "The imports `mock` and `pytest` are not used in the updated tests.",
+ "confidence": "moderate",
+ "reason": "Unused imports can make the code harder to read and maintain.",
+ "solution": "Remove the unused imports.",
+ "actual_code": "import os\nimport pytest\nfrom unittest import mock\nfrom kaizen.helpers.output import get_parent_folder",
+ "fixed_code": "import os\nfrom kaizen.helpers.output import get_parent_folder",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_get_parent_folder.py",
+ "start_line": 1,
+ "end_line": 6,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 3
+ },
+ {
+ "topic": "Unnecessary mocking",
+ "comment": "The `test_get_parent_folder_normal()` function does not need to mock `os.getcwd()` as the actual implementation can be used.",
+ "confidence": "moderate",
+ "reason": "Mocking should be used only when necessary, as it can make the tests more complex and harder to maintain.",
+ "solution": "Remove the mocking in `test_get_parent_folder_normal()` and use the actual implementation of `get_parent_folder()`.",
+ "actual_code": " with mock.patch('os.getcwd', return_value='/home/user/project'):\n expected = '/home/user/project'\n result = get_parent_folder()\n assert result == expected, f\"Expected{expected}, but got{result}\"",
+ "fixed_code": " expected = os.path.dirname(os.getcwd())\n result = get_parent_folder()\n assert result == expected, f\"Expected{expected}, but got{result}\"",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_get_parent_folder.py",
+ "start_line": 13,
+ "end_line": 16,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 4
+ },
+ {
+ "topic": "Comprehensive error handling",
+ "comment": "The updated `test_get_parent_folder_error_handling()` function covers more error scenarios, including a generic `Exception` case.",
+ "confidence": "positive",
+ "reason": "Thorough error handling is important to ensure the function behaves correctly in various exceptional situations.",
+ "solution": "No changes needed, the updated error handling tests are comprehensive.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_get_parent_folder.py",
+ "start_line": 20,
+ "end_line": 26,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 5
+ },
+ {
+ "topic": "Nested directory structure test",
+ "comment": "The updated `test_get_parent_folder_nested()` function tests the behavior of `get_parent_folder()` in a nested directory structure.",
+ "confidence": "positive",
+ "reason": "Testing the function in a nested directory structure is important to ensure it works correctly in different scenarios.",
+ "solution": "No changes needed, the nested directory structure test is a good addition.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_get_parent_folder.py",
+ "start_line": 28,
+ "end_line": 33,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 5
+ },
+ {
+ "topic": "Boundary condition test",
+ "comment": "The previous `test_get_parent_folder_boundary_condition_long_path()` function testing a long path has been removed, which is a reasonable decision as the current implementation of `get_parent_folder()` should handle long paths without issues.",
+ "confidence": "positive",
+ "reason": "The previous test case was not necessary as the current implementation should handle long paths correctly.",
+ "solution": "No changes needed, the removal of the unnecessary boundary condition test is appropriate.",
+ "actual_code": "def test_get_parent_folder_boundary_condition_long_path():\n long_path = \"/\" + \"a\" * 255\n with mock.patch(\"os.getcwd\", return_value=long_path):\n assert get_parent_folder() == long_path",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_get_parent_folder.py",
+ "start_line": 29,
+ "end_line": 32,
+ "side": "LEFT",
+ "sentiment": "positive",
+ "severity_level": 4
+ },
+ {
+ "topic": "Unused imports",
+ "comment": "The imports `asyncio` and `nest_asyncio` are not used in the original test cases. Consider removing them if they are not required.",
+ "confidence": "moderate",
+ "reason": "Unused imports can clutter the code and make it less readable.",
+ "solution": "Remove the unused imports `asyncio` and `nest_asyncio` from the test file.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_get_web_html.py",
+ "start_line": 4,
+ "end_line": 5,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 3
+ },
+ {
+ "topic": "Parameterized test",
+ "comment": "The new test `test_get_web_html_normal_cases` uses a parameterized approach, which is a good practice. It covers multiple test cases in a concise manner.",
+ "confidence": "positive",
+ "reason": "Parameterized tests improve code readability and maintainability by reducing duplication.",
+ "solution": "Keep the parameterized test approach.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_get_web_html.py",
+ "start_line": 20,
+ "end_line": 75,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 5
+ },
+ {
+ "topic": "Handling empty HTML content",
+ "comment": "The new test `test_get_web_html_empty_html` ensures that the function handles empty HTML content correctly.",
+ "confidence": "positive",
+ "reason": "Handling edge cases like empty input is important for robust error handling.",
+ "solution": "Keep the test case for handling empty HTML content.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_get_web_html.py",
+ "start_line": 85,
+ "end_line": 92,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 4
+ },
+ {
+ "topic": "Handling network errors",
+ "comment": "The new test `test_get_web_html_invalid_url` ensures that the function handles network errors correctly.",
+ "confidence": "positive",
+ "reason": "Handling network errors is important for a robust web scraping implementation.",
+ "solution": "Keep the test case for handling network errors.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_get_web_html.py",
+ "start_line": 85,
+ "end_line": 91,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 4
+ },
+ {
+ "topic": "Handling large HTML content",
+ "comment": "The new test `test_get_web_html_large_content` ensures that the function can handle large HTML content without performance issues.",
+ "confidence": "positive",
+ "reason": "Testing the function's ability to handle large inputs is important for ensuring its scalability.",
+ "solution": "Keep the test case for handling large HTML content.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_get_web_html.py",
+ "start_line": 93,
+ "end_line": 97,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 4
+ },
+ {
+ "topic": "Handling invalid HTML",
+ "comment": "The previous test `test_get_web_html_invalid_html` has been removed. Consider adding a new test case to ensure the function can handle invalid HTML content gracefully.",
+ "confidence": "moderate",
+ "reason": "Handling invalid HTML is important for a robust web scraping implementation.",
+ "solution": "Add a new test case to ensure the function can handle invalid HTML content without raising unexpected exceptions.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_get_web_html.py",
+ "start_line": 98,
+ "end_line": 107,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 4
+ },
+ {
+ "topic": "Enabling critique mode",
+ "comment": "The `generate_tests` function call in the `examples/unittest/main.py` file has been updated to enable the critique mode and verbose output.",
+ "confidence": "positive",
+ "reason": "Enabling the critique mode and verbose output can provide more detailed feedback and insights during the testing process.",
+ "solution": "Keep the changes to enable critique mode and verbose output in the `generate_tests` function call.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "examples/unittest/main.py",
+ "start_line": 35,
+ "end_line": 37,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 4
+ },
+ {
+ "topic": "Refactoring",
+ "comment": "The `generate_tests` method has become quite long and complex. Consider breaking it down into smaller, more focused methods to improve readability and maintainability.",
+ "confidence": "important",
+ "reason": "Large methods can be difficult to understand and maintain, especially as the codebase grows.",
+ "solution": "Refactor the `generate_tests` method by extracting smaller, more focused methods for specific tasks, such as preparing the test file path, generating the AI tests, and writing the test file.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/unit_test.py",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Logging",
+ "comment": "The logging implementation could be improved by using a more structured approach, such as using the built-in `logging` module with appropriate log levels.",
+ "confidence": "moderate",
+ "reason": "The current logging implementation uses print statements, which can be less flexible and harder to manage than a structured logging approach.",
+ "solution": "Refactor the logging implementation to use the `logging` module, with appropriate log levels (e.g., DEBUG, INFO, WARNING, ERROR) and log messages that provide more context and details.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/unit_test.py",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 5
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "The `generate_tests_from_dir` method could benefit from more robust error handling, such as catching and handling specific exceptions.",
+ "confidence": "moderate",
+ "reason": "Catching and handling specific exceptions can help provide more informative error messages and improve the overall robustness of the application.",
+ "solution": "Modify the `generate_tests_from_dir` method to catch and handle specific exceptions, such as `FileNotFoundError` or `ValueError`, and provide more detailed error messages to help with debugging and troubleshooting.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/unit_test.py",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Separation of Concerns",
+ "comment": "The `UnitTestGenerator` class is responsible for both generating and running the tests. Consider separating these concerns into two different classes or modules.",
+ "confidence": "important",
+ "reason": "Separating the concerns of test generation and test execution can improve the overall design and maintainability of the codebase.",
+ "solution": "Create a separate `UnitTestRunner` class or module that is responsible for discovering and running the generated tests, while the `UnitTestGenerator` class focuses solely on generating the tests.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/unit_test.py",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Dependency Injection",
+ "comment": "The `UnitTestGenerator` class has several dependencies, such as the `LLMProvider` and the various prompt templates. Consider using dependency injection to improve the testability and flexibility of the class.",
+ "confidence": "moderate",
+ "reason": "Dependency injection can make the code more modular and easier to test, as it allows for easier substitution of dependencies.",
+ "solution": "Refactor the `UnitTestGenerator` class to accept its dependencies (e.g., `LLMProvider`, prompt templates) as constructor arguments, rather than creating them internally. This will improve the testability and flexibility of the class.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/unit_test.py",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Logging Configuration",
+ "comment": "The code sets all loggers to the ERROR level, which may be too restrictive. Consider providing more granular control over log levels.",
+ "confidence": "moderate",
+ "reason": "Setting all loggers to ERROR level may result in losing valuable information during development and debugging. It's generally better to have more fine-grained control over log levels for different components.",
+ "solution": "Instead of setting all loggers to ERROR, consider the following:\n1. Set a default log level (e.g., INFO) for all loggers using `logging.basicConfig()`.\n2. Selectively set the log level for specific loggers (e.g., 'LiteLLM', 'LiteLLM Router', 'LiteLLM Proxy') to a more appropriate level (e.g., DEBUG, INFO, or WARNING) based on the importance and verbosity of each component.\n3. Provide a way for users to easily adjust the log level, such as through an environment variable or a configuration file.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/llms/provider.py",
+ "start_line": 13,
+ "end_line": 28,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "11",
+ "end_line": "11",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_400/review.md b/.experiments/code_review/haiku/no_eval/pr_400/review.md
new file mode 100644
index 00000000..a785248a
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_400/review.md
@@ -0,0 +1,177 @@
+PR URL: https://github.com/Cloud-Code-AI/kaizen/pull/400
+
+# 🔍 Code Review Summary
+
+❗ **Attention Required:** This push has potential issues. 🚨
+
+## 📊 Stats
+- Total Issues: 27
+- Critical: 1
+- Important: 4
+- Minor: 9
+- Files Affected: 8
+## 🏆 Code Quality
+[██████████████████░░] 90% (Excellent)
+
+## 🚨 Critical Issues
+
+
+Configuration (1 issue)
+
+### 1. Changes made to sensitive file
+📁 **File:** `config.json:11`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to config.json, which needs review
+💡 **Solution:** NA
+
+
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Boundary Conditions (4 issues)
+
+### 1. The test cases for boundary conditions (very long descriptions) look good. The execution time is also printed, which is a nice addition.
+📁 **File:** `.kaizen/unit_test/kaizen/helpers/test_create_pr_description.py:45`
+⚖️ **Severity:** 8/10
+🔍 **Description:** Handling large inputs is an important aspect of the function's robustness.
+💡 **Solution:** No changes needed.
+
+### 2. The error handling tests cover various invalid input scenarios, which is good.
+📁 **File:** `.kaizen/unit_test/kaizen/helpers/test_create_pr_description.py:31`
+⚖️ **Severity:** 8/10
+🔍 **Description:** Proper error handling is crucial for the function's reliability.
+💡 **Solution:** No changes needed.
+
+### 3. The `generate_tests` method has become quite long and complex. Consider breaking it down into smaller, more focused methods to improve readability and maintainability.
+📁 **File:** `kaizen/generator/unit_test.py:0`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Large methods can be difficult to understand and maintain, especially as the codebase grows.
+💡 **Solution:** Refactor the `generate_tests` method by extracting smaller, more focused methods for specific tasks, such as preparing the test file path, generating the AI tests, and writing the test file.
+
+### 4. The `UnitTestGenerator` class is responsible for both generating and running the tests. Consider separating these concerns into two different classes or modules.
+📁 **File:** `kaizen/generator/unit_test.py:0`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Separating the concerns of test generation and test execution can improve the overall design and maintainability of the codebase.
+💡 **Solution:** Create a separate `UnitTestRunner` class or module that is responsible for discovering and running the generated tests, while the `UnitTestGenerator` class focuses solely on generating the tests.
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (9 issues)
+
+
+Collapsible Template (9 issues)
+
+### 1. The collapsible template for the original description has been improved to include newlines for better readability.
+📁 **File:** `.kaizen/unit_test/kaizen/helpers/test_create_pr_description.py:5`
+⚖️ **Severity:** 5/10
+🔍 **Description:** The previous template did not have proper newline formatting.
+💡 **Solution:** No changes needed.
+
+### 2. The imports `mock` and `pytest` are not used in the updated tests.
+📁 **File:** `.kaizen/unit_test/kaizen/helpers/test_get_parent_folder.py:1`
+⚖️ **Severity:** 3/10
+🔍 **Description:** Unused imports can make the code harder to read and maintain.
+💡 **Solution:** Remove the unused imports.
+
+**Current Code:**
+```python
+import os
+import pytest
+from unittest import mock
+from kaizen.helpers.output import get_parent_folder
+```
+
+**Suggested Code:**
+```python
+import os
+from kaizen.helpers.output import get_parent_folder
+```
+
+### 3. The `test_get_parent_folder_normal()` function does not need to mock `os.getcwd()` as the actual implementation can be used.
+📁 **File:** `.kaizen/unit_test/kaizen/helpers/test_get_parent_folder.py:13`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Mocking should be used only when necessary, as it can make the tests more complex and harder to maintain.
+💡 **Solution:** Remove the mocking in `test_get_parent_folder_normal()` and use the actual implementation of `get_parent_folder()`.
+
+**Current Code:**
+```python
+ with mock.patch('os.getcwd', return_value='/home/user/project'):
+ expected = '/home/user/project'
+ result = get_parent_folder()
+ assert result == expected, f"Expected{expected}, but got{result}"
+```
+
+**Suggested Code:**
+```python
+ expected = os.path.dirname(os.getcwd())
+ result = get_parent_folder()
+ assert result == expected, f"Expected{expected}, but got{result}"
+```
+
+### 4. The imports `asyncio` and `nest_asyncio` are not used in the original test cases. Consider removing them if they are not required.
+📁 **File:** `.kaizen/unit_test/kaizen/helpers/test_get_web_html.py:4`
+⚖️ **Severity:** 3/10
+🔍 **Description:** Unused imports can clutter the code and make it less readable.
+💡 **Solution:** Remove the unused imports `asyncio` and `nest_asyncio` from the test file.
+
+### 5. The previous test `test_get_web_html_invalid_html` has been removed. Consider adding a new test case to ensure the function can handle invalid HTML content gracefully.
+📁 **File:** `.kaizen/unit_test/kaizen/helpers/test_get_web_html.py:98`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Handling invalid HTML is important for a robust web scraping implementation.
+💡 **Solution:** Add a new test case to ensure the function can handle invalid HTML content without raising unexpected exceptions.
+
+### 6. The logging implementation could be improved by using a more structured approach, such as using the built-in `logging` module with appropriate log levels.
+📁 **File:** `kaizen/generator/unit_test.py:0`
+⚖️ **Severity:** 5/10
+🔍 **Description:** The current logging implementation uses print statements, which can be less flexible and harder to manage than a structured logging approach.
+💡 **Solution:** Refactor the logging implementation to use the `logging` module, with appropriate log levels (e.g., DEBUG, INFO, WARNING, ERROR) and log messages that provide more context and details.
+
+### 7. The `generate_tests_from_dir` method could benefit from more robust error handling, such as catching and handling specific exceptions.
+📁 **File:** `kaizen/generator/unit_test.py:0`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Catching and handling specific exceptions can help provide more informative error messages and improve the overall robustness of the application.
+💡 **Solution:** Modify the `generate_tests_from_dir` method to catch and handle specific exceptions, such as `FileNotFoundError` or `ValueError`, and provide more detailed error messages to help with debugging and troubleshooting.
+
+### 8. The `UnitTestGenerator` class has several dependencies, such as the `LLMProvider` and the various prompt templates. Consider using dependency injection to improve the testability and flexibility of the class.
+📁 **File:** `kaizen/generator/unit_test.py:0`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Dependency injection can make the code more modular and easier to test, as it allows for easier substitution of dependencies.
+💡 **Solution:** Refactor the `UnitTestGenerator` class to accept its dependencies (e.g., `LLMProvider`, prompt templates) as constructor arguments, rather than creating them internally. This will improve the testability and flexibility of the class.
+
+### 9. The code sets all loggers to the ERROR level, which may be too restrictive. Consider providing more granular control over log levels.
+📁 **File:** `kaizen/llms/provider.py:13`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Setting all loggers to ERROR level may result in losing valuable information during development and debugging. It's generally better to have more fine-grained control over log levels for different components.
+💡 **Solution:** Instead of setting all loggers to ERROR, consider the following:
+1. Set a default log level (e.g., INFO) for all loggers using `logging.basicConfig()`.
+2. Selectively set the log level for specific loggers (e.g., 'LiteLLM', 'LiteLLM Router', 'LiteLLM Proxy') to a more appropriate level (e.g., DEBUG, INFO, or WARNING) based on the importance and verbosity of each component.
+3. Provide a way for users to easily adjust the log level, such as through an environment variable or a configuration file.
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (anthropic.claude-3-haiku-20240307-v1:0)
+{"prompt_tokens": 42996, "completion_tokens": 5792, "total_tokens": 48788}
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_440/comments.json b/.experiments/code_review/haiku/no_eval/pr_440/comments.json
new file mode 100644
index 00000000..0637a088
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_440/comments.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_440/issues.json b/.experiments/code_review/haiku/no_eval/pr_440/issues.json
new file mode 100644
index 00000000..8e24d702
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_440/issues.json
@@ -0,0 +1,62 @@
+[
+ {
+ "topic": "Unnecessary comment removal",
+ "comment": "The commented-out line `# analyzer.setup_repository(\"./github_app/\")` should be removed, as the line below it already sets up the repository.",
+ "confidence": "moderate",
+ "reason": "Removing unnecessary comments improves code readability and maintainability.",
+ "solution": "Remove the commented-out line `# analyzer.setup_repository(\"./github_app/\")` in `examples/ragify_codebase/main.py`.",
+ "actual_code": "# analyzer.setup_repository(\"./github_app/\")",
+ "fixed_code": "",
+ "file_name": "examples/ragify_codebase/main.py",
+ "start_line": 7,
+ "end_line": 7,
+ "side": "LEFT",
+ "sentiment": "positive",
+ "severity_level": 3
+ },
+ {
+ "topic": "Duplicate embedding storage",
+ "comment": "The TODO comment `# TODO: DONT PUSH DUPLICATE` suggests that the code is storing duplicate embeddings, which could lead to performance and storage issues.",
+ "confidence": "important",
+ "reason": "Storing duplicate embeddings can waste storage space and slow down the retrieval process.",
+ "solution": "Implement a mechanism to check for and avoid storing duplicate embeddings in the database.",
+ "actual_code": "# TODO: DONT PUSH DUPLICATE",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 156,
+ "end_line": 156,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Dependency version update",
+ "comment": "The `llama-index-core` dependency version has been updated from `0.10.47` to `0.10.65`. This update should be reviewed to ensure compatibility with the rest of the codebase.",
+ "confidence": "moderate",
+ "reason": "Dependency version updates can introduce breaking changes, so it's important to review the changes and ensure they don't introduce any issues.",
+ "solution": "Review the changelog and release notes for the `llama-index-core` version update to understand the changes and ensure they don't introduce any issues in the codebase.",
+ "actual_code": "llama-index-core = \"0.10.65\"",
+ "fixed_code": "",
+ "file_name": "pyproject.toml",
+ "start_line": 27,
+ "end_line": 27,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Dependency removal",
+ "comment": "The `llama-index-llms-openai` dependency has been removed. This change should be reviewed to ensure that it doesn't impact the functionality of the codebase.",
+ "confidence": "moderate",
+ "reason": "Removing dependencies can have unintended consequences, so it's important to review the impact of the change.",
+ "solution": "Review the codebase to ensure that the removal of the `llama-index-llms-openai` dependency doesn't break any functionality.",
+ "actual_code": "llama-index-llms-openai = \"^0.1.22\"",
+ "fixed_code": "",
+ "file_name": "pyproject.toml",
+ "start_line": 28,
+ "end_line": 28,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_440/review.md b/.experiments/code_review/haiku/no_eval/pr_440/review.md
new file mode 100644
index 00000000..62d5b74d
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_440/review.md
@@ -0,0 +1,118 @@
+PR URL: https://github.com/Cloud-Code-AI/kaizen/pull/440
+
+# 🔍 Code Review Summary
+
+✅ **All Clear:** This commit looks good! 👍
+
+## 📊 Stats
+- Total Issues: 4
+- Critical: 0
+- Important: 1
+- Minor: 3
+- Files Affected: 3
+## 🏆 Code Quality
+[█████████████████░░░] 85% (Good)
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Duplicate embedding storage (1 issue)
+
+### 1. The TODO comment `# TODO: DONT PUSH DUPLICATE` suggests that the code is storing duplicate embeddings, which could lead to performance and storage issues.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:156`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Storing duplicate embeddings can waste storage space and slow down the retrieval process.
+💡 **Solution:** Implement a mechanism to check for and avoid storing duplicate embeddings in the database.
+
+**Current Code:**
+```python
+# TODO: DONT PUSH DUPLICATE
+```
+
+**Suggested Code:**
+```python
+
+```
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (3 issues)
+
+
+Unnecessary comment removal (3 issues)
+
+### 1. The commented-out line `# analyzer.setup_repository("./github_app/")` should be removed, as the line below it already sets up the repository.
+📁 **File:** `examples/ragify_codebase/main.py:7`
+⚖️ **Severity:** 3/10
+🔍 **Description:** Removing unnecessary comments improves code readability and maintainability.
+💡 **Solution:** Remove the commented-out line `# analyzer.setup_repository("./github_app/")` in `examples/ragify_codebase/main.py`.
+
+**Current Code:**
+```python
+# analyzer.setup_repository("./github_app/")
+```
+
+**Suggested Code:**
+```python
+
+```
+
+### 2. The `llama-index-core` dependency version has been updated from `0.10.47` to `0.10.65`. This update should be reviewed to ensure compatibility with the rest of the codebase.
+📁 **File:** `pyproject.toml:27`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Dependency version updates can introduce breaking changes, so it's important to review the changes and ensure they don't introduce any issues.
+💡 **Solution:** Review the changelog and release notes for the `llama-index-core` version update to understand the changes and ensure they don't introduce any issues in the codebase.
+
+**Current Code:**
+```python
+llama-index-core = "0.10.65"
+```
+
+**Suggested Code:**
+```python
+
+```
+
+### 3. The `llama-index-llms-openai` dependency has been removed. This change should be reviewed to ensure that it doesn't impact the functionality of the codebase.
+📁 **File:** `pyproject.toml:28`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Removing dependencies can have unintended consequences, so it's important to review the impact of the change.
+💡 **Solution:** Review the codebase to ensure that the removal of the `llama-index-llms-openai` dependency doesn't break any functionality.
+
+**Current Code:**
+```python
+llama-index-llms-openai = "^0.1.22"
+```
+
+**Suggested Code:**
+```python
+
+```
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (anthropic.claude-3-haiku-20240307-v1:0)
+{"prompt_tokens": 1579, "completion_tokens": 944, "total_tokens": 2523}
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_476/comments.json b/.experiments/code_review/haiku/no_eval/pr_476/comments.json
new file mode 100644
index 00000000..a9d40eac
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_476/comments.json
@@ -0,0 +1,16 @@
+[
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "1",
+ "end_line": "1",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_476/issues.json b/.experiments/code_review/haiku/no_eval/pr_476/issues.json
new file mode 100644
index 00000000..15365ce7
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_476/issues.json
@@ -0,0 +1,106 @@
+[
+ {
+ "topic": "Sorting PR Files",
+ "comment": "The PR files are now being sorted before passing them to the description generator. This is a good improvement for maintaining consistent file order in the review.",
+ "confidence": "important",
+ "reason": "Sorting the files ensures a consistent order in the review, making it easier for the reviewer to understand the changes.",
+ "solution": "The `sort_files` function looks good and should effectively sort the files in alphabetical order.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "github_app/github_helper/pull_requests.py",
+ "start_line": 184,
+ "end_line": 194,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 4
+ },
+ {
+ "topic": "Generating Tests",
+ "comment": "The new `generate_tests` function is a good addition, as it provides a way to generate test cases based on the changed files in the PR.",
+ "confidence": "important",
+ "reason": "Generating tests based on the PR files can help ensure the changes don't break existing functionality.",
+ "solution": "The current implementation of `generate_tests` is simple and effective. It returns a list of file names, which can be used to create test cases.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "github_app/github_helper/pull_requests.py",
+ "start_line": 199,
+ "end_line": 200,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 4
+ },
+ {
+ "topic": "Printing Diff and PR Files",
+ "comment": "The changes in the `main.py` file to print the `diff_text` and `pr_files` are useful for debugging and understanding the input data.",
+ "confidence": "moderate",
+ "reason": "Printing the diff and PR files can help developers better understand the changes being reviewed.",
+ "solution": "The changes look good and should provide helpful information during the review process.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "examples/code_review/main.py",
+ "start_line": 21,
+ "end_line": 22,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 3
+ },
+ {
+ "topic": "Passing Code Quality to Review Description",
+ "comment": "The change to pass the `code_quality` parameter to the `create_pr_review_text` function is a good improvement, as it allows the review description to include information about the overall code quality.",
+ "confidence": "important",
+ "reason": "Providing information about the code quality in the review description can give the developer a better understanding of the overall state of the codebase.",
+ "solution": "The change looks good and should provide valuable information in the review description.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "examples/code_review/main.py",
+ "start_line": 36,
+ "end_line": 36,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 4
+ },
+ {
+ "topic": "Printing Raw Issues",
+ "comment": "The change to print the `review_data.issues` instead of the `topics` variable is an improvement, as it provides more detailed information about the identified issues.",
+ "confidence": "moderate",
+ "reason": "Printing the raw issues can give the developer a better understanding of the specific problems found during the review.",
+ "solution": "The change looks good and should provide more useful information in the output.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "examples/code_review/main.py",
+ "start_line": 39,
+ "end_line": 39,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 3
+ },
+ {
+ "topic": "Removing Unused Configuration",
+ "comment": "The removal of the `enable_observability_logging` configuration option is a good cleanup, as it removes an unused feature from the configuration file.",
+ "confidence": "moderate",
+ "reason": "Removing unused configuration options helps keep the codebase clean and maintainable.",
+ "solution": "The change looks good and should help simplify the configuration file.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "config.json",
+ "start_line": 4,
+ "end_line": 4,
+ "side": "LEFT",
+ "sentiment": "positive",
+ "severity_level": 3
+ },
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "1",
+ "end_line": "1",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_476/review.md b/.experiments/code_review/haiku/no_eval/pr_476/review.md
new file mode 100644
index 00000000..86156ba1
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_476/review.md
@@ -0,0 +1,103 @@
+PR URL: https://github.com/Cloud-Code-AI/kaizen/pull/476
+
+# 🔍 Code Review Summary
+
+❗ **Attention Required:** This push has potential issues. 🚨
+
+## 📊 Stats
+- Total Issues: 7
+- Critical: 1
+- Important: 3
+- Minor: 3
+- Files Affected: 3
+## 🏆 Code Quality
+[██████████████████░░] 90% (Excellent)
+
+## 🚨 Critical Issues
+
+
+Configuration (1 issue)
+
+### 1. Changes made to sensitive file
+📁 **File:** `config.json:1`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to config.json, which needs review
+💡 **Solution:** NA
+
+
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Sorting PR Files (3 issues)
+
+### 1. The PR files are now being sorted before passing them to the description generator. This is a good improvement for maintaining consistent file order in the review.
+📁 **File:** `github_app/github_helper/pull_requests.py:184`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Sorting the files ensures a consistent order in the review, making it easier for the reviewer to understand the changes.
+💡 **Solution:** The `sort_files` function looks good and should effectively sort the files in alphabetical order.
+
+### 2. The new `generate_tests` function is a good addition, as it provides a way to generate test cases based on the changed files in the PR.
+📁 **File:** `github_app/github_helper/pull_requests.py:199`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Generating tests based on the PR files can help ensure the changes don't break existing functionality.
+💡 **Solution:** The current implementation of `generate_tests` is simple and effective. It returns a list of file names, which can be used to create test cases.
+
+### 3. The change to pass the `code_quality` parameter to the `create_pr_review_text` function is a good improvement, as it allows the review description to include information about the overall code quality.
+📁 **File:** `examples/code_review/main.py:36`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Providing information about the code quality in the review description can give the developer a better understanding of the overall state of the codebase.
+💡 **Solution:** The change looks good and should provide valuable information in the review description.
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (3 issues)
+
+
+Printing Diff and PR Files (3 issues)
+
+### 1. The changes in the `main.py` file to print the `diff_text` and `pr_files` are useful for debugging and understanding the input data.
+📁 **File:** `examples/code_review/main.py:21`
+⚖️ **Severity:** 3/10
+🔍 **Description:** Printing the diff and PR files can help developers better understand the changes being reviewed.
+💡 **Solution:** The changes look good and should provide helpful information during the review process.
+
+### 2. The change to print the `review_data.issues` instead of the `topics` variable is an improvement, as it provides more detailed information about the identified issues.
+📁 **File:** `examples/code_review/main.py:39`
+⚖️ **Severity:** 3/10
+🔍 **Description:** Printing the raw issues can give the developer a better understanding of the specific problems found during the review.
+💡 **Solution:** The change looks good and should provide more useful information in the output.
+
+### 3. The removal of the `enable_observability_logging` configuration option is a good cleanup, as it removes an unused feature from the configuration file.
+📁 **File:** `config.json:4`
+⚖️ **Severity:** 3/10
+🔍 **Description:** Removing unused configuration options helps keep the codebase clean and maintainable.
+💡 **Solution:** The change looks good and should help simplify the configuration file.
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (anthropic.claude-3-haiku-20240307-v1:0)
+{"prompt_tokens": 4511, "completion_tokens": 1305, "total_tokens": 5816}
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_5/comments.json b/.experiments/code_review/haiku/no_eval/pr_5/comments.json
new file mode 100644
index 00000000..1130b98e
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_5/comments.json
@@ -0,0 +1,32 @@
+[
+ {
+ "topic": "Error Handling",
+ "comment": "Potential for API call to fail without retry mechanism",
+ "confidence": "critical",
+ "reason": "The `process_applicant` function makes a call to the `completion` function, which could fail for various reasons (e.g., network issues, API downtime). Without a retry mechanism, the function will silently fail, leading to incomplete or inaccurate results.",
+ "solution": "Implement a retry mechanism with exponential backoff to handle transient failures in the `completion` function call. This will improve the reliability and robustness of the application.",
+ "actual_code": "response = completion(\n model=os.environ.get(\"model\", \"anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1\"), messages=messages\n)",
+ "fixed_code": "num_retries = 3\nretry_delay = 1\nfor _ in range(num_retries):\n try:\n response = completion(\n model=os.environ.get(\"model\", \"anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1\"), messages=messages\n )\n break\n except Exception as e:\n print(f\"Error calling completion function:{e}. Retrying in{retry_delay}seconds...\")\n time.sleep(retry_delay)\n retry_delay *= 2\nelse:\n print(\"Failed to call completion function after multiple retries. Skipping this applicant.\")\n return{key: \"\" for key in[\"feedback\", \"review\", \"should_interview\", \"rating\", \"input_tokens\", \"output_tokens\"]}",
+ "file_name": "main.py",
+ "start_line": 66,
+ "end_line": 68,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Silent failure without logging",
+ "confidence": "critical",
+ "reason": "In the `process_applicant` function, when a `json.JSONDecodeError` is raised, the function silently returns a default result without any logging or error reporting. This makes it difficult to diagnose and troubleshoot issues that may occur during the processing of applicants.",
+ "solution": "Add proper error logging to the `except` block to capture the error and provide more visibility into the failure. This will help with debugging and monitoring the application's performance.",
+ "actual_code": "except json.JSONDecodeError:\n result ={\n key: \"\" for key in[\n \"feedback\",\n \"review\",\n \"should_interview\",\n \"rating\",\n \"input_tokens\",\n \"output_tokens\",\n ]\n}",
+ "fixed_code": "except json.JSONDecodeError as e:\n print(f\"Failed to parse content for applicant:{e}\")\n result ={\n key: \"\" for key in[\n \"feedback\",\n \"review\",\n \"should_interview\",\n \"rating\",\n \"input_tokens\",\n \"output_tokens\",\n ]\n}",
+ "file_name": "main.py",
+ "start_line": 82,
+ "end_line": 94,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 8
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_5/issues.json b/.experiments/code_review/haiku/no_eval/pr_5/issues.json
new file mode 100644
index 00000000..5482c08e
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_5/issues.json
@@ -0,0 +1,107 @@
+[
+ {
+ "topic": "Error Handling",
+ "comment": "Potential for API call to fail without retry mechanism",
+ "confidence": "critical",
+ "reason": "The `process_applicant` function makes a call to the `completion` function, which could fail for various reasons (e.g., network issues, API downtime). Without a retry mechanism, the function will silently fail, leading to incomplete or inaccurate results.",
+ "solution": "Implement a retry mechanism with exponential backoff to handle transient failures in the `completion` function call. This will improve the reliability and robustness of the application.",
+ "actual_code": "response = completion(\n model=os.environ.get(\"model\", \"anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1\"), messages=messages\n)",
+ "fixed_code": "num_retries = 3\nretry_delay = 1\nfor _ in range(num_retries):\n try:\n response = completion(\n model=os.environ.get(\"model\", \"anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1\"), messages=messages\n )\n break\n except Exception as e:\n print(f\"Error calling completion function:{e}. Retrying in{retry_delay}seconds...\")\n time.sleep(retry_delay)\n retry_delay *= 2\nelse:\n print(\"Failed to call completion function after multiple retries. Skipping this applicant.\")\n return{key: \"\" for key in[\"feedback\", \"review\", \"should_interview\", \"rating\", \"input_tokens\", \"output_tokens\"]}",
+ "file_name": "main.py",
+ "start_line": 66,
+ "end_line": 68,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Silent failure without logging",
+ "confidence": "critical",
+ "reason": "In the `process_applicant` function, when a `json.JSONDecodeError` is raised, the function silently returns a default result without any logging or error reporting. This makes it difficult to diagnose and troubleshoot issues that may occur during the processing of applicants.",
+ "solution": "Add proper error logging to the `except` block to capture the error and provide more visibility into the failure. This will help with debugging and monitoring the application's performance.",
+ "actual_code": "except json.JSONDecodeError:\n result ={\n key: \"\" for key in[\n \"feedback\",\n \"review\",\n \"should_interview\",\n \"rating\",\n \"input_tokens\",\n \"output_tokens\",\n ]\n}",
+ "fixed_code": "except json.JSONDecodeError as e:\n print(f\"Failed to parse content for applicant:{e}\")\n result ={\n key: \"\" for key in[\n \"feedback\",\n \"review\",\n \"should_interview\",\n \"rating\",\n \"input_tokens\",\n \"output_tokens\",\n ]\n}",
+ "file_name": "main.py",
+ "start_line": 82,
+ "end_line": 94,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Performance",
+ "comment": "Inefficient way to print progress",
+ "confidence": "important",
+ "reason": "The `process_applicants` function uses a print statement with carriage return (`\\r`) to update the progress bar. This approach can be inefficient, especially for large datasets, as it requires continuously overwriting the same line of output.",
+ "solution": "Use a dedicated progress reporting library, such as `tqdm`, which provides a more efficient and visually appealing progress bar. This will improve the overall performance and user experience of the application.",
+ "actual_code": "progress = (index + 1) / total\nprint(f\"\\rProgress:[{('=' * int(50 * progress)):<50}]{progress:.0%}\", end=\"\", flush=True)",
+ "fixed_code": "if use_tqdm:\n progress_bar = tqdm(total=total, desc=\"Processing applicants\")\n progress_bar.update(1)\nelse:\n progress = (index + 1) / total\n print(f\"\\rProgress:[{('=' * int(50 * progress)):<50}]{progress:.0%}\", end=\"\", flush=True)",
+ "file_name": "main.py",
+ "start_line": 120,
+ "end_line": 122,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Redundant Code",
+ "comment": "Redundant code: The following line is unnecessary",
+ "confidence": "moderate",
+ "reason": "The `if len(df) == 0` check in the `main` function is redundant, as the `process_applicants` function already handles the case where the DataFrame is empty.",
+ "solution": "Remove the unnecessary `if` statement, as it does not provide any additional value to the code.",
+ "actual_code": "if len(df) == 0:\n return",
+ "fixed_code": "",
+ "file_name": "main.py",
+ "start_line": 142,
+ "end_line": 143,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 3
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Division by zero potential if total_tokens is zero",
+ "confidence": "important",
+ "reason": "In the `main` function, the code calculates the total tokens used and prints a summary. However, if the total tokens is zero, the division operation will result in a division by zero error, which can cause the application to crash.",
+ "solution": "Add a check to ensure that the total tokens is not zero before performing the division operation. If the total tokens is zero, handle the case gracefully by printing a message or skipping the division step.",
+ "actual_code": "print(f\"Total tokens used:{total_tokens:,}\")\nprint(f\" - Input tokens:{total_input_tokens:,}\")\nprint(f\" - Output tokens:{total_output_tokens:,}\")",
+ "fixed_code": "if total_tokens > 0:\n print(f\"Total tokens used:{total_tokens:,}\")\n print(f\" - Input tokens:{total_input_tokens:,}\")\n print(f\" - Output tokens:{total_output_tokens:,}\")\nelse:\n print(\"Total tokens used: 0\")",
+ "file_name": "main.py",
+ "start_line": 158,
+ "end_line": 163,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "No error handling for file not found",
+ "confidence": "important",
+ "reason": "The `main` function does not handle the case where the input file specified by the user does not exist. This can lead to a `FileNotFoundError` being raised, which will cause the application to crash without any meaningful error message.",
+ "solution": "Add a try-except block to handle the `FileNotFoundError` and provide a user-friendly error message when the input file is not found.",
+ "actual_code": "main(input_file)",
+ "fixed_code": "try:\n main(input_file)\nexcept FileNotFoundError:\n print(f\"Error: The file '{input_file}' does not exist. Please check the file path and try again.\")",
+ "file_name": "main.py",
+ "start_line": 174,
+ "end_line": 175,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Unused Import",
+ "comment": "Unused import",
+ "confidence": "low",
+ "reason": "The `random` module is imported but not used in the code.",
+ "solution": "Remove the unused import statement.",
+ "actual_code": "import random # Unused import",
+ "fixed_code": "",
+ "file_name": "main.py",
+ "start_line": 8,
+ "end_line": 8,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 1
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/haiku/no_eval/pr_5/review.md b/.experiments/code_review/haiku/no_eval/pr_5/review.md
new file mode 100644
index 00000000..5e96731d
--- /dev/null
+++ b/.experiments/code_review/haiku/no_eval/pr_5/review.md
@@ -0,0 +1,211 @@
+PR URL: https://github.com/sauravpanda/applicant-screening/pull/5
+
+# 🔍 Code Review Summary
+
+❗ **Attention Required:** This push has potential issues. 🚨
+
+## 📊 Stats
+- Total Issues: 7
+- Critical: 2
+- Important: 3
+- Minor: 1
+- Files Affected: 1
+## 🏆 Code Quality
+[███████████████░░░░░] 75% (Fair)
+
+## 🚨 Critical Issues
+
+
+Error Handling (2 issues)
+
+### 1. Potential for API call to fail without retry mechanism
+📁 **File:** `main.py:66`
+⚖️ **Severity:** 9/10
+🔍 **Description:** The `process_applicant` function makes a call to the `completion` function, which could fail for various reasons (e.g., network issues, API downtime). Without a retry mechanism, the function will silently fail, leading to incomplete or inaccurate results.
+💡 **Solution:** Implement a retry mechanism with exponential backoff to handle transient failures in the `completion` function call. This will improve the reliability and robustness of the application.
+
+**Current Code:**
+```python
+response = completion(
+ model=os.environ.get("model", "anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1"), messages=messages
+)
+```
+
+**Suggested Code:**
+```python
+num_retries = 3
+retry_delay = 1
+for _ in range(num_retries):
+ try:
+ response = completion(
+ model=os.environ.get("model", "anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1"), messages=messages
+ )
+ break
+ except Exception as e:
+ print(f"Error calling completion function:{e}. Retrying in{retry_delay}seconds...")
+ time.sleep(retry_delay)
+ retry_delay *= 2
+else:
+ print("Failed to call completion function after multiple retries. Skipping this applicant.")
+ return{key: "" for key in["feedback", "review", "should_interview", "rating", "input_tokens", "output_tokens"]}
+```
+
+### 2. Silent failure without logging
+📁 **File:** `main.py:82`
+⚖️ **Severity:** 8/10
+🔍 **Description:** In the `process_applicant` function, when a `json.JSONDecodeError` is raised, the function silently returns a default result without any logging or error reporting. This makes it difficult to diagnose and troubleshoot issues that may occur during the processing of applicants.
+💡 **Solution:** Add proper error logging to the `except` block to capture the error and provide more visibility into the failure. This will help with debugging and monitoring the application's performance.
+
+**Current Code:**
+```python
+except json.JSONDecodeError:
+ result ={
+ key: "" for key in[
+ "feedback",
+ "review",
+ "should_interview",
+ "rating",
+ "input_tokens",
+ "output_tokens",
+ ]
+}
+```
+
+**Suggested Code:**
+```python
+except json.JSONDecodeError as e:
+ print(f"Failed to parse content for applicant:{e}")
+ result ={
+ key: "" for key in[
+ "feedback",
+ "review",
+ "should_interview",
+ "rating",
+ "input_tokens",
+ "output_tokens",
+ ]
+}
+```
+
+
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Performance (3 issues)
+
+### 1. Inefficient way to print progress
+📁 **File:** `main.py:120`
+⚖️ **Severity:** 6/10
+🔍 **Description:** The `process_applicants` function uses a print statement with carriage return (`\r`) to update the progress bar. This approach can be inefficient, especially for large datasets, as it requires continuously overwriting the same line of output.
+💡 **Solution:** Use a dedicated progress reporting library, such as `tqdm`, which provides a more efficient and visually appealing progress bar. This will improve the overall performance and user experience of the application.
+
+**Current Code:**
+```python
+progress = (index + 1) / total
+print(f"\rProgress:[{('=' * int(50 * progress)):<50}]{progress:.0%}", end="", flush=True)
+```
+
+**Suggested Code:**
+```python
+if use_tqdm:
+ progress_bar = tqdm(total=total, desc="Processing applicants")
+ progress_bar.update(1)
+else:
+ progress = (index + 1) / total
+ print(f"\rProgress:[{('=' * int(50 * progress)):<50}]{progress:.0%}", end="", flush=True)
+```
+
+### 2. Division by zero potential if total_tokens is zero
+📁 **File:** `main.py:158`
+⚖️ **Severity:** 7/10
+🔍 **Description:** In the `main` function, the code calculates the total tokens used and prints a summary. However, if the total tokens is zero, the division operation will result in a division by zero error, which can cause the application to crash.
+💡 **Solution:** Add a check to ensure that the total tokens is not zero before performing the division operation. If the total tokens is zero, handle the case gracefully by printing a message or skipping the division step.
+
+**Current Code:**
+```python
+print(f"Total tokens used:{total_tokens:,}")
+print(f" - Input tokens:{total_input_tokens:,}")
+print(f" - Output tokens:{total_output_tokens:,}")
+```
+
+**Suggested Code:**
+```python
+if total_tokens > 0:
+ print(f"Total tokens used:{total_tokens:,}")
+ print(f" - Input tokens:{total_input_tokens:,}")
+ print(f" - Output tokens:{total_output_tokens:,}")
+else:
+ print("Total tokens used: 0")
+```
+
+### 3. No error handling for file not found
+📁 **File:** `main.py:174`
+⚖️ **Severity:** 7/10
+🔍 **Description:** The `main` function does not handle the case where the input file specified by the user does not exist. This can lead to a `FileNotFoundError` being raised, which will cause the application to crash without any meaningful error message.
+💡 **Solution:** Add a try-except block to handle the `FileNotFoundError` and provide a user-friendly error message when the input file is not found.
+
+**Current Code:**
+```python
+main(input_file)
+```
+
+**Suggested Code:**
+```python
+try:
+ main(input_file)
+except FileNotFoundError:
+ print(f"Error: The file '{input_file}' does not exist. Please check the file path and try again.")
+```
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (2 issues)
+
+
+Redundant Code (1 issue)
+
+### 1. Redundant code: The following line is unnecessary
+📁 **File:** `main.py:142`
+⚖️ **Severity:** 3/10
+🔍 **Description:** The `if len(df) == 0` check in the `main` function is redundant, as the `process_applicants` function already handles the case where the DataFrame is empty.
+💡 **Solution:** Remove the unnecessary `if` statement, as it does not provide any additional value to the code.
+
+**Current Code:**
+```python
+if len(df) == 0:
+ return
+```
+
+**Suggested Code:**
+```python
+
+```
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (anthropic.claude-3-haiku-20240307-v1:0)
+{"prompt_tokens": 7009, "completion_tokens": 2247, "total_tokens": 9256}
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_222/comments.json b/.experiments/code_review/llama-405b/no_eval/pr_222/comments.json
new file mode 100644
index 00000000..f054c504
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_222/comments.json
@@ -0,0 +1,117 @@
+[
+ {
+ "topic": "Error handling",
+ "comment": "Error handling is missing in some critical sections of the code.",
+ "confidence": "critical",
+ "reason": "Error handling is crucial for preventing crashes and providing useful error messages.",
+ "solution": "Add try-except blocks to handle potential errors.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "SQL Injection",
+ "comment": "Potential SQL injection vulnerability",
+ "confidence": "critical",
+ "reason": "Using string formatting to construct SQL queries can lead to SQL injection attacks",
+ "solution": "Use parameterized queries or an ORM to prevent SQL injection",
+ "actual_code": "query = f\"\"\"SELECT ... FROM {self.table_name} e\"\"\"",
+ "fixed_code": "query = \"\"\"SELECT ... FROM %s e\"\"\" % self.table_name",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 19,
+ "end_line": 37,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Version bump",
+ "comment": "Verify that the version bump is intentional and follows the project's versioning scheme",
+ "confidence": "critical",
+ "reason": "Inconsistent versioning can cause confusion and break dependencies",
+ "solution": "Verify the version bump and update the project's versioning scheme if necessary",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "pyproject.toml",
+ "start_line": 3,
+ "end_line": 3,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 9
+ },
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to Dockerfile, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "Dockerfile",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to docker-compose.yml, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "15",
+ "end_line": "15",
+ "side": "RIGHT",
+ "file_name": "docker-compose.yml",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Version Control",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to .gitignore, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "164",
+ "end_line": "164",
+ "side": "RIGHT",
+ "file_name": ".gitignore",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Database",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to db_setup/init.sql, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "1",
+ "end_line": "1",
+ "side": "RIGHT",
+ "file_name": "db_setup/init.sql",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_222/issues.json b/.experiments/code_review/llama-405b/no_eval/pr_222/issues.json
new file mode 100644
index 00000000..599c0667
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_222/issues.json
@@ -0,0 +1,282 @@
+[
+ {
+ "topic": "Code organization",
+ "comment": "The code is well-organized, but some files have too many responsibilities.",
+ "confidence": "important",
+ "reason": "Separation of concerns is crucial for maintainability.",
+ "solution": "Consider breaking down large files into smaller ones.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Type hints",
+ "comment": "Type hints are missing in some function definitions.",
+ "confidence": "moderate",
+ "reason": "Type hints improve code readability and help catch type-related errors.",
+ "solution": "Add type hints for function parameters and return types.",
+ "actual_code": "def chunk_code(code: str, language: str) -> ParsedBody:",
+ "fixed_code": "def chunk_code(code: str, language: str) -> Dict[str, Dict[str, Any]]:",
+ "file_name": "kaizen/retriever/code_chunker.py",
+ "start_line": 7,
+ "end_line": 7,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 3
+ },
+ {
+ "topic": "Error handling",
+ "comment": "Error handling is missing in some critical sections of the code.",
+ "confidence": "critical",
+ "reason": "Error handling is crucial for preventing crashes and providing useful error messages.",
+ "solution": "Add try-except blocks to handle potential errors.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Code duplication",
+ "comment": "Some code is duplicated across multiple files.",
+ "confidence": "moderate",
+ "reason": "Code duplication makes maintenance harder and increases the chance of bugs.",
+ "solution": "Extract duplicated code into reusable functions or classes.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Type Hints",
+ "comment": "Missing type hints for function return types",
+ "confidence": "moderate",
+ "reason": "Type hints improve code readability and help catch type-related errors",
+ "solution": "Add type hints for function return types",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 13,
+ "end_line": 52,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "SQL Injection",
+ "comment": "Potential SQL injection vulnerability",
+ "confidence": "critical",
+ "reason": "Using string formatting to construct SQL queries can lead to SQL injection attacks",
+ "solution": "Use parameterized queries or an ORM to prevent SQL injection",
+ "actual_code": "query = f\"\"\"SELECT ... FROM {self.table_name} e\"\"\"",
+ "fixed_code": "query = \"\"\"SELECT ... FROM %s e\"\"\" % self.table_name",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 19,
+ "end_line": 37,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Missing error handling for database operations",
+ "confidence": "important",
+ "reason": "Database operations can fail due to various reasons, and error handling is necessary to prevent crashes",
+ "solution": "Add try-except blocks to handle database operation errors",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 39,
+ "end_line": 52,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Code Organization",
+ "comment": "AbstractionFeedback class has a single responsibility, but its methods are not well-organized",
+ "confidence": "moderate",
+ "reason": "Well-organized code is easier to read and maintain",
+ "solution": "Consider reorganizing the methods into separate classes or modules",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/feedback_system.py",
+ "start_line": 4,
+ "end_line": 18,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "The code does not handle potential errors that may occur when connecting to the database or executing queries.",
+ "confidence": "important",
+ "reason": "Error handling is crucial to prevent the program from crashing and to provide meaningful error messages instead.",
+ "solution": "Add try-except blocks to handle potential errors when connecting to the database or executing queries.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 250,
+ "end_line": 270,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Code Duplication",
+ "comment": "The code has duplicated logic for storing code in the database and storing function relationships.",
+ "confidence": "moderate",
+ "reason": "Code duplication makes the code harder to maintain and modify.",
+ "solution": "Extract the duplicated logic into a separate function.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 295,
+ "end_line": 313,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 4
+ },
+ {
+ "topic": "Code Readability",
+ "comment": "The code has long and complex functions that are hard to read and understand.",
+ "confidence": "moderate",
+ "reason": "Long and complex functions make the code harder to maintain and modify.",
+ "solution": "Break down the long and complex functions into smaller and simpler functions.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 100,
+ "end_line": 200,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 5
+ },
+ {
+ "topic": "Unused imports",
+ "comment": "Remove unused imports in tree_sitter_utils.py and test_chunker.py",
+ "confidence": "moderate",
+ "reason": "Unused imports can clutter the codebase and make it harder to maintain",
+ "solution": "Remove the unnecessary imports",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/tree_sitter_utils.py",
+ "start_line": 1,
+ "end_line": 1,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Error handling",
+ "comment": "Catch specific exceptions in LanguageLoader.load_language",
+ "confidence": "important",
+ "reason": "Broad exception catching can mask bugs and make debugging harder",
+ "solution": "Catch specific exceptions, such as ImportError or ModuleNotFoundError",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/tree_sitter_utils.py",
+ "start_line": 15,
+ "end_line": 15,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 7
+ },
+ {
+ "topic": "Version bump",
+ "comment": "Verify that the version bump is intentional and follows the project's versioning scheme",
+ "confidence": "critical",
+ "reason": "Inconsistent versioning can cause confusion and break dependencies",
+ "solution": "Verify the version bump and update the project's versioning scheme if necessary",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "pyproject.toml",
+ "start_line": 3,
+ "end_line": 3,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 9
+ },
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to Dockerfile, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "Dockerfile",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to docker-compose.yml, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "15",
+ "end_line": "15",
+ "side": "RIGHT",
+ "file_name": "docker-compose.yml",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Version Control",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to .gitignore, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "164",
+ "end_line": "164",
+ "side": "RIGHT",
+ "file_name": ".gitignore",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Database",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to db_setup/init.sql, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "1",
+ "end_line": "1",
+ "side": "RIGHT",
+ "file_name": "db_setup/init.sql",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_222/review.md b/.experiments/code_review/llama-405b/no_eval/pr_222/review.md
new file mode 100644
index 00000000..6fd1770a
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_222/review.md
@@ -0,0 +1,195 @@
+PR URL: https://github.com/Cloud-Code-AI/kaizen/pull/222
+
+# 🔍 Code Review Summary
+
+❗ **Attention Required:** This push has potential issues. 🚨
+
+## 📊 Stats
+- Total Issues: 19
+- Critical: 8
+- Important: 4
+- Minor: 7
+- Files Affected: 12
+## 🏆 Code Quality
+[████████████████░░░░] 80% (Good)
+
+## 🚨 Critical Issues
+
+
+Error handling (8 issues)
+
+### 1. Error handling is missing in some critical sections of the code.
+📁 **File:** `:0`
+⚖️ **Severity:** 8/10
+🔍 **Description:** Error handling is crucial for preventing crashes and providing useful error messages.
+💡 **Solution:** Add try-except blocks to handle potential errors.
+
+### 2. Potential SQL injection vulnerability
+📁 **File:** `kaizen/retriever/custom_vector_store.py:19`
+⚖️ **Severity:** 9/10
+🔍 **Description:** Using string formatting to construct SQL queries can lead to SQL injection attacks
+💡 **Solution:** Use parameterized queries or an ORM to prevent SQL injection
+
+**Current Code:**
+```python
+query = f"""SELECT ... FROM {self.table_name} e"""
+```
+
+**Suggested Code:**
+```python
+query = """SELECT ... FROM %s e""" % self.table_name
+```
+
+### 3. Verify that the version bump is intentional and follows the project's versioning scheme
+📁 **File:** `pyproject.toml:3`
+⚖️ **Severity:** 9/10
+🔍 **Description:** Inconsistent versioning can cause confusion and break dependencies
+💡 **Solution:** Verify the version bump and update the project's versioning scheme if necessary
+
+### 4. Changes made to sensitive file
+📁 **File:** `config.json:4`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to config.json, which needs review
+💡 **Solution:** NA
+
+### 5. Changes made to sensitive file
+📁 **File:** `Dockerfile:4`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to Dockerfile, which needs review
+💡 **Solution:** NA
+
+### 6. Changes made to sensitive file
+📁 **File:** `docker-compose.yml:15`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to docker-compose.yml, which needs review
+💡 **Solution:** NA
+
+### 7. Changes made to sensitive file
+📁 **File:** `.gitignore:164`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to .gitignore, which needs review
+💡 **Solution:** NA
+
+### 8. Changes made to sensitive file
+📁 **File:** `db_setup/init.sql:1`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to db_setup/init.sql, which needs review
+💡 **Solution:** NA
+
+
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Code organization (4 issues)
+
+### 1. The code is well-organized, but some files have too many responsibilities.
+📁 **File:** `:0`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Separation of concerns is crucial for maintainability.
+💡 **Solution:** Consider breaking down large files into smaller ones.
+
+### 2. Missing error handling for database operations
+📁 **File:** `kaizen/retriever/custom_vector_store.py:39`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Database operations can fail due to various reasons, and error handling is necessary to prevent crashes
+💡 **Solution:** Add try-except blocks to handle database operation errors
+
+### 3. The code does not handle potential errors that may occur when connecting to the database or executing queries.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:250`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Error handling is crucial to prevent the program from crashing and to provide meaningful error messages instead.
+💡 **Solution:** Add try-except blocks to handle potential errors when connecting to the database or executing queries.
+
+### 4. Catch specific exceptions in LanguageLoader.load_language
+📁 **File:** `kaizen/retriever/tree_sitter_utils.py:15`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Broad exception catching can mask bugs and make debugging harder
+💡 **Solution:** Catch specific exceptions, such as ImportError or ModuleNotFoundError
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (7 issues)
+
+
+Type hints (7 issues)
+
+### 1. Type hints are missing in some function definitions.
+📁 **File:** `kaizen/retriever/code_chunker.py:7`
+⚖️ **Severity:** 3/10
+🔍 **Description:** Type hints improve code readability and help catch type-related errors.
+💡 **Solution:** Add type hints for function parameters and return types.
+
+**Current Code:**
+```python
+def chunk_code(code: str, language: str) -> ParsedBody:
+```
+
+**Suggested Code:**
+```python
+def chunk_code(code: str, language: str) -> Dict[str, Dict[str, Any]]:
+```
+
+### 2. Some code is duplicated across multiple files.
+📁 **File:** `:0`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Code duplication makes maintenance harder and increases the chance of bugs.
+💡 **Solution:** Extract duplicated code into reusable functions or classes.
+
+### 3. Missing type hints for function return types
+📁 **File:** `kaizen/retriever/custom_vector_store.py:13`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Type hints improve code readability and help catch type-related errors
+💡 **Solution:** Add type hints for function return types
+
+### 4. AbstractionFeedback class has a single responsibility, but its methods are not well-organized
+📁 **File:** `kaizen/retriever/feedback_system.py:4`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Well-organized code is easier to read and maintain
+💡 **Solution:** Consider reorganizing the methods into separate classes or modules
+
+### 5. The code has duplicated logic for storing code in the database and storing function relationships.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:295`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Code duplication makes the code harder to maintain and modify.
+💡 **Solution:** Extract the duplicated logic into a separate function.
+
+### 6. The code has long and complex functions that are hard to read and understand.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:100`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Long and complex functions make the code harder to maintain and modify.
+💡 **Solution:** Break down the long and complex functions into smaller and simpler functions.
+
+### 7. Remove unused imports in tree_sitter_utils.py and test_chunker.py
+📁 **File:** `kaizen/retriever/tree_sitter_utils.py:1`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Unused imports can clutter the codebase and make it harder to maintain
+💡 **Solution:** Remove the unnecessary imports
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (azure_ai/Meta-Llama-3-405B-Instruct)
+{"prompt_tokens": 21487, "completion_tokens": 2711, "total_tokens": 24198}
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_232/comments.json b/.experiments/code_review/llama-405b/no_eval/pr_232/comments.json
new file mode 100644
index 00000000..0637a088
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_232/comments.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_232/issues.json b/.experiments/code_review/llama-405b/no_eval/pr_232/issues.json
new file mode 100644
index 00000000..1e332158
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_232/issues.json
@@ -0,0 +1,137 @@
+[
+ {
+ "topic": "Unused imports",
+ "comment": "There are several unused imports in the code, such as `redirect` in `page.tsx` and `Switch` and `Label` in `queryinput.tsx`. These imports should be removed to declutter the code.",
+ "confidence": "moderate",
+ "reason": "Unused imports can make the code harder to read and maintain.",
+ "solution": "Remove unused imports.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "page.tsx, queryinput.tsx",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Type annotations",
+ "comment": "Some variables and functions are missing type annotations, such as the `initialSpaces` prop in `queryinput.tsx`. Adding type annotations can improve code readability and prevent type-related errors.",
+ "confidence": "moderate",
+ "reason": "Type annotations can help catch type-related errors at compile-time.",
+ "solution": "Add type annotations for variables and functions.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "queryinput.tsx",
+ "start_line": 10,
+ "end_line": 10,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Code organization",
+ "comment": "Some files, such as `page.tsx`, contain multiple unrelated components. It would be better to separate these components into their own files to improve code organization and reusability.",
+ "confidence": "low",
+ "reason": "Separating components into their own files can improve code organization and reusability.",
+ "solution": "Separate unrelated components into their own files.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "page.tsx",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 3
+ },
+ {
+ "topic": "Functionality",
+ "comment": "The code seems to be implementing a memory creation feature with a dialog box. However, there are some potential issues with the functionality.",
+ "confidence": "important",
+ "reason": "The `handleSubmit` function is not properly handling errors. It should be improved to handle errors in a more robust way.",
+ "solution": "Add try-catch blocks to handle errors in the `handleSubmit` function.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "menu.tsx",
+ "start_line": 213,
+ "end_line": 233,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Code organization",
+ "comment": "The code is not well-organized. There are many functions and variables defined inside the `DialogContentContainer` component.",
+ "confidence": "moderate",
+ "reason": "It would be better to separate the concerns of the component into smaller functions or utilities.",
+ "solution": "Extract some of the functions and variables into separate files or utilities.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "menu.tsx",
+ "start_line": 163,
+ "end_line": 346,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Type annotations",
+ "comment": "Some of the variables and functions are missing type annotations.",
+ "confidence": "low",
+ "reason": "Adding type annotations would improve the code quality and make it easier to understand.",
+ "solution": "Add type annotations for the variables and functions.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "menu.tsx",
+ "start_line": 163,
+ "end_line": 346,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 2
+ },
+ {
+ "topic": "Unused imports",
+ "comment": "The import of `useEffect` from 'react' is not used in the code.",
+ "confidence": "moderate",
+ "reason": "The import is not used anywhere in the code.",
+ "solution": "Remove the unused import.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "packages/ui/shadcn/combobox.tsx",
+ "start_line": 3,
+ "end_line": 3,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Type annotations",
+ "comment": "The type annotation for the `handleInputChange` function is missing.",
+ "confidence": "moderate",
+ "reason": "The function is not annotated with a type.",
+ "solution": "Add the type annotation for the function.",
+ "actual_code": "const handleInputChange = (e: React.ChangeEvent) =>{",
+ "fixed_code": "const handleInputChange: React.ChangeEventHandler = (e) =>{",
+ "file_name": "packages/ui/shadcn/combobox.tsx",
+ "start_line": 41,
+ "end_line": 41,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ },
+ {
+ "topic": "Code organization",
+ "comment": "The code is not organized in a logical manner.",
+ "confidence": "low",
+ "reason": "The code is not separated into clear sections or functions.",
+ "solution": "Organize the code into clear sections or functions.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "packages/ui/shadcn/combobox.tsx",
+ "start_line": 1,
+ "end_line": 107,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 3
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_232/review.md b/.experiments/code_review/llama-405b/no_eval/pr_232/review.md
new file mode 100644
index 00000000..ef2f635f
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_232/review.md
@@ -0,0 +1,100 @@
+PR URL: https://github.com/supermemoryai/supermemory/pull/232
+
+# 🔍 Code Review Summary
+
+✅ **All Clear:** This commit looks good! 👍
+
+## 📊 Stats
+- Total Issues: 9
+- Critical: 0
+- Important: 1
+- Minor: 5
+- Files Affected: 5
+## 🏆 Code Quality
+[████████████████░░░░] 80% (Good)
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Functionality (1 issue)
+
+### 1. The code seems to be implementing a memory creation feature with a dialog box. However, there are some potential issues with the functionality.
+📁 **File:** `menu.tsx:213`
+⚖️ **Severity:** 6/10
+🔍 **Description:** The `handleSubmit` function is not properly handling errors. It should be improved to handle errors in a more robust way.
+💡 **Solution:** Add try-catch blocks to handle errors in the `handleSubmit` function.
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (8 issues)
+
+
+Unused imports (5 issues)
+
+### 1. There are several unused imports in the code, such as `redirect` in `page.tsx` and `Switch` and `Label` in `queryinput.tsx`. These imports should be removed to declutter the code.
+📁 **File:** `page.tsx, queryinput.tsx:0`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Unused imports can make the code harder to read and maintain.
+💡 **Solution:** Remove unused imports.
+
+### 2. Some variables and functions are missing type annotations, such as the `initialSpaces` prop in `queryinput.tsx`. Adding type annotations can improve code readability and prevent type-related errors.
+📁 **File:** `queryinput.tsx:10`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Type annotations can help catch type-related errors at compile-time.
+💡 **Solution:** Add type annotations for variables and functions.
+
+### 3. The code is not well-organized. There are many functions and variables defined inside the `DialogContentContainer` component.
+📁 **File:** `menu.tsx:163`
+⚖️ **Severity:** 4/10
+🔍 **Description:** It would be better to separate the concerns of the component into smaller functions or utilities.
+💡 **Solution:** Extract some of the functions and variables into separate files or utilities.
+
+### 4. The import of `useEffect` from 'react' is not used in the code.
+📁 **File:** `packages/ui/shadcn/combobox.tsx:3`
+⚖️ **Severity:** 5/10
+🔍 **Description:** The import is not used anywhere in the code.
+💡 **Solution:** Remove the unused import.
+
+### 5. The type annotation for the `handleInputChange` function is missing.
+📁 **File:** `packages/ui/shadcn/combobox.tsx:41`
+⚖️ **Severity:** 5/10
+🔍 **Description:** The function is not annotated with a type.
+💡 **Solution:** Add the type annotation for the function.
+
+**Current Code:**
+```typescript
+const handleInputChange = (e: React.ChangeEvent) =>{
+```
+
+**Suggested Code:**
+```typescript
+const handleInputChange: React.ChangeEventHandler = (e) =>{
+```
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (azure_ai/Meta-Llama-3-405B-Instruct)
+{"prompt_tokens": 15756, "completion_tokens": 1539, "total_tokens": 17295}
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_252/comments.json b/.experiments/code_review/llama-405b/no_eval/pr_252/comments.json
new file mode 100644
index 00000000..0637a088
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_252/comments.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_252/issues.json b/.experiments/code_review/llama-405b/no_eval/pr_252/issues.json
new file mode 100644
index 00000000..4a42628f
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_252/issues.json
@@ -0,0 +1,62 @@
+[
+ {
+ "topic": "Code Organization",
+ "comment": "The `WorkSummaryGenerator` class has multiple responsibilities, including generating work summaries, Twitter posts, and LinkedIn posts. Consider breaking this down into separate classes or functions for better organization and maintainability.",
+ "confidence": "important",
+ "reason": "Separation of Concerns (SoC) principle",
+ "solution": "Refactor the `WorkSummaryGenerator` class into separate classes or functions for each responsibility.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/reviewer/work_summarizer.py",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "The `generate_twitter_post` and `generate_linkedin_post` methods do not handle potential errors that may occur during the generation process. Consider adding try-except blocks to handle and log any exceptions.",
+ "confidence": "important",
+ "reason": "Error handling and logging",
+ "solution": "Add try-except blocks to handle and log any exceptions during the generation process.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/reviewer/work_summarizer.py",
+ "start_line": 58,
+ "end_line": 74,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 7
+ },
+ {
+ "topic": "Code Style",
+ "comment": "The `kaizen/llms/prompts/code_review_prompts.py` file has inconsistent indentation. Consider using a consistent number of spaces for indentation throughout the file.",
+ "confidence": "moderate",
+ "reason": "Code style and readability",
+ "solution": "Use a consistent number of spaces for indentation throughout the file.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/llms/prompts/code_review_prompts.py",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Code Duplication",
+ "comment": "The `generate_twitter_post` and `generate_linkedin_post` methods have similar code structures. Consider extracting a common method to avoid code duplication.",
+ "confidence": "moderate",
+ "reason": "Don't Repeat Yourself (DRY) principle",
+ "solution": "Extract a common method to avoid code duplication.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/reviewer/work_summarizer.py",
+ "start_line": 58,
+ "end_line": 74,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 5
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_252/review.md b/.experiments/code_review/llama-405b/no_eval/pr_252/review.md
new file mode 100644
index 00000000..732aa03c
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_252/review.md
@@ -0,0 +1,78 @@
+PR URL: https://github.com/Cloud-Code-AI/kaizen/pull/252
+
+# 🔍 Code Review Summary
+
+✅ **All Clear:** This commit looks good! 👍
+
+## 📊 Stats
+- Total Issues: 4
+- Critical: 0
+- Important: 2
+- Minor: 2
+- Files Affected: 2
+## 🏆 Code Quality
+[████████████████░░░░] 80% (Good)
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Code Organization (2 issues)
+
+### 1. The `WorkSummaryGenerator` class has multiple responsibilities, including generating work summaries, Twitter posts, and LinkedIn posts. Consider breaking this down into separate classes or functions for better organization and maintainability.
+📁 **File:** `kaizen/reviewer/work_summarizer.py:0`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Separation of Concerns (SoC) principle
+💡 **Solution:** Refactor the `WorkSummaryGenerator` class into separate classes or functions for each responsibility.
+
+### 2. The `generate_twitter_post` and `generate_linkedin_post` methods do not handle potential errors that may occur during the generation process. Consider adding try-except blocks to handle and log any exceptions.
+📁 **File:** `kaizen/reviewer/work_summarizer.py:58`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Error handling and logging
+💡 **Solution:** Add try-except blocks to handle and log any exceptions during the generation process.
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (2 issues)
+
+
+Code Style (2 issues)
+
+### 1. The `kaizen/llms/prompts/code_review_prompts.py` file has inconsistent indentation. Consider using a consistent number of spaces for indentation throughout the file.
+📁 **File:** `kaizen/llms/prompts/code_review_prompts.py:0`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Code style and readability
+💡 **Solution:** Use a consistent number of spaces for indentation throughout the file.
+
+### 2. The `generate_twitter_post` and `generate_linkedin_post` methods have similar code structures. Consider extracting a common method to avoid code duplication.
+📁 **File:** `kaizen/reviewer/work_summarizer.py:58`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Don't Repeat Yourself (DRY) principle
+💡 **Solution:** Extract a common method to avoid code duplication.
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (azure_ai/Meta-Llama-3-405B-Instruct)
+{"prompt_tokens": 3881, "completion_tokens": 757, "total_tokens": 4638}
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_335/comments.json b/.experiments/code_review/llama-405b/no_eval/pr_335/comments.json
new file mode 100644
index 00000000..0637a088
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_335/comments.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_335/issues.json b/.experiments/code_review/llama-405b/no_eval/pr_335/issues.json
new file mode 100644
index 00000000..5b84d52d
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_335/issues.json
@@ -0,0 +1,47 @@
+[
+ {
+ "topic": "Import Statements",
+ "comment": "Unused import statements are present in the code.",
+ "confidence": "moderate",
+ "reason": "The import statements for 'output' and 'parser' are not used anywhere in the code.",
+ "solution": "Remove unused import statements to declutter the code.",
+ "actual_code": "from kaizen.helpers import output, parser",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 6,
+ "end_line": 6,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 5
+ },
+ {
+ "topic": "Function Definition",
+ "comment": "The function '__init__' has an unused parameter 'llm_provider'.",
+ "confidence": "high",
+ "reason": "The parameter 'llm_provider' is not used anywhere in the function.",
+ "solution": "Remove unused function parameters to simplify the code.",
+ "actual_code": "def __init__(self, llm_provider: LLMProvider):",
+ "fixed_code": "def __init__(self):",
+ "file_name": "kaizen/generator/pr_description.py",
+ "start_line": 26,
+ "end_line": 26,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Variable Naming",
+ "comment": "Variable names are not following PEP 8 conventions.",
+ "confidence": "low",
+ "reason": "Variable names should be in lowercase with words separated by underscores.",
+ "solution": "Rename variables to follow PEP 8 conventions.",
+ "actual_code": "PULL_REQUEST_TITLE ={PULL_REQUEST_TITLE}",
+ "fixed_code": "pull_request_title ={pull_request_title}",
+ "file_name": "kaizen/llms/prompts/pr_desc_prompts.py",
+ "start_line": 27,
+ "end_line": 27,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 3
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_335/review.md b/.experiments/code_review/llama-405b/no_eval/pr_335/review.md
new file mode 100644
index 00000000..2a15ce48
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_335/review.md
@@ -0,0 +1,62 @@
+PR URL: https://github.com/Cloud-Code-AI/kaizen/pull/335
+
+# 🔍 Code Review Summary
+
+✅ **All Clear:** This commit looks good! 👍
+
+## 📊 Stats
+- Total Issues: 3
+- Critical: 0
+- Important: 0
+- Minor: 1
+- Files Affected: 2
+## 🏆 Code Quality
+[████████████████░░░░] 80% (Good)
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (2 issues)
+
+
+Import Statements (1 issues)
+
+### 1. Unused import statements are present in the code.
+📁 **File:** `kaizen/generator/pr_description.py:6`
+⚖️ **Severity:** 5/10
+🔍 **Description:** The import statements for 'output' and 'parser' are not used anywhere in the code.
+💡 **Solution:** Remove unused import statements to declutter the code.
+
+**Current Code:**
+```python
+from kaizen.helpers import output, parser
+```
+
+**Suggested Code:**
+```python
+
+```
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (azure_ai/Meta-Llama-3-405B-Instruct)
+{"prompt_tokens": 6756, "completion_tokens": 533, "total_tokens": 7289}
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_400/comments.json b/.experiments/code_review/llama-405b/no_eval/pr_400/comments.json
new file mode 100644
index 00000000..33197e40
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_400/comments.json
@@ -0,0 +1,16 @@
+[
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "11",
+ "end_line": "11",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_400/issues.json b/.experiments/code_review/llama-405b/no_eval/pr_400/issues.json
new file mode 100644
index 00000000..318f90b8
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_400/issues.json
@@ -0,0 +1,316 @@
+[
+ {
+ "topic": "Error Handling",
+ "comment": "The `create_pr_description` function does not handle cases where `desc` or `original_desc` are not strings.",
+ "confidence": "important",
+ "reason": "The function should check the type of the input parameters and raise a meaningful error if they are not strings.",
+ "solution": "Add type checking for `desc` and `original_desc` and raise a `TypeError` if they are not strings.",
+ "actual_code": "",
+ "fixed_code": "if not isinstance(desc, str) or not isinstance(original_desc, str):\n raise TypeError('desc and original_desc must be strings')",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_create_pr_description.py",
+ "start_line": 31,
+ "end_line": 31,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Performance",
+ "comment": "The `create_pr_description` function may have performance issues for large input strings.",
+ "confidence": "moderate",
+ "reason": "The function uses string concatenation, which can be inefficient for large strings.",
+ "solution": "Consider using a more efficient string concatenation method, such as using a list and joining the strings at the end.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_create_pr_description.py",
+ "start_line": 51,
+ "end_line": 51,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Code Organization",
+ "comment": "The test file has a mix of test cases and helper functions.",
+ "confidence": "moderate",
+ "reason": "It's better to separate test cases and helper functions into different files or modules.",
+ "solution": "Consider moving the helper functions to a separate file or module.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_create_pr_description.py",
+ "start_line": 1,
+ "end_line": 1,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 3
+ },
+ {
+ "topic": "Code Duplication",
+ "comment": "The `PR_COLLAPSIBLE_TEMPLATE` is duplicated in multiple tests.",
+ "confidence": "high",
+ "reason": "Code duplication can make maintenance harder.",
+ "solution": "Extract the template into a separate constant or function.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_create_pr_review_text.py",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Test Coverage",
+ "comment": "There are no tests for the `create_pr_review_text` function with an empty input.",
+ "confidence": "medium",
+ "reason": "Empty input can cause unexpected behavior.",
+ "solution": "Add a test for the `create_pr_review_text` function with an empty input.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_create_pr_review_text.py",
+ "start_line": 0,
+ "end_line": 0,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 5
+ },
+ {
+ "topic": "Code Readability",
+ "comment": "The `test_create_pr_review_text_mixed_reviews` test has a long and complex expected output.",
+ "confidence": "low",
+ "reason": "Long and complex expected outputs can make tests harder to understand.",
+ "solution": "Consider breaking the expected output into smaller, more manageable parts.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_create_pr_review_text.py",
+ "start_line": 96,
+ "end_line": 172,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 3
+ },
+ {
+ "topic": "Functionality",
+ "comment": "The code seems to be implementing the required functionality correctly.",
+ "confidence": "moderate",
+ "reason": "The code is using the correct libraries and functions to achieve the desired outcome.",
+ "solution": "",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_create_test_files.py",
+ "start_line": 1,
+ "end_line": 247,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 5
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "The code is missing error handling for potential exceptions that may occur during file operations.",
+ "confidence": "high",
+ "reason": "The code is not handling potential exceptions that may occur during file operations, which can lead to unexpected behavior.",
+ "solution": "Add try-except blocks to handle potential exceptions during file operations.",
+ "actual_code": "",
+ "fixed_code": "try:\n with open(file_path, 'r') as f:\n content = f.read()\nexcept FileNotFoundError:\n print(f'File{file_path}not found.')",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_create_test_files.py",
+ "start_line": 40,
+ "end_line": 45,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Code Quality",
+ "comment": "The code has some redundant comments and docstrings that can be removed.",
+ "confidence": "low",
+ "reason": "The code has some redundant comments and docstrings that are not providing any additional information.",
+ "solution": "Remove redundant comments and docstrings.",
+ "actual_code": "# Correct implementation of get_parent_folder()",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_get_parent_folder.py",
+ "start_line": 8,
+ "end_line": 8,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 2
+ },
+ {
+ "topic": "Import Statements",
+ "comment": "Unused import statement",
+ "confidence": "moderate",
+ "reason": "The import statement 'from kaizen.helpers.output import get_web_html' is not used in the code.",
+ "solution": "Remove the unused import statement.",
+ "actual_code": "from kaizen.helpers.output import get_web_html",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_get_web_html.py",
+ "start_line": 4,
+ "end_line": 4,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 5
+ },
+ {
+ "topic": "Function Definition",
+ "comment": "Function 'test_get_web_html_normal_cases' is too long and complex.",
+ "confidence": "high",
+ "reason": "The function has too many lines of code and is difficult to understand.",
+ "solution": "Break down the function into smaller, more manageable functions.",
+ "actual_code": "async def test_get_web_html_normal_cases(mock_get_html, mock_nest_asyncio, html_content, expected_output):",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_get_web_html.py",
+ "start_line": 76,
+ "end_line": 103,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Error handling is missing in function 'test_get_web_html_invalid_url'.",
+ "confidence": "high",
+ "reason": "The function does not handle potential errors that may occur.",
+ "solution": "Add try-except blocks to handle potential errors.",
+ "actual_code": "async def test_get_web_html_invalid_url(mock_get_html, mock_nest_asyncio):",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_get_web_html.py",
+ "start_line": 85,
+ "end_line": 91,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 9
+ },
+ {
+ "topic": "Code Duplication",
+ "comment": "Code duplication in functions 'test_get_web_html_normal_cases' and 'test_get_web_html_invalid_url'.",
+ "confidence": "moderate",
+ "reason": "The functions have similar code that can be extracted into a separate function.",
+ "solution": "Extract the common code into a separate function.",
+ "actual_code": "mock_get_html.return_value = html_content",
+ "fixed_code": "",
+ "file_name": ".kaizen/unit_test/kaizen/helpers/test_get_web_html.py",
+ "start_line": 77,
+ "end_line": 77,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Import organization",
+ "comment": "Imports are not organized alphabetically.",
+ "confidence": "moderate",
+ "reason": "Following PEP 8 guidelines for import organization improves readability.",
+ "solution": "Organize imports alphabetically.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/unit_test.py",
+ "start_line": 1,
+ "end_line": 10,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 3
+ },
+ {
+ "topic": "Docstrings",
+ "comment": "Missing docstrings for classes and methods.",
+ "confidence": "important",
+ "reason": "Docstrings provide essential documentation for users and maintainers.",
+ "solution": "Add docstrings to classes and methods.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/unit_test.py",
+ "start_line": 11,
+ "end_line": 275,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Type hints",
+ "comment": "Missing type hints for method parameters and return types.",
+ "confidence": "important",
+ "reason": "Type hints improve code readability and enable static type checking.",
+ "solution": "Add type hints for method parameters and return types.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/unit_test.py",
+ "start_line": 11,
+ "end_line": 275,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Error handling",
+ "comment": "Insufficient error handling in methods.",
+ "confidence": "important",
+ "reason": "Proper error handling ensures the program remains stable and provides useful error messages.",
+ "solution": "Implement try-except blocks to handle potential errors.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/unit_test.py",
+ "start_line": 11,
+ "end_line": 275,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Code organization",
+ "comment": "Some methods are too long and complex.",
+ "confidence": "moderate",
+ "reason": "Breaking down long methods into smaller ones improves readability and maintainability.",
+ "solution": "Refactor long methods into smaller, more focused ones.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "kaizen/generator/unit_test.py",
+ "start_line": 11,
+ "end_line": 275,
+ "side": "LEFT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Logging Configuration",
+ "comment": "The logging configuration is not properly set up.",
+ "confidence": "important",
+ "reason": "The logging level is set to ERROR for all loggers, but the LOGLEVEL environment variable is set to INFO.",
+ "solution": "Set the logging level consistently throughout the application.",
+ "actual_code": "set_all_loggers_to_ERROR()",
+ "fixed_code": "set_all_loggers_to_INFO()",
+ "file_name": "kaizen/llms/provider.py",
+ "start_line": 13,
+ "end_line": 28,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Code Organization",
+ "comment": "The code is not properly organized.",
+ "confidence": "moderate",
+ "reason": "The set_all_loggers_to_ERROR function is defined in the middle of the file.",
+ "solution": "Move the function definition to the top of the file.",
+ "actual_code": "def set_all_loggers_to_ERROR():",
+ "fixed_code": "def set_all_loggers_to_ERROR():\n # ...",
+ "file_name": "kaizen/llms/provider.py",
+ "start_line": 13,
+ "end_line": 28,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 4
+ },
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "11",
+ "end_line": "11",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_400/review.md b/.experiments/code_review/llama-405b/no_eval/pr_400/review.md
new file mode 100644
index 00000000..c168419b
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_400/review.md
@@ -0,0 +1,197 @@
+PR URL: https://github.com/Cloud-Code-AI/kaizen/pull/400
+
+# 🔍 Code Review Summary
+
+❗ **Attention Required:** This push has potential issues. 🚨
+
+## 📊 Stats
+- Total Issues: 21
+- Critical: 1
+- Important: 5
+- Minor: 8
+- Files Affected: 8
+## 🏆 Code Quality
+[████████████████░░░░] 80% (Good)
+
+## 🚨 Critical Issues
+
+
+Configuration (1 issues)
+
+### 1. Changes made to sensitive file
+📁 **File:** `config.json:11`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to config.json, which needs review
+💡 **Solution:** NA
+
+
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Error Handling (5 issues)
+
+### 1. The `create_pr_description` function does not handle cases where `desc` or `original_desc` are not strings.
+📁 **File:** `.kaizen/unit_test/kaizen/helpers/test_create_pr_description.py:31`
+⚖️ **Severity:** 6/10
+🔍 **Description:** The function should check the type of the input parameters and raise a meaningful error if they are not strings.
+💡 **Solution:** Add type checking for `desc` and `original_desc` and raise a `TypeError` if they are not strings.
+
+**Current Code:**
+```python
+
+```
+
+**Suggested Code:**
+```python
+if not isinstance(desc, str) or not isinstance(original_desc, str):
+ raise TypeError('desc and original_desc must be strings')
+```
+
+### 2. Missing docstrings for classes and methods.
+📁 **File:** `kaizen/generator/unit_test.py:11`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Docstrings provide essential documentation for users and maintainers.
+💡 **Solution:** Add docstrings to classes and methods.
+
+### 3. Missing type hints for method parameters and return types.
+📁 **File:** `kaizen/generator/unit_test.py:11`
+⚖️ **Severity:** 6/10
+🔍 **Description:** Type hints improve code readability and enable static type checking.
+💡 **Solution:** Add type hints for method parameters and return types.
+
+### 4. Insufficient error handling in methods.
+📁 **File:** `kaizen/generator/unit_test.py:11`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Proper error handling ensures the program remains stable and provides useful error messages.
+💡 **Solution:** Implement try-except blocks to handle potential errors.
+
+### 5. The logging configuration is not properly set up.
+📁 **File:** `kaizen/llms/provider.py:13`
+⚖️ **Severity:** 6/10
+🔍 **Description:** The logging level is set to ERROR for all loggers, but the LOGLEVEL environment variable is set to INFO.
+💡 **Solution:** Set the logging level consistently throughout the application.
+
+**Current Code:**
+```python
+set_all_loggers_to_ERROR()
+```
+
+**Suggested Code:**
+```python
+set_all_loggers_to_INFO()
+```
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (10 issues)
+
+
+Performance (8 issues)
+
+### 1. The `create_pr_description` function may have performance issues for large input strings.
+📁 **File:** `.kaizen/unit_test/kaizen/helpers/test_create_pr_description.py:51`
+⚖️ **Severity:** 4/10
+🔍 **Description:** The function uses string concatenation, which can be inefficient for large strings.
+💡 **Solution:** Consider using a more efficient string concatenation method, such as using a list and joining the strings at the end.
+
+### 2. The test file has a mix of test cases and helper functions.
+📁 **File:** `.kaizen/unit_test/kaizen/helpers/test_create_pr_description.py:1`
+⚖️ **Severity:** 3/10
+🔍 **Description:** It's better to separate test cases and helper functions into different files or modules.
+💡 **Solution:** Consider moving the helper functions to a separate file or module.
+
+### 3. The code seems to be implementing the required functionality correctly.
+📁 **File:** `.kaizen/unit_test/kaizen/helpers/test_create_test_files.py:1`
+⚖️ **Severity:** 5/10
+🔍 **Description:** The code is using the correct libraries and functions to achieve the desired outcome.
+💡 **Solution:**
+
+### 4. Unused import statement
+📁 **File:** `.kaizen/unit_test/kaizen/helpers/test_get_web_html.py:4`
+⚖️ **Severity:** 5/10
+🔍 **Description:** The import statement 'from kaizen.helpers.output import get_web_html' is not used in the code.
+💡 **Solution:** Remove the unused import statement.
+
+**Current Code:**
+```python
+from kaizen.helpers.output import get_web_html
+```
+
+**Suggested Code:**
+```python
+
+```
+
+### 5. Code duplication in functions 'test_get_web_html_normal_cases' and 'test_get_web_html_invalid_url'.
+📁 **File:** `.kaizen/unit_test/kaizen/helpers/test_get_web_html.py:77`
+⚖️ **Severity:** 6/10
+🔍 **Description:** The functions have similar code that can be extracted into a separate function.
+💡 **Solution:** Extract the common code into a separate function.
+
+**Current Code:**
+```python
+mock_get_html.return_value = html_content
+```
+
+**Suggested Code:**
+```python
+
+```
+
+### 6. Imports are not organized alphabetically.
+📁 **File:** `kaizen/generator/unit_test.py:1`
+⚖️ **Severity:** 3/10
+🔍 **Description:** Following PEP 8 guidelines for import organization improves readability.
+💡 **Solution:** Organize imports alphabetically.
+
+### 7. Some methods are too long and complex.
+📁 **File:** `kaizen/generator/unit_test.py:11`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Breaking down long methods into smaller ones improves readability and maintainability.
+💡 **Solution:** Refactor long methods into smaller, more focused ones.
+
+### 8. The code is not properly organized.
+📁 **File:** `kaizen/llms/provider.py:13`
+⚖️ **Severity:** 4/10
+🔍 **Description:** The set_all_loggers_to_ERROR function is defined in the middle of the file.
+💡 **Solution:** Move the function definition to the top of the file.
+
+**Current Code:**
+```python
+def set_all_loggers_to_ERROR():
+```
+
+**Suggested Code:**
+```python
+def set_all_loggers_to_ERROR():
+ # ...
+```
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (azure_ai/Meta-Llama-3-405B-Instruct)
+{"prompt_tokens": 36841, "completion_tokens": 3640, "total_tokens": 40481}
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_440/comments.json b/.experiments/code_review/llama-405b/no_eval/pr_440/comments.json
new file mode 100644
index 00000000..0637a088
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_440/comments.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_440/issues.json b/.experiments/code_review/llama-405b/no_eval/pr_440/issues.json
new file mode 100644
index 00000000..172e9fbf
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_440/issues.json
@@ -0,0 +1,47 @@
+[
+ {
+ "topic": "Unnecessary Comment",
+ "comment": "The comment 'TODO: DONT PUSH DUPLICATE' is unnecessary and should be removed.",
+ "confidence": "moderate",
+ "reason": "The comment does not provide any useful information and is not relevant to the code.",
+ "solution": "Remove the comment.",
+ "actual_code": "# TODO: DONT PUSH DUPLICATE",
+ "fixed_code": "",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 157,
+ "end_line": 157,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 2
+ },
+ {
+ "topic": "Dependency Update",
+ "comment": "The dependency 'llama-index-core' has been updated to version '0.10.65'.",
+ "confidence": "important",
+ "reason": "The updated version may include security patches or bug fixes.",
+ "solution": "Review the changelog for the updated version to ensure compatibility with the current codebase.",
+ "actual_code": "llama-index-core = \"0.10.65\"",
+ "fixed_code": "",
+ "file_name": "pyproject.toml",
+ "start_line": 27,
+ "end_line": 27,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 5
+ },
+ {
+ "topic": "Removed Dependencies",
+ "comment": "The dependencies 'llama-index-llms-openai' and 'llama-index-core' (version '^0.10.47') have been removed.",
+ "confidence": "important",
+ "reason": "The removed dependencies may be required by other parts of the codebase.",
+ "solution": "Review the codebase to ensure that the removed dependencies are not required.",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "pyproject.toml",
+ "start_line": 27,
+ "end_line": 28,
+ "side": "LEFT",
+ "sentiment": "negative",
+ "severity_level": 6
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_440/review.md b/.experiments/code_review/llama-405b/no_eval/pr_440/review.md
new file mode 100644
index 00000000..f509ad53
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_440/review.md
@@ -0,0 +1,92 @@
+PR URL: https://github.com/Cloud-Code-AI/kaizen/pull/440
+
+# 🔍 Code Review Summary
+
+✅ **All Clear:** This commit looks good! 👍
+
+## 📊 Stats
+- Total Issues: 3
+- Critical: 0
+- Important: 2
+- Minor: 1
+- Files Affected: 2
+## 🏆 Code Quality
+[████████████████░░░░] 80% (Good)
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Dependency Update (2 issues)
+
+### 1. The dependency 'llama-index-core' has been updated to version '0.10.65'.
+📁 **File:** `pyproject.toml:27`
+⚖️ **Severity:** 5/10
+🔍 **Description:** The updated version may include security patches or bug fixes.
+💡 **Solution:** Review the changelog for the updated version to ensure compatibility with the current codebase.
+
+**Current Code:**
+```python
+llama-index-core = "0.10.65"
+```
+
+**Suggested Code:**
+```python
+
+```
+
+### 2. The dependencies 'llama-index-llms-openai' and 'llama-index-core' (version '^0.10.47') have been removed.
+📁 **File:** `pyproject.toml:27`
+⚖️ **Severity:** 6/10
+🔍 **Description:** The removed dependencies may be required by other parts of the codebase.
+💡 **Solution:** Review the codebase to ensure that the removed dependencies are not required.
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (1 issues)
+
+
+Unnecessary Comment (1 issues)
+
+### 1. The comment 'TODO: DONT PUSH DUPLICATE' is unnecessary and should be removed.
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:157`
+⚖️ **Severity:** 2/10
+🔍 **Description:** The comment does not provide any useful information and is not relevant to the code.
+💡 **Solution:** Remove the comment.
+
+**Current Code:**
+```python
+# TODO: DONT PUSH DUPLICATE
+```
+
+**Suggested Code:**
+```python
+
+```
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (azure_ai/Meta-Llama-3-405B-Instruct)
+{"prompt_tokens": 1362, "completion_tokens": 562, "total_tokens": 1924}
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_476/comments.json b/.experiments/code_review/llama-405b/no_eval/pr_476/comments.json
new file mode 100644
index 00000000..a9d40eac
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_476/comments.json
@@ -0,0 +1,16 @@
+[
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "1",
+ "end_line": "1",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_476/issues.json b/.experiments/code_review/llama-405b/no_eval/pr_476/issues.json
new file mode 100644
index 00000000..a394f97b
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_476/issues.json
@@ -0,0 +1,61 @@
+[
+ {
+ "topic": "Error Handling",
+ "comment": "Broad exception handling can mask bugs and make debugging difficult.",
+ "confidence": "important",
+ "reason": "The `except Exception` block in `github_app/github_helper/pull_requests.py` (line 140) catches all exceptions, which can make it challenging to identify and fix specific issues.",
+ "solution": "Catch specific exceptions that can occur during the execution of the code, and provide meaningful error messages to aid in debugging.",
+ "actual_code": "except Exception:",
+ "fixed_code": "except requests.exceptions.RequestException as e:",
+ "file_name": "github_app/github_helper/pull_requests.py",
+ "start_line": 140,
+ "end_line": 141,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Code Organization",
+ "comment": "The `sort_files` function is not necessary and can be replaced with a built-in sorting function.",
+ "confidence": "moderate",
+ "reason": "The `sort_files` function in `github_app/github_helper/pull_requests.py` (line 184) is not necessary and can be replaced with the built-in `sorted` function.",
+ "solution": "Use the built-in `sorted` function to sort the files, which is more efficient and Pythonic.",
+ "actual_code": "def sort_files(files):",
+ "fixed_code": "sorted_files = sorted(files, key=lambda x: x['filename'])",
+ "file_name": "github_app/github_helper/pull_requests.py",
+ "start_line": 184,
+ "end_line": 194,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Code Quality",
+ "comment": "The `generate_tests` function is not necessary and can be replaced with a list comprehension.",
+ "confidence": "moderate",
+ "reason": "The `generate_tests` function in `github_app/github_helper/pull_requests.py` (line 199) is not necessary and can be replaced with a list comprehension.",
+ "solution": "Use a list comprehension to generate the tests, which is more efficient and Pythonic.",
+ "actual_code": "def generate_tests(pr_files):",
+ "fixed_code": "tests =[f['filename'] for f in pr_files]",
+ "file_name": "github_app/github_helper/pull_requests.py",
+ "start_line": 199,
+ "end_line": 200,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "1",
+ "end_line": "1",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_476/review.md b/.experiments/code_review/llama-405b/no_eval/pr_476/review.md
new file mode 100644
index 00000000..8f093042
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_476/review.md
@@ -0,0 +1,115 @@
+PR URL: https://github.com/Cloud-Code-AI/kaizen/pull/476
+
+# 🔍 Code Review Summary
+
+❗ **Attention Required:** This push has potential issues. 🚨
+
+## 📊 Stats
+- Total Issues: 4
+- Critical: 1
+- Important: 1
+- Minor: 2
+- Files Affected: 2
+## 🏆 Code Quality
+[████████████████░░░░] 80% (Good)
+
+## 🚨 Critical Issues
+
+
+Configuration (1 issues)
+
+### 1. Changes made to sensitive file
+📁 **File:** `config.json:1`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to config.json, which needs review
+💡 **Solution:** NA
+
+
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Error Handling (1 issues)
+
+### 1. Broad exception handling can mask bugs and make debugging difficult.
+📁 **File:** `github_app/github_helper/pull_requests.py:140`
+⚖️ **Severity:** 6/10
+🔍 **Description:** The `except Exception` block in `github_app/github_helper/pull_requests.py` (line 140) catches all exceptions, which can make it challenging to identify and fix specific issues.
+💡 **Solution:** Catch specific exceptions that can occur during the execution of the code, and provide meaningful error messages to aid in debugging.
+
+**Current Code:**
+```python
+except Exception:
+```
+
+**Suggested Code:**
+```python
+except requests.exceptions.RequestException as e:
+```
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (2 issues)
+
+
+Code Organization (2 issues)
+
+### 1. The `sort_files` function is not necessary and can be replaced with a built-in sorting function.
+📁 **File:** `github_app/github_helper/pull_requests.py:184`
+⚖️ **Severity:** 4/10
+🔍 **Description:** The `sort_files` function in `github_app/github_helper/pull_requests.py` (line 184) is not necessary and can be replaced with the built-in `sorted` function.
+💡 **Solution:** Use the built-in `sorted` function to sort the files, which is more efficient and Pythonic.
+
+**Current Code:**
+```python
+def sort_files(files):
+```
+
+**Suggested Code:**
+```python
+sorted_files = sorted(files, key=lambda x: x['filename'])
+```
+
+### 2. The `generate_tests` function is not necessary and can be replaced with a list comprehension.
+📁 **File:** `github_app/github_helper/pull_requests.py:199`
+⚖️ **Severity:** 4/10
+🔍 **Description:** The `generate_tests` function in `github_app/github_helper/pull_requests.py` (line 199) is not necessary and can be replaced with a list comprehension.
+💡 **Solution:** Use a list comprehension to generate the tests, which is more efficient and Pythonic.
+
+**Current Code:**
+```python
+def generate_tests(pr_files):
+```
+
+**Suggested Code:**
+```python
+tests =[f['filename'] for f in pr_files]
+```
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (azure_ai/Meta-Llama-3-405B-Instruct)
+{"prompt_tokens": 4006, "completion_tokens": 655, "total_tokens": 4661}
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_5/comments.json b/.experiments/code_review/llama-405b/no_eval/pr_5/comments.json
new file mode 100644
index 00000000..ce61c328
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_5/comments.json
@@ -0,0 +1,47 @@
+[
+ {
+ "topic": "API Call Failure",
+ "comment": "API call failure without retry mechanism.",
+ "confidence": "critical",
+ "reason": "The API call may fail without a retry mechanism, causing the program to crash.",
+ "solution": "Implement a retry mechanism for the API call.",
+ "actual_code": "response = completion(model=os.environ.get(\"model\", \"anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1\"), messages=messages)",
+ "fixed_code": "import time\ntry:\n response = completion(model=os.environ.get(\"model\", \"anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1\"), messages=messages)\nexcept Exception as e:\n print(f\"API call failed:{e}\")\n time.sleep(1)\n # retry the API call",
+ "file_name": "main.py",
+ "start_line": 65,
+ "end_line": 65,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Silent Failure",
+ "comment": "Silent failure without logging.",
+ "confidence": "critical",
+ "reason": "The program will fail silently without logging any errors.",
+ "solution": "Implement logging for errors.",
+ "actual_code": "except json.JSONDecodeError:\n result ={key: \"\" for key in[\"feedback\", \"review\", \"should_interview\", \"rating\", \"input_tokens\", \"output_tokens\"]}",
+ "fixed_code": "import logging\nexcept json.JSONDecodeError as e:\n logging.error(f\"JSON decode error:{e}\")\n result ={key: \"\" for key in[\"feedback\", \"review\", \"should_interview\", \"rating\", \"input_tokens\", \"output_tokens\"]}",
+ "file_name": "main.py",
+ "start_line": 82,
+ "end_line": 84,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Division by Zero",
+ "comment": "Division by zero potential.",
+ "confidence": "critical",
+ "reason": "The code may divide by zero, causing a runtime error.",
+ "solution": "Add a check to prevent division by zero.",
+ "actual_code": "total_tokens = total_input_tokens + total_output_tokens",
+ "fixed_code": "if total_tokens == 0:\n total_tokens = 1\n print(\"Warning: total tokens is zero\")",
+ "file_name": "main.py",
+ "start_line": 156,
+ "end_line": 156,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 8
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_5/issues.json b/.experiments/code_review/llama-405b/no_eval/pr_5/issues.json
new file mode 100644
index 00000000..a33e8f2e
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_5/issues.json
@@ -0,0 +1,107 @@
+[
+ {
+ "topic": "Unused Import",
+ "comment": "Unused import 'random' should be removed.",
+ "confidence": "moderate",
+ "reason": "The import 'random' is not used anywhere in the code.",
+ "solution": "Remove the line 'import random'.",
+ "actual_code": "import random",
+ "fixed_code": "",
+ "file_name": "main.py",
+ "start_line": 8,
+ "end_line": 8,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 3
+ },
+ {
+ "topic": "API Call Failure",
+ "comment": "API call failure without retry mechanism.",
+ "confidence": "critical",
+ "reason": "The API call may fail without a retry mechanism, causing the program to crash.",
+ "solution": "Implement a retry mechanism for the API call.",
+ "actual_code": "response = completion(model=os.environ.get(\"model\", \"anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1\"), messages=messages)",
+ "fixed_code": "import time\ntry:\n response = completion(model=os.environ.get(\"model\", \"anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1\"), messages=messages)\nexcept Exception as e:\n print(f\"API call failed:{e}\")\n time.sleep(1)\n # retry the API call",
+ "file_name": "main.py",
+ "start_line": 65,
+ "end_line": 65,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Silent Failure",
+ "comment": "Silent failure without logging.",
+ "confidence": "critical",
+ "reason": "The program will fail silently without logging any errors.",
+ "solution": "Implement logging for errors.",
+ "actual_code": "except json.JSONDecodeError:\n result ={key: \"\" for key in[\"feedback\", \"review\", \"should_interview\", \"rating\", \"input_tokens\", \"output_tokens\"]}",
+ "fixed_code": "import logging\nexcept json.JSONDecodeError as e:\n logging.error(f\"JSON decode error:{e}\")\n result ={key: \"\" for key in[\"feedback\", \"review\", \"should_interview\", \"rating\", \"input_tokens\", \"output_tokens\"]}",
+ "file_name": "main.py",
+ "start_line": 82,
+ "end_line": 84,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "Inefficient Progress Printing",
+ "comment": "Inefficient way to print progress.",
+ "confidence": "important",
+ "reason": "The progress printing is inefficient and may cause performance issues.",
+ "solution": "Use a more efficient way to print progress, such as using a progress bar library.",
+ "actual_code": "print(f\"\\rProgress:[{('=' * int(50 * progress)):<50}]{progress:.0%}\", end=\"\", flush=True)",
+ "fixed_code": "from tqdm import tqdm\nwith tqdm(total=total, desc=\"Processing applicants\") as pbar:\n for index, row in df.iterrows():\n # process applicant\n pbar.update(1)",
+ "file_name": "main.py",
+ "start_line": 121,
+ "end_line": 121,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 5
+ },
+ {
+ "topic": "Redundant Code",
+ "comment": "Redundant code.",
+ "confidence": "moderate",
+ "reason": "The code is redundant and can be removed.",
+ "solution": "Remove the redundant code.",
+ "actual_code": "if len(df) == 0:\n return",
+ "fixed_code": "",
+ "file_name": "main.py",
+ "start_line": 141,
+ "end_line": 142,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 3
+ },
+ {
+ "topic": "Division by Zero",
+ "comment": "Division by zero potential.",
+ "confidence": "critical",
+ "reason": "The code may divide by zero, causing a runtime error.",
+ "solution": "Add a check to prevent division by zero.",
+ "actual_code": "total_tokens = total_input_tokens + total_output_tokens",
+ "fixed_code": "if total_tokens == 0:\n total_tokens = 1\n print(\"Warning: total tokens is zero\")",
+ "file_name": "main.py",
+ "start_line": 156,
+ "end_line": 156,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 8
+ },
+ {
+ "topic": "File Not Found",
+ "comment": "No error handling for file not found.",
+ "confidence": "important",
+ "reason": "The code does not handle the case where the file is not found.",
+ "solution": "Add error handling for file not found.",
+ "actual_code": "main(input_file)",
+ "fixed_code": "try:\n main(input_file)\nexcept FileNotFoundError:\n print(f\"Error: file '{input_file}' not found\")",
+ "file_name": "main.py",
+ "start_line": 174,
+ "end_line": 174,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 5
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/llama-405b/no_eval/pr_5/review.md b/.experiments/code_review/llama-405b/no_eval/pr_5/review.md
new file mode 100644
index 00000000..adfbba95
--- /dev/null
+++ b/.experiments/code_review/llama-405b/no_eval/pr_5/review.md
@@ -0,0 +1,193 @@
+PR URL: https://github.com/sauravpanda/applicant-screening/pull/5
+
+# 🔍 Code Review Summary
+
+❗ **Attention Required:** This push has potential issues. 🚨
+
+## 📊 Stats
+- Total Issues: 7
+- Critical: 3
+- Important: 2
+- Minor: 2
+- Files Affected: 1
+## 🏆 Code Quality
+[██████████████░░░░░░] 70% (Fair)
+
+## 🚨 Critical Issues
+
+
+API Call Failure (3 issues)
+
+### 1. API call failure without retry mechanism.
+📁 **File:** `main.py:65`
+⚖️ **Severity:** 8/10
+🔍 **Description:** The API call may fail without a retry mechanism, causing the program to crash.
+💡 **Solution:** Implement a retry mechanism for the API call.
+
+**Current Code:**
+```python
+response = completion(model=os.environ.get("model", "anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1"), messages=messages)
+```
+
+**Suggested Code:**
+```python
+import time
+try:
+ response = completion(model=os.environ.get("model", "anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1"), messages=messages)
+except Exception as e:
+ print(f"API call failed:{e}")
+ time.sleep(1)
+ # retry the API call
+```
+
+### 2. Silent failure without logging.
+📁 **File:** `main.py:82`
+⚖️ **Severity:** 8/10
+🔍 **Description:** The program will fail silently without logging any errors.
+💡 **Solution:** Implement logging for errors.
+
+**Current Code:**
+```python
+except json.JSONDecodeError:
+ result ={key: "" for key in["feedback", "review", "should_interview", "rating", "input_tokens", "output_tokens"]}
+```
+
+**Suggested Code:**
+```python
+import logging
+except json.JSONDecodeError as e:
+ logging.error(f"JSON decode error:{e}")
+ result ={key: "" for key in["feedback", "review", "should_interview", "rating", "input_tokens", "output_tokens"]}
+```
+
+### 3. Division by zero potential.
+📁 **File:** `main.py:156`
+⚖️ **Severity:** 8/10
+🔍 **Description:** The code may divide by zero, causing a runtime error.
+💡 **Solution:** Add a check to prevent division by zero.
+
+**Current Code:**
+```python
+total_tokens = total_input_tokens + total_output_tokens
+```
+
+**Suggested Code:**
+```python
+if total_tokens == 0:
+ total_tokens = 1
+ print("Warning: total tokens is zero")
+```
+
+
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Inefficient Progress Printing (2 issues)
+
+### 1. Inefficient way to print progress.
+📁 **File:** `main.py:121`
+⚖️ **Severity:** 5/10
+🔍 **Description:** The progress printing is inefficient and may cause performance issues.
+💡 **Solution:** Use a more efficient way to print progress, such as using a progress bar library.
+
+**Current Code:**
+```python
+print(f"\rProgress:[{('=' * int(50 * progress)):<50}]{progress:.0%}", end="", flush=True)
+```
+
+**Suggested Code:**
+```python
+from tqdm import tqdm
+with tqdm(total=total, desc="Processing applicants") as pbar:
+ for index, row in df.iterrows():
+ # process applicant
+ pbar.update(1)
+```
+
+### 2. No error handling for file not found.
+📁 **File:** `main.py:174`
+⚖️ **Severity:** 5/10
+🔍 **Description:** The code does not handle the case where the file is not found.
+💡 **Solution:** Add error handling for file not found.
+
+**Current Code:**
+```python
+main(input_file)
+```
+
+**Suggested Code:**
+```python
+try:
+ main(input_file)
+except FileNotFoundError:
+ print(f"Error: file '{input_file}' not found")
+```
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (2 issues)
+
+
+Unused Import (2 issues)
+
+### 1. Unused import 'random' should be removed.
+📁 **File:** `main.py:8`
+⚖️ **Severity:** 3/10
+🔍 **Description:** The import 'random' is not used anywhere in the code.
+💡 **Solution:** Remove the line 'import random'.
+
+**Current Code:**
+```python
+import random
+```
+
+**Suggested Code:**
+```python
+
+```
+
+### 2. Redundant code.
+📁 **File:** `main.py:141`
+⚖️ **Severity:** 3/10
+🔍 **Description:** The code is redundant and can be removed.
+💡 **Solution:** Remove the redundant code.
+
+**Current Code:**
+```python
+if len(df) == 0:
+ return
+```
+
+**Suggested Code:**
+```python
+
+```
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (azure_ai/Meta-Llama-3-405B-Instruct)
+{"prompt_tokens": 6128, "completion_tokens": 1277, "total_tokens": 7405}
\ No newline at end of file
diff --git a/.experiments/code_review/main.py b/.experiments/code_review/main.py
index 28375d34..c04c609f 100644
--- a/.experiments/code_review/main.py
+++ b/.experiments/code_review/main.py
@@ -35,7 +35,7 @@ def process_pr(pr_url, reeval_response=False):
diff_text = get_diff_text(pr_diff, "")
pr_files = get_pr_files(pr_files, "")
- reviewer = CodeReviewer(llm_provider=LLMProvider())
+ reviewer = CodeReviewer(llm_provider=LLMProvider(), default_model="best")
review_data = reviewer.review_pull_request(
diff_text=diff_text,
pull_request_title=pr_title,
@@ -51,16 +51,23 @@ def process_pr(pr_url, reeval_response=False):
review_desc = create_pr_review_text(
review_data.issues, code_quality=review_data.code_quality
)
+ review_desc = f"PR URL: {pr_url}\n\n" + review_desc
+ review_desc += f"\n\n----- Cost Usage ({review_data.model_name})\n" + json.dumps(
+ review_data.usage
+ )
comments, topics = create_review_comments(review_data.topics)
logger.info(f"Model: {review_data.model_name}\nUsage: {review_data.usage}")
logger.info(f"Completed processing PR: {pr_url}")
- return review_desc, comments, topics
+ return review_desc, comments, review_data.issues
-def save_review(pr_number, review_desc, comments, topics, folder):
+def save_review(pr_number, review_desc, comments, issues, folder):
+ folder = os.path.join(folder, f"pr_{pr_number}")
logger.info(f"Saving review for PR {pr_number} in {folder}")
- review_file = os.path.join(folder, f"pr_{pr_number}_review.md")
- comments_file = os.path.join(folder, f"pr_{pr_number}_comments.json")
+ os.makedirs(folder, exist_ok=True)
+ review_file = os.path.join(folder, "review.md")
+ comments_file = os.path.join(folder, "comments.json")
+ issues_file = os.path.join(folder, "issues.json")
with open(review_file, "w") as f:
f.write(review_desc)
@@ -68,6 +75,9 @@ def save_review(pr_number, review_desc, comments, topics, folder):
with open(comments_file, "w") as f:
json.dump(comments, f, indent=2)
+ with open(issues_file, "w") as f:
+ json.dump(issues, f, indent=2)
+
logger.info(f"Saved review files for PR {pr_number}")
@@ -88,12 +98,12 @@ def main(pr_urls):
logger.info(f"Starting to process PR {pr_number}")
# Without re-evaluation
- review_desc, comments, topics = process_pr(pr_url, reeval_response=False)
- save_review(pr_number, review_desc, comments, topics, no_eval_folder)
+ review_desc, comments, issues = process_pr(pr_url, reeval_response=False)
+ save_review(pr_number, review_desc, comments, issues, no_eval_folder)
- # With re-evaluation
- review_desc, comments, topics = process_pr(pr_url, reeval_response=True)
- save_review(pr_number, review_desc, comments, topics, with_eval_folder)
+ # # With re-evaluation
+ # review_desc, comments, topics = process_pr(pr_url, reeval_response=True)
+ # save_review(pr_number, review_desc, comments, topics, with_eval_folder)
logger.info(f"Completed processing PR {pr_number}")
@@ -102,9 +112,15 @@ def main(pr_urls):
if __name__ == "__main__":
pr_urls = [
+ "https://github.com/sauravpanda/applicant-screening/pull/5",
"https://github.com/Cloud-Code-AI/kaizen/pull/335",
"https://github.com/Cloud-Code-AI/kaizen/pull/440",
"https://github.com/Cloud-Code-AI/kaizen/pull/222",
+ "https://github.com/Cloud-Code-AI/kaizen/pull/476",
+ "https://github.com/Cloud-Code-AI/kaizen/pull/252",
+ "https://github.com/Cloud-Code-AI/kaizen/pull/400",
+ # "https://github.com/supermemoryai/supermemory/pull/164",
+ "https://github.com/supermemoryai/supermemory/pull/232",
# Add more PR URLs here
]
main(pr_urls)
diff --git a/.experiments/code_review/print_info.py b/.experiments/code_review/print_info.py
new file mode 100644
index 00000000..1e693c37
--- /dev/null
+++ b/.experiments/code_review/print_info.py
@@ -0,0 +1,40 @@
+import json
+from pathlib import Path
+
+
+def print_issues_for_pr(pr_number):
+ base_path = Path(".experiments/code_review")
+ models = [
+ "gpt-4o",
+ "gpt-4o-mini",
+ "gpt-4o-try2",
+ "haiku",
+ "llama-405b",
+ "sonnet-3.5",
+ ]
+
+ for model in models:
+ file_path = base_path / model / "no_eval" / f"pr_{pr_number}" / "issues.json"
+
+ if file_path.exists():
+ print(f"\nModel: {model}")
+ print(f"File: {file_path}")
+
+ try:
+ with open(file_path, "r") as file:
+ data = json.load(file)
+ formatted_json = json.dumps(data, indent=2)
+ print("Content:")
+ print(formatted_json)
+ except json.JSONDecodeError:
+ print("Error: Invalid JSON file")
+ except Exception as e:
+ print(f"Error reading file: {str(e)}")
+ else:
+ print(f"\nModel: {model}")
+ print(f"File not found: {file_path}")
+
+
+# Example usage
+pr_number = 476
+print_issues_for_pr(pr_number)
diff --git a/.experiments/code_review/sonnet-3.5/no_eval/pr_222/comments.json b/.experiments/code_review/sonnet-3.5/no_eval/pr_222/comments.json
new file mode 100644
index 00000000..7edfffbe
--- /dev/null
+++ b/.experiments/code_review/sonnet-3.5/no_eval/pr_222/comments.json
@@ -0,0 +1,72 @@
+[
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to Dockerfile, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "Dockerfile",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to docker-compose.yml, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "15",
+ "end_line": "15",
+ "side": "RIGHT",
+ "file_name": "docker-compose.yml",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Version Control",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to .gitignore, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "164",
+ "end_line": "164",
+ "side": "RIGHT",
+ "file_name": ".gitignore",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Database",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to db_setup/init.sql, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "1",
+ "end_line": "1",
+ "side": "RIGHT",
+ "file_name": "db_setup/init.sql",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/sonnet-3.5/no_eval/pr_222/issues.json b/.experiments/code_review/sonnet-3.5/no_eval/pr_222/issues.json
new file mode 100644
index 00000000..13cebd2d
--- /dev/null
+++ b/.experiments/code_review/sonnet-3.5/no_eval/pr_222/issues.json
@@ -0,0 +1,312 @@
+[
+ {
+ "topic": "Docker Configuration",
+ "comment": "Consider using multi-stage builds to reduce the final image size",
+ "confidence": "moderate",
+ "reason": "Multi-stage builds can significantly reduce the size of the final Docker image by excluding build dependencies",
+ "solution": "Implement a multi-stage build in the Dockerfile",
+ "actual_code": "RUN apt-get update && apt-get install -y \\\n git \\\n build-essential \\\n && rm -rf /var/lib/apt/lists/*",
+ "fixed_code": "FROM python:3.9 AS builder\n\nRUN apt-get update && apt-get install -y \\\n git \\\n build-essential\n\n# ... (build steps)\n\nFROM python:3.9-slim\n\nCOPY --from=builder /app /app\n\n# ... (runtime configuration)",
+ "file_name": "Dockerfile",
+ "start_line": 7,
+ "end_line": 11,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Environment Variables",
+ "comment": "Consider using a more secure method for storing sensitive information",
+ "confidence": "important",
+ "reason": "Storing sensitive information like API keys directly in environment variables can be a security risk",
+ "solution": "Use a secret management system or encrypt sensitive values",
+ "actual_code": "OPENAI_API_KEY=\nOPENAI_ORGANIZATION=",
+ "fixed_code": "# Use a secret management system to securely store and retrieve API keys\n# OPENAI_API_KEY=\n# OPENAI_ORGANIZATION=",
+ "file_name": ".env.example",
+ "start_line": 10,
+ "end_line": 11,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Database Configuration",
+ "comment": "Ensure proper indexing for performance optimization",
+ "confidence": "important",
+ "reason": "Proper indexing is crucial for database performance, especially for frequently queried columns",
+ "solution": "Review and optimize index creation based on query patterns",
+ "actual_code": "CREATE INDEX idx_file_path ON files(file_path);\n\nCREATE INDEX idx_function_name ON function_abstractions(function_name);\n\nCREATE INDEX idx_node_type ON syntax_nodes(node_type);",
+ "fixed_code": "CREATE INDEX idx_file_path ON files(file_path);\nCREATE INDEX idx_function_name ON function_abstractions(function_name);\nCREATE INDEX idx_node_type ON syntax_nodes(node_type);\n-- Consider adding composite indexes based on common query patterns\n-- CREATE INDEX idx_file_repo ON files(repo_id, file_path);\n-- CREATE INDEX idx_function_file ON function_abstractions(file_id, function_name);",
+ "file_name": "db_setup/init.sql",
+ "start_line": 72,
+ "end_line": 78,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 5
+ },
+ {
+ "topic": "Code Embedding",
+ "comment": "Hardcoded embedding dimensions may limit flexibility",
+ "confidence": "moderate",
+ "reason": "Using a fixed embedding size of 1536 may not be suitable for all models or future changes",
+ "solution": "Consider making the embedding dimensions configurable",
+ "actual_code": "response = self.provider.embedding(\n model=\"embedding\", input=[text], dimensions=1536, encoding_format=\"float\"\n)",
+ "fixed_code": "embedding_dim = self.config.get('embedding_dimensions', 1536)\nresponse = self.provider.embedding(\n model=\"embedding\", input=[text], dimensions=embedding_dim, encoding_format=\"float\"\n)",
+ "file_name": "kaizen/llms/provider.py",
+ "start_line": 242,
+ "end_line": 244,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Type Hinting",
+ "comment": "Consider using more specific type hints for better code clarity and maintainability.",
+ "confidence": "important",
+ "reason": "Using more specific type hints can improve code readability and catch potential type-related errors early.",
+ "solution": "Replace 'List[float]' with 'np.ndarray' for the query_embedding parameter, and use 'List[Dict[str, Any]]' for the return type.",
+ "actual_code": "def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[dict]:",
+ "fixed_code": "def custom_query(self, query_embedding: np.ndarray, repo_id: int, similarity_top_k: int) -> List[Dict[str, Any]]:",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 13,
+ "end_line": 13,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Add error handling for database operations to improve robustness.",
+ "confidence": "important",
+ "reason": "Database operations can fail due to various reasons, and proper error handling can prevent unexpected crashes and improve debugging.",
+ "solution": "Wrap the database operations in a try-except block and handle potential exceptions.",
+ "actual_code": "with self.get_client() as client:\n with client.cursor() as cur:\n cur.execute(query, (query_embedding_normalized.tolist(), repo_id, similarity_top_k))\n results = cur.fetchall()",
+ "fixed_code": "try:\n with self.get_client() as client:\n with client.cursor() as cur:\n cur.execute(query, (query_embedding_normalized.tolist(), repo_id, similarity_top_k))\n results = cur.fetchall()\nexcept Exception as e:\n # Log the error and handle it appropriately\n print(f\"Database error:{e}\")\n results =[]",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 39,
+ "end_line": 42,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Code Optimization",
+ "comment": "Consider using a list comprehension for creating the result list to improve performance and readability.",
+ "confidence": "moderate",
+ "reason": "List comprehensions are generally more efficient and concise than traditional for loops for creating lists.",
+ "solution": "Replace the for loop with a list comprehension.",
+ "actual_code": "return[\n{\n \"id\": row[0],\n \"text\": row[1],\n \"metadata\": row[2] if isinstance(row[2], dict) else Json(row[2]),\n \"similarity\": row[3]\n}\n for row in results\n ]",
+ "fixed_code": "return[{\n \"id\": row[0],\n \"text\": row[1],\n \"metadata\": row[2] if isinstance(row[2], dict) else Json(row[2]),\n \"similarity\": row[3]\n}for row in results]",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 44,
+ "end_line": 52,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 3
+ },
+ {
+ "topic": "Code Structure",
+ "comment": "Consider adding docstrings to methods for better documentation.",
+ "confidence": "moderate",
+ "reason": "Docstrings improve code readability and help other developers understand the purpose and usage of methods.",
+ "solution": "Add descriptive docstrings to the custom_query method and the AbstractionFeedback class methods.",
+ "actual_code": "def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[dict]:",
+ "fixed_code": "def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[dict]:\n \"\"\"Perform a custom query on the vector store.\n\n Args:\n query_embedding (List[float]): The query embedding vector.\n repo_id (int): The repository ID to filter results.\n similarity_top_k (int): The number of top similar results to return.\n\n Returns:\n List[dict]: A list of dictionaries containing the query results.\n \"\"\"",
+ "file_name": "kaizen/retriever/custom_vector_store.py",
+ "start_line": 13,
+ "end_line": 13,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 4
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Improve error handling in the parse_file method",
+ "confidence": "important",
+ "reason": "The current implementation catches all exceptions and logs them, but continues execution. This might lead to incomplete or inconsistent data.",
+ "solution": "Consider rethrowing specific exceptions or implementing a more granular error handling strategy.",
+ "actual_code": "except Exception as e:\n logger.error(f\"Error processing file{file_path}:{str(e)}\")\n logger.error(traceback.format_exc())",
+ "fixed_code": "except Exception as e:\n logger.error(f\"Error processing file{file_path}:{str(e)}\")\n logger.error(traceback.format_exc())\n raise",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 108,
+ "end_line": 110,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Code Duplication",
+ "comment": "Repeated code for database connection string",
+ "confidence": "important",
+ "reason": "The database connection string is defined in multiple places, which violates the DRY principle and makes maintenance harder.",
+ "solution": "Extract the database connection string creation into a separate method or constant.",
+ "actual_code": "f\"postgresql://{os.environ['POSTGRES_USER']}:{os.environ['POSTGRES_PASSWORD']}@{os.environ['POSTGRES_HOST']}:{os.environ['POSTGRES_PORT']}/{os.environ['POSTGRES_DB']}\"",
+ "fixed_code": "self.db_connection_string = f\"postgresql://{os.environ['POSTGRES_USER']}:{os.environ['POSTGRES_PASSWORD']}@{os.environ['POSTGRES_HOST']}:{os.environ['POSTGRES_PORT']}/{os.environ['POSTGRES_DB']}\"\n self.engine = create_engine(\n self.db_connection_string,\n pool_size=10,\n max_overflow=20,\n )",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 36,
+ "end_line": 37,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Code Optimization",
+ "comment": "Potential performance issue in store_function_relationships method",
+ "confidence": "moderate",
+ "reason": "The method executes a database query for each edge in the graph, which could be inefficient for large graphs.",
+ "solution": "Consider batching the inserts or using a more efficient bulk insert method if supported by the database.",
+ "actual_code": "for caller, callee in self.graph.edges():\n query = text(\n \"\"\"\n INSERT INTO node_relationships (parent_node_id, child_node_id, relationship_type)\n VALUES (\n (SELECT node_id FROM syntax_nodes WHERE node_content LIKE :caller),\n (SELECT node_id FROM syntax_nodes WHERE node_content LIKE :callee),\n 'calls'\n )\n ON CONFLICT DO NOTHING\n \"\"\")\n connection.execute(\n query,{\"caller\": f\"%{caller}%\", \"callee\": f\"%{callee}%\"}\n )",
+ "fixed_code": "relationships =[(caller, callee) for caller, callee in self.graph.edges()]\n query = text(\"\"\"\n INSERT INTO node_relationships (parent_node_id, child_node_id, relationship_type)\n VALUES (\n (SELECT node_id FROM syntax_nodes WHERE node_content LIKE :caller),\n (SELECT node_id FROM syntax_nodes WHERE node_content LIKE :callee),\n 'calls'\n )\n ON CONFLICT DO NOTHING\n \"\"\")\n connection.execute(query,[{\"caller\": f\"%{caller}%\", \"callee\": f\"%{callee}%\"}for caller, callee in relationships])",
+ "file_name": "kaizen/retriever/llama_index_retriever.py",
+ "start_line": 298,
+ "end_line": 312,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 5
+ },
+ {
+ "topic": "Python Version Upgrade",
+ "comment": "The minimum Python version has been increased from 3.8.1 to 3.9.0.",
+ "confidence": "important",
+ "reason": "This change may break compatibility with environments using Python 3.8.x.",
+ "solution": "Ensure all development and production environments are updated to Python 3.9.0 or higher. Update CI/CD pipelines and deployment scripts accordingly.",
+ "actual_code": "python = \"^3.9.0\"",
+ "fixed_code": "",
+ "file_name": "pyproject.toml",
+ "start_line": 13,
+ "end_line": 13,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 7
+ },
+ {
+ "topic": "New Dependencies",
+ "comment": "Several new dependencies have been added, including llama-index and tree-sitter related packages.",
+ "confidence": "important",
+ "reason": "New dependencies may introduce compatibility issues or increase the project's complexity.",
+ "solution": "Review each new dependency for necessity and potential impact on the project. Ensure they are compatible with existing dependencies and the project's requirements.",
+ "actual_code": "llama-index-core = \"^0.10.47\"\nllama-index-llms-openai = \"^0.1.22\"\nllama-index-readers-file = \"^0.1.25\"\nllama-index-vector-stores-postgres = \"^0.1.11\"\nsqlalchemy = \"^2.0.31\"\nesprima = \"^4.0.1\"\nescodegen = \"^1.0.11\"\ntree-sitter = \"^0.22.3\"\nllama-index = \"^0.10.65\"\ntree-sitter-python = \"^0.21.0\"\ntree-sitter-javascript = \"^0.21.4\"\ntree-sitter-typescript = \"^0.21.2\"\ntree-sitter-rust = \"^0.21.2\"\nllama-index-llms-litellm = \"^0.1.4\"\nllama-index-embeddings-litellm = \"^0.1.1\"",
+ "fixed_code": "",
+ "file_name": "pyproject.toml",
+ "start_line": 27,
+ "end_line": 43,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 6
+ },
+ {
+ "topic": "Error Handling in LanguageLoader",
+ "comment": "The error handling in the LanguageLoader class could be improved for better debugging.",
+ "confidence": "moderate",
+ "reason": "The current error handling catches all exceptions and logs them, which might hide specific issues.",
+ "solution": "Consider catching specific exceptions (e.g., ImportError) separately and provide more detailed error messages.",
+ "actual_code": "except Exception as e:\n logger.error(f\"Failed to load language{language}:{str(e)}\")\n raise",
+ "fixed_code": "except ImportError as e:\n logger.error(f\"Failed to import language module for{language}:{str(e)}\")\n raise\nexcept Exception as e:\n logger.error(f\"Unexpected error loading language{language}:{str(e)}\")\n raise",
+ "file_name": "kaizen/retriever/tree_sitter_utils.py",
+ "start_line": 28,
+ "end_line": 30,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 5
+ },
+ {
+ "topic": "Unused Import in Test File",
+ "comment": "The 'json' module is imported but not used in the test file.",
+ "confidence": "moderate",
+ "reason": "Unused imports can clutter the code and potentially confuse other developers.",
+ "solution": "Remove the unused import to improve code cleanliness.",
+ "actual_code": "import json",
+ "fixed_code": "",
+ "file_name": "tests/retriever/test_chunker.py",
+ "start_line": 2,
+ "end_line": 2,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 3
+ },
+ {
+ "topic": "Commented Out Code in Test File",
+ "comment": "There are several blocks of commented-out code in the test file.",
+ "confidence": "moderate",
+ "reason": "Commented-out code can make the file harder to read and maintain.",
+ "solution": "Remove commented-out code if it's no longer needed, or add a clear comment explaining why it's kept if it might be useful in the future.",
+ "actual_code": "# print(\"\\nFunctions:\")\n# for name, func in chunks[\"functions\"].items():\n# print(f\"\\n{name}:\\n{func}\")\n\n# print(\"\\nClasses:\")\n# for name, class_info in chunks[\"classes\"].items():\n# print(f\"\\n{name}:\")\n# print(f\"Definition:\\n{class_info['definition']}\")\n# print(\"Methods:\")\n# for method_name, method in class_info[\"methods\"].items():\n# print(f\"\\n{method_name}:\\n{method}\")\n\n# print(\"\\nOther Blocks:\")\n# for i, block in enumerate(chunks[\"other_blocks\"], 1):\n# print(f\"\\nBlock{i}:\\n{block}\")",
+ "fixed_code": "",
+ "file_name": "tests/retriever/test_chunker.py",
+ "start_line": 81,
+ "end_line": 95,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 4
+ },
+ {
+ "topic": "Configuration",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to config.json, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "config.json",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to Dockerfile, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "4",
+ "end_line": "4",
+ "side": "RIGHT",
+ "file_name": "Dockerfile",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Docker",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to docker-compose.yml, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "15",
+ "end_line": "15",
+ "side": "RIGHT",
+ "file_name": "docker-compose.yml",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Version Control",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to .gitignore, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "164",
+ "end_line": "164",
+ "side": "RIGHT",
+ "file_name": ".gitignore",
+ "sentiment": "negative",
+ "severity_level": 10
+ },
+ {
+ "topic": "Database",
+ "comment": "Changes made to sensitive file",
+ "confidence": "critical",
+ "reason": "Changes were made to db_setup/init.sql, which needs review",
+ "solution": "NA",
+ "fixed_code": "",
+ "start_line": "1",
+ "end_line": "1",
+ "side": "RIGHT",
+ "file_name": "db_setup/init.sql",
+ "sentiment": "negative",
+ "severity_level": 10
+ }
+]
\ No newline at end of file
diff --git a/.experiments/code_review/sonnet-3.5/no_eval/pr_222/review.md b/.experiments/code_review/sonnet-3.5/no_eval/pr_222/review.md
new file mode 100644
index 00000000..abca4169
--- /dev/null
+++ b/.experiments/code_review/sonnet-3.5/no_eval/pr_222/review.md
@@ -0,0 +1,481 @@
+PR URL: https://github.com/Cloud-Code-AI/kaizen/pull/222
+
+# 🔍 Code Review Summary
+
+❗ **Attention Required:** This push has potential issues. 🚨
+
+## 📊 Stats
+- Total Issues: 21
+- Critical: 5
+- Important: 8
+- Minor: 8
+- Files Affected: 12
+## 🏆 Code Quality
+[█████████████████░░░] 85% (Good)
+
+## 🚨 Critical Issues
+
+
+Configuration (5 issues)
+
+### 1. Changes made to sensitive file
+📁 **File:** `config.json:4`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to config.json, which needs review
+💡 **Solution:** NA
+
+### 2. Changes made to sensitive file
+📁 **File:** `Dockerfile:4`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to Dockerfile, which needs review
+💡 **Solution:** NA
+
+### 3. Changes made to sensitive file
+📁 **File:** `docker-compose.yml:15`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to docker-compose.yml, which needs review
+💡 **Solution:** NA
+
+### 4. Changes made to sensitive file
+📁 **File:** `.gitignore:164`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to .gitignore, which needs review
+💡 **Solution:** NA
+
+### 5. Changes made to sensitive file
+📁 **File:** `db_setup/init.sql:1`
+⚖️ **Severity:** 10/10
+🔍 **Description:** Changes were made to db_setup/init.sql, which needs review
+💡 **Solution:** NA
+
+
+
+## 🟠 Refinement Suggestions:
+These are not critical issues, but addressing them could further improve the code:
+
+
+Environment Variables (8 issues)
+
+### 1. Consider using a more secure method for storing sensitive information
+📁 **File:** `.env.example:10`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Storing sensitive information like API keys directly in environment variables can be a security risk
+💡 **Solution:** Use a secret management system or encrypt sensitive values
+
+**Current Code:**
+```bash
+OPENAI_API_KEY=
+OPENAI_ORGANIZATION=
+```
+
+**Suggested Code:**
+```bash
+# Use a secret management system to securely store and retrieve API keys
+# OPENAI_API_KEY=
+# OPENAI_ORGANIZATION=
+```
+
+### 2. Ensure proper indexing for performance optimization
+📁 **File:** `db_setup/init.sql:72`
+⚖️ **Severity:** 5/10
+🔍 **Description:** Proper indexing is crucial for database performance, especially for frequently queried columns
+💡 **Solution:** Review and optimize index creation based on query patterns
+
+**Current Code:**
+```sql
+CREATE INDEX idx_file_path ON files(file_path);
+
+CREATE INDEX idx_function_name ON function_abstractions(function_name);
+
+CREATE INDEX idx_node_type ON syntax_nodes(node_type);
+```
+
+**Suggested Code:**
+```sql
+CREATE INDEX idx_file_path ON files(file_path);
+CREATE INDEX idx_function_name ON function_abstractions(function_name);
+CREATE INDEX idx_node_type ON syntax_nodes(node_type);
+-- Consider adding composite indexes based on common query patterns
+-- CREATE INDEX idx_file_repo ON files(repo_id, file_path);
+-- CREATE INDEX idx_function_file ON function_abstractions(file_id, function_name);
+```
+
+### 3. Consider using more specific type hints for better code clarity and maintainability.
+📁 **File:** `kaizen/retriever/custom_vector_store.py:13`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Using more specific type hints can improve code readability and catch potential type-related errors early.
+💡 **Solution:** Replace 'List[float]' with 'np.ndarray' for the query_embedding parameter, and use 'List[Dict[str, Any]]' for the return type.
+
+**Current Code:**
+```python
+def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[dict]:
+```
+
+**Suggested Code:**
+```python
+def custom_query(self, query_embedding: np.ndarray, repo_id: int, similarity_top_k: int) -> List[Dict[str, Any]]:
+```
+
+### 4. Add error handling for database operations to improve robustness.
+📁 **File:** `kaizen/retriever/custom_vector_store.py:39`
+⚖️ **Severity:** 7/10
+🔍 **Description:** Database operations can fail due to various reasons, and proper error handling can prevent unexpected crashes and improve debugging.
+💡 **Solution:** Wrap the database operations in a try-except block and handle potential exceptions.
+
+**Current Code:**
+```python
+with self.get_client() as client:
+ with client.cursor() as cur:
+ cur.execute(query, (query_embedding_normalized.tolist(), repo_id, similarity_top_k))
+ results = cur.fetchall()
+```
+
+**Suggested Code:**
+```python
+try:
+ with self.get_client() as client:
+ with client.cursor() as cur:
+ cur.execute(query, (query_embedding_normalized.tolist(), repo_id, similarity_top_k))
+ results = cur.fetchall()
+except Exception as e:
+ # Log the error and handle it appropriately
+ print(f"Database error:{e}")
+ results =[]
+```
+
+### 5. Improve error handling in the parse_file method
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:108`
+⚖️ **Severity:** 7/10
+🔍 **Description:** The current implementation catches all exceptions and logs them, but continues execution. This might lead to incomplete or inconsistent data.
+💡 **Solution:** Consider rethrowing specific exceptions or implementing a more granular error handling strategy.
+
+**Current Code:**
+```python
+except Exception as e:
+ logger.error(f"Error processing file{file_path}:{str(e)}")
+ logger.error(traceback.format_exc())
+```
+
+**Suggested Code:**
+```python
+except Exception as e:
+ logger.error(f"Error processing file{file_path}:{str(e)}")
+ logger.error(traceback.format_exc())
+ raise
+```
+
+### 6. Repeated code for database connection string
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:36`
+⚖️ **Severity:** 6/10
+🔍 **Description:** The database connection string is defined in multiple places, which violates the DRY principle and makes maintenance harder.
+💡 **Solution:** Extract the database connection string creation into a separate method or constant.
+
+**Current Code:**
+```python
+f"postgresql://{os.environ['POSTGRES_USER']}:{os.environ['POSTGRES_PASSWORD']}@{os.environ['POSTGRES_HOST']}:{os.environ['POSTGRES_PORT']}/{os.environ['POSTGRES_DB']}"
+```
+
+**Suggested Code:**
+```python
+self.db_connection_string = f"postgresql://{os.environ['POSTGRES_USER']}:{os.environ['POSTGRES_PASSWORD']}@{os.environ['POSTGRES_HOST']}:{os.environ['POSTGRES_PORT']}/{os.environ['POSTGRES_DB']}"
+ self.engine = create_engine(
+ self.db_connection_string,
+ pool_size=10,
+ max_overflow=20,
+ )
+```
+
+### 7. The minimum Python version has been increased from 3.8.1 to 3.9.0.
+📁 **File:** `pyproject.toml:13`
+⚖️ **Severity:** 7/10
+🔍 **Description:** This change may break compatibility with environments using Python 3.8.x.
+💡 **Solution:** Ensure all development and production environments are updated to Python 3.9.0 or higher. Update CI/CD pipelines and deployment scripts accordingly.
+
+**Current Code:**
+```toml
+python = "^3.9.0"
+```
+
+**Suggested Code:**
+```python
+
+```
+
+### 8. Several new dependencies have been added, including llama-index and tree-sitter related packages.
+📁 **File:** `pyproject.toml:27`
+⚖️ **Severity:** 6/10
+🔍 **Description:** New dependencies may introduce compatibility issues or increase the project's complexity.
+💡 **Solution:** Review each new dependency for necessity and potential impact on the project. Ensure they are compatible with existing dependencies and the project's requirements.
+
+**Current Code:**
+```toml
+llama-index-core = "^0.10.47"
+llama-index-llms-openai = "^0.1.22"
+llama-index-readers-file = "^0.1.25"
+llama-index-vector-stores-postgres = "^0.1.11"
+sqlalchemy = "^2.0.31"
+esprima = "^4.0.1"
+escodegen = "^1.0.11"
+tree-sitter = "^0.22.3"
+llama-index = "^0.10.65"
+tree-sitter-python = "^0.21.0"
+tree-sitter-javascript = "^0.21.4"
+tree-sitter-typescript = "^0.21.2"
+tree-sitter-rust = "^0.21.2"
+llama-index-llms-litellm = "^0.1.4"
+llama-index-embeddings-litellm = "^0.1.1"
+```
+
+**Suggested Code:**
+```python
+
+```
+
+
+
+## 📝 Minor Notes
+Additional small points that you might want to consider:
+
+
+Click to expand (8 issues)
+
+
+Docker Configuration (8 issues)
+
+### 1. Consider using multi-stage builds to reduce the final image size
+📁 **File:** `Dockerfile:7`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Multi-stage builds can significantly reduce the size of the final Docker image by excluding build dependencies
+💡 **Solution:** Implement a multi-stage build in the Dockerfile
+
+**Current Code:**
+```dockerfile
+RUN apt-get update && apt-get install -y \
+ git \
+ build-essential \
+ && rm -rf /var/lib/apt/lists/*
+```
+
+**Suggested Code:**
+```dockerfile
+FROM python:3.9 AS builder
+
+RUN apt-get update && apt-get install -y \
+ git \
+ build-essential
+
+# ... (build steps)
+
+FROM python:3.9-slim
+
+COPY --from=builder /app /app
+
+# ... (runtime configuration)
+```
+
+### 2. Hardcoded embedding dimensions may limit flexibility
+📁 **File:** `kaizen/llms/provider.py:242`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Using a fixed embedding size of 1536 may not be suitable for all models or future changes
+💡 **Solution:** Consider making the embedding dimensions configurable
+
+**Current Code:**
+```python
+response = self.provider.embedding(
+ model="embedding", input=[text], dimensions=1536, encoding_format="float"
+)
+```
+
+**Suggested Code:**
+```python
+embedding_dim = self.config.get('embedding_dimensions', 1536)
+response = self.provider.embedding(
+ model="embedding", input=[text], dimensions=embedding_dim, encoding_format="float"
+)
+```
+
+### 3. Consider using a list comprehension for creating the result list to improve performance and readability.
+📁 **File:** `kaizen/retriever/custom_vector_store.py:44`
+⚖️ **Severity:** 3/10
+🔍 **Description:** List comprehensions are generally more efficient and concise than traditional for loops for creating lists.
+💡 **Solution:** Replace the for loop with a list comprehension.
+
+**Current Code:**
+```python
+return[
+{
+ "id": row[0],
+ "text": row[1],
+ "metadata": row[2] if isinstance(row[2], dict) else Json(row[2]),
+ "similarity": row[3]
+}
+ for row in results
+ ]
+```
+
+**Suggested Code:**
+```python
+return[{
+ "id": row[0],
+ "text": row[1],
+ "metadata": row[2] if isinstance(row[2], dict) else Json(row[2]),
+ "similarity": row[3]
+}for row in results]
+```
+
+### 4. Consider adding docstrings to methods for better documentation.
+📁 **File:** `kaizen/retriever/custom_vector_store.py:13`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Docstrings improve code readability and help other developers understand the purpose and usage of methods.
+💡 **Solution:** Add descriptive docstrings to the custom_query method and the AbstractionFeedback class methods.
+
+**Current Code:**
+```python
+def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[dict]:
+```
+
+**Suggested Code:**
+```python
+def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[dict]:
+ """Perform a custom query on the vector store.
+
+ Args:
+ query_embedding (List[float]): The query embedding vector.
+ repo_id (int): The repository ID to filter results.
+ similarity_top_k (int): The number of top similar results to return.
+
+ Returns:
+ List[dict]: A list of dictionaries containing the query results.
+ """
+```
+
+### 5. Potential performance issue in store_function_relationships method
+📁 **File:** `kaizen/retriever/llama_index_retriever.py:298`
+⚖️ **Severity:** 5/10
+🔍 **Description:** The method executes a database query for each edge in the graph, which could be inefficient for large graphs.
+💡 **Solution:** Consider batching the inserts or using a more efficient bulk insert method if supported by the database.
+
+**Current Code:**
+```python
+for caller, callee in self.graph.edges():
+ query = text(
+ """
+ INSERT INTO node_relationships (parent_node_id, child_node_id, relationship_type)
+ VALUES (
+ (SELECT node_id FROM syntax_nodes WHERE node_content LIKE :caller),
+ (SELECT node_id FROM syntax_nodes WHERE node_content LIKE :callee),
+ 'calls'
+ )
+ ON CONFLICT DO NOTHING
+ """)
+ connection.execute(
+ query,{"caller": f"%{caller}%", "callee": f"%{callee}%"}
+ )
+```
+
+**Suggested Code:**
+```python
+relationships =[(caller, callee) for caller, callee in self.graph.edges()]
+ query = text("""
+ INSERT INTO node_relationships (parent_node_id, child_node_id, relationship_type)
+ VALUES (
+ (SELECT node_id FROM syntax_nodes WHERE node_content LIKE :caller),
+ (SELECT node_id FROM syntax_nodes WHERE node_content LIKE :callee),
+ 'calls'
+ )
+ ON CONFLICT DO NOTHING
+ """)
+ connection.execute(query,[{"caller": f"%{caller}%", "callee": f"%{callee}%"}for caller, callee in relationships])
+```
+
+### 6. The error handling in the LanguageLoader class could be improved for better debugging.
+📁 **File:** `kaizen/retriever/tree_sitter_utils.py:28`
+⚖️ **Severity:** 5/10
+🔍 **Description:** The current error handling catches all exceptions and logs them, which might hide specific issues.
+💡 **Solution:** Consider catching specific exceptions (e.g., ImportError) separately and provide more detailed error messages.
+
+**Current Code:**
+```python
+except Exception as e:
+ logger.error(f"Failed to load language{language}:{str(e)}")
+ raise
+```
+
+**Suggested Code:**
+```python
+except ImportError as e:
+ logger.error(f"Failed to import language module for{language}:{str(e)}")
+ raise
+except Exception as e:
+ logger.error(f"Unexpected error loading language{language}:{str(e)}")
+ raise
+```
+
+### 7. The 'json' module is imported but not used in the test file.
+📁 **File:** `tests/retriever/test_chunker.py:2`
+⚖️ **Severity:** 3/10
+🔍 **Description:** Unused imports can clutter the code and potentially confuse other developers.
+💡 **Solution:** Remove the unused import to improve code cleanliness.
+
+**Current Code:**
+```python
+import json
+```
+
+**Suggested Code:**
+```python
+
+```
+
+### 8. There are several blocks of commented-out code in the test file.
+📁 **File:** `tests/retriever/test_chunker.py:81`
+⚖️ **Severity:** 4/10
+🔍 **Description:** Commented-out code can make the file harder to read and maintain.
+💡 **Solution:** Remove commented-out code if it's no longer needed, or add a clear comment explaining why it's kept if it might be useful in the future.
+
+**Current Code:**
+```python
+# print("\nFunctions:")
+# for name, func in chunks["functions"].items():
+# print(f"\n{name}:\n{func}")
+
+# print("\nClasses:")
+# for name, class_info in chunks["classes"].items():
+# print(f"\n{name}:")
+# print(f"Definition:\n{class_info['definition']}")
+# print("Methods:")
+# for method_name, method in class_info["methods"].items():
+# print(f"\n{method_name}:\n{method}")
+
+# print("\nOther Blocks:")
+# for i, block in enumerate(chunks["other_blocks"], 1):
+# print(f"\nBlock{i}:\n{block}")
+```
+
+**Suggested Code:**
+```python
+
+```
+
+
+
+
+
+---
+
+> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️
+
+
+Useful Commands
+
+- **Feedback:** Reply with `!feedback [your message]`
+- **Ask PR:** Reply with `!ask-pr [your question]`
+- **Review:** Reply with `!review`
+- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue
+- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive
+- **Update Tests:** Reply with `!unittest` to create a PR with test changes
+
+
+
+----- Cost Usage (anthropic.claude-3-5-sonnet-20240620-v1:0)
+{"prompt_tokens": 24844, "completion_tokens": 5105, "total_tokens": 29949}
\ No newline at end of file
diff --git a/.experiments/code_review/sonnet-3.5/no_eval/pr_232/comments.json b/.experiments/code_review/sonnet-3.5/no_eval/pr_232/comments.json
new file mode 100644
index 00000000..0637a088
--- /dev/null
+++ b/.experiments/code_review/sonnet-3.5/no_eval/pr_232/comments.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/.experiments/code_review/sonnet-3.5/no_eval/pr_232/issues.json b/.experiments/code_review/sonnet-3.5/no_eval/pr_232/issues.json
new file mode 100644
index 00000000..5a4abbcf
--- /dev/null
+++ b/.experiments/code_review/sonnet-3.5/no_eval/pr_232/issues.json
@@ -0,0 +1,227 @@
+[
+ {
+ "topic": "Type Definition",
+ "comment": "Improved type definition for MemoriesPage props",
+ "confidence": "important",
+ "reason": "Using a separate type definition improves code readability and maintainability",
+ "solution": "The change is already implemented correctly",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "apps/web/app/(dash)/(memories)/content.tsx",
+ "start_line": 40,
+ "end_line": 45,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 3
+ },
+ {
+ "topic": "Error Message",
+ "comment": "Improved error message for space deletion",
+ "confidence": "moderate",
+ "reason": "More specific error message provides better user feedback",
+ "solution": "The change is already implemented correctly",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "apps/web/app/(dash)/(memories)/content.tsx",
+ "start_line": 73,
+ "end_line": 73,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 2
+ },
+ {
+ "topic": "Component Naming",
+ "comment": "Renamed components for better clarity",
+ "confidence": "important",
+ "reason": "More descriptive component names improve code readability",
+ "solution": "The changes are already implemented correctly",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "apps/web/app/(dash)/(memories)/content.tsx",
+ "start_line": 231,
+ "end_line": 231,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 3
+ },
+ {
+ "topic": "UI Improvements",
+ "comment": "Enhanced UI elements with better styling and layout",
+ "confidence": "moderate",
+ "reason": "Improved visual consistency and user experience",
+ "solution": "The changes are already implemented correctly",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "apps/web/app/(dash)/(memories)/content.tsx",
+ "start_line": 140,
+ "end_line": 152,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 3
+ },
+ {
+ "topic": "Import Optimization",
+ "comment": "Removed unused import",
+ "confidence": "important",
+ "reason": "Removing unused imports improves code cleanliness and potentially reduces bundle size",
+ "solution": "The change is already implemented correctly",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "apps/web/app/(dash)/home/page.tsx",
+ "start_line": 6,
+ "end_line": 6,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 3
+ },
+ {
+ "topic": "Import Cleanup",
+ "comment": "Removed unused imports",
+ "confidence": "important",
+ "reason": "Removing unused imports improves code cleanliness and potentially reduces bundle size",
+ "solution": "The change is already implemented correctly",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "apps/web/app/(dash)/home/queryinput.tsx",
+ "start_line": 3,
+ "end_line": 3,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 3
+ },
+ {
+ "topic": "Code Structure",
+ "comment": "Improved code organization by extracting dialog content into a separate component",
+ "confidence": "important",
+ "reason": "Separating concerns improves readability and maintainability",
+ "solution": "The change is already implemented correctly",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "apps/web/app/(dash)/menu.tsx",
+ "start_line": 163,
+ "end_line": 346,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 7
+ },
+ {
+ "topic": "State Management",
+ "comment": "Moved state management for spaces and selectedSpaces into the DialogContentContainer component",
+ "confidence": "important",
+ "reason": "Localizing state management to the component that uses it improves encapsulation",
+ "solution": "The change is already implemented correctly",
+ "actual_code": "",
+ "fixed_code": "",
+ "file_name": "apps/web/app/(dash)/menu.tsx",
+ "start_line": 168,
+ "end_line": 170,
+ "side": "RIGHT",
+ "sentiment": "positive",
+ "severity_level": 6
+ },
+ {
+ "topic": "Prop Drilling",
+ "comment": "Consider using context or state management library to avoid prop drilling",
+ "confidence": "moderate",
+ "reason": "The setDialogClose function is passed down as a prop, which could lead to prop drilling in larger components",
+ "solution": "Implement React Context or use a state management library like Redux for managing global state",
+ "actual_code": "function DialogContentContainer({\n\tsetDialogClose,\n}:{\n\tsetDialogClose: () => void;\n}){",
+ "fixed_code": "const DialogContext = React.createContext();\n\nfunction DialogContentContainer(){\n const{setDialogClose}= useContext(DialogContext);",
+ "file_name": "apps/web/app/(dash)/menu.tsx",
+ "start_line": 163,
+ "end_line": 167,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "Improve error handling in the handleSubmit function",
+ "confidence": "important",
+ "reason": "The return statement placed after the throw is unreachable dead code and obscures the actual control flow",
+ "solution": "Remove the return statement after throwing the error",
+ "actual_code": "throw new Error(`Memory creation failed: ${cont.error}`);\nreturn cont;",
+ "fixed_code": "throw new Error(`Memory creation failed: ${cont.error}`);",
+ "file_name": "apps/web/app/(dash)/menu.tsx",
+ "start_line": 230,
+ "end_line": 231,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 7
+ },
+ {
+ "topic": "Unused Import",
+ "comment": "The useEffect import is removed but not replaced with any other import.",
+ "confidence": "important",
+ "reason": "Removing unused imports improves code cleanliness and potentially reduces bundle size.",
+ "solution": "Ensure all necessary hooks are imported and remove any unused imports.",
+ "actual_code": "import{useState}from \"react\";",
+ "fixed_code": "import{useState, useEffect}from \"react\";",
+ "file_name": "packages/ui/shadcn/combobox.tsx",
+ "start_line": 3,
+ "end_line": 3,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 3
+ },
+ {
+ "topic": "Type Safety",
+ "comment": "The component definition has been changed from a typed functional component to a regular function without explicit typing.",
+ "confidence": "important",
+ "reason": "Removing explicit typing can lead to potential type-related bugs and reduces code readability.",
+ "solution": "Maintain explicit typing for the component to ensure type safety and improve code clarity.",
+ "actual_code": "const ComboboxWithCreate = ({",
+ "fixed_code": "const ComboboxWithCreate: React.FC = ({",
+ "file_name": "packages/ui/shadcn/combobox.tsx",
+ "start_line": 32,
+ "end_line": 32,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 6
+ },
+ {
+ "topic": "Error Handling",
+ "comment": "The new handleKeyDown function doesn't handle potential undefined values when accessing selectedSpaces.",
+ "confidence": "moderate",
+ "reason": "Not checking for undefined values can lead to runtime errors if selectedSpaces is not properly initialized.",
+ "solution": "Add a null check before accessing selectedSpaces.length.",
+ "actual_code": "if (\n\t\t\te.key === \"Backspace\" &&\n\t\t\tinputValue === \"\" &&\n\t\t\tselectedSpaces.length > 0\n\t\t){\n\t\t\tsetSelectedSpaces((prev) => prev.slice(0, -1));\n\t\t}",
+ "fixed_code": "if (\n\t\t\te.key === \"Backspace\" &&\n\t\t\tinputValue === \"\" &&\n\t\t\tselectedSpaces?.length > 0\n\t\t){\n\t\t\tsetSelectedSpaces((prev) => prev.slice(0, -1));\n\t\t}",
+ "file_name": "packages/ui/shadcn/combobox.tsx",
+ "start_line": 46,
+ "end_line": 52,
+ "side": "RIGHT",
+ "sentiment": "negative",
+ "severity_level": 5
+ },
+ {
+ "topic": "Performance Optimization",
+ "comment": "The filteredOptions array is being recalculated on every render, which could be inefficient for large arrays.",
+ "confidence": "moderate",
+ "reason": "Recalculating filtered options on every render can lead to unnecessary computations and potential performance issues.",
+ "solution": "Consider using useMemo to memoize the filteredOptions calculation.",
+ "actual_code": "const filteredOptions = options.filter(\n\t\t(option) => !selectedSpaces.includes(parseInt(option.value)),\n\t);",
+ "fixed_code": "const filteredOptions = useMemo(() => options.filter(\n\t\t(option) => !selectedSpaces.includes(parseInt(option.value)),\n\t),[options, selectedSpaces]);",
+ "file_name": "packages/ui/shadcn/combobox.tsx",
+ "start_line": 55,
+ "end_line": 57,
+ "side": "RIGHT",
+ "sentiment": "neutral",
+ "severity_level": 4
+ },
+ {
+ "topic": "Accessibility",
+ "comment": "The button for removing selected spaces lacks an aria-label for better accessibility.",
+ "confidence": "moderate",
+ "reason": "Missing aria-labels can make it difficult for screen reader users to understand the purpose of interactive elements.",
+ "solution": "Add an appropriate aria-label to the button for removing selected spaces.",
+ "actual_code": "