diff --git a/README.md b/README.md
index e0fa5556..7684ae42 100644
--- a/README.md
+++ b/README.md
@@ -82,6 +82,16 @@ With Aider, you can choose from two leading AI model providers:
 - 💻 Local LLMs with [Ollama](https://ollama.com/)
 - 🧠 [OpenAI API](https://openai.com/api/)
 
+- **[Cline](https://github.com/cline/cline)**
+
+With Cline, you can choose from several leading AI model providers:
+
+- 🤖 [Anthropic API](https://www.anthropic.com/api)
+- 🧠 [OpenAI API](https://openai.com/api/)
+- 💻 [LM Studio](https://lmstudio.ai/)
+- 💻 Local LLMs with [Ollama](https://ollama.com/)
+
+
 ### Privacy first
 
 Unlike E.T., your code never phones home! 🛸 CodeGate is designed with privacy
diff --git a/src/codegate/config.py b/src/codegate/config.py
index 3f99fd04..dbe17fff 100644
--- a/src/codegate/config.py
+++ b/src/codegate/config.py
@@ -20,6 +20,7 @@
     "anthropic": "https://api.anthropic.com/v1",
     "vllm": "http://localhost:8000",  # Base URL without /v1 path
     "ollama": "http://localhost:11434",  # Default Ollama server URL
+    "lm_studio": "http://localhost:1234",
 }
 
 
diff --git a/src/codegate/pipeline/cli/cli.py b/src/codegate/pipeline/cli/cli.py
index 09628efe..d94af629 100644
--- a/src/codegate/pipeline/cli/cli.py
+++ b/src/codegate/pipeline/cli/cli.py
@@ -1,3 +1,4 @@
+import re
 import shlex
 
 from litellm import ChatCompletionRequest
@@ -76,12 +77,19 @@ async def process(
 
         if last_user_message is not None:
             last_user_message_str, _ = last_user_message
-            splitted_message = last_user_message_str.lower().split(" ")
+            cleaned_message_str = re.sub(r"<.*?>", "", last_user_message_str).strip()
+            splitted_message = cleaned_message_str.lower().split(" ")
             # We expect codegate as the first word in the message
             if splitted_message[0] == "codegate":
                 context.shortcut_response = True
-                args = shlex.split(last_user_message_str)
+                args = shlex.split(cleaned_message_str)
                 cmd_out = await codegate_cli(args[1:])
+
+                if cleaned_message_str != last_user_message_str:
+                    # it came from Cline, need to wrap the output in the tags Cline expects
+                    cmd_out = (
+                        f"<attempt_completion><result>{cmd_out}</result></attempt_completion>\n"
+                    )
                 return PipelineResult(
                     response=PipelineResponse(
                         step_name=self.name,
diff --git a/src/codegate/pipeline/codegate_context_retriever/codegate.py b/src/codegate/pipeline/codegate_context_retriever/codegate.py
index 1f193017..ac33b700 100644
--- a/src/codegate/pipeline/codegate_context_retriever/codegate.py
+++ b/src/codegate/pipeline/codegate_context_retriever/codegate.py
@@ -36,7 +36,7 @@ def generate_context_str(self, objects: list[object], context: PipelineContext)
         matched_packages = []
         for obj in objects:
             # The object is already a dictionary with 'properties'
-            package_obj = obj["properties"]
+            package_obj = obj["properties"]  # type: ignore
             matched_packages.append(f"{package_obj['name']} ({package_obj['type']})")
             # Add one alert for each package found
             context.add_alert(
@@ -91,13 +91,16 @@ async def process(
         )  # type: ignore
         logger.info(f"Found {len(bad_snippet_packages)} bad packages in code snippets.")
 
-        # Remove code snippets from the user messages and search for bad packages
+        # Remove code snippets and file listing from the user messages and search for bad packages
         # in the rest of the user query/messsages
         user_messages = re.sub(r"```.*?```", "", user_message, flags=re.DOTALL)
         user_messages = re.sub(r"⋮...*?⋮...\n\n", "", user_messages, flags=re.DOTALL)
+        user_messages = re.sub(
+            r"<environment_details>.*?</environment_details>", "", user_messages, flags=re.DOTALL
+        )
 
         # split messages into double newlines, to avoid passing so many content in the search
-        split_messages = user_messages.split("\n\n")
+        split_messages = re.split(r"</?task>|(\n\n)", user_messages)
         collected_bad_packages = []
         for item_message in split_messages:
             # Vector search to find bad packages
@@ -126,10 +129,26 @@ async def process(
         # Make a copy of the request
         new_request = request.copy()
 
-        # Add the context to the last user message
         # Format: "Context: {context_str} \n Query: {last user message content}"
         message = new_request["messages"][last_user_idx]
-        context_msg = f'Context: {context_str} \n\n Query: {message["content"]}'  # type: ignore
+        message_str = str(message["content"])  # type: ignore
+        # Add the context to the last user message
+        if message_str.strip().startswith("<task>"):
+            # formatting of cline
+            match = re.match(r"(<task>)(.*?)(</task>)(.*)", message_str, re.DOTALL)
+            if match:
+                task_start, task_content, task_end, rest_of_message = match.groups()
+
+                # Embed the context into the task block
+                updated_task_content = (
+                    f"{task_start}Context: {context_str}\n"
+                    + f"Query: {task_content.strip()}{task_end}"
+                )
+
+                # Combine the updated task block with the rest of the message
+                context_msg = updated_task_content + rest_of_message
+        else:
+            context_msg = f"Context: {context_str} \n\n Query: {message_str}"  # type: ignore
         message["content"] = context_msg
 
         logger.debug("Final context message", context_message=context_msg)
diff --git a/src/codegate/pipeline/extract_snippets/extract_snippets.py b/src/codegate/pipeline/extract_snippets/extract_snippets.py
index 8f7ebbd7..78eac1f1 100644
--- a/src/codegate/pipeline/extract_snippets/extract_snippets.py
+++ b/src/codegate/pipeline/extract_snippets/extract_snippets.py
@@ -125,7 +125,8 @@ def extract_snippets(message: str) -> List[CodeSnippet]:
         #  just correct the typescript exception
         lang_map = {"typescript": "javascript"}
-        lang = lang_map.get(lang, lang)
+        if lang:
+            lang = lang_map.get(lang, lang)
 
         snippets.append(CodeSnippet(filepath=filename, code=content, language=lang))
 
     return snippets
diff --git a/src/codegate/pipeline/secrets/secrets.py b/src/codegate/pipeline/secrets/secrets.py
index fce826c1..f2f0fca4 100644
--- a/src/codegate/pipeline/secrets/secrets.py
+++ b/src/codegate/pipeline/secrets/secrets.py
@@ -451,17 +451,31 @@ async def process_chunk(
         ):
             return [chunk]
 
+        is_cline_client = any(
+            "Cline" in str(message.trigger_string or "")
+            for message in input_context.alerts_raised or []
+        )
+
         # Check if this is the first chunk (delta role will be present, others will not)
         if len(chunk.choices) > 0 and chunk.choices[0].delta.role:
             redacted_count = input_context.metadata["redacted_secrets_count"]
             secret_text = "secret" if redacted_count == 1 else "secrets"
             # Create notification chunk
-            notification_chunk = self._create_chunk(
-                chunk,
-                f"\n🛡️ [CodeGate prevented {redacted_count} {secret_text}]"
-                f"(http://localhost:9090/?search=codegate-secrets) from being leaked "
-                f"by redacting them.\n\n",
-            )
+            if is_cline_client:
+                notification_chunk = self._create_chunk(
+                    chunk,
+                    f"\n🛡️ [CodeGate prevented {redacted_count} {secret_text}]"
+                    f"(http://localhost:9090/?search=codegate-secrets) from being leaked "
+                    f"by redacting them.\n\n",
+                )
+                notification_chunk.choices[0].delta.role = "assistant"
+            else:
+                notification_chunk = self._create_chunk(
+                    chunk,
+                    f"\n🛡️ [CodeGate prevented {redacted_count} {secret_text}]"
+                    f"(http://localhost:9090/?search=codegate-secrets) from being leaked "
+                    f"by redacting them.\n\n",
+                )
 
             # Reset the counter
             input_context.metadata["redacted_secrets_count"] = 0
diff --git a/src/codegate/pipeline/secrets/signatures.py b/src/codegate/pipeline/secrets/signatures.py
index 8d2a0c3d..928668b2 100644
--- a/src/codegate/pipeline/secrets/signatures.py
+++ b/src/codegate/pipeline/secrets/signatures.py
@@ -2,7 +2,7 @@
 import re
 from pathlib import Path
 from threading import Lock
-from typing import ClassVar, Dict, List, NamedTuple, Optional
+from typing import ClassVar, Dict, List, NamedTuple, Optional, Union
 
 import structlog
 import yaml
@@ -215,8 +215,8 @@ def _load_signatures(cls) -> None:
             raise
 
     @classmethod
-    def find_in_string(cls, text: str) -> List[Match]:
-        """Search for secrets in the provided string."""
+    def find_in_string(cls, text: Union[str, List[str]]) -> List[Match]:
+        """Search for secrets in the provided string or list of strings."""
         if not text:
             return []
 
@@ -224,7 +224,13 @@
             raise RuntimeError("SecretFinder not initialized.")
 
         matches = []
-        lines = text.splitlines()
+
+        # Split text into lines for processing
+        try:
+            lines = text.splitlines()
+        except Exception as e:
+            logger.warning(f"Error splitting text into lines: {e}")
+            return []
 
         for line_num, line in enumerate(lines, start=1):
             for group in cls._signature_groups:
diff --git a/src/codegate/pipeline/systemmsg.py b/src/codegate/pipeline/systemmsg.py
index f98bec8a..29b91937 100644
--- a/src/codegate/pipeline/systemmsg.py
+++ b/src/codegate/pipeline/systemmsg.py
@@ -16,6 +16,7 @@ def get_existing_system_message(request: ChatCompletionRequest) -> Optional[dict
     Returns:
         The existing system message if found, otherwise None.
     """
+
     for message in request.get("messages", []):
         if message["role"] == "system":
             return message
@@ -50,8 +51,18 @@ def add_or_update_system_message(
         context.add_alert("add-system-message", trigger_string=json.dumps(system_message))
         new_request["messages"].insert(0, system_message)
     else:
+        # Handle both string and list content types (Cline sends the content as a list)
+        existing_content = request_system_message["content"]
+        new_content = system_message["content"]
+
+        # Convert list content to a string if necessary (needed for Cline)
+        if isinstance(existing_content, list):
+            existing_content = "\n".join(str(item) for item in existing_content)
+        if isinstance(new_content, list):
+            new_content = "\n".join(str(item) for item in new_content)
+
         # Update existing system message
-        updated_content = request_system_message["content"] + "\n\n" + system_message["content"]
+        updated_content = existing_content + "\n\n" + new_content
         context.add_alert("update-system-message", trigger_string=updated_content)
         request_system_message["content"] = updated_content
diff --git a/src/codegate/providers/anthropic/completion_handler.py b/src/codegate/providers/anthropic/completion_handler.py
index da7e6537..0f36df11 100644
--- a/src/codegate/providers/anthropic/completion_handler.py
+++ b/src/codegate/providers/anthropic/completion_handler.py
@@ -16,6 +16,7 @@ async def execute_completion(
         api_key: Optional[str],
         stream: bool = False,
         is_fim_request: bool = False,
+        base_tool: Optional[str] = "",
     ) -> Union[ModelResponse, AsyncIterator[ModelResponse]]:
         """
         Ensures the model name is prefixed with 'anthropic/' to explicitly route to Anthropic's API.
diff --git a/src/codegate/providers/anthropic/provider.py b/src/codegate/providers/anthropic/provider.py
index 2daa5a8d..10215c9e 100644
--- a/src/codegate/providers/anthropic/provider.py
+++ b/src/codegate/providers/anthropic/provider.py
@@ -32,9 +32,14 @@ def _setup_routes(self):
         Sets up the /messages route for the provider as expected by the Anthropic API.
Extracts the API key from the "x-api-key" header and passes it to the completion handler. + + There are two routes: + - /messages: This is the route that is used by the Anthropic API with Continue.dev + - /v1/messages: This is the route that is used by the Anthropic API with Cline """ @self.router.post(f"/{self.provider_route_name}/messages") + @self.router.post(f"/{self.provider_route_name}/v1/messages") async def create_message( request: Request, x_api_key: str = Header(None), diff --git a/src/codegate/providers/base.py b/src/codegate/providers/base.py index dc45616e..a525d176 100644 --- a/src/codegate/providers/base.py +++ b/src/codegate/providers/base.py @@ -220,20 +220,32 @@ async def complete( data.get("base_url"), is_fim_request, ) - if input_pipeline_result.response: + if input_pipeline_result.response and input_pipeline_result.context: return await self._pipeline_response_formatter.handle_pipeline_response( input_pipeline_result.response, streaming, context=input_pipeline_result.context ) - provider_request = self._input_normalizer.denormalize(input_pipeline_result.request) + if input_pipeline_result.request: + provider_request = self._input_normalizer.denormalize(input_pipeline_result.request) if is_fim_request: - provider_request = self._fim_normalizer.denormalize(provider_request) + provider_request = self._fim_normalizer.denormalize(provider_request) # type: ignore # Execute the completion and translate the response # This gives us either a single response or a stream of responses # based on the streaming flag + is_cline_client = any( + "Cline" in str(message.get("content", "")) for message in data.get("messages", []) + ) + base_tool = "" + if is_cline_client: + base_tool = "cline" + model_response = await self._completion_handler.execute_completion( - provider_request, api_key=api_key, stream=streaming, is_fim_request=is_fim_request + provider_request, + api_key=api_key, + stream=streaming, + is_fim_request=is_fim_request, + base_tool=base_tool, ) if not streaming: normalized_response = self._output_normalizer.normalize(model_response) @@ -242,9 +254,9 @@ async def complete( return self._output_normalizer.denormalize(pipeline_output) pipeline_output_stream = await self._run_output_stream_pipeline( - input_pipeline_result.context, model_response, is_fim_request=is_fim_request + input_pipeline_result.context, model_response, is_fim_request=is_fim_request # type: ignore ) - return self._cleanup_after_streaming(pipeline_output_stream, input_pipeline_result.context) + return self._cleanup_after_streaming(pipeline_output_stream, input_pipeline_result.context) # type: ignore def get_routes(self) -> APIRouter: return self.router diff --git a/src/codegate/providers/completion/base.py b/src/codegate/providers/completion/base.py index 96e68d2c..166835de 100644 --- a/src/codegate/providers/completion/base.py +++ b/src/codegate/providers/completion/base.py @@ -20,6 +20,7 @@ async def execute_completion( api_key: Optional[str], stream: bool = False, # TODO: remove this param? 
is_fim_request: bool = False, + base_tool: Optional[str] = "", ) -> Union[ModelResponse, AsyncIterator[ModelResponse]]: """Execute the completion request""" pass diff --git a/src/codegate/providers/litellmshim/litellmshim.py b/src/codegate/providers/litellmshim/litellmshim.py index f0088219..5a382160 100644 --- a/src/codegate/providers/litellmshim/litellmshim.py +++ b/src/codegate/providers/litellmshim/litellmshim.py @@ -43,6 +43,7 @@ async def execute_completion( api_key: Optional[str], stream: bool = False, is_fim_request: bool = False, + base_tool: Optional[str] = "", ) -> Union[ModelResponse, AsyncIterator[ModelResponse]]: """ Execute the completion request with LiteLLM's API diff --git a/src/codegate/providers/llamacpp/completion_handler.py b/src/codegate/providers/llamacpp/completion_handler.py index 5660f66d..4bfba6eb 100644 --- a/src/codegate/providers/llamacpp/completion_handler.py +++ b/src/codegate/providers/llamacpp/completion_handler.py @@ -52,6 +52,7 @@ async def execute_completion( api_key: Optional[str], stream: bool = False, is_fim_request: bool = False, + base_tool: Optional[str] = "", ) -> Union[ModelResponse, AsyncIterator[ModelResponse]]: """ Execute the completion request with inference engine API diff --git a/src/codegate/providers/ollama/completion_handler.py b/src/codegate/providers/ollama/completion_handler.py index 2f8da42d..cfd04acd 100644 --- a/src/codegate/providers/ollama/completion_handler.py +++ b/src/codegate/providers/ollama/completion_handler.py @@ -1,3 +1,4 @@ +import json from typing import AsyncIterator, Optional, Union import structlog @@ -10,26 +11,72 @@ logger = structlog.get_logger("codegate") -async def ollama_stream_generator(stream: AsyncIterator[ChatResponse]) -> AsyncIterator[str]: +async def ollama_stream_generator( + stream: AsyncIterator[ChatResponse], base_tool: str +) -> AsyncIterator[str]: """OpenAI-style SSE format""" try: async for chunk in stream: try: - content = chunk.model_dump_json() - if content: + # TODO We should wire in the client info so we can respond with + # the correct format and start to handle multiple clients + # in a more robust way. 
+ if base_tool != "cline": yield f"{chunk.model_dump_json()}\n" + else: + # First get the raw dict from the chunk + chunk_dict = chunk.model_dump() + # Create response dictionary in OpenAI-like format + response = { + "id": f"chatcmpl-{chunk_dict.get('created_at', '')}", + "object": "chat.completion.chunk", + "created": chunk_dict.get("created_at"), + "model": chunk_dict.get("model"), + "choices": [ + { + "index": 0, + "delta": { + "content": chunk_dict.get("message", {}).get("content", ""), + "role": chunk_dict.get("message", {}).get("role", "assistant"), + }, + "finish_reason": ( + chunk_dict.get("done_reason") + if chunk_dict.get("done", False) + else None + ), + } + ], + } + # Preserve existing type or add default if missing + response["type"] = chunk_dict.get("type", "stream") + + # Add optional fields that might be present in the final message + optional_fields = [ + "total_duration", + "load_duration", + "prompt_eval_count", + "prompt_eval_duration", + "eval_count", + "eval_duration", + ] + for field in optional_fields: + if field in chunk_dict: + response[field] = chunk_dict[field] + + yield f"\ndata: {json.dumps(response)}\n" except Exception as e: - if str(e): - yield f"{str(e)}\n" + logger.error(f"Error in stream generator: {str(e)}") + yield f"\ndata: {json.dumps({'error': str(e), 'type': 'error', 'choices': []})}\n" except Exception as e: - if str(e): - yield f"{str(e)}\n" + logger.error(f"Stream error: {str(e)}") + yield f"\ndata: {json.dumps({'error': str(e), 'type': 'error', 'choices': []})}\n" class OllamaShim(BaseCompletionHandler): def __init__(self, base_url): self.client = AsyncClient(host=base_url, timeout=300) + self.base_tool = "" async def execute_completion( self, @@ -37,8 +84,10 @@ async def execute_completion( api_key: Optional[str], stream: bool = False, is_fim_request: bool = False, + base_tool: Optional[str] = "", ) -> Union[ChatResponse, GenerateResponse]: """Stream response directly from Ollama API.""" + self.base_tool = base_tool if is_fim_request: prompt = request["messages"][0].get("content", "") response = await self.client.generate( @@ -59,12 +108,14 @@ def _create_streaming_response(self, stream: AsyncIterator[ChatResponse]) -> Str is the format that FastAPI expects for streaming responses. 
""" return StreamingResponse( - ollama_stream_generator(stream), + ollama_stream_generator(stream, self.base_tool or ""), media_type="application/x-ndjson; charset=utf-8", headers={ "Cache-Control": "no-cache", "Connection": "keep-alive", + "Transfer-Encoding": "chunked", }, + status_code=200, ) def _create_json_response( diff --git a/src/codegate/providers/ollama/provider.py b/src/codegate/providers/ollama/provider.py index 114f0001..ac8013b9 100644 --- a/src/codegate/providers/ollama/provider.py +++ b/src/codegate/providers/ollama/provider.py @@ -75,6 +75,9 @@ async def show_model(request: Request): # OpenAI-compatible routes for backward compatibility @self.router.post(f"/{self.provider_route_name}/chat/completions") @self.router.post(f"/{self.provider_route_name}/completions") + # Cline API routes + @self.router.post(f"/{self.provider_route_name}/v1/chat/completions") + @self.router.post(f"/{self.provider_route_name}/v1/generate") async def create_completion(request: Request): body = await request.body() data = json.loads(body) @@ -90,7 +93,7 @@ async def create_completion(request: Request): logger.error("Error in OllamaProvider completion", error=str(e)) raise HTTPException(status_code=503, detail="Ollama service is unavailable") except Exception as e: - #  check if we have an status code there + # check if we have an status code there if hasattr(e, "status_code"): # log the exception logger = structlog.get_logger("codegate") diff --git a/src/codegate/providers/openai/provider.py b/src/codegate/providers/openai/provider.py index 53aa7db8..af42b551 100644 --- a/src/codegate/providers/openai/provider.py +++ b/src/codegate/providers/openai/provider.py @@ -1,8 +1,11 @@ import json +from fastapi.responses import JSONResponse +import httpx import structlog from fastapi import Header, HTTPException, Request +from codegate.config import Config from codegate.pipeline.factory import PipelineFactory from codegate.providers.base import BaseProvider from codegate.providers.litellmshim import LiteLLmShim, sse_stream_generator @@ -15,6 +18,11 @@ def __init__( pipeline_factory: PipelineFactory, ): completion_handler = LiteLLmShim(stream_generator=sse_stream_generator) + config = Config.get_config() + if config is not None: + provided_urls = config.provider_urls + self.lm_studio_url = provided_urls.get("lm_studio", "http://localhost:11434/") + super().__init__( OpenAIInputNormalizer(), OpenAIOutputNormalizer(), @@ -33,8 +41,15 @@ def _setup_routes(self): passes it to the completion handler. 
""" + @self.router.get(f"/{self.provider_route_name}/models") + @self.router.get(f"/{self.provider_route_name}/v1/models") + async def get_models(): + # dummy method for lm studio + return JSONResponse(status_code=200, content=[]) + @self.router.post(f"/{self.provider_route_name}/chat/completions") @self.router.post(f"/{self.provider_route_name}/completions") + @self.router.post(f"/{self.provider_route_name}/v1/chat/completions") async def create_completion( request: Request, authorization: str = Header(..., description="Bearer token"), @@ -46,6 +61,9 @@ async def create_completion( body = await request.body() data = json.loads(body) + # if model starts with lm_studio, propagate it + if data.get("model", "").startswith("lm_studio"): + data["base_url"] = self.lm_studio_url + "/v1/" is_fim_request = self._is_fim_request(request, data) try: stream = await self.complete(data, api_key, is_fim_request=is_fim_request) diff --git a/src/codegate/storage/storage_engine.py b/src/codegate/storage/storage_engine.py index afd5cbd7..9543fe70 100644 --- a/src/codegate/storage/storage_engine.py +++ b/src/codegate/storage/storage_engine.py @@ -1,7 +1,7 @@ import os import re import sqlite3 -from typing import List +from typing import List, Optional import numpy as np import sqlite_vec_sl_tmp @@ -51,9 +51,11 @@ def __init__(self, data_path="./sqlite_data"): ) self.inference_engine = LlamaCppInferenceEngine() - self.model_path = ( - f"{Config.get_config().model_base_path}/{Config.get_config().embedding_model}" - ) + conf = Config.get_config() + if conf and conf.model_base_path and conf.embedding_model: + self.model_path = f"{conf.model_base_path}/{conf.embedding_model}" + else: + self.model_path = "" self.conn = self._get_connection() self._setup_schema() @@ -131,10 +133,10 @@ async def search_by_property(self, name: str, properties: List[str]) -> list[dic async def search( self, - query: str = None, - language: str = None, - ecosystem: str = None, - packages: List[str] = None, + query: Optional[str] = None, + language: Optional[str] = None, + ecosystem: Optional[str] = None, + packages: Optional[List[str]] = None, limit: int = 50, distance: float = 0.3, ) -> list[object]: