diff --git a/README.md b/README.md
index e0fa5556..7684ae42 100644
--- a/README.md
+++ b/README.md
@@ -82,6 +82,16 @@ With Aider, you can choose from two leading AI model providers:
- 💻 Local LLMs with [Ollama](https://ollama.com/)
- 🧠 [OpenAI API](https://openai.com/api/)
+- **[Cline](https://github.com/cline/cline)**
+
+With Cline, you can choose between different leading AI model providers:
+
+- 🤖 [Anthropic API](https://www.anthropic.com/api)
+- 🧠 [OpenAI API](https://openai.com/api/)
+- 💻 [LM Studio](https://lmstudio.ai/)
+- 💻 Local LLMs with [Ollama](https://ollama.com/)
+
+
### Privacy first
Unlike E.T., your code never phones home! 🛸 CodeGate is designed with privacy
diff --git a/src/codegate/config.py b/src/codegate/config.py
index 3f99fd04..dbe17fff 100644
--- a/src/codegate/config.py
+++ b/src/codegate/config.py
@@ -20,6 +20,7 @@
"anthropic": "https://api.anthropic.com/v1",
"vllm": "http://localhost:8000", # Base URL without /v1 path
"ollama": "http://localhost:11434", # Default Ollama server URL
+ "lm_studio": "http://localhost:1234",
}
diff --git a/src/codegate/pipeline/cli/cli.py b/src/codegate/pipeline/cli/cli.py
index 09628efe..d94af629 100644
--- a/src/codegate/pipeline/cli/cli.py
+++ b/src/codegate/pipeline/cli/cli.py
@@ -1,3 +1,4 @@
+import re
import shlex
from litellm import ChatCompletionRequest
@@ -76,12 +77,19 @@ async def process(
if last_user_message is not None:
last_user_message_str, _ = last_user_message
- splitted_message = last_user_message_str.lower().split(" ")
+ cleaned_message_str = re.sub(r"<.*?>", "", last_user_message_str).strip()
+ splitted_message = cleaned_message_str.lower().split(" ")
# We expect codegate as the first word in the message
if splitted_message[0] == "codegate":
context.shortcut_response = True
- args = shlex.split(last_user_message_str)
+ args = shlex.split(cleaned_message_str)
cmd_out = await codegate_cli(args[1:])
+
+ if cleaned_message_str != last_user_message_str:
+ # it came from Cline, need to wrap into tags
+ cmd_out = (
+ f"{cmd_out}\n"
+ )
return PipelineResult(
response=PipelineResponse(
step_name=self.name,
diff --git a/src/codegate/pipeline/codegate_context_retriever/codegate.py b/src/codegate/pipeline/codegate_context_retriever/codegate.py
index 1f193017..ac33b700 100644
--- a/src/codegate/pipeline/codegate_context_retriever/codegate.py
+++ b/src/codegate/pipeline/codegate_context_retriever/codegate.py
@@ -36,7 +36,7 @@ def generate_context_str(self, objects: list[object], context: PipelineContext)
matched_packages = []
for obj in objects:
# The object is already a dictionary with 'properties'
- package_obj = obj["properties"]
+ package_obj = obj["properties"] # type: ignore
matched_packages.append(f"{package_obj['name']} ({package_obj['type']})")
# Add one alert for each package found
context.add_alert(
@@ -91,13 +91,16 @@ async def process(
) # type: ignore
logger.info(f"Found {len(bad_snippet_packages)} bad packages in code snippets.")
- # Remove code snippets from the user messages and search for bad packages
+ # Remove code snippets and file listing from the user messages and search for bad packages
# in the rest of the user query/messsages
user_messages = re.sub(r"```.*?```", "", user_message, flags=re.DOTALL)
user_messages = re.sub(r"⋮...*?⋮...\n\n", "", user_messages, flags=re.DOTALL)
+ user_messages = re.sub(
+ r".*?", "", user_messages, flags=re.DOTALL
+ )
# split messages into double newlines, to avoid passing so many content in the search
- split_messages = user_messages.split("\n\n")
+ split_messages = re.split(r"?task>|(\n\n)", user_messages)
collected_bad_packages = []
for item_message in split_messages:
# Vector search to find bad packages
@@ -126,10 +129,26 @@ async def process(
# Make a copy of the request
new_request = request.copy()
- # Add the context to the last user message
# Format: "Context: {context_str} \n Query: {last user message content}"
message = new_request["messages"][last_user_idx]
- context_msg = f'Context: {context_str} \n\n Query: {message["content"]}' # type: ignore
+ message_str = str(message["content"]) # type: ignore
+ # Add the context to the last user message
+ if message_str.strip().startswith(""):
+ # formatting of cline
+ match = re.match(r"()(.*?)()(.*)", message_str, re.DOTALL)
+ if match:
+ task_start, task_content, task_end, rest_of_message = match.groups()
+
+ # Embed the context into the task block
+ updated_task_content = (
+ f"{task_start}Context: {context_str}\n"
+ + f"Query: {task_content.strip()}{task_end}"
+ )
+
+ # Combine the updated task block with the rest of the message
+ context_msg = updated_task_content + rest_of_message
+ else:
+ context_msg = f"Context: {context_str} \n\n Query: {message_str}" # type: ignore
message["content"] = context_msg
logger.debug("Final context message", context_message=context_msg)
diff --git a/src/codegate/pipeline/extract_snippets/extract_snippets.py b/src/codegate/pipeline/extract_snippets/extract_snippets.py
index 8f7ebbd7..78eac1f1 100644
--- a/src/codegate/pipeline/extract_snippets/extract_snippets.py
+++ b/src/codegate/pipeline/extract_snippets/extract_snippets.py
@@ -125,7 +125,8 @@ def extract_snippets(message: str) -> List[CodeSnippet]:
# just correct the typescript exception
lang_map = {"typescript": "javascript"}
- lang = lang_map.get(lang, lang)
+ if lang:
+ lang = lang_map.get(lang, lang)
snippets.append(CodeSnippet(filepath=filename, code=content, language=lang))
return snippets
diff --git a/src/codegate/pipeline/secrets/secrets.py b/src/codegate/pipeline/secrets/secrets.py
index fce826c1..f2f0fca4 100644
--- a/src/codegate/pipeline/secrets/secrets.py
+++ b/src/codegate/pipeline/secrets/secrets.py
@@ -451,17 +451,31 @@ async def process_chunk(
):
return [chunk]
+ is_cline_client = any(
+ "Cline" in str(message.trigger_string or "")
+ for message in input_context.alerts_raised or []
+ )
+
# Check if this is the first chunk (delta role will be present, others will not)
if len(chunk.choices) > 0 and chunk.choices[0].delta.role:
redacted_count = input_context.metadata["redacted_secrets_count"]
secret_text = "secret" if redacted_count == 1 else "secrets"
# Create notification chunk
- notification_chunk = self._create_chunk(
- chunk,
- f"\n🛡️ [CodeGate prevented {redacted_count} {secret_text}]"
- f"(http://localhost:9090/?search=codegate-secrets) from being leaked "
- f"by redacting them.\n\n",
- )
+ if is_cline_client:
+ notification_chunk = self._create_chunk(
+ chunk,
+ f"\n🛡️ [CodeGate prevented {redacted_count} {secret_text}]"
+ f"(http://localhost:9090/?search=codegate-secrets) from being leaked "
+ f"by redacting them.\n\n",
+ )
+ notification_chunk.choices[0].delta.role = "assistant"
+ else:
+ notification_chunk = self._create_chunk(
+ chunk,
+ f"\n🛡️ [CodeGate prevented {redacted_count} {secret_text}]"
+ f"(http://localhost:9090/?search=codegate-secrets) from being leaked "
+ f"by redacting them.\n\n",
+ )
# Reset the counter
input_context.metadata["redacted_secrets_count"] = 0
diff --git a/src/codegate/pipeline/secrets/signatures.py b/src/codegate/pipeline/secrets/signatures.py
index 8d2a0c3d..928668b2 100644
--- a/src/codegate/pipeline/secrets/signatures.py
+++ b/src/codegate/pipeline/secrets/signatures.py
@@ -2,7 +2,7 @@
import re
from pathlib import Path
from threading import Lock
-from typing import ClassVar, Dict, List, NamedTuple, Optional
+from typing import ClassVar, Dict, List, NamedTuple, Optional, Union
import structlog
import yaml
@@ -215,8 +215,8 @@ def _load_signatures(cls) -> None:
raise
@classmethod
- def find_in_string(cls, text: str) -> List[Match]:
- """Search for secrets in the provided string."""
+ def find_in_string(cls, text: Union[str, List[str]]) -> List[Match]:
+ """Search for secrets in the provided string or list of strings."""
if not text:
return []
@@ -224,7 +224,13 @@ def find_in_string(cls, text: str) -> List[Match]:
raise RuntimeError("SecretFinder not initialized.")
matches = []
- lines = text.splitlines()
+
+ # Split text into lines for processing
+ try:
+ lines = text.splitlines()
+ except Exception as e:
+ logger.warning(f"Error splitting text into lines: {e}")
+ return []
for line_num, line in enumerate(lines, start=1):
for group in cls._signature_groups:
diff --git a/src/codegate/pipeline/systemmsg.py b/src/codegate/pipeline/systemmsg.py
index f98bec8a..29b91937 100644
--- a/src/codegate/pipeline/systemmsg.py
+++ b/src/codegate/pipeline/systemmsg.py
@@ -16,6 +16,7 @@ def get_existing_system_message(request: ChatCompletionRequest) -> Optional[dict
Returns:
The existing system message if found, otherwise None.
"""
+
for message in request.get("messages", []):
if message["role"] == "system":
return message
@@ -50,8 +51,18 @@ def add_or_update_system_message(
context.add_alert("add-system-message", trigger_string=json.dumps(system_message))
new_request["messages"].insert(0, system_message)
else:
+ # Handle both string and list content types (needed for Cline, which sends a list)
+ existing_content = request_system_message["content"]
+ new_content = system_message["content"]
+
+ # Convert list to string if necessary (needed for Cline, which sends a list)
+ if isinstance(existing_content, list):
+ existing_content = "\n".join(str(item) for item in existing_content)
+ if isinstance(new_content, list):
+ new_content = "\n".join(str(item) for item in new_content)
+
# Update existing system message
- updated_content = request_system_message["content"] + "\n\n" + system_message["content"]
+ updated_content = existing_content + "\n\n" + new_content
context.add_alert("update-system-message", trigger_string=updated_content)
request_system_message["content"] = updated_content
diff --git a/src/codegate/providers/anthropic/completion_handler.py b/src/codegate/providers/anthropic/completion_handler.py
index da7e6537..0f36df11 100644
--- a/src/codegate/providers/anthropic/completion_handler.py
+++ b/src/codegate/providers/anthropic/completion_handler.py
@@ -16,6 +16,7 @@ async def execute_completion(
api_key: Optional[str],
stream: bool = False,
is_fim_request: bool = False,
+ base_tool: Optional[str] = "",
) -> Union[ModelResponse, AsyncIterator[ModelResponse]]:
"""
Ensures the model name is prefixed with 'anthropic/' to explicitly route to Anthropic's API.
diff --git a/src/codegate/providers/anthropic/provider.py b/src/codegate/providers/anthropic/provider.py
index 2daa5a8d..10215c9e 100644
--- a/src/codegate/providers/anthropic/provider.py
+++ b/src/codegate/providers/anthropic/provider.py
@@ -32,9 +32,14 @@ def _setup_routes(self):
Sets up the /messages route for the provider as expected by the Anthropic
API. Extracts the API key from the "x-api-key" header and passes it to the
completion handler.
+
+ There are two routes:
+ - /messages: This is the route that is used by the Anthropic API with Continue.dev
+ - /v1/messages: This is the route that is used by the Anthropic API with Cline
"""
@self.router.post(f"/{self.provider_route_name}/messages")
+ @self.router.post(f"/{self.provider_route_name}/v1/messages")
async def create_message(
request: Request,
x_api_key: str = Header(None),
diff --git a/src/codegate/providers/base.py b/src/codegate/providers/base.py
index dc45616e..a525d176 100644
--- a/src/codegate/providers/base.py
+++ b/src/codegate/providers/base.py
@@ -220,20 +220,32 @@ async def complete(
data.get("base_url"),
is_fim_request,
)
- if input_pipeline_result.response:
+ if input_pipeline_result.response and input_pipeline_result.context:
return await self._pipeline_response_formatter.handle_pipeline_response(
input_pipeline_result.response, streaming, context=input_pipeline_result.context
)
- provider_request = self._input_normalizer.denormalize(input_pipeline_result.request)
+ if input_pipeline_result.request:
+ provider_request = self._input_normalizer.denormalize(input_pipeline_result.request)
if is_fim_request:
- provider_request = self._fim_normalizer.denormalize(provider_request)
+ provider_request = self._fim_normalizer.denormalize(provider_request) # type: ignore
# Execute the completion and translate the response
# This gives us either a single response or a stream of responses
# based on the streaming flag
+ is_cline_client = any(
+ "Cline" in str(message.get("content", "")) for message in data.get("messages", [])
+ )
+ base_tool = ""
+ if is_cline_client:
+ base_tool = "cline"
+
model_response = await self._completion_handler.execute_completion(
- provider_request, api_key=api_key, stream=streaming, is_fim_request=is_fim_request
+ provider_request,
+ api_key=api_key,
+ stream=streaming,
+ is_fim_request=is_fim_request,
+ base_tool=base_tool,
)
if not streaming:
normalized_response = self._output_normalizer.normalize(model_response)
@@ -242,9 +254,9 @@ async def complete(
return self._output_normalizer.denormalize(pipeline_output)
pipeline_output_stream = await self._run_output_stream_pipeline(
- input_pipeline_result.context, model_response, is_fim_request=is_fim_request
+ input_pipeline_result.context, model_response, is_fim_request=is_fim_request # type: ignore
)
- return self._cleanup_after_streaming(pipeline_output_stream, input_pipeline_result.context)
+ return self._cleanup_after_streaming(pipeline_output_stream, input_pipeline_result.context) # type: ignore
def get_routes(self) -> APIRouter:
return self.router
diff --git a/src/codegate/providers/completion/base.py b/src/codegate/providers/completion/base.py
index 96e68d2c..166835de 100644
--- a/src/codegate/providers/completion/base.py
+++ b/src/codegate/providers/completion/base.py
@@ -20,6 +20,7 @@ async def execute_completion(
api_key: Optional[str],
stream: bool = False, # TODO: remove this param?
is_fim_request: bool = False,
+ base_tool: Optional[str] = "",
) -> Union[ModelResponse, AsyncIterator[ModelResponse]]:
"""Execute the completion request"""
pass
diff --git a/src/codegate/providers/litellmshim/litellmshim.py b/src/codegate/providers/litellmshim/litellmshim.py
index f0088219..5a382160 100644
--- a/src/codegate/providers/litellmshim/litellmshim.py
+++ b/src/codegate/providers/litellmshim/litellmshim.py
@@ -43,6 +43,7 @@ async def execute_completion(
api_key: Optional[str],
stream: bool = False,
is_fim_request: bool = False,
+ base_tool: Optional[str] = "",
) -> Union[ModelResponse, AsyncIterator[ModelResponse]]:
"""
Execute the completion request with LiteLLM's API
diff --git a/src/codegate/providers/llamacpp/completion_handler.py b/src/codegate/providers/llamacpp/completion_handler.py
index 5660f66d..4bfba6eb 100644
--- a/src/codegate/providers/llamacpp/completion_handler.py
+++ b/src/codegate/providers/llamacpp/completion_handler.py
@@ -52,6 +52,7 @@ async def execute_completion(
api_key: Optional[str],
stream: bool = False,
is_fim_request: bool = False,
+ base_tool: Optional[str] = "",
) -> Union[ModelResponse, AsyncIterator[ModelResponse]]:
"""
Execute the completion request with inference engine API
diff --git a/src/codegate/providers/ollama/completion_handler.py b/src/codegate/providers/ollama/completion_handler.py
index 2f8da42d..cfd04acd 100644
--- a/src/codegate/providers/ollama/completion_handler.py
+++ b/src/codegate/providers/ollama/completion_handler.py
@@ -1,3 +1,4 @@
+import json
from typing import AsyncIterator, Optional, Union
import structlog
@@ -10,26 +11,72 @@
logger = structlog.get_logger("codegate")
-async def ollama_stream_generator(stream: AsyncIterator[ChatResponse]) -> AsyncIterator[str]:
+async def ollama_stream_generator(
+ stream: AsyncIterator[ChatResponse], base_tool: str
+) -> AsyncIterator[str]:
"""OpenAI-style SSE format"""
try:
async for chunk in stream:
try:
- content = chunk.model_dump_json()
- if content:
+ # TODO We should wire in the client info so we can respond with
+ # the correct format and start to handle multiple clients
+ # in a more robust way.
+ if base_tool != "cline":
yield f"{chunk.model_dump_json()}\n"
+ else:
+ # First get the raw dict from the chunk
+ chunk_dict = chunk.model_dump()
+ # Create response dictionary in OpenAI-like format
+ response = {
+ "id": f"chatcmpl-{chunk_dict.get('created_at', '')}",
+ "object": "chat.completion.chunk",
+ "created": chunk_dict.get("created_at"),
+ "model": chunk_dict.get("model"),
+ "choices": [
+ {
+ "index": 0,
+ "delta": {
+ "content": chunk_dict.get("message", {}).get("content", ""),
+ "role": chunk_dict.get("message", {}).get("role", "assistant"),
+ },
+ "finish_reason": (
+ chunk_dict.get("done_reason")
+ if chunk_dict.get("done", False)
+ else None
+ ),
+ }
+ ],
+ }
+ # Preserve existing type or add default if missing
+ response["type"] = chunk_dict.get("type", "stream")
+
+ # Add optional fields that might be present in the final message
+ optional_fields = [
+ "total_duration",
+ "load_duration",
+ "prompt_eval_count",
+ "prompt_eval_duration",
+ "eval_count",
+ "eval_duration",
+ ]
+ for field in optional_fields:
+ if field in chunk_dict:
+ response[field] = chunk_dict[field]
+
+ yield f"\ndata: {json.dumps(response)}\n"
except Exception as e:
- if str(e):
- yield f"{str(e)}\n"
+ logger.error(f"Error in stream generator: {str(e)}")
+ yield f"\ndata: {json.dumps({'error': str(e), 'type': 'error', 'choices': []})}\n"
except Exception as e:
- if str(e):
- yield f"{str(e)}\n"
+ logger.error(f"Stream error: {str(e)}")
+ yield f"\ndata: {json.dumps({'error': str(e), 'type': 'error', 'choices': []})}\n"
class OllamaShim(BaseCompletionHandler):
def __init__(self, base_url):
self.client = AsyncClient(host=base_url, timeout=300)
+ self.base_tool = ""
async def execute_completion(
self,
@@ -37,8 +84,10 @@ async def execute_completion(
api_key: Optional[str],
stream: bool = False,
is_fim_request: bool = False,
+ base_tool: Optional[str] = "",
) -> Union[ChatResponse, GenerateResponse]:
"""Stream response directly from Ollama API."""
+ self.base_tool = base_tool
if is_fim_request:
prompt = request["messages"][0].get("content", "")
response = await self.client.generate(
@@ -59,12 +108,14 @@ def _create_streaming_response(self, stream: AsyncIterator[ChatResponse]) -> Str
is the format that FastAPI expects for streaming responses.
"""
return StreamingResponse(
- ollama_stream_generator(stream),
+ ollama_stream_generator(stream, self.base_tool or ""),
media_type="application/x-ndjson; charset=utf-8",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
+ "Transfer-Encoding": "chunked",
},
+ status_code=200,
)
def _create_json_response(
diff --git a/src/codegate/providers/ollama/provider.py b/src/codegate/providers/ollama/provider.py
index 114f0001..ac8013b9 100644
--- a/src/codegate/providers/ollama/provider.py
+++ b/src/codegate/providers/ollama/provider.py
@@ -75,6 +75,9 @@ async def show_model(request: Request):
# OpenAI-compatible routes for backward compatibility
@self.router.post(f"/{self.provider_route_name}/chat/completions")
@self.router.post(f"/{self.provider_route_name}/completions")
+ # Cline API routes
+ @self.router.post(f"/{self.provider_route_name}/v1/chat/completions")
+ @self.router.post(f"/{self.provider_route_name}/v1/generate")
async def create_completion(request: Request):
body = await request.body()
data = json.loads(body)
@@ -90,7 +93,7 @@ async def create_completion(request: Request):
logger.error("Error in OllamaProvider completion", error=str(e))
raise HTTPException(status_code=503, detail="Ollama service is unavailable")
except Exception as e:
- # check if we have an status code there
+ # check if we have a status code there
if hasattr(e, "status_code"):
# log the exception
logger = structlog.get_logger("codegate")
diff --git a/src/codegate/providers/openai/provider.py b/src/codegate/providers/openai/provider.py
index 53aa7db8..af42b551 100644
--- a/src/codegate/providers/openai/provider.py
+++ b/src/codegate/providers/openai/provider.py
@@ -1,8 +1,11 @@
import json
+from fastapi.responses import JSONResponse
+import httpx
import structlog
from fastapi import Header, HTTPException, Request
+from codegate.config import Config
from codegate.pipeline.factory import PipelineFactory
from codegate.providers.base import BaseProvider
from codegate.providers.litellmshim import LiteLLmShim, sse_stream_generator
@@ -15,6 +18,11 @@ def __init__(
pipeline_factory: PipelineFactory,
):
completion_handler = LiteLLmShim(stream_generator=sse_stream_generator)
+ config = Config.get_config()
+ if config is not None:
+ provided_urls = config.provider_urls
+ self.lm_studio_url = provided_urls.get("lm_studio", "http://localhost:11434/")
+
super().__init__(
OpenAIInputNormalizer(),
OpenAIOutputNormalizer(),
@@ -33,8 +41,15 @@ def _setup_routes(self):
passes it to the completion handler.
"""
+ @self.router.get(f"/{self.provider_route_name}/models")
+ @self.router.get(f"/{self.provider_route_name}/v1/models")
+ async def get_models():
+ # dummy method for lm studio
+ return JSONResponse(status_code=200, content=[])
+
@self.router.post(f"/{self.provider_route_name}/chat/completions")
@self.router.post(f"/{self.provider_route_name}/completions")
+ @self.router.post(f"/{self.provider_route_name}/v1/chat/completions")
async def create_completion(
request: Request,
authorization: str = Header(..., description="Bearer token"),
@@ -46,6 +61,9 @@ async def create_completion(
body = await request.body()
data = json.loads(body)
+ # if model starts with lm_studio, propagate it
+ if data.get("model", "").startswith("lm_studio"):
+ data["base_url"] = self.lm_studio_url + "/v1/"
is_fim_request = self._is_fim_request(request, data)
try:
stream = await self.complete(data, api_key, is_fim_request=is_fim_request)
diff --git a/src/codegate/storage/storage_engine.py b/src/codegate/storage/storage_engine.py
index afd5cbd7..9543fe70 100644
--- a/src/codegate/storage/storage_engine.py
+++ b/src/codegate/storage/storage_engine.py
@@ -1,7 +1,7 @@
import os
import re
import sqlite3
-from typing import List
+from typing import List, Optional
import numpy as np
import sqlite_vec_sl_tmp
@@ -51,9 +51,11 @@ def __init__(self, data_path="./sqlite_data"):
)
self.inference_engine = LlamaCppInferenceEngine()
- self.model_path = (
- f"{Config.get_config().model_base_path}/{Config.get_config().embedding_model}"
- )
+ conf = Config.get_config()
+ if conf and conf.model_base_path and conf.embedding_model:
+ self.model_path = f"{conf.model_base_path}/{conf.embedding_model}"
+ else:
+ self.model_path = ""
self.conn = self._get_connection()
self._setup_schema()
@@ -131,10 +133,10 @@ async def search_by_property(self, name: str, properties: List[str]) -> list[dic
async def search(
self,
- query: str = None,
- language: str = None,
- ecosystem: str = None,
- packages: List[str] = None,
+ query: Optional[str] = None,
+ language: Optional[str] = None,
+ ecosystem: Optional[str] = None,
+ packages: Optional[List[str]] = None,
limit: int = 50,
distance: float = 0.3,
) -> list[object]: