From a760f6d7d4cdf15bbc1b243cadd36d038969a1ec Mon Sep 17 00:00:00 2001
From: wzq <40971680+chinese-wzq@users.noreply.github.com>
Date: Thu, 1 May 2025 00:30:37 +0800
Subject: [PATCH 1/3] Update oolama_to_lmstudio_proxy.py

Support stream and /api/tags
---
 oolama_to_lmstudio_proxy.py | 381 ++++++++++++++++++++++++++++--------
 1 file changed, 296 insertions(+), 85 deletions(-)

diff --git a/oolama_to_lmstudio_proxy.py b/oolama_to_lmstudio_proxy.py
index 3e9a9b7..32dfb63 100644
--- a/oolama_to_lmstudio_proxy.py
+++ b/oolama_to_lmstudio_proxy.py
@@ -1,117 +1,328 @@
-from flask import Flask, request, jsonify
+from flask import Flask, request, jsonify, Response, stream_with_context # Added Response, stream_with_context
 import requests
-from datetime import datetime
+from datetime import datetime, timezone
 import json
+from flask_cors import CORS
 
 app = Flask(__name__)
+CORS(app) # Enable CORS
 
 # Configure OLP (Ollama to LM Studio Proxy) API endpoint.
 OLP_HOST = "127.0.0.1"
 OLP_PORT = 11434 # Change this port to whatever your expected caller application sends requests to, by default its Ollama's port is 11434
 
 # OLP Settings
-WORKAROUND_FOR_GITBUTLER = True
+WORKAROUND_FOR_GITBUTLER = False # NOTE: This workaround is ignored during streaming
 DEBUGGING = True
+LM_STUDIO_PORT = 11234
+LM_STUDIO_BASE_URL = f"http://localhost:{LM_STUDIO_PORT}/v1"
+LM_STUDIO_CHAT_URL = f"{LM_STUDIO_BASE_URL}/chat/completions"
+LM_STUDIO_MODELS_URL = f"{LM_STUDIO_BASE_URL}/models"
+# --- End Configuration ---
 
-# Configure LM Studio's API endpoint.
-LM_STUDIO_PORT = 1234
-LM_STUDIO_API_URL = f"http://localhost:{LM_STUDIO_PORT}/v1/chat/completions"
-
-# Configure Ollama's API endpoint
-# Commented out because it was used for debugging.
-# OLLAMA_PORT = 11435 # Default for Ollama is 11434
-# OLLAMA_API_URL = f"http://localhost:{OLLAMA_PORT}/api/chat"
+# Helper function to create Ollama-style stream chunks
+def format_ollama_stream_chunk(model_name, content_delta, created_at_iso, done=False, done_reason=None, final_stats=None):
+    """Formats a chunk for Ollama streaming response."""
+    chunk = {
+        "model": model_name,
+        "created_at": created_at_iso,
+        "message": {
+            "role": "assistant",
+            "content": content_delta # Send only the delta
+        },
+        "done": done,
+    }
+    if done:
+        # For the final chunk, add reason and potentially stats
+        chunk["done_reason"] = done_reason if done_reason else "stop" # Default to stop if None
+        if final_stats:
+            chunk.update(final_stats) # Add stats if provided
+        # Ensure message is present even if empty content in final chunk
+        if "message" not in chunk or not chunk["message"]:
+            chunk["message"] = {"role": "assistant", "content": ""}
+
+    # Important: Each JSON object must be followed by a newline for ndjson
+    return json.dumps(chunk) + '\n'
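+# A single non-final chunk serializes to one ndjson line, e.g.:
+# {"model": "llama3", "created_at": "2025-05-01T00:30:37Z", "message": {"role": "assistant", "content": "Hel"}, "done": false}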
""" + try: + request_data = request.get_json() + if not request_data: + return jsonify({"error": "Invalid JSON payload"}), 400 + except Exception as e: + if DEBUGGING: print(f"Error parsing request JSON: {e}") + return jsonify({"error": f"Failed to parse request JSON: {e}"}), 400 - # Extract the request data (assuming it's JSON) - request_data = request.get_json() + is_streaming = request_data.get("stream", True) + model_name = request_data.get("model", "unknown_model") # Get model name early - # Print the incoming request data for debugging if DEBUGGING: - print('INCOMING REQUEST:') - print(request_data) + print(f'INCOMING /api/chat REQUEST (Streaming: {is_streaming}):') + print(json.dumps(request_data, indent=2)) print('') - # Forward the request to LM Studio (note that LM Studio does not need the data transformed) - lm_response = requests.post(LM_STUDIO_API_URL, json=request_data) - lm_data = lm_response.json() + # --- Streaming Logic --- + if is_streaming: + # Make sure stream is True in the forwarded request + request_data["stream"] = True + # GitButler workaround is incompatible with streaming + if WORKAROUND_FOR_GITBUTLER and DEBUGGING: + print("Note: WORKAROUND_FOR_GITBUTLER is ignored for streaming requests.") - # Print the LM Studio response for debugging - if DEBUGGING: - print('LM STUDIO RESPONSE:') - print(lm_data) - print('') + def event_stream(): + created_at_iso = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") + last_chunk_data = {} # To store final stats if needed + finish_reason = None - # Forward the request to Ollama - # o_response = requests.post(OLLAMA_API_URL, json=request_data) - # o_data = o_response.json() - - # Print the Ollama response for debugging - # if DEBUGGING: - # print('OLLAMA RESPONSE:') - # print(o_data) - # print('') - - # Transform the response from LM Studio to match what Ollama-clients expects - message = "" - done_response = "unknown" - if lm_data.get("choices"): - # Extract the message from the LM Studio response - message = lm_data["choices"][0]["message"] - - # Workaround for GitButler, it wants the AI response message to be a JSON object for some reason. - if WORKAROUND_FOR_GITBUTLER: - # Convert the message to a JSON object - message_json = { - "result": message["content"] - } - - # Replace the message with the JSON object - message["content"] = json.dumps(message_json) - - # Extract the done reason from the LM Studio response - done_response = lm_data["choices"][0]["finish_reason"] - - # Transform the response from LM Studio to match what Ollama expects - transformed_response = { - "model": request_data.get("model", "llama3"), # Most applications might require it to be llama3 back. 
- "created_at": datetime.utcfromtimestamp(lm_data.get("created")).isoformat() + "Z", - "message": message, - "done": True, - "done_reason": done_response, - "total_duration": 0, # Placeholder, as LM Studio doesn't provide this directly - "load_duration": 0, # Placeholder, as LM Studio doesn't provide this directly - "prompt_eval_count": 0, # Placeholder, as LM Studio doesn't provide this directly - "prompt_eval_duration": 0, # Placeholder, as LM Studio doesn't provide this directly - "eval_count": 0, # Placeholder, as LM Studio doesn't provide this directly - "eval_duration": 0 # Placeholder, as LM Studio doesn't provide this directly - } + try: + # Use stream=True with requests + with requests.post(LM_STUDIO_CHAT_URL, json=request_data, stream=True, timeout=300) as lm_response: + lm_response.raise_for_status() # Check for initial HTTP errors + + if DEBUGGING: print('LM STUDIO STREAMING RESPONSE STARTED') + + for line in lm_response.iter_lines(): + if line: + decoded_line = line.decode('utf-8') + + # LM Studio/OpenAI often send SSE format: "data: {...}" + if decoded_line.startswith('data: '): + json_str = decoded_line[len('data: '):].strip() + else: + # Assume it might just be JSON directly in some cases + json_str = decoded_line + + # Check for final SSE message '[DONE]' + if json_str == '[DONE]': + if DEBUGGING: print("LM Studio stream finished with [DONE]") + break # Stop processing lines + + if not json_str: continue # Skip empty lines after stripping 'data: ' + + try: + chunk_data = json.loads(json_str) + if DEBUGGING: + print('LM STUDIO CHUNK RECEIVED:') + print(json.dumps(chunk_data, indent=2)) + + # --- Transform LM Studio Chunk to Ollama Chunk --- + content_delta = "" + choices = chunk_data.get("choices", []) + if choices and isinstance(choices, list) and len(choices) > 0: + delta = choices[0].get("delta", {}) + content_delta = delta.get("content", "") + # Check for finish reason in the chunk + if choices[0].get("finish_reason"): + finish_reason = choices[0].get("finish_reason") + if DEBUGGING: print(f"Finish reason detected in chunk: {finish_reason}") + + # Store potential final usage stats (might appear in last data chunk before or instead of [DONE]) + usage = chunk_data.get("usage") + if usage: + last_chunk_data["total_duration"] = 0 # Placeholder + last_chunk_data["load_duration"] = 0 # Placeholder + last_chunk_data["prompt_eval_count"] = usage.get("prompt_tokens", 0) + last_chunk_data["prompt_eval_duration"] = 0 # Placeholder + last_chunk_data["eval_count"] = usage.get("completion_tokens", 0) + last_chunk_data["eval_duration"] = 0 # Placeholder + if DEBUGGING: print(f"Usage stats received in chunk: {usage}") + + + # Yield intermediate chunk if there's content + if content_delta: + ollama_chunk = format_ollama_stream_chunk( + model_name=model_name, + content_delta=content_delta, + created_at_iso=created_at_iso, + done=False + ) + if DEBUGGING: + print('YIELDING OLLAMA CHUNK:') + print(ollama_chunk.strip()) + print("-" * 20) + yield ollama_chunk.encode('utf-8') # Yield bytes + + except json.JSONDecodeError: + if DEBUGGING: print(f"Skipping non-JSON line: {json_str}") + continue # Ignore lines that aren't valid JSON data chunks + except Exception as e_transform: + if DEBUGGING: print(f"Error transforming chunk: {e_transform}\nChunk: {json_str}") + # Decide if you want to yield an error chunk or just continue/break + # yield format_ollama_stream_chunk(model_name, f"Error: {e_transform}", created_at_iso, done=True, done_reason="error").encode('utf-8') + # break # Stop streaming on 
+
+                # After the loop finishes (or breaks on [DONE]/error) - yield the final chunk
+                final_ollama_chunk = format_ollama_stream_chunk(
+                    model_name=model_name,
+                    content_delta="", # No more content in the final meta chunk
+                    created_at_iso=created_at_iso,
+                    done=True,
+                    done_reason=finish_reason, # Use the detected reason
+                    final_stats=last_chunk_data # Include any collected stats
+                )
+                if DEBUGGING:
+                    print('YIELDING FINAL OLLAMA CHUNK:')
+                    print(final_ollama_chunk.strip())
+                    print("=" * 20)
+                yield final_ollama_chunk.encode('utf-8')
+
+            except requests.exceptions.RequestException as e:
+                error_message = f"Error connecting to LM Studio stream: {e}"
+                if DEBUGGING: print(error_message)
+                # Yield a final error chunk to the client
+                yield format_ollama_stream_chunk(model_name, error_message, datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), done=True, done_reason="error").encode('utf-8')
+            except Exception as e:
+                error_message = f"Unexpected error during streaming: {e}"
+                if DEBUGGING: print(error_message)
+                # Yield a final error chunk to the client
+                yield format_ollama_stream_chunk(model_name, error_message, datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), done=True, done_reason="error").encode('utf-8')
+            finally:
+                if DEBUGGING: print('LM STUDIO STREAMING RESPONSE FINISHED')
+
+
+        # Return a Streaming Response using the generator
+        # Use stream_with_context to ensure generator has access to app/request context if needed
+        return Response(stream_with_context(event_stream()), mimetype='application/x-ndjson')
+
+    # --- Non-Streaming Logic (Original code slightly adapted) ---
+    else:
+        try:
+            # Make the normal non-streaming request
+            lm_response = requests.post(LM_STUDIO_CHAT_URL, json=request_data, timeout=300)
+            lm_response.raise_for_status()
+            lm_data = lm_response.json()
+
+            if DEBUGGING:
+                print('LM STUDIO NON-STREAMING RESPONSE:')
+                print(json.dumps(lm_data, indent=2))
+                print('')
+
+        except requests.exceptions.RequestException as e:
+            if DEBUGGING: print(f"Error connecting to LM Studio chat endpoint: {e}")
+            return jsonify({"error": f"Failed to connect to LM Studio chat endpoint: {e}"}), 502
+        except json.JSONDecodeError as e:
+            if DEBUGGING: print(f"Error decoding LM Studio chat JSON response: {e}")
+            return jsonify({"error": f"Invalid JSON response from LM Studio chat endpoint: {e}"}), 502
+        except Exception as e:
+            if DEBUGGING: print(f"Unexpected error during LM Studio chat request: {e}")
+            return jsonify({"error": f"Unexpected error processing LM Studio chat request: {e}"}), 500
+
+        # Transform the *single* response (Mostly same as before, ensure WORKAROUND applies only here if needed)
+        message = {}
+        done_response = "unknown"
+        prompt_tokens = completion_tokens = 0
+        try:
+            if lm_data.get("choices") and isinstance(lm_data["choices"], list) and len(lm_data["choices"]) > 0:
+                choice = lm_data["choices"][0]
+                message = choice.get("message", {})
+                done_response = choice.get("finish_reason", "unknown")
+
+                # Apply workaround ONLY if not streaming and flag is True
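+                # (e.g. a plain content value of "Hello there!" becomes the JSON string '{"result": "Hello there!"}')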
= {"role": "assistant", "content": "Error processing LM Studio response."} + done_response = "error" + + try: + created_timestamp = lm_data.get("created") + created_at_iso = datetime.fromtimestamp(created_timestamp, timezone.utc).isoformat().replace("+00:00", "Z") if created_timestamp else datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") + except Exception: + created_at_iso = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") + + transformed_response = { + "model": model_name, + "created_at": created_at_iso, + "message": message, # Contains full message (potentially double-encoded if workaround active) + "done": True, # Non-streaming is always done=True in the final response + "done_reason": done_response, + "total_duration": 0, + "load_duration": 0, + "prompt_eval_count": prompt_tokens if prompt_tokens is not None else 0, + "prompt_eval_duration": 0, + "eval_count": completion_tokens if completion_tokens is not None else 0, + "eval_duration": 0 + } + + if DEBUGGING: + print('SENDING THIS NON-STREAMING BACK INSTEAD:') + print(json.dumps(transformed_response, indent=2)) + print('') + + return jsonify(transformed_response), lm_response.status_code + +# --- /api/tags endpoint remains the same --- +@app.route('/api/tags', methods=['GET']) +def get_tags(): + """ + Proxies '/api/tags' requests to LM Studio's models endpoint + and transforms the response to Ollama format. + """ if DEBUGGING: - print('SENDING THIS BACK INSTEAD:') - print(transformed_response) + print('INCOMING /api/tags REQUEST') print('') + try: + lm_response = requests.get(LM_STUDIO_MODELS_URL, timeout=60) + lm_response.raise_for_status() + lm_data = lm_response.json() + if DEBUGGING: + print('LM STUDIO MODELS RESPONSE:') + print(json.dumps(lm_data, indent=2)) + print('') + except requests.exceptions.RequestException as e: + if DEBUGGING: print(f"Error connecting to LM Studio models endpoint: {e}") + return jsonify({"error": f"Failed to connect to LM Studio models endpoint: {e}"}), 502 + except json.JSONDecodeError as e: + if DEBUGGING: print(f"Error decoding LM Studio models JSON response: {e}") + return jsonify({"error": f"Invalid JSON response from LM Studio models endpoint: {e}"}), 502 + except Exception as e: + if DEBUGGING: print(f"Unexpected error during LM Studio models request: {e}") + return jsonify({"error": f"Unexpected error processing LM Studio models request: {e}"}), 500 - # Return the transformed response to the original caller - return jsonify(transformed_response), lm_response.status_code # return lm studio + # Transform LM Studio models to Ollama tags format + ollama_models = [] + if lm_data and isinstance(lm_data.get('data'), list): + now_iso = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") + for model_info in lm_data['data']: + model_id = model_info.get('id') + if model_id: + family = "unknown" + if '/' in model_id: + parts = model_id.split('/') + if len(parts) > 1: family = parts[1] + elif '-' in model_id: + family = model_id.split('-')[0] + ollama_model = { + "name": f"{model_id}:latest", "model": f"{model_id}:latest", + "modified_at": now_iso, "size": 0, "digest": "", + "details": { "parent_model": "", "format": "gguf", "family": family, + "families": [family] if family != "unknown" else None, + "parameter_size": "unknown", "quantization_level": "unknown" } + } + if ollama_model["details"]["families"] == ["unknown"]: + ollama_model["details"]["families"] = None + ollama_models.append(ollama_model) + transformed_response = {"models": ollama_models} - # Uncomment this 
+    if DEBUGGING:
+        print('SENDING THIS /api/tags BACK INSTEAD:')
+        print(json.dumps(transformed_response, indent=2))
+        print('')
+    return jsonify(transformed_response), 200
 
 if __name__ == '__main__':
+    # Use 0.0.0.0 if your client is on a different machine than the proxy
+    # app.run(host='0.0.0.0', port=OLP_PORT)
     app.run(host=OLP_HOST, port=OLP_PORT)

From 8662a43e5293c908e6852ddc7c124dd59fc9499a Mon Sep 17 00:00:00 2001
From: wzq <40971680+chinese-wzq@users.noreply.github.com>
Date: Thu, 1 May 2025 00:34:22 +0800
Subject: [PATCH 2/3] Rename oolama_to_lmstudio_proxy.py to ollama_to_lmstudio_proxy.py

---
 oolama_to_lmstudio_proxy.py => ollama_to_lmstudio_proxy.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename oolama_to_lmstudio_proxy.py => ollama_to_lmstudio_proxy.py (100%)

diff --git a/oolama_to_lmstudio_proxy.py b/ollama_to_lmstudio_proxy.py
similarity index 100%
rename from oolama_to_lmstudio_proxy.py
rename to ollama_to_lmstudio_proxy.py

From 6fc50ab52b48522b7e40a744d6c5ea97c2ed5e76 Mon Sep 17 00:00:00 2001
From: wzq <40971680+chinese-wzq@users.noreply.github.com>
Date: Thu, 1 May 2025 00:41:18 +0800
Subject: [PATCH 3/3] Update README.md

Use Gemini 2.5 Pro to update the README.md.
---
 README.md | 87 +++++++++++++++++++++++++++----------------------------
 1 file changed, 43 insertions(+), 44 deletions(-)

diff --git a/README.md b/README.md
index bbdac8a..679b04c 100644
--- a/README.md
+++ b/README.md
@@ -1,63 +1,62 @@
 # Ollama to LM Studio API Proxy/Converter
 
-A simple proxy that transforms requests from Ollama to LM Studio, so that you can use applications that only support Ollama.
-I mainly created this for use with GitButler, but I could also see it being useful for other purposes.
+A simple proxy that transforms API requests intended for Ollama into the format expected by LM Studio, allowing you to use LM Studio with applications that only support Ollama's API.
+
+This project was primarily created for use with GitButler, which supports Ollama, but it can be useful for other applications as well.
 
 ## API Endpoints
 
-Here is a list of the endpoints and where they point to. In a table below.
-The first column is the ollama endpoint, and the second column is the LM Studio endpoint that it is forwarded to.
-
-| Ollama Endpoint | LM Studio Endpoint |
-|----------------------|----------------------|
-| /api/chat | /v1/chat/completions |
+Here is a list of the Ollama endpoints that are proxied and the corresponding LM Studio endpoints they are forwarded to:
+
+| Ollama Endpoint | LM Studio Endpoint     | Notes                                |
+| :-------------- | :--------------------- | :----------------------------------- |
+| `/api/chat`     | `/v1/chat/completions` | Handles both streaming and non-streaming requests. Transforms response format. |
+| `/api/tags`     | `/v1/models`           | Transforms response format.          |
 
 ## Setup
 
-You need to have a working Ollama installation and LM Studio. Please look up their getting started guides initially if you haven't got those already.
+To use this proxy, you need:
+
+1. **LM Studio:** Have LM Studio installed and running, with the models you wish to use loaded.
+2. **This Proxy Script:** Run this Python script.
+3. **Client Configuration:** Configure your Ollama-compatible application (e.g., GitButler) to connect to this proxy's address and port instead of a running Ollama instance.
+
+### Configuration
 
-For me, GitButler did not like using a different port number, so I had to change Ollama's default port to `11435` by using the environment variable `OLLAMA_HOST=127.0.0.1:11435`.
+You can configure the proxy's behavior by editing the following variables at the top of the `ollama_to_lmstudio_proxy.py` script:
 
-However, for broad compatibility, this is probably the recommended method since a lot of applications might not let you configure the port, but Ollama does allow you to change it.
+* `OLP_HOST`: The IP address the proxy server will listen on (e.g., `"127.0.0.1"` for localhost).
+* `OLP_PORT`: The port the proxy server will listen on. This is the port your client application should connect to (default is `11434`, Ollama's default).
+* `LM_STUDIO_PORT`: The port LM Studio's API server is running on. This script defaults to `11234`; note that LM Studio's own default server port is `1234`, so set this to match your LM Studio configuration. The `LM_STUDIO_BASE_URL`, `LM_STUDIO_CHAT_URL`, and `LM_STUDIO_MODELS_URL` values are derived from this.
+* `WORKAROUND_FOR_GITBUTLER`: A boolean flag. Setting this to `True` applies a specific response transformation needed for GitButler's *non-streaming* chat response parsing (it wraps the content in a `{"result": "..."}` JSON string). **Note:** This workaround is ignored for streaming requests. For most other clients, it should likely be `False`.
+* `DEBUGGING`: A boolean flag. Set to `True` to print detailed request/response information to the console for debugging purposes.
 
-On Linux I had to do the following:
+### Running on Ollama's Default Port (11434)
 
-1. Open the file `/etc/systemd/system/ollama.service` in your favourite editor.
-   1. You might have to edit as root, e.g. `nano /etc/systemd/system/ollama.service`
-2. Add an extra line after `Environment="PATH=..."` with `Environment="OLLAMA_HOST=127.0.0.1:11435"`.
-   1. So you should have something like:
-   ```
-   [Unit]
-   Description=Ollama Service
-   After=network-online.target
-
-   [Service]
-   ExecStart=/usr/local/bin/ollama serve
-   User=ollama
-   Group=ollama
-   Restart=always
-   RestartSec=3
-   Environment="PATH=..."
-   Environment="OLLAMA_HOST=127.0.0.1:11435"
-
-   [Install]
-   WantedBy=default.target
-   ```
+If your client application *must* connect to port `11434` and you *also* have Ollama installed and potentially running on the same machine, you might need to change Ollama's default port to free up `11434` for the proxy. A common way to do this is by setting the `OLLAMA_HOST` environment variable for the Ollama service.
+
+For example, on Linux systems using `systemd`, you can edit the Ollama service file (`/etc/systemd/system/ollama.service`) and add the line:
+
+```
+Environment="OLLAMA_HOST=127.0.0.1:11435"
+```
 
-If you want to keep Ollama's default port, you need to change the line `app.run(host='127.0.0.1', port=11434)` and replace the port number `11434` with whatever you have configured in your client making requests to Ollama.
+After modifying the service file, reload the systemd manager configuration (`sudo systemctl daemon-reload`) and restart the Ollama service (`sudo systemctl restart ollama`). This will make Ollama listen on port `11435`, allowing you to run the proxy on `11434`.
 
 ## Usage
 
-Once the script is running, you can use any Ollama-compatible application by pointing it to `http://localhost:11434` (or the appropriate host and port if you've modified the configuration).
+1. Ensure LM Studio is running and serving models on the port set in `LM_STUDIO_PORT` (`11234` by default in this script).
+2. Run the Python script: `python ollama_to_lmstudio_proxy.py`
+3. Configure your Ollama-compatible application to connect to the proxy's host and port (e.g., `http://127.0.0.1:11434`).
+
+The proxy will now intercept calls to `/api/chat` and `/api/tags`, forward them to LM Studio, and convert the responses back to the Ollama format expected by your client.
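+
+As a quick smoke test, the minimal sketch below exercises both proxied endpoints from Python. It assumes the default ports above, that at least one model is loaded in LM Studio, and it simply uses the first model name that `/api/tags` reports:
+
+```python
+import json
+import requests
+
+PROXY = "http://127.0.0.1:11434"  # OLP_HOST / OLP_PORT from the proxy script
+
+# List models (proxied from LM Studio's /v1/models)
+tags = requests.get(f"{PROXY}/api/tags", timeout=30).json()
+model = tags["models"][0]["name"]
+print("Using model:", model)
+
+# Streaming chat request in Ollama's format; the proxy replies with newline-delimited JSON
+payload = {
+    "model": model,
+    "messages": [{"role": "user", "content": "Say hello in one short sentence."}],
+    "stream": True,
+}
+with requests.post(f"{PROXY}/api/chat", json=payload, stream=True, timeout=300) as resp:
+    for line in resp.iter_lines():
+        if not line:
+            continue
+        chunk = json.loads(line)
+        print(chunk["message"]["content"], end="", flush=True)
+        if chunk.get("done"):
+            print()
+```
+
+If everything is wired up correctly, the model list prints first and the chat reply streams to the terminal as it is generated.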
 
-## Issues
+## Issues and Limitations
 
-Really the only issue at the moment are:
-
-- LM Studio returns usage as tokens, but Ollama returns durations.
-  - This means mapping them needs to be calculated somehow.
-  - For now, I have these as placeholders set to 0, so that there are no issues with software expecting them.
-- This is very rough code that I threw together quickly. Needs more polish!
+* **Duration Metrics:** LM Studio provides token counts (prompt, completion) in its `usage` data, which are mapped to `prompt_eval_count` and `eval_count` respectively in the Ollama response. However, Ollama also provides detailed timing/duration metrics (`total_duration`, `load_duration`, etc.), which LM Studio does not expose in the same way. These duration fields are currently hardcoded to `0` in the proxy response to maintain compatibility.
+* **GitButler Workaround:** The `WORKAROUND_FOR_GITBUTLER` setting is a specific transformation applied to *non-streaming* responses based on observed behavior with GitButler. It may not be needed for other clients and is ignored for streaming requests.
+* **Endpoint Coverage:** Currently, only `/api/chat` and `/api/tags` are implemented. Other Ollama endpoints (like `/api/generate`, `/api/embeddings`, etc.) are not yet supported. Requests to unimplemented endpoints will likely result in a 404 error.
+* **Robustness & Features:** This is a relatively simple proxy. More advanced features like full error mapping, better logging, support for more Ollama request parameters, etc., could be added.
 
 ## Contributing
 
@@ -69,7 +68,7 @@ This project is licensed under the [MIT License](LICENSE).
 
 ## Acknowledgements
 
-- [Ollama](https://github.com/ollama/ollama)
-- [LM Studio](https://lmstudio.ai/)
-- [LM Studio CLI](https://github.com/lmstudio-ai/lms)
-- [GitButler](https://github.com/gitbutlerapp/gitbutler)
\ No newline at end of file
+* [Ollama](https://github.com/ollama/ollama)
+* [LM Studio](https://lmstudio.ai/)
+* [LM Studio CLI](https://github.com/lmstudio-ai/lms)
+* [GitButler](https://github.com/gitbutlerapp/gitbutler)