From a760f6d7d4cdf15bbc1b243cadd36d038969a1ec Mon Sep 17 00:00:00 2001
From: wzq <40971680+chinese-wzq@users.noreply.github.com>
Date: Thu, 1 May 2025 00:30:37 +0800
Subject: [PATCH 1/3] Update oolama_to_lmstudio_proxy.py

Support stream and /api/tags
---
 oolama_to_lmstudio_proxy.py | 381 ++++++++++++++++++++++++++++--------
 1 file changed, 296 insertions(+), 85 deletions(-)

diff --git a/oolama_to_lmstudio_proxy.py b/oolama_to_lmstudio_proxy.py
index 3e9a9b7..32dfb63 100644
--- a/oolama_to_lmstudio_proxy.py
+++ b/oolama_to_lmstudio_proxy.py
@@ -1,117 +1,328 @@
-from flask import Flask, request, jsonify
+from flask import Flask, request, jsonify, Response, stream_with_context # Added Response, stream_with_context
 import requests
-from datetime import datetime
+from datetime import datetime, timezone
 import json
+from flask_cors import CORS
 
 app = Flask(__name__)
+CORS(app) # Enable CORS
 
 # Configure OLP (Ollama to LM Studio Proxy) API endpoint.
 OLP_HOST = "127.0.0.1"
 OLP_PORT = 11434 # Change this port to whatever your expected caller application sends requests to, by default its Ollama's port is 11434
 
 # OLP Settings
-WORKAROUND_FOR_GITBUTLER = True
+WORKAROUND_FOR_GITBUTLER = False # NOTE: This workaround is ignored during streaming
 DEBUGGING = True
+LM_STUDIO_PORT = 11234
+LM_STUDIO_BASE_URL = f"http://localhost:{LM_STUDIO_PORT}/v1"
+LM_STUDIO_CHAT_URL = f"{LM_STUDIO_BASE_URL}/chat/completions"
+LM_STUDIO_MODELS_URL = f"{LM_STUDIO_BASE_URL}/models"
+# --- End Configuration ---
 
-# Configure LM Studio's API endpoint.
-LM_STUDIO_PORT = 1234
-LM_STUDIO_API_URL = f"http://localhost:{LM_STUDIO_PORT}/v1/chat/completions"
-
-# Configure Ollama's API endpoint
-# Commented out because it was used for debugging.
-# OLLAMA_PORT = 11435 # Default for Ollama is 11434
-# OLLAMA_API_URL = f"http://localhost:{OLLAMA_PORT}/api/chat"
+# Helper function to create Ollama-style stream chunks
+def format_ollama_stream_chunk(model_name, content_delta, created_at_iso, done=False, done_reason=None, final_stats=None):
+    """Formats a chunk for Ollama streaming response."""
+    chunk = {
+        "model": model_name,
+        "created_at": created_at_iso,
+        "message": {
+            "role": "assistant",
+            "content": content_delta # Send only the delta
+        },
+        "done": done,
+    }
+    if done:
+        # For the final chunk, add reason and potentially stats
+        chunk["done_reason"] = done_reason if done_reason else "stop" # Default to stop if None
+        if final_stats:
+            chunk.update(final_stats) # Add stats if provided
+        # Ensure message is present even if empty content in final chunk
+        if "message" not in chunk or not chunk["message"]:
+            chunk["message"] = {"role": "assistant", "content": ""}
+
+    # Important: Each JSON object must be followed by a newline for ndjson
+    return json.dumps(chunk) + '\n'
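+# A single non-final chunk serializes to one ndjson line, e.g.:
+# {"model": "llama3", "created_at": "2025-05-01T00:30:37Z", "message": {"role": "assistant", "content": "Hel"}, "done": false}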
""" + try: + request_data = request.get_json() + if not request_data: + return jsonify({"error": "Invalid JSON payload"}), 400 + except Exception as e: + if DEBUGGING: print(f"Error parsing request JSON: {e}") + return jsonify({"error": f"Failed to parse request JSON: {e}"}), 400 - # Extract the request data (assuming it's JSON) - request_data = request.get_json() + is_streaming = request_data.get("stream", True) + model_name = request_data.get("model", "unknown_model") # Get model name early - # Print the incoming request data for debugging if DEBUGGING: - print('INCOMING REQUEST:') - print(request_data) + print(f'INCOMING /api/chat REQUEST (Streaming: {is_streaming}):') + print(json.dumps(request_data, indent=2)) print('') - # Forward the request to LM Studio (note that LM Studio does not need the data transformed) - lm_response = requests.post(LM_STUDIO_API_URL, json=request_data) - lm_data = lm_response.json() + # --- Streaming Logic --- + if is_streaming: + # Make sure stream is True in the forwarded request + request_data["stream"] = True + # GitButler workaround is incompatible with streaming + if WORKAROUND_FOR_GITBUTLER and DEBUGGING: + print("Note: WORKAROUND_FOR_GITBUTLER is ignored for streaming requests.") - # Print the LM Studio response for debugging - if DEBUGGING: - print('LM STUDIO RESPONSE:') - print(lm_data) - print('') + def event_stream(): + created_at_iso = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") + last_chunk_data = {} # To store final stats if needed + finish_reason = None - # Forward the request to Ollama - # o_response = requests.post(OLLAMA_API_URL, json=request_data) - # o_data = o_response.json() - - # Print the Ollama response for debugging - # if DEBUGGING: - # print('OLLAMA RESPONSE:') - # print(o_data) - # print('') - - # Transform the response from LM Studio to match what Ollama-clients expects - message = "" - done_response = "unknown" - if lm_data.get("choices"): - # Extract the message from the LM Studio response - message = lm_data["choices"][0]["message"] - - # Workaround for GitButler, it wants the AI response message to be a JSON object for some reason. - if WORKAROUND_FOR_GITBUTLER: - # Convert the message to a JSON object - message_json = { - "result": message["content"] - } - - # Replace the message with the JSON object - message["content"] = json.dumps(message_json) - - # Extract the done reason from the LM Studio response - done_response = lm_data["choices"][0]["finish_reason"] - - # Transform the response from LM Studio to match what Ollama expects - transformed_response = { - "model": request_data.get("model", "llama3"), # Most applications might require it to be llama3 back. 
- "created_at": datetime.utcfromtimestamp(lm_data.get("created")).isoformat() + "Z", - "message": message, - "done": True, - "done_reason": done_response, - "total_duration": 0, # Placeholder, as LM Studio doesn't provide this directly - "load_duration": 0, # Placeholder, as LM Studio doesn't provide this directly - "prompt_eval_count": 0, # Placeholder, as LM Studio doesn't provide this directly - "prompt_eval_duration": 0, # Placeholder, as LM Studio doesn't provide this directly - "eval_count": 0, # Placeholder, as LM Studio doesn't provide this directly - "eval_duration": 0 # Placeholder, as LM Studio doesn't provide this directly - } + try: + # Use stream=True with requests + with requests.post(LM_STUDIO_CHAT_URL, json=request_data, stream=True, timeout=300) as lm_response: + lm_response.raise_for_status() # Check for initial HTTP errors + + if DEBUGGING: print('LM STUDIO STREAMING RESPONSE STARTED') + + for line in lm_response.iter_lines(): + if line: + decoded_line = line.decode('utf-8') + + # LM Studio/OpenAI often send SSE format: "data: {...}" + if decoded_line.startswith('data: '): + json_str = decoded_line[len('data: '):].strip() + else: + # Assume it might just be JSON directly in some cases + json_str = decoded_line + + # Check for final SSE message '[DONE]' + if json_str == '[DONE]': + if DEBUGGING: print("LM Studio stream finished with [DONE]") + break # Stop processing lines + + if not json_str: continue # Skip empty lines after stripping 'data: ' + + try: + chunk_data = json.loads(json_str) + if DEBUGGING: + print('LM STUDIO CHUNK RECEIVED:') + print(json.dumps(chunk_data, indent=2)) + + # --- Transform LM Studio Chunk to Ollama Chunk --- + content_delta = "" + choices = chunk_data.get("choices", []) + if choices and isinstance(choices, list) and len(choices) > 0: + delta = choices[0].get("delta", {}) + content_delta = delta.get("content", "") + # Check for finish reason in the chunk + if choices[0].get("finish_reason"): + finish_reason = choices[0].get("finish_reason") + if DEBUGGING: print(f"Finish reason detected in chunk: {finish_reason}") + + # Store potential final usage stats (might appear in last data chunk before or instead of [DONE]) + usage = chunk_data.get("usage") + if usage: + last_chunk_data["total_duration"] = 0 # Placeholder + last_chunk_data["load_duration"] = 0 # Placeholder + last_chunk_data["prompt_eval_count"] = usage.get("prompt_tokens", 0) + last_chunk_data["prompt_eval_duration"] = 0 # Placeholder + last_chunk_data["eval_count"] = usage.get("completion_tokens", 0) + last_chunk_data["eval_duration"] = 0 # Placeholder + if DEBUGGING: print(f"Usage stats received in chunk: {usage}") + + + # Yield intermediate chunk if there's content + if content_delta: + ollama_chunk = format_ollama_stream_chunk( + model_name=model_name, + content_delta=content_delta, + created_at_iso=created_at_iso, + done=False + ) + if DEBUGGING: + print('YIELDING OLLAMA CHUNK:') + print(ollama_chunk.strip()) + print("-" * 20) + yield ollama_chunk.encode('utf-8') # Yield bytes + + except json.JSONDecodeError: + if DEBUGGING: print(f"Skipping non-JSON line: {json_str}") + continue # Ignore lines that aren't valid JSON data chunks + except Exception as e_transform: + if DEBUGGING: print(f"Error transforming chunk: {e_transform}\nChunk: {json_str}") + # Decide if you want to yield an error chunk or just continue/break + # yield format_ollama_stream_chunk(model_name, f"Error: {e_transform}", created_at_iso, done=True, done_reason="error").encode('utf-8') + # break # Stop streaming on 
+
+                # After the loop finishes (or breaks on [DONE]/error) - yield the final chunk
+                final_ollama_chunk = format_ollama_stream_chunk(
+                    model_name=model_name,
+                    content_delta="", # No more content in the final meta chunk
+                    created_at_iso=created_at_iso,
+                    done=True,
+                    done_reason=finish_reason, # Use the detected reason
+                    final_stats=last_chunk_data # Include any collected stats
+                )
+                if DEBUGGING:
+                    print('YIELDING FINAL OLLAMA CHUNK:')
+                    print(final_ollama_chunk.strip())
+                    print("=" * 20)
+                yield final_ollama_chunk.encode('utf-8')
+
+            except requests.exceptions.RequestException as e:
+                error_message = f"Error connecting to LM Studio stream: {e}"
+                if DEBUGGING: print(error_message)
+                # Yield a final error chunk to the client
+                yield format_ollama_stream_chunk(model_name, error_message, datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), done=True, done_reason="error").encode('utf-8')
+            except Exception as e:
+                error_message = f"Unexpected error during streaming: {e}"
+                if DEBUGGING: print(error_message)
+                # Yield a final error chunk to the client
+                yield format_ollama_stream_chunk(model_name, error_message, datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), done=True, done_reason="error").encode('utf-8')
+            finally:
+                if DEBUGGING: print('LM STUDIO STREAMING RESPONSE FINISHED')
+
+
+        # Return a Streaming Response using the generator
+        # Use stream_with_context to ensure generator has access to app/request context if needed
+        return Response(stream_with_context(event_stream()), mimetype='application/x-ndjson')
+
+    # --- Non-Streaming Logic (Original code slightly adapted) ---
+    else:
+        try:
+            # Make the normal non-streaming request
+            lm_response = requests.post(LM_STUDIO_CHAT_URL, json=request_data, timeout=300)
+            lm_response.raise_for_status()
+            lm_data = lm_response.json()
+
+            if DEBUGGING:
+                print('LM STUDIO NON-STREAMING RESPONSE:')
+                print(json.dumps(lm_data, indent=2))
+                print('')
+
+        except requests.exceptions.RequestException as e:
+            if DEBUGGING: print(f"Error connecting to LM Studio chat endpoint: {e}")
+            return jsonify({"error": f"Failed to connect to LM Studio chat endpoint: {e}"}), 502
+        except json.JSONDecodeError as e:
+            if DEBUGGING: print(f"Error decoding LM Studio chat JSON response: {e}")
+            return jsonify({"error": f"Invalid JSON response from LM Studio chat endpoint: {e}"}), 502
+        except Exception as e:
+            if DEBUGGING: print(f"Unexpected error during LM Studio chat request: {e}")
+            return jsonify({"error": f"Unexpected error processing LM Studio chat request: {e}"}), 500
+
+        # Transform the *single* response (Mostly same as before, ensure WORKAROUND applies only here if needed)
+        message = {}
+        done_response = "unknown"
+        prompt_tokens = completion_tokens = 0
+        try:
+            if lm_data.get("choices") and isinstance(lm_data["choices"], list) and len(lm_data["choices"]) > 0:
+                choice = lm_data["choices"][0]
+                message = choice.get("message", {})
+                done_response = choice.get("finish_reason", "unknown")
+
+                # Apply workaround ONLY if not streaming and flag is True
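+                # (e.g. a plain content value of "Hello there!" becomes the JSON string '{"result": "Hello there!"}')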
= {"role": "assistant", "content": "Error processing LM Studio response."} + done_response = "error" + + try: + created_timestamp = lm_data.get("created") + created_at_iso = datetime.fromtimestamp(created_timestamp, timezone.utc).isoformat().replace("+00:00", "Z") if created_timestamp else datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") + except Exception: + created_at_iso = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") + + transformed_response = { + "model": model_name, + "created_at": created_at_iso, + "message": message, # Contains full message (potentially double-encoded if workaround active) + "done": True, # Non-streaming is always done=True in the final response + "done_reason": done_response, + "total_duration": 0, + "load_duration": 0, + "prompt_eval_count": prompt_tokens if prompt_tokens is not None else 0, + "prompt_eval_duration": 0, + "eval_count": completion_tokens if completion_tokens is not None else 0, + "eval_duration": 0 + } + + if DEBUGGING: + print('SENDING THIS NON-STREAMING BACK INSTEAD:') + print(json.dumps(transformed_response, indent=2)) + print('') + + return jsonify(transformed_response), lm_response.status_code + +# --- /api/tags endpoint remains the same --- +@app.route('/api/tags', methods=['GET']) +def get_tags(): + """ + Proxies '/api/tags' requests to LM Studio's models endpoint + and transforms the response to Ollama format. + """ if DEBUGGING: - print('SENDING THIS BACK INSTEAD:') - print(transformed_response) + print('INCOMING /api/tags REQUEST') print('') + try: + lm_response = requests.get(LM_STUDIO_MODELS_URL, timeout=60) + lm_response.raise_for_status() + lm_data = lm_response.json() + if DEBUGGING: + print('LM STUDIO MODELS RESPONSE:') + print(json.dumps(lm_data, indent=2)) + print('') + except requests.exceptions.RequestException as e: + if DEBUGGING: print(f"Error connecting to LM Studio models endpoint: {e}") + return jsonify({"error": f"Failed to connect to LM Studio models endpoint: {e}"}), 502 + except json.JSONDecodeError as e: + if DEBUGGING: print(f"Error decoding LM Studio models JSON response: {e}") + return jsonify({"error": f"Invalid JSON response from LM Studio models endpoint: {e}"}), 502 + except Exception as e: + if DEBUGGING: print(f"Unexpected error during LM Studio models request: {e}") + return jsonify({"error": f"Unexpected error processing LM Studio models request: {e}"}), 500 - # Return the transformed response to the original caller - return jsonify(transformed_response), lm_response.status_code # return lm studio + # Transform LM Studio models to Ollama tags format + ollama_models = [] + if lm_data and isinstance(lm_data.get('data'), list): + now_iso = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") + for model_info in lm_data['data']: + model_id = model_info.get('id') + if model_id: + family = "unknown" + if '/' in model_id: + parts = model_id.split('/') + if len(parts) > 1: family = parts[1] + elif '-' in model_id: + family = model_id.split('-')[0] + ollama_model = { + "name": f"{model_id}:latest", "model": f"{model_id}:latest", + "modified_at": now_iso, "size": 0, "digest": "", + "details": { "parent_model": "", "format": "gguf", "family": family, + "families": [family] if family != "unknown" else None, + "parameter_size": "unknown", "quantization_level": "unknown" } + } + if ollama_model["details"]["families"] == ["unknown"]: + ollama_model["details"]["families"] = None + ollama_models.append(ollama_model) + transformed_response = {"models": ollama_models} - # Uncomment this 
+    if DEBUGGING:
+        print('SENDING THIS /api/tags BACK INSTEAD:')
+        print(json.dumps(transformed_response, indent=2))
+        print('')
+    return jsonify(transformed_response), 200
 
 if __name__ == '__main__':
+    # Use 0.0.0.0 if your client is on a different machine than the proxy
+    # app.run(host='0.0.0.0', port=OLP_PORT)
     app.run(host=OLP_HOST, port=OLP_PORT)

From 8662a43e5293c908e6852ddc7c124dd59fc9499a Mon Sep 17 00:00:00 2001
From: wzq <40971680+chinese-wzq@users.noreply.github.com>
Date: Thu, 1 May 2025 00:34:22 +0800
Subject: [PATCH 2/3] Rename oolama_to_lmstudio_proxy.py to ollama_to_lmstudio_proxy.py

---
 oolama_to_lmstudio_proxy.py => ollama_to_lmstudio_proxy.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename oolama_to_lmstudio_proxy.py => ollama_to_lmstudio_proxy.py (100%)

diff --git a/oolama_to_lmstudio_proxy.py b/ollama_to_lmstudio_proxy.py
similarity index 100%
rename from oolama_to_lmstudio_proxy.py
rename to ollama_to_lmstudio_proxy.py

From 6fc50ab52b48522b7e40a744d6c5ea97c2ed5e76 Mon Sep 17 00:00:00 2001
From: wzq <40971680+chinese-wzq@users.noreply.github.com>
Date: Thu, 1 May 2025 00:41:18 +0800
Subject: [PATCH 3/3] Update README.md

Use Gemini 2.5 Pro to update the README.md.
---
 README.md | 87 +++++++++++++++++++++++++++----------------------------
 1 file changed, 43 insertions(+), 44 deletions(-)

diff --git a/README.md b/README.md
index bbdac8a..679b04c 100644
--- a/README.md
+++ b/README.md
@@ -1,63 +1,62 @@
 # Ollama to LM Studio API Proxy/Converter
 
-A simple proxy that transforms requests from Ollama to LM Studio, so that you can use applications that only support Ollama.
-I mainly created this for use with GitButler, but I could also see it being useful for other purposes.
+A simple proxy that transforms API requests intended for Ollama into the format expected by LM Studio, allowing you to use LM Studio with applications that only support Ollama's API.
+
+This project was primarily created for use with GitButler, which supports Ollama, but it can be useful for other applications as well.
 
 ## API Endpoints
 
-Here is a list of the endpoints and where they point to. In a table below.
-The first column is the ollama endpoint, and the second column is the LM Studio endpoint that it is forwarded to.
-
-| Ollama Endpoint | LM Studio Endpoint |
-|----------------------|----------------------|
-| /api/chat | /v1/chat/completions |
+Here is a list of the Ollama endpoints that are proxied and the corresponding LM Studio endpoints they are forwarded to:
+
+| Ollama Endpoint | LM Studio Endpoint     | Notes                                |
+| :-------------- | :--------------------- | :----------------------------------- |
+| `/api/chat`     | `/v1/chat/completions` | Handles both streaming and non-streaming requests. Transforms response format. |
+| `/api/tags`     | `/v1/models`           | Transforms response format.          |
 
 ## Setup
 
-You need to have a working Ollama installation and LM Studio. Please look up their getting started guides initially if you haven't got those already.
+To use this proxy, you need:
+
+1. **LM Studio:** Have LM Studio installed and running, with the models you wish to use loaded.
+2. **This Proxy Script:** Run this Python script.
+3. **Client Configuration:** Configure your Ollama-compatible application (e.g., GitButler) to connect to this proxy's address and port instead of a running Ollama instance.
+
+### Configuration
 
-For me, GitButler did not like using a different port number, so I had to change Ollama's default port to `11435` by using the environment variable `OLLAMA_HOST=127.0.0.1:11435`.
+You can configure the proxy's behavior by editing the following variables at the top of the `ollama_to_lmstudio_proxy.py` script:
 
-However, for broad compatibility, this is probably the recommended method since a lot of applications might not let you configure the port, but Ollama does allow you to change it.
+* `OLP_HOST`: The IP address the proxy server will listen on (e.g., `"127.0.0.1"` for localhost).
+* `OLP_PORT`: The port the proxy server will listen on. This is the port your client application should connect to (default is `11434`, Ollama's default).
+* `LM_STUDIO_PORT`: The port LM Studio's API server is running on. This script defaults to `11234`; note that LM Studio's own default server port is `1234`, so set this to match your LM Studio configuration. The `LM_STUDIO_BASE_URL`, `LM_STUDIO_CHAT_URL`, and `LM_STUDIO_MODELS_URL` values are derived from this.
+* `WORKAROUND_FOR_GITBUTLER`: A boolean flag. Setting this to `True` applies a specific response transformation needed for GitButler's *non-streaming* chat response parsing (it wraps the content in a `{"result": "..."}` JSON string). **Note:** This workaround is ignored for streaming requests. For most other clients, it should likely be `False`.
+* `DEBUGGING`: A boolean flag. Set to `True` to print detailed request/response information to the console for debugging purposes.
 
-On Linux I had to do the following:
+### Running on Ollama's Default Port (11434)
 
-1. Open the file `/etc/systemd/system/ollama.service` in your favourite editor.
-   1. You might have to edit as root, e.g. `nano /etc/systemd/system/ollama.service`
-2. Add an extra line after `Environment="PATH=..."` with `Environment="OLLAMA_HOST=127.0.0.1:11435"`.
-   1. So you should have something like:
-   ```
-   [Unit]
-   Description=Ollama Service
-   After=network-online.target
-
-   [Service]
-   ExecStart=/usr/local/bin/ollama serve
-   User=ollama
-   Group=ollama
-   Restart=always
-   RestartSec=3
-   Environment="PATH=..."
-   Environment="OLLAMA_HOST=127.0.0.1:11435"
-
-   [Install]
-   WantedBy=default.target
-   ```
+If your client application *must* connect to port `11434` and you *also* have Ollama installed and potentially running on the same machine, you might need to change Ollama's default port to free up `11434` for the proxy. A common way to do this is by setting the `OLLAMA_HOST` environment variable for the Ollama service.
+
+For example, on Linux systems using `systemd`, you can edit the Ollama service file (`/etc/systemd/system/ollama.service`) and add the line:
+
+```
+Environment="OLLAMA_HOST=127.0.0.1:11435"
+```
 
-If you want to keep Ollama's default port, you need to change the line `app.run(host='127.0.0.1', port=11434)` and replace the port number `11434` with whatever you have configured in your client making requests to Ollama.
+After modifying the service file, reload the systemd manager configuration (`sudo systemctl daemon-reload`) and restart the Ollama service (`sudo systemctl restart ollama`). This will make Ollama listen on port `11435`, allowing you to run the proxy on `11434`.
 
 ## Usage
 
-Once the script is running, you can use any Ollama-compatible application by pointing it to `http://localhost:11434` (or the appropriate host and port if you've modified the configuration).
+1. Ensure LM Studio is running and serving models on the port set in `LM_STUDIO_PORT` (`11234` by default in this script).
+2. Run the Python script: `python ollama_to_lmstudio_proxy.py`
+3. Configure your Ollama-compatible application to connect to the proxy's host and port (e.g., `http://127.0.0.1:11434`).
+
+The proxy will now intercept calls to `/api/chat` and `/api/tags`, forward them to LM Studio, and convert the responses back to the Ollama format expected by your client.
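+
+As a quick smoke test, the minimal sketch below exercises both proxied endpoints from Python. It assumes the default ports above, that at least one model is loaded in LM Studio, and it simply uses the first model name that `/api/tags` reports:
+
+```python
+import json
+import requests
+
+PROXY = "http://127.0.0.1:11434"  # OLP_HOST / OLP_PORT from the proxy script
+
+# List models (proxied from LM Studio's /v1/models)
+tags = requests.get(f"{PROXY}/api/tags", timeout=30).json()
+model = tags["models"][0]["name"]
+print("Using model:", model)
+
+# Streaming chat request in Ollama's format; the proxy replies with newline-delimited JSON
+payload = {
+    "model": model,
+    "messages": [{"role": "user", "content": "Say hello in one short sentence."}],
+    "stream": True,
+}
+with requests.post(f"{PROXY}/api/chat", json=payload, stream=True, timeout=300) as resp:
+    for line in resp.iter_lines():
+        if not line:
+            continue
+        chunk = json.loads(line)
+        print(chunk["message"]["content"], end="", flush=True)
+        if chunk.get("done"):
+            print()
+```
+
+If everything is wired up correctly, the model list prints first and the chat reply streams to the terminal as it is generated.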
 
-## Issues
+## Issues and Limitations
 
-Really the only issue at the moment are:
-
-- LM Studio returns usage as tokens, but Ollama returns durations.
-  - This means mapping them needs to be calculated somehow.
-  - For now, I have these as placeholders set to 0, so that there are no issues with software expecting them.
-- This is very rough code that I threw together quickly. Needs more polish!
+* **Duration Metrics:** LM Studio provides token counts (prompt, completion) in its `usage` data, which are mapped to `prompt_eval_count` and `eval_count` respectively in the Ollama response. However, Ollama also provides detailed timing/duration metrics (`total_duration`, `load_duration`, etc.), which LM Studio does not expose in the same way. These duration fields are currently hardcoded to `0` in the proxy response to maintain compatibility.
+* **GitButler Workaround:** The `WORKAROUND_FOR_GITBUTLER` setting is a specific transformation applied to *non-streaming* responses based on observed behavior with GitButler. It may not be needed for other clients and is ignored for streaming requests.
+* **Endpoint Coverage:** Currently, only `/api/chat` and `/api/tags` are implemented. Other Ollama endpoints (like `/api/generate`, `/api/embeddings`, etc.) are not yet supported. Requests to unimplemented endpoints will likely result in a 404 error.
+* **Robustness & Features:** This is a relatively simple proxy. More advanced features like full error mapping, better logging, support for more Ollama request parameters, etc., could be added.
 
 ## Contributing
 
@@ -69,7 +68,7 @@ This project is licensed under the [MIT License](LICENSE).
 
 ## Acknowledgements
 
-- [Ollama](https://github.com/ollama/ollama)
-- [LM Studio](https://lmstudio.ai/)
-- [LM Studio CLI](https://github.com/lmstudio-ai/lms)
-- [GitButler](https://github.com/gitbutlerapp/gitbutler)
\ No newline at end of file
+* [Ollama](https://github.com/ollama/ollama)
+* [LM Studio](https://lmstudio.ai/)
+* [LM Studio CLI](https://github.com/lmstudio-ai/lms)
+* [GitButler](https://github.com/gitbutlerapp/gitbutler)