From b0a8cd8c19fab2d387ef87f6322958d017181f3e Mon Sep 17 00:00:00 2001
From: Alex Bozarth <ajbozart@us.ibm.com>
Date: Mon, 7 Oct 2024 15:52:52 -0500
Subject: [PATCH 1/6] Add support for local mode via Ollama

Adds support for running against local models by supporting the Ollama
API in addition to the Qiskit Code Assistant service API.

This allows users to enter an Ollama API URL instead of a Qiskit Code
Assistant service URL; the server extension detects which API the URL
points to and calls the corresponding endpoints.
---
 qiskit_code_assistant_jupyterlab/handlers.py | 206 +++++++++++++------
 src/service/autocomplete.ts                  |  28 ++-
 2 files changed, 156 insertions(+), 78 deletions(-)

diff --git a/qiskit_code_assistant_jupyterlab/handlers.py b/qiskit_code_assistant_jupyterlab/handlers.py
index 310bb1d..bdafadd 100644
--- a/qiskit_code_assistant_jupyterlab/handlers.py
+++ b/qiskit_code_assistant_jupyterlab/handlers.py
@@ -24,7 +24,11 @@
 from jupyter_server.utils import url_path_join
 from qiskit_ibm_runtime import QiskitRuntimeService
 
-runtime_configs = {"service_url": "http://localhost", "api_token": ""}
+runtime_configs = {
+    "service_url": "http://localhost",
+    "api_token": "",
+    "is_ollama": False,
+}
 
 
 def update_token(token):
@@ -55,11 +59,26 @@ def init_token():
 
 
 def get_header():
-    return {
+    header = {
         "Accept": "application/json",
         "Content-Type": "application/json",
         "X-Caller": "qiskit-code-assistant-jupyterlab",
-        "Authorization": f"Bearer {runtime_configs['api_token']}",
+    }
+    if not runtime_configs["is_ollama"]:
+        header["Authorization"] = f"Bearer {runtime_configs['api_token']}"
+    return header
+
+
+def convert_ollama(model):
+    return {
+        "_id": model["model"],
+        "disclaimer": {"accepted": "true"},
+        "display_name": model["name"],
+        "doc_link": "",
+        "license": {"name": "", "link": ""},
+        "model_id": model["model"],
+        "prompt_type": 1,
+        "token_limit": 255
     }
 
 
@@ -74,13 +93,19 @@ def post(self):
 
         runtime_configs["service_url"] = json_payload["url"]
 
-        self.finish(json.dumps({"url": runtime_configs["service_url"]}))
+        try:
+            r = requests.get(url_path_join(runtime_configs["service_url"]), headers=get_header())
+            # TODO: Replace with a check against the QCA service instead
+            runtime_configs["is_ollama"] = ("Ollama is running" in r.text)
+        finally:
+            self.finish(json.dumps({"url": runtime_configs["service_url"]}))
 
 
 class TokenHandler(APIHandler):
     @tornado.web.authenticated
     def get(self):
-        self.finish(json.dumps({"success": (runtime_configs["api_token"] != "")}))
+        self.finish(json.dumps({"success": (runtime_configs["api_token"] != ""
+                                            or runtime_configs["is_ollama"])}))
 
     @tornado.web.authenticated
     def post(self):
@@ -94,98 +119,155 @@ def post(self):
 class ModelsHandler(APIHandler):
     @tornado.web.authenticated
     def get(self):
-        url = url_path_join(runtime_configs["service_url"], "models")
-
-        try:
-            r = requests.get(url, headers=get_header())
-            r.raise_for_status()
-        except requests.exceptions.HTTPError as err:
-            self.set_status(err.response.status_code)
-            self.finish(json.dumps(err.response.json()))
+        if runtime_configs["is_ollama"]:
+            url = url_path_join(runtime_configs["service_url"], "api", "tags")
+            models = []
+            try:
+                r = requests.get(url, headers=get_header())
+                r.raise_for_status()
+
+                if r.ok:
+                    ollama_models = r.json()["models"]
+                    models = list(map(convert_ollama, ollama_models))
+            except requests.exceptions.HTTPError as err:
+                self.set_status(err.response.status_code)
+                self.finish(json.dumps(err.response.json()))
+            else:
+                self.finish(json.dumps({"models": models}))
         else:
-            self.finish(json.dumps(r.json()))
+            url = url_path_join(runtime_configs["service_url"], "models")
+
+            try:
+                r = requests.get(url, headers=get_header())
+                r.raise_for_status()
+            except requests.exceptions.HTTPError as err:
+                self.set_status(err.response.status_code)
+                self.finish(json.dumps(err.response.json()))
+            else:
+                self.finish(json.dumps(r.json()))
 
 
 class ModelHandler(APIHandler):
     @tornado.web.authenticated
     def get(self, id):
-        url = url_path_join(runtime_configs["service_url"], "model", id)
-
-        try:
-            r = requests.get(url, headers=get_header())
-            r.raise_for_status()
-        except requests.exceptions.HTTPError as err:
-            self.set_status(err.response.status_code)
-            self.finish(json.dumps(err.response.json()))
+        if runtime_configs["is_ollama"]:
+            self.set_status(501, "Not implemented")
+            self.finish()
         else:
-            self.finish(json.dumps(r.json()))
+            url = url_path_join(runtime_configs["service_url"], "model", id)
+
+            try:
+                r = requests.get(url, headers=get_header())
+                r.raise_for_status()
+            except requests.exceptions.HTTPError as err:
+                self.set_status(err.response.status_code)
+                self.finish(json.dumps(err.response.json()))
+            else:
+                self.finish(json.dumps(r.json()))
 
 
 class DisclaimerHandler(APIHandler):
     @tornado.web.authenticated
     def get(self, id):
-        url = url_path_join(runtime_configs["service_url"], "model", id, "disclaimer")
-
-        try:
-            r = requests.get(url, headers=get_header())
-            r.raise_for_status()
-        except requests.exceptions.HTTPError as err:
-            self.set_status(err.response.status_code)
-            self.finish(json.dumps(err.response.json()))
+        if runtime_configs["is_ollama"]:
+            self.set_status(501, "Not implemented")
+            self.finish()
         else:
-            self.finish(json.dumps(r.json()))
+            url = url_path_join(runtime_configs["service_url"], "model", id, "disclaimer")
+
+            try:
+                r = requests.get(url, headers=get_header())
+                r.raise_for_status()
+            except requests.exceptions.HTTPError as err:
+                self.set_status(err.response.status_code)
+                self.finish(json.dumps(err.response.json()))
+            else:
+                self.finish(json.dumps(r.json()))
 
 
 class DisclaimerAcceptanceHandler(APIHandler):
     @tornado.web.authenticated
     def post(self, id):
-        url = url_path_join(
-            runtime_configs["service_url"], "disclaimer", id, "acceptance"
-        )
-
-        try:
-            r = requests.post(url, headers=get_header(), json=self.get_json_body())
-            r.raise_for_status()
-        except requests.exceptions.HTTPError as err:
-            self.set_status(err.response.status_code)
-            self.finish(json.dumps(err.response.json()))
+        if runtime_configs["is_ollama"]:
+            self.set_status(501, "Not implemented")
+            self.finish()
         else:
-            self.finish(json.dumps(r.json()))
+            url = url_path_join(
+                runtime_configs["service_url"], "disclaimer", id, "acceptance"
+            )
+
+            try:
+                r = requests.post(url, headers=get_header(), json=self.get_json_body())
+                r.raise_for_status()
+            except requests.exceptions.HTTPError as err:
+                self.set_status(err.response.status_code)
+                self.finish(json.dumps(err.response.json()))
+            else:
+                self.finish(json.dumps(r.json()))
 
 
 class PromptHandler(APIHandler):
     @tornado.web.authenticated
     def post(self, id):
-        url = url_path_join(runtime_configs["service_url"], "model", id, "prompt")
-
-        try:
-            r = requests.post(url, headers=get_header(), json=self.get_json_body())
-            r.raise_for_status()
-        except requests.exceptions.HTTPError as err:
-            self.set_status(err.response.status_code)
-            self.finish(json.dumps(err.response.json()))
+        if runtime_configs["is_ollama"]:
+            url = url_path_join(runtime_configs["service_url"], "api", "generate")
+            result = {}
+            try:
+                r = requests.post(url,
+                                  headers=get_header(),
+                                  json={
+                                      "model": id,
+                                      "prompt": self.get_json_body()["input"],
+                                      "stream": False
+                                  })
+                r.raise_for_status()
+
+                if r.ok:
+                    ollama_response = r.json()
+                    result = {
+                        "results": [{"generated_text": ollama_response["response"]}],
+                        "prompt_id": ollama_response["created_at"],
+                        "created_at": ollama_response["created_at"]
+                    }
+            except requests.exceptions.HTTPError as err:
+                self.set_status(err.response.status_code)
+                self.finish(json.dumps(err.response.json()))
+            else:
+                self.finish(json.dumps(result))
         else:
-            self.finish(json.dumps(r.json()))
+            url = url_path_join(runtime_configs["service_url"], "model", id, "prompt")
+
+            try:
+                r = requests.post(url, headers=get_header(), json=self.get_json_body())
+                r.raise_for_status()
+            except requests.exceptions.HTTPError as err:
+                self.set_status(err.response.status_code)
+                self.finish(json.dumps(err.response.json()))
+            else:
+                self.finish(json.dumps(r.json()))
 
 
 class PromptAcceptanceHandler(APIHandler):
     @tornado.web.authenticated
     def post(self, id):
-        url = url_path_join(runtime_configs["service_url"], "prompt", id, "acceptance")
-
-        try:
-            r = requests.post(url, headers=get_header(), json=self.get_json_body())
-            r.raise_for_status()
-        except requests.exceptions.HTTPError as err:
-            self.set_status(err.response.status_code)
-            self.finish(json.dumps(err.response.json()))
+        if runtime_configs["is_ollama"]:
+            self.finish(json.dumps({"success": "true"}))
         else:
-            self.finish(json.dumps(r.json()))
+            url = url_path_join(runtime_configs["service_url"], "prompt", id, "acceptance")
+
+            try:
+                r = requests.post(url, headers=get_header(), json=self.get_json_body())
+                r.raise_for_status()
+            except requests.exceptions.HTTPError as err:
+                self.set_status(err.response.status_code)
+                self.finish(json.dumps(err.response.json()))
+            else:
+                self.finish(json.dumps(r.json()))
 
 
 def setup_handlers(web_app):
     host_pattern = ".*$"
-    id_regex = r"(?P<id>[\w\-]+)"
+    id_regex = r"(?P<id>[\w\-\_\.\:]+)" # valid chars: alphanum | "-" | "_" | "." | ":"
     base_url = url_path_join(web_app.settings["base_url"], "qiskit-code-assistant")
 
     handlers = [
diff --git a/src/service/autocomplete.ts b/src/service/autocomplete.ts
index 508425a..79e5673 100644
--- a/src/service/autocomplete.ts
+++ b/src/service/autocomplete.ts
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-import { getModel, postModelPrompt } from './api';
+import { postModelPrompt } from './api';
 import { showDisclaimer } from './disclaimer';
 import { getCurrentModel } from './modelHandler';
 import { checkAPIToken } from './token';
@@ -51,25 +51,21 @@ export async function autoComplete(text: string): Promise<ICompletionReturn> {
       const requestText = text.slice(startingOffset, text.length);
       const model = getCurrentModel();
 
-      return await getModel(model?._id || '')
-        .then(async model => {
-          if (model.disclaimer?.accepted) {
+      if (model === undefined) {
+        console.error('Failed to send prompt', 'No model selected');
+        return emptyReturn;
+      } else if (model.disclaimer?.accepted) {
+        return await promptPromise(model._id, requestText);
+      } else {
+        return await showDisclaimer(model._id).then(async accepted => {
+          if (accepted) {
             return await promptPromise(model._id, requestText);
           } else {
-            return await showDisclaimer(model._id).then(async accepted => {
-              if (accepted) {
-                return await promptPromise(model._id, requestText);
-              } else {
-                console.error('Disclaimer not accepted');
-                return emptyReturn;
-              }
-            });
+            console.error('Disclaimer not accepted');
+            return emptyReturn;
           }
-        })
-        .catch(reason => {
-          console.error('Failed to send prompt', reason);
-          return emptyReturn;
         });
+      }
     })
     .catch(reason => {
       console.error('Failed to send prompt', reason);

From 5c4d3cc18abc7a9074f8acce9c0b5f921f5dd2b4 Mon Sep 17 00:00:00 2001
From: Alex Bozarth <ajbozart@us.ibm.com>
Date: Thu, 10 Oct 2024 15:10:23 -0500
Subject: [PATCH 2/6] Switch to OpenAI API

---
 qiskit_code_assistant_jupyterlab/handlers.py | 69 ++++++++++++--------
 src/index.ts                                 |  4 +-
 2 files changed, 44 insertions(+), 29 deletions(-)

diff --git a/qiskit_code_assistant_jupyterlab/handlers.py b/qiskit_code_assistant_jupyterlab/handlers.py
index bdafadd..5260e99 100644
--- a/qiskit_code_assistant_jupyterlab/handlers.py
+++ b/qiskit_code_assistant_jupyterlab/handlers.py
@@ -16,6 +16,7 @@
 
 import json
 import os
+from datetime import datetime
 from pathlib import Path
 
 import requests
@@ -27,7 +28,7 @@
 runtime_configs = {
     "service_url": "http://localhost",
     "api_token": "",
-    "is_ollama": False,
+    "is_openai": False,
 }
 
 
@@ -64,19 +65,19 @@ def get_header():
         "Content-Type": "application/json",
         "X-Caller": "qiskit-code-assistant-jupyterlab",
     }
-    if not runtime_configs["is_ollama"]:
+    if not runtime_configs["is_openai"]:
         header["Authorization"] = f"Bearer {runtime_configs['api_token']}"
     return header
 
 
-def convert_ollama(model):
+def convert_openai(model):
     return {
-        "_id": model["model"],
+        "_id": model["id"],
         "disclaimer": {"accepted": "true"},
-        "display_name": model["name"],
+        "display_name": model["id"],
         "doc_link": "",
         "license": {"name": "", "link": ""},
-        "model_id": model["model"],
+        "model_id": model["id"],
         "prompt_type": 1,
         "token_limit": 255
     }
@@ -95,8 +96,9 @@ def post(self):
 
         try:
             r = requests.get(url_path_join(runtime_configs["service_url"]), headers=get_header())
-            # TODO: Replace with a check against the QCA service instead
-            runtime_configs["is_ollama"] = ("Ollama is running" in r.text)
+            runtime_configs["is_openai"] = (r.json()["name"] != "qiskit-code-assistant")
+        except requests.exceptions.JSONDecodeError:
+            runtime_configs["is_openai"] = True
         finally:
             self.finish(json.dumps({"url": runtime_configs["service_url"]}))
 
@@ -105,7 +107,7 @@ class TokenHandler(APIHandler):
     @tornado.web.authenticated
     def get(self):
         self.finish(json.dumps({"success": (runtime_configs["api_token"] != ""
-                                            or runtime_configs["is_ollama"])}))
+                                            or runtime_configs["is_openai"])}))
 
     @tornado.web.authenticated
     def post(self):
@@ -119,16 +121,16 @@ def post(self):
 class ModelsHandler(APIHandler):
     @tornado.web.authenticated
     def get(self):
-        if runtime_configs["is_ollama"]:
-            url = url_path_join(runtime_configs["service_url"], "api", "tags")
+        if runtime_configs["is_openai"]:
+            url = url_path_join(runtime_configs["service_url"], "v1", "models")
             models = []
             try:
                 r = requests.get(url, headers=get_header())
                 r.raise_for_status()
 
                 if r.ok:
-                    ollama_models = r.json()["models"]
-                    models = list(map(convert_ollama, ollama_models))
+                    data = r.json()["data"]
+                    models = list(map(convert_openai, data))
             except requests.exceptions.HTTPError as err:
                 self.set_status(err.response.status_code)
                 self.finish(json.dumps(err.response.json()))
@@ -150,9 +152,20 @@ def get(self):
 class ModelHandler(APIHandler):
     @tornado.web.authenticated
     def get(self, id):
-        if runtime_configs["is_ollama"]:
-            self.set_status(501, "Not implemented")
-            self.finish()
+        if runtime_configs["is_openai"]:
+            url = url_path_join(runtime_configs["service_url"], "v1", "models", id)
+            model = {}
+            try:
+                r = requests.get(url, headers=get_header())
+                r.raise_for_status()
+
+                if r.ok:
+                    model = convert_openai(r.json())
+            except requests.exceptions.HTTPError as err:
+                self.set_status(err.response.status_code)
+                self.finish(json.dumps(err.response.json()))
+            else:
+                self.finish(json.dumps(model))
         else:
             url = url_path_join(runtime_configs["service_url"], "model", id)
 
@@ -169,7 +182,7 @@ def get(self, id):
 class DisclaimerHandler(APIHandler):
     @tornado.web.authenticated
     def get(self, id):
-        if runtime_configs["is_ollama"]:
+        if runtime_configs["is_openai"]:
             self.set_status(501, "Not implemented")
             self.finish()
         else:
@@ -188,7 +201,7 @@ def get(self, id):
 class DisclaimerAcceptanceHandler(APIHandler):
     @tornado.web.authenticated
     def post(self, id):
-        if runtime_configs["is_ollama"]:
+        if runtime_configs["is_openai"]:
             self.set_status(501, "Not implemented")
             self.finish()
         else:
@@ -209,25 +222,25 @@ def post(self, id):
 class PromptHandler(APIHandler):
     @tornado.web.authenticated
     def post(self, id):
-        if runtime_configs["is_ollama"]:
-            url = url_path_join(runtime_configs["service_url"], "api", "generate")
+        if runtime_configs["is_openai"]:
+            url = url_path_join(runtime_configs["service_url"], "v1", "completions")
             result = {}
             try:
                 r = requests.post(url,
                                   headers=get_header(),
                                   json={
                                       "model": id,
-                                      "prompt": self.get_json_body()["input"],
-                                      "stream": False
+                                      "prompt": self.get_json_body()["input"]
                                   })
                 r.raise_for_status()
 
                 if r.ok:
-                    ollama_response = r.json()
+                    response = r.json()
                     result = {
-                        "results": [{"generated_text": ollama_response["response"]}],
-                        "prompt_id": ollama_response["created_at"],
-                        "created_at": ollama_response["created_at"]
+                        "results": list(map(lambda c: {"generated_text": c["text"]},
+                                            response["choices"])),
+                        "prompt_id": response["id"],
+                        "created_at": datetime.fromtimestamp(int(response["created"])).isoformat()
                     }
             except requests.exceptions.HTTPError as err:
                 self.set_status(err.response.status_code)
@@ -250,7 +263,7 @@ def post(self, id):
 class PromptAcceptanceHandler(APIHandler):
     @tornado.web.authenticated
     def post(self, id):
-        if runtime_configs["is_ollama"]:
+        if runtime_configs["is_openai"]:
             self.finish(json.dumps({"success": "true"}))
         else:
             url = url_path_join(runtime_configs["service_url"], "prompt", id, "acceptance")
@@ -267,7 +280,7 @@ def post(self, id):
 
 def setup_handlers(web_app):
     host_pattern = ".*$"
-    id_regex = r"(?P<id>[\w\-\_\.\:]+)" # valid chars: alphanum | "-" | "_" | "." | ":"
+    id_regex = r"(?P<id>[\w\-\_\.\:]+)"  # valid chars: alphanum | "-" | "_" | "." | ":"
     base_url = url_path_join(web_app.settings["base_url"], "qiskit-code-assistant")
 
     handlers = [
diff --git a/src/index.ts b/src/index.ts
index d996b8d..3a458c1 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -68,7 +68,9 @@ const plugin: JupyterFrontEndPlugin<void> = {
 
     postServiceUrl(settings.composite['serviceUrl'] as string);
     settings.changed.connect(() =>
-      postServiceUrl(settings.composite['serviceUrl'] as string)
+      postServiceUrl(settings.composite['serviceUrl'] as string).then(() =>
+        refreshModelsList()
+      )
     );
 
     const provider = new QiskitCompletionProvider({ settings });

From f36623570f1c458fffbf4f380c48f96f8a49dc79 Mon Sep 17 00:00:00 2001
From: Alex Bozarth <ajbozart@us.ibm.com>
Date: Fri, 11 Oct 2024 13:02:59 -0500
Subject: [PATCH 3/6] Address review: add OPENAI_VERSION constant, handle KeyError

---
 qiskit_code_assistant_jupyterlab/handlers.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/qiskit_code_assistant_jupyterlab/handlers.py b/qiskit_code_assistant_jupyterlab/handlers.py
index 5260e99..69b7b0f 100644
--- a/qiskit_code_assistant_jupyterlab/handlers.py
+++ b/qiskit_code_assistant_jupyterlab/handlers.py
@@ -25,6 +25,8 @@
 from jupyter_server.utils import url_path_join
 from qiskit_ibm_runtime import QiskitRuntimeService
 
+OPENAI_VERSION = "v1"
+
 runtime_configs = {
     "service_url": "http://localhost",
     "api_token": "",
@@ -97,7 +99,7 @@ def post(self):
         try:
             r = requests.get(url_path_join(runtime_configs["service_url"]), headers=get_header())
             runtime_configs["is_openai"] = (r.json()["name"] != "qiskit-code-assistant")
-        except requests.exceptions.JSONDecodeError:
+        except (requests.exceptions.JSONDecodeError, KeyError):
             runtime_configs["is_openai"] = True
         finally:
             self.finish(json.dumps({"url": runtime_configs["service_url"]}))
@@ -122,7 +124,7 @@ class ModelsHandler(APIHandler):
     @tornado.web.authenticated
     def get(self):
         if runtime_configs["is_openai"]:
-            url = url_path_join(runtime_configs["service_url"], "v1", "models")
+            url = url_path_join(runtime_configs["service_url"], OPENAI_VERSION, "models")
             models = []
             try:
                 r = requests.get(url, headers=get_header())
@@ -153,7 +155,7 @@ class ModelHandler(APIHandler):
     @tornado.web.authenticated
     def get(self, id):
         if runtime_configs["is_openai"]:
-            url = url_path_join(runtime_configs["service_url"], "v1", "models", id)
+            url = url_path_join(runtime_configs["service_url"], OPENAI_VERSION, "models", id)
             model = {}
             try:
                 r = requests.get(url, headers=get_header())
@@ -223,7 +225,7 @@ class PromptHandler(APIHandler):
     @tornado.web.authenticated
     def post(self, id):
         if runtime_configs["is_openai"]:
-            url = url_path_join(runtime_configs["service_url"], "v1", "completions")
+            url = url_path_join(runtime_configs["service_url"], OPENAI_VERSION, "completions")
             result = {}
             try:
                 r = requests.post(url,

From 230d244d5be67819f988a9d7b5b1dc00ffb5e34c Mon Sep 17 00:00:00 2001
From: Alex Bozarth <ajbozart@us.ibm.com>
Date: Wed, 30 Oct 2024 12:59:38 -0500
Subject: [PATCH 4/6] Update docs

---
 GETTING_STARTED.md | 4 +++-
 README-PyPi.md     | 3 ++-
 README.md          | 4 +++-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/GETTING_STARTED.md b/GETTING_STARTED.md
index 3db6ea6..c73c744 100644
--- a/GETTING_STARTED.md
+++ b/GETTING_STARTED.md
@@ -111,7 +111,9 @@ There are a few settings we recommend to edit in your user settings.
    `Tab`, the inline completer has a default of 10 seconds.
 
 3. If you want to change the instance of the Qiskit Code Assistant Service that the
-   extension should use you can edit the Qiskit Code Assistant setting `serviceUrl`
+   extension should use you can edit the Qiskit Code Assistant setting `serviceUrl`.
+   This can also be set to any OpenAI compatible API endpoint.
+
 
 4. Keyboard shortcuts can be changed by searching for `completer` in the Keyboard Shortcuts
    settings and adding new shortcuts for the relevant commands.
diff --git a/README-PyPi.md b/README-PyPi.md
index 44646c4..11e5f63 100644
--- a/README-PyPi.md
+++ b/README-PyPi.md
@@ -117,7 +117,8 @@ There are a few settings we recommend to edit in your user settings.
    `Tab`, the inline completer has a default of 10 seconds.
 
 3. If you want to change the instance of the Qiskit Code Assistant Service that the
-   extension should use you can edit the Qiskit Code Assistant setting `serviceUrl`
+   extension should use you can edit the Qiskit Code Assistant setting `serviceUrl`.
+   This can also be set to any OpenAI compatible API endpoint.
 
 4. Keyboard shortcuts can be changed by searching for `completer` in the Keyboard Shortcuts
    settings and adding new shortcuts for the relevant commands.
diff --git a/README.md b/README.md
index 2ce4607..a209d11 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,9 @@ for the frontend extension.
 ## Requirements
 
 - JupyterLab >= 4.2.0
-- An IBM Quantum premium account
+- Access to either:
+  - An IBM Quantum premium account
+  - A model with an OpenAI compatible API endpoint
 
 ## Install
 

From 708ad51d490509263047a1865c16149d8161fc6b Mon Sep 17 00:00:00 2001
From: Alex Bozarth <ajbozart@us.ibm.com>
Date: Thu, 31 Oct 2024 21:06:38 -0500
Subject: [PATCH 5/6] fix lint error

---
 GETTING_STARTED.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/GETTING_STARTED.md b/GETTING_STARTED.md
index 7ffa72a..62f8bb0 100644
--- a/GETTING_STARTED.md
+++ b/GETTING_STARTED.md
@@ -114,7 +114,6 @@ There are a few settings we recommend to edit in your user settings.
    extension should use you can edit the Qiskit Code Assistant setting `serviceUrl`.
    This can also be set to any OpenAI compatible API endpoint.
 
-
 4. Keyboard shortcuts can be changed by searching for `completer` in the Keyboard Shortcuts
    settings and adding new shortcuts for the relevant commands.
 

From 6eb428252c74fe953789617fd59e265a1627fca8 Mon Sep 17 00:00:00 2001
From: Alex Bozarth <ajbozart@us.ibm.com>
Date: Mon, 11 Nov 2024 12:32:14 -0600
Subject: [PATCH 6/6] update docs

---
 GETTING_STARTED.md | 6 ++++--
 README-PyPi.md     | 4 +---
 README.md          | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/GETTING_STARTED.md b/GETTING_STARTED.md
index 62f8bb0..b8fa0b4 100644
--- a/GETTING_STARTED.md
+++ b/GETTING_STARTED.md
@@ -3,7 +3,9 @@
 ## Requirements
 
 - JupyterLab >= 4.3.0
-- An IBM Quantum premium account
+- Access to either:
+  - An IBM Quantum premium account
+  - A service exposing LLMs using OpenAI-compatible API endpoints
 
 ## Install
 
@@ -112,7 +114,7 @@ There are a few settings we recommend to edit in your user settings.
 
 3. If you want to change the instance of the Qiskit Code Assistant Service that the
    extension should use you can edit the Qiskit Code Assistant setting `serviceUrl`.
-   This can also be set to any OpenAI compatible API endpoint.
+   This can also be set to any service exposing LLMs using OpenAI-compatible API endpoints.
 
 4. Keyboard shortcuts can be changed by searching for `completer` in the Keyboard Shortcuts
    settings and adding new shortcuts for the relevant commands.
diff --git a/README-PyPi.md b/README-PyPi.md
index 11e5f63..004f150 100644
--- a/README-PyPi.md
+++ b/README-PyPi.md
@@ -1,7 +1,5 @@
 # Qiskit Code Assistant (Beta)
 
-> This experimental feature is only available, as of today, to IBM Quantum premium users.
-> If you are not part of the IBM Quantum premium plan, you can still install this extension; however you will not be able to use the assistant.
 > The Qiskit Code Assistant is a beta release, subject to change.
 
 Write and optimize Qiskit code with a generative AI code assistant.
@@ -118,7 +116,7 @@ There are a few settings we recommend to edit in your user settings.
 
 3. If you want to change the instance of the Qiskit Code Assistant Service that the
    extension should use you can edit the Qiskit Code Assistant setting `serviceUrl`.
-   This can also be set to any OpenAI compatible API endpoint.
+   This can also be set to any service exposing LLMs using OpenAI-compatible API endpoints.
 
 4. Keyboard shortcuts can be changed by searching for `completer` in the Keyboard Shortcuts
    settings and adding new shortcuts for the relevant commands.
diff --git a/README.md b/README.md
index 928d967..f291743 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ for the frontend extension.
 - JupyterLab >= 4.3.0
 - Access to either:
   - An IBM Quantum premium account
-  - A model with an OpenAI compatible API endpoint
+  - A service exposing LLMs using OpenAI-compatible API endpoints
 
 ## Install