Merge pull request #155 from Azure-Samples/githubmodels
Add support for GitHub models, stop copying to .env
pamelafox authored Aug 2, 2024
2 parents 0f11b4a + d39190e commit 20628f3
Showing 9 changed files with 136 additions and 65 deletions.
23 changes: 16 additions & 7 deletions .env.sample
@@ -1,9 +1,18 @@
# The .env file should be auto-generated by `azd up` -
# see README for more detailed instructions.
AZURE_OPENAI_ENDPOINT=
# Can be "azure", "github", or "local"
OPENAI_HOST="azure"

# For Azure host:
AZURE_OPENAI_API_VERSION="2024-02-15-preview"
AZURE_OPENAI_ENDPOINT="https://YOUR-ENDPOINT-HERE.openai.azure.com/"
# Name of the Azure OpenAI GPT deployment (different from the model name)
AZURE_OPENAI_CHATGPT_DEPLOYMENT=chatgpt
# This only needs to be specified when using the key instead of DefaultAzureCredential
AZURE_OPENAI_KEY=
# Name of the Azure OpenAI GPT deployment (different from the model name)
AZURE_OPENAI_CHATGPT_DEPLOYMENT=
# Azure OpenAI API version
AZURE_OPENAI_API_VERSION="2024-02-15-preview"

# For GitHub models:
GITHUB_MODELS_ENDPOINT="https://models.inference.ai.azure.com"
GITHUB_MODELS_NAME="gpt-4o"

# For local models, like Ollama/llamafile:
LOCAL_MODELS_ENDPOINT="http://localhost:11434/v1"
LOCAL_MODELS_NAME="llama3.1"
54 changes: 42 additions & 12 deletions README.md
@@ -12,11 +12,15 @@ since the local app needs credentials for Azure OpenAI to work properly.

* [Features](#features)
* [Architecture diagram](#architecture-diagram)
* [Opening the project](#opening-the-project)
* [Getting started](#getting-started)
* [GitHub Codespaces](#github-codespaces)
* [VS Code Dev Containers](#vs-code-dev-containers)
* [Local Environment](#local-environment)
* [Deploying](#deploying)
* [Development server](#development-server)
* [Costs](#costs)
* [Security Guidelines](#security-guidelines)
* [Guidance](#guidance)
* [Costs](#costs)
* [Security Guidelines](#security-guidelines)
* [Resources](#resources)

## Features
@@ -32,7 +36,7 @@ since the local app needs credentials for Azure OpenAI to work properly.

![Architecture diagram: Azure Container Apps inside Container Apps Environment, connected to Container Registry with Container, connected to Managed Identity for Azure OpenAI](readme_diagram.png)

## Opening the project
## Getting started

You have a few options for getting started with this template.
The quickest way to get started is GitHub Codespaces, since it will set up all the tools for you, but you can also [set it up locally](#local-environment).
@@ -142,17 +146,43 @@ azd pipeline config

## Development server

Assuming you've run the steps in [Opening the project](#opening-the-project) and the steps in [Deploying](#deploying), you can now run the Quart app in your development environment:
To run this app, you need either an Azure OpenAI account deployed (from the [deploying steps](#deploying)), a model from [GitHub models](https://github.com/marketplace/models), or a [local LLM server](/docs/local_ollama.md).

```shell
python -m quart --app src.quartapp run --port 50505 --reload
```
1. Copy `.env.sample` into `.env`:

```shell
cp .env.sample .env
```

2. For use with Azure OpenAI, run this command to get the value of `AZURE_OPENAI_ENDPOINT` from your deployed resource group and paste it in the `.env` file:

```shell
azd env get-value AZURE_OPENAI_ENDPOINT
```
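
   If you'd rather not paste the value by hand, a convenience one-liner like the following (a sketch, not a command from this repo) appends it directly:

```shell
# Hypothetical convenience: append the deployed endpoint to .env in one step
echo "AZURE_OPENAI_ENDPOINT=$(azd env get-value AZURE_OPENAI_ENDPOINT)" >> .env
```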

3. For use with GitHub models, change `OPENAI_HOST` to "github" in the `.env` file.

You'll need a `GITHUB_TOKEN` environment variable that stores a GitHub personal access token.
If you're running this inside a GitHub Codespace, the token will be automatically available.
If not, generate a new [personal access token](https://github.com/settings/tokens) and run this command to set the `GITHUB_TOKEN` environment variable:

```shell
export GITHUB_TOKEN="<your-github-token-goes-here>"
```
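
   To sanity-check the token, you can call the endpoint directly; this assumes GitHub models speaks the standard OpenAI chat completions protocol, which is what this app's use of the OpenAI client implies:

```shell
# Hypothetical smoke test: expects an OpenAI-compatible /chat/completions route
curl -s "https://models.inference.ai.azure.com/chat/completions" \
  -H "Authorization: Bearer $GITHUB_TOKEN" \
  -H "Content-Type: application/json" \
  -d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "Say hello"}]}'
```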

4. For use with local models, change `OPENAI_HOST` to "local" in the `.env` file and change `LOCAL_MODELS_ENDPOINT` and `LOCAL_MODELS_NAME` to match the local server. See [local LLM server](/docs/local_ollama.md) for more information.
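
   For example, to match the Ollama defaults shipped in `.env.sample`:

```shell
# Values below match the local-model defaults in .env.sample (Ollama)
OPENAI_HOST="local"
LOCAL_MODELS_ENDPOINT="http://localhost:11434/v1"
LOCAL_MODELS_NAME="llama3.1"
```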

5. Start the development server:

```shell
python -m quart --app src.quartapp run --port 50505 --reload
```

This will start the app on port 50505, and you can access it at `http://localhost:50505`.

To save costs during development, you may point the app at a [local LLM server](/docs/local_ollama.md).
## Guidance

## Costs
### Costs

Pricing varies per region and usage, so it isn't possible to predict exact costs for your usage.
The majority of the Azure resources used in this infrastructure are on usage-based pricing tiers.
@@ -168,7 +198,7 @@ You can try the [Azure pricing calculator](https://azure.com/e/2176802ea14941e49
⚠️ To avoid unnecessary costs, remember to take down your app if it's no longer in use,
either by deleting the resource group in the Portal or running `azd down`.

## Security Guidelines
### Security Guidelines

This template uses [Managed Identity](https://learn.microsoft.com/entra/identity/managed-identities-azure-resources/overview) for authenticating to the Azure OpenAI service.

12 changes: 0 additions & 12 deletions azure.yaml
@@ -8,15 +8,3 @@ services:
project: ./src
language: py
host: containerapp
hooks:
postprovision:
windows:
shell: pwsh
run: $output = azd env get-values; Add-Content -Path .env -Value $output;
interactive: true
continueOnError: false
posix:
shell: sh
run: azd env get-values > .env
interactive: true
continueOnError: false
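
With these hooks removed, `azd up` no longer writes environment values into `.env`. If you want the old behavior as a one-off, the same command the removed posix hook ran still works manually:

```shell
# Manual equivalent of the removed postprovision hook
azd env get-values > .env
```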
24 changes: 20 additions & 4 deletions docs/local_ollama.md
@@ -4,16 +4,32 @@ You may want to save costs by developing against a local LLM server, such as
[llamafile](https://github.com/Mozilla-Ocho/llamafile/). Note that a local LLM
will generally be slower and not as sophisticated.

Once you've got your local LLM running and serving an OpenAI-compatible endpoint, define `LOCAL_OPENAI_ENDPOINT` in your `.env` file.
Once you've got your local LLM running and serving an OpenAI-compatible endpoint, define `LOCAL_MODELS_ENDPOINT` in your `.env` file.

For example, to point at a local llamafile server running on its default port:
## Llamafile

To point at a local llamafile server running on its default port:

```shell
LOCAL_MODELS_ENDPOINT="http://localhost:8080/v1"
```

If you're running inside a dev container, use this local URL instead:

```shell
LOCAL_MODELS_ENDPOINT="http://host.docker.internal:8080/v1"
```

## Ollama

To point at a local Ollama server running on its default port:

```shell
LOCAL_OPENAI_ENDPOINT="http://localhost:8080/v1"
LOCAL_MODELS_ENDPOINT="http://localhost:11434/v1"
```

If you're running inside a dev container, use this local URL instead:

```shell
LOCAL_OPENAI_ENDPOINT="http://host.docker.internal:8080/v1"
LOCAL_MODELS_ENDPOINT="http://host.docker.internal:11434/v1"
```
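
For reference, getting that Ollama endpoint running typically looks like this (assuming a stock Ollama install; this setup is not part of the diff):

```shell
ollama pull llama3.1   # download the model weights
ollama serve           # serve an OpenAI-compatible API on http://localhost:11434/v1
```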
6 changes: 4 additions & 2 deletions src/quartapp/__init__.py
@@ -5,9 +5,11 @@
from quart import Quart


def create_app():
def create_app(testing=False):
# We do this here in addition to gunicorn.conf.py, since we don't always use gunicorn
load_dotenv(override=True)
if not testing:
load_dotenv(override=True)

if os.getenv("RUNNING_IN_PRODUCTION"):
logging.basicConfig(level=logging.WARNING)
else:
22 changes: 15 additions & 7 deletions src/quartapp/chat.py
@@ -17,16 +17,23 @@

@bp.before_app_serving
async def configure_openai():
client_args = {}
if os.getenv("LOCAL_OPENAI_ENDPOINT"):
# Use a local endpoint like llamafile server
openai_host = os.getenv("OPENAI_HOST")
if openai_host == "local":
current_app.logger.info("Using local OpenAI-compatible API with no key")
client_args["api_key"] = "no-key-required"
client_args["base_url"] = os.getenv("LOCAL_OPENAI_ENDPOINT")
bp.openai_client = openai.AsyncOpenAI(
**client_args,
api_key="no-key-required",
base_url=os.environ["LOCAL_MODELS_ENDPOINT"],
)
bp.openai_model = os.environ["LOCAL_MODELS_NAME"]
elif openai_host == "github":
current_app.logger.info("Using GitHub-hosted model")
bp.openai_client = openai.AsyncOpenAI(
api_key=os.environ["GITHUB_TOKEN"],
base_url=os.environ["GITHUB_MODELS_ENDPOINT"],
)
bp.openai_model = os.environ["GITHUB_MODELS_NAME"]
else:
client_args = {}
# Use an Azure OpenAI endpoint instead,
# either with a key or with keyless authentication
if os.getenv("AZURE_OPENAI_KEY"):
@@ -55,6 +62,7 @@ async def configure_openai():
azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
**client_args,
)
bp.openai_model = os.getenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT")


@bp.after_app_serving
@@ -80,7 +88,7 @@ async def response_stream():

chat_coroutine = bp.openai_client.chat.completions.create(
# Azure OpenAI takes the deployment name as the model name
model=os.environ["AZURE_OPENAI_CHATGPT_DEPLOYMENT"],
model=bp.openai_model,
messages=all_messages,
stream=True,
)
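Read together, these hunks give the new `configure_openai` roughly the following shape. Since the Azure branch is partly elided in this diff, its body below is a sketch based only on the visible lines (keyed auth; the real code also supports keyless authentication):

```python
import os

import openai


def build_openai_client():
    """Sketch of the three-way host branch introduced in configure_openai."""
    openai_host = os.getenv("OPENAI_HOST")
    if openai_host == "local":
        # Local OpenAI-compatible server (Ollama/llamafile): no real key needed
        client = openai.AsyncOpenAI(
            api_key="no-key-required",
            base_url=os.environ["LOCAL_MODELS_ENDPOINT"],
        )
        model = os.environ["LOCAL_MODELS_NAME"]
    elif openai_host == "github":
        # GitHub models: authenticate with a personal access token
        client = openai.AsyncOpenAI(
            api_key=os.environ["GITHUB_TOKEN"],
            base_url=os.environ["GITHUB_MODELS_ENDPOINT"],
        )
        model = os.environ["GITHUB_MODELS_NAME"]
    else:
        # Azure OpenAI branch (partly elided in the diff): keyed auth shown here
        client = openai.AsyncAzureOpenAI(
            api_key=os.environ["AZURE_OPENAI_KEY"],
            api_version=os.environ["AZURE_OPENAI_API_VERSION"],
            azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
        )
        model = os.getenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT")
    return client, model
```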
32 changes: 16 additions & 16 deletions src/requirements.txt
@@ -4,9 +4,11 @@
#
# pip-compile --output-file=requirements.txt pyproject.toml
#
aiofiles==23.2.1
aiofiles==24.1.0
# via quart
aiohttp==3.9.5
aiohappyeyeballs==2.3.4
# via aiohttp
aiohttp==3.10.0
# via quartapp (pyproject.toml)
aiosignal==1.3.1
# via aiohttp
@@ -21,13 +23,13 @@ attrs==23.2.0
# via aiohttp
azure-core==1.30.2
# via azure-identity
azure-identity==1.17.0
azure-identity==1.17.1
# via quartapp (pyproject.toml)
blinker==1.8.2
# via
# flask
# quart
certifi==2024.6.2
certifi==2024.7.4
# via
# httpcore
# httpx
@@ -41,7 +43,7 @@ click==8.1.7
# flask
# quart
# uvicorn
cryptography==42.0.8
cryptography==43.0.0
# via
# azure-identity
# msal
@@ -95,33 +97,31 @@ markupsafe==2.1.5
# jinja2
# quart
# werkzeug
msal==1.28.1
msal==1.30.0
# via
# azure-identity
# msal-extensions
msal-extensions==1.1.0
msal-extensions==1.2.0
# via azure-identity
multidict==6.0.5
# via
# aiohttp
# yarl
openai==1.37.1
openai==1.37.2
# via quartapp (pyproject.toml)
packaging==24.1
# via
# gunicorn
# msal-extensions
portalocker==2.8.2
# via gunicorn
portalocker==2.10.1
# via msal-extensions
priority==2.0.0
# via hypercorn
pycparser==2.22
# via cffi
pydantic==2.7.4
pydantic==2.8.2
# via openai
pydantic-core==2.18.4
pydantic-core==2.20.1
# via pydantic
pyjwt[crypto]==2.8.0
pyjwt[crypto]==2.9.0
# via msal
python-dotenv==1.0.1
# via
@@ -155,7 +155,7 @@ typing-extensions==4.12.2
# pydantic-core
urllib3==2.2.2
# via requests
uvicorn[standard]==0.30.1
uvicorn[standard]==0.30.5
# via quartapp (pyproject.toml)
uvloop==0.19.0
# via uvicorn
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -131,7 +131,7 @@ async def client(monkeypatch, mock_openai_chatcompletion, mock_defaultazurecrede
monkeypatch.setenv("AZURE_OPENAI_ENDPOINT", "test-openai-service.openai.azure.com")
monkeypatch.setenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT", "test-chatgpt")

quart_app = quartapp.create_app()
quart_app = quartapp.create_app(testing=True)

async with quart_app.test_app() as test_app:
quart_app.config.update({"TESTING": True})
26 changes: 22 additions & 4 deletions tests/test_app.py
@@ -50,7 +50,7 @@ async def test_openai_key(monkeypatch):
monkeypatch.setenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT", "test-chatgpt")
monkeypatch.setenv("AZURE_OPENAI_VERSION", "2023-10-01-preview")

quart_app = quartapp.create_app()
quart_app = quartapp.create_app(testing=True)

async with quart_app.test_app():
assert quart_app.blueprints["chat"].openai_client.api_key == "test-key"
@@ -66,18 +66,36 @@ async def test_openai_managedidentity(monkeypatch):

monkeypatch.setattr("azure.identity.aio.ManagedIdentityCredential", mock_cred.MockAzureCredential)

quart_app = quartapp.create_app()
quart_app = quartapp.create_app(testing=True)

async with quart_app.test_app():
assert quart_app.blueprints["chat"].openai_client._azure_ad_token_provider is not None


@pytest.mark.asyncio
async def test_openai_local(monkeypatch):
monkeypatch.setenv("LOCAL_OPENAI_ENDPOINT", "http://localhost:8080")
monkeypatch.setenv("OPENAI_HOST", "local")
monkeypatch.setenv("LOCAL_MODELS_ENDPOINT", "http://localhost:8080")
monkeypatch.setenv("LOCAL_MODELS_NAME", "test-model")

quart_app = quartapp.create_app()
quart_app = quartapp.create_app(testing=True)

async with quart_app.test_app():
assert quart_app.blueprints["chat"].openai_client.api_key == "no-key-required"
assert quart_app.blueprints["chat"].openai_client.base_url == "http://localhost:8080"
assert quart_app.blueprints["chat"].openai_model == "test-model"


@pytest.mark.asyncio
async def test_openai_github(monkeypatch):
monkeypatch.setenv("OPENAI_HOST", "github")
monkeypatch.setenv("GITHUB_MODELS_ENDPOINT", "https://models.inference.ai.azure.com")
monkeypatch.setenv("GITHUB_MODELS_NAME", "gpt-4o")
monkeypatch.setenv("GITHUB_TOKEN", "fake-token")

quart_app = quartapp.create_app(testing=True)

async with quart_app.test_app():
assert quart_app.blueprints["chat"].openai_client.api_key == "fake-token"
assert quart_app.blueprints["chat"].openai_client.base_url == "https://models.inference.ai.azure.com"
assert quart_app.blueprints["chat"].openai_model == "gpt-4o"
