diff --git a/configs/llm_model_configs/azure-gpt-4o-mini.json b/configs/llm_model_configs/azure-gpt-4o-mini.json new file mode 100644 index 0000000000..58eb3a00dc --- /dev/null +++ b/configs/llm_model_configs/azure-gpt-4o-mini.json @@ -0,0 +1,7 @@ +{ + "context_window": 128000, + "model": "gpt-4o-mini", + "model_endpoint_type": "azure", + "api_version": "2023-03-15-preview", + "model_wrapper": null +} diff --git a/letta/agent.py b/letta/agent.py index 831e0f4a71..164c33690d 100644 --- a/letta/agent.py +++ b/letta/agent.py @@ -18,7 +18,7 @@ MESSAGE_SUMMARY_WARNING_FRAC, ) from letta.interface import AgentInterface -from letta.llm_api.llm_api_tools import create, is_context_overflow_error +from letta.llm_api.llm_api_tools import create from letta.memory import ArchivalMemory, RecallMemory, summarize_messages from letta.metadata import MetadataStore from letta.persistence_manager import LocalStateManager @@ -56,6 +56,7 @@ ) from .errors import LLMError +from .llm_api.helpers import is_context_overflow_error def compile_memory_metadata_block( @@ -207,7 +208,7 @@ def step( recreate_message_timestamp: bool = True, # if True, when input is a Message type, recreated the 'created_at' field stream: bool = False, # TODO move to config? timestamp: Optional[datetime.datetime] = None, - inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT, + inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT, ms: Optional[MetadataStore] = None, ) -> AgentStepResponse: """ @@ -223,7 +224,7 @@ def update_state(self) -> AgentState: class Agent(BaseAgent): def __init__( self, - interface: AgentInterface, + interface: Optional[AgentInterface], # agents can be created from providing agent_state agent_state: AgentState, tools: List[Tool], @@ -460,7 +461,7 @@ def _get_ai_reply( function_call: str = "auto", first_message: bool = False, # hint stream: bool = False, # TODO move to config? 
- inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT, + inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT, ) -> ChatCompletionResponse: """Get response from LLM API""" try: @@ -478,7 +479,7 @@ def _get_ai_reply( stream=stream, stream_inferface=self.interface, # putting inner thoughts in func args or not - inner_thoughts_in_kwargs=inner_thoughts_in_kwargs, + inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option, ) if len(response.choices) == 0: @@ -560,6 +561,8 @@ def _handle_ai_response( function_call = ( response_message.function_call if response_message.function_call is not None else response_message.tool_calls[0].function ) + + # Get the name of the function function_name = function_call.name printd(f"Request to call function {function_name} with tool_call_id: {tool_call_id}") @@ -608,6 +611,13 @@ def _handle_ai_response( self.interface.function_message(f"Error: {error_msg}", msg_obj=messages[-1]) return messages, False, True # force a heartbeat to allow agent to handle error + # Check if inner thoughts are in the function call arguments (apparently possible if you are using Azure) + if "inner_thoughts" in function_args: + response_message.content = function_args.pop("inner_thoughts") + # The content is then the internal monologue, not chat + if response_message.content: + self.interface.internal_monologue(response_message.content, msg_obj=messages[-1]) + # (Still parsing function args) # Handle requests for immediate heartbeat heartbeat_request = function_args.pop("request_heartbeat", None) @@ -716,7 +726,7 @@ def step( recreate_message_timestamp: bool = True, # if True, when input is a Message type, recreated the 'created_at' field stream: bool = False, # TODO move to config? timestamp: Optional[datetime.datetime] = None, - inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT, + inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT, ms: Optional[MetadataStore] = None, ) -> AgentStepResponse: """Top-level event message handler for the Letta agent""" @@ -795,7 +805,7 @@ def step( message_sequence=input_message_sequence, first_message=True, # passed through to the prompt formatter stream=stream, - inner_thoughts_in_kwargs=inner_thoughts_in_kwargs, + inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option, ) if verify_first_message_correctness(response, require_monologue=self.first_message_verify_mono): break @@ -808,7 +818,7 @@ def step( response = self._get_ai_reply( message_sequence=input_message_sequence, stream=stream, - inner_thoughts_in_kwargs=inner_thoughts_in_kwargs, + inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option, ) # Step 3: check if LLM wanted to call a function @@ -892,7 +902,7 @@ def step( recreate_message_timestamp=recreate_message_timestamp, stream=stream, timestamp=timestamp, - inner_thoughts_in_kwargs=inner_thoughts_in_kwargs, + inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option, ms=ms, ) diff --git a/letta/credentials.py b/letta/credentials.py index d662e76e24..05d683ae22 100644 --- a/letta/credentials.py +++ b/letta/credentials.py @@ -30,7 +30,7 @@ class LettaCredentials: # azure config azure_auth_type: str = "api_key" - azure_key: Optional[str] = None + azure_key: Optional[str] = os.getenv("AZURE_OPENAI_API_KEY") # groq config groq_key: Optional[str] = os.getenv("GROQ_API_KEY") diff --git a/letta/errors.py b/letta/errors.py index 852ec874c4..bd1e5421c7 100644 --- a/letta/errors.py +++ b/letta/errors.py @@ -56,7 +56,7 @@ def construct_error_message(messages: List[Union["Message", 
"LettaMessage"]], er error_msg += f" (Explanation: {explanation})" # Pretty print out message JSON - message_json = json.dumps([message.model_dump_json(indent=4) for message in messages], indent=4) + message_json = json.dumps([message.model_dump() for message in messages], indent=4) return f"{error_msg}\n\n{message_json}" diff --git a/letta/llm_api/azure_openai.py b/letta/llm_api/azure_openai.py index bdcc8806b2..57b49f7cfc 100644 --- a/letta/llm_api/azure_openai.py +++ b/letta/llm_api/azure_openai.py @@ -2,8 +2,11 @@ import requests +from letta.schemas.llm_config import LLMConfig from letta.schemas.openai.chat_completion_response import ChatCompletionResponse +from letta.schemas.openai.chat_completions import ChatCompletionRequest from letta.schemas.openai.embedding_response import EmbeddingResponse +from letta.settings import ModelSettings from letta.utils import smart_urljoin MODEL_TO_AZURE_ENGINE = { @@ -13,17 +16,16 @@ "gpt-3.5": "gpt-35-turbo", "gpt-3.5-turbo": "gpt-35-turbo", "gpt-3.5-turbo-16k": "gpt-35-turbo-16k", + "gpt-4o-mini": "gpt-4o-mini", } -def clean_azure_endpoint(raw_endpoint_name: str) -> str: - """Make sure the endpoint is of format 'https://YOUR_RESOURCE_NAME.openai.azure.com'""" - if raw_endpoint_name is None: - raise ValueError(raw_endpoint_name) - endpoint_address = raw_endpoint_name.strip("/").replace(".openai.azure.com", "") - endpoint_address = endpoint_address.replace("http://", "") - endpoint_address = endpoint_address.replace("https://", "") - return endpoint_address +def get_azure_endpoint(llm_config: LLMConfig, model_settings: ModelSettings): + assert llm_config.api_version, "Missing model version! This field must be provided in the LLM config for Azure." + assert llm_config.model in MODEL_TO_AZURE_ENGINE, f"{llm_config.model} not in supported models: {list(MODEL_TO_AZURE_ENGINE.keys())}" + + model = MODEL_TO_AZURE_ENGINE[llm_config.model] + return f"{model_settings.azure_base_url}/openai/deployments/{model}/chat/completions?api-version={llm_config.api_version}" def azure_openai_get_model_list(url: str, api_key: Union[str, None], api_version: str) -> dict: @@ -72,19 +74,15 @@ def azure_openai_get_model_list(url: str, api_key: Union[str, None], api_version def azure_openai_chat_completions_request( - resource_name: str, deployment_id: str, api_version: str, api_key: str, data: dict + model_settings: ModelSettings, llm_config: LLMConfig, api_key: str, chat_completion_request: ChatCompletionRequest ) -> ChatCompletionResponse: """https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chat-completions""" from letta.utils import printd - assert resource_name is not None, "Missing required field when calling Azure OpenAI" - assert deployment_id is not None, "Missing required field when calling Azure OpenAI" - assert api_version is not None, "Missing required field when calling Azure OpenAI" assert api_key is not None, "Missing required field when calling Azure OpenAI" - resource_name = clean_azure_endpoint(resource_name) - url = f"https://{resource_name}.openai.azure.com/openai/deployments/{deployment_id}/chat/completions?api-version={api_version}" headers = {"Content-Type": "application/json", "api-key": f"{api_key}"} + data = chat_completion_request.model_dump(exclude_none=True) # If functions == None, strip from the payload if "functions" in data and data["functions"] is None: @@ -95,11 +93,10 @@ def azure_openai_chat_completions_request( data.pop("tools") data.pop("tool_choice", None) # extra safe, should exist always (default="auto") - 
printd(f"Sending request to {url}") + model_endpoint = get_azure_endpoint(llm_config, model_settings) + printd(f"Sending request to {model_endpoint}") try: - data["messages"] = [i.to_openai_dict() for i in data["messages"]] - response = requests.post(url, headers=headers, json=data) - printd(f"response = {response}") + response = requests.post(model_endpoint, headers=headers, json=data) response.raise_for_status() # Raises HTTPError for 4XX/5XX status response = response.json() # convert to dict from string printd(f"response.json = {response}") @@ -128,7 +125,6 @@ def azure_openai_embeddings_request( """https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings""" from letta.utils import printd - resource_name = clean_azure_endpoint(resource_name) url = f"https://{resource_name}.openai.azure.com/openai/deployments/{deployment_id}/embeddings?api-version={api_version}" headers = {"Content-Type": "application/json", "api-key": f"{api_key}"} diff --git a/letta/llm_api/helpers.py b/letta/llm_api/helpers.py new file mode 100644 index 0000000000..3fae442ae1 --- /dev/null +++ b/letta/llm_api/helpers.py @@ -0,0 +1,153 @@ +import copy +import json +import warnings +from typing import List, Union + +import requests + +from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING +from letta.schemas.enums import OptionState +from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice +from letta.utils import json_dumps + + +# TODO update to use better types +def add_inner_thoughts_to_functions( + functions: List[dict], + inner_thoughts_key: str, + inner_thoughts_description: str, + inner_thoughts_required: bool = True, + # inner_thoughts_to_front: bool = True, TODO support sorting somewhere, probably in the to_dict? 
+) -> List[dict]: + """Add an inner_thoughts kwarg to every function in the provided list""" + # return copies + new_functions = [] + + # functions is a list of dicts in the OpenAI schema (https://platform.openai.com/docs/api-reference/chat/create) + for function_object in functions: + function_params = function_object["parameters"]["properties"] + required_params = list(function_object["parameters"]["required"]) + + # if the inner thoughts arg doesn't exist, add it + if inner_thoughts_key not in function_params: + function_params[inner_thoughts_key] = { + "type": "string", + "description": inner_thoughts_description, + } + + # make sure it's tagged as required + new_function_object = copy.deepcopy(function_object) + if inner_thoughts_required and inner_thoughts_key not in required_params: + required_params.append(inner_thoughts_key) + new_function_object["parameters"]["required"] = required_params + + new_functions.append(new_function_object) + + # return a list of copies + return new_functions + + +def unpack_all_inner_thoughts_from_kwargs( + response: ChatCompletionResponse, + inner_thoughts_key: str, +) -> ChatCompletionResponse: + """Strip the inner thoughts out of the tool call and put it in the message content""" + if len(response.choices) == 0: + raise ValueError(f"Unpacking inner thoughts from empty response not supported") + + new_choices = [] + for choice in response.choices: + new_choices.append(unpack_inner_thoughts_from_kwargs(choice, inner_thoughts_key)) + + # return an updated copy + new_response = response.model_copy(deep=True) + new_response.choices = new_choices + return new_response + + +def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -> Choice: + message = choice.message + if message.role == "assistant" and message.tool_calls and len(message.tool_calls) >= 1: + if len(message.tool_calls) > 1: + warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported") + # TODO support multiple tool calls + tool_call = message.tool_calls[0] + + try: + # Sadly we need to parse the JSON since args are in string format + func_args = dict(json.loads(tool_call.function.arguments)) + if inner_thoughts_key in func_args: + # extract the inner thoughts + inner_thoughts = func_args.pop(inner_thoughts_key) + + # replace the kwargs + new_choice = choice.model_copy(deep=True) + new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args) + # also replace the message content + if new_choice.message.content is not None: + warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})") + new_choice.message.content = inner_thoughts + + return new_choice + else: + warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}") + + except json.JSONDecodeError as e: + warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}") + raise e + + +def is_context_overflow_error(exception: Union[requests.exceptions.RequestException, Exception]) -> bool: + """Checks if an exception is due to context overflow (based on common OpenAI response messages)""" + from letta.utils import printd + + match_string = OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING + + # Backwards compatibility with openai python package/client v0.28 (pre-v1 client migration) + if match_string in str(exception): + printd(f"Found '{match_string}' in str(exception)={(str(exception))}") + return True + + # Based on python requests + OpenAI REST API (/v1) + elif isinstance(exception, 
requests.exceptions.HTTPError): + if exception.response is not None and "application/json" in exception.response.headers.get("Content-Type", ""): + try: + error_details = exception.response.json() + if "error" not in error_details: + printd(f"HTTPError occurred, but couldn't find error field: {error_details}") + return False + else: + error_details = error_details["error"] + + # Check for the specific error code + if error_details.get("code") == "context_length_exceeded": + printd(f"HTTPError occurred, caught error code {error_details.get('code')}") + return True + # Soft-check for "maximum context length" inside of the message + elif error_details.get("message") and "maximum context length" in error_details.get("message"): + printd(f"HTTPError occurred, found '{match_string}' in error message contents ({error_details})") + return True + else: + printd(f"HTTPError occurred, but unknown error message: {error_details}") + return False + except ValueError: + # JSON decoding failed + printd(f"HTTPError occurred ({exception}), but no JSON error message.") + + # Generic fail + else: + return False + + +def derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option: OptionState, model: str): + if inner_thoughts_in_kwargs_option == OptionState.DEFAULT: + # model that are known to not use `content` fields on tool calls + inner_thoughts_in_kwargs = "gpt-4o" in model or "gpt-4-turbo" in model or "gpt-3.5-turbo" in model + else: + inner_thoughts_in_kwargs = True if inner_thoughts_in_kwargs_option == OptionState.YES else False + + if not isinstance(inner_thoughts_in_kwargs, bool): + warnings.warn(f"Bad type detected: {type(inner_thoughts_in_kwargs)}") + inner_thoughts_in_kwargs = bool(inner_thoughts_in_kwargs) + + return inner_thoughts_in_kwargs diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py index c95ef7a35e..7ccd23ac3e 100644 --- a/letta/llm_api/llm_api_tools.py +++ b/letta/llm_api/llm_api_tools.py @@ -1,25 +1,25 @@ -import copy -import json import os import random import time -import warnings from typing import List, Optional, Union import requests -from letta.constants import CLI_WARNING_PREFIX, OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING +from letta.constants import CLI_WARNING_PREFIX from letta.llm_api.anthropic import anthropic_chat_completions_request -from letta.llm_api.azure_openai import ( - MODEL_TO_AZURE_ENGINE, - azure_openai_chat_completions_request, -) +from letta.llm_api.azure_openai import azure_openai_chat_completions_request from letta.llm_api.cohere import cohere_chat_completions_request from letta.llm_api.google_ai import ( convert_tools_to_google_ai_format, google_ai_chat_completions_request, ) +from letta.llm_api.helpers import ( + add_inner_thoughts_to_functions, + derive_inner_thoughts_in_kwargs, + unpack_all_inner_thoughts_from_kwargs, +) from letta.llm_api.openai import ( + build_openai_chat_completions_request, openai_chat_completions_process_stream, openai_chat_completions_request, ) @@ -37,144 +37,15 @@ Tool, cast_message_to_subtype, ) -from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice +from letta.schemas.openai.chat_completion_response import ChatCompletionResponse from letta.streaming_interface import ( AgentChunkStreamingInterface, AgentRefreshStreamingInterface, ) -from letta.utils import json_dumps LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq"] -# TODO update to use better types -def add_inner_thoughts_to_functions( - functions: List[dict], - 
inner_thoughts_key: str, - inner_thoughts_description: str, - inner_thoughts_required: bool = True, - # inner_thoughts_to_front: bool = True, TODO support sorting somewhere, probably in the to_dict? -) -> List[dict]: - """Add an inner_thoughts kwarg to every function in the provided list""" - # return copies - new_functions = [] - - # functions is a list of dicts in the OpenAI schema (https://platform.openai.com/docs/api-reference/chat/create) - for function_object in functions: - function_params = function_object["parameters"]["properties"] - required_params = list(function_object["parameters"]["required"]) - - # if the inner thoughts arg doesn't exist, add it - if inner_thoughts_key not in function_params: - function_params[inner_thoughts_key] = { - "type": "string", - "description": inner_thoughts_description, - } - - # make sure it's tagged as required - new_function_object = copy.deepcopy(function_object) - if inner_thoughts_required and inner_thoughts_key not in required_params: - required_params.append(inner_thoughts_key) - new_function_object["parameters"]["required"] = required_params - - new_functions.append(new_function_object) - - # return a list of copies - return new_functions - - -def unpack_all_inner_thoughts_from_kwargs( - response: ChatCompletionResponse, - inner_thoughts_key: str, -) -> ChatCompletionResponse: - """Strip the inner thoughts out of the tool call and put it in the message content""" - if len(response.choices) == 0: - raise ValueError(f"Unpacking inner thoughts from empty response not supported") - - new_choices = [] - for choice in response.choices: - new_choices.append(unpack_inner_thoughts_from_kwargs(choice, inner_thoughts_key)) - - # return an updated copy - new_response = response.model_copy(deep=True) - new_response.choices = new_choices - return new_response - - -def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -> Choice: - message = choice.message - if message.role == "assistant" and message.tool_calls and len(message.tool_calls) >= 1: - if len(message.tool_calls) > 1: - warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported") - # TODO support multiple tool calls - tool_call = message.tool_calls[0] - - try: - # Sadly we need to parse the JSON since args are in string format - func_args = dict(json.loads(tool_call.function.arguments)) - if inner_thoughts_key in func_args: - # extract the inner thoughts - inner_thoughts = func_args.pop(inner_thoughts_key) - - # replace the kwargs - new_choice = choice.model_copy(deep=True) - new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args) - # also replace the message content - if new_choice.message.content is not None: - warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})") - new_choice.message.content = inner_thoughts - - return new_choice - else: - warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}") - - except json.JSONDecodeError as e: - warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}") - raise e - - -def is_context_overflow_error(exception: requests.exceptions.RequestException) -> bool: - """Checks if an exception is due to context overflow (based on common OpenAI response messages)""" - from letta.utils import printd - - match_string = OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING - - # Backwards compatibility with openai python package/client v0.28 (pre-v1 client migration) - if match_string in 
str(exception): - printd(f"Found '{match_string}' in str(exception)={(str(exception))}") - return True - - # Based on python requests + OpenAI REST API (/v1) - elif isinstance(exception, requests.exceptions.HTTPError): - if exception.response is not None and "application/json" in exception.response.headers.get("Content-Type", ""): - try: - error_details = exception.response.json() - if "error" not in error_details: - printd(f"HTTPError occurred, but couldn't find error field: {error_details}") - return False - else: - error_details = error_details["error"] - - # Check for the specific error code - if error_details.get("code") == "context_length_exceeded": - printd(f"HTTPError occurred, caught error code {error_details.get('code')}") - return True - # Soft-check for "maximum context length" inside of the message - elif error_details.get("message") and "maximum context length" in error_details.get("message"): - printd(f"HTTPError occurred, found '{match_string}' in error message contents ({error_details})") - return True - else: - printd(f"HTTPError occurred, but unknown error message: {error_details}") - return False - except ValueError: - # JSON decoding failed - printd(f"HTTPError occurred ({exception}), but no JSON error message.") - - # Generic fail - else: - return False - - def retry_with_exponential_backoff( func, initial_delay: float = 1, @@ -248,7 +119,8 @@ def create( stream_inferface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None, # TODO move to llm_config? # if unspecified (None), default to something we've tested - inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT, + inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT, + max_tokens: Optional[int] = None, model_settings: Optional[dict] = None, # TODO: eventually pass from server ) -> ChatCompletionResponse: """Return response to chat completion with backoff""" @@ -267,59 +139,14 @@ def create( # openai if llm_config.model_endpoint_type == "openai": - - if inner_thoughts_in_kwargs == OptionState.DEFAULT: - # model that are known to not use `content` fields on tool calls - inner_thoughts_in_kwargs = ( - "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model - ) - else: - inner_thoughts_in_kwargs = True if inner_thoughts_in_kwargs == OptionState.YES else False - - if not isinstance(inner_thoughts_in_kwargs, bool): - warnings.warn(f"Bad type detected: {type(inner_thoughts_in_kwargs)}") - inner_thoughts_in_kwargs = bool(inner_thoughts_in_kwargs) - if inner_thoughts_in_kwargs: - functions = add_inner_thoughts_to_functions( - functions=functions, - inner_thoughts_key=INNER_THOUGHTS_KWARG, - inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION, - ) - - openai_message_list = [ - cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs)) for m in messages - ] - - # TODO do the same for Azure? 
if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1": # only is a problem if we are *not* using an openai proxy raise ValueError(f"OpenAI key is missing from letta config file") - if use_tool_naming: - data = ChatCompletionRequest( - model=llm_config.model, - messages=openai_message_list, - tools=[{"type": "function", "function": f} for f in functions] if functions else None, - tool_choice=function_call, - user=str(user_id), - ) - else: - data = ChatCompletionRequest( - model=llm_config.model, - messages=openai_message_list, - functions=functions, - function_call=function_call, - user=str(user_id), - ) - # https://platform.openai.com/docs/guides/text-generation/json-mode - # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo - if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model: - data.response_format = {"type": "json_object"} - if "inference.memgpt.ai" in llm_config.model_endpoint: - # override user id for inference.memgpt.ai - import uuid - - data.user = str(uuid.UUID(int=0)) + inner_thoughts_in_kwargs = derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option, model=llm_config.model) + data = build_openai_chat_completions_request( + llm_config, messages, user_id, functions, function_call, use_tool_naming, inner_thoughts_in_kwargs, max_tokens + ) if stream: # Client requested token streaming data.stream = True @@ -356,35 +183,32 @@ def create( if stream: raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}") - azure_deployment = ( - model_settings.azure_deployment if model_settings.azure_deployment is not None else MODEL_TO_AZURE_ENGINE[llm_config.model] + if model_settings.azure_api_key is None: + raise ValueError(f"Azure API key is missing. Did you set AZURE_API_KEY in your env?") + + if model_settings.azure_base_url is None: + raise ValueError(f"Azure base url is missing. 
Did you set AZURE_BASE_URL in your env?") + + # Set the llm config model_endpoint from model_settings + # For Azure, this model_endpoint is required to be configured via env variable, so users don't need to provide it in the LLM config + llm_config.model_endpoint = model_settings.azure_base_url + inner_thoughts_in_kwargs = derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option, llm_config.model) + chat_completion_request = build_openai_chat_completions_request( + llm_config, messages, user_id, functions, function_call, use_tool_naming, inner_thoughts_in_kwargs, max_tokens ) - if use_tool_naming: - data = dict( - # NOTE: don't pass model to Azure calls, that is the deployment_id - # model=agent_config.model, - messages=[m.to_openai_dict() for m in messages], - tools=[{"type": "function", "function": f} for f in functions] if functions else None, - tool_choice=function_call, - user=str(user_id), - ) - else: - data = dict( - # NOTE: don't pass model to Azure calls, that is the deployment_id - # model=agent_config.model, - messages=[m.to_openai_dict() for m in messages], - functions=functions, - function_call=function_call, - user=str(user_id), - ) - return azure_openai_chat_completions_request( - resource_name=model_settings.azure_endpoint, - deployment_id=azure_deployment, - api_version=model_settings.azure_version, - api_key=model_settings.azure_key, - data=data, + + response = azure_openai_chat_completions_request( + model_settings=model_settings, + llm_config=llm_config, + api_key=model_settings.azure_api_key, + chat_completion_request=chat_completion_request, ) + if inner_thoughts_in_kwargs: + response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG) + + return response + elif llm_config.model_endpoint_type == "google_ai": if stream: raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}") @@ -517,7 +341,7 @@ def create( stream_inferface.stream_end() if inner_thoughts_in_kwargs: - response = unpack_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG) + response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG) return response diff --git a/letta/llm_api/openai.py b/letta/llm_api/openai.py index 49d00f373f..18f923723e 100644 --- a/letta/llm_api/openai.py +++ b/letta/llm_api/openai.py @@ -1,5 +1,6 @@ import json -from typing import Generator, Optional, Union +import warnings +from typing import Generator, List, Optional, Union import httpx import requests @@ -8,10 +9,19 @@ from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING from letta.errors import LLMError +from letta.llm_api.helpers import add_inner_thoughts_to_functions +from letta.local_llm.constants import ( + INNER_THOUGHTS_KWARG, + INNER_THOUGHTS_KWARG_DESCRIPTION, +) from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages +from letta.schemas.llm_config import LLMConfig from letta.schemas.message import Message as _Message from letta.schemas.message import MessageRole as _MessageRole -from letta.schemas.openai.chat_completion_request import ChatCompletionRequest +from letta.schemas.openai.chat_completion_request import ( + ChatCompletionRequest, + cast_message_to_subtype, +) from letta.schemas.openai.chat_completion_response import ( ChatCompletionChunkResponse, ChatCompletionResponse, @@ -81,6 +91,64 @@ def openai_get_model_list(url: str, api_key: Union[str, None], fix_url: Optional raise e +def 
build_openai_chat_completions_request( + llm_config: LLMConfig, + messages: List[Message], + user_id: Optional[str], + functions: Optional[list], + function_call: str, + use_tool_naming: bool, + inner_thoughts_in_kwargs: bool, + max_tokens: Optional[int], +) -> ChatCompletionRequest: + if inner_thoughts_in_kwargs: + functions = add_inner_thoughts_to_functions( + functions=functions, + inner_thoughts_key=INNER_THOUGHTS_KWARG, + inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION, + ) + + openai_message_list = [ + cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs)) for m in messages + ] + if llm_config.model: + model = llm_config.model + else: + warnings.warn(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}") + model = None + + if use_tool_naming: + data = ChatCompletionRequest( + model=model, + messages=openai_message_list, + tools=[{"type": "function", "function": f} for f in functions] if functions else None, + tool_choice=function_call, + user=str(user_id), + max_tokens=max_tokens, + ) + else: + data = ChatCompletionRequest( + model=model, + messages=openai_message_list, + functions=functions, + function_call=function_call, + user=str(user_id), + max_tokens=max_tokens, + ) + # https://platform.openai.com/docs/guides/text-generation/json-mode + # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo + if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model: + data.response_format = {"type": "json_object"} + + if "inference.memgpt.ai" in llm_config.model_endpoint: + # override user id for inference.memgpt.ai + import uuid + + data.user = str(uuid.UUID(int=0)) + + return data + + def openai_chat_completions_process_stream( url: str, api_key: str, diff --git a/letta/providers.py b/letta/providers.py index c0fab23002..ccb6c97c3a 100644 --- a/letta/providers.py +++ b/letta/providers.py @@ -220,7 +220,11 @@ def get_model_context_window(self, model_name: str): class AzureProvider(Provider): - pass + name: str = "azure" + base_url: str = Field( + ..., description="Base URL for the Azure API endpoint. This should be specific to your org, e.g. `https://letta.openai.azure.com`." + ) + api_key: str = Field(..., description="API key for the Azure API.") class VLLMProvider(OpenAIProvider): diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index d951c2dd08..493adabf85 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -11,7 +11,7 @@ class LLMConfig(BaseModel): model (str): The name of the LLM model. model_endpoint_type (str): The endpoint type for the model. model_endpoint (str): The endpoint for the model. - model_wrapper (str): The wrapper for the model. + model_wrapper (str): The wrapper for the model. This is used to wrap additional text around the input/output of the model. This is useful for text-to-text completions, such as the Completions API in OpenAI. context_window (int): The context window size for the model. """ @@ -34,7 +34,10 @@ class LLMConfig(BaseModel): "vllm", "hugging-face", ] = Field(..., description="The endpoint type for the model.") - model_endpoint: str = Field(..., description="The endpoint for the model.") + model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.") + api_version: Optional[str] = Field( + None, description="The version for the model API. Used by the Azure provider backend, e.g. 2023-03-15-preview." 
+ ) model_wrapper: Optional[str] = Field(None, description="The wrapper for the model.") context_window: int = Field(..., description="The context window size for the model.") diff --git a/letta/server/server.py b/letta/server/server.py index ff1e6c6e72..2e08bcba93 100644 --- a/letta/server/server.py +++ b/letta/server/server.py @@ -45,6 +45,7 @@ from letta.prompts import gpt_system from letta.providers import ( AnthropicProvider, + AzureProvider, GoogleAIProvider, OllamaProvider, OpenAIProvider, @@ -270,6 +271,8 @@ def __init__( self._enabled_providers.append(VLLMProvider(base_url=model_settings.vllm_base_url)) if model_settings.gemini_api_key: self._enabled_providers.append(GoogleAIProvider(api_key=model_settings.gemini_api_key)) + if model_settings.azure_api_key and model_settings.azure_base_url: + self._enabled_providers.append(AzureProvider(api_key=model_settings.azure_api_key, base_url=model_settings.azure_base_url)) def save_agents(self): """Saves all the agents that are in the in-memory object store""" diff --git a/letta/settings.py b/letta/settings.py index 84066c411b..4f8bb2de2f 100644 --- a/letta/settings.py +++ b/letta/settings.py @@ -23,7 +23,8 @@ class ModelSettings(BaseSettings): ollama_base_url: Optional[str] = None # azure - azure_deployment: Optional[str] = None + azure_api_key: Optional[str] = None + azure_base_url: Optional[str] = None # google ai gemini_api_key: Optional[str] = None diff --git a/tests/configs/embedding_model_configs/letta-hosted.json b/tests/configs/embedding_model_configs/letta-hosted.json new file mode 100644 index 0000000000..42478ed8d2 --- /dev/null +++ b/tests/configs/embedding_model_configs/letta-hosted.json @@ -0,0 +1,7 @@ +{ + "embedding_endpoint": "https://embeddings.memgpt.ai", + "embedding_model": "BAAI/bge-large-en-v1.5", + "embedding_dim": 1024, + "embedding_chunk_size": 300, + "embedding_endpoint_type": "hugging-face" +} diff --git a/tests/configs/embedding_model_configs/local.json b/tests/configs/embedding_model_configs/local.json new file mode 100644 index 0000000000..aaac3621a5 --- /dev/null +++ b/tests/configs/embedding_model_configs/local.json @@ -0,0 +1,7 @@ +{ + "embedding_endpoint": null, + "embedding_model": "BAAI/bge-small-en-v1.5", + "embedding_dim": 384, + "embedding_chunk_size": 300, + "embedding_endpoint_type": "local" +} diff --git a/tests/configs/embedding_model_configs/ollama.json b/tests/configs/embedding_model_configs/ollama.json new file mode 100644 index 0000000000..84ad72f650 --- /dev/null +++ b/tests/configs/embedding_model_configs/ollama.json @@ -0,0 +1,7 @@ +{ + "embedding_endpoint_type": "ollama", + "embedding_endpoint": "http://127.0.0.1:11434", + "embedding_model": "mxbai-embed-large", + "embedding_dim": 512, + "embedding_chunk_size": 200 +} diff --git a/tests/configs/embedding_model_configs/text-embedding-ada-002.json b/tests/configs/embedding_model_configs/text-embedding-ada-002.json new file mode 100644 index 0000000000..8791ad67e4 --- /dev/null +++ b/tests/configs/embedding_model_configs/text-embedding-ada-002.json @@ -0,0 +1,7 @@ +{ + "embedding_endpoint_type": "openai", + "embedding_endpoint": "https://api.openai.com/v1", + "embedding_model": "text-embedding-ada-002", + "embedding_dim": 1536, + "embedding_chunk_size": 300 +} diff --git a/tests/configs/letta_hosted.json b/tests/configs/letta_hosted.json new file mode 100644 index 0000000000..3fd85a4c18 --- /dev/null +++ b/tests/configs/letta_hosted.json @@ -0,0 +1,11 @@ +{ + "context_window": 8192, + "model_endpoint_type": "openai", + "model_endpoint": 
"https://inference.memgpt.ai", + "model": "memgpt-openai", + "embedding_endpoint_type": "hugging-face", + "embedding_endpoint": "https://embeddings.memgpt.ai", + "embedding_model": "BAAI/bge-large-en-v1.5", + "embedding_dim": 1024, + "embedding_chunk_size": 300 +} diff --git a/tests/configs/llm_model_configs/azure-gpt-4o-mini.json b/tests/configs/llm_model_configs/azure-gpt-4o-mini.json new file mode 100644 index 0000000000..58eb3a00dc --- /dev/null +++ b/tests/configs/llm_model_configs/azure-gpt-4o-mini.json @@ -0,0 +1,7 @@ +{ + "context_window": 128000, + "model": "gpt-4o-mini", + "model_endpoint_type": "azure", + "api_version": "2023-03-15-preview", + "model_wrapper": null +} diff --git a/tests/configs/llm_model_configs/claude-3-opus.json b/tests/configs/llm_model_configs/claude-3-opus.json new file mode 100644 index 0000000000..6281aa9644 --- /dev/null +++ b/tests/configs/llm_model_configs/claude-3-opus.json @@ -0,0 +1,7 @@ +{ + "context_window": 200000, + "model": "claude-3-opus-20240229", + "model_endpoint_type": "anthropic", + "model_endpoint": "https://api.anthropic.com/v1", + "model_wrapper": null +} diff --git a/tests/configs/llm_model_configs/gpt-4.json b/tests/configs/llm_model_configs/gpt-4.json new file mode 100644 index 0000000000..c572428e49 --- /dev/null +++ b/tests/configs/llm_model_configs/gpt-4.json @@ -0,0 +1,7 @@ +{ + "context_window": 8192, + "model": "gpt-4", + "model_endpoint_type": "openai", + "model_endpoint": "https://api.openai.com/v1", + "model_wrapper": null +} diff --git a/tests/configs/llm_model_configs/groq.json b/tests/configs/llm_model_configs/groq.json new file mode 100644 index 0000000000..a63acbf06c --- /dev/null +++ b/tests/configs/llm_model_configs/groq.json @@ -0,0 +1,7 @@ +{ + "context_window": 8192, + "model": "llama3-groq-70b-8192-tool-use-preview", + "model_endpoint_type": "groq", + "model_endpoint": "https://api.groq.com/openai/v1", + "model_wrapper": null +} diff --git a/tests/configs/llm_model_configs/letta-hosted.json b/tests/configs/llm_model_configs/letta-hosted.json new file mode 100644 index 0000000000..3ba968226b --- /dev/null +++ b/tests/configs/llm_model_configs/letta-hosted.json @@ -0,0 +1,6 @@ +{ + "context_window": 16384, + "model_endpoint_type": "openai", + "model_endpoint": "https://inference.memgpt.ai", + "model": "memgpt-openai" +} diff --git a/tests/configs/llm_model_configs/ollama.json b/tests/configs/llm_model_configs/ollama.json new file mode 100644 index 0000000000..d18a4e7724 --- /dev/null +++ b/tests/configs/llm_model_configs/ollama.json @@ -0,0 +1,6 @@ +{ + "context_window": 8192, + "model_endpoint_type": "ollama", + "model_endpoint": "http://127.0.0.1:11434", + "model": "dolphin2.2-mistral:7b-q6_K" +} diff --git a/tests/configs/openai.json b/tests/configs/openai.json new file mode 100644 index 0000000000..82ed0d72e1 --- /dev/null +++ b/tests/configs/openai.json @@ -0,0 +1,12 @@ +{ + "context_window": 8192, + "model": "gpt-4", + "model_endpoint_type": "openai", + "model_endpoint": "https://api.openai.com/v1", + "model_wrapper": null, + "embedding_endpoint_type": "openai", + "embedding_endpoint": "https://api.openai.com/v1", + "embedding_model": "text-embedding-ada-002", + "embedding_dim": 1536, + "embedding_chunk_size": 300 +} diff --git a/tests/helpers/endpoints_helper.py b/tests/helpers/endpoints_helper.py index d32503a3db..7aac1f03d9 100644 --- a/tests/helpers/endpoints_helper.py +++ b/tests/helpers/endpoints_helper.py @@ -3,6 +3,12 @@ import uuid from typing import Callable, List, Optional, Union +from 
letta.llm_api.helpers import ( + derive_inner_thoughts_in_kwargs, + unpack_inner_thoughts_from_kwargs, +) +from letta.schemas.enums import OptionState + logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) @@ -18,7 +24,7 @@ MissingFunctionCallError, MissingInnerMonologueError, ) -from letta.llm_api.llm_api_tools import create, unpack_inner_thoughts_from_kwargs +from letta.llm_api.llm_api_tools import create from letta.local_llm.constants import INNER_THOUGHTS_KWARG from letta.schemas.agent import AgentState from letta.schemas.embedding_config import EmbeddingConfig @@ -83,7 +89,7 @@ def setup_agent( # ====================================================================================================================== -def check_first_response_is_valid_for_llm_endpoint(filename: str, inner_thoughts_in_kwargs: bool = False) -> ChatCompletionResponse: +def check_first_response_is_valid_for_llm_endpoint(filename: str) -> ChatCompletionResponse: """ Checks that the first response is valid: @@ -113,7 +119,9 @@ def check_first_response_is_valid_for_llm_endpoint(filename: str, inner_thoughts ) # Basic check - assert response is not None + assert response is not None, response + assert response.choices is not None, response + assert len(response.choices) > 0, response # Select first choice choice = response.choices[0] @@ -122,6 +130,9 @@ def check_first_response_is_valid_for_llm_endpoint(filename: str, inner_thoughts validator_func = lambda function_call: function_call.name == "send_message" or function_call.name == "archival_memory_search" assert_contains_valid_function_call(choice.message, validator_func) + # Get inner_thoughts_in_kwargs + inner_thoughts_in_kwargs = derive_inner_thoughts_in_kwargs(OptionState.DEFAULT, agent_state.llm_config.model) + # Assert that the message has an inner monologue assert_contains_correct_inner_monologue(choice, inner_thoughts_in_kwargs) @@ -302,9 +313,9 @@ def run_embedding_endpoint(filename): def assert_sanity_checks(response: LettaResponse): - assert response is not None - assert response.messages is not None - assert len(response.messages) > 0 + assert response is not None, response + assert response.messages is not None, response + assert len(response.messages) > 0, response def assert_invoked_send_message_with_keyword(messages: List[LettaMessage], keyword: str, case_sensitive: bool = False) -> None: diff --git a/tests/test_endpoints.py b/tests/test_endpoints.py index a6cd16a300..5bf9d326d3 100644 --- a/tests/test_endpoints.py +++ b/tests/test_endpoints.py @@ -12,7 +12,7 @@ # directories embedding_config_dir = "configs/embedding_model_configs" -llm_config_dir = "configs/llm_model_configs" +llm_config_dir = "tests/configs/llm_model_configs" # ====================================================================================================================== @@ -66,6 +66,52 @@ def test_embedding_endpoint_openai(): run_embedding_endpoint(filename) +# ====================================================================================================================== +# AZURE TESTS +# ====================================================================================================================== +def test_azure_gpt_4o_mini_returns_valid_first_message(): + filename = os.path.join(llm_config_dir, "azure-gpt-4o-mini.json") + response = check_first_response_is_valid_for_llm_endpoint(filename) + # Log out successful response + print(f"Got successful response from client: \n\n{response}") + + +def 
test_azure_gpt_4o_mini_returns_keyword(): + keyword = "banana" + filename = os.path.join(llm_config_dir, "azure-gpt-4o-mini.json") + response = check_response_contains_keyword(filename, keyword=keyword) + # Log out successful response + print(f"Got successful response from client: \n\n{response}") + + +def test_azure_gpt_4o_mini_uses_external_tool(): + filename = os.path.join(llm_config_dir, "azure-gpt-4o-mini.json") + response = check_agent_uses_external_tool(filename) + # Log out successful response + print(f"Got successful response from client: \n\n{response}") + + +def test_azure_gpt_4o_mini_recall_chat_memory(): + filename = os.path.join(llm_config_dir, "azure-gpt-4o-mini.json") + response = check_agent_recall_chat_memory(filename) + # Log out successful response + print(f"Got successful response from client: \n\n{response}") + + +def test_azure_gpt_4o_mini_archival_memory_retrieval(): + filename = os.path.join(llm_config_dir, "azure-gpt-4o-mini.json") + response = check_agent_archival_memory_retrieval(filename) + # Log out successful response + print(f"Got successful response from client: \n\n{response}") + + +def test_azure_gpt_4o_mini_edit_core_memory(): + filename = os.path.join(llm_config_dir, "azure-gpt-4o-mini.json") + response = check_agent_edit_core_memory(filename) + # Log out successful response + print(f"Got successful response from client: \n\n{response}") + + # ====================================================================================================================== # LETTA HOSTED # ======================================================================================================================
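
The Azure tests above depend on environment-driven settings rather than per-field credentials: ModelSettings picks up AZURE_API_KEY and AZURE_BASE_URL (the env var names referenced by the new error messages in llm_api_tools.py), while api_version and model come from azure-gpt-4o-mini.json. A minimal sketch of how get_azure_endpoint() assembles the request URL from those pieces, assuming the package is importable and ModelSettings can be instantiated directly; the key and resource URL below are placeholders:

import os

from letta.llm_api.azure_openai import get_azure_endpoint
from letta.schemas.llm_config import LLMConfig
from letta.settings import ModelSettings

# AZURE_API_KEY / AZURE_BASE_URL are the env vars named in the new error messages;
# the values here are placeholders for your Azure OpenAI resource.
os.environ.setdefault("AZURE_API_KEY", "<your-azure-openai-key>")
os.environ.setdefault("AZURE_BASE_URL", "https://YOUR-RESOURCE.openai.azure.com")

model_settings = ModelSettings()  # pydantic BaseSettings: reads the env vars above

# Mirrors configs/llm_model_configs/azure-gpt-4o-mini.json
llm_config = LLMConfig(
    model="gpt-4o-mini",
    model_endpoint_type="azure",
    api_version="2023-03-15-preview",
    context_window=128000,
)

# -> {AZURE_BASE_URL}/openai/deployments/gpt-4o-mini/chat/completions?api-version=2023-03-15-preview
print(get_azure_endpoint(llm_config, model_settings))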
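
The inner-thoughts-in-kwargs machinery that moved into letta/llm_api/helpers.py is easiest to see on a concrete schema. A minimal sketch of the intended call pattern, using a simplified, hypothetical send_message schema (not the repo's real tool definition) and assuming the constants keep their names in letta.local_llm.constants:

from letta.llm_api.helpers import (
    add_inner_thoughts_to_functions,
    derive_inner_thoughts_in_kwargs,
)
from letta.local_llm.constants import (
    INNER_THOUGHTS_KWARG,
    INNER_THOUGHTS_KWARG_DESCRIPTION,
)
from letta.schemas.enums import OptionState

# Simplified, hypothetical tool schema (OpenAI function-calling format)
send_message = {
    "name": "send_message",
    "description": "Send a message to the user.",
    "parameters": {
        "type": "object",
        "properties": {"message": {"type": "string", "description": "Message contents."}},
        "required": ["message"],
    },
}

# With OptionState.DEFAULT the model name decides: gpt-4o* / gpt-4-turbo / gpt-3.5-turbo -> True
if derive_inner_thoughts_in_kwargs(OptionState.DEFAULT, model="gpt-4o-mini"):
    functions = add_inner_thoughts_to_functions(
        functions=[send_message],
        inner_thoughts_key=INNER_THOUGHTS_KWARG,
        inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
    )
    # The returned copy lists INNER_THOUGHTS_KWARG as a required string parameter;
    # after the completion call, unpack_all_inner_thoughts_from_kwargs() strips that
    # kwarg from the tool call and moves it into message.content (the agent's monologue).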