From 04feb6c85553d9123ffeab7ddc2a2dadecea9978 Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Wed, 4 Dec 2024 17:32:57 +0000 Subject: [PATCH 01/14] fix: catching errors during streaming in a single place --- aidial_adapter_openai/app.py | 20 +- aidial_adapter_openai/exception_handlers.py | 113 +++++-- aidial_adapter_openai/gpt.py | 2 +- .../gpt4_multi_modal/chat_completion.py | 8 +- aidial_adapter_openai/utils/sse_stream.py | 18 +- aidial_adapter_openai/utils/streaming.py | 26 +- aidial_adapter_openai/utils/tokenizer.py | 8 +- poetry.lock | 319 ++++++++---------- pyproject.toml | 2 +- tests/test_errors.py | 2 +- 10 files changed, 271 insertions(+), 247 deletions(-) diff --git a/aidial_adapter_openai/app.py b/aidial_adapter_openai/app.py index 5d07629..ba27873 100644 --- a/aidial_adapter_openai/app.py +++ b/aidial_adapter_openai/app.py @@ -1,5 +1,6 @@ from contextlib import asynccontextmanager +import aidial_sdk._errors as sdk_error_handlers import pydantic from aidial_sdk.exceptions import HTTPException as DialException from aidial_sdk.telemetry.init import init_telemetry as sdk_init_telemetry @@ -9,11 +10,7 @@ import aidial_adapter_openai.endpoints as endpoints from aidial_adapter_openai.app_config import ApplicationConfig -from aidial_adapter_openai.exception_handlers import ( - dial_exception_handler, - openai_exception_handler, - pydantic_exception_handler, -) +from aidial_adapter_openai.exception_handlers import openai_exception_handler from aidial_adapter_openai.utils.http_client import get_http_client from aidial_adapter_openai.utils.log_config import configure_loggers, logger from aidial_adapter_openai.utils.request import set_app_config @@ -30,7 +27,7 @@ def create_app( app_config: ApplicationConfig | None = None, init_telemetry: bool = True, ) -> FastAPI: - app = FastAPI(lifespan=lifespan) + app = FastAPI(lifespan=lifespan, debug=True) set_app_config(app, app_config or ApplicationConfig.from_env()) if init_telemetry: @@ -45,9 +42,14 @@ def create_app( app.post("/openai/deployments/{deployment_id:path}/chat/completions")( endpoints.chat_completion ) - app.exception_handler(OpenAIError)(openai_exception_handler) - app.exception_handler(pydantic.ValidationError)(pydantic_exception_handler) - app.exception_handler(DialException)(dial_exception_handler) + app.add_exception_handler(OpenAIError, openai_exception_handler) + app.add_exception_handler( + pydantic.ValidationError, + sdk_error_handlers.pydantic_validation_exception_handler, + ) + app.add_exception_handler( + DialException, sdk_error_handlers.dial_exception_handler + ) return app diff --git a/aidial_adapter_openai/exception_handlers.py b/aidial_adapter_openai/exception_handlers.py index c98c122..3eb0a5b 100644 --- a/aidial_adapter_openai/exception_handlers.py +++ b/aidial_adapter_openai/exception_handlers.py @@ -1,56 +1,117 @@ -import pydantic -from aidial_sdk._errors import pydantic_validation_exception_handler +from typing import Dict + from aidial_sdk.exceptions import HTTPException as DialException +from aidial_sdk.exceptions import InternalServerError from fastapi import Request from fastapi.responses import Response -from openai import APIConnectionError, APIError, APIStatusError, APITimeoutError +from openai import ( + APIConnectionError, + APIError, + APIStatusError, + APITimeoutError, + OpenAIError, +) + +def _parse_dial_exception( + status_code: int, + content: dict | str, + headers: Dict[str, str] | None = None, +) -> DialException: + if ( + isinstance(content, dict) + and (error := content.get("error")) + and 
isinstance(error, dict) + ): + message = error.get("message") or "Unknown error" + code = error.get("code") + type = error.get("type") + param = error.get("param") + display_message = error.get("display_message") + + return DialException( + status_code=status_code, + message=message, + type=type, + param=param, + code=code, + display_message=display_message, + headers=headers, + ) + else: + return DialException( + status_code=status_code, + message=str(content), + headers=headers, + ) -def openai_exception_handler(request: Request, e: DialException): - if isinstance(e, APIStatusError): - r = e.response + +def to_dial_exception(exc: Exception) -> DialException: + if isinstance(exc, APIStatusError): + # Non-streaming errors reported by `openai` library via this exception + + r = exc.response headers = r.headers - # Avoid encoding the error message when the original response was encoded. + # httpx library (used by openai) automatically sets + # "Accept-Encoding:gzip,deflate" header in requests to the upstream. + # Therefore, we may receive from the upstream gzip-encoded + # response along with "Content-Encoding:gzip" header. + # We either need to encode the response, or + # remove the "Content-Encoding" header. if "Content-Encoding" in headers: del headers["Content-Encoding"] - return Response( - content=r.content, + plain_headers = {k.decode(): v.decode() for k, v in headers.raw} + + try: + content = r.json() + except Exception: + content = r.text + + return _parse_dial_exception( status_code=r.status_code, - headers=headers, + headers=plain_headers, + content=content, ) - if isinstance(e, APITimeoutError): - raise DialException( + if isinstance(exc, APITimeoutError): + return DialException( status_code=504, type="timeout", message="Request timed out", display_message="Request timed out. Please try again later.", ) - if isinstance(e, APIConnectionError): - raise DialException( + if isinstance(exc, APIConnectionError): + return DialException( status_code=502, type="connection", message="Error communicating with OpenAI", display_message="OpenAI server is not responsive. 
Please try again later.", ) - if isinstance(e, APIError): - raise DialException( - status_code=getattr(e, "status_code", None) or 500, - message=e.message, - type=e.type, - code=e.code, - param=e.param, - display_message=None, + if isinstance(exc, APIError): + # Streaming errors reported by `openai` library via this exception + status_code: int = 500 + if exc.code: + try: + status_code = int(exc.code) + except Exception: + pass + + return _parse_dial_exception( + status_code=status_code, + headers={}, + content={"error": exc.body or {}}, ) + if isinstance(exc, DialException): + return exc -def pydantic_exception_handler(request: Request, exc: pydantic.ValidationError): - return pydantic_validation_exception_handler(request, exc) + return InternalServerError(str(exc)) -def dial_exception_handler(request: Request, exc: DialException): - return exc.to_fastapi_response() +def openai_exception_handler(request: Request, exc: Exception) -> Response: + assert isinstance(exc, OpenAIError) + return to_dial_exception(exc).to_fastapi_response() diff --git a/aidial_adapter_openai/gpt.py b/aidial_adapter_openai/gpt.py index d4c6cde..b18ee86 100644 --- a/aidial_adapter_openai/gpt.py +++ b/aidial_adapter_openai/gpt.py @@ -78,12 +78,12 @@ async def gpt_chat_completion( if isinstance(response, AsyncIterator): return generate_stream( + map_stream(chunk_to_dict, response), get_prompt_tokens=lambda: estimated_prompt_tokens or tokenizer.tokenize_request(request, request["messages"]), tokenize_response=tokenizer.tokenize_response, deployment=deployment_id, discarded_messages=discarded_messages, - stream=map_stream(chunk_to_dict, response), eliminate_empty_choices=eliminate_empty_choices, ) else: diff --git a/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py b/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py index 216137d..f9fd3ba 100644 --- a/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py +++ b/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py @@ -262,14 +262,14 @@ def debug_print(chunk: T) -> T: return map_stream( debug_print, generate_stream( + map_stream( + response_transformer, + parse_openai_sse_stream(response), + ), get_prompt_tokens=lambda: estimated_prompt_tokens, tokenize_response=tokenizer.tokenize_response, deployment=deployment, discarded_messages=discarded_messages, - stream=map_stream( - response_transformer, - parse_openai_sse_stream(response), - ), eliminate_empty_choices=eliminate_empty_choices, ), ) diff --git a/aidial_adapter_openai/utils/sse_stream.py b/aidial_adapter_openai/utils/sse_stream.py index 3094d00..b4a6ce9 100644 --- a/aidial_adapter_openai/utils/sse_stream.py +++ b/aidial_adapter_openai/utils/sse_stream.py @@ -3,6 +3,9 @@ from aidial_sdk.exceptions import runtime_server_error +from aidial_adapter_openai.exception_handlers import to_dial_exception +from aidial_adapter_openai.utils.log_config import logger + DATA_PREFIX = "data: " OPENAI_END_MARKER = "[DONE]" @@ -53,6 +56,17 @@ async def parse_openai_sse_stream( async def to_openai_sse_stream( stream: AsyncIterator[dict], ) -> AsyncIterator[str]: - async for chunk in stream: - yield format_chunk(chunk) + try: + async for chunk in stream: + yield format_chunk(chunk) + except Exception as e: + logger.exception( + f"caught exception while streaming: {type(e).__module__}.{type(e).__name__}" + ) + + dial_exception = to_dial_exception(e) + logger.exception(f"converted to the dial exception: {dial_exception!r}") + + yield format_chunk(dial_exception.json_error()) + yield END_CHUNK diff --git 
a/aidial_adapter_openai/utils/streaming.py b/aidial_adapter_openai/utils/streaming.py index 724ae00..e54f5dd 100644 --- a/aidial_adapter_openai/utils/streaming.py +++ b/aidial_adapter_openai/utils/streaming.py @@ -6,10 +6,10 @@ from aidial_sdk.exceptions import HTTPException as DialException from aidial_sdk.utils.merge_chunks import merge_chat_completion_chunks from fastapi.responses import JSONResponse, Response, StreamingResponse -from openai import APIError, APIStatusError from openai.types.chat.chat_completion_chunk import ChatCompletionChunk from pydantic import BaseModel +from aidial_adapter_openai.exception_handlers import to_dial_exception from aidial_adapter_openai.utils.chat_completion_response import ( ChatCompletionResponse, ChatCompletionStreamingChunk, @@ -53,12 +53,12 @@ def build_chunk( async def generate_stream( + stream: AsyncIterator[dict], *, get_prompt_tokens: Callable[[], int], tokenize_response: Callable[[ChatCompletionResponse], int], deployment: str, discarded_messages: Optional[list[int]], - stream: AsyncIterator[dict], eliminate_empty_choices: bool, ) -> AsyncIterator[dict]: @@ -98,7 +98,7 @@ def set_discarded_messages(chunk: dict | None, indices: list[int]) -> dict: buffer_chunk = None response_snapshot = ChatCompletionStreamingChunk() - error = None + error: DialException | None = None try: async for chunk in stream: @@ -121,15 +121,8 @@ def set_discarded_messages(chunk: dict | None, indices: list[int]) -> dict: yield last_chunk last_chunk = chunk - except APIError as e: - status_code = e.status_code if isinstance(e, APIStatusError) else 500 - error = DialException( - status_code=status_code, - message=e.message, - type=e.type, - param=e.param, - code=e.code, - ).json_error() + except Exception as e: + error = to_dial_exception(e) if last_chunk is not None and buffer_chunk is not None: last_chunk = merge_chat_completion_chunks(last_chunk, buffer_chunk) @@ -160,7 +153,7 @@ def set_discarded_messages(chunk: dict | None, indices: list[int]) -> dict: yield last_chunk if error: - yield error + raise error def create_stage_chunk(name: str, content: str, stream: bool) -> dict: @@ -196,7 +189,7 @@ def create_stage_chunk(name: str, content: str, stream: bool) -> dict: def create_response_from_chunk( chunk: dict, exc: DialException | None, stream: bool -) -> Response: +) -> AsyncIterator[dict] | Response: if not stream: if exc is not None: return exc.to_fastapi_response() @@ -208,10 +201,7 @@ async def generator() -> AsyncIterator[dict]: if exc is not None: yield exc.json_error() - return StreamingResponse( - to_openai_sse_stream(generator()), - media_type="text/event-stream", - ) + return generator() def block_response_to_streaming_chunk(response: dict) -> dict: diff --git a/aidial_adapter_openai/utils/tokenizer.py b/aidial_adapter_openai/utils/tokenizer.py index 3af0e94..c177a7b 100644 --- a/aidial_adapter_openai/utils/tokenizer.py +++ b/aidial_adapter_openai/utils/tokenizer.py @@ -14,6 +14,7 @@ ) from aidial_adapter_openai.utils.image_tokenizer import ImageTokenizer from aidial_adapter_openai.utils.multi_modal_message import MultiModalMessage +from aidial_adapter_openai.utils.text import truncate_string MessageType = TypeVar("MessageType") @@ -154,10 +155,11 @@ class PlainTextTokenizer(BaseTokenizer[dict]): """ def _handle_custom_content_part(self, content_part: Any): - short_content_part = str(content_part)[:100] + short_content_part = truncate_string(str(content_part), 100) raise InternalServerError( - f"Unexpected type of content in message: {short_content_part!r}" 
- f"Use MultiModalTokenizer for messages with images" + f"Unexpected non-textural content part in the request: {short_content_part!r}. " + f"The deployment only supports plain text messages. " + f"Declare the deployment as a multi-modal one to avoid the error." ) def tokenize_request_message(self, message: dict) -> int: diff --git a/poetry.lock b/poetry.lock index 2094e73..f9ccf6b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2,40 +2,35 @@ [[package]] name = "aidial-sdk" -version = "0.15.0" +version = "0.16.0" description = "Framework to create applications and model adapters for AI DIAL" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "aidial_sdk-0.15.0-py3-none-any.whl", hash = "sha256:7b9b3e5ec9688be2919dcd7dd0312aac807dc7917393ee5f846332713ad2e26a"}, - {file = "aidial_sdk-0.15.0.tar.gz", hash = "sha256:6b47bb36e8c795300e0d4b61308c6a2f86b59abb97905390a02789b343460720"}, + {file = "aidial_sdk-0.16.0-py3-none-any.whl", hash = "sha256:76bfa50fd08bfabedd572f06974c68cca9dc18b5c38a8d00bf5d59e1f61cb2d9"}, + {file = "aidial_sdk-0.16.0.tar.gz", hash = "sha256:eddb1f00949bd0e4263c18be03df7b80093ce8caf7e4ed46a550f3a790e01875"}, ] [package.dependencies] -aiohttp = ">=3.8.3,<4.0.0" fastapi = ">=0.51,<1.0" -httpx = ">=0.25.0,<1.0" -opentelemetry-api = {version = "1.20.0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-distro = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-exporter-otlp-proto-grpc = {version = "1.20.0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-exporter-prometheus = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-instrumentation = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-instrumentation-aiohttp-client = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-instrumentation-fastapi = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-instrumentation-httpx = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-instrumentation-logging = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-instrumentation-requests = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-instrumentation-system-metrics = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-instrumentation-urllib = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-sdk = {version = "1.20.0", optional = true, markers = "extra == \"telemetry\""} -prometheus-client = {version = "0.17.1", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-api = {version = ">=1.22.0,<2.0.0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-exporter-otlp-proto-grpc = {version = ">=1.22.0,<2.0.0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-exporter-prometheus = {version = ">=0.43b0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-instrumentation-aiohttp-client = {version = ">=0.43b0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-instrumentation-fastapi = {version = ">=0.43b0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-instrumentation-httpx = {version = ">=0.43b0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-instrumentation-logging = {version = 
">=0.43b0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-instrumentation-requests = {version = ">=0.43b0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-instrumentation-system-metrics = {version = ">=0.43b0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-instrumentation-urllib = {version = ">=0.43b0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-sdk = {version = ">=1.22.0,<2.0.0", optional = true, markers = "extra == \"telemetry\""} +prometheus-client = {version = ">=0.17.1,<=0.21", optional = true, markers = "extra == \"telemetry\""} pydantic = ">=1.10,<3" -requests = ">=2.19,<3.0" uvicorn = ">=0.19,<1.0" -wrapt = ">=1.14,<2.0" +wrapt = ">=1.10,<2" [package.extras] -telemetry = ["opentelemetry-api (==1.20.0)", "opentelemetry-distro (==0.41b0)", "opentelemetry-exporter-otlp-proto-grpc (==1.20.0)", "opentelemetry-exporter-prometheus (==0.41b0)", "opentelemetry-instrumentation (==0.41b0)", "opentelemetry-instrumentation-aiohttp-client (==0.41b0)", "opentelemetry-instrumentation-fastapi (==0.41b0)", "opentelemetry-instrumentation-httpx (==0.41b0)", "opentelemetry-instrumentation-logging (==0.41b0)", "opentelemetry-instrumentation-requests (==0.41b0)", "opentelemetry-instrumentation-system-metrics (==0.41b0)", "opentelemetry-instrumentation-urllib (==0.41b0)", "opentelemetry-sdk (==1.20.0)", "prometheus-client (==0.17.1)"] +telemetry = ["opentelemetry-api (>=1.22.0,<2.0.0)", "opentelemetry-exporter-otlp-proto-grpc (>=1.22.0,<2.0.0)", "opentelemetry-exporter-prometheus (>=0.43b0)", "opentelemetry-instrumentation-aiohttp-client (>=0.43b0)", "opentelemetry-instrumentation-fastapi (>=0.43b0)", "opentelemetry-instrumentation-httpx (>=0.43b0)", "opentelemetry-instrumentation-logging (>=0.43b0)", "opentelemetry-instrumentation-requests (>=0.43b0)", "opentelemetry-instrumentation-system-metrics (>=0.43b0)", "opentelemetry-instrumentation-urllib (>=0.43b0)", "opentelemetry-sdk (>=1.22.0,<2.0.0)", "prometheus-client (>=0.17.1,<=0.21)"] [[package]] name = "aiohappyeyeballs" @@ -289,17 +284,6 @@ cryptography = ">=2.5" msal = ">=1.24.0" msal-extensions = ">=0.3.0" -[[package]] -name = "backoff" -version = "2.2.1" -description = "Function decoration for backoff and retry" -optional = false -python-versions = ">=3.7,<4.0" -files = [ - {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, - {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, -] - [[package]] name = "black" version = "24.3.0" @@ -1202,324 +1186,295 @@ datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] [[package]] name = "opentelemetry-api" -version = "1.20.0" +version = "1.28.2" description = "OpenTelemetry Python API" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_api-1.20.0-py3-none-any.whl", hash = "sha256:982b76036fec0fdaf490ae3dfd9f28c81442a33414f737abc687a32758cdcba5"}, - {file = "opentelemetry_api-1.20.0.tar.gz", hash = "sha256:06abe351db7572f8afdd0fb889ce53f3c992dbf6f6262507b385cc1963e06983"}, + {file = "opentelemetry_api-1.28.2-py3-none-any.whl", hash = "sha256:6fcec89e265beb258fe6b1acaaa3c8c705a934bd977b9f534a2b7c0d2d4275a6"}, + {file = "opentelemetry_api-1.28.2.tar.gz", hash = "sha256:ecdc70c7139f17f9b0cf3742d57d7020e3e8315d6cffcdf1a12a905d45b19cc0"}, ] [package.dependencies] deprecated = ">=1.2.6" -importlib-metadata = 
">=6.0,<7.0" - -[[package]] -name = "opentelemetry-distro" -version = "0.41b0" -description = "OpenTelemetry Python Distro" -optional = false -python-versions = ">=3.7" -files = [ - {file = "opentelemetry_distro-0.41b0-py3-none-any.whl", hash = "sha256:61a028dc8c1418b8634a5bf71e15ad85427cb55d97a0cd6a58dd135e456cc027"}, - {file = "opentelemetry_distro-0.41b0.tar.gz", hash = "sha256:8ce05f9499a09c99d9c5f550ff2ed6d229444cae17ae36baf705b0ccb647a959"}, -] - -[package.dependencies] -opentelemetry-api = ">=1.12,<2.0" -opentelemetry-instrumentation = "0.41b0" -opentelemetry-sdk = ">=1.13,<2.0" - -[package.extras] -otlp = ["opentelemetry-exporter-otlp (==1.20.0)"] +importlib-metadata = ">=6.0,<=8.5.0" [[package]] name = "opentelemetry-exporter-otlp-proto-common" -version = "1.20.0" +version = "1.28.2" description = "OpenTelemetry Protobuf encoding" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_exporter_otlp_proto_common-1.20.0-py3-none-any.whl", hash = "sha256:dd63209b40702636ab6ae76a06b401b646ad7b008a906ecb41222d4af24fbdef"}, - {file = "opentelemetry_exporter_otlp_proto_common-1.20.0.tar.gz", hash = "sha256:df60c681bd61812e50b3a39a7a1afeeb6d4066117583249fcc262269374e7a49"}, + {file = "opentelemetry_exporter_otlp_proto_common-1.28.2-py3-none-any.whl", hash = "sha256:545b1943b574f666c35b3d6cc67cb0b111060727e93a1e2866e346b33bff2a12"}, + {file = "opentelemetry_exporter_otlp_proto_common-1.28.2.tar.gz", hash = "sha256:7aebaa5fc9ff6029374546df1f3a62616fda07fccd9c6a8b7892ec130dd8baca"}, ] [package.dependencies] -backoff = {version = ">=1.10.0,<3.0.0", markers = "python_version >= \"3.7\""} -opentelemetry-proto = "1.20.0" +opentelemetry-proto = "1.28.2" [[package]] name = "opentelemetry-exporter-otlp-proto-grpc" -version = "1.20.0" +version = "1.28.2" description = "OpenTelemetry Collector Protobuf over gRPC Exporter" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_exporter_otlp_proto_grpc-1.20.0-py3-none-any.whl", hash = "sha256:7c3f066065891b56348ba2c7f9df6ec635a712841cae0a36f2f6a81642ae7dec"}, - {file = "opentelemetry_exporter_otlp_proto_grpc-1.20.0.tar.gz", hash = "sha256:6c06d43c3771bda1795226e327722b4b980fa1ca1ec9e985f2ef3e29795bdd52"}, + {file = "opentelemetry_exporter_otlp_proto_grpc-1.28.2-py3-none-any.whl", hash = "sha256:6083d9300863aab35bfce7c172d5fc1007686e6f8dff366eae460cd9a21592e2"}, + {file = "opentelemetry_exporter_otlp_proto_grpc-1.28.2.tar.gz", hash = "sha256:07c10378380bbb01a7f621a5ce833fc1fab816e971140cd3ea1cd587840bc0e6"}, ] [package.dependencies] -backoff = {version = ">=1.10.0,<3.0.0", markers = "python_version >= \"3.7\""} deprecated = ">=1.2.6" googleapis-common-protos = ">=1.52,<2.0" -grpcio = ">=1.0.0,<2.0.0" +grpcio = ">=1.63.2,<2.0.0" opentelemetry-api = ">=1.15,<2.0" -opentelemetry-exporter-otlp-proto-common = "1.20.0" -opentelemetry-proto = "1.20.0" -opentelemetry-sdk = ">=1.20.0,<1.21.0" - -[package.extras] -test = ["pytest-grpc"] +opentelemetry-exporter-otlp-proto-common = "1.28.2" +opentelemetry-proto = "1.28.2" +opentelemetry-sdk = ">=1.28.2,<1.29.0" [[package]] name = "opentelemetry-exporter-prometheus" -version = "0.41b0" +version = "0.49b2" description = "Prometheus Metric Exporter for OpenTelemetry" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_exporter_prometheus-0.41b0-py3-none-any.whl", hash = "sha256:ca996f3bc15b0cbf3abd798e786095a202650202a5c0edd9e34bb9186a247b79"}, - {file = 
"opentelemetry_exporter_prometheus-0.41b0.tar.gz", hash = "sha256:0cc58d5d10040e69090637803b97e120f558467037c88988742c80a627e7f1ed"}, + {file = "opentelemetry_exporter_prometheus-0.49b2-py3-none-any.whl", hash = "sha256:307594007ee20ec3a51c42548a4dbd66e46701f8523a7780d5e12a8f986a7783"}, + {file = "opentelemetry_exporter_prometheus-0.49b2.tar.gz", hash = "sha256:70ca3a462ce1ba0d756e4be8a87c04f7196687825fd2d151a428f6c18ef6fd2d"}, ] [package.dependencies] opentelemetry-api = ">=1.12,<2.0" -opentelemetry-sdk = ">=1.12,<2.0" +opentelemetry-sdk = ">=1.28.2,<1.29.0" prometheus-client = ">=0.5.0,<1.0.0" [[package]] name = "opentelemetry-instrumentation" -version = "0.41b0" +version = "0.49b2" description = "Instrumentation Tools & Auto Instrumentation for OpenTelemetry Python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_instrumentation-0.41b0-py3-none-any.whl", hash = "sha256:0ef9e5705ceca0205992a4a845ae4251ce6ec15a1206ca07c2b00afb0c5bd386"}, - {file = "opentelemetry_instrumentation-0.41b0.tar.gz", hash = "sha256:214382ba10dfd29d4e24898a4c7ef18b7368178a6277a1aec95cdb75cabf4612"}, + {file = "opentelemetry_instrumentation-0.49b2-py3-none-any.whl", hash = "sha256:f6d782b0ef9fef4a4c745298651c65f5c532c34cd4c40d230ab5b9f3b3b4d151"}, + {file = "opentelemetry_instrumentation-0.49b2.tar.gz", hash = "sha256:8cf00cc8d9d479e4b72adb9bd267ec544308c602b7188598db5a687e77b298e2"}, ] [package.dependencies] opentelemetry-api = ">=1.4,<2.0" -setuptools = ">=16.0" +opentelemetry-semantic-conventions = "0.49b2" +packaging = ">=18.0" wrapt = ">=1.0.0,<2.0.0" [[package]] name = "opentelemetry-instrumentation-aiohttp-client" -version = "0.41b0" +version = "0.49b2" description = "OpenTelemetry aiohttp client instrumentation" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_instrumentation_aiohttp_client-0.41b0-py3-none-any.whl", hash = "sha256:a1d0d18dee5e57cf9187d1a561f9d4ce56d16433231208405458358ff6399a6f"}, - {file = "opentelemetry_instrumentation_aiohttp_client-0.41b0.tar.gz", hash = "sha256:56fd35e90c2534b2647e7cdd85f34383eddaa300ee51e989c3763dcdb205ca91"}, + {file = "opentelemetry_instrumentation_aiohttp_client-0.49b2-py3-none-any.whl", hash = "sha256:d1748b9e179ab544568be4403fa790dad13f447b70bc34cba01ab9b1ada63343"}, + {file = "opentelemetry_instrumentation_aiohttp_client-0.49b2.tar.gz", hash = "sha256:46df2cf68de8c0787b57e925d7764acb0db8bd5f9a9446b1bf470b63f782e762"}, ] [package.dependencies] opentelemetry-api = ">=1.12,<2.0" -opentelemetry-instrumentation = "0.41b0" -opentelemetry-semantic-conventions = "0.41b0" -opentelemetry-util-http = "0.41b0" +opentelemetry-instrumentation = "0.49b2" +opentelemetry-semantic-conventions = "0.49b2" +opentelemetry-util-http = "0.49b2" wrapt = ">=1.0.0,<2.0.0" [package.extras] instruments = ["aiohttp (>=3.0,<4.0)"] -test = ["http-server-mock", "opentelemetry-instrumentation-aiohttp-client[instruments]"] [[package]] name = "opentelemetry-instrumentation-asgi" -version = "0.41b0" +version = "0.49b2" description = "ASGI instrumentation for OpenTelemetry" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_instrumentation_asgi-0.41b0-py3-none-any.whl", hash = "sha256:46084195fb9c50507abbe1dd490ae4c31c8658c5790f1ddf7af95c417dbe6422"}, - {file = "opentelemetry_instrumentation_asgi-0.41b0.tar.gz", hash = "sha256:921244138b37a9a25edf2153f1c248f16f98610ee8d840b25fd7bf6b165e4d72"}, + {file = 
"opentelemetry_instrumentation_asgi-0.49b2-py3-none-any.whl", hash = "sha256:c8ede13ed781402458a800411cb7ec16a25386dc21de8e5b9a568b386a1dc5f4"}, + {file = "opentelemetry_instrumentation_asgi-0.49b2.tar.gz", hash = "sha256:2af5faf062878330714efe700127b837038c4d9d3b70b451ab2424d5076d6c1c"}, ] [package.dependencies] asgiref = ">=3.0,<4.0" opentelemetry-api = ">=1.12,<2.0" -opentelemetry-instrumentation = "0.41b0" -opentelemetry-semantic-conventions = "0.41b0" -opentelemetry-util-http = "0.41b0" +opentelemetry-instrumentation = "0.49b2" +opentelemetry-semantic-conventions = "0.49b2" +opentelemetry-util-http = "0.49b2" [package.extras] instruments = ["asgiref (>=3.0,<4.0)"] -test = ["opentelemetry-instrumentation-asgi[instruments]", "opentelemetry-test-utils (==0.41b0)"] [[package]] name = "opentelemetry-instrumentation-fastapi" -version = "0.41b0" +version = "0.49b2" description = "OpenTelemetry FastAPI Instrumentation" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_instrumentation_fastapi-0.41b0-py3-none-any.whl", hash = "sha256:5990368e99ecc989df0a248a0b9b8e85d8b3eb7c1dbf5131c36982ba7f4a43b7"}, - {file = "opentelemetry_instrumentation_fastapi-0.41b0.tar.gz", hash = "sha256:eb4ceefe8b944fc9ea5e61fa558b99afd1285431b563f3f0104ac177cde4dfe5"}, + {file = "opentelemetry_instrumentation_fastapi-0.49b2-py3-none-any.whl", hash = "sha256:c66331d05bf806d7ca4f9579c1db7383aad31a9f6665dbaa2b7c9a4c1e830892"}, + {file = "opentelemetry_instrumentation_fastapi-0.49b2.tar.gz", hash = "sha256:3aa81ed7acf6aa5236d96e90a1218c5e84a9c0dce8fa63bf34ceee6218354b63"}, ] [package.dependencies] opentelemetry-api = ">=1.12,<2.0" -opentelemetry-instrumentation = "0.41b0" -opentelemetry-instrumentation-asgi = "0.41b0" -opentelemetry-semantic-conventions = "0.41b0" -opentelemetry-util-http = "0.41b0" +opentelemetry-instrumentation = "0.49b2" +opentelemetry-instrumentation-asgi = "0.49b2" +opentelemetry-semantic-conventions = "0.49b2" +opentelemetry-util-http = "0.49b2" [package.extras] instruments = ["fastapi (>=0.58,<1.0)"] -test = ["httpx (>=0.22,<1.0)", "opentelemetry-instrumentation-fastapi[instruments]", "opentelemetry-test-utils (==0.41b0)", "requests (>=2.23,<3.0)"] [[package]] name = "opentelemetry-instrumentation-httpx" -version = "0.41b0" +version = "0.49b2" description = "OpenTelemetry HTTPX Instrumentation" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_instrumentation_httpx-0.41b0-py3-none-any.whl", hash = "sha256:6ada84b7caa95a2889b2d883c089a977546b0102c815658b88f1c2dae713e9b2"}, - {file = "opentelemetry_instrumentation_httpx-0.41b0.tar.gz", hash = "sha256:96ebc54f3f41bfcd2fc043349c8cee4b11737602512383d437e24c39a1e4adff"}, + {file = "opentelemetry_instrumentation_httpx-0.49b2-py3-none-any.whl", hash = "sha256:08111e6c8d11495dee7ef2243bc2e9acc09c16be8c6f4dd32f939f2b08f30af5"}, + {file = "opentelemetry_instrumentation_httpx-0.49b2.tar.gz", hash = "sha256:4330f56b0ad382843a1e8fe6179d20c2d2be3ee78e60b9f01ee892b1600de44f"}, ] [package.dependencies] opentelemetry-api = ">=1.12,<2.0" -opentelemetry-instrumentation = "0.41b0" -opentelemetry-semantic-conventions = "0.41b0" +opentelemetry-instrumentation = "0.49b2" +opentelemetry-semantic-conventions = "0.49b2" +opentelemetry-util-http = "0.49b2" +wrapt = ">=1.0.0,<2.0.0" [package.extras] instruments = ["httpx (>=0.18.0)"] -test = ["opentelemetry-instrumentation-httpx[instruments]", "opentelemetry-sdk (>=1.12,<2.0)", "opentelemetry-test-utils (==0.41b0)"] 
[[package]] name = "opentelemetry-instrumentation-logging" -version = "0.41b0" +version = "0.49b2" description = "OpenTelemetry Logging instrumentation" optional = false -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_instrumentation_logging-0.41b0-py2.py3-none-any.whl", hash = "sha256:ab7117886695c32eb30d7a59199292283c5e652e2b9f2d11874fe4359eacc16a"}, - {file = "opentelemetry_instrumentation_logging-0.41b0.tar.gz", hash = "sha256:8ad46e011a99df726323428f0d0a09bf68159ab776b8184ba6d83a7c44f7de81"}, + {file = "opentelemetry_instrumentation_logging-0.49b2-py3-none-any.whl", hash = "sha256:5ef73c37b34d8f564d37731cb399e7237636e2c8d7d97061d20526f6ece8afb1"}, + {file = "opentelemetry_instrumentation_logging-0.49b2.tar.gz", hash = "sha256:625c825cb180d1a4da8008af2dc21de5f668af120f3821af16317cd3a2378d7e"}, ] [package.dependencies] opentelemetry-api = ">=1.12,<2.0" -opentelemetry-instrumentation = "0.41b0" - -[package.extras] -test = ["opentelemetry-test-utils (==0.41b0)"] +opentelemetry-instrumentation = "0.49b2" [[package]] name = "opentelemetry-instrumentation-requests" -version = "0.41b0" +version = "0.49b2" description = "OpenTelemetry requests instrumentation" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_instrumentation_requests-0.41b0-py3-none-any.whl", hash = "sha256:687fde31111669e729054e64d246c96b0b9d4d8702bd0e3569b7660bdb528d71"}, - {file = "opentelemetry_instrumentation_requests-0.41b0.tar.gz", hash = "sha256:bdc5515ae7533e620b312fd989941b7c2c92d492a2d4418f6ef8db5d7422fa64"}, + {file = "opentelemetry_instrumentation_requests-0.49b2-py3-none-any.whl", hash = "sha256:d49b0022b29fb7f07a38b8e68750304c29a6d6114b94b56e3e811eff59efd318"}, + {file = "opentelemetry_instrumentation_requests-0.49b2.tar.gz", hash = "sha256:ea7216f13f42d3220ccd60cefd104fae656c9206bf5e3030d59fa367a9452e99"}, ] [package.dependencies] opentelemetry-api = ">=1.12,<2.0" -opentelemetry-instrumentation = "0.41b0" -opentelemetry-semantic-conventions = "0.41b0" -opentelemetry-util-http = "0.41b0" +opentelemetry-instrumentation = "0.49b2" +opentelemetry-semantic-conventions = "0.49b2" +opentelemetry-util-http = "0.49b2" [package.extras] instruments = ["requests (>=2.0,<3.0)"] -test = ["httpretty (>=1.0,<2.0)", "opentelemetry-instrumentation-requests[instruments]", "opentelemetry-test-utils (==0.41b0)"] [[package]] name = "opentelemetry-instrumentation-system-metrics" -version = "0.41b0" +version = "0.49b2" description = "OpenTelemetry System Metrics Instrumentation" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_instrumentation_system_metrics-0.41b0-py3-none-any.whl", hash = "sha256:4f2106cf4b77664eb9096727eaba4ccffe28ebf426068b19aa7289644d4b9680"}, - {file = "opentelemetry_instrumentation_system_metrics-0.41b0.tar.gz", hash = "sha256:727193655d81d31a89e118d905a2691e80d967993ae62bac96979a373f59485a"}, + {file = "opentelemetry_instrumentation_system_metrics-0.49b2-py3-none-any.whl", hash = "sha256:b599dbfba4ba977a8c248b74b18f147da314dbf2b780f4123e16c50e9c4f1948"}, + {file = "opentelemetry_instrumentation_system_metrics-0.49b2.tar.gz", hash = "sha256:2ef4949c0c0f64e6b7437b8d23e0ee57245ab3d0d38501157bb93f4e4151207c"}, ] [package.dependencies] opentelemetry-api = ">=1.11,<2.0" -opentelemetry-sdk = ">=1.11,<2.0" -psutil = ">=5.9,<6.0" +opentelemetry-instrumentation = "0.49b2" +psutil = ">=5.9.0,<7" [package.extras] instruments = ["psutil (>=5)"] -test = 
["opentelemetry-instrumentation-system-metrics[instruments]", "opentelemetry-test-utils (==0.41b0)"] [[package]] name = "opentelemetry-instrumentation-urllib" -version = "0.41b0" +version = "0.49b2" description = "OpenTelemetry urllib instrumentation" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_instrumentation_urllib-0.41b0-py3-none-any.whl", hash = "sha256:cee9e95f55a73480df0915358ce8668bbeda53324c9426847e2ccaea0cac1a87"}, - {file = "opentelemetry_instrumentation_urllib-0.41b0.tar.gz", hash = "sha256:113416b8bd9c2d5c890cb6f86737886e209a3776c2ecdc023887bd78634d5ef3"}, + {file = "opentelemetry_instrumentation_urllib-0.49b2-py3-none-any.whl", hash = "sha256:30e8eb408a93e0b09bb67e37c8b2c074ea3d55613467be6dc4be08f034a0d3e2"}, + {file = "opentelemetry_instrumentation_urllib-0.49b2.tar.gz", hash = "sha256:a148c073de6a1bbaf3ca423e639840cd2aa554eb0dda95077c111e46ea91d8ca"}, ] [package.dependencies] opentelemetry-api = ">=1.12,<2.0" -opentelemetry-instrumentation = "0.41b0" -opentelemetry-semantic-conventions = "0.41b0" -opentelemetry-util-http = "0.41b0" - -[package.extras] -test = ["httpretty (>=1.0,<2.0)", "opentelemetry-test-utils (==0.41b0)"] +opentelemetry-instrumentation = "0.49b2" +opentelemetry-semantic-conventions = "0.49b2" +opentelemetry-util-http = "0.49b2" [[package]] name = "opentelemetry-proto" -version = "1.20.0" +version = "1.28.2" description = "OpenTelemetry Python Proto" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_proto-1.20.0-py3-none-any.whl", hash = "sha256:512c3d2c6864fb7547a69577c3907348e6c985b7a204533563cb4c4c5046203b"}, - {file = "opentelemetry_proto-1.20.0.tar.gz", hash = "sha256:cf01f49b3072ee57468bccb1a4f93bdb55411f4512d0ac3f97c5c04c0040b5a2"}, + {file = "opentelemetry_proto-1.28.2-py3-none-any.whl", hash = "sha256:0837498f59db55086462915e5898d0b1a18c1392f6db4d7e937143072a72370c"}, + {file = "opentelemetry_proto-1.28.2.tar.gz", hash = "sha256:7c0d125a6b71af88bfeeda16bfdd0ff63dc2cf0039baf6f49fa133b203e3f566"}, ] [package.dependencies] -protobuf = ">=3.19,<5.0" +protobuf = ">=5.0,<6.0" [[package]] name = "opentelemetry-sdk" -version = "1.20.0" +version = "1.28.2" description = "OpenTelemetry Python SDK" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_sdk-1.20.0-py3-none-any.whl", hash = "sha256:f2230c276ff4c63ea09b3cb2e2ac6b1265f90af64e8d16bbf275c81a9ce8e804"}, - {file = "opentelemetry_sdk-1.20.0.tar.gz", hash = "sha256:702e432a457fa717fd2ddfd30640180e69938f85bb7fec3e479f85f61c1843f8"}, + {file = "opentelemetry_sdk-1.28.2-py3-none-any.whl", hash = "sha256:93336c129556f1e3ccd21442b94d3521759541521861b2214c499571b85cb71b"}, + {file = "opentelemetry_sdk-1.28.2.tar.gz", hash = "sha256:5fed24c5497e10df30282456fe2910f83377797511de07d14cec0d3e0a1a3110"}, ] [package.dependencies] -opentelemetry-api = "1.20.0" -opentelemetry-semantic-conventions = "0.41b0" +opentelemetry-api = "1.28.2" +opentelemetry-semantic-conventions = "0.49b2" typing-extensions = ">=3.7.4" [[package]] name = "opentelemetry-semantic-conventions" -version = "0.41b0" +version = "0.49b2" description = "OpenTelemetry Semantic Conventions" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_semantic_conventions-0.41b0-py3-none-any.whl", hash = "sha256:45404391ed9e50998183a4925ad1b497c01c143f06500c3b9c3d0013492bb0f2"}, - {file = "opentelemetry_semantic_conventions-0.41b0.tar.gz", 
hash = "sha256:0ce5b040b8a3fc816ea5879a743b3d6fe5db61f6485e4def94c6ee4d402e1eb7"}, + {file = "opentelemetry_semantic_conventions-0.49b2-py3-none-any.whl", hash = "sha256:51e7e1d0daa958782b6c2a8ed05e5f0e7dd0716fc327ac058777b8659649ee54"}, + {file = "opentelemetry_semantic_conventions-0.49b2.tar.gz", hash = "sha256:44e32ce6a5bb8d7c0c617f84b9dc1c8deda1045a07dc16a688cc7cbeab679997"}, ] +[package.dependencies] +deprecated = ">=1.2.6" +opentelemetry-api = "1.28.2" + [[package]] name = "opentelemetry-util-http" -version = "0.41b0" +version = "0.49b2" description = "Web util for OpenTelemetry" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_util_http-0.41b0-py3-none-any.whl", hash = "sha256:6a167fd1e0e8b0f629530d971165b5d82ed0be2154b7f29498499c3a517edee5"}, - {file = "opentelemetry_util_http-0.41b0.tar.gz", hash = "sha256:16d5bd04a380dc1079e766562d1e1626cbb47720f197f67010c45f090fffdfb3"}, + {file = "opentelemetry_util_http-0.49b2-py3-none-any.whl", hash = "sha256:e325d6511c6bee7b43170eb0c93261a210ec57e20ab1d7a99838515ef6d2bf58"}, + {file = "opentelemetry_util_http-0.49b2.tar.gz", hash = "sha256:5958c7009f79146bbe98b0fdb23d9d7bf1ea9cd154a1c199029b1a89e0557199"}, ] [[package]] @@ -1802,22 +1757,22 @@ files = [ [[package]] name = "protobuf" -version = "4.25.4" +version = "5.29.0" description = "" optional = false python-versions = ">=3.8" files = [ - {file = "protobuf-4.25.4-cp310-abi3-win32.whl", hash = "sha256:db9fd45183e1a67722cafa5c1da3e85c6492a5383f127c86c4c4aa4845867dc4"}, - {file = "protobuf-4.25.4-cp310-abi3-win_amd64.whl", hash = "sha256:ba3d8504116a921af46499471c63a85260c1a5fc23333154a427a310e015d26d"}, - {file = "protobuf-4.25.4-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:eecd41bfc0e4b1bd3fa7909ed93dd14dd5567b98c941d6c1ad08fdcab3d6884b"}, - {file = "protobuf-4.25.4-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:4c8a70fdcb995dcf6c8966cfa3a29101916f7225e9afe3ced4395359955d3835"}, - {file = "protobuf-4.25.4-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:3319e073562e2515c6ddc643eb92ce20809f5d8f10fead3332f71c63be6a7040"}, - {file = "protobuf-4.25.4-cp38-cp38-win32.whl", hash = "sha256:7e372cbbda66a63ebca18f8ffaa6948455dfecc4e9c1029312f6c2edcd86c4e1"}, - {file = "protobuf-4.25.4-cp38-cp38-win_amd64.whl", hash = "sha256:051e97ce9fa6067a4546e75cb14f90cf0232dcb3e3d508c448b8d0e4265b61c1"}, - {file = "protobuf-4.25.4-cp39-cp39-win32.whl", hash = "sha256:90bf6fd378494eb698805bbbe7afe6c5d12c8e17fca817a646cd6a1818c696ca"}, - {file = "protobuf-4.25.4-cp39-cp39-win_amd64.whl", hash = "sha256:ac79a48d6b99dfed2729ccccee547b34a1d3d63289c71cef056653a846a2240f"}, - {file = "protobuf-4.25.4-py3-none-any.whl", hash = "sha256:bfbebc1c8e4793cfd58589acfb8a1026be0003e852b9da7db5a4285bde996978"}, - {file = "protobuf-4.25.4.tar.gz", hash = "sha256:0dc4a62cc4052a036ee2204d26fe4d835c62827c855c8a03f29fe6da146b380d"}, + {file = "protobuf-5.29.0-cp310-abi3-win32.whl", hash = "sha256:ea7fb379b257911c8c020688d455e8f74efd2f734b72dc1ea4b4d7e9fd1326f2"}, + {file = "protobuf-5.29.0-cp310-abi3-win_amd64.whl", hash = "sha256:34a90cf30c908f47f40ebea7811f743d360e202b6f10d40c02529ebd84afc069"}, + {file = "protobuf-5.29.0-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:c931c61d0cc143a2e756b1e7f8197a508de5365efd40f83c907a9febf36e6b43"}, + {file = "protobuf-5.29.0-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:85286a47caf63b34fa92fdc1fd98b649a8895db595cfa746c5286eeae890a0b1"}, + {file = "protobuf-5.29.0-cp38-abi3-manylinux2014_x86_64.whl", hash = 
"sha256:0d10091d6d03537c3f902279fcf11e95372bdd36a79556311da0487455791b20"}, + {file = "protobuf-5.29.0-cp38-cp38-win32.whl", hash = "sha256:0cd67a1e5c2d88930aa767f702773b2d054e29957432d7c6a18f8be02a07719a"}, + {file = "protobuf-5.29.0-cp38-cp38-win_amd64.whl", hash = "sha256:e467f81fdd12ded9655cea3e9b83dc319d93b394ce810b556fb0f421d8613e86"}, + {file = "protobuf-5.29.0-cp39-cp39-win32.whl", hash = "sha256:17d128eebbd5d8aee80300aed7a43a48a25170af3337f6f1333d1fac2c6839ac"}, + {file = "protobuf-5.29.0-cp39-cp39-win_amd64.whl", hash = "sha256:6c3009e22717c6cc9e6594bb11ef9f15f669b19957ad4087214d69e08a213368"}, + {file = "protobuf-5.29.0-py3-none-any.whl", hash = "sha256:88c4af76a73183e21061881360240c0cdd3c39d263b4e8fb570aaf83348d608f"}, + {file = "protobuf-5.29.0.tar.gz", hash = "sha256:445a0c02483869ed8513a585d80020d012c6dc60075f96fa0563a724987b1001"}, ] [[package]] @@ -2568,4 +2523,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.13" -content-hash = "307c14e21b2fc8b1598cd8b903489c48c6d6367a44bfb6e764d02a45bc6dd9fb" +content-hash = "fc8455a94cc695d7db784c43fa35e441e3919e5f6819fdcfa743e23c2661e28b" diff --git a/pyproject.toml b/pyproject.toml index 024575b..e6da0b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ aiohttp = "^3.10.11" numpy = "^1.26.0" pillow = "^10.3.0" azure-identity = "^1.16.1" -aidial-sdk = {version = "^0.15.0", extras = ["telemetry"]} +aidial-sdk = {version = "^0.16.0", extras = ["telemetry"]} [tool.poetry.group.test.dependencies] pytest = "7.4.0" diff --git a/tests/test_errors.py b/tests/test_errors.py index dcdafce..59cf255 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -466,7 +466,6 @@ async def test_timeout_error_from_upstream(test_app: httpx.AsyncClient): }, ) - assert response.status_code == 504 assert response.json() == { "error": { "message": "Request timed out", @@ -475,6 +474,7 @@ async def test_timeout_error_from_upstream(test_app: httpx.AsyncClient): "display_message": "Request timed out. 
Please try again later.", } } + assert response.status_code == 504 @respx.mock From 820078a22304df2f67e0873e0c40db25d061642f Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Thu, 5 Dec 2024 09:59:17 +0000 Subject: [PATCH 02/14] fix: fixed unit tests for errors --- .vscode/settings.json | 4 +- aidial_adapter_openai/exception_handlers.py | 100 ++++++++++++++------ 2 files changed, 76 insertions(+), 28 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index ab64be2..d62db15 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -8,7 +8,9 @@ }, "editor.tabSize": 4 }, - "python.testing.pytestArgs": ["."], + "python.testing.pytestArgs": [ + "tests" + ], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, "python.analysis.typeCheckingMode": "basic" diff --git a/aidial_adapter_openai/exception_handlers.py b/aidial_adapter_openai/exception_handlers.py index 3eb0a5b..f4a66e8 100644 --- a/aidial_adapter_openai/exception_handlers.py +++ b/aidial_adapter_openai/exception_handlers.py @@ -1,7 +1,9 @@ -from typing import Dict +import json +from typing import Any, Dict from aidial_sdk.exceptions import HTTPException as DialException from aidial_sdk.exceptions import InternalServerError +from fastapi import HTTPException as FastAPIException from fastapi import Request from fastapi.responses import Response from openai import ( @@ -11,16 +13,57 @@ APITimeoutError, OpenAIError, ) +from typing_extensions import override + + +class PlainDialException(DialException): + content: Any + + def __init__( + self, + *, + content: Any, + status_code: int, + headers: Dict[str, str] | None, + ) -> None: + super().__init__( + message=str(content), + status_code=status_code, + headers=headers, + ) + self.content = content + + @override + def to_fastapi_response(self) -> Response: # type: ignore + return Response( + status_code=self.status_code, + content=self.content, + headers=self.headers, + ) + + @override + def to_fastapi_exception(self) -> FastAPIException: + return FastAPIException( + status_code=self.status_code, + detail=self.content, + headers=self.headers, + ) def _parse_dial_exception( - status_code: int, - content: dict | str, - headers: Dict[str, str] | None = None, -) -> DialException: + *, status_code: int, headers: Dict[str, str], content: Any +) -> DialException | None: + if isinstance(content, str): + try: + obj = json.loads(content) + except Exception: + return None + else: + obj = content + if ( - isinstance(content, dict) - and (error := content.get("error")) + isinstance(obj, dict) + and (error := obj.get("error")) and isinstance(error, dict) ): message = error.get("message") or "Unknown error" @@ -38,20 +81,15 @@ def _parse_dial_exception( display_message=display_message, headers=headers, ) - else: - return DialException( - status_code=status_code, - message=str(content), - headers=headers, - ) + + return None def to_dial_exception(exc: Exception) -> DialException: if isinstance(exc, APIStatusError): # Non-streaming errors reported by `openai` library via this exception - r = exc.response - headers = r.headers + httpx_headers = r.headers # httpx library (used by openai) automatically sets # "Accept-Encoding:gzip,deflate" header in requests to the upstream. @@ -59,19 +97,20 @@ def to_dial_exception(exc: Exception) -> DialException: # response along with "Content-Encoding:gzip" header. # We either need to encode the response, or # remove the "Content-Encoding" header. 
- if "Content-Encoding" in headers: - del headers["Content-Encoding"] + if "Content-Encoding" in httpx_headers: + del httpx_headers["Content-Encoding"] - plain_headers = {k.decode(): v.decode() for k, v in headers.raw} - - try: - content = r.json() - except Exception: - content = r.text + headers = {k.decode(): v.decode() for k, v in httpx_headers.raw} + status_code = r.status_code + content = r.text return _parse_dial_exception( - status_code=r.status_code, - headers=plain_headers, + status_code=status_code, + headers=headers, + content=content, + ) or PlainDialException( + status_code=status_code, + headers=headers, content=content, ) @@ -100,10 +139,17 @@ def to_dial_exception(exc: Exception) -> DialException: except Exception: pass + headers = {} + content = {"error": exc.body or {}} + return _parse_dial_exception( status_code=status_code, - headers={}, - content={"error": exc.body or {}}, + headers=headers, + content=content, + ) or PlainDialException( + status_code=status_code, + headers=headers, + content=content, ) if isinstance(exc, DialException): From 3956b16d5507056be8813b23a4db7acf12d9802e Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Thu, 5 Dec 2024 10:26:55 +0000 Subject: [PATCH 03/14] feat: introduced AdapterException class --- aidial_adapter_openai/exception_handlers.py | 106 ++---------------- .../utils/adapter_exception.py | 103 +++++++++++++++++ aidial_adapter_openai/utils/sse_stream.py | 10 +- aidial_adapter_openai/utils/streaming.py | 7 +- 4 files changed, 125 insertions(+), 101 deletions(-) create mode 100644 aidial_adapter_openai/utils/adapter_exception.py diff --git a/aidial_adapter_openai/exception_handlers.py b/aidial_adapter_openai/exception_handlers.py index f4a66e8..635c718 100644 --- a/aidial_adapter_openai/exception_handlers.py +++ b/aidial_adapter_openai/exception_handlers.py @@ -1,9 +1,5 @@ -import json -from typing import Any, Dict - from aidial_sdk.exceptions import HTTPException as DialException from aidial_sdk.exceptions import InternalServerError -from fastapi import HTTPException as FastAPIException from fastapi import Request from fastapi.responses import Response from openai import ( @@ -13,79 +9,14 @@ APITimeoutError, OpenAIError, ) -from typing_extensions import override - - -class PlainDialException(DialException): - content: Any - - def __init__( - self, - *, - content: Any, - status_code: int, - headers: Dict[str, str] | None, - ) -> None: - super().__init__( - message=str(content), - status_code=status_code, - headers=headers, - ) - self.content = content - - @override - def to_fastapi_response(self) -> Response: # type: ignore - return Response( - status_code=self.status_code, - content=self.content, - headers=self.headers, - ) - - @override - def to_fastapi_exception(self) -> FastAPIException: - return FastAPIException( - status_code=self.status_code, - detail=self.content, - headers=self.headers, - ) - - -def _parse_dial_exception( - *, status_code: int, headers: Dict[str, str], content: Any -) -> DialException | None: - if isinstance(content, str): - try: - obj = json.loads(content) - except Exception: - return None - else: - obj = content - - if ( - isinstance(obj, dict) - and (error := obj.get("error")) - and isinstance(error, dict) - ): - message = error.get("message") or "Unknown error" - code = error.get("code") - type = error.get("type") - param = error.get("param") - display_message = error.get("display_message") - return DialException( - status_code=status_code, - message=message, - type=type, - param=param, - code=code, - 
display_message=display_message, - headers=headers, - ) - - return None +from aidial_adapter_openai.utils.adapter_exception import ( + AdapterException, + parse_adapter_exception, +) -def to_dial_exception(exc: Exception) -> DialException: +def to_adapter_exception(exc: Exception) -> AdapterException: if isinstance(exc, APIStatusError): # Non-streaming errors reported by `openai` library via this exception r = exc.response @@ -101,17 +32,11 @@ def to_dial_exception(exc: Exception) -> DialException: del httpx_headers["Content-Encoding"] headers = {k.decode(): v.decode() for k, v in httpx_headers.raw} - status_code = r.status_code - content = r.text - return _parse_dial_exception( - status_code=status_code, + return parse_adapter_exception( + status_code=r.status_code, headers=headers, - content=content, - ) or PlainDialException( - status_code=status_code, - headers=headers, - content=content, + content=r.text, ) if isinstance(exc, APITimeoutError): @@ -139,17 +64,10 @@ def to_dial_exception(exc: Exception) -> DialException: except Exception: pass - headers = {} - content = {"error": exc.body or {}} - - return _parse_dial_exception( + return parse_adapter_exception( status_code=status_code, - headers=headers, - content=content, - ) or PlainDialException( - status_code=status_code, - headers=headers, - content=content, + headers={}, + content={"error": exc.body or {}}, ) if isinstance(exc, DialException): @@ -160,4 +78,4 @@ def to_dial_exception(exc: Exception) -> DialException: def openai_exception_handler(request: Request, exc: Exception) -> Response: assert isinstance(exc, OpenAIError) - return to_dial_exception(exc).to_fastapi_response() + return to_adapter_exception(exc).to_fastapi_response() diff --git a/aidial_adapter_openai/utils/adapter_exception.py b/aidial_adapter_openai/utils/adapter_exception.py new file mode 100644 index 0000000..8715131 --- /dev/null +++ b/aidial_adapter_openai/utils/adapter_exception.py @@ -0,0 +1,103 @@ +import json +from typing import Any, Dict + +from aidial_sdk.exceptions import HTTPException as DialException +from fastapi import HTTPException as FastAPIException +from fastapi.responses import Response as FastAPIResponse + + +class ResponseWrapper(Exception): + content: Any + status_code: int + headers: Dict[str, str] | None + + def __init__( + self, + *, + content: Any, + status_code: int, + headers: Dict[str, str] | None, + ) -> None: + super().__init__(str(content)) + self.content = content + self.status_code = status_code + self.headers = headers + + def __repr__(self): + # headers field is omitted deliberately + # since it may contain sensitive information + return "%s(content=%r, status_code=%r)" % ( + self.__class__.__name__, + self.content, + self.status_code, + ) + + def to_fastapi_response(self) -> FastAPIResponse: + return FastAPIResponse( + status_code=self.status_code, + content=self.content, + headers=self.headers, + ) + + def to_fastapi_exception(self) -> FastAPIException: + return FastAPIException( + status_code=self.status_code, + detail=self.content, + headers=self.headers, + ) + + def json_error(self) -> dict: + return { + "error": { + "message": str(self.content), + "code": int(self.status_code), + } + } + + +AdapterException = ResponseWrapper | DialException + + +def _parse_dial_exception( + *, status_code: int, headers: Dict[str, str], content: Any +) -> DialException | None: + if isinstance(content, str): + try: + obj = json.loads(content) + except Exception: + return None + else: + obj = content + + if ( + isinstance(obj, dict) + 
and (error := obj.get("error")) + and isinstance(error, dict) + ): + message = error.get("message") or "Unknown error" + code = error.get("code") + type = error.get("type") + param = error.get("param") + display_message = error.get("display_message") + + return DialException( + status_code=status_code, + message=message, + type=type, + param=param, + code=code, + display_message=display_message, + headers=headers, + ) + + return None + + +def parse_adapter_exception( + *, status_code: int, headers: Dict[str, str], content: Any +) -> AdapterException: + return _parse_dial_exception( + status_code=status_code, headers=headers, content=content + ) or ResponseWrapper( + status_code=status_code, headers=headers, content=content + ) diff --git a/aidial_adapter_openai/utils/sse_stream.py b/aidial_adapter_openai/utils/sse_stream.py index b4a6ce9..68cfe53 100644 --- a/aidial_adapter_openai/utils/sse_stream.py +++ b/aidial_adapter_openai/utils/sse_stream.py @@ -3,7 +3,7 @@ from aidial_sdk.exceptions import runtime_server_error -from aidial_adapter_openai.exception_handlers import to_dial_exception +from aidial_adapter_openai.exception_handlers import to_adapter_exception from aidial_adapter_openai.utils.log_config import logger DATA_PREFIX = "data: " @@ -64,9 +64,11 @@ async def to_openai_sse_stream( f"caught exception while streaming: {type(e).__module__}.{type(e).__name__}" ) - dial_exception = to_dial_exception(e) - logger.exception(f"converted to the dial exception: {dial_exception!r}") + adapter_exception = to_adapter_exception(e) + logger.exception( + f"converted to the adapter exception: {adapter_exception!r}" + ) - yield format_chunk(dial_exception.json_error()) + yield format_chunk(adapter_exception.json_error()) yield END_CHUNK diff --git a/aidial_adapter_openai/utils/streaming.py b/aidial_adapter_openai/utils/streaming.py index e54f5dd..c9bd2d3 100644 --- a/aidial_adapter_openai/utils/streaming.py +++ b/aidial_adapter_openai/utils/streaming.py @@ -9,7 +9,8 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk from pydantic import BaseModel -from aidial_adapter_openai.exception_handlers import to_dial_exception +from aidial_adapter_openai.exception_handlers import to_adapter_exception +from aidial_adapter_openai.utils.adapter_exception import AdapterException from aidial_adapter_openai.utils.chat_completion_response import ( ChatCompletionResponse, ChatCompletionStreamingChunk, @@ -98,7 +99,7 @@ def set_discarded_messages(chunk: dict | None, indices: list[int]) -> dict: buffer_chunk = None response_snapshot = ChatCompletionStreamingChunk() - error: DialException | None = None + error: AdapterException | None = None try: async for chunk in stream: @@ -122,7 +123,7 @@ def set_discarded_messages(chunk: dict | None, indices: list[int]) -> dict: last_chunk = chunk except Exception as e: - error = to_dial_exception(e) + error = to_adapter_exception(e) if last_chunk is not None and buffer_chunk is not None: last_chunk = merge_chat_completion_chunks(last_chunk, buffer_chunk) From 9bb2b6c98b58fc65f0669d48c8482d0135296f98 Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Thu, 5 Dec 2024 10:31:13 +0000 Subject: [PATCH 04/14] feat: added a test checking propagation of headers --- tests/test_errors.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/tests/test_errors.py b/tests/test_errors.py index 59cf255..77f8f4d 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -447,7 +447,34 @@ async def 
test_status_error_from_upstream(test_app: httpx.AsyncClient): ) assert response.status_code == 400 - assert response.content == b"Bad request" + assert response.text == "Bad request" + + +@respx.mock +@pytest.mark.asyncio +async def test_status_error_from_upstream_with_headers( + test_app: httpx.AsyncClient, +): + respx.post( + "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview" + ).respond( + status_code=429, + content="Too many requests", + headers={"Retry-After": "42"}, + ) + + response = await test_app.post( + "/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview", + json={"messages": [{"role": "user", "content": "Test content"}]}, + headers={ + "X-UPSTREAM-KEY": "TEST_API_KEY", + "X-UPSTREAM-ENDPOINT": "http://localhost:5001/openai/deployments/gpt-4/chat/completions", + }, + ) + + assert response.status_code == 429 + assert response.text == "Too many requests" + assert response.headers["Retry-After"] == "42" @respx.mock @@ -466,6 +493,7 @@ async def test_timeout_error_from_upstream(test_app: httpx.AsyncClient): }, ) + assert response.status_code == 504 assert response.json() == { "error": { "message": "Request timed out", @@ -474,7 +502,6 @@ async def test_timeout_error_from_upstream(test_app: httpx.AsyncClient): "display_message": "Request timed out. Please try again later.", } } - assert response.status_code == 504 @respx.mock @@ -504,7 +531,6 @@ async def test_connection_error_from_upstream(test_app: httpx.AsyncClient): } -@respx.mock @pytest.mark.asyncio async def test_incorrect_streaming_request(test_app: httpx.AsyncClient): response = await test_app.post( From 848f78c5c290d904f589e9b8ae9e85ca61def75b Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Thu, 5 Dec 2024 11:02:41 +0000 Subject: [PATCH 05/14] chore: added test for error during streaming --- aidial_adapter_openai/utils/sse_stream.py | 2 +- aidial_adapter_openai/utils/streaming.py | 6 +++ aidial_adapter_openai/utils/tokenizer.py | 4 +- tests/test_errors.py | 49 ++++++++++++++++++++++- 4 files changed, 56 insertions(+), 5 deletions(-) diff --git a/aidial_adapter_openai/utils/sse_stream.py b/aidial_adapter_openai/utils/sse_stream.py index 68cfe53..3b02b29 100644 --- a/aidial_adapter_openai/utils/sse_stream.py +++ b/aidial_adapter_openai/utils/sse_stream.py @@ -65,7 +65,7 @@ async def to_openai_sse_stream( ) adapter_exception = to_adapter_exception(e) - logger.exception( + logger.error( f"converted to the adapter exception: {adapter_exception!r}" ) diff --git a/aidial_adapter_openai/utils/streaming.py b/aidial_adapter_openai/utils/streaming.py index c9bd2d3..1258c62 100644 --- a/aidial_adapter_openai/utils/streaming.py +++ b/aidial_adapter_openai/utils/streaming.py @@ -123,8 +123,14 @@ def set_discarded_messages(chunk: dict | None, indices: list[int]) -> dict: last_chunk = chunk except Exception as e: + logger.exception( + f"caught exception while streaming: {type(e).__module__}.{type(e).__name__}" + ) + error = to_adapter_exception(e) + logger.error(f"converted to the adapter exception: {error!r}") + if last_chunk is not None and buffer_chunk is not None: last_chunk = merge_chat_completion_chunks(last_chunk, buffer_chunk) diff --git a/aidial_adapter_openai/utils/tokenizer.py b/aidial_adapter_openai/utils/tokenizer.py index c177a7b..63d0871 100644 --- a/aidial_adapter_openai/utils/tokenizer.py +++ b/aidial_adapter_openai/utils/tokenizer.py @@ -155,9 +155,9 @@ class PlainTextTokenizer(BaseTokenizer[dict]): """ def _handle_custom_content_part(self, 
content_part: Any): - short_content_part = truncate_string(str(content_part), 100) + short_content_str = truncate_string(str(content_part), 100) raise InternalServerError( - f"Unexpected non-textural content part in the request: {short_content_part!r}. " + f"Unexpected non-textural content part in the request: {short_content_str!r}. " f"The deployment only supports plain text messages. " f"Declare the deployment as a multi-modal one to avoid the error." ) diff --git a/tests/test_errors.py b/tests/test_errors.py index 77f8f4d..f7dc6cf 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -1,5 +1,5 @@ import json -from typing import Any, Callable +from typing import Any, AsyncIterator, Callable import httpx import pytest @@ -506,7 +506,9 @@ async def test_timeout_error_from_upstream(test_app: httpx.AsyncClient): @respx.mock @pytest.mark.asyncio -async def test_connection_error_from_upstream(test_app: httpx.AsyncClient): +async def test_connection_error_from_upstream_non_streaming( + test_app: httpx.AsyncClient, +): respx.post( "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview" ).mock(side_effect=httpx.ConnectError("Connection error")) @@ -531,6 +533,49 @@ async def test_connection_error_from_upstream(test_app: httpx.AsyncClient): } +@respx.mock +@pytest.mark.asyncio +async def test_connection_error_from_upstream_streaming( + test_app: httpx.AsyncClient, +): + class mock_stream(httpx.AsyncByteStream): + async def __aiter__(self) -> AsyncIterator[bytes]: + yield b'data: {"message": "first chunk"}\n\n' + yield b'data: {"message": "second chunk"}\n\n' + raise httpx.ConnectError("Connection error") + + respx.post( + "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview" + ).mock( + side_effect=lambda request: httpx.Response( + status_code=200, stream=mock_stream() + ) + ) + + response = await test_app.post( + "/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview", + json={ + "stream": True, + "messages": [{"role": "user", "content": "Test content"}], + }, + headers={ + "X-UPSTREAM-KEY": "TEST_API_KEY", + "X-UPSTREAM-ENDPOINT": "http://localhost:5001/openai/deployments/gpt-4/chat/completions", + }, + ) + + assert response.status_code == 200 + assert response.text == "\n\n".join( + [ + 'data: {"message":"first chunk"}', + 'data: {"message":"second chunk"}', + 'data: {"error":{"message":"Connection error","type":"internal_server_error","code":"500"}}', + "data: [DONE]", + "", + ] + ) + + @pytest.mark.asyncio async def test_incorrect_streaming_request(test_app: httpx.AsyncClient): response = await test_app.post( From 2ea98b53c52a61970853eb6481f86a8f4dc8c760 Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Thu, 5 Dec 2024 11:07:05 +0000 Subject: [PATCH 06/14] chore: added test with invalid chunk stream --- tests/test_errors.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tests/test_errors.py b/tests/test_errors.py index f7dc6cf..6f64ef2 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -576,6 +576,48 @@ async def __aiter__(self) -> AsyncIterator[bytes]: ) +@respx.mock +@pytest.mark.asyncio +async def test_invalid_chunk_stream_from_upstream( + test_app: httpx.AsyncClient, +): + class mock_stream(httpx.AsyncByteStream): + async def __aiter__(self) -> AsyncIterator[bytes]: + yield b"data: chunk1\n\n" + yield b"data: chunk2\n\n" + yield b"data: [DONE]\n\n" + + respx.post( + 
"http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview" + ).mock( + side_effect=lambda request: httpx.Response( + status_code=200, stream=mock_stream() + ) + ) + + response = await test_app.post( + "/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview", + json={ + "stream": True, + "messages": [{"role": "user", "content": "Test content"}], + }, + headers={ + "X-UPSTREAM-KEY": "TEST_API_KEY", + "X-UPSTREAM-ENDPOINT": "http://localhost:5001/openai/deployments/gpt-4/chat/completions", + }, + ) + + assert response.status_code == 200 + assert response.text == "\n\n".join( + [ + # OpenAI is unable to parse SSE entry with invalid JSON and fails with the following error: + 'data: {"error":{"message":"Expecting value: line 1 column 1 (char 0)","type":"internal_server_error","code":"500"}}', + "data: [DONE]", + "", + ] + ) + + @pytest.mark.asyncio async def test_incorrect_streaming_request(test_app: httpx.AsyncClient): response = await test_app.post( From 4539e7665d17f1fb57101ca1c5ba8ad4983aea75 Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Thu, 5 Dec 2024 11:27:40 +0000 Subject: [PATCH 07/14] chore: added test for unexpected multi-modal content part --- aidial_adapter_openai/utils/tokenizer.py | 2 +- tests/test_errors.py | 62 ++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/aidial_adapter_openai/utils/tokenizer.py b/aidial_adapter_openai/utils/tokenizer.py index 63d0871..4320dfb 100644 --- a/aidial_adapter_openai/utils/tokenizer.py +++ b/aidial_adapter_openai/utils/tokenizer.py @@ -159,7 +159,7 @@ def _handle_custom_content_part(self, content_part: Any): raise InternalServerError( f"Unexpected non-textural content part in the request: {short_content_str!r}. " f"The deployment only supports plain text messages. " - f"Declare the deployment as a multi-modal one to avoid the error." + f"Declare the deployment as a multi-modal one in the OpenAI adapter configuration to avoid the error." ) def tokenize_request_message(self, message: dict) -> int: diff --git a/tests/test_errors.py b/tests/test_errors.py index 6f64ef2..2c00ec6 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -618,6 +618,68 @@ async def __aiter__(self) -> AsyncIterator[bytes]: ) +@respx.mock +@pytest.mark.asyncio +async def test_unexpected_multi_modal_input_streaming( + test_app: httpx.AsyncClient, +): + mock_stream = OpenAIStream( + single_choice_chunk(delta={"role": "assistant"}), + single_choice_chunk(delta={"content": "Test response"}), + single_choice_chunk(delta={}, finish_reason="stop"), + ) + + expected_stream = OpenAIStream( + single_choice_chunk(delta={"role": "assistant"}), + single_choice_chunk(delta={"content": "Test response"}), + { + "error": { + "message": "Unexpected non-textural content part in the request: \"{'type': 'image_url', 'image_url': {'url': 'http://example.com/image.png'}}\". The deployment only supports plain text messages. 
Declare the deployment as a multi-modal one in the OpenAI adapter configuration to avoid the error.", + "type": "internal_server_error", + "code": "500", + } + }, + ) + + respx.post( + "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview" + ).respond( + status_code=200, + content=mock_stream.to_content(), + content_type="text/event-stream", + ) + + response = await test_app.post( + "/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview", + json={ + "stream": True, + "messages": [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": "http://example.com/image.png" + }, + } + ], + } + ], + }, + headers={ + "X-UPSTREAM-KEY": "TEST_API_KEY", + "X-UPSTREAM-ENDPOINT": "http://localhost:5001/openai/deployments/gpt-4/chat/completions", + }, + ) + + assert response.status_code == 200 + expected_stream.assert_response_content( + response, + assert_equal, + ) + + @pytest.mark.asyncio async def test_incorrect_streaming_request(test_app: httpx.AsyncClient): response = await test_app.post( From 1c3833d83b653a08f43e160eb420d49c76c070d4 Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Thu, 5 Dec 2024 12:17:59 +0000 Subject: [PATCH 08/14] fix: tolerate tokenization fails --- aidial_adapter_openai/utils/streaming.py | 33 +++++++++++++----------- tests/test_errors.py | 17 +----------- 2 files changed, 19 insertions(+), 31 deletions(-) diff --git a/aidial_adapter_openai/utils/streaming.py b/aidial_adapter_openai/utils/streaming.py index 1258c62..832e131 100644 --- a/aidial_adapter_openai/utils/streaming.py +++ b/aidial_adapter_openai/utils/streaming.py @@ -9,8 +9,6 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk from pydantic import BaseModel -from aidial_adapter_openai.exception_handlers import to_adapter_exception -from aidial_adapter_openai.utils.adapter_exception import AdapterException from aidial_adapter_openai.utils.chat_completion_response import ( ChatCompletionResponse, ChatCompletionStreamingChunk, @@ -73,15 +71,23 @@ async def generate_stream( ) def set_usage(chunk: dict | None, resp: ChatCompletionResponse) -> dict: - completion_tokens = tokenize_response(resp) - prompt_tokens = get_prompt_tokens() - chunk = chunk or empty_chunk - chunk["usage"] = { - "completion_tokens": completion_tokens, - "prompt_tokens": prompt_tokens, - "total_tokens": prompt_tokens + completion_tokens, - } + + # Do not fail the whole response if tokenization has failed + try: + completion_tokens = tokenize_response(resp) + prompt_tokens = get_prompt_tokens() + except Exception as e: + logger.exception( + f"caught exception while tokenization: {type(e).__module__}.{type(e).__name__}. " + "The tokenization has failed, therefore, the usage won't be reported." 
+ ) + else: + chunk["usage"] = { + "completion_tokens": completion_tokens, + "prompt_tokens": prompt_tokens, + "total_tokens": prompt_tokens + completion_tokens, + } return chunk def set_finish_reason(chunk: dict | None, finish_reason: str) -> dict: @@ -99,7 +105,7 @@ def set_discarded_messages(chunk: dict | None, indices: list[int]) -> dict: buffer_chunk = None response_snapshot = ChatCompletionStreamingChunk() - error: AdapterException | None = None + error: Exception | None = None try: async for chunk in stream: @@ -126,10 +132,7 @@ def set_discarded_messages(chunk: dict | None, indices: list[int]) -> dict: logger.exception( f"caught exception while streaming: {type(e).__module__}.{type(e).__name__}" ) - - error = to_adapter_exception(e) - - logger.error(f"converted to the adapter exception: {error!r}") + error = e if last_chunk is not None and buffer_chunk is not None: last_chunk = merge_chat_completion_chunks(last_chunk, buffer_chunk) diff --git a/tests/test_errors.py b/tests/test_errors.py index 2c00ec6..14160ca 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -629,18 +629,6 @@ async def test_unexpected_multi_modal_input_streaming( single_choice_chunk(delta={}, finish_reason="stop"), ) - expected_stream = OpenAIStream( - single_choice_chunk(delta={"role": "assistant"}), - single_choice_chunk(delta={"content": "Test response"}), - { - "error": { - "message": "Unexpected non-textural content part in the request: \"{'type': 'image_url', 'image_url': {'url': 'http://example.com/image.png'}}\". The deployment only supports plain text messages. Declare the deployment as a multi-modal one in the OpenAI adapter configuration to avoid the error.", - "type": "internal_server_error", - "code": "500", - } - }, - ) - respx.post( "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview" ).respond( @@ -674,10 +662,7 @@ async def test_unexpected_multi_modal_input_streaming( ) assert response.status_code == 200 - expected_stream.assert_response_content( - response, - assert_equal, - ) + mock_stream.assert_response_content(response, assert_equal) @pytest.mark.asyncio From 6ba61956113b7db5c7d14261748e33f5b75f780f Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Thu, 5 Dec 2024 12:40:55 +0000 Subject: [PATCH 09/14] fix: removed accidental debug flag --- aidial_adapter_openai/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aidial_adapter_openai/app.py b/aidial_adapter_openai/app.py index ba27873..703547d 100644 --- a/aidial_adapter_openai/app.py +++ b/aidial_adapter_openai/app.py @@ -27,7 +27,7 @@ def create_app( app_config: ApplicationConfig | None = None, init_telemetry: bool = True, ) -> FastAPI: - app = FastAPI(lifespan=lifespan, debug=True) + app = FastAPI(lifespan=lifespan) set_app_config(app, app_config or ApplicationConfig.from_env()) if init_telemetry: From 6bfacf843ab2172bdbf5585b26154475f2d5a25c Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Fri, 6 Dec 2024 10:11:32 +0000 Subject: [PATCH 10/14] fix: simplified tests --- tests/test_errors.py | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/tests/test_errors.py b/tests/test_errors.py index 14160ca..604e080 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -1,5 +1,5 @@ import json -from typing import Any, AsyncIterator, Callable +from typing import Any, AsyncIterable, Callable import httpx import pytest @@ -538,18 +538,17 @@ async def test_connection_error_from_upstream_non_streaming( async def 
test_connection_error_from_upstream_streaming(
     test_app: httpx.AsyncClient,
 ):
-    class mock_stream(httpx.AsyncByteStream):
-        async def __aiter__(self) -> AsyncIterator[bytes]:
-            yield b'data: {"message": "first chunk"}\n\n'
-            yield b'data: {"message": "second chunk"}\n\n'
-            raise httpx.ConnectError("Connection error")
+    async def mock_stream() -> AsyncIterable[bytes]:
+        yield b'data: {"message": "first chunk"}\n\n'
+        yield b'data: {"message": "second chunk"}\n\n'
+        raise httpx.ConnectError("Connection error")
 
     respx.post(
         "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview"
-    ).mock(
-        side_effect=lambda request: httpx.Response(
-            status_code=200, stream=mock_stream()
-        )
+    ).respond(
+        status_code=200,
+        content_type="text/event-stream",
+        content=mock_stream(),
     )
 
     response = await test_app.post(
@@ -581,18 +580,17 @@ async def test_invalid_chunk_stream_from_upstream(
     test_app: httpx.AsyncClient,
 ):
-    class mock_stream(httpx.AsyncByteStream):
-        async def __aiter__(self) -> AsyncIterator[bytes]:
-            yield b"data: chunk1\n\n"
-            yield b"data: chunk2\n\n"
-            yield b"data: [DONE]\n\n"
+    async def mock_stream() -> AsyncIterable[bytes]:
+        yield b"data: chunk1\n\n"
+        yield b"data: chunk2\n\n"
+        yield b"data: [DONE]\n\n"
 
     respx.post(
         "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview"
-    ).mock(
-        side_effect=lambda request: httpx.Response(
-            status_code=200, stream=mock_stream()
-        )
+    ).respond(
+        status_code=200,
+        content_type="text/event-stream",
+        content=mock_stream(),
     )
 
     response = await test_app.post(
From dbf672907288bc47a2cd871d128bb88fcb432d0b Mon Sep 17 00:00:00 2001
From: Anton Dubovik
Date: Fri, 6 Dec 2024 10:59:25 +0000
Subject: [PATCH 11/14] chore: added test for internal error in the adapter during streaming

---
 aidial_adapter_openai/gpt.py | 2 +-
 .../gpt4_multi_modal/chat_completion.py | 2 +-
 aidial_adapter_openai/utils/streaming.py | 2 +-
 tests/test_errors.py | 53 ++++++++++++++++++--
 4 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/aidial_adapter_openai/gpt.py b/aidial_adapter_openai/gpt.py
index b18ee86..dd909e2 100644
--- a/aidial_adapter_openai/gpt.py
+++ b/aidial_adapter_openai/gpt.py
@@ -78,7 +78,7 @@ async def gpt_chat_completion(
 
     if isinstance(response, AsyncIterator):
         return generate_stream(
-            map_stream(chunk_to_dict, response),
+            stream=map_stream(chunk_to_dict, response),
             get_prompt_tokens=lambda: estimated_prompt_tokens
             or tokenizer.tokenize_request(request, request["messages"]),
             tokenize_response=tokenizer.tokenize_response,
diff --git a/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py b/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py
index f9fd3ba..6e66e59 100644
--- a/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py
+++ b/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py
@@ -262,7 +262,7 @@ def debug_print(chunk: T) -> T:
         return map_stream(
             debug_print,
             generate_stream(
-                map_stream(
+                stream=map_stream(
                     response_transformer,
                     parse_openai_sse_stream(response),
                 ),
diff --git a/aidial_adapter_openai/utils/streaming.py b/aidial_adapter_openai/utils/streaming.py
index 832e131..ebfb779 100644
--- a/aidial_adapter_openai/utils/streaming.py
+++ b/aidial_adapter_openai/utils/streaming.py
@@ -52,8 +52,8 @@ def build_chunk(
 
 
 async def generate_stream(
-    stream: AsyncIterator[dict],
     *,
+    stream: AsyncIterator[dict],
     get_prompt_tokens: Callable[[], int],
     tokenize_response: Callable[[ChatCompletionResponse], int],
     deployment: str,
diff --git a/tests/test_errors.py b/tests/test_errors.py
index 604e080..e4ebf66 100644
--- a/tests/test_errors.py
+++ b/tests/test_errors.py
@@ -1,5 +1,6 @@
 import json
-from typing import Any, AsyncIterable, Callable
+from typing import Any, AsyncIterable, AsyncIterator, Callable
+from unittest.mock import patch
 
 import httpx
 import pytest
@@ -575,6 +576,56 @@ async def mock_stream() -> AsyncIterable[bytes]:
     )
 
 
+@respx.mock
+@pytest.mark.asyncio
+async def test_adapter_internal_error(
+    test_app: httpx.AsyncClient,
+):
+    async def mock_generate_stream(stream: AsyncIterator[dict], **kwargs):
+        yield await stream.__anext__()
+        raise ValueError("failed generating the stream")
+
+    with patch(
+        "aidial_adapter_openai.gpt.generate_stream",
+        side_effect=mock_generate_stream,
+    ):
+
+        async def mock_stream() -> AsyncIterable[bytes]:
+            yield b'data: {"message": "first chunk"}\n\n'
+            yield b'data: {"message": "second chunk"}\n\n'
+            yield b"data: [DONE]"
+
+        respx.post(
+            "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview"
+        ).respond(
+            status_code=200,
+            content_type="text/event-stream",
+            content=mock_stream(),
+        )
+
+        response = await test_app.post(
+            "/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview",
+            json={
+                "stream": True,
+                "messages": [{"role": "user", "content": "Test content"}],
+            },
+            headers={
+                "X-UPSTREAM-KEY": "TEST_API_KEY",
+                "X-UPSTREAM-ENDPOINT": "http://localhost:5001/openai/deployments/gpt-4/chat/completions",
+            },
+        )
+
+        assert response.status_code == 200
+        assert response.text == "\n\n".join(
+            [
+                'data: {"message":"first chunk"}',
+                'data: {"error":{"message":"failed generating the stream","type":"internal_server_error","code":"500"}}',
+                "data: [DONE]",
+                "",
+            ]
+        )
+
+
 @respx.mock
 @pytest.mark.asyncio
 async def test_invalid_chunk_stream_from_upstream(
From 0baeffe32019b2a877c8b048d18b60cf30c22f27 Mon Sep 17 00:00:00 2001
From: Anton Dubovik
Date: Fri, 6 Dec 2024 11:49:53 +0000
Subject: [PATCH 12/14] fix: reverted pytest args in .vscode

---
 .vscode/settings.json | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index d62db15..ab64be2 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -8,9 +8,7 @@
         },
         "editor.tabSize": 4
     },
-    "python.testing.pytestArgs": [
-        "tests"
-    ],
+    "python.testing.pytestArgs": ["."],
     "python.testing.unittestEnabled": false,
     "python.testing.pytestEnabled": true,
     "python.analysis.typeCheckingMode": "basic"
From f05dd1366fc1e8e1d65e63fe41fc02f6010b47fb Mon Sep 17 00:00:00 2001
From: Anton Dubovik
Date: Wed, 11 Dec 2024 11:32:47 +0000
Subject: [PATCH 13/14] fix: removed dependency on sdk _errors module

---
 aidial_adapter_openai/app.py | 15 ++++-----------
 aidial_adapter_openai/exception_handlers.py | 25 +++++++++------------
 2 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/aidial_adapter_openai/app.py b/aidial_adapter_openai/app.py
index 703547d..07efbee 100644
--- a/aidial_adapter_openai/app.py
+++ b/aidial_adapter_openai/app.py
@@ -1,7 +1,5 @@
 from contextlib import asynccontextmanager
 
-import aidial_sdk._errors as sdk_error_handlers
-import pydantic
 from aidial_sdk.exceptions import HTTPException as DialException
 from aidial_sdk.telemetry.init import init_telemetry as sdk_init_telemetry
 from aidial_sdk.telemetry.types import TelemetryConfig
@@ -10,7 +8,7 @@
 
 import aidial_adapter_openai.endpoints as endpoints
 from 
aidial_adapter_openai.app_config import ApplicationConfig -from aidial_adapter_openai.exception_handlers import openai_exception_handler +from aidial_adapter_openai.exception_handlers import adapter_exception_handler from aidial_adapter_openai.utils.http_client import get_http_client from aidial_adapter_openai.utils.log_config import configure_loggers, logger from aidial_adapter_openai.utils.request import set_app_config @@ -42,14 +40,9 @@ def create_app( app.post("/openai/deployments/{deployment_id:path}/chat/completions")( endpoints.chat_completion ) - app.add_exception_handler(OpenAIError, openai_exception_handler) - app.add_exception_handler( - pydantic.ValidationError, - sdk_error_handlers.pydantic_validation_exception_handler, - ) - app.add_exception_handler( - DialException, sdk_error_handlers.dial_exception_handler - ) + + for exc_class in [OpenAIError, DialException]: + app.add_exception_handler(exc_class, adapter_exception_handler) return app diff --git a/aidial_adapter_openai/exception_handlers.py b/aidial_adapter_openai/exception_handlers.py index 635c718..21a9ae2 100644 --- a/aidial_adapter_openai/exception_handlers.py +++ b/aidial_adapter_openai/exception_handlers.py @@ -1,22 +1,21 @@ from aidial_sdk.exceptions import HTTPException as DialException from aidial_sdk.exceptions import InternalServerError -from fastapi import Request -from fastapi.responses import Response -from openai import ( - APIConnectionError, - APIError, - APIStatusError, - APITimeoutError, - OpenAIError, -) +from fastapi.requests import Request as FastAPIRequest +from fastapi.responses import Response as FastAPIResponse +from openai import APIConnectionError, APIError, APIStatusError, APITimeoutError from aidial_adapter_openai.utils.adapter_exception import ( AdapterException, + ResponseWrapper, parse_adapter_exception, ) def to_adapter_exception(exc: Exception) -> AdapterException: + + if isinstance(exc, (DialException, ResponseWrapper)): + return exc + if isinstance(exc, APIStatusError): # Non-streaming errors reported by `openai` library via this exception r = exc.response @@ -70,12 +69,10 @@ def to_adapter_exception(exc: Exception) -> AdapterException: content={"error": exc.body or {}}, ) - if isinstance(exc, DialException): - return exc - return InternalServerError(str(exc)) -def openai_exception_handler(request: Request, exc: Exception) -> Response: - assert isinstance(exc, OpenAIError) +def adapter_exception_handler( + request: FastAPIRequest, exc: Exception +) -> FastAPIResponse: return to_adapter_exception(exc).to_fastapi_response() From 968a9af2b8a3392cea8f219ddc05d15f21b7e87b Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Wed, 11 Dec 2024 11:50:31 +0000 Subject: [PATCH 14/14] fix: review fixes --- aidial_adapter_openai/exception_handlers.py | 4 +--- aidial_adapter_openai/utils/adapter_exception.py | 8 -------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/aidial_adapter_openai/exception_handlers.py b/aidial_adapter_openai/exception_handlers.py index 21a9ae2..55a06e4 100644 --- a/aidial_adapter_openai/exception_handlers.py +++ b/aidial_adapter_openai/exception_handlers.py @@ -30,11 +30,9 @@ def to_adapter_exception(exc: Exception) -> AdapterException: if "Content-Encoding" in httpx_headers: del httpx_headers["Content-Encoding"] - headers = {k.decode(): v.decode() for k, v in httpx_headers.raw} - return parse_adapter_exception( status_code=r.status_code, - headers=headers, + headers=dict(httpx_headers.items()), content=r.text, ) diff --git 
a/aidial_adapter_openai/utils/adapter_exception.py b/aidial_adapter_openai/utils/adapter_exception.py index 8715131..863f6bd 100644 --- a/aidial_adapter_openai/utils/adapter_exception.py +++ b/aidial_adapter_openai/utils/adapter_exception.py @@ -2,7 +2,6 @@ from typing import Any, Dict from aidial_sdk.exceptions import HTTPException as DialException -from fastapi import HTTPException as FastAPIException from fastapi.responses import Response as FastAPIResponse @@ -39,13 +38,6 @@ def to_fastapi_response(self) -> FastAPIResponse: headers=self.headers, ) - def to_fastapi_exception(self) -> FastAPIException: - return FastAPIException( - status_code=self.status_code, - detail=self.content, - headers=self.headers, - ) - def json_error(self) -> dict: return { "error": {