From 04feb6c85553d9123ffeab7ddc2a2dadecea9978 Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Wed, 4 Dec 2024 17:32:57 +0000 Subject: [PATCH 01/14] fix: catching errors during streaming in a single place --- aidial_adapter_openai/app.py | 20 +- aidial_adapter_openai/exception_handlers.py | 113 +++++-- aidial_adapter_openai/gpt.py | 2 +- .../gpt4_multi_modal/chat_completion.py | 8 +- aidial_adapter_openai/utils/sse_stream.py | 18 +- aidial_adapter_openai/utils/streaming.py | 26 +- aidial_adapter_openai/utils/tokenizer.py | 8 +- poetry.lock | 319 ++++++++---------- pyproject.toml | 2 +- tests/test_errors.py | 2 +- 10 files changed, 271 insertions(+), 247 deletions(-) diff --git a/aidial_adapter_openai/app.py b/aidial_adapter_openai/app.py index 5d07629..ba27873 100644 --- a/aidial_adapter_openai/app.py +++ b/aidial_adapter_openai/app.py @@ -1,5 +1,6 @@ from contextlib import asynccontextmanager +import aidial_sdk._errors as sdk_error_handlers import pydantic from aidial_sdk.exceptions import HTTPException as DialException from aidial_sdk.telemetry.init import init_telemetry as sdk_init_telemetry @@ -9,11 +10,7 @@ import aidial_adapter_openai.endpoints as endpoints from aidial_adapter_openai.app_config import ApplicationConfig -from aidial_adapter_openai.exception_handlers import ( - dial_exception_handler, - openai_exception_handler, - pydantic_exception_handler, -) +from aidial_adapter_openai.exception_handlers import openai_exception_handler from aidial_adapter_openai.utils.http_client import get_http_client from aidial_adapter_openai.utils.log_config import configure_loggers, logger from aidial_adapter_openai.utils.request import set_app_config @@ -30,7 +27,7 @@ def create_app( app_config: ApplicationConfig | None = None, init_telemetry: bool = True, ) -> FastAPI: - app = FastAPI(lifespan=lifespan) + app = FastAPI(lifespan=lifespan, debug=True) set_app_config(app, app_config or ApplicationConfig.from_env()) if init_telemetry: @@ -45,9 +42,14 @@ def create_app( app.post("/openai/deployments/{deployment_id:path}/chat/completions")( endpoints.chat_completion ) - app.exception_handler(OpenAIError)(openai_exception_handler) - app.exception_handler(pydantic.ValidationError)(pydantic_exception_handler) - app.exception_handler(DialException)(dial_exception_handler) + app.add_exception_handler(OpenAIError, openai_exception_handler) + app.add_exception_handler( + pydantic.ValidationError, + sdk_error_handlers.pydantic_validation_exception_handler, + ) + app.add_exception_handler( + DialException, sdk_error_handlers.dial_exception_handler + ) return app diff --git a/aidial_adapter_openai/exception_handlers.py b/aidial_adapter_openai/exception_handlers.py index c98c122..3eb0a5b 100644 --- a/aidial_adapter_openai/exception_handlers.py +++ b/aidial_adapter_openai/exception_handlers.py @@ -1,56 +1,117 @@ -import pydantic -from aidial_sdk._errors import pydantic_validation_exception_handler +from typing import Dict + from aidial_sdk.exceptions import HTTPException as DialException +from aidial_sdk.exceptions import InternalServerError from fastapi import Request from fastapi.responses import Response -from openai import APIConnectionError, APIError, APIStatusError, APITimeoutError +from openai import ( + APIConnectionError, + APIError, + APIStatusError, + APITimeoutError, + OpenAIError, +) + +def _parse_dial_exception( + status_code: int, + content: dict | str, + headers: Dict[str, str] | None = None, +) -> DialException: + if ( + isinstance(content, dict) + and (error := content.get("error")) + and 
isinstance(error, dict) + ): + message = error.get("message") or "Unknown error" + code = error.get("code") + type = error.get("type") + param = error.get("param") + display_message = error.get("display_message") + + return DialException( + status_code=status_code, + message=message, + type=type, + param=param, + code=code, + display_message=display_message, + headers=headers, + ) + else: + return DialException( + status_code=status_code, + message=str(content), + headers=headers, + ) -def openai_exception_handler(request: Request, e: DialException): - if isinstance(e, APIStatusError): - r = e.response + +def to_dial_exception(exc: Exception) -> DialException: + if isinstance(exc, APIStatusError): + # Non-streaming errors reported by `openai` library via this exception + + r = exc.response headers = r.headers - # Avoid encoding the error message when the original response was encoded. + # httpx library (used by openai) automatically sets + # "Accept-Encoding:gzip,deflate" header in requests to the upstream. + # Therefore, we may receive from the upstream gzip-encoded + # response along with "Content-Encoding:gzip" header. + # We either need to encode the response, or + # remove the "Content-Encoding" header. if "Content-Encoding" in headers: del headers["Content-Encoding"] - return Response( - content=r.content, + plain_headers = {k.decode(): v.decode() for k, v in headers.raw} + + try: + content = r.json() + except Exception: + content = r.text + + return _parse_dial_exception( status_code=r.status_code, - headers=headers, + headers=plain_headers, + content=content, ) - if isinstance(e, APITimeoutError): - raise DialException( + if isinstance(exc, APITimeoutError): + return DialException( status_code=504, type="timeout", message="Request timed out", display_message="Request timed out. Please try again later.", ) - if isinstance(e, APIConnectionError): - raise DialException( + if isinstance(exc, APIConnectionError): + return DialException( status_code=502, type="connection", message="Error communicating with OpenAI", display_message="OpenAI server is not responsive. 
Please try again later.", ) - if isinstance(e, APIError): - raise DialException( - status_code=getattr(e, "status_code", None) or 500, - message=e.message, - type=e.type, - code=e.code, - param=e.param, - display_message=None, + if isinstance(exc, APIError): + # Streaming errors reported by `openai` library via this exception + status_code: int = 500 + if exc.code: + try: + status_code = int(exc.code) + except Exception: + pass + + return _parse_dial_exception( + status_code=status_code, + headers={}, + content={"error": exc.body or {}}, ) + if isinstance(exc, DialException): + return exc -def pydantic_exception_handler(request: Request, exc: pydantic.ValidationError): - return pydantic_validation_exception_handler(request, exc) + return InternalServerError(str(exc)) -def dial_exception_handler(request: Request, exc: DialException): - return exc.to_fastapi_response() +def openai_exception_handler(request: Request, exc: Exception) -> Response: + assert isinstance(exc, OpenAIError) + return to_dial_exception(exc).to_fastapi_response() diff --git a/aidial_adapter_openai/gpt.py b/aidial_adapter_openai/gpt.py index d4c6cde..b18ee86 100644 --- a/aidial_adapter_openai/gpt.py +++ b/aidial_adapter_openai/gpt.py @@ -78,12 +78,12 @@ async def gpt_chat_completion( if isinstance(response, AsyncIterator): return generate_stream( + map_stream(chunk_to_dict, response), get_prompt_tokens=lambda: estimated_prompt_tokens or tokenizer.tokenize_request(request, request["messages"]), tokenize_response=tokenizer.tokenize_response, deployment=deployment_id, discarded_messages=discarded_messages, - stream=map_stream(chunk_to_dict, response), eliminate_empty_choices=eliminate_empty_choices, ) else: diff --git a/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py b/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py index 216137d..f9fd3ba 100644 --- a/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py +++ b/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py @@ -262,14 +262,14 @@ def debug_print(chunk: T) -> T: return map_stream( debug_print, generate_stream( + map_stream( + response_transformer, + parse_openai_sse_stream(response), + ), get_prompt_tokens=lambda: estimated_prompt_tokens, tokenize_response=tokenizer.tokenize_response, deployment=deployment, discarded_messages=discarded_messages, - stream=map_stream( - response_transformer, - parse_openai_sse_stream(response), - ), eliminate_empty_choices=eliminate_empty_choices, ), ) diff --git a/aidial_adapter_openai/utils/sse_stream.py b/aidial_adapter_openai/utils/sse_stream.py index 3094d00..b4a6ce9 100644 --- a/aidial_adapter_openai/utils/sse_stream.py +++ b/aidial_adapter_openai/utils/sse_stream.py @@ -3,6 +3,9 @@ from aidial_sdk.exceptions import runtime_server_error +from aidial_adapter_openai.exception_handlers import to_dial_exception +from aidial_adapter_openai.utils.log_config import logger + DATA_PREFIX = "data: " OPENAI_END_MARKER = "[DONE]" @@ -53,6 +56,17 @@ async def parse_openai_sse_stream( async def to_openai_sse_stream( stream: AsyncIterator[dict], ) -> AsyncIterator[str]: - async for chunk in stream: - yield format_chunk(chunk) + try: + async for chunk in stream: + yield format_chunk(chunk) + except Exception as e: + logger.exception( + f"caught exception while streaming: {type(e).__module__}.{type(e).__name__}" + ) + + dial_exception = to_dial_exception(e) + logger.exception(f"converted to the dial exception: {dial_exception!r}") + + yield format_chunk(dial_exception.json_error()) + yield END_CHUNK diff --git 
a/aidial_adapter_openai/utils/streaming.py b/aidial_adapter_openai/utils/streaming.py index 724ae00..e54f5dd 100644 --- a/aidial_adapter_openai/utils/streaming.py +++ b/aidial_adapter_openai/utils/streaming.py @@ -6,10 +6,10 @@ from aidial_sdk.exceptions import HTTPException as DialException from aidial_sdk.utils.merge_chunks import merge_chat_completion_chunks from fastapi.responses import JSONResponse, Response, StreamingResponse -from openai import APIError, APIStatusError from openai.types.chat.chat_completion_chunk import ChatCompletionChunk from pydantic import BaseModel +from aidial_adapter_openai.exception_handlers import to_dial_exception from aidial_adapter_openai.utils.chat_completion_response import ( ChatCompletionResponse, ChatCompletionStreamingChunk, @@ -53,12 +53,12 @@ def build_chunk( async def generate_stream( + stream: AsyncIterator[dict], *, get_prompt_tokens: Callable[[], int], tokenize_response: Callable[[ChatCompletionResponse], int], deployment: str, discarded_messages: Optional[list[int]], - stream: AsyncIterator[dict], eliminate_empty_choices: bool, ) -> AsyncIterator[dict]: @@ -98,7 +98,7 @@ def set_discarded_messages(chunk: dict | None, indices: list[int]) -> dict: buffer_chunk = None response_snapshot = ChatCompletionStreamingChunk() - error = None + error: DialException | None = None try: async for chunk in stream: @@ -121,15 +121,8 @@ def set_discarded_messages(chunk: dict | None, indices: list[int]) -> dict: yield last_chunk last_chunk = chunk - except APIError as e: - status_code = e.status_code if isinstance(e, APIStatusError) else 500 - error = DialException( - status_code=status_code, - message=e.message, - type=e.type, - param=e.param, - code=e.code, - ).json_error() + except Exception as e: + error = to_dial_exception(e) if last_chunk is not None and buffer_chunk is not None: last_chunk = merge_chat_completion_chunks(last_chunk, buffer_chunk) @@ -160,7 +153,7 @@ def set_discarded_messages(chunk: dict | None, indices: list[int]) -> dict: yield last_chunk if error: - yield error + raise error def create_stage_chunk(name: str, content: str, stream: bool) -> dict: @@ -196,7 +189,7 @@ def create_stage_chunk(name: str, content: str, stream: bool) -> dict: def create_response_from_chunk( chunk: dict, exc: DialException | None, stream: bool -) -> Response: +) -> AsyncIterator[dict] | Response: if not stream: if exc is not None: return exc.to_fastapi_response() @@ -208,10 +201,7 @@ async def generator() -> AsyncIterator[dict]: if exc is not None: yield exc.json_error() - return StreamingResponse( - to_openai_sse_stream(generator()), - media_type="text/event-stream", - ) + return generator() def block_response_to_streaming_chunk(response: dict) -> dict: diff --git a/aidial_adapter_openai/utils/tokenizer.py b/aidial_adapter_openai/utils/tokenizer.py index 3af0e94..c177a7b 100644 --- a/aidial_adapter_openai/utils/tokenizer.py +++ b/aidial_adapter_openai/utils/tokenizer.py @@ -14,6 +14,7 @@ ) from aidial_adapter_openai.utils.image_tokenizer import ImageTokenizer from aidial_adapter_openai.utils.multi_modal_message import MultiModalMessage +from aidial_adapter_openai.utils.text import truncate_string MessageType = TypeVar("MessageType") @@ -154,10 +155,11 @@ class PlainTextTokenizer(BaseTokenizer[dict]): """ def _handle_custom_content_part(self, content_part: Any): - short_content_part = str(content_part)[:100] + short_content_part = truncate_string(str(content_part), 100) raise InternalServerError( - f"Unexpected type of content in message: {short_content_part!r}" 
- f"Use MultiModalTokenizer for messages with images" + f"Unexpected non-textural content part in the request: {short_content_part!r}. " + f"The deployment only supports plain text messages. " + f"Declare the deployment as a multi-modal one to avoid the error." ) def tokenize_request_message(self, message: dict) -> int: diff --git a/poetry.lock b/poetry.lock index 2094e73..f9ccf6b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2,40 +2,35 @@ [[package]] name = "aidial-sdk" -version = "0.15.0" +version = "0.16.0" description = "Framework to create applications and model adapters for AI DIAL" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "aidial_sdk-0.15.0-py3-none-any.whl", hash = "sha256:7b9b3e5ec9688be2919dcd7dd0312aac807dc7917393ee5f846332713ad2e26a"}, - {file = "aidial_sdk-0.15.0.tar.gz", hash = "sha256:6b47bb36e8c795300e0d4b61308c6a2f86b59abb97905390a02789b343460720"}, + {file = "aidial_sdk-0.16.0-py3-none-any.whl", hash = "sha256:76bfa50fd08bfabedd572f06974c68cca9dc18b5c38a8d00bf5d59e1f61cb2d9"}, + {file = "aidial_sdk-0.16.0.tar.gz", hash = "sha256:eddb1f00949bd0e4263c18be03df7b80093ce8caf7e4ed46a550f3a790e01875"}, ] [package.dependencies] -aiohttp = ">=3.8.3,<4.0.0" fastapi = ">=0.51,<1.0" -httpx = ">=0.25.0,<1.0" -opentelemetry-api = {version = "1.20.0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-distro = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-exporter-otlp-proto-grpc = {version = "1.20.0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-exporter-prometheus = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-instrumentation = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-instrumentation-aiohttp-client = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-instrumentation-fastapi = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-instrumentation-httpx = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-instrumentation-logging = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-instrumentation-requests = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-instrumentation-system-metrics = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-instrumentation-urllib = {version = "0.41b0", optional = true, markers = "extra == \"telemetry\""} -opentelemetry-sdk = {version = "1.20.0", optional = true, markers = "extra == \"telemetry\""} -prometheus-client = {version = "0.17.1", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-api = {version = ">=1.22.0,<2.0.0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-exporter-otlp-proto-grpc = {version = ">=1.22.0,<2.0.0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-exporter-prometheus = {version = ">=0.43b0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-instrumentation-aiohttp-client = {version = ">=0.43b0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-instrumentation-fastapi = {version = ">=0.43b0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-instrumentation-httpx = {version = ">=0.43b0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-instrumentation-logging = {version = 
">=0.43b0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-instrumentation-requests = {version = ">=0.43b0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-instrumentation-system-metrics = {version = ">=0.43b0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-instrumentation-urllib = {version = ">=0.43b0", optional = true, markers = "extra == \"telemetry\""} +opentelemetry-sdk = {version = ">=1.22.0,<2.0.0", optional = true, markers = "extra == \"telemetry\""} +prometheus-client = {version = ">=0.17.1,<=0.21", optional = true, markers = "extra == \"telemetry\""} pydantic = ">=1.10,<3" -requests = ">=2.19,<3.0" uvicorn = ">=0.19,<1.0" -wrapt = ">=1.14,<2.0" +wrapt = ">=1.10,<2" [package.extras] -telemetry = ["opentelemetry-api (==1.20.0)", "opentelemetry-distro (==0.41b0)", "opentelemetry-exporter-otlp-proto-grpc (==1.20.0)", "opentelemetry-exporter-prometheus (==0.41b0)", "opentelemetry-instrumentation (==0.41b0)", "opentelemetry-instrumentation-aiohttp-client (==0.41b0)", "opentelemetry-instrumentation-fastapi (==0.41b0)", "opentelemetry-instrumentation-httpx (==0.41b0)", "opentelemetry-instrumentation-logging (==0.41b0)", "opentelemetry-instrumentation-requests (==0.41b0)", "opentelemetry-instrumentation-system-metrics (==0.41b0)", "opentelemetry-instrumentation-urllib (==0.41b0)", "opentelemetry-sdk (==1.20.0)", "prometheus-client (==0.17.1)"] +telemetry = ["opentelemetry-api (>=1.22.0,<2.0.0)", "opentelemetry-exporter-otlp-proto-grpc (>=1.22.0,<2.0.0)", "opentelemetry-exporter-prometheus (>=0.43b0)", "opentelemetry-instrumentation-aiohttp-client (>=0.43b0)", "opentelemetry-instrumentation-fastapi (>=0.43b0)", "opentelemetry-instrumentation-httpx (>=0.43b0)", "opentelemetry-instrumentation-logging (>=0.43b0)", "opentelemetry-instrumentation-requests (>=0.43b0)", "opentelemetry-instrumentation-system-metrics (>=0.43b0)", "opentelemetry-instrumentation-urllib (>=0.43b0)", "opentelemetry-sdk (>=1.22.0,<2.0.0)", "prometheus-client (>=0.17.1,<=0.21)"] [[package]] name = "aiohappyeyeballs" @@ -289,17 +284,6 @@ cryptography = ">=2.5" msal = ">=1.24.0" msal-extensions = ">=0.3.0" -[[package]] -name = "backoff" -version = "2.2.1" -description = "Function decoration for backoff and retry" -optional = false -python-versions = ">=3.7,<4.0" -files = [ - {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, - {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, -] - [[package]] name = "black" version = "24.3.0" @@ -1202,324 +1186,295 @@ datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] [[package]] name = "opentelemetry-api" -version = "1.20.0" +version = "1.28.2" description = "OpenTelemetry Python API" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_api-1.20.0-py3-none-any.whl", hash = "sha256:982b76036fec0fdaf490ae3dfd9f28c81442a33414f737abc687a32758cdcba5"}, - {file = "opentelemetry_api-1.20.0.tar.gz", hash = "sha256:06abe351db7572f8afdd0fb889ce53f3c992dbf6f6262507b385cc1963e06983"}, + {file = "opentelemetry_api-1.28.2-py3-none-any.whl", hash = "sha256:6fcec89e265beb258fe6b1acaaa3c8c705a934bd977b9f534a2b7c0d2d4275a6"}, + {file = "opentelemetry_api-1.28.2.tar.gz", hash = "sha256:ecdc70c7139f17f9b0cf3742d57d7020e3e8315d6cffcdf1a12a905d45b19cc0"}, ] [package.dependencies] deprecated = ">=1.2.6" -importlib-metadata = 
">=6.0,<7.0" - -[[package]] -name = "opentelemetry-distro" -version = "0.41b0" -description = "OpenTelemetry Python Distro" -optional = false -python-versions = ">=3.7" -files = [ - {file = "opentelemetry_distro-0.41b0-py3-none-any.whl", hash = "sha256:61a028dc8c1418b8634a5bf71e15ad85427cb55d97a0cd6a58dd135e456cc027"}, - {file = "opentelemetry_distro-0.41b0.tar.gz", hash = "sha256:8ce05f9499a09c99d9c5f550ff2ed6d229444cae17ae36baf705b0ccb647a959"}, -] - -[package.dependencies] -opentelemetry-api = ">=1.12,<2.0" -opentelemetry-instrumentation = "0.41b0" -opentelemetry-sdk = ">=1.13,<2.0" - -[package.extras] -otlp = ["opentelemetry-exporter-otlp (==1.20.0)"] +importlib-metadata = ">=6.0,<=8.5.0" [[package]] name = "opentelemetry-exporter-otlp-proto-common" -version = "1.20.0" +version = "1.28.2" description = "OpenTelemetry Protobuf encoding" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_exporter_otlp_proto_common-1.20.0-py3-none-any.whl", hash = "sha256:dd63209b40702636ab6ae76a06b401b646ad7b008a906ecb41222d4af24fbdef"}, - {file = "opentelemetry_exporter_otlp_proto_common-1.20.0.tar.gz", hash = "sha256:df60c681bd61812e50b3a39a7a1afeeb6d4066117583249fcc262269374e7a49"}, + {file = "opentelemetry_exporter_otlp_proto_common-1.28.2-py3-none-any.whl", hash = "sha256:545b1943b574f666c35b3d6cc67cb0b111060727e93a1e2866e346b33bff2a12"}, + {file = "opentelemetry_exporter_otlp_proto_common-1.28.2.tar.gz", hash = "sha256:7aebaa5fc9ff6029374546df1f3a62616fda07fccd9c6a8b7892ec130dd8baca"}, ] [package.dependencies] -backoff = {version = ">=1.10.0,<3.0.0", markers = "python_version >= \"3.7\""} -opentelemetry-proto = "1.20.0" +opentelemetry-proto = "1.28.2" [[package]] name = "opentelemetry-exporter-otlp-proto-grpc" -version = "1.20.0" +version = "1.28.2" description = "OpenTelemetry Collector Protobuf over gRPC Exporter" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_exporter_otlp_proto_grpc-1.20.0-py3-none-any.whl", hash = "sha256:7c3f066065891b56348ba2c7f9df6ec635a712841cae0a36f2f6a81642ae7dec"}, - {file = "opentelemetry_exporter_otlp_proto_grpc-1.20.0.tar.gz", hash = "sha256:6c06d43c3771bda1795226e327722b4b980fa1ca1ec9e985f2ef3e29795bdd52"}, + {file = "opentelemetry_exporter_otlp_proto_grpc-1.28.2-py3-none-any.whl", hash = "sha256:6083d9300863aab35bfce7c172d5fc1007686e6f8dff366eae460cd9a21592e2"}, + {file = "opentelemetry_exporter_otlp_proto_grpc-1.28.2.tar.gz", hash = "sha256:07c10378380bbb01a7f621a5ce833fc1fab816e971140cd3ea1cd587840bc0e6"}, ] [package.dependencies] -backoff = {version = ">=1.10.0,<3.0.0", markers = "python_version >= \"3.7\""} deprecated = ">=1.2.6" googleapis-common-protos = ">=1.52,<2.0" -grpcio = ">=1.0.0,<2.0.0" +grpcio = ">=1.63.2,<2.0.0" opentelemetry-api = ">=1.15,<2.0" -opentelemetry-exporter-otlp-proto-common = "1.20.0" -opentelemetry-proto = "1.20.0" -opentelemetry-sdk = ">=1.20.0,<1.21.0" - -[package.extras] -test = ["pytest-grpc"] +opentelemetry-exporter-otlp-proto-common = "1.28.2" +opentelemetry-proto = "1.28.2" +opentelemetry-sdk = ">=1.28.2,<1.29.0" [[package]] name = "opentelemetry-exporter-prometheus" -version = "0.41b0" +version = "0.49b2" description = "Prometheus Metric Exporter for OpenTelemetry" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_exporter_prometheus-0.41b0-py3-none-any.whl", hash = "sha256:ca996f3bc15b0cbf3abd798e786095a202650202a5c0edd9e34bb9186a247b79"}, - {file = 
"opentelemetry_exporter_prometheus-0.41b0.tar.gz", hash = "sha256:0cc58d5d10040e69090637803b97e120f558467037c88988742c80a627e7f1ed"}, + {file = "opentelemetry_exporter_prometheus-0.49b2-py3-none-any.whl", hash = "sha256:307594007ee20ec3a51c42548a4dbd66e46701f8523a7780d5e12a8f986a7783"}, + {file = "opentelemetry_exporter_prometheus-0.49b2.tar.gz", hash = "sha256:70ca3a462ce1ba0d756e4be8a87c04f7196687825fd2d151a428f6c18ef6fd2d"}, ] [package.dependencies] opentelemetry-api = ">=1.12,<2.0" -opentelemetry-sdk = ">=1.12,<2.0" +opentelemetry-sdk = ">=1.28.2,<1.29.0" prometheus-client = ">=0.5.0,<1.0.0" [[package]] name = "opentelemetry-instrumentation" -version = "0.41b0" +version = "0.49b2" description = "Instrumentation Tools & Auto Instrumentation for OpenTelemetry Python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_instrumentation-0.41b0-py3-none-any.whl", hash = "sha256:0ef9e5705ceca0205992a4a845ae4251ce6ec15a1206ca07c2b00afb0c5bd386"}, - {file = "opentelemetry_instrumentation-0.41b0.tar.gz", hash = "sha256:214382ba10dfd29d4e24898a4c7ef18b7368178a6277a1aec95cdb75cabf4612"}, + {file = "opentelemetry_instrumentation-0.49b2-py3-none-any.whl", hash = "sha256:f6d782b0ef9fef4a4c745298651c65f5c532c34cd4c40d230ab5b9f3b3b4d151"}, + {file = "opentelemetry_instrumentation-0.49b2.tar.gz", hash = "sha256:8cf00cc8d9d479e4b72adb9bd267ec544308c602b7188598db5a687e77b298e2"}, ] [package.dependencies] opentelemetry-api = ">=1.4,<2.0" -setuptools = ">=16.0" +opentelemetry-semantic-conventions = "0.49b2" +packaging = ">=18.0" wrapt = ">=1.0.0,<2.0.0" [[package]] name = "opentelemetry-instrumentation-aiohttp-client" -version = "0.41b0" +version = "0.49b2" description = "OpenTelemetry aiohttp client instrumentation" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_instrumentation_aiohttp_client-0.41b0-py3-none-any.whl", hash = "sha256:a1d0d18dee5e57cf9187d1a561f9d4ce56d16433231208405458358ff6399a6f"}, - {file = "opentelemetry_instrumentation_aiohttp_client-0.41b0.tar.gz", hash = "sha256:56fd35e90c2534b2647e7cdd85f34383eddaa300ee51e989c3763dcdb205ca91"}, + {file = "opentelemetry_instrumentation_aiohttp_client-0.49b2-py3-none-any.whl", hash = "sha256:d1748b9e179ab544568be4403fa790dad13f447b70bc34cba01ab9b1ada63343"}, + {file = "opentelemetry_instrumentation_aiohttp_client-0.49b2.tar.gz", hash = "sha256:46df2cf68de8c0787b57e925d7764acb0db8bd5f9a9446b1bf470b63f782e762"}, ] [package.dependencies] opentelemetry-api = ">=1.12,<2.0" -opentelemetry-instrumentation = "0.41b0" -opentelemetry-semantic-conventions = "0.41b0" -opentelemetry-util-http = "0.41b0" +opentelemetry-instrumentation = "0.49b2" +opentelemetry-semantic-conventions = "0.49b2" +opentelemetry-util-http = "0.49b2" wrapt = ">=1.0.0,<2.0.0" [package.extras] instruments = ["aiohttp (>=3.0,<4.0)"] -test = ["http-server-mock", "opentelemetry-instrumentation-aiohttp-client[instruments]"] [[package]] name = "opentelemetry-instrumentation-asgi" -version = "0.41b0" +version = "0.49b2" description = "ASGI instrumentation for OpenTelemetry" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_instrumentation_asgi-0.41b0-py3-none-any.whl", hash = "sha256:46084195fb9c50507abbe1dd490ae4c31c8658c5790f1ddf7af95c417dbe6422"}, - {file = "opentelemetry_instrumentation_asgi-0.41b0.tar.gz", hash = "sha256:921244138b37a9a25edf2153f1c248f16f98610ee8d840b25fd7bf6b165e4d72"}, + {file = 
"opentelemetry_instrumentation_asgi-0.49b2-py3-none-any.whl", hash = "sha256:c8ede13ed781402458a800411cb7ec16a25386dc21de8e5b9a568b386a1dc5f4"}, + {file = "opentelemetry_instrumentation_asgi-0.49b2.tar.gz", hash = "sha256:2af5faf062878330714efe700127b837038c4d9d3b70b451ab2424d5076d6c1c"}, ] [package.dependencies] asgiref = ">=3.0,<4.0" opentelemetry-api = ">=1.12,<2.0" -opentelemetry-instrumentation = "0.41b0" -opentelemetry-semantic-conventions = "0.41b0" -opentelemetry-util-http = "0.41b0" +opentelemetry-instrumentation = "0.49b2" +opentelemetry-semantic-conventions = "0.49b2" +opentelemetry-util-http = "0.49b2" [package.extras] instruments = ["asgiref (>=3.0,<4.0)"] -test = ["opentelemetry-instrumentation-asgi[instruments]", "opentelemetry-test-utils (==0.41b0)"] [[package]] name = "opentelemetry-instrumentation-fastapi" -version = "0.41b0" +version = "0.49b2" description = "OpenTelemetry FastAPI Instrumentation" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_instrumentation_fastapi-0.41b0-py3-none-any.whl", hash = "sha256:5990368e99ecc989df0a248a0b9b8e85d8b3eb7c1dbf5131c36982ba7f4a43b7"}, - {file = "opentelemetry_instrumentation_fastapi-0.41b0.tar.gz", hash = "sha256:eb4ceefe8b944fc9ea5e61fa558b99afd1285431b563f3f0104ac177cde4dfe5"}, + {file = "opentelemetry_instrumentation_fastapi-0.49b2-py3-none-any.whl", hash = "sha256:c66331d05bf806d7ca4f9579c1db7383aad31a9f6665dbaa2b7c9a4c1e830892"}, + {file = "opentelemetry_instrumentation_fastapi-0.49b2.tar.gz", hash = "sha256:3aa81ed7acf6aa5236d96e90a1218c5e84a9c0dce8fa63bf34ceee6218354b63"}, ] [package.dependencies] opentelemetry-api = ">=1.12,<2.0" -opentelemetry-instrumentation = "0.41b0" -opentelemetry-instrumentation-asgi = "0.41b0" -opentelemetry-semantic-conventions = "0.41b0" -opentelemetry-util-http = "0.41b0" +opentelemetry-instrumentation = "0.49b2" +opentelemetry-instrumentation-asgi = "0.49b2" +opentelemetry-semantic-conventions = "0.49b2" +opentelemetry-util-http = "0.49b2" [package.extras] instruments = ["fastapi (>=0.58,<1.0)"] -test = ["httpx (>=0.22,<1.0)", "opentelemetry-instrumentation-fastapi[instruments]", "opentelemetry-test-utils (==0.41b0)", "requests (>=2.23,<3.0)"] [[package]] name = "opentelemetry-instrumentation-httpx" -version = "0.41b0" +version = "0.49b2" description = "OpenTelemetry HTTPX Instrumentation" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_instrumentation_httpx-0.41b0-py3-none-any.whl", hash = "sha256:6ada84b7caa95a2889b2d883c089a977546b0102c815658b88f1c2dae713e9b2"}, - {file = "opentelemetry_instrumentation_httpx-0.41b0.tar.gz", hash = "sha256:96ebc54f3f41bfcd2fc043349c8cee4b11737602512383d437e24c39a1e4adff"}, + {file = "opentelemetry_instrumentation_httpx-0.49b2-py3-none-any.whl", hash = "sha256:08111e6c8d11495dee7ef2243bc2e9acc09c16be8c6f4dd32f939f2b08f30af5"}, + {file = "opentelemetry_instrumentation_httpx-0.49b2.tar.gz", hash = "sha256:4330f56b0ad382843a1e8fe6179d20c2d2be3ee78e60b9f01ee892b1600de44f"}, ] [package.dependencies] opentelemetry-api = ">=1.12,<2.0" -opentelemetry-instrumentation = "0.41b0" -opentelemetry-semantic-conventions = "0.41b0" +opentelemetry-instrumentation = "0.49b2" +opentelemetry-semantic-conventions = "0.49b2" +opentelemetry-util-http = "0.49b2" +wrapt = ">=1.0.0,<2.0.0" [package.extras] instruments = ["httpx (>=0.18.0)"] -test = ["opentelemetry-instrumentation-httpx[instruments]", "opentelemetry-sdk (>=1.12,<2.0)", "opentelemetry-test-utils (==0.41b0)"] 
[[package]] name = "opentelemetry-instrumentation-logging" -version = "0.41b0" +version = "0.49b2" description = "OpenTelemetry Logging instrumentation" optional = false -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_instrumentation_logging-0.41b0-py2.py3-none-any.whl", hash = "sha256:ab7117886695c32eb30d7a59199292283c5e652e2b9f2d11874fe4359eacc16a"}, - {file = "opentelemetry_instrumentation_logging-0.41b0.tar.gz", hash = "sha256:8ad46e011a99df726323428f0d0a09bf68159ab776b8184ba6d83a7c44f7de81"}, + {file = "opentelemetry_instrumentation_logging-0.49b2-py3-none-any.whl", hash = "sha256:5ef73c37b34d8f564d37731cb399e7237636e2c8d7d97061d20526f6ece8afb1"}, + {file = "opentelemetry_instrumentation_logging-0.49b2.tar.gz", hash = "sha256:625c825cb180d1a4da8008af2dc21de5f668af120f3821af16317cd3a2378d7e"}, ] [package.dependencies] opentelemetry-api = ">=1.12,<2.0" -opentelemetry-instrumentation = "0.41b0" - -[package.extras] -test = ["opentelemetry-test-utils (==0.41b0)"] +opentelemetry-instrumentation = "0.49b2" [[package]] name = "opentelemetry-instrumentation-requests" -version = "0.41b0" +version = "0.49b2" description = "OpenTelemetry requests instrumentation" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_instrumentation_requests-0.41b0-py3-none-any.whl", hash = "sha256:687fde31111669e729054e64d246c96b0b9d4d8702bd0e3569b7660bdb528d71"}, - {file = "opentelemetry_instrumentation_requests-0.41b0.tar.gz", hash = "sha256:bdc5515ae7533e620b312fd989941b7c2c92d492a2d4418f6ef8db5d7422fa64"}, + {file = "opentelemetry_instrumentation_requests-0.49b2-py3-none-any.whl", hash = "sha256:d49b0022b29fb7f07a38b8e68750304c29a6d6114b94b56e3e811eff59efd318"}, + {file = "opentelemetry_instrumentation_requests-0.49b2.tar.gz", hash = "sha256:ea7216f13f42d3220ccd60cefd104fae656c9206bf5e3030d59fa367a9452e99"}, ] [package.dependencies] opentelemetry-api = ">=1.12,<2.0" -opentelemetry-instrumentation = "0.41b0" -opentelemetry-semantic-conventions = "0.41b0" -opentelemetry-util-http = "0.41b0" +opentelemetry-instrumentation = "0.49b2" +opentelemetry-semantic-conventions = "0.49b2" +opentelemetry-util-http = "0.49b2" [package.extras] instruments = ["requests (>=2.0,<3.0)"] -test = ["httpretty (>=1.0,<2.0)", "opentelemetry-instrumentation-requests[instruments]", "opentelemetry-test-utils (==0.41b0)"] [[package]] name = "opentelemetry-instrumentation-system-metrics" -version = "0.41b0" +version = "0.49b2" description = "OpenTelemetry System Metrics Instrumentation" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_instrumentation_system_metrics-0.41b0-py3-none-any.whl", hash = "sha256:4f2106cf4b77664eb9096727eaba4ccffe28ebf426068b19aa7289644d4b9680"}, - {file = "opentelemetry_instrumentation_system_metrics-0.41b0.tar.gz", hash = "sha256:727193655d81d31a89e118d905a2691e80d967993ae62bac96979a373f59485a"}, + {file = "opentelemetry_instrumentation_system_metrics-0.49b2-py3-none-any.whl", hash = "sha256:b599dbfba4ba977a8c248b74b18f147da314dbf2b780f4123e16c50e9c4f1948"}, + {file = "opentelemetry_instrumentation_system_metrics-0.49b2.tar.gz", hash = "sha256:2ef4949c0c0f64e6b7437b8d23e0ee57245ab3d0d38501157bb93f4e4151207c"}, ] [package.dependencies] opentelemetry-api = ">=1.11,<2.0" -opentelemetry-sdk = ">=1.11,<2.0" -psutil = ">=5.9,<6.0" +opentelemetry-instrumentation = "0.49b2" +psutil = ">=5.9.0,<7" [package.extras] instruments = ["psutil (>=5)"] -test = 
["opentelemetry-instrumentation-system-metrics[instruments]", "opentelemetry-test-utils (==0.41b0)"] [[package]] name = "opentelemetry-instrumentation-urllib" -version = "0.41b0" +version = "0.49b2" description = "OpenTelemetry urllib instrumentation" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_instrumentation_urllib-0.41b0-py3-none-any.whl", hash = "sha256:cee9e95f55a73480df0915358ce8668bbeda53324c9426847e2ccaea0cac1a87"}, - {file = "opentelemetry_instrumentation_urllib-0.41b0.tar.gz", hash = "sha256:113416b8bd9c2d5c890cb6f86737886e209a3776c2ecdc023887bd78634d5ef3"}, + {file = "opentelemetry_instrumentation_urllib-0.49b2-py3-none-any.whl", hash = "sha256:30e8eb408a93e0b09bb67e37c8b2c074ea3d55613467be6dc4be08f034a0d3e2"}, + {file = "opentelemetry_instrumentation_urllib-0.49b2.tar.gz", hash = "sha256:a148c073de6a1bbaf3ca423e639840cd2aa554eb0dda95077c111e46ea91d8ca"}, ] [package.dependencies] opentelemetry-api = ">=1.12,<2.0" -opentelemetry-instrumentation = "0.41b0" -opentelemetry-semantic-conventions = "0.41b0" -opentelemetry-util-http = "0.41b0" - -[package.extras] -test = ["httpretty (>=1.0,<2.0)", "opentelemetry-test-utils (==0.41b0)"] +opentelemetry-instrumentation = "0.49b2" +opentelemetry-semantic-conventions = "0.49b2" +opentelemetry-util-http = "0.49b2" [[package]] name = "opentelemetry-proto" -version = "1.20.0" +version = "1.28.2" description = "OpenTelemetry Python Proto" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_proto-1.20.0-py3-none-any.whl", hash = "sha256:512c3d2c6864fb7547a69577c3907348e6c985b7a204533563cb4c4c5046203b"}, - {file = "opentelemetry_proto-1.20.0.tar.gz", hash = "sha256:cf01f49b3072ee57468bccb1a4f93bdb55411f4512d0ac3f97c5c04c0040b5a2"}, + {file = "opentelemetry_proto-1.28.2-py3-none-any.whl", hash = "sha256:0837498f59db55086462915e5898d0b1a18c1392f6db4d7e937143072a72370c"}, + {file = "opentelemetry_proto-1.28.2.tar.gz", hash = "sha256:7c0d125a6b71af88bfeeda16bfdd0ff63dc2cf0039baf6f49fa133b203e3f566"}, ] [package.dependencies] -protobuf = ">=3.19,<5.0" +protobuf = ">=5.0,<6.0" [[package]] name = "opentelemetry-sdk" -version = "1.20.0" +version = "1.28.2" description = "OpenTelemetry Python SDK" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_sdk-1.20.0-py3-none-any.whl", hash = "sha256:f2230c276ff4c63ea09b3cb2e2ac6b1265f90af64e8d16bbf275c81a9ce8e804"}, - {file = "opentelemetry_sdk-1.20.0.tar.gz", hash = "sha256:702e432a457fa717fd2ddfd30640180e69938f85bb7fec3e479f85f61c1843f8"}, + {file = "opentelemetry_sdk-1.28.2-py3-none-any.whl", hash = "sha256:93336c129556f1e3ccd21442b94d3521759541521861b2214c499571b85cb71b"}, + {file = "opentelemetry_sdk-1.28.2.tar.gz", hash = "sha256:5fed24c5497e10df30282456fe2910f83377797511de07d14cec0d3e0a1a3110"}, ] [package.dependencies] -opentelemetry-api = "1.20.0" -opentelemetry-semantic-conventions = "0.41b0" +opentelemetry-api = "1.28.2" +opentelemetry-semantic-conventions = "0.49b2" typing-extensions = ">=3.7.4" [[package]] name = "opentelemetry-semantic-conventions" -version = "0.41b0" +version = "0.49b2" description = "OpenTelemetry Semantic Conventions" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_semantic_conventions-0.41b0-py3-none-any.whl", hash = "sha256:45404391ed9e50998183a4925ad1b497c01c143f06500c3b9c3d0013492bb0f2"}, - {file = "opentelemetry_semantic_conventions-0.41b0.tar.gz", 
hash = "sha256:0ce5b040b8a3fc816ea5879a743b3d6fe5db61f6485e4def94c6ee4d402e1eb7"}, + {file = "opentelemetry_semantic_conventions-0.49b2-py3-none-any.whl", hash = "sha256:51e7e1d0daa958782b6c2a8ed05e5f0e7dd0716fc327ac058777b8659649ee54"}, + {file = "opentelemetry_semantic_conventions-0.49b2.tar.gz", hash = "sha256:44e32ce6a5bb8d7c0c617f84b9dc1c8deda1045a07dc16a688cc7cbeab679997"}, ] +[package.dependencies] +deprecated = ">=1.2.6" +opentelemetry-api = "1.28.2" + [[package]] name = "opentelemetry-util-http" -version = "0.41b0" +version = "0.49b2" description = "Web util for OpenTelemetry" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "opentelemetry_util_http-0.41b0-py3-none-any.whl", hash = "sha256:6a167fd1e0e8b0f629530d971165b5d82ed0be2154b7f29498499c3a517edee5"}, - {file = "opentelemetry_util_http-0.41b0.tar.gz", hash = "sha256:16d5bd04a380dc1079e766562d1e1626cbb47720f197f67010c45f090fffdfb3"}, + {file = "opentelemetry_util_http-0.49b2-py3-none-any.whl", hash = "sha256:e325d6511c6bee7b43170eb0c93261a210ec57e20ab1d7a99838515ef6d2bf58"}, + {file = "opentelemetry_util_http-0.49b2.tar.gz", hash = "sha256:5958c7009f79146bbe98b0fdb23d9d7bf1ea9cd154a1c199029b1a89e0557199"}, ] [[package]] @@ -1802,22 +1757,22 @@ files = [ [[package]] name = "protobuf" -version = "4.25.4" +version = "5.29.0" description = "" optional = false python-versions = ">=3.8" files = [ - {file = "protobuf-4.25.4-cp310-abi3-win32.whl", hash = "sha256:db9fd45183e1a67722cafa5c1da3e85c6492a5383f127c86c4c4aa4845867dc4"}, - {file = "protobuf-4.25.4-cp310-abi3-win_amd64.whl", hash = "sha256:ba3d8504116a921af46499471c63a85260c1a5fc23333154a427a310e015d26d"}, - {file = "protobuf-4.25.4-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:eecd41bfc0e4b1bd3fa7909ed93dd14dd5567b98c941d6c1ad08fdcab3d6884b"}, - {file = "protobuf-4.25.4-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:4c8a70fdcb995dcf6c8966cfa3a29101916f7225e9afe3ced4395359955d3835"}, - {file = "protobuf-4.25.4-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:3319e073562e2515c6ddc643eb92ce20809f5d8f10fead3332f71c63be6a7040"}, - {file = "protobuf-4.25.4-cp38-cp38-win32.whl", hash = "sha256:7e372cbbda66a63ebca18f8ffaa6948455dfecc4e9c1029312f6c2edcd86c4e1"}, - {file = "protobuf-4.25.4-cp38-cp38-win_amd64.whl", hash = "sha256:051e97ce9fa6067a4546e75cb14f90cf0232dcb3e3d508c448b8d0e4265b61c1"}, - {file = "protobuf-4.25.4-cp39-cp39-win32.whl", hash = "sha256:90bf6fd378494eb698805bbbe7afe6c5d12c8e17fca817a646cd6a1818c696ca"}, - {file = "protobuf-4.25.4-cp39-cp39-win_amd64.whl", hash = "sha256:ac79a48d6b99dfed2729ccccee547b34a1d3d63289c71cef056653a846a2240f"}, - {file = "protobuf-4.25.4-py3-none-any.whl", hash = "sha256:bfbebc1c8e4793cfd58589acfb8a1026be0003e852b9da7db5a4285bde996978"}, - {file = "protobuf-4.25.4.tar.gz", hash = "sha256:0dc4a62cc4052a036ee2204d26fe4d835c62827c855c8a03f29fe6da146b380d"}, + {file = "protobuf-5.29.0-cp310-abi3-win32.whl", hash = "sha256:ea7fb379b257911c8c020688d455e8f74efd2f734b72dc1ea4b4d7e9fd1326f2"}, + {file = "protobuf-5.29.0-cp310-abi3-win_amd64.whl", hash = "sha256:34a90cf30c908f47f40ebea7811f743d360e202b6f10d40c02529ebd84afc069"}, + {file = "protobuf-5.29.0-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:c931c61d0cc143a2e756b1e7f8197a508de5365efd40f83c907a9febf36e6b43"}, + {file = "protobuf-5.29.0-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:85286a47caf63b34fa92fdc1fd98b649a8895db595cfa746c5286eeae890a0b1"}, + {file = "protobuf-5.29.0-cp38-abi3-manylinux2014_x86_64.whl", hash = 
"sha256:0d10091d6d03537c3f902279fcf11e95372bdd36a79556311da0487455791b20"}, + {file = "protobuf-5.29.0-cp38-cp38-win32.whl", hash = "sha256:0cd67a1e5c2d88930aa767f702773b2d054e29957432d7c6a18f8be02a07719a"}, + {file = "protobuf-5.29.0-cp38-cp38-win_amd64.whl", hash = "sha256:e467f81fdd12ded9655cea3e9b83dc319d93b394ce810b556fb0f421d8613e86"}, + {file = "protobuf-5.29.0-cp39-cp39-win32.whl", hash = "sha256:17d128eebbd5d8aee80300aed7a43a48a25170af3337f6f1333d1fac2c6839ac"}, + {file = "protobuf-5.29.0-cp39-cp39-win_amd64.whl", hash = "sha256:6c3009e22717c6cc9e6594bb11ef9f15f669b19957ad4087214d69e08a213368"}, + {file = "protobuf-5.29.0-py3-none-any.whl", hash = "sha256:88c4af76a73183e21061881360240c0cdd3c39d263b4e8fb570aaf83348d608f"}, + {file = "protobuf-5.29.0.tar.gz", hash = "sha256:445a0c02483869ed8513a585d80020d012c6dc60075f96fa0563a724987b1001"}, ] [[package]] @@ -2568,4 +2523,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.13" -content-hash = "307c14e21b2fc8b1598cd8b903489c48c6d6367a44bfb6e764d02a45bc6dd9fb" +content-hash = "fc8455a94cc695d7db784c43fa35e441e3919e5f6819fdcfa743e23c2661e28b" diff --git a/pyproject.toml b/pyproject.toml index 024575b..e6da0b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ aiohttp = "^3.10.11" numpy = "^1.26.0" pillow = "^10.3.0" azure-identity = "^1.16.1" -aidial-sdk = {version = "^0.15.0", extras = ["telemetry"]} +aidial-sdk = {version = "^0.16.0", extras = ["telemetry"]} [tool.poetry.group.test.dependencies] pytest = "7.4.0" diff --git a/tests/test_errors.py b/tests/test_errors.py index dcdafce..59cf255 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -466,7 +466,6 @@ async def test_timeout_error_from_upstream(test_app: httpx.AsyncClient): }, ) - assert response.status_code == 504 assert response.json() == { "error": { "message": "Request timed out", @@ -475,6 +474,7 @@ async def test_timeout_error_from_upstream(test_app: httpx.AsyncClient): "display_message": "Request timed out. 
Please try again later.", } } + assert response.status_code == 504 @respx.mock From 820078a22304df2f67e0873e0c40db25d061642f Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Thu, 5 Dec 2024 09:59:17 +0000 Subject: [PATCH 02/14] fix: fixed unit tests for errors --- .vscode/settings.json | 4 +- aidial_adapter_openai/exception_handlers.py | 100 ++++++++++++++------ 2 files changed, 76 insertions(+), 28 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index ab64be2..d62db15 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -8,7 +8,9 @@ }, "editor.tabSize": 4 }, - "python.testing.pytestArgs": ["."], + "python.testing.pytestArgs": [ + "tests" + ], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, "python.analysis.typeCheckingMode": "basic" diff --git a/aidial_adapter_openai/exception_handlers.py b/aidial_adapter_openai/exception_handlers.py index 3eb0a5b..f4a66e8 100644 --- a/aidial_adapter_openai/exception_handlers.py +++ b/aidial_adapter_openai/exception_handlers.py @@ -1,7 +1,9 @@ -from typing import Dict +import json +from typing import Any, Dict from aidial_sdk.exceptions import HTTPException as DialException from aidial_sdk.exceptions import InternalServerError +from fastapi import HTTPException as FastAPIException from fastapi import Request from fastapi.responses import Response from openai import ( @@ -11,16 +13,57 @@ APITimeoutError, OpenAIError, ) +from typing_extensions import override + + +class PlainDialException(DialException): + content: Any + + def __init__( + self, + *, + content: Any, + status_code: int, + headers: Dict[str, str] | None, + ) -> None: + super().__init__( + message=str(content), + status_code=status_code, + headers=headers, + ) + self.content = content + + @override + def to_fastapi_response(self) -> Response: # type: ignore + return Response( + status_code=self.status_code, + content=self.content, + headers=self.headers, + ) + + @override + def to_fastapi_exception(self) -> FastAPIException: + return FastAPIException( + status_code=self.status_code, + detail=self.content, + headers=self.headers, + ) def _parse_dial_exception( - status_code: int, - content: dict | str, - headers: Dict[str, str] | None = None, -) -> DialException: + *, status_code: int, headers: Dict[str, str], content: Any +) -> DialException | None: + if isinstance(content, str): + try: + obj = json.loads(content) + except Exception: + return None + else: + obj = content + if ( - isinstance(content, dict) - and (error := content.get("error")) + isinstance(obj, dict) + and (error := obj.get("error")) and isinstance(error, dict) ): message = error.get("message") or "Unknown error" @@ -38,20 +81,15 @@ def _parse_dial_exception( display_message=display_message, headers=headers, ) - else: - return DialException( - status_code=status_code, - message=str(content), - headers=headers, - ) + + return None def to_dial_exception(exc: Exception) -> DialException: if isinstance(exc, APIStatusError): # Non-streaming errors reported by `openai` library via this exception - r = exc.response - headers = r.headers + httpx_headers = r.headers # httpx library (used by openai) automatically sets # "Accept-Encoding:gzip,deflate" header in requests to the upstream. @@ -59,19 +97,20 @@ def to_dial_exception(exc: Exception) -> DialException: # response along with "Content-Encoding:gzip" header. # We either need to encode the response, or # remove the "Content-Encoding" header. 
- if "Content-Encoding" in headers: - del headers["Content-Encoding"] + if "Content-Encoding" in httpx_headers: + del httpx_headers["Content-Encoding"] - plain_headers = {k.decode(): v.decode() for k, v in headers.raw} - - try: - content = r.json() - except Exception: - content = r.text + headers = {k.decode(): v.decode() for k, v in httpx_headers.raw} + status_code = r.status_code + content = r.text return _parse_dial_exception( - status_code=r.status_code, - headers=plain_headers, + status_code=status_code, + headers=headers, + content=content, + ) or PlainDialException( + status_code=status_code, + headers=headers, content=content, ) @@ -100,10 +139,17 @@ def to_dial_exception(exc: Exception) -> DialException: except Exception: pass + headers = {} + content = {"error": exc.body or {}} + return _parse_dial_exception( status_code=status_code, - headers={}, - content={"error": exc.body or {}}, + headers=headers, + content=content, + ) or PlainDialException( + status_code=status_code, + headers=headers, + content=content, ) if isinstance(exc, DialException): From 3956b16d5507056be8813b23a4db7acf12d9802e Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Thu, 5 Dec 2024 10:26:55 +0000 Subject: [PATCH 03/14] feat: introduced AdapterException class --- aidial_adapter_openai/exception_handlers.py | 106 ++---------------- .../utils/adapter_exception.py | 103 +++++++++++++++++ aidial_adapter_openai/utils/sse_stream.py | 10 +- aidial_adapter_openai/utils/streaming.py | 7 +- 4 files changed, 125 insertions(+), 101 deletions(-) create mode 100644 aidial_adapter_openai/utils/adapter_exception.py diff --git a/aidial_adapter_openai/exception_handlers.py b/aidial_adapter_openai/exception_handlers.py index f4a66e8..635c718 100644 --- a/aidial_adapter_openai/exception_handlers.py +++ b/aidial_adapter_openai/exception_handlers.py @@ -1,9 +1,5 @@ -import json -from typing import Any, Dict - from aidial_sdk.exceptions import HTTPException as DialException from aidial_sdk.exceptions import InternalServerError -from fastapi import HTTPException as FastAPIException from fastapi import Request from fastapi.responses import Response from openai import ( @@ -13,79 +9,14 @@ APITimeoutError, OpenAIError, ) -from typing_extensions import override - - -class PlainDialException(DialException): - content: Any - - def __init__( - self, - *, - content: Any, - status_code: int, - headers: Dict[str, str] | None, - ) -> None: - super().__init__( - message=str(content), - status_code=status_code, - headers=headers, - ) - self.content = content - - @override - def to_fastapi_response(self) -> Response: # type: ignore - return Response( - status_code=self.status_code, - content=self.content, - headers=self.headers, - ) - - @override - def to_fastapi_exception(self) -> FastAPIException: - return FastAPIException( - status_code=self.status_code, - detail=self.content, - headers=self.headers, - ) - - -def _parse_dial_exception( - *, status_code: int, headers: Dict[str, str], content: Any -) -> DialException | None: - if isinstance(content, str): - try: - obj = json.loads(content) - except Exception: - return None - else: - obj = content - - if ( - isinstance(obj, dict) - and (error := obj.get("error")) - and isinstance(error, dict) - ): - message = error.get("message") or "Unknown error" - code = error.get("code") - type = error.get("type") - param = error.get("param") - display_message = error.get("display_message") - return DialException( - status_code=status_code, - message=message, - type=type, - param=param, - code=code, - 
display_message=display_message, - headers=headers, - ) - - return None +from aidial_adapter_openai.utils.adapter_exception import ( + AdapterException, + parse_adapter_exception, +) -def to_dial_exception(exc: Exception) -> DialException: +def to_adapter_exception(exc: Exception) -> AdapterException: if isinstance(exc, APIStatusError): # Non-streaming errors reported by `openai` library via this exception r = exc.response @@ -101,17 +32,11 @@ def to_dial_exception(exc: Exception) -> DialException: del httpx_headers["Content-Encoding"] headers = {k.decode(): v.decode() for k, v in httpx_headers.raw} - status_code = r.status_code - content = r.text - return _parse_dial_exception( - status_code=status_code, + return parse_adapter_exception( + status_code=r.status_code, headers=headers, - content=content, - ) or PlainDialException( - status_code=status_code, - headers=headers, - content=content, + content=r.text, ) if isinstance(exc, APITimeoutError): @@ -139,17 +64,10 @@ def to_dial_exception(exc: Exception) -> DialException: except Exception: pass - headers = {} - content = {"error": exc.body or {}} - - return _parse_dial_exception( + return parse_adapter_exception( status_code=status_code, - headers=headers, - content=content, - ) or PlainDialException( - status_code=status_code, - headers=headers, - content=content, + headers={}, + content={"error": exc.body or {}}, ) if isinstance(exc, DialException): @@ -160,4 +78,4 @@ def to_dial_exception(exc: Exception) -> DialException: def openai_exception_handler(request: Request, exc: Exception) -> Response: assert isinstance(exc, OpenAIError) - return to_dial_exception(exc).to_fastapi_response() + return to_adapter_exception(exc).to_fastapi_response() diff --git a/aidial_adapter_openai/utils/adapter_exception.py b/aidial_adapter_openai/utils/adapter_exception.py new file mode 100644 index 0000000..8715131 --- /dev/null +++ b/aidial_adapter_openai/utils/adapter_exception.py @@ -0,0 +1,103 @@ +import json +from typing import Any, Dict + +from aidial_sdk.exceptions import HTTPException as DialException +from fastapi import HTTPException as FastAPIException +from fastapi.responses import Response as FastAPIResponse + + +class ResponseWrapper(Exception): + content: Any + status_code: int + headers: Dict[str, str] | None + + def __init__( + self, + *, + content: Any, + status_code: int, + headers: Dict[str, str] | None, + ) -> None: + super().__init__(str(content)) + self.content = content + self.status_code = status_code + self.headers = headers + + def __repr__(self): + # headers field is omitted deliberately + # since it may contain sensitive information + return "%s(content=%r, status_code=%r)" % ( + self.__class__.__name__, + self.content, + self.status_code, + ) + + def to_fastapi_response(self) -> FastAPIResponse: + return FastAPIResponse( + status_code=self.status_code, + content=self.content, + headers=self.headers, + ) + + def to_fastapi_exception(self) -> FastAPIException: + return FastAPIException( + status_code=self.status_code, + detail=self.content, + headers=self.headers, + ) + + def json_error(self) -> dict: + return { + "error": { + "message": str(self.content), + "code": int(self.status_code), + } + } + + +AdapterException = ResponseWrapper | DialException + + +def _parse_dial_exception( + *, status_code: int, headers: Dict[str, str], content: Any +) -> DialException | None: + if isinstance(content, str): + try: + obj = json.loads(content) + except Exception: + return None + else: + obj = content + + if ( + isinstance(obj, dict) + 
and (error := obj.get("error")) + and isinstance(error, dict) + ): + message = error.get("message") or "Unknown error" + code = error.get("code") + type = error.get("type") + param = error.get("param") + display_message = error.get("display_message") + + return DialException( + status_code=status_code, + message=message, + type=type, + param=param, + code=code, + display_message=display_message, + headers=headers, + ) + + return None + + +def parse_adapter_exception( + *, status_code: int, headers: Dict[str, str], content: Any +) -> AdapterException: + return _parse_dial_exception( + status_code=status_code, headers=headers, content=content + ) or ResponseWrapper( + status_code=status_code, headers=headers, content=content + ) diff --git a/aidial_adapter_openai/utils/sse_stream.py b/aidial_adapter_openai/utils/sse_stream.py index b4a6ce9..68cfe53 100644 --- a/aidial_adapter_openai/utils/sse_stream.py +++ b/aidial_adapter_openai/utils/sse_stream.py @@ -3,7 +3,7 @@ from aidial_sdk.exceptions import runtime_server_error -from aidial_adapter_openai.exception_handlers import to_dial_exception +from aidial_adapter_openai.exception_handlers import to_adapter_exception from aidial_adapter_openai.utils.log_config import logger DATA_PREFIX = "data: " @@ -64,9 +64,11 @@ async def to_openai_sse_stream( f"caught exception while streaming: {type(e).__module__}.{type(e).__name__}" ) - dial_exception = to_dial_exception(e) - logger.exception(f"converted to the dial exception: {dial_exception!r}") + adapter_exception = to_adapter_exception(e) + logger.exception( + f"converted to the adapter exception: {adapter_exception!r}" + ) - yield format_chunk(dial_exception.json_error()) + yield format_chunk(adapter_exception.json_error()) yield END_CHUNK diff --git a/aidial_adapter_openai/utils/streaming.py b/aidial_adapter_openai/utils/streaming.py index e54f5dd..c9bd2d3 100644 --- a/aidial_adapter_openai/utils/streaming.py +++ b/aidial_adapter_openai/utils/streaming.py @@ -9,7 +9,8 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk from pydantic import BaseModel -from aidial_adapter_openai.exception_handlers import to_dial_exception +from aidial_adapter_openai.exception_handlers import to_adapter_exception +from aidial_adapter_openai.utils.adapter_exception import AdapterException from aidial_adapter_openai.utils.chat_completion_response import ( ChatCompletionResponse, ChatCompletionStreamingChunk, @@ -98,7 +99,7 @@ def set_discarded_messages(chunk: dict | None, indices: list[int]) -> dict: buffer_chunk = None response_snapshot = ChatCompletionStreamingChunk() - error: DialException | None = None + error: AdapterException | None = None try: async for chunk in stream: @@ -122,7 +123,7 @@ def set_discarded_messages(chunk: dict | None, indices: list[int]) -> dict: last_chunk = chunk except Exception as e: - error = to_dial_exception(e) + error = to_adapter_exception(e) if last_chunk is not None and buffer_chunk is not None: last_chunk = merge_chat_completion_chunks(last_chunk, buffer_chunk) From 9bb2b6c98b58fc65f0669d48c8482d0135296f98 Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Thu, 5 Dec 2024 10:31:13 +0000 Subject: [PATCH 04/14] feat: added a test checking propagation of headers --- tests/test_errors.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/tests/test_errors.py b/tests/test_errors.py index 59cf255..77f8f4d 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -447,7 +447,34 @@ async def 
test_status_error_from_upstream(test_app: httpx.AsyncClient): ) assert response.status_code == 400 - assert response.content == b"Bad request" + assert response.text == "Bad request" + + +@respx.mock +@pytest.mark.asyncio +async def test_status_error_from_upstream_with_headers( + test_app: httpx.AsyncClient, +): + respx.post( + "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview" + ).respond( + status_code=429, + content="Too many requests", + headers={"Retry-After": "42"}, + ) + + response = await test_app.post( + "/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview", + json={"messages": [{"role": "user", "content": "Test content"}]}, + headers={ + "X-UPSTREAM-KEY": "TEST_API_KEY", + "X-UPSTREAM-ENDPOINT": "http://localhost:5001/openai/deployments/gpt-4/chat/completions", + }, + ) + + assert response.status_code == 429 + assert response.text == "Too many requests" + assert response.headers["Retry-After"] == "42" @respx.mock @@ -466,6 +493,7 @@ async def test_timeout_error_from_upstream(test_app: httpx.AsyncClient): }, ) + assert response.status_code == 504 assert response.json() == { "error": { "message": "Request timed out", @@ -474,7 +502,6 @@ async def test_timeout_error_from_upstream(test_app: httpx.AsyncClient): "display_message": "Request timed out. Please try again later.", } } - assert response.status_code == 504 @respx.mock @@ -504,7 +531,6 @@ async def test_connection_error_from_upstream(test_app: httpx.AsyncClient): } -@respx.mock @pytest.mark.asyncio async def test_incorrect_streaming_request(test_app: httpx.AsyncClient): response = await test_app.post( From 848f78c5c290d904f589e9b8ae9e85ca61def75b Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Thu, 5 Dec 2024 11:02:41 +0000 Subject: [PATCH 05/14] chore: added test for error during streaming --- aidial_adapter_openai/utils/sse_stream.py | 2 +- aidial_adapter_openai/utils/streaming.py | 6 +++ aidial_adapter_openai/utils/tokenizer.py | 4 +- tests/test_errors.py | 49 ++++++++++++++++++++++- 4 files changed, 56 insertions(+), 5 deletions(-) diff --git a/aidial_adapter_openai/utils/sse_stream.py b/aidial_adapter_openai/utils/sse_stream.py index 68cfe53..3b02b29 100644 --- a/aidial_adapter_openai/utils/sse_stream.py +++ b/aidial_adapter_openai/utils/sse_stream.py @@ -65,7 +65,7 @@ async def to_openai_sse_stream( ) adapter_exception = to_adapter_exception(e) - logger.exception( + logger.error( f"converted to the adapter exception: {adapter_exception!r}" ) diff --git a/aidial_adapter_openai/utils/streaming.py b/aidial_adapter_openai/utils/streaming.py index c9bd2d3..1258c62 100644 --- a/aidial_adapter_openai/utils/streaming.py +++ b/aidial_adapter_openai/utils/streaming.py @@ -123,8 +123,14 @@ def set_discarded_messages(chunk: dict | None, indices: list[int]) -> dict: last_chunk = chunk except Exception as e: + logger.exception( + f"caught exception while streaming: {type(e).__module__}.{type(e).__name__}" + ) + error = to_adapter_exception(e) + logger.error(f"converted to the adapter exception: {error!r}") + if last_chunk is not None and buffer_chunk is not None: last_chunk = merge_chat_completion_chunks(last_chunk, buffer_chunk) diff --git a/aidial_adapter_openai/utils/tokenizer.py b/aidial_adapter_openai/utils/tokenizer.py index c177a7b..63d0871 100644 --- a/aidial_adapter_openai/utils/tokenizer.py +++ b/aidial_adapter_openai/utils/tokenizer.py @@ -155,9 +155,9 @@ class PlainTextTokenizer(BaseTokenizer[dict]): """ def _handle_custom_content_part(self, 
content_part: Any): - short_content_part = truncate_string(str(content_part), 100) + short_content_str = truncate_string(str(content_part), 100) raise InternalServerError( - f"Unexpected non-textural content part in the request: {short_content_part!r}. " + f"Unexpected non-textural content part in the request: {short_content_str!r}. " f"The deployment only supports plain text messages. " f"Declare the deployment as a multi-modal one to avoid the error." ) diff --git a/tests/test_errors.py b/tests/test_errors.py index 77f8f4d..f7dc6cf 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -1,5 +1,5 @@ import json -from typing import Any, Callable +from typing import Any, AsyncIterator, Callable import httpx import pytest @@ -506,7 +506,9 @@ async def test_timeout_error_from_upstream(test_app: httpx.AsyncClient): @respx.mock @pytest.mark.asyncio -async def test_connection_error_from_upstream(test_app: httpx.AsyncClient): +async def test_connection_error_from_upstream_non_streaming( + test_app: httpx.AsyncClient, +): respx.post( "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview" ).mock(side_effect=httpx.ConnectError("Connection error")) @@ -531,6 +533,49 @@ async def test_connection_error_from_upstream(test_app: httpx.AsyncClient): } +@respx.mock +@pytest.mark.asyncio +async def test_connection_error_from_upstream_streaming( + test_app: httpx.AsyncClient, +): + class mock_stream(httpx.AsyncByteStream): + async def __aiter__(self) -> AsyncIterator[bytes]: + yield b'data: {"message": "first chunk"}\n\n' + yield b'data: {"message": "second chunk"}\n\n' + raise httpx.ConnectError("Connection error") + + respx.post( + "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview" + ).mock( + side_effect=lambda request: httpx.Response( + status_code=200, stream=mock_stream() + ) + ) + + response = await test_app.post( + "/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview", + json={ + "stream": True, + "messages": [{"role": "user", "content": "Test content"}], + }, + headers={ + "X-UPSTREAM-KEY": "TEST_API_KEY", + "X-UPSTREAM-ENDPOINT": "http://localhost:5001/openai/deployments/gpt-4/chat/completions", + }, + ) + + assert response.status_code == 200 + assert response.text == "\n\n".join( + [ + 'data: {"message":"first chunk"}', + 'data: {"message":"second chunk"}', + 'data: {"error":{"message":"Connection error","type":"internal_server_error","code":"500"}}', + "data: [DONE]", + "", + ] + ) + + @pytest.mark.asyncio async def test_incorrect_streaming_request(test_app: httpx.AsyncClient): response = await test_app.post( From 2ea98b53c52a61970853eb6481f86a8f4dc8c760 Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Thu, 5 Dec 2024 11:07:05 +0000 Subject: [PATCH 06/14] chore: added test with invalid chunk stream --- tests/test_errors.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tests/test_errors.py b/tests/test_errors.py index f7dc6cf..6f64ef2 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -576,6 +576,48 @@ async def __aiter__(self) -> AsyncIterator[bytes]: ) +@respx.mock +@pytest.mark.asyncio +async def test_invalid_chunk_stream_from_upstream( + test_app: httpx.AsyncClient, +): + class mock_stream(httpx.AsyncByteStream): + async def __aiter__(self) -> AsyncIterator[bytes]: + yield b"data: chunk1\n\n" + yield b"data: chunk2\n\n" + yield b"data: [DONE]\n\n" + + respx.post( + 
"http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview" + ).mock( + side_effect=lambda request: httpx.Response( + status_code=200, stream=mock_stream() + ) + ) + + response = await test_app.post( + "/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview", + json={ + "stream": True, + "messages": [{"role": "user", "content": "Test content"}], + }, + headers={ + "X-UPSTREAM-KEY": "TEST_API_KEY", + "X-UPSTREAM-ENDPOINT": "http://localhost:5001/openai/deployments/gpt-4/chat/completions", + }, + ) + + assert response.status_code == 200 + assert response.text == "\n\n".join( + [ + # OpenAI is unable to parse SSE entry with invalid JSON and fails with the following error: + 'data: {"error":{"message":"Expecting value: line 1 column 1 (char 0)","type":"internal_server_error","code":"500"}}', + "data: [DONE]", + "", + ] + ) + + @pytest.mark.asyncio async def test_incorrect_streaming_request(test_app: httpx.AsyncClient): response = await test_app.post( From 4539e7665d17f1fb57101ca1c5ba8ad4983aea75 Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Thu, 5 Dec 2024 11:27:40 +0000 Subject: [PATCH 07/14] chore: added test for unexpected multi-modal content part --- aidial_adapter_openai/utils/tokenizer.py | 2 +- tests/test_errors.py | 62 ++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/aidial_adapter_openai/utils/tokenizer.py b/aidial_adapter_openai/utils/tokenizer.py index 63d0871..4320dfb 100644 --- a/aidial_adapter_openai/utils/tokenizer.py +++ b/aidial_adapter_openai/utils/tokenizer.py @@ -159,7 +159,7 @@ def _handle_custom_content_part(self, content_part: Any): raise InternalServerError( f"Unexpected non-textural content part in the request: {short_content_str!r}. " f"The deployment only supports plain text messages. " - f"Declare the deployment as a multi-modal one to avoid the error." + f"Declare the deployment as a multi-modal one in the OpenAI adapter configuration to avoid the error." ) def tokenize_request_message(self, message: dict) -> int: diff --git a/tests/test_errors.py b/tests/test_errors.py index 6f64ef2..2c00ec6 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -618,6 +618,68 @@ async def __aiter__(self) -> AsyncIterator[bytes]: ) +@respx.mock +@pytest.mark.asyncio +async def test_unexpected_multi_modal_input_streaming( + test_app: httpx.AsyncClient, +): + mock_stream = OpenAIStream( + single_choice_chunk(delta={"role": "assistant"}), + single_choice_chunk(delta={"content": "Test response"}), + single_choice_chunk(delta={}, finish_reason="stop"), + ) + + expected_stream = OpenAIStream( + single_choice_chunk(delta={"role": "assistant"}), + single_choice_chunk(delta={"content": "Test response"}), + { + "error": { + "message": "Unexpected non-textural content part in the request: \"{'type': 'image_url', 'image_url': {'url': 'http://example.com/image.png'}}\". The deployment only supports plain text messages. 
Declare the deployment as a multi-modal one in the OpenAI adapter configuration to avoid the error.", + "type": "internal_server_error", + "code": "500", + } + }, + ) + + respx.post( + "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview" + ).respond( + status_code=200, + content=mock_stream.to_content(), + content_type="text/event-stream", + ) + + response = await test_app.post( + "/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview", + json={ + "stream": True, + "messages": [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": "http://example.com/image.png" + }, + } + ], + } + ], + }, + headers={ + "X-UPSTREAM-KEY": "TEST_API_KEY", + "X-UPSTREAM-ENDPOINT": "http://localhost:5001/openai/deployments/gpt-4/chat/completions", + }, + ) + + assert response.status_code == 200 + expected_stream.assert_response_content( + response, + assert_equal, + ) + + @pytest.mark.asyncio async def test_incorrect_streaming_request(test_app: httpx.AsyncClient): response = await test_app.post( From 1c3833d83b653a08f43e160eb420d49c76c070d4 Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Thu, 5 Dec 2024 12:17:59 +0000 Subject: [PATCH 08/14] fix: tolerate tokenization fails --- aidial_adapter_openai/utils/streaming.py | 33 +++++++++++++----------- tests/test_errors.py | 17 +----------- 2 files changed, 19 insertions(+), 31 deletions(-) diff --git a/aidial_adapter_openai/utils/streaming.py b/aidial_adapter_openai/utils/streaming.py index 1258c62..832e131 100644 --- a/aidial_adapter_openai/utils/streaming.py +++ b/aidial_adapter_openai/utils/streaming.py @@ -9,8 +9,6 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk from pydantic import BaseModel -from aidial_adapter_openai.exception_handlers import to_adapter_exception -from aidial_adapter_openai.utils.adapter_exception import AdapterException from aidial_adapter_openai.utils.chat_completion_response import ( ChatCompletionResponse, ChatCompletionStreamingChunk, @@ -73,15 +71,23 @@ async def generate_stream( ) def set_usage(chunk: dict | None, resp: ChatCompletionResponse) -> dict: - completion_tokens = tokenize_response(resp) - prompt_tokens = get_prompt_tokens() - chunk = chunk or empty_chunk - chunk["usage"] = { - "completion_tokens": completion_tokens, - "prompt_tokens": prompt_tokens, - "total_tokens": prompt_tokens + completion_tokens, - } + + # Do not fail the whole response if tokenization has failed + try: + completion_tokens = tokenize_response(resp) + prompt_tokens = get_prompt_tokens() + except Exception as e: + logger.exception( + f"caught exception while tokenization: {type(e).__module__}.{type(e).__name__}. " + "The tokenization has failed, therefore, the usage won't be reported." 
+ ) + else: + chunk["usage"] = { + "completion_tokens": completion_tokens, + "prompt_tokens": prompt_tokens, + "total_tokens": prompt_tokens + completion_tokens, + } return chunk def set_finish_reason(chunk: dict | None, finish_reason: str) -> dict: @@ -99,7 +105,7 @@ def set_discarded_messages(chunk: dict | None, indices: list[int]) -> dict: buffer_chunk = None response_snapshot = ChatCompletionStreamingChunk() - error: AdapterException | None = None + error: Exception | None = None try: async for chunk in stream: @@ -126,10 +132,7 @@ def set_discarded_messages(chunk: dict | None, indices: list[int]) -> dict: logger.exception( f"caught exception while streaming: {type(e).__module__}.{type(e).__name__}" ) - - error = to_adapter_exception(e) - - logger.error(f"converted to the adapter exception: {error!r}") + error = e if last_chunk is not None and buffer_chunk is not None: last_chunk = merge_chat_completion_chunks(last_chunk, buffer_chunk) diff --git a/tests/test_errors.py b/tests/test_errors.py index 2c00ec6..14160ca 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -629,18 +629,6 @@ async def test_unexpected_multi_modal_input_streaming( single_choice_chunk(delta={}, finish_reason="stop"), ) - expected_stream = OpenAIStream( - single_choice_chunk(delta={"role": "assistant"}), - single_choice_chunk(delta={"content": "Test response"}), - { - "error": { - "message": "Unexpected non-textural content part in the request: \"{'type': 'image_url', 'image_url': {'url': 'http://example.com/image.png'}}\". The deployment only supports plain text messages. Declare the deployment as a multi-modal one in the OpenAI adapter configuration to avoid the error.", - "type": "internal_server_error", - "code": "500", - } - }, - ) - respx.post( "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview" ).respond( @@ -674,10 +662,7 @@ async def test_unexpected_multi_modal_input_streaming( ) assert response.status_code == 200 - expected_stream.assert_response_content( - response, - assert_equal, - ) + mock_stream.assert_response_content(response, assert_equal) @pytest.mark.asyncio From 6ba61956113b7db5c7d14261748e33f5b75f780f Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Thu, 5 Dec 2024 12:40:55 +0000 Subject: [PATCH 09/14] fix: removed accidental debug flag --- aidial_adapter_openai/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aidial_adapter_openai/app.py b/aidial_adapter_openai/app.py index ba27873..703547d 100644 --- a/aidial_adapter_openai/app.py +++ b/aidial_adapter_openai/app.py @@ -27,7 +27,7 @@ def create_app( app_config: ApplicationConfig | None = None, init_telemetry: bool = True, ) -> FastAPI: - app = FastAPI(lifespan=lifespan, debug=True) + app = FastAPI(lifespan=lifespan) set_app_config(app, app_config or ApplicationConfig.from_env()) if init_telemetry: From 6bfacf843ab2172bdbf5585b26154475f2d5a25c Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Fri, 6 Dec 2024 10:11:32 +0000 Subject: [PATCH 10/14] fix: simplified tests --- tests/test_errors.py | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/tests/test_errors.py b/tests/test_errors.py index 14160ca..604e080 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -1,5 +1,5 @@ import json -from typing import Any, AsyncIterator, Callable +from typing import Any, AsyncIterable, Callable import httpx import pytest @@ -538,18 +538,17 @@ async def test_connection_error_from_upstream_non_streaming( async def 
test_connection_error_from_upstream_streaming(
     test_app: httpx.AsyncClient,
 ):
-    class mock_stream(httpx.AsyncByteStream):
-        async def __aiter__(self) -> AsyncIterator[bytes]:
-            yield b'data: {"message": "first chunk"}\n\n'
-            yield b'data: {"message": "second chunk"}\n\n'
-            raise httpx.ConnectError("Connection error")
+    async def mock_stream() -> AsyncIterable[bytes]:
+        yield b'data: {"message": "first chunk"}\n\n'
+        yield b'data: {"message": "second chunk"}\n\n'
+        raise httpx.ConnectError("Connection error")
 
     respx.post(
         "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview"
-    ).mock(
-        side_effect=lambda request: httpx.Response(
-            status_code=200, stream=mock_stream()
-        )
+    ).respond(
+        status_code=200,
+        content_type="text/event-stream",
+        content=mock_stream(),
     )
 
     response = await test_app.post(
@@ -581,18 +580,17 @@ async def test_invalid_chunk_stream_from_upstream(
     test_app: httpx.AsyncClient,
 ):
-    class mock_stream(httpx.AsyncByteStream):
-        async def __aiter__(self) -> AsyncIterator[bytes]:
-            yield b"data: chunk1\n\n"
-            yield b"data: chunk2\n\n"
-            yield b"data: [DONE]\n\n"
+    async def mock_stream() -> AsyncIterable[bytes]:
+        yield b"data: chunk1\n\n"
+        yield b"data: chunk2\n\n"
+        yield b"data: [DONE]\n\n"
 
     respx.post(
         "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview"
-    ).mock(
-        side_effect=lambda request: httpx.Response(
-            status_code=200, stream=mock_stream()
-        )
+    ).respond(
+        status_code=200,
+        content_type="text/event-stream",
+        content=mock_stream(),
     )
 
     response = await test_app.post(
From dbf672907288bc47a2cd871d128bb88fcb432d0b Mon Sep 17 00:00:00 2001
From: Anton Dubovik
Date: Fri, 6 Dec 2024 10:59:25 +0000
Subject: [PATCH 11/14] chore: added test for internal error in the adapter during streaming

---
 aidial_adapter_openai/gpt.py | 2 +-
 .../gpt4_multi_modal/chat_completion.py | 2 +-
 aidial_adapter_openai/utils/streaming.py | 2 +-
 tests/test_errors.py | 53 ++++++++++++++++++--
 4 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/aidial_adapter_openai/gpt.py b/aidial_adapter_openai/gpt.py
index b18ee86..dd909e2 100644
--- a/aidial_adapter_openai/gpt.py
+++ b/aidial_adapter_openai/gpt.py
@@ -78,7 +78,7 @@ async def gpt_chat_completion(
 
     if isinstance(response, AsyncIterator):
         return generate_stream(
-            map_stream(chunk_to_dict, response),
+            stream=map_stream(chunk_to_dict, response),
             get_prompt_tokens=lambda: estimated_prompt_tokens
             or tokenizer.tokenize_request(request, request["messages"]),
             tokenize_response=tokenizer.tokenize_response,
diff --git a/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py b/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py
index f9fd3ba..6e66e59 100644
--- a/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py
+++ b/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py
@@ -262,7 +262,7 @@ def debug_print(chunk: T) -> T:
         return map_stream(
             debug_print,
             generate_stream(
-                map_stream(
+                stream=map_stream(
                     response_transformer,
                     parse_openai_sse_stream(response),
                 ),
diff --git a/aidial_adapter_openai/utils/streaming.py b/aidial_adapter_openai/utils/streaming.py
index 832e131..ebfb779 100644
--- a/aidial_adapter_openai/utils/streaming.py
+++ b/aidial_adapter_openai/utils/streaming.py
@@ -52,8 +52,8 @@ def build_chunk(
 
 
 async def generate_stream(
-    stream: AsyncIterator[dict],
     *,
+    stream: AsyncIterator[dict],
     get_prompt_tokens: Callable[[], int],
     tokenize_response: Callable[[ChatCompletionResponse], int],
     deployment: str,
diff --git a/tests/test_errors.py b/tests/test_errors.py
index 604e080..e4ebf66 100644
--- a/tests/test_errors.py
+++ b/tests/test_errors.py
@@ -1,5 +1,6 @@
 import json
-from typing import Any, AsyncIterable, Callable
+from typing import Any, AsyncIterable, AsyncIterator, Callable
+from unittest.mock import patch
 
 import httpx
 import pytest
@@ -575,6 +576,56 @@ async def mock_stream() -> AsyncIterable[bytes]:
     )
 
 
+@respx.mock
+@pytest.mark.asyncio
+async def test_adapter_internal_error(
+    test_app: httpx.AsyncClient,
+):
+    async def mock_generate_stream(stream: AsyncIterator[dict], **kwargs):
+        yield await stream.__anext__()
+        raise ValueError("failed generating the stream")
+
+    with patch(
+        "aidial_adapter_openai.gpt.generate_stream",
+        side_effect=mock_generate_stream,
+    ):
+
+        async def mock_stream() -> AsyncIterable[bytes]:
+            yield b'data: {"message": "first chunk"}\n\n'
+            yield b'data: {"message": "second chunk"}\n\n'
+            yield b"data: [DONE]"
+
+        respx.post(
+            "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview"
+        ).respond(
+            status_code=200,
+            content_type="text/event-stream",
+            content=mock_stream(),
+        )
+
+        response = await test_app.post(
+            "/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview",
+            json={
+                "stream": True,
+                "messages": [{"role": "user", "content": "Test content"}],
+            },
+            headers={
+                "X-UPSTREAM-KEY": "TEST_API_KEY",
+                "X-UPSTREAM-ENDPOINT": "http://localhost:5001/openai/deployments/gpt-4/chat/completions",
+            },
+        )
+
+        assert response.status_code == 200
+        assert response.text == "\n\n".join(
+            [
+                'data: {"message":"first chunk"}',
+                'data: {"error":{"message":"failed generating the stream","type":"internal_server_error","code":"500"}}',
+                "data: [DONE]",
+                "",
+            ]
+        )
+
+
 @respx.mock
 @pytest.mark.asyncio
 async def test_invalid_chunk_stream_from_upstream(
From 0baeffe32019b2a877c8b048d18b60cf30c22f27 Mon Sep 17 00:00:00 2001
From: Anton Dubovik
Date: Fri, 6 Dec 2024 11:49:53 +0000
Subject: [PATCH 12/14] fix: reverted pytest args in .vscode

---
 .vscode/settings.json | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index d62db15..ab64be2 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -8,9 +8,7 @@
         },
         "editor.tabSize": 4
     },
-    "python.testing.pytestArgs": [
-        "tests"
-    ],
+    "python.testing.pytestArgs": ["."],
     "python.testing.unittestEnabled": false,
     "python.testing.pytestEnabled": true,
     "python.analysis.typeCheckingMode": "basic"
From f05dd1366fc1e8e1d65e63fe41fc02f6010b47fb Mon Sep 17 00:00:00 2001
From: Anton Dubovik
Date: Wed, 11 Dec 2024 11:32:47 +0000
Subject: [PATCH 13/14] fix: removed dependency on sdk _errors module

---
 aidial_adapter_openai/app.py | 15 ++++-----------
 aidial_adapter_openai/exception_handlers.py | 25 +++++++++------------
 2 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/aidial_adapter_openai/app.py b/aidial_adapter_openai/app.py
index 703547d..07efbee 100644
--- a/aidial_adapter_openai/app.py
+++ b/aidial_adapter_openai/app.py
@@ -1,7 +1,5 @@
 from contextlib import asynccontextmanager
 
-import aidial_sdk._errors as sdk_error_handlers
-import pydantic
 from aidial_sdk.exceptions import HTTPException as DialException
 from aidial_sdk.telemetry.init import init_telemetry as sdk_init_telemetry
 from aidial_sdk.telemetry.types import TelemetryConfig
@@ -10,7 +8,7 @@
 
 import aidial_adapter_openai.endpoints as endpoints
 from 
aidial_adapter_openai.app_config import ApplicationConfig -from aidial_adapter_openai.exception_handlers import openai_exception_handler +from aidial_adapter_openai.exception_handlers import adapter_exception_handler from aidial_adapter_openai.utils.http_client import get_http_client from aidial_adapter_openai.utils.log_config import configure_loggers, logger from aidial_adapter_openai.utils.request import set_app_config @@ -42,14 +40,9 @@ def create_app( app.post("/openai/deployments/{deployment_id:path}/chat/completions")( endpoints.chat_completion ) - app.add_exception_handler(OpenAIError, openai_exception_handler) - app.add_exception_handler( - pydantic.ValidationError, - sdk_error_handlers.pydantic_validation_exception_handler, - ) - app.add_exception_handler( - DialException, sdk_error_handlers.dial_exception_handler - ) + + for exc_class in [OpenAIError, DialException]: + app.add_exception_handler(exc_class, adapter_exception_handler) return app diff --git a/aidial_adapter_openai/exception_handlers.py b/aidial_adapter_openai/exception_handlers.py index 635c718..21a9ae2 100644 --- a/aidial_adapter_openai/exception_handlers.py +++ b/aidial_adapter_openai/exception_handlers.py @@ -1,22 +1,21 @@ from aidial_sdk.exceptions import HTTPException as DialException from aidial_sdk.exceptions import InternalServerError -from fastapi import Request -from fastapi.responses import Response -from openai import ( - APIConnectionError, - APIError, - APIStatusError, - APITimeoutError, - OpenAIError, -) +from fastapi.requests import Request as FastAPIRequest +from fastapi.responses import Response as FastAPIResponse +from openai import APIConnectionError, APIError, APIStatusError, APITimeoutError from aidial_adapter_openai.utils.adapter_exception import ( AdapterException, + ResponseWrapper, parse_adapter_exception, ) def to_adapter_exception(exc: Exception) -> AdapterException: + + if isinstance(exc, (DialException, ResponseWrapper)): + return exc + if isinstance(exc, APIStatusError): # Non-streaming errors reported by `openai` library via this exception r = exc.response @@ -70,12 +69,10 @@ def to_adapter_exception(exc: Exception) -> AdapterException: content={"error": exc.body or {}}, ) - if isinstance(exc, DialException): - return exc - return InternalServerError(str(exc)) -def openai_exception_handler(request: Request, exc: Exception) -> Response: - assert isinstance(exc, OpenAIError) +def adapter_exception_handler( + request: FastAPIRequest, exc: Exception +) -> FastAPIResponse: return to_adapter_exception(exc).to_fastapi_response() From 968a9af2b8a3392cea8f219ddc05d15f21b7e87b Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Wed, 11 Dec 2024 11:50:31 +0000 Subject: [PATCH 14/14] fix: review fixes --- aidial_adapter_openai/exception_handlers.py | 4 +--- aidial_adapter_openai/utils/adapter_exception.py | 8 -------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/aidial_adapter_openai/exception_handlers.py b/aidial_adapter_openai/exception_handlers.py index 21a9ae2..55a06e4 100644 --- a/aidial_adapter_openai/exception_handlers.py +++ b/aidial_adapter_openai/exception_handlers.py @@ -30,11 +30,9 @@ def to_adapter_exception(exc: Exception) -> AdapterException: if "Content-Encoding" in httpx_headers: del httpx_headers["Content-Encoding"] - headers = {k.decode(): v.decode() for k, v in httpx_headers.raw} - return parse_adapter_exception( status_code=r.status_code, - headers=headers, + headers=dict(httpx_headers.items()), content=r.text, ) diff --git 
a/aidial_adapter_openai/utils/adapter_exception.py b/aidial_adapter_openai/utils/adapter_exception.py index 8715131..863f6bd 100644 --- a/aidial_adapter_openai/utils/adapter_exception.py +++ b/aidial_adapter_openai/utils/adapter_exception.py @@ -2,7 +2,6 @@ from typing import Any, Dict from aidial_sdk.exceptions import HTTPException as DialException -from fastapi import HTTPException as FastAPIException from fastapi.responses import Response as FastAPIResponse @@ -39,13 +38,6 @@ def to_fastapi_response(self) -> FastAPIResponse: headers=self.headers, ) - def to_fastapi_exception(self) -> FastAPIException: - return FastAPIException( - status_code=self.status_code, - detail=self.content, - headers=self.headers, - ) - def json_error(self) -> dict: return { "error": {