From 9f7f68b5e55579fefa4b5ee278b3401d564de19a Mon Sep 17 00:00:00 2001
From: Gato <115658935+CollectiveUnicorn@users.noreply.github.com>
Date: Wed, 3 Jul 2024 13:37:28 -0700
Subject: [PATCH] chore(docs): Add documentation to types (#680)

Adds documentation to the request endpoint types that can be seen at the `/redoc` endpoint
---
 src/leapfrogai_api/backend/grpc_client.py     |   6 +-
 src/leapfrogai_api/backend/helpers.py         |  16 +-
 src/leapfrogai_api/backend/types.py           | 465 ++++++++++++++----
 src/leapfrogai_api/main.py                    |   4 +-
 .../openai/requests/create_thread_request.py  |  16 +-
 .../requests/run_create_params_request.py     |  21 +-
 6 files changed, 415 insertions(+), 113 deletions(-)

diff --git a/src/leapfrogai_api/backend/grpc_client.py b/src/leapfrogai_api/backend/grpc_client.py
index 617beee8d..6dd1e9f3d 100644
--- a/src/leapfrogai_api/backend/grpc_client.py
+++ b/src/leapfrogai_api/backend/grpc_client.py
@@ -30,7 +30,7 @@ async def stream_completion(model: Model, request: lfai.CompletionRequest):
         await stream.wait_for_connection()
 
         return StreamingResponse(
-            recv_completion(stream), media_type="text/event-stream"
+            recv_completion(stream, model.name), media_type="text/event-stream"
         )
 
 
@@ -66,7 +66,9 @@ async def stream_chat_completion(model: Model, request: lfai.ChatCompletionReque
         stream = stub.ChatCompleteStream(request)
 
         await stream.wait_for_connection()
-        return StreamingResponse(recv_chat(stream), media_type="text/event-stream")
+        return StreamingResponse(
+            recv_chat(stream, model.name), media_type="text/event-stream"
+        )
 
 
 async def stream_chat_completion_raw(
diff --git a/src/leapfrogai_api/backend/helpers.py b/src/leapfrogai_api/backend/helpers.py
index d1677e07b..79e5e90a0 100644
--- a/src/leapfrogai_api/backend/helpers.py
+++ b/src/leapfrogai_api/backend/helpers.py
@@ -1,5 +1,7 @@
 """Helper functions for the OpenAI backend."""
 
+import time
+import uuid
 from typing import BinaryIO, Iterator, AsyncGenerator, Any
 import grpc
 import leapfrogai_sdk as lfai
@@ -15,15 +17,16 @@
 
 async def recv_completion(
     stream: grpc.aio.UnaryStreamCall[lfai.CompletionRequest, lfai.CompletionResponse],
+    model: str,
 ):
     async for c in stream:
         yield (
             "data: "
             + CompletionResponse(
-                id="foo",
+                id=str(uuid.uuid4()),
                 object="completion.chunk",
-                created=55,
-                model="mpt-7b-8k-chat",
+                created=int(time.time()),
+                model=model,
                 choices=[
                     CompletionChoice(
                         index=0,
@@ -48,16 +51,17 @@ async def recv_chat(
     stream: grpc.aio.UnaryStreamCall[
         lfai.ChatCompletionRequest, lfai.ChatCompletionResponse
     ],
+    model: str,
 ) -> AsyncGenerator[str, Any]:
     """Generator that yields chat completion responses as Server-Sent Events."""
     async for c in stream:
         yield (
             "data: "
             + ChatCompletionResponse(
-                id="foo",
+                id=str(uuid.uuid4()),
                 object="chat.completion.chunk",
-                created=55,
-                model="mpt-7b-8k-chat",
+                created=int(time.time()),
+                model=model,
                 choices=[
                     ChatStreamChoice(
                         index=0,
diff --git a/src/leapfrogai_api/backend/types.py b/src/leapfrogai_api/backend/types.py
index 5d631563c..4a00d1838 100644
--- a/src/leapfrogai_api/backend/types.py
+++ b/src/leapfrogai_api/backend/types.py
@@ -41,9 +41,16 @@
 class Usage(BaseModel):
     """Usage object."""
 
-    prompt_tokens: int
-    completion_tokens: int | None = DEFAULT_MAX_COMPLETION_TOKENS
-    total_tokens: int
+    prompt_tokens: int = Field(
+        ..., description="The number of tokens used in the prompt."
+    )
+    completion_tokens: int | None = Field(
+        default=DEFAULT_MAX_COMPLETION_TOKENS,
+        description="The number of tokens generated in the completion.",
+    )
+    total_tokens: int = Field(
+        ..., description="The total number of tokens used (prompt + completion)."
+    )
 
 
 ##########
@@ -52,19 +59,38 @@
 class ModelResponseModel(BaseModel):
-    """Response object for models."""
+    """Represents a single model in the response."""
 
-    id: str
-    object: str = "model"
-    created: int = 0
-    owned_by: str = "leapfrogai"
+    id: str = Field(
+        ...,
+        description="The unique identifier of the model.",
+        examples=["llama-cpp-python"],
+    )
+    object: Literal["model"] = Field(
+        default="model",
+        description="The object type, which is always 'model' for this response.",
+    )
+    created: int = Field(
+        default=0,
+        description="The Unix timestamp (in seconds) when the model was created. Always 0 for LeapfrogAI models.",
+        examples=[0],
+    )
+    owned_by: Literal["leapfrogai"] = Field(
+        default="leapfrogai",
+        description="The organization that owns the model. Always 'leapfrogai' for LeapfrogAI models.",
+    )
 
 
 class ModelResponse(BaseModel):
-    """Response object for models."""
+    """Response object for listing available models."""
 
-    object: str = "list"
-    data: list[ModelResponseModel] = []
+    object: Literal["list"] = Field(
+        default="list",
+        description="The object type, which is always 'list' for this response.",
+    )
+    data: list[ModelResponseModel] = Field(
+        ..., description="A list of available models.", min_length=0
+    )
 
 
 ############
@@ -75,31 +101,64 @@ class ModelResponse(BaseModel):
 class CompletionRequest(BaseModel):
     """Request object for completion."""
 
-    model: str
-    prompt: str | list[int]
-    stream: bool | None = False
-    max_tokens: int | None = DEFAULT_MAX_COMPLETION_TOKENS
-    temperature: float | None = 1.0
+    model: str = Field(
+        ...,
+        description="The ID of the model to use for completion.",
+        example="llama-cpp-python",
+    )
+    prompt: str | list[int] = Field(
+        ...,
+        description="The prompt to generate completions for. Can be a string or a list of integers representing token IDs.",
+        examples=["Once upon a time,"],
+    )
+    stream: bool = Field(
+        False, description="Whether to stream the results as they become available."
+    )
+    max_tokens: int | None = Field(
+        default=DEFAULT_MAX_COMPLETION_TOKENS,
+        description="The maximum number of tokens to generate.",
+        ge=1,
+    )
+    temperature: float | None = Field(
+        1.0,
+        description="Sampling temperature to use. Higher values mean more random completions. Use lower values for more deterministic completions. The upper limit may vary depending on the backend used.",
+        ge=0.0,
+    )
 
 
 class CompletionChoice(BaseModel):
     """Choice object for completion."""
 
-    index: int
-    text: str
-    logprobs: object = None
-    finish_reason: str = ""
+    index: int = Field(..., description="The index of this completion choice.")
+    text: str = Field(..., description="The generated text for this completion choice.")
+    logprobs: object | None = Field(
+        None,
+        description="Log probabilities for the generated tokens. Only returned if requested.",
+    )
+    finish_reason: str = Field(
+        "", description="The reason why the completion finished.", example="length"
+    )
 
 
 class CompletionResponse(BaseModel):
     """Response object for completion."""
 
-    id: str = ""
-    object: str = "completion"
-    created: int = 0
-    model: str = ""
-    choices: list[CompletionChoice]
-    usage: Usage | None = None
+    id: str = Field("", description="A unique identifier for this completion response.")
+    object: Literal["completion"] = Field(
+        "completion",
+        description="The object type, which is always 'completion' for this response.",
+    )
+    created: int = Field(
+        0,
+        description="The Unix timestamp (in seconds) of when the completion was created.",
+    )
+    model: str = Field("", description="The ID of the model used for the completion.")
+    choices: list[CompletionChoice] = Field(
+        ..., description="A list of generated completions."
+    )
+    usage: Usage | None = Field(
+        None, description="Usage statistics for the completion request."
+    )
 
 
 ##########
@@ -118,55 +177,131 @@ class ChatFunction(BaseModel):
 class ChatMessage(BaseModel):
     """Message object for chat completion."""
 
-    role: Literal["user", "assistant", "system", "function"]
-    content: str | list[TextContentBlockParam]
+    role: Literal["user", "assistant", "system", "function"] = Field(
+        default="user",
+        description="The role of the message author.",
+        examples=["user", "assistant", "system", "function"],
+    )
+    content: str | list[TextContentBlockParam] = Field(
+        default="",
+        description="The content of the message. Can be a string or a list of text content blocks.",
+        examples=[
+            "Hello, how are you?",
+            [{"type": "text", "text": "Hello, how are you?"}],
+        ],
+    )
 
 
 class ChatDelta(BaseModel):
     """Delta object for chat completion."""
 
-    role: str
-    content: str | None = ""
+    role: str = Field(
+        default="",
+        description="The role of the author of this message delta.",
+        examples=["assistant"],
+    )
+    content: str | None = Field(
+        default="", description="The content of this message delta."
+    )
 
 
 class ChatCompletionRequest(BaseModel):
     """Request object for chat completion."""
 
-    model: str
-    messages: list[ChatMessage]
-    functions: list | None = None
-    temperature: float | None = 1.0
-    top_p: float | None = 1
-    stream: bool | None = False
-    stop: str | None = None
-    max_tokens: int | None = DEFAULT_MAX_COMPLETION_TOKENS
+    model: str = Field(
+        default="",
+        description="The ID of the model to use for chat completion.",
+        examples=["llama-cpp-python"],
+    )
+    messages: list[ChatMessage] = Field(
+        default=[], description="A list of messages comprising the conversation so far."
+    )
+    functions: list | None = Field(
+        default=None,
+        description="A list of functions that the model may generate JSON inputs for.",
+    )
+    temperature: float | None = Field(
+        default=1.0,
+        description="What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. The upper limit may vary depending on the backend used.",
+        ge=0,
+    )
+    top_p: float | None = Field(
+        default=1,
+        description="An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.",
+        gt=0,
+        le=1,
+    )
+    stream: bool | None = Field(
+        default=False,
+        description="If set, partial message deltas will be sent. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a data: [DONE] message.",
+    )
+    stop: str | None = Field(
+        default=None,
+        description="Sequences that determine where the API will stop generating further tokens.",
+    )
+    max_tokens: int | None = Field(
+        default=DEFAULT_MAX_COMPLETION_TOKENS,
+        description="The maximum number of tokens to generate in the chat completion.",
+        gt=0,
+    )
 
 
 class ChatChoice(BaseModel):
     """Choice object for chat completion."""
 
-    index: int
-    message: ChatMessage
-    finish_reason: str | None = ""
+    index: int = Field(
+        default=0, description="The index of this choice among the list of choices."
+    )
+    message: ChatMessage = Field(
+        default=ChatMessage(), description="The message content for this choice."
+    )
+    finish_reason: str | None = Field(
+        default="",
+        description="The reason why the model stopped generating tokens.",
+        examples=["stop", "length"],
+    )
 
 
 class ChatStreamChoice(BaseModel):
     """Stream choice object for chat completion."""
 
-    index: int
-    delta: ChatDelta
-    finish_reason: str | None = ""
+    index: int = Field(
+        default=0, description="The index of this choice among the list of choices."
+    )
+    delta: ChatDelta = Field(
+        default=ChatDelta(), description="The delta content for this streaming choice."
+    )
+    finish_reason: str | None = Field(
+        default="",
+        description="The reason why the model stopped generating tokens.",
+        examples=["stop", "length"],
+    )
 
 
 class ChatCompletionResponse(BaseModel):
     """Response object for chat completion."""
 
-    id: str = ""
-    object: str = "chat.completion"
-    created: int = 0
-    model: str = ""
-    choices: list[ChatChoice] | list[ChatStreamChoice]
-    usage: Usage | None = None
+    id: str = Field(
+        default="", description="A unique identifier for the chat completion."
+    )
+    object: str = Field(
+        default="chat.completion",
+        description="The object type, which is always 'chat.completion' for this response.",
+    )
+    created: int = Field(
+        default=0,
+        description="The Unix timestamp (in seconds) of when the chat completion was created.",
+    )
+    model: str = Field(
+        default="", description="The ID of the model used for the chat completion."
+    )
+    choices: list[ChatChoice] | list[ChatStreamChoice] = Field(
+        default=[],
+        description="A list of chat completion choices. Can be either ChatChoice or ChatStreamChoice depending on whether streaming is enabled.",
+    )
+    usage: Usage | None = Field(
+        default=None, description="Usage statistics for the completion request."
+    )
 
 
 #############
@@ -178,29 +313,52 @@ class CreateEmbeddingRequest(BaseModel):
     """Request object for creating embeddings."""
 
     model: str = Field(
-        description="Model that will be doing the embedding",
+        description="The ID of the model to use for generating embeddings.",
         examples=["text-embeddings"],
     )
     input: str | list[str] | list[int] | list[list[int]] = Field(
-        description="The text to be embedded", examples=["My test input"]
+        description="The text to generate embeddings for. Can be a string, array of strings, array of tokens, or array of token arrays.",
+        examples=["The quick brown fox jumps over the lazy dog", ["Hello", "World"]],
     )
 
 
 class EmbeddingResponseData(BaseModel):
     """Response object for embeddings."""
 
-    embedding: list[float]
-    index: int
-    object: str = "embedding"
+    embedding: list[float] = Field(
+        default=[],
+        description="The embedding vector representing the input text.",
+    )
+    index: int = Field(
+        default=0,
+        description="The index of the embedding in the list of generated embeddings.",
+    )
+    object: str = Field(
+        default="embedding",
+        description="The object type, which is always 'embedding'.",
+    )
 
 
 class CreateEmbeddingResponse(BaseModel):
     """Response object for embeddings."""
 
-    data: list[EmbeddingResponseData]
-    model: str
-    object: str = "list"
-    usage: Usage | None = None
+    data: list[EmbeddingResponseData] = Field(
+        default=[],
+        description="A list of embedding objects.",
+    )
+    model: str = Field(
+        default="",
+        examples=["text-embeddings"],
+        description="The ID of the model used for generating the embeddings.",
+    )
+    object: str = Field(
+        default="list",
+        description="The object type, which is always 'list' for embedding responses.",
+    )
+    usage: Usage | None = Field(
+        default=None,
+        description="Usage statistics for the API call.",
+    )
 
 
 ##########
@@ -209,12 +367,31 @@ class CreateEmbeddingResponse(BaseModel):
 
 
 class CreateTranscriptionRequest(BaseModel):
-    file: UploadFile
-    model: str
-    language: str = ""
-    prompt: str = ""
-    response_format: str = ""
-    temperature: float = 1.0
+    """Request object for creating a transcription."""
+
+    file: UploadFile = Field(
+        ...,
+        description="The audio file to transcribe. Supports any audio format that ffmpeg can handle. For a complete list of supported formats, see: https://ffmpeg.org/ffmpeg-formats.html",
+    )
+    model: str = Field(..., description="ID of the model to use.")
+    language: str = Field(
+        default="",
+        description="The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.",
+    )
+    prompt: str = Field(
+        default="",
+        description="An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language.",
+    )
+    response_format: str = Field(
+        default="json",
+        description="The format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt.",
+    )
+    temperature: float = Field(
+        default=1.0,
+        ge=0,
+        le=1,
+        description="The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.",
+    )
 
     @classmethod
     def as_form(
@@ -237,7 +414,13 @@ def as_form(
 
 
 class CreateTranscriptionResponse(BaseModel):
-    text: str
+    """Response object for transcription."""
+
+    text: str = Field(
+        ...,
+        description="The transcribed text.",
+        examples=["Hello, this is a transcription of the audio file."],
+    )
 
 
 #############
@@ -248,8 +431,14 @@ class CreateTranscriptionResponse(BaseModel):
 class UploadFileRequest(BaseModel):
     """Request object for uploading a file."""
 
-    file: UploadFile
-    purpose: Literal["assistants"] | None = "assistants"
+    file: UploadFile = Field(
+        ...,
+        description="The file to be uploaded. Must be a supported file type.",
+    )
+    purpose: Literal["assistants"] | None = Field(
+        default="assistants",
+        description="The intended purpose of the uploaded file. Currently, only 'assistants' is supported.",
+    )
 
     @classmethod
     def as_form(
@@ -264,8 +453,14 @@ def as_form(
 class ListFilesResponse(BaseModel):
     """Response object for listing files."""
 
-    object: str = Literal["list"]
-    data: list[FileObject] = []
+    object: Literal["list"] = Field(
+        default="list",
+        description="The type of object returned. Always 'list' for file listing.",
+    )
+    data: list[FileObject] = Field(
+        default=[],
+        description="An array of File objects, each representing an uploaded file.",
+    )
 
 
 #############
@@ -276,14 +471,30 @@ class ListFilesResponse(BaseModel):
 class CreateAssistantRequest(BaseModel):
     """Request object for creating an assistant."""
 
-    model: str = Field(default="llama-cpp-python", examples=["llama-cpp-python"])
-    name: str | None = Field(default=None, examples=["Froggy Assistant"])
-    description: str | None = Field(default=None, examples=["A helpful assistant."])
+    model: str = Field(
+        default="llama-cpp-python",
+        examples=["llama-cpp-python"],
+        description="The model to be used by the assistant. Default is 'llama-cpp-python'.",
+    )
+    name: str | None = Field(
+        default=None,
+        examples=["Froggy Assistant"],
+        description="The name of the assistant. Optional.",
+    )
+    description: str | None = Field(
+        default=None,
+        examples=["A helpful assistant."],
+        description="A description of the assistant's purpose. Optional.",
+    )
     instructions: str | None = Field(
-        default=None, examples=["You are a helpful assistant."]
+        default=None,
+        examples=["You are a helpful assistant."],
+        description="Instructions that the assistant should follow. Optional.",
     )
     tools: list[AssistantTool] | None = Field(
-        default=None, examples=[[FileSearchTool(type="file_search")]]
+        default=None,
+        examples=[[FileSearchTool(type="file_search")]],
+        description="List of tools the assistant can use. Optional.",
     )
     tool_resources: BetaAssistantToolResources | None = Field(
         default=None,
@@ -292,24 +503,45 @@ class CreateAssistantRequest(BaseModel):
                 file_search=ToolResourcesFileSearch(vector_store_ids=[])
             )
         ],
+        description="Resources for the tools used by the assistant. Optional.",
+    )
+    metadata: dict | None = Field(
+        default={},
+        examples=[{}],
+        description="Additional metadata for the assistant. Optional.",
+    )
+    temperature: float | None = Field(
+        default=None,
+        examples=[1.0],
+        description="Sampling temperature for the model. Optional.",
+    )
+    top_p: float | None = Field(
+        default=None,
+        examples=[1.0],
+        description="Nucleus sampling parameter. Optional.",
     )
-    metadata: dict | None = Field(default={}, examples=[{}])
-    temperature: float | None = Field(default=None, examples=[1.0])
-    top_p: float | None = Field(default=None, examples=[1.0])
     response_format: Literal["auto"] | None = Field(
-        default=None, examples=["auto"]
-    )  # This is all we support right now
+        default=None,
+        examples=["auto"],
+        description="The format of the assistant's responses. Currently only 'auto' is supported. Optional.",
+    )
 
 
 class ModifyAssistantRequest(CreateAssistantRequest):
     """Request object for modifying an assistant."""
 
+    # Inherits all fields from CreateAssistantRequest
+    # All fields are optional for modification
+
 
 class ListAssistantsResponse(BaseModel):
-    """Response object for listing files."""
+    """Response object for listing assistants."""
 
-    object: str = Literal["list"]
-    data: list[Assistant] = []
+    object: Literal["list"] = Field(
+        default="list",
+        description="The type of object. Always 'list' for this response.",
+    )
+    data: list[Assistant] = Field(description="A list of Assistant objects.")
 
 
 ################
@@ -340,24 +572,43 @@ class CreateVectorStoreFileRequest(BaseModel):
     chunking_strategy: ToolResourcesFileSearchVectorStoreChunkingStrategy | None = (
         Field(
             default=None,
+            description="The strategy for chunking the file content. Use 'auto' for automatic chunking.",
             examples=[
                 ToolResourcesFileSearchVectorStoreChunkingStrategyAuto(type="auto")
             ],
         )
     )
-    file_id: str = Field(default="", examples=[""])
+    file_id: str = Field(
+        default="",
+        description="The ID of the file to be added to the vector store.",
+        examples=["file-abc123"],
+    )
 
 
 class CreateVectorStoreRequest(BaseModel):
     """Request object for creating a vector store."""
 
-    file_ids: list[str] | None = []
-    name: str | None = None
+    file_ids: list[str] | None = Field(
+        default=[],
+        description="List of file IDs to be included in the vector store.",
+        example=["file-abc123", "file-def456"],
+    )
+    name: str | None = Field(
+        default=None,
+        description="Optional name for the vector store.",
+        example="My Vector Store",
+    )
     expires_after: ExpiresAfter | None = Field(
-        default=None, examples=[ExpiresAfter(anchor="last_active_at", days=1)]
+        default=None,
+        description="Expiration settings for the vector store.",
+        examples=[ExpiresAfter(anchor="last_active_at", days=1)],
+    )
+    metadata: dict | None = Field(
+        default=None,
+        description="Optional metadata for the vector store.",
+        example={"project": "AI Research", "version": "1.0"},
     )
-    metadata: dict | None = Field(default=None, examples=[{}])
 
     def add_days_to_timestamp(self, timestamp: int, days: int) -> int:
         """
@@ -407,8 +658,14 @@ class ModifyVectorStoreRequest(CreateVectorStoreRequest):
 class ListVectorStoresResponse(BaseModel):
     """Response object for listing files."""
 
-    object: str = Literal["list"]
-    data: list[VectorStore] = []
+    object: Literal["list"] = Field(
+        default="list",
+        description="The type of object. Always 'list' for this response.",
+    )
+    data: list[VectorStore] = Field(
+        default=[],
+        description="A list of VectorStore objects.",
+    )
 
 
 ################
@@ -443,17 +700,27 @@ class ModifyMessageRequest(BaseModel):
 
 
 class RAGItem(BaseModel):
-    """Object for RAG."""
+    """Object representing a single item in a Retrieval-Augmented Generation (RAG) result."""
 
-    id: str
-    vector_store_id: str
-    file_id: str
-    content: str
-    metadata: dict
-    similarity: float
+    id: str = Field(..., description="Unique identifier for the RAG item.")
+    vector_store_id: str = Field(
+        ..., description="ID of the vector store containing this item."
+    )
+    file_id: str = Field(..., description="ID of the file associated with this item.")
+    content: str = Field(..., description="The actual content of the RAG item.")
+    metadata: dict = Field(
+        ..., description="Additional metadata associated with the RAG item."
+    )
+    similarity: float = Field(
+        ..., description="Similarity score of this item to the query."
+    )
 
 
 class RAGResponse(BaseModel):
-    """Response object for RAG."""
+    """Response object for RAG queries."""
 
-    data: list[RAGItem] = []
+    data: list[RAGItem] = Field(
+        ...,
+        description="List of RAG items returned as a result of the query.",
+        min_length=0,
+    )
diff --git a/src/leapfrogai_api/main.py b/src/leapfrogai_api/main.py
index c01fe6b05..4691ef2cb 100644
--- a/src/leapfrogai_api/main.py
+++ b/src/leapfrogai_api/main.py
@@ -65,7 +65,9 @@ async def validation_exception_handler(request, exc):
 app.include_router(files.router)
 app.include_router(vector_stores.router)
 app.include_router(runs.router)
-app.include_router(threads.router)
 app.include_router(messages.router)
 app.include_router(runs_steps.router)
 app.include_router(rag.router)
+# This should be at the bottom to prevent it preempting more specific runs endpoints
+# https://fastapi.tiangolo.com/tutorial/path-params/#order-matters
+app.include_router(threads.router)
diff --git a/src/leapfrogai_api/routers/openai/requests/create_thread_request.py b/src/leapfrogai_api/routers/openai/requests/create_thread_request.py
index 1399666c5..3fbcb1c8a 100644
--- a/src/leapfrogai_api/routers/openai/requests/create_thread_request.py
+++ b/src/leapfrogai_api/routers/openai/requests/create_thread_request.py
@@ -13,11 +13,21 @@
 class CreateThreadRequest(BaseModel):
     """Request object for creating a thread."""
 
-    messages: list[Message] | None = Field(default=None, examples=[None])
+    messages: list[Message] | None = Field(
+        default=None,
+        description="An optional list of messages to initialize the thread with.",
+        examples=[None],
+    )
     tool_resources: BetaThreadToolResources | None = Field(
-        default=None, examples=[None]
+        default=None,
+        description="Optional tool resources associated with the thread.",
+        examples=[None],
+    )
+    metadata: dict | None = Field(
+        default={},
+        description="Optional metadata to associate with the thread.",
+        examples=[{"user_id": "usr_12345", "session_id": "sess_67890"}],
     )
-    metadata: dict | None = Field(default={}, examples=[{}])
 
     async def create_thread(self, session):
         crud_thread = CRUDThread(db=session)
diff --git a/src/leapfrogai_api/routers/openai/requests/run_create_params_request.py b/src/leapfrogai_api/routers/openai/requests/run_create_params_request.py
index ff97794cd..e645d3d85 100644
--- a/src/leapfrogai_api/routers/openai/requests/run_create_params_request.py
+++ b/src/leapfrogai_api/routers/openai/requests/run_create_params_request.py
@@ -1,4 +1,6 @@
 from __future__ import annotations
+
+import logging
 from typing import AsyncGenerator, Any
 from openai.types.beta.threads import Run
 from openai.types.beta.threads.run_create_params import (
@@ -22,9 +24,14 @@ class RunCreateParamsRequestBaseRequest(RunCreateParamsRequestBase):
-    additional_instructions: str | None = Field(default=None, examples=[""])
+    additional_instructions: str | None = Field(
+        default=None,
+        examples=["Please provide a summary of the conversation so far."],
+        description="Additional instructions to be considered during the run execution.",
+    )
     additional_messages: list[AdditionalMessage] | None = Field(
         default=[],
+        description="A list of additional messages to be added to the thread before the run starts.",
         examples=[
             [
                 AdditionalMessage(
@@ -45,7 +52,11 @@ class RunCreateParamsRequestBaseRequest(RunCreateParamsRequestBase):
             ]
         ],
     )
-    stream: bool | None = Field(default=None, examples=[False])
+    stream: bool | None = Field(
+        default=None,
+        description="If set to true, the response will be streamed as it's generated.",
+        example=False,
+    )
 
     async def create_additional_messages(self, session: Session, thread_id: str):
         """If additional messages exist, create them in the DB as a part of this thread"""
@@ -58,6 +69,12 @@ async def create_additional_messages(self, session: Session, thread_id: str):
                 message_content = from_content_param_to_content(
                     thread_message_content=content
                 )
+            else:
+                logging.getLogger(__name__).warning(
+                    "Found additional message without content"
+                )
+                continue
+
             create_message_request = CreateMessageRequest(
                 role=additional_message["role"],
                 content=[message_content],
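
Reviewer note: the `description` and `examples` arguments added throughout this patch are the metadata FastAPI copies into the generated OpenAPI schema, which is what the `/redoc` page renders. The standalone sketch below is illustrative only; it is not part of the patch, and the `ExampleRequest` model and `/example` route are hypothetical stand-ins for the documented types.

# Illustrative sketch only -- hypothetical model and route, not code from this repo.
from fastapi import FastAPI
from pydantic import BaseModel, Field

app = FastAPI()


class ExampleRequest(BaseModel):
    """Request model documented the same way as the types in this patch."""

    model: str = Field(
        ...,
        description="The ID of the model to use.",
        examples=["llama-cpp-python"],
    )
    max_tokens: int | None = Field(
        default=4096,
        description="The maximum number of tokens to generate.",
        ge=1,
    )


@app.post("/example")
async def example(req: ExampleRequest) -> dict:
    # FastAPI lifts the Field metadata into components.schemas of the OpenAPI spec.
    return {"model": req.model}


if __name__ == "__main__":
    import json

    # The description/examples strings show up in this schema, and /redoc renders it.
    schema = app.openapi()["components"]["schemas"]["ExampleRequest"]
    print(json.dumps(schema, indent=2))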