diff --git a/docs/openai_api.md b/docs/openai_api.md
index 089b500ff..42b381a2d 100644
--- a/docs/openai_api.md
+++ b/docs/openai_api.md
@@ -63,6 +63,23 @@ completion = openai.chat.completions.create(
 print(completion.choices[0].message.content)
 ```
 
+### Logprobs
+
+Logprobs are supported with the OpenAI API:
+
+```python
+# create a chat completion with logprobs
+completion = openai.chat.completions.create(
+    model=model,
+    messages=[{"role": "user", "content": "Hello! What is your name?"}],
+    logprobs=1
+)
+# print the logprobs
+print(completion.choices[0].logprobs)
+```
+
+### Streaming
+
 Streaming is also supported. See [test_openai_api.py](../tests/test_openai_api.py). If your api server is behind a proxy you'll need to turn off buffering, you can do so in Nginx by setting `proxy_buffering off;` in the location block for the proxy.
 
 ### cURL
@@ -146,7 +163,7 @@ export FASTCHAT_WORKER_API_EMBEDDING_BATCH_SIZE=1
 ## Todos
 Some features to be implemented:
 
-- [ ] Support more parameters like `logprobs`, `logit_bias`, `user`, `presence_penalty` and `frequency_penalty`
+- [ ] Support more parameters like `logit_bias`, `user`, `presence_penalty` and `frequency_penalty`
 - [ ] Model details (permissions, owner and create time)
 - [ ] Edits API
 - [ ] Rate Limitation Settings
diff --git a/fastchat/protocol/openai_api_protocol.py b/fastchat/protocol/openai_api_protocol.py
index bb50a5ef0..d6cfb0d80 100644
--- a/fastchat/protocol/openai_api_protocol.py
+++ b/fastchat/protocol/openai_api_protocol.py
@@ -64,6 +64,7 @@ class ChatCompletionRequest(BaseModel):
     ]
     temperature: Optional[float] = 0.7
     top_p: Optional[float] = 1.0
+    logprobs: Optional[int] = None
     top_k: Optional[int] = -1
     n: Optional[int] = 1
     max_tokens: Optional[int] = None
@@ -81,6 +82,7 @@ class ChatMessage(BaseModel):
 
 class ChatCompletionResponseChoice(BaseModel):
     index: int
+    logprobs: Optional[LogProbs] = None
     message: ChatMessage
     finish_reason: Optional[Literal["stop", "length"]] = None
 
diff --git a/fastchat/serve/openai_api_server.py b/fastchat/serve/openai_api_server.py
index a6ffee96b..0df7e8e74 100644
--- a/fastchat/serve/openai_api_server.py
+++ b/fastchat/serve/openai_api_server.py
@@ -275,7 +275,7 @@ async def get_gen_params(
     frequency_penalty: Optional[float],
     max_tokens: Optional[int],
     echo: Optional[bool],
-    logprobs: Optional[int] = None,
+    logprobs: Optional[int],
     stop: Optional[Union[str, List[str]]],
     best_of: Optional[int] = None,
     use_beam_search: Optional[bool] = None,
@@ -431,6 +431,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
         frequency_penalty=request.frequency_penalty,
         max_tokens=request.max_tokens,
         echo=False,
+        logprobs=request.logprobs,
         stop=request.stop,
     )
 
@@ -472,6 +473,7 @@
             ChatCompletionResponseChoice(
                 index=i,
                 message=ChatMessage(role="assistant", content=content["text"]),
+                logprobs=create_openai_logprobs(content.get("logprobs", None)),
                 finish_reason=content.get("finish_reason", "stop"),
             )
         )
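Note that `ChatCompletionResponseChoice` now references a `LogProbs` model that this diff does not add, so it must already be defined earlier in `fastchat/protocol/openai_api_protocol.py`. For reference, a minimal sketch of the shape such a model would need, mirroring the legacy OpenAI completions `logprobs` object (the exact field names here are an assumption, not confirmed by this diff):

```python
from typing import Dict, List, Optional

from pydantic import BaseModel, Field


class LogProbs(BaseModel):
    # per-token fields, mirroring the legacy OpenAI completions logprobs
    # object; field names are assumptions, not confirmed by this diff
    text_offset: List[int] = Field(default_factory=list)
    token_logprobs: List[Optional[float]] = Field(default_factory=list)
    tokens: List[str] = Field(default_factory=list)
    top_logprobs: List[Optional[Dict[str, float]]] = Field(default_factory=list)
```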
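Likewise, `create_openai_logprobs` is called in `openai_api_server.py` but not defined in this diff. Assuming the worker returns either `None` or a dict whose keys match the `LogProbs` fields, a minimal sketch could look like:

```python
from typing import Optional

from fastchat.protocol.openai_api_protocol import LogProbs


def create_openai_logprobs(logprob_dict: Optional[dict]) -> Optional[LogProbs]:
    # wrap the worker's raw logprob dict in the LogProbs model, if present;
    # assumes the dict keys line up with the LogProbs fields above
    return LogProbs(**logprob_dict) if logprob_dict is not None else None
```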
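End to end, a client can then inspect per-token log probabilities from a chat completion. A sketch against a locally running api server; the base URL, model name, and `LogProbs` field names are assumptions:

```python
import openai

openai.api_key = "EMPTY"  # the FastChat server does not check the key
openai.base_url = "http://localhost:8000/v1/"  # assumes a local api server

completion = openai.chat.completions.create(
    model="vicuna-7b-v1.5",  # hypothetical model name
    messages=[{"role": "user", "content": "Hello! What is your name?"}],
    logprobs=1,  # an int per ChatCompletionRequest above, not the bool of the official API
)

# the server fills choices[i].logprobs only when logprobs was requested
logprobs = completion.choices[0].logprobs
if logprobs is not None:
    for token, logprob in zip(logprobs.tokens, logprobs.token_logprobs):
        print(f"{token!r}: {logprob}")
```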