From 427ef7e476d9b5fe94f02c2d2445c16a4a4b0d5e Mon Sep 17 00:00:00 2001
From: Eden Reich
Date: Tue, 29 Apr 2025 23:45:14 +0000
Subject: [PATCH 1/3] docs(openapi): Update model identifiers and enhance
 request/response schemas in OpenAPI definition

Signed-off-by: Eden Reich
---
 openapi.yaml | 53 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 44 insertions(+), 9 deletions(-)

diff --git a/openapi.yaml b/openapi.yaml
index 1867ba0..df70ec1 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -67,43 +67,51 @@ paths:
                   value:
                     object: "list"
                     data:
-                      - id: "gpt-4o"
+                      - id: "openai/gpt-4o"
                         object: "model"
                         created: 1686935002
                         owned_by: "openai"
-                      - id: "llama-3.3-70b-versatile"
+                        served_by: "openai"
+                      - id: "groq/llama-3.3-70b-versatile"
                         object: "model"
                         created: 1723651281
                         owned_by: "groq"
-                      - id: "claude-3-opus-20240229"
+                        served_by: "groq"
+                      - id: "anthropic/claude-3-opus-20240229"
                         object: "model"
                         created: 1708905600
                         owned_by: "anthropic"
-                      - id: "command-r"
+                        served_by: "anthropic"
+                      - id: "cohere/command-r"
                         object: "model"
                         created: 1707868800
                         owned_by: "cohere"
-                      - id: "phi3:3.8b"
+                        served_by: "cohere"
+                      - id: "ollama/phi3:3.8b"
                         object: "model"
                         created: 1718441600
                         owned_by: "ollama"
+                        served_by: "ollama"
                 singleProvider:
                   summary: Models from a specific provider
                   value:
                     object: "list"
                     data:
-                      - id: "gpt-4o"
+                      - id: "openai/gpt-4o"
                         object: "model"
                         created: 1686935002
                         owned_by: "openai"
-                      - id: "gpt-4-turbo"
+                        served_by: "openai"
+                      - id: "openai/gpt-4-turbo"
                         object: "model"
                         created: 1687882410
                         owned_by: "openai"
-                      - id: "gpt-3.5-turbo"
+                        served_by: "openai"
+                      - id: "openai/gpt-3.5-turbo"
                         object: "model"
                         created: 1677649963
                         owned_by: "openai"
+                        served_by: "openai"
         "401":
           $ref: "#/components/responses/Unauthorized"
         "500":
           $ref: "#/components/responses/InternalError"
@@ -562,6 +570,9 @@ components:
           type: string
         chat:
          type: string
+      required:
+        - models
+        - chat
     Error:
       type: object
       properties:
@@ -611,6 +622,12 @@ components:
           type: string
         served_by:
           $ref: "#/components/schemas/Provider"
+      required:
+        - id
+        - object
+        - created
+        - owned_by
+        - served_by
     ListModelsResponse:
       type: object
       description: Response structure for listing models
@@ -717,7 +734,8 @@ components:
            usage statistics for the entire request, and the `choices` field
            will always be an empty array. All other chunks will also include a
            `usage` field, but with a null value.
-          default: true
+      required:
+        - include_usage
     CreateChatCompletionRequest:
       type: object
       properties:
@@ -754,6 +772,13 @@ components:
            are supported.
          items:
            $ref: "#/components/schemas/ChatCompletionTool"
+        reasoning_format:
+          type: string
+          description: >
+            The format of the reasoning content. Can be `raw` or `parsed`.
+
+            When specified as raw some reasoning models will output `<think>` tags.
+            When specified as parsed the model will output the reasoning under reasoning_content attribute.
       required:
         - model
         - messages
@@ -908,6 +933,9 @@ components:
         refusal:
           type: string
           description: The refusal message generated by the model.
+      required:
+        - content
+        - role
     ChatCompletionMessageToolCallChunk:
       type: object
       properties:
@@ -1040,6 +1068,13 @@ components:
           description: The object type, which is always `chat.completion.chunk`.
         usage:
           $ref: "#/components/schemas/CompletionUsage"
+        reasoning_format:
+          type: string
+          description: >
+            The format of the reasoning content. Can be `raw` or `parsed`.
+
+            When specified as raw some reasoning models will output `<think>` tags.
+            When specified as parsed the model will output the reasoning under reasoning_content.
       required:
         - choices
         - created

From 510470be28460ef4571837d36c59efe8abb2f4f3 Mon Sep 17 00:00:00 2001
From: Eden Reich
Date: Tue, 29 Apr 2025 23:47:55 +0000
Subject: [PATCH 2/3] chore: Generate types and fix test

Signed-off-by: Eden Reich
---
 src/types/generated/index.ts | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/src/types/generated/index.ts b/src/types/generated/index.ts
index 1936db3..45fb989 100644
--- a/src/types/generated/index.ts
+++ b/src/types/generated/index.ts
@@ -181,8 +181,8 @@ export interface components {
       retry?: number;
     };
     Endpoints: {
-      models?: string;
-      chat?: string;
+      models: string;
+      chat: string;
     };
     Error: {
       error?: string;
@@ -203,12 +203,12 @@ export interface components {
     };
     /** @description Common model information */
     Model: {
-      id?: string;
-      object?: string;
+      id: string;
+      object: string;
       /** Format: int64 */
-      created?: number;
-      owned_by?: string;
-      served_by?: components['schemas']['Provider'];
+      created: number;
+      owned_by: string;
+      served_by: components['schemas']['Provider'];
     };
     /** @description Response structure for listing models */
     ListModelsResponse: {
@@ -267,11 +267,8 @@ export interface components {
    /** @description Options for streaming response. Only set this when you set `stream: true`.
     * */
    ChatCompletionStreamOptions: {
-      /**
-       * @description If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
-       *
-       * @default true
-       */
+      /** @description If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
+       * */
       include_usage: boolean;
     };
     CreateChatCompletionRequest: {
@@ -293,6 +290,10 @@ export interface components {
       /** @description A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.
       * */
       tools?: components['schemas']['ChatCompletionTool'][];
+      /** @description The format of the reasoning content. Can be `raw` or `parsed`.
+       * When specified as raw some reasoning models will output `<think>` tags. When specified as parsed the model will output the reasoning under reasoning_content attribute.
+       * */
+      reasoning_format?: string;
     };
     /** @description The function that the model called. */
     ChatCompletionMessageToolCallFunction: {
@@ -351,11 +352,11 @@ export interface components {
     /** @description A chat completion delta generated by streamed model responses. */
     ChatCompletionStreamResponseDelta: {
       /** @description The contents of the chunk message. */
-      content?: string;
+      content: string;
       /** @description The reasoning content of the chunk message. */
       reasoning_content?: string;
       tool_calls?: components['schemas']['ChatCompletionMessageToolCallChunk'][];
-      role?: components['schemas']['MessageRole'];
+      role: components['schemas']['MessageRole'];
       /** @description The refusal message generated by the model. */
       refusal?: string;
     };
@@ -419,6 +420,10 @@ export interface components {
      /** @description The object type, which is always `chat.completion.chunk`. */
      object: string;
      usage?: components['schemas']['CompletionUsage'];
+      /** @description The format of the reasoning content. Can be `raw` or `parsed`.
+       * When specified as raw some reasoning models will output `<think>` tags. When specified as parsed the model will output the reasoning under reasoning_content.
+       * */
+      reasoning_format?: string;
    };
    Config: unknown;
  };

From 4bb9e7efb7afd5d6c082648eed1ba18465ca3946 Mon Sep 17 00:00:00 2001
From: Eden Reich
Date: Wed, 30 Apr 2025 00:18:33 +0000
Subject: [PATCH 3/3] feat: Enhance OpenAPI schema and client to support
 reasoning field in chat completions

Previously only DeepSeek-style reasoning (`reasoning_content`) was
supported; now Groq-style `reasoning` is supported as well.

Signed-off-by: Eden Reich
---
 openapi.yaml                 |  12 ++-
 src/client.ts                |   5 +
 src/types/generated/index.ts |   8 +-
 tests/client.test.ts         | 183 +++++++++++++++++++++++++++++++++++
 4 files changed, 203 insertions(+), 5 deletions(-)

diff --git a/openapi.yaml b/openapi.yaml
index df70ec1..876bf71 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -600,10 +600,12 @@ components:
             $ref: "#/components/schemas/ChatCompletionMessageToolCall"
         tool_call_id:
           type: string
-        reasoning:
-          type: string
         reasoning_content:
           type: string
+          description: The reasoning content of the chunk message.
+        reasoning:
+          type: string
+          description: The reasoning of the chunk message. Same as reasoning_content.
       required:
         - role
         - content
@@ -778,7 +780,8 @@ components:
            The format of the reasoning content. Can be `raw` or `parsed`.
 
            When specified as raw some reasoning models will output `<think>` tags.
-            When specified as parsed the model will output the reasoning under reasoning_content attribute.
+            When specified as parsed the model will output the reasoning under
+            the `reasoning` or `reasoning_content` attribute.
       required:
         - model
         - messages
@@ -924,6 +927,9 @@ components:
         reasoning_content:
           type: string
           description: The reasoning content of the chunk message.
+        reasoning:
+          type: string
+          description: The reasoning of the chunk message. Same as reasoning_content.
         tool_calls:
           type: array
           items:
diff --git a/src/client.ts b/src/client.ts
index 6ca7cb1..a18b28d 100644
--- a/src/client.ts
+++ b/src/client.ts
@@ -281,6 +281,11 @@ export class InferenceGatewayClient {
             callbacks.onReasoning?.(reasoning_content);
           }
 
+          const reasoning = chunk.choices[0]?.delta?.reasoning;
+          if (reasoning !== undefined) {
+            callbacks.onReasoning?.(reasoning);
+          }
+
           const content = chunk.choices[0]?.delta?.content;
           if (content) {
             callbacks.onContent?.(content);
diff --git a/src/types/generated/index.ts b/src/types/generated/index.ts
index 45fb989..740cb45 100644
--- a/src/types/generated/index.ts
+++ b/src/types/generated/index.ts
@@ -198,8 +198,10 @@ export interface components {
       content: string;
       tool_calls?: components['schemas']['ChatCompletionMessageToolCall'][];
       tool_call_id?: string;
-      reasoning?: string;
+      /** @description The reasoning content of the chunk message. */
       reasoning_content?: string;
+      /** @description The reasoning of the chunk message. Same as reasoning_content. */
+      reasoning?: string;
     };
     /** @description Common model information */
     Model: {
@@ -291,9 +293,9 @@ export interface components {
       * */
       tools?: components['schemas']['ChatCompletionTool'][];
       /** @description The format of the reasoning content. Can be `raw` or `parsed`.
-       * When specified as raw some reasoning models will output `<think>` tags. When specified as parsed the model will output the reasoning under reasoning_content attribute.
+       * When specified as raw some reasoning models will output `<think>` tags. When specified as parsed the model will output the reasoning under the `reasoning` or `reasoning_content` attribute.
       * */
       reasoning_format?: string;
     };
     /** @description The function that the model called. */
     ChatCompletionMessageToolCallFunction: {
@@ -355,8 +357,10 @@ export interface components {
       content: string;
       /** @description The reasoning content of the chunk message. */
       reasoning_content?: string;
+      /** @description The reasoning of the chunk message. Same as reasoning_content. */
+      reasoning?: string;
       tool_calls?: components['schemas']['ChatCompletionMessageToolCallChunk'][];
       role: components['schemas']['MessageRole'];
       /** @description The refusal message generated by the model. */
       refusal?: string;
     };
diff --git a/tests/client.test.ts b/tests/client.test.ts
index 01081f8..a9388b5 100644
--- a/tests/client.test.ts
+++ b/tests/client.test.ts
@@ -74,6 +74,7 @@ describe('InferenceGatewayClient', () => {
           object: 'model',
           created: 1686935002,
           owned_by: 'openai',
+          served_by: Provider.openai,
         },
       ],
     };
@@ -494,6 +495,188 @@ describe('InferenceGatewayClient', () => {
         })
       );
     });
+
+    it('should handle streaming chat completions with reasoning field', async () => {
+      const mockRequest = {
+        model: 'groq/deepseek-distilled-llama-3.1-70b',
+        messages: [{ role: MessageRole.user, content: 'Hello' }],
+      };
+      const mockStream = new TransformStream();
+      const writer = mockStream.writable.getWriter();
+      const encoder = new TextEncoder();
+      mockFetch.mockResolvedValueOnce({
+        ok: true,
+        body: mockStream.readable,
+      });
+      const callbacks = {
+        onOpen: jest.fn(),
+        onChunk: jest.fn(),
+        onReasoning: jest.fn(),
+        onContent: jest.fn(),
+        onFinish: jest.fn(),
+      };
+      const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+      await writer.write(
+        encoder.encode(
+          'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":"Let me"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" think"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" about"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" this"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+            'data: [DONE]\n\n'
+        )
+      );
+      await writer.close();
+      await streamPromise;
+      expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+      expect(callbacks.onChunk).toHaveBeenCalledTimes(7);
+      expect(callbacks.onReasoning).toHaveBeenCalledTimes(4);
+      expect(callbacks.onReasoning).toHaveBeenCalledWith('Let me');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' think');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' about');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' this');
+      expect(callbacks.onContent).toHaveBeenCalledTimes(2);
+      expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+      expect(callbacks.onContent).toHaveBeenCalledWith('!');
+      expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+      expect(mockFetch).toHaveBeenCalledWith(
+        'http://localhost:8080/v1/chat/completions',
+        expect.objectContaining({
+          method: 'POST',
+          body: JSON.stringify({
+            ...mockRequest,
+            stream: true,
+            stream_options: {
+              include_usage: true,
+            },
+          }),
+        })
+      );
+    });
+
+    it('should handle streaming chat completions with reasoning_content (DeepSeek)', async () => {
+      const mockRequest = {
+        model: 'deepseek/deepseek-reasoner',
+        messages: [{ role: MessageRole.user, content: 'Hello' }],
+      };
+      const mockStream = new TransformStream();
+      const writer = mockStream.writable.getWriter();
+      const encoder = new TextEncoder();
+      mockFetch.mockResolvedValueOnce({
+        ok: true,
+        body: mockStream.readable,
+      });
+      const callbacks = {
+        onOpen: jest.fn(),
+        onChunk: jest.fn(),
+        onReasoning: jest.fn(),
+        onContent: jest.fn(),
+        onFinish: jest.fn(),
+      };
+      const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+      await writer.write(
+        encoder.encode(
+          'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":"This"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" is"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" a"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" reasoning"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" content"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+            'data: [DONE]\n\n'
+        )
+      );
+      await writer.close();
+      await streamPromise;
+      expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+      expect(callbacks.onChunk).toHaveBeenCalledTimes(8);
+      expect(callbacks.onReasoning).toHaveBeenCalledTimes(5);
+      expect(callbacks.onReasoning).toHaveBeenCalledWith('This');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' is');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' a');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' reasoning');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' content');
+      expect(callbacks.onContent).toHaveBeenCalledTimes(2);
+      expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+      expect(callbacks.onContent).toHaveBeenCalledWith('!');
+      expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+      expect(mockFetch).toHaveBeenCalledWith(
+        'http://localhost:8080/v1/chat/completions',
+        expect.objectContaining({
+          method: 'POST',
+          body: JSON.stringify({
+            ...mockRequest,
+            stream: true,
+            stream_options: {
+              include_usage: true,
+            },
+          }),
+        })
+      );
+    });
+
+    it('should handle streaming chat completions with reasoning field (Groq)', async () => {
+      const mockRequest = {
+        model: 'groq/llama-3.1-70b-versatile',
+        messages: [{ role: MessageRole.user, content: 'Hello' }],
+      };
+      const mockStream = new TransformStream();
+      const writer = mockStream.writable.getWriter();
+      const encoder = new TextEncoder();
+      mockFetch.mockResolvedValueOnce({
+        ok: true,
+        body: mockStream.readable,
+      });
+      const callbacks = {
+        onOpen: jest.fn(),
+        onChunk: jest.fn(),
+        onReasoning: jest.fn(),
+        onContent: jest.fn(),
+        onFinish: jest.fn(),
+      };
+      const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+      await writer.write(
+        encoder.encode(
+          'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":"Let me"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" think"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" about"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" this"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+            'data: [DONE]\n\n'
+        )
+      );
+      await writer.close();
+      await streamPromise;
+      expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+      expect(callbacks.onChunk).toHaveBeenCalledTimes(7);
+      expect(callbacks.onReasoning).toHaveBeenCalledTimes(4);
+      expect(callbacks.onReasoning).toHaveBeenCalledWith('Let me');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' think');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' about');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' this');
+      expect(callbacks.onContent).toHaveBeenCalledTimes(2);
+      expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+      expect(callbacks.onContent).toHaveBeenCalledWith('!');
+      expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+      expect(mockFetch).toHaveBeenCalledWith(
+        'http://localhost:8080/v1/chat/completions',
+        expect.objectContaining({
+          method: 'POST',
+          body: JSON.stringify({
+            ...mockRequest,
+            stream: true,
+            stream_options: {
+              include_usage: true,
+            },
+          }),
+        })
+      );
+    });
   });
 
   describe('proxy', () => {
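---

Taken together, the three patches let an SDK consumer stream reasoning tokens regardless of whether the provider emits DeepSeek-style `reasoning_content` or Groq-style `reasoning` deltas. A minimal consumer sketch follows. The request shape, `reasoning_format` parameter, and callback names come from the patches above; the package import path, constructor options, and model ID are illustrative assumptions, not confirmed by this series.

// Sketch only: import path and constructor options are assumed here;
// the tests above only show requests hitting http://localhost:8080/v1.
import { InferenceGatewayClient, MessageRole } from '@inference-gateway/sdk';

async function main(): Promise<void> {
  // Assumed constructor shape.
  const client = new InferenceGatewayClient({
    baseURL: 'http://localhost:8080/v1',
  });

  let reasoning = '';
  let answer = '';

  await client.streamChatCompletion(
    {
      // Provider-prefixed model ID, per PATCH 1 (ID taken from the tests).
      model: 'groq/deepseek-distilled-llama-3.1-70b',
      messages: [{ role: MessageRole.user, content: 'Hello' }],
      // Ask for structured reasoning instead of raw <think> tags, per PATCH 3.
      reasoning_format: 'parsed',
    },
    {
      // After PATCH 3, this fires for both `reasoning_content` (DeepSeek-style)
      // and `reasoning` (Groq-style) deltas.
      onReasoning: (token) => {
        reasoning += token;
      },
      onContent: (token) => {
        answer += token;
      },
      onFinish: () => {
        console.log('reasoning:', reasoning);
        console.log('answer:', answer);
      },
    }
  );
}

main().catch(console.error);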
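For code that works with the generated types directly, the two reasoning fields on `ChatCompletionStreamResponseDelta` can be normalized in one place. A small sketch against the shapes in `src/types/generated/index.ts` (the import path assumes the repo layout above):

import type { components } from './src/types/generated';

type Delta = components['schemas']['ChatCompletionStreamResponseDelta'];

// Prefer DeepSeek-style `reasoning_content`, falling back to Groq-style
// `reasoning`; both are optional on the delta after PATCH 3.
function extractReasoning(delta: Delta): string | undefined {
  return delta.reasoning_content ?? delta.reasoning;
}

Unlike the client change in PATCH 3, which invokes `onReasoning` once per populated field, this helper collapses the two fields to a single value, which is adequate when a chunk never carries both at once.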