From 427ef7e476d9b5fe94f02c2d2445c16a4a4b0d5e Mon Sep 17 00:00:00 2001
From: Eden Reich
Date: Tue, 29 Apr 2025 23:45:14 +0000
Subject: [PATCH 1/3] docs(openapi): Update model identifiers and enhance
 request/response schemas in OpenAPI definition

Signed-off-by: Eden Reich
---
 openapi.yaml | 53 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 44 insertions(+), 9 deletions(-)

diff --git a/openapi.yaml b/openapi.yaml
index 1867ba0..df70ec1 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -67,43 +67,51 @@ paths:
                   value:
                     object: "list"
                     data:
-                      - id: "gpt-4o"
+                      - id: "openai/gpt-4o"
                         object: "model"
                         created: 1686935002
                         owned_by: "openai"
-                      - id: "llama-3.3-70b-versatile"
+                        served_by: "openai"
+                      - id: "groq/llama-3.3-70b-versatile"
                         object: "model"
                         created: 1723651281
                         owned_by: "groq"
-                      - id: "claude-3-opus-20240229"
+                        served_by: "groq"
+                      - id: "anthropic/claude-3-opus-20240229"
                         object: "model"
                         created: 1708905600
                         owned_by: "anthropic"
-                      - id: "command-r"
+                        served_by: "anthropic"
+                      - id: "cohere/command-r"
                         object: "model"
                         created: 1707868800
                         owned_by: "cohere"
-                      - id: "phi3:3.8b"
+                        served_by: "cohere"
+                      - id: "ollama/phi3:3.8b"
                         object: "model"
                         created: 1718441600
                         owned_by: "ollama"
+                        served_by: "ollama"
                 singleProvider:
                   summary: Models from a specific provider
                   value:
                     object: "list"
                     data:
-                      - id: "gpt-4o"
+                      - id: "openai/gpt-4o"
                         object: "model"
                         created: 1686935002
                         owned_by: "openai"
-                      - id: "gpt-4-turbo"
+                        served_by: "openai"
+                      - id: "openai/gpt-4-turbo"
                         object: "model"
                         created: 1687882410
                         owned_by: "openai"
-                      - id: "gpt-3.5-turbo"
+                        served_by: "openai"
+                      - id: "openai/gpt-3.5-turbo"
                         object: "model"
                         created: 1677649963
                         owned_by: "openai"
+                        served_by: "openai"
         "401":
           $ref: "#/components/responses/Unauthorized"
         "500":
           $ref: "#/components/responses/InternalError"
@@ -562,6 +570,9 @@ components:
           type: string
         chat:
          type: string
+      required:
+        - models
+        - chat
     Error:
       type: object
       properties:
@@ -611,6 +622,12 @@ components:
           type: string
         served_by:
           $ref: "#/components/schemas/Provider"
+      required:
+        - id
+        - object
+        - created
+        - owned_by
+        - served_by
     ListModelsResponse:
       type: object
       description: Response structure for listing models
@@ -717,7 +734,8 @@ components:
            usage statistics for the entire request, and the `choices` field
            will always be an empty array. All other chunks will also include a
            `usage` field, but with a null value.
-          default: true
+      required:
+        - include_usage
     CreateChatCompletionRequest:
       type: object
       properties:
@@ -754,6 +772,13 @@ components:
            are supported.
          items:
            $ref: "#/components/schemas/ChatCompletionTool"
+        reasoning_format:
+          type: string
+          description: >
+            The format of the reasoning content. Can be `raw` or `parsed`.
+
+            When specified as raw some reasoning models will output `<think>` tags.
+            When specified as parsed the model will output the reasoning under reasoning_content attribute.
       required:
         - model
         - messages
@@ -908,6 +933,9 @@ components:
         refusal:
           type: string
           description: The refusal message generated by the model.
+      required:
+        - content
+        - role
     ChatCompletionMessageToolCallChunk:
       type: object
       properties:
@@ -1040,6 +1068,13 @@ components:
           description: The object type, which is always `chat.completion.chunk`.
         usage:
           $ref: "#/components/schemas/CompletionUsage"
+        reasoning_format:
+          type: string
+          description: >
+            The format of the reasoning content. Can be `raw` or `parsed`.
+
+            When specified as raw some reasoning models will output `<think>` tags.
+            When specified as parsed the model will output the reasoning under reasoning_content.
       required:
         - choices
         - created

From 510470be28460ef4571837d36c59efe8abb2f4f3 Mon Sep 17 00:00:00 2001
From: Eden Reich
Date: Tue, 29 Apr 2025 23:47:55 +0000
Subject: [PATCH 2/3] chore: Generate types and fix test

Signed-off-by: Eden Reich
---
 src/types/generated/index.ts | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/src/types/generated/index.ts b/src/types/generated/index.ts
index 1936db3..45fb989 100644
--- a/src/types/generated/index.ts
+++ b/src/types/generated/index.ts
@@ -181,8 +181,8 @@ export interface components {
       retry?: number;
     };
     Endpoints: {
-      models?: string;
-      chat?: string;
+      models: string;
+      chat: string;
     };
     Error: {
       error?: string;
@@ -203,12 +203,12 @@ export interface components {
     };
     /** @description Common model information */
     Model: {
-      id?: string;
-      object?: string;
+      id: string;
+      object: string;
       /** Format: int64 */
-      created?: number;
-      owned_by?: string;
-      served_by?: components['schemas']['Provider'];
+      created: number;
+      owned_by: string;
+      served_by: components['schemas']['Provider'];
     };
     /** @description Response structure for listing models */
     ListModelsResponse: {
@@ -267,11 +267,8 @@ export interface components {
    /** @description Options for streaming response. Only set this when you set `stream: true`.
     * */
    ChatCompletionStreamOptions: {
-      /**
-       * @description If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
-       *
-       * @default true
-       */
+      /** @description If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
+       * */
       include_usage: boolean;
     };
     CreateChatCompletionRequest: {
@@ -293,6 +290,10 @@ export interface components {
       /** @description A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.
       * */
       tools?: components['schemas']['ChatCompletionTool'][];
+      /** @description The format of the reasoning content. Can be `raw` or `parsed`.
+       * When specified as raw some reasoning models will output `<think>` tags. When specified as parsed the model will output the reasoning under reasoning_content attribute.
+       * */
+      reasoning_format?: string;
     };
     /** @description The function that the model called. */
     ChatCompletionMessageToolCallFunction: {
@@ -351,11 +352,11 @@ export interface components {
     /** @description A chat completion delta generated by streamed model responses. */
     ChatCompletionStreamResponseDelta: {
       /** @description The contents of the chunk message. */
-      content?: string;
+      content: string;
       /** @description The reasoning content of the chunk message. */
       reasoning_content?: string;
       tool_calls?: components['schemas']['ChatCompletionMessageToolCallChunk'][];
-      role?: components['schemas']['MessageRole'];
+      role: components['schemas']['MessageRole'];
       /** @description The refusal message generated by the model. */
       refusal?: string;
     };
@@ -419,6 +420,10 @@ export interface components {
      /** @description The object type, which is always `chat.completion.chunk`. */
      object: string;
      usage?: components['schemas']['CompletionUsage'];
+      /** @description The format of the reasoning content. Can be `raw` or `parsed`.
+       * When specified as raw some reasoning models will output `<think>` tags. When specified as parsed the model will output the reasoning under reasoning_content.
+       * */
+      reasoning_format?: string;
    };
    Config: unknown;
  };

From 4bb9e7efb7afd5d6c082648eed1ba18465ca3946 Mon Sep 17 00:00:00 2001
From: Eden Reich
Date: Wed, 30 Apr 2025 00:18:33 +0000
Subject: [PATCH 3/3] feat: Enhance OpenAPI schema and client to support
 reasoning field in chat completions

Previously only DeepSeek-style reasoning (`reasoning_content`) was
supported; now Groq-style `reasoning` is supported as well.

Signed-off-by: Eden Reich
---
 openapi.yaml                 |  12 ++-
 src/client.ts                |   5 +
 src/types/generated/index.ts |   8 +-
 tests/client.test.ts         | 183 +++++++++++++++++++++++++++++++++++
 4 files changed, 203 insertions(+), 5 deletions(-)

diff --git a/openapi.yaml b/openapi.yaml
index df70ec1..876bf71 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -600,10 +600,12 @@ components:
             $ref: "#/components/schemas/ChatCompletionMessageToolCall"
         tool_call_id:
           type: string
-        reasoning:
-          type: string
         reasoning_content:
           type: string
+          description: The reasoning content of the chunk message.
+        reasoning:
+          type: string
+          description: The reasoning of the chunk message. Same as reasoning_content.
       required:
         - role
         - content
@@ -778,7 +780,8 @@ components:
            The format of the reasoning content. Can be `raw` or `parsed`.
 
            When specified as raw some reasoning models will output `<think>` tags.
-            When specified as parsed the model will output the reasoning under reasoning_content attribute.
+            When specified as parsed the model will output the reasoning under
+            the `reasoning` or `reasoning_content` attribute.
       required:
         - model
         - messages
@@ -924,6 +927,9 @@ components:
         reasoning_content:
           type: string
           description: The reasoning content of the chunk message.
+        reasoning:
+          type: string
+          description: The reasoning of the chunk message. Same as reasoning_content.
         tool_calls:
           type: array
           items:
diff --git a/src/client.ts b/src/client.ts
index 6ca7cb1..a18b28d 100644
--- a/src/client.ts
+++ b/src/client.ts
@@ -281,6 +281,11 @@ export class InferenceGatewayClient {
             callbacks.onReasoning?.(reasoning_content);
           }
 
+          const reasoning = chunk.choices[0]?.delta?.reasoning;
+          if (reasoning !== undefined) {
+            callbacks.onReasoning?.(reasoning);
+          }
+
           const content = chunk.choices[0]?.delta?.content;
           if (content) {
             callbacks.onContent?.(content);
diff --git a/src/types/generated/index.ts b/src/types/generated/index.ts
index 45fb989..740cb45 100644
--- a/src/types/generated/index.ts
+++ b/src/types/generated/index.ts
@@ -198,8 +198,10 @@ export interface components {
       content: string;
       tool_calls?: components['schemas']['ChatCompletionMessageToolCall'][];
       tool_call_id?: string;
-      reasoning?: string;
+      /** @description The reasoning content of the chunk message. */
       reasoning_content?: string;
+      /** @description The reasoning of the chunk message. Same as reasoning_content. */
+      reasoning?: string;
     };
     /** @description Common model information */
     Model: {
@@ -291,9 +293,9 @@ export interface components {
       * */
       tools?: components['schemas']['ChatCompletionTool'][];
       /** @description The format of the reasoning content. Can be `raw` or `parsed`.
-       * When specified as raw some reasoning models will output `<think>` tags. When specified as parsed the model will output the reasoning under reasoning_content attribute.
+       * When specified as raw some reasoning models will output `<think>` tags. When specified as parsed the model will output the reasoning under the `reasoning` or `reasoning_content` attribute.
       * */
       reasoning_format?: string;
     };
     /** @description The function that the model called. */
     ChatCompletionMessageToolCallFunction: {
@@ -355,8 +357,10 @@ export interface components {
       content: string;
       /** @description The reasoning content of the chunk message. */
       reasoning_content?: string;
+      /** @description The reasoning of the chunk message. Same as reasoning_content. */
+      reasoning?: string;
       tool_calls?: components['schemas']['ChatCompletionMessageToolCallChunk'][];
       role: components['schemas']['MessageRole'];
       /** @description The refusal message generated by the model. */
       refusal?: string;
     };
diff --git a/tests/client.test.ts b/tests/client.test.ts
index 01081f8..a9388b5 100644
--- a/tests/client.test.ts
+++ b/tests/client.test.ts
@@ -74,6 +74,7 @@ describe('InferenceGatewayClient', () => {
           object: 'model',
           created: 1686935002,
           owned_by: 'openai',
+          served_by: Provider.openai,
         },
       ],
     };
@@ -494,6 +495,188 @@ describe('InferenceGatewayClient', () => {
         })
       );
     });
+
+    it('should handle streaming chat completions with reasoning field', async () => {
+      const mockRequest = {
+        model: 'groq/deepseek-distilled-llama-3.1-70b',
+        messages: [{ role: MessageRole.user, content: 'Hello' }],
+      };
+      const mockStream = new TransformStream();
+      const writer = mockStream.writable.getWriter();
+      const encoder = new TextEncoder();
+      mockFetch.mockResolvedValueOnce({
+        ok: true,
+        body: mockStream.readable,
+      });
+      const callbacks = {
+        onOpen: jest.fn(),
+        onChunk: jest.fn(),
+        onReasoning: jest.fn(),
+        onContent: jest.fn(),
+        onFinish: jest.fn(),
+      };
+      const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+      await writer.write(
+        encoder.encode(
+          'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":"Let me"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" think"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" about"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" this"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+            'data: [DONE]\n\n'
+        )
+      );
+      await writer.close();
+      await streamPromise;
+      expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+      expect(callbacks.onChunk).toHaveBeenCalledTimes(7);
+      expect(callbacks.onReasoning).toHaveBeenCalledTimes(4);
+      expect(callbacks.onReasoning).toHaveBeenCalledWith('Let me');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' think');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' about');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' this');
+      expect(callbacks.onContent).toHaveBeenCalledTimes(2);
+      expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+      expect(callbacks.onContent).toHaveBeenCalledWith('!');
+      expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+      expect(mockFetch).toHaveBeenCalledWith(
+        'http://localhost:8080/v1/chat/completions',
+        expect.objectContaining({
+          method: 'POST',
+          body: JSON.stringify({
+            ...mockRequest,
+            stream: true,
+            stream_options: {
+              include_usage: true,
+            },
+          }),
+        })
+      );
+    });
+
+    it('should handle streaming chat completions with reasoning_content (DeepSeek)', async () => {
+      const mockRequest = {
+        model: 'deepseek/deepseek-reasoner',
+        messages: [{ role: MessageRole.user, content: 'Hello' }],
+      };
+      const mockStream = new TransformStream();
+      const writer = mockStream.writable.getWriter();
+      const encoder = new TextEncoder();
+      mockFetch.mockResolvedValueOnce({
+        ok: true,
+        body: mockStream.readable,
+      });
+      const callbacks = {
+        onOpen: jest.fn(),
+        onChunk: jest.fn(),
+        onReasoning: jest.fn(),
+        onContent: jest.fn(),
+        onFinish: jest.fn(),
+      };
+      const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+      await writer.write(
+        encoder.encode(
+          'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":"This"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" is"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" a"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" reasoning"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" content"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+            'data: [DONE]\n\n'
+        )
+      );
+      await writer.close();
+      await streamPromise;
+      expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+      expect(callbacks.onChunk).toHaveBeenCalledTimes(8);
+      expect(callbacks.onReasoning).toHaveBeenCalledTimes(5);
+      expect(callbacks.onReasoning).toHaveBeenCalledWith('This');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' is');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' a');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' reasoning');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' content');
+      expect(callbacks.onContent).toHaveBeenCalledTimes(2);
+      expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+      expect(callbacks.onContent).toHaveBeenCalledWith('!');
+      expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+      expect(mockFetch).toHaveBeenCalledWith(
+        'http://localhost:8080/v1/chat/completions',
+        expect.objectContaining({
+          method: 'POST',
+          body: JSON.stringify({
+            ...mockRequest,
+            stream: true,
+            stream_options: {
+              include_usage: true,
+            },
+          }),
+        })
+      );
+    });
+
+    it('should handle streaming chat completions with reasoning field (Groq)', async () => {
+      const mockRequest = {
+        model: 'groq/llama-3.1-70b-versatile',
+        messages: [{ role: MessageRole.user, content: 'Hello' }],
+      };
+      const mockStream = new TransformStream();
+      const writer = mockStream.writable.getWriter();
+      const encoder = new TextEncoder();
+      mockFetch.mockResolvedValueOnce({
+        ok: true,
+        body: mockStream.readable,
+      });
+      const callbacks = {
+        onOpen: jest.fn(),
+        onChunk: jest.fn(),
+        onReasoning: jest.fn(),
+        onContent: jest.fn(),
+        onFinish: jest.fn(),
+      };
+      const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+      await writer.write(
+        encoder.encode(
+          'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":"Let me"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" think"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" about"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" this"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+            'data: [DONE]\n\n'
+        )
+      );
+      await writer.close();
+      await streamPromise;
+      expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+      expect(callbacks.onChunk).toHaveBeenCalledTimes(7);
+      expect(callbacks.onReasoning).toHaveBeenCalledTimes(4);
+      expect(callbacks.onReasoning).toHaveBeenCalledWith('Let me');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' think');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' about');
+      expect(callbacks.onReasoning).toHaveBeenCalledWith(' this');
+      expect(callbacks.onContent).toHaveBeenCalledTimes(2);
+      expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+      expect(callbacks.onContent).toHaveBeenCalledWith('!');
+      expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+      expect(mockFetch).toHaveBeenCalledWith(
+        'http://localhost:8080/v1/chat/completions',
+        expect.objectContaining({
+          method: 'POST',
+          body: JSON.stringify({
+            ...mockRequest,
+            stream: true,
+            stream_options: {
+              include_usage: true,
+            },
+          }),
+        })
+      );
+    });
   });
 
   describe('proxy', () => {
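---

Taken together, the three patches let an SDK consumer stream reasoning tokens regardless of whether the provider emits DeepSeek-style `reasoning_content` or Groq-style `reasoning` deltas. A minimal consumer sketch follows. The request shape, `reasoning_format` parameter, and callback names come from the patches above; the package import path, constructor options, and model ID are illustrative assumptions, not confirmed by this series.

// Sketch only: import path and constructor options are assumed here;
// the tests above only show requests hitting http://localhost:8080/v1.
import { InferenceGatewayClient, MessageRole } from '@inference-gateway/sdk';

async function main(): Promise<void> {
  // Assumed constructor shape.
  const client = new InferenceGatewayClient({
    baseURL: 'http://localhost:8080/v1',
  });

  let reasoning = '';
  let answer = '';

  await client.streamChatCompletion(
    {
      // Provider-prefixed model ID, per PATCH 1 (ID taken from the tests).
      model: 'groq/deepseek-distilled-llama-3.1-70b',
      messages: [{ role: MessageRole.user, content: 'Hello' }],
      // Ask for structured reasoning instead of raw <think> tags, per PATCH 3.
      reasoning_format: 'parsed',
    },
    {
      // After PATCH 3, this fires for both `reasoning_content` (DeepSeek-style)
      // and `reasoning` (Groq-style) deltas.
      onReasoning: (token) => {
        reasoning += token;
      },
      onContent: (token) => {
        answer += token;
      },
      onFinish: () => {
        console.log('reasoning:', reasoning);
        console.log('answer:', answer);
      },
    }
  );
}

main().catch(console.error);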
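For code that works with the generated types directly, the two reasoning fields on `ChatCompletionStreamResponseDelta` can be normalized in one place. A small sketch against the shapes in `src/types/generated/index.ts` (the import path assumes the repo layout above):

import type { components } from './src/types/generated';

type Delta = components['schemas']['ChatCompletionStreamResponseDelta'];

// Prefer DeepSeek-style `reasoning_content`, falling back to Groq-style
// `reasoning`; both are optional on the delta after PATCH 3.
function extractReasoning(delta: Delta): string | undefined {
  return delta.reasoning_content ?? delta.reasoning;
}

Unlike the client change in PATCH 3, which invokes `onReasoning` once per populated field, this helper collapses the two fields to a single value, which is adequate when a chunk never carries both at once.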