refactor/process also groq reasoning models properly #12


Merged
merged 3 commits into from
Apr 30, 2025
63 changes: 52 additions & 11 deletions openapi.yaml
@@ -67,43 +67,51 @@ paths:
value:
object: "list"
data:
- id: "gpt-4o"
- id: "openai/gpt-4o"
object: "model"
created: 1686935002
owned_by: "openai"
- id: "llama-3.3-70b-versatile"
served_by: "openai"
- id: "openai/llama-3.3-70b-versatile"
object: "model"
created: 1723651281
owned_by: "groq"
- id: "claude-3-opus-20240229"
served_by: "groq"
- id: "cohere/claude-3-opus-20240229"
object: "model"
created: 1708905600
owned_by: "anthropic"
- id: "command-r"
served_by: "anthropic"
- id: "cohere/command-r"
object: "model"
created: 1707868800
owned_by: "cohere"
- id: "phi3:3.8b"
served_by: "cohere"
- id: "ollama/phi3:3.8b"
object: "model"
created: 1718441600
owned_by: "ollama"
served_by: "ollama"
singleProvider:
summary: Models from a specific provider
value:
object: "list"
data:
- id: "gpt-4o"
- id: "openai/gpt-4o"
object: "model"
created: 1686935002
owned_by: "openai"
- id: "gpt-4-turbo"
served_by: "openai"
- id: "openai/gpt-4-turbo"
object: "model"
created: 1687882410
owned_by: "openai"
- id: "gpt-3.5-turbo"
served_by: "openai"
- id: "openai/gpt-3.5-turbo"
object: "model"
created: 1677649963
owned_by: "openai"
served_by: "openai"
"401":
$ref: "#/components/responses/Unauthorized"
"500":
@@ -562,6 +570,9 @@ components:
type: string
chat:
type: string
required:
- models
- chat
Error:
type: object
properties:
@@ -589,10 +600,12 @@ components:
$ref: "#/components/schemas/ChatCompletionMessageToolCall"
tool_call_id:
type: string
reasoning:
type: string
reasoning_content:
type: string
description: The reasoning content of the chunk message.
reasoning:
type: string
description: The reasoning of the chunk message. Same as reasoning_content.
required:
- role
- content
@@ -611,6 +624,12 @@ components:
type: string
served_by:
$ref: "#/components/schemas/Provider"
required:
- id
- object
- created
- owned_by
- served_by
ListModelsResponse:
type: object
description: Response structure for listing models
@@ -717,7 +736,8 @@ components:
usage statistics for the entire request, and the `choices` field
will always be an empty array. All other chunks will also include a
`usage` field, but with a null value.
default: true
required:
- include_usage
CreateChatCompletionRequest:
type: object
properties:
@@ -754,6 +774,14 @@ components:
are supported.
items:
$ref: "#/components/schemas/ChatCompletionTool"
reasoning_format:
type: string
description: >
The format of the reasoning content. Can be `raw` or `parsed`.

When specified as `raw`, some reasoning models will output `<think />` tags.
When specified as `parsed`, the model will output the reasoning under the
`reasoning` or `reasoning_content` attribute.
required:
- model
- messages
@@ -899,6 +927,9 @@ components:
reasoning_content:
type: string
description: The reasoning content of the chunk message.
reasoning:
type: string
description: The reasoning of the chunk message. Same as reasoning_content.
tool_calls:
type: array
items:
@@ -908,6 +939,9 @@ components:
refusal:
type: string
description: The refusal message generated by the model.
required:
- content
- role
ChatCompletionMessageToolCallChunk:
type: object
properties:
@@ -1040,6 +1074,13 @@ components:
description: The object type, which is always `chat.completion.chunk`.
usage:
$ref: "#/components/schemas/CompletionUsage"
reasoning_format:
type: string
description: >
The format of the reasoning content. Can be `raw` or `parsed`.

When specified as `raw`, some reasoning models will output `<think />` tags.
When specified as `parsed`, the model will output the reasoning under `reasoning_content`.
required:
- choices
- created
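Taken together, the openapi.yaml changes do two things: the model examples now use provider-prefixed ids (for example openai/gpt-4o) plus a served_by field naming the upstream provider, and chat completion requests gain a reasoning_format option whose parsed mode surfaces the model's reasoning under a reasoning or reasoning_content attribute instead of inline <think /> tags. A minimal TypeScript sketch of a non-streaming request against the updated schema follows; the package name, client options, createChatCompletion method, and example model id are assumptions for illustration, not details confirmed by this diff.

// Sketch only: package name, client options, and method name are assumed;
// the request and response fields follow the schema in openapi.yaml.
import { InferenceGatewayClient } from '@inference-gateway/sdk';

async function main() {
  const client = new InferenceGatewayClient({
    baseURL: 'http://localhost:8080/v1', // hypothetical gateway endpoint
  });

  const response = await client.createChatCompletion({
    model: 'groq/deepseek-r1-distill-llama-70b', // hypothetical provider-prefixed id
    messages: [{ role: 'user', content: 'Why is the sky blue?' }],
    // 'raw' would leave <think /> tags in the content; 'parsed' moves the
    // reasoning into a dedicated attribute.
    reasoning_format: 'parsed',
  });

  const message = response.choices[0].message;
  // With parsed reasoning, either field name may be populated depending on the provider.
  console.log('reasoning:', message.reasoning ?? message.reasoning_content);
  console.log('answer:', message.content);
}

main().catch(console.error);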
5 changes: 5 additions & 0 deletions src/client.ts
@@ -281,6 +281,11 @@ export class InferenceGatewayClient {
callbacks.onReasoning?.(reasoning_content);
}

const reasoning = chunk.choices[0]?.delta?.reasoning;
if (reasoning !== undefined) {
callbacks.onReasoning?.(reasoning);
}

const content = chunk.choices[0]?.delta?.content;
if (content) {
callbacks.onContent?.(content);
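The client change mirrors the existing reasoning_content handling for the new reasoning field, so whichever of the two a given provider populates ends up in the same onReasoning callback; note that the two checks are independent, so a chunk that carried both fields would invoke onReasoning twice. A sketch of consuming the stream follows, assuming a streamChatCompletion(request, callbacks) method; the method name and callback object shape are inferred from the callbacks.onReasoning and callbacks.onContent calls visible above, and the model id is hypothetical.

// Sketch: method name, client options, and callback shape are assumptions
// inferred from this diff rather than the SDK's documented API.
import { InferenceGatewayClient } from '@inference-gateway/sdk';

async function streamExample() {
  const client = new InferenceGatewayClient({
    baseURL: 'http://localhost:8080/v1', // hypothetical gateway endpoint
  });

  await client.streamChatCompletion(
    {
      model: 'groq/deepseek-r1-distill-llama-70b', // hypothetical example id
      messages: [{ role: 'user', content: 'Plan a three-day trip to Lisbon.' }],
      reasoning_format: 'parsed',
    },
    {
      // Receives both delta.reasoning and delta.reasoning_content tokens.
      onReasoning: (reasoning: string) => process.stdout.write(reasoning),
      // Receives ordinary content tokens.
      onContent: (content: string) => process.stdout.write(content),
    }
  );
}

streamExample().catch(console.error);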
39 changes: 24 additions & 15 deletions src/types/generated/index.ts

Some generated files are not rendered by default.

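The regenerated types are collapsed in this view, but based on the schema changes above, the streaming delta type presumably gains an optional reasoning field alongside reasoning_content, roughly along the lines of the sketch below; the interface name and optionality markers are guesses, since the actual generated file is not shown here.

// Assumed shape of the regenerated delta type. The field names follow the
// ChatCompletionStreamResponseDelta schema in openapi.yaml; the interface name
// and which fields are optional are guesses, as the generated file is hidden.
export interface ChatCompletionStreamResponseDelta {
  role: string;
  content: string;
  reasoning_content?: string;
  // Same as reasoning_content; some providers (for example Groq) use this name instead.
  reasoning?: string;
  refusal?: string;
  // tool_calls and other fields omitted from this sketch.
}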