Skip to content

Commit 51ce3bb

Browse files
authored
refactor: Also process Groq reasoning models properly (#12)
* docs(openapi): Update model identifiers and enhance request/response schemas in the OpenAPI definition
  Signed-off-by: Eden Reich <[email protected]>
* chore: Generate types and fix test
  Signed-off-by: Eden Reich <[email protected]>
* feat: Enhance OpenAPI schema and client to support the `reasoning` field in chat completions
  Previously only DeepSeek reasoning was supported; Groq reasoning is now supported as well.
  Signed-off-by: Eden Reich <[email protected]>
---------
Signed-off-by: Eden Reich <[email protected]>
1 parent b68b280 commit 51ce3bb

File tree

4 files changed

+264
-26
lines changed

4 files changed

+264
-26
lines changed

openapi.yaml

+52-11
Original file line numberDiff line numberDiff line change
@@ -67,43 +67,51 @@ paths:
6767
value:
6868
object: "list"
6969
data:
70-
- id: "gpt-4o"
70+
- id: "openai/gpt-4o"
7171
object: "model"
7272
created: 1686935002
7373
owned_by: "openai"
74-
- id: "llama-3.3-70b-versatile"
74+
served_by: "openai"
75+
- id: "groq/llama-3.3-70b-versatile"
7576
object: "model"
7677
created: 1723651281
7778
owned_by: "groq"
78-
- id: "claude-3-opus-20240229"
79+
served_by: "groq"
80+
- id: "anthropic/claude-3-opus-20240229"
7981
object: "model"
8082
created: 1708905600
8183
owned_by: "anthropic"
82-
- id: "command-r"
84+
served_by: "anthropic"
85+
- id: "cohere/command-r"
8386
object: "model"
8487
created: 1707868800
8588
owned_by: "cohere"
86-
- id: "phi3:3.8b"
89+
served_by: "cohere"
90+
- id: "ollama/phi3:3.8b"
8791
object: "model"
8892
created: 1718441600
8993
owned_by: "ollama"
94+
served_by: "ollama"
9095
singleProvider:
9196
summary: Models from a specific provider
9297
value:
9398
object: "list"
9499
data:
95-
- id: "gpt-4o"
100+
- id: "openai/gpt-4o"
96101
object: "model"
97102
created: 1686935002
98103
owned_by: "openai"
99-
- id: "gpt-4-turbo"
104+
served_by: "openai"
105+
- id: "openai/gpt-4-turbo"
100106
object: "model"
101107
created: 1687882410
102108
owned_by: "openai"
103-
- id: "gpt-3.5-turbo"
109+
served_by: "openai"
110+
- id: "openai/gpt-3.5-turbo"
104111
object: "model"
105112
created: 1677649963
106113
owned_by: "openai"
114+
served_by: "openai"
107115
"401":
108116
$ref: "#/components/responses/Unauthorized"
109117
"500":
@@ -562,6 +570,9 @@ components:
562570
type: string
563571
chat:
564572
type: string
573+
required:
574+
- models
575+
- chat
565576
Error:
566577
type: object
567578
properties:
@@ -589,10 +600,12 @@ components:
589600
$ref: "#/components/schemas/ChatCompletionMessageToolCall"
590601
tool_call_id:
591602
type: string
592-
reasoning:
593-
type: string
594603
reasoning_content:
595604
type: string
605+
description: The reasoning content of the chunk message.
606+
reasoning:
607+
type: string
608+
description: The reasoning of the chunk message. Same as reasoning_content.
596609
required:
597610
- role
598611
- content
@@ -611,6 +624,12 @@ components:
611624
type: string
612625
served_by:
613626
$ref: "#/components/schemas/Provider"
627+
required:
628+
- id
629+
- object
630+
- created
631+
- owned_by
632+
- served_by
614633
ListModelsResponse:
615634
type: object
616635
description: Response structure for listing models
@@ -717,7 +736,8 @@ components:
717736
usage statistics for the entire request, and the `choices` field
718737
will always be an empty array. All other chunks will also include a
719738
`usage` field, but with a null value.
720-
default: true
739+
required:
740+
- include_usage
721741
CreateChatCompletionRequest:
722742
type: object
723743
properties:
@@ -754,6 +774,14 @@ components:
754774
are supported.
755775
items:
756776
$ref: "#/components/schemas/ChatCompletionTool"
777+
reasoning_format:
778+
type: string
779+
description: >
780+
The format of the reasoning content. Can be `raw` or `parsed`.
781+
782+
When specified as `raw`, some reasoning models will output `<think />` tags.
783+
When specified as `parsed`, the model will output the reasoning under
784+
the `reasoning` or `reasoning_content` attribute.
757785
required:
758786
- model
759787
- messages
@@ -899,6 +927,9 @@ components:
899927
reasoning_content:
900928
type: string
901929
description: The reasoning content of the chunk message.
930+
reasoning:
931+
type: string
932+
description: The reasoning of the chunk message. Same as reasoning_content.
902933
tool_calls:
903934
type: array
904935
items:
@@ -908,6 +939,9 @@ components:
908939
refusal:
909940
type: string
910941
description: The refusal message generated by the model.
942+
required:
943+
- content
944+
- role
911945
ChatCompletionMessageToolCallChunk:
912946
type: object
913947
properties:
@@ -1040,6 +1074,13 @@ components:
10401074
description: The object type, which is always `chat.completion.chunk`.
10411075
usage:
10421076
$ref: "#/components/schemas/CompletionUsage"
1077+
reasoning_format:
1078+
type: string
1079+
description: >
1080+
The format of the reasoning content. Can be `raw` or `parsed`.
1081+
1082+
When specified as `raw`, some reasoning models will output `<think />` tags.
1083+
When specified as `parsed`, the model will output the reasoning under the `reasoning` or `reasoning_content` attribute.
10431084
required:
10441085
- choices
10451086
- created

src/client.ts

+5
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,11 @@ export class InferenceGatewayClient {
281281
callbacks.onReasoning?.(reasoning_content);
282282
}
283283

284+
const reasoning = chunk.choices[0]?.delta?.reasoning;
285+
if (reasoning !== undefined) {
286+
callbacks.onReasoning?.(reasoning);
287+
}
288+
284289
const content = chunk.choices[0]?.delta?.content;
285290
if (content) {
286291
callbacks.onContent?.(content);

src/types/generated/index.ts

+24-15
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)