
Commit b04d0b2

[inference] add maxRetries parameter and retry mechanism (elastic#211096)
## Summary

Fix elastic#210859

- Add a retry-on-error mechanism to the `chatComplete` API
- Defaults to retrying only "non-fatal" errors 3 times, but configurable per call
- Wire the retry option to the `output` API and to the `NL-to-ESQL` task

### Example

```ts
const response = await chatComplete({
  connectorId: 'my-connector',
  system: "You are a helpful assistant",
  messages: [
    { role: MessageRole.User, content: "Some question?" },
  ],
  maxRetries: 3, // optional, 3 is the default value
  retryConfiguration: {
    // everything here is optional, showing default values
    retryOn: 'auto',
    initialDelay: 1000,
    backoffMultiplier: 2,
  },
});
```
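The example above only shows the string values for `retryOn`; the option also accepts a predicate. A minimal sketch, reusing `chatComplete` and `MessageRole` from the example above and assuming the package import path `@kbn/inference-common`; the predicate and status checks are illustrative, not part of this commit:

```ts
import { isInferenceProviderError } from '@kbn/inference-common';

// Illustrative predicate: retry only provider errors whose HTTP status looks
// transient (rate limiting or 5xx). Assumes the error exposes its metadata as
// `meta`, as the `createInferenceProviderError` factory below suggests.
const retryOnTransientProviderErrors = (err: Error): boolean => {
  if (!isInferenceProviderError(err)) {
    return false;
  }
  const status = err.meta?.status;
  return status === 429 || (status !== undefined && status >= 500);
};

const response = await chatComplete({
  connectorId: 'my-connector',
  system: "You are a helpful assistant",
  messages: [
    { role: MessageRole.User, content: "Some question?" },
  ],
  maxRetries: 5,
  retryConfiguration: {
    retryOn: retryOnTransientProviderErrors,
  },
});
```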
1 parent 63d3364 commit b04d0b2

31 files changed (+1407 −798 lines)

x-pack/platform/packages/shared/ai-infra/inference-common/index.ts (+4)

```diff
@@ -39,6 +39,7 @@ export {
   type ChatCompletionMessageEvent,
   type ChatCompleteStreamResponse,
   type ChatCompleteResponse,
+  type ChatCompleteRetryConfiguration,
   type ChatCompletionTokenCount,
   type BoundChatCompleteAPI,
   type BoundChatCompleteOptions,
@@ -90,13 +91,16 @@ export {
   type InferenceTaskInternalError,
   type InferenceTaskRequestError,
   type InferenceTaskAbortedError,
+  type InferenceTaskProviderError,
   createInferenceInternalError,
   createInferenceRequestError,
   createInferenceRequestAbortedError,
+  createInferenceProviderError,
   isInferenceError,
   isInferenceInternalError,
   isInferenceRequestError,
   isInferenceRequestAbortedError,
+  isInferenceProviderError,
 } from './src/errors';
 export { generateFakeToolCallId } from './src/utils';
 export { elasticModelDictionary } from './src/const';
```
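For consumers, these additions surface through the package's public entry point. A minimal sketch, assuming the package is imported as `@kbn/inference-common` (its usual Kibana package name):

```ts
import {
  createInferenceProviderError,
  isInferenceProviderError,
  type ChatCompleteRetryConfiguration,
  type InferenceTaskProviderError,
} from '@kbn/inference-common';

// The retry settings can now be typed explicitly when shared across calls.
const defaultRetryConfiguration: ChatCompleteRetryConfiguration = {
  retryOn: 'auto',
  initialDelay: 1000,
  backoffMultiplier: 2,
};
```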

x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/api.ts (+38)

```diff
@@ -114,8 +114,46 @@ export type ChatCompleteOptions<
    * Optional metadata related to call execution.
    */
   metadata?: ChatCompleteMetadata;
+  /**
+   * The maximum amount of times to retry in case of error returned from the provider.
+   *
+   * Defaults to 3.
+   */
+  maxRetries?: number;
+  /**
+   * Optional configuration for the retry mechanism.
+   *
+   * Note that defaults are very fine, so only use this if you really have a reason to do so.
+   */
+  retryConfiguration?: ChatCompleteRetryConfiguration;
 } & TToolOptions;
 
+export interface ChatCompleteRetryConfiguration {
+  /**
+   * Defines the strategy for error retry.
+   *
+   * Either one of
+   * - all: will retry all errors
+   * - auto: will only retry errors that could be recoverable (e.g. rate limit, connectivity)
+   * Or a custom function to manually handle filtering
+   *
+   * Defaults to "auto"
+   */
+  retryOn?: 'all' | 'auto' | ((err: Error) => boolean);
+  /**
+   * The initial delay for incremental backoff, in ms.
+   *
+   * Defaults to 1000.
+   */
+  initialDelay?: number;
+  /**
+   * The backoff exponential multiplier.
+   *
+   * Defaults to 2.
+   */
+  backoffMultiplier?: number;
+}
+
 /**
  * Composite response type from the {@link ChatCompleteAPI},
  * which can be either an observable or a promise depending on
```
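The interface documents the knobs but not the backoff formula itself, which lives elsewhere in the commit. Assuming a conventional exponential backoff of `initialDelay * backoffMultiplier^(attempt - 1)`, the documented defaults would space retries roughly as in this illustrative sketch:

```ts
import type { ChatCompleteRetryConfiguration } from '@kbn/inference-common';

// Illustrative only: approximate wait before the nth retry (1-based),
// assuming classic exponential backoff with the documented defaults.
const delayForAttempt = (
  attempt: number,
  { initialDelay = 1000, backoffMultiplier = 2 }: ChatCompleteRetryConfiguration = {}
): number => initialDelay * Math.pow(backoffMultiplier, attempt - 1);

delayForAttempt(1); // 1000 ms
delayForAttempt(2); // 2000 ms
delayForAttempt(3); // 4000 ms
```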

x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/index.ts (+1)

```diff
@@ -12,6 +12,7 @@ export type {
   FunctionCallingMode,
   ChatCompleteStreamResponse,
   ChatCompleteResponse,
+  ChatCompleteRetryConfiguration,
 } from './api';
 export type {
   BoundChatCompleteAPI,
```

x-pack/platform/packages/shared/ai-infra/inference-common/src/errors.ts (+26)

```diff
@@ -11,6 +11,7 @@ import { InferenceTaskEventBase, InferenceTaskEventType } from './inference_task
  * Enum for generic inference error codes.
  */
 export enum InferenceTaskErrorCode {
+  providerError = 'providerError',
   internalError = 'internalError',
   requestError = 'requestError',
   abortedError = 'requestAborted',
@@ -62,6 +63,17 @@ export type InferenceTaskInternalError = InferenceTaskError<
   Record<string, any>
 >;
 
+/**
+ * Inference error thrown when calling the provider through its connector returned an error.
+ *
+ * It includes error responses returned from the provider,
+ * and any potential errors related to connectivity issue.
+ */
+export type InferenceTaskProviderError = InferenceTaskError<
+  InferenceTaskErrorCode.providerError,
+  { status?: number }
+>;
+
 /**
  * Inference error thrown when the request was considered invalid.
  *
@@ -92,6 +104,13 @@ export function createInferenceInternalError(
   return new InferenceTaskError(InferenceTaskErrorCode.internalError, message, meta ?? {});
 }
 
+export function createInferenceProviderError(
+  message = 'An internal error occurred',
+  meta?: { status?: number }
+): InferenceTaskProviderError {
+  return new InferenceTaskError(InferenceTaskErrorCode.providerError, message, meta ?? {});
+}
+
 export function createInferenceRequestError(
   message: string,
   status: number
@@ -136,3 +155,10 @@ export function isInferenceRequestError(error: unknown): error is InferenceTaskR
 export function isInferenceRequestAbortedError(error: unknown): error is InferenceTaskAbortedError {
   return isInferenceError(error) && error.code === InferenceTaskErrorCode.abortedError;
 }
+
+/**
+ * Check if the given error is an {@link InferenceTaskProviderError}
+ */
+export function isInferenceProviderError(error: unknown): error is InferenceTaskProviderError {
+  return isInferenceError(error) && error.code === InferenceTaskErrorCode.providerError;
+}
```
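The new factory and type guard follow the same pattern as the existing request/aborted helpers. A small usage sketch, where the wrapping helper and status value are hypothetical and only the two `@kbn/inference-common` exports come from this commit:

```ts
import {
  createInferenceProviderError,
  isInferenceProviderError,
} from '@kbn/inference-common';

// Hypothetical adapter-side helper wrapping a failed provider response.
const toProviderError = (status: number, body: string) =>
  createInferenceProviderError(`Provider returned ${status}: ${body}`, { status });

try {
  throw toProviderError(429, 'rate limit exceeded');
} catch (err) {
  if (isInferenceProviderError(err)) {
    // `err` is narrowed to InferenceTaskProviderError here; its metadata
    // carries the optional HTTP status captured by the factory.
    console.log('provider error status:', err.meta?.status);
  }
}
```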

x-pack/platform/packages/shared/ai-infra/inference-common/src/output/api.ts (+14 −1)

```diff
@@ -12,6 +12,7 @@ import {
   FromToolSchema,
   ToolSchema,
   ChatCompleteMetadata,
+  ChatCompleteRetryConfiguration,
 } from '../chat_complete';
 import { Output, OutputEvent } from './events';
 
@@ -114,7 +115,19 @@ export interface OutputOptions<
    */
   abortSignal?: AbortSignal;
   /**
-   * Optional configuration for retrying the call if an error occurs.
+   * The maximum amount of times to retry in case of error returned from the provider.
+   *
+   * Defaults to 3.
+   */
+  maxRetries?: number;
+  /**
+   * Optional configuration for the retry mechanism.
+   *
+   * Note that defaults are very fine, so only use this if you really have a reason to do so.
+   */
+  retryConfiguration?: ChatCompleteRetryConfiguration;
+  /**
+   * Optional configuration for retrying the call if output-specific error occurs.
    */
   retry?: {
     /**
```
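With this change the `output` API carries two distinct retry layers: the provider-level `maxRetries`/`retryConfiguration` forwarded to `chatComplete`, and the pre-existing output-specific `retry` option. A hedged sketch passing the new provider-level options, mirroring the shape exercised by the new unit test below; the id, connector, and input values are illustrative, and `output` is assumed to be an already-bound instance of this API:

```ts
const result = await output({
  id: 'extract_user_name',
  connectorId: 'my-connector',
  input: 'My name is John Doe',
  maxRetries: 2,
  retryConfiguration: {
    retryOn: 'auto',
    initialDelay: 500,
  },
});
```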

x-pack/platform/plugins/shared/inference/common/output/create_output_api.test.ts (+26)

```diff
@@ -220,4 +220,30 @@ describe('createOutputApi', () => {
       })
     );
   });
+
+  it('propagates retry options when provided', async () => {
+    chatComplete.mockResolvedValue(Promise.resolve({ content: 'content', toolCalls: [] }));
+
+    const output = createOutputApi(chatComplete);
+
+    await output({
+      id: 'id',
+      connectorId: '.my-connector',
+      input: 'input message',
+      maxRetries: 42,
+      retryConfiguration: {
+        retryOn: 'all',
+      },
+    });
+
+    expect(chatComplete).toHaveBeenCalledTimes(1);
+    expect(chatComplete).toHaveBeenCalledWith(
+      expect.objectContaining({
+        maxRetries: 42,
+        retryConfiguration: {
+          retryOn: 'all',
+        },
+      })
+    );
+  });
 });
```

x-pack/platform/plugins/shared/inference/common/output/create_output_api.ts (+4)

```diff
@@ -36,6 +36,8 @@ export function createOutputApi(chatCompleteApi: ChatCompleteAPI) {
     functionCalling,
     stream,
     abortSignal,
+    maxRetries,
+    retryConfiguration,
     metadata,
     retry,
   }: DefaultOutputOptions): OutputCompositeResponse<string, ToolSchema | undefined, boolean> {
@@ -57,6 +59,8 @@ export function createOutputApi(chatCompleteApi: ChatCompleteAPI) {
       modelName,
       functionCalling,
       abortSignal,
+      maxRetries,
+      retryConfiguration,
       metadata,
       system,
       messages,
```
