
Commit b04d0b2

[inference] add maxRetries parameter and retry mechanism (elastic#211096)
## Summary

Fix elastic#210859

- Add a retry-on-error mechanism to the `chatComplete` API
- Defaults to retrying only "non-fatal" errors 3 times, but configurable per call
- Wire the retry option to the `output` API and to the `NL-to-ESQL` task

### Example

```ts
const response = await chatComplete({
  connectorId: 'my-connector',
  system: "You are a helpful assistant",
  messages: [
    { role: MessageRole.User, content: "Some question?" },
  ],
  maxRetries: 3, // optional, 3 is the default value
  retryConfiguration: {
    // everything here is optional, showing default values
    retryOn: 'auto',
    initialDelay: 1000,
    backoffMultiplier: 2,
  },
});
```
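The example above only shows the string values for `retryOn`; the option also accepts a predicate. A minimal sketch, reusing `chatComplete` and `MessageRole` from the example above and assuming the package import path `@kbn/inference-common`; the predicate and status checks are illustrative, not part of this commit:

```ts
import { isInferenceProviderError } from '@kbn/inference-common';

// Illustrative predicate: retry only provider errors whose HTTP status looks
// transient (rate limiting or 5xx). Assumes the error exposes its metadata as
// `meta`, as the `createInferenceProviderError` factory below suggests.
const retryOnTransientProviderErrors = (err: Error): boolean => {
  if (!isInferenceProviderError(err)) {
    return false;
  }
  const status = err.meta?.status;
  return status === 429 || (status !== undefined && status >= 500);
};

const response = await chatComplete({
  connectorId: 'my-connector',
  system: "You are a helpful assistant",
  messages: [
    { role: MessageRole.User, content: "Some question?" },
  ],
  maxRetries: 5,
  retryConfiguration: {
    retryOn: retryOnTransientProviderErrors,
  },
});
```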
1 parent 63d3364 commit b04d0b2

31 files changed (+1407 −798 lines)

x-pack/platform/packages/shared/ai-infra/inference-common/index.ts (+4)

```diff
@@ -39,6 +39,7 @@ export {
   type ChatCompletionMessageEvent,
   type ChatCompleteStreamResponse,
   type ChatCompleteResponse,
+  type ChatCompleteRetryConfiguration,
   type ChatCompletionTokenCount,
   type BoundChatCompleteAPI,
   type BoundChatCompleteOptions,
@@ -90,13 +91,16 @@ export {
   type InferenceTaskInternalError,
   type InferenceTaskRequestError,
   type InferenceTaskAbortedError,
+  type InferenceTaskProviderError,
   createInferenceInternalError,
   createInferenceRequestError,
   createInferenceRequestAbortedError,
+  createInferenceProviderError,
   isInferenceError,
   isInferenceInternalError,
   isInferenceRequestError,
   isInferenceRequestAbortedError,
+  isInferenceProviderError,
 } from './src/errors';
 export { generateFakeToolCallId } from './src/utils';
 export { elasticModelDictionary } from './src/const';
```
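For consumers, these additions surface through the package's public entry point. A minimal sketch, assuming the package is imported as `@kbn/inference-common` (its usual Kibana package name):

```ts
import {
  createInferenceProviderError,
  isInferenceProviderError,
  type ChatCompleteRetryConfiguration,
  type InferenceTaskProviderError,
} from '@kbn/inference-common';

// The retry settings can now be typed explicitly when shared across calls.
const defaultRetryConfiguration: ChatCompleteRetryConfiguration = {
  retryOn: 'auto',
  initialDelay: 1000,
  backoffMultiplier: 2,
};
```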

x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/api.ts (+38)

```diff
@@ -114,8 +114,46 @@ export type ChatCompleteOptions<
    * Optional metadata related to call execution.
    */
   metadata?: ChatCompleteMetadata;
+  /**
+   * The maximum amount of times to retry in case of error returned from the provider.
+   *
+   * Defaults to 3.
+   */
+  maxRetries?: number;
+  /**
+   * Optional configuration for the retry mechanism.
+   *
+   * Note that defaults are very fine, so only use this if you really have a reason to do so.
+   */
+  retryConfiguration?: ChatCompleteRetryConfiguration;
 } & TToolOptions;
 
+export interface ChatCompleteRetryConfiguration {
+  /**
+   * Defines the strategy for error retry.
+   *
+   * Either one of
+   * - all: will retry all errors
+   * - auto: will only retry errors that could be recoverable (e.g. rate limit, connectivity)
+   * Or a custom function to manually handle filtering
+   *
+   * Defaults to "auto"
+   */
+  retryOn?: 'all' | 'auto' | ((err: Error) => boolean);
+  /**
+   * The initial delay for incremental backoff, in ms.
+   *
+   * Defaults to 1000.
+   */
+  initialDelay?: number;
+  /**
+   * The backoff exponential multiplier.
+   *
+   * Defaults to 2.
+   */
+  backoffMultiplier?: number;
+}
+
 /**
  * Composite response type from the {@link ChatCompleteAPI},
  * which can be either an observable or a promise depending on
```
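The interface documents the knobs but not the backoff formula itself, which lives elsewhere in the commit. Assuming a conventional exponential backoff of `initialDelay * backoffMultiplier^(attempt - 1)`, the documented defaults would space retries roughly as in this illustrative sketch:

```ts
import type { ChatCompleteRetryConfiguration } from '@kbn/inference-common';

// Illustrative only: approximate wait before the nth retry (1-based),
// assuming classic exponential backoff with the documented defaults.
const delayForAttempt = (
  attempt: number,
  { initialDelay = 1000, backoffMultiplier = 2 }: ChatCompleteRetryConfiguration = {}
): number => initialDelay * Math.pow(backoffMultiplier, attempt - 1);

delayForAttempt(1); // 1000 ms
delayForAttempt(2); // 2000 ms
delayForAttempt(3); // 4000 ms
```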

x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/index.ts (+1)

```diff
@@ -12,6 +12,7 @@ export type {
   FunctionCallingMode,
   ChatCompleteStreamResponse,
   ChatCompleteResponse,
+  ChatCompleteRetryConfiguration,
 } from './api';
 export type {
   BoundChatCompleteAPI,
```

x-pack/platform/packages/shared/ai-infra/inference-common/src/errors.ts (+26)

```diff
@@ -11,6 +11,7 @@ import { InferenceTaskEventBase, InferenceTaskEventType } from './inference_task
  * Enum for generic inference error codes.
  */
 export enum InferenceTaskErrorCode {
+  providerError = 'providerError',
   internalError = 'internalError',
   requestError = 'requestError',
   abortedError = 'requestAborted',
@@ -62,6 +63,17 @@ export type InferenceTaskInternalError = InferenceTaskError<
   Record<string, any>
 >;
 
+/**
+ * Inference error thrown when calling the provider through its connector returned an error.
+ *
+ * It includes error responses returned from the provider,
+ * and any potential errors related to connectivity issue.
+ */
+export type InferenceTaskProviderError = InferenceTaskError<
+  InferenceTaskErrorCode.providerError,
+  { status?: number }
+>;
+
 /**
  * Inference error thrown when the request was considered invalid.
  *
@@ -92,6 +104,13 @@ export function createInferenceInternalError(
   return new InferenceTaskError(InferenceTaskErrorCode.internalError, message, meta ?? {});
 }
 
+export function createInferenceProviderError(
+  message = 'An internal error occurred',
+  meta?: { status?: number }
+): InferenceTaskProviderError {
+  return new InferenceTaskError(InferenceTaskErrorCode.providerError, message, meta ?? {});
+}
+
 export function createInferenceRequestError(
   message: string,
   status: number
@@ -136,3 +155,10 @@ export function isInferenceRequestError(error: unknown): error is InferenceTaskR
 export function isInferenceRequestAbortedError(error: unknown): error is InferenceTaskAbortedError {
   return isInferenceError(error) && error.code === InferenceTaskErrorCode.abortedError;
 }
+
+/**
+ * Check if the given error is an {@link InferenceTaskProviderError}
+ */
+export function isInferenceProviderError(error: unknown): error is InferenceTaskProviderError {
+  return isInferenceError(error) && error.code === InferenceTaskErrorCode.providerError;
+}
```
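The new factory and type guard follow the same pattern as the existing request/aborted helpers. A small usage sketch, where the wrapping helper and status value are hypothetical and only the two `@kbn/inference-common` exports come from this commit:

```ts
import {
  createInferenceProviderError,
  isInferenceProviderError,
} from '@kbn/inference-common';

// Hypothetical adapter-side helper wrapping a failed provider response.
const toProviderError = (status: number, body: string) =>
  createInferenceProviderError(`Provider returned ${status}: ${body}`, { status });

try {
  throw toProviderError(429, 'rate limit exceeded');
} catch (err) {
  if (isInferenceProviderError(err)) {
    // `err` is narrowed to InferenceTaskProviderError here; its metadata
    // carries the optional HTTP status captured by the factory.
    console.log('provider error status:', err.meta?.status);
  }
}
```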

x-pack/platform/packages/shared/ai-infra/inference-common/src/output/api.ts (+14 −1)

```diff
@@ -12,6 +12,7 @@ import {
   FromToolSchema,
   ToolSchema,
   ChatCompleteMetadata,
+  ChatCompleteRetryConfiguration,
 } from '../chat_complete';
 import { Output, OutputEvent } from './events';
 
@@ -114,7 +115,19 @@ export interface OutputOptions<
    */
   abortSignal?: AbortSignal;
   /**
-   * Optional configuration for retrying the call if an error occurs.
+   * The maximum amount of times to retry in case of error returned from the provider.
+   *
+   * Defaults to 3.
+   */
+  maxRetries?: number;
+  /**
+   * Optional configuration for the retry mechanism.
+   *
+   * Note that defaults are very fine, so only use this if you really have a reason to do so.
+   */
+  retryConfiguration?: ChatCompleteRetryConfiguration;
+  /**
+   * Optional configuration for retrying the call if output-specific error occurs.
    */
   retry?: {
     /**
```
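With this change the `output` API carries two distinct retry layers: the provider-level `maxRetries`/`retryConfiguration` forwarded to `chatComplete`, and the pre-existing output-specific `retry` option. A hedged sketch passing the new provider-level options, mirroring the shape exercised by the new unit test below; the id, connector, and input values are illustrative, and `output` is assumed to be an already-bound instance of this API:

```ts
const result = await output({
  id: 'extract_user_name',
  connectorId: 'my-connector',
  input: 'My name is John Doe',
  maxRetries: 2,
  retryConfiguration: {
    retryOn: 'auto',
    initialDelay: 500,
  },
});
```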

x-pack/platform/plugins/shared/inference/common/output/create_output_api.test.ts (+26)

```diff
@@ -220,4 +220,30 @@ describe('createOutputApi', () => {
       })
     );
   });
+
+  it('propagates retry options when provided', async () => {
+    chatComplete.mockResolvedValue(Promise.resolve({ content: 'content', toolCalls: [] }));
+
+    const output = createOutputApi(chatComplete);
+
+    await output({
+      id: 'id',
+      connectorId: '.my-connector',
+      input: 'input message',
+      maxRetries: 42,
+      retryConfiguration: {
+        retryOn: 'all',
+      },
+    });
+
+    expect(chatComplete).toHaveBeenCalledTimes(1);
+    expect(chatComplete).toHaveBeenCalledWith(
+      expect.objectContaining({
+        maxRetries: 42,
+        retryConfiguration: {
+          retryOn: 'all',
+        },
+      })
+    );
+  });
 });
```

x-pack/platform/plugins/shared/inference/common/output/create_output_api.ts (+4)

```diff
@@ -36,6 +36,8 @@ export function createOutputApi(chatCompleteApi: ChatCompleteAPI) {
     functionCalling,
     stream,
     abortSignal,
+    maxRetries,
+    retryConfiguration,
     metadata,
     retry,
   }: DefaultOutputOptions): OutputCompositeResponse<string, ToolSchema | undefined, boolean> {
@@ -57,6 +59,8 @@ export function createOutputApi(chatCompleteApi: ChatCompleteAPI) {
       modelName,
       functionCalling,
       abortSignal,
+      maxRetries,
+      retryConfiguration,
       metadata,
       system,
       messages,
```
