Update inference types (automated commit)

Wauplin · github-actions[bot] · commit 4662b7b64aa2 · 2025-02-04T10:21:53.000Z
diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py
@@ -92,6 +92,7 @@
     TextGenerationInputGrammarType,
     TextGenerationOutput,
     TextGenerationStreamOutput,
+    TextToImageTargetSize,
     TextToSpeechEarlyStoppingEnum,
     TokenClassificationAggregationStrategy,
     TokenClassificationOutputElement,
@@ -2403,6 +2404,7 @@ def text_to_image(
         scheduler: Optional[str] = None,
         seed: Optional[int] = None,
         extra_body: Optional[Dict[str, Any]] = None,
+        target_size: Optional[TextToImageTargetSize] = None,
     ) -> "Image":
         """
         Generate an image based on a given text using a specified model.
@@ -2443,6 +2445,8 @@ def text_to_image(
             extra_body (`Dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
+            target_size (`TextToImageTargetSize`, *optional*):
+                The size in pixels of the output image.
 
         Returns:
             `Image`: The generated image.
diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py
@@ -77,6 +77,7 @@
     TextGenerationInputGrammarType,
     TextGenerationOutput,
     TextGenerationStreamOutput,
+    TextToImageTargetSize,
     TextToSpeechEarlyStoppingEnum,
     TokenClassificationAggregationStrategy,
     TokenClassificationOutputElement,
@@ -2459,6 +2460,7 @@ async def text_to_image(
         scheduler: Optional[str] = None,
         seed: Optional[int] = None,
         extra_body: Optional[Dict[str, Any]] = None,
+        target_size: Optional[TextToImageTargetSize] = None,
     ) -> "Image":
         """
         Generate an image based on a given text using a specified model.
@@ -2499,6 +2501,8 @@ async def text_to_image(
             extra_body (`Dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
+            target_size (`TextToImageTargetSize`, *optional*):
+                The size in pixels of the output image.
 
         Returns:
             `Image`: The generated image.
diff --git a/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py b/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py
@@ -76,11 +76,10 @@ class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):
 class AutomaticSpeechRecognitionParameters(BaseInferenceType):
     """Additional inference parameters for Automatic Speech Recognition"""
 
+    generation_parameters: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
+    """Parametrization of the text generation process"""
     return_timestamps: Optional[bool] = None
     """Whether to output corresponding timestamps with the generated text"""
-    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
-    generate_kwargs: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
-    """Parametrization of the text generation process"""
 
 
 @dataclass
@@ -99,7 +98,7 @@ class AutomaticSpeechRecognitionInput(BaseInferenceType):
 class AutomaticSpeechRecognitionOutputChunk(BaseInferenceType):
     text: str
     """A chunk of text identified by the model"""
-    timestamp: List[float]
+    timestamps: List[float]
     """The start and end timestamps corresponding with the text"""
 
 
diff --git a/src/huggingface_hub/inference/_generated/types/feature_extraction.py b/src/huggingface_hub/inference/_generated/types/feature_extraction.py
@@ -4,7 +4,7 @@
 #   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
 #   - specs:  https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
 from dataclasses import dataclass
-from typing import Literal, Optional
+from typing import List, Literal, Optional, Union
 
 from .base import BaseInferenceType
 
@@ -20,8 +20,8 @@ class FeatureExtractionInput(BaseInferenceType):
     https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tei-import.ts.
     """
 
-    inputs: str
-    """The text to embed."""
+    inputs: Union[List[str], str]
+    """The text or list of texts to embed."""
     normalize: Optional[bool] = None
     prompt_name: Optional[str] = None
     """The name of the prompt that should be used by for encoding. If not set, no prompt
diff --git a/src/huggingface_hub/inference/_generated/types/image_to_text.py b/src/huggingface_hub/inference/_generated/types/image_to_text.py
@@ -76,11 +76,10 @@ class ImageToTextGenerationParameters(BaseInferenceType):
 class ImageToTextParameters(BaseInferenceType):
     """Additional inference parameters for Image To Text"""
 
+    generation_parameters: Optional[ImageToTextGenerationParameters] = None
+    """Parametrization of the text generation process"""
     max_new_tokens: Optional[int] = None
     """The amount of maximum tokens to generate."""
-    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
-    generate_kwargs: Optional[ImageToTextGenerationParameters] = None
-    """Parametrization of the text generation process"""
 
 
 @dataclass
diff --git a/src/huggingface_hub/inference/_generated/types/text_to_audio.py b/src/huggingface_hub/inference/_generated/types/text_to_audio.py
@@ -76,8 +76,7 @@ class TextToAudioGenerationParameters(BaseInferenceType):
 class TextToAudioParameters(BaseInferenceType):
     """Additional inference parameters for Text To Audio"""
 
-    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
-    generate_kwargs: Optional[TextToAudioGenerationParameters] = None
+    generation_parameters: Optional[TextToAudioGenerationParameters] = None
     """Parametrization of the text generation process"""
 
 
diff --git a/src/huggingface_hub/inference/_generated/types/text_to_image.py b/src/huggingface_hub/inference/_generated/types/text_to_image.py
@@ -35,6 +35,8 @@ class TextToImageParameters(BaseInferenceType):
     """Override the scheduler with a compatible one."""
     seed: Optional[int] = None
     """Seed for the random number generator."""
+    target_size: Optional[TextToImageTargetSize] = None
+    """The size in pixels of the output image."""
 
 
 @dataclass
diff --git a/src/huggingface_hub/inference/_generated/types/text_to_speech.py b/src/huggingface_hub/inference/_generated/types/text_to_speech.py
@@ -76,8 +76,7 @@ class TextToSpeechGenerationParameters(BaseInferenceType):
 class TextToSpeechParameters(BaseInferenceType):
     """Additional inference parameters for Text To Speech"""
 
-    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
-    generate_kwargs: Optional[TextToSpeechGenerationParameters] = None
+    generation_parameters: Optional[TextToSpeechGenerationParameters] = None
     """Parametrization of the text generation process"""