From e8ed5f74189caae7921f28ff1300f5080112433a Mon Sep 17 00:00:00 2001
From: Wauplin <11801849+Wauplin@users.noreply.github.com>
Date: Tue, 4 Feb 2025 10:25:49 +0000
Subject: [PATCH 1/4] Update inference types (automated commit)

---
 docs/source/en/package_reference/inference_types.md  |  2 --
 docs/source/ko/package_reference/inference_types.md  |  2 --
 src/huggingface_hub/inference/_client.py             |  8 ++++----
 .../inference/_generated/_async_client.py            |  8 ++++----
 .../inference/_generated/types/__init__.py           |  2 +-
 .../_generated/types/automatic_speech_recognition.py |  5 ++---
 .../inference/_generated/types/feature_extraction.py |  6 +++---
 .../inference/_generated/types/image_to_text.py      |  5 ++---
 .../inference/_generated/types/text_to_audio.py      |  3 +--
 .../inference/_generated/types/text_to_image.py      | 12 ++++--------
 .../inference/_generated/types/text_to_speech.py     |  3 +--
 11 files changed, 22 insertions(+), 34 deletions(-)

diff --git a/docs/source/en/package_reference/inference_types.md b/docs/source/en/package_reference/inference_types.md
index ca2f039ffe..ba62f63904 100644
--- a/docs/source/en/package_reference/inference_types.md
+++ b/docs/source/en/package_reference/inference_types.md
@@ -309,8 +309,6 @@ This part of the lib is still under development and will be improved in future r
 
 [[autodoc]] huggingface_hub.TextToImageParameters
 
-[[autodoc]] huggingface_hub.TextToImageTargetSize
-
 
 
 ## text_to_speech
diff --git a/docs/source/ko/package_reference/inference_types.md b/docs/source/ko/package_reference/inference_types.md
index 08063558a7..1dc7c8c0f5 100644
--- a/docs/source/ko/package_reference/inference_types.md
+++ b/docs/source/ko/package_reference/inference_types.md
@@ -308,8 +308,6 @@ rendered properly in your Markdown viewer.
 
 [[autodoc]] huggingface_hub.TextToImageParameters
 
-[[autodoc]] huggingface_hub.TextToImageTargetSize
-
 
 
 ## text_to_speech[[huggingface_hub.TextToSpeechGenerationParameters]]
diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py
index 932c3aa98e..10a761dd5f 100644
--- a/src/huggingface_hub/inference/_client.py
+++ b/src/huggingface_hub/inference/_client.py
@@ -2395,8 +2395,8 @@ def text_to_image(
         prompt: str,
         *,
         negative_prompt: Optional[str] = None,
-        height: Optional[float] = None,
-        width: Optional[float] = None,
+        height: Optional[int] = None,
+        width: Optional[int] = None,
         num_inference_steps: Optional[int] = None,
         guidance_scale: Optional[float] = None,
         model: Optional[str] = None,
@@ -2422,8 +2422,8 @@ def text_to_image(
                 The prompt to generate an image from.
             negative_prompt (`str`, *optional*):
                 One prompt to guide what NOT to include in image generation.
-            height (`float`, *optional*):
-                The height in pixels of the image to generate.
+            height (`int`, *optional*):
+                The height in pixels of the output image
             width (`float`, *optional*):
                 The width in pixels of the image to generate.
             num_inference_steps (`int`, *optional*):
diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py
index 7503f35ac3..ed430ee208 100644
--- a/src/huggingface_hub/inference/_generated/_async_client.py
+++ b/src/huggingface_hub/inference/_generated/_async_client.py
@@ -2451,8 +2451,8 @@ async def text_to_image(
         prompt: str,
         *,
         negative_prompt: Optional[str] = None,
-        height: Optional[float] = None,
-        width: Optional[float] = None,
+        height: Optional[int] = None,
+        width: Optional[int] = None,
         num_inference_steps: Optional[int] = None,
         guidance_scale: Optional[float] = None,
         model: Optional[str] = None,
@@ -2478,8 +2478,8 @@ async def text_to_image(
                 The prompt to generate an image from.
             negative_prompt (`str`, *optional*):
                 One prompt to guide what NOT to include in image generation.
-            height (`float`, *optional*):
-                The height in pixels of the image to generate.
+            height (`int`, *optional*):
+                The height in pixels of the output image
             width (`float`, *optional*):
                 The width in pixels of the image to generate.
             num_inference_steps (`int`, *optional*):
diff --git a/src/huggingface_hub/inference/_generated/types/__init__.py b/src/huggingface_hub/inference/_generated/types/__init__.py
index 137c3c3e23..edbc967cb6 100644
--- a/src/huggingface_hub/inference/_generated/types/__init__.py
+++ b/src/huggingface_hub/inference/_generated/types/__init__.py
@@ -141,7 +141,7 @@
     TextToAudioOutput,
     TextToAudioParameters,
 )
-from .text_to_image import TextToImageInput, TextToImageOutput, TextToImageParameters, TextToImageTargetSize
+from .text_to_image import TextToImageInput, TextToImageOutput, TextToImageParameters
 from .text_to_speech import (
     TextToSpeechEarlyStoppingEnum,
     TextToSpeechGenerationParameters,
diff --git a/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py b/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py
index 083461f6a9..2bed5f9d87 100644
--- a/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py
+++ b/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py
@@ -76,11 +76,10 @@ class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):
 class AutomaticSpeechRecognitionParameters(BaseInferenceType):
     """Additional inference parameters for Automatic Speech Recognition"""
 
+    generation_parameters: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
+    """Parametrization of the text generation process"""
     return_timestamps: Optional[bool] = None
     """Whether to output corresponding timestamps with the generated text"""
-    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
-    generate_kwargs: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
-    """Parametrization of the text generation process"""
 
 
 @dataclass
diff --git a/src/huggingface_hub/inference/_generated/types/feature_extraction.py b/src/huggingface_hub/inference/_generated/types/feature_extraction.py
index 2c43e82cc6..61d0e5d5ec 100644
--- a/src/huggingface_hub/inference/_generated/types/feature_extraction.py
+++ b/src/huggingface_hub/inference/_generated/types/feature_extraction.py
@@ -4,7 +4,7 @@
 #   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
 #   - specs:  https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
 from dataclasses import dataclass
-from typing import Literal, Optional
+from typing import List, Literal, Optional, Union
 
 from .base import BaseInferenceType
 
@@ -20,8 +20,8 @@ class FeatureExtractionInput(BaseInferenceType):
     https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tei-import.ts.
     """
 
-    inputs: str
-    """The text to embed."""
+    inputs: Union[List[str], str]
+    """The text or list of texts to embed."""
     normalize: Optional[bool] = None
     prompt_name: Optional[str] = None
     """The name of the prompt that should be used by for encoding. If not set, no prompt
diff --git a/src/huggingface_hub/inference/_generated/types/image_to_text.py b/src/huggingface_hub/inference/_generated/types/image_to_text.py
index d00ae3cf9c..b0c47b3f44 100644
--- a/src/huggingface_hub/inference/_generated/types/image_to_text.py
+++ b/src/huggingface_hub/inference/_generated/types/image_to_text.py
@@ -76,11 +76,10 @@ class ImageToTextGenerationParameters(BaseInferenceType):
 class ImageToTextParameters(BaseInferenceType):
     """Additional inference parameters for Image To Text"""
 
+    generation_parameters: Optional[ImageToTextGenerationParameters] = None
+    """Parametrization of the text generation process"""
     max_new_tokens: Optional[int] = None
     """The amount of maximum tokens to generate."""
-    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
-    generate_kwargs: Optional[ImageToTextGenerationParameters] = None
-    """Parametrization of the text generation process"""
 
 
 @dataclass
diff --git a/src/huggingface_hub/inference/_generated/types/text_to_audio.py b/src/huggingface_hub/inference/_generated/types/text_to_audio.py
index b57fadb86f..59380ddf94 100644
--- a/src/huggingface_hub/inference/_generated/types/text_to_audio.py
+++ b/src/huggingface_hub/inference/_generated/types/text_to_audio.py
@@ -76,8 +76,7 @@ class TextToAudioGenerationParameters(BaseInferenceType):
 class TextToAudioParameters(BaseInferenceType):
     """Additional inference parameters for Text To Audio"""
 
-    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
-    generate_kwargs: Optional[TextToAudioGenerationParameters] = None
+    generation_parameters: Optional[TextToAudioGenerationParameters] = None
     """Parametrization of the text generation process"""
 
 
diff --git a/src/huggingface_hub/inference/_generated/types/text_to_image.py b/src/huggingface_hub/inference/_generated/types/text_to_image.py
index 57d10aedd1..70918a6351 100644
--- a/src/huggingface_hub/inference/_generated/types/text_to_image.py
+++ b/src/huggingface_hub/inference/_generated/types/text_to_image.py
@@ -9,14 +9,6 @@
 from .base import BaseInferenceType
 
 
-@dataclass
-class TextToImageTargetSize(BaseInferenceType):
-    """The size in pixel of the output image"""
-
-    height: int
-    width: int
-
-
 @dataclass
 class TextToImageParameters(BaseInferenceType):
     """Additional inference parameters for Text To Image"""
@@ -25,6 +17,8 @@ class TextToImageParameters(BaseInferenceType):
     """A higher guidance scale value encourages the model to generate images closely linked to
     the text prompt, but values too high may cause saturation and other artifacts.
     """
+    height: Optional[int] = None
+    """The height in pixels of the output image"""
     negative_prompt: Optional[str] = None
     """One prompt to guide what NOT to include in image generation."""
     num_inference_steps: Optional[int] = None
@@ -35,6 +29,8 @@ class TextToImageParameters(BaseInferenceType):
     """Override the scheduler with a compatible one."""
     seed: Optional[int] = None
     """Seed for the random number generator."""
+    width: Optional[int] = None
+    """The width in pixels of the output image"""
 
 
 @dataclass
diff --git a/src/huggingface_hub/inference/_generated/types/text_to_speech.py b/src/huggingface_hub/inference/_generated/types/text_to_speech.py
index 20bcd27965..dc7b6ee4f7 100644
--- a/src/huggingface_hub/inference/_generated/types/text_to_speech.py
+++ b/src/huggingface_hub/inference/_generated/types/text_to_speech.py
@@ -76,8 +76,7 @@ class TextToSpeechGenerationParameters(BaseInferenceType):
 class TextToSpeechParameters(BaseInferenceType):
     """Additional inference parameters for Text To Speech"""
 
-    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
-    generate_kwargs: Optional[TextToSpeechGenerationParameters] = None
+    generation_parameters: Optional[TextToSpeechGenerationParameters] = None
     """Parametrization of the text generation process"""
 
 

From e52fb6b250c731ae2014dcee465f7c8146ef0549 Mon Sep 17 00:00:00 2001
From: Lucain Pouget <lucainp@gmail.com>
Date: Tue, 4 Feb 2025 12:23:57 +0100
Subject: [PATCH 2/4] Fix width docstring type (float -> int) in text_to_image

---
 src/huggingface_hub/inference/_client.py                  | 4 ++--
 src/huggingface_hub/inference/_generated/_async_client.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py
index 10a761dd5f..98b951bd40 100644
--- a/src/huggingface_hub/inference/_client.py
+++ b/src/huggingface_hub/inference/_client.py
@@ -2424,8 +2424,8 @@ def text_to_image(
                 One prompt to guide what NOT to include in image generation.
             height (`int`, *optional*):
                 The height in pixels of the output image
-            width (`float`, *optional*):
-                The width in pixels of the image to generate.
+            width (`int`, *optional*):
+                The width in pixels of the output image
             num_inference_steps (`int`, *optional*):
                 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                 expense of slower inference.
diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py
index ed430ee208..3c814aca91 100644
--- a/src/huggingface_hub/inference/_generated/_async_client.py
+++ b/src/huggingface_hub/inference/_generated/_async_client.py
@@ -2480,8 +2480,8 @@ async def text_to_image(
                 One prompt to guide what NOT to include in image generation.
             height (`int`, *optional*):
                 The height in pixels of the output image
-            width (`float`, *optional*):
-                The width in pixels of the image to generate.
+            width (`int`, *optional*):
+                The width in pixels of the output image
             num_inference_steps (`int`, *optional*):
                 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                 expense of slower inference.

From 1495efd99808d9ff4a3f1a04e357fb552a5fab70 Mon Sep 17 00:00:00 2001
From: Lucain Pouget <lucainp@gmail.com>
Date: Tue, 4 Feb 2025 12:29:32 +0100
Subject: [PATCH 3/4] Temporarily revert generate_kwargs -> generation_parameters rename

---
 .../_generated/types/automatic_speech_recognition.py         | 5 +++--
 .../inference/_generated/types/image_to_text.py              | 5 +++--
 .../inference/_generated/types/text_to_audio.py              | 3 ++-
 .../inference/_generated/types/text_to_speech.py             | 3 ++-
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py b/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py
index 2bed5f9d87..083461f6a9 100644
--- a/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py
+++ b/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py
@@ -76,10 +76,11 @@ class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):
 class AutomaticSpeechRecognitionParameters(BaseInferenceType):
     """Additional inference parameters for Automatic Speech Recognition"""
 
-    generation_parameters: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
-    """Parametrization of the text generation process"""
     return_timestamps: Optional[bool] = None
     """Whether to output corresponding timestamps with the generated text"""
+    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
+    generate_kwargs: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
+    """Parametrization of the text generation process"""
 
 
 @dataclass
diff --git a/src/huggingface_hub/inference/_generated/types/image_to_text.py b/src/huggingface_hub/inference/_generated/types/image_to_text.py
index b0c47b3f44..d00ae3cf9c 100644
--- a/src/huggingface_hub/inference/_generated/types/image_to_text.py
+++ b/src/huggingface_hub/inference/_generated/types/image_to_text.py
@@ -76,10 +76,11 @@ class ImageToTextGenerationParameters(BaseInferenceType):
 class ImageToTextParameters(BaseInferenceType):
     """Additional inference parameters for Image To Text"""
 
-    generation_parameters: Optional[ImageToTextGenerationParameters] = None
-    """Parametrization of the text generation process"""
     max_new_tokens: Optional[int] = None
     """The amount of maximum tokens to generate."""
+    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
+    generate_kwargs: Optional[ImageToTextGenerationParameters] = None
+    """Parametrization of the text generation process"""
 
 
 @dataclass
diff --git a/src/huggingface_hub/inference/_generated/types/text_to_audio.py b/src/huggingface_hub/inference/_generated/types/text_to_audio.py
index 59380ddf94..b57fadb86f 100644
--- a/src/huggingface_hub/inference/_generated/types/text_to_audio.py
+++ b/src/huggingface_hub/inference/_generated/types/text_to_audio.py
@@ -76,7 +76,8 @@ class TextToAudioGenerationParameters(BaseInferenceType):
 class TextToAudioParameters(BaseInferenceType):
     """Additional inference parameters for Text To Audio"""
 
-    generation_parameters: Optional[TextToAudioGenerationParameters] = None
+    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
+    generate_kwargs: Optional[TextToAudioGenerationParameters] = None
     """Parametrization of the text generation process"""
 
 
diff --git a/src/huggingface_hub/inference/_generated/types/text_to_speech.py b/src/huggingface_hub/inference/_generated/types/text_to_speech.py
index dc7b6ee4f7..20bcd27965 100644
--- a/src/huggingface_hub/inference/_generated/types/text_to_speech.py
+++ b/src/huggingface_hub/inference/_generated/types/text_to_speech.py
@@ -76,7 +76,8 @@ class TextToSpeechGenerationParameters(BaseInferenceType):
 class TextToSpeechParameters(BaseInferenceType):
     """Additional inference parameters for Text To Speech"""
 
-    generation_parameters: Optional[TextToSpeechGenerationParameters] = None
+    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
+    generate_kwargs: Optional[TextToSpeechGenerationParameters] = None
     """Parametrization of the text generation process"""
 
 

From 80461b826d9d397dcd53aaefdc13c0df01f8f5b1 Mon Sep 17 00:00:00 2001
From: Lucain Pouget <lucainp@gmail.com>
Date: Tue, 4 Feb 2025 12:30:09 +0100
Subject: [PATCH 4/4] Remove TextToImageTargetSize from package exports (code quality)

---
 src/huggingface_hub/__init__.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py
index fe2fb433e2..38b153ac79 100644
--- a/src/huggingface_hub/__init__.py
+++ b/src/huggingface_hub/__init__.py
@@ -392,7 +392,6 @@
         "TextToImageInput",
         "TextToImageOutput",
         "TextToImageParameters",
-        "TextToImageTargetSize",
         "TextToSpeechEarlyStoppingEnum",
         "TextToSpeechGenerationParameters",
         "TextToSpeechInput",
@@ -702,7 +701,6 @@
     "TextToImageInput",
     "TextToImageOutput",
     "TextToImageParameters",
-    "TextToImageTargetSize",
     "TextToSpeechEarlyStoppingEnum",
     "TextToSpeechGenerationParameters",
     "TextToSpeechInput",
@@ -1334,7 +1332,6 @@ def __dir__():
         TextToImageInput,  # noqa: F401
         TextToImageOutput,  # noqa: F401
         TextToImageParameters,  # noqa: F401
-        TextToImageTargetSize,  # noqa: F401
         TextToSpeechEarlyStoppingEnum,  # noqa: F401
         TextToSpeechGenerationParameters,  # noqa: F401
         TextToSpeechInput,  # noqa: F401