From e8ed5f74189caae7921f28ff1300f5080112433a Mon Sep 17 00:00:00 2001 From: Wauplin <11801849+Wauplin@users.noreply.github.com> Date: Tue, 4 Feb 2025 10:25:49 +0000 Subject: [PATCH 1/4] Update inference types (automated commit) --- docs/source/en/package_reference/inference_types.md | 2 -- docs/source/ko/package_reference/inference_types.md | 2 -- src/huggingface_hub/inference/_client.py | 8 ++++---- .../inference/_generated/_async_client.py | 8 ++++---- .../inference/_generated/types/__init__.py | 2 +- .../_generated/types/automatic_speech_recognition.py | 5 ++--- .../inference/_generated/types/feature_extraction.py | 6 +++--- .../inference/_generated/types/image_to_text.py | 5 ++--- .../inference/_generated/types/text_to_audio.py | 3 +-- .../inference/_generated/types/text_to_image.py | 12 ++++-------- .../inference/_generated/types/text_to_speech.py | 3 +-- 11 files changed, 22 insertions(+), 34 deletions(-) diff --git a/docs/source/en/package_reference/inference_types.md b/docs/source/en/package_reference/inference_types.md index ca2f039ffe..ba62f63904 100644 --- a/docs/source/en/package_reference/inference_types.md +++ b/docs/source/en/package_reference/inference_types.md @@ -309,8 +309,6 @@ This part of the lib is still under development and will be improved in future r [[autodoc]] huggingface_hub.TextToImageParameters -[[autodoc]] huggingface_hub.TextToImageTargetSize - ## text_to_speech diff --git a/docs/source/ko/package_reference/inference_types.md b/docs/source/ko/package_reference/inference_types.md index 08063558a7..1dc7c8c0f5 100644 --- a/docs/source/ko/package_reference/inference_types.md +++ b/docs/source/ko/package_reference/inference_types.md @@ -308,8 +308,6 @@ rendered properly in your Markdown viewer. [[autodoc]] huggingface_hub.TextToImageParameters -[[autodoc]] huggingface_hub.TextToImageTargetSize - ## text_to_speech[[huggingface_hub.TextToSpeechGenerationParameters]] diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py index 932c3aa98e..10a761dd5f 100644 --- a/src/huggingface_hub/inference/_client.py +++ b/src/huggingface_hub/inference/_client.py @@ -2395,8 +2395,8 @@ def text_to_image( prompt: str, *, negative_prompt: Optional[str] = None, - height: Optional[float] = None, - width: Optional[float] = None, + height: Optional[int] = None, + width: Optional[int] = None, num_inference_steps: Optional[int] = None, guidance_scale: Optional[float] = None, model: Optional[str] = None, @@ -2422,8 +2422,8 @@ def text_to_image( The prompt to generate an image from. negative_prompt (`str`, *optional*): One prompt to guide what NOT to include in image generation. - height (`float`, *optional*): - The height in pixels of the image to generate. + height (`int`, *optional*): + The height in pixels of the output image width (`float`, *optional*): The width in pixels of the image to generate. num_inference_steps (`int`, *optional*): diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py index 7503f35ac3..ed430ee208 100644 --- a/src/huggingface_hub/inference/_generated/_async_client.py +++ b/src/huggingface_hub/inference/_generated/_async_client.py @@ -2451,8 +2451,8 @@ async def text_to_image( prompt: str, *, negative_prompt: Optional[str] = None, - height: Optional[float] = None, - width: Optional[float] = None, + height: Optional[int] = None, + width: Optional[int] = None, num_inference_steps: Optional[int] = None, guidance_scale: Optional[float] = None, model: Optional[str] = None, @@ -2478,8 +2478,8 @@ async def text_to_image( The prompt to generate an image from. negative_prompt (`str`, *optional*): One prompt to guide what NOT to include in image generation. - height (`float`, *optional*): - The height in pixels of the image to generate. + height (`int`, *optional*): + The height in pixels of the output image width (`float`, *optional*): The width in pixels of the image to generate. num_inference_steps (`int`, *optional*): diff --git a/src/huggingface_hub/inference/_generated/types/__init__.py b/src/huggingface_hub/inference/_generated/types/__init__.py index 137c3c3e23..edbc967cb6 100644 --- a/src/huggingface_hub/inference/_generated/types/__init__.py +++ b/src/huggingface_hub/inference/_generated/types/__init__.py @@ -141,7 +141,7 @@ TextToAudioOutput, TextToAudioParameters, ) -from .text_to_image import TextToImageInput, TextToImageOutput, TextToImageParameters, TextToImageTargetSize +from .text_to_image import TextToImageInput, TextToImageOutput, TextToImageParameters from .text_to_speech import ( TextToSpeechEarlyStoppingEnum, TextToSpeechGenerationParameters, diff --git a/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py b/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py index 083461f6a9..2bed5f9d87 100644 --- a/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +++ b/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py @@ -76,11 +76,10 @@ class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType): class AutomaticSpeechRecognitionParameters(BaseInferenceType): """Additional inference parameters for Automatic Speech Recognition""" + generation_parameters: Optional[AutomaticSpeechRecognitionGenerationParameters] = None + """Parametrization of the text generation process""" return_timestamps: Optional[bool] = None """Whether to output corresponding timestamps with the generated text""" - # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers - generate_kwargs: Optional[AutomaticSpeechRecognitionGenerationParameters] = None - """Parametrization of the text generation process""" @dataclass diff --git a/src/huggingface_hub/inference/_generated/types/feature_extraction.py b/src/huggingface_hub/inference/_generated/types/feature_extraction.py index 2c43e82cc6..61d0e5d5ec 100644 --- a/src/huggingface_hub/inference/_generated/types/feature_extraction.py +++ b/src/huggingface_hub/inference/_generated/types/feature_extraction.py @@ -4,7 +4,7 @@ # - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts # - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks. from dataclasses import dataclass -from typing import Literal, Optional +from typing import List, Literal, Optional, Union from .base import BaseInferenceType @@ -20,8 +20,8 @@ class FeatureExtractionInput(BaseInferenceType): https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tei-import.ts. """ - inputs: str - """The text to embed.""" + inputs: Union[List[str], str] + """The text or list of texts to embed.""" normalize: Optional[bool] = None prompt_name: Optional[str] = None """The name of the prompt that should be used by for encoding. If not set, no prompt diff --git a/src/huggingface_hub/inference/_generated/types/image_to_text.py b/src/huggingface_hub/inference/_generated/types/image_to_text.py index d00ae3cf9c..b0c47b3f44 100644 --- a/src/huggingface_hub/inference/_generated/types/image_to_text.py +++ b/src/huggingface_hub/inference/_generated/types/image_to_text.py @@ -76,11 +76,10 @@ class ImageToTextGenerationParameters(BaseInferenceType): class ImageToTextParameters(BaseInferenceType): """Additional inference parameters for Image To Text""" + generation_parameters: Optional[ImageToTextGenerationParameters] = None + """Parametrization of the text generation process""" max_new_tokens: Optional[int] = None """The amount of maximum tokens to generate.""" - # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers - generate_kwargs: Optional[ImageToTextGenerationParameters] = None - """Parametrization of the text generation process""" @dataclass diff --git a/src/huggingface_hub/inference/_generated/types/text_to_audio.py b/src/huggingface_hub/inference/_generated/types/text_to_audio.py index b57fadb86f..59380ddf94 100644 --- a/src/huggingface_hub/inference/_generated/types/text_to_audio.py +++ b/src/huggingface_hub/inference/_generated/types/text_to_audio.py @@ -76,8 +76,7 @@ class TextToAudioGenerationParameters(BaseInferenceType): class TextToAudioParameters(BaseInferenceType): """Additional inference parameters for Text To Audio""" - # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers - generate_kwargs: Optional[TextToAudioGenerationParameters] = None + generation_parameters: Optional[TextToAudioGenerationParameters] = None """Parametrization of the text generation process""" diff --git a/src/huggingface_hub/inference/_generated/types/text_to_image.py b/src/huggingface_hub/inference/_generated/types/text_to_image.py index 57d10aedd1..70918a6351 100644 --- a/src/huggingface_hub/inference/_generated/types/text_to_image.py +++ b/src/huggingface_hub/inference/_generated/types/text_to_image.py @@ -9,14 +9,6 @@ from .base import BaseInferenceType -@dataclass -class TextToImageTargetSize(BaseInferenceType): - """The size in pixel of the output image""" - - height: int - width: int - - @dataclass class TextToImageParameters(BaseInferenceType): """Additional inference parameters for Text To Image""" @@ -25,6 +17,8 @@ class TextToImageParameters(BaseInferenceType): """A higher guidance scale value encourages the model to generate images closely linked to the text prompt, but values too high may cause saturation and other artifacts. """ + height: Optional[int] = None + """The height in pixels of the output image""" negative_prompt: Optional[str] = None """One prompt to guide what NOT to include in image generation.""" num_inference_steps: Optional[int] = None @@ -35,6 +29,8 @@ class TextToImageParameters(BaseInferenceType): """Override the scheduler with a compatible one.""" seed: Optional[int] = None """Seed for the random number generator.""" + width: Optional[int] = None + """The width in pixels of the output image""" @dataclass diff --git a/src/huggingface_hub/inference/_generated/types/text_to_speech.py b/src/huggingface_hub/inference/_generated/types/text_to_speech.py index 20bcd27965..dc7b6ee4f7 100644 --- a/src/huggingface_hub/inference/_generated/types/text_to_speech.py +++ b/src/huggingface_hub/inference/_generated/types/text_to_speech.py @@ -76,8 +76,7 @@ class TextToSpeechGenerationParameters(BaseInferenceType): class TextToSpeechParameters(BaseInferenceType): """Additional inference parameters for Text To Speech""" - # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers - generate_kwargs: Optional[TextToSpeechGenerationParameters] = None + generation_parameters: Optional[TextToSpeechGenerationParameters] = None """Parametrization of the text generation process""" From e52fb6b250c731ae2014dcee465f7c8146ef0549 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Tue, 4 Feb 2025 12:23:57 +0100 Subject: [PATCH 2/4] widget --- src/huggingface_hub/inference/_client.py | 4 ++-- src/huggingface_hub/inference/_generated/_async_client.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py index 10a761dd5f..98b951bd40 100644 --- a/src/huggingface_hub/inference/_client.py +++ b/src/huggingface_hub/inference/_client.py @@ -2424,8 +2424,8 @@ def text_to_image( One prompt to guide what NOT to include in image generation. height (`int`, *optional*): The height in pixels of the output image - width (`float`, *optional*): - The width in pixels of the image to generate. + width (`int`, *optional*): + The width in pixels of the output image num_inference_steps (`int`, *optional*): The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py index ed430ee208..3c814aca91 100644 --- a/src/huggingface_hub/inference/_generated/_async_client.py +++ b/src/huggingface_hub/inference/_generated/_async_client.py @@ -2480,8 +2480,8 @@ async def text_to_image( One prompt to guide what NOT to include in image generation. height (`int`, *optional*): The height in pixels of the output image - width (`float`, *optional*): - The width in pixels of the image to generate. + width (`int`, *optional*): + The width in pixels of the output image num_inference_steps (`int`, *optional*): The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. From 1495efd99808d9ff4a3f1a04e357fb552a5fab70 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Tue, 4 Feb 2025 12:29:32 +0100 Subject: [PATCH 3/4] tmp revert generate_kwargs --- .../_generated/types/automatic_speech_recognition.py | 5 +++-- .../inference/_generated/types/image_to_text.py | 5 +++-- .../inference/_generated/types/text_to_audio.py | 3 ++- .../inference/_generated/types/text_to_speech.py | 3 ++- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py b/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py index 2bed5f9d87..083461f6a9 100644 --- a/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +++ b/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py @@ -76,10 +76,11 @@ class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType): class AutomaticSpeechRecognitionParameters(BaseInferenceType): """Additional inference parameters for Automatic Speech Recognition""" - generation_parameters: Optional[AutomaticSpeechRecognitionGenerationParameters] = None - """Parametrization of the text generation process""" return_timestamps: Optional[bool] = None """Whether to output corresponding timestamps with the generated text""" + # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers + generate_kwargs: Optional[AutomaticSpeechRecognitionGenerationParameters] = None + """Parametrization of the text generation process""" @dataclass diff --git a/src/huggingface_hub/inference/_generated/types/image_to_text.py b/src/huggingface_hub/inference/_generated/types/image_to_text.py index b0c47b3f44..d00ae3cf9c 100644 --- a/src/huggingface_hub/inference/_generated/types/image_to_text.py +++ b/src/huggingface_hub/inference/_generated/types/image_to_text.py @@ -76,10 +76,11 @@ class ImageToTextGenerationParameters(BaseInferenceType): class ImageToTextParameters(BaseInferenceType): """Additional inference parameters for Image To Text""" - generation_parameters: Optional[ImageToTextGenerationParameters] = None - """Parametrization of the text generation process""" max_new_tokens: Optional[int] = None """The amount of maximum tokens to generate.""" + # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers + generate_kwargs: Optional[ImageToTextGenerationParameters] = None + """Parametrization of the text generation process""" @dataclass diff --git a/src/huggingface_hub/inference/_generated/types/text_to_audio.py b/src/huggingface_hub/inference/_generated/types/text_to_audio.py index 59380ddf94..b57fadb86f 100644 --- a/src/huggingface_hub/inference/_generated/types/text_to_audio.py +++ b/src/huggingface_hub/inference/_generated/types/text_to_audio.py @@ -76,7 +76,8 @@ class TextToAudioGenerationParameters(BaseInferenceType): class TextToAudioParameters(BaseInferenceType): """Additional inference parameters for Text To Audio""" - generation_parameters: Optional[TextToAudioGenerationParameters] = None + # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers + generate_kwargs: Optional[TextToAudioGenerationParameters] = None """Parametrization of the text generation process""" diff --git a/src/huggingface_hub/inference/_generated/types/text_to_speech.py b/src/huggingface_hub/inference/_generated/types/text_to_speech.py index dc7b6ee4f7..20bcd27965 100644 --- a/src/huggingface_hub/inference/_generated/types/text_to_speech.py +++ b/src/huggingface_hub/inference/_generated/types/text_to_speech.py @@ -76,7 +76,8 @@ class TextToSpeechGenerationParameters(BaseInferenceType): class TextToSpeechParameters(BaseInferenceType): """Additional inference parameters for Text To Speech""" - generation_parameters: Optional[TextToSpeechGenerationParameters] = None + # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers + generate_kwargs: Optional[TextToSpeechGenerationParameters] = None """Parametrization of the text generation process""" From 80461b826d9d397dcd53aaefdc13c0df01f8f5b1 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Tue, 4 Feb 2025 12:30:09 +0100 Subject: [PATCH 4/4] code quality --- src/huggingface_hub/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py index fe2fb433e2..38b153ac79 100644 --- a/src/huggingface_hub/__init__.py +++ b/src/huggingface_hub/__init__.py @@ -392,7 +392,6 @@ "TextToImageInput", "TextToImageOutput", "TextToImageParameters", - "TextToImageTargetSize", "TextToSpeechEarlyStoppingEnum", "TextToSpeechGenerationParameters", "TextToSpeechInput", @@ -702,7 +701,6 @@ "TextToImageInput", "TextToImageOutput", "TextToImageParameters", - "TextToImageTargetSize", "TextToSpeechEarlyStoppingEnum", "TextToSpeechGenerationParameters", "TextToSpeechInput", @@ -1334,7 +1332,6 @@ def __dir__(): TextToImageInput, # noqa: F401 TextToImageOutput, # noqa: F401 TextToImageParameters, # noqa: F401 - TextToImageTargetSize, # noqa: F401 TextToSpeechEarlyStoppingEnum, # noqa: F401 TextToSpeechGenerationParameters, # noqa: F401 TextToSpeechInput, # noqa: F401