From 121459b35bf3c65391b36eb23dc13ab5ceaeda70 Mon Sep 17 00:00:00 2001 From: Wauplin <11801849+Wauplin@users.noreply.github.com> Date: Tue, 4 Feb 2025 03:02:31 +0000 Subject: [PATCH] Update inference types (automated commit) --- src/huggingface_hub/inference/_client.py | 4 ++++ src/huggingface_hub/inference/_generated/_async_client.py | 4 ++++ .../_generated/types/automatic_speech_recognition.py | 7 +++---- .../inference/_generated/types/image_to_text.py | 5 ++--- .../inference/_generated/types/text_to_audio.py | 3 +-- .../inference/_generated/types/text_to_image.py | 2 ++ .../inference/_generated/types/text_to_speech.py | 3 +-- 7 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py index 932c3aa98e..f4f12ba26b 100644 --- a/src/huggingface_hub/inference/_client.py +++ b/src/huggingface_hub/inference/_client.py @@ -92,6 +92,7 @@ TextGenerationInputGrammarType, TextGenerationOutput, TextGenerationStreamOutput, + TextToImageTargetSize, TextToSpeechEarlyStoppingEnum, TokenClassificationAggregationStrategy, TokenClassificationOutputElement, @@ -2403,6 +2404,7 @@ def text_to_image( scheduler: Optional[str] = None, seed: Optional[int] = None, extra_body: Optional[Dict[str, Any]] = None, + target_size: Optional[TextToImageTargetSize] = None, ) -> "Image": """ Generate an image based on a given text using a specified model. @@ -2443,6 +2445,8 @@ def text_to_image( extra_body (`Dict[str, Any]`, *optional*): Additional provider-specific parameters to pass to the model. Refer to the provider's documentation for supported parameters. + target_size (`TextToImageTargetSize`, *optional*): + The size in pixels of the output image Returns: `Image`: The generated image. 
diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py index 7503f35ac3..07bf64f122 100644 --- a/src/huggingface_hub/inference/_generated/_async_client.py +++ b/src/huggingface_hub/inference/_generated/_async_client.py @@ -77,6 +77,7 @@ TextGenerationInputGrammarType, TextGenerationOutput, TextGenerationStreamOutput, + TextToImageTargetSize, TextToSpeechEarlyStoppingEnum, TokenClassificationAggregationStrategy, TokenClassificationOutputElement, @@ -2459,6 +2460,7 @@ async def text_to_image( scheduler: Optional[str] = None, seed: Optional[int] = None, extra_body: Optional[Dict[str, Any]] = None, + target_size: Optional[TextToImageTargetSize] = None, ) -> "Image": """ Generate an image based on a given text using a specified model. @@ -2499,6 +2501,8 @@ async def text_to_image( extra_body (`Dict[str, Any]`, *optional*): Additional provider-specific parameters to pass to the model. Refer to the provider's documentation for supported parameters. + target_size (`TextToImageTargetSize`, *optional*): + The size in pixels of the output image Returns: `Image`: The generated image. 
diff --git a/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py b/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py index 083461f6a9..1733aae663 100644 --- a/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +++ b/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py @@ -76,11 +76,10 @@ class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType): class AutomaticSpeechRecognitionParameters(BaseInferenceType): """Additional inference parameters for Automatic Speech Recognition""" + generation_parameters: Optional[AutomaticSpeechRecognitionGenerationParameters] = None + """Parametrization of the text generation process""" return_timestamps: Optional[bool] = None """Whether to output corresponding timestamps with the generated text""" - # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers - generate_kwargs: Optional[AutomaticSpeechRecognitionGenerationParameters] = None - """Parametrization of the text generation process""" @dataclass @@ -99,7 +98,7 @@ class AutomaticSpeechRecognitionInput(BaseInferenceType): class AutomaticSpeechRecognitionOutputChunk(BaseInferenceType): text: str """A chunk of text identified by the model""" - timestamp: List[float] + timestamps: List[float] """The start and end timestamps corresponding with the text""" diff --git a/src/huggingface_hub/inference/_generated/types/image_to_text.py b/src/huggingface_hub/inference/_generated/types/image_to_text.py index d00ae3cf9c..b0c47b3f44 100644 --- a/src/huggingface_hub/inference/_generated/types/image_to_text.py +++ b/src/huggingface_hub/inference/_generated/types/image_to_text.py @@ -76,11 +76,10 @@ class ImageToTextGenerationParameters(BaseInferenceType): class ImageToTextParameters(BaseInferenceType): """Additional inference parameters for Image To Text""" + generation_parameters: 
Optional[ImageToTextGenerationParameters] = None + """Parametrization of the text generation process""" max_new_tokens: Optional[int] = None """The amount of maximum tokens to generate.""" - # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers - generate_kwargs: Optional[ImageToTextGenerationParameters] = None - """Parametrization of the text generation process""" @dataclass diff --git a/src/huggingface_hub/inference/_generated/types/text_to_audio.py b/src/huggingface_hub/inference/_generated/types/text_to_audio.py index b57fadb86f..59380ddf94 100644 --- a/src/huggingface_hub/inference/_generated/types/text_to_audio.py +++ b/src/huggingface_hub/inference/_generated/types/text_to_audio.py @@ -76,8 +76,7 @@ class TextToAudioGenerationParameters(BaseInferenceType): class TextToAudioParameters(BaseInferenceType): """Additional inference parameters for Text To Audio""" - # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers - generate_kwargs: Optional[TextToAudioGenerationParameters] = None + generation_parameters: Optional[TextToAudioGenerationParameters] = None """Parametrization of the text generation process""" diff --git a/src/huggingface_hub/inference/_generated/types/text_to_image.py b/src/huggingface_hub/inference/_generated/types/text_to_image.py index 57d10aedd1..8d2ff187a3 100644 --- a/src/huggingface_hub/inference/_generated/types/text_to_image.py +++ b/src/huggingface_hub/inference/_generated/types/text_to_image.py @@ -35,6 +35,8 @@ class TextToImageParameters(BaseInferenceType): """Override the scheduler with a compatible one.""" seed: Optional[int] = None """Seed for the random number generator.""" + target_size: Optional[TextToImageTargetSize] = None + """The size in pixels of the output image""" @dataclass diff --git a/src/huggingface_hub/inference/_generated/types/text_to_speech.py 
b/src/huggingface_hub/inference/_generated/types/text_to_speech.py index 20bcd27965..dc7b6ee4f7 100644 --- a/src/huggingface_hub/inference/_generated/types/text_to_speech.py +++ b/src/huggingface_hub/inference/_generated/types/text_to_speech.py @@ -76,8 +76,7 @@ class TextToSpeechGenerationParameters(BaseInferenceType): class TextToSpeechParameters(BaseInferenceType): """Additional inference parameters for Text To Speech""" - # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers - generate_kwargs: Optional[TextToSpeechGenerationParameters] = None + generation_parameters: Optional[TextToSpeechGenerationParameters] = None """Parametrization of the text generation process"""