Skip to content

Commit

Permalink
Merge branch 'main' into it2t-patch
Browse files Browse the repository at this point in the history
  • Loading branch information
Vaibhavs10 authored Jan 27, 2025
2 parents ab1e2ec + 4fb92f5 commit 317edcf
Show file tree
Hide file tree
Showing 122 changed files with 2,497 additions and 1,157 deletions.
2 changes: 1 addition & 1 deletion CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Ownership for the Inference Package

/packages/inference/ @vvmnnnkv @radames
/packages/inference/ @julien-c @hanouticelina @SBrandeis @coyotte508

# Ownership for the Tasks Package

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or

```html
<script type="module">
import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@3.0.0/+esm';
import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@3.1.2/+esm';
import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/+esm";
</script>
```
Expand Down
13 changes: 12 additions & 1 deletion packages/agents/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion packages/inference/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# 🤗 Hugging Face Inference Endpoints
# 🤗 Hugging Face Inference

A TypeScript-powered wrapper for the Hugging Face Inference API (serverless), Inference Endpoints (dedicated), and third-party Inference Providers.
It works with [Inference API (serverless)](https://huggingface.co/docs/api-inference/index) and [Inference Endpoints (dedicated)](https://huggingface.co/docs/inference-endpoints/index), and even with supported third-party Inference Providers.
Expand Down
2 changes: 1 addition & 1 deletion packages/inference/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@huggingface/inference",
"version": "3.0.0",
"version": "3.1.2",
"packageManager": "[email protected]",
"license": "MIT",
"author": "Tim Mikeladze <[email protected]>",
Expand Down
4 changes: 4 additions & 0 deletions packages/inference/src/providers/fal-ai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,8 @@ export const FAL_AI_SUPPORTED_MODEL_IDS: ProviderMapping<FalAiId> = {
"automatic-speech-recognition": {
"openai/whisper-large-v3": "fal-ai/whisper",
},
"text-to-video": {
"genmo/mochi-1-preview": "fal-ai/mochi-v1",
"tencent/HunyuanVideo": "fal-ai/hunyuan-video",
},
};
13 changes: 13 additions & 0 deletions packages/inference/src/providers/replicate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,24 @@ type ReplicateId = string;

/**
 * Mapping from Hugging Face model ids to the Replicate model ids used to serve them, keyed by task.
 *
 * Replicate values may pin an exact model version using the "owner/model:versionhash" form;
 * entries without a ":versionhash" suffix reference the model id alone (unpinned).
 */
export const REPLICATE_SUPPORTED_MODEL_IDS: ProviderMapping<ReplicateId> = {
	"text-to-image": {
		"black-forest-labs/FLUX.1-dev": "black-forest-labs/flux-dev",
		"black-forest-labs/FLUX.1-schnell": "black-forest-labs/flux-schnell",
		"ByteDance/Hyper-SD":
			"bytedance/hyper-flux-16step:382cf8959fb0f0d665b26e7e80b8d6dc3faaef1510f14ce017e8c732bb3d1eb7",
		"ByteDance/SDXL-Lightning":
			"bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637",
		"playgroundai/playground-v2.5-1024px-aesthetic":
			"playgroundai/playground-v2.5-1024px-aesthetic:a45f82a1382bed5c7aeb861dac7c7d191b0fdf74d8d57c4a0e6ed7d4d0bf7d24",
		"stabilityai/stable-diffusion-3.5-large-turbo": "stability-ai/stable-diffusion-3.5-large-turbo",
		"stabilityai/stable-diffusion-3.5-large": "stability-ai/stable-diffusion-3.5-large",
		"stabilityai/stable-diffusion-3.5-medium": "stability-ai/stable-diffusion-3.5-medium",
		"stabilityai/stable-diffusion-xl-base-1.0":
			"stability-ai/sdxl:7762fd07cf82c948538e41f63f77d685e02b063e37e496e96eefd46c929f9bdc",
	},
	"text-to-speech": {
		"OuteAI/OuteTTS-0.3-500M": "jbilcke/oute-tts:39a59319327b27327fa3095149c5a746e7f2aee18c75055c3368237a6503cd26",
	},
	"text-to-video": {
		"genmo/mochi-1-preview": "genmoai/mochi-1:1944af04d098ef69bed7f9d335d102e652203f268ec4aaa2d836f6217217e460",
	},
};
2 changes: 2 additions & 0 deletions packages/inference/src/providers/together.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ export const TOGETHER_SUPPORTED_MODEL_IDS: ProviderMapping<TogetherId> = {
},
conversational: {
"databricks/dbrx-instruct": "databricks/dbrx-instruct",
"deepseek-ai/DeepSeek-R1": "deepseek-ai/DeepSeek-R1",
"deepseek-ai/DeepSeek-V3": "deepseek-ai/DeepSeek-V3",
"deepseek-ai/deepseek-llm-67b-chat": "deepseek-ai/deepseek-llm-67b-chat",
"google/gemma-2-9b-it": "google/gemma-2-9b-it",
"google/gemma-2b-it": "google/gemma-2-27b-it",
Expand Down
29 changes: 7 additions & 22 deletions packages/inference/src/tasks/audio/audioClassification.ts
Original file line number Diff line number Diff line change
@@ -1,27 +1,11 @@
import type { AudioClassificationInput, AudioClassificationOutput } from "@huggingface/tasks";
import { InferenceOutputError } from "../../lib/InferenceOutputError";
import type { BaseArgs, Options } from "../../types";
import { request } from "../custom/request";
import type { LegacyAudioInput } from "./utils";
import { preparePayload } from "./utils";

export type AudioClassificationArgs = BaseArgs & {
/**
* Binary audio data
*/
data: Blob | ArrayBuffer;
};

export interface AudioClassificationOutputValue {
/**
* The label for the class (model specific)
*/
label: string;

/**
* A float that represents how likely it is that the audio file belongs to this class.
*/
score: number;
}

export type AudioClassificationReturn = AudioClassificationOutputValue[];
export type AudioClassificationArgs = BaseArgs & (AudioClassificationInput | LegacyAudioInput);

/**
* This task reads some audio input and outputs the likelihood of classes.
Expand All @@ -30,8 +14,9 @@ export type AudioClassificationReturn = AudioClassificationOutputValue[];
export async function audioClassification(
args: AudioClassificationArgs,
options?: Options
): Promise<AudioClassificationReturn> {
const res = await request<AudioClassificationReturn>(args, {
): Promise<AudioClassificationOutput> {
const payload = preparePayload(args);
const res = await request<AudioClassificationOutput>(payload, {
...options,
taskHint: "audio-classification",
});
Expand Down
66 changes: 43 additions & 23 deletions packages/inference/src/tasks/audio/audioToAudio.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
import { InferenceOutputError } from "../../lib/InferenceOutputError";
import type { BaseArgs, Options } from "../../types";
import { request } from "../custom/request";
import type { LegacyAudioInput } from "./utils";
import { preparePayload } from "./utils";

export type AudioToAudioArgs = BaseArgs & {
/**
* Binary audio data
*/
data: Blob | ArrayBuffer;
};
export type AudioToAudioArgs =
| (BaseArgs & {
/**
* Binary audio data
*/
inputs: Blob;
})
| LegacyAudioInput;

export interface AudioToAudioOutputValue {
export interface AudioToAudioOutputElem {
/**
* The label for the audio output (model specific)
*/
Expand All @@ -18,32 +22,48 @@ export interface AudioToAudioOutputValue {
/**
* Base64 encoded audio output.
*/
blob: string;
audio: Blob;
}

/**
* Content-type for blob, e.g. audio/flac
*/
export interface AudioToAudioOutput {
blob: string;
"content-type": string;
label: string;
}

export type AudioToAudioReturn = AudioToAudioOutputValue[];

/**
* This task reads some audio input and outputs one or multiple audio files.
* Example model: speechbrain/sepformer-wham does audio source separation.
*/
export async function audioToAudio(args: AudioToAudioArgs, options?: Options): Promise<AudioToAudioReturn> {
const res = await request<AudioToAudioReturn>(args, {
export async function audioToAudio(args: AudioToAudioArgs, options?: Options): Promise<AudioToAudioOutput[]> {
const payload = preparePayload(args);
const res = await request<AudioToAudioOutput>(payload, {
...options,
taskHint: "audio-to-audio",
});
const isValidOutput =
Array.isArray(res) &&
res.every(
(x) => typeof x.label === "string" && typeof x.blob === "string" && typeof x["content-type"] === "string"
);
if (!isValidOutput) {
throw new InferenceOutputError("Expected Array<{label: string, blob: string, content-type: string}>");

return validateOutput(res);
}

function validateOutput(output: unknown): AudioToAudioOutput[] {
if (!Array.isArray(output)) {
throw new InferenceOutputError("Expected Array");
}
if (
!output.every((elem): elem is AudioToAudioOutput => {
return (
typeof elem === "object" &&
elem &&
"label" in elem &&
typeof elem.label === "string" &&
"content-type" in elem &&
typeof elem["content-type"] === "string" &&
"blob" in elem &&
typeof elem.blob === "string"
);
})
) {
throw new InferenceOutputError("Expected Array<{label: string, audio: Blob}>");
}
return res;
return output;
}
58 changes: 35 additions & 23 deletions packages/inference/src/tasks/audio/automaticSpeechRecognition.ts
Original file line number Diff line number Diff line change
@@ -1,22 +1,13 @@
import type { AutomaticSpeechRecognitionInput, AutomaticSpeechRecognitionOutput } from "@huggingface/tasks";
import { InferenceOutputError } from "../../lib/InferenceOutputError";
import type { BaseArgs, Options, RequestArgs } from "../../types";
import { base64FromBytes } from "../../utils/base64FromBytes";
import { request } from "../custom/request";
import type { LegacyAudioInput } from "./utils";
import { preparePayload } from "./utils";
import { omit } from "../../utils/omit";

export type AutomaticSpeechRecognitionArgs = BaseArgs & {
/**
* Binary audio data
*/
data: Blob | ArrayBuffer;
};

export interface AutomaticSpeechRecognitionOutput {
/**
* The text that was recognized from the audio
*/
text: string;
}

export type AutomaticSpeechRecognitionArgs = BaseArgs & (AutomaticSpeechRecognitionInput | LegacyAudioInput);
/**
* This task reads some audio input and outputs the said words within the audio files.
* Recommended model (english language): facebook/wav2vec2-large-960h-lv60-self
Expand All @@ -25,15 +16,8 @@ export async function automaticSpeechRecognition(
args: AutomaticSpeechRecognitionArgs,
options?: Options
): Promise<AutomaticSpeechRecognitionOutput> {
if (args.provider === "fal-ai") {
const contentType = args.data instanceof Blob ? args.data.type : "audio/mpeg";
const base64audio = base64FromBytes(
new Uint8Array(args.data instanceof ArrayBuffer ? args.data : await args.data.arrayBuffer())
);
(args as RequestArgs & { audio_url: string }).audio_url = `data:${contentType};base64,${base64audio}`;
delete (args as RequestArgs & { data: unknown }).data;
}
const res = await request<AutomaticSpeechRecognitionOutput>(args, {
const payload = await buildPayload(args);
const res = await request<AutomaticSpeechRecognitionOutput>(payload, {
...options,
taskHint: "automatic-speech-recognition",
});
Expand All @@ -43,3 +27,31 @@ export async function automaticSpeechRecognition(
}
return res;
}

// Content-types the fal-ai provider accepts for inline base64 audio.
const FAL_AI_SUPPORTED_BLOB_TYPES = ["audio/mpeg", "audio/mp4", "audio/wav", "audio/x-wav"];

/**
 * Builds the request payload for automatic speech recognition.
 *
 * For the "fal-ai" provider the audio is inlined as a base64 data URL under `audio_url`
 * (the provider-specific field replaces `data`/`inputs`); for every other provider the
 * arguments are normalized by `preparePayload`.
 *
 * @throws Error when provider is "fal-ai" and the input is not a Blob, or the Blob's
 *         content-type is not in {@link FAL_AI_SUPPORTED_BLOB_TYPES}.
 */
async function buildPayload(args: AutomaticSpeechRecognitionArgs): Promise<RequestArgs> {
	if (args.provider !== "fal-ai") {
		return preparePayload(args);
	}
	// fal-ai needs a data URL, so we must know the content-type — only a Blob carries one.
	const blob = "data" in args && args.data instanceof Blob ? args.data : "inputs" in args ? args.inputs : undefined;
	const contentType = blob?.type;
	if (!contentType) {
		throw new Error(
			`Unable to determine the input's content-type. Make sure you are passing a Blob when using provider fal-ai.`
		);
	}
	if (!FAL_AI_SUPPORTED_BLOB_TYPES.includes(contentType)) {
		throw new Error(
			`Provider fal-ai does not support blob type ${contentType} - supported content types are: ${FAL_AI_SUPPORTED_BLOB_TYPES.join(
				", "
			)}`
		);
	}
	const base64audio = base64FromBytes(new Uint8Array(await blob.arrayBuffer()));
	return {
		...("data" in args ? omit(args, "data") : omit(args, "inputs")),
		audio_url: `data:${contentType};base64,${base64audio}`,
	};
}
22 changes: 8 additions & 14 deletions packages/inference/src/tasks/audio/textToSpeech.ts
Original file line number Diff line number Diff line change
@@ -1,27 +1,25 @@
import type { TextToSpeechInput } from "@huggingface/tasks";
import { InferenceOutputError } from "../../lib/InferenceOutputError";
import type { BaseArgs, Options } from "../../types";
import { request } from "../custom/request";

export type TextToSpeechArgs = BaseArgs & {
/**
* The text to generate an audio from
*/
inputs: string;
};
type TextToSpeechArgs = BaseArgs & TextToSpeechInput;

export type TextToSpeechOutput = Blob;
interface OutputUrlTextToSpeechGeneration {
output: string | string[];
}
/**
* This task synthesize an audio of a voice pronouncing a given text.
* Recommended model: espnet/kan-bayashi_ljspeech_vits
*/
export async function textToSpeech(args: TextToSpeechArgs, options?: Options): Promise<TextToSpeechOutput> {
const res = await request<TextToSpeechOutput | OutputUrlTextToSpeechGeneration>(args, {
export async function textToSpeech(args: TextToSpeechArgs, options?: Options): Promise<Blob> {
const res = await request<Blob | OutputUrlTextToSpeechGeneration>(args, {
...options,
taskHint: "text-to-speech",
});
if (res instanceof Blob) {
return res;
}
if (res && typeof res === "object") {
if ("output" in res) {
if (typeof res.output === "string") {
Expand All @@ -35,9 +33,5 @@ export async function textToSpeech(args: TextToSpeechArgs, options?: Options): P
}
}
}
const isValidOutput = res && res instanceof Blob;
if (!isValidOutput) {
throw new InferenceOutputError("Expected Blob");
}
return res;
throw new InferenceOutputError("Expected Blob or object with output");
}
18 changes: 18 additions & 0 deletions packages/inference/src/tasks/audio/utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import type { BaseArgs, RequestArgs } from "../../types";
import { omit } from "../../utils/omit";

/**
 * Legacy argument shape where binary audio is passed as `data` instead of `inputs`.
 *
 * @deprecated Pass the audio as `inputs: Blob` instead; this shape is kept so
 * existing callers using `data` keep working (see `preparePayload`).
 */
export interface LegacyAudioInput {
	data: Blob | ArrayBuffer;
}

/**
 * Normalizes audio task arguments into request form: legacy `data` payloads
 * pass through untouched, while the newer `inputs` field is moved to `data`.
 */
export function preparePayload(args: BaseArgs & ({ inputs: Blob } | LegacyAudioInput)): RequestArgs {
	// Legacy callers already provide the binary payload under `data`.
	if ("data" in args) {
		return args;
	}
	// Rename `inputs` -> `data`, dropping the original key via rest destructuring.
	const { inputs, ...rest } = args;
	return { ...rest, data: inputs };
}
Loading

0 comments on commit 317edcf

Please sign in to comment.