[Tasks] update automatic speech recognition output specs (#1167)

It should have been `timestamp` instead of `timestamps` (see the transformers pipeline output [here](https://github.com/huggingface/transformers/blob/9d2056f12b66e64978f78a2dcb023f65b2be2108/src/transformers/pipelines/automatic_speech_recognition.py#L648)). This also aligns with the fal-ai output [specs](https://fal.ai/models/fal-ai/whisper/api#type-WhisperChunk) of the model, which is currently supported.
huggingface · Feb 4, 2025 · c843fbe · c843fbe
1 parent 34e5352
commit c843fbe
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 3 deletions.
diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts b/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts
@@ -145,6 +145,6 @@ export interface AutomaticSpeechRecognitionOutputChunk {
 	/**
 	 * The start and end timestamps corresponding with the text
 	 */
-	timestamps: number[];
+	timestamp: number[];
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/spec/output.json b/packages/tasks/src/tasks/automatic-speech-recognition/spec/output.json
@@ -20,7 +20,7 @@
 						"type": "string",
 						"description": "A chunk of text identified by the model"
 					},
-					"timestamps": {
+					"timestamp": {
 						"type": "array",
 						"description": "The start and end timestamps corresponding with the text",
 						"items": {
@@ -30,7 +30,7 @@
 						"maxLength": 2
 					}
 				},
-				"required": ["text", "timestamps"]
+				"required": ["text", "timestamp"]
 			}
 		}
 	},