Formatting and console-output spacing patch.

  src/speech2text.py  regroup imports; re-wrap the load_whisper_model
                      signature and the WhisperModel / transcribe calls.
  src/submit.py       add blank-line spacing ("\n" suffixes and bare
                      print() calls) to the messages printed on submission.
  src/utils.py        add one blank line after the import.

NOTE(review): the line breaks and indentation below were reconstructed so
that every hunk matches the line counts in its "@@" header. The leading
whitespace of context and removed lines is inferred, not verified; confirm
against the repository (or apply with whitespace tolerance) before use.

diff --git a/src/speech2text.py b/src/speech2text.py
index 4e6af87..758113d 100644
--- a/src/speech2text.py
+++ b/src/speech2text.py
@@ -6,11 +6,11 @@
 import warnings
 from collections import defaultdict
 from pathlib import Path
-
-import torch
 from typing import Optional, Union
-import pandas as pd
+
 import faster_whisper
+import pandas as pd
+import torch
 from numba.core.errors import (NumbaDeprecationWarning,
                                NumbaPendingDeprecationWarning)
 from pyannote.audio import Pipeline
@@ -113,8 +113,7 @@ def align(segments, diarization):
     dict
     """
     transcription_segments = [
-        (segment.start, segment.end, segment.text)
-        for segment in segments
+        (segment.start, segment.end, segment.text) for segment in segments
     ]
     diarization_segments = [
         (segment.start, segment.end, speaker)
@@ -209,17 +208,19 @@ def write_alignment_to_txt_file(alignment, output_file_stem):
     logger.info(f".. .. Wrote TXT output to: {output_file}")
 
 
-def load_whisper_model(name: str = "large-v3",
-                       device: Optional[Union[str, torch.device]] = None,
-                       ):
+def load_whisper_model(
+    name: str = "large-v3",
+    device: Optional[Union[str, torch.device]] = None,
+):
     if device is None:
         device = "cuda" if torch.cuda.is_available() else "cpu"
 
-    model = faster_whisper.WhisperModel(name,
-                                        device=device,
-                                        cpu_threads=6,
-                                        compute_type="int8",
-                                        )
+    model = faster_whisper.WhisperModel(
+        name,
+        device=device,
+        cpu_threads=6,
+        compute_type="int8",
+    )
 
     return model
 
@@ -285,9 +286,9 @@ def main():
     logger.info(f".. Transcribe input file: {args.INPUT_FILE}")
     t0 = time.time()
 
-    segments, _ = faster_whisper_model.transcribe(args.INPUT_FILE,
-                                                  language=args.SPEECH2TEXT_LANGUAGE,
-                                                  beam_size=5)
+    segments, _ = faster_whisper_model.transcribe(
+        args.INPUT_FILE, language=args.SPEECH2TEXT_LANGUAGE, beam_size=5
+    )
     segments = list(segments)
     logger.info(f".. .. Transcription finished in {time.time()-t0:.1f} seconds")
 
diff --git a/src/submit.py b/src/submit.py
index 7d69996..a7fcd0c 100644
--- a/src/submit.py
+++ b/src/submit.py
@@ -107,8 +107,8 @@ def create_array_input_file(input_dir, output_dir, job_name, tmp_dir):
             )
             continue
         print(f"{input_file}: Submit")
-
         input_files.append(str(input_file))
+    print()
 
     if not input_files:
         return
@@ -158,7 +158,7 @@ def submit_dir(args, job_name):
     )
     if tmp_file_array is None:
         print(
-            f"Submission not necessary since no files in {args.INPUT} need processing"
+            f"Submission not necessary since no files in {args.INPUT} need processing\n"
         )
         return
     tmp_file_sh = create_sbatch_script_for_array_job(
@@ -244,22 +244,23 @@ def main():
     # Parse arguments
     parser = get_argument_parser()
     args = parser.parse_args()
-    print(f"Submit speech2text jobs with arguments:")
+    print(f"\nSubmit speech2text jobs with arguments:")
     for key, value in vars(args).items():
         print(f"\t{key}: {value}")
+    print()
 
     # Check language
     if (
         args.SPEECH2TEXT_LANGUAGE is not None
         and args.SPEECH2TEXT_LANGUAGE.lower() in settings.supported_languages
     ):
-        print(f"Given language '{args.SPEECH2TEXT_LANGUAGE}' is supported.")
+        print(f"Given language '{args.SPEECH2TEXT_LANGUAGE}' is supported.\n")
     elif (
         args.SPEECH2TEXT_LANGUAGE is not None
         and args.SPEECH2TEXT_LANGUAGE not in settings.supported_languages
     ):
         print(
-            f"Submission failed: Given language '{args.SPEECH2TEXT_LANGUAGE}' not found in supported languages:\n\n{' '.join(settings.supported_languages)}"
+            f"Submission failed: Given language '{args.SPEECH2TEXT_LANGUAGE}' not found in supported languages:\n\n{' '.join(settings.supported_languages)}\n"
         )
         return
     else:
@@ -268,24 +269,24 @@ def main():
 
 export SPEECH2TEXT_LANGUAGE=mylanguage
 
-where mylanguage is one of:\n\n{' '.join(settings.supported_languages)}
+where mylanguage is one of:\n\n{' '.join(settings.supported_languages)}\n
 """
         )
 
     # Check email
     if args.SPEECH2TEXT_EMAIL is not None:
-        print(f"Email notifications will be sent to: {args.SPEECH2TEXT_EMAIL}")
+        print(f"Email notifications will be sent to: {args.SPEECH2TEXT_EMAIL}\n")
     else:
         print(
             f"""Notifications will not be sent as no email address was specified. To specify email address, use
 
-export SPEECH2TEXT_EMAIL=my.name@aalto.fi
+export SPEECH2TEXT_EMAIL=my.name@aalto.fi\n
 """
         )
 
     # Notify about temporary folder location
     print(
-        f"Log files (.out) and batch submit scripts (.sh) will be written to: {args.SPEECH2TEXT_TMP}"
+        f"Log files (.out) and batch submit scripts (.sh) will be written to: {args.SPEECH2TEXT_TMP}\n"
     )
 
     # Submit file or directory
@@ -301,7 +302,7 @@ def main():
         submit_dir(args, job_name)
     else:
         print(
-            ".. Submission failed: First argument needs to be an existing audio file or a directory with audio files."
+            ".. Submission failed: First argument needs to be an existing audio file or a directory with audio files.\n"
         )
 
 
diff --git a/src/utils.py b/src/utils.py
index e6bf936..f101627 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -1,5 +1,6 @@
 import numpy as np
 
+
 def seconds_to_human_readable_format(seconds):
     """
     Convert seconds to human readable string.