Skip to content

Commit

Permalink
add line spacing to print outs and lint
Browse files Browse the repository at this point in the history
  • Loading branch information
ruokolt committed Jan 10, 2024
1 parent 3cf38ed commit 3f2cec9
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 26 deletions.
33 changes: 17 additions & 16 deletions src/speech2text.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
import warnings
from collections import defaultdict
from pathlib import Path

import torch
from typing import Optional, Union
import pandas as pd

import faster_whisper
import pandas as pd
import torch
from numba.core.errors import (NumbaDeprecationWarning,
NumbaPendingDeprecationWarning)
from pyannote.audio import Pipeline
Expand Down Expand Up @@ -113,8 +113,7 @@ def align(segments, diarization):
dict
"""
transcription_segments = [
(segment.start, segment.end, segment.text)
for segment in segments
(segment.start, segment.end, segment.text) for segment in segments
]
diarization_segments = [
(segment.start, segment.end, speaker)
Expand Down Expand Up @@ -209,17 +208,19 @@ def write_alignment_to_txt_file(alignment, output_file_stem):
logger.info(f".. .. Wrote TXT output to: {output_file}")


def load_whisper_model(name: str = "large-v3",
device: Optional[Union[str, torch.device]] = None,
):
def load_whisper_model(
name: str = "large-v3",
device: Optional[Union[str, torch.device]] = None,
):
if device is None:
device = "cuda" if torch.cuda.is_available() else "cpu"

model = faster_whisper.WhisperModel(name,
device=device,
cpu_threads=6,
compute_type="int8",
)
model = faster_whisper.WhisperModel(
name,
device=device,
cpu_threads=6,
compute_type="int8",
)

return model

Expand Down Expand Up @@ -285,9 +286,9 @@ def main():

logger.info(f".. Transcribe input file: {args.INPUT_FILE}")
t0 = time.time()
segments, _ = faster_whisper_model.transcribe(args.INPUT_FILE,
language=args.SPEECH2TEXT_LANGUAGE,
beam_size=5)
segments, _ = faster_whisper_model.transcribe(
args.INPUT_FILE, language=args.SPEECH2TEXT_LANGUAGE, beam_size=5
)
segments = list(segments)
logger.info(f".. .. Transcription finished in {time.time()-t0:.1f} seconds")

Expand Down
21 changes: 11 additions & 10 deletions src/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ def create_array_input_file(input_dir, output_dir, job_name, tmp_dir):
)
continue
print(f"{input_file}: Submit")

input_files.append(str(input_file))
print()

if not input_files:
return
Expand Down Expand Up @@ -158,7 +158,7 @@ def submit_dir(args, job_name):
)
if tmp_file_array is None:
print(
f"Submission not necessary since no files in {args.INPUT} need processing"
f"Submission not necessary since no files in {args.INPUT} need processing\n"
)
return
tmp_file_sh = create_sbatch_script_for_array_job(
Expand Down Expand Up @@ -244,22 +244,23 @@ def main():
# Parse arguments
parser = get_argument_parser()
args = parser.parse_args()
print(f"Submit speech2text jobs with arguments:")
print(f"\nSubmit speech2text jobs with arguments:")
for key, value in vars(args).items():
print(f"\t{key}: {value}")
print()

# Check language
if (
args.SPEECH2TEXT_LANGUAGE is not None
and args.SPEECH2TEXT_LANGUAGE.lower() in settings.supported_languages
):
print(f"Given language '{args.SPEECH2TEXT_LANGUAGE}' is supported.")
print(f"Given language '{args.SPEECH2TEXT_LANGUAGE}' is supported.\n")
elif (
args.SPEECH2TEXT_LANGUAGE is not None
and args.SPEECH2TEXT_LANGUAGE not in settings.supported_languages
):
print(
f"Submission failed: Given language '{args.SPEECH2TEXT_LANGUAGE}' not found in supported languages:\n\n{' '.join(settings.supported_languages)}"
f"Submission failed: Given language '{args.SPEECH2TEXT_LANGUAGE}' not found in supported languages:\n\n{' '.join(settings.supported_languages)}\n"
)
return
else:
Expand All @@ -268,24 +269,24 @@ def main():
export SPEECH2TEXT_LANGUAGE=mylanguage
where mylanguage is one of:\n\n{' '.join(settings.supported_languages)}
where mylanguage is one of:\n\n{' '.join(settings.supported_languages)}\n
"""
)

# Check email
if args.SPEECH2TEXT_EMAIL is not None:
print(f"Email notifications will be sent to: {args.SPEECH2TEXT_EMAIL}")
print(f"Email notifications will be sent to: {args.SPEECH2TEXT_EMAIL}\n")
else:
print(
f"""Notifications will not be sent as no email address was specified. To specify email address, use
export SPEECH2TEXT_EMAIL=my.name@example.com
export SPEECH2TEXT_EMAIL=my.name@example.com\n
"""
)

# Notify about temporary folder location
print(
f"Log files (.out) and batch submit scripts (.sh) will be written to: {args.SPEECH2TEXT_TMP}"
f"Log files (.out) and batch submit scripts (.sh) will be written to: {args.SPEECH2TEXT_TMP}\n"
)

# Submit file or directory
Expand All @@ -301,7 +302,7 @@ def main():
submit_dir(args, job_name)
else:
print(
".. Submission failed: First argument needs to be an existing audio file or a directory with audio files."
".. Submission failed: First argument needs to be an existing audio file or a directory with audio files.\n"
)


Expand Down
1 change: 1 addition & 0 deletions src/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np


def seconds_to_human_readable_format(seconds):
"""
Convert seconds to human readable string.
Expand Down

0 comments on commit 3f2cec9

Please sign in to comment.