Add line spacing to printouts and apply lint formatting

AaltoRSE · Jan 10, 2024 · 3f2cec9 · 3f2cec9
1 parent 3cf38ed
commit 3f2cec9
Show file tree

Hide file tree

Showing 3 changed files with 29 additions and 26 deletions.
diff --git a/src/speech2text.py b/src/speech2text.py
@@ -6,11 +6,11 @@
 import warnings
 from collections import defaultdict
 from pathlib import Path
-
-import torch
 from typing import Optional, Union
-import pandas as pd
+
 import faster_whisper
+import pandas as pd
+import torch
 from numba.core.errors import (NumbaDeprecationWarning,
                                NumbaPendingDeprecationWarning)
 from pyannote.audio import Pipeline
@@ -113,8 +113,7 @@ def align(segments, diarization):
     dict
     """
     transcription_segments = [
-        (segment.start, segment.end, segment.text)
-        for segment in segments
+        (segment.start, segment.end, segment.text) for segment in segments
     ]
     diarization_segments = [
         (segment.start, segment.end, speaker)
@@ -209,17 +208,19 @@ def write_alignment_to_txt_file(alignment, output_file_stem):
     logger.info(f".. .. Wrote TXT output to: {output_file}")
 
 
-def load_whisper_model(name: str = "large-v3",
-                       device: Optional[Union[str, torch.device]] = None,
-                       ):    
+def load_whisper_model(
+    name: str = "large-v3",
+    device: Optional[Union[str, torch.device]] = None,
+):
     if device is None:
         device = "cuda" if torch.cuda.is_available() else "cpu"
 
-    model = faster_whisper.WhisperModel(name, 
-                                        device=device,
-                                        cpu_threads=6,
-                                        compute_type="int8",
-                                        )
+    model = faster_whisper.WhisperModel(
+        name,
+        device=device,
+        cpu_threads=6,
+        compute_type="int8",
+    )
 
     return model
 
@@ -285,9 +286,9 @@ def main():
 
     logger.info(f".. Transcribe input file: {args.INPUT_FILE}")
     t0 = time.time()
-    segments, _ = faster_whisper_model.transcribe(args.INPUT_FILE,
-                                                  language=args.SPEECH2TEXT_LANGUAGE,
-                                                  beam_size=5)
+    segments, _ = faster_whisper_model.transcribe(
+        args.INPUT_FILE, language=args.SPEECH2TEXT_LANGUAGE, beam_size=5
+    )
     segments = list(segments)
     logger.info(f".. .. Transcription finished in {time.time()-t0:.1f} seconds")
 

diff --git a/src/submit.py b/src/submit.py
@@ -107,8 +107,8 @@ def create_array_input_file(input_dir, output_dir, job_name, tmp_dir):
             )
             continue
         print(f"{input_file}: Submit")
-
         input_files.append(str(input_file))
+    print()
 
     if not input_files:
         return
@@ -158,7 +158,7 @@ def submit_dir(args, job_name):
     )
     if tmp_file_array is None:
         print(
-            f"Submission not necessary since no files in {args.INPUT} need processing"
+            f"Submission not necessary since no files in {args.INPUT} need processing\n"
         )
         return
     tmp_file_sh = create_sbatch_script_for_array_job(
@@ -244,22 +244,23 @@ def main():
     # Parse arguments
     parser = get_argument_parser()
     args = parser.parse_args()
-    print(f"Submit speech2text jobs with arguments:")
+    print(f"\nSubmit speech2text jobs with arguments:")
     for key, value in vars(args).items():
         print(f"\t{key}: {value}")
+    print()
 
     # Check language
     if (
         args.SPEECH2TEXT_LANGUAGE is not None
         and args.SPEECH2TEXT_LANGUAGE.lower() in settings.supported_languages
     ):
-        print(f"Given language '{args.SPEECH2TEXT_LANGUAGE}' is supported.")
+        print(f"Given language '{args.SPEECH2TEXT_LANGUAGE}' is supported.\n")
     elif (
         args.SPEECH2TEXT_LANGUAGE is not None
         and args.SPEECH2TEXT_LANGUAGE not in settings.supported_languages
     ):
         print(
-            f"Submission failed: Given language '{args.SPEECH2TEXT_LANGUAGE}' not found in supported languages:\n\n{' '.join(settings.supported_languages)}"
+            f"Submission failed: Given language '{args.SPEECH2TEXT_LANGUAGE}' not found in supported languages:\n\n{' '.join(settings.supported_languages)}\n"
         )
         return
     else:
@@ -268,24 +269,24 @@ def main():
               
     export SPEECH2TEXT_LANGUAGE=mylanguage    
 
-where mylanguage is one of:\n\n{' '.join(settings.supported_languages)}
+where mylanguage is one of:\n\n{' '.join(settings.supported_languages)}\n
         """
         )
 
     # Check email
     if args.SPEECH2TEXT_EMAIL is not None:
-        print(f"Email notifications will be sent to: {args.SPEECH2TEXT_EMAIL}")
+        print(f"Email notifications will be sent to: {args.SPEECH2TEXT_EMAIL}\n")
     else:
         print(
             f"""Notifications will not be sent as no email address was specified. To specify email address, use
               
-    export SPEECH2TEXT_EMAIL=my.name@aalto.fi    
+    export SPEECH2TEXT_EMAIL=my.name@aalto.fi\n
     """
         )
 
     # Notify about temporary folder location
     print(
-        f"Log files (.out) and batch submit scripts (.sh) will be written to: {args.SPEECH2TEXT_TMP}"
+        f"Log files (.out) and batch submit scripts (.sh) will be written to: {args.SPEECH2TEXT_TMP}\n"
     )
 
     # Submit file or directory
@@ -301,7 +302,7 @@ def main():
         submit_dir(args, job_name)
     else:
         print(
-            ".. Submission failed: First argument needs to be an existing audio file or a directory with audio files."
+            ".. Submission failed: First argument needs to be an existing audio file or a directory with audio files.\n"
         )
 
 

diff --git a/src/utils.py b/src/utils.py
@@ -1,5 +1,6 @@
 import numpy as np
 
+
 def seconds_to_human_readable_format(seconds):
     """
     Convert seconds to human readable string.