rakuri255 · rakuri255 · Sep 14, 2023 · Oct 3, 2023 · Oct 3, 2023 · Oct 3, 2023
diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,7 @@
 MIT License
 
 Copyright (c) 2023 Vadim Rangnau
+Copyright (c) 2020 Max Morrison (torchcrepe code adapted for crepe output filtering and thresholding)
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/README.md b/README.md
@@ -123,6 +123,7 @@ _Not all options working now!_
     --hyphenation           True|False >> ((default) is True)
     --disable_separation    True|False >> ((default) is False)
     --disable_karaoke       True|False >> ((default) is False)
+    --ignore_audio          True|False >> ((default) is False)
     --create_audio_chunks   True|False >> ((default) is False)
     --keep_cache            True|False >> ((default) is False)
     --plot                  True|False >> ((default) is False)

diff --git a/pytest/modules/Pitcher/test_pitcher.py b/pytest/modules/Pitcher/test_pitcher.py
diff --git a/pytest/modules/Speech_Recognition/test_Whisper.py b/pytest/modules/Speech_Recognition/test_Whisper.py
@@ -30,14 +30,13 @@ def test_convert_to_transcribed_data(self):
 
         # Words should have space at the end
         expected_output = [
-            TranscribedData(
-                {"word": "UltraSinger ", "start": 1.23, "end": 2.34, "is_hyphen": None, "confidence": 0.95}),
-            TranscribedData({"word": "is ", "start": 2.34, "end": 3.45, "is_hyphen": None, "confidence": 0.9}),
-            TranscribedData({"word": "cool! ", "start": 3.45, "end": 4.56, "is_hyphen": None, "confidence": 0.85}),
-            TranscribedData({"word": "And ", "start": 4.56, "end": 5.67, "is_hyphen": None, "confidence": 0.95}),
-            TranscribedData({"word": "will ", "start": 5.67, "end": 6.78, "is_hyphen": None, "confidence": 0.9}),
-            TranscribedData({"word": "be ", "start": 6.78, "end": 7.89, "is_hyphen": None, "confidence": 0.85}),
-            TranscribedData({"word": "better! ", "start": 7.89, "end": 9.01, "is_hyphen": None, "confidence": 0.8}),
+            TranscribedData.from_dict({"word": "UltraSinger ", "start": 1.23, "end": 2.34, "is_hyphen": None, "confidence": 0.95}),
+            TranscribedData.from_dict({"word": "is ", "start": 2.34, "end": 3.45, "is_hyphen": None, "confidence": 0.9}),
+            TranscribedData.from_dict({"word": "cool! ", "start": 3.45, "end": 4.56, "is_hyphen": None, "confidence": 0.85}),
+            TranscribedData.from_dict({"word": "And ", "start": 4.56, "end": 5.67, "is_hyphen": None, "confidence": 0.95}),
+            TranscribedData.from_dict({"word": "will ", "start": 5.67, "end": 6.78, "is_hyphen": None, "confidence": 0.9}),
+            TranscribedData.from_dict({"word": "be ", "start": 6.78, "end": 7.89, "is_hyphen": None, "confidence": 0.85}),
+            TranscribedData.from_dict({"word": "better! ", "start": 7.89, "end": 9.01, "is_hyphen": None, "confidence": 0.8}),
         ]
 
         # Act

diff --git a/pytest/modules/UltraSinger.py b/pytest/modules/UltraSinger.py
@@ -0,0 +1,30 @@
+"""Tests for UltraSinger.py"""
+
+import os
+import unittest
+import src.modules.Pitcher.pitcher as test_subject
+
+import pytest
+from src.modules.plot import plot
+
+
+class PitcherTest(unittest.TestCase):
+    # @pytest.mark.skip(reason="Skipping this FUNCTION level test, can be used for manual tests")
+    def test_get_pitch_with_crepe_file(self):
+        # Arrange
+        test_dir = os.path.dirname(os.path.abspath(__file__))
+        root_dir = os.path.abspath(test_dir + "/../../..")
+        # test_file_abs_path = os.path.abspath(root_dir + "/test_input/audio_denoised.wav")
+        test_file_abs_path = os.path.abspath(root_dir + "/test_input/test_denoised.wav")
+        test_output = root_dir + "/test_output"
+
+        # Act
+        # pitched_data = test_subject.get_pitch_with_crepe_file(test_file_abs_path, 'full', device="cuda")
+        # test_subject.get_pitch_with_crepe_file(test_file_abs_path, 'full', 'cpu', batch_size=1024)
+        # plot(pitched_data, test_output, title="pitching test")
+
+        print("done")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/pytest/modules/UltraStar/test_ultrastar_writer.py b/pytest/modules/UltraStar/test_ultrastar_writer.py
@@ -68,20 +68,20 @@ def test_create_ultrastar_txt_from_automation_full_values(self):
     def arrange(self):
         # Arrange
         transcribed_data = [
-            TranscribedData({
-                "conf": 0.95,
+            TranscribedData.from_dict({
+                "confidence": 0.95,
                 "word": "UltraSinger ",
                 "end": 2.5,
                 "start": 0.5
             }),
-            TranscribedData({
-                "conf": 0.9,
+            TranscribedData.from_dict({
+                "confidence": 0.9,
                 "word": "is ",
                 "end": 4.5,
                 "start": 3.0
             }),
-            TranscribedData({
-                "conf": 0.85,
+            TranscribedData.from_dict({
+                "confidence": 0.85,
                 "word": "cool! ",
                 "end": 7.5,
                 "start": 5.5
@@ -110,20 +110,27 @@ def default_values(default_ultrastar_class, ver):
         expected_calls = []
         if version.parse(ver) >= version.parse("1.0.0"):
             expected_calls.append(f"#{UltrastarTxtTag.VERSION}:{default_ultrastar_class.version}\n")
-        expected_calls.append(f"#{UltrastarTxtTag.ARTIST}:{default_ultrastar_class.artist}\n")
-        expected_calls.append(f"#{UltrastarTxtTag.TITLE}:{default_ultrastar_class.title}\n")
-        expected_calls.append(f"#{UltrastarTxtTag.MP3}:{default_ultrastar_class.mp3}\n")
+        expected_calls += [
+            f"#{UltrastarTxtTag.ARTIST}:{default_ultrastar_class.artist}\n",
+            f"#{UltrastarTxtTag.TITLE}:{default_ultrastar_class.title}\n",
+            f"#{UltrastarTxtTag.MP3}:{default_ultrastar_class.mp3}\n"
+        ]
         if version.parse(ver) >= version.parse("1.1.0"):
-            expected_calls.append(f"#{UltrastarTxtTag.AUDIO}:{default_ultrastar_class.audio}\n")
-        expected_calls.append(f"#{UltrastarTxtTag.VIDEO}:{default_ultrastar_class.video}\n") # todo: video is optional
-        expected_calls.append(f"#{UltrastarTxtTag.BPM}:390.0\n")
-        expected_calls.append(f"#{UltrastarTxtTag.GAP}:500\n")
-        expected_calls.append(f"#{UltrastarTxtTag.CREATOR}:{default_ultrastar_class.creator}\n")
-        expected_calls.append(f"#{UltrastarTxtTag.COMMENT}:{default_ultrastar_class.comment}\n")
-        expected_calls.append(": 0 52 1 UltraSinger \n")
-        expected_calls.append(": 65 39 2 is \n")
-        expected_calls.append(": 130 52 3 cool! \n")
-        expected_calls.append("E")
+            expected_calls += [f"#{UltrastarTxtTag.AUDIO}:{default_ultrastar_class.audio}\n"]
+        if default_ultrastar_class.video is not None:
+            expected_calls += [
+                f"#{UltrastarTxtTag.VIDEO}:{default_ultrastar_class.video}\n",
+            ]
+        expected_calls += [
+            f"#{UltrastarTxtTag.BPM}:390.0\n",
+            f"#{UltrastarTxtTag.GAP}:500\n",
+            f"#{UltrastarTxtTag.CREATOR}:{default_ultrastar_class.creator}\n",
+            f"#{UltrastarTxtTag.COMMENT}:{default_ultrastar_class.comment}\n",
+            ": 0 52 1 UltraSinger \n",
+            ": 65 39 2 is \n",
+            ": 130 52 3 cool! \n",
+            "E"
+        ]
 
         return expected_calls
 

diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,5 @@
-crepe~=0.0.15
-demucs~=4.0.1
+crepe~=0.0.13
+demucs~=4.0.0
 ffmpeg_python~=0.2.0
 git+https://github.com/m-bain/whisperx.git
 langcodes~=3.4.0
@@ -14,7 +14,7 @@ pydub~=0.25.1
 PyHyphen~=4.0.3
 python_Levenshtein~=0.25.1
 scipy~=1.13.1
-tensorflow<2.11
+tensorflow==2.10.1
 tqdm~=4.66.4
 #whisperx~=3.1.1
 yt_dlp~=2024.5.27

diff --git a/src/Settings.py b/src/Settings.py
@@ -1,3 +1,10 @@
+from dataclasses import dataclass
+
+from dataclasses_json import dataclass_json
+
+
+@dataclass_json
+@dataclass
 class Settings:
     APP_VERSION = "0.0.11-dev4"
 
@@ -8,6 +15,8 @@ class Settings:
     use_separated_vocal = True
     create_karaoke = True
     keep_cache = False
+    ignore_audio = False
+    input_file_is_ultrastar_txt = False
 
     input_file_path = ""
     output_file_path = ""
@@ -30,10 +39,20 @@ class Settings:
     # Pitch
     crepe_model_capacity = "full"  # tiny|small|medium|large|full
     crepe_step_size = 10 # in miliseconds
+    pitch_loudness_threshold = -60
 
     # Device
     pytorch_device = 'cpu'  # cpu|cuda
     tensorflow_device = 'cpu'  # cpu|cuda
     force_cpu = False
     force_whisper_cpu = False
     force_crepe_cpu = False
+
+    # UltraSinger Evaluation Configuration
+    test_songs_input_folder = None
+    cache_override_path = None
+    skip_cache_vocal_separation = False
+    skip_cache_denoise_vocal_audio = False
+    skip_cache_transcription = False
+    skip_cache_pitch_detection = False
+    calculate_score = True