Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/evaluation #146

Open
wants to merge 22 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
MIT License

Copyright (c) 2023 Vadim Rangnau
Copyright (c) 2020 Max Morrison (torchcrepe code adapted for crepe output filtering and thresholding)
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe fork crepe and make a separate package for it? Or PR the changes to the crepe project?

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not fork crepe, move the changes there and release a package?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't sure how to give proper credit here. I would say it's up to you how to handle this and if you want to accept the code in question.

The code here https://github.com/rakuri255/UltraSinger/pull/146/files#diff-7bda13ea2689179c7952b68174b8b8ea2cc2250f9b9699c5cf55939fb6c4ac7d is copied and adapted from here https://github.com/maxrmorrison/torchcrepe/blob/master/torchcrepe/loudness.py


Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ _Not all options working now!_
--hyphenation True|False >> ((default) is True)
--disable_separation True|False >> ((default) is False)
--disable_karaoke True|False >> ((default) is False)
--ignore_audio True|False >> ((default) is False)
--create_audio_chunks True|False >> ((default) is False)
--keep_cache True|False >> ((default) is False)
--plot True|False >> ((default) is False)
Expand Down
66 changes: 65 additions & 1 deletion pytest/modules/Pitcher/test_pitcher.py

Large diffs are not rendered by default.

15 changes: 7 additions & 8 deletions pytest/modules/Speech_Recognition/test_Whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,13 @@ def test_convert_to_transcribed_data(self):

# Words should have space at the end
expected_output = [
TranscribedData(
{"word": "UltraSinger ", "start": 1.23, "end": 2.34, "is_hyphen": None, "confidence": 0.95}),
TranscribedData({"word": "is ", "start": 2.34, "end": 3.45, "is_hyphen": None, "confidence": 0.9}),
TranscribedData({"word": "cool! ", "start": 3.45, "end": 4.56, "is_hyphen": None, "confidence": 0.85}),
TranscribedData({"word": "And ", "start": 4.56, "end": 5.67, "is_hyphen": None, "confidence": 0.95}),
TranscribedData({"word": "will ", "start": 5.67, "end": 6.78, "is_hyphen": None, "confidence": 0.9}),
TranscribedData({"word": "be ", "start": 6.78, "end": 7.89, "is_hyphen": None, "confidence": 0.85}),
TranscribedData({"word": "better! ", "start": 7.89, "end": 9.01, "is_hyphen": None, "confidence": 0.8}),
TranscribedData.from_dict({"word": "UltraSinger ", "start": 1.23, "end": 2.34, "is_hyphen": None, "confidence": 0.95}),
TranscribedData.from_dict({"word": "is ", "start": 2.34, "end": 3.45, "is_hyphen": None, "confidence": 0.9}),
TranscribedData.from_dict({"word": "cool! ", "start": 3.45, "end": 4.56, "is_hyphen": None, "confidence": 0.85}),
TranscribedData.from_dict({"word": "And ", "start": 4.56, "end": 5.67, "is_hyphen": None, "confidence": 0.95}),
TranscribedData.from_dict({"word": "will ", "start": 5.67, "end": 6.78, "is_hyphen": None, "confidence": 0.9}),
TranscribedData.from_dict({"word": "be ", "start": 6.78, "end": 7.89, "is_hyphen": None, "confidence": 0.85}),
TranscribedData.from_dict({"word": "better! ", "start": 7.89, "end": 9.01, "is_hyphen": None, "confidence": 0.8}),
]

# Act
Expand Down
30 changes: 30 additions & 0 deletions pytest/modules/UltraSinger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Tests for UltraSinger.py"""

import os
import unittest
import src.modules.Pitcher.pitcher as test_subject

import pytest
from src.modules.plot import plot


class PitcherTest(unittest.TestCase):
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is also in test_pitcher.py

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

resolved

# @pytest.mark.skip(reason="Skipping this FUNCTION level test, can be used for manual tests")
def test_get_pitch_with_crepe_file(self):
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test can be removed. PitcherTest class is in test_pitcher.py

# Arrange
test_dir = os.path.dirname(os.path.abspath(__file__))
root_dir = os.path.abspath(test_dir + "/../../..")
# test_file_abs_path = os.path.abspath(root_dir + "/test_input/audio_denoised.wav")
test_file_abs_path = os.path.abspath(root_dir + "/test_input/test_denoised.wav")
test_output = root_dir + "/test_output"

# Act
# pitched_data = test_subject.get_pitch_with_crepe_file(test_file_abs_path, 'full', device="cuda")
# test_subject.get_pitch_with_crepe_file(test_file_abs_path, 'full', 'cpu', batch_size=1024)
# plot(pitched_data, test_output, title="pitching test")

print("done")


if __name__ == "__main__":
unittest.main()
45 changes: 26 additions & 19 deletions pytest/modules/UltraStar/test_ultrastar_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,20 +68,20 @@ def test_create_ultrastar_txt_from_automation_full_values(self):
def arrange(self):
# Arrange
transcribed_data = [
TranscribedData({
"conf": 0.95,
TranscribedData.from_dict({
"confidence": 0.95,
"word": "UltraSinger ",
"end": 2.5,
"start": 0.5
}),
TranscribedData({
"conf": 0.9,
TranscribedData.from_dict({
"confidence": 0.9,
"word": "is ",
"end": 4.5,
"start": 3.0
}),
TranscribedData({
"conf": 0.85,
TranscribedData.from_dict({
"confidence": 0.85,
"word": "cool! ",
"end": 7.5,
"start": 5.5
Expand Down Expand Up @@ -110,20 +110,27 @@ def default_values(default_ultrastar_class, ver):
expected_calls = []
if version.parse(ver) >= version.parse("1.0.0"):
expected_calls.append(f"#{UltrastarTxtTag.VERSION}:{default_ultrastar_class.version}\n")
expected_calls.append(f"#{UltrastarTxtTag.ARTIST}:{default_ultrastar_class.artist}\n")
expected_calls.append(f"#{UltrastarTxtTag.TITLE}:{default_ultrastar_class.title}\n")
expected_calls.append(f"#{UltrastarTxtTag.MP3}:{default_ultrastar_class.mp3}\n")
expected_calls += [
f"#{UltrastarTxtTag.ARTIST}:{default_ultrastar_class.artist}\n",
f"#{UltrastarTxtTag.TITLE}:{default_ultrastar_class.title}\n",
f"#{UltrastarTxtTag.MP3}:{default_ultrastar_class.mp3}\n"
]
if version.parse(ver) >= version.parse("1.1.0"):
expected_calls.append(f"#{UltrastarTxtTag.AUDIO}:{default_ultrastar_class.audio}\n")
expected_calls.append(f"#{UltrastarTxtTag.VIDEO}:{default_ultrastar_class.video}\n") # todo: video is optional
expected_calls.append(f"#{UltrastarTxtTag.BPM}:390.0\n")
expected_calls.append(f"#{UltrastarTxtTag.GAP}:500\n")
expected_calls.append(f"#{UltrastarTxtTag.CREATOR}:{default_ultrastar_class.creator}\n")
expected_calls.append(f"#{UltrastarTxtTag.COMMENT}:{default_ultrastar_class.comment}\n")
expected_calls.append(": 0 52 1 UltraSinger \n")
expected_calls.append(": 65 39 2 is \n")
expected_calls.append(": 130 52 3 cool! \n")
expected_calls.append("E")
expected_calls += [f"#{UltrastarTxtTag.AUDIO}:{default_ultrastar_class.audio}\n"]
if default_ultrastar_class.video is not None:
expected_calls += [
f"#{UltrastarTxtTag.VIDEO}:{default_ultrastar_class.video}\n",
]
expected_calls += [
f"#{UltrastarTxtTag.BPM}:390.0\n",
f"#{UltrastarTxtTag.GAP}:500\n",
f"#{UltrastarTxtTag.CREATOR}:{default_ultrastar_class.creator}\n",
f"#{UltrastarTxtTag.COMMENT}:{default_ultrastar_class.comment}\n",
": 0 52 1 UltraSinger \n",
": 65 39 2 is \n",
": 130 52 3 cool! \n",
"E"
]

return expected_calls

Expand Down
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
crepe~=0.0.15
demucs~=4.0.1
crepe~=0.0.13
demucs~=4.0.0
ffmpeg_python~=0.2.0
git+https://github.com/m-bain/whisperx.git
langcodes~=3.4.0
Expand All @@ -14,7 +14,7 @@ pydub~=0.25.1
PyHyphen~=4.0.3
python_Levenshtein~=0.25.1
scipy~=1.13.1
tensorflow<2.11
tensorflow==2.10.1
tqdm~=4.66.4
#whisperx~=3.1.1
yt_dlp~=2024.5.27
Expand Down
19 changes: 19 additions & 0 deletions src/Settings.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
from dataclasses import dataclass

from dataclasses_json import dataclass_json


@dataclass_json
@dataclass
class Settings:
APP_VERSION = "0.0.11-dev4"

Expand All @@ -8,6 +15,8 @@ class Settings:
use_separated_vocal = True
create_karaoke = True
keep_cache = False
ignore_audio = False
input_file_is_ultrastar_txt = False

input_file_path = ""
output_file_path = ""
Expand All @@ -30,10 +39,20 @@ class Settings:
# Pitch
crepe_model_capacity = "full" # tiny|small|medium|large|full
crepe_step_size = 10 # in milliseconds
pitch_loudness_threshold = -60

# Device
pytorch_device = 'cpu' # cpu|cuda
tensorflow_device = 'cpu' # cpu|cuda
force_cpu = False
force_whisper_cpu = False
force_crepe_cpu = False

# UltraSinger Evaluation Configuration
test_songs_input_folder = None
cache_override_path = None
skip_cache_vocal_separation = False
skip_cache_denoise_vocal_audio = False
skip_cache_transcription = False
skip_cache_pitch_detection = False
calculate_score = True
Loading
Loading