Skip to content

Commit

Permalink
Merge pull request #831 from AznamirWoW/logging
Browse files Browse the repository at this point in the history
Replaced 'Sync Graph' with 'New Model' setting on UI.
  • Loading branch information
blaisewf authored Oct 26, 2024
2 parents 1f743e9 + a4ff553 commit 5ebc144
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 137 deletions.
4 changes: 2 additions & 2 deletions assets/Applio_NoUI.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@
"pitch_guidance = True # @param{type:\"boolean\"}\n",
"auto_backups = True # @param{type:\"boolean\"}\n",
"pretrained = True # @param{type:\"boolean\"}\n",
"sync_graph = False # @param{type:\"boolean\"}\n",
"cleanup = False # @param{type:\"boolean\"}\n",
"cache_data_in_gpu = False # @param{type:\"boolean\"}\n",
"tensorboard = True # @param{type:\"boolean\"}\n",
"# @markdown ### ➡️ Choose how many epochs your model will be stored\n",
Expand Down Expand Up @@ -638,7 +638,7 @@
" if tensorboard == True:\n",
" %load_ext tensorboard\n",
" %tensorboard --logdir /content/Applio/logs/\n",
" !python core.py train --model_name \"{model_name}\" --rvc_version \"{rvc_version}\" --save_every_epoch \"{save_every_epoch}\" --save_only_latest \"{save_only_latest}\" --save_every_weights \"{save_every_weights}\" --total_epoch \"{total_epoch}\" --sample_rate \"{sr}\" --batch_size \"{batch_size}\" --gpu \"{gpu}\" --pitch_guidance \"{pitch_guidance}\" --pretrained \"{pretrained}\" --custom_pretrained \"{custom_pretrained}\" --g_pretrained_path \"{g_pretrained_path}\" --d_pretrained_path \"{d_pretrained_path}\" --overtraining_detector \"{overtraining_detector}\" --overtraining_threshold \"{overtraining_threshold}\" --sync_graph \"{sync_graph}\" --cache_data_in_gpu \"{cache_data_in_gpu}\"\n",
" !python core.py train --model_name \"{model_name}\" --rvc_version \"{rvc_version}\" --save_every_epoch \"{save_every_epoch}\" --save_only_latest \"{save_only_latest}\" --save_every_weights \"{save_every_weights}\" --total_epoch \"{total_epoch}\" --sample_rate \"{sr}\" --batch_size \"{batch_size}\" --gpu \"{gpu}\" --pitch_guidance \"{pitch_guidance}\" --pretrained \"{pretrained}\" --custom_pretrained \"{custom_pretrained}\" --g_pretrained_path \"{g_pretrained_path}\" --d_pretrained_path \"{d_pretrained_path}\" --overtraining_detector \"{overtraining_detector}\" --overtraining_threshold \"{overtraining_threshold}\" --cleanup \"{cleanup}\" --cache_data_in_gpu \"{cache_data_in_gpu}\"\n",
"\n",
"\n",
"server_thread = threading.Thread(target=start_train)\n",
Expand Down
4 changes: 2 additions & 2 deletions assets/i18n/languages/en_US.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@
"Overtraining Detector Settings": "Overtraining Detector Settings",
"Overtraining Threshold": "Overtraining Threshold",
"Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "Set the maximum number of epochs you want your model to stop training if no improvement is detected.",
"Sync Graph": "Sync Graph",
"Synchronize the graph of the tensorboard. Only enable this setting if you are training a new model.": "Synchronize the graph of the tensorboard. Only enable this setting if you are training a new model.",
"New Model": "New Model",
"Enable this setting only if you are training a new model from scratch or restarting the training. Deletes all previously generated weights and tensorboard logs.": "Enable this setting only if you are training a new model from scratch or restarting the training. Deletes all previously generated weights and tensorboard logs.",
"Start Training": "Start Training",
"Stop Training": "Stop Training",
"Generate Index": "Generate Index",
Expand Down
10 changes: 5 additions & 5 deletions core.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ def run_train_script(
overtraining_detector: bool,
overtraining_threshold: int,
pretrained: bool,
sync_graph: bool,
cleanup: bool,
index_algorithm: str = "Auto",
cache_data_in_gpu: bool = False,
custom_pretrained: bool = False,
Expand Down Expand Up @@ -575,7 +575,7 @@ def run_train_script(
cache_data_in_gpu,
overtraining_detector,
overtraining_threshold,
sync_graph,
cleanup,
],
),
]
Expand Down Expand Up @@ -2129,10 +2129,10 @@ def parse_arguments():
default=50,
)
train_parser.add_argument(
"--sync_graph",
"--cleanup",
type=lambda x: bool(strtobool(x)),
choices=[True, False],
help="Enable graph synchronization for distributed training.",
help="Cleanup previous training attempt.",
default=False,
)
train_parser.add_argument(
Expand Down Expand Up @@ -2529,7 +2529,7 @@ def main():
overtraining_threshold=args.overtraining_threshold,
pretrained=args.pretrained,
custom_pretrained=args.custom_pretrained,
sync_graph=args.sync_graph,
cleanup=args.cleanup,
index_algorithm=args.index_algorithm,
cache_data_in_gpu=args.cache_data_in_gpu,
g_pretrained_path=args.g_pretrained_path,
Expand Down
176 changes: 52 additions & 124 deletions rvc/train/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
cache_data_in_gpu = strtobool(sys.argv[13])
overtraining_detector = strtobool(sys.argv[14])
overtraining_threshold = int(sys.argv[15])
sync_graph = strtobool(sys.argv[16])
cleanup = strtobool(sys.argv[16])

current_dir = os.getcwd()
experiment_dir = os.path.join(current_dir, "logs", model_name)
Expand Down Expand Up @@ -198,8 +198,8 @@ def start():
pretrainG,
pretrainD,
pitch_guidance,
custom_total_epoch,
custom_save_every_weights,
total_epoch,
save_every_weights,
config,
device,
),
Expand Down Expand Up @@ -246,56 +246,9 @@ def continue_overtrain_detector(training_file_path):
smoothed_loss_gen_history,
) = load_from_json(training_file_path)

if sync_graph:
print(
"Sync graph is now activated! With sync graph enabled, the model undergoes a single epoch of training. Once the graphs are synchronized, training proceeds for the previously specified number of epochs."
)
custom_total_epoch = 1
custom_save_every_weights = True

start()

# Synchronize graphs by modifying config files
model_config_file = os.path.join(experiment_dir, "config.json")
rvc_config_file = os.path.join(
now_dir, "rvc", "configs", version, str(sample_rate) + ".json"
)
if not os.path.exists(rvc_config_file):
rvc_config_file = os.path.join(
now_dir, "rvc", "configs", "v1", str(sample_rate) + ".json"
)

pattern = rf"{os.path.basename(model_name)}_(\d+)e_(\d+)s\.pth"

for filename in os.listdir(experiment_dir):
match = re.match(pattern, filename)
if match:
steps = int(match.group(2))

def edit_config(config_file):
"""
Edits the config file to synchronize graphs.
Args:
config_file (str): Path to the config file.
"""
with open(config_file, "r", encoding="utf8") as json_file:
config_data = json.load(json_file)

config_data["train"]["log_interval"] = steps

with open(config_file, "w", encoding="utf8") as json_file:
json.dump(
config_data,
json_file,
indent=2,
separators=(",", ": "),
ensure_ascii=False,
)

edit_config(model_config_file)
edit_config(rvc_config_file)

if cleanup:
print("Removing files from the prior training attempt...")

# Clean up unnecessary files
for root, dirs, files in os.walk(
os.path.join(now_dir, "logs", model_name), topdown=False
Expand All @@ -319,17 +272,10 @@ def edit_config(config_file):
os.remove(item_path)
os.rmdir(folder_path)

print("Successfully synchronized graphs!")
custom_total_epoch = total_epoch
custom_save_every_weights = save_every_weights
continue_overtrain_detector(training_file_path)
start()
else:
custom_total_epoch = total_epoch
custom_save_every_weights = save_every_weights
continue_overtrain_detector(training_file_path)
start()

print("Cleanup done!")

continue_overtrain_detector(training_file_path)
start()

def run(
rank,
Expand Down Expand Up @@ -729,9 +675,7 @@ def train_and_evaluate(
y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = net_d(wave, y_hat)
with autocast(enabled=False):
loss_mel = F.l1_loss(y_mel, y_hat_mel) * config.train.c_mel
loss_kl = (
kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * config.train.c_kl
)
loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * config.train.c_kl
loss_fm = feature_loss(fmap_r, fmap_g)
loss_gen, losses_gen = generator_loss(y_d_hat_g)
loss_gen_all = loss_gen + loss_fm + loss_mel + loss_kl
Expand All @@ -753,66 +697,50 @@ def train_and_evaluate(
scaler.step(optim_g)
scaler.update()

# Logging and checkpointing
if rank == 0:
if global_step % config.train.log_interval == 0:
lr = optim_g.param_groups[0]["lr"]
if loss_mel > 75:
loss_mel = 75
if loss_kl > 9:
loss_kl = 9
scalar_dict = {
"loss/g/total": loss_gen_all,
"loss/d/total": loss_disc,
"learning_rate": lr,
"grad_norm_d": grad_norm_d,
"grad_norm_g": grad_norm_g,
}
scalar_dict.update(
{
"loss/g/fm": loss_fm,
"loss/g/mel": loss_mel,
"loss/g/kl": loss_kl,
}
)
scalar_dict.update(
{f"loss/g/{i}": v for i, v in enumerate(losses_gen)}
)
scalar_dict.update(
{f"loss/d_r/{i}": v for i, v in enumerate(losses_disc_r)}
)
scalar_dict.update(
{f"loss/d_g/{i}": v for i, v in enumerate(losses_disc_g)}
)
image_dict = {
"slice/mel_org": plot_spectrogram_to_numpy(
y_mel[0].data.cpu().numpy()
),
"slice/mel_gen": plot_spectrogram_to_numpy(
y_hat_mel[0].data.cpu().numpy()
),
"all/mel": plot_spectrogram_to_numpy(mel[0].data.cpu().numpy()),
}

with torch.no_grad():
if hasattr(net_g, "module"):
o, *_ = net_g.module.infer(*reference)
else:
o, *_ = net_g.infer(*reference)
audio_dict = {f"gen/audio_{global_step:07d}": o[0, :, :]}

summarize(
writer=writer,
global_step=global_step,
images=image_dict,
scalars=scalar_dict,
audios=audio_dict,
audio_sample_rate=config.data.sample_rate,
)

global_step += 1
pbar.update(1)

# Logging and checkpointing
if rank == 0:
lr = optim_g.param_groups[0]["lr"]
if loss_mel > 75:
loss_mel = 75
if loss_kl > 9:
loss_kl = 9
scalar_dict = {
"loss/g/total": loss_gen_all,
"loss/d/total": loss_disc,
"learning_rate": lr,
"grad_norm_d": grad_norm_d,
"grad_norm_g": grad_norm_g,
"loss/g/fm": loss_fm,
"loss/g/mel": loss_mel,
"loss/g/kl": loss_kl,
}
# commented out
#scalar_dict.update({f"loss/g/{i}": v for i, v in enumerate(losses_gen)})
#scalar_dict.update({f"loss/d_r/{i}": v for i, v in enumerate(losses_disc_r)})
#scalar_dict.update({f"loss/d_g/{i}": v for i, v in enumerate(losses_disc_g)})

image_dict = {
"slice/mel_org": plot_spectrogram_to_numpy(y_mel[0].data.cpu().numpy()),
"slice/mel_gen": plot_spectrogram_to_numpy(y_hat_mel[0].data.cpu().numpy()),
"all/mel": plot_spectrogram_to_numpy(mel[0].data.cpu().numpy()),
}

with torch.no_grad():
o, *_ = net_g.infer(*reference)
audio_dict = {f"gen/audio_{global_step:07d}": o[0, :, :]}

summarize(
writer=writer,
global_step=global_step,
images=image_dict,
scalars=scalar_dict,
audios=audio_dict,
audio_sample_rate=config.data.sample_rate,
)

# Save checkpoint
model_add = []
model_del = []
Expand Down
8 changes: 4 additions & 4 deletions tabs/train/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,10 +622,10 @@ def train_tab():
interactive=True,
)
with gr.Column():
sync_graph = gr.Checkbox(
label=i18n("Sync Graph"),
cleanup = gr.Checkbox(
label=i18n("New Model"),
info=i18n(
                            "Synchronize the graph of the tensorboard. Only enable this setting if you are training a new model."
"Enable this setting only if you are training a new model from scratch or restarting the training. Deletes all previously generated weights and tensorboard logs."
),
value=False,
interactive=True,
Expand Down Expand Up @@ -768,7 +768,7 @@ def train_tab():
overtraining_detector,
overtraining_threshold,
pretrained,
sync_graph,
cleanup,
index_algorithm,
cache_dataset_in_gpu,
custom_pretrained,
Expand Down

0 comments on commit 5ebc144

Please sign in to comment.