forked from comfyanonymous/ComfyUI
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgradio_main.py
130 lines (111 loc) · 4.01 KB
/
gradio_main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import ChatTTS
import torchaudio
import torch
import os
import numpy as np
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import gradio as gr
# Build the UI inside one Blocks context; the resulting app object is bound
# to `demo`, which is what gets launched at the bottom of the script.
with gr.Blocks() as demo:
# Page header: a title followed by a feature checklist. Only the last item
# (text-to-speech) is ticked; the others are still open.
gr.Markdown(
"""# 高风险需求实现-测试版
## 视频处理
- [ ] 视频去水印
- [ ] 视频去字幕
## 视频优化
- [ ] 视频加字幕
- [ ] 文案生成
- [x] 文字生成语音
"""
)
# Tab "视频处理" (video processing).
with gr.Tab("视频处理"):
# Tab "视频去水印" (video watermark removal) — presumably nested in the tab
# above; the original indentation is not visible here.
with gr.Tab("视频去水印"):
# TODO: add video watermark removal
def VideoWatermarkRemoval(video):
    """Placeholder click handler for the watermark-removal tab.

    Watermark removal is not implemented yet, so the input is handed back
    untouched.

    Args:
        video: The value supplied by the bound input component.

    Returns:
        The same ``video`` object, unmodified.
    """
    return video
# Widgets for the watermark-removal tab: an upload video, a result video
# with a download button, and the trigger button.
# `inp`, `out` and `sub` are rebound for each tab further down.
inp = gr.Video(label="上传视频")
out = gr.Video(label="生成视频", show_download_button=True)
sub = gr.Button("视频去水印")
# On click, pass the value of `inp` to the handler and show its return
# value in `out`.
sub.click(VideoWatermarkRemoval, inputs=inp, outputs=out)
# No-op statement; it has no effect on the UI.
pass
# Tab "视频去字幕" (video subtitle removal).
with gr.Tab("视频去字幕"):
# TODO: add video subtitle removal
def VideoSubtitleRemoval(video):
    """Placeholder click handler for the subtitle-removal tab.

    Subtitle removal is not implemented yet, so the input is handed back
    untouched.

    Args:
        video: The value supplied by the bound input component.

    Returns:
        The same ``video`` object, unmodified.
    """
    return video
# Widgets for the subtitle-removal tab: upload video, result video with a
# download button, and the trigger button.
inp = gr.Video(label="上传视频")
out = gr.Video(label="生成视频", show_download_button=True)
sub = gr.Button("视频去字幕")
# On click, pass the value of `inp` to the handler and show its return
# value in `out`.
sub.click(VideoSubtitleRemoval, inputs=inp, outputs=out)
# No-op statement; it has no effect on the UI.
pass
# Tab "视频优化" (video optimisation).
with gr.Tab("视频优化"):
# Tab "视频生成字幕" (generate subtitles for a video) — presumably nested in
# the tab above; the original indentation is not visible here.
with gr.Tab("视频生成字幕"):
# TODO: add video captioning
def VideoCaptioning(video):
    """Placeholder click handler for the captioning tab.

    Caption generation is not implemented yet, so the input is handed back
    untouched.

    Args:
        video: The value supplied by the bound input component.

    Returns:
        The same ``video`` object, unmodified.
    """
    return video
# Widgets for the captioning tab: upload video, result video with a
# download button, and the trigger button.
inp = gr.Video(label="上传视频")
out = gr.Video(label="生成视频", show_download_button=True)
sub = gr.Button("字幕生成")
# On click, pass the value of `inp` to the handler and show its return
# value in `out`.
sub.click(VideoCaptioning, inputs=inp, outputs=out)
# No-op statement; it has no effect on the UI.
pass
# Tab "文案生成" (copywriting generation).
with gr.Tab("文案生成"):
# TODO: add text generation
def TongYiGen(title):
    """Return placeholder copy built around ``title``.

    Real text generation is not implemented yet; the title is simply wrapped
    in a fixed template sentence.

    Args:
        title: Article title entered by the user. ``None`` is treated as an
            empty title.

    Returns:
        The template sentence with ``title`` inserted between the brackets.
    """
    # A missing value would make plain string concatenation raise TypeError,
    # so normalise it to an empty title first.
    if title is None:
        title = ""
    return f"【{title}】是一部好看的电影,值得一看。"
# Widgets for the copywriting tab: a title textbox, a 20-line result textbox
# with a copy button, and the trigger button.
inp = gr.Textbox(label="请输入文章标题")
out = gr.Textbox(label="生成文案", lines=20, show_copy_button=True)
sub = gr.Button("生成文案")
# On click, pass the title text to the handler and show the returned string
# in `out`.
sub.click(TongYiGen, inputs=inp, outputs=out)
# No-op statement; it has no effect on the UI.
pass
# Tab "文字生成语音" (text-to-speech).
with gr.Tab("文字生成语音"):
def test_audio_generation(octave=10, note=10, duration=1):
    """Synthesize a sine tone as ``(sample_rate, samples)``.

    This is a stand-in for real audio generation. Called without arguments
    it produces exactly the tone that used to be hard-coded (octave 10,
    note index 10, one second).

    NOTE(review): the ``test_`` prefix makes pytest collect this helper if it
    is imported into a test module.

    Args:
        octave: Octave number. Octave 4 with note ``"A"`` gives 440 Hz.
        note: Semitone within the octave, either an index into the chromatic
            scale starting at C (0 = C ... 11 = B) or a note name such as
            ``"A#"``.
        duration: Length in seconds; truncated to a whole number.

    Returns:
        A tuple ``(sr, audio)`` where ``sr`` is 48000 and ``audio`` is a 1-D
        ``np.int16`` array holding ``int(duration) * sr`` samples.

    Raises:
        ValueError: If ``note`` is a string that is not a known note name.
    """
    notes = (
        "C",
        "C#",
        "D",
        "D#",
        "E",
        "F",
        "F#",
        "G",
        "G#",
        "A",
        "A#",
        "B",
    )
    if isinstance(note, str):
        # tuple.index raises ValueError for an unknown name.
        note = notes.index(note)

    # fake generation
    sr = 48000
    a4_freq = 440
    # Signed distance in semitones from A4 (octave 4, index 9).
    tones_from_a4 = 12 * (octave - 4) + (note - 9)
    # NOTE: with the defaults this is roughly 29.8 kHz, which is above
    # sr / 2, so the sampled tone is aliased.
    frequency = a4_freq * 2 ** (tones_from_a4 / 12)
    duration = int(duration)
    audio = np.linspace(0, duration, duration * sr)
    audio = (20000 * np.sin(audio * (2 * np.pi * frequency))).astype(np.int16)
    return sr, audio
def _load_chat():
    """Return the shared ChatTTS model, loading it from disk on first use."""
    chat = getattr(_load_chat, "_chat", None)
    if chat is None:
        chat = ChatTTS.Chat()
        # The model path below is relative, so print the working directory to
        # make a failed load easy to diagnose.
        print(os.getcwd())
        loaded = chat.load(
            source="custom", custom_path="../models/ChatTTS", compile=False
        )
        # Only an explicit False is treated as failure, and a failed instance
        # is never cached, so the next call retries the load.
        if loaded is False:
            raise RuntimeError("failed to load ChatTTS model from ../models/ChatTTS")
        _load_chat._chat = chat
    return chat


def Chatts_gen(text):
    """Synthesize speech for ``text`` with ChatTTS.

    The model is loaded once and reused on later calls instead of being
    reloaded on every click. The generated audio is also written to
    ``../output/audios/gradio_output_1.wav``, overwriting any previous file.

    Args:
        text: The text to speak.

    Returns:
        A tuple ``(24000, wav)`` where ``wav`` is the first element of the
        list returned by ``chat.infer``.

    Raises:
        gr.Error: If ``text`` is empty or only whitespace.
        RuntimeError: If the model load reports failure.
    """
    if not text or not text.strip():
        # Surface a readable message in the UI instead of running inference
        # on blank input.
        raise gr.Error("请输入要合成的文本")

    chat = _load_chat()
    wavs = chat.infer([text])

    out_path = "../output/audios/gradio_output_1.wav"
    # Saving fails if the target directory is missing, so create it first.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    torchaudio.save(
        out_path,
        torch.from_numpy(wavs[0]).unsqueeze(0),
        24000,
    )
    return 24000, wavs[0]
# Text input for the text-to-speech tab: a 10-line textbox whose label and
# `max_length` both state a limit of about 350 characters.
inp = gr.Textbox(
placeholder="输入文字",
lines=10,
label="请输入文本,最大350字左右",
max_length=350,
)
# Audio output with a download button.
out = gr.Audio(show_download_button=True)
sub = gr.Button("生成语音")
# On click, pass the entered text to the handler and show the returned audio
# in `out`.
sub.click(Chatts_gen, inputs=inp, outputs=out)
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
demo.launch()