-
Notifications
You must be signed in to change notification settings - Fork 1
/
dlsub.py
78 lines (63 loc) · 3.07 KB
/
dlsub.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import os
import json
import yaml
from src.helper import ArgumentParser
from src.transcript_manager import download_and_process_transcript
from src.transcript_processor import TranscriptProcessor
from src.markai import AiProcessor
class DlSub:
    """Command-line driver: download a transcript, then optionally post-process it.

    The parsed command-line arguments are kept on ``self.args``; the methods
    read ``output``, ``language``, ``format``, ``use_ai`` and ``summarize``
    from it.
    """

    def __init__(self):
        """Create the project's ``ArgumentParser`` and parse the arguments once."""
        self.argument_parser = ArgumentParser()
        self.args = self.argument_parser.parse_arguments()

    @staticmethod
    def _next_free_path(stem, suffix='.txt'):
        """Return the first ``{stem}_{n}{suffix}`` (n = 1, 2, ...) that does not exist."""
        counter = 1
        while os.path.exists(f"{stem}_{counter}{suffix}"):
            counter += 1
        return f"{stem}_{counter}{suffix}"

    def format_transcript(self, formatted_transcript):
        """Write the formatted transcript lines to a ``.txt`` file.

        The target is ``self.args.output`` with its extension replaced by
        ``.txt``.  If that file already exists the user is asked whether to
        overwrite it; on any answer other than ``y`` the text goes to the
        first unused ``<stem>_<n>.txt`` instead, so no existing file is
        overwritten without consent.

        Args:
            formatted_transcript: Iterable of lines; each is written followed
                by a newline.
        """
        stem = os.path.splitext(self.args.output)[0]
        output_file = stem + '.txt'
        if os.path.exists(output_file):
            overwrite = input(f"File {output_file} already exists. Overwrite? (y/n) ")
            if overwrite.strip().lower() != 'y':
                # Previously this was always "<stem>_1.txt", which could itself
                # already exist and be silently replaced.
                output_file = self._next_free_path(stem)
        with open(output_file, 'w', encoding='utf-8') as f:
            f.writelines(f"{line}\n" for line in formatted_transcript)
        print(f"Processed transcript saved in {output_file}.")

    def process_with_ai(self, ai_processor, formatted_transcript):
        """Delegate to ``ai_processor.process_with_ai`` and report its result.

        Args:
            ai_processor: Object exposing ``process_with_ai(args, transcript)``.
            formatted_transcript: Transcript handed through unchanged.
        """
        output_file_ai = ai_processor.process_with_ai(self.args, formatted_transcript)
        print(f"AI processed transcript saved in {output_file_ai}.")

    def summarize_with_ai(self, ai_processor, formatted_transcript):
        """Delegate to ``ai_processor.summarize_with_ai`` and report its result.

        Args:
            ai_processor: Object exposing ``summarize_with_ai(args, transcript)``.
            formatted_transcript: Transcript handed through unchanged.
        """
        output_file_ai = ai_processor.summarize_with_ai(self.args, formatted_transcript)
        print(f"AI summarized transcript saved in {output_file_ai}.")

    def run(self):
        """Run the pipeline: download, save as JSON, then format / AI steps.

        Returns early (after printing a message) when the download fails or
        when the AI configuration in ``config_ai.yml`` cannot be used.
        """
        # Download and process transcript; the second element of the returned
        # pair is not needed here.
        transcript_list, _ = download_and_process_transcript(self.args)
        if transcript_list is None:
            print("Transcript download failed.")
            return

        # Save transcript to output file
        with open(self.args.output, 'w', encoding='utf-8') as f:
            json.dump(transcript_list, f)
        print(f"Transcript saved in {self.args.output}.")

        wants_ai = self.args.use_ai or self.args.summarize
        if not (self.args.format or wants_ai):
            return

        # Every later step consumes the formatted transcript, so it must also
        # be built for a summarize-only run (the old gate left it undefined).
        with open(self.args.output, 'r', encoding='utf-8') as f:
            raw_transcript = json.load(f)
        processor = TranscriptProcessor(raw_transcript, self.args.language)
        formatted_transcript = processor.format_transcript()

        if self.args.format:
            self.format_transcript(formatted_transcript)

        if not wants_ai:
            return

        # Load API key from config_ai.yml
        try:
            with open("config_ai.yml", 'r', encoding='utf-8') as stream:
                config = yaml.safe_load(stream)
            api_key = config['api_key']
        except (OSError, yaml.YAMLError) as exc:
            # Missing/unreadable file or malformed YAML.
            print(exc)
            return
        except (KeyError, TypeError):
            # Empty file (safe_load gives None), non-mapping, or no such key.
            print("config_ai.yml does not define 'api_key'.")
            return

        ai_processor = AiProcessor(api_key)

        # Format transcript with AI and/or summarize it
        if self.args.use_ai:
            self.process_with_ai(ai_processor, formatted_transcript)
        if self.args.summarize:
            self.summarize_with_ai(ai_processor, formatted_transcript)
# Script entry point: build the application (which parses the command line)
# and run the pipeline.
if __name__ == '__main__':
    DlSub().run()