-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsplit_audio.py
119 lines (98 loc) · 4.68 KB
/
split_audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
"""
This script splits an audio file into segments based on a provided cut list.
Each segment is named according to the titles specified in the cut list.
Usage:
python split_audio.py -c cutlist.txt -i input.mp3 -o output_folder -f mp3
Arguments:
-c, --cutlist: Path to the cut list file.
-i, --input: Path to the input audio file.
-o, --output: Path to the output folder where the segments will be saved as MP3 files.
-f, --format: Format of the input audio file (e.g., mp3, wav, ogg, flac, aac, aiff, m4a, mp4, wma, alac).
Cut List Format:
The cut list file should contain lines in the following format:
<time_mark> <title>
- <time_mark>: Time in the format 'mm:ss' (minutes:seconds).
- <title>: The name of the segment.
Example Cut List:
0:00 The_Beginning_of_an_Epic_Adventure
1:30 The_Rise_of_the_Hero's_Journey
3:45 The_Climax_of_the_Story!
In this example:
- The first segment is named 'The_Beginning_of_an_Epic_Adventure' and starts at 0:00.
- The second segment is named 'The_Rise_of_the_Hero's_Journey' and starts at 1:30.
- The third segment is named 'The_Climax_of_the_Story!' and starts at 3:45.
Note:
- The script sanitizes the titles to ensure they are safe for use as file names.
- Unsafe characters (e.g., /, \, :, *, ?, ", <, >, |, ', `) are replaced with underscores (_).
"""
import argparse
from pydub import AudioSegment
import os
import re
def parse_cut_list(file_path):
    """Parse a cut list file into a list of ``(start_ms, title)`` tuples.

    Every non-empty line must look like ``<time_mark> <title>``, where the
    two fields are separated by any run of whitespace (spaces or tabs).
    The time mark is ``mm:ss`` or ``hh:mm:ss``; each component must be a
    non-negative integer. Malformed lines are reported on stdout and
    skipped, so one bad line does not abort the whole run.

    Args:
        file_path: Path to the cut list text file.

    Returns:
        A list of ``(time_in_ms, title)`` tuples in file order. The list is
        empty when the file contains no valid lines.
    """
    cut_list = []
    with open(file_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            line = line.strip()  # Remove leading/trailing whitespace
            if not line:  # Skip empty lines
                continue

            # Split on any whitespace run so tab-separated or multi-space
            # lines work and the title never starts with stray whitespace.
            parts = line.split(None, 1)
            if len(parts) != 2:
                print(f"Error: Line {line_number} in cut list file is not in the correct format.")
                print(f"Line: '{line}'")
                continue

            time_mark, title = parts
            try:
                fields = [int(field) for field in time_mark.split(':')]
            except ValueError:
                fields = []

            # Accept mm:ss or hh:mm:ss only; a negative component would turn
            # into a negative slice offset (counted from the end of the audio).
            if len(fields) not in (2, 3) or min(fields) < 0:
                print(f"Error: Invalid time mark format on line {line_number}.")
                print(f"Line: '{line}'")
                continue

            # Fold [hh,] mm, ss into a total number of seconds.
            total_seconds = 0
            for field in fields:
                total_seconds = total_seconds * 60 + field
            cut_list.append((total_seconds * 1000, title))
    return cut_list
def sanitize_filename(filename):
    """Return *filename* with file-name-unsafe characters turned into underscores.

    Characters such as path separators, quotes, and wildcards are each
    replaced by a single ``_``; every other character is kept as-is.
    """
    unsafe_chars = re.compile(r'[<>:"/\\|?*\'`]')
    return unsafe_chars.sub('_', filename)
def format_time(milliseconds):
    """Render a millisecond count as a zero-padded ``MM:SS`` string.

    The sub-second remainder is discarded, and minutes are not wrapped at
    60, so long durations simply show a larger minute count.
    """
    whole_seconds = milliseconds // 1000
    mins = whole_seconds // 60
    secs = whole_seconds % 60
    return f"{mins:02}:{secs:02}"
def split_audio(input_file, cut_list, output_folder, input_format):
    """Split an audio file into MP3 segments at the cut list's time marks.

    Each entry starts a segment that runs until the next entry's start
    time; the final entry runs to the end of the audio. Segments are
    written to ``output_folder`` as ``<sanitized title>.mp3``.

    Args:
        input_file: Path to the source audio file.
        cut_list: Sequence of ``(start_ms, title)`` pairs, in playback order.
        output_folder: Directory for the exported segments; created if it
            does not exist.
        input_format: Format name of the input file, passed to
            ``AudioSegment.from_file``.

    Returns:
        None. With an empty cut list, an error is printed and nothing is
        loaded, created, or exported.
    """
    if not cut_list:
        # Without entries there is no segment to cut; report it instead of
        # failing with an IndexError after the audio has been decoded.
        print("Error: The cut list is empty; nothing to split.")
        return

    audio = AudioSegment.from_file(input_file, format=input_format)
    os.makedirs(output_folder, exist_ok=True)

    # Each segment ends where the next one starts; None as the final end
    # makes the last slice run to the end of the audio (same as audio[start:]).
    start_times = [start for start, _ in cut_list]
    end_times = start_times[1:] + [None]

    for (start_time, title), end_time in zip(cut_list, end_times):
        output_file = os.path.join(output_folder, f"{sanitize_filename(title)}.mp3")

        # Print progress information
        end_label = "end" if end_time is None else format_time(end_time)
        print(f"Extracting '{title}' from {format_time(start_time)} to {end_label}...")

        audio[start_time:end_time].export(output_file, format="mp3")
        print(f"Saved {output_file}")
def main(argv=None):
    """Command-line entry point: parse arguments, read the cut list, split the audio.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads the arguments from ``sys.argv[1:]``.

    Raises:
        SystemExit: On invalid or missing command-line arguments (raised by
            argparse), or when the cut list file yields no valid entries.
    """
    parser = argparse.ArgumentParser(description="Split an audio file at specified time marks.")
    parser.add_argument('-c', '--cutlist', required=True, help='Path to the cut list file')
    parser.add_argument('-i', '--input', required=True, help='Path to the input audio file')
    parser.add_argument('-o', '--output', required=True, help='Path to the output folder')
    parser.add_argument('-f', '--format', required=True, help='Format of the input audio file (e.g., mp3, wav, ogg, flac, aac, aiff, m4a, mp4, wma, alac)')
    args = parser.parse_args(argv)

    cut_list = parse_cut_list(args.cutlist)
    if not cut_list:
        # Nothing to split: exit with a non-zero status and a clear message
        # rather than handing an empty list to the splitter.
        raise SystemExit(f"Error: No valid entries found in cut list '{args.cutlist}'.")

    split_audio(args.input, cut_list, args.output, args.format)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()