forked from taixujianyitianxiawudi/pixiv-sketch-recorder
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmix.py
263 lines (245 loc) · 10.9 KB
/
mix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
import re
import subprocess
import time
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("-o")#下载目标文件夹
parser.add_argument("-f")#还没用
parser.add_argument("-e")
args = parser.parse_args()
file_path = args.o
def read_filename_from_txt(target):
filename = None
for line in artists:
if target in line:
if ',' in line:
filename = line.split(',')[1]
return filename
def filename_generator(filename):
time1 = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
if 'youtube' in filename:
short_filename = read_filename_from_txt(filename)
if short_filename == None:
short_filename = filename[-11:] + time1 + ".ts"
elif 'bilibili' in filename:
short_filename = read_filename_from_txt(filename)
if short_filename == None:
pattern = re.compile(r'https://live.bilibili.com/([0-9a-zA-Z\_-]+)')
short_filename = pattern.search(filename)[1] + time1 + ".ts"
elif 'twitch' in filename:
short_filename = read_filename_from_txt(filename)
if short_filename == None:
pattern = re.compile(r'https://www.twitch.tv/([0-9a-zA-Z\_-]+)')
short_filename = pattern.search(filename)[1] + time1 + ".ts"
elif 'pixiv' in filename:
#https://sketch.pixiv.net/@junkato/lives
short_filename = read_filename_from_txt(filename)
if short_filename == None:
pattern = re.compile(r'https://sketch.pixiv.net/@([0-9a-zA-Z\_-]+)')
short_filename = pattern.search(filename)[1] + time1 + ".ts"
else:#其他名称
short_filename = read_filename_from_txt(filename)
short_filename = short_filename + time1 + ".ts"
return file_path + short_filename
def pixiv_get_artist_name_from_url(artist_main_page):
pattern = re.compile(r'https://sketch.pixiv.net/@([0-9a-zA-Z\_-]+)')
artist_name = pattern.search(artist_main_page)[1]
return artist_name
#考虑到可能会手动开别的streamlink,所以逻辑改为开cmd之前找一次,开完cmd之后再找一次,比较两者的差异
def get_streamlink_pid_new():
cmd = "tasklist"
out = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
infos = out.stdout.read().splitlines()
current_stream_pid_list = []
#infos的前三行是表格的标题,所以省略
for i in infos[3:]:
pid_i = str(i)
#这个正则表达式后面加了括号表示或的关系,但是也被包括进去了
# 以第一个[0]解决的是findall的list,下面的[0]是因为前面是Pid后面是services或者console,选第一个
result = re.findall(r'streamlink\.exe\s+([0-9]+)', pid_i)
if result == []:
pass
else:
current_stream_pid_list.append(result[0])
#找到所有添加完了,开始比较
return current_stream_pid_list
def get_target_m3u8(artist_name, artist_id):
#https://sketch.pixiv.net/@chit2553/lives/1851352306408316726
#https://sketch.pixiv.net/@yoshikadu/lives/825561229801201329
url = "https://sketch.pixiv.net/@" + artist_name + "/lives/" + artist_id
cmd = "powershell ./gg.ps1 -url " + url
out = subprocess.Popen(cmd, stdout=subprocess.PIPE)
result = out.stdout.read()
return result
def proxy_wget(url):
cmd = "powershell ./gl.ps1 -url " + url
#cmd2 = "powershell Invoke-WebRequest sketch.pixiv.net -SessionVariable "
out = subprocess.Popen(cmd, stdout=subprocess.PIPE)
result = out.stdout.read()
#和request得到的应该是一样的结果
return result
def show_all_pid():
cmd = "tasklist"
out = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
infos = out.stdout.read().splitlines()
pid_list = []
#infos的前三行是表格的标题,所以省略
for i in infos[3:]:
pid_i = str(i)
#这个正则表达式后面加了括号表示或的关系,但是也被包括进去了
# 以第一个[0]解决的是findall的list,下面的[0]是因为前面是Pid后面是services或者console,选第一个
result = re.findall(r'([0-9]+)\s(Services|Console)', pid_i)[0]
pid_list.append(result[0])
return pid_list
#这个用于判断主播是否下线,如果下线了,那就移除recording_list
def check_alive(stream_pid, recording_list_name_and_pid):
flag = True
#check pid
current_pid_list = show_all_pid()
#把对应的pid移出name_and_pid_list,还要把stream_pid_list也移除出去
if stream_pid not in current_pid_list:
print('remove id: ' + str(stream_pid))
stream_pid_list.remove(stream_pid)
#找到记录表里的那个已经不存在的stream_pid的位置,然后移除整条信息
for i in recording_list_name_and_pid:
if stream_pid == i[2]:
try:
name_pid_list.remove(i)
except:
pass
#进pixiv主页,把他们的id给扒下来
#<a class="thumb" href="/@horosuken/lives/5290106517565302048">
def start_record_main():
#还没给特殊符号加斜杠
for artist_name in artists:
artist_name = artist_name.split(",")[0]
#有可能是三兄弟,也有可能是Pixiv,这里分流
if 'youtube' in artist_name:
get_user_and_record_youtube(artist_name)
elif 'bilibili' in artist_name:
get_user_and_record_youtube(artist_name)
elif 'twitch' in artist_name:
get_user_and_record_youtube(artist_name)
elif 'pixiv' in artist_name:
get_user_and_record_pixiv(artist_name)
else:
get_user_and_record_youtube(artist_name)
def get_user_and_record_pixiv(artist_name):
full_artist_name = artist_name
artist_name = pixiv_get_artist_name_from_url(artist_name)
try:
#page_soup = str(proxy_wget(PIXIV_SKETCH_URL))
proxy_wget(PIXIV_SKETCH_URL)
f = open('temp_result.txt' ,encoding='utf-8')
page_soup = f.read()
f.close()
except:
print("Can't connect pixiv, please check your network.")
return None
record_flag = True
pattern_id = re.compile(r'<a class="thumb" href="/@' + artist_name + '/lives/(\d+)">')
artist_id = pattern_id.search(page_soup)
for i in name_pid_list:
if full_artist_name in i:
record_flag = False
if artist_id == None:
record_flag = False
if record_flag == True:
print(artist_id[1])
pattren_m3u8 = re.compile(r'"' + artist_id[1] + '":{"finished_at"' + '\S+' +'hls_movie\S{3}(\S+m3u8)\S+' + '"web","id":"' + artist_id[1] + '",')
artist_m3u8 = pattren_m3u8.search(page_soup)
#m3u8url = re.findall(r'hls_movie\S{3}(\S+m3u8)', page_request)[0]
#artist_id[1]是某个人的数字ID
#aritst_m3u8[1]是没有去掉u002F的 m3u8链接
if artist_m3u8 == None:
#找不到的话就进个人主页找
get_target_m3u8(artist_name, artist_id[1])
f = open('artist_m3u8.txt', encoding='utf-8')
content = f.read()
pattren_target_m3u8 = re.compile(r'hls_movie\S{3}(\S+m3u8)')
artist_m3u8 = pattren_target_m3u8.search(content)
f.close()
if artist_m3u8 == None:
print("Find user id, but cant find .m3u8.")
else:
print(artist_m3u8[1].replace("\\u002F", '/'))
recorded_filename = filename_generator(full_artist_name)
old_streamlink_pid_list = get_streamlink_pid_new()
#print("old")
#print(old_streamlink_pid_list)
cmd = "streamlink " + artist_m3u8[1].replace("\\u002F", '/') + " best " + "-o " + recorded_filename
print(cmd)
temp_record = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
time.sleep(8)
print("now recording: " + artist_name)
#print("new")
new_streamlink_pid_list = get_streamlink_pid_new()
#print(new_streamlink_pid_list)
new_streamlink_pid = list(set(new_streamlink_pid_list) - set(old_streamlink_pid_list))
if new_streamlink_pid == []:
print("streamlink doesnt start! name is" + artist_name +" . ")
else:
stream_pid_list.append(new_streamlink_pid[0])
name_pid_list.append((full_artist_name, temp_record.pid, new_streamlink_pid[0]))
#初始化
def get_user_and_record_youtube(artist_name):
record_flag = True
for i in name_pid_list:
if artist_name in i:
record_flag = False
if record_flag == True:
recorded_filename = filename_generator(artist_name)
old_streamlink_pid_list = get_streamlink_pid_new()
#print("old")
#print(old_streamlink_pid_list)
cmd = "streamlink " + artist_name + " best " + "-o " + recorded_filename
temp_record = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
time.sleep(8)
print("now recording: " + artist_name)
#print("new")
new_streamlink_pid_list = get_streamlink_pid_new()
#print(new_streamlink_pid_list)
new_streamlink_pid = list(set(new_streamlink_pid_list) - set(old_streamlink_pid_list))
if new_streamlink_pid == []:
print("streamlink doesnt start! name is" + artist_name +" . ")
else:
stream_pid_list.append(new_streamlink_pid[0])
name_pid_list.append((artist_name, temp_record.pid, new_streamlink_pid[0]))
artists = []
name_pid_list = []
stream_pid_list = []
count = 0
PIXIV_SKETCH_URL = "https://sketch.pixiv.net/lives"
CHECK_INTERVAL = '60'
for line in open("pixiv.txt", 'r'):
artists.append(line.replace('\n','').replace(' ',''))
#总循环,每过一定时间进行一次判断
while 1 > 0:
count += 1
recording_list = []
offline_list = []
print("starting " + "round " + str(count) + " ")
start_record_main()
record_status = ""
#name_pid_list (name, pid)
for i in name_pid_list:
recording_list.append(i[0])
for i in artists:
if i not in recording_list:
offline_list.append(i)
record_status += "Recording: "
for i in recording_list:
record_status = record_status + " " + i + ","
record_status = record_status[:-1] + ". "
record_status += "Offline:"
for i in offline_list:
record_status = record_status + " " + i + ","
record_status = record_status[:-1] + "."
print(record_status)
print(name_pid_list)
#展示完一轮数据之后,休眠,然后再检查一遍有无离线,开始新的一轮
time.sleep(int(CHECK_INTERVAL))
print("checking offline")
for i in name_pid_list:
#i[2]是streamid,只要这个消失了,i[1]的pid也消失了
check_alive(i[2], name_pid_list)