-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtodo.txt
150 lines (132 loc) · 9.68 KB
/
todo.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# 数据未完全下载时可能不会报错,如何校验文件是否完整?(待解决)
IncompleteRead(3670016 bytes read, 1219501 more expected)
downloads\loj.ac\134\testdata\1.in Error:"message:"('Connection broken: IncompleteRead(3670016 bytes read, 1219501 more expected)', IncompleteRead(3670016 bytes read, 1219501 more expected)),"file:"downloads\loj.ac\134\testdata\1.in,"url:"https://files.loj.ac/libreoj-data/6ee89ec4-6494-5763-8ea6-e68f902d317e?response-content-disposition=attachment%3B%20filename%3D%221.in%22&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=5d9c40ebc7ca054399154bcebc5c3a5c%2F20240808%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240808T034012Z&X-Amz-Expires=72000&X-Amz-SignedHeaders=host&X-Amz-Signature=df617f68650b77bb523fa71221c91ee319a383c495b3283480ee4c691128153a
解决思路:
将需要下载的文件信息保存到本地的 download_info.json;下载每道题之前先检测该文件是否存在,并在其中记录哪些文件尚未完整下载。
# 3238 checker 的格式似乎不完整
# 3484 下载进度显示为 1354%(超过 100%),原因待查: ../downloads\loj.ac\3484\testdata\anagramistica.out.2d
# 513、514 是提交答案题,judgeinfo 里没有 timelimit 信息,导致异常
# 2882 有2个图片未能下载。
图片下载出错:http://z4a.net/images/2018/10/17/Snipaste_2018-10-17_20-59-03..png,错误信息:..\downloads\loj.ac\2882\additional_file\Snipaste_2018-10-17_20-59-03.png出错重试.('Connection aborted.', ConnectionResetError(10054, '远程主机强迫关闭了一个现有的连接。', None, 10054, None))
图片下载出错:http://z4a.net/images/2018/10/17/Snipaste_2018-10-17_21-03-29.png,错误信息:..\downloads\loj.ac\2882\additional_file\Snipaste_2018-10-17_21-03-29.png出错重试.('Connection aborted.', ConnectionResetError(10054, '远程主机强迫关闭了一个现有的连接。', None, 10054, None))
# 3079 创建文件/文件夹出错,loguru未记录。
File "F:\MYOI\loj\LOJ-Spider\loj_download.py", line 43, in resume_download
with open(file_path, 'ab') as file:
FileNotFoundError: [Errno 2] No such file or directory: '../downloads\\loj.ac\\3079\\additional_file\\game1.in'
# 获取测试数据出错,重试机制好像没启动
2089 超时?
Traceback (most recent call last):
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\response.py", line 748, in _error_catcher
yield
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\response.py", line 873, in _raw_read
data = self._fp_read(amt, read1=read1) if not fp_closed else b""
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\response.py", line 856, in _fp_read
return self._fp.read(amt) if amt is not None else self._fp.read()
File "C:\Python\Python310\lib\http\client.py", line 464, in read
s = self.fp.read(amt)
File "C:\Python\Python310\lib\socket.py", line 705, in readinto
return self._sock.recv_into(b)
File "C:\Python\Python310\lib\ssl.py", line 1273, in recv_into
return self.read(nbytes, buffer)
File "C:\Python\Python310\lib\ssl.py", line 1129, in read
return self._sslobj.read(len, buffer)
TimeoutError: The read operation timed out
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\requests\models.py", line 820, in generate
yield from self.raw.stream(chunk_size, decode_content=True)
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\response.py", line 1060, in stream
data = self.read(amt=amt, decode_content=decode_content)
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\response.py", line 949, in read
data = self._raw_read(amt)
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\response.py", line 872, in _raw_read
with self._error_catcher():
File "C:\Python\Python310\lib\contextlib.py", line 153, in __exit__
self.gen.throw(typ, value, traceback)
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\response.py", line 753, in _error_catcher
raise ReadTimeoutError(self._pool, None, "Read timed out.") from e # type: ignore[arg-type]
urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='api.loj.ac', port=443): Read timed out.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "E:\爬虫集合\LOJ-Spider\get_by_schedule.py", line 58, in wrapper
return job_func(*args, **kwargs)
File "E:\爬虫集合\LOJ-Spider\get_by_schedule.py", line 72, in get_problem_from_list
message = get_problem('https', 'loj.ac', pid)
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\tenacity\__init__.py", line 336, in wrapped_f
return copy(f, *args, **kw)
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\tenacity\__init__.py", line 475, in __call__
do = self.iter(retry_state=retry_state)
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\tenacity\__init__.py", line 376, in iter
result = action(retry_state)
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\tenacity\__init__.py", line 418, in exc_check
raise retry_exc.reraise()
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\tenacity\__init__.py", line 185, in reraise
raise self.last_attempt.result()
File "C:\Python\Python310\lib\concurrent\futures\_base.py", line 438, in result
return self.__get_result()
File "C:\Python\Python310\lib\concurrent\futures\_base.py", line 390, in __get_result
raise self._exception
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\tenacity\__init__.py", line 478, in __call__
result = fn(*args, **kwargs)
File "E:\爬虫集合\LOJ-Spider\loj_download.py", line 176, in get_problem
).json()
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\requests\models.py", line 974, in json
return complexjson.loads(self.text, **kwargs)
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\requests\models.py", line 926, in text
if not self.content:
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\requests\models.py", line 902, in content
self._content = b"".join(self.iter_content(CONTENT_CHUNK_SIZE)) or b""
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\requests\models.py", line 826, in generate
raise ConnectionError(e)
requests.exceptions.ConnectionError: HTTPSConnectionPool(host='api.loj.ac', port=443): Read timed out.
OK # 题目不存在时,result.get('localizedContentsOfAllLocales') 会引发异常;已修改,需测试
Traceback (most recent call last):
File "E:\爬虫集合\LOJ-Spider\loj_download.py", line 369, in run
get_problem(protocol, host, i)
File "E:\爬虫集合\LOJ-Spider\loj_download.py", line 152, in get_problem
if not result.get('localizedContentsOfAllLocales'):
KeyError: 'localizedContentsOfAllLocales'
OK 出错后未按设定的次数重试(只有抛出异常,重试机制才会起作用,即需要 raise Exception(...))
OK 150 testdata/config.yaml 不完整,已解决,主要是subtasks未获取
OK 有些文件的下载响应中没有 headers['content-length'],造成无法下载
HTTPSConnectionPool(host='api.loj.ac', port=443): Read timed out. (read timeout=5)
================
Traceback (most recent call last):
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\connectionpool.py", line 466, in _make_request
self._validate_conn(conn)
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\connectionpool.py", line 1095, in _validate_conn
conn.connect()
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\connection.py", line 652, in connect
sock_and_verified = _ssl_wrap_socket_and_match_hostname(
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\connection.py", line 805, in _ssl_wrap_socket_and_match_hostname
ssl_sock = ssl_wrap_socket(
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\util\ssl_.py", line 465, in ssl_wrap_socket
ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname)
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\util\ssl_.py", line 509, in _ssl_wrap_socket_impl
return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
File "C:\Python\Python310\lib\ssl.py", line 512, in wrap_socket
return self.sslsocket_class._create(
File "C:\Python\Python310\lib\ssl.py", line 1070, in _create
self.do_handshake()
File "C:\Python\Python310\lib\ssl.py", line 1341, in do_handshake
self._sslobj.do_handshake()
TimeoutError: _ssl.c:980: The handshake operation timed out
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\requests\adapters.py", line 667, in send
resp = conn.urlopen(
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\connectionpool.py", line 843, in urlopen
retries = retries.increment(
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\util\retry.py", line 474, in increment
raise reraise(type(error), error, _stacktrace)
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\util\util.py", line 39, in reraise
raise value
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\connectionpool.py", line 789, in urlopen
response = self._make_request(
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\connectionpool.py", line 490, in _make_request
raise new_e
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\connectionpool.py", line 468, in _make_request
self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
File "E:\爬虫集合\LOJ-Spider\venv\lib\site-packages\urllib3\connectionpool.py", line 369, in _raise_timeout
raise ReadTimeoutError(
urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='api.loj.ac', port=443): Read timed out. (read timeout=5)