-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.py
101 lines (73 loc) · 3.05 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from bs4 import BeautifulSoup
from os import makedirs
from os.path import exists
from requests import get
from time import sleep
def get_page(url, retry_delay=3, timeout=30):
    """Fetch *url* with an HTTP GET, retrying until a response is obtained.

    Args:
        url: Address to request.
        retry_delay: Seconds to sleep after a failed attempt before retrying.
        timeout: Seconds to wait on the server before the attempt is
            treated as failed (and therefore retried).

    Returns:
        The response object returned by ``requests.get``. The HTTP status
        is not inspected here, so error responses are returned as-is.
    """
    while True:
        try:
            return get(url, headers={'Connection': 'close'}, timeout=timeout)
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # still propagate so the user can abort an endless retry loop.
            sleep(retry_delay)
def get_soup(url):
    """Download *url* and return its text parsed by BeautifulSoup with lxml."""
    response = get_page(url)
    markup = response.text
    return BeautifulSoup(markup, 'lxml')
def make_dir(path):
    """Create directory *path* (and any missing parents) if it is absent.

    Calling this for a directory that already exists is a no-op.

    Raises:
        FileExistsError: If *path* exists but is not a directory.
    """
    # exist_ok avoids the separate exists() check and the window between
    # checking and creating.
    makedirs(path, exist_ok=True)
# The base URL of the site is read from the text of the first <h2> on the
# landing page.
main_url = get_soup('https://nyahentai.github.io').h2.text
cha_main_url = main_url + '/characters'
# Maps a character's name (link text before ' (') to its relative link.
cha_links = {}
# Every single character of the answer selects one index section to crawl.
cha_needs = input('\nWhat are you need? ')
make_dir('lists/')
print('')
for letter in cha_needs:
    print(letter, end=' ')
    cha_url = cha_main_url + '/' + letter
    cha_list = []
    # Follow the rel="next" links until the last page of this section.
    while True:
        cha_soup = get_soup(cha_url)
        container = cha_soup.find('div', {'class': 'container'})
        # A page without the expected container contributes no entries
        # instead of failing with AttributeError.
        cha_divs = container.find_all('a') if container is not None else []
        for cha_div in cha_divs:
            cha_links[cha_div.text.split(' (')[0]] = cha_div.attrs['href']
            cha_list.append(cha_div.text)
        # One progress dot per fetched page.
        print('.', end='')
        next_link = cha_soup.find('a', {'rel': 'next'})
        if next_link is None:
            break
        cha_url = main_url + next_link.attrs['href']
    # Save the full link texts of this section, one per line.
    with open('lists/' + letter + '.txt', 'w', encoding='utf-8') as txt:
        txt.write('\n'.join(cha_list))
    print('')
make_dir('books/')
# Characters removed from gallery titles before they are used as directory
# names: \ / : * ? " < > |
_forbidden_chars = str.maketrans('', '', '\\/:*?"<>|')
# Interactive loop: download every gallery listed for the requested name.
while True:
    cha_name = input('\nWhat is his/her name? ')
    if cha_name not in cha_links:
        # Only names collected by the index crawl can be mapped to a URL;
        # ask again rather than crash with KeyError.
        print('Unknown name: ' + cha_name)
        continue
    cha_chinese = input('Only chinese? (yes/no) ')
    cha_path = 'books/' + cha_name + '/'
    make_dir(cha_path)
    cha_url = main_url + cha_links[cha_name]
    if cha_chinese == 'yes':
        cha_url += '/chinese'
    # Walk the listing pages by following the rel="next" links.
    while True:
        # The listing page is fetched once per page, not once per gallery.
        cha_soup = get_soup(cha_url)
        bok_divs = cha_soup.find_all('a', {'style': 'padding:0 0 142% 0'})
        for bok_div in bok_divs:
            bok_name = bok_div.find('img', {'is': 'lazyload-image'}).attrs['alt']
            bok_path = cha_path + bok_name.translate(_forbidden_chars) + '/'
            make_dir(bok_path)
            bok_soup = get_soup(main_url + bok_div.attrs['href'])
            pic_divs = bok_soup.find_all('a', {'class': 'gallerythumb'})
            print('\n' + bok_name, end=' ')
            for pic_div in pic_divs:
                # Each thumbnail links to a viewer page that holds the
                # picture's number and the address of the image itself.
                pic_soup = get_soup(main_url + pic_div.attrs['href'])
                pic_number = pic_soup.find('span', {'class': 'current'}).text
                pic_content = pic_soup.find('img', {'class': 'current-img fit-horizontal'}).attrs['src']
                with open(bok_path + pic_number + '.jpg', 'wb') as jpg:
                    jpg.write(get_page(pic_content).content)
                # One progress dot per saved picture.
                print('.', end='')
        next_link = cha_soup.find('a', {'rel': 'next'})
        if next_link is None:
            print('')
            break
        cha_url = main_url + next_link.attrs['href']