-
Notifications
You must be signed in to change notification settings - Fork 0
/
eh.py
193 lines (151 loc) · 6.63 KB
/
eh.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#! python3
"""
###############################################################
made by toiletclogger69
https://github.com/toiletclogger69/eh_download
pip install gooey requests bs4
to run :
python eh.py (launch the gooey app)
if you don't want to use gooey and only want this script, you can call download_gallery directly, e.g.:
download_gallery(url_to_the_gallery_you_want, True)
you can set TRUE_IF_CLI = True if you want to use the script directly with a text file "urls.txt" containing all urls
###############################################################
download a list of galleries from e-hentai
you can paste a list of urls in the text area and it will download every page of each gallery,
and put the pages in their respective gallery folder
###############################################################
the script waits 3~4s between the download of each page, otherwise you get flagged as a bot
the images downloaded are the sample resolution for eh (1280 * xxxx), if you want better resolution go with a torrent
"""
from sys import stdout, exit
from os import path, makedirs, getcwd
from time import sleep
from random import randint
import requests
from bs4 import BeautifulSoup
from gooey import Gooey, GooeyParser
# python -m venv env && env\Scripts\activate.bat
# Name of the folder, created under the current working directory, that
# receives one sub-folder per downloaded gallery.
# change this var to change the folder name
GALLERY_FOLDER = "doujins"
''' add a urls.txt file in the current folder, add every urls inside, separated by a newline eg:
https://e-hentai.org/g/lots_of_number1/lots_of_number1/
https://e-hentai.org/g/lots_of_number2/lots_of_number2/
https://e-hentai.org/g/lots_of_number3/lots_of_number3/
then set this to True and execture the script with python eh.py'''
# When True, running this file reads the gallery urls from "urls.txt" and
# downloads them directly; when False, the Gooey window is started instead.
TRUE_IF_CLI = False
# TRUE_IF_CLI = True
# HTTP headers passed to every requests.get call made by download_gallery;
# only a browser-style User-Agent is set.
headers = {"User-Agent" : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}
###############################################################
def download_gallery(url='', true_if_replace_name=False) -> int:
    """Download every page image of one gallery into its own folder.

    The gallery index page is fetched to read the title, the number of pages
    and the link to the first image page. Each image page is then visited in
    turn (following its own "next" link) and the displayed image is saved as
    ``0000.ext``, ``0001.ext``, ... inside
    ``<current dir>/GALLERY_FOLDER/<gallery title>``.

    After each page a line ending in ``NN%`` is printed and stdout is
    flushed, so a caller reading the output can track progress; ``-1%`` is
    printed when the gallery cannot be found.

    Args:
        url: address of the gallery index page. Surrounding whitespace
            (e.g. the newline left by reading a text file) is ignored.
        true_if_replace_name: when true, every character of the title that
            is not alphanumeric, a space, or one of ``_-[](){}`` is removed
            before the title is used as a folder name.

    Returns:
        1 once the whole gallery has been processed, 0 when the gallery
        page reports a missing key or a 404 error.

    Raises:
        requests.HTTPError: if the gallery page or one of the image pages
            answers with an HTTP error status.
    """
    # urls coming from a text file or a text area may carry a newline or a
    # carriage return, which would otherwise end up in the middle of the
    # request once the query string is appended
    url = url.strip()

    # ignore warning "offensive content"; use "&" when the url already has
    # a query string so the parameter is not glued to an existing value
    if "nw=always" not in url:
        url += ("&" if "?" in url else "?") + "nw=always"

    html_request = requests.get(url, headers=headers)
    html_request.raise_for_status()
    html_page = BeautifulSoup(html_request.text, 'html.parser')

    # if wrong url, or error 404
    if "Key missing, or incorrect key provided." in html_page.text or "error 404" in html_page.text:
        print(f"couldn't find {url}")
        print("-1%")
        stdout.flush()
        sleep(6)
        # falsy like the previous bare return, but matches the int annotation
        return 0

    # find title
    title_bs4 = html_page.find('title')
    title_gallery = title_bs4.text.split(" - E-Hentai Galleries")[0]

    # find number of pages
    meta = html_page.find_all(class_="gdt2")
    number_of_page = [x for x in meta if "pages" in x.text][0].text
    number_of_page = int(number_of_page.split(" pages")[0])

    # find first page url
    first_page = html_page.find_all(class_="gdtm")[0]
    url_next_page = first_page.find('a')['href']

    # create the folder
    if true_if_replace_name:
        # the backslash is deliberately NOT in the allowed set: it is a path
        # separator on windows and would split the title into nested folders
        title_gallery = ''.join(
            char for char in title_gallery
            if char.isalnum() or char in "_-[](){} "
        ).strip()
    gallery_path = path.join(getcwd(), GALLERY_FOLDER, title_gallery)
    makedirs(gallery_path, exist_ok=True)

    # informations
    print("\n\n============================\n")
    print(f'\t{number_of_page} pages - {url}')
    # issue with UnicodeEncodeError when using Gooey, issue seems difficult to fix properly so instead we just ignore it
    try:
        print(f'\t{title_gallery}')
    except UnicodeEncodeError:
        print("\tThis windows cannot display non-ascii character, cannot display the title")
    print("\n----------------------------\n")

    for page_number in range(number_of_page):
        # don't touch the sleep amount or you will get flagged as a bot
        sleep(randint(3, 4))

        html_request = requests.get(url_next_page, headers=headers)
        html_request.raise_for_status()
        html_page = BeautifulSoup(html_request.text, 'html.parser')

        # find the picture div
        element_image = html_page.find("div", {"id": "i3"})

        # find the next url page link
        element_link = element_image.find('a')
        url_next_page = element_link['href']

        # find the current picture url
        element_source = element_image.find('img')
        url_image = element_source['src']

        # 0000.jpg, 0048.png, etc. (numbering starts at zero); basename
        # guards against a path separator sneaking in through the extension
        file_name = path.basename(f"{str(page_number):0>4}.{url_image.split('.')[-1]}")

        content_image = requests.get(url_image, headers=headers)
        if content_image.ok:
            with open(path.join(gallery_path, file_name), 'wb') as file:
                for chunk in content_image.iter_content(100000):
                    file.write(chunk)
        else:
            # do not store an error body under an image file name
            print(f"\tcould not download {url_image} (status {content_image.status_code}), page skipped")

        # a one-page gallery has no "pages - 1" interval to divide by
        if number_of_page > 1:
            percent = int((page_number / (number_of_page - 1)) * 100)
        else:
            percent = 100
        print(f"{file_name}\t\t{percent:0>2}%")
        stdout.flush()

    return 1
@Gooey(
    progress_regex=r"(-?\d+)%$",
    disable_progress_bar_animation=True,
    program_name='dgeh',
    requires_shell=False,
    default_size=(500, 770),
    program_description="Download gallery on eh",
    menu=[{
        'name': 'About',
        'items': [{
            'type': 'AboutDialog',
            'menuTitle': 'About',
            'version': '1.1.3',
            'copyright': '2021',
            'website': 'https://github.com/toiletclogger69/eh_download',
            'developer': 'toiletclogger69',
        }],
    }],
)
def main():
    """Gooey entry point: read the urls typed by the user and download them.

    Two arguments are declared: ``urls``, a multi-line text area holding one
    gallery url per line, and the optional flag
    ``-true_if_window_compatible`` which is forwarded to download_gallery as
    its ``true_if_replace_name`` argument.

    Every line is stripped before use, so surrounding spaces or a stray
    carriage return neither hide a url from the ``http`` check nor end up
    inside the request. Lines that do not start with ``http`` are ignored.
    """
    parser = GooeyParser(prog="gallery eh downloader")
    parser.add_argument(
        'urls',
        metavar='liste of url',
        help='urls are separated by a newline, no blank line allowed',
        widget='Textarea',
        gooey_options={'height': 300, },
    )
    parser.add_argument(
        '-true_if_window_compatible',
        metavar='Replace character',
        action='store_true',
        required=False,
        help='Some character ( / | ? ! etc. ) in gallery title are not valid in windows file name, if set those character will be removed',
    )
    args = parser.parse_args()

    # splitlines copes with "\n", "\r\n" and "\r" alike
    for raw_line in args.urls.splitlines():
        current_url = raw_line.strip()
        if current_url.startswith("http"):
            download_gallery(current_url, args.true_if_window_compatible)

    # NOTE(review): assumed to run once, after the last gallery; the "-1%"
    # line matches progress_regex above with a negative value. Confirm the
    # placement against the original indentation.
    print("")
    print("")
    print("-1%")
    stdout.flush()
    sleep(2)
if __name__ == "__main__":
    # Script entry: either download every url listed in "urls.txt"
    # (TRUE_IF_CLI) or start the Gooey application. Ctrl+C ends the program
    # with a short message instead of a traceback.
    try:
        if TRUE_IF_CLI:
            # Read every url from a text file, one per line. Each line is
            # stripped because iterating a file keeps the trailing newline,
            # which must not be sent as part of the url.
            with open("urls.txt", 'r') as file_text:
                list_url = [line.strip() for line in file_text]

            for current_url in list_url:
                if current_url.startswith("http"):
                    download_gallery(current_url, True)
        else:
            exit(main())
    except KeyboardInterrupt:
        print("\nEnd program")