script.py
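"""Wallpaper scraper.

Reads a paginated gallery URL, works out how many result pages it has,
collects the full-size image URL behind every thumbnail, and saves the
images into a local "wallpapers" directory.
"""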
import os
import re
import shutil

import requests
from bs4 import BeautifulSoup
# GET NUMBER OF PAGES
def pgnum(anchors):
    # Scan every pagination link and keep the largest page number seen;
    # the last integer in an href containing "page" is taken as its page.
    pages = 1
    for link in anchors:
        href = str(link.get('href'))
        if 'page' in href:
            currpages = int(re.findall(r'\d+', href)[-1])
            if currpages > pages:
                pages = currpages
    return pages
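# Illustration (hypothetical href): for an anchor like
# <a href="/search?q=cars&page=17">, the last integer in the href is 17,
# so a page full of such links makes pgnum return the highest one, 17.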
# GET THE IMAGE URLS FROM THE PROVIDED PAGE AND RETURN THEM AS A LIST
def getlink(url):
    soup = BeautifulSoup(requests.get(url).text, "html.parser")
    found = []
    for link in soup.find_all("img"):
        href = str(link.get('src'))
        if not href.startswith('https://images'):
            continue
        # Thumbnail URLs end in "thumb...<name>.<ext>"; dropping the
        # thumb segment rebuilds the full-size image URL.
        match = re.search(r'(.*/\d*/)(thumb.*?(\d+\.\w+))', href)
        if match is None:
            continue
        parts = match.groups()
        href = parts[0] + parts[-1]
        pic_name = parts[2]
        print(href)
        found.append([href, pic_name])
    return found
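# Illustration (hypothetical URL): a thumbnail src such as
# https://images.example.com/files/123/thumb-350-456.jpg is rewritten to
# https://images.example.com/files/123/456.jpg and returned as the pair
# [full_url, "456.jpg"].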
# SAVE THE IMAGES FROM THE LINKS IN THE LIST
def downloader(urllist):
    DEFAULT_DIRECTORY = os.path.join(os.getcwd(), "wallpapers")
    for no, (pic_url, pic_name) in enumerate(urllist, start=1):
        print(pic_url)
        print(str(no) + " of " + str(len(urllist)) + " pictures")
        # Stream the download so large images are not held in memory.
        response = requests.get(pic_url, stream=True)
        with open(os.path.join(DEFAULT_DIRECTORY, pic_name), 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
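# Note: downloader assumes the "wallpapers" directory already exists;
# the main script below creates it before calling.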
inp = input("Enter the URL:\n>")
# Assumes the URL already has a query string, so "&page=1" can be appended.
url = f'{inp}&page=1'
# Keep everything up to and including "page=" so page numbers can be swapped in.
baseurl = url[0:url.find('page=') + 5]
r = requests.get(url)
soup = BeautifulSoup(r.content, "html.parser")
all_anchors = soup.find_all("a")
pages = pgnum(all_anchors)
print(pages)
urllist = []
os.makedirs("wallpapers", exist_ok=True)
for each in range(1, pages + 1):
    print(baseurl + str(each))
    urllist.extend(getlink(baseurl + str(each)))
    print(len(urllist))
    print(str(each) + " of " + str(pages) + " pages")
# LET'S DO THIS!
downloader(urllist)
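# Example run (hypothetical gallery URL): enter a search URL such as
# https://example.com/search?q=nature at the prompt; every page of
# results is scraped and the images land in ./wallpapers.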