Skip to content

Commit 1522617

Browse files
appledora and cclauss authored
Show images from google query (TheAlgorithms#4853)
* Added new script to open the google image tab with a search query.
* Added new script to open the google image tab with a search query.
* Added new script to open the google image tab with a search query with doctests.
* Fixed doctest error, removed print() from method, changed return type
* Update web_programming/show_image_tab_from_google_query.py: using iterators instead of lists. Co-authored-by: Christian Clauss <[email protected]>
* Update web_programming/show_image_tab_from_google_query.py: Improve readability by removing one-time used variable. Co-authored-by: Christian Clauss <[email protected]>
* Update web_programming/show_image_tab_from_google_query.py: Decreasing complication through standard practices. Co-authored-by: Christian Clauss <[email protected]>
* Update web_programming/show_image_tab_from_google_query.py: Exception Handling. Co-authored-by: Christian Clauss <[email protected]>
* changed complete method to download images from google search query
* Update download_images_from_google_query.py
* Delete show_image_tab_from_google_query.py

Co-authored-by: Christian Clauss <[email protected]>
1 parent 4cf1aae commit 1522617

File tree

1 file changed

+99
-0
lines changed

1 file changed

+99
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import json
2+
import os
3+
import re
4+
import sys
5+
import urllib.request
6+
7+
import requests
8+
from bs4 import BeautifulSoup
9+
10+
# HTTP request headers carrying a desktop-browser User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        " (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
    ),
}
14+
15+
16+
def download_images_from_google_query(query: str = "dhaka", max_images: int = 5) -> int:
    """Search Google Images for ``query`` and download the results to disk.

    Full-resolution image URLs are scraped from the ``AF_initDataCallback``
    script data of the results page. Each image is saved as
    ``query_<query>/original_size_img_<n>.jpg`` (spaces in ``query`` are
    replaced by underscores), relative to the current working directory.

    Args:
        query: The image search term. Defaults to "dhaka".
        max_images: The maximum number of images to download. Values above 50
            are capped at 50; values below 1 download nothing. Defaults to 5.

    Returns:
        The number of images downloaded.

    The first two examples need network access, so doctest skips them.

    >>> download_images_from_google_query()  # doctest: +SKIP
    5
    >>> download_images_from_google_query("potato")  # doctest: +SKIP
    5
    >>> download_images_from_google_query("potato", max_images=0)
    0
    """
    max_images = min(max_images, 50)  # Prevent abuse!
    if max_images <= 0:
        # Nothing was requested, so skip the network round trip entirely.
        return 0

    params = {
        "q": query,
        "tbm": "isch",
        "hl": "en",
        "ijn": "0",
    }

    # A timeout keeps the call from hanging forever on an unresponsive server.
    html = requests.get(
        "https://www.google.com/search", params=params, headers=headers, timeout=10
    )
    soup = BeautifulSoup(html.text, "html.parser")
    matched_images_data = "".join(
        re.findall(r"AF_initDataCallback\(([^<]+)\);", str(soup.select("script")))
    )

    # Round-tripping a str through JSON yields an equal str; the step is kept
    # from the original implementation.
    matched_images_data_json = json.loads(json.dumps(matched_images_data))

    matched_google_image_data = re.findall(
        r"\[\"GRID_STATE0\",null,\[\[1,\[0,\".*?\",(.*),\"All\",",
        matched_images_data_json,
    )
    if not matched_google_image_data:
        return 0

    # Drop the thumbnail entries so that only full-resolution URLs remain.
    removed_matched_google_images_thumbnails = re.sub(
        r"\[\"(https\:\/\/encrypted-tbn0\.gstatic\.com\/images\?.*?)\",\d+,\d+\]",
        "",
        str(matched_google_image_data),
    )

    matched_google_full_resolution_images = re.findall(
        r"(?:'|,),\[\"(https:|http.*?)\",\d+,\d+\]",
        removed_matched_google_images_thumbnails,
    )

    image_urls = matched_google_full_resolution_images[:max_images]
    if not image_urls:
        # No URLs found: report zero without creating an empty directory.
        return 0

    # Install the opener once (not per image) so that urlretrieve sends the
    # same headers as the search request above.
    opener = urllib.request.build_opener()
    opener.addheaders = list(headers.items())
    urllib.request.install_opener(opener)

    path_name = f"query_{query.replace(' ', '_')}"
    os.makedirs(path_name, exist_ok=True)

    for index, fixed_full_res_image in enumerate(image_urls):
        # The scraped URL is unicode-escape decoded twice, as in the original
        # implementation (presumably it is escaped twice in the page source).
        original_size_img_not_fixed = bytes(fixed_full_res_image, "ascii").decode(
            "unicode-escape"
        )
        original_size_img = bytes(original_size_img_not_fixed, "ascii").decode(
            "unicode-escape"
        )
        urllib.request.urlretrieve(
            original_size_img,
            os.path.join(path_name, f"original_size_img_{index}.jpg"),
        )

    # Return the count of images, not the last loop index.
    return len(image_urls)
91+
92+
93+
if __name__ == "__main__":
    # Guard only the argument lookup, so that an IndexError raised while
    # scraping is not misreported as a missing search term.
    try:
        search_term = sys.argv[1]
    except IndexError:
        print("Please provide a search term.")
        raise
    image_count = download_images_from_google_query(search_term)
    print(f"{image_count} images were downloaded to disk.")

0 commit comments

Comments
 (0)