fastai · arman-hk · Apr 14, 2024
diff --git a/utils.py b/utils.py
@@ -42,31 +42,37 @@ def search_images_bing(key, term, min_sz=128, max_images=150):
 
 # -
 
-def search_images_ddg(key,max_n=200):
-     """Search for 'key' with DuckDuckGo and return a unique urls of 'max_n' images
-        (Adopted from https://github.com/deepanprabhu/duckduckgo-images-api)
-     """
-     url        = 'https://duckduckgo.com/'
-     params     = {'q':key}
-     res        = requests.post(url,data=params)
-     searchObj  = re.search(r'vqd=([\d-]+)\&',res.text)
-     if not searchObj: print('Token Parsing Failed !'); return
-     requestUrl = url + 'i.js'
-     headers    = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:71.0) Gecko/20100101 Firefox/71.0'}
-     params     = (('l','us-en'),('o','json'),('q',key),('vqd',searchObj.group(1)),('f',',,,'),('p','1'),('v7exp','a'))
-     urls       = []
-     while True:
-         try:
-             res  = requests.get(requestUrl,headers=headers,params=params)
-             data = json.loads(res.text)
-             for obj in data['results']:
-                 urls.append(obj['image'])
-                 max_n = max_n - 1
-                 if max_n < 1: return L(set(urls))     # dedupe
-             if 'next' not in data: return L(set(urls))
-             requestUrl = url + data['next']
-         except:
-             pass
+def search_images_ddg(key, max_n=200):
+    """
+    Search DuckDuckGo for 'key' and return an `L` of up to 'max_n' unique image URLs.
+    On a request/parsing error or a missing 'vqd' token, prints a message and returns
+    the URLs collected so far (possibly none); the result is always an `L`, never None.
+    """
+    url = 'https://duckduckgo.com/'
+    urls = []  # bound before the try so the except path can never hit an unbound name
+    try:
+        res = requests.post(url, data={'q': key})
+        searchObj = re.search(r'vqd=([\d-]+)&', res.text)  # token sent as 'vqd' to i.js below
+        if not searchObj:
+            print('Token Parsing Failed! Unable to retrieve search token.')
+            return L(set())  # return an empty list-like object
+        requestUrl = url + 'i.js'
+        headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:71.0) Gecko/20100101 Firefox/71.0'}
+        params = (('l', 'us-en'), ('o', 'json'), ('q', key), ('vqd', searchObj.group(1)), ('f', ',,,'), ('p', '1'), ('v7exp', 'a'))
+        while max_n > 0:
+            res = requests.get(requestUrl, headers=headers, params=params)
+            data = json.loads(res.text)
+            for obj in data['results']:
+                urls.append(obj['image'])
+                max_n -= 1
+                if max_n < 1:
+                    break  # quota reached; the while condition then stops the paging
+            if 'next' not in data:
+                break  # no further result pages
+            requestUrl = url + data['next']  # follow the pagination link
+    except Exception as e:
+        print(f"An error occurred: {e}")
+    return L(set(urls))  # dedupe (order not preserved); also covers max_n <= 0
 
 
 def plot_function(f, tx=None, ty=None, title=None, min=-2, max=2, figsize=(6,4)):