Skip to content

Commit bef4ca0

Browse files
authored
Merge pull request #206 from BelKed/advanced-search
Implementation of advanced search
2 parents eb000ff + cc0c92f commit bef4ca0

10 files changed

+461
-520
lines changed

pycaching/geocaching.py

+107-134
Original file line number | Diff line number | Diff line change
@@ -7,14 +7,14 @@
77
import re
88
import subprocess
99
from os import path
10-
from typing import Optional, Union
10+
from typing import Generator, Optional, Union
1111
from urllib.parse import parse_qs, urljoin, urlparse
1212

1313
import bs4
1414
import requests
1515
from bs4.element import Script # Direct import as `bs4.Script` requires version >= 4.9.1.
1616

17-
from pycaching.cache import Cache, Size, Status
17+
from pycaching.cache import Cache
1818
from pycaching.errors import Error, LoginFailedException, NotLoggedInException, PMOnlyException, TooManyRequestsError
1919
from pycaching.geo import Point, Rectangle
2020
from pycaching.log import Log
@@ -52,7 +52,7 @@ class Geocaching(object):
5252
"search": "play/search",
5353
"search_more": "play/search/more-results",
5454
"my_logs": "my/logs.aspx",
55-
"api_search": "api/proxy/web/search",
55+
"api_search": "api/proxy/web/search/v2",
5656
}
5757
_credentials_file = ".gc_credentials"
5858

@@ -243,116 +243,43 @@ def get_logged_user(self, login_page=None):
243243
m = re.search(r'"username":\s*"(.*)"', js_content)
244244
return m.group(1) if m else None
245245

246-
def search(
    self,
    point: Point,
    limit: int = float("inf"),
    *,
    sort_by: Union[str, SortOrder] = SortOrder.date_last_visited,
    reverse: bool = False,
    per_query: int = 200,
    wait_sleep: bool = True,
) -> Generator[Optional[Cache], None, None]:
    """Search for caches around a specified location using a search API.

    This is a thin wrapper: it builds the query options (origin, sort criterion and
    direction) and delegates the actual querying to :meth:`advanced_search`.

    :param point: The :class:`.geo.Point` object representing the center point of the search.
    :param limit: The maximum number of caches to load.
        Defaults to infinity.
    :param sort_by: The criterion to sort the caches by, given either as a
        :class:`SortOrder` member or as a value accepted by the :class:`SortOrder`
        constructor (same convention as :meth:`search_rect`).
        Defaults to :code:`SortOrder.date_last_visited`.
    :param reverse: If :code:`True`, the order of the results is reversed.
        Defaults to :code:`False`.
    :param per_query: The number of caches to request in each query.
        Defaults to :code:`200`.
    :param wait_sleep: In case of rate limits exceeding, wait appropriate time
        if set to :code:`True`, otherwise just yield :code:`None`.
        Defaults to :code:`True`.
    :return: A generator that yields :class:`.Cache` objects.
    """
    # The signature accepts plain strings, so normalise before reading ``.value``;
    # a bare ``str`` has no ``.value`` attribute and would raise AttributeError.
    if not isinstance(sort_by, SortOrder):
        sort_by = SortOrder(sort_by)

    return self.advanced_search(
        {
            "origin": "{},{}".format(point.latitude, point.longitude),
            # The API takes an "asc" flag as a lowercase string, so ``reverse`` is inverted here.
            "asc": str(not reverse).lower(),
            "sort": sort_by.value,
        },
        per_query=per_query,
        limit=limit,
        wait_sleep=wait_sleep,
    )
356283

357284
@deprecated
358285
def search_quick(self, area):
@@ -366,55 +293,99 @@ def search_quick(self, area):
366293

367294
return self.search_rect(area)
368295

369-
# add some shortcuts ------------------------------------------------------
370-
371296
def search_rect(
    self,
    rect: Rectangle,
    limit: int = float("inf"),
    *,
    sort_by: Union[str, SortOrder] = SortOrder.date_last_visited,
    reverse: bool = False,
    per_query: int = 200,
    origin: Optional[Point] = None,
    wait_sleep: bool = True,
) -> Generator[Optional[Cache], None, None]:
    """Search for caches inside a :class:`.Rectangle` area using a search API.

    Builds the bounding-box query options and hands them to :meth:`advanced_search`,
    which performs the actual requests.

    :param rect: The :class:`.Rectangle` object representing the search area; its first
        two corners define the bounding box sent to the API.
    :param limit: The maximum number of caches to load.
        Defaults to infinity.
    :param sort_by: The criterion to sort the caches by; anything that is not already a
        :class:`SortOrder` member is passed through the :class:`SortOrder` constructor.
        Defaults to :code:`SortOrder.date_last_visited`.
    :param reverse: If :code:`True`, the order of the results is reversed.
        Defaults to :code:`False`.
    :param per_query: The number of caches to request in each query.
        Defaults to :code:`200`.
    :param origin: The origin point for search by distance; must be a :class:`.geo.Point`
        when sorting by distance (checked with an assertion).
    :param wait_sleep: In case of rate limits exceeding, wait appropriate time
        if set to :code:`True`, otherwise just yield :code:`None`.
        Defaults to :code:`True`.
    :return: A generator that yields :class:`.Cache` objects.
    """
    order = sort_by if isinstance(sort_by, SortOrder) else SortOrder(sort_by)

    first_corner, second_corner = rect.corners[0], rect.corners[1]
    bounding_box = "{},{},{},{}".format(
        first_corner.latitude,
        first_corner.longitude,
        second_corner.latitude,
        second_corner.longitude,
    )

    query = {
        "box": bounding_box,
        "asc": str(not reverse).lower(),
        "sort": order.value,
    }

    # Distance ordering is relative to a reference point, so one has to be supplied.
    if order is SortOrder.distance:
        assert isinstance(origin, Point)
        query["origin"] = "{},{}".format(origin.latitude, origin.longitude)

    return self.advanced_search(query, per_query=per_query, limit=limit, wait_sleep=wait_sleep)
349+
350+
def advanced_search(
351+
self,
352+
options: dict,
353+
limit: int = float("inf"),
354+
per_query: int = 200,
355+
wait_sleep: bool = True,
356+
) -> Generator[Optional[Cache], None, None]:
357+
"""Perform an advanced search for geocaches with specific search criteria.
358+
359+
The search is performed using the options provided in the :code:`options` parameter.
360+
Example of the :code:`options` parameter::
361+
362+
# https://www.geocaching.com/play/search?owner[0]=Geocaching%20HQ&a=0
363+
options = {"owner[0]": "Geocaching HQ", "a": "0"}
364+
365+
:param options: A dictionary of search options.
366+
:param limit: The maximum number of caches to load.
367+
Defaults to infinity.
368+
:param per_query: The number of caches to request in each query.
369+
Defaults to :code:`200`.
370+
:param wait_sleep: In case of rate limits exceeding, wait appropriate time
371+
if set to :code:`True`, otherwise just yield :code:`None`.
372+
Defaults to :code:`True`.
373+
:return: A generator that yields :class:`.Cache` objects.
374+
"""
375+
376+
if limit <= 0:
377+
return
378+
379+
take_amount = min(limit, per_query)
380+
381+
params = options.copy()
382+
params.update(
383+
{
384+
"take": take_amount,
385+
"skip": 0,
386+
}
387+
)
388+
418389
total, offset = None, 0
419390
while (offset < limit) and ((total is None) or (offset < total)):
420391
params["skip"] = offset
@@ -434,6 +405,8 @@ def search_rect(
434405
total = resp["total"]
435406
offset += take_amount
436407

408+
# add some shortcuts ------------------------------------------------------
409+
437410
def geocode(self, location):
438411
"""Return a :class:`.Point` object from geocoded location.
439412

0 commit comments

Comments
 (0)