Skip to content

Commit

Permalink
Fix recipe link discovery.
Browse files Browse the repository at this point in the history
Now includes additional categories that were broken out of the normal level range categories.
  • Loading branch information
doxxx committed Feb 15, 2019
1 parent 518ad53 commit ef7fc5f
Showing 1 changed file with 13 additions and 8 deletions.
21 changes: 13 additions & 8 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,12 @@
70: [ 220, 230, 250, 280, 310 ], # 290, 300, 320, 350, 380
}

# Crafting recipes are grouped into 5-level brackets from 1 up to MAX_LEVEL
# (1-5, 6-10, ..., 66-70).
MAX_LEVEL = 70
LEVEL_RANGES = [f"{lo}-{lo + 4}" for lo in range(1, MAX_LEVEL, 5)]
NUM_LEVEL_RANGES = len(LEVEL_RANGES)
# The site also lists categories outside the normal level ranges, keyed "c1".."c6".
NUM_ADDITIONAL_CATEGORIES = 6
# Values sent as the "category3" query parameter: the level-range indices
# ("0".."13") followed by the additional category keys ("c1".."c6").
LINK_CATEGORIES = [str(i) for i in range(NUM_LEVEL_RANGES)] + \
                  [f"c{i}" for i in range(1, NUM_ADDITIONAL_CATEGORIES + 1)]

# Matches Lodestone embed codes of the form "[db:recipe=<hex id>]",
# capturing the lowercase hex recipe id.
EMBED_CODE_RE = re.compile(r"\[db:recipe=([0-9a-f]+)]")

Expand All @@ -86,6 +90,7 @@ async def wait_with_progress(coros: list, desc: str = None, unit: str = None):
async def fetch(session: aiohttp.ClientSession, url: str, **kwargs):
err_count = 0
while err_count < 5:
# noinspection PyBroadException
try:
async with FETCH_SEMAPHORE:
async with session.get(url, **kwargs) as res:
Expand Down Expand Up @@ -115,20 +120,20 @@ def parse_recipe_links_page(text: str):
return links, show_end, total


async def fetch_recipe_links_page(session: aiohttp.ClientSession, cls: str, category: str, page: int):
    """Fetch and parse one page of recipe links for a class/category.

    *category* is sent as the "category3" query parameter; it is either a
    level-range index ("0".."13") or an additional category key ("c1".."c6").
    Returns whatever parse_recipe_links_page yields for the fetched page
    (links, show_end, total).
    """
    params = {
        "category2": CLASSES.index(cls),
        "category3": category,
        "page": page,
    }
    return parse_recipe_links_page(await fetch(session, RECIPE_LIST_URL, params=params))


async def fetch_recipe_links_range(session: aiohttp.ClientSession, cls: str, level_range: int):
async def fetch_recipe_links_range(session: aiohttp.ClientSession, cls: str, category: str):
links = []
page = 1
while True:
page_links, show_end, total = await fetch_recipe_links_page(session, cls, level_range, page)
page_links, show_end, total = await fetch_recipe_links_page(session, cls, category, page)
links += page_links
if show_end < total:
page += 1
Expand All @@ -139,7 +144,7 @@ async def fetch_recipe_links_range(session: aiohttp.ClientSession, cls: str, lev

async def fetch_recipe_links(session: aiohttp.ClientSession, cls: str):
results = wait_with_progress(
[fetch_recipe_links_range(session, cls, level_range) for level_range in range(0, NUM_LEVEL_RANGES)],
[fetch_recipe_links_range(session, cls, category) for category in LINK_CATEGORIES],
desc=f"Fetching {cls} links",
unit=""
)
Expand Down Expand Up @@ -259,7 +264,7 @@ async def fetch_class(session: aiohttp.ClientSession, additional_languages: dict

async def scrape_to_file(session: aiohttp.ClientSession, additional_languages: dict, cls: str):
    """Scrape all recipes for one class and write them to out/<cls>.json."""
    recipes = await fetch_class(session, additional_languages, cls)
    # ensure_ascii=False keeps non-ASCII names readable in the JSON output.
    with open(f"out/{cls}.json", mode="wt", encoding="utf-8") as db_file:
        json.dump(recipes, db_file, indent=2, sort_keys=True, ensure_ascii=False)


Expand Down

0 comments on commit ef7fc5f

Please sign in to comment.