Skip to content

Commit

Permalink
tumblr_backup: Stop if API responses stop making forward progress
Browse files Browse the repository at this point in the history
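When backing up likes, the API starts repeating the same response once the offset goes past 1000.
Inspect the response's "_links" and stop if the "before" query parameter of the "next" link fails to change between consecutive requests.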

Fixes bbolli#217
  • Loading branch information
cebtenzzre committed Sep 25, 2020
1 parent 01d3528 commit e8c6ea0
Showing 1 changed file with 14 additions and 3 deletions.
17 changes: 14 additions & 3 deletions tumblr_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,7 @@ def _backup(posts):
# Get the JSON entries from the API, which we can only do for MAX_POSTS posts at once.
# Posts "arrive" in reverse chronological order. Post #0 is the most recent one.
i = options.skip
last_next_before = None
while True:
# find the upper bound
log(account, "Getting posts %d to %d (of %d expected)\r" % (i, i + MAX_POSTS - 1, count_estimate))
Expand All @@ -596,9 +597,19 @@ def _backup(posts):
continue

posts = _get_content(soup)
# `_backup(posts)` can be empty even when `posts` is not if we don't backup reblogged posts
if not posts or not _backup(posts):
log(account, "Backing up posts found empty set of posts, finishing\r")
if not posts:
log(account, "Found empty set of posts, finishing\r")
break

next_before = soup['response']['_links']['next']['query_params'].get('before')
if next_before is not None:
if next_before == last_next_before:
log(account, "Found same API response twice, finishing\r")
break
last_next_before = next_before

if not _backup(posts):
log(account, "Found last requested post, finishing\r")
break

i += MAX_POSTS
Expand Down

0 comments on commit e8c6ea0

Please sign in to comment.