From e8c6ea016aa4a9576282146282ba735effa00901 Mon Sep 17 00:00:00 2001 From: Cebtenzzre Date: Fri, 25 Sep 2020 02:24:43 -0400 Subject: [PATCH] tumblr_backup: Stop if API responses stop making forward progress When backing up likes, the API repeats responses past offset=1000. Inspect _links and stop if the "before" parameter fails to change. Fixes #217 --- tumblr_backup.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tumblr_backup.py b/tumblr_backup.py index ca46b72..4316ec6 100755 --- a/tumblr_backup.py +++ b/tumblr_backup.py @@ -585,6 +585,7 @@ def _backup(posts): # Get the JSON entries from the API, which we can only do for MAX_POSTS posts at once. # Posts "arrive" in reverse chronological order. Post #0 is the most recent one. i = options.skip + last_next_before = None while True: # find the upper bound log(account, "Getting posts %d to %d (of %d expected)\r" % (i, i + MAX_POSTS - 1, count_estimate)) @@ -596,9 +597,19 @@ def _backup(posts): continue posts = _get_content(soup) - # `_backup(posts)` can be empty even when `posts` is not if we don't backup reblogged posts - if not posts or not _backup(posts): - log(account, "Backing up posts found empty set of posts, finishing\r") + if not posts: + log(account, "Found empty set of posts, finishing\r") + break + + next_before = soup['response']['_links']['next']['query_params'].get('before') + if next_before is not None: + if next_before == last_next_before: + log(account, "Found same API response twice, finishing\r") + break + last_next_before = next_before + + if not _backup(posts): + log(account, "Found last requested post, finishing\r") break i += MAX_POSTS