From 5cc0d84e166ff041b18c7197c1774189d98f4acd Mon Sep 17 00:00:00 2001
From: Cebtenzzre
Date: Mon, 21 Sep 2020 00:36:24 -0400
Subject: [PATCH] tumblr_backup: --continue and related new behavior

Fixes #115

Included revisions:
- Allow options in BACKUP_CHANGING_OPTIONS to be given again as long as they
  match the existing backup -- redundant, since a resumed backup reloads them
  from .first_run_options anyway, but sometimes more convenient.
---
 tumblr_backup.py | 330 ++++++++++++++++++++++++++++++++++-------------
 util.py          |  17 +++
 wget.py          |  21 +--
 3 files changed, 263 insertions(+), 105 deletions(-)

diff --git a/tumblr_backup.py b/tumblr_backup.py
index ac0d48f..62f2b49 100755
--- a/tumblr_backup.py
+++ b/tumblr_backup.py
@@ -4,6 +4,7 @@ from __future__ import absolute_import, division, print_function, with_statement

 # standard Python library imports
+import contextlib
 import errno
 import hashlib
 import imghdr
@@ -18,14 +19,15 @@ import threading
 import time
 from collections import defaultdict
-from datetime import datetime
+from datetime import datetime, timedelta
+from tempfile import NamedTemporaryFile
 from glob import glob
 from os.path import join, split, splitext
 from xml.sax.saxutils import escape

 from util import (AsyncCallable, ConnectionFile, LockedQueue, MultiCondition, PY3, disable_unraisable_hook,
-                  is_dns_working, make_requests_session, no_internet, nullcontext, path_is_on_vfat, to_bytes,
-                  to_unicode)
+                  is_dns_working, make_requests_session, no_internet, nullcontext, opendir, path_is_on_vfat, to_bytes,
+                  to_unicode, try_unlink)
 from wget import HTTPError, HTTP_RETRY, HTTP_TIMEOUT, WGError, WgetRetrieveWrapper, setup_wget, urlopen

 try:
@@ -153,6 +155,11 @@ def test_jpg(h, f):
 FILE_ENCODING = 'utf-8'
 TIME_ENCODING = locale.getlocale(locale.LC_TIME)[1] or FILE_ENCODING

+MUST_MATCH_OPTIONS = ('dirs', 'likes', 'blosxom', 'hostdirs', 'image_names')
+BACKUP_CHANGING_OPTIONS = (
+    'save_images', 'save_video', 'save_video_tumblr', 'save_audio', 'save_notes', 'copy_notes', 'notes_limit', 'json',
+    'count', 'skip', 'period', 'request', 'filter', 'no_reblog', 'exif', 'prev_archives', 'idents')
+
 main_thread_lock = threading.RLock()
 multicond = MultiCondition(main_thread_lock)
 disable_note_scraper = set()  # type: Set[str]
@@ -160,6 +167,15 @@ def test_jpg(h, f):
 prev_resps = None  # type: Optional[Tuple[str, ...]]


+def load_bs4(reason):
+    sys.modules['soupsieve'] = ()  # type: ignore[assignment]
+    try:
+        from bs4 import BeautifulSoup
+    except ImportError:
+        raise RuntimeError("Cannot {} without module 'bs4'".format(reason))
+    return BeautifulSoup
+
+
 class Logger(object):
     def __init__(self):
         self.lock = threading.Lock()
@@ -219,10 +235,32 @@ def open_file(open_fn, parts):
     return open_fn(path_to(*parts))


+@contextlib.contextmanager
 def open_text(*parts):
-    return open_file(
-        lambda f: io.open(f, 'w', encoding=FILE_ENCODING, errors='xmlcharrefreplace'), parts
-    )
+    dest_path = open_file(lambda f: f, parts)
+    dest_dirname, dest_basename = split(dest_path)
+
+    with NamedTemporaryFile('w', prefix='.{}.'.format(dest_basename), dir=dest_dirname, delete=False) as partf:
+        # Yield the file for writing
+        with io.open(partf.fileno(), 'w', encoding=FILE_ENCODING, errors='xmlcharrefreplace', closefd=False) as f:
+            yield f
+
+        # NamedTemporaryFile is created 0600, set mode to the usual 0644
+        os.fchmod(partf.fileno(), 0o644)
+
+        # Flush buffers and sync the inode
+        partf.flush()
+        os.fsync(partf)  # type: ignore
+
+    pfname = partf.name
+
+    # Move to final destination
+    if PY3:
+        os.replace(pfname, dest_path)
+    else:
+        if os.name == 'nt':
+            try_unlink(dest_path)  # Avoid potential FileExistsError
os.rename(pfname, dest_path) def strftime(fmt, t=None): @@ -243,24 +281,25 @@ def get_api_url(account): ) -def set_period(): +def set_period(period): """Prepare the period start and end timestamps""" i = 0 - tm = [int(options.period[:4]), 1, 1, 0, 0, 0, 0, 0, -1] - if len(options.period) >= 6: + tm = [int(period[:4]), 1, 1, 0, 0, 0, 0, 0, -1] + if len(period) >= 6: i = 1 - tm[1] = int(options.period[4:6]) - if len(options.period) == 8: + tm[1] = int(period[4:6]) + if len(period) == 8: i = 2 - tm[2] = int(options.period[6:8]) + tm[2] = int(period[6:8]) def mktime(tml): tmt = tuple(tml) # type: Any return time.mktime(tmt) - options.p_start = int(mktime(tm)) + p_start = int(mktime(tm)) tm[i] += 1 - options.p_stop = int(mktime(tm)) + p_stop = int(mktime(tm)) + return p_start, p_stop class ApiParser(object): @@ -543,6 +582,18 @@ def dup(fd): return fd return True # Either we copied it or we didn't need to +def check_optional_modules(): + if options.exif: + if pyexiv2 is None: + raise RuntimeError("--exif: module 'pyexif2' is not installed") + if not hasattr(pyexiv2, 'ImageMetadata'): + raise RuntimeError("--exif: module 'pyexiv2' is missing features, perhaps you need 'py3exiv2'?") + if options.filter is not None and pyjq is None: + raise RuntimeError("--filter: module 'pyjq' is not installed") + if options.prev_archives and scandir is None: + raise RuntimeError("--prev-archives: Python is less than 3.5 and module 'scandir' is not installed") + + class Index(object): def __init__(self, blog, body_class='index'): self.blog = blog @@ -632,7 +683,8 @@ def next_month(inc): archive.append(self.blog.footer(base, pp, np, suffix)) - arch.write('\n'.join(archive)) + with arch as archf: + archf.write('\n'.join(archive)) assert first_file is not None return first_file @@ -739,6 +791,100 @@ def footer(base, previous_page, next_page, suffix): f += '\n' return f + @staticmethod + def process_existing_backup(account, prev_archive): + complete_backup = os.path.exists(path_to('.complete')) + if options.resume and complete_backup: + raise RuntimeError('{}: Cannot continue complete backup'.format(account)) + try: + with io.open(path_to('.first_run_options'), encoding=FILE_ENCODING) as f: + first_run_options = json.load(f) + except EnvironmentError as e: + if getattr(e, 'errno', None) != errno.ENOENT: + raise + first_run_options = None + + class Options(object): + def __init__(self, fro): self.fro = fro + def differs(self, opt): return opt not in self.fro or orig_options[opt] != self.fro[opt] + def first(self, opts): return {opt: self.fro.get(opt, '') for opt in opts} + @staticmethod + def this(opts): return {opt: orig_options[opt] for opt in opts} + + # These options must always match + if first_run_options is not None: + opts = Options(first_run_options) + mustmatchdiff = tuple(filter(opts.differs, MUST_MATCH_OPTIONS)) + if mustmatchdiff: + raise RuntimeError('{}: The script was given {} but the existing backup was made with {}'.format( + account, opts.this(mustmatchdiff), opts.first(mustmatchdiff))) + + backdiff = tuple(filter(opts.differs, BACKUP_CHANGING_OPTIONS)) + if options.resume: + backdiff_nondef = tuple(opt for opt in backdiff if orig_options[opt] != parser.get_default(opt)) + if backdiff_nondef: + raise RuntimeError('{}: The script was given {} but the existing backup was made with {}'.format( + account, opts.this(backdiff_nondef), opts.first(backdiff_nondef))) + elif complete_backup: + pass # Complete archives may be added to with different options + elif not backdiff: + raise RuntimeError('{}: Found 
incomplete archive, try --continue'.format(account)) + elif not options.ignore_resume: + raise RuntimeError('{}: Refusing to make a different backup (with {} instead of {}) over an incomplete ' + 'archive. Delete the old backup to start fresh, or skip this check with ' + '--continue=ignore.'.format(account, opts.this(backdiff), opts.first(backdiff))) + + if prev_archive is not None: + try: + with io.open(join(prev_archive, '.first_run_options'), encoding=FILE_ENCODING) as f: + pa_first_run_options = json.load(f) + except EnvironmentError as e: + if getattr(e, 'errno', None) != errno.ENOENT: + raise + pa_first_run_options = None + + # These options must always match + if pa_first_run_options is not None: + pa_opts = Options(pa_first_run_options) + mustmatchdiff = tuple(filter(pa_opts.differs, MUST_MATCH_OPTIONS)) + if mustmatchdiff: + raise RuntimeError('{}: The script was given {} but the previous archive was made with {}'.format( + account, pa_opts.this(mustmatchdiff), pa_opts.first(mustmatchdiff))) + + oldest_tstamp = None + if not complete_backup: + # Read every post to find the oldest timestamp we've saved. + filter_ = join('*', dir_index) if options.dirs else '*' + post_ext + post_glob = glob(path_to(post_dir, filter_)) + if options.resume and post_glob: + log('{}: Found incomplete backup. Finding oldest post (may take a while)\n'.format(account)) + BeautifulSoup = load_bs4('continue incomplete backup') + + for post in post_glob: + with io.open(post, encoding=FILE_ENCODING) as pf: + soup = BeautifulSoup(pf, 'lxml') + postdate = soup.find('time')['datetime'] + del soup + # No datetime.fromisoformat or datetime.timestamp on Python 2 + tstamp = ((datetime.strptime(postdate, '%Y-%m-%dT%H:%M:%SZ') - datetime(1970, 1, 1)) + // timedelta(seconds=1)) + oldest_tstamp = tstamp if oldest_tstamp is None else min(tstamp, oldest_tstamp) + + if first_run_options is not None and options.resume: + # Load saved options + for opt in BACKUP_CHANGING_OPTIONS: + setattr(options, opt, first_run_options[opt]) + else: + # Load original options + for opt in BACKUP_CHANGING_OPTIONS: + setattr(options, opt, orig_options[opt]) + if first_run_options is None and not (complete_backup or post_glob or options.post_count): + # Presumably this is the initial backup of this blog + with open_text('.first_run_options') as f: + f.write(to_unicode(json.dumps(orig_options))) + + return oldest_tstamp + def backup(self, account, prev_archive): """makes single files and an index for every post on a public Tumblr blog account""" @@ -766,6 +912,12 @@ def backup(self, account, prev_archive): self.post_count = 0 self.filter_skipped = 0 + oldest_tstamp = self.process_existing_backup(account, prev_archive) + check_optional_modules() + + if options.idents and not isinstance(options.idents, frozenset): + options.idents = frozenset(options.idents) + # get the highest post id already saved ident_max = None if options.incremental: @@ -774,11 +926,12 @@ def backup(self, account, prev_archive): long(splitext(split(f)[1])[0]) for f in glob(path_to(post_dir, '*' + post_ext)) ) - log.status('Backing up posts after {}\r'.format(ident_max)) except ValueError: # max() arg is an empty sequence pass - else: - log.status('Getting basic information\r') + else: + log('{}: Backing up posts after {}\n'.format(account, ident_max)) + + log.status('Getting basic information\r') api_parser = ApiParser(base, account) api_thread = AsyncCallable(main_thread_lock, api_parser.apiparse, 'API Thread') @@ -806,6 +959,9 @@ def backup(self, account, prev_archive): # 
use the meta information to create a HTML header TumblrPost.post_header = self.header(body_class='post') + jq_filter = None if options.filter is None else pyjq.compile(options.filter) # pytype: disable=attribute-error + request_sets = None if options.request is None else {typ: set(tags) for typ, tags in options.request} + # start the thread pool backup_pool = ThreadPool() @@ -821,16 +977,16 @@ def _backup(posts, post_respfiles): if options.count and self.post_count >= options.count: return False if options.period: - if post.date >= options.p_stop: + if post.date >= options.period[1]: raise RuntimeError('Found post with date ({}) older than before param ({})'.format( - post.date, options.p_stop)) - if post.date < options.p_start: + post.date, options.period[1])) + if post.date < options.period[0]: return False - if options.request: - if post.typ not in options.request: + if request_sets: + if post.typ not in request_sets: continue - tags = options.request[post.typ] - if not (TAG_ANY in tags or tags & post.tags_lower): + tags = request_sets[post.typ] + if not (TAG_ANY in tags or tags & {t.lower() for t in post.tags}): continue if options.no_reblog: if 'reblogged_from_name' in p or 'reblogged_root_name' in p: @@ -842,7 +998,7 @@ def _backup(posts, post_respfiles): continue if os.path.exists(open_file(lambda f: f, post.get_path())) and options.no_post_clobber: continue # Post exists and no-clobber enabled - if options.filter and not options.filter.first(p): + if jq_filter and not jq_filter.first(p): self.filter_skipped += 1 continue @@ -860,7 +1016,9 @@ def _backup(posts, post_respfiles): # Get the JSON entries from the API, which we can only do for MAX_POSTS posts at once. # Posts "arrive" in reverse chronological order. Post #0 is the most recent one. 
i = options.skip - before = options.p_stop if options.period else None + before = options.period[1] if options.period else None + if before is not None and oldest_tstamp is not None: + before = min(before, oldest_tstamp) while True: # find the upper bound @@ -914,6 +1072,17 @@ def _backup(posts, post_respfiles): ix.build_index() ix.save_index() + if not os.path.exists(path_to('.complete')): + # Make .complete file + sf = opendir(save_folder, os.O_RDONLY) + try: + os.fdatasync(sf) + with io.open(open_file(lambda f: f, ('.complete',)), 'wb') as f: + os.fsync(f) # type: ignore + os.fdatasync(sf) + finally: + os.close(sf) + log.status(None) log('{}: {} {}posts backed up{}\n'.format( account, self.post_count, 'liked ' if options.likes else '', @@ -951,9 +1120,6 @@ def __init__(self, post, backup_account, respfile, prev_archive): self.reblogged_root = post.get('reblogged_root_url') self.source_title = post.get('source_title', '') self.source_url = post.get('source_url', '') - self.tags_lower = None # type: Optional[Set[str]] - if options.request: - self.tags_lower = {t.lower() for t in self.tags} self.file_name = join(self.ident, dir_index) if options.dirs else self.ident + post_ext self.llink = self.ident if options.dirs else self.file_name self.media_dir = join(post_dir, self.ident) if options.dirs else media_dir @@ -1104,6 +1270,11 @@ def get_youtube_url(self, youtube_url): } if options.cookiefile is not None: ydl_options['cookiefile'] = options.cookiefile + try: + import youtube_dl + from youtube_dl.utils import sanitize_filename + except ImportError: + raise RuntimeError("--save-video: module 'youtube_dl' is not installed") ydl = youtube_dl.YoutubeDL(ydl_options) ydl.add_default_info_extractors() try: @@ -1262,6 +1433,9 @@ def get_post(self): notes_html = u'' + if options.save_notes or options.copy_notes: + BeautifulSoup = load_bs4('save notes' if options.save_notes else 'copy notes') + if options.copy_notes: # Copy notes from prev_archive with io.open(join(self.prev_archive, post_dir, self.ident + post_ext)) as post_file: @@ -1271,6 +1445,7 @@ def get_post(self): notes_html = u''.join([n.prettify() for n in notes.find_all('li')]) if options.save_notes and self.backup_account not in disable_note_scraper and not notes_html.strip(): + import note_scraper # Scrape and save notes while True: ns_stdout_rd, ns_stdout_wr = multiprocessing.Pipe(duplex=False) @@ -1344,9 +1519,10 @@ def get_path(self): def save_post(self): """saves this post locally""" - with open_text(*self.get_path()) as f: + path = path_to(*self.get_path()) + with open_text(path) as f: f.write(self.get_post()) - os.utime(f.name, (self.date, self.date)) + os.utime(path, (self.date, self.date)) if options.json: with open_text(json_dir, self.ident + '.json') as f: f.write(self.get_json_content()) @@ -1520,10 +1696,6 @@ def handle_term_signal(signum, frame): import argparse class CSVCallback(argparse.Action): - def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, set(values.split(','))) - - class CSVListCallback(argparse.Action): def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, list(values.split(','))) @@ -1537,9 +1709,9 @@ def __call__(self, parser, namespace, values, option_string=None): parser.error("{}: invalid post type '{}'".format(option_string, typ)) for typ in POST_TYPES if typ == TYPE_ANY else (typ,): if parts: - request[typ] = request.get(typ, set()).union(parts) + request[typ] = request.get(typ, ()) + parts else: - request[typ] = 
{TAG_ANY} + request[typ] = (TAG_ANY,) setattr(namespace, self.dest, request) class TagsCallback(RequestCallback): @@ -1548,6 +1720,18 @@ def __call__(self, parser, namespace, values, option_string=None): parser, namespace, TYPE_ANY + ':' + values.replace(',', ':'), option_string, ) + class PeriodCallback(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + try: + pformat = {'y': '%Y', 'm': '%Y%m', 'd': '%Y%m%d'}[values] + except KeyError: + period = values.replace('-', '') + if not re.match(r'^\d{4}(\d\d)?(\d\d)?$', period): + parser.error("Period must be 'y', 'm', 'd' or YYYY[MM[DD]]") + else: + period = time.strftime(pformat) + setattr(namespace, self.dest, set_period(period)) + parser = argparse.ArgumentParser(usage='%(prog)s [options] blog-name ...', description='Makes a local backup of Tumblr blogs.') parser.add_argument('-O', '--outdir', help='set the output directory (default: blog-name)') @@ -1575,7 +1759,8 @@ def __call__(self, parser, namespace, values, option_string=None): ' (useful for cron jobs)') parser.add_argument('-n', '--count', type=int, help='save only COUNT posts') parser.add_argument('-s', '--skip', type=int, default=0, help='skip the first SKIP posts') - parser.add_argument('-p', '--period', help="limit the backup to PERIOD ('y', 'm', 'd' or YYYY[MM[DD]])") + parser.add_argument('-p', '--period', action=PeriodCallback, + help="limit the backup to PERIOD ('y', 'm', 'd' or YYYY[MM[DD]])") parser.add_argument('-N', '--posts-per-page', type=int, default=50, metavar='COUNT', help='set the number of posts per monthly page, 0 for unlimited') parser.add_argument('-Q', '--request', action=RequestCallback, @@ -1592,11 +1777,11 @@ def __call__(self, parser, namespace, values, option_string=None): parser.add_argument('--no-reblog', action='store_true', help="don't save reblogged posts") parser.add_argument('-I', '--image-names', choices=('o', 'i', 'bi'), default='o', metavar='FMT', help="image filename format ('o'=original, 'i'=, 'bi'=_)") - parser.add_argument('-e', '--exif', action=CSVCallback, default=set(), metavar='KW', + parser.add_argument('-e', '--exif', action=CSVCallback, default=[], metavar='KW', help='add EXIF keyword tags to each picture' " (comma-separated values; '-' to remove all tags, '' to add no extra tags)") parser.add_argument('-S', '--no-ssl-verify', action='store_true', help='ignore SSL verification errors') - parser.add_argument('--prev-archives', action=CSVListCallback, default=[], metavar='DIRS', + parser.add_argument('--prev-archives', action=CSVCallback, default=[], metavar='DIRS', help='comma-separated list of directories (one per blog) containing previous blog archives') parser.add_argument('-M', '--timestamping', action='store_true', help="don't re-download files if the remote timestamp and size match the local file") @@ -1610,27 +1795,21 @@ def __call__(self, parser, namespace, values, option_string=None): parser.add_argument('--user-agent', help='User agent string to use with HTTP requests') parser.add_argument('--no-post-clobber', action='store_true', help='Do not re-download existing posts') parser.add_argument('--threads', type=int, default=20, help='number of threads to use for post retrieval') + parser.add_argument('--continue', action='store_true', dest='resume', help='Continue an incomplete first backup') + parser.add_argument('--continue=ignore', action='store_true', dest='ignore_resume', + help='Force backup over an incomplete archive with different options') parser.add_argument('blogs', nargs='*') 
options = parser.parse_args() + blogs = options.blogs or DEFAULT_BLOGS + del options.blogs + orig_options = vars(options).copy() + if not blogs: + parser.error('Missing blog-name') + if sum(1 for arg in ('resume', 'ignore_resume', 'incremental', 'auto') if getattr(options, arg)) > 1: + parser.error('Only one of --continue, --continue=ignore, --incremental, and --auto may be given') if options.auto is not None and options.auto != time.localtime().tm_hour: options.incremental = True - if options.period: - try: - pformat = {'y': '%Y', 'm': '%Y%m', 'd': '%Y%m%d'}[options.period] - options.period = time.strftime(pformat) - except KeyError: - options.period = options.period.replace('-', '') - if not re.match(r'^\d{4}(\d\d)?(\d\d)?$', options.period): - parser.error("Period must be 'y', 'm', 'd' or YYYY[MM[DD]]") - set_period() - - wget_retrieve = WgetRetrieveWrapper(options, log) - setup_wget(not options.no_ssl_verify, options.user_agent) - - blogs = options.blogs or DEFAULT_BLOGS - if not blogs: - parser.error("Missing blog-name") if options.count is not None and options.count < 0: parser.error("--count: count must not be negative") if options.skip < 0: @@ -1641,44 +1820,16 @@ def __call__(self, parser, namespace, values, option_string=None): parser.error("-O can only be used for a single blog-name") if options.dirs and options.tag_index: parser.error("-D cannot be used with --tag-index") - if options.exif: - if pyexiv2 is None: - parser.error("--exif: module 'pyexif2' is not installed") - if not hasattr(pyexiv2, 'ImageMetadata'): - parser.error("--exif: module 'pyexiv2' is missing features, perhaps you need 'py3exiv2'?") - if options.save_video: - try: - import youtube_dl - from youtube_dl.utils import sanitize_filename - except ImportError: - parser.error("--save-video: module 'youtube_dl' is not installed") - if options.save_notes or options.copy_notes: - sys.modules['soupsieve'] = () # type: ignore[assignment] - try: - from bs4 import BeautifulSoup - except ImportError: - parser.error("--{}: module 'bs4' is not installed".format( - 'save-notes' if options.save_notes else 'copy-notes' - )) if options.cookiefile is not None and not os.access(options.cookiefile, os.R_OK): parser.error('--cookiefile: file cannot be read') - if options.save_notes: - import note_scraper - if options.copy_notes: - if not options.prev_archives: - parser.error('--copy-notes requires --prev-archives') + if options.copy_notes and not options.prev_archives: + parser.error('--copy-notes requires --prev-archives') if options.notes_limit is not None: if not options.save_notes: parser.error('--notes-limit requires --save-notes') if options.notes_limit < 1: parser.error('--notes-limit: Value must be at least 1') - if options.filter is not None: - if pyjq is None: - parser.error("--filter: module 'pyjq' is not installed") - options.filter = pyjq.compile(options.filter) if options.prev_archives: - if scandir is None: - parser.error("--prev-archives: Python is less than 3.5 and module 'scandir' is not installed") if len(options.prev_archives) != len(blogs): parser.error('--prev-archives: expected {} directories, got {}'.format( len(blogs), len(options.prev_archives), @@ -1692,6 +1843,11 @@ def __call__(self, parser, namespace, values, option_string=None): if options.threads < 1: parser.error('--threads: must use at least one thread') + check_optional_modules() + + wget_retrieve = WgetRetrieveWrapper(options, log) + setup_wget(not options.no_ssl_verify, options.user_agent) + ApiParser.setup() global backup_account diff --git 
a/util.py b/util.py index f5f5dc6..4ff58d1 100644 --- a/util.py +++ b/util.py @@ -4,6 +4,7 @@ import collections import contextlib +import errno import io import os import socket @@ -501,3 +502,19 @@ def quit(self): except queue.Full: pass self.thread.join() + + +def opendir(dir_, flags): + try: + flags |= os.O_DIRECTORY + except AttributeError: + dir_ += os.path.sep # Fallback, some systems don't support O_DIRECTORY + return os.open(dir_, flags) + + +def try_unlink(path): + try: + os.unlink(path) + except EnvironmentError as e: + if getattr(e, 'errno', None) != errno.ENOENT: + raise diff --git a/wget.py b/wget.py index 49f933e..a3b9c88 100644 --- a/wget.py +++ b/wget.py @@ -13,7 +13,8 @@ from tempfile import NamedTemporaryFile from wsgiref.handlers import format_date_time -from util import PY3, URLLIB3_FROM_PIP, get_supported_encodings, is_dns_working, no_internet, setup_urllib3_ssl +from util import (PY3, URLLIB3_FROM_PIP, + get_supported_encodings, is_dns_working, no_internet, opendir, setup_urllib3_ssl, try_unlink) try: from urllib.parse import urlsplit, urljoin @@ -569,13 +570,7 @@ def _retrieve_loop(hstat, url, dest_file, adjust_basename, options, log): got_head = False # used for time-stamping dest_dirname, dest_basename = os.path.split(dest_file) - flags = os.O_RDONLY - try: - flags |= os.O_DIRECTORY - except AttributeError: - dest_dirname += os.path.sep # Fallback, some systems don't support O_DIRECTORY - - hstat.dest_dir = os.open(dest_dirname, flags) + hstat.dest_dir = opendir(dest_dirname, os.O_RDONLY) hstat.set_part_file_supplier(functools.partial( lambda pfx, dir_: NamedTemporaryFile('wb', prefix=pfx, dir=dir_, delete=False), '.{}.'.format(dest_basename), dest_dirname, @@ -728,19 +723,9 @@ def _retrieve_loop(hstat, url, dest_file, adjust_basename, options, log): os.replace(os.path.basename(pfname), new_dest_basename, src_dir_fd=hstat.dest_dir, dst_dir_fd=hstat.dest_dir) - # Sync the directory and return - os.fdatasync(hstat.dest_dir) return -def try_unlink(path): - try: - os.unlink(path) - except EnvironmentError as e: - if getattr(e, 'errno', None) != errno.ENOENT: - raise - - def setup_wget(ssl_verify, user_agent): if not ssl_verify: # Hide the InsecureRequestWarning from urllib3
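
Note (illustration only, not part of the diff above): the new open_text() context manager and the .complete marker both rest on the same durability pattern -- write into a temporary file created in the destination directory, flush and fsync it, rename it over the final name, then fsync the directory so the rename itself survives a crash. The sketch below shows that pattern in isolation. It assumes Python 3 and POSIX semantics; the function name atomic_write_text and its signature are hypothetical and do not appear in the patch.

import os
from tempfile import NamedTemporaryFile


def atomic_write_text(dest_path, text, encoding='utf-8'):
    # Hypothetical helper: a minimal sketch of the write-then-rename pattern
    # used by the patch, not a function defined in it.
    dest_dir, dest_base = os.path.split(dest_path)
    dest_dir = dest_dir or '.'
    # Create the temporary file in the destination directory so the final
    # os.replace() is a same-filesystem rename (and therefore atomic).
    with NamedTemporaryFile('w', encoding=encoding, prefix='.{}.'.format(dest_base),
                            dir=dest_dir, delete=False) as tmp:
        tmp.write(text)
        tmp.flush()
        os.fsync(tmp.fileno())       # make the file contents durable
    os.replace(tmp.name, dest_path)  # atomically move it into place
    dir_fd = os.open(dest_dir, os.O_RDONLY)
    try:
        os.fsync(dir_fd)             # make the rename itself durable
    finally:
        os.close(dir_fd)

Creating the temporary file next to its destination (dir=dest_dir) is what keeps the final step a same-filesystem rename; that is also why the patch passes dir=dest_dirname to NamedTemporaryFile rather than using the default temporary directory. The patch applies the same idea to the completion marker: only after every post and index has been written does it create .complete and fdatasync the save folder, so a backup directory either carries the marker (finished) or does not (resumable with --continue, or overridable with --continue=ignore).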