From e4b2994f4a3b9459c631f9342bd1f1870f710b1e Mon Sep 17 00:00:00 2001 From: Christoph Fink Date: Mon, 8 Jan 2024 10:26:31 +0200 Subject: [PATCH 1/2] GH action workflows --- .github/ISSUE_TEMPLATE/bug_report.md | 29 ++++++++ .github/ISSUE_TEMPLATE/feature_request.md | 20 ++++++ .github/workflows/build.yml | 33 +++++++++ .github/workflows/lint.yml | 19 +++++ .github/workflows/release.yml | 86 +++++++++++++++++++++++ 5 files changed, 187 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/workflows/build.yml create mode 100644 .github/workflows/lint.yml create mode 100644 .github/workflows/release.yml diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..cd5f232 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,29 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Environment:** + - OS: + - Python package source (PyPI, conda, ...) + - Versions of Python, Java Development Kit, Python modules + +**Additional context** +Add any other context about the problem here. + +**Test data and/or script/notebook** +If you have a reproducible example case, including test data or a snippet of code, please attach them to this issue. 
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..bbcbbe7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..a0f2e68 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,33 @@ +# vim: set tabstop=2 softtabstop=2 shiftwidth=2 expandtab: + +name: Build main branch + +on: + pull_request: + branches: [main] + types: [closed] + push: + branches: [main] + workflow_call: + +jobs: + build: + name: Build wheel(s) and packages + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: "3.11" + cache: "pip" + - name: Install build tools + run: python -m pip install build + - name: Build wheel(s) and packages + run: python -m build . 
+ - name: Upload built packages + uses: actions/upload-artifact@v3 + with: + name: package + path: dist/*.* diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..6a2aed3 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,19 @@ +# vim: set tabstop=2 softtabstop=2 shiftwidth=2 expandtab: + +name: Lint code of pull requests +on: + pull_request: + +jobs: + lint: + name: Linting code + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.11" + cache: "pip" + - run: pip install black flake8 + - run: python -m black --check . + - run: python -m flake8 . diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..8d510b5 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,86 @@ +# vim: set tabstop=2 softtabstop=2 shiftwidth=2 expandtab: + +name: Create a release and deploy to PyPi whenever a protected tag (v0.0.0) is created + +on: + push: + tags: + - v*.*.* + +jobs: + build: + name: Build package + uses: ./.github/workflows/build.yml + secrets: inherit + + merge-into-stable: + name: Update stable branch to point to this release + runs-on: ubuntu-latest + needs: [build] + if: "!contains(github.ref, 'dev')" + permissions: write-all + steps: + - name: Clone repository, check-out stable + uses: actions/checkout@v3 + with: + fetch-depth: 0 + ref: stable + - name: Merge tag into stable + run: | + TAG="${{github.ref}}" # refs/tags/v0.0.0 + git merge "${TAG:10}" + git push + + deploy: + name: Upload built package to PyPi + runs-on: ubuntu-latest + needs: [build] + steps: + - name: Download built artifacts + uses: actions/download-artifact@v3 + with: + name: package + path: dist/ + - name: Upload package to PyPi + uses: pypa/gh-action-pypi-publish@release/v1.5 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} + skip_existing: true + + release: + name: Create a new release + 
runs-on: ubuntu-latest + needs: [deploy] + if: "!contains(github.ref, 'dev')" + permissions: + contents: write + steps: + - name: Download built artifacts + uses: actions/download-artifact@v3 + with: + name: package + path: dist/ + - name: Create release and upload package + uses: softprops/action-gh-release@v1 + with: + files: dist/* + + prerelease: + name: Create a new pre-release + runs-on: ubuntu-latest + needs: [deploy] + if: contains(github.ref, 'dev') + permissions: + contents: write + steps: + - name: Download built artifacts + uses: actions/download-artifact@v3 + with: + name: package + path: dist/ + - name: Create release and upload package + uses: softprops/action-gh-release@v1 + with: + files: dist/* + prerelease: true From 69c9e233f5303921bbb4833ce1a13e3861b91d61 Mon Sep 17 00:00:00 2001 From: Christoph Fink Date: Mon, 8 Jan 2024 10:28:44 +0200 Subject: [PATCH 2/2] linted --- flickrhistory/basicflickrhistorydownloader.py | 72 ++++++++----------- flickrhistory/cache.py | 32 +++------ flickrhistory/cacheupdaterthread.py | 5 +- flickrhistory/config.py | 40 +++-------- flickrhistory/databaseobjects.py | 59 +++++---------- flickrhistory/fancyflickrhistorydownloader.py | 10 +-- flickrhistory/photodownloader.py | 21 ++---- flickrhistory/photodownloaderthread.py | 11 +-- flickrhistory/timespan.py | 47 +++--------- flickrhistory/userprofiledownloader.py | 13 +--- flickrhistory/userprofileupdaterthread.py | 34 ++++----- 11 files changed, 102 insertions(+), 242 deletions(-) diff --git a/flickrhistory/basicflickrhistorydownloader.py b/flickrhistory/basicflickrhistorydownloader.py index 0ac8e50..886fd78 100644 --- a/flickrhistory/basicflickrhistorydownloader.py +++ b/flickrhistory/basicflickrhistorydownloader.py @@ -76,9 +76,7 @@ def download(self): # start downloaders for _ in range(self.NUM_WORKERS): worker = PhotoDownloaderThread( - self._api_key_manager, - self._todo_deque, - self._done_queue + self._api_key_manager, self._todo_deque, self._done_queue ) 
worker.start() self._worker_threads.append(worker) @@ -86,8 +84,7 @@ def download(self): # start user profile updaters for i in range(self.NUM_WORKERS): worker = UserProfileUpdaterThread( - self._api_key_manager, - (i + 1, self.NUM_WORKERS) + self._api_key_manager, (i + 1, self.NUM_WORKERS) ) worker.start() self._worker_threads.append(worker) @@ -100,10 +97,7 @@ def download(self): self.report_progress() time.sleep(self.STATUS_UPDATE_SEC) - except ( - KeyboardInterrupt, - SigTermReceivedException - ): + except (KeyboardInterrupt, SigTermReceivedException): self.announce_shutdown() for worker in self._worker_threads: worker.shutdown.set() @@ -128,11 +122,11 @@ def report_progress(self): photos=photo_count, profiles=profile_count, workers=(threading.active_count() - self.NUM_MANAGERS), - todo=len(self._todo_deque) + todo=len(self._todo_deque), ), file=sys.stderr, end=self.STATUS_UPDATE_LINE_END, - flush=True + flush=True, ) def announce_shutdown(self): @@ -141,7 +135,7 @@ def announce_shutdown(self): "Cleaning up" + (" " * 69), # 80 - len("Cleaning up") file=sys.stderr, end=self.STATUS_UPDATE_LINE_END, - flush=True + flush=True, ) def summarise_overall_progress(self): @@ -152,14 +146,10 @@ def summarise_overall_progress(self): """ photo_count, _, profile_count, _ = self._statistics print( - ( - "Downloaded {photos:d} photos " - + "and {profiles:d} user profiles" - ).format( - photos=photo_count, - profiles=profile_count + ("Downloaded {photos:d} photos " + "and {profiles:d} user profiles").format( + photos=photo_count, profiles=profile_count ), - file=sys.stderr + file=sys.stderr, ) @property @@ -169,10 +159,7 @@ def gaps_in_download_history(self): one_day = datetime.timedelta(days=1) # for comparison for i in range(len(already_downloaded) - 1): - gap = TimeSpan( - already_downloaded[i].end, - already_downloaded[i + 1].start - ) + gap = TimeSpan(already_downloaded[i].end, already_downloaded[i + 1].start) if gap.duration > one_day: divider = math.ceil(gap.duration / 
one_day) for part_of_gap in gap / divider: @@ -191,7 +178,8 @@ def already_downloaded_timespans(self): # delete existing 0-length time spans timespans = [ - timespan for timespan in timespans + timespan + for timespan in timespans if timespan.duration > datetime.timedelta(0) ] @@ -206,15 +194,11 @@ def already_downloaded_timespans(self): # on top of that, some small timestamps seems to be simply 0 +- timezone offset # which invalidates pretty much the entire first day after epoch 0 # this is why we use epoch 0 + 1 day - zero = ( - datetime.datetime.fromtimestamp(0, tz=datetime.timezone.utc) - + datetime.timedelta(days=1) - ) + zero = datetime.datetime.fromtimestamp( + 0, tz=datetime.timezone.utc + ) + datetime.timedelta(days=1) now = datetime.datetime.now(datetime.timezone.utc) - timespans += [ - TimeSpan(zero, zero), - TimeSpan(now, now) - ] + timespans += [TimeSpan(zero, zero), TimeSpan(now, now)] return sum(timespans) # sum resolves overlaps @@ -222,18 +206,22 @@ def already_downloaded_timespans(self): def _statistics(self): runtime = float((datetime.datetime.now() - self.started).total_seconds()) - photo_count = sum([ - worker.count - for worker in self._worker_threads - if isinstance(worker, PhotoDownloaderThread) - ]) + photo_count = sum( + [ + worker.count + for worker in self._worker_threads + if isinstance(worker, PhotoDownloaderThread) + ] + ) photo_rate = photo_count / runtime - profile_count = sum([ - worker.count - for worker in self._worker_threads - if isinstance(worker, UserProfileUpdaterThread) - ]) + profile_count = sum( + [ + worker.count + for worker in self._worker_threads + if isinstance(worker, UserProfileUpdaterThread) + ] + ) profile_rate = profile_count / runtime return (photo_count, photo_rate, profile_count, profile_rate) diff --git a/flickrhistory/cache.py b/flickrhistory/cache.py index ac4e0c5..52f5150 100644 --- a/flickrhistory/cache.py +++ b/flickrhistory/cache.py @@ -46,11 +46,7 @@ class Cache: """ - def __init__( - self, - 
cache=None, - cache_file_basename=None - ): + def __init__(self, cache=None, cache_file_basename=None): """Initialise a Cache object, load cache from file.""" self._cache = {} @@ -60,11 +56,11 @@ def __init__( self._cache_file = os.path.abspath( os.path.join( ( - os.environ.get('LOCALAPPDATA') - or os.environ.get('XDG_CACHE_HOME') - or os.path.join(os.environ['HOME'], '.cache') + os.environ.get("LOCALAPPDATA") + or os.environ.get("XDG_CACHE_HOME") + or os.path.join(os.environ["HOME"], ".cache") ), - "{:s}.yml".format(cache_file_basename) + "{:s}.yml".format(cache_file_basename), ) ) @@ -76,19 +72,13 @@ def _load_cache(self): cache = {} try: - cache.update( - yaml.safe_load( - open(self._cache_file, "r", encoding="utf-8") - ) - ) + cache.update(yaml.safe_load(open(self._cache_file, "r", encoding="utf-8"))) except FileNotFoundError: pass if cache == {}: warnings.warn( - "No cache found in file {}, starting empty".format( - self._cache_file - ) + "No cache found in file {}, starting empty".format(self._cache_file) ) return cache @@ -98,14 +88,10 @@ def _save_cache(self): yaml.dump( self._cache, open(self._cache_file, "w", encoding="utf-8"), - Dumper=YamlNoAliasDumper + Dumper=YamlNoAliasDumper, ) except PermissionError: - warnings.warn( - "Could not write cache to {}".format( - self._cache_file - ) - ) + warnings.warn("Could not write cache to {}".format(self._cache_file)) def __getitem__(self, pos): """Retrieve a cache entry.""" diff --git a/flickrhistory/cacheupdaterthread.py b/flickrhistory/cacheupdaterthread.py index a0b037a..e2c2da7 100644 --- a/flickrhistory/cacheupdaterthread.py +++ b/flickrhistory/cacheupdaterthread.py @@ -32,10 +32,7 @@ class CacheUpdaterThread(threading.Thread): """Wraps an ApiDownloader to run in a separate thread.""" - def __init__( - self, - done_queue - ): + def __init__(self, done_queue): """ Intialize a CacheUpdaterThread. 
diff --git a/flickrhistory/config.py b/flickrhistory/config.py index ae63d3d..649e4a8 100644 --- a/flickrhistory/config.py +++ b/flickrhistory/config.py @@ -57,28 +57,14 @@ class Config: _config = {} - def __init__( - self, - config=None, - config_files=None, - config_files_basename=None - ): + def __init__(self, config=None, config_files=None, config_files_basename=None): """Initialise a Config object, load configuration from file.""" if not self._config: - self._config.update( - self._load_config( - config_files, - config_files_basename - ) - ) + self._config.update(self._load_config(config_files, config_files_basename)) if config is not None: self._config.update(config) - def _load_config( - self, - config_files, - config_files_basename - ): + def _load_config(self, config_files, config_files_basename): config = {} if config_files is not None: @@ -93,30 +79,24 @@ def _load_config( os.path.abspath( os.path.join( ( - os.environ.get('APPDATA') - or os.environ.get('XDG_CONFIG_HOME') - or os.path.join(os.environ['HOME'], '.config') + os.environ.get("APPDATA") + or os.environ.get("XDG_CONFIG_HOME") + or os.path.join(os.environ["HOME"], ".config") ), - "{:s}.yml".format(config_files_basename) + "{:s}.yml".format(config_files_basename), ) - ) + ), ] for config_file in config_files: try: - config.update( - yaml.safe_load( - open(config_file, "r", encoding="utf-8") - ) - ) + config.update(yaml.safe_load(open(config_file, "r", encoding="utf-8"))) except FileNotFoundError: pass if config == {}: warnings.warn( - "No configuration found in files {}.".format( - ",".join(config_files) - ) + "No configuration found in files {}.".format(",".join(config_files)) ) return config diff --git a/flickrhistory/databaseobjects.py b/flickrhistory/databaseobjects.py index d98e463..6b62c89 100644 --- a/flickrhistory/databaseobjects.py +++ b/flickrhistory/databaseobjects.py @@ -19,10 +19,7 @@ """Base classes to represent flickr posts and users.""" -__all__ = [ - "FlickrPhoto", - "FlickrUser" 
-] +__all__ = ["FlickrPhoto", "FlickrUser"] import datetime @@ -48,16 +45,14 @@ class FlickrUser(Base): id = sqlalchemy.Column(sqlalchemy.BigInteger) farm = sqlalchemy.Column(sqlalchemy.SmallInteger) nsid = sqlalchemy.Column( - sqlalchemy.Text, - sqlalchemy.Computed("id::TEXT || '@N0' || farm::TEXT") + sqlalchemy.Text, sqlalchemy.Computed("id::TEXT || '@N0' || farm::TEXT") ) name = sqlalchemy.Column(sqlalchemy.Text) first_name = sqlalchemy.Column(sqlalchemy.Text) last_name = sqlalchemy.Column(sqlalchemy.Text) real_name = sqlalchemy.Column( - sqlalchemy.Text, - sqlalchemy.Computed("first_name || ' ' || last_name") + sqlalchemy.Text, sqlalchemy.Computed("first_name || ' ' || last_name") ) city = sqlalchemy.Column(sqlalchemy.Text) @@ -78,19 +73,13 @@ class FlickrUser(Base): photos = sqlalchemy.orm.relationship("FlickrPhoto", back_populates="user") - __table_args__ = ( - sqlalchemy.PrimaryKeyConstraint("id", "farm"), - ) + __table_args__ = (sqlalchemy.PrimaryKeyConstraint("id", "farm"),) @classmethod def from_raw_api_data_flickrphotossearch(cls, data): """Initialise a new FlickrUser with a flickr.photos.search data dict.""" user_id, farm = data["owner"].split("@N0") - user_data = { - "id": user_id, - "farm": farm, - "name": data["ownername"] - } + user_data = {"id": user_id, "farm": farm, "name": data["ownername"]} return cls(**user_data) @classmethod @@ -105,37 +94,28 @@ def from_raw_api_data_flickrprofilegetprofile(cls, data): # "joindate" needs special attentation try: join_date = datetime.datetime.fromtimestamp( - int(data["join_date"]), - tz=datetime.timezone.utc + int(data["join_date"]), tz=datetime.timezone.utc ) except KeyError: join_date = None - user_data = { - "id": user_id, - "farm": farm, - - "join_date": join_date - } + user_data = {"id": user_id, "farm": farm, "join_date": join_date} # all the other fields can be added as they are (if they exist) for field in [ "first_name", "last_name", - "city", "country", "hometown", - "occupation", "description", - 
"website", "facebook", "twitter", "tumblr", "instagram", - "pinterest" + "pinterest", ]: try: user_data[field] = data[field] @@ -174,7 +154,7 @@ class FlickrPhoto(Base): sqlalchemy.Computed( "'https://live.staticflickr.com/' || server::TEXT || '/' || " + "id::TEXT || '_' || encode(secret, 'hex') || '_z.jpg'" - ) + ), ) page_url = sqlalchemy.Column( sqlalchemy.Text, @@ -182,7 +162,7 @@ class FlickrPhoto(Base): "'https://www.flickr.com/photos/' || " + "user_id::TEXT || '@N0' || user_farm::TEXT || '/' || " + "id::TEXT || '/'" - ) + ), ) geom = sqlalchemy.Column(geoalchemy2.Geometry("POINT", 4326)) @@ -194,9 +174,7 @@ class FlickrPhoto(Base): __table_args__ = ( sqlalchemy.ForeignKeyConstraint( - ["user_id", "user_farm"], - ["users.id", "users.farm"], - "FlickrUser" + ["user_id", "user_farm"], ["users.id", "users.farm"], "FlickrUser" ), ) @@ -227,10 +205,7 @@ def from_raw_api_data_flickrphotossearch(cls, data): try: photo_data["secret"] = bytes.fromhex(data["secret"]) - except ( - ValueError, # some non-hex character - KeyError - ): + except (ValueError, KeyError): # some non-hex character pass try: @@ -259,8 +234,7 @@ def from_raw_api_data_flickrphotossearch(cls, data): try: photo_data["date_posted"] = datetime.datetime.fromtimestamp( - int(data["dateupload"]), - tz=datetime.timezone.utc + int(data["dateupload"]), tz=datetime.timezone.utc ) except KeyError: pass @@ -271,13 +245,12 @@ def from_raw_api_data_flickrphotossearch(cls, data): latitude = float(data["latitude"]) assert longitude != 0 and latitude != 0 photo_data["geom"] = "SRID=4326;POINT({longitude:f} {latitude:f})".format( - longitude=longitude, - latitude=latitude + longitude=longitude, latitude=latitude ) except ( AssertionError, # lon/lat is at exactly 0°N/S, 0°W/E -> bogus - KeyError, # not contained in API dict - TypeError # weird data returned + KeyError, # not contained in API dict + TypeError, # weird data returned ): pass diff --git a/flickrhistory/fancyflickrhistorydownloader.py 
b/flickrhistory/fancyflickrhistorydownloader.py index 22bf054..e40d965 100644 --- a/flickrhistory/fancyflickrhistorydownloader.py +++ b/flickrhistory/fancyflickrhistorydownloader.py @@ -47,7 +47,6 @@ class FancyFlickrHistoryDownloader(BasicFlickrHistoryDownloader): + "{t.normal}{t.blue}" + version + "{t.bold} ###" - + "{t.normal}" ) @@ -55,16 +54,12 @@ class FancyFlickrHistoryDownloader(BasicFlickrHistoryDownloader): "{t.normal} Downloaded metadata for " + "{t.bold}{t.magenta}{photos: 9d} 📷 photos " + "{t.normal}{t.magenta}{photo_rate: 11.1f}/s\n" - + "{t.normal} and updated " + "{t.bold}{t.red}{profiles: 9d} 👱 user profiles " + "{t.normal}{t.red}{profile_rate: 3.1f}/s\n" - + "{t.normal} using " + "{t.bold}{t.green}{workers: 9d} 💪 workers\n" - + "{t.normal}{t.bold} TODO: {todo: 12d} 🚧 time slots" - + "{t.normal}" ) STATUS_LINES = len(STATUS.splitlines()) @@ -76,7 +71,6 @@ class FancyFlickrHistoryDownloader(BasicFlickrHistoryDownloader): + "{t.normal}{t.magenta}{photo_rate: 11.1f}/s\n" + "{t.normal}and updated {t.bold}{t.red}{profiles: 9d} 👱 user profiles " + "{t.normal}{t.red}{profile_rate: 3.1f}/s\n" - + "{t.normal}" ) @@ -108,7 +102,7 @@ def report_progress(self): profiles=profile_count, profile_rate=profile_rate, workers=(threading.active_count() - self.NUM_MANAGERS), - todo=len(self._todo_deque) + todo=len(self._todo_deque), ) ) @@ -136,6 +130,6 @@ def summarise_overall_progress(self): photos=photo_count, photo_rate=photo_rate, profiles=profile_count, - profile_rate=profile_rate + profile_rate=profile_rate, ) ) diff --git a/flickrhistory/photodownloader.py b/flickrhistory/photodownloader.py index 96fb7fd..1b06241 100644 --- a/flickrhistory/photodownloader.py +++ b/flickrhistory/photodownloader.py @@ -49,13 +49,11 @@ def photos(self): "method": "flickr.photos.search", "format": "json", "nojsoncallback": 1, - "per_page": 500, "has_geo": 1, "extras": ", ".join( ["description", "date_upload", "date_taken", "geo", "owner_name"] ), - "min_upload_date": 
self._timespan.start.timestamp(), "max_upload_date": self._timespan.end.timestamp(), "sort": "date-posted-asc", @@ -71,10 +69,7 @@ def photos(self): params.update(query) try: - with requests.get( - self.API_ENDPOINT_URL, - params=params - ) as response: + with requests.get(self.API_ENDPOINT_URL, params=params) as response: results = response.json() except ( ConnectionError, @@ -91,9 +86,8 @@ def photos(self): except TypeError: num_photos = 0 - if ( - num_photos > 4000 - and self._timespan.duration > datetime.timedelta(seconds=1) + if num_photos > 4000 and self._timespan.duration > datetime.timedelta( + seconds=1 ): raise DownloadBatchIsTooLargeError( ( @@ -103,14 +97,13 @@ def photos(self): ) for photo in results["photos"]["photo"]: - # the flickr API is matching date_posted very fuzzily, # let’s not waste time with duplicates if ( - datetime.datetime.fromtimestamp( - int(photo["dateupload"]), - tz=datetime.timezone.utc - ) > self._timespan.end + datetime.datetime.fromtimestamp( + int(photo["dateupload"]), tz=datetime.timezone.utc + ) + > self._timespan.end ): break diff --git a/flickrhistory/photodownloaderthread.py b/flickrhistory/photodownloaderthread.py index 0a5caa9..3eb5cf5 100644 --- a/flickrhistory/photodownloaderthread.py +++ b/flickrhistory/photodownloaderthread.py @@ -37,12 +37,7 @@ class PhotoDownloaderThread(threading.Thread): """Wraps an PhotoDownloader to run in a separate thread.""" - def __init__( - self, - api_key_manager, - todo_deque, - done_queue - ): + def __init__(self, api_key_manager, todo_deque, done_queue): """ Intialize an PhotoDownloaderThread. 
@@ -81,9 +76,7 @@ def run(self): try: for photo in photo_downloader.photos: - with sqlalchemy.orm.Session( - self._engine - ) as session: + with sqlalchemy.orm.Session(self._engine) as session: try: with session.begin(): flickr_photo = ( diff --git a/flickrhistory/timespan.py b/flickrhistory/timespan.py index c78fc8b..0d4b548 100644 --- a/flickrhistory/timespan.py +++ b/flickrhistory/timespan.py @@ -20,9 +20,7 @@ """A period in time starting at a datetime and ending at another datetime.""" -__all__ = [ - "TimeSpan" -] +__all__ = ["TimeSpan"] import datetime @@ -33,11 +31,7 @@ class TimeSpan(yaml.YAMLObject): """A period in time starting at a datetime and ending at another datetime.""" - def __init__( - self, - start, - end - ): + def __init__(self, start, end): """Initialise a new TimeSpan object.""" self.start = start self.end = end @@ -45,13 +39,8 @@ def __init__( def __str__(self): """Return a string representation of this TimeSpan.""" return ( - "<{:s}" - + "({:%Y-%m-%dT%H:%M:%S.000Z}-{:%Y-%m-%dT%H:%M:%S.000Z})>" - ).format( - self.__class__.__name__, - self.start, - self.end - ) + "<{:s}" + "({:%Y-%m-%dT%H:%M:%S.000Z}-{:%Y-%m-%dT%H:%M:%S.000Z})>" + ).format(self.__class__.__name__, self.start, self.end) def __repr__(self): """Represent this TimeSpan in readable form.""" @@ -94,10 +83,7 @@ def __gt__(self, other): def __eq__(self, other): """Test if this TimeSpan and another one are equal.""" - return ( - self.start == other.start - and self.end == other.end - ) + return self.start == other.start and self.end == other.end def __add__(self, other): """Add another TimeSpan to this one.""" @@ -118,10 +104,7 @@ def __add__(self, other): # then add one or two items to the merged list if last.end >= new.start: merged.append( - TimeSpan( - min(last.start, new.start), - max(last.end, new.end) - ) + TimeSpan(min(last.start, new.start), max(last.end, new.end)) ) else: merged += [last, new] @@ -146,17 +129,9 @@ def __truediv__(self, other): duration = self.duration / 
other for i in range(other - 1): pieces.append( - TimeSpan( - self.start + i * duration, - self.start + (i + 1) * duration - ) + TimeSpan(self.start + i * duration, self.start + (i + 1) * duration) ) - pieces.append( - TimeSpan( - self.start + (other - 1) * duration, - self.end - ) - ) + pieces.append(TimeSpan(self.start + (other - 1) * duration, self.end)) return pieces @@ -174,9 +149,5 @@ def from_yaml(cls, loader, node): def to_yaml(cls, dumper, data): """Map a TimeSpan object to its YAML representation.""" return dumper.represent_mapping( - "!TimeSpan", - ( - ("start", data.start), - ("end", data.end) - ) + "!TimeSpan", (("start", data.start), ("end", data.end)) ) diff --git a/flickrhistory/userprofiledownloader.py b/flickrhistory/userprofiledownloader.py index 76ce8cc..7ccbf35 100644 --- a/flickrhistory/userprofiledownloader.py +++ b/flickrhistory/userprofiledownloader.py @@ -50,7 +50,7 @@ def get_profile_for_nsid(self, nsid): "method": "flickr.profile.getProfile", "format": "json", "nojsoncallback": 1, - "user_id": nsid + "user_id": nsid, } params = {} @@ -59,10 +59,7 @@ def get_profile_for_nsid(self, nsid): params.update(query) try: - with requests.get( - self.API_ENDPOINT_URL, - params=params - ) as response: + with requests.get(self.API_ENDPOINT_URL, params=params) as response: results = response.json() assert "profile" in results @@ -79,10 +76,6 @@ def get_profile_for_nsid(self, nsid): except AssertionError: # TODO: implement logging and report the response text + headers # if API hicups, return a stub data dict - results = { - "profile": { - "id": nsid - } - } + results = {"profile": {"id": nsid}} return results["profile"] diff --git a/flickrhistory/userprofileupdaterthread.py b/flickrhistory/userprofileupdaterthread.py index c8e9b4e..832d720 100644 --- a/flickrhistory/userprofileupdaterthread.py +++ b/flickrhistory/userprofileupdaterthread.py @@ -37,13 +37,11 @@ class UserProfileUpdaterThread(threading.Thread): """Finds incomplete user profiles and 
downloads missing data from the flickr API.""" - MAX_RETRIES = 5 # once all users have been updated, retry this times (with 10 min breaks) + MAX_RETRIES = ( + 5 # once all users have been updated, retry this times (with 10 min breaks) + ) - def __init__( - self, - api_key_manager, - partition=None - ): + def __init__(self, api_key_manager, partition=None): """ Intialize a UserProfileUpdateThread. @@ -63,12 +61,9 @@ def __init__( assert part <= number_of_partitions self._bounds = ( (part - 1) * 1.0 / number_of_partitions, - part * 1.0 / number_of_partitions + part * 1.0 / number_of_partitions, ) - except ( - AssertionError, - TypeError - ): + except (AssertionError, TypeError): self._bounds = None self.shutdown = threading.Event() @@ -87,9 +82,9 @@ def nsids_of_users_without_detailed_information(self): # a good way of finding “new” profiles with sqlalchemy.orm.Session(self._engine) as session: if self._bounds is None: - nsids_of_users_without_detailed_information = ( - session.query(FlickrUser.nsid).filter_by(join_date=None) - ) + nsids_of_users_without_detailed_information = session.query( + FlickrUser.nsid + ).filter_by(join_date=None) else: bounds = ( sqlalchemy.select( @@ -98,7 +93,7 @@ def nsids_of_users_without_detailed_information(self): .label("lower"), sqlalchemy.sql.functions.percentile_disc(self._bounds[1]) .within_group(FlickrUser.id) - .label("upper") + .label("upper"), ) .select_from(FlickrUser) .filter_by(join_date=None) @@ -111,7 +106,7 @@ def nsids_of_users_without_detailed_information(self): .yield_per(1000) ) - for nsid, in nsids_of_users_without_detailed_information: + for (nsid,) in nsids_of_users_without_detailed_information: yield nsid def run(self): @@ -120,14 +115,11 @@ def run(self): retries = 0 - while not ( - self.shutdown.is_set() - or retries >= self.MAX_RETRIES - ): + while not (self.shutdown.is_set() or retries >= self.MAX_RETRIES): for nsid in self.nsids_of_users_without_detailed_information: try: with sqlalchemy.orm.Session( - 
self._engine + self._engine ) as session, session.begin(): flickr_user = ( FlickrUser.from_raw_api_data_flickrprofilegetprofile(