Skip to content

Commit

Permalink
Refactor asset writing
Browse files: browse the repository at this point in the history
  • Loading branch information
FichteFoll committed Feb 25, 2024
1 parent 1df68e6 commit f3f28a8
Showing 1 changed file with 16 additions and 30 deletions.
46 changes: 16 additions & 30 deletions tasks/crawl.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import bz2
import gzip
import datetime
import io
import json
import os
import time
Expand All @@ -9,6 +10,7 @@
from decimal import Decimal

from pathlib import Path
from typing import Callable
from urllib.error import HTTPError

from lib.package_control import sys_path
Expand Down Expand Up @@ -43,6 +45,16 @@ def default(self, o):
return json.JSONEncoder.default(self, o)


# Generic type annotations for callable parameters are complicated,
# so we ignore that here for now.
def atomic_write_file(target_path: Path, opener: Callable[..., io.IOBase], content: bytes):
    """
    Writes content to a temporary sibling file and then moves it over
    the target path in a single step.

    :param target_path:
        The path of the file to create or overwrite

    :param opener:
        An open()-compatible callable (e.g. open, gzip.open, bz2.open)
        that is called with the temporary path and the mode 'wb'
        and returns a writable binary file object

    :param content:
        The bytes to write through the opened file object
    """
    out_path = target_path.with_name(target_path.name + '-new')
    try:
        with opener(out_path, 'wb') as f:
            f.write(content)
        # Path.replace overwrites an existing target in one step, so there
        # is no window in which the target is missing (as there would be
        # with unlink followed by rename).
        out_path.replace(target_path)
    except BaseException:
        # Do not leave a partially written temporary file behind.
        out_path.unlink(missing_ok=True)
        raise


def store_asset(path: Path, content: str):
"""
Stores an asset uncompressed and as gzip, bzip2 archive.
Expand All @@ -52,13 +64,7 @@ def store_asset(path: Path, content: str):
:param content:
The content
"""
filename = str(path)
new_filename = filename + '-new'
new_filename_gz = filename + '.gz-new'
new_filename_bz2 = filename + '.bz2-new'
filename_gz = filename + '.gz'
filename_bz2 = filename + '.bz2'
filename_sha512 = filename + '.sha512'
filename_sha512 = path.with_suffix(path.suffix + '.sha512')

encoded_content = content.encode('utf-8')
content_hash = hashlib.sha512(encoded_content).hexdigest().encode('utf-8')
Expand All @@ -72,29 +78,9 @@ def store_asset(path: Path, content: str):
except FileNotFoundError:
pass

with open(new_filename, 'wb') as f:
f.write(encoded_content)
try:
os.unlink(filename)
except FileNotFoundError:
pass
os.rename(new_filename, filename)

with gzip.open(new_filename_gz, 'w') as f:
f.write(encoded_content)
try:
os.unlink(filename_gz)
except FileNotFoundError:
pass
os.rename(new_filename_gz, filename_gz)

with bz2.open(new_filename_bz2, 'w') as f:
f.write(encoded_content)
try:
os.unlink(filename_bz2)
except FileNotFoundError:
pass
os.rename(new_filename_bz2, filename_bz2)
atomic_write_file(path, open, encoded_content)
atomic_write_file(path.with_suffix('.gz'), gzip.open, encoded_content)
atomic_write_file(path.with_suffix('.bz2'), bz2.open, encoded_content)

with open(filename_sha512, 'wb') as f:
f.write(content_hash)
Expand Down

0 comments on commit f3f28a8

Please sign in to comment.