From 502efb941a8177611d6295c864f7a88553af2fa7 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Sat, 18 Apr 2020 14:57:00 -0700
Subject: [PATCH] photos-to-sqlite upload photos.db dirname command, closes #4

---
Review notes for this revision of the patch:
* upload: auth.json is read inside a with-block so the handle is closed.
* upload: extensions are matched case-insensitively and non-files are skipped.
* upload: a row is written to the photos table only after the upload succeeds.
* upload: bucket name and content types are module-level constants.
* calculate_hash: docstring added, logic unchanged.
* Blob ids on the "index" lines below are those of the original commit.

 photos_to_sqlite/cli.py   | 80 +++++++++++++++++++++++++++++++++++++++
 photos_to_sqlite/utils.py | 19 +++++++++
 2 files changed, 99 insertions(+)
 create mode 100644 photos_to_sqlite/utils.py

diff --git a/photos_to_sqlite/cli.py b/photos_to_sqlite/cli.py
index 7b98ac4..9aff5bd 100644
--- a/photos_to_sqlite/cli.py
+++ b/photos_to_sqlite/cli.py
@@ -1,7 +1,9 @@
 import click
 import sqlite_utils
+import boto3
 import json
 import pathlib
+from .utils import calculate_hash
 
 
 @click.group()
@@ -35,3 +37,81 @@ def s3_auth(auth):
         }
     )
     open(auth, "w").write(json.dumps(auth_data, indent=4) + "\n")
+
+
+# Bucket that receives every uploaded photo.
+S3_BUCKET = "dogsheep-photos-simon"
+
+# Supported extensions (lower-case) mapped to the Content-Type sent to S3.
+CONTENT_TYPES = {
+    "jpg": "image/jpeg",
+    "jpeg": "image/jpeg",
+    "png": "image/png",
+    "gif": "image/gif",
+    "heic": "image/heic",
+}
+
+
+@cli.command()
+@click.argument(
+    "db_path",
+    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
+    required=True,
+)
+@click.argument(
+    "directories",
+    nargs=-1,
+    type=click.Path(file_okay=False, dir_okay=True, allow_dash=False),
+)
+@click.option(
+    "-a",
+    "--auth",
+    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True),
+    default="auth.json",
+    help="Path to auth.json token file",
+)
+def upload(db_path, directories, auth):
+    "Upload photos from directories to S3"
+    # Read the credentials inside a with-block so the file handle is closed.
+    with open(auth) as fp:
+        creds = json.load(fp)
+    db = sqlite_utils.Database(db_path)
+    client = boto3.client(
+        "s3",
+        aws_access_key_id=creds["photos_s3_access_key_id"],
+        aws_secret_access_key=creds["photos_s3_secret_access_key"],
+    )
+    uploads = db.table("photos", pk="sha256")
+    for directory in directories:
+        path = pathlib.Path(directory)
+        # Compare extensions case-insensitively so files such as IMG_1.JPG are
+        # picked up, and skip directories whose names end in an image suffix.
+        images = (
+            p.resolve()
+            for p in path.glob("**/*")
+            if p.is_file() and p.suffix.lstrip(".").lower() in CONTENT_TYPES
+        )
+        for filepath in images:
+            sha256 = calculate_hash(filepath)
+            # Lower-cased to match the CONTENT_TYPES keys; also used in the S3 key.
+            ext = filepath.suffix.lstrip(".").lower()
+            print(filepath)
+            keyname = "{}.{}".format(sha256, ext)
+            client.upload_file(
+                str(filepath),
+                S3_BUCKET,
+                keyname,
+                ExtraArgs={"ContentType": CONTENT_TYPES[ext]},
+            )
+            # Record the photo only after the upload succeeded, so a failed
+            # transfer does not leave a row for an object that is not in S3.
+            uploads.upsert({"sha256": sha256, "filepath": str(filepath), "ext": ext})
+            print(
+                " ... uploaded: {}".format(
+                    client.generate_presigned_url(
+                        "get_object",
+                        Params={"Bucket": S3_BUCKET, "Key": keyname},
+                        ExpiresIn=600,
+                    )
+                )
+            )
diff --git a/photos_to_sqlite/utils.py b/photos_to_sqlite/utils.py
new file mode 100644
index 0000000..29ca958
--- /dev/null
+++ b/photos_to_sqlite/utils.py
@@ -0,0 +1,19 @@
+import hashlib
+
+HASH_BLOCK_SIZE = 1024 * 1024
+
+
+def calculate_hash(path):
+    """Return the hex-encoded SHA-256 digest of the file at ``path``.
+
+    ``path`` must offer ``open("rb")`` (for example a ``pathlib.Path``). The
+    file is read in HASH_BLOCK_SIZE chunks rather than all at once.
+    """
+    m = hashlib.sha256()
+    with path.open("rb") as fp:
+        while True:
+            data = fp.read(HASH_BLOCK_SIZE)
+            if not data:
+                break
+            m.update(data)
+    return m.hexdigest()