From af09eee70beb3c389a1052be1b45f18919716594 Mon Sep 17 00:00:00 2001 From: Rhet Turnbull Date: Sun, 19 Nov 2023 08:43:48 -0800 Subject: [PATCH] Adds --skip-dups to osxphotos import, #1264: --- osxphotos/cli/import_cli.py | 85 ++++++++++++++++++++++++++++++----- osxphotos/fingerprintquery.py | 44 ++++++++++++++++++ osxphotos/sqlite3_datetime.py | 57 +++++++++++++++++++++++ tests/test_cli_import.py | 18 ++++++++ 4 files changed, 194 insertions(+), 10 deletions(-) create mode 100644 osxphotos/fingerprintquery.py create mode 100644 osxphotos/sqlite3_datetime.py diff --git a/osxphotos/cli/import_cli.py b/osxphotos/cli/import_cli.py index 071918c21..e65eb690e 100644 --- a/osxphotos/cli/import_cli.py +++ b/osxphotos/cli/import_cli.py @@ -24,6 +24,13 @@ from rich.markdown import Markdown from strpdatetime import strpdatetime +from osxphotos.platform import assert_macos + +assert_macos() + +from photoscript import Photo, PhotosLibrary + +import osxphotos.sqlite3_datetime as sqlite3_datetime from osxphotos._constants import _OSXPHOTOS_NONE_SENTINEL, SQLITE_CHECK_SAME_THREAD from osxphotos._version import __version__ from osxphotos.cli.cli_params import TIMESTAMP_OPTION, VERBOSE_OPTION @@ -38,17 +45,14 @@ datetime_utc_to_local, ) from osxphotos.exiftool import ExifToolCaching, get_exiftool_path +from osxphotos.fingerprint import fingerprint +from osxphotos.fingerprintquery import FingerprintQuery from osxphotos.photoinfo import PhotoInfoNone from osxphotos.photosalbum import PhotosAlbumPhotoScript from osxphotos.phototemplate import PhotoTemplate, RenderOptions -from osxphotos.platform import assert_macos from osxphotos.sqlitekvstore import SQLiteKVStore from osxphotos.unicode import normalize_unicode -from osxphotos.utils import pluralize - -assert_macos() - -from photoscript import Photo, PhotosLibrary +from osxphotos.utils import get_last_library_path, pluralize from .cli_params import THEME_OPTION from .click_rich_echo import rich_click_echo, rich_echo_error @@ -60,6 
+64,8 @@ # functions vs ReportWriter class used by export) and I've kept everything for import # self-contained in this one file +# register datetime adapters/converters for sqlite3 +sqlite3_datetime.register() MetaData = namedtuple("MetaData", ["title", "description", "keywords", "location"]) @@ -998,6 +1004,18 @@ def get_help(self, ctx): Note: in Photos, only albums can contain photos and folders may contain albums or other folders. + ## Duplicate Checking + + By default, `osxphotos import` will import all files passed to it even if duplicates + exist in the Photos library. If you want to skip duplicate files, you can use the + `--skip-dups` option which will cause osxphotos to check for exact duplicates (based on file fingerprint) + and skip those files. Alternatively, you can use `--dup-check` to enable Photos' own duplicate + checking. If a duplicate is encountered with `--dup-check`, Photos will prompt you + to skip or import the duplicate file. + + If you use the `--verbose` option, osxphotos will report on any duplicates it finds + even if you don't use `--skip-dups` or `--dup-check`. + ## Metadata `osxphotos import` can set metadata (title, description, keywords, and location) for @@ -1060,7 +1078,7 @@ def get_help(self, ctx): - `{exiftool}`: Format: '{exiftool:GROUP:TAGNAME}'; use exiftool (https://exiftool.org) to extract metadata, in form GROUP:TAGNAME, from image. E.g. '{exiftool:EXIF:Make}' to get camera make, or {exiftool:IPTC:Keywords} to extract - keywords. See https://exiftool.org/TagNames/ for list of valid tag names. + keywords. See https://exiftool.org/TagNames/ for list of valid tag names. You must specify group (e.g. EXIF, IPTC, etc) as used in `exiftool -G`. exiftool must be installed in the path to use this template (alternatively, you can use `--exiftool-path` to specify the path to exiftool.) 
@@ -1317,7 +1335,21 @@ def get_help(self, ctx): "If you set '--relative-to /Volumes/photos/import' " "then '{filepath}' will be set to 'album/img_1234.jpg'", ) -@click.option("--dup-check", "-D", is_flag=True, help="Check for duplicates on import.") +@click.option( + "--dup-check", + "-D", + is_flag=True, + help="Use Photos' built-in duplicate checking to check for duplicates on import. " + "Using --dup-check will cause Photos to display a dialog box for each duplicate photo found, " + "requesting confirmation to import the duplicate. See also --skip-dups.", +) +@click.option( + "--skip-dups", + is_flag=True, + help="Skip duplicate photos on import; osxphotos will not import any photos that appear to be duplicates. " + "Unlike --dup-check, this does not use Photos' built-in duplicate checking feature and " + "does not display a dialog box for each duplicate found. See also --dup-check.", +) @click.option( "--split-folder", "-f", @@ -1388,6 +1420,14 @@ def get_help(self, ctx): "You can run more than one function by repeating the '--post-function' option with different arguments. " "See Post Function below.", ) +@click.option( + "--library", + metavar="LIBRARY_PATH", + type=click.Path(exists=True), + help="Path to the Photos library you are importing into. This is not usually needed. 
" + "You will only need to specify this if osxphotos cannot determine the path to the library " + "in which case osxphotos will tell you to use the --library option when you run the import command.", +) @THEME_OPTION @click.argument("files", nargs=-1) @click.pass_obj @@ -1407,6 +1447,7 @@ def import_cli( files, glob, keyword, + library, location, merge_keywords, no_progress, @@ -1415,6 +1456,7 @@ def import_cli( relative_to, report, resume, + skip_dups, split_folder, theme, timestamp, @@ -1474,6 +1516,15 @@ def import_cli( ) import_db.about = f"osxphotos import database\n{OSXPHOTOS_ABOUT_STRING}" + # need to get the library path to initialize FingerprintQuery + last_library = library or get_last_library_path() + if not last_library: + rich_echo_error( + "[error]Could not determine path to Photos library. " + "Please specify path to library with --library option." + ) + fq = FingerprintQuery(last_library) + imported_count = 0 error_count = 0 skipped_count = 0 @@ -1502,10 +1553,24 @@ def import_cli( continue verbose(f"Importing [filepath]{filepath}[/]") + if duplicates := fq.photos_by_fingerprint(fingerprint(filepath)): + # duplicate of file already in Photos library + verbose( + f"File [filepath]{filepath}[/] is a duplicate of photos in the library: " + f"{', '.join([f'[filename]{f}[/] ([uuid]{u}[/])' for u, f in duplicates])}" + ) + report_data[filepath] = ReportRecord( filepath=filepath, filename=filepath.name ) report_record = report_data[filepath] + + if skip_dups: + verbose(f"Skipping duplicate [filepath]{filepath}[/]") + skipped_count += 1 + report_record.imported = False + continue + photo, error = import_photo(filepath, dup_check, verbose) if error: error_count += 1 @@ -1601,10 +1666,10 @@ def import_cli( write_report(report_file, report_data, append) verbose(f"Wrote import report to [filepath]{report_file}[/]") - skipped_str = f"[num]{skipped_count}[/] skipped" if resume else "" + skipped_str = f", [num]{skipped_count}[/] skipped" if resume or skip_dups 
# Tail of the preceding osxphotos/cli/import_cli.py hunk, which shares these
# source lines (patch text, kept verbatim for reference):
#        ... else ""
#        echo(
#            f"Done: imported [num]{imported_count}[/] {pluralize(imported_count, 'file', 'files')}, "
#            f"[num]{error_count}[/] {pluralize(error_count, 'error', 'errors')}"
#   -        f", {skipped_str}",
#   +        f"{skipped_str}",
#            emoji=False,
#        )

import datetime
import pathlib
import sqlite3

# ---------------------------------------------------------------------------
# osxphotos/fingerprintquery.py (new file)
# Query Photos database for photos matching fingerprint
# ---------------------------------------------------------------------------


class FingerprintQuery:
    """Query a Photos library database for assets matching a file fingerprint.

    The object owns an open sqlite3 connection to ``Photos.sqlite``. Call
    ``close()`` when done, or use the object as a context manager.
    """

    def __init__(self, photos_library: "str | pathlib.Path"):
        """Create a new FingerprintQuery object

        Args:
            photos_library: path to the root of a Photos library, or path to
                the library's Photos.sqlite database file

        Raises:
            FileNotFoundError: if the resolved database file does not exist
        """
        db_path = pathlib.Path(photos_library)
        if db_path.is_dir():
            # assume path to root of Photos library
            # if not, assume it's the path to the Photos.sqlite file
            db_path = db_path / "database" / "Photos.sqlite"

        # sqlite3.connect() silently creates a new, empty database when the
        # file is missing; fail early with a clear error instead.
        if not db_path.is_file():
            raise FileNotFoundError(f"Photos database not found: {db_path}")

        # Project-local helpers are imported here rather than at module level
        # so that importing this module stays free of package-level imports.
        from ._constants import _DB_TABLE_NAMES
        from .photosdb.photosdb_utils import get_photos_version_from_model

        self.photos_library = db_path
        self.conn = sqlite3.connect(str(db_path))
        try:
            self.photos_version = get_photos_version_from_model(str(db_path))
            # Resolve the asset table name once; it depends only on the
            # Photos version of this library.
            self._asset_table = _DB_TABLE_NAMES[self.photos_version]["ASSET"]
        except Exception:
            # don't leak the connection if the version cannot be determined
            self.conn.close()
            raise

    def photos_by_fingerprint(self, fingerprint: str) -> "list[tuple[str, str]]":
        """Return a list of (uuid, original filename) tuples for all photos matching fingerprint

        Args:
            fingerprint: fingerprint to match against ZMASTERFINGERPRINT

        Returns:
            One (ZUUID, ZORIGINALFILENAME) tuple per matching asset; an empty
            list if nothing matches.
        """
        asset_table = self._asset_table
        # The table name comes from an internal constant, not from user input;
        # the fingerprint itself is passed as a bound parameter.
        sql = f"""
            SELECT {asset_table}.ZUUID,
            ZADDITIONALASSETATTRIBUTES.ZORIGINALFILENAME
            FROM {asset_table}
            JOIN ZADDITIONALASSETATTRIBUTES ON ZADDITIONALASSETATTRIBUTES.ZASSET = {asset_table}.Z_PK
            WHERE ZADDITIONALASSETATTRIBUTES.ZMASTERFINGERPRINT = ?
        """
        return self.conn.execute(sql, (fingerprint,)).fetchall()

    def close(self) -> None:
        """Close the underlying database connection."""
        self.conn.close()

    def __enter__(self) -> "FingerprintQuery":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()


# ---------------------------------------------------------------------------
# osxphotos/sqlite3_datetime.py (new file)
# Sqlite3 datetime adapters; call register() to register adapters and
# converters for date/datetime objects; these were built in before
# Python 3.12 but are deprecated in 3.12
# Reference: https://docs.python.org/3/library/sqlite3.html?highlight=sqlite3#sqlite3-adapter-converter-recipes
# ---------------------------------------------------------------------------


def adapt_date_iso(val):
    """Adapt datetime.date to ISO 8601 date."""
    return val.isoformat()


def adapt_datetime_iso(val):
    """Adapt datetime.datetime to an ISO 8601 date/time string."""
    return val.isoformat()


def adapt_datetime_epoch(val):
    """Adapt datetime.datetime to Unix timestamp.

    Not registered by register_adapters(); available for callers that want
    to register epoch storage explicitly.
    """
    return int(val.timestamp())


def convert_date(val):
    """Convert ISO 8601 date to datetime.date object."""
    return datetime.date.fromisoformat(val.decode())


def convert_datetime(val):
    """Convert ISO 8601 datetime to datetime.datetime object."""
    return datetime.datetime.fromisoformat(val.decode())


def convert_timestamp(val):
    """Convert a stored timestamp to datetime.datetime object.

    Accepts a Unix epoch integer and also falls back to ISO 8601 text, which
    is what adapt_datetime_iso writes, so values round-trip through columns
    declared as ``timestamp``.
    """
    try:
        return datetime.datetime.fromtimestamp(int(val))
    except ValueError:
        return datetime.datetime.fromisoformat(val.decode())


def register_adapters():
    """Register adapters for date and datetime objects.

    Only one adapter can be active per Python type. Registering both the ISO
    and the epoch adapter for datetime.datetime would leave only the last one
    in effect, so only the ISO adapter (matching convert_datetime) is
    registered.
    """
    sqlite3.register_adapter(datetime.date, adapt_date_iso)
    sqlite3.register_adapter(datetime.datetime, adapt_datetime_iso)


def register_converters():
    """Register converters for datetime objects."""
    sqlite3.register_converter("date", convert_date)
    sqlite3.register_converter("datetime", convert_datetime)
    sqlite3.register_converter("timestamp", convert_timestamp)


def register():
    """Register adapters and converters for datetime objects."""
    register_adapters()
    register_converters()
# tests/test_cli_import.py: added between test_import_dup_check and test_import_album
@pytest.mark.test_import
def test_import_skip_dups():
    """Test basic import with --skip-dups

    Expects TEST_IMAGE_1 to already be in the Photos library so that it is
    reported as a duplicate (presumably imported by an earlier test; verify).
    """
    image_path = os.path.join(os.getcwd(), TEST_IMAGE_1)
    cli_args = ["--verbose", "--skip-dups", image_path]

    result = CliRunner().invoke(
        import_cli,
        cli_args,
        terminal_width=TERMINAL_WIDTH,
    )

    assert result.exit_code == 0
    # the duplicate must be both reported per file and counted in the summary
    output = result.output
    assert "Skipping duplicate" in output
    assert "1 skipped" in output