Skip to content

Commit

Permalink
Adds --skip-dups to osxphotos import, #1264:
Browse files Browse the repository at this point in the history
  • Loading branch information
RhetTbull committed Nov 19, 2023
1 parent 950260d commit af09eee
Show file tree
Hide file tree
Showing 4 changed files with 194 additions and 10 deletions.
85 changes: 75 additions & 10 deletions osxphotos/cli/import_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@
from rich.markdown import Markdown
from strpdatetime import strpdatetime

from osxphotos.platform import assert_macos

assert_macos()

from photoscript import Photo, PhotosLibrary

import osxphotos.sqlite3_datetime as sqlite3_datetime
from osxphotos._constants import _OSXPHOTOS_NONE_SENTINEL, SQLITE_CHECK_SAME_THREAD
from osxphotos._version import __version__
from osxphotos.cli.cli_params import TIMESTAMP_OPTION, VERBOSE_OPTION
Expand All @@ -38,17 +45,14 @@
datetime_utc_to_local,
)
from osxphotos.exiftool import ExifToolCaching, get_exiftool_path
from osxphotos.fingerprint import fingerprint
from osxphotos.fingerprintquery import FingerprintQuery
from osxphotos.photoinfo import PhotoInfoNone
from osxphotos.photosalbum import PhotosAlbumPhotoScript
from osxphotos.phototemplate import PhotoTemplate, RenderOptions
from osxphotos.platform import assert_macos
from osxphotos.sqlitekvstore import SQLiteKVStore
from osxphotos.unicode import normalize_unicode
from osxphotos.utils import pluralize

assert_macos()

from photoscript import Photo, PhotosLibrary
from osxphotos.utils import get_last_library_path, pluralize

from .cli_params import THEME_OPTION
from .click_rich_echo import rich_click_echo, rich_echo_error
Expand All @@ -60,6 +64,8 @@
# functions vs ReportWriter class used by export) and I've kept everything for import
# self-contained in this one file

# register datetime adapters/converters for sqlite3
sqlite3_datetime.register()

MetaData = namedtuple("MetaData", ["title", "description", "keywords", "location"])

Expand Down Expand Up @@ -998,6 +1004,18 @@ def get_help(self, ctx):
Note: in Photos, only albums can contain photos and folders
may contain albums or other folders.
## Duplicate Checking
By default, `osxphotos import` will import all files passed to it even if duplicates
exist in the Photos library. If you want to skip duplicate files, you can use the
`--skip-dups` option which will cause osxphotos to check for exact duplicates (based on file fingerprint)
and skip those files. Alternatively, you can use `--dup-check` to enable Photos' own duplicate
checking. If a duplicate is encountered with `--dup-check`, Photos will prompt you
to skip or import the duplicate file.
If you use the `--verbose` option, osxphotos will report on any duplicates it finds
even if you don't use `--skip-dups` or `--dup-check`.
## Metadata
`osxphotos import` can set metadata (title, description, keywords, and location) for
Expand Down Expand Up @@ -1060,7 +1078,7 @@ def get_help(self, ctx):
- `{exiftool}`: Format: '{exiftool:GROUP:TAGNAME}'; use exiftool (https://exiftool.org)
to extract metadata, in form GROUP:TAGNAME, from image.
E.g. '{exiftool:EXIF:Make}' to get camera make, or {exiftool:IPTC:Keywords} to extract
keywords. See https://exiftool.org/TagNames/ for list of valid tag names.
keywords. See https://exiftool.org/TagNames/ for list of valid tag names.
You must specify group (e.g. EXIF, IPTC, etc) as used in `exiftool -G`.
exiftool must be installed in the path to use this template (alternatively, you can use
`--exiftool-path` to specify the path to exiftool.)
Expand Down Expand Up @@ -1317,7 +1335,21 @@ def get_help(self, ctx):
"If you set '--relative-to /Volumes/photos/import' "
"then '{filepath}' will be set to 'album/img_1234.jpg'",
)
@click.option("--dup-check", "-D", is_flag=True, help="Check for duplicates on import.")
@click.option(
"--dup-check",
"-D",
is_flag=True,
help="Use Photos' built-in duplicate checking to check for duplicates on import. "
"Using --dup-check will cause Photos to display a dialog box for each duplicate photo found, "
"requesting confirmation to import the duplicate. See also --skip-dups.",
)
@click.option(
"--skip-dups",
is_flag=True,
help="Skip duplicate photos on import; osxphotos will not import any photos that appear to be duplicates. "
"Unlike --dup-check, this does not use Photos' built in duplicate checking feature and "
"does not display a dialog box for each duplicate found. See also --dup-check.",
)
@click.option(
"--split-folder",
"-f",
Expand Down Expand Up @@ -1388,6 +1420,14 @@ def get_help(self, ctx):
"You can run more than one function by repeating the '--post-function' option with different arguments. "
"See Post Function below.",
)
@click.option(
"--library",
metavar="LIBRARY_PATH",
type=click.Path(exists=True),
help="Path to the Photos library you are importing into. This is not usually needed. "
"You will only need to specify this if osxphotos cannot determine the path to the library "
"in which case osxphotos will tell you to use the --library option when you run the import command.",
)
@THEME_OPTION
@click.argument("files", nargs=-1)
@click.pass_obj
Expand All @@ -1407,6 +1447,7 @@ def import_cli(
files,
glob,
keyword,
library,
location,
merge_keywords,
no_progress,
Expand All @@ -1415,6 +1456,7 @@ def import_cli(
relative_to,
report,
resume,
skip_dups,
split_folder,
theme,
timestamp,
Expand Down Expand Up @@ -1474,6 +1516,15 @@ def import_cli(
)
import_db.about = f"osxphotos import database\n{OSXPHOTOS_ABOUT_STRING}"

# need to get the library path to initialize FingerprintQuery
last_library = library or get_last_library_path()
if not last_library:
rich_echo_error(
"[error]Could not determine path to Photos library. "
"Please specify path to library with --library option."
)
fq = FingerprintQuery(last_library)

imported_count = 0
error_count = 0
skipped_count = 0
Expand Down Expand Up @@ -1502,10 +1553,24 @@ def import_cli(
continue

verbose(f"Importing [filepath]{filepath}[/]")
if duplicates := fq.photos_by_fingerprint(fingerprint(filepath)):
# duplicate of file already in Photos library
verbose(
f"File [filepath]{filepath}[/] is a duplicate of photos in the library: "
f"{', '.join([f'[filename]{f}[/] ([uuid]{u}[/])' for u, f in duplicates])}"
)

report_data[filepath] = ReportRecord(
filepath=filepath, filename=filepath.name
)
report_record = report_data[filepath]

if skip_dups:
verbose(f"Skipping duplicate [filepath]{filepath}[/]")
skipped_count += 1
report_record.imported = False
continue

photo, error = import_photo(filepath, dup_check, verbose)
if error:
error_count += 1
Expand Down Expand Up @@ -1601,10 +1666,10 @@ def import_cli(
write_report(report_file, report_data, append)
verbose(f"Wrote import report to [filepath]{report_file}[/]")

skipped_str = f"[num]{skipped_count}[/] skipped" if resume else ""
skipped_str = f", [num]{skipped_count}[/] skipped" if resume or skip_dups else ""
echo(
f"Done: imported [num]{imported_count}[/] {pluralize(imported_count, 'file', 'files')}, "
f"[num]{error_count}[/] {pluralize(error_count, 'error', 'errors')}"
f", {skipped_str}",
f"{skipped_str}",
emoji=False,
)
44 changes: 44 additions & 0 deletions osxphotos/fingerprintquery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Query Photos database for photos matching fingerprint """

from __future__ import annotations

import pathlib
import sqlite3

from ._constants import _DB_TABLE_NAMES
from .photosdb.photosdb_utils import get_photos_version_from_model


class FingerprintQuery:
    """Class to query Photos database for photos matching fingerprint

    The instance holds an open sqlite3 connection to the library's
    Photos.sqlite file. Call close() when finished, or use the instance as a
    context manager so the connection is released automatically.
    """

    def __init__(self, photos_library: str | pathlib.Path):
        """Create a new FingerprintQuery object

        Args:
            photos_library: path to the root of a Photos library or directly
                to its Photos.sqlite database file
        """
        # pathlib.Path() accepts both str and Path so no isinstance check is needed
        self.photos_library = pathlib.Path(photos_library)
        if self.photos_library.is_dir():
            # a directory is assumed to be the root of the Photos library;
            # anything else is assumed to be the path to the Photos.sqlite file
            self.photos_library = self.photos_library / "database" / "Photos.sqlite"
        # NOTE(review): sqlite3.connect() creates the file if it does not exist;
        # consider validating the path before connecting
        self.conn = sqlite3.connect(str(self.photos_library))
        try:
            self.photos_version = get_photos_version_from_model(
                str(self.photos_library)
            )
        except Exception:
            # do not leak the open connection if the version lookup fails
            self.conn.close()
            raise

    def __enter__(self) -> FingerprintQuery:
        """Return self so the object can be used in a `with` statement"""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the database connection when leaving the `with` block"""
        self.close()

    def close(self) -> None:
        """Close the connection to the Photos database"""
        self.conn.close()

    def photos_by_fingerprint(self, fingerprint: str) -> list[tuple[str, str]]:
        """Return a list of tuples of (uuid, original filename) for all photos matching fingerprint

        Args:
            fingerprint: value compared against ZMASTERFINGERPRINT

        Returns:
            list of (ZUUID, ZORIGINALFILENAME) rows; empty list if nothing matches
        """
        # the asset table name is looked up by Photos version in _DB_TABLE_NAMES
        asset_table = _DB_TABLE_NAMES[self.photos_version]["ASSET"]
        # the table name comes from an internal constant; the fingerprint is
        # always passed as a bound parameter
        sql = f"""
            SELECT {asset_table}.ZUUID,
            ZADDITIONALASSETATTRIBUTES.ZORIGINALFILENAME
            FROM {asset_table}
            JOIN ZADDITIONALASSETATTRIBUTES ON ZADDITIONALASSETATTRIBUTES.ZASSET = {asset_table}.Z_PK
            WHERE ZADDITIONALASSETATTRIBUTES.ZMASTERFINGERPRINT = ?
        """
        return self.conn.execute(sql, (fingerprint,)).fetchall()
57 changes: 57 additions & 0 deletions osxphotos/sqlite3_datetime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""Sqlite3 datetime adapters; import this module to register adapters for datetime objects;
these were built in before Python 3.12 but are deprecated in 3.12 """

import datetime
import sqlite3

# Reference: https://docs.python.org/3/library/sqlite3.html?highlight=sqlite3#sqlite3-adapter-converter-recipes


def adapt_date_iso(val):
    """Return the ISO 8601 text of a datetime.date (sqlite3 adapter)."""
    iso_date = val.isoformat()
    return iso_date


def adapt_datetime_iso(val):
    """Return the ISO 8601 text of a datetime.datetime (sqlite3 adapter).

    The value is formatted with isoformat() as-is; no timezone handling is
    done here.
    """
    iso_datetime = val.isoformat()
    return iso_datetime


def adapt_datetime_epoch(val):
    """Return a datetime.datetime as a whole-second Unix timestamp (sqlite3 adapter).

    The fractional part of the timestamp is dropped by int().
    """
    epoch_seconds = val.timestamp()
    return int(epoch_seconds)


def convert_date(val):
    """Decode an ISO 8601 date stored as bytes into a datetime.date (sqlite3 converter)."""
    iso_text = val.decode()
    return datetime.date.fromisoformat(iso_text)


def convert_datetime(val):
    """Decode an ISO 8601 datetime stored as bytes into a datetime.datetime (sqlite3 converter)."""
    iso_text = val.decode()
    return datetime.datetime.fromisoformat(iso_text)


def convert_timestamp(val):
    """Turn a stored integer Unix timestamp into a datetime.datetime (sqlite3 converter).

    The result is whatever datetime.datetime.fromtimestamp() returns for the
    parsed integer, i.e. a naive datetime in local time.
    """
    epoch_seconds = int(val)
    return datetime.datetime.fromtimestamp(epoch_seconds)


def register_adapters():
    """Register this module's date/datetime adapters with sqlite3.

    NOTE(review): datetime.datetime is registered twice (ISO, then epoch).
    The later registration presumably replaces the earlier one, which would
    leave the epoch adapter in effect -- confirm this is intended.
    """
    registrations = (
        (datetime.date, adapt_date_iso),
        (datetime.datetime, adapt_datetime_iso),
        (datetime.datetime, adapt_datetime_epoch),
    )
    # order matters: registrations are applied exactly in the order listed
    for py_type, adapter in registrations:
        sqlite3.register_adapter(py_type, adapter)


def register_converters():
    """Register this module's converters for the "date", "datetime" and "timestamp" type names."""
    converters = (
        ("date", convert_date),
        ("datetime", convert_datetime),
        ("timestamp", convert_timestamp),
    )
    for type_name, converter in converters:
        sqlite3.register_converter(type_name, converter)


def register():
    """Register both the adapters and the converters defined in this module."""
    # adapters first, then converters -- same order as before
    for register_step in (register_adapters, register_converters):
        register_step()
18 changes: 18 additions & 0 deletions tests/test_cli_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,24 @@ def test_import_dup_check():
assert photo_1.filename == file_1


@pytest.mark.test_import
def test_import_skip_dups():
    """Test basic import with --skip-dups

    NOTE(review): presumably relies on TEST_IMAGE_1 already being in the
    Photos library (e.g. imported by an earlier test) -- confirm.
    """

    image_path = os.path.join(os.getcwd(), TEST_IMAGE_1)
    cli_args = ["--verbose", "--skip-dups", image_path]
    result = CliRunner().invoke(
        import_cli,
        cli_args,
        terminal_width=TERMINAL_WIDTH,
    )

    assert result.exit_code == 0
    # the file must be reported as skipped and counted in the summary line
    for expected_text in ("Skipping duplicate", "1 skipped"):
        assert expected_text in result.output


@pytest.mark.test_import
def test_import_album():
"""Test basic import to an album"""
Expand Down

0 comments on commit af09eee

Please sign in to comment.