diff --git a/README.rst b/README.rst index 27e5640..95cd3cc 100644 --- a/README.rst +++ b/README.rst @@ -34,6 +34,15 @@ Usage: ``borg-import rsynchl RSYNC_ROOT BORG_REPOSITORY`` See ``borg-import rsynchl -h`` for help. +`rsync-time-backup <https://github.com/laurent22/rsync-time-backup>`_ +--------------------------------------------------------------------- + +Similar to `rsynchl`, except with timestamp extraction optimized for `rsync-time-backup` folder names. + +Usage: ``borg-import rsync_tmbackup --prefix=foo- RSYNC_ROOT BORG_REPOSITORY`` + +See ``borg-import rsync_tmbackup -h`` for help. + Backup tools based on rsync with hard links ------------------------------------------- diff --git a/src/borg_import/helpers/testsuite/test_timestamps.py b/src/borg_import/helpers/testsuite/test_timestamps.py index 08e79e9..1a747af 100644 --- a/src/borg_import/helpers/testsuite/test_timestamps.py +++ b/src/borg_import/helpers/testsuite/test_timestamps.py @@ -18,7 +18,27 @@ def test_datetime_from_mtime(tmpdir): def test_datetime_from_string(): - assert datetime_from_string('1999-12-31T23:59:59') == datetime(1999, 12, 31, 23, 59, 59) - assert datetime_from_string('Mon Oct 31 23:35:50 UTC 2016') == datetime(2016, 10, 31, 23, 35, 50) + dfs = datetime_from_string('1999-12-31T23:59:59') + dt_trg = datetime(1999, 12, 31, 23, 59, 59).astimezone(tz=timezone.utc) + assert dfs == dt_trg + # Of course, two datetimes can be equal in different timezones. Make + # sure the timezone info matches UTC, which borg itself expects. + assert dfs.tzinfo == dt_trg.tzinfo == timezone.utc + + # FIXME: When this format is passed to datetime_from_string, the internal + # strptime discards timezone info, and creates a naive time. + # UTC is handled specially inside datetime_from_string to accommodate + # strptime's quirks; local conversions using this format may or may not work. 
+ dfs = datetime_from_string('Mon Oct 31 23:35:50 UTC 2016') + dt_trg = datetime(2016, 10, 31, 23, 35, 50, tzinfo=timezone.utc) + assert dfs == dt_trg + assert dfs.tzinfo == dt_trg.tzinfo == timezone.utc + + # rsync-time-backup format. + dfs = datetime_from_string('2022-12-21-063019') + dt_trg = datetime(2022, 12, 21, 6, 30, 19).astimezone(tz=timezone.utc) + assert dfs == dt_trg + assert dfs.tzinfo == dt_trg.tzinfo == timezone.utc + with pytest.raises(ValueError): datetime_from_string('total crap') diff --git a/src/borg_import/helpers/timestamps.py b/src/borg_import/helpers/timestamps.py index 0352fc5..acbefbc 100644 --- a/src/borg_import/helpers/timestamps.py +++ b/src/borg_import/helpers/timestamps.py @@ -10,6 +10,7 @@ def datetime_from_mtime(path): at backup time). """ t = path.stat().st_mtime + # Borg needs tz-aware timestamps in UTC timezone. return datetime.fromtimestamp(t, tz=timezone.utc) @@ -17,7 +18,9 @@ def datetime_from_string(s): """ parse datetime from a string - returns a datetime object if the format could be parsed. + returns a tz-aware datetime object in UTC timezone if the format could be + parsed. + raises ValueError if not. """ s = s.strip() @@ -29,10 +32,30 @@ def datetime_from_string(s): '%Y-%m-%d %H:%M', # date tool output [C / en_US locale]: '%a %b %d %H:%M:%S %Z %Y', + # rsync-time-backup format + '%Y-%m-%d-%H%M%S' # for more, see https://xkcd.com/1179/ ]: try: - return datetime.strptime(s, ts_format) + if ts_format in ('%a %b %d %H:%M:%S %Z %Y',) and 'UTC' in s: + # %Z returns a naive datetime, despite a timezone being specified. + # However, strptime %Z only tends to work on local times and + # UTC. + # + # Per astimezone docs: + # If self is naive, it is presumed to represent time in the + # system timezone. + # + # If we had a UTC timezone, prevent conversion to aware + # datetime from assuming a local timezone before conversion + # to UTC. 
+ return datetime.strptime(s, ts_format).replace(tzinfo=timezone.utc) + else: + # If "UTC" wasn't specified using the above ts_format, assume + # the timezone specified was local and hope for the best. + # This handles all other ts_formats as well, which are assumed + # to be local since they don't carry timezone. + return datetime.strptime(s, ts_format).astimezone(tz=timezone.utc) except ValueError: # didn't work with this format, try next pass diff --git a/src/borg_import/main.py b/src/borg_import/main.py index 28b11c9..5c49e41 100755 --- a/src/borg_import/main.py +++ b/src/borg_import/main.py @@ -9,6 +9,7 @@ from .rsnapshots import get_snapshots from .rsynchl import get_rsyncsnapshots +from .rsync_tmbackup import get_tmbackup_snapshots log = logging.getLogger(__name__) @@ -206,6 +207,81 @@ def import_rsynchl(self, args): import_journal.unlink() +class rsyncTmBackupImporter(Importer): + name = 'rsync_tmbackup' + description = 'import rsync-time-backup backups' + epilog = """ + Imports from rsync-time-backup backup sets by renaming each snapshot to a + common name independent of the snapshot, which allows the Borg files cache + to work with maximum efficiency. The only difference between this and + generic rsync+hardlink backups is how archive timestamps are derived. + + An archive will be created for each folder in the rsync_root. A non-empty + prefix is required. The archive name will be the prefix concatenated with + the timestamp folder name (%Y-%m-%d-%H%M%S local time) and the archive + timestamp will be derived from the folder name (ISO 8601 UTC). If the borg + repository already contains an archive with the derived name, that folder + will be skipped. + + The directory is called "borg-import-dir" inside the specified root, + and borg-import will note which snapshot is currently located there + in a file called "borg-import-dir.snapshot" besides it, in case + things go wrong. 
+ + Otherwise nothing in the rsync root is modified, and neither + are the contents of the snapshots. + """ + + def populate_parser(self, parser): + parser.add_argument('rsync_root', metavar='RSYNC_ROOT', + help='Path to root directory', type=Path) + # TODO: support the full wealth of borg possibilities + parser.add_argument('repository', metavar='BORG_REPOSITORY', + help='Borg repository (must be an absolute local path or a remote repo specification)') + parser.set_defaults(function=self.import_rsync_tmbackup) + + def import_rsync_tmbackup(self, args): + existing_archives = list_borg_archives(args) + + import_path = args.rsync_root / 'borg-import-dir' + import_journal = args.rsync_root / 'borg-import-dir.snapshot' + + if import_path.exists(): + print('{} exists. Cannot continue.'.format(import_path)) + return 1 + + if not args.prefix: + print('"--prefix" argument must be non-empty to use rsync-time-backup import') + return 1 + + for rsnapshot in get_tmbackup_snapshots(args.rsync_root, args.prefix): + timestamp = rsnapshot['timestamp'].replace(microsecond=0) + snapshot_original_path = rsnapshot['path'] + name = rsnapshot['name'] + + if name in existing_archives: + print('Skipping (already exists in repository):', name) + continue + + print('Importing {} (timestamp {}) '.format(name, timestamp)) + log.debug(' Moving {} -> {}'.format(rsnapshot['path'], import_path)) + + # We move the snapshots to import_path so that the files cache in Borg can work effectively. 
+ + with import_journal.open('w') as fd: + fd.write('Current snapshot: %s\n' % rsnapshot['name']) + fd.write('Original path: %s\n' % snapshot_original_path) + + snapshot_original_path.rename(import_path) + + try: + borg_import(args, name, import_path, timestamp=timestamp) + finally: + log.debug(' Moving {} -> {}'.format(import_path, rsnapshot['path'])) + import_path.rename(snapshot_original_path) + import_journal.unlink() + + def build_parser(): common_parser = argparse.ArgumentParser(add_help=False) common_group = common_parser.add_argument_group('Common options') diff --git a/src/borg_import/rsync_tmbackup.py b/src/borg_import/rsync_tmbackup.py new file mode 100644 index 0000000..3c6ca54 --- /dev/null +++ b/src/borg_import/rsync_tmbackup.py @@ -0,0 +1,37 @@ +import re +from pathlib import Path + +from .helpers.discover import discover, parser +from .helpers.names import make_name +from .helpers.timestamps import datetime_from_string + + +def get_tmbackup_snapshots(root, prefix): + """Get all snapshot metadata discovered in the rsync root directory.""" + regex = re.compile(r'(?P<snapshot_date>.+)') + + if not Path("backup.marker").exists(): + raise FileNotFoundError("backup.marker file should exist for rsync-time-backup import") + + for path in discover(str(root), 1): + parsed = parser(path, regex) + if parsed is not None and parsed['snapshot_date'] not in ("latest",): + abs_path = root / path + meta = dict( + name=make_name("".join([prefix, parsed['snapshot_date']])), + path=abs_path, + timestamp=datetime_from_string(path), + ) + yield meta + elif parsed['snapshot_date'] in ("latest",): + # latest is a symlink to the most recent build. Import it anyway + # in case user wants to do borg mount/has existing references + # to latest. + abs_path = root / path + timestamp = Path("latest").resolve().name + meta = dict( + name=make_name("".join([prefix, "latest"])), + path=abs_path, + timestamp=datetime_from_string(timestamp), + ) + yield meta