diff --git a/src/ZODB/scripts/repozo.py b/src/ZODB/scripts/repozo.py index 78efadf82..19b4088aa 100755 --- a/src/ZODB/scripts/repozo.py +++ b/src/ZODB/scripts/repozo.py @@ -73,6 +73,12 @@ Note: for the stdout case, the index file will **not** be restored automatically. + -F / --full + Force a full recover. By default, an incremental recover is made + if possible, by only copying the latest backup delta to the recovered + ZODB file. A full recover will always be done if a pack has occurred + since the last incremental backup. + -w --with-verify Verify on the fly the backup files on recovering. This option runs @@ -185,7 +191,7 @@ class Options: mode = None # BACKUP, RECOVER or VERIFY file = None # name of input Data.fs file repository = None # name of directory holding backups - full = False # True forces full backup + full = False # True forces full backup or full recovery date = None # -D argument, if any output = None # where to write recovered data; None = stdout quick = False # -Q flag state @@ -396,9 +402,8 @@ def func(data): return bytesread, sum.hexdigest() -def recover_repofiles(options, repofiles, outfp): +def recover_repofiles(options, repofiles, datfile, outfp): if options.withverify: - datfile = os.path.splitext(repofiles[0])[0] + '.dat' with open(datfile) as fp: truth_dict = {} for line in fp: @@ -709,15 +714,7 @@ def do_backup(options): do_full_backup(options) -def do_recover(options): - # Find the first full backup at or before the specified date - repofiles = find_files(options) - if not repofiles: - if options.date: - raise NoFiles(f'No files in repository before {options.date}') - else: - raise NoFiles('No files in repository') - +def do_full_recover(options, repofiles): files_to_close = () if options.output is None: log('Recovering file to stdout') @@ -734,17 +731,8 @@ def do_recover(options): files_to_close += (outfp,) try: - recover_repofiles(options, repofiles, outfp) - if options.output is not None: - last_base = 
os.path.splitext(repofiles[-1])[0] - source_index = '%s.index' % last_base - target_index = '%s.index' % options.output - if os.path.exists(source_index): - log('Restoring index file %s to %s', - source_index, target_index) - shutil.copyfile(source_index, target_index) - else: - log('No index file to restore: %s', source_index) + datfile = os.path.splitext(repofiles[0])[0] + '.dat' + recover_repofiles(options, repofiles, datfile, outfp) finally: for f in files_to_close: f.close() @@ -758,6 +746,88 @@ def do_recover(options): raise +def do_incremental_recover(options, repofiles): + datfile = os.path.splitext(repofiles[0])[0] + '.dat' + log('Recovering (incrementally) file to %s', options.output) + with open(datfile) as fp, open(options.output, 'r+b') as outfp: + outfp.seek(0, 2) + initial_length = outfp.tell() + previous_chunk = None + for line in fp: + fn, startpos, endpos, _ = chunk = line.split() + startpos = int(startpos) + endpos = int(endpos) + if endpos > initial_length: + break + previous_chunk = chunk + else: + if endpos == initial_length: + log('Target file is same size as latest backup, ' + 'doing nothing.') + return + else: + log('Target file is longer than latest backup, ' + 'falling back to a full recover.') + return do_full_recover(options, repofiles) + if previous_chunk is None: + log('Target file shorter than full backup, ' + 'falling back to a full recover.') + return do_full_recover(options, repofiles) + check_startpos = int(previous_chunk[1]) + check_endpos = int(previous_chunk[2]) + outfp.seek(check_startpos) + if previous_chunk[3] != checksum(outfp, check_endpos - check_startpos): + log('Last whole common chunk checksum did not match with backup, ' + 'falling back to a full recover.') + return do_full_recover(options, repofiles) + assert outfp.tell() == startpos, (outfp.tell(), startpos) + + if startpos < initial_length: + log('Truncating target file %i bytes before its end', + initial_length - startpos) + filename = 
os.path.join(options.repository, + os.path.basename(fn)) + first_file_to_restore = repofiles.index(filename) + assert first_file_to_restore > 0, ( + first_file_to_restore, options.repository, fn, filename, repofiles) + + temporary_output_file = options.output + '.part' + os.rename(options.output, temporary_output_file) + with open(temporary_output_file, 'r+b') as outfp: + outfp.seek(startpos) + recover_repofiles(options, + repofiles[first_file_to_restore:], + datfile, + outfp) + os.rename(temporary_output_file, options.output) + + +def do_recover(options): + # Find the first full backup at or before the specified date + repofiles = find_files(options) + if not repofiles: + if options.date: + raise NoFiles(f'No files in repository before {options.date}') + else: + raise NoFiles('No files in repository') + + if options.full or not (options.output and os.path.exists(options.output)): + do_full_recover(options, repofiles) + else: + do_incremental_recover(options, repofiles) + + if options.output is not None: + last_base = os.path.splitext(repofiles[-1])[0] + source_index = '%s.index' % last_base + target_index = '%s.index' % options.output + if os.path.exists(source_index): + log('Restoring index file %s to %s', + source_index, target_index) + shutil.copyfile(source_index, target_index) + else: + log('No index file to restore: %s', source_index) + + def do_verify(options): # Verify the sizes and checksums of all files mentioned in the .dat file repofiles = find_files(options) diff --git a/src/ZODB/scripts/tests/test_repozo.py b/src/ZODB/scripts/tests/test_repozo.py index f19c81703..0936f46c7 100644 --- a/src/ZODB/scripts/tests/test_repozo.py +++ b/src/ZODB/scripts/tests/test_repozo.py @@ -849,8 +849,7 @@ def test_w_changes(self): self.assertEqual(index.maxKey(), db.maxkey) -class Test_do_recover(OptionsTestBase, unittest.TestCase): - +class Mixin_do_recover: def _callFUT(self, options): from ZODB.scripts.repozo import do_recover return do_recover(options) @@ -887,6 +886,17 @@ def 
test_no_files_before_explicit_date(self): files.append(self._makeFile(h, m, s, e)) self.assertRaises(NoFiles, self._callFUT, options) + +class Test_do_full_recover( + Mixin_do_recover, + OptionsTestBase, + unittest.TestCase +): + def _makeOptions(self, **kw): + options = super()._makeOptions(**kw) + options.full = True + return options + def test_w_full_backup_latest_no_index(self): import tempfile dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') @@ -995,6 +1005,205 @@ def test_w_incr_backup_with_verify_size_inconsistent(self): self.assertTrue(os.path.exists(output + '.part')) +class Test_do_incremental_recover( + Mixin_do_recover, + OptionsTestBase, + unittest.TestCase +): + def setUp(self): + from ZODB.scripts import repozo + self._old_verbosity = repozo.VERBOSE + self._old_stderr = sys.stderr + repozo.VERBOSE = True + sys.stderr = StringIO() + + def tearDown(self): + from ZODB.scripts import repozo + sys.stderr = self._old_stderr + repozo.VERBOSE = self._old_verbosity + + def _makeOptions(self, **kw): + options = super()._makeOptions(**kw) + options.full = False + return options + + def _createRecoveredDataFS(self, output, options): + self._makeFile(2, 3, 4, '.fs', 'AAA') + self._makeFile(4, 5, 6, '.deltafs', 'BBB') + self._makeFile( + 2, 3, 4, '.dat', + '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long + '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n') # noqa: E501 line too long + self._callFUT(options) + self.assertEqual(_read_file(output), b'AAABBB') + self.assertFalse(os.path.exists(output + '.part')) + return output + + def test_do_nothing(self): + import tempfile + dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') + output = os.path.join(dd, 'Data.fs') + options = self._makeOptions(date='2010-05-15-13-30-57', + output=output, + withverify=False) + self._createRecoveredDataFS(output, options) + self._callFUT(options) + self.assertIn( + "doing 
nothing", sys.stderr.getvalue()) + + def test_w_incr_recover_from_incr_backup(self): + import tempfile + dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') + output = os.path.join(dd, 'Data.fs') + options = self._makeOptions(date='2010-05-15-13-30-57', + output=output, + withverify=False) + self._createRecoveredDataFS(output, options) + # Create 2 more .deltafs, to prove the code knows where to pick up + self._makeFile(6, 7, 8, '.deltafs', 'CCC') + self._makeFile(8, 9, 10, '.deltafs', 'DDD') + self._makeFile( + 2, 3, 4, '.dat', + '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long + '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long + '/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166ae\n' # noqa: E501 line too long + '/backup/2010-05-14-08-09-10.deltafs 9 12 45054f47ac3305a2a33e9bcceadff712\n') # noqa: E501 line too long + self._callFUT(options) + self.assertEqual(_read_file(output), b'AAABBBCCCDDD') + self.assertFalse(os.path.exists(output + '.part')) + + def test_w_incr_backup_with_verify_sum_inconsistent(self): + import tempfile + dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') + output = os.path.join(dd, 'Data.fs') + options = self._makeOptions(date='2010-05-15-13-30-57', + output=output, + withverify=True) + self._createRecoveredDataFS(output, options) + self._makeFile(6, 7, 8, '.deltafs', 'CCC') + self._makeFile(8, 9, 10, '.deltafs', 'DDD') + self._makeFile( + 2, 3, 4, '.dat', + '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long + '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long + '/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166af\n' # noqa: E501 line too long + '/backup/2010-05-14-08-09-10.deltafs 9 12 45054f47ac3305a2a33e9bcceadff712\n') # noqa: E501 line too long + from 
ZODB.scripts.repozo import VerificationFail + self.assertRaises(VerificationFail, self._callFUT, options) + self.assertTrue(os.path.exists(output + '.part')) + + def test_w_incr_backup_with_verify_size_inconsistent_too_small(self): + import tempfile + dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') + output = os.path.join(dd, 'Data.fs') + options = self._makeOptions(date='2010-05-15-13-30-57', + output=output, + withverify=True) + self._createRecoveredDataFS(output, options) + self._makeFile(6, 7, 8, '.deltafs', 'CCC') + self._makeFile(8, 9, 10, '.deltafs', 'DDD') + self._makeFile( + 2, 3, 4, '.dat', + '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long + '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long + '/backup/2010-05-14-06-07-08.deltafs 6 8 defb99e69a9f1f6e06f15006b1f166ae\n' # noqa: E501 line too long + '/backup/2010-05-14-08-09-10.deltafs 9 12 45054f47ac3305a2a33e9bcceadff712\n') # noqa: E501 line too long + from ZODB.scripts.repozo import VerificationFail + self.assertRaises(VerificationFail, self._callFUT, options) + self.assertTrue(os.path.exists(output + '.part')) + + def test_w_incr_backup_with_verify_size_inconsistent_too_big(self): + import tempfile + dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') + output = os.path.join(dd, 'Data.fs') + options = self._makeOptions(date='2010-05-15-13-30-57', + output=output, + withverify=True) + self._createRecoveredDataFS(output, options) + self._makeFile(6, 7, 8, '.deltafs', 'CCC') + self._makeFile(8, 9, 10, '.deltafs', 'DDD') + self._makeFile( + 2, 3, 4, '.dat', + '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long + '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long + '/backup/2010-05-14-06-07-08.deltafs 6 10 defb99e69a9f1f6e06f15006b1f166ae\n' # noqa: E501 line too long + 
'/backup/2010-05-14-08-09-10.deltafs 9 12 45054f47ac3305a2a33e9bcceadff712\n') # noqa: E501 line too long + from ZODB.scripts.repozo import VerificationFail + self.assertRaises(VerificationFail, self._callFUT, options) + self.assertTrue(os.path.exists(output + '.part')) + + def test_w_inc_backup_switch_auto_to_full_recover_if_output_larger_than_dat(self): # noqa: E501 line too long + import tempfile + dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') + output = os.path.join(dd, 'Data.fs') + options = self._makeOptions(date='2010-05-15-13-30-57', + output=output, + withverify=False) + self._createRecoveredDataFS(output, options) + self._makeFile(6, 7, 8, '.deltafs', 'CCC') + self._makeFile( + 2, 3, 4, '.dat', + '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long + '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long + '/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166ae\n') # noqa: E501 line too long + # The ZODB is longer than announced in the .dat file + with open(output, 'r+b') as f: + f.write(b'AAABBBCCCDDD') + self._callFUT(options) + self.assertEqual(_read_file(output), b'AAABBBCCC') + self.assertFalse(os.path.exists(output + '.part')) + self.assertIn( + "falling back to a full recover", sys.stderr.getvalue()) + + def test_w_inc_backup_switch_auto_to_full_recover_if_last_chunk_is_wrong(self): # noqa: E501 line too long + import tempfile + dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') + output = os.path.join(dd, 'Data.fs') + options = self._makeOptions(date='2010-05-15-13-30-57', + output=output, + withverify=False) + self._createRecoveredDataFS(output, options) + self._makeFile(6, 7, 8, '.deltafs', 'CCC') + self._makeFile( + 2, 3, 4, '.dat', + '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long + '/backup/2010-05-14-04-05-06.deltafs 3 6 
2bb225f0ba9a58930757a868ed57d9a4\n' # noqa: E501 line too long + '/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166ae\n') # noqa: E501 line too long + self._callFUT(options) + self.assertEqual(_read_file(output), b'AAABBBCCC') + self.assertFalse(os.path.exists(output + '.part')) + self.assertIn( + "Last whole common chunk checksum did not match with backup, falling back to a full recover.", # noqa: E501 line too long + sys.stderr.getvalue()) + + def test_w_inc_backup_switch_auto_to_full_recover_after_pack(self): + import tempfile + dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') + output = os.path.join(dd, 'Data.fs') + options = self._makeOptions(date='2010-05-15-13-30-57', + output=output, + withverify=False) + self._makeFile(2, 3, 4, '.fs', 'AAA') + self._makeFile(4, 5, 6, '.deltafs', 'BBB') + self._makeFile( + 2, 3, 4, '.dat', + '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long + '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n') # noqa: E501 line too long + self._callFUT(options) + self.assertEqual(_read_file(output), b'AAABBB') + + self._makeFile(6, 7, 8, '.fs', 'CCDD') + self._makeFile( + 6, 7, 8, '.dat', + '/backup/2010-05-14-06-07-08.fs 0 4 dc0ee37408176d839c13f291a4d588de\n') # noqa: E501 line too long + self._callFUT(options) + self.assertEqual(_read_file(output), b'CCDD') + self.assertFalse(os.path.exists(output + '.part')) + self.assertIn( + 'Target file is longer than latest backup, falling back to a full recover.', # noqa: E501 line too long + sys.stderr.getvalue()) + + class Test_do_verify(OptionsTestBase, unittest.TestCase): def _callFUT(self, options): @@ -1281,7 +1490,8 @@ def test_suite(): loadTestsFromTestCase(Test_do_full_backup), loadTestsFromTestCase(Test_do_incremental_backup), # unittest.makeSuite(Test_do_backup), #TODO - loadTestsFromTestCase(Test_do_recover), + loadTestsFromTestCase(Test_do_full_recover), + 
loadTestsFromTestCase(Test_do_incremental_recover), loadTestsFromTestCase(Test_do_verify), # N.B.: this test take forever to run (~40sec on a fast laptop), # *and* it is non-deterministic.