diff --git a/client/cipd.py b/client/cipd.py
index 927ec594354..6fb16324368 100644
--- a/client/cipd.py
+++ b/client/cipd.py
@@ -409,7 +409,8 @@ def get_client(service_url, package_name, version, cache_dir, timeout=None):
   version_cache = isolateserver.DiskCache(
       unicode(os.path.join(cache_dir, 'versions')),
       isolateserver.CachePolicies(0, 0, 300),
-      hashlib.sha1)
+      hashlib.sha1,
+      trim=True)
   with version_cache:
     version_cache.cleanup()
     # Convert |version| to a string that may be used as a filename in disk
@@ -431,7 +432,8 @@ def get_client(service_url, package_name, version, cache_dir, timeout=None):
   instance_cache = isolateserver.DiskCache(
       unicode(os.path.join(cache_dir, 'clients')),
       isolateserver.CachePolicies(0, 0, 5),
-      hashlib.sha1)
+      hashlib.sha1,
+      trim=True)
   with instance_cache:
     instance_cache.cleanup()
     if instance_id not in instance_cache:
diff --git a/client/isolateserver.py b/client/isolateserver.py
index 367a36aac2d..c8feb0961a2 100755
--- a/client/isolateserver.py
+++ b/client/isolateserver.py
@@ -980,6 +980,14 @@ def write(self, digest, content):
     """
     raise NotImplementedError()
 
+  def trim(self):
+    """Enforces cache policies.
+
+    Returns:
+      Number of items evicted.
+    """
+    raise NotImplementedError()
+
 
 class MemoryCache(LocalCache):
   """LocalCache implementation that stores everything in memory."""
@@ -1031,6 +1039,10 @@ def write(self, digest, content):
     self._added.append(len(data))
     return digest
 
+  def trim(self):
+    """Trimming is not implemented for MemoryCache."""
+    return 0
+
 
 class CachePolicies(object):
   def __init__(self, max_cache_size, min_free_space, max_items):
@@ -1055,7 +1067,7 @@ class DiskCache(LocalCache):
   """
   STATE_FILE = u'state.json'
 
-  def __init__(self, cache_dir, policies, hash_algo, trim=True):
+  def __init__(self, cache_dir, policies, hash_algo, trim, time_fn=None):
     """
     Arguments:
       cache_dir: directory where to place the cache.
@@ -1085,7 +1097,7 @@ def __init__(self, cache_dir, policies, hash_algo, trim=True):
     self._operations = []
     with tools.Profiler('Setup'):
       with self._lock:
-        self._load(trim=trim)
+        self._load(trim, time_fn)
 
   def __contains__(self, digest):
     with self._lock:
@@ -1157,6 +1169,7 @@ def cleanup(self):
         logging.warning('Removed %d lost files', len(previous))
         for filename in previous:
           self._lru.pop(filename)
+        self._save()
 
     # What remains to be done is to hash every single item to
     # detect corruption, then save to ensure state.json is up to date.
@@ -1255,9 +1268,9 @@ def get_timestamp(self, digest):
   def trim(self):
     """Forces retention policies."""
     with self._lock:
-      self._trim()
+      return self._trim()
 
-  def _load(self, trim):
+  def _load(self, trim, time_fn):
     """Loads state of the cache from json file.
 
     If cache_dir does not exist on disk, it is created.
@@ -1275,6 +1288,8 @@ def _load(self, trim):
         logging.error('Failed to load cache state: %s' % (err,))
         # Don't want to keep broken state file.
         file_path.try_remove(self.state_file)
+    if time_fn:
+      self._lru.time_fn = time_fn
     if trim:
       self._trim()
     # We want the initial cache size after trimming, i.e. what is readily
@@ -1338,6 +1353,7 @@ def _trim(self):
           usage_percent,
           self.policies.max_cache_size / 1024.)
     self._save()
+    return trimmed_due_to_space
 
   def _path(self, digest):
     """Returns the path to one item."""
@@ -1349,7 +1365,9 @@ def _remove_lru_file(self, allow_protected):
     try:
       digest, (size, _) = self._lru.get_oldest()
       if not allow_protected and digest == self._protected:
-        raise Error('Not enough space to fetch the whole isolated tree')
+        raise Error(
+            'Not enough space to fetch the whole isolated tree; %sb free, min '
+            'is %sb' % (self._free_disk, self.policies.min_free_space))
     except KeyError:
       raise Error('Nothing to remove')
     digest, (size, _) = self._lru.pop_oldest()
@@ -1992,7 +2010,7 @@ def add_cache_options(parser):
   parser.add_option_group(cache_group)
 
 
-def process_cache_options(options, trim=True):
+def process_cache_options(options, **kwargs):
   if options.cache:
     policies = CachePolicies(
         options.max_cache_size, options.min_free_space, options.max_items)
@@ -2002,7 +2020,7 @@ def process_cache_options(options, trim=True):
         unicode(os.path.abspath(options.cache)),
         policies,
         isolated_format.get_hash_algo(options.namespace),
-        trim=trim)
+        **kwargs)
   else:
     return MemoryCache()
 
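Aside on the isolateserver.py changes above: two API shifts are easy to miss. `trim` loses its default value, so every DiskCache construction site must now choose a trimming policy explicitly, and trim() now reports how many items it evicted. A minimal sketch of the new surface (illustration only, not part of the patch; the path and policies below are made up):

# Illustration only. Assumes the post-patch signature
# DiskCache(cache_dir, policies, hash_algo, trim, time_fn=None).
import hashlib

import isolateserver

cache = isolateserver.DiskCache(
    u'/tmp/example_cache',                   # hypothetical location
    isolateserver.CachePolicies(0, 0, 300),  # same shape as the cipd.py caches
    hashlib.sha1,
    trim=True)      # mandatory now: trim during load, or pass trim=False
with cache:
  cache.cleanup()
  evicted = cache.trim()  # number of items evicted, per the new
                          # LocalCache.trim() contract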
diff --git a/client/named_cache.py b/client/named_cache.py
index 7566ccca9a5..e3bf5e81c86 100644
--- a/client/named_cache.py
+++ b/client/named_cache.py
@@ -196,11 +196,15 @@ def trim(self, min_free_space):
     If min_free_space is None, disk free space is not checked.
 
     Requires NamedCache to be open.
+
+    Returns:
+      Number of caches deleted.
     """
     self._lock.assert_locked()
     if not os.path.isdir(self.root_dir):
-      return
+      return 0
 
+    total = 0
     free_space = 0
     if min_free_space:
       free_space = file_path.get_free_space(self.root_dir)
@@ -212,7 +216,7 @@ def trim(self, min_free_space):
       try:
         name, (path, _) = self._lru.get_oldest()
       except KeyError:
-        return
+        return total
       named_dir = self._get_named_path(name)
       if fs.islink(named_dir):
         fs.unlink(named_dir)
@@ -223,6 +227,8 @@ def trim(self, min_free_space):
       if min_free_space:
         free_space = file_path.get_free_space(self.root_dir)
       self._lru.pop(name)
+      total += 1
+    return total
 
 
 _DIR_ALPHABET = string.ascii_letters + string.digits
diff --git a/client/run_isolated.py b/client/run_isolated.py
index 72dcfff85af..ff4397fe2e3 100755
--- a/client/run_isolated.py
+++ b/client/run_isolated.py
@@ -824,8 +824,13 @@ def install_client_and_packages(
 
 
 def clean_caches(options, isolate_cache, named_cache_manager):
-  """Trims isolated and named caches."""
-  # Which cache to trim first? Which of caches was used least recently?
+  """Trims isolated and named caches.
+
+  The goal here is to coherently trim both caches, deleting older items
+  independent of which container they belong to.
+  """
+  # TODO(maruel): Trim CIPD cache the same way.
+  total = 0
   with named_cache_manager.open():
     oldest_isolated = isolate_cache.get_oldest()
     oldest_named = named_cache_manager.get_oldest()
@@ -840,9 +845,13 @@ def clean_caches(options, isolate_cache, named_cache_manager):
       ),
     ]
     trimmers.sort(key=lambda (_, ts): ts)
+    # TODO(maruel): This is incorrect; we want to trim 'items' that are
+    # strictly the oldest, independent of which cache they live in. Right now
+    # the cache with the oldest item pays the price.
     for trim, _ in trimmers:
-      trim()
+      total += trim()
   isolate_cache.cleanup()
+  return total
 
 
 def create_option_parser():
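Aside on the ordering above: trimmers is sorted by each cache's oldest timestamp, so the cache holding the oldest entry is trimmed first and, per the TODO, pays the whole price of its own policy. A tiny illustration with made-up timestamps and stub callbacks (Python 2, matching the tuple-unpacking lambda used above; not part of the patch):

# Illustration only: (trim_callback, oldest_timestamp) pairs shaped like the
# ones clean_caches() builds. Each stub returns a fake eviction count.
trimmers = [
    (lambda: 3, 100),  # isolate cache: oldest item at t=100, would evict 3
    (lambda: 1, 50),   # named cache: oldest item at t=50, would evict 1
]
trimmers.sort(key=lambda (_, ts): ts)  # named cache (t=50) goes first
total = 0
for trim, _ in trimmers:
  total += trim()
assert total == 4  # clean_caches() returns this grand total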
diff --git a/client/tests/isolateserver_test.py b/client/tests/isolateserver_test.py
index e4799a4d590..b92755e45f0 100755
--- a/client/tests/isolateserver_test.py
+++ b/client/tests/isolateserver_test.py
@@ -1336,7 +1336,8 @@ def get_free_space(p):
     #cache.touch()
 
   def get_cache(self):
-    return isolateserver.DiskCache(self.tempdir, self._policies, self._algo)
+    return isolateserver.DiskCache(
+        self.tempdir, self._policies, self._algo, trim=True)
 
   def to_hash(self, content):
     return self._algo(content).hexdigest(), content
diff --git a/client/tests/run_isolated_smoke_test.py b/client/tests/run_isolated_smoke_test.py
index 2d2316ce488..92687387fac 100755
--- a/client/tests/run_isolated_smoke_test.py
+++ b/client/tests/run_isolated_smoke_test.py
@@ -241,7 +241,7 @@ def tree_modes(root):
   """
   out = {}
   offset = len(root.rstrip('/\\')) + 1
-  out['.'] = oct(os.stat(root).st_mode)
+  out[u'.'] = oct(os.stat(root).st_mode)
   for dirpath, dirnames, filenames in os.walk(root):
     for filename in filenames:
       p = os.path.join(dirpath, filename)
@@ -443,13 +443,13 @@ def _test_corruption_common(self, new_content):
     _out, _err, returncode = self._run(self._cmd_args(isolated_hash))
     self.assertEqual(0, returncode)
     expected = {
-      '.': (040700, 040700, 040777),
-      'state.json': (0100600, 0100600, 0100666),
+      u'.': (040700, 040700, 040777),
+      u'state.json': (0100600, 0100600, 0100666),
       # The reason for 0100666 on Windows is that the file node had to be
       # modified to delete the hardlinked node. The read only bit is reset on
       # load.
-      file1_hash: (0100400, 0100400, 0100666),
-      isolated_hash: (0100400, 0100400, 0100444),
+      unicode(file1_hash): (0100400, 0100400, 0100666),
+      unicode(isolated_hash): (0100400, 0100400, 0100444),
     }
     self.assertTreeModes(self.cache, expected)
 
@@ -467,7 +467,7 @@ def _test_corruption_common(self, new_content):
     out, err, returncode = self._run(self._cmd_args(isolated_hash))
     self.assertEqual(0, returncode, (out, err, returncode))
     expected = {
-      '.': (040700, 040700, 040777),
+      u'.': (040700, 040700, 040777),
       u'state.json': (0100600, 0100600, 0100666),
       unicode(file1_hash): (0100400, 0100400, 0100666),
       unicode(isolated_hash): (0100400, 0100400, 0100444),
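Aside: the run_isolated_test.py changes below lean on the new time_fn hook to make LRU timestamps deterministic. A standalone sketch of that pattern (illustration only, not part of the patch; the path and policies are hypothetical):

# Illustration only. Drive DiskCache's LRU clock from the test so that
# eviction order is reproducible.
import hashlib

import isolateserver

fake_time = [1]
cache = isolateserver.DiskCache(
    u'/tmp/example_cache',                 # hypothetical location
    isolateserver.CachePolicies(0, 0, 0),  # illustrative; nothing to trim
    hashlib.sha1,
    trim=False,                            # skip trimming during load
    time_fn=lambda: fake_time[0])          # injected clock
with cache:
  data = 'hello'
  cache.write(unicode(hashlib.sha1(data).hexdigest()), [data])  # stamped t=1
  fake_time[0] = 2  # anything written from here on ranks as newer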
diff --git a/client/tests/run_isolated_test.py b/client/tests/run_isolated_test.py
index e367fa82294..a58e1261721 100755
--- a/client/tests/run_isolated_test.py
+++ b/client/tests/run_isolated_test.py
@@ -9,6 +9,7 @@
 import base64
 import contextlib
 import functools
+import hashlib
 import json
 import logging
 import os
@@ -24,6 +25,7 @@
 import cipd
 import isolated_format
 import isolateserver
+import named_cache
 import run_isolated
 from depot_tools import auto_stub
 from depot_tools import fix_encoding
@@ -39,6 +41,9 @@
 import cipdserver_mock
 
 
+ALGO = hashlib.sha1
+
+
 def write_content(filepath, content):
   with open(filepath, 'wb') as f:
     f.write(content)
@@ -48,6 +53,19 @@ def json_dumps(data):
   return json.dumps(data, sort_keys=True, separators=(',', ':'))
 
 
+def genTree(path):
+  """Returns a dict with {filepath: content}."""
+  if not os.path.isdir(path):
+    return None
+  out = {}
+  for root, _, filenames in os.walk(path):
+    for filename in filenames:
+      p = os.path.join(root, filename)
+      with open(p, 'rb') as f:
+        out[os.path.relpath(p, path)] = f.read()
+  return out
+
+
 @contextlib.contextmanager
 def init_named_caches_stub(_run_dir):
   yield
@@ -92,9 +110,10 @@ def setUp(self):
     self.cipd_server = cipdserver_mock.MockCipdServer()
 
   def tearDown(self):
+    # Remove mocks.
+    super(RunIsolatedTestBase, self).tearDown()
     file_path.rmtree(self.tempdir)
     self.cipd_server.close()
-    super(RunIsolatedTestBase, self).tearDown()
 
   @property
   def run_test_temp_dir(self):
@@ -558,6 +577,103 @@ def test_run_tha_test_non_isolated(self):
         [([u'/bin/echo', u'hello', u'world'], {'detached': True})],
         self.popen_calls)
 
+  def test_clean_caches(self):
+    # Create an isolated cache and a named cache each with 2 items. Ensure that
+    # one item from each is removed.
+    fake_time = 1
+    fake_free_space = [102400]
+    np = self.temp_join('named_cache')
+    ip = self.temp_join('isolated_cache')
+    args = [
+      '--named-cache-root', np, '--cache', ip, '--clean',
+      '--min-free-space', '10240',
+    ]
+    self.mock(file_path, 'get_free_space', lambda _: fake_free_space[0])
+    parser, options, _ = run_isolated.parse_args(args)
+    isolate_cache = isolateserver.process_cache_options(
+        options, trim=False, time_fn=lambda: fake_time)
+    self.assertIsInstance(isolate_cache, isolateserver.DiskCache)
+    named_cache_manager = named_cache.process_named_cache_options(
+        parser, options)
+    self.assertIsInstance(named_cache_manager, named_cache.CacheManager)
+
+    # Add items to these caches.
+    small = '0123456789'
+    big = small * 1014
+    small_digest = unicode(ALGO(small).hexdigest())
+    big_digest = unicode(ALGO(big).hexdigest())
+    with isolate_cache:
+      fake_time = 1
+      isolate_cache.write(big_digest, [big])
+      fake_time = 2
+      isolate_cache.write(small_digest, [small])
+    with named_cache_manager.open(time_fn=lambda: fake_time):
+      fake_time = 1
+      p = named_cache_manager.request('first')
+      with open(os.path.join(p, 'big'), 'wb') as f:
+        f.write(big)
+      fake_time = 3
+      p = named_cache_manager.request('second')
+      with open(os.path.join(p, 'small'), 'wb') as f:
+        f.write(small)
+
+    # Ensure the caches contain the expected data.
+    actual = genTree(np)
+    # Figure out the cache path names.
+    cache_small = [
+        os.path.dirname(n) for n in actual if os.path.basename(n) == 'small'][0]
+    cache_big = [
+        os.path.dirname(n) for n in actual if os.path.basename(n) == 'big'][0]
+    expected = {
+      os.path.join(cache_small, u'small'): small,
+      os.path.join(cache_big, u'big'): big,
+      u'state.json':
+          '{"items":[["first",["%s",1]],["second",["%s",3]]],"version":2}' % (
+          cache_big, cache_small),
+    }
+    self.assertEqual(expected, actual)
+    expected = {
+      big_digest: big,
+      small_digest: small,
+      u'state.json':
+          '{"items":[["%s",[10140,1]],["%s",[10,2]]],"version":2}' % (
+          big_digest, small_digest),
+    }
+    self.assertEqual(expected, genTree(ip))
+
+    # Request trimming.
+    fake_free_space[0] = 1020
+    # Abuse the fact that the named cache is trimmed after the isolated cache.
+    def rmtree(p):
+      self.assertEqual(os.path.join(np, cache_big), p)
+      fake_free_space[0] += 10240
+      return old_rmtree(p)
+    old_rmtree = self.mock(file_path, 'rmtree', rmtree)
+    isolate_cache = isolateserver.process_cache_options(options, trim=False)
+    named_cache_manager = named_cache.process_named_cache_options(
+        parser, options)
+    actual = run_isolated.clean_caches(
+        options, isolate_cache, named_cache_manager)
+    self.assertEqual(2, actual)
+    # One entry from each cache should have been cleaned up. This only happens
+    # to work because:
+    # - file_path.get_free_space() is mocked
+    # - DiskCache.trim() keeps its own internal counter while deleting files,
+    #   so it ignores get_free_space() output.
+    actual = genTree(np)
+    expected = {
+      os.path.join(cache_small, u'small'): small,
+      u'state.json':
+          '{"items":[["second",["%s",3]]],"version":2}' % cache_small,
+    }
+    self.assertEqual(expected, actual)
+    expected = {
+      small_digest: small,
+      u'state.json':
+          '{"items":[["%s",[10,2]]],"version":2}' % small_digest,
+    }
+    self.assertEqual(expected, genTree(ip))
+
 
 class RunIsolatedTestRun(RunIsolatedTestBase):
   def test_output(self):
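Taken together, the test above exercises the same path as `run_isolated.py --clean`. A hedged sketch of the programmatic equivalent, reusing only the calls the test itself makes; the cache roots are placeholders:

# Illustration only, not part of the patch.
import isolateserver
import named_cache
import run_isolated

parser, options, _ = run_isolated.parse_args([
    '--named-cache-root', '/tmp/nc',  # placeholder roots
    '--cache', '/tmp/ic',
    '--clean',
    '--min-free-space', '10240',
])
isolate_cache = isolateserver.process_cache_options(options, trim=False)
named_cache_manager = named_cache.process_named_cache_options(parser, options)
# clean_caches() now returns the total number of items evicted across both
# caches: one trim per cache, starting with the cache holding the oldest item.
evicted = run_isolated.clean_caches(options, isolate_cache, named_cache_manager)
print 'evicted %d item(s)' % evicted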